/*---------------------------------------------------------------*/
/*--- begin                              host_NANOMIPS_defs.c ---*/
/*---------------------------------------------------------------*/

/*
   This file is part of Valgrind, a dynamic binary instrumentation
   framework.

   Copyright (C) 2017-2018 RT-RK

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307, USA.

   The GNU General Public License is contained in the file COPYING.
*/

#include "libvex_basictypes.h"
#include "libvex.h"
#include "libvex_trc_values.h"

#include "main_util.h"
#include "host_generic_regs.h"
#include "host_nanomips_defs.h"

/* Register number for guest state pointer in host code. */
#define GuestSP 23
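
/* Note: register 23 is $s7 in the naming used by ppHRegNANOMIPS below.
   As on the classic MIPS backend, this register apparently doubles as
   the channel through which an assisted exit passes its TRC value to
   the dispatcher; see the NMin_XAssisted case in emit_NANOMIPSInstr. */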

NANOMIPSInstr *NANOMIPSInstr_Imm(NANOMIPSImmOp op, HReg dst, HReg src,
                                 UInt imm)
{
   NANOMIPSInstr *i = LibVEX_Alloc_inline(sizeof(NANOMIPSInstr));
   i->tag = NMin_Imm;
   i->NMin.Imm.op = op;
   i->NMin.Imm.dst = dst;
   i->NMin.Imm.src = src;
   i->NMin.Imm.imm = imm;
   return i;
}

NANOMIPSInstr *NANOMIPSInstr_Alu(NANOMIPSAluOp op, HReg dst, HReg srcL,
                                 HReg srcR)
{
   NANOMIPSInstr *i = LibVEX_Alloc_inline(sizeof(NANOMIPSInstr));
   i->tag = NMin_Alu;
   i->NMin.Alu.op = op;
   i->NMin.Alu.dst = dst;
   i->NMin.Alu.srcL = srcL;
   i->NMin.Alu.srcR = srcR;
   return i;
}

NANOMIPSInstr *NANOMIPSInstr_Unary(NANOMIPSUnaryOp op, HReg dst, HReg src)
{
   NANOMIPSInstr *i = LibVEX_Alloc_inline(sizeof(NANOMIPSInstr));
   i->tag = NMin_Unary;
   i->NMin.Unary.op = op;
   i->NMin.Unary.dst = dst;
   i->NMin.Unary.src = src;
   return i;
}

NANOMIPSInstr *NANOMIPSInstr_Cmp(NANOMIPSCondCode cond, HReg dst, HReg srcL,
                                 HReg srcR)
{
   NANOMIPSInstr *i = LibVEX_Alloc_inline(sizeof(NANOMIPSInstr));
   i->tag = NMin_Cmp;
   i->NMin.Cmp.dst = dst;
   i->NMin.Cmp.srcL = srcL;
   i->NMin.Cmp.srcR = srcR;
   i->NMin.Cmp.cond = cond;
   return i;
}

NANOMIPSInstr *NANOMIPSInstr_Call(Addr target,
                                  UInt argiregs, HReg guard, RetLoc rloc)
{
   UInt mask;
   NANOMIPSInstr *i = LibVEX_Alloc_inline(sizeof(NANOMIPSInstr));
   i->tag = NMin_Call;
   i->NMin.Call.target = target;
   i->NMin.Call.argiregs = argiregs;
   i->NMin.Call.guard = guard;
   i->NMin.Call.rloc = rloc;
   /* Only $4 .. $11 inclusive may be used as arg regs. */
   mask = (1 << 4) | (1 << 5) | (1 << 6) | (1 << 7) | (1 << 8) | (1 << 9)
          | (1 << 10) | (1 << 11);
   vassert(0 == (argiregs & ~mask));
   vassert(is_sane_RetLoc(rloc));
   return i;
}

NANOMIPSInstr *NANOMIPSInstr_XDirect(Addr64 dstGA, HReg address, Int offset,
                                     HReg cond, Bool toFastEP)
{
   NANOMIPSInstr* i = LibVEX_Alloc_inline(sizeof(NANOMIPSInstr));
   i->tag = NMin_XDirect;
   i->NMin.XDirect.dstGA = dstGA;
   i->NMin.XDirect.addr = address;
   i->NMin.XDirect.addr_offset = offset;
   i->NMin.XDirect.cond = cond;
   i->NMin.XDirect.toFastEP = toFastEP;
   return i;
}

NANOMIPSInstr *NANOMIPSInstr_XIndir(HReg dstGA, HReg address, Int offset,
                                    HReg cond)
{
   NANOMIPSInstr* i = LibVEX_Alloc_inline(sizeof(NANOMIPSInstr));
   i->tag = NMin_XIndir;
   i->NMin.XIndir.dstGA = dstGA;
   i->NMin.XIndir.addr = address;
   i->NMin.XIndir.addr_offset = offset;
   i->NMin.XIndir.cond = cond;
   return i;
}

NANOMIPSInstr *NANOMIPSInstr_XAssisted(HReg dstGA, HReg address, Int offset,
                                       HReg cond, IRJumpKind jk)
{
   NANOMIPSInstr* i = LibVEX_Alloc_inline(sizeof(NANOMIPSInstr));
   i->tag = NMin_XAssisted;
   i->NMin.XAssisted.dstGA = dstGA;
   i->NMin.XAssisted.addr = address;
   i->NMin.XAssisted.addr_offset = offset;
   i->NMin.XAssisted.cond = cond;
   i->NMin.XAssisted.jk = jk;
   return i;
}

NANOMIPSInstr *NANOMIPSInstr_Load(UChar sz, HReg dst,
                                  HReg addr, Int addr_offset)
{
   NANOMIPSInstr *i = LibVEX_Alloc_inline(sizeof(NANOMIPSInstr));
   i->tag = NMin_Load;
   i->NMin.Load.sz = sz;
   i->NMin.Load.addr = addr;
   i->NMin.Load.addr_offset = addr_offset;
   i->NMin.Load.dst = dst;
   vassert(sz == 1 || sz == 2 || sz == 4);
   return i;
}

NANOMIPSInstr *NANOMIPSInstr_Store(UChar sz, HReg addr, Int addr_offset,
                                   HReg src)
{
   NANOMIPSInstr *i = LibVEX_Alloc_inline(sizeof(NANOMIPSInstr));
   i->tag = NMin_Store;
   i->NMin.Store.sz = sz;
   i->NMin.Store.src = src;
   i->NMin.Store.addr = addr;
   i->NMin.Store.addr_offset = addr_offset;
   vassert(sz == 1 || sz == 2 || sz == 4);
   vassert(addr_offset < 0x1000);
   return i;
}

NANOMIPSInstr *NANOMIPSInstr_LoadL(UChar sz, HReg dst,
                                   HReg addr, Int addr_offset)
{
   NANOMIPSInstr *i = LibVEX_Alloc_inline(sizeof(NANOMIPSInstr));
   i->tag = NMin_LoadL;
   i->NMin.LoadL.sz = sz;
   /* The dst field was previously left unset here, although it is used
      by ppNANOMIPSInstr, getRegUsage and the emitter; set it. */
   i->NMin.LoadL.dst = dst;
   i->NMin.LoadL.addr = addr;
   i->NMin.LoadL.addr_offset = addr_offset;
   vassert(sz == 4);
   return i;
}

NANOMIPSInstr *NANOMIPSInstr_Cas(UChar sz, HReg oldLo, HReg oldHi, HReg addr,
                                 HReg expdLo, HReg expdHi,
                                 HReg dataLo, HReg dataHi)
{
   NANOMIPSInstr *i = LibVEX_Alloc_inline(sizeof(NANOMIPSInstr));
   i->tag = NMin_Cas;
   i->NMin.Cas.sz = sz;
   i->NMin.Cas.oldLo = oldLo;
   i->NMin.Cas.addr = addr;
   i->NMin.Cas.expdLo = expdLo;
   i->NMin.Cas.dataLo = dataLo;

   vassert((sz == 4) || (sz == 8));

   if (sz == 8) {
      i->NMin.Cas.oldHi = oldHi;
      i->NMin.Cas.expdHi = expdHi;
      i->NMin.Cas.dataHi = dataHi;
   }

   return i;
}

NANOMIPSInstr *NANOMIPSInstr_StoreC(UChar sz, HReg addr, Int addr_offset,
                                    HReg src)
{
   NANOMIPSInstr *i = LibVEX_Alloc_inline(sizeof(NANOMIPSInstr));
   i->tag = NMin_StoreC;
   i->NMin.StoreC.sz = sz;
   i->NMin.StoreC.src = src;
   i->NMin.StoreC.addr = addr;
   i->NMin.StoreC.addr_offset = addr_offset;
   vassert(sz == 4);
   return i;
}

NANOMIPSInstr *NANOMIPSInstr_MoveCond(NANOMIPSMoveCondOp op, HReg dst,
                                      HReg src, HReg cond)
{
   NANOMIPSInstr *i = LibVEX_Alloc_inline(sizeof(NANOMIPSInstr));
   i->tag = NMin_MoveCond;
   i->NMin.MoveCond.op = op;
   i->NMin.MoveCond.dst = dst;
   i->NMin.MoveCond.src = src;
   i->NMin.MoveCond.cond = cond;
   return i;
}

NANOMIPSInstr *NANOMIPSInstr_EvCheck(HReg r_amCounter,
                                     Int offset_amCounter,
                                     HReg r_amFailAddr,
                                     Int offset_amFailAddr)
{
   NANOMIPSInstr* i = LibVEX_Alloc_inline(sizeof(NANOMIPSInstr));
   i->tag = NMin_EvCheck;
   i->NMin.EvCheck.r_amCounter = r_amCounter;
   i->NMin.EvCheck.offset_amCounter = offset_amCounter;
   i->NMin.EvCheck.r_amFailAddr = r_amFailAddr;
   i->NMin.EvCheck.offset_amFailAddr = offset_amFailAddr;
   return i;
}

NANOMIPSInstr* NANOMIPSInstr_ProfInc ( void )
{
   NANOMIPSInstr* i = LibVEX_Alloc_inline(sizeof(NANOMIPSInstr));
   i->tag = NMin_ProfInc;
   return i;
}
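
/* A note on the names below: per the nanoMIPS ABI, several o32 register
   names change.  There are no $v0/$v1: $2/$3 are instead called $t4/$t5,
   $8..$11 become $a4..$a7, and $12..$15 become $t0..$t3.  The table
   therefore intentionally differs from the classic MIPS one. */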

UInt ppHRegNANOMIPS(HReg r)
{
   static const HChar* regnames[32] = {
      "zero", "at", "t4", "t5", "a0", "a1", "a2", "a3", "a4", "a5", "a6", "a7",
      "t0", "t1", "t2", "t3", "s0", "s1", "s2", "s3", "s4", "s5", "s6", "s7",
      "t8", "t9", "k0", "k1", "gp", "sp", "fp", "ra"
   };
   UChar r_no;

   if (hregIsVirtual(r)) {
      return ppHReg(r);
   }

   vassert(hregClass(r) == HRcInt32);
   r_no = hregEncoding(r);
   vassert(r_no < 32);

   return vex_printf("%s", regnames[r_no]);
}

void ppNANOMIPSInstr(const NANOMIPSInstr* i)
{
   switch (i->tag) {
      case NMin_Imm:
         switch (i->NMin.Imm.op) {
            case NMimm_SLL:
               vex_printf("sll ");
               break;

            case NMimm_SRL:
               vex_printf("srl ");
               break;

            case NMimm_LI:
               vex_printf("LI ");
               break;

            case NMimm_SRA:
               vex_printf("sra ");
               break;

            case NMimm_SGN:
               vex_printf("SGN ");
               break;

            case NMimm_ORI:
               vex_printf("ori ");
               break;

            case NMimm_XORI:
               vex_printf("xori ");
               break;

            case NMimm_ANDI:
               vex_printf("andi ");
               break;

            case NMimm_ROTX:
               vex_printf("rotx ");
               break;

            default:
               vassert(0);
         }

         ppHRegNANOMIPS(i->NMin.Imm.dst);
         vex_printf(", ");

         if (i->NMin.Imm.op != NMimm_LI) {
            ppHRegNANOMIPS(i->NMin.Imm.src);
            vex_printf(", ");
         }

         if (i->NMin.Imm.op == NMimm_ROTX)
            vex_printf("%u, %u, %u", (i->NMin.Imm.imm >> 7) & 0xF,
                       (i->NMin.Imm.imm >> 6) & 1, i->NMin.Imm.imm & 0x1F);
         else
            vex_printf("0x%X (%d)", i->NMin.Imm.imm, (Int)i->NMin.Imm.imm);

         break;

      case NMin_Alu:
         switch (i->NMin.Alu.op) {
            case NMalu_SLL:
               vex_printf("sllv ");
               break;

            case NMalu_SRL:
               vex_printf("srlv ");
               break;

            case NMalu_SRA:
               vex_printf("srav ");
               break;

            case NMalu_OR:
               if (sameHReg(i->NMin.Alu.srcL, i->NMin.Alu.srcR))
                  vex_printf("move ");
               else
                  vex_printf("or ");

               break;

            case NMalu_XOR:
               vex_printf("xor ");
               break;

            case NMalu_AND:
               vex_printf("and ");
               break;

            case NMalu_ADD:
               vex_printf("add ");
               break;

            case NMalu_SUB:
               vex_printf("sub ");
               break;

            case NMalu_SLT:
               vex_printf("slt ");
               break;

            case NMalu_NOR:
               vex_printf("nor ");
               break;

            case NMalu_MUL:
               vex_printf("mul ");
               break;

            case NMalu_MULU:
               vex_printf("mulu ");
               break;

            case NMalu_MUH:
               vex_printf("muh ");
               break;

            case NMalu_MUHU:
               vex_printf("muhu ");
               break;

            case NMalu_DIV:
               vex_printf("div ");
               break;

            case NMalu_DIVU:
               vex_printf("divu ");
               break;

            case NMalu_MOD:
               vex_printf("mod ");
               break;

            case NMalu_MODU:
               vex_printf("modu ");
               break;

            default:
               vassert(0);
         }

         ppHRegNANOMIPS(i->NMin.Alu.dst);
         vex_printf(", ");
         ppHRegNANOMIPS(i->NMin.Alu.srcL);

         if ((i->NMin.Alu.op != NMalu_OR) ||
             !sameHReg(i->NMin.Alu.srcL, i->NMin.Alu.srcR)) {
            vex_printf(", ");
            ppHRegNANOMIPS(i->NMin.Alu.srcR);
         }

         break;

      case NMin_Unary:
         switch (i->NMin.Unary.op) {
            case NMun_CLO:
               vex_printf("clo");
               break;

            case NMun_CLZ:
               vex_printf("clz");
               break;

            case NMun_NOP:
               vex_printf("nop");
               break;

            default:
               vassert(0);
         }

         if (i->NMin.Unary.op != NMun_NOP) {
            ppHRegNANOMIPS(i->NMin.Unary.dst);
            vex_printf(",");
            ppHRegNANOMIPS(i->NMin.Unary.src);
         }

         break;

      case NMin_Cmp:
         switch (i->NMin.Cmp.cond) {
            case NMcc_EQ:
               vex_printf("EQ ");
               break;

            case NMcc_NE:
               vex_printf("NE ");
               break;

            case NMcc_LTS:
               vex_printf("LTS ");
               break;

            case NMcc_LTU:
               vex_printf("LTU ");
               break;

            case NMcc_LES:
               vex_printf("LES ");
               break;

            case NMcc_LEU:
               vex_printf("LEU ");
               break;

            case NMcc_AL:
               vex_printf("AL ");
               break;

            case NMcc_NV:
               vex_printf("NV ");
               break;

            default:
               vassert(0);
         }

         ppHRegNANOMIPS(i->NMin.Cmp.dst);
         vex_printf(", ");
         ppHRegNANOMIPS(i->NMin.Cmp.srcL);
         vex_printf(", ");
         ppHRegNANOMIPS(i->NMin.Cmp.srcR);

         break;

      case NMin_Call:
         vex_printf("CALL 0x%lX, #%X, ", i->NMin.Call.target,
                    i->NMin.Call.argiregs);
         ppHRegNANOMIPS(i->NMin.Call.guard);
         break;

      case NMin_XDirect:
         vex_printf("(xDirect) ");

         if (!hregIsInvalid(i->NMin.XDirect.cond)) {
            vex_printf("beqc ");
            ppHRegNANOMIPS(i->NMin.XDirect.cond);
            vex_printf(", zero, 12; ");
         }

         vex_printf("LI a5, 0x%08lX; ", i->NMin.XDirect.dstGA);
         vex_printf("sw a5, %d(", i->NMin.XDirect.addr_offset);
         ppHRegNANOMIPS(i->NMin.XDirect.addr);
         vex_printf("); LI a5, <%s>; ", i->NMin.XDirect.toFastEP ?
                    "disp_cp_chain_me_to_fastEP" : "disp_cp_chain_me_to_slowEP");
         vex_printf("jalrc a5");
         break;

      case NMin_XIndir:
         vex_printf("(xIndir) ");

         if (!hregIsInvalid(i->NMin.XIndir.cond)) {
            vex_printf("beqc ");
            ppHRegNANOMIPS(i->NMin.XIndir.cond);
            vex_printf(", zero, 16; ");
         }

         vex_printf("sw ");
         ppHRegNANOMIPS(i->NMin.XIndir.dstGA);
         vex_printf(", %d(", i->NMin.XIndir.addr_offset);
         ppHRegNANOMIPS(i->NMin.XIndir.addr);
         vex_printf("); LI a5, <disp_cp_xindir>; ");
         vex_printf("jalrc a5");
         break;

      case NMin_XAssisted:
         vex_printf("(xAssisted) ");

         if (!hregIsInvalid(i->NMin.XAssisted.cond)) {
            vex_printf("beqc ");
            ppHRegNANOMIPS(i->NMin.XAssisted.cond);
            vex_printf(", zero, 24; ");
         }

         vex_printf("sw ");
         ppHRegNANOMIPS(i->NMin.XAssisted.dstGA);
         vex_printf(", %d(", i->NMin.XAssisted.addr_offset);
         ppHRegNANOMIPS(i->NMin.XAssisted.addr);
         vex_printf("); move a5, $IRJumpKind_to_TRCVAL(%d)",
                    (Int)i->NMin.XAssisted.jk);
         vex_printf("; LI a5, <disp_cp_xassisted>; ");
         vex_printf("jalrc a5");
         break;

      case NMin_EvCheck:
         vex_printf("(evCheck) ");
         vex_printf("lw a5, %d(", i->NMin.EvCheck.offset_amCounter);
         ppHRegNANOMIPS(i->NMin.EvCheck.r_amCounter);
         vex_printf("); addiu a5, a5, -1");
         vex_printf("; sw a5, %d(", i->NMin.EvCheck.offset_amCounter);
         ppHRegNANOMIPS(i->NMin.EvCheck.r_amCounter);
         vex_printf("); bgec a5, zero, nofail; ");
         vex_printf("lw a5, %d(", i->NMin.EvCheck.offset_amFailAddr);
         ppHRegNANOMIPS(i->NMin.EvCheck.r_amFailAddr);
         vex_printf("); jalrc a5; nofail:");
         break;

      case NMin_ProfInc:
         vex_printf("(profInc) li a5, ($NotKnownYet); "
                    "lw a4, 0(a5); "
                    "addiu a4, a4, 1; "
                    "sw a4, 0(a5); "
                    "sltiu at, a4, 1; "
                    "lw a4, 4(a5); "
                    "addu a4, a4, at; "
                    "sw a4, 4(a5); " );
         break;

      case NMin_Load:
         switch (i->NMin.Load.sz) {
            case 1:
               vex_printf("lb ");
               break;

            case 2:
               vex_printf("lh ");
               break;

            case 4:
               vex_printf("lw ");
               break;
         }

         ppHRegNANOMIPS(i->NMin.Load.dst);
         vex_printf(", (%d)", i->NMin.Load.addr_offset);
         ppHRegNANOMIPS(i->NMin.Load.addr);
         break;

      case NMin_Store:
         switch (i->NMin.Store.sz) {
            case 1:
               vex_printf("sb ");
               break;

            case 2:
               vex_printf("sh ");
               break;

            case 4:
               vex_printf("sw ");
               break;
         }

         ppHRegNANOMIPS(i->NMin.Store.src);
         vex_printf(", (%d)", i->NMin.Store.addr_offset);
         ppHRegNANOMIPS(i->NMin.Store.addr);
         break;

      case NMin_Cas:
         vex_printf("cas: \n");

         if (i->NMin.Cas.sz == 4) {
            vex_printf("ll ");
            ppHRegNANOMIPS(i->NMin.Cas.oldLo);
            vex_printf(", 0(");
            ppHRegNANOMIPS(i->NMin.Cas.addr);
            vex_printf("); ");

            vex_printf("bnec ");
            ppHRegNANOMIPS(i->NMin.Cas.oldLo);
            vex_printf(", ");
            ppHRegNANOMIPS(i->NMin.Cas.expdLo);
            vex_printf(", end; ");

            vex_printf("addiu ");
            ppHRegNANOMIPS(i->NMin.Cas.oldLo);
            vex_printf(", ");
            ppHRegNANOMIPS(i->NMin.Cas.oldLo);
            vex_printf(", 1; ");

            vex_printf("sc ");
            ppHRegNANOMIPS(i->NMin.Cas.dataLo);
            vex_printf(", 0(");
            ppHRegNANOMIPS(i->NMin.Cas.addr);
            vex_printf("); ");

            vex_printf("movn ");
            ppHRegNANOMIPS(i->NMin.Cas.oldLo);
            vex_printf(", ");
            ppHRegNANOMIPS(i->NMin.Cas.expdLo);
            vex_printf(", ");
            ppHRegNANOMIPS(i->NMin.Cas.dataLo);
            vex_printf("; end:");
         } else {
            vex_printf("llwp ");
            ppHRegNANOMIPS(i->NMin.Cas.oldLo);
            vex_printf(", ");
            ppHRegNANOMIPS(i->NMin.Cas.oldHi);
            vex_printf(", 0(");
            ppHRegNANOMIPS(i->NMin.Cas.addr);
            vex_printf("); ");

            vex_printf("bnec ");
            ppHRegNANOMIPS(i->NMin.Cas.oldLo);
            vex_printf(", ");
            ppHRegNANOMIPS(i->NMin.Cas.expdLo);
            vex_printf(", end; ");

            vex_printf("bnec ");
            ppHRegNANOMIPS(i->NMin.Cas.oldHi);
            vex_printf(", ");
            ppHRegNANOMIPS(i->NMin.Cas.expdHi);
            vex_printf(", end; ");

            vex_printf("addiu ");
            ppHRegNANOMIPS(i->NMin.Cas.oldLo);
            vex_printf(", ");
            ppHRegNANOMIPS(i->NMin.Cas.oldLo);
            vex_printf(", 1; ");

            vex_printf("addiu ");
            ppHRegNANOMIPS(i->NMin.Cas.oldHi);
            vex_printf(", ");
            ppHRegNANOMIPS(i->NMin.Cas.oldHi);
            vex_printf(", 1; ");

            vex_printf("scwp ");
            ppHRegNANOMIPS(i->NMin.Cas.dataLo);
            vex_printf(", ");
            ppHRegNANOMIPS(i->NMin.Cas.dataHi);
            vex_printf(", 0(");
            ppHRegNANOMIPS(i->NMin.Cas.addr);
            vex_printf("); ");

            vex_printf("movn ");
            ppHRegNANOMIPS(i->NMin.Cas.oldLo);
            vex_printf(", ");
            ppHRegNANOMIPS(i->NMin.Cas.expdLo);
            vex_printf(", ");
            ppHRegNANOMIPS(i->NMin.Cas.dataLo);
            vex_printf("; ");

            vex_printf("movn ");
            ppHRegNANOMIPS(i->NMin.Cas.oldHi);
            vex_printf(", ");
            ppHRegNANOMIPS(i->NMin.Cas.expdHi);
            vex_printf(", ");
            ppHRegNANOMIPS(i->NMin.Cas.dataHi);
            vex_printf("; end:");
         }

         break;

      case NMin_LoadL:
         vex_printf("ll ");
         ppHRegNANOMIPS(i->NMin.LoadL.dst);
         vex_printf(", %d(", i->NMin.LoadL.addr_offset);
         ppHRegNANOMIPS(i->NMin.LoadL.addr);
         vex_printf("); ");
         break;

      case NMin_StoreC:
         vex_printf("sc ");
         ppHRegNANOMIPS(i->NMin.StoreC.src);
         vex_printf(", %d(", i->NMin.StoreC.addr_offset);
         ppHRegNANOMIPS(i->NMin.StoreC.addr);
         vex_printf("); ");
         break;

      case NMin_MoveCond:
         vassert(i->NMin.MoveCond.op == NMMoveCond_movn);
         vex_printf("movn ");
         ppHRegNANOMIPS(i->NMin.MoveCond.dst);
         vex_printf(", ");
         ppHRegNANOMIPS(i->NMin.MoveCond.src);
         vex_printf(", ");
         ppHRegNANOMIPS(i->NMin.MoveCond.cond);
         break;
   }
}

/* --------- Helpers for register allocation. --------- */

void getRegUsage_NANOMIPSInstr(HRegUsage* u, const NANOMIPSInstr* i)
{
   initHRegUsage(u);

   switch (i->tag) {
      case NMin_Imm:
         addHRegUse(u, HRmWrite, i->NMin.Imm.dst);

         if (!hregIsInvalid(i->NMin.Imm.src))
            addHRegUse(u, HRmRead, i->NMin.Imm.src);

         return;

      case NMin_Alu:
         addHRegUse(u, HRmRead, i->NMin.Alu.srcL);
         addHRegUse(u, HRmRead, i->NMin.Alu.srcR);
         addHRegUse(u, HRmWrite, i->NMin.Alu.dst);

         /* or Rd,Rs,Rs == mr Rd,Rs */
         if ((i->NMin.Alu.op == NMalu_OR)
             && sameHReg(i->NMin.Alu.srcR, i->NMin.Alu.srcL)) {
            u->isRegRegMove = True;
            u->regMoveSrc = i->NMin.Alu.srcL;
            u->regMoveDst = i->NMin.Alu.dst;
         }

         return;

      case NMin_Cmp:
         addHRegUse(u, HRmRead, i->NMin.Cmp.srcL);
         addHRegUse(u, HRmRead, i->NMin.Cmp.srcR);
         addHRegUse(u, HRmWrite, i->NMin.Cmp.dst);
         return;

      case NMin_Unary:
         addHRegUse(u, HRmRead, i->NMin.Unary.src);
         addHRegUse(u, HRmWrite, i->NMin.Unary.dst);
         return;

      case NMin_Call: {
         UInt argir = i->NMin.Call.argiregs;

         if (!hregIsInvalid(i->NMin.Call.guard))
            addHRegUse(u, HRmRead, i->NMin.Call.guard);

         addHRegUse(u, HRmWrite, hregNANOMIPS_GPR1());
         addHRegUse(u, HRmWrite, hregNANOMIPS_GPR2());
         addHRegUse(u, HRmWrite, hregNANOMIPS_GPR3());
         addHRegUse(u, HRmWrite, hregNANOMIPS_GPR4());
         addHRegUse(u, HRmWrite, hregNANOMIPS_GPR5());
         addHRegUse(u, HRmWrite, hregNANOMIPS_GPR6());
         addHRegUse(u, HRmWrite, hregNANOMIPS_GPR7());
         addHRegUse(u, HRmWrite, hregNANOMIPS_GPR8());
         addHRegUse(u, HRmWrite, hregNANOMIPS_GPR9());
         addHRegUse(u, HRmWrite, hregNANOMIPS_GPR10());
         addHRegUse(u, HRmWrite, hregNANOMIPS_GPR11());
         addHRegUse(u, HRmWrite, hregNANOMIPS_GPR12());
         addHRegUse(u, HRmWrite, hregNANOMIPS_GPR13());
         addHRegUse(u, HRmWrite, hregNANOMIPS_GPR14());
         addHRegUse(u, HRmWrite, hregNANOMIPS_GPR15());
         addHRegUse(u, HRmWrite, hregNANOMIPS_GPR24());
         addHRegUse(u, HRmWrite, hregNANOMIPS_GPR25());

         if (argir & (1 << 11)) addHRegUse(u, HRmRead, hregNANOMIPS_GPR11());

         if (argir & (1 << 10)) addHRegUse(u, HRmRead, hregNANOMIPS_GPR10());

         if (argir & (1 << 9)) addHRegUse(u, HRmRead, hregNANOMIPS_GPR9());

         if (argir & (1 << 8)) addHRegUse(u, HRmRead, hregNANOMIPS_GPR8());

         if (argir & (1 << 7)) addHRegUse(u, HRmRead, hregNANOMIPS_GPR7());

         if (argir & (1 << 6)) addHRegUse(u, HRmRead, hregNANOMIPS_GPR6());

         if (argir & (1 << 5)) addHRegUse(u, HRmRead, hregNANOMIPS_GPR5());

         if (argir & (1 << 4)) addHRegUse(u, HRmRead, hregNANOMIPS_GPR4());

         vassert(0 == (argir & ~((1 << 4) | (1 << 5) | (1 << 6)
                                 | (1 << 7) | (1 << 8) | (1 << 9) | (1 << 10)
                                 | (1 << 11))));
         return;
      }

      /* XDirect/XIndir/XAssisted are also a bit subtle.  They
         conditionally exit the block.  Hence we only need to list (1)
         the registers that they read, and (2) the registers that they
         write in the case where the block is not exited.  (2) is
         empty, hence only (1) is relevant here. */
      case NMin_XDirect:
         addHRegUse(u, HRmRead, i->NMin.XDirect.addr);

         if (!hregIsInvalid(i->NMin.XDirect.cond))
            addHRegUse(u, HRmRead, i->NMin.XDirect.cond);

         return;

      case NMin_XIndir:
         addHRegUse(u, HRmRead, i->NMin.XIndir.dstGA);
         addHRegUse(u, HRmRead, i->NMin.XIndir.addr);

         if (!hregIsInvalid(i->NMin.XIndir.cond))
            addHRegUse(u, HRmRead, i->NMin.XIndir.cond);

         return;

      case NMin_XAssisted:
         addHRegUse(u, HRmRead, i->NMin.XAssisted.dstGA);
         addHRegUse(u, HRmRead, i->NMin.XAssisted.addr);

         if (!hregIsInvalid(i->NMin.XAssisted.cond))
            addHRegUse(u, HRmRead, i->NMin.XAssisted.cond);

         return;

      case NMin_Load:
         addHRegUse(u, HRmRead, i->NMin.Load.addr);
         addHRegUse(u, HRmWrite, i->NMin.Load.dst);
         return;

      case NMin_Store:
         addHRegUse(u, HRmRead, i->NMin.Store.addr);
         addHRegUse(u, HRmRead, i->NMin.Store.src);
         return;

      case NMin_LoadL:
         addHRegUse(u, HRmRead, i->NMin.LoadL.addr);
         addHRegUse(u, HRmWrite, i->NMin.LoadL.dst);
         return;

      case NMin_Cas:
         addHRegUse(u, HRmWrite, i->NMin.Cas.oldLo);
         addHRegUse(u, HRmRead, i->NMin.Cas.addr);
         addHRegUse(u, HRmRead, i->NMin.Cas.expdLo);
         addHRegUse(u, HRmModify, i->NMin.Cas.dataLo);

         if (i->NMin.Cas.sz == 8) {
            addHRegUse(u, HRmWrite, i->NMin.Cas.oldHi);
            addHRegUse(u, HRmRead, i->NMin.Cas.expdHi);
            addHRegUse(u, HRmModify, i->NMin.Cas.dataHi);
         }

         return;

      case NMin_StoreC:
         addHRegUse(u, HRmRead, i->NMin.StoreC.addr);
         addHRegUse(u, HRmWrite, i->NMin.StoreC.src);
         addHRegUse(u, HRmRead, i->NMin.StoreC.src);
         return;

      case NMin_MoveCond:
         addHRegUse(u, HRmWrite, i->NMin.MoveCond.dst);
         addHRegUse(u, HRmRead, i->NMin.MoveCond.src);
         addHRegUse(u, HRmRead, i->NMin.MoveCond.cond);
         return;

      case NMin_EvCheck:
         addHRegUse(u, HRmRead, i->NMin.EvCheck.r_amCounter);
         addHRegUse(u, HRmRead, i->NMin.EvCheck.r_amFailAddr);
         return;

      case NMin_ProfInc:
         /* does not use any registers. */
         return;

      default:
         ppNANOMIPSInstr(i);
         vpanic("getRegUsage_NANOMIPSInstr");
         break;
   }
}

/* local helper */
static void mapReg(HRegRemap * m, HReg * r)
{
   *r = lookupHRegRemap(m, *r);
}

void mapRegs_NANOMIPSInstr(HRegRemap * m, NANOMIPSInstr * i)
{
   switch (i->tag) {
      case NMin_Imm:
         mapReg(m, &i->NMin.Imm.dst);

         if (!hregIsInvalid(i->NMin.Imm.src))
            mapReg(m, &i->NMin.Imm.src);

         break;

      case NMin_Alu:
         mapReg(m, &i->NMin.Alu.srcL);
         mapReg(m, &i->NMin.Alu.srcR);
         mapReg(m, &i->NMin.Alu.dst);
         return;

      case NMin_Cmp:
         mapReg(m, &i->NMin.Cmp.srcL);
         mapReg(m, &i->NMin.Cmp.srcR);
         mapReg(m, &i->NMin.Cmp.dst);
         return;

      case NMin_Unary:
         mapReg(m, &i->NMin.Unary.src);
         mapReg(m, &i->NMin.Unary.dst);
         return;

      case NMin_Call: {
         if (!hregIsInvalid(i->NMin.Call.guard))
            mapReg(m, &i->NMin.Call.guard);

         return;
      }

      case NMin_XDirect:
         mapReg(m, &i->NMin.XDirect.addr);

         if (!hregIsInvalid(i->NMin.XDirect.cond))
            mapReg(m, &i->NMin.XDirect.cond);

         return;

      case NMin_XIndir:
         mapReg(m, &i->NMin.XIndir.dstGA);
         mapReg(m, &i->NMin.XIndir.addr);

         if (!hregIsInvalid(i->NMin.XIndir.cond))
            mapReg(m, &i->NMin.XIndir.cond);

         return;

      case NMin_XAssisted:
         mapReg(m, &i->NMin.XAssisted.dstGA);
         mapReg(m, &i->NMin.XAssisted.addr);

         if (!hregIsInvalid(i->NMin.XAssisted.cond))
            mapReg(m, &i->NMin.XAssisted.cond);

         return;

      case NMin_Load:
         mapReg(m, &i->NMin.Load.addr);
         mapReg(m, &i->NMin.Load.dst);
         return;

      case NMin_Store:
         mapReg(m, &i->NMin.Store.addr);
         mapReg(m, &i->NMin.Store.src);
         return;

      case NMin_LoadL:
         mapReg(m, &i->NMin.LoadL.addr);
         mapReg(m, &i->NMin.LoadL.dst);
         return;

      case NMin_Cas:
         mapReg(m, &i->NMin.Cas.oldLo);
         mapReg(m, &i->NMin.Cas.addr);
         mapReg(m, &i->NMin.Cas.expdLo);
         mapReg(m, &i->NMin.Cas.dataLo);

         if (i->NMin.Cas.sz == 8) {
            mapReg(m, &i->NMin.Cas.oldHi);
            mapReg(m, &i->NMin.Cas.expdHi);
            mapReg(m, &i->NMin.Cas.dataHi);
         }

         return;

      case NMin_StoreC:
         mapReg(m, &i->NMin.StoreC.addr);
         mapReg(m, &i->NMin.StoreC.src);
         return;

      case NMin_MoveCond:
         mapReg(m, &i->NMin.MoveCond.dst);
         mapReg(m, &i->NMin.MoveCond.src);
         mapReg(m, &i->NMin.MoveCond.cond);
         return;

      case NMin_EvCheck:
         /* We expect both amodes only to mention the guest state
            pointer, so this is in fact pointless, since that register
            isn't allocatable, but anyway.. */
         mapReg(m, &i->NMin.EvCheck.r_amCounter);
         mapReg(m, &i->NMin.EvCheck.r_amFailAddr);
         return;

      case NMin_ProfInc:
         /* does not use any registers. */
         return;

      default:
         ppNANOMIPSInstr(i);
         vpanic("mapRegs_NANOMIPSInstr");
         break;
   }
}

/* Generate NANOMIPS spill/reload instructions under the direction of the
   register allocator. */
void genSpill_NANOMIPS( /*OUT*/ HInstr** i1, /*OUT*/ HInstr** i2, HReg rreg,
                        Int offsetB, Bool mode64)
{
   vassert(offsetB >= 0);
   vassert(offsetB < 0x1000);
   vassert(!mode64);
   vassert(!hregIsVirtual(rreg));
   vassert(hregClass(rreg) == HRcInt32);
   *i2 = NULL;
   *i1 = NANOMIPSInstr_Store(4, GuestStatePointer, offsetB, rreg);
}

void genReload_NANOMIPS( /*OUT*/ HInstr ** i1, /*OUT*/ HInstr ** i2, HReg rreg,
                         Int offsetB, Bool mode64)
{
   vassert(offsetB >= 0);
   vassert(offsetB < 0x1000);
   vassert(!mode64);
   vassert(!hregIsVirtual(rreg));
   vassert(hregClass(rreg) == HRcInt32);
   *i2 = NULL;
   *i1 = NANOMIPSInstr_Load(4, rreg, GuestStatePointer, offsetB);
}

NANOMIPSInstr* genMove_NANOMIPS(HReg r_src, HReg r_dst)
{
   vassert(hregClass(r_dst) == hregClass(r_src));
   vassert(hregClass(r_src) == HRcInt32);
   return NANOMIPSInstr_Alu(NMalu_OR, r_dst, r_src, r_src);
}
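
/* A register-register move is thus encoded as "or dst, src, src".  The
   same idiom is recognised in the other direction: ppNANOMIPSInstr
   prints it as "move", and getRegUsage_NANOMIPSInstr reports it via
   u->isRegRegMove so the allocator can try to coalesce the two
   registers. */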

/* --------- The NANOMIPS assembler --------- */

inline static UInt iregNo(HReg r)
{
   UInt n;
   vassert(hregClass(r) == (HRcInt32));
   vassert(!hregIsVirtual(r));
   n = hregEncoding(r);
   vassert(n < 32);
   return n;
}

/* Emit 32bit instruction */
static UChar *emit32(UChar * p, UInt w32)
{
#if defined (_MIPSEB)
   *p++ = toUChar((w32 >> 24) & 0x000000FF);
   *p++ = toUChar((w32 >> 16) & 0x000000FF);
   *p++ = toUChar((w32 >> 8) & 0x000000FF);
   *p++ = toUChar(w32 & 0x000000FF);
#else
   *p++ = toUChar((w32 >> 16) & 0x000000FF);
   *p++ = toUChar((w32 >> 24) & 0x000000FF);
   *p++ = toUChar(w32 & 0x000000FF);
   *p++ = toUChar((w32 >> 8) & 0x000000FF);
#endif
   return p;
}
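
/* On the byte order above: a 32-bit nanoMIPS instruction is stored as
   two 16-bit units, most-significant unit first, each unit in host
   endianness.  So on a little-endian host w32 = 0xAABBCCDD goes out as
   the bytes BB AA DD CC (each halfword byte-swapped), which is what the
   #else branch produces, while on a big-endian host it is the plain
   sequence AA BB CC DD. */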

static UChar *mkFormNano2Regs12imm(UChar * p, UInt opc, UInt rt, UInt rs,
                                   UInt opc2, UInt imm)
{
   UInt theInstr;
   vassert(opc < 0x40);
   vassert(rs < 0x20);
   vassert(rt < 0x20);
   vassert(opc2 < 0x10);
   vassert(imm < 0x1000);
   theInstr = ((opc << 26) | (rt << 21) | (rs << 16) | (opc2 << 12) | (imm));
   return emit32(p, theInstr);
}

static UChar *mkFormNano2Regs16imm(UChar * p, UInt opc, UInt rt, UInt rs,
                                   UShort imm)
{
   UInt theInstr;
   vassert(opc < 0x40);
   vassert(rs < 0x20);
   vassert(rt < 0x20);
   theInstr = ((opc << 26) | (rt << 21) | (rs << 16) | (imm));
   return emit32(p, theInstr);
}

static UChar *mkFormNano1Reg(UChar * p, UInt opc, UInt rt, UInt opc2,
                             UInt imm)
{
   UInt theInstr;
   vassert(opc < 0x40);
   vassert(rt < 0x20);

   switch (opc) {
      case 0x38: /* LUI */
         theInstr = ((opc << 26) | (rt << 21) | (imm & 0x1FF000) |
                     ((imm & 0x7FE00000) >> 19) | ((imm & 0x80000000) >> 31));
         return emit32(p, theInstr);

      default:
         vassert(0);
   }
}

static UChar* mkFormNanoPShift(UChar * p, UInt rt, UInt rs, UInt opc2,
                               UInt imm)
{
   UInt theInstr;
   vassert(rt < 0x20);
   vassert(rs < 0x20);
   vassert(opc2 < 0x10);
   vassert(imm < 0x20);

   switch (opc2) {
      case PSLL:  /* SLL */
      case SRL32: /* SRL */
      case SRA:   /* SRA */
         theInstr = ((PU12 << 26) | (rt << 21) | (rs << 16) |
                     (PU12_PSHIFT << 12) | (opc2 << 5) | (imm));
         return emit32(p, theInstr);

      default:
         vassert(0);
   }
}

static UChar *mkFormNanoP32A0(UChar * p, UInt rt, UInt rs, UInt rd, UInt opc2)
{
   UInt theInstr;
   vassert(rt < 0x20);
   vassert(rs < 0x20);
   vassert(rd < 0x20);
   vassert(opc2 < 0x80);

   switch (opc2) {
      case _POOL32A0_ADDU32: /* ADDU */
      case _POOL32A0_AND32:  /* AND */
      case _POOL32A0_SUBU32: /* SUBU */
      case _POOL32A0_SLLV:   /* SLLV */
      case _POOL32A0_SRLV:   /* SRLV */
      case _POOL32A0_SRAV:   /* SRAV */
      case _POOL32A0_XOR32:  /* XOR */
      case _POOL32A0_SLT:    /* SLT */
      case _POOL32A0_OR32:   /* OR */
      case _POOL32A0_NOR:    /* NOR */
      case _POOL32A0_PSLTU:  /* SLTU */
      case _POOL32A0_DIV:    /* DIV */
      case _POOL32A0_DIVU:   /* DIVU */
      case _POOL32A0_MOD:    /* MOD */
      case _POOL32A0_MODU:   /* MODU */
      case _POOL32A0_MUL32:  /* MUL */
      case _POOL32A0_MULU:   /* MULU */
      case _POOL32A0_MUH:    /* MUH */
      case _POOL32A0_MUHU:   /* MUHU */
         theInstr = ((P32A << 26) | (rt << 21) | (rs << 16) | (rd << 11) |
                     (opc2 << 3));
         return emit32(p, theInstr);

      case _POOL32A0_PCMOVE: /* MOVN */
         theInstr = ((P32A << 26) | (rt << 21) | (rs << 16) | (rd << 11) |
                     (1 << 10) | (opc2 << 3));
         return emit32(p, theInstr);

      default:
         vassert(0);
   }
}

static UChar *mkFormNanoPU12(UChar * p, UInt rt, UInt rs, UInt opc2, UInt imm)
{
   UInt theInstr;
   vassert(rt < 0x20);
   vassert(rs < 0x20);
   vassert(opc2 < 0x10);
   vassert(imm < 0x1000);

   switch (opc2) {
      case PU12_ANDI:      /* ANDI */
      case PU12_ADDIU_NEG: /* ADDIU_NEG */
      case PU12_ORI:       /* ORI */
      case PU12_SLTIU:     /* SLTIU */
      case PU12_XORI:      /* XORI */
      case PU12_PROTX:     /* ROTX */
         theInstr = ((PU12 << 26) | (rt << 21) | (rs << 16) | (opc2 << 12) |
                     (imm));
         return emit32(p, theInstr);

      default:
         vassert(0);
   }
}

static UChar *mkFormNanoPBR1(UChar * p, UInt rt, UInt rs, UInt opc2, UInt imm)
{
   UInt theInstr;
   vassert(rt < 0x20);
   vassert(rs < 0x20);
   vassert(opc2 < 0x04);
   vassert(imm < 0x4000);

   theInstr = ((PBR1 << 26) | (rt << 21) | (rs << 16) | (opc2 << 14) |
               (imm & 0x3FFE) | (imm >> 14));
   return emit32(p, theInstr);
}

static UChar *mkFormNanoPBR2(UChar * p, UInt rt, UInt rs, UInt opc2, UInt imm)
{
   UInt theInstr;
   vassert(rt < 0x20);
   vassert(rs < 0x20);
   vassert(opc2 < 0x04);
   vassert(imm < 0x4000);

   theInstr = ((PBR2 << 26) | (rt << 21) | (rs << 16) | (opc2 << 14) |
               (imm & 0x3FFE) | (imm >> 14));
   return emit32(p, theInstr);
}

static UChar *mkFormNanoPLSS9(UChar * p, UInt rt, UInt rs, nanoPLSS9 opc,
                              UInt opc1, UInt opc2, UInt imm_ru)
{
   UInt theInstr;
   vassert(rt < 0x20);
   vassert(rs < 0x20);
   vassert(opc < 0x04);
   vassert(opc1 < 0x10);
   vassert(opc2 < 0x02);

   switch (opc2) {
      case LL: /* LL/SC */
         vassert(imm_ru < 0x4000);
         theInstr = ((PLSS9 << 26) | (rt << 21) | (rs << 16) | (opc << 8) |
                     (opc1 << 11) | opc2 | (imm_ru & 0xFC) |
                     ((imm_ru & 0x100) << 7));
         break;

      case LLWP: /* LLWP/SCWP */
         vassert(imm_ru < 0x20);
         theInstr = ((PLSS9 << 26) | (rt << 21) | (rs << 16) | (opc << 8) |
                     (opc1 << 11) | (imm_ru << 3) | opc2);
         break;

      default:
         vassert(0);
   }

   return emit32(p, theInstr);
}

static UChar *doMemAccess_IR(UChar *p, UChar sz, UChar r_dst,
                             HReg addr, Int addr_offset, Bool isLoad)
{
   UInt rA, opc2;
   vassert(((UInt)addr_offset) < 0x1000);
   rA = iregNo(addr);
   opc2 = isLoad ? 0x00 : 0x01;

   switch (sz) {
      case 1:
         break;

      case 2:
         opc2 = opc2 | 0x04;
         break;

      case 4:
         opc2 = opc2 | 0x08;
         break;

      default:
         vassert(0);
   }

   p = mkFormNano2Regs12imm(p, 0x21, r_dst, rA, opc2, addr_offset);
   return p;
}

/* Load 32-bit immediate in exactly two 32-bit instructions even if it
   could generate fewer.  This is needed for generating fixed sized
   patchable sequences. */
static inline UChar* mkLoadImm32_EXACTLY2(UChar* p, UInt r_dst, UInt imm)
{
   vassert(r_dst < 0x20);
   /* lui r_dst, (imm >> 20) */
   p = mkFormNano1Reg(p, 0x38, r_dst, 0, imm);
   /* ori r_dst, r_dst, (imm & 0xFFF) */
   p = mkFormNanoPU12(p, r_dst, r_dst, PU12_ORI, imm & 0xFFF);
   return p;
}
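
/* For example, mkLoadImm32_EXACTLY2(p, 9, 0x80008000) always emits the
   lui/ori pair (8 bytes), even when a single instruction would do.
   chainXDirect_NANOMIPS, unchainXDirect_NANOMIPS and
   patchProfInc_NANOMIPS below depend on this fixed shape: they
   regenerate the pair into a temporary buffer, compare it word-for-word
   with what is in place, and then overwrite just those 8 bytes. */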

/* Load imm to r_dst */
static UChar *mkLoadImm(UChar * p, UInt r_dst, UInt imm)
{
   if (imm <= 0xFFFF) {
      /* addiu[32] r_dst, 0, imm */
      p = mkFormNano2Regs16imm(p, 0x00, r_dst, 0, imm & 0xFFFF);
   } else if (imm > 0xFFFFF000ULL) {
      /* addiu[neg] r_dst, 0, imm */
      p = mkFormNano2Regs12imm(p, 0x20, r_dst, 0, 0x08, (~imm + 1) & 0xFFF);
   } else {
      /* lui r_dst, (imm >> 20) */
      p = mkFormNano1Reg(p, 0x38, r_dst, 0, imm);
      imm &= 0xFFF;

      if (imm != 0) {
         /* ori r_dst, r_dst, (imm & 0xFFF) */
         p = mkFormNanoPU12(p, r_dst, r_dst, PU12_ORI, imm & 0xFFF);
      }
   }

   return p;
}
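
/* The three cases above, by example:
      mkLoadImm(p, 9, 0x1234)     -> addiu $a5, $zero, 0x1234
      mkLoadImm(p, 9, 0xFFFFFFF0) -> addiu[neg] $a5, $zero, 0x10
      mkLoadImm(p, 9, 0x12345678) -> lui $a5, ...; ori $a5, $a5, 0x678
   i.e. one instruction whenever the value fits a (possibly negated)
   immediate, and lui (plus ori when the low bits are nonzero)
   otherwise. */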

/* Emit an instruction into buf and return the number of bytes used.
   Note that buf is not the insn's final place, and therefore it is
   imperative to emit position-independent code.  If the emitted
   instruction was a profiler inc, set *is_profInc to True, else
   leave it unchanged. */
Int emit_NANOMIPSInstr ( /*MB_MOD*/Bool* is_profInc,
                         UChar* buf, Int nbuf,
                         const NANOMIPSInstr* i,
                         Bool mode64,
                         VexEndness endness_host,
                         const void* disp_cp_chain_me_to_slowEP,
                         const void* disp_cp_chain_me_to_fastEP,
                         const void* disp_cp_xindir,
                         const void* disp_cp_xassisted )
{
   UChar *p = &buf[0];
   vassert(nbuf >= 32);
   vassert(!mode64);

   switch (i->tag) {
      case NMin_Imm: {
         UInt r_dst = iregNo(i->NMin.Imm.dst);
         UInt r_src = hregIsInvalid(i->NMin.Imm.src) ?
                      0 : iregNo(i->NMin.Imm.src);

         switch (i->NMin.Imm.op) {
            case NMimm_LI:
               p = mkLoadImm(p, r_dst, i->NMin.Imm.imm);
               break;

            case NMimm_SLL:
            case NMimm_SRL:
            case NMimm_SRA:
               p = mkFormNanoPShift(p, r_dst, r_src, i->NMin.Imm.op,
                                    i->NMin.Imm.imm);
               break;

            case NMimm_SGN:
               p = mkFormNanoPShift(p, r_dst, r_src, NMimm_SLL,
                                    32 - i->NMin.Imm.imm);
               p = mkFormNanoPShift(p, r_dst, r_dst, NMimm_SRA,
                                    32 - i->NMin.Imm.imm);
               break;

            case NMimm_ANDI:
            case NMimm_ORI:
            case NMimm_XORI:
               p = mkFormNanoPU12(p, r_dst, r_src, i->NMin.Imm.op - 0x6,
                                  i->NMin.Imm.imm);
               break;

            case NMimm_ROTX:
               p = mkFormNanoPU12(p, r_dst, r_src, PU12_PROTX, i->NMin.Imm.imm);
               break;

            default:
               goto bad;
         }

         goto done;
      }

      case NMin_Alu: {
         UInt r_dst = iregNo(i->NMin.Alu.dst);
         UInt r_srcL = iregNo(i->NMin.Alu.srcL);
         UInt r_srcR = iregNo(i->NMin.Alu.srcR);

         switch (i->NMin.Alu.op) {
            /* NMalu_ADD, NMalu_SUB, NMalu_AND, NMalu_OR, NMalu_NOR,
               NMalu_XOR, NMalu_SLT */
            case NMalu_ADD:
               /* addu[32] */
               p = mkFormNanoP32A0(p, r_srcL, r_srcR, r_dst, _POOL32A0_ADDU32);
               break;

            case NMalu_SUB:
               /* subu[32] */
               p = mkFormNanoP32A0(p, r_srcR, r_srcL, r_dst, _POOL32A0_SUBU32);
               break;

            case NMalu_AND:
               /* and */
               p = mkFormNanoP32A0(p, r_srcL, r_srcR, r_dst, _POOL32A0_AND32);
               break;

            case NMalu_OR:
               p = mkFormNanoP32A0(p, r_srcL, r_srcR, r_dst, _POOL32A0_OR32);
               break;

            case NMalu_NOR:
               /* nor */
               p = mkFormNanoP32A0(p, r_srcL, r_srcR, r_dst, _POOL32A0_NOR);
               break;

            case NMalu_XOR:
               /* xor */
               p = mkFormNanoP32A0(p, r_srcL, r_srcR, r_dst, _POOL32A0_XOR32);
               break;

            case NMalu_SLT:
               p = mkFormNanoP32A0(p, r_srcR, r_srcL, r_dst, _POOL32A0_SLT);
               break;

            case NMalu_SLL:
               /* sllv */
               p = mkFormNanoP32A0(p, r_srcR, r_srcL, r_dst, _POOL32A0_SLLV);
               break;

            case NMalu_SRL:
               /* srlv */
               p = mkFormNanoP32A0(p, r_srcR, r_srcL, r_dst, _POOL32A0_SRLV);
               break;

            case NMalu_SRA:
               /* srav */
               p = mkFormNanoP32A0(p, r_srcR, r_srcL, r_dst, _POOL32A0_SRAV);
               break;

            case NMalu_DIV:
               /* div */
               p = mkFormNanoP32A0(p, r_srcR, r_srcL, r_dst, _POOL32A0_DIV);
               break;

            case NMalu_DIVU:
               /* divu */
               p = mkFormNanoP32A0(p, r_srcR, r_srcL, r_dst, _POOL32A0_DIVU);
               break;

            case NMalu_MOD:
               /* mod */
               p = mkFormNanoP32A0(p, r_srcR, r_srcL, r_dst, _POOL32A0_MOD);
               break;

            case NMalu_MODU:
               /* modu */
               p = mkFormNanoP32A0(p, r_srcR, r_srcL, r_dst, _POOL32A0_MODU);
               break;

            case NMalu_MUL:
               /* mul */
               p = mkFormNanoP32A0(p, r_srcR, r_srcL, r_dst, _POOL32A0_MUL32);
               break;

            case NMalu_MULU:
               /* mulu */
               p = mkFormNanoP32A0(p, r_srcR, r_srcL, r_dst, _POOL32A0_MULU);
               break;

            case NMalu_MUH:
               /* muh */
               p = mkFormNanoP32A0(p, r_srcR, r_srcL, r_dst, _POOL32A0_MUH);
               break;

            case NMalu_MUHU:
               /* muhu */
               p = mkFormNanoP32A0(p, r_srcR, r_srcL, r_dst, _POOL32A0_MUHU);
               break;

            default:
               goto bad;
         }

         goto done;
      }

      case NMin_Unary: {
         UInt r_dst = iregNo(i->NMin.Unary.dst);
         UInt r_src = iregNo(i->NMin.Unary.src);

         switch (i->NMin.Unary.op) {
            /* NMun_CLO, NMun_CLZ, NMun_NOP */
            case NMun_CLO: /* clo */
               p = mkFormNano2Regs16imm(p, 0x08, r_dst, r_src, 0x4B3F);
               break;

            case NMun_CLZ: /* clz */
               p = mkFormNano2Regs16imm(p, 0x08, r_dst, r_src, 0x5B3F);
               break;

            case NMun_NOP: /* nop (sll r0,r0,0) */
               p = mkFormNano2Regs16imm(p, 0x20, 0, 0, 0xC000);
               break;
         }

         goto done;
      }

      case NMin_Cmp: {
         UInt r_srcL = iregNo(i->NMin.Cmp.srcL);
         UInt r_srcR = iregNo(i->NMin.Cmp.srcR);
         UInt r_dst = iregNo(i->NMin.Cmp.dst);

         switch (i->NMin.Cmp.cond) {
            case NMcc_EQ:
               /* xor r_dst, r_srcL, r_srcR
                  sltiu r_dst, r_dst, 1 */
               p = mkFormNanoP32A0(p, r_srcL, r_srcR, r_dst, _POOL32A0_XOR32);
               p = mkFormNanoPU12(p, r_dst, r_dst, PU12_SLTIU, 1);
               break;

            case NMcc_NE:
               /* xor r_dst, r_srcL, r_srcR
                  sltu r_dst, zero, r_dst */
               p = mkFormNanoP32A0(p, r_srcL, r_srcR, r_dst, _POOL32A0_XOR32);
               p = mkFormNanoP32A0(p, r_dst, 0, r_dst, _POOL32A0_PSLTU);
               break;

            case NMcc_LTS:
               /* slt r_dst, r_srcL, r_srcR */
               p = mkFormNanoP32A0(p, r_srcR, r_srcL, r_dst, _POOL32A0_SLT);
               break;

            case NMcc_LTU:
               /* sltu r_dst, r_srcL, r_srcR */
               p = mkFormNanoP32A0(p, r_srcR, r_srcL, r_dst, _POOL32A0_PSLTU);
               break;

            case NMcc_LES:
               /* slt r_dst, r_srcR, r_srcL
                  xori r_dst, r_dst, 1 */
               p = mkFormNanoP32A0(p, r_srcL, r_srcR, r_dst, _POOL32A0_SLT);
               p = mkFormNanoPU12(p, r_dst, r_dst, PU12_XORI, 1);
               break;

            case NMcc_LEU:
               /* sltu r_dst, r_srcR, r_srcL
                  xori r_dst, r_dst, 1 */
               p = mkFormNanoP32A0(p, r_srcL, r_srcR, r_dst, _POOL32A0_PSLTU);
               p = mkFormNanoPU12(p, r_dst, r_dst, PU12_XORI, 1);
               break;

            default:
               goto bad;
         }

         goto done;
      }

      case NMin_Call: {
         /* If this is conditional, create a conditional
            jump over the rest of it. */
         if (!hregIsInvalid(i->NMin.Call.guard)) {
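            /* For a guarded call, first poison the return register(s)
               with 0x55555555, so that if the guard is false the
               skipped call yields a recognisable junk value rather than
               stale data -- the same convention the other VEX backends
               use. */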
            switch (i->NMin.Call.rloc.pri) {
               case RLPri_2Int:
                  /* li $a0, 0x55555555 */
                  p = mkLoadImm(p, 4, 0x55555555);
                  /* move $a1, $a0 */
                  p = mkFormNanoP32A0(p, 0, 4, 5, _POOL32A0_OR32);
                  break;

               case RLPri_Int:
                  /* li $a0, 0x55555555 */
                  p = mkLoadImm(p, 4, 0x55555555);
                  break;

               case RLPri_None:
                  break;

               default:
                  vassert(0);
            }

            /* Skip 3 instructions
               beqc $[cond], $0, 12 */
            p = mkFormNanoPBR1(p, iregNo(i->NMin.Call.guard), 0,
                               PBR1_BEQC32, 12);
         }

         /* li $25, #target */
         p = mkLoadImm32_EXACTLY2(p, 25, i->NMin.Call.target);
         /* jalrc $25 */
         p = mkFormNano2Regs16imm(p, 0x12, 31, 25, 0);

         goto done;
      }

      case NMin_XDirect: {
         /* NB: what goes on here has to be very closely coordinated
            with the chainXDirect_NANOMIPS and
            unchainXDirect_NANOMIPS below. */
         /* We're generating chain-me requests here, so we need to be
            sure this is actually allowed -- no-redir translations
            can't use chain-me's.  Hence: */
         vassert(disp_cp_chain_me_to_slowEP != NULL);
         vassert(disp_cp_chain_me_to_fastEP != NULL);

         /* If this is conditional, create a conditional
            jump over the rest of it. */
         if (!hregIsInvalid(i->NMin.XDirect.cond)) {
            /* Skip 6 instructions
               beqc $[cond], $0, 24 */
            p = mkFormNanoPBR1(p, iregNo(i->NMin.XDirect.cond), 0,
                               PBR1_BEQC32, 24);
         }

         /* Update the guest PC. */
         /* li r9, dstGA */
         /* sw r9, (offset)addr */
         p = mkLoadImm32_EXACTLY2(p, 9, i->NMin.XDirect.dstGA);
         p = doMemAccess_IR(p, 4, 9, i->NMin.XDirect.addr,
                            i->NMin.XDirect.addr_offset,
                            False /* Store */);
         /* --- FIRST PATCHABLE BYTE follows --- */
         /* VG_(disp_cp_chain_me_to_{slowEP,fastEP}) (where we're
            calling to) backs up the return address, so as to find the
            address of the first patchable byte.  So: don't change the
            number of instructions (3) below. */
         /* move r9, VG_(disp_cp_chain_me_to_{slowEP,fastEP}) */
         /* jr r9 */
         const void* disp_cp_chain_me
            = i->NMin.XDirect.toFastEP ? disp_cp_chain_me_to_fastEP
                                       : disp_cp_chain_me_to_slowEP;
         p = mkLoadImm32_EXACTLY2(p, /*r*/ 9,
                                  (Addr)disp_cp_chain_me);

         /* jalrc r9 */
         p = mkFormNano2Regs16imm(p, 0x12, 31, 9, 0); /* p += 4 */
         /* --- END of PATCHABLE BYTES --- */

         goto done;
      }

      case NMin_XIndir: {
         /* We're generating transfers that could lead indirectly to a
            chain-me, so we need to be sure this is actually allowed --
            no-redir translations are not allowed to reach normal
            translations without going through the scheduler.  That means
            no XDirects or XIndirs out from no-redir translations.
            Hence: */
         vassert(disp_cp_xindir != NULL);

         /* If this is conditional, create a conditional
            jump over the rest of it. */
         if (!hregIsInvalid(i->NMin.XIndir.cond)) {
            /* Skip 4 instructions
               beqc $[cond], $0, 16 */
            p = mkFormNanoPBR1(p, iregNo(i->NMin.XIndir.cond), 0,
                               PBR1_BEQC32, 16);
         }

         /* sw r-dstGA, amPC */
         p = doMemAccess_IR(p, 4, iregNo(i->NMin.XIndir.dstGA),
                            i->NMin.XIndir.addr,
                            i->NMin.XIndir.addr_offset,
                            False /* Store */);

         /* move r9, VG_(disp_cp_xindir) */
         p = mkLoadImm32_EXACTLY2(p, /*r*/ 9,
                                  (Addr)disp_cp_xindir);
         /* jalrc r9 */
         p = mkFormNano2Regs16imm(p, 0x12, 31, 9, 0); /* p += 4 */

         goto done;
      }

      case NMin_XAssisted: {
         /* If this is conditional, create a conditional
            jump over the rest of it. */
         if (!hregIsInvalid(i->NMin.XAssisted.cond)) {
            /* Skip 6 instructions
               beqc $[cond], $0, 24 */
            p = mkFormNanoPBR1(p, iregNo(i->NMin.XAssisted.cond), 0,
                               PBR1_BEQC32, 24);
         }

         /* sw r-dstGA, amPC */
         p = doMemAccess_IR(p, 4, iregNo(i->NMin.XAssisted.dstGA),
                            i->NMin.XAssisted.addr,
                            i->NMin.XAssisted.addr_offset,
                            False /* Store */);

         UInt trcval = 0;

         switch (i->NMin.XAssisted.jk) {
            case Ijk_ClientReq:
               trcval = VEX_TRC_JMP_CLIENTREQ;
               break;

            case Ijk_Sys_syscall:
               trcval = VEX_TRC_JMP_SYS_SYSCALL;
               break;

            /* case Ijk_Sys_int128:
               trcval = VEX_TRC_JMP_SYS_INT128;
               break;
            */

            case Ijk_Yield:
               trcval = VEX_TRC_JMP_YIELD;
               break;

            case Ijk_EmWarn:
               trcval = VEX_TRC_JMP_EMWARN;
               break;

            case Ijk_EmFail:
               trcval = VEX_TRC_JMP_EMFAIL;
               break;

            /* case Ijk_MapFail:
               trcval = VEX_TRC_JMP_MAPFAIL;
               break;
            */

            case Ijk_NoDecode:
               trcval = VEX_TRC_JMP_NODECODE;
               break;

            case Ijk_InvalICache:
               trcval = VEX_TRC_JMP_INVALICACHE;
               break;

            case Ijk_NoRedir:
               trcval = VEX_TRC_JMP_NOREDIR;
               break;

            case Ijk_SigILL:
               trcval = VEX_TRC_JMP_SIGILL;
               break;

            case Ijk_SigTRAP:
               trcval = VEX_TRC_JMP_SIGTRAP;
               break;

            /* case Ijk_SigSEGV:
               trcval = VEX_TRC_JMP_SIGSEGV;
               break;
            */

            case Ijk_SigBUS:
               trcval = VEX_TRC_JMP_SIGBUS;
               break;

            case Ijk_SigFPE_IntDiv:
               trcval = VEX_TRC_JMP_SIGFPE_INTDIV;
               break;

            case Ijk_SigFPE_IntOvf:
               trcval = VEX_TRC_JMP_SIGFPE_INTOVF;
               break;

            case Ijk_Boring:
               trcval = VEX_TRC_JMP_BORING;
               break;

            /* We don't expect to see the following being assisted.
               case Ijk_Ret:
               case Ijk_Call:
               fallthrough */
            default:
               ppIRJumpKind(i->NMin.XAssisted.jk);
               vpanic("emit_NANOMIPSInstr.NMin_XAssisted: unexpected jump"
                      " kind");
         }

         vassert(trcval != 0);
         p = mkLoadImm32_EXACTLY2(p, /*r*/ GuestSP, trcval);

         /* move r9, VG_(disp_cp_xassisted) */
         p = mkLoadImm32_EXACTLY2(p, /*r*/ 9,
                                  (ULong)(Addr)disp_cp_xassisted);
         /* jalrc r9 */
         p = mkFormNano2Regs16imm(p, 0x12, 31, 9, 0); /* p += 4 */

         goto done;
      }

      case NMin_Load:
         p = doMemAccess_IR(p, i->NMin.Load.sz, iregNo(i->NMin.Load.dst),
                            i->NMin.Load.addr,
                            i->NMin.Load.addr_offset,
                            True /* Load */);
         goto done;

      case NMin_Store:
         p = doMemAccess_IR(p, i->NMin.Store.sz, iregNo(i->NMin.Store.src),
                            i->NMin.Store.addr,
                            i->NMin.Store.addr_offset,
                            False /* Store */);
         goto done;

      case NMin_LoadL: {
         p = mkFormNanoPLSS9(p, iregNo(i->NMin.LoadL.dst),
                             iregNo(i->NMin.LoadL.addr),
                             PLSS1, PLL, LL, i->NMin.LoadL.addr_offset);
         goto done;
      }

      case NMin_StoreC: {
         p = mkFormNanoPLSS9(p, iregNo(i->NMin.StoreC.src),
                             iregNo(i->NMin.StoreC.addr),
                             PLSS1, PSC, PSC, i->NMin.StoreC.addr_offset);
         goto done;
      }
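
      /* The CAS sequences below signal success or failure through the
         old value register(s): on success oldLo/oldHi end up equal to
         expdLo/expdHi, on failure they differ.  The "addiu old, old, 1"
         after the ll/llwp makes old != expd even when the loaded value
         matched, and the trailing "movn old, expd, data" restores
         old = expd only if the sc/scwp left a nonzero success flag in
         the data register. */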
      case NMin_Cas: {
         vassert((i->NMin.Cas.sz == 4) || (i->NMin.Cas.sz == 8));
         UInt oldLo = iregNo(i->NMin.Cas.oldLo);
         UInt addr = iregNo(i->NMin.Cas.addr);
         UInt expdLo = iregNo(i->NMin.Cas.expdLo);
         UInt dataLo = iregNo(i->NMin.Cas.dataLo);
         UInt oldHi = 0, expdHi = 0, dataHi = 0;

         if (i->NMin.Cas.sz == 8) {
            oldHi = iregNo(i->NMin.Cas.oldHi);
            expdHi = iregNo(i->NMin.Cas.expdHi);
            dataHi = iregNo(i->NMin.Cas.dataHi);
         }

         if (i->NMin.Cas.sz == 4) {
            /*
             * ll old, 0(addr)
             * bnec old, expd, end
             * addiu old, old, 1
             * sc data, 0(addr)
             * movn old, expd, data
             * end:
             */
            p = mkFormNanoPLSS9(p, oldLo, addr, PLSS1, PLL, LL, 0);
            p = mkFormNanoPBR2(p, oldLo, expdLo, PBR2_BNEC32, 12);
            p = mkFormNano2Regs16imm(p, 0x00, oldLo, oldLo, 1);
            p = mkFormNanoPLSS9(p, dataLo, addr, PLSS1, PSC, SC, 0);
            p = mkFormNanoP32A0(p, dataLo, expdLo, oldLo, _POOL32A0_PCMOVE);
         } else {
            /*
             * llwp oldLo, oldHi, 0(addr)
             * bnec oldLo, expdLo, end
             * bnec oldHi, expdHi, end
             * addiu oldLo, oldLo, 1
             * addiu oldHi, oldHi, 1
             * scwp dataLo, dataHi, 0(addr)
             * movn oldLo, expdLo, dataLo
             * movn oldHi, expdHi, dataHi
             * end:
             */
            p = mkFormNanoPLSS9(p, oldLo, addr, PLSS1, PLL, LLWP, oldHi);
            p = mkFormNanoPBR2(p, oldLo, expdLo, PBR2_BNEC32, 24);
            p = mkFormNanoPBR2(p, oldHi, expdHi, PBR2_BNEC32, 20);
            p = mkFormNano2Regs16imm(p, 0x00, oldLo, oldLo, 1);
            p = mkFormNano2Regs16imm(p, 0x00, oldHi, oldHi, 1);
            p = mkFormNanoPLSS9(p, dataLo, addr, PLSS1, PSC, SCWP, dataHi);
            p = mkFormNanoP32A0(p, dataLo, expdLo, oldLo, _POOL32A0_PCMOVE);
            p = mkFormNanoP32A0(p, dataHi, expdHi, oldHi, _POOL32A0_PCMOVE);
         }

         goto done;
      }

      case NMin_MoveCond: {
         UInt r_dst = iregNo(i->NMin.MoveCond.dst);
         UInt r_src = iregNo(i->NMin.MoveCond.src);
         UInt r_cond = iregNo(i->NMin.MoveCond.cond);

         switch (i->NMin.MoveCond.op) {
            case NMMoveCond_movn: {
               p = mkFormNanoP32A0(p, r_cond, r_src, r_dst, _POOL32A0_PCMOVE);
               break;
            }

            default:
               vassert(0);
         }

         goto done;
      }

      case NMin_EvCheck: {
         /* This requires a 32-bit dec/test in 32 mode. */
         /* We generate:
               lw      r9, amCounter
               addiu   r9, r9, -1
               sw      r9, amCounter
               bgec    r9, zero, nofail
               lw      r9, amFailAddr
               jalrc   r9
            nofail:
         */
         UChar* p0 = p;
         /* lw r9, amCounter */
         p = doMemAccess_IR(p, 4, /*r*/ 9, i->NMin.EvCheck.r_amCounter,
                            i->NMin.EvCheck.offset_amCounter,
                            True /* Load */);
         /* addiu r9,r9,-1 */
         p = mkFormNanoPU12(p, 9, 9, PU12_ADDIU_NEG, 1);
         /* sw r9, amCounter */
         p = doMemAccess_IR(p, 4, /*r*/ 9, i->NMin.EvCheck.r_amCounter,
                            i->NMin.EvCheck.offset_amCounter,
                            False /* Store */);
         /* bgec r9, zero, nofail */
         p = emit32(p, 0x88098008);
         /* lw r9, amFailAddr */
         p = doMemAccess_IR(p, sizeof(Addr), /*r*/ 9,
                            i->NMin.EvCheck.r_amFailAddr,
                            i->NMin.EvCheck.offset_amFailAddr,
                            True /* Load */);
         /* jalrc[32] r9 */
         p = mkFormNano2Regs16imm(p, 0x12, 31, 9, 0); /* p += 4 */
         /* nofail: */

         /* Crosscheck */
         vassert(evCheckSzB_NANOMIPS() == (UChar*)p - (UChar*)p0);
         goto done;
      }

      case NMin_ProfInc: {
         /* 32-bit:
               li r9, 0x65556555
               lw r8, 0(r9)
               addiu r8, r8, 1         # add least significant word
               sw r8, 0(r9)
               sltiu r1, r8, 1         # set carry-in bit
               lw r8, 4(r9)
               addu r8, r8, r1
               sw r8, 4(r9) */

         /* li r9, 0x65556555 */
         p = mkLoadImm32_EXACTLY2(p, 9, 0x65556555);

         /* lw r8, 0(r9) */
         p = mkFormNano2Regs12imm(p, 0x21, 8, 9, 0x8, 0);

         /* addiu r8, r8, 1 */
         p = mkFormNano2Regs16imm(p, 0x00, 8, 8, 0x01);

         /* sw r8, 0(r9) */
         p = mkFormNano2Regs12imm(p, 0x21, 8, 9, 0x9, 0);

         /* sltiu r1, r8, 1 */
         p = mkFormNanoPU12(p, 1, 8, PU12_SLTIU, 1);

         /* lw r8, 4(r9) */
         p = mkFormNano2Regs12imm(p, 0x21, 8, 9, 0x8, 4);

         /* addu r8, r8, r1 */
         p = mkFormNanoP32A0(p, 8, 1, 8, _POOL32A0_ADDU32);

         /* sw r8, 4(r9) */
         p = mkFormNano2Regs12imm(p, 0x21, 8, 9, 0x9, 4);

         /* Tell the caller this is a profiler-inc site, so that the
            real counter address can be patched in later; see
            patchProfInc_NANOMIPS below.  (Previously this case fell
            out of the switch into the "bad" label; per the contract
            documented above, set *is_profInc and finish normally.) */
         vassert(!(*is_profInc));
         *is_profInc = True;
         goto done;
      }

      default:
         goto bad;
   }

bad:
   vex_printf("\n=> ");
   ppNANOMIPSInstr(i);
   vpanic("emit_NANOMIPSInstr");
   /* NOTREACHED */

done:
   vassert(p - &buf[0] <= 128);
   return p - &buf[0];
}

/* How big is an event check?  See case for NMin_EvCheck in
   emit_NANOMIPSInstr just above.  That crosschecks what this returns, so
   we can tell if we're inconsistent. */
Int evCheckSzB_NANOMIPS(void)
{
   return 6 * 4;
}
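
/* That is 6 instructions of 4 bytes each: lw, addiu, sw, bgec, lw and
   jalrc.  The vassert at the end of the NMin_EvCheck case above checks
   that the emitted sequence really is this size. */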

VexInvalRange chainXDirect_NANOMIPS(VexEndness endness_host,
                                    void* place_to_chain,
                                    const void* disp_cp_chain_me_EXPECTED,
                                    const void* place_to_jump_to)
{
   UInt tmp[3];
   UInt* p = (UInt*)place_to_chain;
   /* li r9, disp_cp_chain_me_EXPECTED */
   mkLoadImm32_EXACTLY2((UChar*)tmp, 9, (Addr)disp_cp_chain_me_EXPECTED);
   /* jalrc r9 */
   mkFormNano2Regs16imm((UChar*)(tmp + 2), 0x12, 31, 9, 0);
   vassert((tmp[0] == p[0]) && (tmp[1] == p[1]) && (tmp[2] == p[2]));
   /* li r9, place_to_jump_to */
   mkLoadImm32_EXACTLY2((UChar*)place_to_chain, 9, (Addr)place_to_jump_to);
   VexInvalRange vir = {(HWord)place_to_chain, 8};
   return vir;
}
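
/* Note that chaining only rewrites the 8-byte "li r9, <target>" pair;
   the following jalrc stays in place.  That is why the invalidated
   range is 8 bytes, and why the li must always be the EXACTLY2 form. */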

/* NB: what goes on here has to be very closely coordinated with the
   emitInstr case for XDirect, above. */
VexInvalRange unchainXDirect_NANOMIPS ( VexEndness endness_host,
                                        void* place_to_unchain,
                                        const void* place_to_jump_to_EXPECTED,
                                        const void* disp_cp_chain_me)
{
   UInt tmp[3];
   UInt* p = (UInt*)place_to_unchain;
   /* li r9, place_to_jump_to_EXPECTED */
   mkLoadImm32_EXACTLY2((UChar*)tmp, 9, (Addr)place_to_jump_to_EXPECTED);
   /* jalrc r9 */
   mkFormNano2Regs16imm((UChar*)(tmp + 2), 0x12, 31, 9, 0);
   vassert((tmp[0] == p[0]) && (tmp[1] == p[1]) && (tmp[2] == p[2]));
   /* li r9, disp_cp_chain_me */
   mkLoadImm32_EXACTLY2((UChar*)place_to_unchain, 9, (Addr)disp_cp_chain_me);
   VexInvalRange vir = {(HWord)place_to_unchain, 8};
   return vir;
}

/* Patch the counter address into a profile inc point, as previously
   created by the NMin_ProfInc case for emit_NANOMIPSInstr. */
VexInvalRange patchProfInc_NANOMIPS ( VexEndness endness_host,
                                      void* place_to_patch,
                                      const ULong* location_of_counter)
{
   UInt tmp[9];
   UChar* q = (UChar*)tmp;
   UInt* p = (UInt*)place_to_patch;

   vassert(endness_host == VexEndnessLE || endness_host == VexEndnessBE);
   vassert(sizeof(ULong*) == 4);
   vassert(0 == (3 & (HWord)p));

   /* Regenerate the expected NMin_ProfInc template (with the dummy
      counter address 0x65556555) and check that it is what is actually
      in place.  Each helper returns the advanced output pointer, so
      the instructions land at consecutive offsets in tmp[]. */
   q = mkLoadImm32_EXACTLY2(q, 9, 0x65556555);
   q = mkFormNano2Regs12imm(q, 0x21, 8, 9, 0x8, 0);
   q = mkFormNano2Regs16imm(q, 0x00, 8, 8, 0x01);
   q = mkFormNano2Regs12imm(q, 0x21, 8, 9, 0x9, 0);
   q = mkFormNanoPU12(q, 1, 8, PU12_SLTIU, 1);
   q = mkFormNano2Regs12imm(q, 0x21, 8, 9, 0x8, 4);
   q = mkFormNanoP32A0(q, 8, 1, 8, _POOL32A0_ADDU32);
   q = mkFormNano2Regs12imm(q, 0x21, 8, 9, 0x9, 4);

   for (int i = 0; i < 9; i++)
      vassert(tmp[i] == p[i]);

   /* li r9, location_of_counter */
   mkLoadImm32_EXACTLY2((UChar*)place_to_patch, 9, (Addr)location_of_counter);
   VexInvalRange vir = {(HWord)place_to_patch, 8};
   return vir;
}

const RRegUniverse* getRRegUniverse_NANOMIPS ( Bool mode64 )
{
   /* The real-register universe is a big constant, so we just want to
      initialise it once.  rRegUniverse_MIPS_initted values: 0=not initted,
      1=initted for 32-bit-mode, 2=initted for 64-bit-mode */
   static RRegUniverse rRegUniverse_MIPS;
   static UInt rRegUniverse_MIPS_initted = 0;
   UInt gpr;

   RRegUniverse* ru = &rRegUniverse_MIPS;

   if (LIKELY(rRegUniverse_MIPS_initted == 1))
      return ru;

   vassert(!mode64);

   RRegUniverse__init(ru);

   /* Add the registers.  The initial segment of this array must be
      those available for allocation by reg-alloc, and those that
      follow are not available for allocation. */
   ru->allocable_start[HRcInt32] = ru->size;

   for (gpr = 16; gpr <= 22; gpr++) {
      ru->regs[ru->size] = mkHReg(False, HRcInt32, gpr, ru->size);
      ru->size++;
   }

   for (gpr = 12; gpr <= 15; gpr++) {
      ru->regs[ru->size] = mkHReg(False, HRcInt32, gpr, ru->size);
      ru->size++;
   }

   ru->regs[ru->size] = mkHReg(False, HRcInt32, 24, ru->size);

   ru->allocable_end[HRcInt32] = ru->size;
   ru->allocable = ++ru->size;

   for (gpr = 0; gpr <= 11; gpr++) {
      ru->regs[ru->size] = mkHReg(False, HRcInt32, gpr, ru->size);
      ru->size++;
   }

   ru->regs[ru->size] = mkHReg(False, HRcInt32, 23, ru->size);
   ru->size++;
   ru->regs[ru->size] = mkHReg(False, HRcInt32, 25, ru->size);
   ru->size++;
   ru->regs[ru->size] = mkHReg(False, HRcInt32, 29, ru->size);
   ru->size++;
   ru->regs[ru->size] = mkHReg(False, HRcInt32, 31, ru->size);
   ru->size++;

   rRegUniverse_MIPS_initted = 1;

   RRegUniverse__check_is_sane(ru);
   return ru;
}
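
/* To summarise the universe built above: the allocatable registers are
   $s0..$s6, $t0..$t3 and $t8 (twelve in all).  The remaining registers
   in the universe are present but not allocatable: $zero, $at, $t4, $t5
   and $a0..$a7 (argument/result registers, trashed across calls -- see
   the NMin_Call case in getRegUsage_NANOMIPSInstr), $s7 (the guest
   state pointer, GuestSP above), plus $t9, $sp and $ra. */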

/*---------------------------------------------------------------*/
/*--- end                                host_NANOMIPS_defs.c ---*/
/*---------------------------------------------------------------*/