2 /*---------------------------------------------------------------*/
3 /*--- begin host_x86_isel.c ---*/
4 /*---------------------------------------------------------------*/
6 /*
7 This file is part of Valgrind, a dynamic binary instrumentation
8 framework.
10 Copyright (C) 2004-2017 OpenWorks LLP
11 info@open-works.net
13 This program is free software; you can redistribute it and/or
14 modify it under the terms of the GNU General Public License as
15 published by the Free Software Foundation; either version 2 of the
16 License, or (at your option) any later version.
18 This program is distributed in the hope that it will be useful, but
19 WITHOUT ANY WARRANTY; without even the implied warranty of
20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 General Public License for more details.
23 You should have received a copy of the GNU General Public License
24 along with this program; if not, see <http://www.gnu.org/licenses/>.
26 The GNU General Public License is contained in the file COPYING.
28 Neither the names of the U.S. Department of Energy nor the
29 University of California nor the names of its contributors may be
30 used to endorse or promote products derived from this software
31 without prior written permission.
34 #include "libvex_basictypes.h"
35 #include "libvex_ir.h"
36 #include "libvex.h"
38 #include "ir_match.h"
39 #include "main_util.h"
40 #include "main_globals.h"
41 #include "host_generic_regs.h"
42 #include "host_generic_simd64.h"
43 #include "host_generic_simd128.h"
44 #include "host_x86_defs.h"
46 /* TODO 21 Apr 2005:
48 -- (Really an assembler issue) don't emit CMov32 as a cmov
49 insn, since that's expensive on P4 and conditional branch
50 is cheaper if (as we expect) the condition is highly predictable
52 -- preserve xmm registers across function calls (by declaring them
53 as trashed by call insns)
55 -- preserve x87 ST stack discipline across function calls. Sigh.
57 -- Check doHelperCall: if a call is conditional, we cannot safely
58 compute any regparm args directly to registers. Hence, the
59 fast-regparm marshalling should be restricted to unconditional
60 calls only.
63 /*---------------------------------------------------------*/
64 /*--- x87 control word stuff ---*/
65 /*---------------------------------------------------------*/
67 /* Vex-generated code expects to run with the FPU set as follows: all
68 exceptions masked, round-to-nearest, precision = 53 bits. This
69 corresponds to an FPU control word value of 0x027F.
71 Similarly the SSE control word (%mxcsr) should be 0x1F80.
73 %fpucw and %mxcsr should have these values on entry to
74 Vex-generated code, and those values should be
75 unchanged at exit.
78 #define DEFAULT_FPUCW 0x027F
80 /* debugging only, do not use */
81 /* define DEFAULT_FPUCW 0x037F */
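/* For reference, per the IA-32 control-word layouts: 0x027F sets the
   six x87 exception mask bits (bits 0-5), PC = 10b (bits 8-9, 53-bit
   precision) and RC = 00b (bits 10-11, round to nearest); the 0x037F
   debug variant differs only in PC = 11b (64-bit extended precision).
   Likewise MXCSR 0x1F80 sets the six SSE exception mask bits
   (bits 7-12) with RC = 00b (bits 13-14, round to nearest). */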
84 /*---------------------------------------------------------*/
85 /*--- misc helpers ---*/
86 /*---------------------------------------------------------*/
88 /* These are duplicated in guest-x86/toIR.c */
89 static IRExpr* unop ( IROp op, IRExpr* a )
91 return IRExpr_Unop(op, a);
94 static IRExpr* binop ( IROp op, IRExpr* a1, IRExpr* a2 )
96 return IRExpr_Binop(op, a1, a2);
99 static IRExpr* bind ( Int binder )
101 return IRExpr_Binder(binder);
104 static Bool isZeroU8 ( IRExpr* e )
106 return e->tag == Iex_Const
107 && e->Iex.Const.con->tag == Ico_U8
108 && e->Iex.Const.con->Ico.U8 == 0;
111 static Bool isZeroU32 ( IRExpr* e )
113 return e->tag == Iex_Const
114 && e->Iex.Const.con->tag == Ico_U32
115 && e->Iex.Const.con->Ico.U32 == 0;
118 //static Bool isZeroU64 ( IRExpr* e )
120 // return e->tag == Iex_Const
121 // && e->Iex.Const.con->tag == Ico_U64
122 // && e->Iex.Const.con->Ico.U64 == 0ULL;
126 /*---------------------------------------------------------*/
127 /*--- ISelEnv ---*/
128 /*---------------------------------------------------------*/
130 /* This carries around:
132 - A mapping from IRTemp to IRType, giving the type of any IRTemp we
133 might encounter. This is computed before insn selection starts,
134 and does not change.
136 - A mapping from IRTemp to HReg. This tells the insn selector
137 which virtual register(s) are associated with each IRTemp
138 temporary. This is computed before insn selection starts, and
139 does not change. We expect this mapping to map precisely the
140 same set of IRTemps as the type mapping does.
142 - vregmap holds the primary register for the IRTemp.
143 - vregmapHI is only used for 64-bit integer-typed
144 IRTemps. It holds the identity of a second
145 32-bit virtual HReg, which holds the high half
146 of the value.
148 - The code array, that is, the insns selected so far.
150 - A counter, for generating new virtual registers.
152 - The host subarchitecture we are selecting insns for.
153 This is set at the start and does not change.
155 - A Bool for indicating whether we may generate chain-me
156 instructions for control flow transfers, or whether we must use
157 XAssisted.
159 - The maximum guest address of any guest insn in this block.
160 Actually, the address of the highest-addressed byte from any insn
161 in this block. Is set at the start and does not change. This is
162 used for detecting jumps which are definitely forward-edges from
163 this block, and therefore can be made (chained) to the fast entry
164 point of the destination, thereby avoiding the destination's
165 event check.
167 Note, this is all (well, mostly) host-independent.
170 typedef
171 struct {
172 /* Constants -- set at the start and do not change. */
173 IRTypeEnv* type_env;
175 HReg* vregmap;
176 HReg* vregmapHI;
177 Int n_vregmap;
179 UInt hwcaps;
181 Bool chainingAllowed;
182 Addr32 max_ga;
184 /* These are modified as we go along. */
185 HInstrArray* code;
186 Int vreg_ctr;
188 ISelEnv;
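/* Example of the vregmap/vregmapHI pairing described above: an
   Ity_I64 IRTemp t is represented by two 32-bit vregs, with
   vregmap[t] holding the low half and vregmapHI[t] the high half;
   lookupIRTemp64 below hands back both. */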
191 static HReg lookupIRTemp ( ISelEnv* env, IRTemp tmp )
193 vassert(tmp >= 0);
194 vassert(tmp < env->n_vregmap);
195 return env->vregmap[tmp];
198 static void lookupIRTemp64 ( HReg* vrHI, HReg* vrLO, ISelEnv* env, IRTemp tmp )
200 vassert(tmp >= 0);
201 vassert(tmp < env->n_vregmap);
202 vassert(! hregIsInvalid(env->vregmapHI[tmp]));
203 *vrLO = env->vregmap[tmp];
204 *vrHI = env->vregmapHI[tmp];
207 static void addInstr ( ISelEnv* env, X86Instr* instr )
209 addHInstr(env->code, instr);
210 if (vex_traceflags & VEX_TRACE_VCODE) {
211 ppX86Instr(instr, False);
212 vex_printf("\n");
216 static HReg newVRegI ( ISelEnv* env )
218 HReg reg = mkHReg(True/*virtual reg*/, HRcInt32, 0/*enc*/, env->vreg_ctr);
219 env->vreg_ctr++;
220 return reg;
223 static HReg newVRegF ( ISelEnv* env )
225 HReg reg = mkHReg(True/*virtual reg*/, HRcFlt64, 0/*enc*/, env->vreg_ctr);
226 env->vreg_ctr++;
227 return reg;
230 static HReg newVRegV ( ISelEnv* env )
232 HReg reg = mkHReg(True/*virtual reg*/, HRcVec128, 0/*enc*/, env->vreg_ctr);
233 env->vreg_ctr++;
234 return reg;
238 /*---------------------------------------------------------*/
239 /*--- ISEL: Forward declarations ---*/
240 /*---------------------------------------------------------*/
242 /* These are organised as iselXXX and iselXXX_wrk pairs. The
243 iselXXX_wrk do the real work, but are not to be called directly.
244 For each XXX, iselXXX calls its iselXXX_wrk counterpart, then
245 checks that all returned registers are virtual. You should not
246 call the _wrk version directly.
248 static X86RMI* iselIntExpr_RMI_wrk ( ISelEnv* env, const IRExpr* e );
249 static X86RMI* iselIntExpr_RMI ( ISelEnv* env, const IRExpr* e );
251 static X86RI* iselIntExpr_RI_wrk ( ISelEnv* env, const IRExpr* e );
252 static X86RI* iselIntExpr_RI ( ISelEnv* env, const IRExpr* e );
254 static X86RM* iselIntExpr_RM_wrk ( ISelEnv* env, const IRExpr* e );
255 static X86RM* iselIntExpr_RM ( ISelEnv* env, const IRExpr* e );
257 static HReg iselIntExpr_R_wrk ( ISelEnv* env, const IRExpr* e );
258 static HReg iselIntExpr_R ( ISelEnv* env, const IRExpr* e );
260 static X86AMode* iselIntExpr_AMode_wrk ( ISelEnv* env, const IRExpr* e );
261 static X86AMode* iselIntExpr_AMode ( ISelEnv* env, const IRExpr* e );
263 static void iselInt64Expr_wrk ( HReg* rHi, HReg* rLo,
264 ISelEnv* env, const IRExpr* e );
265 static void iselInt64Expr ( HReg* rHi, HReg* rLo,
266 ISelEnv* env, const IRExpr* e );
268 static X86CondCode iselCondCode_wrk ( ISelEnv* env, const IRExpr* e );
269 static X86CondCode iselCondCode ( ISelEnv* env, const IRExpr* e );
271 static HReg iselDblExpr_wrk ( ISelEnv* env, const IRExpr* e );
272 static HReg iselDblExpr ( ISelEnv* env, const IRExpr* e );
274 static HReg iselFltExpr_wrk ( ISelEnv* env, const IRExpr* e );
275 static HReg iselFltExpr ( ISelEnv* env, const IRExpr* e );
277 static HReg iselVecExpr_wrk ( ISelEnv* env, const IRExpr* e );
278 static HReg iselVecExpr ( ISelEnv* env, const IRExpr* e );
281 /*---------------------------------------------------------*/
282 /*--- ISEL: Misc helpers ---*/
283 /*---------------------------------------------------------*/
285 /* Make an int reg-reg move. */
287 static X86Instr* mk_iMOVsd_RR ( HReg src, HReg dst )
289 vassert(hregClass(src) == HRcInt32);
290 vassert(hregClass(dst) == HRcInt32);
291 return X86Instr_Alu32R(Xalu_MOV, X86RMI_Reg(src), dst);
295 /* Make a vector reg-reg move. */
297 static X86Instr* mk_vMOVsd_RR ( HReg src, HReg dst )
299 vassert(hregClass(src) == HRcVec128);
300 vassert(hregClass(dst) == HRcVec128);
301 return X86Instr_SseReRg(Xsse_MOV, src, dst);
304 /* Advance/retreat %esp by n. */
306 static void add_to_esp ( ISelEnv* env, Int n )
308 vassert(n > 0 && n < 256 && (n%4) == 0);
309 addInstr(env,
310 X86Instr_Alu32R(Xalu_ADD, X86RMI_Imm(n), hregX86_ESP()));
313 static void sub_from_esp ( ISelEnv* env, Int n )
315 vassert(n > 0 && n < 256 && (n%4) == 0);
316 addInstr(env,
317 X86Instr_Alu32R(Xalu_SUB, X86RMI_Imm(n), hregX86_ESP()));
321 /* Given an amode, return one which references 4 bytes further
322 along. */
324 static X86AMode* advance4 ( X86AMode* am )
326 X86AMode* am4 = dopyX86AMode(am);
327 switch (am4->tag) {
328 case Xam_IRRS:
329 am4->Xam.IRRS.imm += 4; break;
330 case Xam_IR:
331 am4->Xam.IR.imm += 4; break;
332 default:
333 vpanic("advance4(x86,host)");
335 return am4;
339 /* Push an arg onto the host stack, in preparation for a call to a
340 helper function of some kind. Returns the number of 32-bit words
341 pushed. If we encounter an IRExpr_VECRET() then we expect that
342 r_vecRetAddr will be a valid register, that holds the relevant
343 address.
345 static Int pushArg ( ISelEnv* env, IRExpr* arg, HReg r_vecRetAddr )
347 if (UNLIKELY(arg->tag == Iex_VECRET)) {
348 vassert(0); //ATC
349 vassert(!hregIsInvalid(r_vecRetAddr));
350 addInstr(env, X86Instr_Push(X86RMI_Reg(r_vecRetAddr)));
351 return 1;
353 if (UNLIKELY(arg->tag == Iex_GSPTR)) {
354 addInstr(env, X86Instr_Push(X86RMI_Reg(hregX86_EBP())));
355 return 1;
357 /* Else it's a "normal" expression. */
358 IRType arg_ty = typeOfIRExpr(env->type_env, arg);
359 if (arg_ty == Ity_I32) {
360 addInstr(env, X86Instr_Push(iselIntExpr_RMI(env, arg)));
361 return 1;
362 } else
363 if (arg_ty == Ity_I64) {
364 HReg rHi, rLo;
365 iselInt64Expr(&rHi, &rLo, env, arg);
366 addInstr(env, X86Instr_Push(X86RMI_Reg(rHi)));
367 addInstr(env, X86Instr_Push(X86RMI_Reg(rLo)));
368 return 2;
370 ppIRExpr(arg);
371 vpanic("pushArg(x86): can't handle arg of this type");
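/* Note on the I64 case above: pushing rHi first and rLo second means
   the low word ends up at the lower stack address, giving the
   little-endian in-memory layout a cdecl callee expects for a 64-bit
   argument passed on the stack. */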
375 /* Complete the call to a helper function, by calling the
376 helper and clearing the args off the stack. */
378 static
379 void callHelperAndClearArgs ( ISelEnv* env, X86CondCode cc,
380 IRCallee* cee, Int n_arg_ws,
381 RetLoc rloc )
383 /* Complication. Need to decide which reg to use as the fn address
384 pointer, in a way that doesn't trash regparm-passed
385 parameters. */
386 vassert(sizeof(void*) == 4);
388 addInstr(env, X86Instr_Call( cc, (Addr)cee->addr,
389 cee->regparms, rloc));
390 if (n_arg_ws > 0)
391 add_to_esp(env, 4*n_arg_ws);
395 /* Used only in doHelperCall. See big comment in doHelperCall re
396 handling of regparm args. This function figures out whether
397 evaluation of an expression might require use of a fixed register.
398 If in doubt return True (safe but suboptimal).
400 static
401 Bool mightRequireFixedRegs ( IRExpr* e )
403 if (UNLIKELY(is_IRExpr_VECRET_or_GSPTR(e))) {
404 // These are always "safe" -- either a copy of %esp in some
405 // arbitrary vreg, or a copy of %ebp, respectively.
406 return False;
408 /* Else it's a "normal" expression. */
409 switch (e->tag) {
410 case Iex_RdTmp: case Iex_Const: case Iex_Get:
411 return False;
412 default:
413 return True;
418 /* Do a complete function call. |guard| is a Ity_Bit expression
419 indicating whether or not the call happens. If guard==NULL, the
420 call is unconditional. |retloc| is set to indicate where the
421 return value is after the call. The caller (of this fn) must
422 generate code to add |stackAdjustAfterCall| to the stack pointer
423 after the call is done. */
425 static
426 void doHelperCall ( /*OUT*/UInt* stackAdjustAfterCall,
427 /*OUT*/RetLoc* retloc,
428 ISelEnv* env,
429 IRExpr* guard,
430 IRCallee* cee, IRType retTy, IRExpr** args )
432 X86CondCode cc;
433 HReg argregs[3];
434 HReg tmpregs[3];
435 Bool danger;
436 Int not_done_yet, n_args, n_arg_ws, stack_limit,
437 i, argreg, argregX;
439 /* Set default returns. We'll update them later if needed. */
440 *stackAdjustAfterCall = 0;
441 *retloc = mk_RetLoc_INVALID();
443 /* These are used for cross-checking that IR-level constraints on
444 the use of Iex_VECRET and Iex_GSPTR are observed. */
445 UInt nVECRETs = 0;
446 UInt nGSPTRs = 0;
448 /* Marshal args for a call, do the call, and clear the stack.
449 Complexities to consider:
451 * The return type can be I{64,32,16,8} or V128. In the V128
452 case, it is expected that |args| will contain the special
453 node IRExpr_VECRET(), in which case this routine generates
454 code to allocate space on the stack for the vector return
455 value. Since we are not passing any scalars on the stack, it
456 is enough to preallocate the return space before marshalling
457 any arguments, in this case.
459 |args| may also contain IRExpr_GSPTR(), in which case the
460 value in %ebp is passed as the corresponding argument.
462 * If the callee claims regparmness of 1, 2 or 3, we must pass the
463 first 1, 2 or 3 args in registers (EAX, EDX, and ECX
464 respectively). To keep things relatively simple, only args of
465 type I32 may be passed as regparms -- just bomb out if anything
466 else turns up. Clearly this depends on the front ends not
467 trying to pass any other types as regparms.
470 /* 16 Nov 2004: the regparm handling is complicated by the
471 following problem.
473 Consider a call to a function with two regparm parameters:
474 f(e1,e2). We need to compute e1 into %eax and e2 into %edx.
475 Suppose code is first generated to compute e1 into %eax. Then,
476 code is generated to compute e2 into %edx. Unfortunately, if
477 the latter code sequence uses %eax, it will trash the value of
478 e1 computed by the former sequence. This could happen if (for
479 example) e2 itself involved a function call. In the code below,
480 args are evaluated right-to-left, not left-to-right, but the
481 principle and the problem are the same.
483 One solution is to compute all regparm-bound args into vregs
484 first, and once they are all done, move them to the relevant
485 real regs. This always gives correct code, but it also gives
486 a bunch of vreg-to-rreg moves which are usually redundant but
487 are hard for the register allocator to get rid of.
489 A compromise is to first examine all regparm'd argument
490 expressions. If they are all so simple that it is clear
491 they will be evaluated without use of any fixed registers,
492 use the old compute-directly-to-fixed-target scheme. If not,
493 be safe and use the via-vregs scheme.
495 Note this requires being able to examine an expression and
496 determine whether or not evaluation of it might use a fixed
497 register. That requires knowledge of how the rest of this
498 insn selector works. Currently just the following 3 are
499 regarded as safe -- hopefully they cover the majority of
500 arguments in practice: IRExpr_Tmp IRExpr_Const IRExpr_Get.
502 vassert(cee->regparms >= 0 && cee->regparms <= 3);
504 /* Count the number of args, and also the VECRETs and GSPTRs */
505 n_args = n_arg_ws = 0;
506 while (args[n_args]) {
507 IRExpr* arg = args[n_args];
508 n_args++;
509 if (UNLIKELY(arg->tag == Iex_VECRET)) {
510 nVECRETs++;
511 } else if (UNLIKELY(arg->tag == Iex_GSPTR)) {
512 nGSPTRs++;
516 /* If this fails, the IR is ill-formed */
517 vassert(nGSPTRs == 0 || nGSPTRs == 1);
519 /* If we have a VECRET, allocate space on the stack for the return
520 value, and record the stack pointer after that. */
521 HReg r_vecRetAddr = INVALID_HREG;
522 if (nVECRETs == 1) {
523 vassert(retTy == Ity_V128 || retTy == Ity_V256);
524 vassert(retTy != Ity_V256); // we don't handle that yet (if ever)
525 r_vecRetAddr = newVRegI(env);
526 sub_from_esp(env, 16);
527 addInstr(env, mk_iMOVsd_RR( hregX86_ESP(), r_vecRetAddr ));
528 } else {
529 // If either of these fail, the IR is ill-formed
530 vassert(retTy != Ity_V128 && retTy != Ity_V256);
531 vassert(nVECRETs == 0);
534 not_done_yet = n_args;
536 stack_limit = cee->regparms;
538 /* ------ BEGIN marshall all arguments ------ */
540 /* Push (R to L) the stack-passed args, [n_args-1 .. stack_limit] */
541 for (i = n_args-1; i >= stack_limit; i--) {
542 n_arg_ws += pushArg(env, args[i], r_vecRetAddr);
543 not_done_yet--;
546 /* args [stack_limit-1 .. 0] and possibly %ebp are to be passed in
547 registers. */
549 if (cee->regparms > 0) {
551 /* ------ BEGIN deal with regparms ------ */
553 /* deal with regparms, not forgetting %ebp if needed. */
554 argregs[0] = hregX86_EAX();
555 argregs[1] = hregX86_EDX();
556 argregs[2] = hregX86_ECX();
557 tmpregs[0] = tmpregs[1] = tmpregs[2] = INVALID_HREG;
559 argreg = cee->regparms;
561 /* In keeping with big comment above, detect potential danger
562 and use the via-vregs scheme if needed. */
563 danger = False;
564 for (i = stack_limit-1; i >= 0; i--) {
565 if (mightRequireFixedRegs(args[i])) {
566 danger = True;
567 break;
571 if (danger) {
573 /* Move via temporaries */
574 argregX = argreg;
575 for (i = stack_limit-1; i >= 0; i--) {
577 if (0) {
578 vex_printf("x86 host: register param is complex: ");
579 ppIRExpr(args[i]);
580 vex_printf("\n");
583 IRExpr* arg = args[i];
584 argreg--;
585 vassert(argreg >= 0);
586 if (UNLIKELY(arg->tag == Iex_VECRET)) {
587 vassert(0); //ATC
589 else if (UNLIKELY(arg->tag == Iex_GSPTR)) {
590 vassert(0); //ATC
591 } else {
592 vassert(typeOfIRExpr(env->type_env, arg) == Ity_I32);
593 tmpregs[argreg] = iselIntExpr_R(env, arg);
595 not_done_yet--;
597 for (i = stack_limit-1; i >= 0; i--) {
598 argregX--;
599 vassert(argregX >= 0);
600 addInstr( env, mk_iMOVsd_RR( tmpregs[argregX], argregs[argregX] ) );
603 } else {
604 /* It's safe to compute all regparm args directly into their
605 target registers. */
606 for (i = stack_limit-1; i >= 0; i--) {
607 IRExpr* arg = args[i];
608 argreg--;
609 vassert(argreg >= 0);
610 if (UNLIKELY(arg->tag == Iex_VECRET)) {
611 vassert(!hregIsInvalid(r_vecRetAddr));
612 addInstr(env, X86Instr_Alu32R(Xalu_MOV,
613 X86RMI_Reg(r_vecRetAddr),
614 argregs[argreg]));
616 else if (UNLIKELY(arg->tag == Iex_GSPTR)) {
617 vassert(0); //ATC
618 } else {
619 vassert(typeOfIRExpr(env->type_env, arg) == Ity_I32);
620 addInstr(env, X86Instr_Alu32R(Xalu_MOV,
621 iselIntExpr_RMI(env, arg),
622 argregs[argreg]));
624 not_done_yet--;
629 /* ------ END deal with regparms ------ */
633 vassert(not_done_yet == 0);
635 /* ------ END marshall all arguments ------ */
637 /* Now we can compute the condition. We can't do it earlier
638 because the argument computations could trash the condition
639 codes. Be a bit clever to handle the common case where the
640 guard is 1:Bit. */
641 cc = Xcc_ALWAYS;
642 if (guard) {
643 if (guard->tag == Iex_Const
644 && guard->Iex.Const.con->tag == Ico_U1
645 && guard->Iex.Const.con->Ico.U1 == True) {
646 /* unconditional -- do nothing */
647 } else {
648 cc = iselCondCode( env, guard );
652 /* Do final checks, set the return values, and generate the call
653 instruction proper. */
654 vassert(*stackAdjustAfterCall == 0);
655 vassert(is_RetLoc_INVALID(*retloc));
656 switch (retTy) {
657 case Ity_INVALID:
658 /* Function doesn't return a value. */
659 *retloc = mk_RetLoc_simple(RLPri_None);
660 break;
661 case Ity_I64:
662 *retloc = mk_RetLoc_simple(RLPri_2Int);
663 break;
664 case Ity_I32: case Ity_I16: case Ity_I8:
665 *retloc = mk_RetLoc_simple(RLPri_Int);
666 break;
667 case Ity_V128:
668 *retloc = mk_RetLoc_spRel(RLPri_V128SpRel, 0);
669 *stackAdjustAfterCall = 16;
670 break;
671 case Ity_V256:
672 vassert(0); // ATC
673 *retloc = mk_RetLoc_spRel(RLPri_V256SpRel, 0);
674 *stackAdjustAfterCall = 32;
675 break;
676 default:
677 /* IR can denote other possible return types, but we don't
678 handle those here. */
679 vassert(0);
682 /* Finally, generate the call itself. This needs the *retloc value
683 set in the switch above, which is why it's at the end. */
684 callHelperAndClearArgs( env, cc, cee, n_arg_ws, *retloc );
688 /* Given a guest-state array descriptor, an index expression and a
689 bias, generate an X86AMode holding the relevant guest state
690 offset. */
692 static
693 X86AMode* genGuestArrayOffset ( ISelEnv* env, IRRegArray* descr,
694 IRExpr* off, Int bias )
696 HReg tmp, roff;
697 Int elemSz = sizeofIRType(descr->elemTy);
698 Int nElems = descr->nElems;
699 Int shift = 0;
701 /* throw out any cases not generated by an x86 front end. In
702 theory there might be a day where we need to handle them -- if
703 we ever run non-x86-guest on x86 host. */
705 if (nElems != 8)
706 vpanic("genGuestArrayOffset(x86 host)(1)");
708 switch (elemSz) {
709 case 1: shift = 0; break;
710 case 4: shift = 2; break;
711 case 8: shift = 3; break;
712 default: vpanic("genGuestArrayOffset(x86 host)(2)");
715 /* Compute off into a reg, %off. Then return:
717 movl %off, %tmp
718 addl $bias, %tmp (if bias != 0)
719 andl $7, %tmp
720 ... base(%ebp, %tmp, shift) ...
722 tmp = newVRegI(env);
723 roff = iselIntExpr_R(env, off);
724 addInstr(env, mk_iMOVsd_RR(roff, tmp));
725 if (bias != 0) {
726 addInstr(env,
727 X86Instr_Alu32R(Xalu_ADD, X86RMI_Imm(bias), tmp));
729 addInstr(env,
730 X86Instr_Alu32R(Xalu_AND, X86RMI_Imm(7), tmp));
731 return
732 X86AMode_IRRS( descr->base, hregX86_EBP(), tmp, shift );
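/* Worked example (hypothetical values): for an 8-entry array of F64s
   (elemSz 8, so shift 3), with the index already in some vreg %ix and
   bias 1, the code above amounts to
      movl %ix,%tmp ; addl $1,%tmp ; andl $7,%tmp
   and the returned amode is descr->base(%ebp,%tmp,8). */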
736 /* Mess with the FPU's rounding mode: set to the default rounding mode
737 (DEFAULT_FPUCW). */
738 static
739 void set_FPU_rounding_default ( ISelEnv* env )
741 /* pushl $DEFAULT_FPUCW
742 fldcw 0(%esp)
743 addl $4, %esp
745 X86AMode* zero_esp = X86AMode_IR(0, hregX86_ESP());
746 addInstr(env, X86Instr_Push(X86RMI_Imm(DEFAULT_FPUCW)));
747 addInstr(env, X86Instr_FpLdCW(zero_esp));
748 add_to_esp(env, 4);
752 /* Mess with the FPU's rounding mode: 'mode' is an I32-typed
753 expression denoting a value in the range 0 .. 3, indicating a round
754 mode encoded as per type IRRoundingMode. Set the x87 FPU to have
755 the same rounding.
757 static
758 void set_FPU_rounding_mode ( ISelEnv* env, IRExpr* mode )
760 HReg rrm = iselIntExpr_R(env, mode);
761 HReg rrm2 = newVRegI(env);
762 X86AMode* zero_esp = X86AMode_IR(0, hregX86_ESP());
764 /* movl %rrm, %rrm2
765 andl $3, %rrm2 -- shouldn't be needed; paranoia
766 shll $10, %rrm2
767 orl $DEFAULT_FPUCW, %rrm2
768 pushl %rrm2
769 fldcw 0(%esp)
770 addl $4, %esp
772 addInstr(env, mk_iMOVsd_RR(rrm, rrm2));
773 addInstr(env, X86Instr_Alu32R(Xalu_AND, X86RMI_Imm(3), rrm2));
774 addInstr(env, X86Instr_Sh32(Xsh_SHL, 10, rrm2));
775 addInstr(env, X86Instr_Alu32R(Xalu_OR, X86RMI_Imm(DEFAULT_FPUCW), rrm2));
776 addInstr(env, X86Instr_Push(X86RMI_Reg(rrm2)));
777 addInstr(env, X86Instr_FpLdCW(zero_esp));
778 add_to_esp(env, 4);
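/* The bare shift-by-10 above works because IRRoundingMode's encoding
   (0 = nearest, 1 = -inf, 2 = +inf, 3 = zero) matches the x87 RC
   field, which lives in bits 11:10 of the control word; the AND with
   3 is only the paranoia noted in the comment. */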
782 /* Generate !src into a new vector register, and be sure that the code
783 is SSE1 compatible. Amazing that Intel doesn't offer a less crappy
784 way to do this.
786 static HReg do_sse_Not128 ( ISelEnv* env, HReg src )
788 HReg dst = newVRegV(env);
789 /* Set dst to zero. If dst contains a NaN then all hell might
790 break loose after the comparison. So, first zero it. */
791 addInstr(env, X86Instr_SseReRg(Xsse_XOR, dst, dst));
792 /* And now make it all 1s ... */
793 addInstr(env, X86Instr_Sse32Fx4(Xsse_CMPEQF, dst, dst));
794 /* Finally, xor 'src' into it. */
795 addInstr(env, X86Instr_SseReRg(Xsse_XOR, src, dst));
796 /* Doesn't that just totally suck? */
797 return dst;
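/* In SSE1 terms the three instructions above are roughly
      xorps %dst,%dst      -- dst = 0, so no NaNs
      cmpeqps %dst,%dst    -- 0 == 0 in every lane, so dst = all ones
      xorps %src,%dst      -- dst = ~src
   (assuming Xsse_CMPEQF on Sse32Fx4 encodes cmpeqps). */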
801 /* Round an x87 FPU value to 53-bit-mantissa precision, to be used
802 after most non-simple FPU operations (simple = +, -, *, / and
803 sqrt).
805 This could be done a lot more efficiently if needed, by loading
806 zero and adding it to the value to be rounded (fldz ; faddp?).
808 static void roundToF64 ( ISelEnv* env, HReg reg )
810 X86AMode* zero_esp = X86AMode_IR(0, hregX86_ESP());
811 sub_from_esp(env, 8);
812 addInstr(env, X86Instr_FpLdSt(False/*store*/, 8, reg, zero_esp));
813 addInstr(env, X86Instr_FpLdSt(True/*load*/, 8, reg, zero_esp));
814 add_to_esp(env, 8);
818 /*---------------------------------------------------------*/
819 /*--- ISEL: Integer expressions (32/16/8 bit) ---*/
820 /*---------------------------------------------------------*/
822 /* Select insns for an integer-typed expression, and add them to the
823 code list. Return a reg holding the result. This reg will be a
824 virtual register. THE RETURNED REG MUST NOT BE MODIFIED. If you
825 want to modify it, ask for a new vreg, copy it in there, and modify
826 the copy. The register allocator will do its best to map both
827 vregs to the same real register, so the copies will often disappear
828 later in the game.
830 This should handle expressions of 32, 16 and 8-bit type. All
831 results are returned in a 32-bit register. For 16- and 8-bit
832 expressions, the upper 16/24 bits are arbitrary, so you should mask
833 or sign extend partial values if necessary.
836 static HReg iselIntExpr_R ( ISelEnv* env, const IRExpr* e )
838 HReg r = iselIntExpr_R_wrk(env, e);
839 /* sanity checks ... */
840 # if 0
841 vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
842 # endif
843 vassert(hregClass(r) == HRcInt32);
844 vassert(hregIsVirtual(r));
845 return r;
848 /* DO NOT CALL THIS DIRECTLY ! */
849 static HReg iselIntExpr_R_wrk ( ISelEnv* env, const IRExpr* e )
851 MatchInfo mi;
853 IRType ty = typeOfIRExpr(env->type_env,e);
854 vassert(ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8);
856 switch (e->tag) {
858 /* --------- TEMP --------- */
859 case Iex_RdTmp: {
860 return lookupIRTemp(env, e->Iex.RdTmp.tmp);
863 /* --------- LOAD --------- */
864 case Iex_Load: {
865 HReg dst = newVRegI(env);
866 X86AMode* amode = iselIntExpr_AMode ( env, e->Iex.Load.addr );
868 /* We can't handle big-endian loads, nor load-linked. */
869 if (e->Iex.Load.end != Iend_LE)
870 goto irreducible;
872 if (ty == Ity_I32) {
873 addInstr(env, X86Instr_Alu32R(Xalu_MOV,
874 X86RMI_Mem(amode), dst) );
875 return dst;
877 if (ty == Ity_I16) {
878 addInstr(env, X86Instr_LoadEX(2,False,amode,dst));
879 return dst;
881 if (ty == Ity_I8) {
882 addInstr(env, X86Instr_LoadEX(1,False,amode,dst));
883 return dst;
885 break;
888 /* --------- TERNARY OP --------- */
889 case Iex_Triop: {
890 IRTriop *triop = e->Iex.Triop.details;
891 /* C3210 flags following FPU partial remainder (fprem), both
892 IEEE compliant (PREM1) and non-IEEE compliant (PREM). */
893 if (triop->op == Iop_PRemC3210F64
894 || triop->op == Iop_PRem1C3210F64) {
895 HReg junk = newVRegF(env);
896 HReg dst = newVRegI(env);
897 HReg srcL = iselDblExpr(env, triop->arg2);
898 HReg srcR = iselDblExpr(env, triop->arg3);
899 /* XXXROUNDINGFIXME */
900 /* set roundingmode here */
901 addInstr(env, X86Instr_FpBinary(
902 triop->op==Iop_PRemC3210F64
903 ? Xfp_PREM : Xfp_PREM1,
904 srcL,srcR,junk
906 /* The previous pseudo-insn will have left the FPU's C3210
907 flags set correctly. So bag them. */
908 addInstr(env, X86Instr_FpStSW_AX());
909 addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), dst));
910 addInstr(env, X86Instr_Alu32R(Xalu_AND, X86RMI_Imm(0x4700), dst));
911 return dst;
914 break;
917 /* --------- BINARY OP --------- */
918 case Iex_Binop: {
919 X86AluOp aluOp;
920 X86ShiftOp shOp;
922 /* Pattern: Sub32(0,x) */
923 if (e->Iex.Binop.op == Iop_Sub32 && isZeroU32(e->Iex.Binop.arg1)) {
924 HReg dst = newVRegI(env);
925 HReg reg = iselIntExpr_R(env, e->Iex.Binop.arg2);
926 addInstr(env, mk_iMOVsd_RR(reg,dst));
927 addInstr(env, X86Instr_Unary32(Xun_NEG,dst));
928 return dst;
931 /* Is it an addition or logical style op? */
932 switch (e->Iex.Binop.op) {
933 case Iop_Add8: case Iop_Add16: case Iop_Add32:
934 aluOp = Xalu_ADD; break;
935 case Iop_Sub8: case Iop_Sub16: case Iop_Sub32:
936 aluOp = Xalu_SUB; break;
937 case Iop_And8: case Iop_And16: case Iop_And32:
938 aluOp = Xalu_AND; break;
939 case Iop_Or8: case Iop_Or16: case Iop_Or32:
940 aluOp = Xalu_OR; break;
941 case Iop_Xor8: case Iop_Xor16: case Iop_Xor32:
942 aluOp = Xalu_XOR; break;
943 case Iop_Mul16: case Iop_Mul32:
944 aluOp = Xalu_MUL; break;
945 default:
946 aluOp = Xalu_INVALID; break;
948 /* For commutative ops we assume any literal
949 values are on the second operand. */
950 if (aluOp != Xalu_INVALID) {
951 HReg dst = newVRegI(env);
952 HReg reg = iselIntExpr_R(env, e->Iex.Binop.arg1);
953 X86RMI* rmi = iselIntExpr_RMI(env, e->Iex.Binop.arg2);
954 addInstr(env, mk_iMOVsd_RR(reg,dst));
955 addInstr(env, X86Instr_Alu32R(aluOp, rmi, dst));
956 return dst;
958 /* Could do better here; forcing the first arg into a reg
959 isn't always clever.
960 -- t70 = Xor32(And32(Xor32(LDle:I32(Add32(t41,0xFFFFFFA0:I32)),
961 LDle:I32(Add32(t41,0xFFFFFFA4:I32))),LDle:I32(Add32(
962 t41,0xFFFFFFA8:I32))),LDle:I32(Add32(t41,0xFFFFFFA0:I32)))
963 movl 0xFFFFFFA0(%vr41),%vr107
964 movl 0xFFFFFFA4(%vr41),%vr108
965 movl %vr107,%vr106
966 xorl %vr108,%vr106
967 movl 0xFFFFFFA8(%vr41),%vr109
968 movl %vr106,%vr105
969 andl %vr109,%vr105
970 movl 0xFFFFFFA0(%vr41),%vr110
971 movl %vr105,%vr104
972 xorl %vr110,%vr104
973 movl %vr104,%vr70
976 /* Perhaps a shift op? */
977 switch (e->Iex.Binop.op) {
978 case Iop_Shl32: case Iop_Shl16: case Iop_Shl8:
979 shOp = Xsh_SHL; break;
980 case Iop_Shr32: case Iop_Shr16: case Iop_Shr8:
981 shOp = Xsh_SHR; break;
982 case Iop_Sar32: case Iop_Sar16: case Iop_Sar8:
983 shOp = Xsh_SAR; break;
984 default:
985 shOp = Xsh_INVALID; break;
987 if (shOp != Xsh_INVALID) {
988 HReg dst = newVRegI(env);
990 /* regL = the value to be shifted */
991 HReg regL = iselIntExpr_R(env, e->Iex.Binop.arg1);
992 addInstr(env, mk_iMOVsd_RR(regL,dst));
994 /* Do any necessary widening for 16/8 bit operands */
995 switch (e->Iex.Binop.op) {
996 case Iop_Shr8:
997 addInstr(env, X86Instr_Alu32R(
998 Xalu_AND, X86RMI_Imm(0xFF), dst));
999 break;
1000 case Iop_Shr16:
1001 addInstr(env, X86Instr_Alu32R(
1002 Xalu_AND, X86RMI_Imm(0xFFFF), dst));
1003 break;
1004 case Iop_Sar8:
1005 addInstr(env, X86Instr_Sh32(Xsh_SHL, 24, dst));
1006 addInstr(env, X86Instr_Sh32(Xsh_SAR, 24, dst));
1007 break;
1008 case Iop_Sar16:
1009 addInstr(env, X86Instr_Sh32(Xsh_SHL, 16, dst));
1010 addInstr(env, X86Instr_Sh32(Xsh_SAR, 16, dst));
1011 break;
1012 default: break;
1015 /* Now consider the shift amount. If it's a literal, we
1016 can do a much better job than the general case. */
1017 if (e->Iex.Binop.arg2->tag == Iex_Const) {
1018 /* assert that the IR is well-typed */
1019 Int nshift;
1020 vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
1021 nshift = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
1022 vassert(nshift >= 0);
1023 if (nshift > 0)
1024 /* Can't allow nshift==0 since that means %cl */
1025 addInstr(env, X86Instr_Sh32( shOp, nshift, dst ));
1026 } else {
1027 /* General case; we have to force the amount into %cl. */
1028 HReg regR = iselIntExpr_R(env, e->Iex.Binop.arg2);
1029 addInstr(env, mk_iMOVsd_RR(regR,hregX86_ECX()));
1030 addInstr(env, X86Instr_Sh32(shOp, 0/* %cl */, dst));
1032 return dst;
1035 /* Handle misc other ops. */
1037 if (e->Iex.Binop.op == Iop_Max32U) {
1038 HReg src1 = iselIntExpr_R(env, e->Iex.Binop.arg1);
1039 HReg dst = newVRegI(env);
1040 HReg src2 = iselIntExpr_R(env, e->Iex.Binop.arg2);
1041 addInstr(env, mk_iMOVsd_RR(src1,dst));
1042 addInstr(env, X86Instr_Alu32R(Xalu_CMP, X86RMI_Reg(src2), dst));
1043 addInstr(env, X86Instr_CMov32(Xcc_B, X86RM_Reg(src2), dst));
1044 return dst;
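/* The CMP sets flags for dst - src2, and Xcc_B (unsigned below)
   triggers the conditional move, so dst ends up holding the unsigned
   maximum of the two operands. */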
1047 if (e->Iex.Binop.op == Iop_8HLto16) {
1048 HReg hi8 = newVRegI(env);
1049 HReg lo8 = newVRegI(env);
1050 HReg hi8s = iselIntExpr_R(env, e->Iex.Binop.arg1);
1051 HReg lo8s = iselIntExpr_R(env, e->Iex.Binop.arg2);
1052 addInstr(env, mk_iMOVsd_RR(hi8s, hi8));
1053 addInstr(env, mk_iMOVsd_RR(lo8s, lo8));
1054 addInstr(env, X86Instr_Sh32(Xsh_SHL, 8, hi8));
1055 addInstr(env, X86Instr_Alu32R(Xalu_AND, X86RMI_Imm(0xFF), lo8));
1056 addInstr(env, X86Instr_Alu32R(Xalu_OR, X86RMI_Reg(lo8), hi8));
1057 return hi8;
1060 if (e->Iex.Binop.op == Iop_16HLto32) {
1061 HReg hi16 = newVRegI(env);
1062 HReg lo16 = newVRegI(env);
1063 HReg hi16s = iselIntExpr_R(env, e->Iex.Binop.arg1);
1064 HReg lo16s = iselIntExpr_R(env, e->Iex.Binop.arg2);
1065 addInstr(env, mk_iMOVsd_RR(hi16s, hi16));
1066 addInstr(env, mk_iMOVsd_RR(lo16s, lo16));
1067 addInstr(env, X86Instr_Sh32(Xsh_SHL, 16, hi16));
1068 addInstr(env, X86Instr_Alu32R(Xalu_AND, X86RMI_Imm(0xFFFF), lo16));
1069 addInstr(env, X86Instr_Alu32R(Xalu_OR, X86RMI_Reg(lo16), hi16));
1070 return hi16;
1073 if (e->Iex.Binop.op == Iop_MullS16 || e->Iex.Binop.op == Iop_MullS8
1074 || e->Iex.Binop.op == Iop_MullU16 || e->Iex.Binop.op == Iop_MullU8) {
1075 HReg a16 = newVRegI(env);
1076 HReg b16 = newVRegI(env);
1077 HReg a16s = iselIntExpr_R(env, e->Iex.Binop.arg1);
1078 HReg b16s = iselIntExpr_R(env, e->Iex.Binop.arg2);
1079 Int shift = (e->Iex.Binop.op == Iop_MullS8
1080 || e->Iex.Binop.op == Iop_MullU8)
1081 ? 24 : 16;
1082 X86ShiftOp shr_op = (e->Iex.Binop.op == Iop_MullS8
1083 || e->Iex.Binop.op == Iop_MullS16)
1084 ? Xsh_SAR : Xsh_SHR;
1086 addInstr(env, mk_iMOVsd_RR(a16s, a16));
1087 addInstr(env, mk_iMOVsd_RR(b16s, b16));
1088 addInstr(env, X86Instr_Sh32(Xsh_SHL, shift, a16));
1089 addInstr(env, X86Instr_Sh32(Xsh_SHL, shift, b16));
1090 addInstr(env, X86Instr_Sh32(shr_op, shift, a16));
1091 addInstr(env, X86Instr_Sh32(shr_op, shift, b16));
1092 addInstr(env, X86Instr_Alu32R(Xalu_MUL, X86RMI_Reg(a16), b16));
1093 return b16;
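/* Each 8/16-bit operand is widened in place: shift left to put it in
   the top of the 32-bit reg, then shift back down arithmetically
   (signed) or logically (unsigned). The 32x32->32 multiply then
   leaves the full 16/32-bit product in the low bits of b16. */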
1096 if (e->Iex.Binop.op == Iop_CmpF64) {
1097 HReg fL = iselDblExpr(env, e->Iex.Binop.arg1);
1098 HReg fR = iselDblExpr(env, e->Iex.Binop.arg2);
1099 HReg dst = newVRegI(env);
1100 addInstr(env, X86Instr_FpCmp(fL,fR,dst));
1101 /* shift this right 8 bits so as to conform to CmpF64
1102 definition. */
1103 addInstr(env, X86Instr_Sh32(Xsh_SHR, 8, dst));
1104 return dst;
1107 if (e->Iex.Binop.op == Iop_F64toI32S
1108 || e->Iex.Binop.op == Iop_F64toI16S) {
1109 Int sz = e->Iex.Binop.op == Iop_F64toI16S ? 2 : 4;
1110 HReg rf = iselDblExpr(env, e->Iex.Binop.arg2);
1111 HReg dst = newVRegI(env);
1113 /* Used several times ... */
1114 X86AMode* zero_esp = X86AMode_IR(0, hregX86_ESP());
1116 /* rf now holds the value to be converted, and rrm holds the
1117 rounding mode value, encoded as per the IRRoundingMode
1118 enum. The first thing to do is set the FPU's rounding
1119 mode accordingly. */
1121 /* Create a space for the format conversion. */
1122 /* subl $4, %esp */
1123 sub_from_esp(env, 4);
1125 /* Set host rounding mode */
1126 set_FPU_rounding_mode( env, e->Iex.Binop.arg1 );
1128 /* gistw/l %rf, 0(%esp) */
1129 addInstr(env, X86Instr_FpLdStI(False/*store*/,
1130 toUChar(sz), rf, zero_esp));
1132 if (sz == 2) {
1133 /* movzwl 0(%esp), %dst */
1134 addInstr(env, X86Instr_LoadEX(2,False,zero_esp,dst));
1135 } else {
1136 /* movl 0(%esp), %dst */
1137 vassert(sz == 4);
1138 addInstr(env, X86Instr_Alu32R(
1139 Xalu_MOV, X86RMI_Mem(zero_esp), dst));
1142 /* Restore default FPU rounding. */
1143 set_FPU_rounding_default( env );
1145 /* addl $4, %esp */
1146 add_to_esp(env, 4);
1147 return dst;
1150 break;
1153 /* --------- UNARY OP --------- */
1154 case Iex_Unop: {
1156 /* 1Uto8(32to1(expr32)) */
1157 if (e->Iex.Unop.op == Iop_1Uto8) {
1158 DECLARE_PATTERN(p_32to1_then_1Uto8);
1159 DEFINE_PATTERN(p_32to1_then_1Uto8,
1160 unop(Iop_1Uto8,unop(Iop_32to1,bind(0))));
1161 if (matchIRExpr(&mi,p_32to1_then_1Uto8,e)) {
1162 const IRExpr* expr32 = mi.bindee[0];
1163 HReg dst = newVRegI(env);
1164 HReg src = iselIntExpr_R(env, expr32);
1165 addInstr(env, mk_iMOVsd_RR(src,dst) );
1166 addInstr(env, X86Instr_Alu32R(Xalu_AND,
1167 X86RMI_Imm(1), dst));
1168 return dst;
1172 /* 8Uto32(LDle(expr32)) */
1173 if (e->Iex.Unop.op == Iop_8Uto32) {
1174 DECLARE_PATTERN(p_LDle8_then_8Uto32);
1175 DEFINE_PATTERN(p_LDle8_then_8Uto32,
1176 unop(Iop_8Uto32,
1177 IRExpr_Load(Iend_LE,Ity_I8,bind(0))) );
1178 if (matchIRExpr(&mi,p_LDle8_then_8Uto32,e)) {
1179 HReg dst = newVRegI(env);
1180 X86AMode* amode = iselIntExpr_AMode ( env, mi.bindee[0] );
1181 addInstr(env, X86Instr_LoadEX(1,False,amode,dst));
1182 return dst;
1186 /* 8Sto32(LDle(expr32)) */
1187 if (e->Iex.Unop.op == Iop_8Sto32) {
1188 DECLARE_PATTERN(p_LDle8_then_8Sto32);
1189 DEFINE_PATTERN(p_LDle8_then_8Sto32,
1190 unop(Iop_8Sto32,
1191 IRExpr_Load(Iend_LE,Ity_I8,bind(0))) );
1192 if (matchIRExpr(&mi,p_LDle8_then_8Sto32,e)) {
1193 HReg dst = newVRegI(env);
1194 X86AMode* amode = iselIntExpr_AMode ( env, mi.bindee[0] );
1195 addInstr(env, X86Instr_LoadEX(1,True,amode,dst));
1196 return dst;
1200 /* 16Uto32(LDle(expr32)) */
1201 if (e->Iex.Unop.op == Iop_16Uto32) {
1202 DECLARE_PATTERN(p_LDle16_then_16Uto32);
1203 DEFINE_PATTERN(p_LDle16_then_16Uto32,
1204 unop(Iop_16Uto32,
1205 IRExpr_Load(Iend_LE,Ity_I16,bind(0))) );
1206 if (matchIRExpr(&mi,p_LDle16_then_16Uto32,e)) {
1207 HReg dst = newVRegI(env);
1208 X86AMode* amode = iselIntExpr_AMode ( env, mi.bindee[0] );
1209 addInstr(env, X86Instr_LoadEX(2,False,amode,dst));
1210 return dst;
1214 /* 8Uto32(GET:I8) */
1215 if (e->Iex.Unop.op == Iop_8Uto32) {
1216 if (e->Iex.Unop.arg->tag == Iex_Get) {
1217 HReg dst;
1218 X86AMode* amode;
1219 vassert(e->Iex.Unop.arg->Iex.Get.ty == Ity_I8);
1220 dst = newVRegI(env);
1221 amode = X86AMode_IR(e->Iex.Unop.arg->Iex.Get.offset,
1222 hregX86_EBP());
1223 addInstr(env, X86Instr_LoadEX(1,False,amode,dst));
1224 return dst;
1228 /* 16Uto32(GET:I16) */
1229 if (e->Iex.Unop.op == Iop_16Uto32) {
1230 if (e->Iex.Unop.arg->tag == Iex_Get) {
1231 HReg dst;
1232 X86AMode* amode;
1233 vassert(e->Iex.Unop.arg->Iex.Get.ty == Ity_I16);
1234 dst = newVRegI(env);
1235 amode = X86AMode_IR(e->Iex.Unop.arg->Iex.Get.offset,
1236 hregX86_EBP());
1237 addInstr(env, X86Instr_LoadEX(2,False,amode,dst));
1238 return dst;
1242 switch (e->Iex.Unop.op) {
1243 case Iop_8Uto16:
1244 case Iop_8Uto32:
1245 case Iop_16Uto32: {
1246 HReg dst = newVRegI(env);
1247 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1248 UInt mask = e->Iex.Unop.op==Iop_16Uto32 ? 0xFFFF : 0xFF;
1249 addInstr(env, mk_iMOVsd_RR(src,dst) );
1250 addInstr(env, X86Instr_Alu32R(Xalu_AND,
1251 X86RMI_Imm(mask), dst));
1252 return dst;
1254 case Iop_8Sto16:
1255 case Iop_8Sto32:
1256 case Iop_16Sto32: {
1257 HReg dst = newVRegI(env);
1258 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1259 UInt amt = e->Iex.Unop.op==Iop_16Sto32 ? 16 : 24;
1260 addInstr(env, mk_iMOVsd_RR(src,dst) );
1261 addInstr(env, X86Instr_Sh32(Xsh_SHL, amt, dst));
1262 addInstr(env, X86Instr_Sh32(Xsh_SAR, amt, dst));
1263 return dst;
1265 case Iop_Not8:
1266 case Iop_Not16:
1267 case Iop_Not32: {
1268 HReg dst = newVRegI(env);
1269 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1270 addInstr(env, mk_iMOVsd_RR(src,dst) );
1271 addInstr(env, X86Instr_Unary32(Xun_NOT,dst));
1272 return dst;
1274 case Iop_64HIto32: {
1275 HReg rHi, rLo;
1276 iselInt64Expr(&rHi,&rLo, env, e->Iex.Unop.arg);
1277 return rHi; /* and abandon rLo .. poor wee thing :-) */
1279 case Iop_64to32: {
1280 HReg rHi, rLo;
1281 iselInt64Expr(&rHi,&rLo, env, e->Iex.Unop.arg);
1282 return rLo; /* similar stupid comment to the above ... */
1284 case Iop_16HIto8:
1285 case Iop_32HIto16: {
1286 HReg dst = newVRegI(env);
1287 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1288 Int shift = e->Iex.Unop.op == Iop_16HIto8 ? 8 : 16;
1289 addInstr(env, mk_iMOVsd_RR(src,dst) );
1290 addInstr(env, X86Instr_Sh32(Xsh_SHR, shift, dst));
1291 return dst;
1293 case Iop_1Uto32:
1294 case Iop_1Uto8: {
1295 HReg dst = newVRegI(env);
1296 X86CondCode cond = iselCondCode(env, e->Iex.Unop.arg);
1297 addInstr(env, X86Instr_Set32(cond,dst));
1298 return dst;
1300 case Iop_1Sto8:
1301 case Iop_1Sto16:
1302 case Iop_1Sto32: {
1303 /* could do better than this, but for now ... */
1304 HReg dst = newVRegI(env);
1305 X86CondCode cond = iselCondCode(env, e->Iex.Unop.arg);
1306 addInstr(env, X86Instr_Set32(cond,dst));
1307 addInstr(env, X86Instr_Sh32(Xsh_SHL, 31, dst));
1308 addInstr(env, X86Instr_Sh32(Xsh_SAR, 31, dst));
1309 return dst;
1311 case Iop_Ctz32: {
1312 /* Count trailing zeroes, implemented by x86 'bsfl' */
1313 HReg dst = newVRegI(env);
1314 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1315 addInstr(env, X86Instr_Bsfr32(True,src,dst));
1316 return dst;
1318 case Iop_Clz32: {
1319 /* Count leading zeroes. Do 'bsrl' to establish the index
1320 of the highest set bit, and subtract that value from
1321 31. */
1322 HReg tmp = newVRegI(env);
1323 HReg dst = newVRegI(env);
1324 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1325 addInstr(env, X86Instr_Bsfr32(False,src,tmp));
1326 addInstr(env, X86Instr_Alu32R(Xalu_MOV,
1327 X86RMI_Imm(31), dst));
1328 addInstr(env, X86Instr_Alu32R(Xalu_SUB,
1329 X86RMI_Reg(tmp), dst));
1330 return dst;
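/* i.e. clz = 31 - bsr(src). The zero-input case gets no special
   handling here: bsrl leaves its destination undefined when the
   source is zero, which appears acceptable since the IR op is
   likewise undefined for a zero argument. */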
1333 case Iop_CmpwNEZ32: {
1334 HReg dst = newVRegI(env);
1335 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1336 addInstr(env, mk_iMOVsd_RR(src,dst));
1337 addInstr(env, X86Instr_Unary32(Xun_NEG,dst));
1338 addInstr(env, X86Instr_Alu32R(Xalu_OR,
1339 X86RMI_Reg(src), dst));
1340 addInstr(env, X86Instr_Sh32(Xsh_SAR, 31, dst));
1341 return dst;
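/* This yields 0 when src == 0 and 0xFFFFFFFF otherwise: for nonzero
   src, src | -src always has bit 31 set, and the arithmetic shift by
   31 then smears that bit across the whole register. */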
1343 case Iop_Left8:
1344 case Iop_Left16:
1345 case Iop_Left32: {
1346 HReg dst = newVRegI(env);
1347 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1348 addInstr(env, mk_iMOVsd_RR(src, dst));
1349 addInstr(env, X86Instr_Unary32(Xun_NEG, dst));
1350 addInstr(env, X86Instr_Alu32R(Xalu_OR, X86RMI_Reg(src), dst));
1351 return dst;
1354 case Iop_V128to32: {
1355 HReg dst = newVRegI(env);
1356 HReg vec = iselVecExpr(env, e->Iex.Unop.arg);
1357 X86AMode* esp0 = X86AMode_IR(0, hregX86_ESP());
1358 sub_from_esp(env, 16);
1359 addInstr(env, X86Instr_SseLdSt(False/*store*/, vec, esp0));
1360 addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(esp0), dst ));
1361 add_to_esp(env, 16);
1362 return dst;
1365 /* ReinterpF32asI32(e) */
1366 /* Given an IEEE754 single, produce an I32 with the same bit
1367 pattern. Keep stack 8-aligned even though only using 4
1368 bytes. */
1369 case Iop_ReinterpF32asI32: {
1370 HReg rf = iselFltExpr(env, e->Iex.Unop.arg);
1371 HReg dst = newVRegI(env);
1372 X86AMode* zero_esp = X86AMode_IR(0, hregX86_ESP());
1373 /* paranoia */
1374 set_FPU_rounding_default(env);
1375 /* subl $8, %esp */
1376 sub_from_esp(env, 8);
1377 /* gstF %rf, 0(%esp) */
1378 addInstr(env,
1379 X86Instr_FpLdSt(False/*store*/, 4, rf, zero_esp));
1380 /* movl 0(%esp), %dst */
1381 addInstr(env,
1382 X86Instr_Alu32R(Xalu_MOV, X86RMI_Mem(zero_esp), dst));
1383 /* addl $8, %esp */
1384 add_to_esp(env, 8);
1385 return dst;
1388 case Iop_16to8:
1389 case Iop_32to8:
1390 case Iop_32to16:
1391 /* These are no-ops. */
1392 return iselIntExpr_R(env, e->Iex.Unop.arg);
1394 case Iop_GetMSBs8x8: {
1395 /* Note: the following assumes the helper is of
1396 signature
1397 UInt fn ( ULong ), and is not a regparm fn.
1399 HReg xLo, xHi;
1400 HReg dst = newVRegI(env);
1401 Addr fn = (Addr)h_generic_calc_GetMSBs8x8;
1402 iselInt64Expr(&xHi, &xLo, env, e->Iex.Unop.arg);
1403 addInstr(env, X86Instr_Push(X86RMI_Reg(xHi)));
1404 addInstr(env, X86Instr_Push(X86RMI_Reg(xLo)));
1405 addInstr(env, X86Instr_Call( Xcc_ALWAYS, (Addr32)fn,
1406 0, mk_RetLoc_simple(RLPri_Int) ));
1407 add_to_esp(env, 2*4);
1408 addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), dst));
1409 return dst;
1412 default:
1413 break;
1415 break;
1418 /* --------- GET --------- */
1419 case Iex_Get: {
1420 if (ty == Ity_I32) {
1421 HReg dst = newVRegI(env);
1422 addInstr(env, X86Instr_Alu32R(
1423 Xalu_MOV,
1424 X86RMI_Mem(X86AMode_IR(e->Iex.Get.offset,
1425 hregX86_EBP())),
1426 dst));
1427 return dst;
1429 if (ty == Ity_I8 || ty == Ity_I16) {
1430 HReg dst = newVRegI(env);
1431 addInstr(env, X86Instr_LoadEX(
1432 toUChar(ty==Ity_I8 ? 1 : 2),
1433 False,
1434 X86AMode_IR(e->Iex.Get.offset,hregX86_EBP()),
1435 dst));
1436 return dst;
1438 break;
1441 case Iex_GetI: {
1442 X86AMode* am
1443 = genGuestArrayOffset(
1444 env, e->Iex.GetI.descr,
1445 e->Iex.GetI.ix, e->Iex.GetI.bias );
1446 HReg dst = newVRegI(env);
1447 if (ty == Ity_I8) {
1448 addInstr(env, X86Instr_LoadEX( 1, False, am, dst ));
1449 return dst;
1451 if (ty == Ity_I32) {
1452 addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Mem(am), dst));
1453 return dst;
1455 break;
1458 /* --------- CCALL --------- */
1459 case Iex_CCall: {
1460 HReg dst = newVRegI(env);
1461 vassert(ty == e->Iex.CCall.retty);
1463 /* be very restrictive for now. Only 32/64-bit ints allowed for
1464 args, and 32 bits for return type. Don't forget to change
1465 the RetLoc if more return types are allowed in future. */
1466 if (e->Iex.CCall.retty != Ity_I32)
1467 goto irreducible;
1469 /* Marshal args, do the call, clear stack. */
1470 UInt addToSp = 0;
1471 RetLoc rloc = mk_RetLoc_INVALID();
1472 doHelperCall( &addToSp, &rloc, env, NULL/*guard*/,
1473 e->Iex.CCall.cee, e->Iex.CCall.retty, e->Iex.CCall.args );
1474 vassert(is_sane_RetLoc(rloc));
1475 vassert(rloc.pri == RLPri_Int);
1476 vassert(addToSp == 0);
1478 addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), dst));
1479 return dst;
1482 /* --------- LITERAL --------- */
1483 /* 32/16/8-bit literals */
1484 case Iex_Const: {
1485 X86RMI* rmi = iselIntExpr_RMI ( env, e );
1486 HReg r = newVRegI(env);
1487 addInstr(env, X86Instr_Alu32R(Xalu_MOV, rmi, r));
1488 return r;
1491 /* --------- MULTIPLEX --------- */
1492 case Iex_ITE: { // VFD
1493 if ((ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8)
1494 && typeOfIRExpr(env->type_env,e->Iex.ITE.cond) == Ity_I1) {
1495 HReg r1 = iselIntExpr_R(env, e->Iex.ITE.iftrue);
1496 X86RM* r0 = iselIntExpr_RM(env, e->Iex.ITE.iffalse);
1497 HReg dst = newVRegI(env);
1498 addInstr(env, mk_iMOVsd_RR(r1,dst));
1499 X86CondCode cc = iselCondCode(env, e->Iex.ITE.cond);
1500 addInstr(env, X86Instr_CMov32(cc ^ 1, r0, dst));
1501 return dst;
1503 break;
1506 default:
1507 break;
1508 } /* switch (e->tag) */
1510 /* We get here if no pattern matched. */
1511 irreducible:
1512 ppIRExpr(e);
1513 vpanic("iselIntExpr_R: cannot reduce tree");
1517 /*---------------------------------------------------------*/
1518 /*--- ISEL: Integer expression auxiliaries ---*/
1519 /*---------------------------------------------------------*/
1521 /* --------------------- AMODEs --------------------- */
1523 /* Return an AMode which computes the value of the specified
1524 expression, possibly also adding insns to the code list as a
1525 result. The expression may only be a 32-bit one.
1528 static Bool sane_AMode ( X86AMode* am )
1530 switch (am->tag) {
1531 case Xam_IR:
1532 return
1533 toBool( hregClass(am->Xam.IR.reg) == HRcInt32
1534 && (hregIsVirtual(am->Xam.IR.reg)
1535 || sameHReg(am->Xam.IR.reg, hregX86_EBP())) );
1536 case Xam_IRRS:
1537 return
1538 toBool( hregClass(am->Xam.IRRS.base) == HRcInt32
1539 && hregIsVirtual(am->Xam.IRRS.base)
1540 && hregClass(am->Xam.IRRS.index) == HRcInt32
1541 && hregIsVirtual(am->Xam.IRRS.index) );
1542 default:
1543 vpanic("sane_AMode: unknown x86 amode tag");
1547 static X86AMode* iselIntExpr_AMode ( ISelEnv* env, const IRExpr* e )
1549 X86AMode* am = iselIntExpr_AMode_wrk(env, e);
1550 vassert(sane_AMode(am));
1551 return am;
1554 /* DO NOT CALL THIS DIRECTLY ! */
1555 static X86AMode* iselIntExpr_AMode_wrk ( ISelEnv* env, const IRExpr* e )
1557 IRType ty = typeOfIRExpr(env->type_env,e);
1558 vassert(ty == Ity_I32);
1560 /* Add32( Add32(expr1, Shl32(expr2, simm)), imm32 ) */
1561 if (e->tag == Iex_Binop
1562 && e->Iex.Binop.op == Iop_Add32
1563 && e->Iex.Binop.arg2->tag == Iex_Const
1564 && e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U32
1565 && e->Iex.Binop.arg1->tag == Iex_Binop
1566 && e->Iex.Binop.arg1->Iex.Binop.op == Iop_Add32
1567 && e->Iex.Binop.arg1->Iex.Binop.arg2->tag == Iex_Binop
1568 && e->Iex.Binop.arg1->Iex.Binop.arg2->Iex.Binop.op == Iop_Shl32
1569 && e->Iex.Binop.arg1
1570 ->Iex.Binop.arg2->Iex.Binop.arg2->tag == Iex_Const
1571 && e->Iex.Binop.arg1
1572 ->Iex.Binop.arg2->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8) {
1573 UInt shift = e->Iex.Binop.arg1
1574 ->Iex.Binop.arg2->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
1575 UInt imm32 = e->Iex.Binop.arg2->Iex.Const.con->Ico.U32;
1576 if (shift == 1 || shift == 2 || shift == 3) {
1577 HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1->Iex.Binop.arg1);
1578 HReg r2 = iselIntExpr_R(env, e->Iex.Binop.arg1
1579 ->Iex.Binop.arg2->Iex.Binop.arg1 );
1580 return X86AMode_IRRS(imm32, r1, r2, shift);
1584 /* Add32(expr1, Shl32(expr2, imm)) */
1585 if (e->tag == Iex_Binop
1586 && e->Iex.Binop.op == Iop_Add32
1587 && e->Iex.Binop.arg2->tag == Iex_Binop
1588 && e->Iex.Binop.arg2->Iex.Binop.op == Iop_Shl32
1589 && e->Iex.Binop.arg2->Iex.Binop.arg2->tag == Iex_Const
1590 && e->Iex.Binop.arg2->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8) {
1591 UInt shift = e->Iex.Binop.arg2->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
1592 if (shift == 1 || shift == 2 || shift == 3) {
1593 HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1);
1594 HReg r2 = iselIntExpr_R(env, e->Iex.Binop.arg2->Iex.Binop.arg1 );
1595 return X86AMode_IRRS(0, r1, r2, shift);
1599 /* Add32(expr,i) */
1600 if (e->tag == Iex_Binop
1601 && e->Iex.Binop.op == Iop_Add32
1602 && e->Iex.Binop.arg2->tag == Iex_Const
1603 && e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U32) {
1604 HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1);
1605 return X86AMode_IR(e->Iex.Binop.arg2->Iex.Const.con->Ico.U32, r1);
1608 /* Doesn't match anything in particular. Generate it into
1609 a register and use that. */
1611 HReg r1 = iselIntExpr_R(env, e);
1612 return X86AMode_IR(0, r1);
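/* Illustration (hypothetical temps): an address expression such as
   Add32(Add32(t1, Shl32(t2, 0x2:I8)), 0x1C:I32) matches the first
   pattern above and folds into the single amode 0x1C(%t1,%t2,4),
   rather than being computed with separate shift and add insns. */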
1617 /* --------------------- RMIs --------------------- */
1619 /* Similarly, calculate an expression into an X86RMI operand. As with
1620 iselIntExpr_R, the expression can have type 32, 16 or 8 bits. */
1622 static X86RMI* iselIntExpr_RMI ( ISelEnv* env, const IRExpr* e )
1624 X86RMI* rmi = iselIntExpr_RMI_wrk(env, e);
1625 /* sanity checks ... */
1626 switch (rmi->tag) {
1627 case Xrmi_Imm:
1628 return rmi;
1629 case Xrmi_Reg:
1630 vassert(hregClass(rmi->Xrmi.Reg.reg) == HRcInt32);
1631 vassert(hregIsVirtual(rmi->Xrmi.Reg.reg));
1632 return rmi;
1633 case Xrmi_Mem:
1634 vassert(sane_AMode(rmi->Xrmi.Mem.am));
1635 return rmi;
1636 default:
1637 vpanic("iselIntExpr_RMI: unknown x86 RMI tag");
1641 /* DO NOT CALL THIS DIRECTLY ! */
1642 static X86RMI* iselIntExpr_RMI_wrk ( ISelEnv* env, const IRExpr* e )
1644 IRType ty = typeOfIRExpr(env->type_env,e);
1645 vassert(ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8);
1647 /* special case: immediate */
1648 if (e->tag == Iex_Const) {
1649 UInt u;
1650 switch (e->Iex.Const.con->tag) {
1651 case Ico_U32: u = e->Iex.Const.con->Ico.U32; break;
1652 case Ico_U16: u = 0xFFFF & (e->Iex.Const.con->Ico.U16); break;
1653 case Ico_U8: u = 0xFF & (e->Iex.Const.con->Ico.U8); break;
1654 default: vpanic("iselIntExpr_RMI.Iex_Const(x86h)");
1656 return X86RMI_Imm(u);
1659 /* special case: 32-bit GET */
1660 if (e->tag == Iex_Get && ty == Ity_I32) {
1661 return X86RMI_Mem(X86AMode_IR(e->Iex.Get.offset,
1662 hregX86_EBP()));
1665 /* special case: 32-bit load from memory */
1666 if (e->tag == Iex_Load && ty == Ity_I32
1667 && e->Iex.Load.end == Iend_LE) {
1668 X86AMode* am = iselIntExpr_AMode(env, e->Iex.Load.addr);
1669 return X86RMI_Mem(am);
1672 /* default case: calculate into a register and return that */
1674 HReg r = iselIntExpr_R ( env, e );
1675 return X86RMI_Reg(r);
1680 /* --------------------- RIs --------------------- */
1682 /* Calculate an expression into an X86RI operand. As with
1683 iselIntExpr_R, the expression can have type 32, 16 or 8 bits. */
1685 static X86RI* iselIntExpr_RI ( ISelEnv* env, const IRExpr* e )
1687 X86RI* ri = iselIntExpr_RI_wrk(env, e);
1688 /* sanity checks ... */
1689 switch (ri->tag) {
1690 case Xri_Imm:
1691 return ri;
1692 case Xri_Reg:
1693 vassert(hregClass(ri->Xri.Reg.reg) == HRcInt32);
1694 vassert(hregIsVirtual(ri->Xri.Reg.reg));
1695 return ri;
1696 default:
1697 vpanic("iselIntExpr_RI: unknown x86 RI tag");
1701 /* DO NOT CALL THIS DIRECTLY ! */
1702 static X86RI* iselIntExpr_RI_wrk ( ISelEnv* env, const IRExpr* e )
1704 IRType ty = typeOfIRExpr(env->type_env,e);
1705 vassert(ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8);
1707 /* special case: immediate */
1708 if (e->tag == Iex_Const) {
1709 UInt u;
1710 switch (e->Iex.Const.con->tag) {
1711 case Ico_U32: u = e->Iex.Const.con->Ico.U32; break;
1712 case Ico_U16: u = 0xFFFF & (e->Iex.Const.con->Ico.U16); break;
1713 case Ico_U8: u = 0xFF & (e->Iex.Const.con->Ico.U8); break;
1714 default: vpanic("iselIntExpr_RI.Iex_Const(x86h)");
1716 return X86RI_Imm(u);
1719 /* default case: calculate into a register and return that */
1721 HReg r = iselIntExpr_R ( env, e );
1722 return X86RI_Reg(r);
1727 /* --------------------- RMs --------------------- */
1729 /* Similarly, calculate an expression into an X86RM operand. As with
1730 iselIntExpr_R, the expression can have type 32, 16 or 8 bits. */
1732 static X86RM* iselIntExpr_RM ( ISelEnv* env, const IRExpr* e )
1734 X86RM* rm = iselIntExpr_RM_wrk(env, e);
1735 /* sanity checks ... */
1736 switch (rm->tag) {
1737 case Xrm_Reg:
1738 vassert(hregClass(rm->Xrm.Reg.reg) == HRcInt32);
1739 vassert(hregIsVirtual(rm->Xrm.Reg.reg));
1740 return rm;
1741 case Xrm_Mem:
1742 vassert(sane_AMode(rm->Xrm.Mem.am));
1743 return rm;
1744 default:
1745 vpanic("iselIntExpr_RM: unknown x86 RM tag");
1749 /* DO NOT CALL THIS DIRECTLY ! */
1750 static X86RM* iselIntExpr_RM_wrk ( ISelEnv* env, const IRExpr* e )
1752 IRType ty = typeOfIRExpr(env->type_env,e);
1753 vassert(ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8);
1755 /* special case: 32-bit GET */
1756 if (e->tag == Iex_Get && ty == Ity_I32) {
1757 return X86RM_Mem(X86AMode_IR(e->Iex.Get.offset,
1758 hregX86_EBP()));
1761 /* special case: load from memory */
1763 /* default case: calculate into a register and return that */
1765 HReg r = iselIntExpr_R ( env, e );
1766 return X86RM_Reg(r);
1771 /* --------------------- CONDCODE --------------------- */
1773 /* Generate code to evaluate a bit-typed expression, returning the
1774 condition code that corresponds to the expression notionally
1775 having returned 1. */
1777 static X86CondCode iselCondCode ( ISelEnv* env, const IRExpr* e )
1779 /* Uh, there's nothing we can sanity check here, unfortunately. */
1780 return iselCondCode_wrk(env,e);
1783 /* DO NOT CALL THIS DIRECTLY ! */
1784 static X86CondCode iselCondCode_wrk ( ISelEnv* env, const IRExpr* e )
1786 MatchInfo mi;
1788 vassert(e);
1789 vassert(typeOfIRExpr(env->type_env,e) == Ity_I1);
1791 /* var */
1792 if (e->tag == Iex_RdTmp) {
1793 HReg r32 = lookupIRTemp(env, e->Iex.RdTmp.tmp);
1794 /* Test32 doesn't modify r32; so this is OK. */
1795 addInstr(env, X86Instr_Test32(1,X86RM_Reg(r32)));
1796 return Xcc_NZ;
1799 /* Constant 1:Bit */
1800 if (e->tag == Iex_Const) {
1801 HReg r;
1802 vassert(e->Iex.Const.con->tag == Ico_U1);
1803 vassert(e->Iex.Const.con->Ico.U1 == True
1804 || e->Iex.Const.con->Ico.U1 == False);
1805 r = newVRegI(env);
1806 addInstr(env, X86Instr_Alu32R(Xalu_MOV,X86RMI_Imm(0),r));
1807 addInstr(env, X86Instr_Alu32R(Xalu_XOR,X86RMI_Reg(r),r));
1808 return e->Iex.Const.con->Ico.U1 ? Xcc_Z : Xcc_NZ;
1811 /* Not1(e) */
1812 if (e->tag == Iex_Unop && e->Iex.Unop.op == Iop_Not1) {
1813 /* Generate code for the arg, and negate the test condition */
1814 return 1 ^ iselCondCode(env, e->Iex.Unop.arg);
1817 /* --- patterns rooted at: 32to1 --- */
1819 if (e->tag == Iex_Unop
1820 && e->Iex.Unop.op == Iop_32to1) {
1821 X86RM* rm = iselIntExpr_RM(env, e->Iex.Unop.arg);
1822 addInstr(env, X86Instr_Test32(1,rm));
1823 return Xcc_NZ;
1826 /* --- patterns rooted at: CmpNEZ8 --- */
1828 /* CmpNEZ8(x) */
1829 if (e->tag == Iex_Unop
1830 && e->Iex.Unop.op == Iop_CmpNEZ8) {
1831 X86RM* rm = iselIntExpr_RM(env, e->Iex.Unop.arg);
1832 addInstr(env, X86Instr_Test32(0xFF,rm));
1833 return Xcc_NZ;
1836 /* --- patterns rooted at: CmpNEZ16 --- */
1838 /* CmpNEZ16(x) */
1839 if (e->tag == Iex_Unop
1840 && e->Iex.Unop.op == Iop_CmpNEZ16) {
1841 X86RM* rm = iselIntExpr_RM(env, e->Iex.Unop.arg);
1842 addInstr(env, X86Instr_Test32(0xFFFF,rm));
1843 return Xcc_NZ;
1846 /* --- patterns rooted at: CmpNEZ32 --- */
1848 /* CmpNEZ32(And32(x,y)) */
1850 DECLARE_PATTERN(p_CmpNEZ32_And32);
1851 DEFINE_PATTERN(p_CmpNEZ32_And32,
1852 unop(Iop_CmpNEZ32, binop(Iop_And32, bind(0), bind(1))));
1853 if (matchIRExpr(&mi, p_CmpNEZ32_And32, e)) {
1854 HReg r0 = iselIntExpr_R(env, mi.bindee[0]);
1855 X86RMI* rmi1 = iselIntExpr_RMI(env, mi.bindee[1]);
1856 HReg tmp = newVRegI(env);
1857 addInstr(env, mk_iMOVsd_RR(r0, tmp));
1858 addInstr(env, X86Instr_Alu32R(Xalu_AND,rmi1,tmp));
1859 return Xcc_NZ;
1863 /* CmpNEZ32(Or32(x,y)) */
1865 DECLARE_PATTERN(p_CmpNEZ32_Or32);
1866 DEFINE_PATTERN(p_CmpNEZ32_Or32,
1867 unop(Iop_CmpNEZ32, binop(Iop_Or32, bind(0), bind(1))));
1868 if (matchIRExpr(&mi, p_CmpNEZ32_Or32, e)) {
1869 HReg r0 = iselIntExpr_R(env, mi.bindee[0]);
1870 X86RMI* rmi1 = iselIntExpr_RMI(env, mi.bindee[1]);
1871 HReg tmp = newVRegI(env);
1872 addInstr(env, mk_iMOVsd_RR(r0, tmp));
1873 addInstr(env, X86Instr_Alu32R(Xalu_OR,rmi1,tmp));
1874 return Xcc_NZ;
1878 /* CmpNEZ32(GET(..):I32) */
1879 if (e->tag == Iex_Unop
1880 && e->Iex.Unop.op == Iop_CmpNEZ32
1881 && e->Iex.Unop.arg->tag == Iex_Get) {
1882 X86AMode* am = X86AMode_IR(e->Iex.Unop.arg->Iex.Get.offset,
1883 hregX86_EBP());
1884 addInstr(env, X86Instr_Alu32M(Xalu_CMP, X86RI_Imm(0), am));
1885 return Xcc_NZ;
1888 /* CmpNEZ32(x) */
1889 if (e->tag == Iex_Unop
1890 && e->Iex.Unop.op == Iop_CmpNEZ32) {
1891 HReg r1 = iselIntExpr_R(env, e->Iex.Unop.arg);
1892 X86RMI* rmi2 = X86RMI_Imm(0);
1893 addInstr(env, X86Instr_Alu32R(Xalu_CMP,rmi2,r1));
1894 return Xcc_NZ;
1897 /* --- patterns rooted at: CmpNEZ64 --- */
1899 /* CmpNEZ64(Or64(x,y)) */
1901 DECLARE_PATTERN(p_CmpNEZ64_Or64);
1902 DEFINE_PATTERN(p_CmpNEZ64_Or64,
1903 unop(Iop_CmpNEZ64, binop(Iop_Or64, bind(0), bind(1))));
1904 if (matchIRExpr(&mi, p_CmpNEZ64_Or64, e)) {
1905 HReg hi1, lo1, hi2, lo2;
1906 HReg tmp = newVRegI(env);
1907 iselInt64Expr( &hi1, &lo1, env, mi.bindee[0] );
1908 addInstr(env, mk_iMOVsd_RR(hi1, tmp));
1909 addInstr(env, X86Instr_Alu32R(Xalu_OR,X86RMI_Reg(lo1),tmp));
1910 iselInt64Expr( &hi2, &lo2, env, mi.bindee[1] );
1911 addInstr(env, X86Instr_Alu32R(Xalu_OR,X86RMI_Reg(hi2),tmp));
1912 addInstr(env, X86Instr_Alu32R(Xalu_OR,X86RMI_Reg(lo2),tmp));
1913 return Xcc_NZ;
1917 /* CmpNEZ64(x) */
1918 if (e->tag == Iex_Unop
1919 && e->Iex.Unop.op == Iop_CmpNEZ64) {
1920 HReg hi, lo;
1921 HReg tmp = newVRegI(env);
1922 iselInt64Expr( &hi, &lo, env, e->Iex.Unop.arg );
1923 addInstr(env, mk_iMOVsd_RR(hi, tmp));
1924 addInstr(env, X86Instr_Alu32R(Xalu_OR,X86RMI_Reg(lo), tmp));
1925 return Xcc_NZ;
1928 /* --- patterns rooted at: Cmp{EQ,NE}{8,16} --- */
1930 /* CmpEQ8 / CmpNE8 */
1931 if (e->tag == Iex_Binop
1932 && (e->Iex.Binop.op == Iop_CmpEQ8
1933 || e->Iex.Binop.op == Iop_CmpNE8
1934 || e->Iex.Binop.op == Iop_CasCmpEQ8
1935 || e->Iex.Binop.op == Iop_CasCmpNE8)) {
1936 if (isZeroU8(e->Iex.Binop.arg2)) {
1937 HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1);
1938 addInstr(env, X86Instr_Test32(0xFF,X86RM_Reg(r1)));
1939 switch (e->Iex.Binop.op) {
1940 case Iop_CmpEQ8: case Iop_CasCmpEQ8: return Xcc_Z;
1941 case Iop_CmpNE8: case Iop_CasCmpNE8: return Xcc_NZ;
1942 default: vpanic("iselCondCode(x86): CmpXX8(expr,0:I8)");
1944 } else {
1945 HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1);
1946 X86RMI* rmi2 = iselIntExpr_RMI(env, e->Iex.Binop.arg2);
1947 HReg r = newVRegI(env);
1948 addInstr(env, mk_iMOVsd_RR(r1,r));
1949 addInstr(env, X86Instr_Alu32R(Xalu_XOR,rmi2,r));
1950 addInstr(env, X86Instr_Test32(0xFF,X86RM_Reg(r)));
1951 switch (e->Iex.Binop.op) {
1952 case Iop_CmpEQ8: case Iop_CasCmpEQ8: return Xcc_Z;
1953 case Iop_CmpNE8: case Iop_CasCmpNE8: return Xcc_NZ;
1954 default: vpanic("iselCondCode(x86): CmpXX8(expr,expr)");
1959 /* CmpEQ16 / CmpNE16 */
1960 if (e->tag == Iex_Binop
1961 && (e->Iex.Binop.op == Iop_CmpEQ16
1962 || e->Iex.Binop.op == Iop_CmpNE16
1963 || e->Iex.Binop.op == Iop_CasCmpEQ16
1964 || e->Iex.Binop.op == Iop_CasCmpNE16
1965 || e->Iex.Binop.op == Iop_ExpCmpNE16)) {
1966 HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1);
1967 X86RMI* rmi2 = iselIntExpr_RMI(env, e->Iex.Binop.arg2);
1968 HReg r = newVRegI(env);
1969 addInstr(env, mk_iMOVsd_RR(r1,r));
1970 addInstr(env, X86Instr_Alu32R(Xalu_XOR,rmi2,r));
1971 addInstr(env, X86Instr_Test32(0xFFFF,X86RM_Reg(r)));
1972 switch (e->Iex.Binop.op) {
1973 case Iop_CmpEQ16: case Iop_CasCmpEQ16:
1974 return Xcc_Z;
1975 case Iop_CmpNE16: case Iop_CasCmpNE16: case Iop_ExpCmpNE16:
1976 return Xcc_NZ;
1977 default:
1978 vpanic("iselCondCode(x86): CmpXX16");
1982 /* CmpNE32(ccall, 32-bit constant) (--smc-check=all optimisation).
1983 Saves a "movl %eax, %tmp" compared to the default route. */
1984 if (e->tag == Iex_Binop
1985 && e->Iex.Binop.op == Iop_CmpNE32
1986 && e->Iex.Binop.arg1->tag == Iex_CCall
1987 && e->Iex.Binop.arg2->tag == Iex_Const) {
1988 IRExpr* cal = e->Iex.Binop.arg1;
1989 IRExpr* con = e->Iex.Binop.arg2;
1990 /* clone & partial-eval of generic Iex_CCall and Iex_Const cases */
1991 vassert(cal->Iex.CCall.retty == Ity_I32); /* else ill-typed IR */
1992 vassert(con->Iex.Const.con->tag == Ico_U32);
1993 /* Marshal args, do the call. */
1994 UInt addToSp = 0;
1995 RetLoc rloc = mk_RetLoc_INVALID();
1996 doHelperCall( &addToSp, &rloc, env, NULL/*guard*/,
1997 cal->Iex.CCall.cee,
1998 cal->Iex.CCall.retty, cal->Iex.CCall.args );
1999 vassert(is_sane_RetLoc(rloc));
2000 vassert(rloc.pri == RLPri_Int);
2001 vassert(addToSp == 0);
2002 /* */
2003 addInstr(env, X86Instr_Alu32R(Xalu_CMP,
2004 X86RMI_Imm(con->Iex.Const.con->Ico.U32),
2005 hregX86_EAX()));
2006 return Xcc_NZ;
2009 /* Cmp*32*(x,y) */
2010 if (e->tag == Iex_Binop
2011 && (e->Iex.Binop.op == Iop_CmpEQ32
2012 || e->Iex.Binop.op == Iop_CmpNE32
2013 || e->Iex.Binop.op == Iop_CmpLT32S
2014 || e->Iex.Binop.op == Iop_CmpLT32U
2015 || e->Iex.Binop.op == Iop_CmpLE32S
2016 || e->Iex.Binop.op == Iop_CmpLE32U
2017 || e->Iex.Binop.op == Iop_CasCmpEQ32
2018 || e->Iex.Binop.op == Iop_CasCmpNE32
2019 || e->Iex.Binop.op == Iop_ExpCmpNE32)) {
2020 HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1);
2021 X86RMI* rmi2 = iselIntExpr_RMI(env, e->Iex.Binop.arg2);
2022 addInstr(env, X86Instr_Alu32R(Xalu_CMP,rmi2,r1));
2023 switch (e->Iex.Binop.op) {
2024 case Iop_CmpEQ32: case Iop_CasCmpEQ32: return Xcc_Z;
2025 case Iop_CmpNE32:
2026 case Iop_CasCmpNE32: case Iop_ExpCmpNE32: return Xcc_NZ;
2027 case Iop_CmpLT32S: return Xcc_L;
2028 case Iop_CmpLT32U: return Xcc_B;
2029 case Iop_CmpLE32S: return Xcc_LE;
2030 case Iop_CmpLE32U: return Xcc_BE;
2031 default: vpanic("iselCondCode(x86): CmpXX32");
2035 /* CmpNE64 */
2036 if (e->tag == Iex_Binop
2037 && (e->Iex.Binop.op == Iop_CmpNE64
2038 || e->Iex.Binop.op == Iop_CmpEQ64)) {
2039 HReg hi1, hi2, lo1, lo2;
2040 HReg tHi = newVRegI(env);
2041 HReg tLo = newVRegI(env);
2042 iselInt64Expr( &hi1, &lo1, env, e->Iex.Binop.arg1 );
2043 iselInt64Expr( &hi2, &lo2, env, e->Iex.Binop.arg2 );
2044 addInstr(env, mk_iMOVsd_RR(hi1, tHi));
2045 addInstr(env, X86Instr_Alu32R(Xalu_XOR,X86RMI_Reg(hi2), tHi));
2046 addInstr(env, mk_iMOVsd_RR(lo1, tLo));
2047 addInstr(env, X86Instr_Alu32R(Xalu_XOR,X86RMI_Reg(lo2), tLo));
2048 addInstr(env, X86Instr_Alu32R(Xalu_OR,X86RMI_Reg(tHi), tLo));
2049 switch (e->Iex.Binop.op) {
2050 case Iop_CmpNE64: return Xcc_NZ;
2051 case Iop_CmpEQ64: return Xcc_Z;
2052 default: vpanic("iselCondCode(x86): CmpXX64");
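/* Rationale: hi1:lo1 == hi2:lo2 exactly when
   ((hi1 ^ hi2) | (lo1 ^ lo2)) == 0, and the zero-ness of that value
   is what the final OR leaves in the flags. */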
2056 /* And1(x,y), Or1(x,y) */
2057 /* FIXME: We could (and probably should) do a lot better here. If both args
2058 are in temps already then we can just emit a reg-reg And/Or directly,
2059 followed by the final Test. */
2060 if (e->tag == Iex_Binop
2061 && (e->Iex.Binop.op == Iop_And1 || e->Iex.Binop.op == Iop_Or1)) {
2062 // We could probably be cleverer about this. In the meantime ..
2063 HReg x_as_32 = newVRegI(env);
2064 X86CondCode cc_x = iselCondCode(env, e->Iex.Binop.arg1);
2065 addInstr(env, X86Instr_Set32(cc_x, x_as_32));
2066 HReg y_as_32 = newVRegI(env);
2067 X86CondCode cc_y = iselCondCode(env, e->Iex.Binop.arg2);
2068 addInstr(env, X86Instr_Set32(cc_y, y_as_32));
2069 X86AluOp aop = e->Iex.Binop.op == Iop_And1 ? Xalu_AND : Xalu_OR;
2070 addInstr(env, X86Instr_Alu32R(aop, X86RMI_Reg(x_as_32), y_as_32));
2071 addInstr(env, X86Instr_Test32(1, X86RM_Reg(y_as_32)));
2072 return Xcc_NZ;
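/* Each Set32 leaves the truth of its condition in bit 0 of the
   destination, so after the reg-reg AND/OR only bit 0 of y_as_32 is
   of interest; the trailing Test32(1,..) turns it back into a
   condition code. */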
2075 ppIRExpr(e);
2076 vpanic("iselCondCode");
2080 /*---------------------------------------------------------*/
2081 /*--- ISEL: Integer expressions (64 bit) ---*/
2082 /*---------------------------------------------------------*/
2084 /* Compute a 64-bit value into a register pair, which is returned as
2085 the first two parameters. As with iselIntExpr_R, these may be
2086 either real or virtual regs; in any case they must not be changed
2087 by subsequent code emitted by the caller. */
2089 static void iselInt64Expr ( HReg* rHi, HReg* rLo, ISelEnv* env,
2090 const IRExpr* e )
2092 iselInt64Expr_wrk(rHi, rLo, env, e);
2093 # if 0
2094 vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
2095 # endif
2096 vassert(hregClass(*rHi) == HRcInt32);
2097 vassert(hregIsVirtual(*rHi));
2098 vassert(hregClass(*rLo) == HRcInt32);
2099 vassert(hregIsVirtual(*rLo));
2102 /* DO NOT CALL THIS DIRECTLY ! */
2103 static void iselInt64Expr_wrk ( HReg* rHi, HReg* rLo, ISelEnv* env,
2104 const IRExpr* e )
2106 MatchInfo mi;
2107 HWord fn = 0; /* helper fn for most SIMD64 stuff */
2108 vassert(e);
2109 vassert(typeOfIRExpr(env->type_env,e) == Ity_I64);
2111 /* 64-bit literal */
2112 if (e->tag == Iex_Const) {
2113 ULong w64 = e->Iex.Const.con->Ico.U64;
2114 UInt wHi = toUInt(w64 >> 32);
2115 UInt wLo = toUInt(w64);
2116 HReg tLo = newVRegI(env);
2117 HReg tHi = newVRegI(env);
2118 vassert(e->Iex.Const.con->tag == Ico_U64);
2119 if (wLo == wHi) {
2120 /* Save a precious Int register in this special case. */
2121 addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(wLo), tLo));
2122 *rHi = tLo;
2123 *rLo = tLo;
2124 } else {
2125 addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(wHi), tHi));
2126 addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(wLo), tLo));
2127 *rHi = tHi;
2128 *rLo = tLo;
2130 return;
2133 /* read 64-bit IRTemp */
2134 if (e->tag == Iex_RdTmp) {
2135 lookupIRTemp64( rHi, rLo, env, e->Iex.RdTmp.tmp);
2136 return;
2139 /* 64-bit load */
2140 if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
2141 HReg tLo, tHi;
2142 X86AMode *am0, *am4;
2143 vassert(e->Iex.Load.ty == Ity_I64);
2144 tLo = newVRegI(env);
2145 tHi = newVRegI(env);
2146 am0 = iselIntExpr_AMode(env, e->Iex.Load.addr);
2147 am4 = advance4(am0);
2148 addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(am0), tLo ));
2149 addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(am4), tHi ));
2150 *rHi = tHi;
2151 *rLo = tLo;
2152 return;
2155 /* 64-bit GET */
2156 if (e->tag == Iex_Get) {
2157 X86AMode* am = X86AMode_IR(e->Iex.Get.offset, hregX86_EBP());
2158 X86AMode* am4 = advance4(am);
2159 HReg tLo = newVRegI(env);
2160 HReg tHi = newVRegI(env);
2161 addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(am), tLo ));
2162 addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(am4), tHi ));
2163 *rHi = tHi;
2164 *rLo = tLo;
2165 return;
2168 /* 64-bit GETI */
2169 if (e->tag == Iex_GetI) {
2170 X86AMode* am
2171 = genGuestArrayOffset( env, e->Iex.GetI.descr,
2172 e->Iex.GetI.ix, e->Iex.GetI.bias );
2173 X86AMode* am4 = advance4(am);
2174 HReg tLo = newVRegI(env);
2175 HReg tHi = newVRegI(env);
2176 addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(am), tLo ));
2177 addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(am4), tHi ));
2178 *rHi = tHi;
2179 *rLo = tLo;
2180 return;
2183 /* 64-bit ITE: ITE(g, expr, expr) */ // VFD
2184 if (e->tag == Iex_ITE) {
2185 HReg e0Lo, e0Hi, e1Lo, e1Hi;
2186 HReg tLo = newVRegI(env);
2187 HReg tHi = newVRegI(env);
2188 iselInt64Expr(&e0Hi, &e0Lo, env, e->Iex.ITE.iffalse);
2189 iselInt64Expr(&e1Hi, &e1Lo, env, e->Iex.ITE.iftrue);
2190 addInstr(env, mk_iMOVsd_RR(e1Hi, tHi));
2191 addInstr(env, mk_iMOVsd_RR(e1Lo, tLo));
2192 X86CondCode cc = iselCondCode(env, e->Iex.ITE.cond);
2193 /* This assumes the first cmov32 doesn't trash the condition
2194 codes, so they are still available for the second cmov32 */
2195 addInstr(env, X86Instr_CMov32(cc ^ 1, X86RM_Reg(e0Hi), tHi));
2196 addInstr(env, X86Instr_CMov32(cc ^ 1, X86RM_Reg(e0Lo), tLo));
2197 *rHi = tHi;
2198 *rLo = tLo;
2199 return;
2202 /* --------- BINARY ops --------- */
2203 if (e->tag == Iex_Binop) {
2204 switch (e->Iex.Binop.op) {
2205 /* 32 x 32 -> 64 multiply */
2206 case Iop_MullU32:
2207 case Iop_MullS32: {
2208 /* get one operand into %eax, and the other into a R/M.
2209 Need to make an educated guess about which operand goes
2210 where. */
2211 HReg tLo = newVRegI(env);
2212 HReg tHi = newVRegI(env);
2213 Bool syned = toBool(e->Iex.Binop.op == Iop_MullS32);
2214 X86RM* rmLeft = iselIntExpr_RM(env, e->Iex.Binop.arg1);
2215 HReg rRight = iselIntExpr_R(env, e->Iex.Binop.arg2);
2216 addInstr(env, mk_iMOVsd_RR(rRight, hregX86_EAX()));
2217 addInstr(env, X86Instr_MulL(syned, rmLeft));
2218 /* Result is now in EDX:EAX. Tell the caller. */
2219 addInstr(env, mk_iMOVsd_RR(hregX86_EDX(), tHi));
2220 addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), tLo));
2221 *rHi = tHi;
2222 *rLo = tLo;
2223 return;
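/* The one-operand MUL/IMUL forms always deposit their 64-bit product
   in %edx:%eax, hence the fixed copies out of those two registers. */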
2226 /* 64 x 32 -> (32(rem),32(div)) division */
2227 case Iop_DivModU64to32:
2228 case Iop_DivModS64to32: {
2229 /* Get the 64-bit operand into edx:eax, and the other into
2230 any old R/M. */
2231 HReg sHi, sLo;
2232 HReg tLo = newVRegI(env);
2233 HReg tHi = newVRegI(env);
2234 Bool syned = toBool(e->Iex.Binop.op == Iop_DivModS64to32);
2235 X86RM* rmRight = iselIntExpr_RM(env, e->Iex.Binop.arg2);
2236 iselInt64Expr(&sHi,&sLo, env, e->Iex.Binop.arg1);
2237 addInstr(env, mk_iMOVsd_RR(sHi, hregX86_EDX()));
2238 addInstr(env, mk_iMOVsd_RR(sLo, hregX86_EAX()));
2239 addInstr(env, X86Instr_Div(syned, rmRight));
2240 addInstr(env, mk_iMOVsd_RR(hregX86_EDX(), tHi));
2241 addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), tLo));
2242 *rHi = tHi;
2243 *rLo = tLo;
2244 return;
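/* DIV/IDIV take the 64-bit dividend in %edx:%eax and leave the
   quotient in %eax and the remainder in %edx, matching
   DivMod*64to32's remainder:quotient result layout. */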
2247 /* Or64/And64/Xor64 */
2248 case Iop_Or64:
2249 case Iop_And64:
2250 case Iop_Xor64: {
2251 HReg xLo, xHi, yLo, yHi;
2252 HReg tLo = newVRegI(env);
2253 HReg tHi = newVRegI(env);
2254 X86AluOp op = e->Iex.Binop.op==Iop_Or64 ? Xalu_OR
2255 : e->Iex.Binop.op==Iop_And64 ? Xalu_AND
2256 : Xalu_XOR;
2257 iselInt64Expr(&xHi, &xLo, env, e->Iex.Binop.arg1);
2258 iselInt64Expr(&yHi, &yLo, env, e->Iex.Binop.arg2);
2259 addInstr(env, mk_iMOVsd_RR(xHi, tHi));
2260 addInstr(env, X86Instr_Alu32R(op, X86RMI_Reg(yHi), tHi));
2261 addInstr(env, mk_iMOVsd_RR(xLo, tLo));
2262 addInstr(env, X86Instr_Alu32R(op, X86RMI_Reg(yLo), tLo));
2263 *rHi = tHi;
2264 *rLo = tLo;
2265 return;
2268 /* Add64/Sub64 */
2269 case Iop_Add64:
2270 if (e->Iex.Binop.arg2->tag == Iex_Const) {
2271 /* special case Add64(e, const) */
2272 ULong w64 = e->Iex.Binop.arg2->Iex.Const.con->Ico.U64;
2273 UInt wHi = toUInt(w64 >> 32);
2274 UInt wLo = toUInt(w64);
2275 HReg tLo = newVRegI(env);
2276 HReg tHi = newVRegI(env);
2277 HReg xLo, xHi;
2278 vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U64);
2279 iselInt64Expr(&xHi, &xLo, env, e->Iex.Binop.arg1);
2280 addInstr(env, mk_iMOVsd_RR(xHi, tHi));
2281 addInstr(env, mk_iMOVsd_RR(xLo, tLo));
2282 addInstr(env, X86Instr_Alu32R(Xalu_ADD, X86RMI_Imm(wLo), tLo));
2283 addInstr(env, X86Instr_Alu32R(Xalu_ADC, X86RMI_Imm(wHi), tHi));
2284 *rHi = tHi;
2285 *rLo = tLo;
2286 return;
2288 /* else fall through to the generic case */
2289 case Iop_Sub64: {
2290 HReg xLo, xHi, yLo, yHi;
2291 HReg tLo = newVRegI(env);
2292 HReg tHi = newVRegI(env);
2293 iselInt64Expr(&xHi, &xLo, env, e->Iex.Binop.arg1);
2294 addInstr(env, mk_iMOVsd_RR(xHi, tHi));
2295 addInstr(env, mk_iMOVsd_RR(xLo, tLo));
2296 iselInt64Expr(&yHi, &yLo, env, e->Iex.Binop.arg2);
2297 if (e->Iex.Binop.op==Iop_Add64) {
2298 addInstr(env, X86Instr_Alu32R(Xalu_ADD, X86RMI_Reg(yLo), tLo));
2299 addInstr(env, X86Instr_Alu32R(Xalu_ADC, X86RMI_Reg(yHi), tHi));
2300 } else {
2301 addInstr(env, X86Instr_Alu32R(Xalu_SUB, X86RMI_Reg(yLo), tLo));
2302 addInstr(env, X86Instr_Alu32R(Xalu_SBB, X86RMI_Reg(yHi), tHi));
2304 *rHi = tHi;
2305 *rLo = tLo;
2306 return;
2309 /* 32HLto64(e1,e2) */
2310 case Iop_32HLto64:
2311 *rHi = iselIntExpr_R(env, e->Iex.Binop.arg1);
2312 *rLo = iselIntExpr_R(env, e->Iex.Binop.arg2);
2313 return;
2315 /* 64-bit shifts */
2316 case Iop_Shl64: {
2317 /* We use the same ingenious scheme as gcc. Put the value
2318 to be shifted into %hi:%lo, and the shift amount into
2319 %cl. Then (dsts on right, a la ATT syntax):
2321 shldl %cl, %lo, %hi -- make %hi be right for the
2322 -- shift amt %cl % 32
2323 shll %cl, %lo -- make %lo be right for the
2324 -- shift amt %cl % 32
2326 Now, if (shift amount % 64) is in the range 32 .. 63,
2327 we have to do a fixup, which puts the result low half
2328 into the result high half, and zeroes the low half:
2330 testl $32, %ecx
2332 cmovnz %lo, %hi
2333 movl $0, %tmp -- sigh; need yet another reg
2334 cmovnz %tmp, %lo
2335 */
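/* Worked example: a shift by 40 has %cl % 32 == 8, so the shldl/shll
   pair leaves %hi = (hi << 8) | (lo >> 24) and %lo = lo << 8, which
   is not yet the 64-bit result.  Bit 5 of %ecx is set, so the two
   cmovnz's then promote %lo to the high half and zero the low half,
   giving lo << 8 in the top word -- that is, the original value
   shifted left by 40. */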
2336 HReg rAmt, sHi, sLo, tHi, tLo, tTemp;
2337 tLo = newVRegI(env);
2338 tHi = newVRegI(env);
2339 tTemp = newVRegI(env);
2340 rAmt = iselIntExpr_R(env, e->Iex.Binop.arg2);
2341 iselInt64Expr(&sHi,&sLo, env, e->Iex.Binop.arg1);
2342 addInstr(env, mk_iMOVsd_RR(rAmt, hregX86_ECX()));
2343 addInstr(env, mk_iMOVsd_RR(sHi, tHi));
2344 addInstr(env, mk_iMOVsd_RR(sLo, tLo));
2345 /* Ok. Now shift amt is in %ecx, and value is in tHi/tLo
2346 and those regs are legitimately modifiable. */
2347 addInstr(env, X86Instr_Sh3232(Xsh_SHL, 0/*%cl*/, tLo, tHi));
2348 addInstr(env, X86Instr_Sh32(Xsh_SHL, 0/*%cl*/, tLo));
2349 addInstr(env, X86Instr_Test32(32, X86RM_Reg(hregX86_ECX())));
2350 addInstr(env, X86Instr_CMov32(Xcc_NZ, X86RM_Reg(tLo), tHi));
2351 addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(0), tTemp));
2352 addInstr(env, X86Instr_CMov32(Xcc_NZ, X86RM_Reg(tTemp), tLo));
2353 *rHi = tHi;
2354 *rLo = tLo;
2355 return;
2358 case Iop_Shr64: {
2359 /* We use the same ingenious scheme as gcc. Put the value
2360 to be shifted into %hi:%lo, and the shift amount into
2361 %cl. Then:
2363 shrdl %cl, %hi, %lo -- make %lo be right for the
2364 -- shift amt %cl % 32
2365 shrl %cl, %hi -- make %hi be right for the
2366 -- shift amt %cl % 32
2368 Now, if (shift amount % 64) is in the range 32 .. 63,
2369 we have to do a fixup, which puts the result high half
2370 into the result low half, and zeroes the high half:
2372 testl $32, %ecx
2374 cmovnz %hi, %lo
2375 movl $0, %tmp -- sigh; need yet another reg
2376 cmovnz %tmp, %hi
2377 */
2378 HReg rAmt, sHi, sLo, tHi, tLo, tTemp;
2379 tLo = newVRegI(env);
2380 tHi = newVRegI(env);
2381 tTemp = newVRegI(env);
2382 rAmt = iselIntExpr_R(env, e->Iex.Binop.arg2);
2383 iselInt64Expr(&sHi,&sLo, env, e->Iex.Binop.arg1);
2384 addInstr(env, mk_iMOVsd_RR(rAmt, hregX86_ECX()));
2385 addInstr(env, mk_iMOVsd_RR(sHi, tHi));
2386 addInstr(env, mk_iMOVsd_RR(sLo, tLo));
2387 /* Ok. Now shift amt is in %ecx, and value is in tHi/tLo
2388 and those regs are legitimately modifiable. */
2389 addInstr(env, X86Instr_Sh3232(Xsh_SHR, 0/*%cl*/, tHi, tLo));
2390 addInstr(env, X86Instr_Sh32(Xsh_SHR, 0/*%cl*/, tHi));
2391 addInstr(env, X86Instr_Test32(32, X86RM_Reg(hregX86_ECX())));
2392 addInstr(env, X86Instr_CMov32(Xcc_NZ, X86RM_Reg(tHi), tLo));
2393 addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(0), tTemp));
2394 addInstr(env, X86Instr_CMov32(Xcc_NZ, X86RM_Reg(tTemp), tHi));
2395 *rHi = tHi;
2396 *rLo = tLo;
2397 return;
2400 case Iop_Sar64: {
2401 /* gcc -O2 does the following. I don't know how it works, but it
2402 does work. Don't mess with it. This is hard to test because the
2403 x86 front end doesn't create Iop_Sar64 for any x86 instruction,
2404 so it's impossible to write a test program that feeds values
2405 through Iop_Sar64 and prints their results. The implementation
2406 here was tested by using psrlq on mmx registers -- that generates
2407 Iop_Shr64 -- and temporarily hacking the front end to generate
2408 Iop_Sar64 for that instruction instead.
2410 movl %amount, %ecx
2411 movl %srcHi, %r1
2412 movl %srcLo, %r2
2414 movl %r1, %r3
2415 sarl %cl, %r3
2416 movl %r2, %r4
2417 shrdl %cl, %r1, %r4
2418 movl %r3, %r2
2419 sarl $31, %r2
2420 andl $32, %ecx
2421 cmovne %r3, %r4 // = resLo
2422 cmovne %r2, %r3 // = resHi
2423 */
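/* A sketch of why it works: with n = %cl, r3 ends up as
   srcHi >>s (n % 32), r4 as the low word of (srcHi:srcLo) >> (n % 32)
   (shrdl shifts bits of %r1 in at the top of %r4), and r2 as the
   sign of srcHi smeared across 32 bits.  For n < 32 that is already
   resHi:resLo = r3:r4.  For 32 <= n < 64 the andl sets NZ and the
   cmovne's patch in resLo = r3 = srcHi >>s (n-32) and resHi = r2,
   the all-sign-bits word, which is the correct arithmetic shift. */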
2424 HReg amount = iselIntExpr_R(env, e->Iex.Binop.arg2);
2425 HReg srcHi = INVALID_HREG, srcLo = INVALID_HREG;
2426 iselInt64Expr(&srcHi, &srcLo, env, e->Iex.Binop.arg1);
2427 HReg r1 = newVRegI(env);
2428 HReg r2 = newVRegI(env);
2429 HReg r3 = newVRegI(env);
2430 HReg r4 = newVRegI(env);
2431 addInstr(env, mk_iMOVsd_RR(amount, hregX86_ECX()));
2432 addInstr(env, mk_iMOVsd_RR(srcHi, r1));
2433 addInstr(env, mk_iMOVsd_RR(srcLo, r2));
2435 addInstr(env, mk_iMOVsd_RR(r1, r3));
2436 addInstr(env, X86Instr_Sh32(Xsh_SAR, 0/*%cl*/, r3));
2437 addInstr(env, mk_iMOVsd_RR(r2, r4));
2438 addInstr(env, X86Instr_Sh3232(Xsh_SHR, 0/*%cl*/, r1, r4));
2439 addInstr(env, mk_iMOVsd_RR(r3, r2));
2440 addInstr(env, X86Instr_Sh32(Xsh_SAR, 31, r2));
2441 addInstr(env, X86Instr_Alu32R(Xalu_AND, X86RMI_Imm(32),
2442 hregX86_ECX()));
2443 addInstr(env, X86Instr_CMov32(Xcc_NZ, X86RM_Reg(r3), r4));
2444 addInstr(env, X86Instr_CMov32(Xcc_NZ, X86RM_Reg(r2), r3));
2445 *rHi = r3;
2446 *rLo = r4;
2447 return;
2450 /* F64 -> I64 */
2451 /* Sigh, this is an almost exact copy of the F64 -> I32/I16
2452 case. Unfortunately I see no easy way to avoid the
2453 duplication. */
2454 case Iop_F64toI64S: {
2455 HReg rf = iselDblExpr(env, e->Iex.Binop.arg2);
2456 HReg tLo = newVRegI(env);
2457 HReg tHi = newVRegI(env);
2459 /* Used several times ... */
2460 /* Careful ... this sharing is only safe because
2461 zero_esp/four_esp do not hold any registers which the
2462 register allocator could attempt to swizzle later. */
2463 X86AMode* zero_esp = X86AMode_IR(0, hregX86_ESP());
2464 X86AMode* four_esp = X86AMode_IR(4, hregX86_ESP());
2466 /* rf now holds the value to be converted; the rounding mode,
2467 encoded as per the IRRoundingMode enum, is supplied as arg1.
2468 The first thing to do is set the FPU's rounding mode
2469 accordingly. */
2471 /* Create a space for the format conversion. */
2472 /* subl $8, %esp */
2473 sub_from_esp(env, 8);
2475 /* Set host rounding mode */
2476 set_FPU_rounding_mode( env, e->Iex.Binop.arg1 );
2478 /* gistll %rf, 0(%esp) */
2479 addInstr(env, X86Instr_FpLdStI(False/*store*/, 8, rf, zero_esp));
2481 /* movl 0(%esp), %dstLo */
2482 /* movl 4(%esp), %dstHi */
2483 addInstr(env, X86Instr_Alu32R(
2484 Xalu_MOV, X86RMI_Mem(zero_esp), tLo));
2485 addInstr(env, X86Instr_Alu32R(
2486 Xalu_MOV, X86RMI_Mem(four_esp), tHi));
2488 /* Restore default FPU rounding. */
2489 set_FPU_rounding_default( env );
2491 /* addl $8, %esp */
2492 add_to_esp(env, 8);
2494 *rHi = tHi;
2495 *rLo = tLo;
2496 return;
2499 case Iop_Add8x8:
2500 fn = (HWord)h_generic_calc_Add8x8; goto binnish;
2501 case Iop_Add16x4:
2502 fn = (HWord)h_generic_calc_Add16x4; goto binnish;
2503 case Iop_Add32x2:
2504 fn = (HWord)h_generic_calc_Add32x2; goto binnish;
2506 case Iop_Avg8Ux8:
2507 fn = (HWord)h_generic_calc_Avg8Ux8; goto binnish;
2508 case Iop_Avg16Ux4:
2509 fn = (HWord)h_generic_calc_Avg16Ux4; goto binnish;
2511 case Iop_CmpEQ8x8:
2512 fn = (HWord)h_generic_calc_CmpEQ8x8; goto binnish;
2513 case Iop_CmpEQ16x4:
2514 fn = (HWord)h_generic_calc_CmpEQ16x4; goto binnish;
2515 case Iop_CmpEQ32x2:
2516 fn = (HWord)h_generic_calc_CmpEQ32x2; goto binnish;
2518 case Iop_CmpGT8Sx8:
2519 fn = (HWord)h_generic_calc_CmpGT8Sx8; goto binnish;
2520 case Iop_CmpGT16Sx4:
2521 fn = (HWord)h_generic_calc_CmpGT16Sx4; goto binnish;
2522 case Iop_CmpGT32Sx2:
2523 fn = (HWord)h_generic_calc_CmpGT32Sx2; goto binnish;
2525 case Iop_InterleaveHI8x8:
2526 fn = (HWord)h_generic_calc_InterleaveHI8x8; goto binnish;
2527 case Iop_InterleaveLO8x8:
2528 fn = (HWord)h_generic_calc_InterleaveLO8x8; goto binnish;
2529 case Iop_InterleaveHI16x4:
2530 fn = (HWord)h_generic_calc_InterleaveHI16x4; goto binnish;
2531 case Iop_InterleaveLO16x4:
2532 fn = (HWord)h_generic_calc_InterleaveLO16x4; goto binnish;
2533 case Iop_InterleaveHI32x2:
2534 fn = (HWord)h_generic_calc_InterleaveHI32x2; goto binnish;
2535 case Iop_InterleaveLO32x2:
2536 fn = (HWord)h_generic_calc_InterleaveLO32x2; goto binnish;
2537 case Iop_CatOddLanes16x4:
2538 fn = (HWord)h_generic_calc_CatOddLanes16x4; goto binnish;
2539 case Iop_CatEvenLanes16x4:
2540 fn = (HWord)h_generic_calc_CatEvenLanes16x4; goto binnish;
2541 case Iop_Perm8x8:
2542 fn = (HWord)h_generic_calc_Perm8x8; goto binnish;
2544 case Iop_Max8Ux8:
2545 fn = (HWord)h_generic_calc_Max8Ux8; goto binnish;
2546 case Iop_Max16Sx4:
2547 fn = (HWord)h_generic_calc_Max16Sx4; goto binnish;
2548 case Iop_Min8Ux8:
2549 fn = (HWord)h_generic_calc_Min8Ux8; goto binnish;
2550 case Iop_Min16Sx4:
2551 fn = (HWord)h_generic_calc_Min16Sx4; goto binnish;
2553 case Iop_Mul16x4:
2554 fn = (HWord)h_generic_calc_Mul16x4; goto binnish;
2555 case Iop_Mul32x2:
2556 fn = (HWord)h_generic_calc_Mul32x2; goto binnish;
2557 case Iop_MulHi16Sx4:
2558 fn = (HWord)h_generic_calc_MulHi16Sx4; goto binnish;
2559 case Iop_MulHi16Ux4:
2560 fn = (HWord)h_generic_calc_MulHi16Ux4; goto binnish;
2562 case Iop_QAdd8Sx8:
2563 fn = (HWord)h_generic_calc_QAdd8Sx8; goto binnish;
2564 case Iop_QAdd16Sx4:
2565 fn = (HWord)h_generic_calc_QAdd16Sx4; goto binnish;
2566 case Iop_QAdd8Ux8:
2567 fn = (HWord)h_generic_calc_QAdd8Ux8; goto binnish;
2568 case Iop_QAdd16Ux4:
2569 fn = (HWord)h_generic_calc_QAdd16Ux4; goto binnish;
2571 case Iop_QNarrowBin32Sto16Sx4:
2572 fn = (HWord)h_generic_calc_QNarrowBin32Sto16Sx4; goto binnish;
2573 case Iop_QNarrowBin16Sto8Sx8:
2574 fn = (HWord)h_generic_calc_QNarrowBin16Sto8Sx8; goto binnish;
2575 case Iop_QNarrowBin16Sto8Ux8:
2576 fn = (HWord)h_generic_calc_QNarrowBin16Sto8Ux8; goto binnish;
2577 case Iop_NarrowBin16to8x8:
2578 fn = (HWord)h_generic_calc_NarrowBin16to8x8; goto binnish;
2579 case Iop_NarrowBin32to16x4:
2580 fn = (HWord)h_generic_calc_NarrowBin32to16x4; goto binnish;
2582 case Iop_QSub8Sx8:
2583 fn = (HWord)h_generic_calc_QSub8Sx8; goto binnish;
2584 case Iop_QSub16Sx4:
2585 fn = (HWord)h_generic_calc_QSub16Sx4; goto binnish;
2586 case Iop_QSub8Ux8:
2587 fn = (HWord)h_generic_calc_QSub8Ux8; goto binnish;
2588 case Iop_QSub16Ux4:
2589 fn = (HWord)h_generic_calc_QSub16Ux4; goto binnish;
2591 case Iop_Sub8x8:
2592 fn = (HWord)h_generic_calc_Sub8x8; goto binnish;
2593 case Iop_Sub16x4:
2594 fn = (HWord)h_generic_calc_Sub16x4; goto binnish;
2595 case Iop_Sub32x2:
2596 fn = (HWord)h_generic_calc_Sub32x2; goto binnish;
2598 binnish: {
2599 /* Note: the following assumes all helpers are of
2600 signature
2601 ULong fn ( ULong, ULong ), and they are
2602 not marked as regparm functions.
2603 */
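/* So the arguments go on the stack cdecl-style: y is pushed first,
   then x, leaving x (the first ULong parameter) at the lowest
   address.  The result comes back in %edx:%eax and the four pushed
   words are reclaimed below with add_to_esp(4*4). */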
2604 HReg xLo, xHi, yLo, yHi;
2605 HReg tLo = newVRegI(env);
2606 HReg tHi = newVRegI(env);
2607 iselInt64Expr(&yHi, &yLo, env, e->Iex.Binop.arg2);
2608 addInstr(env, X86Instr_Push(X86RMI_Reg(yHi)));
2609 addInstr(env, X86Instr_Push(X86RMI_Reg(yLo)));
2610 iselInt64Expr(&xHi, &xLo, env, e->Iex.Binop.arg1);
2611 addInstr(env, X86Instr_Push(X86RMI_Reg(xHi)));
2612 addInstr(env, X86Instr_Push(X86RMI_Reg(xLo)));
2613 addInstr(env, X86Instr_Call( Xcc_ALWAYS, (Addr32)fn,
2614 0, mk_RetLoc_simple(RLPri_2Int) ));
2615 add_to_esp(env, 4*4);
2616 addInstr(env, mk_iMOVsd_RR(hregX86_EDX(), tHi));
2617 addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), tLo));
2618 *rHi = tHi;
2619 *rLo = tLo;
2620 return;
2623 case Iop_ShlN32x2:
2624 fn = (HWord)h_generic_calc_ShlN32x2; goto shifty;
2625 case Iop_ShlN16x4:
2626 fn = (HWord)h_generic_calc_ShlN16x4; goto shifty;
2627 case Iop_ShlN8x8:
2628 fn = (HWord)h_generic_calc_ShlN8x8; goto shifty;
2629 case Iop_ShrN32x2:
2630 fn = (HWord)h_generic_calc_ShrN32x2; goto shifty;
2631 case Iop_ShrN16x4:
2632 fn = (HWord)h_generic_calc_ShrN16x4; goto shifty;
2633 case Iop_SarN32x2:
2634 fn = (HWord)h_generic_calc_SarN32x2; goto shifty;
2635 case Iop_SarN16x4:
2636 fn = (HWord)h_generic_calc_SarN16x4; goto shifty;
2637 case Iop_SarN8x8:
2638 fn = (HWord)h_generic_calc_SarN8x8; goto shifty;
2639 shifty: {
2640 /* Note: the following assumes all helpers are of
2641 signature
2642 ULong fn ( ULong, UInt ), and they are
2643 not marked as regparm functions.
2644 */
2645 HReg xLo, xHi;
2646 HReg tLo = newVRegI(env);
2647 HReg tHi = newVRegI(env);
2648 X86RMI* y = iselIntExpr_RMI(env, e->Iex.Binop.arg2);
2649 addInstr(env, X86Instr_Push(y));
2650 iselInt64Expr(&xHi, &xLo, env, e->Iex.Binop.arg1);
2651 addInstr(env, X86Instr_Push(X86RMI_Reg(xHi)));
2652 addInstr(env, X86Instr_Push(X86RMI_Reg(xLo)));
2653 addInstr(env, X86Instr_Call( Xcc_ALWAYS, (Addr32)fn,
2654 0, mk_RetLoc_simple(RLPri_2Int) ));
2655 add_to_esp(env, 3*4);
2656 addInstr(env, mk_iMOVsd_RR(hregX86_EDX(), tHi));
2657 addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), tLo));
2658 *rHi = tHi;
2659 *rLo = tLo;
2660 return;
2663 default:
2664 break;
2666 } /* if (e->tag == Iex_Binop) */
2669 /* --------- UNARY ops --------- */
2670 if (e->tag == Iex_Unop) {
2671 switch (e->Iex.Unop.op) {
2673 /* 32Sto64(e) */
2674 case Iop_32Sto64: {
2675 HReg tLo = newVRegI(env);
2676 HReg tHi = newVRegI(env);
2677 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
2678 addInstr(env, mk_iMOVsd_RR(src,tHi));
2679 addInstr(env, mk_iMOVsd_RR(src,tLo));
2680 addInstr(env, X86Instr_Sh32(Xsh_SAR, 31, tHi));
2681 *rHi = tHi;
2682 *rLo = tLo;
2683 return;
2686 /* 32Uto64(e) */
2687 case Iop_32Uto64: {
2688 HReg tLo = newVRegI(env);
2689 HReg tHi = newVRegI(env);
2690 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
2691 addInstr(env, mk_iMOVsd_RR(src,tLo));
2692 addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(0), tHi));
2693 *rHi = tHi;
2694 *rLo = tLo;
2695 return;
2698 /* 16Uto64(e) */
2699 case Iop_16Uto64: {
2700 HReg tLo = newVRegI(env);
2701 HReg tHi = newVRegI(env);
2702 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
2703 addInstr(env, mk_iMOVsd_RR(src,tLo));
2704 addInstr(env, X86Instr_Alu32R(Xalu_AND,
2705 X86RMI_Imm(0xFFFF), tLo));
2706 addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(0), tHi));
2707 *rHi = tHi;
2708 *rLo = tLo;
2709 return;
2712 /* V128{HI}to64 */
2713 case Iop_V128HIto64:
2714 case Iop_V128to64: {
2715 Int off = e->Iex.Unop.op==Iop_V128HIto64 ? 8 : 0;
2716 HReg tLo = newVRegI(env);
2717 HReg tHi = newVRegI(env);
2718 HReg vec = iselVecExpr(env, e->Iex.Unop.arg);
2719 X86AMode* esp0 = X86AMode_IR(0, hregX86_ESP());
2720 X86AMode* espLO = X86AMode_IR(off, hregX86_ESP());
2721 X86AMode* espHI = X86AMode_IR(off+4, hregX86_ESP());
2722 sub_from_esp(env, 16);
2723 addInstr(env, X86Instr_SseLdSt(False/*store*/, vec, esp0));
2724 addInstr(env, X86Instr_Alu32R( Xalu_MOV,
2725 X86RMI_Mem(espLO), tLo ));
2726 addInstr(env, X86Instr_Alu32R( Xalu_MOV,
2727 X86RMI_Mem(espHI), tHi ));
2728 add_to_esp(env, 16);
2729 *rHi = tHi;
2730 *rLo = tLo;
2731 return;
2734 /* could do better than this, but for now ... */
2735 case Iop_1Sto64: {
2736 HReg tLo = newVRegI(env);
2737 HReg tHi = newVRegI(env);
2738 X86CondCode cond = iselCondCode(env, e->Iex.Unop.arg);
2739 addInstr(env, X86Instr_Set32(cond,tLo));
2740 addInstr(env, X86Instr_Sh32(Xsh_SHL, 31, tLo));
2741 addInstr(env, X86Instr_Sh32(Xsh_SAR, 31, tLo));
2742 addInstr(env, mk_iMOVsd_RR(tLo, tHi));
2743 *rHi = tHi;
2744 *rLo = tLo;
2745 return;
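/* The SHL 31 / SAR 31 pair smears bit 0 of the Set32 result across
   the whole word, turning 0/1 into 0x00000000/0xFFFFFFFF, which is
   then duplicated into the high half. */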
2748 /* Not64(e) */
2749 case Iop_Not64: {
2750 HReg tLo = newVRegI(env);
2751 HReg tHi = newVRegI(env);
2752 HReg sHi, sLo;
2753 iselInt64Expr(&sHi, &sLo, env, e->Iex.Unop.arg);
2754 addInstr(env, mk_iMOVsd_RR(sHi, tHi));
2755 addInstr(env, mk_iMOVsd_RR(sLo, tLo));
2756 addInstr(env, X86Instr_Unary32(Xun_NOT,tHi));
2757 addInstr(env, X86Instr_Unary32(Xun_NOT,tLo));
2758 *rHi = tHi;
2759 *rLo = tLo;
2760 return;
2763 /* Left64(e) */
2764 case Iop_Left64: {
2765 HReg yLo, yHi;
2766 HReg tLo = newVRegI(env);
2767 HReg tHi = newVRegI(env);
2768 /* yHi:yLo = arg */
2769 iselInt64Expr(&yHi, &yLo, env, e->Iex.Unop.arg);
2770 /* tLo = 0 - yLo, and set carry */
2771 addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(0), tLo));
2772 addInstr(env, X86Instr_Alu32R(Xalu_SUB, X86RMI_Reg(yLo), tLo));
2773 /* tHi = 0 - yHi - carry */
2774 addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(0), tHi));
2775 addInstr(env, X86Instr_Alu32R(Xalu_SBB, X86RMI_Reg(yHi), tHi));
2776 /* So now we have tHi:tLo = -arg. To finish off, or 'arg'
2777 back in, so as to give the final result
2778 tHi:tLo = arg | -arg. */
2779 addInstr(env, X86Instr_Alu32R(Xalu_OR, X86RMI_Reg(yLo), tLo));
2780 addInstr(env, X86Instr_Alu32R(Xalu_OR, X86RMI_Reg(yHi), tHi));
2781 *rHi = tHi;
2782 *rLo = tLo;
2783 return;
2786 /* --- patterns rooted at: CmpwNEZ64 --- */
2788 /* CmpwNEZ64(e) */
2789 case Iop_CmpwNEZ64: {
2791 DECLARE_PATTERN(p_CmpwNEZ64_Or64);
2792 DEFINE_PATTERN(p_CmpwNEZ64_Or64,
2793 unop(Iop_CmpwNEZ64,binop(Iop_Or64,bind(0),bind(1))));
2794 if (matchIRExpr(&mi, p_CmpwNEZ64_Or64, e)) {
2795 /* CmpwNEZ64(Or64(x,y)) */
2796 HReg xHi,xLo,yHi,yLo;
2797 HReg xBoth = newVRegI(env);
2798 HReg merged = newVRegI(env);
2799 HReg tmp2 = newVRegI(env);
2801 iselInt64Expr(&xHi,&xLo, env, mi.bindee[0]);
2802 addInstr(env, mk_iMOVsd_RR(xHi,xBoth));
2803 addInstr(env, X86Instr_Alu32R(Xalu_OR,
2804 X86RMI_Reg(xLo),xBoth));
2806 iselInt64Expr(&yHi,&yLo, env, mi.bindee[1]);
2807 addInstr(env, mk_iMOVsd_RR(yHi,merged));
2808 addInstr(env, X86Instr_Alu32R(Xalu_OR,
2809 X86RMI_Reg(yLo),merged));
2810 addInstr(env, X86Instr_Alu32R(Xalu_OR,
2811 X86RMI_Reg(xBoth),merged));
2813 /* tmp2 = (merged | -merged) >>s 31 */
2814 addInstr(env, mk_iMOVsd_RR(merged,tmp2));
2815 addInstr(env, X86Instr_Unary32(Xun_NEG,tmp2));
2816 addInstr(env, X86Instr_Alu32R(Xalu_OR,
2817 X86RMI_Reg(merged), tmp2));
2818 addInstr(env, X86Instr_Sh32(Xsh_SAR, 31, tmp2));
2819 *rHi = tmp2;
2820 *rLo = tmp2;
2821 return;
2822 } else {
2823 /* CmpwNEZ64(e) */
2824 HReg srcLo, srcHi;
2825 HReg tmp1 = newVRegI(env);
2826 HReg tmp2 = newVRegI(env);
2827 /* srcHi:srcLo = arg */
2828 iselInt64Expr(&srcHi, &srcLo, env, e->Iex.Unop.arg);
2829 /* tmp1 = srcHi | srcLo */
2830 addInstr(env, mk_iMOVsd_RR(srcHi,tmp1));
2831 addInstr(env, X86Instr_Alu32R(Xalu_OR,
2832 X86RMI_Reg(srcLo), tmp1));
2833 /* tmp2 = (tmp1 | -tmp1) >>s 31 */
2834 addInstr(env, mk_iMOVsd_RR(tmp1,tmp2));
2835 addInstr(env, X86Instr_Unary32(Xun_NEG,tmp2));
2836 addInstr(env, X86Instr_Alu32R(Xalu_OR,
2837 X86RMI_Reg(tmp1), tmp2));
2838 addInstr(env, X86Instr_Sh32(Xsh_SAR, 31, tmp2));
2839 *rHi = tmp2;
2840 *rLo = tmp2;
2841 return;
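/* Both paths rely on the identity that, for a 32-bit x, (x | -x) has
   its sign bit set iff x != 0, so the arithmetic shift by 31 yields
   0 or all-ones as required. */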
2845 /* ReinterpF64asI64(e) */
2846 /* Given an IEEE754 double, produce an I64 with the same bit
2847 pattern. */
2848 case Iop_ReinterpF64asI64: {
2849 HReg rf = iselDblExpr(env, e->Iex.Unop.arg);
2850 HReg tLo = newVRegI(env);
2851 HReg tHi = newVRegI(env);
2852 X86AMode* zero_esp = X86AMode_IR(0, hregX86_ESP());
2853 X86AMode* four_esp = X86AMode_IR(4, hregX86_ESP());
2854 /* paranoia */
2855 set_FPU_rounding_default(env);
2856 /* subl $8, %esp */
2857 sub_from_esp(env, 8);
2858 /* gstD %rf, 0(%esp) */
2859 addInstr(env,
2860 X86Instr_FpLdSt(False/*store*/, 8, rf, zero_esp));
2861 /* movl 0(%esp), %tLo */
2862 addInstr(env,
2863 X86Instr_Alu32R(Xalu_MOV, X86RMI_Mem(zero_esp), tLo));
2864 /* movl 4(%esp), %tHi */
2865 addInstr(env,
2866 X86Instr_Alu32R(Xalu_MOV, X86RMI_Mem(four_esp), tHi));
2867 /* addl $8, %esp */
2868 add_to_esp(env, 8);
2869 *rHi = tHi;
2870 *rLo = tLo;
2871 return;
2874 case Iop_CmpNEZ32x2:
2875 fn = (HWord)h_generic_calc_CmpNEZ32x2; goto unish;
2876 case Iop_CmpNEZ16x4:
2877 fn = (HWord)h_generic_calc_CmpNEZ16x4; goto unish;
2878 case Iop_CmpNEZ8x8:
2879 fn = (HWord)h_generic_calc_CmpNEZ8x8; goto unish;
2880 unish: {
2881 /* Note: the following assumes all helpers are of
2882 signature
2883 ULong fn ( ULong ), and they are
2884 not marked as regparm functions.
2885 */
2886 HReg xLo, xHi;
2887 HReg tLo = newVRegI(env);
2888 HReg tHi = newVRegI(env);
2889 iselInt64Expr(&xHi, &xLo, env, e->Iex.Unop.arg);
2890 addInstr(env, X86Instr_Push(X86RMI_Reg(xHi)));
2891 addInstr(env, X86Instr_Push(X86RMI_Reg(xLo)));
2892 addInstr(env, X86Instr_Call( Xcc_ALWAYS, (Addr32)fn,
2893 0, mk_RetLoc_simple(RLPri_2Int) ));
2894 add_to_esp(env, 2*4);
2895 addInstr(env, mk_iMOVsd_RR(hregX86_EDX(), tHi));
2896 addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), tLo));
2897 *rHi = tHi;
2898 *rLo = tLo;
2899 return;
2902 default:
2903 break;
2905 } /* if (e->tag == Iex_Unop) */
2908 /* --------- CCALL --------- */
2909 if (e->tag == Iex_CCall) {
2910 HReg tLo = newVRegI(env);
2911 HReg tHi = newVRegI(env);
2913 /* Marshal args, do the call, clear stack. */
2914 UInt addToSp = 0;
2915 RetLoc rloc = mk_RetLoc_INVALID();
2916 doHelperCall( &addToSp, &rloc, env, NULL/*guard*/,
2917 e->Iex.CCall.cee,
2918 e->Iex.CCall.retty, e->Iex.CCall.args );
2919 vassert(is_sane_RetLoc(rloc));
2920 vassert(rloc.pri == RLPri_2Int);
2921 vassert(addToSp == 0);
2922 /* */
2924 addInstr(env, mk_iMOVsd_RR(hregX86_EDX(), tHi));
2925 addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), tLo));
2926 *rHi = tHi;
2927 *rLo = tLo;
2928 return;
2931 ppIRExpr(e);
2932 vpanic("iselInt64Expr");
2936 /*---------------------------------------------------------*/
2937 /*--- ISEL: Floating point expressions (32 bit) ---*/
2938 /*---------------------------------------------------------*/
2940 /* Nothing interesting here; really just wrappers for
2941 64-bit stuff. */
2943 static HReg iselFltExpr ( ISelEnv* env, const IRExpr* e )
2945 HReg r = iselFltExpr_wrk( env, e );
2946 # if 0
2947 vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
2948 # endif
2949 vassert(hregClass(r) == HRcFlt64); /* yes, really Flt64 */
2950 vassert(hregIsVirtual(r));
2951 return r;
2954 /* DO NOT CALL THIS DIRECTLY */
2955 static HReg iselFltExpr_wrk ( ISelEnv* env, const IRExpr* e )
2957 IRType ty = typeOfIRExpr(env->type_env,e);
2958 vassert(ty == Ity_F32);
2960 if (e->tag == Iex_RdTmp) {
2961 return lookupIRTemp(env, e->Iex.RdTmp.tmp);
2964 if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
2965 X86AMode* am;
2966 HReg res = newVRegF(env);
2967 vassert(e->Iex.Load.ty == Ity_F32);
2968 am = iselIntExpr_AMode(env, e->Iex.Load.addr);
2969 addInstr(env, X86Instr_FpLdSt(True/*load*/, 4, res, am));
2970 return res;
2973 if (e->tag == Iex_Binop
2974 && e->Iex.Binop.op == Iop_F64toF32) {
2975 /* Although the result is still held in a standard FPU register,
2976 we need to round it to reflect the loss of accuracy/range
2977 entailed in casting it to a 32-bit float. */
2978 HReg dst = newVRegF(env);
2979 HReg src = iselDblExpr(env, e->Iex.Binop.arg2);
2980 set_FPU_rounding_mode( env, e->Iex.Binop.arg1 );
2981 addInstr(env, X86Instr_Fp64to32(src,dst));
2982 set_FPU_rounding_default( env );
2983 return dst;
2986 if (e->tag == Iex_Get) {
2987 X86AMode* am = X86AMode_IR( e->Iex.Get.offset,
2988 hregX86_EBP() );
2989 HReg res = newVRegF(env);
2990 addInstr(env, X86Instr_FpLdSt( True/*load*/, 4, res, am ));
2991 return res;
2994 if (e->tag == Iex_Unop
2995 && e->Iex.Unop.op == Iop_ReinterpI32asF32) {
2996 /* Given an I32, produce an IEEE754 float with the same bit
2997 pattern. */
2998 HReg dst = newVRegF(env);
2999 X86RMI* rmi = iselIntExpr_RMI(env, e->Iex.Unop.arg);
3000 /* paranoia */
3001 addInstr(env, X86Instr_Push(rmi));
3002 addInstr(env, X86Instr_FpLdSt(
3003 True/*load*/, 4, dst,
3004 X86AMode_IR(0, hregX86_ESP())));
3005 add_to_esp(env, 4);
3006 return dst;
3009 if (e->tag == Iex_Binop && e->Iex.Binop.op == Iop_RoundF32toInt) {
3010 HReg rf = iselFltExpr(env, e->Iex.Binop.arg2);
3011 HReg dst = newVRegF(env);
3013 /* rf now holds the value to be rounded. The first thing to do
3014 is set the FPU's rounding mode accordingly. */
3016 /* Set host rounding mode */
3017 set_FPU_rounding_mode( env, e->Iex.Binop.arg1 );
3019 /* grndint %rf, %dst */
3020 addInstr(env, X86Instr_FpUnary(Xfp_ROUND, rf, dst));
3022 /* Restore default FPU rounding. */
3023 set_FPU_rounding_default( env );
3025 return dst;
3028 ppIRExpr(e);
3029 vpanic("iselFltExpr_wrk");
3033 /*---------------------------------------------------------*/
3034 /*--- ISEL: Floating point expressions (64 bit) ---*/
3035 /*---------------------------------------------------------*/
3037 /* Compute a 64-bit floating point value into a register, the identity
3038 of which is returned. As with iselIntExpr_R, the reg may be either
3039 real or virtual; in any case it must not be changed by subsequent
3040 code emitted by the caller. */
3042 /* IEEE 754 formats. From http://www.freesoft.org/CIE/RFC/1832/32.htm:
3044 Type                S (1 bit)  E (11 bits)  F (52 bits)
3045 ----                ---------  -----------  -----------
3046 signalling NaN      u          2047 (max)   .0uuuuu---u
3047                                             (with at least
3048                                              one 1 bit)
3049 quiet NaN           u          2047 (max)   .1uuuuu---u
3051 negative infinity   1          2047 (max)   .000000---0
3053 positive infinity   0          2047 (max)   .000000---0
3055 negative zero       1          0            .000000---0
3057 positive zero       0          0            .000000---0
3058 */
3060 static HReg iselDblExpr ( ISelEnv* env, const IRExpr* e )
3062 HReg r = iselDblExpr_wrk( env, e );
3063 # if 0
3064 vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
3065 # endif
3066 vassert(hregClass(r) == HRcFlt64);
3067 vassert(hregIsVirtual(r));
3068 return r;
3071 /* DO NOT CALL THIS DIRECTLY */
3072 static HReg iselDblExpr_wrk ( ISelEnv* env, const IRExpr* e )
3074 IRType ty = typeOfIRExpr(env->type_env,e);
3075 vassert(e);
3076 vassert(ty == Ity_F64);
3078 if (e->tag == Iex_RdTmp) {
3079 return lookupIRTemp(env, e->Iex.RdTmp.tmp);
3082 if (e->tag == Iex_Const) {
3083 union { UInt u32x2[2]; ULong u64; Double f64; } u;
3084 HReg freg = newVRegF(env);
3085 vassert(sizeof(u) == 8);
3086 vassert(sizeof(u.u64) == 8);
3087 vassert(sizeof(u.f64) == 8);
3088 vassert(sizeof(u.u32x2) == 8);
3090 if (e->Iex.Const.con->tag == Ico_F64) {
3091 u.f64 = e->Iex.Const.con->Ico.F64;
3093 else if (e->Iex.Const.con->tag == Ico_F64i) {
3094 u.u64 = e->Iex.Const.con->Ico.F64i;
3096 else
3097 vpanic("iselDblExpr(x86): const");
3099 addInstr(env, X86Instr_Push(X86RMI_Imm(u.u32x2[1])));
3100 addInstr(env, X86Instr_Push(X86RMI_Imm(u.u32x2[0])));
3101 addInstr(env, X86Instr_FpLdSt(True/*load*/, 8, freg,
3102 X86AMode_IR(0, hregX86_ESP())));
3103 add_to_esp(env, 8);
3104 return freg;
3107 if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
3108 X86AMode* am;
3109 HReg res = newVRegF(env);
3110 vassert(e->Iex.Load.ty == Ity_F64);
3111 am = iselIntExpr_AMode(env, e->Iex.Load.addr);
3112 addInstr(env, X86Instr_FpLdSt(True/*load*/, 8, res, am));
3113 return res;
3116 if (e->tag == Iex_Get) {
3117 X86AMode* am = X86AMode_IR( e->Iex.Get.offset,
3118 hregX86_EBP() );
3119 HReg res = newVRegF(env);
3120 addInstr(env, X86Instr_FpLdSt( True/*load*/, 8, res, am ));
3121 return res;
3124 if (e->tag == Iex_GetI) {
3125 X86AMode* am
3126 = genGuestArrayOffset(
3127 env, e->Iex.GetI.descr,
3128 e->Iex.GetI.ix, e->Iex.GetI.bias );
3129 HReg res = newVRegF(env);
3130 addInstr(env, X86Instr_FpLdSt( True/*load*/, 8, res, am ));
3131 return res;
3134 if (e->tag == Iex_Triop) {
3135 X86FpOp fpop = Xfp_INVALID;
3136 IRTriop *triop = e->Iex.Triop.details;
3137 switch (triop->op) {
3138 case Iop_AddF64: fpop = Xfp_ADD; break;
3139 case Iop_SubF64: fpop = Xfp_SUB; break;
3140 case Iop_MulF64: fpop = Xfp_MUL; break;
3141 case Iop_DivF64: fpop = Xfp_DIV; break;
3142 case Iop_ScaleF64: fpop = Xfp_SCALE; break;
3143 case Iop_Yl2xF64: fpop = Xfp_YL2X; break;
3144 case Iop_Yl2xp1F64: fpop = Xfp_YL2XP1; break;
3145 case Iop_AtanF64: fpop = Xfp_ATAN; break;
3146 case Iop_PRemF64: fpop = Xfp_PREM; break;
3147 case Iop_PRem1F64: fpop = Xfp_PREM1; break;
3148 default: break;
3150 if (fpop != Xfp_INVALID) {
3151 HReg res = newVRegF(env);
3152 HReg srcL = iselDblExpr(env, triop->arg2);
3153 HReg srcR = iselDblExpr(env, triop->arg3);
3154 /* XXXROUNDINGFIXME */
3155 /* set roundingmode here */
3156 addInstr(env, X86Instr_FpBinary(fpop,srcL,srcR,res));
3157 if (fpop != Xfp_ADD && fpop != Xfp_SUB
3158 && fpop != Xfp_MUL && fpop != Xfp_DIV)
3159 roundToF64(env, res);
3160 return res;
3164 if (e->tag == Iex_Binop && e->Iex.Binop.op == Iop_RoundF64toInt) {
3165 HReg rf = iselDblExpr(env, e->Iex.Binop.arg2);
3166 HReg dst = newVRegF(env);
3168 /* rf now holds the value to be rounded. The first thing to do
3169 is set the FPU's rounding mode accordingly. */
3171 /* Set host rounding mode */
3172 set_FPU_rounding_mode( env, e->Iex.Binop.arg1 );
3174 /* grndint %rf, %dst */
3175 addInstr(env, X86Instr_FpUnary(Xfp_ROUND, rf, dst));
3177 /* Restore default FPU rounding. */
3178 set_FPU_rounding_default( env );
3180 return dst;
3183 if (e->tag == Iex_Binop && e->Iex.Binop.op == Iop_I64StoF64) {
3184 HReg dst = newVRegF(env);
3185 HReg rHi,rLo;
3186 iselInt64Expr( &rHi, &rLo, env, e->Iex.Binop.arg2);
3187 addInstr(env, X86Instr_Push(X86RMI_Reg(rHi)));
3188 addInstr(env, X86Instr_Push(X86RMI_Reg(rLo)));
3190 /* Set host rounding mode */
3191 set_FPU_rounding_mode( env, e->Iex.Binop.arg1 );
3193 addInstr(env, X86Instr_FpLdStI(
3194 True/*load*/, 8, dst,
3195 X86AMode_IR(0, hregX86_ESP())));
3197 /* Restore default FPU rounding. */
3198 set_FPU_rounding_default( env );
3200 add_to_esp(env, 8);
3201 return dst;
3204 if (e->tag == Iex_Binop) {
3205 X86FpOp fpop = Xfp_INVALID;
3206 switch (e->Iex.Binop.op) {
3207 case Iop_SinF64: fpop = Xfp_SIN; break;
3208 case Iop_CosF64: fpop = Xfp_COS; break;
3209 case Iop_TanF64: fpop = Xfp_TAN; break;
3210 case Iop_2xm1F64: fpop = Xfp_2XM1; break;
3211 case Iop_SqrtF64: fpop = Xfp_SQRT; break;
3212 default: break;
3214 if (fpop != Xfp_INVALID) {
3215 HReg res = newVRegF(env);
3216 HReg src = iselDblExpr(env, e->Iex.Binop.arg2);
3217 /* XXXROUNDINGFIXME */
3218 /* set roundingmode here */
3219 /* Note that X86Instr_FpUnary(Xfp_TAN,..) sets the condition
3220 codes. I don't think that matters, since this insn
3221 selector never generates such an instruction intervening
3222 between a flag-setting instruction and a flag-using
3223 instruction. */
3224 addInstr(env, X86Instr_FpUnary(fpop,src,res));
3225 if (fpop != Xfp_SQRT
3226 && fpop != Xfp_NEG && fpop != Xfp_ABS)
3227 roundToF64(env, res);
3228 return res;
3232 if (e->tag == Iex_Unop) {
3233 X86FpOp fpop = Xfp_INVALID;
3234 switch (e->Iex.Unop.op) {
3235 case Iop_NegF64: fpop = Xfp_NEG; break;
3236 case Iop_AbsF64: fpop = Xfp_ABS; break;
3237 default: break;
3239 if (fpop != Xfp_INVALID) {
3240 HReg res = newVRegF(env);
3241 HReg src = iselDblExpr(env, e->Iex.Unop.arg);
3242 addInstr(env, X86Instr_FpUnary(fpop,src,res));
3243 /* No need to do roundToF64(env,res) for Xfp_NEG or Xfp_ABS,
3244 but might need to do that for other unary ops. */
3245 return res;
3249 if (e->tag == Iex_Unop) {
3250 switch (e->Iex.Unop.op) {
3251 case Iop_I32StoF64: {
3252 HReg dst = newVRegF(env);
3253 HReg ri = iselIntExpr_R(env, e->Iex.Unop.arg);
3254 addInstr(env, X86Instr_Push(X86RMI_Reg(ri)));
3255 set_FPU_rounding_default(env);
3256 addInstr(env, X86Instr_FpLdStI(
3257 True/*load*/, 4, dst,
3258 X86AMode_IR(0, hregX86_ESP())));
3259 add_to_esp(env, 4);
3260 return dst;
3262 case Iop_ReinterpI64asF64: {
3263 /* Given an I64, produce an IEEE754 double with the same
3264 bit pattern. */
3265 HReg dst = newVRegF(env);
3266 HReg rHi, rLo;
3267 iselInt64Expr( &rHi, &rLo, env, e->Iex.Unop.arg);
3268 /* paranoia */
3269 set_FPU_rounding_default(env);
3270 addInstr(env, X86Instr_Push(X86RMI_Reg(rHi)));
3271 addInstr(env, X86Instr_Push(X86RMI_Reg(rLo)));
3272 addInstr(env, X86Instr_FpLdSt(
3273 True/*load*/, 8, dst,
3274 X86AMode_IR(0, hregX86_ESP())));
3275 add_to_esp(env, 8);
3276 return dst;
3278 case Iop_F32toF64: {
3279 /* this is a no-op */
3280 HReg res = iselFltExpr(env, e->Iex.Unop.arg);
3281 return res;
3283 default:
3284 break;
3288 /* --------- MULTIPLEX --------- */
3289 if (e->tag == Iex_ITE) { // VFD
3290 if (ty == Ity_F64
3291 && typeOfIRExpr(env->type_env,e->Iex.ITE.cond) == Ity_I1) {
3292 HReg r1 = iselDblExpr(env, e->Iex.ITE.iftrue);
3293 HReg r0 = iselDblExpr(env, e->Iex.ITE.iffalse);
3294 HReg dst = newVRegF(env);
3295 addInstr(env, X86Instr_FpUnary(Xfp_MOV,r1,dst));
3296 X86CondCode cc = iselCondCode(env, e->Iex.ITE.cond);
3297 addInstr(env, X86Instr_FpCMov(cc ^ 1, r0, dst));
3298 return dst;
3302 ppIRExpr(e);
3303 vpanic("iselDblExpr_wrk");
3307 /*---------------------------------------------------------*/
3308 /*--- ISEL: SIMD (Vector) expressions, 128 bit. ---*/
3309 /*---------------------------------------------------------*/
3311 static HReg iselVecExpr ( ISelEnv* env, const IRExpr* e )
3313 HReg r = iselVecExpr_wrk( env, e );
3314 # if 0
3315 vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
3316 # endif
3317 vassert(hregClass(r) == HRcVec128);
3318 vassert(hregIsVirtual(r));
3319 return r;
3323 /* DO NOT CALL THIS DIRECTLY */
3324 static HReg iselVecExpr_wrk ( ISelEnv* env, const IRExpr* e )
3327 # define REQUIRE_SSE1 \
3328 do { if (env->hwcaps == 0/*baseline, no sse*/ \
3329 || env->hwcaps == VEX_HWCAPS_X86_MMXEXT /*Integer SSE*/) \
3330 goto vec_fail; \
3331 } while (0)
3333 # define REQUIRE_SSE2 \
3334 do { if (0 == (env->hwcaps & VEX_HWCAPS_X86_SSE2)) \
3335 goto vec_fail; \
3336 } while (0)
3338 # define SSE2_OR_ABOVE \
3339 (env->hwcaps & VEX_HWCAPS_X86_SSE2)
3341 HWord fn = 0; /* address of helper fn, if required */
3342 MatchInfo mi;
3343 Bool arg1isEReg = False;
3344 X86SseOp op = Xsse_INVALID;
3345 IRType ty = typeOfIRExpr(env->type_env,e);
3346 vassert(e);
3347 vassert(ty == Ity_V128);
3349 REQUIRE_SSE1;
3351 if (e->tag == Iex_RdTmp) {
3352 return lookupIRTemp(env, e->Iex.RdTmp.tmp);
3355 if (e->tag == Iex_Get) {
3356 HReg dst = newVRegV(env);
3357 addInstr(env, X86Instr_SseLdSt(
3358 True/*load*/,
3359 dst,
3360 X86AMode_IR(e->Iex.Get.offset, hregX86_EBP())
3363 return dst;
3366 if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
3367 HReg dst = newVRegV(env);
3368 X86AMode* am = iselIntExpr_AMode(env, e->Iex.Load.addr);
3369 addInstr(env, X86Instr_SseLdSt( True/*load*/, dst, am ));
3370 return dst;
3373 if (e->tag == Iex_Const) {
3374 HReg dst = newVRegV(env);
3375 vassert(e->Iex.Const.con->tag == Ico_V128);
3376 addInstr(env, X86Instr_SseConst(e->Iex.Const.con->Ico.V128, dst));
3377 return dst;
3380 if (e->tag == Iex_Unop) {
3382 if (SSE2_OR_ABOVE) {
3383 /* 64UtoV128(LDle:I64(addr)) */
3384 DECLARE_PATTERN(p_zwiden_load64);
3385 DEFINE_PATTERN(p_zwiden_load64,
3386 unop(Iop_64UtoV128,
3387 IRExpr_Load(Iend_LE,Ity_I64,bind(0))));
3388 if (matchIRExpr(&mi, p_zwiden_load64, e)) {
3389 X86AMode* am = iselIntExpr_AMode(env, mi.bindee[0]);
3390 HReg dst = newVRegV(env);
3391 addInstr(env, X86Instr_SseLdzLO(8, dst, am));
3392 return dst;
3396 switch (e->Iex.Unop.op) {
3398 case Iop_NotV128: {
3399 HReg arg = iselVecExpr(env, e->Iex.Unop.arg);
3400 return do_sse_Not128(env, arg);
3403 case Iop_CmpNEZ64x2: {
3404 /* We can use SSE2 instructions for this. */
3405 /* Ideally, we want to do a 64Ix2 comparison against zero of
3406 the operand. Problem is no such insn exists. Solution
3407 therefore is to do a 32Ix4 comparison instead, and bitwise-
3408 negate (NOT) the result. Let a,b,c,d be 32-bit lanes, and
3409 let the not'd result of this initial comparison be a:b:c:d.
3410 What we need to compute is (a|b):(a|b):(c|d):(c|d). So, use
3411 pshufd to create a value b:a:d:c, and OR that with a:b:c:d,
3412 giving the required result.
3414 The required selection sequence is 2,3,0,1, which
3415 according to Intel's documentation means the pshufd
3416 literal value is 0xB1, that is,
3417 (2 << 6) | (3 << 4) | (0 << 2) | (1 << 0)
3418 */
3419 HReg arg = iselVecExpr(env, e->Iex.Unop.arg);
3420 HReg tmp = newVRegV(env);
3421 HReg dst = newVRegV(env);
3422 REQUIRE_SSE2;
3423 addInstr(env, X86Instr_SseReRg(Xsse_XOR, tmp, tmp));
3424 addInstr(env, X86Instr_SseReRg(Xsse_CMPEQ32, arg, tmp));
3425 tmp = do_sse_Not128(env, tmp);
3426 addInstr(env, X86Instr_SseShuf(0xB1, tmp, dst));
3427 addInstr(env, X86Instr_SseReRg(Xsse_OR, tmp, dst));
3428 return dst;
3431 case Iop_CmpNEZ32x4: {
3432 /* Sigh, we have to generate lousy code since this has to
3433 work on SSE1 hosts */
3434 /* basically, the idea is: for each lane:
3435 movl lane, %r ; negl %r (now CF = lane==0 ? 0 : 1)
3436 sbbl %r, %r (now %r = 1Sto32(CF))
3437 movl %r, lane
3438 */
3439 Int i;
3440 X86AMode* am;
3441 X86AMode* esp0 = X86AMode_IR(0, hregX86_ESP());
3442 HReg arg = iselVecExpr(env, e->Iex.Unop.arg);
3443 HReg dst = newVRegV(env);
3444 HReg r32 = newVRegI(env);
3445 sub_from_esp(env, 16);
3446 addInstr(env, X86Instr_SseLdSt(False/*store*/, arg, esp0));
3447 for (i = 0; i < 4; i++) {
3448 am = X86AMode_IR(i*4, hregX86_ESP());
3449 addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Mem(am), r32));
3450 addInstr(env, X86Instr_Unary32(Xun_NEG, r32));
3451 addInstr(env, X86Instr_Alu32R(Xalu_SBB, X86RMI_Reg(r32), r32));
3452 addInstr(env, X86Instr_Alu32M(Xalu_MOV, X86RI_Reg(r32), am));
3454 addInstr(env, X86Instr_SseLdSt(True/*load*/, dst, esp0));
3455 add_to_esp(env, 16);
3456 return dst;
3459 case Iop_CmpNEZ8x16:
3460 case Iop_CmpNEZ16x8: {
3461 /* We can use SSE2 instructions for this. */
3462 HReg arg;
3463 HReg vec0 = newVRegV(env);
3464 HReg vec1 = newVRegV(env);
3465 HReg dst = newVRegV(env);
3466 X86SseOp cmpOp
3467 = e->Iex.Unop.op==Iop_CmpNEZ16x8 ? Xsse_CMPEQ16
3468 : Xsse_CMPEQ8;
3469 REQUIRE_SSE2;
3470 addInstr(env, X86Instr_SseReRg(Xsse_XOR, vec0, vec0));
3471 addInstr(env, mk_vMOVsd_RR(vec0, vec1));
3472 addInstr(env, X86Instr_Sse32Fx4(Xsse_CMPEQF, vec1, vec1));
3473 /* defer arg computation to here so as to give CMPEQF as long
3474 as possible to complete */
3475 arg = iselVecExpr(env, e->Iex.Unop.arg);
3476 /* vec0 is all 0s; vec1 is all 1s */
3477 addInstr(env, mk_vMOVsd_RR(arg, dst));
3478 /* 16x8 or 8x16 comparison == */
3479 addInstr(env, X86Instr_SseReRg(cmpOp, vec0, dst));
3480 /* invert result */
3481 addInstr(env, X86Instr_SseReRg(Xsse_XOR, vec1, dst));
3482 return dst;
3485 case Iop_RecipEst32Fx4: op = Xsse_RCPF; goto do_32Fx4_unary;
3486 case Iop_RSqrtEst32Fx4: op = Xsse_RSQRTF; goto do_32Fx4_unary;
3487 do_32Fx4_unary:
3489 HReg arg = iselVecExpr(env, e->Iex.Unop.arg);
3490 HReg dst = newVRegV(env);
3491 addInstr(env, X86Instr_Sse32Fx4(op, arg, dst));
3492 return dst;
3495 case Iop_RecipEst32F0x4: op = Xsse_RCPF; goto do_32F0x4_unary;
3496 case Iop_RSqrtEst32F0x4: op = Xsse_RSQRTF; goto do_32F0x4_unary;
3497 case Iop_Sqrt32F0x4: op = Xsse_SQRTF; goto do_32F0x4_unary;
3498 do_32F0x4_unary:
3500 /* A bit subtle. We have to copy the arg to the result
3501 register first, because actually doing the SSE scalar insn
3502 leaves the upper 3/4 of the destination register
3503 unchanged. Whereas the required semantics of these
3504 primops is that the upper 3/4 is simply copied in from the
3505 argument. */
3506 HReg arg = iselVecExpr(env, e->Iex.Unop.arg);
3507 HReg dst = newVRegV(env);
3508 addInstr(env, mk_vMOVsd_RR(arg, dst));
3509 addInstr(env, X86Instr_Sse32FLo(op, arg, dst));
3510 return dst;
3513 case Iop_Sqrt64F0x2: op = Xsse_SQRTF; goto do_64F0x2_unary;
3514 do_64F0x2_unary:
3516 /* A bit subtle. We have to copy the arg to the result
3517 register first, because actually doing the SSE scalar insn
3518 leaves the upper half of the destination register
3519 unchanged. Whereas the required semantics of these
3520 primops is that the upper half is simply copied in from the
3521 argument. */
3522 HReg arg = iselVecExpr(env, e->Iex.Unop.arg);
3523 HReg dst = newVRegV(env);
3524 REQUIRE_SSE2;
3525 addInstr(env, mk_vMOVsd_RR(arg, dst));
3526 addInstr(env, X86Instr_Sse64FLo(op, arg, dst));
3527 return dst;
3530 case Iop_32UtoV128: {
3531 HReg dst = newVRegV(env);
3532 X86AMode* esp0 = X86AMode_IR(0, hregX86_ESP());
3533 X86RMI* rmi = iselIntExpr_RMI(env, e->Iex.Unop.arg);
3534 addInstr(env, X86Instr_Push(rmi));
3535 addInstr(env, X86Instr_SseLdzLO(4, dst, esp0));
3536 add_to_esp(env, 4);
3537 return dst;
3540 case Iop_64UtoV128: {
3541 HReg rHi, rLo;
3542 HReg dst = newVRegV(env);
3543 X86AMode* esp0 = X86AMode_IR(0, hregX86_ESP());
3544 iselInt64Expr(&rHi, &rLo, env, e->Iex.Unop.arg);
3545 addInstr(env, X86Instr_Push(X86RMI_Reg(rHi)));
3546 addInstr(env, X86Instr_Push(X86RMI_Reg(rLo)));
3547 addInstr(env, X86Instr_SseLdzLO(8, dst, esp0));
3548 add_to_esp(env, 8);
3549 return dst;
3552 default:
3553 break;
3554 } /* switch (e->Iex.Unop.op) */
3555 } /* if (e->tag == Iex_Unop) */
3557 if (e->tag == Iex_Binop) {
3558 switch (e->Iex.Binop.op) {
3560 case Iop_Sqrt64Fx2:
3561 REQUIRE_SSE2;
3562 /* fallthrough */
3563 case Iop_Sqrt32Fx4: {
3564 /* :: (rmode, vec) -> vec */
3565 HReg arg = iselVecExpr(env, e->Iex.Binop.arg2);
3566 HReg dst = newVRegV(env);
3567 /* XXXROUNDINGFIXME */
3568 /* set roundingmode here */
3569 addInstr(env, (e->Iex.Binop.op == Iop_Sqrt64Fx2
3570 ? X86Instr_Sse64Fx2 : X86Instr_Sse32Fx4)
3571 (Xsse_SQRTF, arg, dst));
3572 return dst;
3575 case Iop_SetV128lo32: {
3576 HReg dst = newVRegV(env);
3577 HReg srcV = iselVecExpr(env, e->Iex.Binop.arg1);
3578 HReg srcI = iselIntExpr_R(env, e->Iex.Binop.arg2);
3579 X86AMode* esp0 = X86AMode_IR(0, hregX86_ESP());
3580 sub_from_esp(env, 16);
3581 addInstr(env, X86Instr_SseLdSt(False/*store*/, srcV, esp0));
3582 addInstr(env, X86Instr_Alu32M(Xalu_MOV, X86RI_Reg(srcI), esp0));
3583 addInstr(env, X86Instr_SseLdSt(True/*load*/, dst, esp0));
3584 add_to_esp(env, 16);
3585 return dst;
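/* Data flow of the stack round-trip above: the 16 bytes at (%esp)
   first hold all of srcV; the 32-bit store then overwrites only
   bytes 0..3 with srcI; the final 128-bit reload therefore yields
   srcV with its low lane replaced, which is exactly SetV128lo32. */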
3588 case Iop_SetV128lo64: {
3589 HReg dst = newVRegV(env);
3590 HReg srcV = iselVecExpr(env, e->Iex.Binop.arg1);
3591 HReg srcIhi, srcIlo;
3592 X86AMode* esp0 = X86AMode_IR(0, hregX86_ESP());
3593 X86AMode* esp4 = advance4(esp0);
3594 iselInt64Expr(&srcIhi, &srcIlo, env, e->Iex.Binop.arg2);
3595 sub_from_esp(env, 16);
3596 addInstr(env, X86Instr_SseLdSt(False/*store*/, srcV, esp0));
3597 addInstr(env, X86Instr_Alu32M(Xalu_MOV, X86RI_Reg(srcIlo), esp0));
3598 addInstr(env, X86Instr_Alu32M(Xalu_MOV, X86RI_Reg(srcIhi), esp4));
3599 addInstr(env, X86Instr_SseLdSt(True/*load*/, dst, esp0));
3600 add_to_esp(env, 16);
3601 return dst;
3604 case Iop_64HLtoV128: {
3605 HReg r3, r2, r1, r0;
3606 X86AMode* esp0 = X86AMode_IR(0, hregX86_ESP());
3607 X86AMode* esp4 = advance4(esp0);
3608 X86AMode* esp8 = advance4(esp4);
3609 X86AMode* esp12 = advance4(esp8);
3610 HReg dst = newVRegV(env);
3611 /* do this via the stack (easy, convenient, etc) */
3612 sub_from_esp(env, 16);
3613 /* Do the less significant 64 bits */
3614 iselInt64Expr(&r1, &r0, env, e->Iex.Binop.arg2);
3615 addInstr(env, X86Instr_Alu32M(Xalu_MOV, X86RI_Reg(r0), esp0));
3616 addInstr(env, X86Instr_Alu32M(Xalu_MOV, X86RI_Reg(r1), esp4));
3617 /* Do the more significant 64 bits */
3618 iselInt64Expr(&r3, &r2, env, e->Iex.Binop.arg1);
3619 addInstr(env, X86Instr_Alu32M(Xalu_MOV, X86RI_Reg(r2), esp8));
3620 addInstr(env, X86Instr_Alu32M(Xalu_MOV, X86RI_Reg(r3), esp12));
3621 /* Fetch result back from stack. */
3622 addInstr(env, X86Instr_SseLdSt(True/*load*/, dst, esp0));
3623 add_to_esp(env, 16);
3624 return dst;
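/* Resulting (little-endian) stack layout just before the reload:
      bytes  0.. 7 : arg2 (r1:r0, the less significant 64 bits)
      bytes  8..15 : arg1 (r3:r2, the more significant 64 bits)
   so the 128-bit load produces a vector whose low half is arg2 and
   whose high half is arg1, as 64HLtoV128 requires. */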
3627 case Iop_CmpEQ32Fx4: op = Xsse_CMPEQF; goto do_32Fx4;
3628 case Iop_CmpLT32Fx4: op = Xsse_CMPLTF; goto do_32Fx4;
3629 case Iop_CmpLE32Fx4: op = Xsse_CMPLEF; goto do_32Fx4;
3630 case Iop_CmpUN32Fx4: op = Xsse_CMPUNF; goto do_32Fx4;
3631 case Iop_Max32Fx4: op = Xsse_MAXF; goto do_32Fx4;
3632 case Iop_Min32Fx4: op = Xsse_MINF; goto do_32Fx4;
3633 do_32Fx4:
3635 HReg argL = iselVecExpr(env, e->Iex.Binop.arg1);
3636 HReg argR = iselVecExpr(env, e->Iex.Binop.arg2);
3637 HReg dst = newVRegV(env);
3638 addInstr(env, mk_vMOVsd_RR(argL, dst));
3639 addInstr(env, X86Instr_Sse32Fx4(op, argR, dst));
3640 return dst;
3643 case Iop_CmpEQ64Fx2: op = Xsse_CMPEQF; goto do_64Fx2;
3644 case Iop_CmpLT64Fx2: op = Xsse_CMPLTF; goto do_64Fx2;
3645 case Iop_CmpLE64Fx2: op = Xsse_CMPLEF; goto do_64Fx2;
3646 case Iop_CmpUN64Fx2: op = Xsse_CMPUNF; goto do_64Fx2;
3647 case Iop_Max64Fx2: op = Xsse_MAXF; goto do_64Fx2;
3648 case Iop_Min64Fx2: op = Xsse_MINF; goto do_64Fx2;
3649 do_64Fx2:
3651 HReg argL = iselVecExpr(env, e->Iex.Binop.arg1);
3652 HReg argR = iselVecExpr(env, e->Iex.Binop.arg2);
3653 HReg dst = newVRegV(env);
3654 REQUIRE_SSE2;
3655 addInstr(env, mk_vMOVsd_RR(argL, dst));
3656 addInstr(env, X86Instr_Sse64Fx2(op, argR, dst));
3657 return dst;
3660 case Iop_CmpEQ32F0x4: op = Xsse_CMPEQF; goto do_32F0x4;
3661 case Iop_CmpLT32F0x4: op = Xsse_CMPLTF; goto do_32F0x4;
3662 case Iop_CmpLE32F0x4: op = Xsse_CMPLEF; goto do_32F0x4;
3663 case Iop_CmpUN32F0x4: op = Xsse_CMPUNF; goto do_32F0x4;
3664 case Iop_Add32F0x4: op = Xsse_ADDF; goto do_32F0x4;
3665 case Iop_Div32F0x4: op = Xsse_DIVF; goto do_32F0x4;
3666 case Iop_Max32F0x4: op = Xsse_MAXF; goto do_32F0x4;
3667 case Iop_Min32F0x4: op = Xsse_MINF; goto do_32F0x4;
3668 case Iop_Mul32F0x4: op = Xsse_MULF; goto do_32F0x4;
3669 case Iop_Sub32F0x4: op = Xsse_SUBF; goto do_32F0x4;
3670 do_32F0x4: {
3671 HReg argL = iselVecExpr(env, e->Iex.Binop.arg1);
3672 HReg argR = iselVecExpr(env, e->Iex.Binop.arg2);
3673 HReg dst = newVRegV(env);
3674 addInstr(env, mk_vMOVsd_RR(argL, dst));
3675 addInstr(env, X86Instr_Sse32FLo(op, argR, dst));
3676 return dst;
3679 case Iop_CmpEQ64F0x2: op = Xsse_CMPEQF; goto do_64F0x2;
3680 case Iop_CmpLT64F0x2: op = Xsse_CMPLTF; goto do_64F0x2;
3681 case Iop_CmpLE64F0x2: op = Xsse_CMPLEF; goto do_64F0x2;
3682 case Iop_CmpUN64F0x2: op = Xsse_CMPUNF; goto do_64F0x2;
3683 case Iop_Add64F0x2: op = Xsse_ADDF; goto do_64F0x2;
3684 case Iop_Div64F0x2: op = Xsse_DIVF; goto do_64F0x2;
3685 case Iop_Max64F0x2: op = Xsse_MAXF; goto do_64F0x2;
3686 case Iop_Min64F0x2: op = Xsse_MINF; goto do_64F0x2;
3687 case Iop_Mul64F0x2: op = Xsse_MULF; goto do_64F0x2;
3688 case Iop_Sub64F0x2: op = Xsse_SUBF; goto do_64F0x2;
3689 do_64F0x2: {
3690 HReg argL = iselVecExpr(env, e->Iex.Binop.arg1);
3691 HReg argR = iselVecExpr(env, e->Iex.Binop.arg2);
3692 HReg dst = newVRegV(env);
3693 REQUIRE_SSE2;
3694 addInstr(env, mk_vMOVsd_RR(argL, dst));
3695 addInstr(env, X86Instr_Sse64FLo(op, argR, dst));
3696 return dst;
3699 case Iop_QNarrowBin32Sto16Sx8:
3700 op = Xsse_PACKSSD; arg1isEReg = True; goto do_SseReRg;
3701 case Iop_QNarrowBin16Sto8Sx16:
3702 op = Xsse_PACKSSW; arg1isEReg = True; goto do_SseReRg;
3703 case Iop_QNarrowBin16Sto8Ux16:
3704 op = Xsse_PACKUSW; arg1isEReg = True; goto do_SseReRg;
3706 case Iop_InterleaveHI8x16:
3707 op = Xsse_UNPCKHB; arg1isEReg = True; goto do_SseReRg;
3708 case Iop_InterleaveHI16x8:
3709 op = Xsse_UNPCKHW; arg1isEReg = True; goto do_SseReRg;
3710 case Iop_InterleaveHI32x4:
3711 op = Xsse_UNPCKHD; arg1isEReg = True; goto do_SseReRg;
3712 case Iop_InterleaveHI64x2:
3713 op = Xsse_UNPCKHQ; arg1isEReg = True; goto do_SseReRg;
3715 case Iop_InterleaveLO8x16:
3716 op = Xsse_UNPCKLB; arg1isEReg = True; goto do_SseReRg;
3717 case Iop_InterleaveLO16x8:
3718 op = Xsse_UNPCKLW; arg1isEReg = True; goto do_SseReRg;
3719 case Iop_InterleaveLO32x4:
3720 op = Xsse_UNPCKLD; arg1isEReg = True; goto do_SseReRg;
3721 case Iop_InterleaveLO64x2:
3722 op = Xsse_UNPCKLQ; arg1isEReg = True; goto do_SseReRg;
3724 case Iop_AndV128: op = Xsse_AND; goto do_SseReRg;
3725 case Iop_OrV128: op = Xsse_OR; goto do_SseReRg;
3726 case Iop_XorV128: op = Xsse_XOR; goto do_SseReRg;
3727 case Iop_Add8x16: op = Xsse_ADD8; goto do_SseReRg;
3728 case Iop_Add16x8: op = Xsse_ADD16; goto do_SseReRg;
3729 case Iop_Add32x4: op = Xsse_ADD32; goto do_SseReRg;
3730 case Iop_Add64x2: op = Xsse_ADD64; goto do_SseReRg;
3731 case Iop_QAdd8Sx16: op = Xsse_QADD8S; goto do_SseReRg;
3732 case Iop_QAdd16Sx8: op = Xsse_QADD16S; goto do_SseReRg;
3733 case Iop_QAdd8Ux16: op = Xsse_QADD8U; goto do_SseReRg;
3734 case Iop_QAdd16Ux8: op = Xsse_QADD16U; goto do_SseReRg;
3735 case Iop_Avg8Ux16: op = Xsse_AVG8U; goto do_SseReRg;
3736 case Iop_Avg16Ux8: op = Xsse_AVG16U; goto do_SseReRg;
3737 case Iop_CmpEQ8x16: op = Xsse_CMPEQ8; goto do_SseReRg;
3738 case Iop_CmpEQ16x8: op = Xsse_CMPEQ16; goto do_SseReRg;
3739 case Iop_CmpEQ32x4: op = Xsse_CMPEQ32; goto do_SseReRg;
3740 case Iop_CmpGT8Sx16: op = Xsse_CMPGT8S; goto do_SseReRg;
3741 case Iop_CmpGT16Sx8: op = Xsse_CMPGT16S; goto do_SseReRg;
3742 case Iop_CmpGT32Sx4: op = Xsse_CMPGT32S; goto do_SseReRg;
3743 case Iop_Max16Sx8: op = Xsse_MAX16S; goto do_SseReRg;
3744 case Iop_Max8Ux16: op = Xsse_MAX8U; goto do_SseReRg;
3745 case Iop_Min16Sx8: op = Xsse_MIN16S; goto do_SseReRg;
3746 case Iop_Min8Ux16: op = Xsse_MIN8U; goto do_SseReRg;
3747 case Iop_MulHi16Ux8: op = Xsse_MULHI16U; goto do_SseReRg;
3748 case Iop_MulHi16Sx8: op = Xsse_MULHI16S; goto do_SseReRg;
3749 case Iop_Mul16x8: op = Xsse_MUL16; goto do_SseReRg;
3750 case Iop_Sub8x16: op = Xsse_SUB8; goto do_SseReRg;
3751 case Iop_Sub16x8: op = Xsse_SUB16; goto do_SseReRg;
3752 case Iop_Sub32x4: op = Xsse_SUB32; goto do_SseReRg;
3753 case Iop_Sub64x2: op = Xsse_SUB64; goto do_SseReRg;
3754 case Iop_QSub8Sx16: op = Xsse_QSUB8S; goto do_SseReRg;
3755 case Iop_QSub16Sx8: op = Xsse_QSUB16S; goto do_SseReRg;
3756 case Iop_QSub8Ux16: op = Xsse_QSUB8U; goto do_SseReRg;
3757 case Iop_QSub16Ux8: op = Xsse_QSUB16U; goto do_SseReRg;
3758 do_SseReRg: {
3759 HReg arg1 = iselVecExpr(env, e->Iex.Binop.arg1);
3760 HReg arg2 = iselVecExpr(env, e->Iex.Binop.arg2);
3761 HReg dst = newVRegV(env);
3762 if (op != Xsse_OR && op != Xsse_AND && op != Xsse_XOR)
3763 REQUIRE_SSE2;
3764 if (arg1isEReg) {
3765 addInstr(env, mk_vMOVsd_RR(arg2, dst));
3766 addInstr(env, X86Instr_SseReRg(op, arg1, dst));
3767 } else {
3768 addInstr(env, mk_vMOVsd_RR(arg1, dst));
3769 addInstr(env, X86Instr_SseReRg(op, arg2, dst));
3771 return dst;
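/* The arg1isEReg flag controls operand routing for the
   non-commutative pack/unpack cases above: when it is set, arg2 is
   copied into dst and the instruction is applied with arg1 as the E
   (source) operand, e.g. for Iop_QNarrowBin16Sto8Sx16 roughly
      (register copy)  arg2 -> dst
      packsswb         arg1, dst
   For the commutative arithmetic and logical cases the flag is left
   False and the operands are taken in IR order. */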
3774 case Iop_ShlN16x8: op = Xsse_SHL16; goto do_SseShift;
3775 case Iop_ShlN32x4: op = Xsse_SHL32; goto do_SseShift;
3776 case Iop_ShlN64x2: op = Xsse_SHL64; goto do_SseShift;
3777 case Iop_SarN16x8: op = Xsse_SAR16; goto do_SseShift;
3778 case Iop_SarN32x4: op = Xsse_SAR32; goto do_SseShift;
3779 case Iop_ShrN16x8: op = Xsse_SHR16; goto do_SseShift;
3780 case Iop_ShrN32x4: op = Xsse_SHR32; goto do_SseShift;
3781 case Iop_ShrN64x2: op = Xsse_SHR64; goto do_SseShift;
3782 do_SseShift: {
3783 HReg greg = iselVecExpr(env, e->Iex.Binop.arg1);
3784 X86RMI* rmi = iselIntExpr_RMI(env, e->Iex.Binop.arg2);
3785 X86AMode* esp0 = X86AMode_IR(0, hregX86_ESP());
3786 HReg ereg = newVRegV(env);
3787 HReg dst = newVRegV(env);
3788 REQUIRE_SSE2;
3789 addInstr(env, X86Instr_Push(X86RMI_Imm(0)));
3790 addInstr(env, X86Instr_Push(X86RMI_Imm(0)));
3791 addInstr(env, X86Instr_Push(X86RMI_Imm(0)));
3792 addInstr(env, X86Instr_Push(rmi));
3793 addInstr(env, X86Instr_SseLdSt(True/*load*/, ereg, esp0));
3794 addInstr(env, mk_vMOVsd_RR(greg, dst));
3795 addInstr(env, X86Instr_SseReRg(op, ereg, dst));
3796 add_to_esp(env, 16);
3797 return dst;
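/* These shifts use the xmm-register-count form of PSLLW/PSRLD/etc,
   so the count is materialised as a 128-bit value first.  Roughly:
      pushl $0 ; pushl $0 ; pushl $0 ; pushl <count>
      movupd (%esp), ereg          -- bytes 0..3 = count, rest zero
      (register copy)  greg -> dst
      psllw/pslld/...  ereg, dst
      addl  $16, %esp
   The three zero pushes keep the bits above the count clear, since
   the hardware takes the shift amount from the whole low 64 bits. */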
3800 case Iop_NarrowBin32to16x8:
3801 fn = (HWord)h_generic_calc_NarrowBin32to16x8;
3802 goto do_SseAssistedBinary;
3803 case Iop_NarrowBin16to8x16:
3804 fn = (HWord)h_generic_calc_NarrowBin16to8x16;
3805 goto do_SseAssistedBinary;
3806 do_SseAssistedBinary: {
3807 /* As with the amd64 case (from which this is copied), we
3808 generate pretty bad code. */
3809 vassert(fn != 0);
3810 HReg dst = newVRegV(env);
3811 HReg argL = iselVecExpr(env, e->Iex.Binop.arg1);
3812 HReg argR = iselVecExpr(env, e->Iex.Binop.arg2);
3813 HReg argp = newVRegI(env);
3814 /* subl $112, %esp -- make a space */
3815 sub_from_esp(env, 112);
3816 /* leal 48(%esp), %r_argp -- point into it */
3817 addInstr(env, X86Instr_Lea32(X86AMode_IR(48, hregX86_ESP()),
3818 argp));
3819 /* andl $-16, %r_argp -- 16-align the pointer */
3820 addInstr(env, X86Instr_Alu32R(Xalu_AND,
3821 X86RMI_Imm( ~(UInt)15 ),
3822 argp));
3823 /* Prepare 3 arg regs:
3824 leal 0(%r_argp), %eax
3825 leal 16(%r_argp), %edx
3826 leal 32(%r_argp), %ecx
3828 addInstr(env, X86Instr_Lea32(X86AMode_IR(0, argp),
3829 hregX86_EAX()));
3830 addInstr(env, X86Instr_Lea32(X86AMode_IR(16, argp),
3831 hregX86_EDX()));
3832 addInstr(env, X86Instr_Lea32(X86AMode_IR(32, argp),
3833 hregX86_ECX()));
3834 /* Store the two args, at (%edx) and (%ecx):
3835 movupd %argL, 0(%edx)
3836 movupd %argR, 0(%ecx)
3838 addInstr(env, X86Instr_SseLdSt(False/*!isLoad*/, argL,
3839 X86AMode_IR(0, hregX86_EDX())));
3840 addInstr(env, X86Instr_SseLdSt(False/*!isLoad*/, argR,
3841 X86AMode_IR(0, hregX86_ECX())));
3842 /* call the helper */
3843 addInstr(env, X86Instr_Call( Xcc_ALWAYS, (Addr32)fn,
3844 3, mk_RetLoc_simple(RLPri_None) ));
3845 /* fetch the result from memory, using %r_argp, which the
3846 register allocator will keep alive across the call. */
3847 addInstr(env, X86Instr_SseLdSt(True/*isLoad*/, dst,
3848 X86AMode_IR(0, argp)));
3849 /* and finally, clear the space */
3850 add_to_esp(env, 112);
3851 return dst;
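/* The helpers reached here have roughly the shape
      void h_generic_calc_NarrowBin32to16x8 ( /*OUT*/V128* res,
                                              V128* argL, V128* argR );
   and are called with three register parameters, which is why the
   three pointers are set up in %eax, %edx and %ecx above.  The
   result area at 0(%r_argp) is read back with a 128-bit load once
   the call returns. */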
3854 default:
3855 break;
3856 } /* switch (e->Iex.Binop.op) */
3857 } /* if (e->tag == Iex_Binop) */
3860 if (e->tag == Iex_Triop) {
3861 IRTriop *triop = e->Iex.Triop.details;
3862 switch (triop->op) {
3864 case Iop_Add32Fx4: op = Xsse_ADDF; goto do_32Fx4_w_rm;
3865 case Iop_Sub32Fx4: op = Xsse_SUBF; goto do_32Fx4_w_rm;
3866 case Iop_Mul32Fx4: op = Xsse_MULF; goto do_32Fx4_w_rm;
3867 case Iop_Div32Fx4: op = Xsse_DIVF; goto do_32Fx4_w_rm;
3868 do_32Fx4_w_rm:
3870 HReg argL = iselVecExpr(env, triop->arg2);
3871 HReg argR = iselVecExpr(env, triop->arg3);
3872 HReg dst = newVRegV(env);
3873 addInstr(env, mk_vMOVsd_RR(argL, dst));
3874 /* XXXROUNDINGFIXME */
3875 /* set roundingmode here */
3876 addInstr(env, X86Instr_Sse32Fx4(op, argR, dst));
3877 return dst;
3880 case Iop_Add64Fx2: op = Xsse_ADDF; goto do_64Fx2_w_rm;
3881 case Iop_Sub64Fx2: op = Xsse_SUBF; goto do_64Fx2_w_rm;
3882 case Iop_Mul64Fx2: op = Xsse_MULF; goto do_64Fx2_w_rm;
3883 case Iop_Div64Fx2: op = Xsse_DIVF; goto do_64Fx2_w_rm;
3884 do_64Fx2_w_rm:
3886 HReg argL = iselVecExpr(env, triop->arg2);
3887 HReg argR = iselVecExpr(env, triop->arg3);
3888 HReg dst = newVRegV(env);
3889 REQUIRE_SSE2;
3890 addInstr(env, mk_vMOVsd_RR(argL, dst));
3891 /* XXXROUNDINGFIXME */
3892 /* set roundingmode here */
3893 addInstr(env, X86Instr_Sse64Fx2(op, argR, dst));
3894 return dst;
3897 default:
3898 break;
3899 } /* switch (triop->op) */
3900 } /* if (e->tag == Iex_Triop) */
3903 if (e->tag == Iex_ITE) { // VFD
3904 HReg r1 = iselVecExpr(env, e->Iex.ITE.iftrue);
3905 HReg r0 = iselVecExpr(env, e->Iex.ITE.iffalse);
3906 HReg dst = newVRegV(env);
3907 addInstr(env, mk_vMOVsd_RR(r1,dst));
3908 X86CondCode cc = iselCondCode(env, e->Iex.ITE.cond);
3909 addInstr(env, X86Instr_SseCMov(cc ^ 1, r0, dst));
3910 return dst;
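/* Note on "cc ^ 1": X86 condition codes come in complementary pairs
   that differ only in bit 0 (e.g. Xcc_Z/Xcc_NZ), so XORing with 1
   negates the condition.  dst starts out holding the iftrue value
   and is conditionally overwritten with iffalse when the guard is
   false. */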
3913 vec_fail:
3914 vex_printf("iselVecExpr (hwcaps = %s): can't reduce\n",
3915 LibVEX_ppVexHwCaps(VexArchX86,env->hwcaps));
3916 ppIRExpr(e);
3917 vpanic("iselVecExpr_wrk");
3919 # undef REQUIRE_SSE1
3920 # undef REQUIRE_SSE2
3921 # undef SSE2_OR_ABOVE
3925 /*---------------------------------------------------------*/
3926 /*--- ISEL: Statements ---*/
3927 /*---------------------------------------------------------*/
3929 static void iselStmt ( ISelEnv* env, IRStmt* stmt )
3931 if (vex_traceflags & VEX_TRACE_VCODE) {
3932 vex_printf("\n-- ");
3933 ppIRStmt(stmt);
3934 vex_printf("\n");
3937 switch (stmt->tag) {
3939 /* --------- STORE --------- */
3940 case Ist_Store: {
3941 IRType tya = typeOfIRExpr(env->type_env, stmt->Ist.Store.addr);
3942 IRType tyd = typeOfIRExpr(env->type_env, stmt->Ist.Store.data);
3943 IREndness end = stmt->Ist.Store.end;
3945 if (tya != Ity_I32 || end != Iend_LE)
3946 goto stmt_fail;
3948 if (tyd == Ity_I32) {
3949 X86AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr);
3950 X86RI* ri = iselIntExpr_RI(env, stmt->Ist.Store.data);
3951 addInstr(env, X86Instr_Alu32M(Xalu_MOV,ri,am));
3952 return;
3954 if (tyd == Ity_I8 || tyd == Ity_I16) {
3955 X86AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr);
3956 HReg r = iselIntExpr_R(env, stmt->Ist.Store.data);
3957 addInstr(env, X86Instr_Store( toUChar(tyd==Ity_I8 ? 1 : 2),
3958 r,am ));
3959 return;
3961 if (tyd == Ity_F64) {
3962 X86AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr);
3963 HReg r = iselDblExpr(env, stmt->Ist.Store.data);
3964 addInstr(env, X86Instr_FpLdSt(False/*store*/, 8, r, am));
3965 return;
3967 if (tyd == Ity_F32) {
3968 X86AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr);
3969 HReg r = iselFltExpr(env, stmt->Ist.Store.data);
3970 addInstr(env, X86Instr_FpLdSt(False/*store*/, 4, r, am));
3971 return;
3973 if (tyd == Ity_I64) {
3974 HReg vHi, vLo, rA;
3975 iselInt64Expr(&vHi, &vLo, env, stmt->Ist.Store.data);
3976 rA = iselIntExpr_R(env, stmt->Ist.Store.addr);
3977 addInstr(env, X86Instr_Alu32M(
3978 Xalu_MOV, X86RI_Reg(vLo), X86AMode_IR(0, rA)));
3979 addInstr(env, X86Instr_Alu32M(
3980 Xalu_MOV, X86RI_Reg(vHi), X86AMode_IR(4, rA)));
3981 return;
3983 if (tyd == Ity_V128) {
3984 X86AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr);
3985 HReg r = iselVecExpr(env, stmt->Ist.Store.data);
3986 addInstr(env, X86Instr_SseLdSt(False/*store*/, r, am));
3987 return;
3989 break;
3992 /* --------- PUT --------- */
3993 case Ist_Put: {
3994 IRType ty = typeOfIRExpr(env->type_env, stmt->Ist.Put.data);
3995 if (ty == Ity_I32) {
3996 /* We're going to write to memory, so compute the RHS into an
3997 X86RI. */
3998 X86RI* ri = iselIntExpr_RI(env, stmt->Ist.Put.data);
3999 addInstr(env,
4000 X86Instr_Alu32M(
4001 Xalu_MOV,
4003 X86AMode_IR(stmt->Ist.Put.offset,hregX86_EBP())
4005 return;
4007 if (ty == Ity_I8 || ty == Ity_I16) {
4008 HReg r = iselIntExpr_R(env, stmt->Ist.Put.data);
4009 addInstr(env, X86Instr_Store(
4010 toUChar(ty==Ity_I8 ? 1 : 2),
4012 X86AMode_IR(stmt->Ist.Put.offset,
4013 hregX86_EBP())));
4014 return;
4016 if (ty == Ity_I64) {
4017 HReg vHi, vLo;
4018 X86AMode* am = X86AMode_IR(stmt->Ist.Put.offset, hregX86_EBP());
4019 X86AMode* am4 = advance4(am);
4020 iselInt64Expr(&vHi, &vLo, env, stmt->Ist.Put.data);
4021 addInstr(env, X86Instr_Alu32M( Xalu_MOV, X86RI_Reg(vLo), am ));
4022 addInstr(env, X86Instr_Alu32M( Xalu_MOV, X86RI_Reg(vHi), am4 ));
4023 return;
4025 if (ty == Ity_V128) {
4026 HReg vec = iselVecExpr(env, stmt->Ist.Put.data);
4027 X86AMode* am = X86AMode_IR(stmt->Ist.Put.offset, hregX86_EBP());
4028 addInstr(env, X86Instr_SseLdSt(False/*store*/, vec, am));
4029 return;
4031 if (ty == Ity_F32) {
4032 HReg f32 = iselFltExpr(env, stmt->Ist.Put.data);
4033 X86AMode* am = X86AMode_IR(stmt->Ist.Put.offset, hregX86_EBP());
4034 set_FPU_rounding_default(env); /* paranoia */
4035 addInstr(env, X86Instr_FpLdSt( False/*store*/, 4, f32, am ));
4036 return;
4038 if (ty == Ity_F64) {
4039 HReg f64 = iselDblExpr(env, stmt->Ist.Put.data);
4040 X86AMode* am = X86AMode_IR(stmt->Ist.Put.offset, hregX86_EBP());
4041 set_FPU_rounding_default(env); /* paranoia */
4042 addInstr(env, X86Instr_FpLdSt( False/*store*/, 8, f64, am ));
4043 return;
4045 break;
4048 /* --------- Indexed PUT --------- */
4049 case Ist_PutI: {
4050 IRPutI *puti = stmt->Ist.PutI.details;
4052 X86AMode* am
4053 = genGuestArrayOffset(
4054 env, puti->descr,
4055 puti->ix, puti->bias );
4057 IRType ty = typeOfIRExpr(env->type_env, puti->data);
4058 if (ty == Ity_F64) {
4059 HReg val = iselDblExpr(env, puti->data);
4060 addInstr(env, X86Instr_FpLdSt( False/*store*/, 8, val, am ));
4061 return;
4063 if (ty == Ity_I8) {
4064 HReg r = iselIntExpr_R(env, puti->data);
4065 addInstr(env, X86Instr_Store( 1, r, am ));
4066 return;
4068 if (ty == Ity_I32) {
4069 HReg r = iselIntExpr_R(env, puti->data);
4070 addInstr(env, X86Instr_Alu32M( Xalu_MOV, X86RI_Reg(r), am ));
4071 return;
4073 if (ty == Ity_I64) {
4074 HReg rHi, rLo;
4075 X86AMode* am4 = advance4(am);
4076 iselInt64Expr(&rHi, &rLo, env, puti->data);
4077 addInstr(env, X86Instr_Alu32M( Xalu_MOV, X86RI_Reg(rLo), am ));
4078 addInstr(env, X86Instr_Alu32M( Xalu_MOV, X86RI_Reg(rHi), am4 ));
4079 return;
4081 break;
4084 /* --------- TMP --------- */
4085 case Ist_WrTmp: {
4086 IRTemp tmp = stmt->Ist.WrTmp.tmp;
4087 IRType ty = typeOfIRTemp(env->type_env, tmp);
4089 /* Optimisation: if stmt->Ist.WrTmp.data is Add32(..,..),
4090 compute it into an AMode and then use LEA. This usually
4091 produces fewer instructions, often because (for memcheck-
4092 created IR) we get t = address-expression (t is later used
4093 twice), and so doing this naturally turns the address
4094 expression back into an X86 amode. */
4095 if (ty == Ity_I32
4096 && stmt->Ist.WrTmp.data->tag == Iex_Binop
4097 && stmt->Ist.WrTmp.data->Iex.Binop.op == Iop_Add32) {
4098 X86AMode* am = iselIntExpr_AMode(env, stmt->Ist.WrTmp.data);
4099 HReg dst = lookupIRTemp(env, tmp);
4100 if (am->tag == Xam_IR && am->Xam.IR.imm == 0) {
4101 /* Hmm, iselIntExpr_AMode wimped out and just computed the
4102 value into a register. Just emit a normal reg-reg move
4103 so reg-alloc can coalesce it away in the usual way. */
4104 HReg src = am->Xam.IR.reg;
4105 addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Reg(src), dst));
4106 } else {
4107 addInstr(env, X86Instr_Lea32(am,dst));
4109 return;
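/* Illustrative example (invented temp names): for
      t3 = Add32(t1, 0x10:I32)
   iselIntExpr_AMode can produce the amode 16(%r_t1), and the WrTmp
   then becomes a single "leal 16(%r_t1), %r_t3" rather than a move
   followed by an add. */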
4112 if (ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8) {
4113 X86RMI* rmi = iselIntExpr_RMI(env, stmt->Ist.WrTmp.data);
4114 HReg dst = lookupIRTemp(env, tmp);
4115 addInstr(env, X86Instr_Alu32R(Xalu_MOV,rmi,dst));
4116 return;
4118 if (ty == Ity_I64) {
4119 HReg rHi, rLo, dstHi, dstLo;
4120 iselInt64Expr(&rHi,&rLo, env, stmt->Ist.WrTmp.data);
4121 lookupIRTemp64( &dstHi, &dstLo, env, tmp);
4122 addInstr(env, mk_iMOVsd_RR(rHi,dstHi) );
4123 addInstr(env, mk_iMOVsd_RR(rLo,dstLo) );
4124 return;
4126 if (ty == Ity_I1) {
4127 X86CondCode cond = iselCondCode(env, stmt->Ist.WrTmp.data);
4128 HReg dst = lookupIRTemp(env, tmp);
4129 addInstr(env, X86Instr_Set32(cond, dst));
4130 return;
4132 if (ty == Ity_F64) {
4133 HReg dst = lookupIRTemp(env, tmp);
4134 HReg src = iselDblExpr(env, stmt->Ist.WrTmp.data);
4135 addInstr(env, X86Instr_FpUnary(Xfp_MOV,src,dst));
4136 return;
4138 if (ty == Ity_F32) {
4139 HReg dst = lookupIRTemp(env, tmp);
4140 HReg src = iselFltExpr(env, stmt->Ist.WrTmp.data);
4141 addInstr(env, X86Instr_FpUnary(Xfp_MOV,src,dst));
4142 return;
4144 if (ty == Ity_V128) {
4145 HReg dst = lookupIRTemp(env, tmp);
4146 HReg src = iselVecExpr(env, stmt->Ist.WrTmp.data);
4147 addInstr(env, mk_vMOVsd_RR(src,dst));
4148 return;
4150 break;
4153 /* --------- Call to DIRTY helper --------- */
4154 case Ist_Dirty: {
4155 IRDirty* d = stmt->Ist.Dirty.details;
4157 /* Figure out the return type, if any. */
4158 IRType retty = Ity_INVALID;
4159 if (d->tmp != IRTemp_INVALID)
4160 retty = typeOfIRTemp(env->type_env, d->tmp);
4162 Bool retty_ok = False;
4163 switch (retty) {
4164 case Ity_INVALID: /* function doesn't return anything */
4165 case Ity_I64: case Ity_I32: case Ity_I16: case Ity_I8:
4166 case Ity_V128:
4167 retty_ok = True; break;
4168 default:
4169 break;
4171 if (!retty_ok)
4172 break; /* will go to stmt_fail: */
4174 /* Marshal args, do the call, and set the return value to
4175 0x555..555 if this is a conditional call that returns a value
4176 and the call is skipped. */
4177 UInt addToSp = 0;
4178 RetLoc rloc = mk_RetLoc_INVALID();
4179 doHelperCall( &addToSp, &rloc, env, d->guard, d->cee, retty, d->args );
4180 vassert(is_sane_RetLoc(rloc));
4182 /* Now figure out what to do with the returned value, if any. */
4183 switch (retty) {
4184 case Ity_INVALID: {
4185 /* No return value. Nothing to do. */
4186 vassert(d->tmp == IRTemp_INVALID);
4187 vassert(rloc.pri == RLPri_None);
4188 vassert(addToSp == 0);
4189 return;
4191 case Ity_I32: case Ity_I16: case Ity_I8: {
4192 /* The returned value is in %eax. Park it in the register
4193 associated with tmp. */
4194 vassert(rloc.pri == RLPri_Int);
4195 vassert(addToSp == 0);
4196 HReg dst = lookupIRTemp(env, d->tmp);
4197 addInstr(env, mk_iMOVsd_RR(hregX86_EAX(),dst) );
4198 return;
4200 case Ity_I64: {
4201 /* The returned value is in %edx:%eax. Park it in the
4202 register-pair associated with tmp. */
4203 vassert(rloc.pri == RLPri_2Int);
4204 vassert(addToSp == 0);
4205 HReg dstHi, dstLo;
4206 lookupIRTemp64( &dstHi, &dstLo, env, d->tmp);
4207 addInstr(env, mk_iMOVsd_RR(hregX86_EDX(),dstHi) );
4208 addInstr(env, mk_iMOVsd_RR(hregX86_EAX(),dstLo) );
4209 return;
4211 case Ity_V128: {
4212 /* The returned value is on the stack, and rloc tells
4213 us where. Fish it off the stack and then move the
4214 stack pointer upwards to clear it, as directed by
4215 doHelperCall. */
4216 vassert(rloc.pri == RLPri_V128SpRel);
4217 vassert(addToSp >= 16);
4218 HReg dst = lookupIRTemp(env, d->tmp);
4219 X86AMode* am = X86AMode_IR(rloc.spOff, hregX86_ESP());
4220 addInstr(env, X86Instr_SseLdSt( True/*load*/, dst, am ));
4221 add_to_esp(env, addToSp);
4222 return;
4224 default:
4225 /*NOTREACHED*/
4226 vassert(0);
4228 break;
4231 /* --------- MEM FENCE --------- */
4232 case Ist_MBE:
4233 switch (stmt->Ist.MBE.event) {
4234 case Imbe_Fence:
4235 addInstr(env, X86Instr_MFence(env->hwcaps));
4236 return;
4237 default:
4238 break;
4240 break;
4242 /* --------- ACAS --------- */
4243 case Ist_CAS:
4244 if (stmt->Ist.CAS.details->oldHi == IRTemp_INVALID) {
4245 /* "normal" singleton CAS */
4246 UChar sz;
4247 IRCAS* cas = stmt->Ist.CAS.details;
4248 IRType ty = typeOfIRExpr(env->type_env, cas->dataLo);
4249 /* get: cas->expdLo into %eax, and cas->dataLo into %ebx */
4250 X86AMode* am = iselIntExpr_AMode(env, cas->addr);
4251 HReg rDataLo = iselIntExpr_R(env, cas->dataLo);
4252 HReg rExpdLo = iselIntExpr_R(env, cas->expdLo);
4253 HReg rOldLo = lookupIRTemp(env, cas->oldLo);
4254 vassert(cas->expdHi == NULL);
4255 vassert(cas->dataHi == NULL);
4256 addInstr(env, mk_iMOVsd_RR(rExpdLo, rOldLo));
4257 addInstr(env, mk_iMOVsd_RR(rExpdLo, hregX86_EAX()));
4258 addInstr(env, mk_iMOVsd_RR(rDataLo, hregX86_EBX()));
4259 switch (ty) {
4260 case Ity_I32: sz = 4; break;
4261 case Ity_I16: sz = 2; break;
4262 case Ity_I8: sz = 1; break;
4263 default: goto unhandled_cas;
4265 addInstr(env, X86Instr_ACAS(am, sz));
4266 addInstr(env,
4267 X86Instr_CMov32(Xcc_NZ,
4268 X86RM_Reg(hregX86_EAX()), rOldLo));
4269 return;
4270 } else {
4271 /* double CAS */
4272 IRCAS* cas = stmt->Ist.CAS.details;
4273 IRType ty = typeOfIRExpr(env->type_env, cas->dataLo);
4274 /* only 32-bit allowed in this case */
4275 /* get: cas->expdLo into %eax, and cas->dataLo into %ebx */
4276 /* get: cas->expdHi into %edx, and cas->dataHi into %ecx */
4277 X86AMode* am = iselIntExpr_AMode(env, cas->addr);
4278 HReg rDataHi = iselIntExpr_R(env, cas->dataHi);
4279 HReg rDataLo = iselIntExpr_R(env, cas->dataLo);
4280 HReg rExpdHi = iselIntExpr_R(env, cas->expdHi);
4281 HReg rExpdLo = iselIntExpr_R(env, cas->expdLo);
4282 HReg rOldHi = lookupIRTemp(env, cas->oldHi);
4283 HReg rOldLo = lookupIRTemp(env, cas->oldLo);
4284 if (ty != Ity_I32)
4285 goto unhandled_cas;
4286 addInstr(env, mk_iMOVsd_RR(rExpdHi, rOldHi));
4287 addInstr(env, mk_iMOVsd_RR(rExpdLo, rOldLo));
4288 addInstr(env, mk_iMOVsd_RR(rExpdHi, hregX86_EDX()));
4289 addInstr(env, mk_iMOVsd_RR(rExpdLo, hregX86_EAX()));
4290 addInstr(env, mk_iMOVsd_RR(rDataHi, hregX86_ECX()));
4291 addInstr(env, mk_iMOVsd_RR(rDataLo, hregX86_EBX()));
4292 addInstr(env, X86Instr_DACAS(am));
4293 addInstr(env,
4294 X86Instr_CMov32(Xcc_NZ,
4295 X86RM_Reg(hregX86_EDX()), rOldHi));
4296 addInstr(env,
4297 X86Instr_CMov32(Xcc_NZ,
4298 X86RM_Reg(hregX86_EAX()), rOldLo));
4299 return;
4301 unhandled_cas:
4302 break;
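/* Both CAS forms above rely on the cmpxchg contract: the expected
   value sits in %eax (or %edx:%eax for the double case), the new
   value in %ebx (or %ecx:%ebx), and the compare-and-swap is emitted
   with a LOCK prefix.  If the comparison fails the hardware leaves
   the value it observed in %eax (resp. %edx:%eax), and the
   CMov32(Xcc_NZ, ...) instructions copy that observed value into the
   "old" temporaries; on success they are skipped, leaving the
   expected value that was copied in earlier. */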
4304 /* --------- INSTR MARK --------- */
4305 /* Doesn't generate any executable code ... */
4306 case Ist_IMark:
4307 return;
4309 /* --------- NO-OP --------- */
4310 /* Fairly self-explanatory, wouldn't you say? */
4311 case Ist_NoOp:
4312 return;
4314 /* --------- EXIT --------- */
4315 case Ist_Exit: {
4316 if (stmt->Ist.Exit.dst->tag != Ico_U32)
4317 vpanic("iselStmt(x86): Ist_Exit: dst is not a 32-bit value");
4319 X86CondCode cc = iselCondCode(env, stmt->Ist.Exit.guard);
4320 X86AMode* amEIP = X86AMode_IR(stmt->Ist.Exit.offsIP,
4321 hregX86_EBP());
4323 /* Case: boring transfer to known address */
4324 if (stmt->Ist.Exit.jk == Ijk_Boring) {
4325 if (env->chainingAllowed) {
4326 /* .. almost always true .. */
4327 /* Skip the event check at the dst if this is a forwards
4328 edge. */
4329 Bool toFastEP
4330 = ((Addr32)stmt->Ist.Exit.dst->Ico.U32) > env->max_ga;
4331 if (0) vex_printf("%s", toFastEP ? "Y" : ",");
4332 addInstr(env, X86Instr_XDirect(stmt->Ist.Exit.dst->Ico.U32,
4333 amEIP, cc, toFastEP));
4334 } else {
4335 /* .. very occasionally .. */
4336 /* We can't use chaining, so ask for an assisted transfer,
4337 as that's the only alternative that is allowable. */
4338 HReg r = iselIntExpr_R(env, IRExpr_Const(stmt->Ist.Exit.dst));
4339 addInstr(env, X86Instr_XAssisted(r, amEIP, cc, Ijk_Boring));
4341 return;
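/* XDirect produces a patchable direct jump that can later be chained
   straight to the destination translation; toFastEP selects the
   destination's fast entry point, which omits the event check.  Only
   forward edges (dst > env->max_ga) do so, which keeps the event
   check on any edge that could form a loop. */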
4344 /* Case: assisted transfer to arbitrary address */
4345 switch (stmt->Ist.Exit.jk) {
4346 /* Keep this list in sync with that in iselNext below */
4347 case Ijk_ClientReq:
4348 case Ijk_EmWarn:
4349 case Ijk_MapFail:
4350 case Ijk_NoDecode:
4351 case Ijk_NoRedir:
4352 case Ijk_SigSEGV:
4353 case Ijk_SigTRAP:
4354 case Ijk_Sys_int128:
4355 case Ijk_Sys_int129:
4356 case Ijk_Sys_int130:
4357 case Ijk_Sys_int145:
4358 case Ijk_Sys_int210:
4359 case Ijk_Sys_syscall:
4360 case Ijk_Sys_sysenter:
4361 case Ijk_InvalICache:
4362 case Ijk_Yield:
4364 HReg r = iselIntExpr_R(env, IRExpr_Const(stmt->Ist.Exit.dst));
4365 addInstr(env, X86Instr_XAssisted(r, amEIP, cc, stmt->Ist.Exit.jk));
4366 return;
4368 default:
4369 break;
4372 /* Do we ever expect to see any other kind? */
4373 goto stmt_fail;
4376 default: break;
4378 stmt_fail:
4379 ppIRStmt(stmt);
4380 vpanic("iselStmt");
4384 /*---------------------------------------------------------*/
4385 /*--- ISEL: Basic block terminators (Nexts) ---*/
4386 /*---------------------------------------------------------*/
4388 static void iselNext ( ISelEnv* env,
4389 IRExpr* next, IRJumpKind jk, Int offsIP )
4391 if (vex_traceflags & VEX_TRACE_VCODE) {
4392 vex_printf( "\n-- PUT(%d) = ", offsIP);
4393 ppIRExpr( next );
4394 vex_printf( "; exit-");
4395 ppIRJumpKind(jk);
4396 vex_printf( "\n");
4399 /* Case: boring transfer to known address */
4400 if (next->tag == Iex_Const) {
4401 IRConst* cdst = next->Iex.Const.con;
4402 vassert(cdst->tag == Ico_U32);
4403 if (jk == Ijk_Boring || jk == Ijk_Call) {
4404 /* Boring transfer to known address */
4405 X86AMode* amEIP = X86AMode_IR(offsIP, hregX86_EBP());
4406 if (env->chainingAllowed) {
4407 /* .. almost always true .. */
4408 /* Skip the event check at the dst if this is a forwards
4409 edge. */
4410 Bool toFastEP
4411 = ((Addr32)cdst->Ico.U32) > env->max_ga;
4412 if (0) vex_printf("%s", toFastEP ? "X" : ".");
4413 addInstr(env, X86Instr_XDirect(cdst->Ico.U32,
4414 amEIP, Xcc_ALWAYS,
4415 toFastEP));
4416 } else {
4417 /* .. very occasionally .. */
4418 /* We can't use chaining, so ask for an assisted transfer,
4419 as that's the only alternative that is allowable. */
4420 HReg r = iselIntExpr_R(env, next);
4421 addInstr(env, X86Instr_XAssisted(r, amEIP, Xcc_ALWAYS,
4422 Ijk_Boring));
4424 return;
4428 /* Case: call/return (==boring) transfer to any address */
4429 switch (jk) {
4430 case Ijk_Boring: case Ijk_Ret: case Ijk_Call: {
4431 HReg r = iselIntExpr_R(env, next);
4432 X86AMode* amEIP = X86AMode_IR(offsIP, hregX86_EBP());
4433 if (env->chainingAllowed) {
4434 addInstr(env, X86Instr_XIndir(r, amEIP, Xcc_ALWAYS));
4435 } else {
4436 addInstr(env, X86Instr_XAssisted(r, amEIP, Xcc_ALWAYS,
4437 Ijk_Boring));
4439 return;
4441 default:
4442 break;
4445 /* Case: assisted transfer to arbitrary address */
4446 switch (jk) {
4447 /* Keep this list in sync with that for Ist_Exit above */
4448 case Ijk_ClientReq:
4449 case Ijk_EmWarn:
4450 case Ijk_MapFail:
4451 case Ijk_NoDecode:
4452 case Ijk_NoRedir:
4453 case Ijk_SigSEGV:
4454 case Ijk_SigTRAP:
4455 case Ijk_Sys_int128:
4456 case Ijk_Sys_int129:
4457 case Ijk_Sys_int130:
4458 case Ijk_Sys_int145:
4459 case Ijk_Sys_int210:
4460 case Ijk_Sys_syscall:
4461 case Ijk_Sys_sysenter:
4462 case Ijk_InvalICache:
4463 case Ijk_Yield:
4465 HReg r = iselIntExpr_R(env, next);
4466 X86AMode* amEIP = X86AMode_IR(offsIP, hregX86_EBP());
4467 addInstr(env, X86Instr_XAssisted(r, amEIP, Xcc_ALWAYS, jk));
4468 return;
4470 default:
4471 break;
4474 vex_printf( "\n-- PUT(%d) = ", offsIP);
4475 ppIRExpr( next );
4476 vex_printf( "; exit-");
4477 ppIRJumpKind(jk);
4478 vex_printf( "\n");
4479 vassert(0); // are we expecting any other kind?
4483 /*---------------------------------------------------------*/
4484 /*--- Insn selector top-level ---*/
4485 /*---------------------------------------------------------*/
4487 /* Translate an entire SB to x86 code. */
4489 HInstrArray* iselSB_X86 ( const IRSB* bb,
4490 VexArch arch_host,
4491 const VexArchInfo* archinfo_host,
4492 const VexAbiInfo* vbi/*UNUSED*/,
4493 Int offs_Host_EvC_Counter,
4494 Int offs_Host_EvC_FailAddr,
4495 Bool chainingAllowed,
4496 Bool addProfInc,
4497 Addr max_ga )
4499 Int i, j;
4500 HReg hreg, hregHI;
4501 ISelEnv* env;
4502 UInt hwcaps_host = archinfo_host->hwcaps;
4503 X86AMode *amCounter, *amFailAddr;
4505 /* sanity ... */
4506 vassert(arch_host == VexArchX86);
4507 vassert(0 == (hwcaps_host
4508 & ~(VEX_HWCAPS_X86_MMXEXT
4509 | VEX_HWCAPS_X86_SSE1
4510 | VEX_HWCAPS_X86_SSE2
4511 | VEX_HWCAPS_X86_SSE3
4512 | VEX_HWCAPS_X86_LZCNT)));
4514 /* Check that the host's endianness is as expected. */
4515 vassert(archinfo_host->endness == VexEndnessLE);
4517 /* Make up an initial environment to use. */
4518 env = LibVEX_Alloc_inline(sizeof(ISelEnv));
4519 env->vreg_ctr = 0;
4521 /* Set up output code array. */
4522 env->code = newHInstrArray();
4524 /* Copy BB's type env. */
4525 env->type_env = bb->tyenv;
4527 /* Make up an IRTemp -> virtual HReg mapping. This doesn't
4528 change as we go along. */
4529 env->n_vregmap = bb->tyenv->types_used;
4530 env->vregmap = LibVEX_Alloc_inline(env->n_vregmap * sizeof(HReg));
4531 env->vregmapHI = LibVEX_Alloc_inline(env->n_vregmap * sizeof(HReg));
4533 /* and finally ... */
4534 env->chainingAllowed = chainingAllowed;
4535 env->hwcaps = hwcaps_host;
4536 env->max_ga = max_ga;
4538 /* For each IR temporary, allocate a suitably-kinded virtual
4539 register. */
4540 j = 0;
4541 for (i = 0; i < env->n_vregmap; i++) {
4542 hregHI = hreg = INVALID_HREG;
4543 switch (bb->tyenv->types[i]) {
4544 case Ity_I1:
4545 case Ity_I8:
4546 case Ity_I16:
4547 case Ity_I32: hreg = mkHReg(True, HRcInt32, 0, j++); break;
4548 case Ity_I64: hreg = mkHReg(True, HRcInt32, 0, j++);
4549 hregHI = mkHReg(True, HRcInt32, 0, j++); break;
4550 case Ity_F32:
4551 case Ity_F64: hreg = mkHReg(True, HRcFlt64, 0, j++); break;
4552 case Ity_V128: hreg = mkHReg(True, HRcVec128, 0, j++); break;
4553 default: ppIRType(bb->tyenv->types[i]);
4554 vpanic("iselBB: IRTemp type");
4556 env->vregmap[i] = hreg;
4557 env->vregmapHI[i] = hregHI;
4559 env->vreg_ctr = j;
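/* So, for example, an Ity_I64 temp is represented by a pair of
   32-bit virtual registers: vregmap[i] holds the low half and
   vregmapHI[i] the high half, which is why the 64-bit cases above
   (iselInt64Expr / lookupIRTemp64) always deal in register pairs. */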
4561 /* The very first instruction must be an event check. */
4562 amCounter = X86AMode_IR(offs_Host_EvC_Counter, hregX86_EBP());
4563 amFailAddr = X86AMode_IR(offs_Host_EvC_FailAddr, hregX86_EBP());
4564 addInstr(env, X86Instr_EvCheck(amCounter, amFailAddr));
4566 /* Possibly a block counter increment (for profiling). At this
4567 point we don't know the address of the counter, so just pretend
4568 it is zero. It will have to be patched later, but before this
4569 translation is used, by a call to LibVEX_patchProfCtr. */
4570 if (addProfInc) {
4571 addInstr(env, X86Instr_ProfInc());
4574 /* Ok, finally we can iterate over the statements. */
4575 for (i = 0; i < bb->stmts_used; i++)
4576 iselStmt(env, bb->stmts[i]);
4578 iselNext(env, bb->next, bb->jumpkind, bb->offsIP);
4580 /* record the number of vregs we used. */
4581 env->code->n_vregs = env->vreg_ctr;
4582 return env->code;
4586 /*---------------------------------------------------------------*/
4587 /*--- end host_x86_isel.c ---*/
4588 /*---------------------------------------------------------------*/