Bug 497723 - forgot to restore callgrind output cleanup
[valgrind.git] / VEX / priv / host_x86_isel.c
blobe89b145327513fa6baafa6324566b6fa8345d2e6
2 /*---------------------------------------------------------------*/
3 /*--- begin host_x86_isel.c ---*/
4 /*---------------------------------------------------------------*/
6 /*
7 This file is part of Valgrind, a dynamic binary instrumentation
8 framework.
10 Copyright (C) 2004-2017 OpenWorks LLP
11 info@open-works.net
13 This program is free software; you can redistribute it and/or
14 modify it under the terms of the GNU General Public License as
15 published by the Free Software Foundation; either version 2 of the
16 License, or (at your option) any later version.
18 This program is distributed in the hope that it will be useful, but
19 WITHOUT ANY WARRANTY; without even the implied warranty of
20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 General Public License for more details.
23 You should have received a copy of the GNU General Public License
24 along with this program; if not, see <http://www.gnu.org/licenses/>.
26 The GNU General Public License is contained in the file COPYING.
28 Neither the names of the U.S. Department of Energy nor the
29 University of California nor the names of its contributors may be
30 used to endorse or promote products derived from this software
31 without prior written permission.
34 #include "libvex_basictypes.h"
35 #include "libvex_ir.h"
36 #include "libvex.h"
38 #include "ir_match.h"
39 #include "main_util.h"
40 #include "main_globals.h"
41 #include "host_generic_regs.h"
42 #include "host_generic_simd64.h"
43 #include "host_generic_simd128.h"
44 #include "host_x86_defs.h"
/* TODO 21 Apr 2005:

   -- (Really an assembler issue) don't emit CMov32 as a cmov
      insn, since that's expensive on P4 and conditional branch
      is cheaper if (as we expect) the condition is highly predictable

   -- preserve xmm registers across function calls (by declaring them
      as trashed by call insns)

   -- preserve x87 ST stack discipline across function calls.  Sigh.

   -- Check doHelperCall: if a call is conditional, we cannot safely
      compute any regparm args directly to registers.  Hence, the
      fast-regparm marshalling should be restricted to unconditional
      calls only.
*/
63 /*---------------------------------------------------------*/
64 /*--- x87 control word stuff ---*/
65 /*---------------------------------------------------------*/
/* Vex-generated code expects to run with the FPU set as follows: all
   exceptions masked, round-to-nearest, precision = 53 bits.  This
   corresponds to a FPU control word value of 0x027F.

   Similarly the SSE control word (%mxcsr) should be 0x1F80.

   %fpucw and %mxcsr should have these values on entry to
   Vex-generated code, and those values should be unchanged at exit.
*/

#define DEFAULT_FPUCW 0x027F

/* debugging only, do not use */
/* define DEFAULT_FPUCW 0x037F */
84 /*---------------------------------------------------------*/
85 /*--- misc helpers ---*/
86 /*---------------------------------------------------------*/
88 /* These are duplicated in guest-x86/toIR.c */
89 static IRExpr* unop ( IROp op, IRExpr* a )
91 return IRExpr_Unop(op, a);
94 static IRExpr* binop ( IROp op, IRExpr* a1, IRExpr* a2 )
96 return IRExpr_Binop(op, a1, a2);
99 static IRExpr* bind ( Int binder )
101 return IRExpr_Binder(binder);
104 static Bool isZeroU8 ( IRExpr* e )
106 return e->tag == Iex_Const
107 && e->Iex.Const.con->tag == Ico_U8
108 && e->Iex.Const.con->Ico.U8 == 0;
111 static Bool isZeroU32 ( IRExpr* e )
113 return e->tag == Iex_Const
114 && e->Iex.Const.con->tag == Ico_U32
115 && e->Iex.Const.con->Ico.U32 == 0;
118 //static Bool isZeroU64 ( IRExpr* e )
120 // return e->tag == Iex_Const
121 // && e->Iex.Const.con->tag == Ico_U64
122 // && e->Iex.Const.con->Ico.U64 == 0ULL;
126 /*---------------------------------------------------------*/
127 /*--- ISelEnv ---*/
128 /*---------------------------------------------------------*/
130 /* This carries around:
132 - A mapping from IRTemp to IRType, giving the type of any IRTemp we
133 might encounter. This is computed before insn selection starts,
134 and does not change.
136 - A mapping from IRTemp to HReg. This tells the insn selector
137 which virtual register(s) are associated with each IRTemp
138 temporary. This is computed before insn selection starts, and
139 does not change. We expect this mapping to map precisely the
140 same set of IRTemps as the type mapping does.
142 - vregmap holds the primary register for the IRTemp.
143 - vregmapHI is only used for 64-bit integer-typed
144 IRTemps. It holds the identity of a second
145 32-bit virtual HReg, which holds the high half
146 of the value.
148 - The code array, that is, the insns selected so far.
150 - A counter, for generating new virtual registers.
152 - The host subarchitecture we are selecting insns for.
153 This is set at the start and does not change.
155 - A Bool for indicating whether we may generate chain-me
156 instructions for control flow transfers, or whether we must use
157 XAssisted.
159 - The maximum guest address of any guest insn in this block.
160 Actually, the address of the highest-addressed byte from any insn
161 in this block. Is set at the start and does not change. This is
162 used for detecting jumps which are definitely forward-edges from
163 this block, and therefore can be made (chained) to the fast entry
164 point of the destination, thereby avoiding the destination's
165 event check.
167 Note, this is all (well, mostly) host-independent.
170 typedef
171 struct {
172 /* Constant -- are set at the start and do not change. */
173 IRTypeEnv* type_env;
175 HReg* vregmap;
176 HReg* vregmapHI;
177 Int n_vregmap;
179 UInt hwcaps;
181 Bool chainingAllowed;
182 Addr32 max_ga;
184 /* These are modified as we go along. */
185 HInstrArray* code;
186 Int vreg_ctr;
188 ISelEnv;
191 static HReg lookupIRTemp ( ISelEnv* env, IRTemp tmp )
193 vassert(tmp < env->n_vregmap);
194 return env->vregmap[tmp];
197 static void lookupIRTemp64 ( HReg* vrHI, HReg* vrLO, ISelEnv* env, IRTemp tmp )
199 vassert(tmp < env->n_vregmap);
200 vassert(! hregIsInvalid(env->vregmapHI[tmp]));
201 *vrLO = env->vregmap[tmp];
202 *vrHI = env->vregmapHI[tmp];
205 static void addInstr ( ISelEnv* env, X86Instr* instr )
207 addHInstr(env->code, instr);
208 if (vex_traceflags & VEX_TRACE_VCODE) {
209 ppX86Instr(instr, False);
210 vex_printf("\n");
214 static HReg newVRegI ( ISelEnv* env )
216 HReg reg = mkHReg(True/*virtual reg*/, HRcInt32, 0/*enc*/, env->vreg_ctr);
217 env->vreg_ctr++;
218 return reg;
221 static HReg newVRegF ( ISelEnv* env )
223 HReg reg = mkHReg(True/*virtual reg*/, HRcFlt64, 0/*enc*/, env->vreg_ctr);
224 env->vreg_ctr++;
225 return reg;
228 static HReg newVRegV ( ISelEnv* env )
230 HReg reg = mkHReg(True/*virtual reg*/, HRcVec128, 0/*enc*/, env->vreg_ctr);
231 env->vreg_ctr++;
232 return reg;
236 /*---------------------------------------------------------*/
237 /*--- ISEL: Forward declarations ---*/
238 /*---------------------------------------------------------*/
240 /* These are organised as iselXXX and iselXXX_wrk pairs. The
241 iselXXX_wrk do the real work, but are not to be called directly.
242 For each XXX, iselXXX calls its iselXXX_wrk counterpart, then
243 checks that all returned registers are virtual. You should not
244 call the _wrk version directly.
246 static X86RMI* iselIntExpr_RMI_wrk ( ISelEnv* env, const IRExpr* e );
247 static X86RMI* iselIntExpr_RMI ( ISelEnv* env, const IRExpr* e );
249 static X86RI* iselIntExpr_RI_wrk ( ISelEnv* env, const IRExpr* e );
250 static X86RI* iselIntExpr_RI ( ISelEnv* env, const IRExpr* e );
252 static X86RM* iselIntExpr_RM_wrk ( ISelEnv* env, const IRExpr* e );
253 static X86RM* iselIntExpr_RM ( ISelEnv* env, const IRExpr* e );
255 static HReg iselIntExpr_R_wrk ( ISelEnv* env, const IRExpr* e );
256 static HReg iselIntExpr_R ( ISelEnv* env, const IRExpr* e );
258 static X86AMode* iselIntExpr_AMode_wrk ( ISelEnv* env, const IRExpr* e );
259 static X86AMode* iselIntExpr_AMode ( ISelEnv* env, const IRExpr* e );
261 static void iselInt64Expr_wrk ( HReg* rHi, HReg* rLo,
262 ISelEnv* env, const IRExpr* e );
263 static void iselInt64Expr ( HReg* rHi, HReg* rLo,
264 ISelEnv* env, const IRExpr* e );
266 static X86CondCode iselCondCode_wrk ( ISelEnv* env, const IRExpr* e );
267 static X86CondCode iselCondCode ( ISelEnv* env, const IRExpr* e );
269 static HReg iselDblExpr_wrk ( ISelEnv* env, const IRExpr* e );
270 static HReg iselDblExpr ( ISelEnv* env, const IRExpr* e );
272 static HReg iselFltExpr_wrk ( ISelEnv* env, const IRExpr* e );
273 static HReg iselFltExpr ( ISelEnv* env, const IRExpr* e );
275 static HReg iselVecExpr_wrk ( ISelEnv* env, const IRExpr* e );
276 static HReg iselVecExpr ( ISelEnv* env, const IRExpr* e );
279 /*---------------------------------------------------------*/
280 /*--- ISEL: Misc helpers ---*/
281 /*---------------------------------------------------------*/
283 /* Make a int reg-reg move. */
285 static X86Instr* mk_iMOVsd_RR ( HReg src, HReg dst )
287 vassert(hregClass(src) == HRcInt32);
288 vassert(hregClass(dst) == HRcInt32);
289 return X86Instr_Alu32R(Xalu_MOV, X86RMI_Reg(src), dst);
293 /* Make a vector reg-reg move. */
295 static X86Instr* mk_vMOVsd_RR ( HReg src, HReg dst )
297 vassert(hregClass(src) == HRcVec128);
298 vassert(hregClass(dst) == HRcVec128);
299 return X86Instr_SseReRg(Xsse_MOV, src, dst);
302 /* Advance/retreat %esp by n. */
304 static void add_to_esp ( ISelEnv* env, Int n )
306 vassert(n > 0 && n < 256 && (n%4) == 0);
307 addInstr(env,
308 X86Instr_Alu32R(Xalu_ADD, X86RMI_Imm(n), hregX86_ESP()));
311 static void sub_from_esp ( ISelEnv* env, Int n )
313 vassert(n > 0 && n < 256 && (n%4) == 0);
314 addInstr(env,
315 X86Instr_Alu32R(Xalu_SUB, X86RMI_Imm(n), hregX86_ESP()));
319 /* Given an amode, return one which references 4 bytes further
320 along. */
322 static X86AMode* advance4 ( X86AMode* am )
324 X86AMode* am4 = dopyX86AMode(am);
325 switch (am4->tag) {
326 case Xam_IRRS:
327 am4->Xam.IRRS.imm += 4; break;
328 case Xam_IR:
329 am4->Xam.IR.imm += 4; break;
330 default:
331 vpanic("advance4(x86,host)");
333 return am4;
337 /* Push an arg onto the host stack, in preparation for a call to a
338 helper function of some kind. Returns the number of 32-bit words
339 pushed. If we encounter an IRExpr_VECRET() then we expect that
340 r_vecRetAddr will be a valid register, that holds the relevant
341 address.
343 static Int pushArg ( ISelEnv* env, IRExpr* arg, HReg r_vecRetAddr )
345 if (UNLIKELY(arg->tag == Iex_VECRET)) {
346 vassert(0); //ATC
347 vassert(!hregIsInvalid(r_vecRetAddr));
348 addInstr(env, X86Instr_Push(X86RMI_Reg(r_vecRetAddr)));
349 return 1;
351 if (UNLIKELY(arg->tag == Iex_GSPTR)) {
352 addInstr(env, X86Instr_Push(X86RMI_Reg(hregX86_EBP())));
353 return 1;
355 /* Else it's a "normal" expression. */
356 IRType arg_ty = typeOfIRExpr(env->type_env, arg);
357 if (arg_ty == Ity_I32) {
358 addInstr(env, X86Instr_Push(iselIntExpr_RMI(env, arg)));
359 return 1;
360 } else
361 if (arg_ty == Ity_I64) {
362 HReg rHi, rLo;
363 iselInt64Expr(&rHi, &rLo, env, arg);
364 addInstr(env, X86Instr_Push(X86RMI_Reg(rHi)));
365 addInstr(env, X86Instr_Push(X86RMI_Reg(rLo)));
366 return 2;
368 ppIRExpr(arg);
369 vpanic("pushArg(x86): can't handle arg of this type");
373 /* Complete the call to a helper function, by calling the
374 helper and clearing the args off the stack. */
376 static
377 void callHelperAndClearArgs ( ISelEnv* env, X86CondCode cc,
378 IRCallee* cee, Int n_arg_ws,
379 RetLoc rloc )
381 /* Complication. Need to decide which reg to use as the fn address
382 pointer, in a way that doesn't trash regparm-passed
383 parameters. */
384 vassert(sizeof(void*) == 4);
386 addInstr(env, X86Instr_Call( cc, (Addr)cee->addr,
387 cee->regparms, rloc));
388 if (n_arg_ws > 0)
389 add_to_esp(env, 4*n_arg_ws);
393 /* Used only in doHelperCall. See big comment in doHelperCall re
394 handling of regparm args. This function figures out whether
395 evaluation of an expression might require use of a fixed register.
396 If in doubt return True (safe but suboptimal).
398 static
399 Bool mightRequireFixedRegs ( IRExpr* e )
401 if (UNLIKELY(is_IRExpr_VECRET_or_GSPTR(e))) {
402 // These are always "safe" -- either a copy of %esp in some
403 // arbitrary vreg, or a copy of %ebp, respectively.
404 return False;
406 /* Else it's a "normal" expression. */
407 switch (e->tag) {
408 case Iex_RdTmp: case Iex_Const: case Iex_Get:
409 return False;
410 default:
411 return True;
416 /* Do a complete function call. |guard| is a Ity_Bit expression
417 indicating whether or not the call happens. If guard==NULL, the
418 call is unconditional. |retloc| is set to indicate where the
419 return value is after the call. The caller (of this fn) must
420 generate code to add |stackAdjustAfterCall| to the stack pointer
421 after the call is done. */
423 static
424 void doHelperCall ( /*OUT*/UInt* stackAdjustAfterCall,
425 /*OUT*/RetLoc* retloc,
426 ISelEnv* env,
427 IRExpr* guard,
428 IRCallee* cee, IRType retTy, IRExpr** args )
430 X86CondCode cc;
431 HReg argregs[3];
432 HReg tmpregs[3];
433 Bool danger;
434 Int not_done_yet, n_args, n_arg_ws, stack_limit,
435 i, argreg, argregX;
437 /* Set default returns. We'll update them later if needed. */
438 *stackAdjustAfterCall = 0;
439 *retloc = mk_RetLoc_INVALID();
441 /* These are used for cross-checking that IR-level constraints on
442 the use of Iex_VECRET and Iex_GSPTR are observed. */
443 UInt nVECRETs = 0;
444 UInt nGSPTRs = 0;
446 /* Marshal args for a call, do the call, and clear the stack.
447 Complexities to consider:
449 * The return type can be I{64,32,16,8} or V128. In the V128
450 case, it is expected that |args| will contain the special
451 node IRExpr_VECRET(), in which case this routine generates
452 code to allocate space on the stack for the vector return
453 value. Since we are not passing any scalars on the stack, it
454 is enough to preallocate the return space before marshalling
455 any arguments, in this case.
457 |args| may also contain IRExpr_GSPTR(), in which case the
458 value in %ebp is passed as the corresponding argument.
460 * If the callee claims regparmness of 1, 2 or 3, we must pass the
461 first 1, 2 or 3 args in registers (EAX, EDX, and ECX
462 respectively). To keep things relatively simple, only args of
463 type I32 may be passed as regparms -- just bomb out if anything
464 else turns up. Clearly this depends on the front ends not
465 trying to pass any other types as regparms.
468 /* 16 Nov 2004: the regparm handling is complicated by the
469 following problem.
471 Consider a call two a function with two regparm parameters:
472 f(e1,e2). We need to compute e1 into %eax and e2 into %edx.
473 Suppose code is first generated to compute e1 into %eax. Then,
474 code is generated to compute e2 into %edx. Unfortunately, if
475 the latter code sequence uses %eax, it will trash the value of
476 e1 computed by the former sequence. This could happen if (for
477 example) e2 itself involved a function call. In the code below,
478 args are evaluated right-to-left, not left-to-right, but the
479 principle and the problem are the same.
481 One solution is to compute all regparm-bound args into vregs
482 first, and once they are all done, move them to the relevant
483 real regs. This always gives correct code, but it also gives
484 a bunch of vreg-to-rreg moves which are usually redundant but
485 are hard for the register allocator to get rid of.
487 A compromise is to first examine all regparm'd argument
488 expressions. If they are all so simple that it is clear
489 they will be evaluated without use of any fixed registers,
490 use the old compute-directly-to-fixed-target scheme. If not,
491 be safe and use the via-vregs scheme.
493 Note this requires being able to examine an expression and
494 determine whether or not evaluation of it might use a fixed
495 register. That requires knowledge of how the rest of this
496 insn selector works. Currently just the following 3 are
497 regarded as safe -- hopefully they cover the majority of
498 arguments in practice: IRExpr_Tmp IRExpr_Const IRExpr_Get.
500 vassert(cee->regparms >= 0 && cee->regparms <= 3);
502 /* Count the number of args and also the VECRETs */
503 n_args = n_arg_ws = 0;
504 while (args[n_args]) {
505 IRExpr* arg = args[n_args];
506 n_args++;
507 if (UNLIKELY(arg->tag == Iex_VECRET)) {
508 nVECRETs++;
509 } else if (UNLIKELY(arg->tag == Iex_GSPTR)) {
510 nGSPTRs++;
514 /* If this fails, the IR is ill-formed */
515 vassert(nGSPTRs == 0 || nGSPTRs == 1);
517 /* If we have a VECRET, allocate space on the stack for the return
518 value, and record the stack pointer after that. */
519 HReg r_vecRetAddr = INVALID_HREG;
520 if (nVECRETs == 1) {
521 vassert(retTy == Ity_V128 || retTy == Ity_V256);
522 vassert(retTy != Ity_V256); // we don't handle that yet (if ever)
523 r_vecRetAddr = newVRegI(env);
524 sub_from_esp(env, 16);
525 addInstr(env, mk_iMOVsd_RR( hregX86_ESP(), r_vecRetAddr ));
526 } else {
527 // If either of these fail, the IR is ill-formed
528 vassert(retTy != Ity_V128 && retTy != Ity_V256);
529 vassert(nVECRETs == 0);
532 not_done_yet = n_args;
534 stack_limit = cee->regparms;
536 /* ------ BEGIN marshall all arguments ------ */
538 /* Push (R to L) the stack-passed args, [n_args-1 .. stack_limit] */
539 for (i = n_args-1; i >= stack_limit; i--) {
540 n_arg_ws += pushArg(env, args[i], r_vecRetAddr);
541 not_done_yet--;
544 /* args [stack_limit-1 .. 0] and possibly %ebp are to be passed in
545 registers. */
547 if (cee->regparms > 0) {
549 /* ------ BEGIN deal with regparms ------ */
551 /* deal with regparms, not forgetting %ebp if needed. */
552 argregs[0] = hregX86_EAX();
553 argregs[1] = hregX86_EDX();
554 argregs[2] = hregX86_ECX();
555 tmpregs[0] = tmpregs[1] = tmpregs[2] = INVALID_HREG;
557 argreg = cee->regparms;
559 /* In keeping with big comment above, detect potential danger
560 and use the via-vregs scheme if needed. */
561 danger = False;
562 for (i = stack_limit-1; i >= 0; i--) {
563 if (mightRequireFixedRegs(args[i])) {
564 danger = True;
565 break;
569 if (danger) {
571 /* Move via temporaries */
572 argregX = argreg;
573 for (i = stack_limit-1; i >= 0; i--) {
575 if (0) {
576 vex_printf("x86 host: register param is complex: ");
577 ppIRExpr(args[i]);
578 vex_printf("\n");
581 IRExpr* arg = args[i];
582 argreg--;
583 vassert(argreg >= 0);
584 if (UNLIKELY(arg->tag == Iex_VECRET)) {
585 vassert(0); //ATC
587 else if (UNLIKELY(arg->tag == Iex_GSPTR)) {
588 vassert(0); //ATC
589 } else {
590 vassert(typeOfIRExpr(env->type_env, arg) == Ity_I32);
591 tmpregs[argreg] = iselIntExpr_R(env, arg);
593 not_done_yet--;
595 for (i = stack_limit-1; i >= 0; i--) {
596 argregX--;
597 vassert(argregX >= 0);
598 addInstr( env, mk_iMOVsd_RR( tmpregs[argregX], argregs[argregX] ) );
601 } else {
602 /* It's safe to compute all regparm args directly into their
603 target registers. */
604 for (i = stack_limit-1; i >= 0; i--) {
605 IRExpr* arg = args[i];
606 argreg--;
607 vassert(argreg >= 0);
608 if (UNLIKELY(arg->tag == Iex_VECRET)) {
609 vassert(!hregIsInvalid(r_vecRetAddr));
610 addInstr(env, X86Instr_Alu32R(Xalu_MOV,
611 X86RMI_Reg(r_vecRetAddr),
612 argregs[argreg]));
614 else if (UNLIKELY(arg->tag == Iex_GSPTR)) {
615 vassert(0); //ATC
616 } else {
617 vassert(typeOfIRExpr(env->type_env, arg) == Ity_I32);
618 addInstr(env, X86Instr_Alu32R(Xalu_MOV,
619 iselIntExpr_RMI(env, arg),
620 argregs[argreg]));
622 not_done_yet--;
627 /* ------ END deal with regparms ------ */
631 vassert(not_done_yet == 0);
633 /* ------ END marshall all arguments ------ */
635 /* Now we can compute the condition. We can't do it earlier
636 because the argument computations could trash the condition
637 codes. Be a bit clever to handle the common case where the
638 guard is 1:Bit. */
639 cc = Xcc_ALWAYS;
640 if (guard) {
641 if (guard->tag == Iex_Const
642 && guard->Iex.Const.con->tag == Ico_U1
643 && guard->Iex.Const.con->Ico.U1 == True) {
644 /* unconditional -- do nothing */
645 } else {
646 cc = iselCondCode( env, guard );
650 /* Do final checks, set the return values, and generate the call
651 instruction proper. */
652 vassert(*stackAdjustAfterCall == 0);
653 vassert(is_RetLoc_INVALID(*retloc));
654 switch (retTy) {
655 case Ity_INVALID:
656 /* Function doesn't return a value. */
657 *retloc = mk_RetLoc_simple(RLPri_None);
658 break;
659 case Ity_I64:
660 *retloc = mk_RetLoc_simple(RLPri_2Int);
661 break;
662 case Ity_I32: case Ity_I16: case Ity_I8:
663 *retloc = mk_RetLoc_simple(RLPri_Int);
664 break;
665 case Ity_V128:
666 *retloc = mk_RetLoc_spRel(RLPri_V128SpRel, 0);
667 *stackAdjustAfterCall = 16;
668 break;
669 case Ity_V256:
670 vassert(0); // ATC
671 *retloc = mk_RetLoc_spRel(RLPri_V256SpRel, 0);
672 *stackAdjustAfterCall = 32;
673 break;
674 default:
675 /* IR can denote other possible return types, but we don't
676 handle those here. */
677 vassert(0);
680 /* Finally, generate the call itself. This needs the *retloc value
681 set in the switch above, which is why it's at the end. */
682 callHelperAndClearArgs( env, cc, cee, n_arg_ws, *retloc );
686 /* Given a guest-state array descriptor, an index expression and a
687 bias, generate an X86AMode holding the relevant guest state
688 offset. */
690 static
691 X86AMode* genGuestArrayOffset ( ISelEnv* env, IRRegArray* descr,
692 IRExpr* off, Int bias )
694 HReg tmp, roff;
695 Int elemSz = sizeofIRType(descr->elemTy);
696 Int nElems = descr->nElems;
697 Int shift = 0;
699 /* throw out any cases not generated by an x86 front end. In
700 theory there might be a day where we need to handle them -- if
701 we ever run non-x86-guest on x86 host. */
703 if (nElems != 8)
704 vpanic("genGuestArrayOffset(x86 host)(1)");
706 switch (elemSz) {
707 case 1: shift = 0; break;
708 case 4: shift = 2; break;
709 case 8: shift = 3; break;
710 default: vpanic("genGuestArrayOffset(x86 host)(2)");
713 /* Compute off into a reg, %off. Then return:
715 movl %off, %tmp
716 addl $bias, %tmp (if bias != 0)
717 andl %tmp, 7
718 ... base(%ebp, %tmp, shift) ...
720 tmp = newVRegI(env);
721 roff = iselIntExpr_R(env, off);
722 addInstr(env, mk_iMOVsd_RR(roff, tmp));
723 if (bias != 0) {
724 addInstr(env,
725 X86Instr_Alu32R(Xalu_ADD, X86RMI_Imm(bias), tmp));
727 addInstr(env,
728 X86Instr_Alu32R(Xalu_AND, X86RMI_Imm(7), tmp));
729 return
730 X86AMode_IRRS( descr->base, hregX86_EBP(), tmp, shift );
734 /* Mess with the FPU's rounding mode: set to the default rounding mode
735 (DEFAULT_FPUCW). */
736 static
737 void set_FPU_rounding_default ( ISelEnv* env )
739 /* pushl $DEFAULT_FPUCW
740 fldcw 0(%esp)
741 addl $4, %esp
743 X86AMode* zero_esp = X86AMode_IR(0, hregX86_ESP());
744 addInstr(env, X86Instr_Push(X86RMI_Imm(DEFAULT_FPUCW)));
745 addInstr(env, X86Instr_FpLdCW(zero_esp));
746 add_to_esp(env, 4);
750 /* Mess with the FPU's rounding mode: 'mode' is an I32-typed
751 expression denoting a value in the range 0 .. 3, indicating a round
752 mode encoded as per type IRRoundingMode. Set the x87 FPU to have
753 the same rounding.
755 static
756 void set_FPU_rounding_mode ( ISelEnv* env, IRExpr* mode )
758 HReg rrm = iselIntExpr_R(env, mode);
759 HReg rrm2 = newVRegI(env);
760 X86AMode* zero_esp = X86AMode_IR(0, hregX86_ESP());
762 /* movl %rrm, %rrm2
763 andl $3, %rrm2 -- shouldn't be needed; paranoia
764 shll $10, %rrm2
765 orl $DEFAULT_FPUCW, %rrm2
766 pushl %rrm2
767 fldcw 0(%esp)
768 addl $4, %esp
770 addInstr(env, mk_iMOVsd_RR(rrm, rrm2));
771 addInstr(env, X86Instr_Alu32R(Xalu_AND, X86RMI_Imm(3), rrm2));
772 addInstr(env, X86Instr_Sh32(Xsh_SHL, 10, rrm2));
773 addInstr(env, X86Instr_Alu32R(Xalu_OR, X86RMI_Imm(DEFAULT_FPUCW), rrm2));
774 addInstr(env, X86Instr_Push(X86RMI_Reg(rrm2)));
775 addInstr(env, X86Instr_FpLdCW(zero_esp));
776 add_to_esp(env, 4);
780 /* Generate !src into a new vector register, and be sure that the code
781 is SSE1 compatible. Amazing that Intel doesn't offer a less crappy
782 way to do this.
784 static HReg do_sse_Not128 ( ISelEnv* env, HReg src )
786 HReg dst = newVRegV(env);
787 /* Set dst to zero. If dst contains a NaN then all hell might
788 break loose after the comparison. So, first zero it. */
789 addInstr(env, X86Instr_SseReRg(Xsse_XOR, dst, dst));
790 /* And now make it all 1s ... */
791 addInstr(env, X86Instr_Sse32Fx4(Xsse_CMPEQF, dst, dst));
792 /* Finally, xor 'src' into it. */
793 addInstr(env, X86Instr_SseReRg(Xsse_XOR, src, dst));
794 /* Doesn't that just totally suck? */
795 return dst;
799 /* Round an x87 FPU value to 53-bit-mantissa precision, to be used
800 after most non-simple FPU operations (simple = +, -, *, / and
801 sqrt).
803 This could be done a lot more efficiently if needed, by loading
804 zero and adding it to the value to be rounded (fldz ; faddp?).
806 static void roundToF64 ( ISelEnv* env, HReg reg )
808 X86AMode* zero_esp = X86AMode_IR(0, hregX86_ESP());
809 sub_from_esp(env, 8);
810 addInstr(env, X86Instr_FpLdSt(False/*store*/, 8, reg, zero_esp));
811 addInstr(env, X86Instr_FpLdSt(True/*load*/, 8, reg, zero_esp));
812 add_to_esp(env, 8);
816 /*---------------------------------------------------------*/
817 /*--- ISEL: Integer expressions (32/16/8 bit) ---*/
818 /*---------------------------------------------------------*/
820 /* Select insns for an integer-typed expression, and add them to the
821 code list. Return a reg holding the result. This reg will be a
822 virtual register. THE RETURNED REG MUST NOT BE MODIFIED. If you
823 want to modify it, ask for a new vreg, copy it in there, and modify
824 the copy. The register allocator will do its best to map both
825 vregs to the same real register, so the copies will often disappear
826 later in the game.
828 This should handle expressions of 32, 16 and 8-bit type. All
829 results are returned in a 32-bit register. For 16- and 8-bit
830 expressions, the upper 16/24 bits are arbitrary, so you should mask
831 or sign extend partial values if necessary.
834 static HReg iselIntExpr_R ( ISelEnv* env, const IRExpr* e )
836 HReg r = iselIntExpr_R_wrk(env, e);
837 /* sanity checks ... */
838 # if 0
839 vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
840 # endif
841 vassert(hregClass(r) == HRcInt32);
842 vassert(hregIsVirtual(r));
843 return r;
846 /* DO NOT CALL THIS DIRECTLY ! */
847 static HReg iselIntExpr_R_wrk ( ISelEnv* env, const IRExpr* e )
849 MatchInfo mi;
851 IRType ty = typeOfIRExpr(env->type_env,e);
852 vassert(ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8);
854 switch (e->tag) {
856 /* --------- TEMP --------- */
857 case Iex_RdTmp: {
858 return lookupIRTemp(env, e->Iex.RdTmp.tmp);
861 /* --------- LOAD --------- */
862 case Iex_Load: {
863 HReg dst = newVRegI(env);
864 X86AMode* amode = iselIntExpr_AMode ( env, e->Iex.Load.addr );
866 /* We can't handle big-endian loads, nor load-linked. */
867 if (e->Iex.Load.end != Iend_LE)
868 goto irreducible;
870 if (ty == Ity_I32) {
871 addInstr(env, X86Instr_Alu32R(Xalu_MOV,
872 X86RMI_Mem(amode), dst) );
873 return dst;
875 if (ty == Ity_I16) {
876 addInstr(env, X86Instr_LoadEX(2,False,amode,dst));
877 return dst;
879 if (ty == Ity_I8) {
880 addInstr(env, X86Instr_LoadEX(1,False,amode,dst));
881 return dst;
883 break;
886 /* --------- TERNARY OP --------- */
887 case Iex_Triop: {
888 IRTriop *triop = e->Iex.Triop.details;
889 /* C3210 flags following FPU partial remainder (fprem), both
890 IEEE compliant (PREM1) and non-IEEE compliant (PREM). */
891 if (triop->op == Iop_PRemC3210F64
892 || triop->op == Iop_PRem1C3210F64) {
893 HReg junk = newVRegF(env);
894 HReg dst = newVRegI(env);
895 HReg srcL = iselDblExpr(env, triop->arg2);
896 HReg srcR = iselDblExpr(env, triop->arg3);
897 /* XXXROUNDINGFIXME */
898 /* set roundingmode here */
899 addInstr(env, X86Instr_FpBinary(
900 e->Iex.Binop.op==Iop_PRemC3210F64
901 ? Xfp_PREM : Xfp_PREM1,
902 srcL,srcR,junk
904 /* The previous pseudo-insn will have left the FPU's C3210
905 flags set correctly. So bag them. */
906 addInstr(env, X86Instr_FpStSW_AX());
907 addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), dst));
908 addInstr(env, X86Instr_Alu32R(Xalu_AND, X86RMI_Imm(0x4700), dst));
909 return dst;
912 break;
915 /* --------- BINARY OP --------- */
916 case Iex_Binop: {
917 X86AluOp aluOp;
918 X86ShiftOp shOp;
920 /* Pattern: Sub32(0,x) */
921 if (e->Iex.Binop.op == Iop_Sub32 && isZeroU32(e->Iex.Binop.arg1)) {
922 HReg dst = newVRegI(env);
923 HReg reg = iselIntExpr_R(env, e->Iex.Binop.arg2);
924 addInstr(env, mk_iMOVsd_RR(reg,dst));
925 addInstr(env, X86Instr_Unary32(Xun_NEG,dst));
926 return dst;
929 /* Is it an addition or logical style op? */
930 switch (e->Iex.Binop.op) {
931 case Iop_Add8: case Iop_Add16: case Iop_Add32:
932 aluOp = Xalu_ADD; break;
933 case Iop_Sub8: case Iop_Sub16: case Iop_Sub32:
934 aluOp = Xalu_SUB; break;
935 case Iop_And8: case Iop_And16: case Iop_And32:
936 aluOp = Xalu_AND; break;
937 case Iop_Or8: case Iop_Or16: case Iop_Or32:
938 aluOp = Xalu_OR; break;
939 case Iop_Xor8: case Iop_Xor16: case Iop_Xor32:
940 aluOp = Xalu_XOR; break;
941 case Iop_Mul16: case Iop_Mul32:
942 aluOp = Xalu_MUL; break;
943 default:
944 aluOp = Xalu_INVALID; break;
946 /* For commutative ops we assume any literal
947 values are on the second operand. */
948 if (aluOp != Xalu_INVALID) {
949 HReg dst = newVRegI(env);
950 HReg reg = iselIntExpr_R(env, e->Iex.Binop.arg1);
951 X86RMI* rmi = iselIntExpr_RMI(env, e->Iex.Binop.arg2);
952 addInstr(env, mk_iMOVsd_RR(reg,dst));
953 addInstr(env, X86Instr_Alu32R(aluOp, rmi, dst));
954 return dst;
956 /* Could do better here; forcing the first arg into a reg
957 isn't always clever.
958 -- t70 = Xor32(And32(Xor32(LDle:I32(Add32(t41,0xFFFFFFA0:I32)),
959 LDle:I32(Add32(t41,0xFFFFFFA4:I32))),LDle:I32(Add32(
960 t41,0xFFFFFFA8:I32))),LDle:I32(Add32(t41,0xFFFFFFA0:I32)))
961 movl 0xFFFFFFA0(%vr41),%vr107
962 movl 0xFFFFFFA4(%vr41),%vr108
963 movl %vr107,%vr106
964 xorl %vr108,%vr106
965 movl 0xFFFFFFA8(%vr41),%vr109
966 movl %vr106,%vr105
967 andl %vr109,%vr105
968 movl 0xFFFFFFA0(%vr41),%vr110
969 movl %vr105,%vr104
970 xorl %vr110,%vr104
971 movl %vr104,%vr70
974 /* Perhaps a shift op? */
975 switch (e->Iex.Binop.op) {
976 case Iop_Shl32: case Iop_Shl16: case Iop_Shl8:
977 shOp = Xsh_SHL; break;
978 case Iop_Shr32: case Iop_Shr16: case Iop_Shr8:
979 shOp = Xsh_SHR; break;
980 case Iop_Sar32: case Iop_Sar16: case Iop_Sar8:
981 shOp = Xsh_SAR; break;
982 default:
983 shOp = Xsh_INVALID; break;
985 if (shOp != Xsh_INVALID) {
986 HReg dst = newVRegI(env);
988 /* regL = the value to be shifted */
989 HReg regL = iselIntExpr_R(env, e->Iex.Binop.arg1);
990 addInstr(env, mk_iMOVsd_RR(regL,dst));
992 /* Do any necessary widening for 16/8 bit operands */
993 switch (e->Iex.Binop.op) {
994 case Iop_Shr8:
995 addInstr(env, X86Instr_Alu32R(
996 Xalu_AND, X86RMI_Imm(0xFF), dst));
997 break;
998 case Iop_Shr16:
999 addInstr(env, X86Instr_Alu32R(
1000 Xalu_AND, X86RMI_Imm(0xFFFF), dst));
1001 break;
1002 case Iop_Sar8:
1003 addInstr(env, X86Instr_Sh32(Xsh_SHL, 24, dst));
1004 addInstr(env, X86Instr_Sh32(Xsh_SAR, 24, dst));
1005 break;
1006 case Iop_Sar16:
1007 addInstr(env, X86Instr_Sh32(Xsh_SHL, 16, dst));
1008 addInstr(env, X86Instr_Sh32(Xsh_SAR, 16, dst));
1009 break;
1010 default: break;
1013 /* Now consider the shift amount. If it's a literal, we
1014 can do a much better job than the general case. */
1015 if (e->Iex.Binop.arg2->tag == Iex_Const) {
1016 /* assert that the IR is well-typed */
1017 Int nshift;
1018 vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
1019 nshift = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
1020 vassert(nshift >= 0);
1021 if (nshift > 0)
1022 /* Can't allow nshift==0 since that means %cl */
1023 addInstr(env, X86Instr_Sh32( shOp, nshift, dst ));
1024 } else {
1025 /* General case; we have to force the amount into %cl. */
1026 HReg regR = iselIntExpr_R(env, e->Iex.Binop.arg2);
1027 addInstr(env, mk_iMOVsd_RR(regR,hregX86_ECX()));
1028 addInstr(env, X86Instr_Sh32(shOp, 0/* %cl */, dst));
1030 return dst;
1033 /* Handle misc other ops. */
1035 if (e->Iex.Binop.op == Iop_Max32U) {
1036 HReg src1 = iselIntExpr_R(env, e->Iex.Binop.arg1);
1037 HReg dst = newVRegI(env);
1038 HReg src2 = iselIntExpr_R(env, e->Iex.Binop.arg2);
1039 addInstr(env, mk_iMOVsd_RR(src1,dst));
1040 addInstr(env, X86Instr_Alu32R(Xalu_CMP, X86RMI_Reg(src2), dst));
1041 addInstr(env, X86Instr_CMov32(Xcc_B, X86RM_Reg(src2), dst));
1042 return dst;
1045 if (e->Iex.Binop.op == Iop_8HLto16) {
1046 HReg hi8 = newVRegI(env);
1047 HReg lo8 = newVRegI(env);
1048 HReg hi8s = iselIntExpr_R(env, e->Iex.Binop.arg1);
1049 HReg lo8s = iselIntExpr_R(env, e->Iex.Binop.arg2);
1050 addInstr(env, mk_iMOVsd_RR(hi8s, hi8));
1051 addInstr(env, mk_iMOVsd_RR(lo8s, lo8));
1052 addInstr(env, X86Instr_Sh32(Xsh_SHL, 8, hi8));
1053 addInstr(env, X86Instr_Alu32R(Xalu_AND, X86RMI_Imm(0xFF), lo8));
1054 addInstr(env, X86Instr_Alu32R(Xalu_OR, X86RMI_Reg(lo8), hi8));
1055 return hi8;
1058 if (e->Iex.Binop.op == Iop_16HLto32) {
1059 HReg hi16 = newVRegI(env);
1060 HReg lo16 = newVRegI(env);
1061 HReg hi16s = iselIntExpr_R(env, e->Iex.Binop.arg1);
1062 HReg lo16s = iselIntExpr_R(env, e->Iex.Binop.arg2);
1063 addInstr(env, mk_iMOVsd_RR(hi16s, hi16));
1064 addInstr(env, mk_iMOVsd_RR(lo16s, lo16));
1065 addInstr(env, X86Instr_Sh32(Xsh_SHL, 16, hi16));
1066 addInstr(env, X86Instr_Alu32R(Xalu_AND, X86RMI_Imm(0xFFFF), lo16));
1067 addInstr(env, X86Instr_Alu32R(Xalu_OR, X86RMI_Reg(lo16), hi16));
1068 return hi16;
1071 if (e->Iex.Binop.op == Iop_MullS16 || e->Iex.Binop.op == Iop_MullS8
1072 || e->Iex.Binop.op == Iop_MullU16 || e->Iex.Binop.op == Iop_MullU8) {
1073 HReg a16 = newVRegI(env);
1074 HReg b16 = newVRegI(env);
1075 HReg a16s = iselIntExpr_R(env, e->Iex.Binop.arg1);
1076 HReg b16s = iselIntExpr_R(env, e->Iex.Binop.arg2);
1077 Int shift = (e->Iex.Binop.op == Iop_MullS8
1078 || e->Iex.Binop.op == Iop_MullU8)
1079 ? 24 : 16;
1080 X86ShiftOp shr_op = (e->Iex.Binop.op == Iop_MullS8
1081 || e->Iex.Binop.op == Iop_MullS16)
1082 ? Xsh_SAR : Xsh_SHR;
1084 addInstr(env, mk_iMOVsd_RR(a16s, a16));
1085 addInstr(env, mk_iMOVsd_RR(b16s, b16));
1086 addInstr(env, X86Instr_Sh32(Xsh_SHL, shift, a16));
1087 addInstr(env, X86Instr_Sh32(Xsh_SHL, shift, b16));
1088 addInstr(env, X86Instr_Sh32(shr_op, shift, a16));
1089 addInstr(env, X86Instr_Sh32(shr_op, shift, b16));
1090 addInstr(env, X86Instr_Alu32R(Xalu_MUL, X86RMI_Reg(a16), b16));
1091 return b16;
1094 if (e->Iex.Binop.op == Iop_CmpF64) {
1095 HReg fL = iselDblExpr(env, e->Iex.Binop.arg1);
1096 HReg fR = iselDblExpr(env, e->Iex.Binop.arg2);
1097 HReg dst = newVRegI(env);
1098 addInstr(env, X86Instr_FpCmp(fL,fR,dst));
1099 /* shift this right 8 bits so as to conform to CmpF64
1100 definition. */
1101 addInstr(env, X86Instr_Sh32(Xsh_SHR, 8, dst));
1102 return dst;
1105 if (e->Iex.Binop.op == Iop_F64toI32S
1106 || e->Iex.Binop.op == Iop_F64toI16S) {
1107 Int sz = e->Iex.Binop.op == Iop_F64toI16S ? 2 : 4;
1108 HReg rf = iselDblExpr(env, e->Iex.Binop.arg2);
1109 HReg dst = newVRegI(env);
1111 /* Used several times ... */
1112 X86AMode* zero_esp = X86AMode_IR(0, hregX86_ESP());
1114 /* rf now holds the value to be converted, and rrm holds the
1115 rounding mode value, encoded as per the IRRoundingMode
1116 enum. The first thing to do is set the FPU's rounding
1117 mode accordingly. */
1119 /* Create a space for the format conversion. */
1120 /* subl $4, %esp */
1121 sub_from_esp(env, 4);
1123 /* Set host rounding mode */
1124 set_FPU_rounding_mode( env, e->Iex.Binop.arg1 );
1126 /* gistw/l %rf, 0(%esp) */
1127 addInstr(env, X86Instr_FpLdStI(False/*store*/,
1128 toUChar(sz), rf, zero_esp));
1130 if (sz == 2) {
1131 /* movzwl 0(%esp), %dst */
1132 addInstr(env, X86Instr_LoadEX(2,False,zero_esp,dst));
1133 } else {
1134 /* movl 0(%esp), %dst */
1135 vassert(sz == 4);
1136 addInstr(env, X86Instr_Alu32R(
1137 Xalu_MOV, X86RMI_Mem(zero_esp), dst));
1140 /* Restore default FPU rounding. */
1141 set_FPU_rounding_default( env );
1143 /* addl $4, %esp */
1144 add_to_esp(env, 4);
1145 return dst;
1148 break;
1151 /* --------- UNARY OP --------- */
1152 case Iex_Unop: {
1154 /* 1Uto8(32to1(expr32)) */
1155 if (e->Iex.Unop.op == Iop_1Uto8) {
1156 DECLARE_PATTERN(p_32to1_then_1Uto8);
1157 DEFINE_PATTERN(p_32to1_then_1Uto8,
1158 unop(Iop_1Uto8,unop(Iop_32to1,bind(0))));
1159 if (matchIRExpr(&mi,p_32to1_then_1Uto8,e)) {
1160 const IRExpr* expr32 = mi.bindee[0];
1161 HReg dst = newVRegI(env);
1162 HReg src = iselIntExpr_R(env, expr32);
1163 addInstr(env, mk_iMOVsd_RR(src,dst) );
1164 addInstr(env, X86Instr_Alu32R(Xalu_AND,
1165 X86RMI_Imm(1), dst));
1166 return dst;
1170 /* 8Uto32(LDle(expr32)) */
1171 if (e->Iex.Unop.op == Iop_8Uto32) {
1172 DECLARE_PATTERN(p_LDle8_then_8Uto32);
1173 DEFINE_PATTERN(p_LDle8_then_8Uto32,
1174 unop(Iop_8Uto32,
1175 IRExpr_Load(Iend_LE,Ity_I8,bind(0))) );
1176 if (matchIRExpr(&mi,p_LDle8_then_8Uto32,e)) {
1177 HReg dst = newVRegI(env);
1178 X86AMode* amode = iselIntExpr_AMode ( env, mi.bindee[0] );
1179 addInstr(env, X86Instr_LoadEX(1,False,amode,dst));
1180 return dst;
1184 /* 8Sto32(LDle(expr32)) */
1185 if (e->Iex.Unop.op == Iop_8Sto32) {
1186 DECLARE_PATTERN(p_LDle8_then_8Sto32);
1187 DEFINE_PATTERN(p_LDle8_then_8Sto32,
1188 unop(Iop_8Sto32,
1189 IRExpr_Load(Iend_LE,Ity_I8,bind(0))) );
1190 if (matchIRExpr(&mi,p_LDle8_then_8Sto32,e)) {
1191 HReg dst = newVRegI(env);
1192 X86AMode* amode = iselIntExpr_AMode ( env, mi.bindee[0] );
1193 addInstr(env, X86Instr_LoadEX(1,True,amode,dst));
1194 return dst;
1198 /* 16Uto32(LDle(expr32)) */
1199 if (e->Iex.Unop.op == Iop_16Uto32) {
1200 DECLARE_PATTERN(p_LDle16_then_16Uto32);
1201 DEFINE_PATTERN(p_LDle16_then_16Uto32,
1202 unop(Iop_16Uto32,
1203 IRExpr_Load(Iend_LE,Ity_I16,bind(0))) );
1204 if (matchIRExpr(&mi,p_LDle16_then_16Uto32,e)) {
1205 HReg dst = newVRegI(env);
1206 X86AMode* amode = iselIntExpr_AMode ( env, mi.bindee[0] );
1207 addInstr(env, X86Instr_LoadEX(2,False,amode,dst));
1208 return dst;
1212 /* 8Uto32(GET:I8) */
1213 if (e->Iex.Unop.op == Iop_8Uto32) {
1214 if (e->Iex.Unop.arg->tag == Iex_Get) {
1215 HReg dst;
1216 X86AMode* amode;
1217 vassert(e->Iex.Unop.arg->Iex.Get.ty == Ity_I8);
1218 dst = newVRegI(env);
1219 amode = X86AMode_IR(e->Iex.Unop.arg->Iex.Get.offset,
1220 hregX86_EBP());
1221 addInstr(env, X86Instr_LoadEX(1,False,amode,dst));
1222 return dst;
1226 /* 16to32(GET:I16) */
1227 if (e->Iex.Unop.op == Iop_16Uto32) {
1228 if (e->Iex.Unop.arg->tag == Iex_Get) {
1229 HReg dst;
1230 X86AMode* amode;
1231 vassert(e->Iex.Unop.arg->Iex.Get.ty == Ity_I16);
1232 dst = newVRegI(env);
1233 amode = X86AMode_IR(e->Iex.Unop.arg->Iex.Get.offset,
1234 hregX86_EBP());
1235 addInstr(env, X86Instr_LoadEX(2,False,amode,dst));
1236 return dst;
1240 switch (e->Iex.Unop.op) {
1241 case Iop_8Uto16:
1242 case Iop_8Uto32:
1243 case Iop_16Uto32: {
1244 HReg dst = newVRegI(env);
1245 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1246 UInt mask = e->Iex.Unop.op==Iop_16Uto32 ? 0xFFFF : 0xFF;
1247 addInstr(env, mk_iMOVsd_RR(src,dst) );
1248 addInstr(env, X86Instr_Alu32R(Xalu_AND,
1249 X86RMI_Imm(mask), dst));
1250 return dst;
1252 case Iop_8Sto16:
1253 case Iop_8Sto32:
1254 case Iop_16Sto32: {
1255 HReg dst = newVRegI(env);
1256 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1257 UInt amt = e->Iex.Unop.op==Iop_16Sto32 ? 16 : 24;
1258 addInstr(env, mk_iMOVsd_RR(src,dst) );
1259 addInstr(env, X86Instr_Sh32(Xsh_SHL, amt, dst));
1260 addInstr(env, X86Instr_Sh32(Xsh_SAR, amt, dst));
1261 return dst;
1263 case Iop_Not8:
1264 case Iop_Not16:
1265 case Iop_Not32: {
1266 HReg dst = newVRegI(env);
1267 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1268 addInstr(env, mk_iMOVsd_RR(src,dst) );
1269 addInstr(env, X86Instr_Unary32(Xun_NOT,dst));
1270 return dst;
1272 case Iop_64HIto32: {
1273 HReg rHi, rLo;
1274 iselInt64Expr(&rHi,&rLo, env, e->Iex.Unop.arg);
1275 return rHi; /* and abandon rLo .. poor wee thing :-) */
1277 case Iop_64to32: {
1278 HReg rHi, rLo;
1279 iselInt64Expr(&rHi,&rLo, env, e->Iex.Unop.arg);
1280 return rLo; /* similar stupid comment to the above ... */
1282 case Iop_16HIto8:
1283 case Iop_32HIto16: {
1284 HReg dst = newVRegI(env);
1285 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1286 Int shift = e->Iex.Unop.op == Iop_16HIto8 ? 8 : 16;
1287 addInstr(env, mk_iMOVsd_RR(src,dst) );
1288 addInstr(env, X86Instr_Sh32(Xsh_SHR, shift, dst));
1289 return dst;
1291 case Iop_1Uto32:
1292 case Iop_1Uto8: {
1293 HReg dst = newVRegI(env);
1294 X86CondCode cond = iselCondCode(env, e->Iex.Unop.arg);
1295 addInstr(env, X86Instr_Set32(cond,dst));
1296 return dst;
1298 case Iop_1Sto8:
1299 case Iop_1Sto16:
1300 case Iop_1Sto32: {
1301 /* could do better than this, but for now ... */
1302 HReg dst = newVRegI(env);
1303 X86CondCode cond = iselCondCode(env, e->Iex.Unop.arg);
1304 addInstr(env, X86Instr_Set32(cond,dst));
1305 addInstr(env, X86Instr_Sh32(Xsh_SHL, 31, dst));
1306 addInstr(env, X86Instr_Sh32(Xsh_SAR, 31, dst));
1307 return dst;
1309 case Iop_Ctz32: {
1310 /* Count trailing zeroes, implemented by x86 'bsfl' */
1311 HReg dst = newVRegI(env);
1312 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1313 addInstr(env, X86Instr_Bsfr32(True,src,dst));
1314 return dst;
1316 case Iop_Clz32: {
1317 /* Count leading zeroes. Do 'bsrl' to establish the index
1318 of the highest set bit, and subtract that value from
1319 31. */
1320 HReg tmp = newVRegI(env);
1321 HReg dst = newVRegI(env);
1322 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1323 addInstr(env, X86Instr_Bsfr32(False,src,tmp));
1324 addInstr(env, X86Instr_Alu32R(Xalu_MOV,
1325 X86RMI_Imm(31), dst));
1326 addInstr(env, X86Instr_Alu32R(Xalu_SUB,
1327 X86RMI_Reg(tmp), dst));
1328 return dst;
1331 case Iop_CmpwNEZ32: {
1332 HReg dst = newVRegI(env);
1333 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1334 addInstr(env, mk_iMOVsd_RR(src,dst));
1335 addInstr(env, X86Instr_Unary32(Xun_NEG,dst));
1336 addInstr(env, X86Instr_Alu32R(Xalu_OR,
1337 X86RMI_Reg(src), dst));
1338 addInstr(env, X86Instr_Sh32(Xsh_SAR, 31, dst));
1339 return dst;
1341 case Iop_Left8:
1342 case Iop_Left16:
1343 case Iop_Left32: {
1344 HReg dst = newVRegI(env);
1345 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1346 addInstr(env, mk_iMOVsd_RR(src, dst));
1347 addInstr(env, X86Instr_Unary32(Xun_NEG, dst));
1348 addInstr(env, X86Instr_Alu32R(Xalu_OR, X86RMI_Reg(src), dst));
1349 return dst;
1352 case Iop_V128to32: {
1353 HReg dst = newVRegI(env);
1354 HReg vec = iselVecExpr(env, e->Iex.Unop.arg);
1355 X86AMode* esp0 = X86AMode_IR(0, hregX86_ESP());
1356 sub_from_esp(env, 16);
1357 addInstr(env, X86Instr_SseLdSt(False/*store*/, vec, esp0));
1358 addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(esp0), dst ));
1359 add_to_esp(env, 16);
1360 return dst;
1363 /* ReinterpF32asI32(e) */
1364 /* Given an IEEE754 single, produce an I32 with the same bit
1365 pattern. Keep stack 8-aligned even though only using 4
1366 bytes. */
1367 case Iop_ReinterpF32asI32: {
1368 HReg rf = iselFltExpr(env, e->Iex.Unop.arg);
1369 HReg dst = newVRegI(env);
1370 X86AMode* zero_esp = X86AMode_IR(0, hregX86_ESP());
1371 /* paranoia */
1372 set_FPU_rounding_default(env);
1373 /* subl $8, %esp */
1374 sub_from_esp(env, 8);
1375 /* gstF %rf, 0(%esp) */
1376 addInstr(env,
1377 X86Instr_FpLdSt(False/*store*/, 4, rf, zero_esp));
1378 /* movl 0(%esp), %dst */
1379 addInstr(env,
1380 X86Instr_Alu32R(Xalu_MOV, X86RMI_Mem(zero_esp), dst));
1381 /* addl $8, %esp */
1382 add_to_esp(env, 8);
1383 return dst;
1386 case Iop_16to8:
1387 case Iop_32to8:
1388 case Iop_32to16:
1389 /* These are no-ops. */
1390 return iselIntExpr_R(env, e->Iex.Unop.arg);
1392 case Iop_GetMSBs8x8: {
1393 /* Note: the following assumes the helper is of
1394 signature
1395 UInt fn ( ULong ), and is not a regparm fn.
1397 HReg xLo, xHi;
1398 HReg dst = newVRegI(env);
1399 Addr fn = (Addr)h_generic_calc_GetMSBs8x8;
1400 iselInt64Expr(&xHi, &xLo, env, e->Iex.Unop.arg);
1401 addInstr(env, X86Instr_Push(X86RMI_Reg(xHi)));
1402 addInstr(env, X86Instr_Push(X86RMI_Reg(xLo)));
1403 addInstr(env, X86Instr_Call( Xcc_ALWAYS, (Addr32)fn,
1404 0, mk_RetLoc_simple(RLPri_Int) ));
1405 add_to_esp(env, 2*4);
1406 addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), dst));
1407 return dst;
1410 default:
1411 break;
1413 break;
1416 /* --------- GET --------- */
1417 case Iex_Get: {
1418 if (ty == Ity_I32) {
1419 HReg dst = newVRegI(env);
1420 addInstr(env, X86Instr_Alu32R(
1421 Xalu_MOV,
1422 X86RMI_Mem(X86AMode_IR(e->Iex.Get.offset,
1423 hregX86_EBP())),
1424 dst));
1425 return dst;
1427 if (ty == Ity_I8 || ty == Ity_I16) {
1428 HReg dst = newVRegI(env);
1429 addInstr(env, X86Instr_LoadEX(
1430 toUChar(ty==Ity_I8 ? 1 : 2),
1431 False,
1432 X86AMode_IR(e->Iex.Get.offset,hregX86_EBP()),
1433 dst));
1434 return dst;
1436 break;
1439 case Iex_GetI: {
1440 X86AMode* am
1441 = genGuestArrayOffset(
1442 env, e->Iex.GetI.descr,
1443 e->Iex.GetI.ix, e->Iex.GetI.bias );
1444 HReg dst = newVRegI(env);
1445 if (ty == Ity_I8) {
1446 addInstr(env, X86Instr_LoadEX( 1, False, am, dst ));
1447 return dst;
1449 if (ty == Ity_I32) {
1450 addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Mem(am), dst));
1451 return dst;
1453 break;
1456 /* --------- CCALL --------- */
1457 case Iex_CCall: {
1458 HReg dst = newVRegI(env);
1459 vassert(ty == e->Iex.CCall.retty);
1461 /* be very restrictive for now. Only 32/64-bit ints allowed for
1462 args, and 32 bits for return type. Don't forget to change
1463 the RetLoc if more return types are allowed in future. */
1464 if (e->Iex.CCall.retty != Ity_I32)
1465 goto irreducible;
1467 /* Marshal args, do the call, clear stack. */
1468 UInt addToSp = 0;
1469 RetLoc rloc = mk_RetLoc_INVALID();
1470 doHelperCall( &addToSp, &rloc, env, NULL/*guard*/,
1471 e->Iex.CCall.cee, e->Iex.CCall.retty, e->Iex.CCall.args );
1472 vassert(is_sane_RetLoc(rloc));
1473 vassert(rloc.pri == RLPri_Int);
1474 vassert(addToSp == 0);
1476 addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), dst));
1477 return dst;
1480 /* --------- LITERAL --------- */
1481 /* 32/16/8-bit literals */
1482 case Iex_Const: {
1483 X86RMI* rmi = iselIntExpr_RMI ( env, e );
1484 HReg r = newVRegI(env);
1485 addInstr(env, X86Instr_Alu32R(Xalu_MOV, rmi, r));
1486 return r;
1489 /* --------- MULTIPLEX --------- */
1490 case Iex_ITE: { // VFD
1491 if ((ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8)
1492 && typeOfIRExpr(env->type_env,e->Iex.ITE.cond) == Ity_I1) {
1493 HReg r1 = iselIntExpr_R(env, e->Iex.ITE.iftrue);
1494 X86RM* r0 = iselIntExpr_RM(env, e->Iex.ITE.iffalse);
1495 HReg dst = newVRegI(env);
1496 addInstr(env, mk_iMOVsd_RR(r1,dst));
1497 X86CondCode cc = iselCondCode(env, e->Iex.ITE.cond);
1498 addInstr(env, X86Instr_CMov32(cc ^ 1, r0, dst));
1499 return dst;
1501 break;
1504 default:
1505 break;
1506 } /* switch (e->tag) */
1508 /* We get here if no pattern matched. */
1509 irreducible:
1510 ppIRExpr(e);
1511 vpanic("iselIntExpr_R: cannot reduce tree");
1515 /*---------------------------------------------------------*/
1516 /*--- ISEL: Integer expression auxiliaries ---*/
1517 /*---------------------------------------------------------*/
1519 /* --------------------- AMODEs --------------------- */
1521 /* Return an AMode which computes the value of the specified
1522 expression, possibly also adding insns to the code list as a
1523 result. The expression may only be a 32-bit one.
1526 static Bool sane_AMode ( X86AMode* am )
1528 switch (am->tag) {
1529 case Xam_IR:
1530 return
1531 toBool( hregClass(am->Xam.IR.reg) == HRcInt32
1532 && (hregIsVirtual(am->Xam.IR.reg)
1533 || sameHReg(am->Xam.IR.reg, hregX86_EBP())) );
1534 case Xam_IRRS:
1535 return
1536 toBool( hregClass(am->Xam.IRRS.base) == HRcInt32
1537 && hregIsVirtual(am->Xam.IRRS.base)
1538 && hregClass(am->Xam.IRRS.index) == HRcInt32
1539 && hregIsVirtual(am->Xam.IRRS.index) );
1540 default:
1541 vpanic("sane_AMode: unknown x86 amode tag");
1545 static X86AMode* iselIntExpr_AMode ( ISelEnv* env, const IRExpr* e )
1547 X86AMode* am = iselIntExpr_AMode_wrk(env, e);
1548 vassert(sane_AMode(am));
1549 return am;
1552 /* DO NOT CALL THIS DIRECTLY ! */
1553 static X86AMode* iselIntExpr_AMode_wrk ( ISelEnv* env, const IRExpr* e )
1555 IRType ty = typeOfIRExpr(env->type_env,e);
1556 vassert(ty == Ity_I32);
1558 /* Add32( Add32(expr1, Shl32(expr2, simm)), imm32 ) */
1559 if (e->tag == Iex_Binop
1560 && e->Iex.Binop.op == Iop_Add32
1561 && e->Iex.Binop.arg2->tag == Iex_Const
1562 && e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U32
1563 && e->Iex.Binop.arg1->tag == Iex_Binop
1564 && e->Iex.Binop.arg1->Iex.Binop.op == Iop_Add32
1565 && e->Iex.Binop.arg1->Iex.Binop.arg2->tag == Iex_Binop
1566 && e->Iex.Binop.arg1->Iex.Binop.arg2->Iex.Binop.op == Iop_Shl32
1567 && e->Iex.Binop.arg1
1568 ->Iex.Binop.arg2->Iex.Binop.arg2->tag == Iex_Const
1569 && e->Iex.Binop.arg1
1570 ->Iex.Binop.arg2->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8) {
1571 UInt shift = e->Iex.Binop.arg1
1572 ->Iex.Binop.arg2->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
1573 UInt imm32 = e->Iex.Binop.arg2->Iex.Const.con->Ico.U32;
1574 if (shift == 1 || shift == 2 || shift == 3) {
1575 HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1->Iex.Binop.arg1);
1576 HReg r2 = iselIntExpr_R(env, e->Iex.Binop.arg1
1577 ->Iex.Binop.arg2->Iex.Binop.arg1 );
1578 return X86AMode_IRRS(imm32, r1, r2, shift);
1582 /* Add32(expr1, Shl32(expr2, imm)) */
1583 if (e->tag == Iex_Binop
1584 && e->Iex.Binop.op == Iop_Add32
1585 && e->Iex.Binop.arg2->tag == Iex_Binop
1586 && e->Iex.Binop.arg2->Iex.Binop.op == Iop_Shl32
1587 && e->Iex.Binop.arg2->Iex.Binop.arg2->tag == Iex_Const
1588 && e->Iex.Binop.arg2->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8) {
1589 UInt shift = e->Iex.Binop.arg2->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
1590 if (shift == 1 || shift == 2 || shift == 3) {
1591 HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1);
1592 HReg r2 = iselIntExpr_R(env, e->Iex.Binop.arg2->Iex.Binop.arg1 );
1593 return X86AMode_IRRS(0, r1, r2, shift);
1597 /* Add32(expr,i) */
1598 if (e->tag == Iex_Binop
1599 && e->Iex.Binop.op == Iop_Add32
1600 && e->Iex.Binop.arg2->tag == Iex_Const
1601 && e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U32) {
1602 HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1);
1603 return X86AMode_IR(e->Iex.Binop.arg2->Iex.Const.con->Ico.U32, r1);
1606 /* Doesn't match anything in particular. Generate it into
1607 a register and use that. */
1609 HReg r1 = iselIntExpr_R(env, e);
1610 return X86AMode_IR(0, r1);
1615 /* --------------------- RMIs --------------------- */
1617 /* Similarly, calculate an expression into an X86RMI operand. As with
1618 iselIntExpr_R, the expression can have type 32, 16 or 8 bits. */
1620 static X86RMI* iselIntExpr_RMI ( ISelEnv* env, const IRExpr* e )
1622 X86RMI* rmi = iselIntExpr_RMI_wrk(env, e);
1623 /* sanity checks ... */
1624 switch (rmi->tag) {
1625 case Xrmi_Imm:
1626 return rmi;
1627 case Xrmi_Reg:
1628 vassert(hregClass(rmi->Xrmi.Reg.reg) == HRcInt32);
1629 vassert(hregIsVirtual(rmi->Xrmi.Reg.reg));
1630 return rmi;
1631 case Xrmi_Mem:
1632 vassert(sane_AMode(rmi->Xrmi.Mem.am));
1633 return rmi;
1634 default:
1635 vpanic("iselIntExpr_RMI: unknown x86 RMI tag");
1639 /* DO NOT CALL THIS DIRECTLY ! */
1640 static X86RMI* iselIntExpr_RMI_wrk ( ISelEnv* env, const IRExpr* e )
1642 IRType ty = typeOfIRExpr(env->type_env,e);
1643 vassert(ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8);
1645 /* special case: immediate */
1646 if (e->tag == Iex_Const) {
1647 UInt u;
1648 switch (e->Iex.Const.con->tag) {
1649 case Ico_U32: u = e->Iex.Const.con->Ico.U32; break;
1650 case Ico_U16: u = 0xFFFF & (e->Iex.Const.con->Ico.U16); break;
1651 case Ico_U8: u = 0xFF & (e->Iex.Const.con->Ico.U8); break;
1652 default: vpanic("iselIntExpr_RMI.Iex_Const(x86h)");
1654 return X86RMI_Imm(u);
1657 /* special case: 32-bit GET */
1658 if (e->tag == Iex_Get && ty == Ity_I32) {
1659 return X86RMI_Mem(X86AMode_IR(e->Iex.Get.offset,
1660 hregX86_EBP()));
1663 /* special case: 32-bit load from memory */
1664 if (e->tag == Iex_Load && ty == Ity_I32
1665 && e->Iex.Load.end == Iend_LE) {
1666 X86AMode* am = iselIntExpr_AMode(env, e->Iex.Load.addr);
1667 return X86RMI_Mem(am);
1670 /* default case: calculate into a register and return that */
1672 HReg r = iselIntExpr_R ( env, e );
1673 return X86RMI_Reg(r);
1678 /* --------------------- RIs --------------------- */
1680 /* Calculate an expression into an X86RI operand. As with
1681 iselIntExpr_R, the expression can have type 32, 16 or 8 bits. */
1683 static X86RI* iselIntExpr_RI ( ISelEnv* env, const IRExpr* e )
1685 X86RI* ri = iselIntExpr_RI_wrk(env, e);
1686 /* sanity checks ... */
1687 switch (ri->tag) {
1688 case Xri_Imm:
1689 return ri;
1690 case Xri_Reg:
1691 vassert(hregClass(ri->Xri.Reg.reg) == HRcInt32);
1692 vassert(hregIsVirtual(ri->Xri.Reg.reg));
1693 return ri;
1694 default:
1695 vpanic("iselIntExpr_RI: unknown x86 RI tag");
1699 /* DO NOT CALL THIS DIRECTLY ! */
1700 static X86RI* iselIntExpr_RI_wrk ( ISelEnv* env, const IRExpr* e )
1702 IRType ty = typeOfIRExpr(env->type_env,e);
1703 vassert(ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8);
1705 /* special case: immediate */
1706 if (e->tag == Iex_Const) {
1707 UInt u;
1708 switch (e->Iex.Const.con->tag) {
1709 case Ico_U32: u = e->Iex.Const.con->Ico.U32; break;
1710 case Ico_U16: u = 0xFFFF & (e->Iex.Const.con->Ico.U16); break;
1711 case Ico_U8: u = 0xFF & (e->Iex.Const.con->Ico.U8); break;
1712 default: vpanic("iselIntExpr_RMI.Iex_Const(x86h)");
1714 return X86RI_Imm(u);
1717 /* default case: calculate into a register and return that */
1719 HReg r = iselIntExpr_R ( env, e );
1720 return X86RI_Reg(r);
1725 /* --------------------- RMs --------------------- */
1727 /* Similarly, calculate an expression into an X86RM operand. As with
1728 iselIntExpr_R, the expression can have type 32, 16 or 8 bits. */
1730 static X86RM* iselIntExpr_RM ( ISelEnv* env, const IRExpr* e )
1732 X86RM* rm = iselIntExpr_RM_wrk(env, e);
1733 /* sanity checks ... */
1734 switch (rm->tag) {
1735 case Xrm_Reg:
1736 vassert(hregClass(rm->Xrm.Reg.reg) == HRcInt32);
1737 vassert(hregIsVirtual(rm->Xrm.Reg.reg));
1738 return rm;
1739 case Xrm_Mem:
1740 vassert(sane_AMode(rm->Xrm.Mem.am));
1741 return rm;
1742 default:
1743 vpanic("iselIntExpr_RM: unknown x86 RM tag");
1747 /* DO NOT CALL THIS DIRECTLY ! */
1748 static X86RM* iselIntExpr_RM_wrk ( ISelEnv* env, const IRExpr* e )
1750 IRType ty = typeOfIRExpr(env->type_env,e);
1751 vassert(ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8);
1753 /* special case: 32-bit GET */
1754 if (e->tag == Iex_Get && ty == Ity_I32) {
1755 return X86RM_Mem(X86AMode_IR(e->Iex.Get.offset,
1756 hregX86_EBP()));
1759 /* special case: load from memory */
1761 /* default case: calculate into a register and return that */
1763 HReg r = iselIntExpr_R ( env, e );
1764 return X86RM_Reg(r);
1769 /* --------------------- CONDCODE --------------------- */
1771 /* Generate code to evaluated a bit-typed expression, returning the
1772 condition code which would correspond when the expression would
1773 notionally have returned 1. */
1775 static X86CondCode iselCondCode ( ISelEnv* env, const IRExpr* e )
1777 /* Uh, there's nothing we can sanity check here, unfortunately. */
1778 return iselCondCode_wrk(env,e);
1781 /* DO NOT CALL THIS DIRECTLY ! */
1782 static X86CondCode iselCondCode_wrk ( ISelEnv* env, const IRExpr* e )
1784 MatchInfo mi;
1786 vassert(e);
1787 vassert(typeOfIRExpr(env->type_env,e) == Ity_I1);
1789 /* var */
1790 if (e->tag == Iex_RdTmp) {
1791 HReg r32 = lookupIRTemp(env, e->Iex.RdTmp.tmp);
1792 /* Test32 doesn't modify r32; so this is OK. */
1793 addInstr(env, X86Instr_Test32(1,X86RM_Reg(r32)));
1794 return Xcc_NZ;
1797 /* Constant 1:Bit */
1798 if (e->tag == Iex_Const) {
1799 HReg r;
1800 vassert(e->Iex.Const.con->tag == Ico_U1);
1801 vassert(e->Iex.Const.con->Ico.U1 == True
1802 || e->Iex.Const.con->Ico.U1 == False);
1803 r = newVRegI(env);
1804 addInstr(env, X86Instr_Alu32R(Xalu_MOV,X86RMI_Imm(0),r));
1805 addInstr(env, X86Instr_Alu32R(Xalu_XOR,X86RMI_Reg(r),r));
1806 return e->Iex.Const.con->Ico.U1 ? Xcc_Z : Xcc_NZ;
1809 /* Not1(e) */
1810 if (e->tag == Iex_Unop && e->Iex.Unop.op == Iop_Not1) {
1811 /* Generate code for the arg, and negate the test condition */
1812 return 1 ^ iselCondCode(env, e->Iex.Unop.arg);
1815 /* --- patterns rooted at: 32to1 --- */
1817 if (e->tag == Iex_Unop
1818 && e->Iex.Unop.op == Iop_32to1) {
1819 X86RM* rm = iselIntExpr_RM(env, e->Iex.Unop.arg);
1820 addInstr(env, X86Instr_Test32(1,rm));
1821 return Xcc_NZ;
1824 /* --- patterns rooted at: CmpNEZ8 --- */
1826 /* CmpNEZ8(x) */
1827 if (e->tag == Iex_Unop
1828 && e->Iex.Unop.op == Iop_CmpNEZ8) {
1829 X86RM* rm = iselIntExpr_RM(env, e->Iex.Unop.arg);
1830 addInstr(env, X86Instr_Test32(0xFF,rm));
1831 return Xcc_NZ;
1834 /* --- patterns rooted at: CmpNEZ16 --- */
1836 /* CmpNEZ16(x) */
1837 if (e->tag == Iex_Unop
1838 && e->Iex.Unop.op == Iop_CmpNEZ16) {
1839 X86RM* rm = iselIntExpr_RM(env, e->Iex.Unop.arg);
1840 addInstr(env, X86Instr_Test32(0xFFFF,rm));
1841 return Xcc_NZ;
1844 /* --- patterns rooted at: CmpNEZ32 --- */
1846 /* CmpNEZ32(And32(x,y)) */
1848 DECLARE_PATTERN(p_CmpNEZ32_And32);
1849 DEFINE_PATTERN(p_CmpNEZ32_And32,
1850 unop(Iop_CmpNEZ32, binop(Iop_And32, bind(0), bind(1))));
1851 if (matchIRExpr(&mi, p_CmpNEZ32_And32, e)) {
1852 HReg r0 = iselIntExpr_R(env, mi.bindee[0]);
1853 X86RMI* rmi1 = iselIntExpr_RMI(env, mi.bindee[1]);
1854 HReg tmp = newVRegI(env);
1855 addInstr(env, mk_iMOVsd_RR(r0, tmp));
1856 addInstr(env, X86Instr_Alu32R(Xalu_AND,rmi1,tmp));
1857 return Xcc_NZ;
1861 /* CmpNEZ32(Or32(x,y)) */
1863 DECLARE_PATTERN(p_CmpNEZ32_Or32);
1864 DEFINE_PATTERN(p_CmpNEZ32_Or32,
1865 unop(Iop_CmpNEZ32, binop(Iop_Or32, bind(0), bind(1))));
1866 if (matchIRExpr(&mi, p_CmpNEZ32_Or32, e)) {
1867 HReg r0 = iselIntExpr_R(env, mi.bindee[0]);
1868 X86RMI* rmi1 = iselIntExpr_RMI(env, mi.bindee[1]);
1869 HReg tmp = newVRegI(env);
1870 addInstr(env, mk_iMOVsd_RR(r0, tmp));
1871 addInstr(env, X86Instr_Alu32R(Xalu_OR,rmi1,tmp));
1872 return Xcc_NZ;
1876 /* CmpNEZ32(GET(..):I32) */
1877 if (e->tag == Iex_Unop
1878 && e->Iex.Unop.op == Iop_CmpNEZ32
1879 && e->Iex.Unop.arg->tag == Iex_Get) {
1880 X86AMode* am = X86AMode_IR(e->Iex.Unop.arg->Iex.Get.offset,
1881 hregX86_EBP());
1882 addInstr(env, X86Instr_Alu32M(Xalu_CMP, X86RI_Imm(0), am));
1883 return Xcc_NZ;
1886 /* CmpNEZ32(x) */
1887 if (e->tag == Iex_Unop
1888 && e->Iex.Unop.op == Iop_CmpNEZ32) {
1889 HReg r1 = iselIntExpr_R(env, e->Iex.Unop.arg);
1890 X86RMI* rmi2 = X86RMI_Imm(0);
1891 addInstr(env, X86Instr_Alu32R(Xalu_CMP,rmi2,r1));
1892 return Xcc_NZ;
1895 /* --- patterns rooted at: CmpNEZ64 --- */
1897 /* CmpNEZ64(Or64(x,y)) */
1899 DECLARE_PATTERN(p_CmpNEZ64_Or64);
1900 DEFINE_PATTERN(p_CmpNEZ64_Or64,
1901 unop(Iop_CmpNEZ64, binop(Iop_Or64, bind(0), bind(1))));
1902 if (matchIRExpr(&mi, p_CmpNEZ64_Or64, e)) {
1903 HReg hi1, lo1, hi2, lo2;
1904 HReg tmp = newVRegI(env);
1905 iselInt64Expr( &hi1, &lo1, env, mi.bindee[0] );
1906 addInstr(env, mk_iMOVsd_RR(hi1, tmp));
1907 addInstr(env, X86Instr_Alu32R(Xalu_OR,X86RMI_Reg(lo1),tmp));
1908 iselInt64Expr( &hi2, &lo2, env, mi.bindee[1] );
1909 addInstr(env, X86Instr_Alu32R(Xalu_OR,X86RMI_Reg(hi2),tmp));
1910 addInstr(env, X86Instr_Alu32R(Xalu_OR,X86RMI_Reg(lo2),tmp));
1911 return Xcc_NZ;
1915 /* CmpNEZ64(x) */
1916 if (e->tag == Iex_Unop
1917 && e->Iex.Unop.op == Iop_CmpNEZ64) {
1918 HReg hi, lo;
1919 HReg tmp = newVRegI(env);
1920 iselInt64Expr( &hi, &lo, env, e->Iex.Unop.arg );
1921 addInstr(env, mk_iMOVsd_RR(hi, tmp));
1922 addInstr(env, X86Instr_Alu32R(Xalu_OR,X86RMI_Reg(lo), tmp));
1923 return Xcc_NZ;
1926 /* --- patterns rooted at: Cmp{EQ,NE}{8,16} --- */
1928 /* CmpEQ8 / CmpNE8 */
1929 if (e->tag == Iex_Binop
1930 && (e->Iex.Binop.op == Iop_CmpEQ8
1931 || e->Iex.Binop.op == Iop_CmpNE8
1932 || e->Iex.Binop.op == Iop_CasCmpEQ8
1933 || e->Iex.Binop.op == Iop_CasCmpNE8)) {
1934 if (isZeroU8(e->Iex.Binop.arg2)) {
1935 HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1);
1936 addInstr(env, X86Instr_Test32(0xFF,X86RM_Reg(r1)));
1937 switch (e->Iex.Binop.op) {
1938 case Iop_CmpEQ8: case Iop_CasCmpEQ8: return Xcc_Z;
1939 case Iop_CmpNE8: case Iop_CasCmpNE8: return Xcc_NZ;
1940 default: vpanic("iselCondCode(x86): CmpXX8(expr,0:I8)");
1942 } else {
1943 HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1);
1944 X86RMI* rmi2 = iselIntExpr_RMI(env, e->Iex.Binop.arg2);
1945 HReg r = newVRegI(env);
1946 addInstr(env, mk_iMOVsd_RR(r1,r));
1947 addInstr(env, X86Instr_Alu32R(Xalu_XOR,rmi2,r));
1948 addInstr(env, X86Instr_Test32(0xFF,X86RM_Reg(r)));
1949 switch (e->Iex.Binop.op) {
1950 case Iop_CmpEQ8: case Iop_CasCmpEQ8: return Xcc_Z;
1951 case Iop_CmpNE8: case Iop_CasCmpNE8: return Xcc_NZ;
1952 default: vpanic("iselCondCode(x86): CmpXX8(expr,expr)");
1957 /* CmpEQ16 / CmpNE16 */
1958 if (e->tag == Iex_Binop
1959 && (e->Iex.Binop.op == Iop_CmpEQ16
1960 || e->Iex.Binop.op == Iop_CmpNE16
1961 || e->Iex.Binop.op == Iop_CasCmpEQ16
1962 || e->Iex.Binop.op == Iop_CasCmpNE16
1963 || e->Iex.Binop.op == Iop_ExpCmpNE16)) {
1964 HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1);
1965 X86RMI* rmi2 = iselIntExpr_RMI(env, e->Iex.Binop.arg2);
1966 HReg r = newVRegI(env);
1967 addInstr(env, mk_iMOVsd_RR(r1,r));
1968 addInstr(env, X86Instr_Alu32R(Xalu_XOR,rmi2,r));
1969 addInstr(env, X86Instr_Test32(0xFFFF,X86RM_Reg(r)));
1970 switch (e->Iex.Binop.op) {
1971 case Iop_CmpEQ16: case Iop_CasCmpEQ16:
1972 return Xcc_Z;
1973 case Iop_CmpNE16: case Iop_CasCmpNE16: case Iop_ExpCmpNE16:
1974 return Xcc_NZ;
1975 default:
1976 vpanic("iselCondCode(x86): CmpXX16");
1980 /* CmpNE32(ccall, 32-bit constant) (--smc-check=all optimisation).
1981 Saves a "movl %eax, %tmp" compared to the default route. */
1982 if (e->tag == Iex_Binop
1983 && e->Iex.Binop.op == Iop_CmpNE32
1984 && e->Iex.Binop.arg1->tag == Iex_CCall
1985 && e->Iex.Binop.arg2->tag == Iex_Const) {
1986 IRExpr* cal = e->Iex.Binop.arg1;
1987 IRExpr* con = e->Iex.Binop.arg2;
1988 /* clone & partial-eval of generic Iex_CCall and Iex_Const cases */
1989 vassert(cal->Iex.CCall.retty == Ity_I32); /* else ill-typed IR */
1990 vassert(con->Iex.Const.con->tag == Ico_U32);
1991 /* Marshal args, do the call. */
1992 UInt addToSp = 0;
1993 RetLoc rloc = mk_RetLoc_INVALID();
1994 doHelperCall( &addToSp, &rloc, env, NULL/*guard*/,
1995 cal->Iex.CCall.cee,
1996 cal->Iex.CCall.retty, cal->Iex.CCall.args );
1997 vassert(is_sane_RetLoc(rloc));
1998 vassert(rloc.pri == RLPri_Int);
1999 vassert(addToSp == 0);
2000 /* */
2001 addInstr(env, X86Instr_Alu32R(Xalu_CMP,
2002 X86RMI_Imm(con->Iex.Const.con->Ico.U32),
2003 hregX86_EAX()));
2004 return Xcc_NZ;
2007 /* Cmp*32*(x,y) */
2008 if (e->tag == Iex_Binop
2009 && (e->Iex.Binop.op == Iop_CmpEQ32
2010 || e->Iex.Binop.op == Iop_CmpNE32
2011 || e->Iex.Binop.op == Iop_CmpLT32S
2012 || e->Iex.Binop.op == Iop_CmpLT32U
2013 || e->Iex.Binop.op == Iop_CmpLE32S
2014 || e->Iex.Binop.op == Iop_CmpLE32U
2015 || e->Iex.Binop.op == Iop_CasCmpEQ32
2016 || e->Iex.Binop.op == Iop_CasCmpNE32
2017 || e->Iex.Binop.op == Iop_ExpCmpNE32)) {
2018 HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1);
2019 X86RMI* rmi2 = iselIntExpr_RMI(env, e->Iex.Binop.arg2);
2020 addInstr(env, X86Instr_Alu32R(Xalu_CMP,rmi2,r1));
2021 switch (e->Iex.Binop.op) {
2022 case Iop_CmpEQ32: case Iop_CasCmpEQ32: return Xcc_Z;
2023 case Iop_CmpNE32:
2024 case Iop_CasCmpNE32: case Iop_ExpCmpNE32: return Xcc_NZ;
2025 case Iop_CmpLT32S: return Xcc_L;
2026 case Iop_CmpLT32U: return Xcc_B;
2027 case Iop_CmpLE32S: return Xcc_LE;
2028 case Iop_CmpLE32U: return Xcc_BE;
2029 default: vpanic("iselCondCode(x86): CmpXX32");
2033 /* CmpNE64 */
2034 if (e->tag == Iex_Binop
2035 && (e->Iex.Binop.op == Iop_CmpNE64
2036 || e->Iex.Binop.op == Iop_CmpEQ64)) {
2037 HReg hi1, hi2, lo1, lo2;
2038 HReg tHi = newVRegI(env);
2039 HReg tLo = newVRegI(env);
2040 iselInt64Expr( &hi1, &lo1, env, e->Iex.Binop.arg1 );
2041 iselInt64Expr( &hi2, &lo2, env, e->Iex.Binop.arg2 );
2042 addInstr(env, mk_iMOVsd_RR(hi1, tHi));
2043 addInstr(env, X86Instr_Alu32R(Xalu_XOR,X86RMI_Reg(hi2), tHi));
2044 addInstr(env, mk_iMOVsd_RR(lo1, tLo));
2045 addInstr(env, X86Instr_Alu32R(Xalu_XOR,X86RMI_Reg(lo2), tLo));
2046 addInstr(env, X86Instr_Alu32R(Xalu_OR,X86RMI_Reg(tHi), tLo));
2047 switch (e->Iex.Binop.op) {
2048 case Iop_CmpNE64: return Xcc_NZ;
2049 case Iop_CmpEQ64: return Xcc_Z;
2050 default: vpanic("iselCondCode(x86): CmpXX64");
2054 /* And1(x,y), Or1(x,y) */
2055 /* FIXME: We could (and probably should) do a lot better here. If both args
2056 are in temps already then we can just emit a reg-reg And/Or directly,
2057 followed by the final Test. */
2058 if (e->tag == Iex_Binop
2059 && (e->Iex.Binop.op == Iop_And1 || e->Iex.Binop.op == Iop_Or1)) {
2060 // We could probably be cleverer about this. In the meantime ..
2061 HReg x_as_32 = newVRegI(env);
2062 X86CondCode cc_x = iselCondCode(env, e->Iex.Binop.arg1);
2063 addInstr(env, X86Instr_Set32(cc_x, x_as_32));
2064 HReg y_as_32 = newVRegI(env);
2065 X86CondCode cc_y = iselCondCode(env, e->Iex.Binop.arg2);
2066 addInstr(env, X86Instr_Set32(cc_y, y_as_32));
2067 X86AluOp aop = e->Iex.Binop.op == Iop_And1 ? Xalu_AND : Xalu_OR;
2068 addInstr(env, X86Instr_Alu32R(aop, X86RMI_Reg(x_as_32), y_as_32));
2069 addInstr(env, X86Instr_Test32(1, X86RM_Reg(y_as_32)));
2070 return Xcc_NZ;
2073 ppIRExpr(e);
2074 vpanic("iselCondCode");
2078 /*---------------------------------------------------------*/
2079 /*--- ISEL: Integer expressions (64 bit) ---*/
2080 /*---------------------------------------------------------*/
2082 /* Compute a 64-bit value into a register pair, which is returned as
2083 the first two parameters. As with iselIntExpr_R, these may be
2084 either real or virtual regs; in any case they must not be changed
2085 by subsequent code emitted by the caller. */
2087 static void iselInt64Expr ( HReg* rHi, HReg* rLo, ISelEnv* env,
2088 const IRExpr* e )
2090 iselInt64Expr_wrk(rHi, rLo, env, e);
2091 # if 0
2092 vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
2093 # endif
2094 vassert(hregClass(*rHi) == HRcInt32);
2095 vassert(hregIsVirtual(*rHi));
2096 vassert(hregClass(*rLo) == HRcInt32);
2097 vassert(hregIsVirtual(*rLo));
2100 /* DO NOT CALL THIS DIRECTLY ! */
/* Worker for iselInt64Expr.  Emits x86 instructions that compute the
   Ity_I64 expression 'e' (the type is asserted on entry) and hands the
   registers holding the upper and lower 32 bits of the result back
   through *rHi and *rLo.  Each supported expression form is handled by
   its own 'if' or 'case' below and returns as soon as it has emitted
   its code; an expression matched by none of them reaches the
   ppIRExpr/vpanic at the very end. */
2101 static void iselInt64Expr_wrk ( HReg* rHi, HReg* rLo, ISelEnv* env,
2102 const IRExpr* e )
2104 MatchInfo mi;
2105 HWord fn = 0; /* helper fn for most SIMD64 stuff */
2106 vassert(e);
2107 vassert(typeOfIRExpr(env->type_env,e) == Ity_I64);
2109 /* 64-bit literal */
2110 if (e->tag == Iex_Const) {
2111 ULong w64 = e->Iex.Const.con->Ico.U64;
2112 UInt wHi = toUInt(w64 >> 32);
2113 UInt wLo = toUInt(w64);
2114 HReg tLo = newVRegI(env);
2115 HReg tHi = newVRegI(env);
2116 vassert(e->Iex.Const.con->tag == Ico_U64);
2117 if (wLo == wHi) {
2118 /* Save a precious Int register in this special case. */
2119 addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(wLo), tLo));
2120 *rHi = tLo;
2121 *rLo = tLo;
2122 } else {
2123 addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(wHi), tHi));
2124 addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(wLo), tLo));
2125 *rHi = tHi;
2126 *rLo = tLo;
2128 return;
2131 /* read 64-bit IRTemp */
2132 if (e->tag == Iex_RdTmp) {
2133 lookupIRTemp64( rHi, rLo, env, e->Iex.RdTmp.tmp);
2134 return;
2137 /* 64-bit load */
2138 if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
2139 HReg tLo, tHi;
2140 X86AMode *am0, *am4;
2141 vassert(e->Iex.Load.ty == Ity_I64);
2142 tLo = newVRegI(env);
2143 tHi = newVRegI(env);
/* The low word is read through am0, the address selected for the load
   itself; the high word is read through am4, which is derived from am0
   by advance4 (presumably the same address plus 4 -- confirm against
   advance4's definition). */
2144 am0 = iselIntExpr_AMode(env, e->Iex.Load.addr);
2145 am4 = advance4(am0);
2146 addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(am0), tLo ));
2147 addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(am4), tHi ));
2148 *rHi = tHi;
2149 *rLo = tLo;
2150 return;
2153 /* 64-bit GET */
2154 if (e->tag == Iex_Get) {
/* The address is Get.offset relative to %ebp; the two words are then
   read in the same way as in the load case above. */
2155 X86AMode* am = X86AMode_IR(e->Iex.Get.offset, hregX86_EBP());
2156 X86AMode* am4 = advance4(am);
2157 HReg tLo = newVRegI(env);
2158 HReg tHi = newVRegI(env);
2159 addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(am), tLo ));
2160 addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(am4), tHi ));
2161 *rHi = tHi;
2162 *rLo = tLo;
2163 return;
2166 /* 64-bit GETI */
2167 if (e->tag == Iex_GetI) {
2168 X86AMode* am
2169 = genGuestArrayOffset( env, e->Iex.GetI.descr,
2170 e->Iex.GetI.ix, e->Iex.GetI.bias );
2171 X86AMode* am4 = advance4(am);
2172 HReg tLo = newVRegI(env);
2173 HReg tHi = newVRegI(env);
2174 addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(am), tLo ));
2175 addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(am4), tHi ));
2176 *rHi = tHi;
2177 *rLo = tLo;
2178 return;
2181 /* 64-bit ITE: ITE(g, expr, expr) */ // VFD
2182 if (e->tag == Iex_ITE) {
2183 HReg e0Lo, e0Hi, e1Lo, e1Hi;
2184 HReg tLo = newVRegI(env);
2185 HReg tHi = newVRegI(env);
2186 iselInt64Expr(&e0Hi, &e0Lo, env, e->Iex.ITE.iffalse);
2187 iselInt64Expr(&e1Hi, &e1Lo, env, e->Iex.ITE.iftrue);
/* tHi:tLo starts out as the 'iftrue' value.  Both arms have already
   been computed by the time the condition is evaluated.  The two CMovs
   below are predicated on cc ^ 1 (presumably the negation of cc --
   confirm against the X86CondCode encoding) and replace tHi:tLo with
   the 'iffalse' value. */
2188 addInstr(env, mk_iMOVsd_RR(e1Hi, tHi));
2189 addInstr(env, mk_iMOVsd_RR(e1Lo, tLo));
2190 X86CondCode cc = iselCondCode(env, e->Iex.ITE.cond);
2191 /* This assumes the first cmov32 doesn't trash the condition
2192 codes, so they are still available for the second cmov32 */
2193 addInstr(env, X86Instr_CMov32(cc ^ 1, X86RM_Reg(e0Hi), tHi));
2194 addInstr(env, X86Instr_CMov32(cc ^ 1, X86RM_Reg(e0Lo), tLo));
2195 *rHi = tHi;
2196 *rLo = tLo;
2197 return;
2200 /* --------- BINARY ops --------- */
2201 if (e->tag == Iex_Binop) {
2202 switch (e->Iex.Binop.op) {
2203 /* 32 x 32 -> 64 multiply */
2204 case Iop_MullU32:
2205 case Iop_MullS32: {
2206 /* get one operand into %eax, and the other into a R/M.
2207 Need to make an educated guess about which is better in
2208 which. */
2209 HReg tLo = newVRegI(env);
2210 HReg tHi = newVRegI(env);
2211 Bool syned = toBool(e->Iex.Binop.op == Iop_MullS32);
2212 X86RM* rmLeft = iselIntExpr_RM(env, e->Iex.Binop.arg1);
2213 HReg rRight = iselIntExpr_R(env, e->Iex.Binop.arg2);
2214 addInstr(env, mk_iMOVsd_RR(rRight, hregX86_EAX()));
2215 addInstr(env, X86Instr_MulL(syned, rmLeft));
2216 /* Result is now in EDX:EAX. Tell the caller. */
2217 addInstr(env, mk_iMOVsd_RR(hregX86_EDX(), tHi));
2218 addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), tLo));
2219 *rHi = tHi;
2220 *rLo = tLo;
2221 return;
2224 /* 64 x 32 -> (32(rem),32(div)) division */
2225 case Iop_DivModU64to32:
2226 case Iop_DivModS64to32: {
2227 /* Get the 64-bit operand into edx:eax, and the other into
2228 any old R/M. */
2229 HReg sHi, sLo;
2230 HReg tLo = newVRegI(env);
2231 HReg tHi = newVRegI(env);
2232 Bool syned = toBool(e->Iex.Binop.op == Iop_DivModS64to32);
2233 X86RM* rmRight = iselIntExpr_RM(env, e->Iex.Binop.arg2);
2234 iselInt64Expr(&sHi,&sLo, env, e->Iex.Binop.arg1);
2235 addInstr(env, mk_iMOVsd_RR(sHi, hregX86_EDX()));
2236 addInstr(env, mk_iMOVsd_RR(sLo, hregX86_EAX()));
2237 addInstr(env, X86Instr_Div(syned, rmRight));
/* After the divide, %edx is copied to the high word of the result and
   %eax to the low word, matching the (rem,div) layout named in the
   case heading above. */
2238 addInstr(env, mk_iMOVsd_RR(hregX86_EDX(), tHi));
2239 addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), tLo));
2240 *rHi = tHi;
2241 *rLo = tLo;
2242 return;
2245 /* Or64/And64/Xor64 */
2246 case Iop_Or64:
2247 case Iop_And64:
2248 case Iop_Xor64: {
2249 HReg xLo, xHi, yLo, yHi;
2250 HReg tLo = newVRegI(env);
2251 HReg tHi = newVRegI(env);
2252 X86AluOp op = e->Iex.Binop.op==Iop_Or64 ? Xalu_OR
2253 : e->Iex.Binop.op==Iop_And64 ? Xalu_AND
2254 : Xalu_XOR;
2255 iselInt64Expr(&xHi, &xLo, env, e->Iex.Binop.arg1);
2256 iselInt64Expr(&yHi, &yLo, env, e->Iex.Binop.arg2);
/* The operation is applied to the high words and to the low words
   separately, each on a fresh copy of the first operand's word. */
2257 addInstr(env, mk_iMOVsd_RR(xHi, tHi));
2258 addInstr(env, X86Instr_Alu32R(op, X86RMI_Reg(yHi), tHi));
2259 addInstr(env, mk_iMOVsd_RR(xLo, tLo));
2260 addInstr(env, X86Instr_Alu32R(op, X86RMI_Reg(yLo), tLo));
2261 *rHi = tHi;
2262 *rLo = tLo;
2263 return;
2266 /* Add64/Sub64 */
2267 case Iop_Add64:
2268 if (e->Iex.Binop.arg2->tag == Iex_Const) {
2269 /* special case Add64(e, const) */
2270 ULong w64 = e->Iex.Binop.arg2->Iex.Const.con->Ico.U64;
2271 UInt wHi = toUInt(w64 >> 32);
2272 UInt wLo = toUInt(w64);
2273 HReg tLo = newVRegI(env);
2274 HReg tHi = newVRegI(env);
2275 HReg xLo, xHi;
2276 vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U64);
2277 iselInt64Expr(&xHi, &xLo, env, e->Iex.Binop.arg1);
2278 addInstr(env, mk_iMOVsd_RR(xHi, tHi));
2279 addInstr(env, mk_iMOVsd_RR(xLo, tLo));
/* ADD on the low words first, then ADC on the high words so that the
   carry out of the low half is included.  The two halves of the
   constant are used as immediates. */
2280 addInstr(env, X86Instr_Alu32R(Xalu_ADD, X86RMI_Imm(wLo), tLo));
2281 addInstr(env, X86Instr_Alu32R(Xalu_ADC, X86RMI_Imm(wHi), tHi));
2282 *rHi = tHi;
2283 *rLo = tLo;
2284 return;
2286 /* else fall through to the generic case */
2287 case Iop_Sub64: {
2288 HReg xLo, xHi, yLo, yHi;
2289 HReg tLo = newVRegI(env);
2290 HReg tHi = newVRegI(env);
2291 iselInt64Expr(&xHi, &xLo, env, e->Iex.Binop.arg1);
2292 addInstr(env, mk_iMOVsd_RR(xHi, tHi));
2293 addInstr(env, mk_iMOVsd_RR(xLo, tLo));
2294 iselInt64Expr(&yHi, &yLo, env, e->Iex.Binop.arg2);
/* Shared by Add64 (non-constant second operand) and Sub64: the low
   words are combined with ADD/SUB and the high words with ADC/SBB. */
2295 if (e->Iex.Binop.op==Iop_Add64) {
2296 addInstr(env, X86Instr_Alu32R(Xalu_ADD, X86RMI_Reg(yLo), tLo));
2297 addInstr(env, X86Instr_Alu32R(Xalu_ADC, X86RMI_Reg(yHi), tHi));
2298 } else {
2299 addInstr(env, X86Instr_Alu32R(Xalu_SUB, X86RMI_Reg(yLo), tLo));
2300 addInstr(env, X86Instr_Alu32R(Xalu_SBB, X86RMI_Reg(yHi), tHi));
2302 *rHi = tHi;
2303 *rLo = tLo;
2304 return;
2307 /* 32HLto64(e1,e2) */
2308 case Iop_32HLto64:
/* arg1 supplies the high word and arg2 the low word.  The registers
   returned by iselIntExpr_R are passed on as-is, with no copies. */
2309 *rHi = iselIntExpr_R(env, e->Iex.Binop.arg1);
2310 *rLo = iselIntExpr_R(env, e->Iex.Binop.arg2);
2311 return;
2313 /* 64-bit shifts */
2314 case Iop_Shl64: {
2315 /* We use the same ingenious scheme as gcc. Put the value
2316 to be shifted into %hi:%lo, and the shift amount into
2317 %cl. Then (dsts on right, a la ATT syntax):
2319 shldl %cl, %lo, %hi -- make %hi be right for the
2320 -- shift amt %cl % 32
2321 shll %cl, %lo -- make %lo be right for the
2322 -- shift amt %cl % 32
2324 Now, if (shift amount % 64) is in the range 32 .. 63,
2325 we have to do a fixup, which puts the result low half
2326 into the result high half, and zeroes the low half:
2328 testl $32, %ecx
2330 cmovnz %lo, %hi
2331 movl $0, %tmp -- sigh; need yet another reg
2332 cmovnz %tmp, %lo
2334 HReg rAmt, sHi, sLo, tHi, tLo, tTemp;
2335 tLo = newVRegI(env);
2336 tHi = newVRegI(env);
2337 tTemp = newVRegI(env);
2338 rAmt = iselIntExpr_R(env, e->Iex.Binop.arg2);
2339 iselInt64Expr(&sHi,&sLo, env, e->Iex.Binop.arg1);
2340 addInstr(env, mk_iMOVsd_RR(rAmt, hregX86_ECX()));
2341 addInstr(env, mk_iMOVsd_RR(sHi, tHi));
2342 addInstr(env, mk_iMOVsd_RR(sLo, tLo));
2343 /* Ok. Now shift amt is in %ecx, and value is in tHi/tLo
2344 and those regs are legitimately modifiable. */
2345 addInstr(env, X86Instr_Sh3232(Xsh_SHL, 0/*%cl*/, tLo, tHi));
2346 addInstr(env, X86Instr_Sh32(Xsh_SHL, 0/*%cl*/, tLo));
/* Fixup for shift amounts with bit 5 set, as described in the scheme
   above: the Test32 sets the flags consumed by both CMovs. */
2347 addInstr(env, X86Instr_Test32(32, X86RM_Reg(hregX86_ECX())));
2348 addInstr(env, X86Instr_CMov32(Xcc_NZ, X86RM_Reg(tLo), tHi));
2349 addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(0), tTemp));
2350 addInstr(env, X86Instr_CMov32(Xcc_NZ, X86RM_Reg(tTemp), tLo));
2351 *rHi = tHi;
2352 *rLo = tLo;
2353 return;
2356 case Iop_Shr64: {
2357 /* We use the same ingenious scheme as gcc. Put the value
2358 to be shifted into %hi:%lo, and the shift amount into
2359 %cl. Then:
2361 shrdl %cl, %hi, %lo -- make %lo be right for the
2362 -- shift amt %cl % 32
2363 shrl %cl, %hi -- make %hi be right for the
2364 -- shift amt %cl % 32
2366 Now, if (shift amount % 64) is in the range 32 .. 63,
2367 we have to do a fixup, which puts the result high half
2368 into the result low half, and zeroes the high half:
2370 testl $32, %ecx
2372 cmovnz %hi, %lo
2373 movl $0, %tmp -- sigh; need yet another reg
2374 cmovnz %tmp, %hi
2376 HReg rAmt, sHi, sLo, tHi, tLo, tTemp;
2377 tLo = newVRegI(env);
2378 tHi = newVRegI(env);
2379 tTemp = newVRegI(env);
2380 rAmt = iselIntExpr_R(env, e->Iex.Binop.arg2);
2381 iselInt64Expr(&sHi,&sLo, env, e->Iex.Binop.arg1);
2382 addInstr(env, mk_iMOVsd_RR(rAmt, hregX86_ECX()));
2383 addInstr(env, mk_iMOVsd_RR(sHi, tHi));
2384 addInstr(env, mk_iMOVsd_RR(sLo, tLo));
2385 /* Ok. Now shift amt is in %ecx, and value is in tHi/tLo
2386 and those regs are legitimately modifiable. */
2387 addInstr(env, X86Instr_Sh3232(Xsh_SHR, 0/*%cl*/, tHi, tLo));
2388 addInstr(env, X86Instr_Sh32(Xsh_SHR, 0/*%cl*/, tHi));
/* Mirror image of the Shl64 fixup: when bit 5 of the amount is set the
   high word moves into the low word and the high word becomes zero. */
2389 addInstr(env, X86Instr_Test32(32, X86RM_Reg(hregX86_ECX())));
2390 addInstr(env, X86Instr_CMov32(Xcc_NZ, X86RM_Reg(tHi), tLo));
2391 addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(0), tTemp));
2392 addInstr(env, X86Instr_CMov32(Xcc_NZ, X86RM_Reg(tTemp), tHi));
2393 *rHi = tHi;
2394 *rLo = tLo;
2395 return;
2398 case Iop_Sar64: {
2399 /* gcc -O2 does the following. I don't know how it works, but it
2400 does work. Don't mess with it. This is hard to test because the
2401 x86 front end doesn't create Iop_Sar64 for any x86 instruction,
2402 so it's impossible to write a test program that feeds values
2403 through Iop_Sar64 and prints their results. The implementation
2404 here was tested by using psrlq on mmx registers -- that generates
2405 Iop_Shr64 -- and temporarily hacking the front end to generate
2406 Iop_Sar64 for that instruction instead.
2408 movl %amount, %ecx
2409 movl %srcHi, %r1
2410 movl %srcLo, %r2
2412 movl %r1, %r3
2413 sarl %cl, %r3
2414 movl %r2, %r4
2415 shrdl %cl, %r1, %r4
2416 movl %r3, %r2
2417 sarl $31, %r2
2418 andl $32, %ecx
2419 cmovne %r3, %r4 // = resLo
2420 cmovne %r2, %r3 // = resHi
2422 HReg amount = iselIntExpr_R(env, e->Iex.Binop.arg2);
2423 HReg srcHi = INVALID_HREG, srcLo = INVALID_HREG;
2424 iselInt64Expr(&srcHi, &srcLo, env, e->Iex.Binop.arg1);
2425 HReg r1 = newVRegI(env);
2426 HReg r2 = newVRegI(env);
2427 HReg r3 = newVRegI(env);
2428 HReg r4 = newVRegI(env);
2429 addInstr(env, mk_iMOVsd_RR(amount, hregX86_ECX()));
2430 addInstr(env, mk_iMOVsd_RR(srcHi, r1));
2431 addInstr(env, mk_iMOVsd_RR(srcLo, r2));
2433 addInstr(env, mk_iMOVsd_RR(r1, r3));
2434 addInstr(env, X86Instr_Sh32(Xsh_SAR, 0/*%cl*/, r3));
2435 addInstr(env, mk_iMOVsd_RR(r2, r4));
2436 addInstr(env, X86Instr_Sh3232(Xsh_SHR, 0/*%cl*/, r1, r4));
2437 addInstr(env, mk_iMOVsd_RR(r3, r2));
2438 addInstr(env, X86Instr_Sh32(Xsh_SAR, 31, r2));
/* Unlike the Shl64/Shr64 cases, the flags for the two CMovs come from
   an AND that overwrites %ecx rather than from a Test32.  The result
   is returned in r3 (high word) and r4 (low word). */
2439 addInstr(env, X86Instr_Alu32R(Xalu_AND, X86RMI_Imm(32),
2440 hregX86_ECX()));
2441 addInstr(env, X86Instr_CMov32(Xcc_NZ, X86RM_Reg(r3), r4));
2442 addInstr(env, X86Instr_CMov32(Xcc_NZ, X86RM_Reg(r2), r3));
2443 *rHi = r3;
2444 *rLo = r4;
2445 return;
2448 /* F64 -> I64 */
2449 /* Sigh, this is an almost exact copy of the F64 -> I32/I16
2450 case. Unfortunately I see no easy way to avoid the
2451 duplication. */
2452 case Iop_F64toI64S: {
2453 HReg rf = iselDblExpr(env, e->Iex.Binop.arg2);
2454 HReg tLo = newVRegI(env);
2455 HReg tHi = newVRegI(env);
2457 /* Used several times ... */
2458 /* Careful ... this sharing is only safe because
2459 zero_esp/four_esp do not hold any registers which the
2460 register allocator could attempt to swizzle later. */
2461 X86AMode* zero_esp = X86AMode_IR(0, hregX86_ESP());
2462 X86AMode* four_esp = X86AMode_IR(4, hregX86_ESP());
2464 /* rf now holds the value to be converted, and rrm holds
2465 the rounding mode value, encoded as per the
2466 IRRoundingMode enum. The first thing to do is set the
2467 FPU's rounding mode accordingly. */
/* NOTE(review): there is no variable called 'rrm' in this case; the
   rounding mode is the expression e->Iex.Binop.arg1, which is handed
   straight to set_FPU_rounding_mode below. */
2469 /* Create a space for the format conversion. */
2470 /* subl $8, %esp */
2471 sub_from_esp(env, 8);
2473 /* Set host rounding mode */
2474 set_FPU_rounding_mode( env, e->Iex.Binop.arg1 );
2476 /* gistll %rf, 0(%esp) */
2477 addInstr(env, X86Instr_FpLdStI(False/*store*/, 8, rf, zero_esp));
2479 /* movl 0(%esp), %dstLo */
2480 /* movl 4(%esp), %dstHi */
2481 addInstr(env, X86Instr_Alu32R(
2482 Xalu_MOV, X86RMI_Mem(zero_esp), tLo));
2483 addInstr(env, X86Instr_Alu32R(
2484 Xalu_MOV, X86RMI_Mem(four_esp), tHi));
2486 /* Restore default FPU rounding. */
2487 set_FPU_rounding_default( env );
2489 /* addl $8, %esp */
2490 add_to_esp(env, 8);
2492 *rHi = tHi;
2493 *rLo = tLo;
2494 return;
/* 64-bit SIMD binary ops.  Each case below only records the address of
   the matching h_generic_calc_* helper in 'fn' and jumps to the shared
   call sequence at the 'binnish' label. */
2497 case Iop_Add8x8:
2498 fn = (HWord)h_generic_calc_Add8x8; goto binnish;
2499 case Iop_Add16x4:
2500 fn = (HWord)h_generic_calc_Add16x4; goto binnish;
2501 case Iop_Add32x2:
2502 fn = (HWord)h_generic_calc_Add32x2; goto binnish;
2504 case Iop_Avg8Ux8:
2505 fn = (HWord)h_generic_calc_Avg8Ux8; goto binnish;
2506 case Iop_Avg16Ux4:
2507 fn = (HWord)h_generic_calc_Avg16Ux4; goto binnish;
2509 case Iop_CmpEQ8x8:
2510 fn = (HWord)h_generic_calc_CmpEQ8x8; goto binnish;
2511 case Iop_CmpEQ16x4:
2512 fn = (HWord)h_generic_calc_CmpEQ16x4; goto binnish;
2513 case Iop_CmpEQ32x2:
2514 fn = (HWord)h_generic_calc_CmpEQ32x2; goto binnish;
2516 case Iop_CmpGT8Sx8:
2517 fn = (HWord)h_generic_calc_CmpGT8Sx8; goto binnish;
2518 case Iop_CmpGT16Sx4:
2519 fn = (HWord)h_generic_calc_CmpGT16Sx4; goto binnish;
2520 case Iop_CmpGT32Sx2:
2521 fn = (HWord)h_generic_calc_CmpGT32Sx2; goto binnish;
2523 case Iop_InterleaveHI8x8:
2524 fn = (HWord)h_generic_calc_InterleaveHI8x8; goto binnish;
2525 case Iop_InterleaveLO8x8:
2526 fn = (HWord)h_generic_calc_InterleaveLO8x8; goto binnish;
2527 case Iop_InterleaveHI16x4:
2528 fn = (HWord)h_generic_calc_InterleaveHI16x4; goto binnish;
2529 case Iop_InterleaveLO16x4:
2530 fn = (HWord)h_generic_calc_InterleaveLO16x4; goto binnish;
2531 case Iop_InterleaveHI32x2:
2532 fn = (HWord)h_generic_calc_InterleaveHI32x2; goto binnish;
2533 case Iop_InterleaveLO32x2:
2534 fn = (HWord)h_generic_calc_InterleaveLO32x2; goto binnish;
2535 case Iop_CatOddLanes16x4:
2536 fn = (HWord)h_generic_calc_CatOddLanes16x4; goto binnish;
2537 case Iop_CatEvenLanes16x4:
2538 fn = (HWord)h_generic_calc_CatEvenLanes16x4; goto binnish;
2539 case Iop_Perm8x8:
2540 fn = (HWord)h_generic_calc_Perm8x8; goto binnish;
2542 case Iop_Max8Ux8:
2543 fn = (HWord)h_generic_calc_Max8Ux8; goto binnish;
2544 case Iop_Max16Sx4:
2545 fn = (HWord)h_generic_calc_Max16Sx4; goto binnish;
2546 case Iop_Min8Ux8:
2547 fn = (HWord)h_generic_calc_Min8Ux8; goto binnish;
2548 case Iop_Min16Sx4:
2549 fn = (HWord)h_generic_calc_Min16Sx4; goto binnish;
2551 case Iop_Mul16x4:
2552 fn = (HWord)h_generic_calc_Mul16x4; goto binnish;
2553 case Iop_Mul32x2:
2554 fn = (HWord)h_generic_calc_Mul32x2; goto binnish;
2555 case Iop_MulHi16Sx4:
2556 fn = (HWord)h_generic_calc_MulHi16Sx4; goto binnish;
2557 case Iop_MulHi16Ux4:
2558 fn = (HWord)h_generic_calc_MulHi16Ux4; goto binnish;
2560 case Iop_QAdd8Sx8:
2561 fn = (HWord)h_generic_calc_QAdd8Sx8; goto binnish;
2562 case Iop_QAdd16Sx4:
2563 fn = (HWord)h_generic_calc_QAdd16Sx4; goto binnish;
2564 case Iop_QAdd8Ux8:
2565 fn = (HWord)h_generic_calc_QAdd8Ux8; goto binnish;
2566 case Iop_QAdd16Ux4:
2567 fn = (HWord)h_generic_calc_QAdd16Ux4; goto binnish;
2569 case Iop_QNarrowBin32Sto16Sx4:
2570 fn = (HWord)h_generic_calc_QNarrowBin32Sto16Sx4; goto binnish;
2571 case Iop_QNarrowBin16Sto8Sx8:
2572 fn = (HWord)h_generic_calc_QNarrowBin16Sto8Sx8; goto binnish;
2573 case Iop_QNarrowBin16Sto8Ux8:
2574 fn = (HWord)h_generic_calc_QNarrowBin16Sto8Ux8; goto binnish;
2575 case Iop_NarrowBin16to8x8:
2576 fn = (HWord)h_generic_calc_NarrowBin16to8x8; goto binnish;
2577 case Iop_NarrowBin32to16x4:
2578 fn = (HWord)h_generic_calc_NarrowBin32to16x4; goto binnish;
2580 case Iop_QSub8Sx8:
2581 fn = (HWord)h_generic_calc_QSub8Sx8; goto binnish;
2582 case Iop_QSub16Sx4:
2583 fn = (HWord)h_generic_calc_QSub16Sx4; goto binnish;
2584 case Iop_QSub8Ux8:
2585 fn = (HWord)h_generic_calc_QSub8Ux8; goto binnish;
2586 case Iop_QSub16Ux4:
2587 fn = (HWord)h_generic_calc_QSub16Ux4; goto binnish;
2589 case Iop_Sub8x8:
2590 fn = (HWord)h_generic_calc_Sub8x8; goto binnish;
2591 case Iop_Sub16x4:
2592 fn = (HWord)h_generic_calc_Sub16x4; goto binnish;
2593 case Iop_Sub32x2:
2594 fn = (HWord)h_generic_calc_Sub32x2; goto binnish;
2596 binnish: {
2597 /* Note: the following assumes all helpers are of
2598 signature
2599 ULong fn ( ULong, ULong ), and they are
2600 not marked as regparm functions.
2602 HReg xLo, xHi, yLo, yHi;
2603 HReg tLo = newVRegI(env);
2604 HReg tHi = newVRegI(env);
2605 iselInt64Expr(&yHi, &yLo, env, e->Iex.Binop.arg2);
2606 addInstr(env, X86Instr_Push(X86RMI_Reg(yHi)));
2607 addInstr(env, X86Instr_Push(X86RMI_Reg(yLo)));
2608 iselInt64Expr(&xHi, &xLo, env, e->Iex.Binop.arg1);
2609 addInstr(env, X86Instr_Push(X86RMI_Reg(xHi)));
2610 addInstr(env, X86Instr_Push(X86RMI_Reg(xLo)));
2611 addInstr(env, X86Instr_Call( Xcc_ALWAYS, (Addr32)fn,
2612 0, mk_RetLoc_simple(RLPri_2Int) ));
2613 add_to_esp(env, 4*4);
2614 addInstr(env, mk_iMOVsd_RR(hregX86_EDX(), tHi));
2615 addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), tLo));
2616 *rHi = tHi;
2617 *rLo = tLo;
2618 return;
2621 case Iop_ShlN32x2:
2622 fn = (HWord)h_generic_calc_ShlN32x2; goto shifty;
2623 case Iop_ShlN16x4:
2624 fn = (HWord)h_generic_calc_ShlN16x4; goto shifty;
2625 case Iop_ShlN8x8:
2626 fn = (HWord)h_generic_calc_ShlN8x8; goto shifty;
2627 case Iop_ShrN32x2:
2628 fn = (HWord)h_generic_calc_ShrN32x2; goto shifty;
2629 case Iop_ShrN16x4:
2630 fn = (HWord)h_generic_calc_ShrN16x4; goto shifty;
2631 case Iop_SarN32x2:
2632 fn = (HWord)h_generic_calc_SarN32x2; goto shifty;
2633 case Iop_SarN16x4:
2634 fn = (HWord)h_generic_calc_SarN16x4; goto shifty;
2635 case Iop_SarN8x8:
2636 fn = (HWord)h_generic_calc_SarN8x8; goto shifty;
2637 shifty: {
2638 /* Note: the following assumes all helpers are of
2639 signature
2640 ULong fn ( ULong, UInt ), and they are
2641 not marked as regparm functions.
2643 HReg xLo, xHi;
2644 HReg tLo = newVRegI(env);
2645 HReg tHi = newVRegI(env);
2646 X86RMI* y = iselIntExpr_RMI(env, e->Iex.Binop.arg2);
2647 addInstr(env, X86Instr_Push(y));
2648 iselInt64Expr(&xHi, &xLo, env, e->Iex.Binop.arg1);
2649 addInstr(env, X86Instr_Push(X86RMI_Reg(xHi)));
2650 addInstr(env, X86Instr_Push(X86RMI_Reg(xLo)));
2651 addInstr(env, X86Instr_Call( Xcc_ALWAYS, (Addr32)fn,
2652 0, mk_RetLoc_simple(RLPri_2Int) ));
2653 add_to_esp(env, 3*4);
2654 addInstr(env, mk_iMOVsd_RR(hregX86_EDX(), tHi));
2655 addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), tLo));
2656 *rHi = tHi;
2657 *rLo = tLo;
2658 return;
2661 default:
2662 break;
2664 } /* if (e->tag == Iex_Binop) */
2667 /* --------- UNARY ops --------- */
2668 if (e->tag == Iex_Unop) {
2669 switch (e->Iex.Unop.op) {
2671 /* 32Sto64(e) */
2672 case Iop_32Sto64: {
2673 HReg tLo = newVRegI(env);
2674 HReg tHi = newVRegI(env);
2675 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
/* Both halves start as copies of src; the high half is then shifted
   right arithmetically by 31, leaving every bit equal to src's top
   bit. */
2676 addInstr(env, mk_iMOVsd_RR(src,tHi));
2677 addInstr(env, mk_iMOVsd_RR(src,tLo));
2678 addInstr(env, X86Instr_Sh32(Xsh_SAR, 31, tHi));
2679 *rHi = tHi;
2680 *rLo = tLo;
2681 return;
2684 /* 32Uto64(e) */
2685 case Iop_32Uto64: {
2686 HReg tLo = newVRegI(env);
2687 HReg tHi = newVRegI(env);
2688 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
2689 addInstr(env, mk_iMOVsd_RR(src,tLo));
2690 addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(0), tHi));
2691 *rHi = tHi;
2692 *rLo = tLo;
2693 return;
2696 /* 16Uto64(e) */
2697 case Iop_16Uto64: {
2698 HReg tLo = newVRegI(env);
2699 HReg tHi = newVRegI(env);
2700 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
/* Keep only the low 16 bits of src in the low word; the high word is
   set to zero. */
2701 addInstr(env, mk_iMOVsd_RR(src,tLo));
2702 addInstr(env, X86Instr_Alu32R(Xalu_AND,
2703 X86RMI_Imm(0xFFFF), tLo));
2704 addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(0), tHi));
2705 *rHi = tHi;
2706 *rLo = tLo;
2707 return;
2710 /* V128{HI}to64 */
2711 case Iop_V128HIto64:
2712 case Iop_V128to64: {
2713 Int off = e->Iex.Unop.op==Iop_V128HIto64 ? 8 : 0;
2714 HReg tLo = newVRegI(env);
2715 HReg tHi = newVRegI(env);
2716 HReg vec = iselVecExpr(env, e->Iex.Unop.arg);
2717 X86AMode* esp0 = X86AMode_IR(0, hregX86_ESP());
2718 X86AMode* espLO = X86AMode_IR(off, hregX86_ESP());
2719 X86AMode* espHI = X86AMode_IR(off+4, hregX86_ESP());
/* The vector is stored into a 16-byte scratch area carved out of the
   stack, and the requested 64-bit half (byte offset 0 or 8) is read
   back as two 32-bit words before the stack pointer is restored. */
2720 sub_from_esp(env, 16);
2721 addInstr(env, X86Instr_SseLdSt(False/*store*/, vec, esp0));
2722 addInstr(env, X86Instr_Alu32R( Xalu_MOV,
2723 X86RMI_Mem(espLO), tLo ));
2724 addInstr(env, X86Instr_Alu32R( Xalu_MOV,
2725 X86RMI_Mem(espHI), tHi ));
2726 add_to_esp(env, 16);
2727 *rHi = tHi;
2728 *rLo = tLo;
2729 return;
2732 /* could do better than this, but for now ... */
2733 case Iop_1Sto64: {
2734 HReg tLo = newVRegI(env);
2735 HReg tHi = newVRegI(env);
2736 X86CondCode cond = iselCondCode(env, e->Iex.Unop.arg);
/* After Set32 materialises the condition in tLo, shifting left by 31
   and then arithmetically right by 31 replicates bit 0 of tLo into all
   32 bits.  The same word is then copied to the high half. */
2737 addInstr(env, X86Instr_Set32(cond,tLo));
2738 addInstr(env, X86Instr_Sh32(Xsh_SHL, 31, tLo));
2739 addInstr(env, X86Instr_Sh32(Xsh_SAR, 31, tLo));
2740 addInstr(env, mk_iMOVsd_RR(tLo, tHi));
2741 *rHi = tHi;
2742 *rLo = tLo;
2743 return;
2746 /* Not64(e) */
2747 case Iop_Not64: {
2748 HReg tLo = newVRegI(env);
2749 HReg tHi = newVRegI(env);
2750 HReg sHi, sLo;
2751 iselInt64Expr(&sHi, &sLo, env, e->Iex.Unop.arg);
2752 addInstr(env, mk_iMOVsd_RR(sHi, tHi));
2753 addInstr(env, mk_iMOVsd_RR(sLo, tLo));
2754 addInstr(env, X86Instr_Unary32(Xun_NOT,tHi));
2755 addInstr(env, X86Instr_Unary32(Xun_NOT,tLo));
2756 *rHi = tHi;
2757 *rLo = tLo;
2758 return;
2761 /* Left64(e) */
2762 case Iop_Left64: {
2763 HReg yLo, yHi;
2764 HReg tLo = newVRegI(env);
2765 HReg tHi = newVRegI(env);
2766 /* yHi:yLo = arg */
2767 iselInt64Expr(&yHi, &yLo, env, e->Iex.Unop.arg);
2768 /* tLo = 0 - yLo, and set carry */
2769 addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(0), tLo));
2770 addInstr(env, X86Instr_Alu32R(Xalu_SUB, X86RMI_Reg(yLo), tLo));
2771 /* tHi = 0 - yHi - carry */
2772 addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(0), tHi));
2773 addInstr(env, X86Instr_Alu32R(Xalu_SBB, X86RMI_Reg(yHi), tHi));
2774 /* So now we have tHi:tLo = -arg. To finish off, or 'arg'
2775 back in, so as to give the final result
2776 tHi:tLo = arg | -arg. */
2777 addInstr(env, X86Instr_Alu32R(Xalu_OR, X86RMI_Reg(yLo), tLo));
2778 addInstr(env, X86Instr_Alu32R(Xalu_OR, X86RMI_Reg(yHi), tHi));
2779 *rHi = tHi;
2780 *rLo = tLo;
2781 return;
2784 /* --- patterns rooted at: CmpwNEZ64 --- */
2786 /* CmpwNEZ64(e) */
2787 case Iop_CmpwNEZ64: {
/* Both variants below end up with a single 32-bit word (tmp2) that is
   returned as the high half and the low half of the result alike. */
2789 DECLARE_PATTERN(p_CmpwNEZ64_Or64);
2790 DEFINE_PATTERN(p_CmpwNEZ64_Or64,
2791 unop(Iop_CmpwNEZ64,binop(Iop_Or64,bind(0),bind(1))));
2792 if (matchIRExpr(&mi, p_CmpwNEZ64_Or64, e)) {
2793 /* CmpwNEZ64(Or64(x,y)) */
2794 HReg xHi,xLo,yHi,yLo;
2795 HReg xBoth = newVRegI(env);
2796 HReg merged = newVRegI(env);
2797 HReg tmp2 = newVRegI(env);
2799 iselInt64Expr(&xHi,&xLo, env, mi.bindee[0]);
2800 addInstr(env, mk_iMOVsd_RR(xHi,xBoth));
2801 addInstr(env, X86Instr_Alu32R(Xalu_OR,
2802 X86RMI_Reg(xLo),xBoth));
2804 iselInt64Expr(&yHi,&yLo, env, mi.bindee[1]);
2805 addInstr(env, mk_iMOVsd_RR(yHi,merged));
2806 addInstr(env, X86Instr_Alu32R(Xalu_OR,
2807 X86RMI_Reg(yLo),merged));
2808 addInstr(env, X86Instr_Alu32R(Xalu_OR,
2809 X86RMI_Reg(xBoth),merged));
2811 /* tmp2 = (merged | -merged) >>s 31 */
2812 addInstr(env, mk_iMOVsd_RR(merged,tmp2));
2813 addInstr(env, X86Instr_Unary32(Xun_NEG,tmp2));
2814 addInstr(env, X86Instr_Alu32R(Xalu_OR,
2815 X86RMI_Reg(merged), tmp2));
2816 addInstr(env, X86Instr_Sh32(Xsh_SAR, 31, tmp2));
2817 *rHi = tmp2;
2818 *rLo = tmp2;
2819 return;
2820 } else {
2821 /* CmpwNEZ64(e) */
2822 HReg srcLo, srcHi;
2823 HReg tmp1 = newVRegI(env);
2824 HReg tmp2 = newVRegI(env);
2825 /* srcHi:srcLo = arg */
2826 iselInt64Expr(&srcHi, &srcLo, env, e->Iex.Unop.arg);
2827 /* tmp1 = srcHi | srcLo */
2828 addInstr(env, mk_iMOVsd_RR(srcHi,tmp1));
2829 addInstr(env, X86Instr_Alu32R(Xalu_OR,
2830 X86RMI_Reg(srcLo), tmp1));
2831 /* tmp2 = (tmp1 | -tmp1) >>s 31 */
2832 addInstr(env, mk_iMOVsd_RR(tmp1,tmp2));
2833 addInstr(env, X86Instr_Unary32(Xun_NEG,tmp2));
2834 addInstr(env, X86Instr_Alu32R(Xalu_OR,
2835 X86RMI_Reg(tmp1), tmp2));
2836 addInstr(env, X86Instr_Sh32(Xsh_SAR, 31, tmp2));
2837 *rHi = tmp2;
2838 *rLo = tmp2;
2839 return;
2843 /* ReinterpF64asI64(e) */
2844 /* Given an IEEE754 double, produce an I64 with the same bit
2845 pattern. */
2846 case Iop_ReinterpF64asI64: {
2847 HReg rf = iselDblExpr(env, e->Iex.Unop.arg);
2848 HReg tLo = newVRegI(env);
2849 HReg tHi = newVRegI(env);
2850 X86AMode* zero_esp = X86AMode_IR(0, hregX86_ESP());
2851 X86AMode* four_esp = X86AMode_IR(4, hregX86_ESP());
2852 /* paranoia */
2853 set_FPU_rounding_default(env);
2854 /* subl $8, %esp */
2855 sub_from_esp(env, 8);
2856 /* gstD %rf, 0(%esp) */
2857 addInstr(env,
2858 X86Instr_FpLdSt(False/*store*/, 8, rf, zero_esp));
2859 /* movl 0(%esp), %tLo */
2860 addInstr(env,
2861 X86Instr_Alu32R(Xalu_MOV, X86RMI_Mem(zero_esp), tLo));
2862 /* movl 4(%esp), %tHi */
2863 addInstr(env,
2864 X86Instr_Alu32R(Xalu_MOV, X86RMI_Mem(four_esp), tHi));
2865 /* addl $8, %esp */
2866 add_to_esp(env, 8);
2867 *rHi = tHi;
2868 *rLo = tLo;
2869 return;
/* 64-bit SIMD unary ops: pick the h_generic_calc_* helper, then share
   the call sequence at the 'unish' label. */
2872 case Iop_CmpNEZ32x2:
2873 fn = (HWord)h_generic_calc_CmpNEZ32x2; goto unish;
2874 case Iop_CmpNEZ16x4:
2875 fn = (HWord)h_generic_calc_CmpNEZ16x4; goto unish;
2876 case Iop_CmpNEZ8x8:
2877 fn = (HWord)h_generic_calc_CmpNEZ8x8; goto unish;
2878 unish: {
2879 /* Note: the following assumes all helpers are of
2880 signature
2881 ULong fn ( ULong ), and they are
2882 not marked as regparm functions.
2884 HReg xLo, xHi;
2885 HReg tLo = newVRegI(env);
2886 HReg tHi = newVRegI(env);
2887 iselInt64Expr(&xHi, &xLo, env, e->Iex.Unop.arg);
2888 addInstr(env, X86Instr_Push(X86RMI_Reg(xHi)));
2889 addInstr(env, X86Instr_Push(X86RMI_Reg(xLo)));
2890 addInstr(env, X86Instr_Call( Xcc_ALWAYS, (Addr32)fn,
2891 0, mk_RetLoc_simple(RLPri_2Int) ));
2892 add_to_esp(env, 2*4);
2893 addInstr(env, mk_iMOVsd_RR(hregX86_EDX(), tHi));
2894 addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), tLo));
2895 *rHi = tHi;
2896 *rLo = tLo;
2897 return;
2900 default:
2901 break;
2903 } /* if (e->tag == Iex_Unop) */
2906 /* --------- CCALL --------- */
2907 if (e->tag == Iex_CCall) {
2908 HReg tLo = newVRegI(env);
2909 HReg tHi = newVRegI(env);
2911 /* Marshal args, do the call, clear stack. */
2912 UInt addToSp = 0;
2913 RetLoc rloc = mk_RetLoc_INVALID();
2914 doHelperCall( &addToSp, &rloc, env, NULL/*guard*/,
2915 e->Iex.CCall.cee,
2916 e->Iex.CCall.retty, e->Iex.CCall.args );
2917 vassert(is_sane_RetLoc(rloc));
2918 vassert(rloc.pri == RLPri_2Int);
2919 vassert(addToSp == 0);
2920 /* */
/* The asserts above require a two-integer-register return location and
   no pending stack adjustment; the result is then copied out of
   %edx (high word) and %eax (low word). */
2922 addInstr(env, mk_iMOVsd_RR(hregX86_EDX(), tHi));
2923 addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), tLo));
2924 *rHi = tHi;
2925 *rLo = tLo;
2926 return;
/* Nothing above matched: print the offending expression and panic. */
2929 ppIRExpr(e);
2930 vpanic("iselInt64Expr");
2934 /*---------------------------------------------------------*/
2935 /*--- ISEL: Floating point expressions (32 bit) ---*/
2936 /*---------------------------------------------------------*/
2938 /* Nothing interesting here; really just wrappers for
2939 64-bit stuff. */
2941 static HReg iselFltExpr ( ISelEnv* env, const IRExpr* e )
2943 HReg r = iselFltExpr_wrk( env, e );
2944 # if 0
2945 vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
2946 # endif
2947 vassert(hregClass(r) == HRcFlt64); /* yes, really Flt64 */
2948 vassert(hregIsVirtual(r));
2949 return r;
2952 /* DO NOT CALL THIS DIRECTLY */
2953 static HReg iselFltExpr_wrk ( ISelEnv* env, const IRExpr* e )
2955 IRType ty = typeOfIRExpr(env->type_env,e);
2956 vassert(ty == Ity_F32);
2958 if (e->tag == Iex_RdTmp) {
2959 return lookupIRTemp(env, e->Iex.RdTmp.tmp);
2962 if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
2963 X86AMode* am;
2964 HReg res = newVRegF(env);
2965 vassert(e->Iex.Load.ty == Ity_F32);
2966 am = iselIntExpr_AMode(env, e->Iex.Load.addr);
2967 addInstr(env, X86Instr_FpLdSt(True/*load*/, 4, res, am));
2968 return res;
2971 if (e->tag == Iex_Binop
2972 && e->Iex.Binop.op == Iop_F64toF32) {
2973 /* Although the result is still held in a standard FPU register,
2974 we need to round it to reflect the loss of accuracy/range
2975 entailed in casting it to a 32-bit float. */
2976 HReg dst = newVRegF(env);
2977 HReg src = iselDblExpr(env, e->Iex.Binop.arg2);
2978 set_FPU_rounding_mode( env, e->Iex.Binop.arg1 );
2979 addInstr(env, X86Instr_Fp64to32(src,dst));
2980 set_FPU_rounding_default( env );
2981 return dst;
2984 if (e->tag == Iex_Get) {
2985 X86AMode* am = X86AMode_IR( e->Iex.Get.offset,
2986 hregX86_EBP() );
2987 HReg res = newVRegF(env);
2988 addInstr(env, X86Instr_FpLdSt( True/*load*/, 4, res, am ));
2989 return res;
2992 if (e->tag == Iex_Unop
2993 && e->Iex.Unop.op == Iop_ReinterpI32asF32) {
2994 /* Given an I32, produce an IEEE754 float with the same bit
2995 pattern. */
2996 HReg dst = newVRegF(env);
2997 X86RMI* rmi = iselIntExpr_RMI(env, e->Iex.Unop.arg);
2998 /* paranoia */
2999 addInstr(env, X86Instr_Push(rmi));
3000 addInstr(env, X86Instr_FpLdSt(
3001 True/*load*/, 4, dst,
3002 X86AMode_IR(0, hregX86_ESP())));
3003 add_to_esp(env, 4);
3004 return dst;
3007 if (e->tag == Iex_Binop && e->Iex.Binop.op == Iop_RoundF32toInt) {
3008 HReg rf = iselFltExpr(env, e->Iex.Binop.arg2);
3009 HReg dst = newVRegF(env);
3011 /* rf now holds the value to be rounded. The first thing to do
3012 is set the FPU's rounding mode accordingly. */
3014 /* Set host rounding mode */
3015 set_FPU_rounding_mode( env, e->Iex.Binop.arg1 );
3017 /* grndint %rf, %dst */
3018 addInstr(env, X86Instr_FpUnary(Xfp_ROUND, rf, dst));
3020 /* Restore default FPU rounding. */
3021 set_FPU_rounding_default( env );
3023 return dst;
3026 ppIRExpr(e);
3027 vpanic("iselFltExpr_wrk");
3031 /*---------------------------------------------------------*/
3032 /*--- ISEL: Floating point expressions (64 bit) ---*/
3033 /*---------------------------------------------------------*/
3035 /* Compute a 64-bit floating point value into a register, the identity
3036 of which is returned. As with iselIntExpr_R, the reg may be either
3037 real or virtual; in any case it must not be changed by subsequent
3038 code emitted by the caller. */
/* IEEE 754 formats.  From http://www.freesoft.org/CIE/RFC/1832/32.htm:

    Type                  S (1 bit)   E (11 bits)   F (52 bits)
    ----                  ---------   -----------   -----------
    signalling NaN        u           2047 (max)    .0uuuuu---u
                                                    (with at least
                                                     one 1 bit)
    quiet NaN             u           2047 (max)    .1uuuuu---u

    negative infinity     1           2047 (max)    .000000---0

    positive infinity     0           2047 (max)    .000000---0

    negative zero         1           0             .000000---0

    positive zero         0           0             .000000---0
*/
3058 static HReg iselDblExpr ( ISelEnv* env, const IRExpr* e )
3060 HReg r = iselDblExpr_wrk( env, e );
3061 # if 0
3062 vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
3063 # endif
3064 vassert(hregClass(r) == HRcFlt64);
3065 vassert(hregIsVirtual(r));
3066 return r;
3069 /* DO NOT CALL THIS DIRECTLY */
3070 static HReg iselDblExpr_wrk ( ISelEnv* env, const IRExpr* e )
3072 IRType ty = typeOfIRExpr(env->type_env,e);
3073 vassert(e);
3074 vassert(ty == Ity_F64);
3076 if (e->tag == Iex_RdTmp) {
3077 return lookupIRTemp(env, e->Iex.RdTmp.tmp);
3080 if (e->tag == Iex_Const) {
3081 union { UInt u32x2[2]; ULong u64; Double f64; } u;
3082 HReg freg = newVRegF(env);
3083 vassert(sizeof(u) == 8);
3084 vassert(sizeof(u.u64) == 8);
3085 vassert(sizeof(u.f64) == 8);
3086 vassert(sizeof(u.u32x2) == 8);
3088 if (e->Iex.Const.con->tag == Ico_F64) {
3089 u.f64 = e->Iex.Const.con->Ico.F64;
3091 else if (e->Iex.Const.con->tag == Ico_F64i) {
3092 u.u64 = e->Iex.Const.con->Ico.F64i;
3094 else
3095 vpanic("iselDblExpr(x86): const");
3097 addInstr(env, X86Instr_Push(X86RMI_Imm(u.u32x2[1])));
3098 addInstr(env, X86Instr_Push(X86RMI_Imm(u.u32x2[0])));
3099 addInstr(env, X86Instr_FpLdSt(True/*load*/, 8, freg,
3100 X86AMode_IR(0, hregX86_ESP())));
3101 add_to_esp(env, 8);
3102 return freg;
3105 if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
3106 X86AMode* am;
3107 HReg res = newVRegF(env);
3108 vassert(e->Iex.Load.ty == Ity_F64);
3109 am = iselIntExpr_AMode(env, e->Iex.Load.addr);
3110 addInstr(env, X86Instr_FpLdSt(True/*load*/, 8, res, am));
3111 return res;
3114 if (e->tag == Iex_Get) {
3115 X86AMode* am = X86AMode_IR( e->Iex.Get.offset,
3116 hregX86_EBP() );
3117 HReg res = newVRegF(env);
3118 addInstr(env, X86Instr_FpLdSt( True/*load*/, 8, res, am ));
3119 return res;
3122 if (e->tag == Iex_GetI) {
3123 X86AMode* am
3124 = genGuestArrayOffset(
3125 env, e->Iex.GetI.descr,
3126 e->Iex.GetI.ix, e->Iex.GetI.bias );
3127 HReg res = newVRegF(env);
3128 addInstr(env, X86Instr_FpLdSt( True/*load*/, 8, res, am ));
3129 return res;
3132 if (e->tag == Iex_Triop) {
3133 X86FpOp fpop = Xfp_INVALID;
3134 IRTriop *triop = e->Iex.Triop.details;
3135 switch (triop->op) {
3136 case Iop_AddF64: fpop = Xfp_ADD; break;
3137 case Iop_SubF64: fpop = Xfp_SUB; break;
3138 case Iop_MulF64: fpop = Xfp_MUL; break;
3139 case Iop_DivF64: fpop = Xfp_DIV; break;
3140 case Iop_ScaleF64: fpop = Xfp_SCALE; break;
3141 case Iop_Yl2xF64: fpop = Xfp_YL2X; break;
3142 case Iop_Yl2xp1F64: fpop = Xfp_YL2XP1; break;
3143 case Iop_AtanF64: fpop = Xfp_ATAN; break;
3144 case Iop_PRemF64: fpop = Xfp_PREM; break;
3145 case Iop_PRem1F64: fpop = Xfp_PREM1; break;
3146 default: break;
3148 if (fpop != Xfp_INVALID) {
3149 HReg res = newVRegF(env);
3150 HReg srcL = iselDblExpr(env, triop->arg2);
3151 HReg srcR = iselDblExpr(env, triop->arg3);
3152 /* XXXROUNDINGFIXME */
3153 /* set roundingmode here */
3154 addInstr(env, X86Instr_FpBinary(fpop,srcL,srcR,res));
3155 if (fpop != Xfp_ADD && fpop != Xfp_SUB
3156 && fpop != Xfp_MUL && fpop != Xfp_DIV)
3157 roundToF64(env, res);
3158 return res;
3162 if (e->tag == Iex_Binop && e->Iex.Binop.op == Iop_RoundF64toInt) {
3163 HReg rf = iselDblExpr(env, e->Iex.Binop.arg2);
3164 HReg dst = newVRegF(env);
3166 /* rf now holds the value to be rounded. The first thing to do
3167 is set the FPU's rounding mode accordingly. */
3169 /* Set host rounding mode */
3170 set_FPU_rounding_mode( env, e->Iex.Binop.arg1 );
3172 /* grndint %rf, %dst */
3173 addInstr(env, X86Instr_FpUnary(Xfp_ROUND, rf, dst));
3175 /* Restore default FPU rounding. */
3176 set_FPU_rounding_default( env );
3178 return dst;
3181 if (e->tag == Iex_Binop && e->Iex.Binop.op == Iop_I64StoF64) {
3182 HReg dst = newVRegF(env);
3183 HReg rHi,rLo;
3184 iselInt64Expr( &rHi, &rLo, env, e->Iex.Binop.arg2);
3185 addInstr(env, X86Instr_Push(X86RMI_Reg(rHi)));
3186 addInstr(env, X86Instr_Push(X86RMI_Reg(rLo)));
3188 /* Set host rounding mode */
3189 set_FPU_rounding_mode( env, e->Iex.Binop.arg1 );
3191 addInstr(env, X86Instr_FpLdStI(
3192 True/*load*/, 8, dst,
3193 X86AMode_IR(0, hregX86_ESP())));
3195 /* Restore default FPU rounding. */
3196 set_FPU_rounding_default( env );
3198 add_to_esp(env, 8);
3199 return dst;
3202 if (e->tag == Iex_Binop) {
3203 X86FpOp fpop = Xfp_INVALID;
3204 switch (e->Iex.Binop.op) {
3205 case Iop_SinF64: fpop = Xfp_SIN; break;
3206 case Iop_CosF64: fpop = Xfp_COS; break;
3207 case Iop_TanF64: fpop = Xfp_TAN; break;
3208 case Iop_2xm1F64: fpop = Xfp_2XM1; break;
3209 case Iop_SqrtF64: fpop = Xfp_SQRT; break;
3210 default: break;
3212 if (fpop != Xfp_INVALID) {
3213 HReg res = newVRegF(env);
3214 HReg src = iselDblExpr(env, e->Iex.Binop.arg2);
3215 /* XXXROUNDINGFIXME */
3216 /* set roundingmode here */
3217 /* Note that X86Instr_FpUnary(Xfp_TAN,..) sets the condition
3218 codes. I don't think that matters, since this insn
3219 selector never generates such an instruction intervening
3220 between an flag-setting instruction and a flag-using
3221 instruction. */
3222 addInstr(env, X86Instr_FpUnary(fpop,src,res));
3223 if (fpop != Xfp_SQRT
3224 && fpop != Xfp_NEG && fpop != Xfp_ABS)
3225 roundToF64(env, res);
3226 return res;
3230 if (e->tag == Iex_Unop) {
3231 X86FpOp fpop = Xfp_INVALID;
3232 switch (e->Iex.Unop.op) {
3233 case Iop_NegF64: fpop = Xfp_NEG; break;
3234 case Iop_AbsF64: fpop = Xfp_ABS; break;
3235 default: break;
3237 if (fpop != Xfp_INVALID) {
3238 HReg res = newVRegF(env);
3239 HReg src = iselDblExpr(env, e->Iex.Unop.arg);
3240 addInstr(env, X86Instr_FpUnary(fpop,src,res));
3241 /* No need to do roundToF64(env,res) for Xfp_NEG or Xfp_ABS,
3242 but might need to do that for other unary ops. */
3243 return res;
3247 if (e->tag == Iex_Unop) {
3248 switch (e->Iex.Unop.op) {
3249 case Iop_I32StoF64: {
3250 HReg dst = newVRegF(env);
3251 HReg ri = iselIntExpr_R(env, e->Iex.Unop.arg);
3252 addInstr(env, X86Instr_Push(X86RMI_Reg(ri)));
3253 set_FPU_rounding_default(env);
3254 addInstr(env, X86Instr_FpLdStI(
3255 True/*load*/, 4, dst,
3256 X86AMode_IR(0, hregX86_ESP())));
3257 add_to_esp(env, 4);
3258 return dst;
3260 case Iop_ReinterpI64asF64: {
3261 /* Given an I64, produce an IEEE754 double with the same
3262 bit pattern. */
3263 HReg dst = newVRegF(env);
3264 HReg rHi, rLo;
3265 iselInt64Expr( &rHi, &rLo, env, e->Iex.Unop.arg);
3266 /* paranoia */
3267 set_FPU_rounding_default(env);
3268 addInstr(env, X86Instr_Push(X86RMI_Reg(rHi)));
3269 addInstr(env, X86Instr_Push(X86RMI_Reg(rLo)));
3270 addInstr(env, X86Instr_FpLdSt(
3271 True/*load*/, 8, dst,
3272 X86AMode_IR(0, hregX86_ESP())));
3273 add_to_esp(env, 8);
3274 return dst;
3276 case Iop_F32toF64: {
3277 /* this is a no-op */
3278 HReg res = iselFltExpr(env, e->Iex.Unop.arg);
3279 return res;
3281 default:
3282 break;
3286 /* --------- MULTIPLEX --------- */
3287 if (e->tag == Iex_ITE) { // VFD
3288 if (ty == Ity_F64
3289 && typeOfIRExpr(env->type_env,e->Iex.ITE.cond) == Ity_I1) {
3290 HReg r1 = iselDblExpr(env, e->Iex.ITE.iftrue);
3291 HReg r0 = iselDblExpr(env, e->Iex.ITE.iffalse);
3292 HReg dst = newVRegF(env);
3293 addInstr(env, X86Instr_FpUnary(Xfp_MOV,r1,dst));
3294 X86CondCode cc = iselCondCode(env, e->Iex.ITE.cond);
3295 addInstr(env, X86Instr_FpCMov(cc ^ 1, r0, dst));
3296 return dst;
3300 ppIRExpr(e);
3301 vpanic("iselDblExpr_wrk");
3305 /*---------------------------------------------------------*/
3306 /*--- ISEL: SIMD (Vector) expressions, 128 bit. ---*/
3307 /*---------------------------------------------------------*/
3309 static HReg iselVecExpr ( ISelEnv* env, const IRExpr* e )
3311 HReg r = iselVecExpr_wrk( env, e );
3312 # if 0
3313 vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
3314 # endif
3315 vassert(hregClass(r) == HRcVec128);
3316 vassert(hregIsVirtual(r));
3317 return r;
3321 /* DO NOT CALL THIS DIRECTLY */
3322 static HReg iselVecExpr_wrk ( ISelEnv* env, const IRExpr* e )
3325 # define REQUIRE_SSE1 \
3326 do { if (env->hwcaps == 0/*baseline, no sse*/ \
3327 || env->hwcaps == VEX_HWCAPS_X86_MMXEXT /*Integer SSE*/) \
3328 goto vec_fail; \
3329 } while (0)
3331 # define REQUIRE_SSE2 \
3332 do { if (0 == (env->hwcaps & VEX_HWCAPS_X86_SSE2)) \
3333 goto vec_fail; \
3334 } while (0)
3336 # define SSE2_OR_ABOVE \
3337 (env->hwcaps & VEX_HWCAPS_X86_SSE2)
3339 HWord fn = 0; /* address of helper fn, if required */
3340 MatchInfo mi;
3341 Bool arg1isEReg = False;
3342 X86SseOp op = Xsse_INVALID;
3343 IRType ty = typeOfIRExpr(env->type_env,e);
3344 vassert(e);
3345 vassert(ty == Ity_V128);
3347 REQUIRE_SSE1;
3349 if (e->tag == Iex_RdTmp) {
3350 return lookupIRTemp(env, e->Iex.RdTmp.tmp);
3353 if (e->tag == Iex_Get) {
3354 HReg dst = newVRegV(env);
3355 addInstr(env, X86Instr_SseLdSt(
3356 True/*load*/,
3357 dst,
3358 X86AMode_IR(e->Iex.Get.offset, hregX86_EBP())
3361 return dst;
3364 if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
3365 HReg dst = newVRegV(env);
3366 X86AMode* am = iselIntExpr_AMode(env, e->Iex.Load.addr);
3367 addInstr(env, X86Instr_SseLdSt( True/*load*/, dst, am ));
3368 return dst;
3371 if (e->tag == Iex_Const) {
3372 HReg dst = newVRegV(env);
3373 vassert(e->Iex.Const.con->tag == Ico_V128);
3374 addInstr(env, X86Instr_SseConst(e->Iex.Const.con->Ico.V128, dst));
3375 return dst;
3378 if (e->tag == Iex_Unop) {
3380 if (SSE2_OR_ABOVE) {
3381 /* 64UtoV128(LDle:I64(addr)) */
3382 DECLARE_PATTERN(p_zwiden_load64);
3383 DEFINE_PATTERN(p_zwiden_load64,
3384 unop(Iop_64UtoV128,
3385 IRExpr_Load(Iend_LE,Ity_I64,bind(0))));
3386 if (matchIRExpr(&mi, p_zwiden_load64, e)) {
3387 X86AMode* am = iselIntExpr_AMode(env, mi.bindee[0]);
3388 HReg dst = newVRegV(env);
3389 addInstr(env, X86Instr_SseLdzLO(8, dst, am));
3390 return dst;
3394 switch (e->Iex.Unop.op) {
3396 case Iop_NotV128: {
3397 HReg arg = iselVecExpr(env, e->Iex.Unop.arg);
3398 return do_sse_Not128(env, arg);
3401 case Iop_CmpNEZ64x2: {
3402 /* We can use SSE2 instructions for this. */
3403 /* Ideally, we want to do a 64Ix2 comparison against zero of
3404 the operand. Problem is no such insn exists. Solution
3405 therefore is to do a 32Ix4 comparison instead, and bitwise-
3406 negate (NOT) the result. Let a,b,c,d be 32-bit lanes, and
3407 let the not'd result of this initial comparison be a:b:c:d.
3408 What we need to compute is (a|b):(a|b):(c|d):(c|d). So, use
3409 pshufd to create a value b:a:d:c, and OR that with a:b:c:d,
3410 giving the required result.
3412 The required selection sequence is 2,3,0,1, which
3413 according to Intel's documentation means the pshufd
3414 literal value is 0xB1, that is,
3415 (2 << 6) | (3 << 4) | (0 << 2) | (1 << 0)
3417 HReg arg = iselVecExpr(env, e->Iex.Unop.arg);
3418 HReg tmp = newVRegV(env);
3419 HReg dst = newVRegV(env);
3420 REQUIRE_SSE2;
3421 addInstr(env, X86Instr_SseReRg(Xsse_XOR, tmp, tmp));
3422 addInstr(env, X86Instr_SseReRg(Xsse_CMPEQ32, arg, tmp));
3423 tmp = do_sse_Not128(env, tmp);
3424 addInstr(env, X86Instr_SseShuf(0xB1, tmp, dst));
3425 addInstr(env, X86Instr_SseReRg(Xsse_OR, tmp, dst));
3426 return dst;
3429 case Iop_CmpNEZ32x4: {
3430 /* Sigh, we have to generate lousy code since this has to
3431 work on SSE1 hosts */
3432 /* basically, the idea is: for each lane:
3433 movl lane, %r ; negl %r (now CF = lane==0 ? 0 : 1)
3434 sbbl %r, %r (now %r = 1Sto32(CF))
3435 movl %r, lane
3437 Int i;
3438 X86AMode* am;
3439 X86AMode* esp0 = X86AMode_IR(0, hregX86_ESP());
3440 HReg arg = iselVecExpr(env, e->Iex.Unop.arg);
3441 HReg dst = newVRegV(env);
3442 HReg r32 = newVRegI(env);
3443 sub_from_esp(env, 16);
3444 addInstr(env, X86Instr_SseLdSt(False/*store*/, arg, esp0));
3445 for (i = 0; i < 4; i++) {
3446 am = X86AMode_IR(i*4, hregX86_ESP());
3447 addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Mem(am), r32));
3448 addInstr(env, X86Instr_Unary32(Xun_NEG, r32));
3449 addInstr(env, X86Instr_Alu32R(Xalu_SBB, X86RMI_Reg(r32), r32));
3450 addInstr(env, X86Instr_Alu32M(Xalu_MOV, X86RI_Reg(r32), am));
3452 addInstr(env, X86Instr_SseLdSt(True/*load*/, dst, esp0));
3453 add_to_esp(env, 16);
3454 return dst;
3457 case Iop_CmpNEZ8x16:
3458 case Iop_CmpNEZ16x8: {
3459 /* We can use SSE2 instructions for this. */
3460 HReg arg;
3461 HReg vec0 = newVRegV(env);
3462 HReg vec1 = newVRegV(env);
3463 HReg dst = newVRegV(env);
3464 X86SseOp cmpOp
3465 = e->Iex.Unop.op==Iop_CmpNEZ16x8 ? Xsse_CMPEQ16
3466 : Xsse_CMPEQ8;
3467 REQUIRE_SSE2;
3468 addInstr(env, X86Instr_SseReRg(Xsse_XOR, vec0, vec0));
3469 addInstr(env, mk_vMOVsd_RR(vec0, vec1));
3470 addInstr(env, X86Instr_Sse32Fx4(Xsse_CMPEQF, vec1, vec1));
3471 /* defer arg computation to here so as to give CMPEQF as long
3472 as possible to complete */
3473 arg = iselVecExpr(env, e->Iex.Unop.arg);
3474 /* vec0 is all 0s; vec1 is all 1s */
3475 addInstr(env, mk_vMOVsd_RR(arg, dst));
3476 /* 16x8 or 8x16 comparison == */
3477 addInstr(env, X86Instr_SseReRg(cmpOp, vec0, dst));
3478 /* invert result */
3479 addInstr(env, X86Instr_SseReRg(Xsse_XOR, vec1, dst));
3480 return dst;
3483 case Iop_RecipEst32Fx4: op = Xsse_RCPF; goto do_32Fx4_unary;
3484 case Iop_RSqrtEst32Fx4: op = Xsse_RSQRTF; goto do_32Fx4_unary;
3485 do_32Fx4_unary:
3487 HReg arg = iselVecExpr(env, e->Iex.Unop.arg);
3488 HReg dst = newVRegV(env);
3489 addInstr(env, X86Instr_Sse32Fx4(op, arg, dst));
3490 return dst;
3493 case Iop_RecipEst32F0x4: op = Xsse_RCPF; goto do_32F0x4_unary;
3494 case Iop_RSqrtEst32F0x4: op = Xsse_RSQRTF; goto do_32F0x4_unary;
3495 case Iop_Sqrt32F0x4: op = Xsse_SQRTF; goto do_32F0x4_unary;
3496 do_32F0x4_unary:
3498 /* A bit subtle. We have to copy the arg to the result
3499 register first, because actually doing the SSE scalar insn
3500 leaves the upper 3/4 of the destination register
3501 unchanged. Whereas the required semantics of these
3502 primops is that the upper 3/4 is simply copied in from the
3503 argument. */
3504 HReg arg = iselVecExpr(env, e->Iex.Unop.arg);
3505 HReg dst = newVRegV(env);
3506 addInstr(env, mk_vMOVsd_RR(arg, dst));
3507 addInstr(env, X86Instr_Sse32FLo(op, arg, dst));
3508 return dst;
3511 case Iop_Sqrt64F0x2: op = Xsse_SQRTF; goto do_64F0x2_unary;
3512 do_64F0x2_unary:
3514 /* A bit subtle. We have to copy the arg to the result
3515 register first, because actually doing the SSE scalar insn
3516 leaves the upper half of the destination register
3517 unchanged. Whereas the required semantics of these
3518 primops is that the upper half is simply copied in from the
3519 argument. */
3520 HReg arg = iselVecExpr(env, e->Iex.Unop.arg);
3521 HReg dst = newVRegV(env);
3522 REQUIRE_SSE2;
3523 addInstr(env, mk_vMOVsd_RR(arg, dst));
3524 addInstr(env, X86Instr_Sse64FLo(op, arg, dst));
3525 return dst;
3528 case Iop_32UtoV128: {
3529 HReg dst = newVRegV(env);
3530 X86AMode* esp0 = X86AMode_IR(0, hregX86_ESP());
3531 X86RMI* rmi = iselIntExpr_RMI(env, e->Iex.Unop.arg);
3532 addInstr(env, X86Instr_Push(rmi));
3533 addInstr(env, X86Instr_SseLdzLO(4, dst, esp0));
3534 add_to_esp(env, 4);
3535 return dst;
3538 case Iop_64UtoV128: {
3539 HReg rHi, rLo;
3540 HReg dst = newVRegV(env);
3541 X86AMode* esp0 = X86AMode_IR(0, hregX86_ESP());
3542 iselInt64Expr(&rHi, &rLo, env, e->Iex.Unop.arg);
3543 addInstr(env, X86Instr_Push(X86RMI_Reg(rHi)));
3544 addInstr(env, X86Instr_Push(X86RMI_Reg(rLo)));
3545 addInstr(env, X86Instr_SseLdzLO(8, dst, esp0));
3546 add_to_esp(env, 8);
3547 return dst;
3550 default:
3551 break;
3552 } /* switch (e->Iex.Unop.op) */
3553 } /* if (e->tag == Iex_Unop) */
3555 if (e->tag == Iex_Binop) {
3556 switch (e->Iex.Binop.op) {
3558 case Iop_Sqrt64Fx2:
3559 REQUIRE_SSE2;
3560 /* fallthrough */
3561 case Iop_Sqrt32Fx4: {
3562 /* :: (rmode, vec) -> vec */
3563 HReg arg = iselVecExpr(env, e->Iex.Binop.arg2);
3564 HReg dst = newVRegV(env);
3565 /* XXXROUNDINGFIXME */
3566 /* set roundingmode here */
3567 addInstr(env, (e->Iex.Binop.op == Iop_Sqrt64Fx2
3568 ? X86Instr_Sse64Fx2 : X86Instr_Sse32Fx4)
3569 (Xsse_SQRTF, arg, dst));
3570 return dst;
3573 case Iop_SetV128lo32: {
3574 HReg dst = newVRegV(env);
3575 HReg srcV = iselVecExpr(env, e->Iex.Binop.arg1);
3576 HReg srcI = iselIntExpr_R(env, e->Iex.Binop.arg2);
3577 X86AMode* esp0 = X86AMode_IR(0, hregX86_ESP());
3578 sub_from_esp(env, 16);
3579 addInstr(env, X86Instr_SseLdSt(False/*store*/, srcV, esp0));
3580 addInstr(env, X86Instr_Alu32M(Xalu_MOV, X86RI_Reg(srcI), esp0));
3581 addInstr(env, X86Instr_SseLdSt(True/*load*/, dst, esp0));
3582 add_to_esp(env, 16);
3583 return dst;
3586 case Iop_SetV128lo64: {
3587 HReg dst = newVRegV(env);
3588 HReg srcV = iselVecExpr(env, e->Iex.Binop.arg1);
3589 HReg srcIhi, srcIlo;
3590 X86AMode* esp0 = X86AMode_IR(0, hregX86_ESP());
3591 X86AMode* esp4 = advance4(esp0);
3592 iselInt64Expr(&srcIhi, &srcIlo, env, e->Iex.Binop.arg2);
3593 sub_from_esp(env, 16);
3594 addInstr(env, X86Instr_SseLdSt(False/*store*/, srcV, esp0));
3595 addInstr(env, X86Instr_Alu32M(Xalu_MOV, X86RI_Reg(srcIlo), esp0));
3596 addInstr(env, X86Instr_Alu32M(Xalu_MOV, X86RI_Reg(srcIhi), esp4));
3597 addInstr(env, X86Instr_SseLdSt(True/*load*/, dst, esp0));
3598 add_to_esp(env, 16);
3599 return dst;
3602 case Iop_64HLtoV128: {
3603 HReg r3, r2, r1, r0;
3604 X86AMode* esp0 = X86AMode_IR(0, hregX86_ESP());
3605 X86AMode* esp4 = advance4(esp0);
3606 X86AMode* esp8 = advance4(esp4);
3607 X86AMode* esp12 = advance4(esp8);
3608 HReg dst = newVRegV(env);
3609 /* do this via the stack (easy, convenient, etc) */
3610 sub_from_esp(env, 16);
3611 /* Do the less significant 64 bits */
3612 iselInt64Expr(&r1, &r0, env, e->Iex.Binop.arg2);
3613 addInstr(env, X86Instr_Alu32M(Xalu_MOV, X86RI_Reg(r0), esp0));
3614 addInstr(env, X86Instr_Alu32M(Xalu_MOV, X86RI_Reg(r1), esp4));
3615 /* Do the more significant 64 bits */
3616 iselInt64Expr(&r3, &r2, env, e->Iex.Binop.arg1);
3617 addInstr(env, X86Instr_Alu32M(Xalu_MOV, X86RI_Reg(r2), esp8));
3618 addInstr(env, X86Instr_Alu32M(Xalu_MOV, X86RI_Reg(r3), esp12));
3619 /* Fetch result back from stack. */
3620 addInstr(env, X86Instr_SseLdSt(True/*load*/, dst, esp0));
3621 add_to_esp(env, 16);
3622 return dst;
3625 case Iop_CmpEQ32Fx4: op = Xsse_CMPEQF; goto do_32Fx4;
3626 case Iop_CmpLT32Fx4: op = Xsse_CMPLTF; goto do_32Fx4;
3627 case Iop_CmpLE32Fx4: op = Xsse_CMPLEF; goto do_32Fx4;
3628 case Iop_CmpUN32Fx4: op = Xsse_CMPUNF; goto do_32Fx4;
3629 case Iop_Max32Fx4: op = Xsse_MAXF; goto do_32Fx4;
3630 case Iop_Min32Fx4: op = Xsse_MINF; goto do_32Fx4;
3631 do_32Fx4:
3633 HReg argL = iselVecExpr(env, e->Iex.Binop.arg1);
3634 HReg argR = iselVecExpr(env, e->Iex.Binop.arg2);
3635 HReg dst = newVRegV(env);
3636 addInstr(env, mk_vMOVsd_RR(argL, dst));
3637 addInstr(env, X86Instr_Sse32Fx4(op, argR, dst));
3638 return dst;
3641 case Iop_CmpEQ64Fx2: op = Xsse_CMPEQF; goto do_64Fx2;
3642 case Iop_CmpLT64Fx2: op = Xsse_CMPLTF; goto do_64Fx2;
3643 case Iop_CmpLE64Fx2: op = Xsse_CMPLEF; goto do_64Fx2;
3644 case Iop_CmpUN64Fx2: op = Xsse_CMPUNF; goto do_64Fx2;
3645 case Iop_Max64Fx2: op = Xsse_MAXF; goto do_64Fx2;
3646 case Iop_Min64Fx2: op = Xsse_MINF; goto do_64Fx2;
3647 do_64Fx2:
3649 HReg argL = iselVecExpr(env, e->Iex.Binop.arg1);
3650 HReg argR = iselVecExpr(env, e->Iex.Binop.arg2);
3651 HReg dst = newVRegV(env);
3652 REQUIRE_SSE2;
3653 addInstr(env, mk_vMOVsd_RR(argL, dst));
3654 addInstr(env, X86Instr_Sse64Fx2(op, argR, dst));
3655 return dst;
3658 case Iop_CmpEQ32F0x4: op = Xsse_CMPEQF; goto do_32F0x4;
3659 case Iop_CmpLT32F0x4: op = Xsse_CMPLTF; goto do_32F0x4;
3660 case Iop_CmpLE32F0x4: op = Xsse_CMPLEF; goto do_32F0x4;
3661 case Iop_CmpUN32F0x4: op = Xsse_CMPUNF; goto do_32F0x4;
3662 case Iop_Add32F0x4: op = Xsse_ADDF; goto do_32F0x4;
3663 case Iop_Div32F0x4: op = Xsse_DIVF; goto do_32F0x4;
3664 case Iop_Max32F0x4: op = Xsse_MAXF; goto do_32F0x4;
3665 case Iop_Min32F0x4: op = Xsse_MINF; goto do_32F0x4;
3666 case Iop_Mul32F0x4: op = Xsse_MULF; goto do_32F0x4;
3667 case Iop_Sub32F0x4: op = Xsse_SUBF; goto do_32F0x4;
3668 do_32F0x4: {
3669 HReg argL = iselVecExpr(env, e->Iex.Binop.arg1);
3670 HReg argR = iselVecExpr(env, e->Iex.Binop.arg2);
3671 HReg dst = newVRegV(env);
3672 addInstr(env, mk_vMOVsd_RR(argL, dst));
3673 addInstr(env, X86Instr_Sse32FLo(op, argR, dst));
3674 return dst;
3677 case Iop_CmpEQ64F0x2: op = Xsse_CMPEQF; goto do_64F0x2;
3678 case Iop_CmpLT64F0x2: op = Xsse_CMPLTF; goto do_64F0x2;
3679 case Iop_CmpLE64F0x2: op = Xsse_CMPLEF; goto do_64F0x2;
3680 case Iop_CmpUN64F0x2: op = Xsse_CMPUNF; goto do_64F0x2;
3681 case Iop_Add64F0x2: op = Xsse_ADDF; goto do_64F0x2;
3682 case Iop_Div64F0x2: op = Xsse_DIVF; goto do_64F0x2;
3683 case Iop_Max64F0x2: op = Xsse_MAXF; goto do_64F0x2;
3684 case Iop_Min64F0x2: op = Xsse_MINF; goto do_64F0x2;
3685 case Iop_Mul64F0x2: op = Xsse_MULF; goto do_64F0x2;
3686 case Iop_Sub64F0x2: op = Xsse_SUBF; goto do_64F0x2;
3687 do_64F0x2: {
3688 HReg argL = iselVecExpr(env, e->Iex.Binop.arg1);
3689 HReg argR = iselVecExpr(env, e->Iex.Binop.arg2);
3690 HReg dst = newVRegV(env);
3691 REQUIRE_SSE2;
3692 addInstr(env, mk_vMOVsd_RR(argL, dst));
3693 addInstr(env, X86Instr_Sse64FLo(op, argR, dst));
3694 return dst;
3697 case Iop_QNarrowBin32Sto16Sx8:
3698 op = Xsse_PACKSSD; arg1isEReg = True; goto do_SseReRg;
3699 case Iop_QNarrowBin16Sto8Sx16:
3700 op = Xsse_PACKSSW; arg1isEReg = True; goto do_SseReRg;
3701 case Iop_QNarrowBin16Sto8Ux16:
3702 op = Xsse_PACKUSW; arg1isEReg = True; goto do_SseReRg;
3704 case Iop_InterleaveHI8x16:
3705 op = Xsse_UNPCKHB; arg1isEReg = True; goto do_SseReRg;
3706 case Iop_InterleaveHI16x8:
3707 op = Xsse_UNPCKHW; arg1isEReg = True; goto do_SseReRg;
3708 case Iop_InterleaveHI32x4:
3709 op = Xsse_UNPCKHD; arg1isEReg = True; goto do_SseReRg;
3710 case Iop_InterleaveHI64x2:
3711 op = Xsse_UNPCKHQ; arg1isEReg = True; goto do_SseReRg;
3713 case Iop_InterleaveLO8x16:
3714 op = Xsse_UNPCKLB; arg1isEReg = True; goto do_SseReRg;
3715 case Iop_InterleaveLO16x8:
3716 op = Xsse_UNPCKLW; arg1isEReg = True; goto do_SseReRg;
3717 case Iop_InterleaveLO32x4:
3718 op = Xsse_UNPCKLD; arg1isEReg = True; goto do_SseReRg;
3719 case Iop_InterleaveLO64x2:
3720 op = Xsse_UNPCKLQ; arg1isEReg = True; goto do_SseReRg;
3722 case Iop_AndV128: op = Xsse_AND; goto do_SseReRg;
3723 case Iop_OrV128: op = Xsse_OR; goto do_SseReRg;
3724 case Iop_XorV128: op = Xsse_XOR; goto do_SseReRg;
3725 case Iop_Add8x16: op = Xsse_ADD8; goto do_SseReRg;
3726 case Iop_Add16x8: op = Xsse_ADD16; goto do_SseReRg;
3727 case Iop_Add32x4: op = Xsse_ADD32; goto do_SseReRg;
3728 case Iop_Add64x2: op = Xsse_ADD64; goto do_SseReRg;
3729 case Iop_QAdd8Sx16: op = Xsse_QADD8S; goto do_SseReRg;
3730 case Iop_QAdd16Sx8: op = Xsse_QADD16S; goto do_SseReRg;
3731 case Iop_QAdd8Ux16: op = Xsse_QADD8U; goto do_SseReRg;
3732 case Iop_QAdd16Ux8: op = Xsse_QADD16U; goto do_SseReRg;
3733 case Iop_Avg8Ux16: op = Xsse_AVG8U; goto do_SseReRg;
3734 case Iop_Avg16Ux8: op = Xsse_AVG16U; goto do_SseReRg;
3735 case Iop_CmpEQ8x16: op = Xsse_CMPEQ8; goto do_SseReRg;
3736 case Iop_CmpEQ16x8: op = Xsse_CMPEQ16; goto do_SseReRg;
3737 case Iop_CmpEQ32x4: op = Xsse_CMPEQ32; goto do_SseReRg;
3738 case Iop_CmpGT8Sx16: op = Xsse_CMPGT8S; goto do_SseReRg;
3739 case Iop_CmpGT16Sx8: op = Xsse_CMPGT16S; goto do_SseReRg;
3740 case Iop_CmpGT32Sx4: op = Xsse_CMPGT32S; goto do_SseReRg;
3741 case Iop_Max16Sx8: op = Xsse_MAX16S; goto do_SseReRg;
3742 case Iop_Max8Ux16: op = Xsse_MAX8U; goto do_SseReRg;
3743 case Iop_Min16Sx8: op = Xsse_MIN16S; goto do_SseReRg;
3744 case Iop_Min8Ux16: op = Xsse_MIN8U; goto do_SseReRg;
3745 case Iop_MulHi16Ux8: op = Xsse_MULHI16U; goto do_SseReRg;
3746 case Iop_MulHi16Sx8: op = Xsse_MULHI16S; goto do_SseReRg;
3747 case Iop_Mul16x8: op = Xsse_MUL16; goto do_SseReRg;
3748 case Iop_Sub8x16: op = Xsse_SUB8; goto do_SseReRg;
3749 case Iop_Sub16x8: op = Xsse_SUB16; goto do_SseReRg;
3750 case Iop_Sub32x4: op = Xsse_SUB32; goto do_SseReRg;
3751 case Iop_Sub64x2: op = Xsse_SUB64; goto do_SseReRg;
3752 case Iop_QSub8Sx16: op = Xsse_QSUB8S; goto do_SseReRg;
3753 case Iop_QSub16Sx8: op = Xsse_QSUB16S; goto do_SseReRg;
3754 case Iop_QSub8Ux16: op = Xsse_QSUB8U; goto do_SseReRg;
3755 case Iop_QSub16Ux8: op = Xsse_QSUB16U; goto do_SseReRg;
3756 do_SseReRg: {
3757 HReg arg1 = iselVecExpr(env, e->Iex.Binop.arg1);
3758 HReg arg2 = iselVecExpr(env, e->Iex.Binop.arg2);
3759 HReg dst = newVRegV(env);
3760 if (op != Xsse_OR && op != Xsse_AND && op != Xsse_XOR)
3761 REQUIRE_SSE2;
3762 if (arg1isEReg) {
3763 addInstr(env, mk_vMOVsd_RR(arg2, dst));
3764 addInstr(env, X86Instr_SseReRg(op, arg1, dst));
3765 } else {
3766 addInstr(env, mk_vMOVsd_RR(arg1, dst));
3767 addInstr(env, X86Instr_SseReRg(op, arg2, dst));
3769 return dst;
3772 case Iop_ShlN8x16: {
3773 /* This instruction doesn't exist so we need to fake it using
3774 Xsse_SHL16 and Xsse_SHR16.
3776 We'd like to shift every byte in the 16-byte register to the left by
3777 some amount.
3779 Instead, we will make a copy and shift all the 16-bit words to the
3780 *right* by 8 and then to the left by 8 plus the shift amount. That
3781 will get us the correct answer for the upper 8 bits of each 16-bit
3782 word and zero elsewhere.
3784 Then we will shift all the 16-bit words in the original to the left
3785 by 8 plus the shift amount and then to the right by 8. This will
3786 get the correct answer for the lower 8 bits of each 16-bit word and
3787 zero elsewhere.
3789 Finally, we will OR those two results together.
3791 Because we don't have a shift by constant in x86, we store the
3792 constant 8 into a register and shift by that as needed.
3794 HReg greg = iselVecExpr(env, e->Iex.Binop.arg1);
3795 X86RMI* rmi = iselIntExpr_RMI(env, e->Iex.Binop.arg2);
3796 X86AMode* esp0 = X86AMode_IR(0, hregX86_ESP());
3797 HReg ereg = newVRegV(env);
3798 HReg eight = newVRegV(env); // To store the constant value 8.
3799 HReg dst = newVRegV(env);
3800 HReg hi = newVRegV(env);
3801 REQUIRE_SSE2;
3802 addInstr(env, X86Instr_Push(X86RMI_Imm(0)));
3803 addInstr(env, X86Instr_Push(X86RMI_Imm(0)));
3804 addInstr(env, X86Instr_Push(X86RMI_Imm(0)));
3805 addInstr(env, X86Instr_Push(rmi));
3806 addInstr(env, X86Instr_SseLdSt(True/*load*/, ereg, esp0));
3807 addInstr(env, X86Instr_Push(X86RMI_Imm(0)));
3808 addInstr(env, X86Instr_Push(X86RMI_Imm(0)));
3809 addInstr(env, X86Instr_Push(X86RMI_Imm(0)));
3810 addInstr(env, X86Instr_Push(X86RMI_Imm(8)));
3811 addInstr(env, X86Instr_SseLdSt(True/*load*/, eight, esp0));
3813 op = Xsse_SHL16;
3814 X86SseOp reverse_op = Xsse_SHR16;
3815 addInstr(env, mk_vMOVsd_RR(greg, hi));
3816 addInstr(env, X86Instr_SseReRg(reverse_op, eight, hi));
3817 addInstr(env, X86Instr_SseReRg(op, eight, hi));
3818 addInstr(env, X86Instr_SseReRg(op, ereg, hi));
3819 addInstr(env, mk_vMOVsd_RR(greg, dst));
3820 addInstr(env, X86Instr_SseReRg(op, eight, dst));
3821 addInstr(env, X86Instr_SseReRg(op, ereg, dst));
3822 addInstr(env, X86Instr_SseReRg(reverse_op, eight, dst));
3823 addInstr(env, X86Instr_SseReRg(Xsse_OR, hi, dst));
3825 add_to_esp(env, 32);
3826 return dst;
3828 case Iop_ShlN16x8: op = Xsse_SHL16; goto do_SseShift;
3829 case Iop_ShlN32x4: op = Xsse_SHL32; goto do_SseShift;
3830 case Iop_ShlN64x2: op = Xsse_SHL64; goto do_SseShift;
3831 case Iop_SarN16x8: op = Xsse_SAR16; goto do_SseShift;
3832 case Iop_SarN32x4: op = Xsse_SAR32; goto do_SseShift;
3833 case Iop_ShrN16x8: op = Xsse_SHR16; goto do_SseShift;
3834 case Iop_ShrN32x4: op = Xsse_SHR32; goto do_SseShift;
3835 case Iop_ShrN64x2: op = Xsse_SHR64; goto do_SseShift;
3836 do_SseShift: {
3837 HReg greg = iselVecExpr(env, e->Iex.Binop.arg1);
3838 X86RMI* rmi = iselIntExpr_RMI(env, e->Iex.Binop.arg2);
3839 X86AMode* esp0 = X86AMode_IR(0, hregX86_ESP());
3840 HReg ereg = newVRegV(env);
3841 HReg dst = newVRegV(env);
3842 REQUIRE_SSE2;
3843 addInstr(env, X86Instr_Push(X86RMI_Imm(0)));
3844 addInstr(env, X86Instr_Push(X86RMI_Imm(0)));
3845 addInstr(env, X86Instr_Push(X86RMI_Imm(0)));
3846 addInstr(env, X86Instr_Push(rmi));
3847 addInstr(env, X86Instr_SseLdSt(True/*load*/, ereg, esp0));
3848 addInstr(env, mk_vMOVsd_RR(greg, dst));
3849 addInstr(env, X86Instr_SseReRg(op, ereg, dst));
3850 add_to_esp(env, 16);
3851 return dst;
3854 case Iop_NarrowBin32to16x8:
3855 fn = (HWord)h_generic_calc_NarrowBin32to16x8;
3856 goto do_SseAssistedBinary;
3857 case Iop_NarrowBin16to8x16:
3858 fn = (HWord)h_generic_calc_NarrowBin16to8x16;
3859 goto do_SseAssistedBinary;
3860 do_SseAssistedBinary: {
3861 /* As with the amd64 case (where this is copied from) we
3862 generate pretty bad code. */
3863 vassert(fn != 0);
3864 HReg dst = newVRegV(env);
3865 HReg argL = iselVecExpr(env, e->Iex.Binop.arg1);
3866 HReg argR = iselVecExpr(env, e->Iex.Binop.arg2);
3867 HReg argp = newVRegI(env);
3868 /* subl $112, %esp -- make a space */
3869 sub_from_esp(env, 112);
3870 /* leal 48(%esp), %r_argp -- point into it */
3871 addInstr(env, X86Instr_Lea32(X86AMode_IR(48, hregX86_ESP()),
3872 argp));
3873 /* andl $-16, %r_argp -- 16-align the pointer */
3874 addInstr(env, X86Instr_Alu32R(Xalu_AND,
3875 X86RMI_Imm( ~(UInt)15 ),
3876 argp));
3877 /* Prepare 3 arg regs:
3878 leal 0(%r_argp), %eax
3879 leal 16(%r_argp), %edx
3880 leal 32(%r_argp), %ecx
3882 addInstr(env, X86Instr_Lea32(X86AMode_IR(0, argp),
3883 hregX86_EAX()));
3884 addInstr(env, X86Instr_Lea32(X86AMode_IR(16, argp),
3885 hregX86_EDX()));
3886 addInstr(env, X86Instr_Lea32(X86AMode_IR(32, argp),
3887 hregX86_ECX()));
3888 /* Store the two args, at (%edx) and (%ecx):
3889 movupd %argL, 0(%edx)
3890 movupd %argR, 0(%ecx)
3892 addInstr(env, X86Instr_SseLdSt(False/*!isLoad*/, argL,
3893 X86AMode_IR(0, hregX86_EDX())));
3894 addInstr(env, X86Instr_SseLdSt(False/*!isLoad*/, argR,
3895 X86AMode_IR(0, hregX86_ECX())));
3896 /* call the helper */
3897 addInstr(env, X86Instr_Call( Xcc_ALWAYS, (Addr32)fn,
3898 3, mk_RetLoc_simple(RLPri_None) ));
3899 /* fetch the result from memory, using %r_argp, which the
3900 register allocator will keep alive across the call. */
3901 addInstr(env, X86Instr_SseLdSt(True/*isLoad*/, dst,
3902 X86AMode_IR(0, argp)));
3903 /* and finally, clear the space */
3904 add_to_esp(env, 112);
3905 return dst;
3908 default:
3909 break;
3910 } /* switch (e->Iex.Binop.op) */
3911 } /* if (e->tag == Iex_Binop) */
3914 if (e->tag == Iex_Triop) {
3915 IRTriop *triop = e->Iex.Triop.details;
3916 switch (triop->op) {
3918 case Iop_Add32Fx4: op = Xsse_ADDF; goto do_32Fx4_w_rm;
3919 case Iop_Sub32Fx4: op = Xsse_SUBF; goto do_32Fx4_w_rm;
3920 case Iop_Mul32Fx4: op = Xsse_MULF; goto do_32Fx4_w_rm;
3921 case Iop_Div32Fx4: op = Xsse_DIVF; goto do_32Fx4_w_rm;
3922 do_32Fx4_w_rm:
3924 HReg argL = iselVecExpr(env, triop->arg2);
3925 HReg argR = iselVecExpr(env, triop->arg3);
3926 HReg dst = newVRegV(env);
3927 addInstr(env, mk_vMOVsd_RR(argL, dst));
3928 /* XXXROUNDINGFIXME */
3929 /* set roundingmode here */
3930 addInstr(env, X86Instr_Sse32Fx4(op, argR, dst));
3931 return dst;
3934 case Iop_Add64Fx2: op = Xsse_ADDF; goto do_64Fx2_w_rm;
3935 case Iop_Sub64Fx2: op = Xsse_SUBF; goto do_64Fx2_w_rm;
3936 case Iop_Mul64Fx2: op = Xsse_MULF; goto do_64Fx2_w_rm;
3937 case Iop_Div64Fx2: op = Xsse_DIVF; goto do_64Fx2_w_rm;
3938 do_64Fx2_w_rm:
3940 HReg argL = iselVecExpr(env, triop->arg2);
3941 HReg argR = iselVecExpr(env, triop->arg3);
3942 HReg dst = newVRegV(env);
3943 REQUIRE_SSE2;
3944 addInstr(env, mk_vMOVsd_RR(argL, dst));
3945 /* XXXROUNDINGFIXME */
3946 /* set roundingmode here */
3947 addInstr(env, X86Instr_Sse64Fx2(op, argR, dst));
3948 return dst;
3951 default:
3952 break;
3953 } /* switch (triop->op) */
3954 } /* if (e->tag == Iex_Triop) */
3957 if (e->tag == Iex_ITE) { // VFD
3958 HReg r1 = iselVecExpr(env, e->Iex.ITE.iftrue);
3959 HReg r0 = iselVecExpr(env, e->Iex.ITE.iffalse);
3960 HReg dst = newVRegV(env);
3961 addInstr(env, mk_vMOVsd_RR(r1,dst));
3962 X86CondCode cc = iselCondCode(env, e->Iex.ITE.cond);
3963 addInstr(env, X86Instr_SseCMov(cc ^ 1, r0, dst));
3964 return dst;
3967 vec_fail:
3968 vex_printf("iselVecExpr (hwcaps = %s): can't reduce\n",
3969 LibVEX_ppVexHwCaps(VexArchX86,env->hwcaps));
3970 ppIRExpr(e);
3971 vpanic("iselVecExpr_wrk");
3973 # undef REQUIRE_SSE1
3974 # undef REQUIRE_SSE2
3975 # undef SSE2_OR_ABOVE
/*---------------------------------------------------------*/
/*--- ISEL: Statements                                  ---*/
/*---------------------------------------------------------*/
3983 static void iselStmt ( ISelEnv* env, IRStmt* stmt )
3985 if (vex_traceflags & VEX_TRACE_VCODE) {
3986 vex_printf("\n-- ");
3987 ppIRStmt(stmt);
3988 vex_printf("\n");
3991 switch (stmt->tag) {
3993 /* --------- STORE --------- */
3994 case Ist_Store: {
3995 IRType tya = typeOfIRExpr(env->type_env, stmt->Ist.Store.addr);
3996 IRType tyd = typeOfIRExpr(env->type_env, stmt->Ist.Store.data);
3997 IREndness end = stmt->Ist.Store.end;
3999 if (tya != Ity_I32 || end != Iend_LE)
4000 goto stmt_fail;
4002 if (tyd == Ity_I32) {
4003 X86AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr);
4004 X86RI* ri = iselIntExpr_RI(env, stmt->Ist.Store.data);
4005 addInstr(env, X86Instr_Alu32M(Xalu_MOV,ri,am));
4006 return;
4008 if (tyd == Ity_I8 || tyd == Ity_I16) {
4009 X86AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr);
4010 HReg r = iselIntExpr_R(env, stmt->Ist.Store.data);
4011 addInstr(env, X86Instr_Store( toUChar(tyd==Ity_I8 ? 1 : 2),
4012 r,am ));
4013 return;
4015 if (tyd == Ity_F64) {
4016 X86AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr);
4017 HReg r = iselDblExpr(env, stmt->Ist.Store.data);
4018 addInstr(env, X86Instr_FpLdSt(False/*store*/, 8, r, am));
4019 return;
4021 if (tyd == Ity_F32) {
4022 X86AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr);
4023 HReg r = iselFltExpr(env, stmt->Ist.Store.data);
4024 addInstr(env, X86Instr_FpLdSt(False/*store*/, 4, r, am));
4025 return;
4027 if (tyd == Ity_I64) {
4028 HReg vHi, vLo, rA;
4029 iselInt64Expr(&vHi, &vLo, env, stmt->Ist.Store.data);
4030 rA = iselIntExpr_R(env, stmt->Ist.Store.addr);
4031 addInstr(env, X86Instr_Alu32M(
4032 Xalu_MOV, X86RI_Reg(vLo), X86AMode_IR(0, rA)));
4033 addInstr(env, X86Instr_Alu32M(
4034 Xalu_MOV, X86RI_Reg(vHi), X86AMode_IR(4, rA)));
4035 return;
4037 if (tyd == Ity_V128) {
4038 X86AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr);
4039 HReg r = iselVecExpr(env, stmt->Ist.Store.data);
4040 addInstr(env, X86Instr_SseLdSt(False/*store*/, r, am));
4041 return;
4043 break;
4046 /* --------- PUT --------- */
4047 case Ist_Put: {
4048 IRType ty = typeOfIRExpr(env->type_env, stmt->Ist.Put.data);
4049 if (ty == Ity_I32) {
4050 /* We're going to write to memory, so compute the RHS into an
4051 X86RI. */
4052 X86RI* ri = iselIntExpr_RI(env, stmt->Ist.Put.data);
4053 addInstr(env,
4054 X86Instr_Alu32M(
4055 Xalu_MOV,
4057 X86AMode_IR(stmt->Ist.Put.offset,hregX86_EBP())
4059 return;
4061 if (ty == Ity_I8 || ty == Ity_I16) {
4062 HReg r = iselIntExpr_R(env, stmt->Ist.Put.data);
4063 addInstr(env, X86Instr_Store(
4064 toUChar(ty==Ity_I8 ? 1 : 2),
4066 X86AMode_IR(stmt->Ist.Put.offset,
4067 hregX86_EBP())));
4068 return;
4070 if (ty == Ity_I64) {
4071 HReg vHi, vLo;
4072 X86AMode* am = X86AMode_IR(stmt->Ist.Put.offset, hregX86_EBP());
4073 X86AMode* am4 = advance4(am);
4074 iselInt64Expr(&vHi, &vLo, env, stmt->Ist.Put.data);
4075 addInstr(env, X86Instr_Alu32M( Xalu_MOV, X86RI_Reg(vLo), am ));
4076 addInstr(env, X86Instr_Alu32M( Xalu_MOV, X86RI_Reg(vHi), am4 ));
4077 return;
4079 if (ty == Ity_V128) {
4080 HReg vec = iselVecExpr(env, stmt->Ist.Put.data);
4081 X86AMode* am = X86AMode_IR(stmt->Ist.Put.offset, hregX86_EBP());
4082 addInstr(env, X86Instr_SseLdSt(False/*store*/, vec, am));
4083 return;
4085 if (ty == Ity_F32) {
4086 HReg f32 = iselFltExpr(env, stmt->Ist.Put.data);
4087 X86AMode* am = X86AMode_IR(stmt->Ist.Put.offset, hregX86_EBP());
4088 set_FPU_rounding_default(env); /* paranoia */
4089 addInstr(env, X86Instr_FpLdSt( False/*store*/, 4, f32, am ));
4090 return;
4092 if (ty == Ity_F64) {
4093 HReg f64 = iselDblExpr(env, stmt->Ist.Put.data);
4094 X86AMode* am = X86AMode_IR(stmt->Ist.Put.offset, hregX86_EBP());
4095 set_FPU_rounding_default(env); /* paranoia */
4096 addInstr(env, X86Instr_FpLdSt( False/*store*/, 8, f64, am ));
4097 return;
4099 break;
4102 /* --------- Indexed PUT --------- */
4103 case Ist_PutI: {
4104 IRPutI *puti = stmt->Ist.PutI.details;
4106 X86AMode* am
4107 = genGuestArrayOffset(
4108 env, puti->descr,
4109 puti->ix, puti->bias );
4111 IRType ty = typeOfIRExpr(env->type_env, puti->data);
4112 if (ty == Ity_F64) {
4113 HReg val = iselDblExpr(env, puti->data);
4114 addInstr(env, X86Instr_FpLdSt( False/*store*/, 8, val, am ));
4115 return;
4117 if (ty == Ity_I8) {
4118 HReg r = iselIntExpr_R(env, puti->data);
4119 addInstr(env, X86Instr_Store( 1, r, am ));
4120 return;
4122 if (ty == Ity_I32) {
4123 HReg r = iselIntExpr_R(env, puti->data);
4124 addInstr(env, X86Instr_Alu32M( Xalu_MOV, X86RI_Reg(r), am ));
4125 return;
4127 if (ty == Ity_I64) {
4128 HReg rHi, rLo;
4129 X86AMode* am4 = advance4(am);
4130 iselInt64Expr(&rHi, &rLo, env, puti->data);
4131 addInstr(env, X86Instr_Alu32M( Xalu_MOV, X86RI_Reg(rLo), am ));
4132 addInstr(env, X86Instr_Alu32M( Xalu_MOV, X86RI_Reg(rHi), am4 ));
4133 return;
4135 break;
4138 /* --------- TMP --------- */
4139 case Ist_WrTmp: {
4140 IRTemp tmp = stmt->Ist.WrTmp.tmp;
4141 IRType ty = typeOfIRTemp(env->type_env, tmp);
4143 /* optimisation: if stmt->Ist.WrTmp.data is Add32(..,..),
4144 compute it into an AMode and then use LEA. This usually
4145 produces fewer instructions, often because (for memcheck
4146 created IR) we get t = address-expression, (t is later used
4147 twice) and so doing this naturally turns address-expression
4148 back into an X86 amode. */
4149 if (ty == Ity_I32
4150 && stmt->Ist.WrTmp.data->tag == Iex_Binop
4151 && stmt->Ist.WrTmp.data->Iex.Binop.op == Iop_Add32) {
4152 X86AMode* am = iselIntExpr_AMode(env, stmt->Ist.WrTmp.data);
4153 HReg dst = lookupIRTemp(env, tmp);
4154 if (am->tag == Xam_IR && am->Xam.IR.imm == 0) {
4155 /* Hmm, iselIntExpr_AMode wimped out and just computed the
4156 value into a register. Just emit a normal reg-reg move
4157 so reg-alloc can coalesce it away in the usual way. */
4158 HReg src = am->Xam.IR.reg;
4159 addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Reg(src), dst));
4160 } else {
4161 addInstr(env, X86Instr_Lea32(am,dst));
4163 return;
4166 if (ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8) {
4167 X86RMI* rmi = iselIntExpr_RMI(env, stmt->Ist.WrTmp.data);
4168 HReg dst = lookupIRTemp(env, tmp);
4169 addInstr(env, X86Instr_Alu32R(Xalu_MOV,rmi,dst));
4170 return;
4172 if (ty == Ity_I64) {
4173 HReg rHi, rLo, dstHi, dstLo;
4174 iselInt64Expr(&rHi,&rLo, env, stmt->Ist.WrTmp.data);
4175 lookupIRTemp64( &dstHi, &dstLo, env, tmp);
4176 addInstr(env, mk_iMOVsd_RR(rHi,dstHi) );
4177 addInstr(env, mk_iMOVsd_RR(rLo,dstLo) );
4178 return;
4180 if (ty == Ity_I1) {
4181 X86CondCode cond = iselCondCode(env, stmt->Ist.WrTmp.data);
4182 HReg dst = lookupIRTemp(env, tmp);
4183 addInstr(env, X86Instr_Set32(cond, dst));
4184 return;
4186 if (ty == Ity_F64) {
4187 HReg dst = lookupIRTemp(env, tmp);
4188 HReg src = iselDblExpr(env, stmt->Ist.WrTmp.data);
4189 addInstr(env, X86Instr_FpUnary(Xfp_MOV,src,dst));
4190 return;
4192 if (ty == Ity_F32) {
4193 HReg dst = lookupIRTemp(env, tmp);
4194 HReg src = iselFltExpr(env, stmt->Ist.WrTmp.data);
4195 addInstr(env, X86Instr_FpUnary(Xfp_MOV,src,dst));
4196 return;
4198 if (ty == Ity_V128) {
4199 HReg dst = lookupIRTemp(env, tmp);
4200 HReg src = iselVecExpr(env, stmt->Ist.WrTmp.data);
4201 addInstr(env, mk_vMOVsd_RR(src,dst));
4202 return;
4204 break;
4207 /* --------- Call to DIRTY helper --------- */
4208 case Ist_Dirty: {
4209 IRDirty* d = stmt->Ist.Dirty.details;
4211 /* Figure out the return type, if any. */
4212 IRType retty = Ity_INVALID;
4213 if (d->tmp != IRTemp_INVALID)
4214 retty = typeOfIRTemp(env->type_env, d->tmp);
4216 Bool retty_ok = False;
4217 switch (retty) {
4218 case Ity_INVALID: /* function doesn't return anything */
4219 case Ity_I64: case Ity_I32: case Ity_I16: case Ity_I8:
4220 case Ity_V128:
4221 retty_ok = True; break;
4222 default:
4223 break;
4225 if (!retty_ok)
4226 break; /* will go to stmt_fail: */
4228 /* Marshal args, do the call, and set the return value to
4229 0x555..555 if this is a conditional call that returns a value
4230 and the call is skipped. */
4231 UInt addToSp = 0;
4232 RetLoc rloc = mk_RetLoc_INVALID();
4233 doHelperCall( &addToSp, &rloc, env, d->guard, d->cee, retty, d->args );
4234 vassert(is_sane_RetLoc(rloc));
4236 /* Now figure out what to do with the returned value, if any. */
4237 switch (retty) {
4238 case Ity_INVALID: {
4239 /* No return value. Nothing to do. */
4240 vassert(d->tmp == IRTemp_INVALID);
4241 vassert(rloc.pri == RLPri_None);
4242 vassert(addToSp == 0);
4243 return;
4245 case Ity_I32: case Ity_I16: case Ity_I8: {
4246 /* The returned value is in %eax. Park it in the register
4247 associated with tmp. */
4248 vassert(rloc.pri == RLPri_Int);
4249 vassert(addToSp == 0);
4250 HReg dst = lookupIRTemp(env, d->tmp);
4251 addInstr(env, mk_iMOVsd_RR(hregX86_EAX(),dst) );
4252 return;
4254 case Ity_I64: {
4255 /* The returned value is in %edx:%eax. Park it in the
4256 register-pair associated with tmp. */
4257 vassert(rloc.pri == RLPri_2Int);
4258 vassert(addToSp == 0);
4259 HReg dstHi, dstLo;
4260 lookupIRTemp64( &dstHi, &dstLo, env, d->tmp);
4261 addInstr(env, mk_iMOVsd_RR(hregX86_EDX(),dstHi) );
4262 addInstr(env, mk_iMOVsd_RR(hregX86_EAX(),dstLo) );
4263 return;
4265 case Ity_V128: {
4266 /* The returned value is on the stack, and *retloc tells
4267 us where. Fish it off the stack and then move the
4268 stack pointer upwards to clear it, as directed by
4269 doHelperCall. */
4270 vassert(rloc.pri == RLPri_V128SpRel);
4271 vassert(addToSp >= 16);
4272 HReg dst = lookupIRTemp(env, d->tmp);
4273 X86AMode* am = X86AMode_IR(rloc.spOff, hregX86_ESP());
4274 addInstr(env, X86Instr_SseLdSt( True/*load*/, dst, am ));
4275 add_to_esp(env, addToSp);
4276 return;
4278 default:
4279 /*NOTREACHED*/
4280 vassert(0);
4282 break;
4285 /* --------- MEM FENCE --------- */
4286 case Ist_MBE:
4287 switch (stmt->Ist.MBE.event) {
4288 case Imbe_Fence:
4289 addInstr(env, X86Instr_MFence(env->hwcaps));
4290 return;
4291 default:
4292 break;
4294 break;
4296 /* --------- ACAS --------- */
4297 case Ist_CAS:
4298 if (stmt->Ist.CAS.details->oldHi == IRTemp_INVALID) {
4299 /* "normal" singleton CAS */
4300 UChar sz;
4301 IRCAS* cas = stmt->Ist.CAS.details;
4302 IRType ty = typeOfIRExpr(env->type_env, cas->dataLo);
4303 /* get: cas->expdLo into %eax, and cas->dataLo into %ebx */
4304 X86AMode* am = iselIntExpr_AMode(env, cas->addr);
4305 HReg rDataLo = iselIntExpr_R(env, cas->dataLo);
4306 HReg rExpdLo = iselIntExpr_R(env, cas->expdLo);
4307 HReg rOldLo = lookupIRTemp(env, cas->oldLo);
4308 vassert(cas->expdHi == NULL);
4309 vassert(cas->dataHi == NULL);
4310 addInstr(env, mk_iMOVsd_RR(rExpdLo, rOldLo));
4311 addInstr(env, mk_iMOVsd_RR(rExpdLo, hregX86_EAX()));
4312 addInstr(env, mk_iMOVsd_RR(rDataLo, hregX86_EBX()));
4313 switch (ty) {
4314 case Ity_I32: sz = 4; break;
4315 case Ity_I16: sz = 2; break;
4316 case Ity_I8: sz = 1; break;
4317 default: goto unhandled_cas;
4319 addInstr(env, X86Instr_ACAS(am, sz));
4320 addInstr(env,
4321 X86Instr_CMov32(Xcc_NZ,
4322 X86RM_Reg(hregX86_EAX()), rOldLo));
4323 return;
4324 } else {
4325 /* double CAS */
4326 IRCAS* cas = stmt->Ist.CAS.details;
4327 IRType ty = typeOfIRExpr(env->type_env, cas->dataLo);
4328 /* only 32-bit allowed in this case */
4329 /* get: cas->expdLo into %eax, and cas->dataLo into %ebx */
4330 /* get: cas->expdHi into %edx, and cas->dataHi into %ecx */
4331 X86AMode* am = iselIntExpr_AMode(env, cas->addr);
4332 HReg rDataHi = iselIntExpr_R(env, cas->dataHi);
4333 HReg rDataLo = iselIntExpr_R(env, cas->dataLo);
4334 HReg rExpdHi = iselIntExpr_R(env, cas->expdHi);
4335 HReg rExpdLo = iselIntExpr_R(env, cas->expdLo);
4336 HReg rOldHi = lookupIRTemp(env, cas->oldHi);
4337 HReg rOldLo = lookupIRTemp(env, cas->oldLo);
4338 if (ty != Ity_I32)
4339 goto unhandled_cas;
4340 addInstr(env, mk_iMOVsd_RR(rExpdHi, rOldHi));
4341 addInstr(env, mk_iMOVsd_RR(rExpdLo, rOldLo));
4342 addInstr(env, mk_iMOVsd_RR(rExpdHi, hregX86_EDX()));
4343 addInstr(env, mk_iMOVsd_RR(rExpdLo, hregX86_EAX()));
4344 addInstr(env, mk_iMOVsd_RR(rDataHi, hregX86_ECX()));
4345 addInstr(env, mk_iMOVsd_RR(rDataLo, hregX86_EBX()));
4346 addInstr(env, X86Instr_DACAS(am));
4347 addInstr(env,
4348 X86Instr_CMov32(Xcc_NZ,
4349 X86RM_Reg(hregX86_EDX()), rOldHi));
4350 addInstr(env,
4351 X86Instr_CMov32(Xcc_NZ,
4352 X86RM_Reg(hregX86_EAX()), rOldLo));
4353 return;
4355 unhandled_cas:
4356 break;
4358 /* --------- INSTR MARK --------- */
4359 /* Doesn't generate any executable code ... */
4360 case Ist_IMark:
4361 return;
4363 /* --------- NO-OP --------- */
4364 /* Fairly self-explanatory, wouldn't you say? */
4365 case Ist_NoOp:
4366 return;
4368 /* --------- EXIT --------- */
4369 case Ist_Exit: {
4370 if (stmt->Ist.Exit.dst->tag != Ico_U32)
4371 vpanic("iselStmt(x86): Ist_Exit: dst is not a 32-bit value");
4373 X86CondCode cc = iselCondCode(env, stmt->Ist.Exit.guard);
4374 X86AMode* amEIP = X86AMode_IR(stmt->Ist.Exit.offsIP,
4375 hregX86_EBP());
4377 /* Case: boring transfer to known address */
4378 if (stmt->Ist.Exit.jk == Ijk_Boring) {
4379 if (env->chainingAllowed) {
4380 /* .. almost always true .. */
4381 /* Skip the event check at the dst if this is a forwards
4382 edge. */
4383 Bool toFastEP
4384 = ((Addr32)stmt->Ist.Exit.dst->Ico.U32) > env->max_ga;
4385 if (0) vex_printf("%s", toFastEP ? "Y" : ",");
4386 addInstr(env, X86Instr_XDirect(stmt->Ist.Exit.dst->Ico.U32,
4387 amEIP, cc, toFastEP));
4388 } else {
4389 /* .. very occasionally .. */
4390 /* We can't use chaining, so ask for an assisted transfer,
4391 as that's the only alternative that is allowable. */
4392 HReg r = iselIntExpr_R(env, IRExpr_Const(stmt->Ist.Exit.dst));
4393 addInstr(env, X86Instr_XAssisted(r, amEIP, cc, Ijk_Boring));
4395 return;
4398 /* Case: assisted transfer to arbitrary address */
4399 switch (stmt->Ist.Exit.jk) {
4400 /* Keep this list in sync with that in iselNext below */
4401 case Ijk_ClientReq:
4402 case Ijk_EmWarn:
4403 case Ijk_MapFail:
4404 case Ijk_NoDecode:
4405 case Ijk_NoRedir:
4406 case Ijk_SigSEGV:
4407 case Ijk_SigTRAP:
4408 case Ijk_Sys_int128:
4409 case Ijk_Sys_int129:
4410 case Ijk_Sys_int130:
4411 case Ijk_Sys_int145:
4412 case Ijk_Sys_int210:
4413 case Ijk_Sys_syscall:
4414 case Ijk_Sys_sysenter:
4415 case Ijk_InvalICache:
4416 case Ijk_Yield:
4418 HReg r = iselIntExpr_R(env, IRExpr_Const(stmt->Ist.Exit.dst));
4419 addInstr(env, X86Instr_XAssisted(r, amEIP, cc, stmt->Ist.Exit.jk));
4420 return;
4422 default:
4423 break;
4426 /* Do we ever expect to see any other kind? */
4427 goto stmt_fail;
4430 default: break;
4432 stmt_fail:
4433 ppIRStmt(stmt);
4434 vpanic("iselStmt");
/*---------------------------------------------------------*/
/*--- ISEL: Basic block terminators (Nexts)             ---*/
/*---------------------------------------------------------*/
4442 static void iselNext ( ISelEnv* env,
4443 IRExpr* next, IRJumpKind jk, Int offsIP )
4445 if (vex_traceflags & VEX_TRACE_VCODE) {
4446 vex_printf( "\n-- PUT(%d) = ", offsIP);
4447 ppIRExpr( next );
4448 vex_printf( "; exit-");
4449 ppIRJumpKind(jk);
4450 vex_printf( "\n");
4453 /* Case: boring transfer to known address */
4454 if (next->tag == Iex_Const) {
4455 IRConst* cdst = next->Iex.Const.con;
4456 vassert(cdst->tag == Ico_U32);
4457 if (jk == Ijk_Boring || jk == Ijk_Call) {
4458 /* Boring transfer to known address */
4459 X86AMode* amEIP = X86AMode_IR(offsIP, hregX86_EBP());
4460 if (env->chainingAllowed) {
4461 /* .. almost always true .. */
4462 /* Skip the event check at the dst if this is a forwards
4463 edge. */
4464 Bool toFastEP
4465 = ((Addr32)cdst->Ico.U32) > env->max_ga;
4466 if (0) vex_printf("%s", toFastEP ? "X" : ".");
4467 addInstr(env, X86Instr_XDirect(cdst->Ico.U32,
4468 amEIP, Xcc_ALWAYS,
4469 toFastEP));
4470 } else {
4471 /* .. very occasionally .. */
4472 /* We can't use chaining, so ask for an assisted transfer,
4473 as that's the only alternative that is allowable. */
4474 HReg r = iselIntExpr_R(env, next);
4475 addInstr(env, X86Instr_XAssisted(r, amEIP, Xcc_ALWAYS,
4476 Ijk_Boring));
4478 return;
4482 /* Case: call/return (==boring) transfer to any address */
4483 switch (jk) {
4484 case Ijk_Boring: case Ijk_Ret: case Ijk_Call: {
4485 HReg r = iselIntExpr_R(env, next);
4486 X86AMode* amEIP = X86AMode_IR(offsIP, hregX86_EBP());
4487 if (env->chainingAllowed) {
4488 addInstr(env, X86Instr_XIndir(r, amEIP, Xcc_ALWAYS));
4489 } else {
4490 addInstr(env, X86Instr_XAssisted(r, amEIP, Xcc_ALWAYS,
4491 Ijk_Boring));
4493 return;
4495 default:
4496 break;
4499 /* Case: assisted transfer to arbitrary address */
4500 switch (jk) {
4501 /* Keep this list in sync with that for Ist_Exit above */
4502 case Ijk_ClientReq:
4503 case Ijk_EmWarn:
4504 case Ijk_MapFail:
4505 case Ijk_NoDecode:
4506 case Ijk_NoRedir:
4507 case Ijk_SigSEGV:
4508 case Ijk_SigTRAP:
4509 case Ijk_Sys_int128:
4510 case Ijk_Sys_int129:
4511 case Ijk_Sys_int130:
4512 case Ijk_Sys_int145:
4513 case Ijk_Sys_int210:
4514 case Ijk_Sys_syscall:
4515 case Ijk_Sys_sysenter:
4516 case Ijk_InvalICache:
4517 case Ijk_Yield:
4519 HReg r = iselIntExpr_R(env, next);
4520 X86AMode* amEIP = X86AMode_IR(offsIP, hregX86_EBP());
4521 addInstr(env, X86Instr_XAssisted(r, amEIP, Xcc_ALWAYS, jk));
4522 return;
4524 default:
4525 break;
4528 vex_printf( "\n-- PUT(%d) = ", offsIP);
4529 ppIRExpr( next );
4530 vex_printf( "; exit-");
4531 ppIRJumpKind(jk);
4532 vex_printf( "\n");
4533 vassert(0); // are we expecting any other kind?
/*---------------------------------------------------------*/
/*--- Insn selector top-level                           ---*/
/*---------------------------------------------------------*/
4541 /* Translate an entire SB to x86 code. */
4543 HInstrArray* iselSB_X86 ( const IRSB* bb,
4544 VexArch arch_host,
4545 const VexArchInfo* archinfo_host,
4546 const VexAbiInfo* vbi/*UNUSED*/,
4547 Int offs_Host_EvC_Counter,
4548 Int offs_Host_EvC_FailAddr,
4549 Bool chainingAllowed,
4550 Bool addProfInc,
4551 Addr max_ga )
4553 Int i, j;
4554 HReg hreg, hregHI;
4555 ISelEnv* env;
4556 UInt hwcaps_host = archinfo_host->hwcaps;
4557 X86AMode *amCounter, *amFailAddr;
4559 /* sanity ... */
4560 vassert(arch_host == VexArchX86);
4561 vassert(0 == (hwcaps_host
4562 & ~(VEX_HWCAPS_X86_MMXEXT
4563 | VEX_HWCAPS_X86_SSE1
4564 | VEX_HWCAPS_X86_SSE2
4565 | VEX_HWCAPS_X86_SSE3
4566 | VEX_HWCAPS_X86_LZCNT)));
4568 /* Check that the host's endianness is as expected. */
4569 vassert(archinfo_host->endness == VexEndnessLE);
4571 /* Make up an initial environment to use. */
4572 env = LibVEX_Alloc_inline(sizeof(ISelEnv));
4573 env->vreg_ctr = 0;
4575 /* Set up output code array. */
4576 env->code = newHInstrArray();
4578 /* Copy BB's type env. */
4579 env->type_env = bb->tyenv;
4581 /* Make up an IRTemp -> virtual HReg mapping. This doesn't
4582 change as we go along. */
4583 env->n_vregmap = bb->tyenv->types_used;
4584 env->vregmap = LibVEX_Alloc_inline(env->n_vregmap * sizeof(HReg));
4585 env->vregmapHI = LibVEX_Alloc_inline(env->n_vregmap * sizeof(HReg));
4587 /* and finally ... */
4588 env->chainingAllowed = chainingAllowed;
4589 env->hwcaps = hwcaps_host;
4590 env->max_ga = max_ga;
4592 /* For each IR temporary, allocate a suitably-kinded virtual
4593 register. */
4594 j = 0;
4595 for (i = 0; i < env->n_vregmap; i++) {
4596 hregHI = hreg = INVALID_HREG;
4597 switch (bb->tyenv->types[i]) {
4598 case Ity_I1:
4599 case Ity_I8:
4600 case Ity_I16:
4601 case Ity_I32: hreg = mkHReg(True, HRcInt32, 0, j++); break;
4602 case Ity_I64: hreg = mkHReg(True, HRcInt32, 0, j++);
4603 hregHI = mkHReg(True, HRcInt32, 0, j++); break;
4604 case Ity_F32:
4605 case Ity_F64: hreg = mkHReg(True, HRcFlt64, 0, j++); break;
4606 case Ity_V128: hreg = mkHReg(True, HRcVec128, 0, j++); break;
4607 default: ppIRType(bb->tyenv->types[i]);
4608 vpanic("iselBB: IRTemp type");
4610 env->vregmap[i] = hreg;
4611 env->vregmapHI[i] = hregHI;
4613 env->vreg_ctr = j;
4615 /* The very first instruction must be an event check. */
4616 amCounter = X86AMode_IR(offs_Host_EvC_Counter, hregX86_EBP());
4617 amFailAddr = X86AMode_IR(offs_Host_EvC_FailAddr, hregX86_EBP());
4618 addInstr(env, X86Instr_EvCheck(amCounter, amFailAddr));
4620 /* Possibly a block counter increment (for profiling). At this
4621 point we don't know the address of the counter, so just pretend
4622 it is zero. It will have to be patched later, but before this
4623 translation is used, by a call to LibVEX_patchProfCtr. */
4624 if (addProfInc) {
4625 addInstr(env, X86Instr_ProfInc());
4628 /* Ok, finally we can iterate over the statements. */
4629 for (i = 0; i < bb->stmts_used; i++)
4630 iselStmt(env, bb->stmts[i]);
4632 iselNext(env, bb->next, bb->jumpkind, bb->offsIP);
4634 /* record the number of vregs we used. */
4635 env->code->n_vregs = env->vreg_ctr;
4636 return env->code;
/*---------------------------------------------------------------*/
/*--- end host_x86_isel.c ---*/
/*---------------------------------------------------------------*/