/*---------------------------------------------------------------*/
/*--- begin                                   host_x86_isel.c ---*/
/*---------------------------------------------------------------*/

/*
   This file is part of Valgrind, a dynamic binary instrumentation
   framework.

   Copyright (C) 2004-2017 OpenWorks LLP

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, see <http://www.gnu.org/licenses/>.

   The GNU General Public License is contained in the file COPYING.

   Neither the names of the U.S. Department of Energy nor the
   University of California nor the names of its contributors may be
   used to endorse or promote products derived from this software
   without prior written permission.
*/
#include "libvex_basictypes.h"
#include "libvex_ir.h"
#include "libvex.h"

#include "ir_match.h"
#include "main_util.h"
#include "main_globals.h"
#include "host_generic_regs.h"
#include "host_generic_simd64.h"
#include "host_generic_simd128.h"
#include "host_x86_defs.h"
/* TODO:

   -- (Really an assembler issue) don't emit CMov32 as a cmov
      insn, since that's expensive on P4 and conditional branch
      is cheaper if (as we expect) the condition is highly predictable

   -- preserve xmm registers across function calls (by declaring them
      as trashed by call insns)

   -- preserve x87 ST stack discipline across function calls.  Sigh.

   -- Check doHelperCall: if a call is conditional, we cannot safely
      compute any regparm args directly to registers.  Hence, the
      fast-regparm marshalling should be restricted to unconditional
      calls only.
*/
/*---------------------------------------------------------*/
/*--- x87 control word stuff                            ---*/
/*---------------------------------------------------------*/

/* Vex-generated code expects to run with the FPU set as follows: all
   exceptions masked, round-to-nearest, precision = 53 bits.  This
   corresponds to a FPU control word value of 0x027F.

   Similarly the SSE control word (%mxcsr) should be 0x1F80.

   %fpucw and %mxcsr should have these values on entry to
   Vex-generated code, and those values should be unchanged at exit.
*/

#define DEFAULT_FPUCW 0x027F

/* debugging only, do not use */
/* define DEFAULT_FPUCW 0x037F */
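/* For reference: in 0x027F, bits 0..5 mask all six x87 exception
   classes (bit 6 is reserved and set), bits 9:8 = 10b select 53-bit
   (double) precision, and bits 11:10 = 00b select round-to-nearest.
   The debug value 0x037F differs only in selecting 64-bit (extended)
   precision.  In %mxcsr, 0x1F80 sets all six exception mask bits and
   leaves RC = 00b (round to nearest). */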
/*---------------------------------------------------------*/
/*--- misc helpers                                      ---*/
/*---------------------------------------------------------*/

/* These are duplicated in guest-x86/toIR.c */
static IRExpr* unop ( IROp op, IRExpr* a )
{
   return IRExpr_Unop(op, a);
}

static IRExpr* binop ( IROp op, IRExpr* a1, IRExpr* a2 )
{
   return IRExpr_Binop(op, a1, a2);
}

static IRExpr* bind ( Int binder )
{
   return IRExpr_Binder(binder);
}

static Bool isZeroU8 ( IRExpr* e )
{
   return e->tag == Iex_Const
          && e->Iex.Const.con->tag == Ico_U8
          && e->Iex.Const.con->Ico.U8 == 0;
}

static Bool isZeroU32 ( IRExpr* e )
{
   return e->tag == Iex_Const
          && e->Iex.Const.con->tag == Ico_U32
          && e->Iex.Const.con->Ico.U32 == 0;
}

//static Bool isZeroU64 ( IRExpr* e )
//{
//   return e->tag == Iex_Const
//          && e->Iex.Const.con->tag == Ico_U64
//          && e->Iex.Const.con->Ico.U64 == 0ULL;
//}
/*---------------------------------------------------------*/
/*--- ISEL: Internal environment stuff                  ---*/
/*---------------------------------------------------------*/

/* This carries around:

   - A mapping from IRTemp to IRType, giving the type of any IRTemp we
     might encounter.  This is computed before insn selection starts,
     and does not change.

   - A mapping from IRTemp to HReg.  This tells the insn selector
     which virtual register(s) are associated with each IRTemp
     temporary.  This is computed before insn selection starts, and
     does not change.  We expect this mapping to map precisely the
     same set of IRTemps as the type mapping does.

       - vregmap   holds the primary register for the IRTemp.
       - vregmapHI is only used for 64-bit integer-typed
            IRTemps.  It holds the identity of a second
            32-bit virtual HReg, which holds the high half
            of the value.

   - The code array, that is, the insns selected so far.

   - A counter, for generating new virtual registers.

   - The host subarchitecture we are selecting insns for.
     This is set at the start and does not change.

   - A Bool for indicating whether we may generate chain-me
     instructions for control flow transfers, or whether we must use
     XAssisted.

   - The maximum guest address of any guest insn in this block.
     Actually, the address of the highest-addressed byte from any insn
     in this block.  Is set at the start and does not change.  This is
     used for detecting jumps which are definitely forward-edges from
     this block, and therefore can be made (chained) to the fast entry
     point of the destination, thereby avoiding the destination's
     slow entry point.

   Note, this is all (well, mostly) host-independent.
*/

typedef
   struct {
      /* Constant -- are set at the start and do not change. */
      IRTypeEnv*   type_env;

      HReg*        vregmap;
      HReg*        vregmapHI;
      Int          n_vregmap;

      UInt         hwcaps;

      Bool         chainingAllowed;
      Addr32       max_ga;

      /* These are modified as we go along. */
      HInstrArray* code;
      Int          vreg_ctr;
   }
   ISelEnv;
static HReg lookupIRTemp ( ISelEnv* env, IRTemp tmp )
{
   vassert(tmp < env->n_vregmap);
   return env->vregmap[tmp];
}

static void lookupIRTemp64 ( HReg* vrHI, HReg* vrLO, ISelEnv* env, IRTemp tmp )
{
   vassert(tmp < env->n_vregmap);
   vassert(! hregIsInvalid(env->vregmapHI[tmp]));
   *vrLO = env->vregmap[tmp];
   *vrHI = env->vregmapHI[tmp];
}

static void addInstr ( ISelEnv* env, X86Instr* instr )
{
   addHInstr(env->code, instr);
   if (vex_traceflags & VEX_TRACE_VCODE) {
      ppX86Instr(instr, False);
      vex_printf("\n");
   }
}

static HReg newVRegI ( ISelEnv* env )
{
   HReg reg = mkHReg(True/*virtual reg*/, HRcInt32, 0/*enc*/, env->vreg_ctr);
   env->vreg_ctr++;
   return reg;
}

static HReg newVRegF ( ISelEnv* env )
{
   HReg reg = mkHReg(True/*virtual reg*/, HRcFlt64, 0/*enc*/, env->vreg_ctr);
   env->vreg_ctr++;
   return reg;
}

static HReg newVRegV ( ISelEnv* env )
{
   HReg reg = mkHReg(True/*virtual reg*/, HRcVec128, 0/*enc*/, env->vreg_ctr);
   env->vreg_ctr++;
   return reg;
}
/*---------------------------------------------------------*/
/*--- ISEL: Forward declarations                        ---*/
/*---------------------------------------------------------*/

/* These are organised as iselXXX and iselXXX_wrk pairs.  The
   iselXXX_wrk do the real work, but are not to be called directly.
   For each XXX, iselXXX calls its iselXXX_wrk counterpart, then
   checks that all returned registers are virtual.  You should not
   call the _wrk version directly.
*/
static X86RMI*     iselIntExpr_RMI_wrk ( ISelEnv* env, const IRExpr* e );
static X86RMI*     iselIntExpr_RMI     ( ISelEnv* env, const IRExpr* e );

static X86RI*      iselIntExpr_RI_wrk  ( ISelEnv* env, const IRExpr* e );
static X86RI*      iselIntExpr_RI      ( ISelEnv* env, const IRExpr* e );

static X86RM*      iselIntExpr_RM_wrk  ( ISelEnv* env, const IRExpr* e );
static X86RM*      iselIntExpr_RM      ( ISelEnv* env, const IRExpr* e );

static HReg        iselIntExpr_R_wrk   ( ISelEnv* env, const IRExpr* e );
static HReg        iselIntExpr_R       ( ISelEnv* env, const IRExpr* e );

static X86AMode*   iselIntExpr_AMode_wrk ( ISelEnv* env, const IRExpr* e );
static X86AMode*   iselIntExpr_AMode     ( ISelEnv* env, const IRExpr* e );

static void        iselInt64Expr_wrk ( HReg* rHi, HReg* rLo,
                                       ISelEnv* env, const IRExpr* e );
static void        iselInt64Expr     ( HReg* rHi, HReg* rLo,
                                       ISelEnv* env, const IRExpr* e );

static X86CondCode iselCondCode_wrk ( ISelEnv* env, const IRExpr* e );
static X86CondCode iselCondCode     ( ISelEnv* env, const IRExpr* e );

static HReg        iselDblExpr_wrk ( ISelEnv* env, const IRExpr* e );
static HReg        iselDblExpr     ( ISelEnv* env, const IRExpr* e );

static HReg        iselFltExpr_wrk ( ISelEnv* env, const IRExpr* e );
static HReg        iselFltExpr     ( ISelEnv* env, const IRExpr* e );

static HReg        iselVecExpr_wrk ( ISelEnv* env, const IRExpr* e );
static HReg        iselVecExpr     ( ISelEnv* env, const IRExpr* e );
/*---------------------------------------------------------*/
/*--- ISEL: Misc helpers                                ---*/
/*---------------------------------------------------------*/

/* Make a int reg-reg move. */

static X86Instr* mk_iMOVsd_RR ( HReg src, HReg dst )
{
   vassert(hregClass(src) == HRcInt32);
   vassert(hregClass(dst) == HRcInt32);
   return X86Instr_Alu32R(Xalu_MOV, X86RMI_Reg(src), dst);
}

/* Make a vector reg-reg move. */

static X86Instr* mk_vMOVsd_RR ( HReg src, HReg dst )
{
   vassert(hregClass(src) == HRcVec128);
   vassert(hregClass(dst) == HRcVec128);
   return X86Instr_SseReRg(Xsse_MOV, src, dst);
}

/* Advance/retreat %esp by n. */

static void add_to_esp ( ISelEnv* env, Int n )
{
   vassert(n > 0 && n < 256 && (n%4) == 0);
   addInstr(env,
            X86Instr_Alu32R(Xalu_ADD, X86RMI_Imm(n), hregX86_ESP()));
}

static void sub_from_esp ( ISelEnv* env, Int n )
{
   vassert(n > 0 && n < 256 && (n%4) == 0);
   addInstr(env,
            X86Instr_Alu32R(Xalu_SUB, X86RMI_Imm(n), hregX86_ESP()));
}

/* Given an amode, return one which references 4 bytes further
   along. */

static X86AMode* advance4 ( X86AMode* am )
{
   X86AMode* am4 = dopyX86AMode(am);
   switch (am4->tag) {
      case Xam_IRRS:
         am4->Xam.IRRS.imm += 4; break;
      case Xam_IR:
         am4->Xam.IR.imm += 4; break;
      default:
         vpanic("advance4(x86,host)");
   }
   return am4;
}
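/* For example, when a 64-bit value is moved through memory as two
   32-bit words, the low word can be addressed with some amode 'am'
   and the high word with advance4(am). */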
/* Push an arg onto the host stack, in preparation for a call to a
   helper function of some kind.  Returns the number of 32-bit words
   pushed.  If we encounter an IRExpr_VECRET() then we expect that
   r_vecRetAddr will be a valid register, that holds the relevant
   address.
*/
static Int pushArg ( ISelEnv* env, IRExpr* arg, HReg r_vecRetAddr )
{
   if (UNLIKELY(arg->tag == Iex_VECRET)) {
      vassert(!hregIsInvalid(r_vecRetAddr));
      addInstr(env, X86Instr_Push(X86RMI_Reg(r_vecRetAddr)));
      return 1;
   }
   if (UNLIKELY(arg->tag == Iex_GSPTR)) {
      addInstr(env, X86Instr_Push(X86RMI_Reg(hregX86_EBP())));
      return 1;
   }
   /* Else it's a "normal" expression. */
   IRType arg_ty = typeOfIRExpr(env->type_env, arg);
   if (arg_ty == Ity_I32) {
      addInstr(env, X86Instr_Push(iselIntExpr_RMI(env, arg)));
      return 1;
   }
   if (arg_ty == Ity_I64) {
      HReg rHi, rLo;
      iselInt64Expr(&rHi, &rLo, env, arg);
      addInstr(env, X86Instr_Push(X86RMI_Reg(rHi)));
      addInstr(env, X86Instr_Push(X86RMI_Reg(rLo)));
      return 2;
   }
   vpanic("pushArg(x86): can't handle arg of this type");
}
/* Complete the call to a helper function, by calling the
   helper and clearing the args off the stack. */

static
void callHelperAndClearArgs ( ISelEnv* env, X86CondCode cc,
                              IRCallee* cee, Int n_arg_ws,
                              RetLoc rloc )
{
   /* Complication.  Need to decide which reg to use as the fn address
      pointer, in a way that doesn't trash regparm-passed
      parameters. */
   vassert(sizeof(void*) == 4);

   addInstr(env, X86Instr_Call( cc, (Addr)cee->addr,
                                cee->regparms, rloc));
   if (n_arg_ws > 0)
      add_to_esp(env, 4*n_arg_ws);
}
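/* Note: stack-passed args follow the normal x86 cdecl convention --
   the callee does not pop them -- which is why the caller clears
   them with the add_to_esp above. */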
/* Used only in doHelperCall.  See big comment in doHelperCall re
   handling of regparm args.  This function figures out whether
   evaluation of an expression might require use of a fixed register.
   If in doubt return True (safe but suboptimal).
*/
static
Bool mightRequireFixedRegs ( IRExpr* e )
{
   if (UNLIKELY(is_IRExpr_VECRET_or_GSPTR(e))) {
      // These are always "safe" -- either a copy of %esp in some
      // arbitrary vreg, or a copy of %ebp, respectively.
      return False;
   }
   /* Else it's a "normal" expression. */
   switch (e->tag) {
      case Iex_RdTmp: case Iex_Const: case Iex_Get:
         return False;
      default:
         return True;
   }
}
/* Do a complete function call.  |guard| is a Ity_Bit expression
   indicating whether or not the call happens.  If guard==NULL, the
   call is unconditional.  |retloc| is set to indicate where the
   return value is after the call.  The caller (of this fn) must
   generate code to add |stackAdjustAfterCall| to the stack pointer
   after the call is done. */

static
void doHelperCall ( /*OUT*/UInt*   stackAdjustAfterCall,
                    /*OUT*/RetLoc* retloc,
                    ISelEnv* env,
                    IRExpr* guard,
                    IRCallee* cee, IRType retTy, IRExpr** args )
{
   X86CondCode cc;
   HReg        argregs[3];
   HReg        tmpregs[3];
   Bool        danger;
   Int         not_done_yet, n_args, n_arg_ws, stack_limit,
               i, argreg, argregX;

   /* Set default returns.  We'll update them later if needed. */
   *stackAdjustAfterCall = 0;
   *retloc               = mk_RetLoc_INVALID();

   /* These are used for cross-checking that IR-level constraints on
      the use of Iex_VECRET and Iex_GSPTR are observed. */
   UInt nVECRETs = 0;
   UInt nGSPTRs  = 0;

   /* Marshal args for a call, do the call, and clear the stack.
      Complexities to consider:

      * The return type can be I{64,32,16,8} or V128.  In the V128
        case, it is expected that |args| will contain the special
        node IRExpr_VECRET(), in which case this routine generates
        code to allocate space on the stack for the vector return
        value.  Since we are not passing any scalars on the stack, it
        is enough to preallocate the return space before marshalling
        any arguments, in this case.

        |args| may also contain IRExpr_GSPTR(), in which case the
        value in %ebp is passed as the corresponding argument.

      * If the callee claims regparmness of 1, 2 or 3, we must pass the
        first 1, 2 or 3 args in registers (EAX, EDX, and ECX
        respectively).  To keep things relatively simple, only args of
        type I32 may be passed as regparms -- just bomb out if anything
        else turns up.  Clearly this depends on the front ends not
        trying to pass any other types as regparms.
   */

   /* 16 Nov 2004: the regparm handling is complicated by the
      following problem.

      Consider a call to a function with two regparm parameters:
      f(e1,e2).  We need to compute e1 into %eax and e2 into %edx.
      Suppose code is first generated to compute e1 into %eax.  Then,
      code is generated to compute e2 into %edx.  Unfortunately, if
      the latter code sequence uses %eax, it will trash the value of
      e1 computed by the former sequence.  This could happen if (for
      example) e2 itself involved a function call.  In the code below,
      args are evaluated right-to-left, not left-to-right, but the
      principle and the problem are the same.

      One solution is to compute all regparm-bound args into vregs
      first, and once they are all done, move them to the relevant
      real regs.  This always gives correct code, but it also gives
      a bunch of vreg-to-rreg moves which are usually redundant but
      are hard for the register allocator to get rid of.

      A compromise is to first examine all regparm'd argument
      expressions.  If they are all so simple that it is clear
      they will be evaluated without use of any fixed registers,
      use the old compute-directly-to-fixed-target scheme.  If not,
      be safe and use the via-vregs scheme.

      Note this requires being able to examine an expression and
      determine whether or not evaluation of it might use a fixed
      register.  That requires knowledge of how the rest of this
      insn selector works.  Currently just the following 3 are
      regarded as safe -- hopefully they cover the majority of
      arguments in practice: IRExpr_Tmp IRExpr_Const IRExpr_Get.
   */
   vassert(cee->regparms >= 0 && cee->regparms <= 3);

   /* Count the number of args and also the VECRETs */
   n_args = n_arg_ws = 0;
   while (args[n_args]) {
      IRExpr* arg = args[n_args];
      n_args++;
      if (UNLIKELY(arg->tag == Iex_VECRET)) {
         nVECRETs++;
      } else if (UNLIKELY(arg->tag == Iex_GSPTR)) {
         nGSPTRs++;
      }
   }

   /* If this fails, the IR is ill-formed */
   vassert(nGSPTRs == 0 || nGSPTRs == 1);

   /* If we have a VECRET, allocate space on the stack for the return
      value, and record the stack pointer after that. */
   HReg r_vecRetAddr = INVALID_HREG;
   if (nVECRETs == 1) {
      vassert(retTy == Ity_V128 || retTy == Ity_V256);
      vassert(retTy != Ity_V256); // we don't handle that yet (if ever)
      r_vecRetAddr = newVRegI(env);
      sub_from_esp(env, 16);
      addInstr(env, mk_iMOVsd_RR( hregX86_ESP(), r_vecRetAddr ));
   } else {
      // If either of these fail, the IR is ill-formed
      vassert(retTy != Ity_V128 && retTy != Ity_V256);
      vassert(nVECRETs == 0);
   }

   not_done_yet = n_args;

   stack_limit = cee->regparms;

   /* ------ BEGIN marshall all arguments ------ */

   /* Push (R to L) the stack-passed args, [n_args-1 .. stack_limit] */
   for (i = n_args-1; i >= stack_limit; i--) {
      n_arg_ws += pushArg(env, args[i], r_vecRetAddr);
      not_done_yet--;
   }

   /* args [stack_limit-1 .. 0] and possibly %ebp are to be passed in
      registers. */

   if (cee->regparms > 0) {

      /* ------ BEGIN deal with regparms ------ */

      /* deal with regparms, not forgetting %ebp if needed. */
      argregs[0] = hregX86_EAX();
      argregs[1] = hregX86_EDX();
      argregs[2] = hregX86_ECX();
      tmpregs[0] = tmpregs[1] = tmpregs[2] = INVALID_HREG;

      argreg = cee->regparms;

      /* In keeping with big comment above, detect potential danger
         and use the via-vregs scheme if needed. */
      danger = False;
      for (i = stack_limit-1; i >= 0; i--) {
         if (mightRequireFixedRegs(args[i])) {
            danger = True;
            break;
         }
      }

      if (danger) {

         /* Move via temporaries */
         argregX = argreg;
         for (i = stack_limit-1; i >= 0; i--) {

            if (0) {
               vex_printf("x86 host: register param is complex: ");
               ppIRExpr(args[i]);
               vex_printf("\n");
            }

            IRExpr* arg = args[i];
            argreg--;
            vassert(argreg >= 0);
            if (UNLIKELY(arg->tag == Iex_VECRET)) {
               vassert(0); //ATC
            }
            else if (UNLIKELY(arg->tag == Iex_GSPTR)) {
               vassert(0); //ATC
            } else {
               vassert(typeOfIRExpr(env->type_env, arg) == Ity_I32);
               tmpregs[argreg] = iselIntExpr_R(env, arg);
            }
            not_done_yet--;
         }
         for (i = stack_limit-1; i >= 0; i--) {
            argregX--;
            vassert(argregX >= 0);
            addInstr( env, mk_iMOVsd_RR( tmpregs[argregX], argregs[argregX] ) );
         }

      } else {
         /* It's safe to compute all regparm args directly into their
            target registers. */
         for (i = stack_limit-1; i >= 0; i--) {
            IRExpr* arg = args[i];
            argreg--;
            vassert(argreg >= 0);
            if (UNLIKELY(arg->tag == Iex_VECRET)) {
               vassert(!hregIsInvalid(r_vecRetAddr));
               addInstr(env, X86Instr_Alu32R(Xalu_MOV,
                                             X86RMI_Reg(r_vecRetAddr),
                                             argregs[argreg]));
            }
            else if (UNLIKELY(arg->tag == Iex_GSPTR)) {
               vassert(0); //ATC
            } else {
               vassert(typeOfIRExpr(env->type_env, arg) == Ity_I32);
               addInstr(env, X86Instr_Alu32R(Xalu_MOV,
                                             iselIntExpr_RMI(env, arg),
                                             argregs[argreg]));
            }
            not_done_yet--;
         }
      }

      /* ------ END deal with regparms ------ */

   }

   vassert(not_done_yet == 0);

   /* ------ END marshall all arguments ------ */

   /* Now we can compute the condition.  We can't do it earlier
      because the argument computations could trash the condition
      codes.  Be a bit clever to handle the common case where the
      guard is 1:Bit. */
   cc = Xcc_ALWAYS;
   if (guard) {
      if (guard->tag == Iex_Const
          && guard->Iex.Const.con->tag == Ico_U1
          && guard->Iex.Const.con->Ico.U1 == True) {
         /* unconditional -- do nothing */
      } else {
         cc = iselCondCode( env, guard );
      }
   }

   /* Do final checks, set the return values, and generate the call
      instruction proper. */
   vassert(*stackAdjustAfterCall == 0);
   vassert(is_RetLoc_INVALID(*retloc));
   switch (retTy) {
      case Ity_INVALID:
         /* Function doesn't return a value. */
         *retloc = mk_RetLoc_simple(RLPri_None);
         break;
      case Ity_I64:
         *retloc = mk_RetLoc_simple(RLPri_2Int);
         break;
      case Ity_I32: case Ity_I16: case Ity_I8:
         *retloc = mk_RetLoc_simple(RLPri_Int);
         break;
      case Ity_V128:
         *retloc = mk_RetLoc_spRel(RLPri_V128SpRel, 0);
         *stackAdjustAfterCall = 16;
         break;
      case Ity_V256:
         *retloc = mk_RetLoc_spRel(RLPri_V256SpRel, 0);
         *stackAdjustAfterCall = 32;
         break;
      default:
         /* IR can denote other possible return types, but we don't
            handle those here. */
         vassert(0);
   }

   /* Finally, generate the call itself.  This needs the *retloc value
      set in the switch above, which is why it's at the end. */
   callHelperAndClearArgs( env, cc, cee, n_arg_ws, *retloc );
}
/* Given a guest-state array descriptor, an index expression and a
   bias, generate an X86AMode holding the relevant guest state
   offset. */

static
X86AMode* genGuestArrayOffset ( ISelEnv* env, IRRegArray* descr,
                                IRExpr* off, Int bias )
{
   HReg tmp, roff;
   Int  elemSz = sizeofIRType(descr->elemTy);
   Int  nElems = descr->nElems;
   Int  shift  = 0;

   /* throw out any cases not generated by an x86 front end.  In
      theory there might be a day where we need to handle them -- if
      we ever run non-x86-guest on x86 host. */

   if (nElems != 8)
      vpanic("genGuestArrayOffset(x86 host)(1)");

   switch (elemSz) {
      case 1:  shift = 0; break;
      case 4:  shift = 2; break;
      case 8:  shift = 3; break;
      default: vpanic("genGuestArrayOffset(x86 host)(2)");
   }

   /* Compute off into a reg, %off.  Then return:

        movl %off, %tmp
        addl $bias, %tmp  (if bias != 0)
        andl %tmp, 7
        ... base(%ebp, %tmp, shift) ...
   */
   tmp  = newVRegI(env);
   roff = iselIntExpr_R(env, off);
   addInstr(env, mk_iMOVsd_RR(roff, tmp));
   if (bias != 0) {
      addInstr(env,
               X86Instr_Alu32R(Xalu_ADD, X86RMI_Imm(bias), tmp));
   }
   addInstr(env,
            X86Instr_Alu32R(Xalu_AND, X86RMI_Imm(7), tmp));
   return
      X86AMode_IRRS( descr->base, hregX86_EBP(), tmp, shift );
}
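/* Example: for an 8-entry array of 8-byte (F64) guest registers,
   elemSz is 8 so shift is 3, and an access at index 'ix' with bias
   'b' resolves to the amode
      descr->base + %ebp + (((ix + b) & 7) << 3). */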
/* Mess with the FPU's rounding mode: set to the default rounding mode
   (DEFAULT_FPUCW). */
static
void set_FPU_rounding_default ( ISelEnv* env )
{
   /* pushl $DEFAULT_FPUCW
      fldcw 0(%esp)
      addl  $4, %esp
   */
   X86AMode* zero_esp = X86AMode_IR(0, hregX86_ESP());
   addInstr(env, X86Instr_Push(X86RMI_Imm(DEFAULT_FPUCW)));
   addInstr(env, X86Instr_FpLdCW(zero_esp));
   add_to_esp(env, 4);
}


/* Mess with the FPU's rounding mode: 'mode' is an I32-typed
   expression denoting a value in the range 0 .. 3, indicating a round
   mode encoded as per type IRRoundingMode.  Set the x87 FPU to have
   the same rounding.
*/
static
void set_FPU_rounding_mode ( ISelEnv* env, IRExpr* mode )
{
   HReg rrm  = iselIntExpr_R(env, mode);
   HReg rrm2 = newVRegI(env);
   X86AMode* zero_esp = X86AMode_IR(0, hregX86_ESP());

   /* movl  %rrm, %rrm2
      andl  $3, %rrm2   -- shouldn't be needed; paranoia
      shll  $10, %rrm2
      orl   $DEFAULT_FPUCW, %rrm2
      pushl %rrm2
      fldcw 0(%esp)
      addl  $4, %esp
   */
   addInstr(env, mk_iMOVsd_RR(rrm, rrm2));
   addInstr(env, X86Instr_Alu32R(Xalu_AND, X86RMI_Imm(3), rrm2));
   addInstr(env, X86Instr_Sh32(Xsh_SHL, 10, rrm2));
   addInstr(env, X86Instr_Alu32R(Xalu_OR, X86RMI_Imm(DEFAULT_FPUCW), rrm2));
   addInstr(env, X86Instr_Push(X86RMI_Reg(rrm2)));
   addInstr(env, X86Instr_FpLdCW(zero_esp));
   add_to_esp(env, 4);
}
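/* This works because IRRoundingMode uses the same 2-bit encoding as
   the x87 RC field (0 = nearest, 1 = -infinity, 2 = +infinity,
   3 = toward zero), and RC lives in bits 11:10 of the control word --
   hence the single shift left by 10.  DEFAULT_FPUCW has RC = 00, so
   or-ing it in does not disturb the selected rounding mode. */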
/* Generate !src into a new vector register, and be sure that the code
   is SSE1 compatible.  Amazing that Intel doesn't offer a less crappy
   way to do this.
*/
static HReg do_sse_Not128 ( ISelEnv* env, HReg src )
{
   HReg dst = newVRegV(env);
   /* Set dst to zero.  If dst contains a NaN then all hell might
      break loose after the comparison.  So, first zero it. */
   addInstr(env, X86Instr_SseReRg(Xsse_XOR, dst, dst));
   /* And now make it all 1s ... */
   addInstr(env, X86Instr_Sse32Fx4(Xsse_CMPEQF, dst, dst));
   /* Finally, xor 'src' into it. */
   addInstr(env, X86Instr_SseReRg(Xsse_XOR, src, dst));
   /* Doesn't that just totally suck? */
   return dst;
}
/* Round an x87 FPU value to 53-bit-mantissa precision, to be used
   after most non-simple FPU operations (simple = +, -, *, / and
   sqrt).

   This could be done a lot more efficiently if needed, by loading
   zero and adding it to the value to be rounded (fldz ; faddp?).
*/
static void roundToF64 ( ISelEnv* env, HReg reg )
{
   X86AMode* zero_esp = X86AMode_IR(0, hregX86_ESP());
   sub_from_esp(env, 8);
   addInstr(env, X86Instr_FpLdSt(False/*store*/, 8, reg, zero_esp));
   addInstr(env, X86Instr_FpLdSt(True/*load*/, 8, reg, zero_esp));
   add_to_esp(env, 8);
}
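/* The store/reload pair forces the value out of the 80-bit x87
   register format and through a 64-bit memory slot, which is what
   actually performs the rounding to 53-bit precision. */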
/*---------------------------------------------------------*/
/*--- ISEL: Integer expressions (32/16/8 bit)           ---*/
/*---------------------------------------------------------*/

/* Select insns for an integer-typed expression, and add them to the
   code list.  Return a reg holding the result.  This reg will be a
   virtual register.  THE RETURNED REG MUST NOT BE MODIFIED.  If you
   want to modify it, ask for a new vreg, copy it in there, and modify
   the copy.  The register allocator will do its best to map both
   vregs to the same real register, so the copies will often disappear
   later in the game.

   This should handle expressions of 32, 16 and 8-bit type.  All
   results are returned in a 32-bit register.  For 16- and 8-bit
   expressions, the upper 16/24 bits are arbitrary, so you should mask
   or sign extend partial values if necessary.
*/

static HReg iselIntExpr_R ( ISelEnv* env, const IRExpr* e )
{
   HReg r = iselIntExpr_R_wrk(env, e);
   /* sanity checks ... */
#  if 0
   vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
#  endif
   vassert(hregClass(r) == HRcInt32);
   vassert(hregIsVirtual(r));
   return r;
}
/* DO NOT CALL THIS DIRECTLY ! */
static HReg iselIntExpr_R_wrk ( ISelEnv* env, const IRExpr* e )
{
   MatchInfo mi;

   IRType ty = typeOfIRExpr(env->type_env,e);
   vassert(ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8);

   switch (e->tag) {

      /* --------- TEMP --------- */
      case Iex_RdTmp: {
         return lookupIRTemp(env, e->Iex.RdTmp.tmp);
      }

      /* --------- LOAD --------- */
      case Iex_Load: {
         HReg dst = newVRegI(env);
         X86AMode* amode = iselIntExpr_AMode ( env, e->Iex.Load.addr );

         /* We can't handle big-endian loads, nor load-linked. */
         if (e->Iex.Load.end != Iend_LE)
            goto irreducible;

         if (ty == Ity_I32) {
            addInstr(env, X86Instr_Alu32R(Xalu_MOV,
                                          X86RMI_Mem(amode), dst) );
            return dst;
         }
         if (ty == Ity_I16) {
            addInstr(env, X86Instr_LoadEX(2,False,amode,dst));
            return dst;
         }
         if (ty == Ity_I8) {
            addInstr(env, X86Instr_LoadEX(1,False,amode,dst));
            return dst;
         }
         break;
      }
      /* --------- TERNARY OP --------- */
      case Iex_Triop: {
         IRTriop *triop = e->Iex.Triop.details;
         /* C3210 flags following FPU partial remainder (fprem), both
            IEEE compliant (PREM1) and non-IEEE compliant (PREM). */
         if (triop->op == Iop_PRemC3210F64
             || triop->op == Iop_PRem1C3210F64) {
            HReg junk = newVRegF(env);
            HReg dst  = newVRegI(env);
            HReg srcL = iselDblExpr(env, triop->arg2);
            HReg srcR = iselDblExpr(env, triop->arg3);
            /* XXXROUNDINGFIXME */
            /* set roundingmode here */
            addInstr(env, X86Instr_FpBinary(
                             e->Iex.Binop.op==Iop_PRemC3210F64
                                ? Xfp_PREM : Xfp_PREM1,
                             srcL, srcR, junk
                    ));
            /* The previous pseudo-insn will have left the FPU's C3210
               flags set correctly.  So bag them. */
            addInstr(env, X86Instr_FpStSW_AX());
            addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), dst));
            addInstr(env, X86Instr_Alu32R(Xalu_AND, X86RMI_Imm(0x4700), dst));
            return dst;
         }

         break;
      }
      /* --------- BINARY OP --------- */
      case Iex_Binop: {
         X86AluOp   aluOp;
         X86ShiftOp shOp;

         /* Pattern: Sub32(0,x) */
         if (e->Iex.Binop.op == Iop_Sub32 && isZeroU32(e->Iex.Binop.arg1)) {
            HReg dst = newVRegI(env);
            HReg reg = iselIntExpr_R(env, e->Iex.Binop.arg2);
            addInstr(env, mk_iMOVsd_RR(reg,dst));
            addInstr(env, X86Instr_Unary32(Xun_NEG,dst));
            return dst;
         }

         /* Is it an addition or logical style op? */
         switch (e->Iex.Binop.op) {
            case Iop_Add8: case Iop_Add16: case Iop_Add32:
               aluOp = Xalu_ADD; break;
            case Iop_Sub8: case Iop_Sub16: case Iop_Sub32:
               aluOp = Xalu_SUB; break;
            case Iop_And8: case Iop_And16: case Iop_And32:
               aluOp = Xalu_AND; break;
            case Iop_Or8: case Iop_Or16: case Iop_Or32:
               aluOp = Xalu_OR; break;
            case Iop_Xor8: case Iop_Xor16: case Iop_Xor32:
               aluOp = Xalu_XOR; break;
            case Iop_Mul16: case Iop_Mul32:
               aluOp = Xalu_MUL; break;
            default:
               aluOp = Xalu_INVALID; break;
         }
         /* For commutative ops we assume any literal
            values are on the second operand. */
         if (aluOp != Xalu_INVALID) {
            HReg dst    = newVRegI(env);
            HReg reg    = iselIntExpr_R(env, e->Iex.Binop.arg1);
            X86RMI* rmi = iselIntExpr_RMI(env, e->Iex.Binop.arg2);
            addInstr(env, mk_iMOVsd_RR(reg,dst));
            addInstr(env, X86Instr_Alu32R(aluOp, rmi, dst));
            return dst;
         }
         /* Could do better here; forcing the first arg into a reg
            isn't always clever.
            -- t70 = Xor32(And32(Xor32(LDle:I32(Add32(t41,0xFFFFFFA0:I32)),
                           LDle:I32(Add32(t41,0xFFFFFFA4:I32))),LDle:I32(Add32(
                           t41,0xFFFFFFA8:I32))),LDle:I32(Add32(t41,0xFFFFFFA0:I32)))
               movl 0xFFFFFFA0(%vr41),%vr107
               movl 0xFFFFFFA4(%vr41),%vr108
               movl 0xFFFFFFA8(%vr41),%vr109
               movl 0xFFFFFFA0(%vr41),%vr110
         */
         /* Perhaps a shift op? */
         switch (e->Iex.Binop.op) {
            case Iop_Shl32: case Iop_Shl16: case Iop_Shl8:
               shOp = Xsh_SHL; break;
            case Iop_Shr32: case Iop_Shr16: case Iop_Shr8:
               shOp = Xsh_SHR; break;
            case Iop_Sar32: case Iop_Sar16: case Iop_Sar8:
               shOp = Xsh_SAR; break;
            default:
               shOp = Xsh_INVALID; break;
         }
         if (shOp != Xsh_INVALID) {
            HReg dst = newVRegI(env);

            /* regL = the value to be shifted */
            HReg regL = iselIntExpr_R(env, e->Iex.Binop.arg1);
            addInstr(env, mk_iMOVsd_RR(regL,dst));

            /* Do any necessary widening for 16/8 bit operands */
            switch (e->Iex.Binop.op) {
               case Iop_Shr8:
                  addInstr(env, X86Instr_Alu32R(
                                   Xalu_AND, X86RMI_Imm(0xFF), dst));
                  break;
               case Iop_Shr16:
                  addInstr(env, X86Instr_Alu32R(
                                   Xalu_AND, X86RMI_Imm(0xFFFF), dst));
                  break;
               case Iop_Sar8:
                  addInstr(env, X86Instr_Sh32(Xsh_SHL, 24, dst));
                  addInstr(env, X86Instr_Sh32(Xsh_SAR, 24, dst));
                  break;
               case Iop_Sar16:
                  addInstr(env, X86Instr_Sh32(Xsh_SHL, 16, dst));
                  addInstr(env, X86Instr_Sh32(Xsh_SAR, 16, dst));
                  break;
               default:
                  break;
            }

            /* Now consider the shift amount.  If it's a literal, we
               can do a much better job than the general case. */
            if (e->Iex.Binop.arg2->tag == Iex_Const) {
               /* assert that the IR is well-typed */
               Int nshift;
               vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
               nshift = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
               vassert(nshift >= 0);
               if (nshift > 0)
                  /* Can't allow nshift==0 since that means %cl */
                  addInstr(env, X86Instr_Sh32( shOp, nshift, dst ));
            } else {
               /* General case; we have to force the amount into %cl. */
               HReg regR = iselIntExpr_R(env, e->Iex.Binop.arg2);
               addInstr(env, mk_iMOVsd_RR(regR,hregX86_ECX()));
               addInstr(env, X86Instr_Sh32(shOp, 0/* %cl */, dst));
            }
            return dst;
         }
         /* Handle misc other ops. */

         if (e->Iex.Binop.op == Iop_Max32U) {
            HReg src1 = iselIntExpr_R(env, e->Iex.Binop.arg1);
            HReg dst  = newVRegI(env);
            HReg src2 = iselIntExpr_R(env, e->Iex.Binop.arg2);
            addInstr(env, mk_iMOVsd_RR(src1,dst));
            addInstr(env, X86Instr_Alu32R(Xalu_CMP, X86RMI_Reg(src2), dst));
            addInstr(env, X86Instr_CMov32(Xcc_B, X86RM_Reg(src2), dst));
            return dst;
         }
         if (e->Iex.Binop.op == Iop_8HLto16) {
            HReg hi8  = newVRegI(env);
            HReg lo8  = newVRegI(env);
            HReg hi8s = iselIntExpr_R(env, e->Iex.Binop.arg1);
            HReg lo8s = iselIntExpr_R(env, e->Iex.Binop.arg2);
            addInstr(env, mk_iMOVsd_RR(hi8s, hi8));
            addInstr(env, mk_iMOVsd_RR(lo8s, lo8));
            addInstr(env, X86Instr_Sh32(Xsh_SHL, 8, hi8));
            addInstr(env, X86Instr_Alu32R(Xalu_AND, X86RMI_Imm(0xFF), lo8));
            addInstr(env, X86Instr_Alu32R(Xalu_OR, X86RMI_Reg(lo8), hi8));
            return hi8;
         }

         if (e->Iex.Binop.op == Iop_16HLto32) {
            HReg hi16  = newVRegI(env);
            HReg lo16  = newVRegI(env);
            HReg hi16s = iselIntExpr_R(env, e->Iex.Binop.arg1);
            HReg lo16s = iselIntExpr_R(env, e->Iex.Binop.arg2);
            addInstr(env, mk_iMOVsd_RR(hi16s, hi16));
            addInstr(env, mk_iMOVsd_RR(lo16s, lo16));
            addInstr(env, X86Instr_Sh32(Xsh_SHL, 16, hi16));
            addInstr(env, X86Instr_Alu32R(Xalu_AND, X86RMI_Imm(0xFFFF), lo16));
            addInstr(env, X86Instr_Alu32R(Xalu_OR, X86RMI_Reg(lo16), hi16));
            return hi16;
         }
         if (e->Iex.Binop.op == Iop_MullS16 || e->Iex.Binop.op == Iop_MullS8
             || e->Iex.Binop.op == Iop_MullU16 || e->Iex.Binop.op == Iop_MullU8) {
            HReg a16   = newVRegI(env);
            HReg b16   = newVRegI(env);
            HReg a16s  = iselIntExpr_R(env, e->Iex.Binop.arg1);
            HReg b16s  = iselIntExpr_R(env, e->Iex.Binop.arg2);
            Int  shift = (e->Iex.Binop.op == Iop_MullS8
                          || e->Iex.Binop.op == Iop_MullU8)
                            ? 24 : 16;
            X86ShiftOp shr_op = (e->Iex.Binop.op == Iop_MullS8
                                 || e->Iex.Binop.op == Iop_MullS16)
                                   ? Xsh_SAR : Xsh_SHR;

            addInstr(env, mk_iMOVsd_RR(a16s, a16));
            addInstr(env, mk_iMOVsd_RR(b16s, b16));
            addInstr(env, X86Instr_Sh32(Xsh_SHL, shift, a16));
            addInstr(env, X86Instr_Sh32(Xsh_SHL, shift, b16));
            addInstr(env, X86Instr_Sh32(shr_op,  shift, a16));
            addInstr(env, X86Instr_Sh32(shr_op,  shift, b16));
            addInstr(env, X86Instr_Alu32R(Xalu_MUL, X86RMI_Reg(a16), b16));
            return b16;
         }
         if (e->Iex.Binop.op == Iop_CmpF64) {
            HReg fL  = iselDblExpr(env, e->Iex.Binop.arg1);
            HReg fR  = iselDblExpr(env, e->Iex.Binop.arg2);
            HReg dst = newVRegI(env);
            addInstr(env, X86Instr_FpCmp(fL,fR,dst));
            /* shift this right 8 bits so as to conform to CmpF64
               definition. */
            addInstr(env, X86Instr_Sh32(Xsh_SHR, 8, dst));
            return dst;
         }
         if (e->Iex.Binop.op == Iop_F64toI32S
             || e->Iex.Binop.op == Iop_F64toI16S) {
            Int  sz  = e->Iex.Binop.op == Iop_F64toI16S ? 2 : 4;
            HReg rf  = iselDblExpr(env, e->Iex.Binop.arg2);
            HReg dst = newVRegI(env);

            /* Used several times ... */
            X86AMode* zero_esp = X86AMode_IR(0, hregX86_ESP());

            /* rf now holds the value to be converted, and rrm holds the
               rounding mode value, encoded as per the IRRoundingMode
               enum.  The first thing to do is set the FPU's rounding
               mode accordingly. */

            /* Create a space for the format conversion. */
            sub_from_esp(env, 4);

            /* Set host rounding mode */
            set_FPU_rounding_mode( env, e->Iex.Binop.arg1 );

            /* gistw/l %rf, 0(%esp) */
            addInstr(env, X86Instr_FpLdStI(False/*store*/,
                                           toUChar(sz), rf, zero_esp));

            if (sz == 2) {
               /* movzwl 0(%esp), %dst */
               addInstr(env, X86Instr_LoadEX(2,False,zero_esp,dst));
            } else {
               /* movl 0(%esp), %dst */
               vassert(sz == 4);
               addInstr(env, X86Instr_Alu32R(
                                Xalu_MOV, X86RMI_Mem(zero_esp), dst));
            }

            /* Restore default FPU rounding. */
            set_FPU_rounding_default( env );

            add_to_esp(env, 4);
            return dst;
         }

         break;
      }
      /* --------- UNARY OP --------- */
      case Iex_Unop: {

         /* 1Uto8(32to1(expr32)) */
         if (e->Iex.Unop.op == Iop_1Uto8) {
            DECLARE_PATTERN(p_32to1_then_1Uto8);
            DEFINE_PATTERN(p_32to1_then_1Uto8,
                           unop(Iop_1Uto8,unop(Iop_32to1,bind(0))));
            if (matchIRExpr(&mi,p_32to1_then_1Uto8,e)) {
               const IRExpr* expr32 = mi.bindee[0];
               HReg dst = newVRegI(env);
               HReg src = iselIntExpr_R(env, expr32);
               addInstr(env, mk_iMOVsd_RR(src,dst) );
               addInstr(env, X86Instr_Alu32R(Xalu_AND,
                                             X86RMI_Imm(1), dst));
               return dst;
            }
         }
         /* 8Uto32(LDle(expr32)) */
         if (e->Iex.Unop.op == Iop_8Uto32) {
            DECLARE_PATTERN(p_LDle8_then_8Uto32);
            DEFINE_PATTERN(p_LDle8_then_8Uto32,
                           unop(Iop_8Uto32,
                                IRExpr_Load(Iend_LE,Ity_I8,bind(0))) );
            if (matchIRExpr(&mi,p_LDle8_then_8Uto32,e)) {
               HReg dst = newVRegI(env);
               X86AMode* amode = iselIntExpr_AMode ( env, mi.bindee[0] );
               addInstr(env, X86Instr_LoadEX(1,False,amode,dst));
               return dst;
            }
         }

         /* 8Sto32(LDle(expr32)) */
         if (e->Iex.Unop.op == Iop_8Sto32) {
            DECLARE_PATTERN(p_LDle8_then_8Sto32);
            DEFINE_PATTERN(p_LDle8_then_8Sto32,
                           unop(Iop_8Sto32,
                                IRExpr_Load(Iend_LE,Ity_I8,bind(0))) );
            if (matchIRExpr(&mi,p_LDle8_then_8Sto32,e)) {
               HReg dst = newVRegI(env);
               X86AMode* amode = iselIntExpr_AMode ( env, mi.bindee[0] );
               addInstr(env, X86Instr_LoadEX(1,True,amode,dst));
               return dst;
            }
         }

         /* 16Uto32(LDle(expr32)) */
         if (e->Iex.Unop.op == Iop_16Uto32) {
            DECLARE_PATTERN(p_LDle16_then_16Uto32);
            DEFINE_PATTERN(p_LDle16_then_16Uto32,
                           unop(Iop_16Uto32,
                                IRExpr_Load(Iend_LE,Ity_I16,bind(0))) );
            if (matchIRExpr(&mi,p_LDle16_then_16Uto32,e)) {
               HReg dst = newVRegI(env);
               X86AMode* amode = iselIntExpr_AMode ( env, mi.bindee[0] );
               addInstr(env, X86Instr_LoadEX(2,False,amode,dst));
               return dst;
            }
         }
         /* 8Uto32(GET:I8) */
         if (e->Iex.Unop.op == Iop_8Uto32) {
            if (e->Iex.Unop.arg->tag == Iex_Get) {
               HReg dst;
               X86AMode* amode;
               vassert(e->Iex.Unop.arg->Iex.Get.ty == Ity_I8);
               dst = newVRegI(env);
               amode = X86AMode_IR(e->Iex.Unop.arg->Iex.Get.offset,
                                   hregX86_EBP());
               addInstr(env, X86Instr_LoadEX(1,False,amode,dst));
               return dst;
            }
         }

         /* 16to32(GET:I16) */
         if (e->Iex.Unop.op == Iop_16Uto32) {
            if (e->Iex.Unop.arg->tag == Iex_Get) {
               HReg dst;
               X86AMode* amode;
               vassert(e->Iex.Unop.arg->Iex.Get.ty == Ity_I16);
               dst = newVRegI(env);
               amode = X86AMode_IR(e->Iex.Unop.arg->Iex.Get.offset,
                                   hregX86_EBP());
               addInstr(env, X86Instr_LoadEX(2,False,amode,dst));
               return dst;
            }
         }
         switch (e->Iex.Unop.op) {
            case Iop_8Uto32:
            case Iop_16Uto32: {
               HReg dst = newVRegI(env);
               HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
               UInt mask = e->Iex.Unop.op==Iop_16Uto32 ? 0xFFFF : 0xFF;
               addInstr(env, mk_iMOVsd_RR(src,dst) );
               addInstr(env, X86Instr_Alu32R(Xalu_AND,
                                             X86RMI_Imm(mask), dst));
               return dst;
            }
            case Iop_8Sto32:
            case Iop_16Sto32: {
               HReg dst = newVRegI(env);
               HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
               UInt amt = e->Iex.Unop.op==Iop_16Sto32 ? 16 : 24;
               addInstr(env, mk_iMOVsd_RR(src,dst) );
               addInstr(env, X86Instr_Sh32(Xsh_SHL, amt, dst));
               addInstr(env, X86Instr_Sh32(Xsh_SAR, amt, dst));
               return dst;
            }
            case Iop_Not8:
            case Iop_Not16:
            case Iop_Not32: {
               HReg dst = newVRegI(env);
               HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
               addInstr(env, mk_iMOVsd_RR(src,dst) );
               addInstr(env, X86Instr_Unary32(Xun_NOT,dst));
               return dst;
            }
            case Iop_64HIto32: {
               HReg rHi, rLo;
               iselInt64Expr(&rHi,&rLo, env, e->Iex.Unop.arg);
               return rHi; /* and abandon rLo .. poor wee thing :-) */
            }
            case Iop_64to32: {
               HReg rHi, rLo;
               iselInt64Expr(&rHi,&rLo, env, e->Iex.Unop.arg);
               return rLo; /* similar stupid comment to the above ... */
            }
            case Iop_16HIto8:
            case Iop_32HIto16: {
               HReg dst  = newVRegI(env);
               HReg src  = iselIntExpr_R(env, e->Iex.Unop.arg);
               Int shift = e->Iex.Unop.op == Iop_16HIto8 ? 8 : 16;
               addInstr(env, mk_iMOVsd_RR(src,dst) );
               addInstr(env, X86Instr_Sh32(Xsh_SHR, shift, dst));
               return dst;
            }
            case Iop_1Uto8:
            case Iop_1Uto32: {
               HReg dst         = newVRegI(env);
               X86CondCode cond = iselCondCode(env, e->Iex.Unop.arg);
               addInstr(env, X86Instr_Set32(cond,dst));
               return dst;
            }
            case Iop_1Sto8:
            case Iop_1Sto16:
            case Iop_1Sto32: {
               /* could do better than this, but for now ... */
               HReg dst         = newVRegI(env);
               X86CondCode cond = iselCondCode(env, e->Iex.Unop.arg);
               addInstr(env, X86Instr_Set32(cond,dst));
               addInstr(env, X86Instr_Sh32(Xsh_SHL, 31, dst));
               addInstr(env, X86Instr_Sh32(Xsh_SAR, 31, dst));
               return dst;
            }
            case Iop_Ctz32: {
               /* Count trailing zeroes, implemented by x86 'bsfl' */
               HReg dst = newVRegI(env);
               HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
               addInstr(env, X86Instr_Bsfr32(True,src,dst));
               return dst;
            }
            case Iop_Clz32: {
               /* Count leading zeroes.  Do 'bsrl' to establish the index
                  of the highest set bit, and subtract that value from
                  31. */
               HReg tmp = newVRegI(env);
               HReg dst = newVRegI(env);
               HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
               addInstr(env, X86Instr_Bsfr32(False,src,tmp));
               addInstr(env, X86Instr_Alu32R(Xalu_MOV,
                                             X86RMI_Imm(31), dst));
               addInstr(env, X86Instr_Alu32R(Xalu_SUB,
                                             X86RMI_Reg(tmp), dst));
               return dst;
            }

            case Iop_CmpwNEZ32: {
               HReg dst = newVRegI(env);
               HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
               addInstr(env, mk_iMOVsd_RR(src,dst));
               addInstr(env, X86Instr_Unary32(Xun_NEG,dst));
               addInstr(env, X86Instr_Alu32R(Xalu_OR,
                                             X86RMI_Reg(src), dst));
               addInstr(env, X86Instr_Sh32(Xsh_SAR, 31, dst));
               return dst;
            }
            case Iop_Left8:
            case Iop_Left16:
            case Iop_Left32: {
               HReg dst = newVRegI(env);
               HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
               addInstr(env, mk_iMOVsd_RR(src, dst));
               addInstr(env, X86Instr_Unary32(Xun_NEG, dst));
               addInstr(env, X86Instr_Alu32R(Xalu_OR, X86RMI_Reg(src), dst));
               return dst;
            }

            case Iop_V128to32: {
               HReg      dst  = newVRegI(env);
               HReg      vec  = iselVecExpr(env, e->Iex.Unop.arg);
               X86AMode* esp0 = X86AMode_IR(0, hregX86_ESP());
               sub_from_esp(env, 16);
               addInstr(env, X86Instr_SseLdSt(False/*store*/, vec, esp0));
               addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(esp0), dst ));
               add_to_esp(env, 16);
               return dst;
            }
            /* ReinterpF32asI32(e) */
            /* Given an IEEE754 single, produce an I32 with the same bit
               pattern.  Keep stack 8-aligned even though only using 4
               bytes. */
            case Iop_ReinterpF32asI32: {
               HReg rf   = iselFltExpr(env, e->Iex.Unop.arg);
               HReg dst  = newVRegI(env);
               X86AMode* zero_esp = X86AMode_IR(0, hregX86_ESP());
               set_FPU_rounding_default(env);
               sub_from_esp(env, 8);
               /* gstF %rf, 0(%esp) */
               addInstr(env,
                        X86Instr_FpLdSt(False/*store*/, 4, rf, zero_esp));
               /* movl 0(%esp), %dst */
               addInstr(env,
                        X86Instr_Alu32R(Xalu_MOV, X86RMI_Mem(zero_esp), dst));
               add_to_esp(env, 8);
               return dst;
            }

            case Iop_16to8:
            case Iop_32to8:
            case Iop_32to16:
               /* These are no-ops. */
               return iselIntExpr_R(env, e->Iex.Unop.arg);

            case Iop_GetMSBs8x8: {
               /* Note: the following assumes the helper is of
                  signature
                     UInt fn ( ULong ), and is not a regparm fn.
               */
               HReg  xLo, xHi;
               HReg  dst = newVRegI(env);
               Addr fn = (Addr)h_generic_calc_GetMSBs8x8;
               iselInt64Expr(&xHi, &xLo, env, e->Iex.Unop.arg);
               addInstr(env, X86Instr_Push(X86RMI_Reg(xHi)));
               addInstr(env, X86Instr_Push(X86RMI_Reg(xLo)));
               addInstr(env, X86Instr_Call( Xcc_ALWAYS, (Addr32)fn,
                                            0, mk_RetLoc_simple(RLPri_Int) ));
               add_to_esp(env, 2*4);
               addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), dst));
               return dst;
            }

            default:
               break;
         }
         break;
      }
      /* --------- GET --------- */
      case Iex_Get: {
         if (ty == Ity_I32) {
            HReg dst = newVRegI(env);
            addInstr(env, X86Instr_Alu32R(
                             Xalu_MOV,
                             X86RMI_Mem(X86AMode_IR(e->Iex.Get.offset,
                                                    hregX86_EBP())),
                             dst));
            return dst;
         }
         if (ty == Ity_I8 || ty == Ity_I16) {
            HReg dst = newVRegI(env);
            addInstr(env, X86Instr_LoadEX(
                             toUChar(ty==Ity_I8 ? 1 : 2),
                             False,
                             X86AMode_IR(e->Iex.Get.offset,hregX86_EBP()),
                             dst));
            return dst;
         }
         break;
      }

      case Iex_GetI: {
         X86AMode* am
            = genGuestArrayOffset(
                 env, e->Iex.GetI.descr,
                      e->Iex.GetI.ix, e->Iex.GetI.bias );
         HReg dst = newVRegI(env);
         if (ty == Ity_I8) {
            addInstr(env, X86Instr_LoadEX( 1, False, am, dst ));
            return dst;
         }
         if (ty == Ity_I32) {
            addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Mem(am), dst));
            return dst;
         }
         break;
      }
      /* --------- CCALL --------- */
      case Iex_CCall: {
         HReg dst = newVRegI(env);
         vassert(ty == e->Iex.CCall.retty);

         /* be very restrictive for now.  Only 32/64-bit ints allowed for
            args, and 32 bits for return type.  Don't forget to change
            the RetLoc if more return types are allowed in future. */
         if (e->Iex.CCall.retty != Ity_I32)
            goto irreducible;

         /* Marshal args, do the call, clear stack. */
         UInt   addToSp = 0;
         RetLoc rloc    = mk_RetLoc_INVALID();
         doHelperCall( &addToSp, &rloc, env, NULL/*guard*/,
                       e->Iex.CCall.cee, e->Iex.CCall.retty, e->Iex.CCall.args );
         vassert(is_sane_RetLoc(rloc));
         vassert(rloc.pri == RLPri_Int);
         vassert(addToSp == 0);

         addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), dst));
         return dst;
      }

      /* --------- LITERAL --------- */
      /* 32/16/8-bit literals */
      case Iex_Const: {
         X86RMI* rmi = iselIntExpr_RMI ( env, e );
         HReg    r   = newVRegI(env);
         addInstr(env, X86Instr_Alu32R(Xalu_MOV, rmi, r));
         return r;
      }
      /* --------- MULTIPLEX --------- */
      case Iex_ITE: { // VFD
         if ((ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8)
             && typeOfIRExpr(env->type_env,e->Iex.ITE.cond) == Ity_I1) {
            HReg   r1  = iselIntExpr_R(env, e->Iex.ITE.iftrue);
            X86RM* r0  = iselIntExpr_RM(env, e->Iex.ITE.iffalse);
            HReg   dst = newVRegI(env);
            addInstr(env, mk_iMOVsd_RR(r1,dst));
            X86CondCode cc = iselCondCode(env, e->Iex.ITE.cond);
            addInstr(env, X86Instr_CMov32(cc ^ 1, r0, dst));
            return dst;
         }
         break;
      }

      default:
         break;
   } /* switch (e->tag) */

   /* We get here if no pattern matched. */
  irreducible:
   ppIRExpr(e);
   vpanic("iselIntExpr_R: cannot reduce tree");
}
/*---------------------------------------------------------*/
/*--- ISEL: Integer expression auxiliaries              ---*/
/*---------------------------------------------------------*/

/* --------------------- AMODEs --------------------- */

/* Return an AMode which computes the value of the specified
   expression, possibly also adding insns to the code list as a
   result.  The expression may only be a 32-bit one.
*/

static Bool sane_AMode ( X86AMode* am )
{
   switch (am->tag) {
      case Xam_IR:
         return
            toBool( hregClass(am->Xam.IR.reg) == HRcInt32
                    && (hregIsVirtual(am->Xam.IR.reg)
                        || sameHReg(am->Xam.IR.reg, hregX86_EBP())) );
      case Xam_IRRS:
         return
            toBool( hregClass(am->Xam.IRRS.base) == HRcInt32
                    && hregIsVirtual(am->Xam.IRRS.base)
                    && hregClass(am->Xam.IRRS.index) == HRcInt32
                    && hregIsVirtual(am->Xam.IRRS.index) );
      default:
         vpanic("sane_AMode: unknown x86 amode tag");
   }
}

static X86AMode* iselIntExpr_AMode ( ISelEnv* env, const IRExpr* e )
{
   X86AMode* am = iselIntExpr_AMode_wrk(env, e);
   vassert(sane_AMode(am));
   return am;
}
/* DO NOT CALL THIS DIRECTLY ! */
static X86AMode* iselIntExpr_AMode_wrk ( ISelEnv* env, const IRExpr* e )
{
   IRType ty = typeOfIRExpr(env->type_env,e);
   vassert(ty == Ity_I32);

   /* Add32( Add32(expr1, Shl32(expr2, simm)), imm32 ) */
   if (e->tag == Iex_Binop
       && e->Iex.Binop.op == Iop_Add32
       && e->Iex.Binop.arg2->tag == Iex_Const
       && e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U32
       && e->Iex.Binop.arg1->tag == Iex_Binop
       && e->Iex.Binop.arg1->Iex.Binop.op == Iop_Add32
       && e->Iex.Binop.arg1->Iex.Binop.arg2->tag == Iex_Binop
       && e->Iex.Binop.arg1->Iex.Binop.arg2->Iex.Binop.op == Iop_Shl32
       && e->Iex.Binop.arg1
           ->Iex.Binop.arg2->Iex.Binop.arg2->tag == Iex_Const
       && e->Iex.Binop.arg1
           ->Iex.Binop.arg2->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8) {
      UInt shift = e->Iex.Binop.arg1
                    ->Iex.Binop.arg2->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
      UInt imm32 = e->Iex.Binop.arg2->Iex.Const.con->Ico.U32;
      if (shift == 1 || shift == 2 || shift == 3) {
         HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1->Iex.Binop.arg1);
         HReg r2 = iselIntExpr_R(env, e->Iex.Binop.arg1
                                       ->Iex.Binop.arg2->Iex.Binop.arg1 );
         return X86AMode_IRRS(imm32, r1, r2, shift);
      }
   }

   /* Add32(expr1, Shl32(expr2, imm)) */
   if (e->tag == Iex_Binop
       && e->Iex.Binop.op == Iop_Add32
       && e->Iex.Binop.arg2->tag == Iex_Binop
       && e->Iex.Binop.arg2->Iex.Binop.op == Iop_Shl32
       && e->Iex.Binop.arg2->Iex.Binop.arg2->tag == Iex_Const
       && e->Iex.Binop.arg2->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8) {
      UInt shift = e->Iex.Binop.arg2->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
      if (shift == 1 || shift == 2 || shift == 3) {
         HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1);
         HReg r2 = iselIntExpr_R(env, e->Iex.Binop.arg2->Iex.Binop.arg1 );
         return X86AMode_IRRS(0, r1, r2, shift);
      }
   }

   if (e->tag == Iex_Binop
       && e->Iex.Binop.op == Iop_Add32
       && e->Iex.Binop.arg2->tag == Iex_Const
       && e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U32) {
      HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1);
      return X86AMode_IR(e->Iex.Binop.arg2->Iex.Const.con->Ico.U32, r1);
   }

   /* Doesn't match anything in particular.  Generate it into
      a register and use that. */
   {
      HReg r1 = iselIntExpr_R(env, e);
      return X86AMode_IR(0, r1);
   }
}
/* --------------------- RMIs --------------------- */

/* Similarly, calculate an expression into an X86RMI operand.  As with
   iselIntExpr_R, the expression can have type 32, 16 or 8 bits.  */

static X86RMI* iselIntExpr_RMI ( ISelEnv* env, const IRExpr* e )
{
   X86RMI* rmi = iselIntExpr_RMI_wrk(env, e);
   /* sanity checks ... */
   switch (rmi->tag) {
      case Xrmi_Imm:
         return rmi;
      case Xrmi_Reg:
         vassert(hregClass(rmi->Xrmi.Reg.reg) == HRcInt32);
         vassert(hregIsVirtual(rmi->Xrmi.Reg.reg));
         return rmi;
      case Xrmi_Mem:
         vassert(sane_AMode(rmi->Xrmi.Mem.am));
         return rmi;
      default:
         vpanic("iselIntExpr_RMI: unknown x86 RMI tag");
   }
}

/* DO NOT CALL THIS DIRECTLY ! */
static X86RMI* iselIntExpr_RMI_wrk ( ISelEnv* env, const IRExpr* e )
{
   IRType ty = typeOfIRExpr(env->type_env,e);
   vassert(ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8);

   /* special case: immediate */
   if (e->tag == Iex_Const) {
      UInt u;
      switch (e->Iex.Const.con->tag) {
         case Ico_U32: u = e->Iex.Const.con->Ico.U32; break;
         case Ico_U16: u = 0xFFFF & (e->Iex.Const.con->Ico.U16); break;
         case Ico_U8:  u = 0xFF & (e->Iex.Const.con->Ico.U8); break;
         default: vpanic("iselIntExpr_RMI.Iex_Const(x86h)");
      }
      return X86RMI_Imm(u);
   }

   /* special case: 32-bit GET */
   if (e->tag == Iex_Get && ty == Ity_I32) {
      return X86RMI_Mem(X86AMode_IR(e->Iex.Get.offset,
                                    hregX86_EBP()));
   }

   /* special case: 32-bit load from memory */
   if (e->tag == Iex_Load && ty == Ity_I32
       && e->Iex.Load.end == Iend_LE) {
      X86AMode* am = iselIntExpr_AMode(env, e->Iex.Load.addr);
      return X86RMI_Mem(am);
   }

   /* default case: calculate into a register and return that */
   {
      HReg r = iselIntExpr_R ( env, e );
      return X86RMI_Reg(r);
   }
}
/* --------------------- RIs --------------------- */

/* Calculate an expression into an X86RI operand.  As with
   iselIntExpr_R, the expression can have type 32, 16 or 8 bits. */

static X86RI* iselIntExpr_RI ( ISelEnv* env, const IRExpr* e )
{
   X86RI* ri = iselIntExpr_RI_wrk(env, e);
   /* sanity checks ... */
   switch (ri->tag) {
      case Xri_Imm:
         return ri;
      case Xri_Reg:
         vassert(hregClass(ri->Xri.Reg.reg) == HRcInt32);
         vassert(hregIsVirtual(ri->Xri.Reg.reg));
         return ri;
      default:
         vpanic("iselIntExpr_RI: unknown x86 RI tag");
   }
}

/* DO NOT CALL THIS DIRECTLY ! */
static X86RI* iselIntExpr_RI_wrk ( ISelEnv* env, const IRExpr* e )
{
   IRType ty = typeOfIRExpr(env->type_env,e);
   vassert(ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8);

   /* special case: immediate */
   if (e->tag == Iex_Const) {
      UInt u;
      switch (e->Iex.Const.con->tag) {
         case Ico_U32: u = e->Iex.Const.con->Ico.U32; break;
         case Ico_U16: u = 0xFFFF & (e->Iex.Const.con->Ico.U16); break;
         case Ico_U8:  u = 0xFF & (e->Iex.Const.con->Ico.U8); break;
         default: vpanic("iselIntExpr_RMI.Iex_Const(x86h)");
      }
      return X86RI_Imm(u);
   }

   /* default case: calculate into a register and return that */
   {
      HReg r = iselIntExpr_R ( env, e );
      return X86RI_Reg(r);
   }
}
/* --------------------- RMs --------------------- */

/* Similarly, calculate an expression into an X86RM operand.  As with
   iselIntExpr_R, the expression can have type 32, 16 or 8 bits.  */

static X86RM* iselIntExpr_RM ( ISelEnv* env, const IRExpr* e )
{
   X86RM* rm = iselIntExpr_RM_wrk(env, e);
   /* sanity checks ... */
   switch (rm->tag) {
      case Xrm_Reg:
         vassert(hregClass(rm->Xrm.Reg.reg) == HRcInt32);
         vassert(hregIsVirtual(rm->Xrm.Reg.reg));
         return rm;
      case Xrm_Mem:
         vassert(sane_AMode(rm->Xrm.Mem.am));
         return rm;
      default:
         vpanic("iselIntExpr_RM: unknown x86 RM tag");
   }
}

/* DO NOT CALL THIS DIRECTLY ! */
static X86RM* iselIntExpr_RM_wrk ( ISelEnv* env, const IRExpr* e )
{
   IRType ty = typeOfIRExpr(env->type_env,e);
   vassert(ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8);

   /* special case: 32-bit GET */
   if (e->tag == Iex_Get && ty == Ity_I32) {
      return X86RM_Mem(X86AMode_IR(e->Iex.Get.offset,
                                   hregX86_EBP()));
   }

   /* special case: load from memory */

   /* default case: calculate into a register and return that */
   {
      HReg r = iselIntExpr_R ( env, e );
      return X86RM_Reg(r);
   }
}
/* --------------------- CONDCODE --------------------- */

/* Generate code to evaluate a bit-typed expression, returning the
   condition code which would correspond when the expression would
   notionally have returned 1. */

static X86CondCode iselCondCode ( ISelEnv* env, const IRExpr* e )
{
   /* Uh, there's nothing we can sanity check here, unfortunately. */
   return iselCondCode_wrk(env,e);
}

/* DO NOT CALL THIS DIRECTLY ! */
static X86CondCode iselCondCode_wrk ( ISelEnv* env, const IRExpr* e )
{
   MatchInfo mi;

   vassert(typeOfIRExpr(env->type_env,e) == Ity_I1);

   /* var */
   if (e->tag == Iex_RdTmp) {
      HReg r32 = lookupIRTemp(env, e->Iex.RdTmp.tmp);
      /* Test32 doesn't modify r32; so this is OK. */
      addInstr(env, X86Instr_Test32(1,X86RM_Reg(r32)));
      return Xcc_NZ;
   }
1798 if (e
->tag
== Iex_Const
) {
1800 vassert(e
->Iex
.Const
.con
->tag
== Ico_U1
);
1801 vassert(e
->Iex
.Const
.con
->Ico
.U1
== True
1802 || e
->Iex
.Const
.con
->Ico
.U1
== False
);
1804 addInstr(env
, X86Instr_Alu32R(Xalu_MOV
,X86RMI_Imm(0),r
));
1805 addInstr(env
, X86Instr_Alu32R(Xalu_XOR
,X86RMI_Reg(r
),r
));
1806 return e
->Iex
.Const
.con
->Ico
.U1
? Xcc_Z
: Xcc_NZ
;
1810 if (e
->tag
== Iex_Unop
&& e
->Iex
.Unop
.op
== Iop_Not1
) {
1811 /* Generate code for the arg, and negate the test condition */
1812 return 1 ^ iselCondCode(env
, e
->Iex
.Unop
.arg
);
1815 /* --- patterns rooted at: 32to1 --- */
1817 if (e
->tag
== Iex_Unop
1818 && e
->Iex
.Unop
.op
== Iop_32to1
) {
1819 X86RM
* rm
= iselIntExpr_RM(env
, e
->Iex
.Unop
.arg
);
1820 addInstr(env
, X86Instr_Test32(1,rm
));
1824 /* --- patterns rooted at: CmpNEZ8 --- */
1827 if (e
->tag
== Iex_Unop
1828 && e
->Iex
.Unop
.op
== Iop_CmpNEZ8
) {
1829 X86RM
* rm
= iselIntExpr_RM(env
, e
->Iex
.Unop
.arg
);
1830 addInstr(env
, X86Instr_Test32(0xFF,rm
));
1834 /* --- patterns rooted at: CmpNEZ16 --- */
1837 if (e
->tag
== Iex_Unop
1838 && e
->Iex
.Unop
.op
== Iop_CmpNEZ16
) {
1839 X86RM
* rm
= iselIntExpr_RM(env
, e
->Iex
.Unop
.arg
);
1840 addInstr(env
, X86Instr_Test32(0xFFFF,rm
));
   /* --- patterns rooted at: CmpNEZ32 --- */

   /* CmpNEZ32(And32(x,y)) */
   {
      DECLARE_PATTERN(p_CmpNEZ32_And32);
      DEFINE_PATTERN(p_CmpNEZ32_And32,
                     unop(Iop_CmpNEZ32, binop(Iop_And32, bind(0), bind(1))));
      if (matchIRExpr(&mi, p_CmpNEZ32_And32, e)) {
         HReg    r0   = iselIntExpr_R(env, mi.bindee[0]);
         X86RMI* rmi1 = iselIntExpr_RMI(env, mi.bindee[1]);
         HReg    tmp  = newVRegI(env);
         addInstr(env, mk_iMOVsd_RR(r0, tmp));
         addInstr(env, X86Instr_Alu32R(Xalu_AND,rmi1,tmp));
         return Xcc_NZ;
      }
   }

   /* CmpNEZ32(Or32(x,y)) */
   {
      DECLARE_PATTERN(p_CmpNEZ32_Or32);
      DEFINE_PATTERN(p_CmpNEZ32_Or32,
                     unop(Iop_CmpNEZ32, binop(Iop_Or32, bind(0), bind(1))));
      if (matchIRExpr(&mi, p_CmpNEZ32_Or32, e)) {
         HReg    r0   = iselIntExpr_R(env, mi.bindee[0]);
         X86RMI* rmi1 = iselIntExpr_RMI(env, mi.bindee[1]);
         HReg    tmp  = newVRegI(env);
         addInstr(env, mk_iMOVsd_RR(r0, tmp));
         addInstr(env, X86Instr_Alu32R(Xalu_OR,rmi1,tmp));
         return Xcc_NZ;
      }
   }

   /* CmpNEZ32(GET(..):I32) */
   if (e->tag == Iex_Unop
       && e->Iex.Unop.op == Iop_CmpNEZ32
       && e->Iex.Unop.arg->tag == Iex_Get) {
      X86AMode* am = X86AMode_IR(e->Iex.Unop.arg->Iex.Get.offset,
                                 hregX86_EBP());
      addInstr(env, X86Instr_Alu32M(Xalu_CMP, X86RI_Imm(0), am));
      return Xcc_NZ;
   }

   if (e->tag == Iex_Unop
       && e->Iex.Unop.op == Iop_CmpNEZ32) {
      HReg    r1   = iselIntExpr_R(env, e->Iex.Unop.arg);
      X86RMI* rmi2 = X86RMI_Imm(0);
      addInstr(env, X86Instr_Alu32R(Xalu_CMP,rmi2,r1));
      return Xcc_NZ;
   }
   /* --- patterns rooted at: CmpNEZ64 --- */

   /* CmpNEZ64(Or64(x,y)) */
   {
      DECLARE_PATTERN(p_CmpNEZ64_Or64);
      DEFINE_PATTERN(p_CmpNEZ64_Or64,
                     unop(Iop_CmpNEZ64, binop(Iop_Or64, bind(0), bind(1))));
      if (matchIRExpr(&mi, p_CmpNEZ64_Or64, e)) {
         HReg hi1, lo1, hi2, lo2;
         HReg tmp = newVRegI(env);
         iselInt64Expr( &hi1, &lo1, env, mi.bindee[0] );
         addInstr(env, mk_iMOVsd_RR(hi1, tmp));
         addInstr(env, X86Instr_Alu32R(Xalu_OR,X86RMI_Reg(lo1),tmp));
         iselInt64Expr( &hi2, &lo2, env, mi.bindee[1] );
         addInstr(env, X86Instr_Alu32R(Xalu_OR,X86RMI_Reg(hi2),tmp));
         addInstr(env, X86Instr_Alu32R(Xalu_OR,X86RMI_Reg(lo2),tmp));
         return Xcc_NZ;
      }
   }

   if (e->tag == Iex_Unop
       && e->Iex.Unop.op == Iop_CmpNEZ64) {
      HReg hi, lo;
      HReg tmp = newVRegI(env);
      iselInt64Expr( &hi, &lo, env, e->Iex.Unop.arg );
      addInstr(env, mk_iMOVsd_RR(hi, tmp));
      addInstr(env, X86Instr_Alu32R(Xalu_OR,X86RMI_Reg(lo), tmp));
      return Xcc_NZ;
   }
   /* --- patterns rooted at: Cmp{EQ,NE}{8,16} --- */

   /* CmpEQ8 / CmpNE8 */
   if (e->tag == Iex_Binop
       && (e->Iex.Binop.op == Iop_CmpEQ8
           || e->Iex.Binop.op == Iop_CmpNE8
           || e->Iex.Binop.op == Iop_CasCmpEQ8
           || e->Iex.Binop.op == Iop_CasCmpNE8)) {
      if (isZeroU8(e->Iex.Binop.arg2)) {
         HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1);
         addInstr(env, X86Instr_Test32(0xFF,X86RM_Reg(r1)));
         switch (e->Iex.Binop.op) {
            case Iop_CmpEQ8: case Iop_CasCmpEQ8: return Xcc_Z;
            case Iop_CmpNE8: case Iop_CasCmpNE8: return Xcc_NZ;
            default: vpanic("iselCondCode(x86): CmpXX8(expr,0:I8)");
         }
      } else {
         HReg    r1   = iselIntExpr_R(env, e->Iex.Binop.arg1);
         X86RMI* rmi2 = iselIntExpr_RMI(env, e->Iex.Binop.arg2);
         HReg    r    = newVRegI(env);
         addInstr(env, mk_iMOVsd_RR(r1,r));
         addInstr(env, X86Instr_Alu32R(Xalu_XOR,rmi2,r));
         addInstr(env, X86Instr_Test32(0xFF,X86RM_Reg(r)));
         switch (e->Iex.Binop.op) {
            case Iop_CmpEQ8: case Iop_CasCmpEQ8: return Xcc_Z;
            case Iop_CmpNE8: case Iop_CasCmpNE8: return Xcc_NZ;
            default: vpanic("iselCondCode(x86): CmpXX8(expr,expr)");
         }
      }
   }

   /* CmpEQ16 / CmpNE16 */
   if (e->tag == Iex_Binop
       && (e->Iex.Binop.op == Iop_CmpEQ16
           || e->Iex.Binop.op == Iop_CmpNE16
           || e->Iex.Binop.op == Iop_CasCmpEQ16
           || e->Iex.Binop.op == Iop_CasCmpNE16
           || e->Iex.Binop.op == Iop_ExpCmpNE16)) {
      HReg    r1   = iselIntExpr_R(env, e->Iex.Binop.arg1);
      X86RMI* rmi2 = iselIntExpr_RMI(env, e->Iex.Binop.arg2);
      HReg    r    = newVRegI(env);
      addInstr(env, mk_iMOVsd_RR(r1,r));
      addInstr(env, X86Instr_Alu32R(Xalu_XOR,rmi2,r));
      addInstr(env, X86Instr_Test32(0xFFFF,X86RM_Reg(r)));
      switch (e->Iex.Binop.op) {
         case Iop_CmpEQ16: case Iop_CasCmpEQ16:
            return Xcc_Z;
         case Iop_CmpNE16: case Iop_CasCmpNE16: case Iop_ExpCmpNE16:
            return Xcc_NZ;
         default:
            vpanic("iselCondCode(x86): CmpXX16");
      }
   }
   /* CmpNE32(ccall, 32-bit constant) (--smc-check=all optimisation).
      Saves a "movl %eax, %tmp" compared to the default route. */
   if (e->tag == Iex_Binop 
       && e->Iex.Binop.op == Iop_CmpNE32
       && e->Iex.Binop.arg1->tag == Iex_CCall
       && e->Iex.Binop.arg2->tag == Iex_Const) {
      IRExpr* cal = e->Iex.Binop.arg1;
      IRExpr* con = e->Iex.Binop.arg2;
      /* clone & partial-eval of generic Iex_CCall and Iex_Const cases */
      vassert(cal->Iex.CCall.retty == Ity_I32); /* else ill-typed IR */
      vassert(con->Iex.Const.con->tag == Ico_U32);
      /* Marshal args, do the call. */
      UInt   addToSp = 0;
      RetLoc rloc    = mk_RetLoc_INVALID();
      doHelperCall( &addToSp, &rloc, env, NULL/*guard*/,
                    cal->Iex.CCall.cee,
                    cal->Iex.CCall.retty, cal->Iex.CCall.args );
      vassert(is_sane_RetLoc(rloc));
      vassert(rloc.pri == RLPri_Int);
      vassert(addToSp == 0);
      addInstr(env, X86Instr_Alu32R(Xalu_CMP,
                                    X86RMI_Imm(con->Iex.Const.con->Ico.U32),
                                    hregX86_EAX()));
      return Xcc_NZ;
   }
   if (e->tag == Iex_Binop 
       && (e->Iex.Binop.op == Iop_CmpEQ32
           || e->Iex.Binop.op == Iop_CmpNE32
           || e->Iex.Binop.op == Iop_CmpLT32S
           || e->Iex.Binop.op == Iop_CmpLT32U
           || e->Iex.Binop.op == Iop_CmpLE32S
           || e->Iex.Binop.op == Iop_CmpLE32U
           || e->Iex.Binop.op == Iop_CasCmpEQ32
           || e->Iex.Binop.op == Iop_CasCmpNE32
           || e->Iex.Binop.op == Iop_ExpCmpNE32)) {
      HReg    r1   = iselIntExpr_R(env, e->Iex.Binop.arg1);
      X86RMI* rmi2 = iselIntExpr_RMI(env, e->Iex.Binop.arg2);
      addInstr(env, X86Instr_Alu32R(Xalu_CMP,rmi2,r1));
      switch (e->Iex.Binop.op) {
         case Iop_CmpEQ32: case Iop_CasCmpEQ32: return Xcc_Z;
         case Iop_CmpNE32:
         case Iop_CasCmpNE32: case Iop_ExpCmpNE32: return Xcc_NZ;
         case Iop_CmpLT32S: return Xcc_L;
         case Iop_CmpLT32U: return Xcc_B;
         case Iop_CmpLE32S: return Xcc_LE;
         case Iop_CmpLE32U: return Xcc_BE;
         default: vpanic("iselCondCode(x86): CmpXX32");
      }
   }
   if (e->tag == Iex_Binop 
       && (e->Iex.Binop.op == Iop_CmpNE64
           || e->Iex.Binop.op == Iop_CmpEQ64)) {
      HReg hi1, hi2, lo1, lo2;
      HReg tHi = newVRegI(env);
      HReg tLo = newVRegI(env);
      iselInt64Expr( &hi1, &lo1, env, e->Iex.Binop.arg1 );
      iselInt64Expr( &hi2, &lo2, env, e->Iex.Binop.arg2 );
      addInstr(env, mk_iMOVsd_RR(hi1, tHi));
      addInstr(env, X86Instr_Alu32R(Xalu_XOR,X86RMI_Reg(hi2), tHi));
      addInstr(env, mk_iMOVsd_RR(lo1, tLo));
      addInstr(env, X86Instr_Alu32R(Xalu_XOR,X86RMI_Reg(lo2), tLo));
      addInstr(env, X86Instr_Alu32R(Xalu_OR,X86RMI_Reg(tHi), tLo));
      switch (e->Iex.Binop.op) {
         case Iop_CmpNE64: return Xcc_NZ;
         case Iop_CmpEQ64: return Xcc_Z;
         default: vpanic("iselCondCode(x86): CmpXX64");
      }
   }
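   /* The 64-bit equality test above XORs corresponding halves and ORs the
      two differences together; the OR result is zero (ZF set) iff both
      halves were equal, so Xcc_Z/Xcc_NZ select CmpEQ64/CmpNE64. */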
   /* And1(x,y), Or1(x,y) */
   /* FIXME: We could (and probably should) do a lot better here.  If both args
      are in temps already then we can just emit a reg-reg And/Or directly,
      followed by the final Test. */
   if (e->tag == Iex_Binop
       && (e->Iex.Binop.op == Iop_And1 || e->Iex.Binop.op == Iop_Or1)) {
      // We could probably be cleverer about this.  In the meantime ..
      HReg x_as_32 = newVRegI(env);
      X86CondCode cc_x = iselCondCode(env, e->Iex.Binop.arg1);
      addInstr(env, X86Instr_Set32(cc_x, x_as_32));
      HReg y_as_32 = newVRegI(env);
      X86CondCode cc_y = iselCondCode(env, e->Iex.Binop.arg2);
      addInstr(env, X86Instr_Set32(cc_y, y_as_32));
      X86AluOp aop = e->Iex.Binop.op == Iop_And1 ? Xalu_AND : Xalu_OR;
      addInstr(env, X86Instr_Alu32R(aop, X86RMI_Reg(x_as_32), y_as_32));
      addInstr(env, X86Instr_Test32(1, X86RM_Reg(y_as_32)));
      return Xcc_NZ;
   }

   ppIRExpr(e);
   vpanic("iselCondCode");
}
/*---------------------------------------------------------*/
/*--- ISEL: Integer expressions (64 bit)                ---*/
/*---------------------------------------------------------*/

/* Compute a 64-bit value into a register pair, which is returned as
   the first two parameters.  As with iselIntExpr_R, these may be
   either real or virtual regs; in any case they must not be changed
   by subsequent code emitted by the caller.  */

static void iselInt64Expr ( HReg* rHi, HReg* rLo, ISelEnv* env,
                            const IRExpr* e )
{
   iselInt64Expr_wrk(rHi, rLo, env, e);
#  if 0
   vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
#  endif
   vassert(hregClass(*rHi) == HRcInt32);
   vassert(hregIsVirtual(*rHi));
   vassert(hregClass(*rLo) == HRcInt32);
   vassert(hregIsVirtual(*rLo));
}

/* DO NOT CALL THIS DIRECTLY ! */
static void iselInt64Expr_wrk ( HReg* rHi, HReg* rLo, ISelEnv* env,
                                const IRExpr* e )
{
   MatchInfo mi;
   HWord fn = 0; /* helper fn for most SIMD64 stuff */
   vassert(typeOfIRExpr(env->type_env,e) == Ity_I64);
   /* 64-bit literal */
   if (e->tag == Iex_Const) {
      ULong w64 = e->Iex.Const.con->Ico.U64;
      UInt  wHi = toUInt(w64 >> 32);
      UInt  wLo = toUInt(w64);
      HReg  tLo = newVRegI(env);
      HReg  tHi = newVRegI(env);
      vassert(e->Iex.Const.con->tag == Ico_U64);
      if (wLo == wHi) {
         /* Save a precious Int register in this special case. */
         addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(wLo), tLo));
         *rHi = tLo;
         *rLo = tLo;
      } else {
         addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(wHi), tHi));
         addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(wLo), tLo));
         *rHi = tHi;
         *rLo = tLo;
      }
      return;
   }

   /* read 64-bit IRTemp */
   if (e->tag == Iex_RdTmp) {
      lookupIRTemp64( rHi, rLo, env, e->Iex.RdTmp.tmp);
      return;
   }
   /* 64-bit load */
   if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
      HReg     tLo, tHi;
      X86AMode *am0, *am4;
      vassert(e->Iex.Load.ty == Ity_I64);
      tLo = newVRegI(env);
      tHi = newVRegI(env);
      am0 = iselIntExpr_AMode(env, e->Iex.Load.addr);
      am4 = advance4(am0);
      addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(am0), tLo ));
      addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(am4), tHi ));
      *rHi = tHi;
      *rLo = tLo;
      return;
   }

   /* 64-bit GET */
   if (e->tag == Iex_Get) {
      X86AMode* am  = X86AMode_IR(e->Iex.Get.offset, hregX86_EBP());
      X86AMode* am4 = advance4(am);
      HReg tLo = newVRegI(env);
      HReg tHi = newVRegI(env);
      addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(am), tLo ));
      addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(am4), tHi ));
      *rHi = tHi;
      *rLo = tLo;
      return;
   }

   /* 64-bit GETI */
   if (e->tag == Iex_GetI) {
      X86AMode* am 
         = genGuestArrayOffset( env, e->Iex.GetI.descr, 
                                e->Iex.GetI.ix, e->Iex.GetI.bias );
      X86AMode* am4 = advance4(am);
      HReg tLo = newVRegI(env);
      HReg tHi = newVRegI(env);
      addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(am), tLo ));
      addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(am4), tHi ));
      *rHi = tHi;
      *rLo = tLo;
      return;
   }
   /* 64-bit ITE: ITE(g, expr, expr) */ // VFD
   if (e->tag == Iex_ITE) {
      HReg e0Lo, e0Hi, e1Lo, e1Hi;
      HReg tLo = newVRegI(env);
      HReg tHi = newVRegI(env);
      iselInt64Expr(&e0Hi, &e0Lo, env, e->Iex.ITE.iffalse);
      iselInt64Expr(&e1Hi, &e1Lo, env, e->Iex.ITE.iftrue);
      addInstr(env, mk_iMOVsd_RR(e1Hi, tHi));
      addInstr(env, mk_iMOVsd_RR(e1Lo, tLo));
      X86CondCode cc = iselCondCode(env, e->Iex.ITE.cond);
      /* This assumes the first cmov32 doesn't trash the condition
         codes, so they are still available for the second cmov32 */
      addInstr(env, X86Instr_CMov32(cc ^ 1, X86RM_Reg(e0Hi), tHi));
      addInstr(env, X86Instr_CMov32(cc ^ 1, X86RM_Reg(e0Lo), tLo));
      *rHi = tHi;
      *rLo = tLo;
      return;
   }
   /* --------- BINARY ops --------- */
   if (e->tag == Iex_Binop) {
      switch (e->Iex.Binop.op) {
         /* 32 x 32 -> 64 multiply */
         case Iop_MullU32:
         case Iop_MullS32: {
            /* get one operand into %eax, and the other into a R/M.
               Need to make an educated guess about which is better in
               which. */
            HReg   tLo    = newVRegI(env);
            HReg   tHi    = newVRegI(env);
            Bool   syned  = toBool(e->Iex.Binop.op == Iop_MullS32);
            X86RM* rmLeft = iselIntExpr_RM(env, e->Iex.Binop.arg1);
            HReg   rRight = iselIntExpr_R(env, e->Iex.Binop.arg2);
            addInstr(env, mk_iMOVsd_RR(rRight, hregX86_EAX()));
            addInstr(env, X86Instr_MulL(syned, rmLeft));
            /* Result is now in EDX:EAX.  Tell the caller. */
            addInstr(env, mk_iMOVsd_RR(hregX86_EDX(), tHi));
            addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), tLo));
            *rHi = tHi;
            *rLo = tLo;
            return;
         }

         /* 64 x 32 -> (32(rem),32(div)) division */
         case Iop_DivModU64to32:
         case Iop_DivModS64to32: {
            /* Get the 64-bit operand into edx:eax, and the other into
               any old R/M. */
            HReg   sHi, sLo;
            HReg   tLo     = newVRegI(env);
            HReg   tHi     = newVRegI(env);
            Bool   syned   = toBool(e->Iex.Binop.op == Iop_DivModS64to32);
            X86RM* rmRight = iselIntExpr_RM(env, e->Iex.Binop.arg2);
            iselInt64Expr(&sHi,&sLo, env, e->Iex.Binop.arg1);
            addInstr(env, mk_iMOVsd_RR(sHi, hregX86_EDX()));
            addInstr(env, mk_iMOVsd_RR(sLo, hregX86_EAX()));
            addInstr(env, X86Instr_Div(syned, rmRight));
            addInstr(env, mk_iMOVsd_RR(hregX86_EDX(), tHi));
            addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), tLo));
            *rHi = tHi;
            *rLo = tLo;
            return;
         }
         /* Or64/And64/Xor64 */
         case Iop_Or64:
         case Iop_And64:
         case Iop_Xor64: {
            HReg xLo, xHi, yLo, yHi;
            HReg tLo = newVRegI(env);
            HReg tHi = newVRegI(env);
            X86AluOp op = e->Iex.Binop.op==Iop_Or64 ? Xalu_OR
                          : e->Iex.Binop.op==Iop_And64 ? Xalu_AND
                          : Xalu_XOR;
            iselInt64Expr(&xHi, &xLo, env, e->Iex.Binop.arg1);
            iselInt64Expr(&yHi, &yLo, env, e->Iex.Binop.arg2);
            addInstr(env, mk_iMOVsd_RR(xHi, tHi));
            addInstr(env, X86Instr_Alu32R(op, X86RMI_Reg(yHi), tHi));
            addInstr(env, mk_iMOVsd_RR(xLo, tLo));
            addInstr(env, X86Instr_Alu32R(op, X86RMI_Reg(yLo), tLo));
            *rHi = tHi;
            *rLo = tLo;
            return;
         }
         /* Add64/Sub64 */
         case Iop_Add64:
            if (e->Iex.Binop.arg2->tag == Iex_Const) {
               /* special case Add64(e, const) */
               ULong w64 = e->Iex.Binop.arg2->Iex.Const.con->Ico.U64;
               UInt  wHi = toUInt(w64 >> 32);
               UInt  wLo = toUInt(w64);
               HReg  tLo = newVRegI(env);
               HReg  tHi = newVRegI(env);
               HReg  xLo, xHi;
               vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U64);
               iselInt64Expr(&xHi, &xLo, env, e->Iex.Binop.arg1);
               addInstr(env, mk_iMOVsd_RR(xHi, tHi));
               addInstr(env, mk_iMOVsd_RR(xLo, tLo));
               addInstr(env, X86Instr_Alu32R(Xalu_ADD, X86RMI_Imm(wLo), tLo));
               addInstr(env, X86Instr_Alu32R(Xalu_ADC, X86RMI_Imm(wHi), tHi));
               *rHi = tHi;
               *rLo = tLo;
               return;
            }
            /* else fall through to the generic case */
         case Iop_Sub64: {
            HReg xLo, xHi, yLo, yHi;
            HReg tLo = newVRegI(env);
            HReg tHi = newVRegI(env);
            iselInt64Expr(&xHi, &xLo, env, e->Iex.Binop.arg1);
            addInstr(env, mk_iMOVsd_RR(xHi, tHi));
            addInstr(env, mk_iMOVsd_RR(xLo, tLo));
            iselInt64Expr(&yHi, &yLo, env, e->Iex.Binop.arg2);
            if (e->Iex.Binop.op==Iop_Add64) {
               addInstr(env, X86Instr_Alu32R(Xalu_ADD, X86RMI_Reg(yLo), tLo));
               addInstr(env, X86Instr_Alu32R(Xalu_ADC, X86RMI_Reg(yHi), tHi));
            } else {
               addInstr(env, X86Instr_Alu32R(Xalu_SUB, X86RMI_Reg(yLo), tLo));
               addInstr(env, X86Instr_Alu32R(Xalu_SBB, X86RMI_Reg(yHi), tHi));
            }
            *rHi = tHi;
            *rLo = tLo;
            return;
         }

         /* 32HLto64(e1,e2) */
         case Iop_32HLto64:
            *rHi = iselIntExpr_R(env, e->Iex.Binop.arg1);
            *rLo = iselIntExpr_R(env, e->Iex.Binop.arg2);
            return;
         case Iop_Shl64: {
            /* We use the same ingenious scheme as gcc.  Put the value
               to be shifted into %hi:%lo, and the shift amount into
               %cl.  Then (dsts on right, a la ATT syntax):

               shldl %cl, %lo, %hi   -- make %hi be right for the
                                     -- shift amt %cl % 32
               shll  %cl, %lo        -- make %lo be right for the
                                     -- shift amt %cl % 32

               Now, if (shift amount % 64) is in the range 32 .. 63,
               we have to do a fixup, which puts the result low half
               into the result high half, and zeroes the low half:

               testl $32, %ecx
               cmovnz %lo, %hi
               movl $0, %tmp         -- sigh; need yet another reg
               cmovnz %tmp, %lo
            */
            HReg rAmt, sHi, sLo, tHi, tLo, tTemp;
            tLo = newVRegI(env);
            tHi = newVRegI(env);
            tTemp = newVRegI(env);
            rAmt = iselIntExpr_R(env, e->Iex.Binop.arg2);
            iselInt64Expr(&sHi,&sLo, env, e->Iex.Binop.arg1);
            addInstr(env, mk_iMOVsd_RR(rAmt, hregX86_ECX()));
            addInstr(env, mk_iMOVsd_RR(sHi, tHi));
            addInstr(env, mk_iMOVsd_RR(sLo, tLo));
            /* Ok.  Now shift amt is in %ecx, and value is in tHi/tLo
               and those regs are legitimately modifiable. */
            addInstr(env, X86Instr_Sh3232(Xsh_SHL, 0/*%cl*/, tLo, tHi));
            addInstr(env, X86Instr_Sh32(Xsh_SHL, 0/*%cl*/, tLo));
            addInstr(env, X86Instr_Test32(32, X86RM_Reg(hregX86_ECX())));
            addInstr(env, X86Instr_CMov32(Xcc_NZ, X86RM_Reg(tLo), tHi));
            addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(0), tTemp));
            addInstr(env, X86Instr_CMov32(Xcc_NZ, X86RM_Reg(tTemp), tLo));
            *rHi = tHi;
            *rLo = tLo;
            return;
         }
         case Iop_Shr64: {
            /* We use the same ingenious scheme as gcc.  Put the value
               to be shifted into %hi:%lo, and the shift amount into
               %cl.  Then (dsts on right, a la ATT syntax):

               shrdl %cl, %hi, %lo   -- make %lo be right for the
                                     -- shift amt %cl % 32
               shrl  %cl, %hi        -- make %hi be right for the
                                     -- shift amt %cl % 32

               Now, if (shift amount % 64) is in the range 32 .. 63,
               we have to do a fixup, which puts the result high half
               into the result low half, and zeroes the high half:

               testl $32, %ecx
               cmovnz %hi, %lo
               movl $0, %tmp         -- sigh; need yet another reg
               cmovnz %tmp, %hi
            */
            HReg rAmt, sHi, sLo, tHi, tLo, tTemp;
            tLo = newVRegI(env);
            tHi = newVRegI(env);
            tTemp = newVRegI(env);
            rAmt = iselIntExpr_R(env, e->Iex.Binop.arg2);
            iselInt64Expr(&sHi,&sLo, env, e->Iex.Binop.arg1);
            addInstr(env, mk_iMOVsd_RR(rAmt, hregX86_ECX()));
            addInstr(env, mk_iMOVsd_RR(sHi, tHi));
            addInstr(env, mk_iMOVsd_RR(sLo, tLo));
            /* Ok.  Now shift amt is in %ecx, and value is in tHi/tLo
               and those regs are legitimately modifiable. */
            addInstr(env, X86Instr_Sh3232(Xsh_SHR, 0/*%cl*/, tHi, tLo));
            addInstr(env, X86Instr_Sh32(Xsh_SHR, 0/*%cl*/, tHi));
            addInstr(env, X86Instr_Test32(32, X86RM_Reg(hregX86_ECX())));
            addInstr(env, X86Instr_CMov32(Xcc_NZ, X86RM_Reg(tHi), tLo));
            addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(0), tTemp));
            addInstr(env, X86Instr_CMov32(Xcc_NZ, X86RM_Reg(tTemp), tHi));
            *rHi = tHi;
            *rLo = tLo;
            return;
         }
         case Iop_Sar64: {
            /* gcc -O2 does the following.  I don't know how it works, but it
               does work.  Don't mess with it.  This is hard to test because the
               x86 front end doesn't create Iop_Sar64 for any x86 instruction,
               so it's impossible to write a test program that feeds values
               through Iop_Sar64 and prints their results.  The implementation
               here was tested by using psrlq on mmx registers -- that generates
               Iop_Shr64 -- and temporarily hacking the front end to generate
               Iop_Sar64 for that instruction instead.

               movl %amount, %ecx
               movl %srcHi, %r1
               movl %srcLo, %r2

               movl %r1, %r3
               sarl %cl, %r3
               movl %r2, %r4
               shrdl %cl, %r1, %r4
               movl %r3, %r2
               sarl $31, %r2
               andl $32, %ecx
               cmovne %r3, %r4   // = resLo
               cmovne %r2, %r3   // = resHi
            */
            HReg amount = iselIntExpr_R(env, e->Iex.Binop.arg2);
            HReg srcHi = INVALID_HREG, srcLo = INVALID_HREG;
            iselInt64Expr(&srcHi, &srcLo, env, e->Iex.Binop.arg1);
            HReg r1 = newVRegI(env);
            HReg r2 = newVRegI(env);
            HReg r3 = newVRegI(env);
            HReg r4 = newVRegI(env);
            addInstr(env, mk_iMOVsd_RR(amount, hregX86_ECX()));
            addInstr(env, mk_iMOVsd_RR(srcHi, r1));
            addInstr(env, mk_iMOVsd_RR(srcLo, r2));

            addInstr(env, mk_iMOVsd_RR(r1, r3));
            addInstr(env, X86Instr_Sh32(Xsh_SAR, 0/*%cl*/, r3));
            addInstr(env, mk_iMOVsd_RR(r2, r4));
            addInstr(env, X86Instr_Sh3232(Xsh_SHR, 0/*%cl*/, r1, r4));
            addInstr(env, mk_iMOVsd_RR(r3, r2));
            addInstr(env, X86Instr_Sh32(Xsh_SAR, 31, r2));
            addInstr(env, X86Instr_Alu32R(Xalu_AND, X86RMI_Imm(32),
                                          hregX86_ECX()));
            addInstr(env, X86Instr_CMov32(Xcc_NZ, X86RM_Reg(r3), r4));
            addInstr(env, X86Instr_CMov32(Xcc_NZ, X86RM_Reg(r2), r3));
            *rHi = r3;
            *rLo = r4;
            return;
         }
         /* Sigh, this is an almost exact copy of the F64 -> I32/I16
            case.  Unfortunately I see no easy way to avoid the
            duplication. */
         case Iop_F64toI64S: {
            HReg rf  = iselDblExpr(env, e->Iex.Binop.arg2);
            HReg tLo = newVRegI(env);
            HReg tHi = newVRegI(env);

            /* Used several times ... */
            /* Careful ... this sharing is only safe because
               zero_esp/four_esp do not hold any registers which the
               register allocator could attempt to swizzle later. */
            X86AMode* zero_esp = X86AMode_IR(0, hregX86_ESP());
            X86AMode* four_esp = X86AMode_IR(4, hregX86_ESP());

            /* rf now holds the value to be converted, and rrm holds
               the rounding mode value, encoded as per the
               IRRoundingMode enum.  The first thing to do is set the
               FPU's rounding mode accordingly. */

            /* Create a space for the format conversion. */
            sub_from_esp(env, 8);

            /* Set host rounding mode */
            set_FPU_rounding_mode( env, e->Iex.Binop.arg1 );

            /* gistll %rf, 0(%esp) */
            addInstr(env, X86Instr_FpLdStI(False/*store*/, 8, rf, zero_esp));

            /* movl 0(%esp), %dstLo */
            /* movl 4(%esp), %dstHi */
            addInstr(env, X86Instr_Alu32R(
                             Xalu_MOV, X86RMI_Mem(zero_esp), tLo));
            addInstr(env, X86Instr_Alu32R(
                             Xalu_MOV, X86RMI_Mem(four_esp), tHi));

            /* Restore default FPU rounding. */
            set_FPU_rounding_default( env );

            add_to_esp(env, 8);
            *rHi = tHi;
            *rLo = tLo;
            return;
         }
         case Iop_Add8x8:
            fn = (HWord)h_generic_calc_Add8x8; goto binnish;
         case Iop_Add16x4:
            fn = (HWord)h_generic_calc_Add16x4; goto binnish;
         case Iop_Add32x2:
            fn = (HWord)h_generic_calc_Add32x2; goto binnish;

         case Iop_Avg8Ux8:
            fn = (HWord)h_generic_calc_Avg8Ux8; goto binnish;
         case Iop_Avg16Ux4:
            fn = (HWord)h_generic_calc_Avg16Ux4; goto binnish;

         case Iop_CmpEQ8x8:
            fn = (HWord)h_generic_calc_CmpEQ8x8; goto binnish;
         case Iop_CmpEQ16x4:
            fn = (HWord)h_generic_calc_CmpEQ16x4; goto binnish;
         case Iop_CmpEQ32x2:
            fn = (HWord)h_generic_calc_CmpEQ32x2; goto binnish;

         case Iop_CmpGT8Sx8:
            fn = (HWord)h_generic_calc_CmpGT8Sx8; goto binnish;
         case Iop_CmpGT16Sx4:
            fn = (HWord)h_generic_calc_CmpGT16Sx4; goto binnish;
         case Iop_CmpGT32Sx2:
            fn = (HWord)h_generic_calc_CmpGT32Sx2; goto binnish;

         case Iop_InterleaveHI8x8:
            fn = (HWord)h_generic_calc_InterleaveHI8x8; goto binnish;
         case Iop_InterleaveLO8x8:
            fn = (HWord)h_generic_calc_InterleaveLO8x8; goto binnish;
         case Iop_InterleaveHI16x4:
            fn = (HWord)h_generic_calc_InterleaveHI16x4; goto binnish;
         case Iop_InterleaveLO16x4:
            fn = (HWord)h_generic_calc_InterleaveLO16x4; goto binnish;
         case Iop_InterleaveHI32x2:
            fn = (HWord)h_generic_calc_InterleaveHI32x2; goto binnish;
         case Iop_InterleaveLO32x2:
            fn = (HWord)h_generic_calc_InterleaveLO32x2; goto binnish;
         case Iop_CatOddLanes16x4:
            fn = (HWord)h_generic_calc_CatOddLanes16x4; goto binnish;
         case Iop_CatEvenLanes16x4:
            fn = (HWord)h_generic_calc_CatEvenLanes16x4; goto binnish;
         case Iop_Perm8x8:
            fn = (HWord)h_generic_calc_Perm8x8; goto binnish;

         case Iop_Max8Ux8:
            fn = (HWord)h_generic_calc_Max8Ux8; goto binnish;
         case Iop_Max16Sx4:
            fn = (HWord)h_generic_calc_Max16Sx4; goto binnish;
         case Iop_Min8Ux8:
            fn = (HWord)h_generic_calc_Min8Ux8; goto binnish;
         case Iop_Min16Sx4:
            fn = (HWord)h_generic_calc_Min16Sx4; goto binnish;

         case Iop_Mul16x4:
            fn = (HWord)h_generic_calc_Mul16x4; goto binnish;
         case Iop_Mul32x2:
            fn = (HWord)h_generic_calc_Mul32x2; goto binnish;
         case Iop_MulHi16Sx4:
            fn = (HWord)h_generic_calc_MulHi16Sx4; goto binnish;
         case Iop_MulHi16Ux4:
            fn = (HWord)h_generic_calc_MulHi16Ux4; goto binnish;

         case Iop_QAdd8Sx8:
            fn = (HWord)h_generic_calc_QAdd8Sx8; goto binnish;
         case Iop_QAdd16Sx4:
            fn = (HWord)h_generic_calc_QAdd16Sx4; goto binnish;
         case Iop_QAdd8Ux8:
            fn = (HWord)h_generic_calc_QAdd8Ux8; goto binnish;
         case Iop_QAdd16Ux4:
            fn = (HWord)h_generic_calc_QAdd16Ux4; goto binnish;

         case Iop_QNarrowBin32Sto16Sx4:
            fn = (HWord)h_generic_calc_QNarrowBin32Sto16Sx4; goto binnish;
         case Iop_QNarrowBin16Sto8Sx8:
            fn = (HWord)h_generic_calc_QNarrowBin16Sto8Sx8; goto binnish;
         case Iop_QNarrowBin16Sto8Ux8:
            fn = (HWord)h_generic_calc_QNarrowBin16Sto8Ux8; goto binnish;
         case Iop_NarrowBin16to8x8:
            fn = (HWord)h_generic_calc_NarrowBin16to8x8; goto binnish;
         case Iop_NarrowBin32to16x4:
            fn = (HWord)h_generic_calc_NarrowBin32to16x4; goto binnish;

         case Iop_QSub8Sx8:
            fn = (HWord)h_generic_calc_QSub8Sx8; goto binnish;
         case Iop_QSub16Sx4:
            fn = (HWord)h_generic_calc_QSub16Sx4; goto binnish;
         case Iop_QSub8Ux8:
            fn = (HWord)h_generic_calc_QSub8Ux8; goto binnish;
         case Iop_QSub16Ux4:
            fn = (HWord)h_generic_calc_QSub16Ux4; goto binnish;

         case Iop_Sub8x8:
            fn = (HWord)h_generic_calc_Sub8x8; goto binnish;
         case Iop_Sub16x4:
            fn = (HWord)h_generic_calc_Sub16x4; goto binnish;
         case Iop_Sub32x2:
            fn = (HWord)h_generic_calc_Sub32x2; goto binnish;

         binnish: {
            /* Note: the following assumes all helpers are of
               signature
                  ULong fn ( ULong, ULong ), and they are
               not marked as regparm functions.
            */
            HReg xLo, xHi, yLo, yHi;
            HReg tLo = newVRegI(env);
            HReg tHi = newVRegI(env);
            iselInt64Expr(&yHi, &yLo, env, e->Iex.Binop.arg2);
            addInstr(env, X86Instr_Push(X86RMI_Reg(yHi)));
            addInstr(env, X86Instr_Push(X86RMI_Reg(yLo)));
            iselInt64Expr(&xHi, &xLo, env, e->Iex.Binop.arg1);
            addInstr(env, X86Instr_Push(X86RMI_Reg(xHi)));
            addInstr(env, X86Instr_Push(X86RMI_Reg(xLo)));
            addInstr(env, X86Instr_Call( Xcc_ALWAYS, (Addr32)fn,
                                         0, mk_RetLoc_simple(RLPri_2Int) ));
            add_to_esp(env, 4*4);
            addInstr(env, mk_iMOVsd_RR(hregX86_EDX(), tHi));
            addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), tLo));
            *rHi = tHi;
            *rLo = tLo;
            return;
         }
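         /* Stack layout for the call above: yHi, yLo, xHi, xLo are pushed in
            that order, so the x pair ends up at the lower addresses and is
            the first ULong argument under the cdecl convention, with y as
            the second; the 64-bit result comes back in EDX:EAX. */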
         case Iop_ShlN32x2:
            fn = (HWord)h_generic_calc_ShlN32x2; goto shifty;
         case Iop_ShlN16x4:
            fn = (HWord)h_generic_calc_ShlN16x4; goto shifty;
         case Iop_ShlN8x8:
            fn = (HWord)h_generic_calc_ShlN8x8; goto shifty;
         case Iop_ShrN32x2:
            fn = (HWord)h_generic_calc_ShrN32x2; goto shifty;
         case Iop_ShrN16x4:
            fn = (HWord)h_generic_calc_ShrN16x4; goto shifty;
         case Iop_SarN32x2:
            fn = (HWord)h_generic_calc_SarN32x2; goto shifty;
         case Iop_SarN16x4:
            fn = (HWord)h_generic_calc_SarN16x4; goto shifty;
         case Iop_SarN8x8:
            fn = (HWord)h_generic_calc_SarN8x8; goto shifty;

         shifty: {
            /* Note: the following assumes all helpers are of
               signature
                  ULong fn ( ULong, UInt ), and they are
               not marked as regparm functions.
            */
            HReg xLo, xHi;
            HReg tLo = newVRegI(env);
            HReg tHi = newVRegI(env);
            X86RMI* y = iselIntExpr_RMI(env, e->Iex.Binop.arg2);
            addInstr(env, X86Instr_Push(y));
            iselInt64Expr(&xHi, &xLo, env, e->Iex.Binop.arg1);
            addInstr(env, X86Instr_Push(X86RMI_Reg(xHi)));
            addInstr(env, X86Instr_Push(X86RMI_Reg(xLo)));
            addInstr(env, X86Instr_Call( Xcc_ALWAYS, (Addr32)fn,
                                         0, mk_RetLoc_simple(RLPri_2Int) ));
            add_to_esp(env, 3*4);
            addInstr(env, mk_iMOVsd_RR(hregX86_EDX(), tHi));
            addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), tLo));
            *rHi = tHi;
            *rLo = tLo;
            return;
         }

         default: 
            break;
      }
   } /* if (e->tag == Iex_Binop) */
   /* --------- UNARY ops --------- */
   if (e->tag == Iex_Unop) {
      switch (e->Iex.Unop.op) {

         /* 32Sto64(e) */
         case Iop_32Sto64: {
            HReg tLo = newVRegI(env);
            HReg tHi = newVRegI(env);
            HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
            addInstr(env, mk_iMOVsd_RR(src,tHi));
            addInstr(env, mk_iMOVsd_RR(src,tLo));
            addInstr(env, X86Instr_Sh32(Xsh_SAR, 31, tHi));
            *rHi = tHi;
            *rLo = tLo;
            return;
         }

         /* 32Uto64(e) */
         case Iop_32Uto64: {
            HReg tLo = newVRegI(env);
            HReg tHi = newVRegI(env);
            HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
            addInstr(env, mk_iMOVsd_RR(src,tLo));
            addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(0), tHi));
            *rHi = tHi;
            *rLo = tLo;
            return;
         }

         /* 16Uto64(e) */
         case Iop_16Uto64: {
            HReg tLo = newVRegI(env);
            HReg tHi = newVRegI(env);
            HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
            addInstr(env, mk_iMOVsd_RR(src,tLo));
            addInstr(env, X86Instr_Alu32R(Xalu_AND,
                                          X86RMI_Imm(0xFFFF), tLo));
            addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(0), tHi));
            *rHi = tHi;
            *rLo = tLo;
            return;
         }
         case Iop_V128HIto64:
         case Iop_V128to64: {
            Int  off = e->Iex.Unop.op==Iop_V128HIto64 ? 8 : 0;
            HReg tLo = newVRegI(env);
            HReg tHi = newVRegI(env);
            HReg vec = iselVecExpr(env, e->Iex.Unop.arg);
            X86AMode* esp0  = X86AMode_IR(0,     hregX86_ESP());
            X86AMode* espLO = X86AMode_IR(off,   hregX86_ESP());
            X86AMode* espHI = X86AMode_IR(off+4, hregX86_ESP());
            sub_from_esp(env, 16);
            addInstr(env, X86Instr_SseLdSt(False/*store*/, vec, esp0));
            addInstr(env, X86Instr_Alu32R( Xalu_MOV, 
                                           X86RMI_Mem(espLO), tLo ));
            addInstr(env, X86Instr_Alu32R( Xalu_MOV, 
                                           X86RMI_Mem(espHI), tHi ));
            add_to_esp(env, 16);
            *rHi = tHi;
            *rLo = tLo;
            return;
         }

         /* could do better than this, but for now ... */
         case Iop_1Sto64: {
            HReg tLo = newVRegI(env);
            HReg tHi = newVRegI(env);
            X86CondCode cond = iselCondCode(env, e->Iex.Unop.arg);
            addInstr(env, X86Instr_Set32(cond,tLo));
            addInstr(env, X86Instr_Sh32(Xsh_SHL, 31, tLo));
            addInstr(env, X86Instr_Sh32(Xsh_SAR, 31, tLo));
            addInstr(env, mk_iMOVsd_RR(tLo, tHi));
            *rHi = tHi;
            *rLo = tLo;
            return;
         }
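         /* In the 1Sto64 case, Set32 leaves 0 or 1 in tLo; the SHL-by-31 /
            SAR-by-31 pair then smears that bit across the whole word,
            producing 0 or 0xFFFFFFFF, which is duplicated into the high
            half. */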
         case Iop_Not64: {
            HReg tLo = newVRegI(env);
            HReg tHi = newVRegI(env);
            HReg sHi, sLo;
            iselInt64Expr(&sHi, &sLo, env, e->Iex.Unop.arg);
            addInstr(env, mk_iMOVsd_RR(sHi, tHi));
            addInstr(env, mk_iMOVsd_RR(sLo, tLo));
            addInstr(env, X86Instr_Unary32(Xun_NOT,tHi));
            addInstr(env, X86Instr_Unary32(Xun_NOT,tLo));
            *rHi = tHi;
            *rLo = tLo;
            return;
         }

         case Iop_Left64: {
            HReg yLo, yHi;
            HReg tLo = newVRegI(env);
            HReg tHi = newVRegI(env);
            /* yHi:yLo = arg */
            iselInt64Expr(&yHi, &yLo, env, e->Iex.Unop.arg);
            /* tLo = 0 - yLo, and set carry */
            addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(0), tLo));
            addInstr(env, X86Instr_Alu32R(Xalu_SUB, X86RMI_Reg(yLo), tLo));
            /* tHi = 0 - yHi - carry */
            addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(0), tHi));
            addInstr(env, X86Instr_Alu32R(Xalu_SBB, X86RMI_Reg(yHi), tHi));
            /* So now we have tHi:tLo = -arg.  To finish off, or 'arg'
               back in, so as to give the final result 
               tHi:tLo = arg | -arg. */
            addInstr(env, X86Instr_Alu32R(Xalu_OR, X86RMI_Reg(yLo), tLo));
            addInstr(env, X86Instr_Alu32R(Xalu_OR, X86RMI_Reg(yHi), tHi));
            *rHi = tHi;
            *rLo = tLo;
            return;
         }
         /* --- patterns rooted at: CmpwNEZ64 --- */

         case Iop_CmpwNEZ64: {

         DECLARE_PATTERN(p_CmpwNEZ64_Or64);
         DEFINE_PATTERN(p_CmpwNEZ64_Or64,
                        unop(Iop_CmpwNEZ64,binop(Iop_Or64,bind(0),bind(1))));
         if (matchIRExpr(&mi, p_CmpwNEZ64_Or64, e)) {
            /* CmpwNEZ64(Or64(x,y)) */
            HReg xHi,xLo,yHi,yLo;
            HReg xBoth = newVRegI(env);
            HReg merged = newVRegI(env);
            HReg tmp2 = newVRegI(env);

            iselInt64Expr(&xHi,&xLo, env, mi.bindee[0]);
            addInstr(env, mk_iMOVsd_RR(xHi,xBoth));
            addInstr(env, X86Instr_Alu32R(Xalu_OR,
                                          X86RMI_Reg(xLo),xBoth));

            iselInt64Expr(&yHi,&yLo, env, mi.bindee[1]);
            addInstr(env, mk_iMOVsd_RR(yHi,merged));
            addInstr(env, X86Instr_Alu32R(Xalu_OR,
                                          X86RMI_Reg(yLo),merged));
            addInstr(env, X86Instr_Alu32R(Xalu_OR,
                                          X86RMI_Reg(xBoth),merged));

            /* tmp2 = (merged | -merged) >>s 31 */
            addInstr(env, mk_iMOVsd_RR(merged,tmp2));
            addInstr(env, X86Instr_Unary32(Xun_NEG,tmp2));
            addInstr(env, X86Instr_Alu32R(Xalu_OR,
                                          X86RMI_Reg(merged), tmp2));
            addInstr(env, X86Instr_Sh32(Xsh_SAR, 31, tmp2));
            *rHi = tmp2;
            *rLo = tmp2;
            return;
         } else {
            HReg srcLo, srcHi;
            HReg tmp1 = newVRegI(env);
            HReg tmp2 = newVRegI(env);
            /* srcHi:srcLo = arg */
            iselInt64Expr(&srcHi, &srcLo, env, e->Iex.Unop.arg);
            /* tmp1 = srcHi | srcLo */
            addInstr(env, mk_iMOVsd_RR(srcHi,tmp1));
            addInstr(env, X86Instr_Alu32R(Xalu_OR,
                                          X86RMI_Reg(srcLo), tmp1));
            /* tmp2 = (tmp1 | -tmp1) >>s 31 */
            addInstr(env, mk_iMOVsd_RR(tmp1,tmp2));
            addInstr(env, X86Instr_Unary32(Xun_NEG,tmp2));
            addInstr(env, X86Instr_Alu32R(Xalu_OR,
                                          X86RMI_Reg(tmp1), tmp2));
            addInstr(env, X86Instr_Sh32(Xsh_SAR, 31, tmp2));
            *rHi = tmp2;
            *rLo = tmp2;
            return;
         }
         }
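         /* Both CmpwNEZ64 paths rely on the fact that, for a 32-bit x,
            (x | -x) has its sign bit set iff x != 0, so the arithmetic
            shift right by 31 yields 0 or 0xFFFFFFFF; copying that word
            into both halves gives the 64-bit all-zeroes/all-ones result. */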
         /* ReinterpF64asI64(e) */
         /* Given an IEEE754 double, produce an I64 with the same bit
            pattern. */
         case Iop_ReinterpF64asI64: {
            HReg rf   = iselDblExpr(env, e->Iex.Unop.arg);
            HReg tLo  = newVRegI(env);
            HReg tHi  = newVRegI(env);
            X86AMode* zero_esp = X86AMode_IR(0, hregX86_ESP());
            X86AMode* four_esp = X86AMode_IR(4, hregX86_ESP());
            /* paranoia */
            set_FPU_rounding_default(env);
            /* subl $8, %esp */
            sub_from_esp(env, 8);
            /* gstD %rf, 0(%esp) */
            addInstr(env,
                     X86Instr_FpLdSt(False/*store*/, 8, rf, zero_esp));
            /* movl 0(%esp), %tLo */
            addInstr(env,
                     X86Instr_Alu32R(Xalu_MOV, X86RMI_Mem(zero_esp), tLo));
            /* movl 4(%esp), %tHi */
            addInstr(env,
                     X86Instr_Alu32R(Xalu_MOV, X86RMI_Mem(four_esp), tHi));
            /* addl $8, %esp */
            add_to_esp(env, 8);
            *rHi = tHi;
            *rLo = tLo;
            return;
         }
         case Iop_CmpNEZ32x2:
            fn = (HWord)h_generic_calc_CmpNEZ32x2; goto unish;
         case Iop_CmpNEZ16x4:
            fn = (HWord)h_generic_calc_CmpNEZ16x4; goto unish;
         case Iop_CmpNEZ8x8:
            fn = (HWord)h_generic_calc_CmpNEZ8x8; goto unish;

         unish: {
            /* Note: the following assumes all helpers are of
               signature
                  ULong fn ( ULong ), and they are
               not marked as regparm functions.
            */
            HReg xLo, xHi;
            HReg tLo = newVRegI(env);
            HReg tHi = newVRegI(env);
            iselInt64Expr(&xHi, &xLo, env, e->Iex.Unop.arg);
            addInstr(env, X86Instr_Push(X86RMI_Reg(xHi)));
            addInstr(env, X86Instr_Push(X86RMI_Reg(xLo)));
            addInstr(env, X86Instr_Call( Xcc_ALWAYS, (Addr32)fn,
                                         0, mk_RetLoc_simple(RLPri_2Int) ));
            add_to_esp(env, 2*4);
            addInstr(env, mk_iMOVsd_RR(hregX86_EDX(), tHi));
            addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), tLo));
            *rHi = tHi;
            *rLo = tLo;
            return;
         }

         default: 
            break;
      }
   } /* if (e->tag == Iex_Unop) */

   /* --------- CCALL --------- */
   if (e->tag == Iex_CCall) {
      HReg tLo = newVRegI(env);
      HReg tHi = newVRegI(env);

      /* Marshal args, do the call, clear stack. */
      UInt   addToSp = 0;
      RetLoc rloc    = mk_RetLoc_INVALID();
      doHelperCall( &addToSp, &rloc, env, NULL/*guard*/,
                    e->Iex.CCall.cee,
                    e->Iex.CCall.retty, e->Iex.CCall.args );
      vassert(is_sane_RetLoc(rloc));
      vassert(rloc.pri == RLPri_2Int);
      vassert(addToSp == 0);

      addInstr(env, mk_iMOVsd_RR(hregX86_EDX(), tHi));
      addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), tLo));
      *rHi = tHi;
      *rLo = tLo;
      return;
   }

   ppIRExpr(e);
   vpanic("iselInt64Expr");
}
/*---------------------------------------------------------*/
/*--- ISEL: Floating point expressions (32 bit)         ---*/
/*---------------------------------------------------------*/

/* Nothing interesting here; really just wrappers for
   64-bit stuff. */

static HReg iselFltExpr ( ISelEnv* env, const IRExpr* e )
{
   HReg r = iselFltExpr_wrk( env, e );
#  if 0
   vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
#  endif
   vassert(hregClass(r) == HRcFlt64); /* yes, really Flt64 */
   vassert(hregIsVirtual(r));
   return r;
}
/* DO NOT CALL THIS DIRECTLY */
static HReg iselFltExpr_wrk ( ISelEnv* env, const IRExpr* e )
{
   IRType ty = typeOfIRExpr(env->type_env,e);
   vassert(ty == Ity_F32);

   if (e->tag == Iex_RdTmp) {
      return lookupIRTemp(env, e->Iex.RdTmp.tmp);
   }

   if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
      X86AMode* am;
      HReg res = newVRegF(env);
      vassert(e->Iex.Load.ty == Ity_F32);
      am = iselIntExpr_AMode(env, e->Iex.Load.addr);
      addInstr(env, X86Instr_FpLdSt(True/*load*/, 4, res, am));
      return res;
   }

   if (e->tag == Iex_Binop
       && e->Iex.Binop.op == Iop_F64toF32) {
      /* Although the result is still held in a standard FPU register,
         we need to round it to reflect the loss of accuracy/range
         entailed in casting it to a 32-bit float. */
      HReg dst = newVRegF(env);
      HReg src = iselDblExpr(env, e->Iex.Binop.arg2);
      set_FPU_rounding_mode( env, e->Iex.Binop.arg1 );
      addInstr(env, X86Instr_Fp64to32(src,dst));
      set_FPU_rounding_default( env );
      return dst;
   }

   if (e->tag == Iex_Get) {
      X86AMode* am = X86AMode_IR( e->Iex.Get.offset,
                                  hregX86_EBP() );
      HReg res = newVRegF(env);
      addInstr(env, X86Instr_FpLdSt( True/*load*/, 4, res, am ));
      return res;
   }

   if (e->tag == Iex_Unop
       && e->Iex.Unop.op == Iop_ReinterpI32asF32) {
      /* Given an I32, produce an IEEE754 float with the same bit
         pattern. */
      HReg    dst = newVRegF(env);
      X86RMI* rmi = iselIntExpr_RMI(env, e->Iex.Unop.arg);
      addInstr(env, X86Instr_Push(rmi));
      addInstr(env, X86Instr_FpLdSt(
                       True/*load*/, 4, dst, 
                       X86AMode_IR(0, hregX86_ESP())));
      add_to_esp(env, 4);
      return dst;
   }

   if (e->tag == Iex_Binop && e->Iex.Binop.op == Iop_RoundF32toInt) {
      HReg rf  = iselFltExpr(env, e->Iex.Binop.arg2);
      HReg dst = newVRegF(env);

      /* rf now holds the value to be rounded.  The first thing to do
         is set the FPU's rounding mode accordingly. */

      /* Set host rounding mode */
      set_FPU_rounding_mode( env, e->Iex.Binop.arg1 );

      /* grndint %rf, %dst */
      addInstr(env, X86Instr_FpUnary(Xfp_ROUND, rf, dst));

      /* Restore default FPU rounding. */
      set_FPU_rounding_default( env );

      return dst;
   }

   ppIRExpr(e);
   vpanic("iselFltExpr_wrk");
}
/*---------------------------------------------------------*/
/*--- ISEL: Floating point expressions (64 bit)         ---*/
/*---------------------------------------------------------*/

/* Compute a 64-bit floating point value into a register, the identity
   of which is returned.  As with iselIntExpr_R, the reg may be either
   real or virtual; in any case it must not be changed by subsequent
   code emitted by the caller.  */

/* IEEE 754 formats.  From http://www.freesoft.org/CIE/RFC/1832/32.htm:

    Type                  S (1 bit)   E (11 bits)   F (52 bits)
    ----                  ---------   -----------   -----------
    signalling NaN        u           2047 (max)    .0uuuuu---u
                                                    (with at least
                                                     one 1 bit)
    quiet NaN             u           2047 (max)    .1uuuuu---u

    negative infinity     1           2047 (max)    .000000---0

    positive infinity     0           2047 (max)    .000000---0

    negative zero         1           0             .000000---0

    positive zero         0           0             .000000---0
*/

static HReg iselDblExpr ( ISelEnv* env, const IRExpr* e )
{
   HReg r = iselDblExpr_wrk( env, e );
#  if 0
   vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
#  endif
   vassert(hregClass(r) == HRcFlt64);
   vassert(hregIsVirtual(r));
   return r;
}
/* DO NOT CALL THIS DIRECTLY */
static HReg iselDblExpr_wrk ( ISelEnv* env, const IRExpr* e )
{
   IRType ty = typeOfIRExpr(env->type_env,e);
   vassert(ty == Ity_F64);

   if (e->tag == Iex_RdTmp) {
      return lookupIRTemp(env, e->Iex.RdTmp.tmp);
   }

   if (e->tag == Iex_Const) {
      union { UInt u32x2[2]; ULong u64; Double f64; } u;
      HReg freg = newVRegF(env);
      vassert(sizeof(u) == 8);
      vassert(sizeof(u.u64) == 8);
      vassert(sizeof(u.f64) == 8);
      vassert(sizeof(u.u32x2) == 8);

      if (e->Iex.Const.con->tag == Ico_F64) {
         u.f64 = e->Iex.Const.con->Ico.F64;
      }
      else if (e->Iex.Const.con->tag == Ico_F64i) {
         u.u64 = e->Iex.Const.con->Ico.F64i;
      }
      else
         vpanic("iselDblExpr(x86): const");

      addInstr(env, X86Instr_Push(X86RMI_Imm(u.u32x2[1])));
      addInstr(env, X86Instr_Push(X86RMI_Imm(u.u32x2[0])));
      addInstr(env, X86Instr_FpLdSt(True/*load*/, 8, freg,
                                    X86AMode_IR(0, hregX86_ESP())));
      add_to_esp(env, 8);
      return freg;
   }
   /* 64-bit load */
   if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
      X86AMode* am;
      HReg res = newVRegF(env);
      vassert(e->Iex.Load.ty == Ity_F64);
      am = iselIntExpr_AMode(env, e->Iex.Load.addr);
      addInstr(env, X86Instr_FpLdSt(True/*load*/, 8, res, am));
      return res;
   }

   if (e->tag == Iex_Get) {
      X86AMode* am = X86AMode_IR( e->Iex.Get.offset,
                                  hregX86_EBP() );
      HReg res = newVRegF(env);
      addInstr(env, X86Instr_FpLdSt( True/*load*/, 8, res, am ));
      return res;
   }

   if (e->tag == Iex_GetI) {
      X86AMode* am 
         = genGuestArrayOffset(
              env, e->Iex.GetI.descr, 
                   e->Iex.GetI.ix, e->Iex.GetI.bias );
      HReg res = newVRegF(env);
      addInstr(env, X86Instr_FpLdSt( True/*load*/, 8, res, am ));
      return res;
   }
   if (e->tag == Iex_Triop) {
      X86FpOp fpop = Xfp_INVALID;
      IRTriop *triop = e->Iex.Triop.details;
      switch (triop->op) {
         case Iop_AddF64:    fpop = Xfp_ADD; break;
         case Iop_SubF64:    fpop = Xfp_SUB; break;
         case Iop_MulF64:    fpop = Xfp_MUL; break;
         case Iop_DivF64:    fpop = Xfp_DIV; break;
         case Iop_ScaleF64:  fpop = Xfp_SCALE; break;
         case Iop_Yl2xF64:   fpop = Xfp_YL2X; break;
         case Iop_Yl2xp1F64: fpop = Xfp_YL2XP1; break;
         case Iop_AtanF64:   fpop = Xfp_ATAN; break;
         case Iop_PRemF64:   fpop = Xfp_PREM; break;
         case Iop_PRem1F64:  fpop = Xfp_PREM1; break;
         default: break;
      }
      if (fpop != Xfp_INVALID) {
         HReg res  = newVRegF(env);
         HReg srcL = iselDblExpr(env, triop->arg2);
         HReg srcR = iselDblExpr(env, triop->arg3);
         /* XXXROUNDINGFIXME */
         /* set roundingmode here */
         addInstr(env, X86Instr_FpBinary(fpop,srcL,srcR,res));
         if (fpop != Xfp_ADD && fpop != Xfp_SUB 
             && fpop != Xfp_MUL && fpop != Xfp_DIV)
            roundToF64(env, res);
         return res;
      }
   }
   if (e->tag == Iex_Binop && e->Iex.Binop.op == Iop_RoundF64toInt) {
      HReg rf  = iselDblExpr(env, e->Iex.Binop.arg2);
      HReg dst = newVRegF(env);

      /* rf now holds the value to be rounded.  The first thing to do
         is set the FPU's rounding mode accordingly. */

      /* Set host rounding mode */
      set_FPU_rounding_mode( env, e->Iex.Binop.arg1 );

      /* grndint %rf, %dst */
      addInstr(env, X86Instr_FpUnary(Xfp_ROUND, rf, dst));

      /* Restore default FPU rounding. */
      set_FPU_rounding_default( env );

      return dst;
   }

   if (e->tag == Iex_Binop && e->Iex.Binop.op == Iop_I64StoF64) {
      HReg dst = newVRegF(env);
      HReg rHi, rLo;
      iselInt64Expr( &rHi, &rLo, env, e->Iex.Binop.arg2);
      addInstr(env, X86Instr_Push(X86RMI_Reg(rHi)));
      addInstr(env, X86Instr_Push(X86RMI_Reg(rLo)));

      /* Set host rounding mode */
      set_FPU_rounding_mode( env, e->Iex.Binop.arg1 );

      addInstr(env, X86Instr_FpLdStI(
                       True/*load*/, 8, dst, 
                       X86AMode_IR(0, hregX86_ESP())));

      /* Restore default FPU rounding. */
      set_FPU_rounding_default( env );

      add_to_esp(env, 8);
      return dst;
   }
   if (e->tag == Iex_Binop) {
      X86FpOp fpop = Xfp_INVALID;
      switch (e->Iex.Binop.op) {
         case Iop_SinF64:  fpop = Xfp_SIN; break;
         case Iop_CosF64:  fpop = Xfp_COS; break;
         case Iop_TanF64:  fpop = Xfp_TAN; break;
         case Iop_2xm1F64: fpop = Xfp_2XM1; break;
         case Iop_SqrtF64: fpop = Xfp_SQRT; break;
         default: break;
      }
      if (fpop != Xfp_INVALID) {
         HReg res = newVRegF(env);
         HReg src = iselDblExpr(env, e->Iex.Binop.arg2);
         /* XXXROUNDINGFIXME */
         /* set roundingmode here */
         /* Note that X86Instr_FpUnary(Xfp_TAN,..) sets the condition
            codes.  I don't think that matters, since this insn
            selector never generates such an instruction intervening
            between an flag-setting instruction and a flag-using
            instruction. */
         addInstr(env, X86Instr_FpUnary(fpop,src,res));
         if (fpop != Xfp_SQRT
             && fpop != Xfp_NEG && fpop != Xfp_ABS)
            roundToF64(env, res);
         return res;
      }
   }

   if (e->tag == Iex_Unop) {
      X86FpOp fpop = Xfp_INVALID;
      switch (e->Iex.Unop.op) {
         case Iop_NegF64:  fpop = Xfp_NEG; break;
         case Iop_AbsF64:  fpop = Xfp_ABS; break;
         default: break;
      }
      if (fpop != Xfp_INVALID) {
         HReg res = newVRegF(env);
         HReg src = iselDblExpr(env, e->Iex.Unop.arg);
         addInstr(env, X86Instr_FpUnary(fpop,src,res));
         /* No need to do roundToF64(env,res) for Xfp_NEG or Xfp_ABS,
            but might need to do that for other unary ops. */
         return res;
      }
   }
   if (e->tag == Iex_Unop) {
      switch (e->Iex.Unop.op) {
         case Iop_I32StoF64: {
            HReg dst = newVRegF(env);
            HReg ri  = iselIntExpr_R(env, e->Iex.Unop.arg);
            addInstr(env, X86Instr_Push(X86RMI_Reg(ri)));
            set_FPU_rounding_default(env);
            addInstr(env, X86Instr_FpLdStI(
                             True/*load*/, 4, dst, 
                             X86AMode_IR(0, hregX86_ESP())));
            add_to_esp(env, 4);
            return dst;
         }
         case Iop_ReinterpI64asF64: {
            /* Given an I64, produce an IEEE754 double with the same
               bit pattern. */
            HReg dst = newVRegF(env);
            HReg rHi, rLo;
            iselInt64Expr( &rHi, &rLo, env, e->Iex.Unop.arg);
            /* paranoia */
            set_FPU_rounding_default(env);
            addInstr(env, X86Instr_Push(X86RMI_Reg(rHi)));
            addInstr(env, X86Instr_Push(X86RMI_Reg(rLo)));
            addInstr(env, X86Instr_FpLdSt(
                             True/*load*/, 8, dst, 
                             X86AMode_IR(0, hregX86_ESP())));
            add_to_esp(env, 8);
            return dst;
         }
         case Iop_F32toF64: {
            /* this is a no-op */
            HReg res = iselFltExpr(env, e->Iex.Unop.arg);
            return res;
         }
         default: 
            break;
      }
   }

   /* --------- MULTIPLEX --------- */
   if (e->tag == Iex_ITE) { // VFD
      if (ty == Ity_F64
          && typeOfIRExpr(env->type_env,e->Iex.ITE.cond) == Ity_I1) {
         HReg r1  = iselDblExpr(env, e->Iex.ITE.iftrue);
         HReg r0  = iselDblExpr(env, e->Iex.ITE.iffalse);
         HReg dst = newVRegF(env);
         addInstr(env, X86Instr_FpUnary(Xfp_MOV,r1,dst));
         X86CondCode cc = iselCondCode(env, e->Iex.ITE.cond);
         addInstr(env, X86Instr_FpCMov(cc ^ 1, r0, dst));
         return dst;
      }
   }

   ppIRExpr(e);
   vpanic("iselDblExpr_wrk");
}
/*---------------------------------------------------------*/
/*--- ISEL: SIMD (Vector) expressions, 128 bit.         ---*/
/*---------------------------------------------------------*/

static HReg iselVecExpr ( ISelEnv* env, const IRExpr* e )
{
   HReg r = iselVecExpr_wrk( env, e );
#  if 0
   vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
#  endif
   vassert(hregClass(r) == HRcVec128);
   vassert(hregIsVirtual(r));
   return r;
}

/* DO NOT CALL THIS DIRECTLY */
static HReg iselVecExpr_wrk ( ISelEnv* env, const IRExpr* e )
{

#  define REQUIRE_SSE1                                    \
      do { if (env->hwcaps == 0/*baseline, no sse*/       \
               || env->hwcaps == VEX_HWCAPS_X86_MMXEXT /*Integer SSE*/) \
              goto vec_fail;                              \
      } while (0)

#  define REQUIRE_SSE2                                    \
      do { if (0 == (env->hwcaps & VEX_HWCAPS_X86_SSE2))  \
              goto vec_fail;                              \
      } while (0)

#  define SSE2_OR_ABOVE                                   \
      (env->hwcaps & VEX_HWCAPS_X86_SSE2)

   MatchInfo mi;
   HWord     fn = 0; /* address of helper fn, if required */
   Bool      arg1isEReg = False;
   X86SseOp  op = Xsse_INVALID;
   IRType    ty = typeOfIRExpr(env->type_env,e);
   vassert(ty == Ity_V128);
   if (e->tag == Iex_RdTmp) {
      return lookupIRTemp(env, e->Iex.RdTmp.tmp);
   }

   if (e->tag == Iex_Get) {
      HReg dst = newVRegV(env);
      addInstr(env, X86Instr_SseLdSt(
                       True/*load*/, 
                       dst,
                       X86AMode_IR(e->Iex.Get.offset, hregX86_EBP())
                    )
              );
      return dst;
   }

   if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
      HReg      dst = newVRegV(env);
      X86AMode* am  = iselIntExpr_AMode(env, e->Iex.Load.addr);
      addInstr(env, X86Instr_SseLdSt( True/*load*/, dst, am ));
      return dst;
   }

   if (e->tag == Iex_Const) {
      HReg dst = newVRegV(env);
      vassert(e->Iex.Const.con->tag == Ico_V128);
      addInstr(env, X86Instr_SseConst(e->Iex.Const.con->Ico.V128, dst));
      return dst;
   }
   if (e->tag == Iex_Unop) {

   if (SSE2_OR_ABOVE) { 
      /* 64UtoV128(LDle:I64(addr)) */
      DECLARE_PATTERN(p_zwiden_load64);
      DEFINE_PATTERN(p_zwiden_load64,
                     unop(Iop_64UtoV128, 
                          IRExpr_Load(Iend_LE,Ity_I64,bind(0))));
      if (matchIRExpr(&mi, p_zwiden_load64, e)) {
         X86AMode* am = iselIntExpr_AMode(env, mi.bindee[0]);
         HReg dst = newVRegV(env);
         addInstr(env, X86Instr_SseLdzLO(8, dst, am));
         return dst;
      }
   }

   switch (e->Iex.Unop.op) {

      case Iop_NotV128: {
         HReg arg = iselVecExpr(env, e->Iex.Unop.arg);
         return do_sse_Not128(env, arg);
      }
      case Iop_CmpNEZ64x2: {
         /* We can use SSE2 instructions for this. */
         /* Ideally, we want to do a 64Ix2 comparison against zero of
            the operand.  Problem is no such insn exists.  Solution
            therefore is to do a 32Ix4 comparison instead, and bitwise-
            negate (NOT) the result.  Let a,b,c,d be 32-bit lanes, and 
            let the not'd result of this initial comparison be a:b:c:d.
            What we need to compute is (a|b):(a|b):(c|d):(c|d).  So, use
            pshufd to create a value b:a:d:c, and OR that with a:b:c:d,
            giving the required result.

            The required selection sequence is 2,3,0,1, which
            according to Intel's documentation means the pshufd
            literal value is 0xB1, that is, 
            (2 << 6) | (3 << 4) | (0 << 2) | (1 << 0) 
         */
         HReg arg  = iselVecExpr(env, e->Iex.Unop.arg);
         HReg tmp  = newVRegV(env);
         HReg dst  = newVRegV(env);
         REQUIRE_SSE2;
         addInstr(env, X86Instr_SseReRg(Xsse_XOR, tmp, tmp));
         addInstr(env, X86Instr_SseReRg(Xsse_CMPEQ32, arg, tmp));
         tmp = do_sse_Not128(env, tmp);
         addInstr(env, X86Instr_SseShuf(0xB1, tmp, dst));
         addInstr(env, X86Instr_SseReRg(Xsse_OR, tmp, dst));
         return dst;
      }
      case Iop_CmpNEZ32x4: {
         /* Sigh, we have to generate lousy code since this has to
            work on SSE1 hosts */
         /* basically, the idea is: for each lane:
               movl lane, %r ; negl %r   (now CF = lane==0 ? 0 : 1)
               sbbl %r, %r               (now %r = 1Sto32(CF))
               movl %r, lane
         */
         Int       i;
         X86AMode* am;
         X86AMode* esp0 = X86AMode_IR(0, hregX86_ESP());
         HReg      arg  = iselVecExpr(env, e->Iex.Unop.arg);
         HReg      dst  = newVRegV(env);
         HReg      r32  = newVRegI(env);
         sub_from_esp(env, 16);
         addInstr(env, X86Instr_SseLdSt(False/*store*/, arg, esp0));
         for (i = 0; i < 4; i++) {
            am = X86AMode_IR(i*4, hregX86_ESP());
            addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Mem(am), r32));
            addInstr(env, X86Instr_Unary32(Xun_NEG, r32));
            addInstr(env, X86Instr_Alu32R(Xalu_SBB, X86RMI_Reg(r32), r32));
            addInstr(env, X86Instr_Alu32M(Xalu_MOV, X86RI_Reg(r32), am));
         }
         addInstr(env, X86Instr_SseLdSt(True/*load*/, dst, esp0));
         add_to_esp(env, 16);
         return dst;
      }
      case Iop_CmpNEZ8x16:
      case Iop_CmpNEZ16x8: {
         /* We can use SSE2 instructions for this. */
         HReg arg;
         HReg vec0 = newVRegV(env);
         HReg vec1 = newVRegV(env);
         HReg dst  = newVRegV(env);
         X86SseOp cmpOp 
            = e->Iex.Unop.op==Iop_CmpNEZ16x8 ? Xsse_CMPEQ16
                                             : Xsse_CMPEQ8;
         REQUIRE_SSE2;
         addInstr(env, X86Instr_SseReRg(Xsse_XOR, vec0, vec0));
         addInstr(env, mk_vMOVsd_RR(vec0, vec1));
         addInstr(env, X86Instr_Sse32Fx4(Xsse_CMPEQF, vec1, vec1));
         /* defer arg computation to here so as to give CMPEQF as long
            as possible to complete */
         arg = iselVecExpr(env, e->Iex.Unop.arg);
         /* vec0 is all 0s; vec1 is all 1s */
         addInstr(env, mk_vMOVsd_RR(arg, dst));
         /* 16x8 or 8x16 comparison == */
         addInstr(env, X86Instr_SseReRg(cmpOp, vec0, dst));
         addInstr(env, X86Instr_SseReRg(Xsse_XOR, vec1, dst));
         return dst;
      }
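      /* The sequence above compares each lane against an all-zeroes vector,
         producing all-1s in exactly the zero lanes, then XORs with an
         all-1s vector to invert that, leaving all-1s precisely in the
         nonzero lanes -- which is the required CmpNEZ result. */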
      case Iop_RecipEst32Fx4: op = Xsse_RCPF;   goto do_32Fx4_unary;
      case Iop_RSqrtEst32Fx4: op = Xsse_RSQRTF; goto do_32Fx4_unary;
      do_32Fx4_unary:
      {
         HReg arg = iselVecExpr(env, e->Iex.Unop.arg);
         HReg dst = newVRegV(env);
         addInstr(env, X86Instr_Sse32Fx4(op, arg, dst));
         return dst;
      }

      case Iop_RecipEst32F0x4: op = Xsse_RCPF;   goto do_32F0x4_unary;
      case Iop_RSqrtEst32F0x4: op = Xsse_RSQRTF; goto do_32F0x4_unary;
      case Iop_Sqrt32F0x4:     op = Xsse_SQRTF;  goto do_32F0x4_unary;
      do_32F0x4_unary:
      {
         /* A bit subtle.  We have to copy the arg to the result
            register first, because actually doing the SSE scalar insn
            leaves the upper 3/4 of the destination register
            unchanged.  Whereas the required semantics of these
            primops is that the upper 3/4 is simply copied in from the
            argument. */
         HReg arg = iselVecExpr(env, e->Iex.Unop.arg);
         HReg dst = newVRegV(env);
         addInstr(env, mk_vMOVsd_RR(arg, dst));
         addInstr(env, X86Instr_Sse32FLo(op, arg, dst));
         return dst;
      }

      case Iop_Sqrt64F0x2:  op = Xsse_SQRTF;  goto do_64F0x2_unary;
      do_64F0x2_unary:
      {
         /* A bit subtle.  We have to copy the arg to the result
            register first, because actually doing the SSE scalar insn
            leaves the upper half of the destination register
            unchanged.  Whereas the required semantics of these
            primops is that the upper half is simply copied in from the
            argument. */
         HReg arg = iselVecExpr(env, e->Iex.Unop.arg);
         HReg dst = newVRegV(env);
         REQUIRE_SSE2;
         addInstr(env, mk_vMOVsd_RR(arg, dst));
         addInstr(env, X86Instr_Sse64FLo(op, arg, dst));
         return dst;
      }
      case Iop_32UtoV128: {
         HReg      dst  = newVRegV(env);
         X86AMode* esp0 = X86AMode_IR(0, hregX86_ESP());
         X86RMI*   rmi  = iselIntExpr_RMI(env, e->Iex.Unop.arg);
         addInstr(env, X86Instr_Push(rmi));
         addInstr(env, X86Instr_SseLdzLO(4, dst, esp0));
         add_to_esp(env, 4);
         return dst;
      }

      case Iop_64UtoV128: {
         HReg      rHi, rLo;
         HReg      dst  = newVRegV(env);
         X86AMode* esp0 = X86AMode_IR(0, hregX86_ESP());
         iselInt64Expr(&rHi, &rLo, env, e->Iex.Unop.arg);
         addInstr(env, X86Instr_Push(X86RMI_Reg(rHi)));
         addInstr(env, X86Instr_Push(X86RMI_Reg(rLo)));
         addInstr(env, X86Instr_SseLdzLO(8, dst, esp0));
         add_to_esp(env, 8);
         return dst;
      }

      default:
         break;
   } /* switch (e->Iex.Unop.op) */
   } /* if (e->tag == Iex_Unop) */
   if (e->tag == Iex_Binop) {
   switch (e->Iex.Binop.op) {

      case Iop_Sqrt64Fx2:
      case Iop_Sqrt32Fx4: {
         /* :: (rmode, vec) -> vec */
         HReg arg = iselVecExpr(env, e->Iex.Binop.arg2);
         HReg dst = newVRegV(env);
         /* XXXROUNDINGFIXME */
         /* set roundingmode here */
         addInstr(env, (e->Iex.Binop.op == Iop_Sqrt64Fx2 
                           ? X86Instr_Sse64Fx2 : X86Instr_Sse32Fx4)
                       (Xsse_SQRTF, arg, dst));
         return dst;
      }

      case Iop_SetV128lo32: {
         HReg dst  = newVRegV(env);
         HReg srcV = iselVecExpr(env, e->Iex.Binop.arg1);
         HReg srcI = iselIntExpr_R(env, e->Iex.Binop.arg2);
         X86AMode* esp0 = X86AMode_IR(0, hregX86_ESP());
         sub_from_esp(env, 16);
         addInstr(env, X86Instr_SseLdSt(False/*store*/, srcV, esp0));
         addInstr(env, X86Instr_Alu32M(Xalu_MOV, X86RI_Reg(srcI), esp0));
         addInstr(env, X86Instr_SseLdSt(True/*load*/, dst, esp0));
         add_to_esp(env, 16);
         return dst;
      }
      case Iop_SetV128lo64: {
         HReg dst  = newVRegV(env);
         HReg srcV = iselVecExpr(env, e->Iex.Binop.arg1);
         HReg srcIhi, srcIlo;
         X86AMode* esp0 = X86AMode_IR(0, hregX86_ESP());
         X86AMode* esp4 = advance4(esp0);
         iselInt64Expr(&srcIhi, &srcIlo, env, e->Iex.Binop.arg2);
         sub_from_esp(env, 16);
         addInstr(env, X86Instr_SseLdSt(False/*store*/, srcV, esp0));
         addInstr(env, X86Instr_Alu32M(Xalu_MOV, X86RI_Reg(srcIlo), esp0));
         addInstr(env, X86Instr_Alu32M(Xalu_MOV, X86RI_Reg(srcIhi), esp4));
         addInstr(env, X86Instr_SseLdSt(True/*load*/, dst, esp0));
         add_to_esp(env, 16);
         return dst;
      }

      case Iop_64HLtoV128: {
         HReg r3, r2, r1, r0;
         X86AMode* esp0  = X86AMode_IR(0, hregX86_ESP());
         X86AMode* esp4  = advance4(esp0);
         X86AMode* esp8  = advance4(esp4);
         X86AMode* esp12 = advance4(esp8);
         HReg dst = newVRegV(env);
         /* do this via the stack (easy, convenient, etc) */
         sub_from_esp(env, 16);
         /* Do the less significant 64 bits */
         iselInt64Expr(&r1, &r0, env, e->Iex.Binop.arg2);
         addInstr(env, X86Instr_Alu32M(Xalu_MOV, X86RI_Reg(r0), esp0));
         addInstr(env, X86Instr_Alu32M(Xalu_MOV, X86RI_Reg(r1), esp4));
         /* Do the more significant 64 bits */
         iselInt64Expr(&r3, &r2, env, e->Iex.Binop.arg1);
         addInstr(env, X86Instr_Alu32M(Xalu_MOV, X86RI_Reg(r2), esp8));
         addInstr(env, X86Instr_Alu32M(Xalu_MOV, X86RI_Reg(r3), esp12));
         /* Fetch result back from stack. */
         addInstr(env, X86Instr_SseLdSt(True/*load*/, dst, esp0));
         add_to_esp(env, 16);
         return dst;
      }
      case Iop_CmpEQ32Fx4: op = Xsse_CMPEQF; goto do_32Fx4;
      case Iop_CmpLT32Fx4: op = Xsse_CMPLTF; goto do_32Fx4;
      case Iop_CmpLE32Fx4: op = Xsse_CMPLEF; goto do_32Fx4;
      case Iop_CmpUN32Fx4: op = Xsse_CMPUNF; goto do_32Fx4;
      case Iop_Max32Fx4:   op = Xsse_MAXF;   goto do_32Fx4;
      case Iop_Min32Fx4:   op = Xsse_MINF;   goto do_32Fx4;
      do_32Fx4:
      {
         HReg argL = iselVecExpr(env, e->Iex.Binop.arg1);
         HReg argR = iselVecExpr(env, e->Iex.Binop.arg2);
         HReg dst = newVRegV(env);
         addInstr(env, mk_vMOVsd_RR(argL, dst));
         addInstr(env, X86Instr_Sse32Fx4(op, argR, dst));
         return dst;
      }

      case Iop_CmpEQ64Fx2: op = Xsse_CMPEQF; goto do_64Fx2;
      case Iop_CmpLT64Fx2: op = Xsse_CMPLTF; goto do_64Fx2;
      case Iop_CmpLE64Fx2: op = Xsse_CMPLEF; goto do_64Fx2;
      case Iop_CmpUN64Fx2: op = Xsse_CMPUNF; goto do_64Fx2;
      case Iop_Max64Fx2:   op = Xsse_MAXF;   goto do_64Fx2;
      case Iop_Min64Fx2:   op = Xsse_MINF;   goto do_64Fx2;
      do_64Fx2:
      {
         HReg argL = iselVecExpr(env, e->Iex.Binop.arg1);
         HReg argR = iselVecExpr(env, e->Iex.Binop.arg2);
         HReg dst = newVRegV(env);
         REQUIRE_SSE2;
         addInstr(env, mk_vMOVsd_RR(argL, dst));
         addInstr(env, X86Instr_Sse64Fx2(op, argR, dst));
         return dst;
      }

      case Iop_CmpEQ32F0x4: op = Xsse_CMPEQF; goto do_32F0x4;
      case Iop_CmpLT32F0x4: op = Xsse_CMPLTF; goto do_32F0x4;
      case Iop_CmpLE32F0x4: op = Xsse_CMPLEF; goto do_32F0x4;
      case Iop_CmpUN32F0x4: op = Xsse_CMPUNF; goto do_32F0x4;
      case Iop_Add32F0x4:   op = Xsse_ADDF;   goto do_32F0x4;
      case Iop_Div32F0x4:   op = Xsse_DIVF;   goto do_32F0x4;
      case Iop_Max32F0x4:   op = Xsse_MAXF;   goto do_32F0x4;
      case Iop_Min32F0x4:   op = Xsse_MINF;   goto do_32F0x4;
      case Iop_Mul32F0x4:   op = Xsse_MULF;   goto do_32F0x4;
      case Iop_Sub32F0x4:   op = Xsse_SUBF;   goto do_32F0x4;
      do_32F0x4: {
         HReg argL = iselVecExpr(env, e->Iex.Binop.arg1);
         HReg argR = iselVecExpr(env, e->Iex.Binop.arg2);
         HReg dst = newVRegV(env);
         addInstr(env, mk_vMOVsd_RR(argL, dst));
         addInstr(env, X86Instr_Sse32FLo(op, argR, dst));
         return dst;
      }

      case Iop_CmpEQ64F0x2: op = Xsse_CMPEQF; goto do_64F0x2;
      case Iop_CmpLT64F0x2: op = Xsse_CMPLTF; goto do_64F0x2;
      case Iop_CmpLE64F0x2: op = Xsse_CMPLEF; goto do_64F0x2;
      case Iop_CmpUN64F0x2: op = Xsse_CMPUNF; goto do_64F0x2;
      case Iop_Add64F0x2:   op = Xsse_ADDF;   goto do_64F0x2;
      case Iop_Div64F0x2:   op = Xsse_DIVF;   goto do_64F0x2;
      case Iop_Max64F0x2:   op = Xsse_MAXF;   goto do_64F0x2;
      case Iop_Min64F0x2:   op = Xsse_MINF;   goto do_64F0x2;
      case Iop_Mul64F0x2:   op = Xsse_MULF;   goto do_64F0x2;
      case Iop_Sub64F0x2:   op = Xsse_SUBF;   goto do_64F0x2;
      do_64F0x2: {
         HReg argL = iselVecExpr(env, e->Iex.Binop.arg1);
         HReg argR = iselVecExpr(env, e->Iex.Binop.arg2);
         HReg dst = newVRegV(env);
         REQUIRE_SSE2;
         addInstr(env, mk_vMOVsd_RR(argL, dst));
         addInstr(env, X86Instr_Sse64FLo(op, argR, dst));
         return dst;
      }
      case Iop_QNarrowBin32Sto16Sx8: 
         op = Xsse_PACKSSD; arg1isEReg = True; goto do_SseReRg;
      case Iop_QNarrowBin16Sto8Sx16: 
         op = Xsse_PACKSSW; arg1isEReg = True; goto do_SseReRg;
      case Iop_QNarrowBin16Sto8Ux16: 
         op = Xsse_PACKUSW; arg1isEReg = True; goto do_SseReRg;

      case Iop_InterleaveHI8x16: 
         op = Xsse_UNPCKHB; arg1isEReg = True; goto do_SseReRg;
      case Iop_InterleaveHI16x8: 
         op = Xsse_UNPCKHW; arg1isEReg = True; goto do_SseReRg;
      case Iop_InterleaveHI32x4: 
         op = Xsse_UNPCKHD; arg1isEReg = True; goto do_SseReRg;
      case Iop_InterleaveHI64x2: 
         op = Xsse_UNPCKHQ; arg1isEReg = True; goto do_SseReRg;

      case Iop_InterleaveLO8x16: 
         op = Xsse_UNPCKLB; arg1isEReg = True; goto do_SseReRg;
      case Iop_InterleaveLO16x8: 
         op = Xsse_UNPCKLW; arg1isEReg = True; goto do_SseReRg;
      case Iop_InterleaveLO32x4: 
         op = Xsse_UNPCKLD; arg1isEReg = True; goto do_SseReRg;
      case Iop_InterleaveLO64x2: 
         op = Xsse_UNPCKLQ; arg1isEReg = True; goto do_SseReRg;

      case Iop_AndV128:    op = Xsse_AND;      goto do_SseReRg;
      case Iop_OrV128:     op = Xsse_OR;       goto do_SseReRg;
      case Iop_XorV128:    op = Xsse_XOR;      goto do_SseReRg;
      case Iop_Add8x16:    op = Xsse_ADD8;     goto do_SseReRg;
      case Iop_Add16x8:    op = Xsse_ADD16;    goto do_SseReRg;
      case Iop_Add32x4:    op = Xsse_ADD32;    goto do_SseReRg;
      case Iop_Add64x2:    op = Xsse_ADD64;    goto do_SseReRg;
      case Iop_QAdd8Sx16:  op = Xsse_QADD8S;   goto do_SseReRg;
      case Iop_QAdd16Sx8:  op = Xsse_QADD16S;  goto do_SseReRg;
      case Iop_QAdd8Ux16:  op = Xsse_QADD8U;   goto do_SseReRg;
      case Iop_QAdd16Ux8:  op = Xsse_QADD16U;  goto do_SseReRg;
      case Iop_Avg8Ux16:   op = Xsse_AVG8U;    goto do_SseReRg;
      case Iop_Avg16Ux8:   op = Xsse_AVG16U;   goto do_SseReRg;
      case Iop_CmpEQ8x16:  op = Xsse_CMPEQ8;   goto do_SseReRg;
      case Iop_CmpEQ16x8:  op = Xsse_CMPEQ16;  goto do_SseReRg;
      case Iop_CmpEQ32x4:  op = Xsse_CMPEQ32;  goto do_SseReRg;
      case Iop_CmpGT8Sx16: op = Xsse_CMPGT8S;  goto do_SseReRg;
      case Iop_CmpGT16Sx8: op = Xsse_CMPGT16S; goto do_SseReRg;
      case Iop_CmpGT32Sx4: op = Xsse_CMPGT32S; goto do_SseReRg;
      case Iop_Max16Sx8:   op = Xsse_MAX16S;   goto do_SseReRg;
      case Iop_Max8Ux16:   op = Xsse_MAX8U;    goto do_SseReRg;
      case Iop_Min16Sx8:   op = Xsse_MIN16S;   goto do_SseReRg;
      case Iop_Min8Ux16:   op = Xsse_MIN8U;    goto do_SseReRg;
      case Iop_MulHi16Ux8: op = Xsse_MULHI16U; goto do_SseReRg;
      case Iop_MulHi16Sx8: op = Xsse_MULHI16S; goto do_SseReRg;
      case Iop_Mul16x8:    op = Xsse_MUL16;    goto do_SseReRg;
      case Iop_Sub8x16:    op = Xsse_SUB8;     goto do_SseReRg;
      case Iop_Sub16x8:    op = Xsse_SUB16;    goto do_SseReRg;
      case Iop_Sub32x4:    op = Xsse_SUB32;    goto do_SseReRg;
      case Iop_Sub64x2:    op = Xsse_SUB64;    goto do_SseReRg;
      case Iop_QSub8Sx16:  op = Xsse_QSUB8S;   goto do_SseReRg;
      case Iop_QSub16Sx8:  op = Xsse_QSUB16S;  goto do_SseReRg;
      case Iop_QSub8Ux16:  op = Xsse_QSUB8U;   goto do_SseReRg;
      case Iop_QSub16Ux8:  op = Xsse_QSUB16U;  goto do_SseReRg;

      do_SseReRg: {
         HReg arg1 = iselVecExpr(env, e->Iex.Binop.arg1);
         HReg arg2 = iselVecExpr(env, e->Iex.Binop.arg2);
         HReg dst = newVRegV(env);
         if (op != Xsse_OR && op != Xsse_AND && op != Xsse_XOR)
            REQUIRE_SSE2;
         if (arg1isEReg) {
            addInstr(env, mk_vMOVsd_RR(arg2, dst));
            addInstr(env, X86Instr_SseReRg(op, arg1, dst));
         } else {
            addInstr(env, mk_vMOVsd_RR(arg1, dst));
            addInstr(env, X86Instr_SseReRg(op, arg2, dst));
         }
         return dst;
      }
      case Iop_ShlN8x16: {
         /* This instruction doesn't exist so we need to fake it using
            Xsse_SHL16 and Xsse_SHR16.

            We'd like to shift every byte in the 16-byte register to the
            left by some amount.

            Instead, we will make a copy and shift all the 16-bit words to
            the *right* by 8 and then to the left by 8 plus the shift
            amount.  That will get us the correct answer for the upper 8
            bits of each 16-bit word and zero elsewhere.

            Then we will shift all the 16-bit words in the original to the
            left by 8 plus the shift amount and then to the right by 8.
            This will get the correct answer for the lower 8 bits of each
            16-bit word and zero elsewhere.

            Finally, we will OR those two results together.

            Because we don't have a shift by constant in x86, we store the
            constant 8 into a register and shift by that as needed.
         */
         HReg      greg = iselVecExpr(env, e->Iex.Binop.arg1);
         X86RMI*   rmi  = iselIntExpr_RMI(env, e->Iex.Binop.arg2);
         X86AMode* esp0 = X86AMode_IR(0, hregX86_ESP());
         HReg ereg  = newVRegV(env);
         HReg eight = newVRegV(env); // To store the constant value 8.
         HReg dst   = newVRegV(env);
         HReg hi    = newVRegV(env);

         addInstr(env, X86Instr_Push(X86RMI_Imm(0)));
         addInstr(env, X86Instr_Push(X86RMI_Imm(0)));
         addInstr(env, X86Instr_Push(X86RMI_Imm(0)));
         addInstr(env, X86Instr_Push(rmi));
         addInstr(env, X86Instr_SseLdSt(True/*load*/, ereg, esp0));
         addInstr(env, X86Instr_Push(X86RMI_Imm(0)));
         addInstr(env, X86Instr_Push(X86RMI_Imm(0)));
         addInstr(env, X86Instr_Push(X86RMI_Imm(0)));
         addInstr(env, X86Instr_Push(X86RMI_Imm(8)));
         addInstr(env, X86Instr_SseLdSt(True/*load*/, eight, esp0));

         op = Xsse_SHL16;
         X86SseOp reverse_op = Xsse_SHR16;
         addInstr(env, mk_vMOVsd_RR(greg, hi));
         addInstr(env, X86Instr_SseReRg(reverse_op, eight, hi));
         addInstr(env, X86Instr_SseReRg(op, eight, hi));
         addInstr(env, X86Instr_SseReRg(op, ereg, hi));
         addInstr(env, mk_vMOVsd_RR(greg, dst));
         addInstr(env, X86Instr_SseReRg(op, eight, dst));
         addInstr(env, X86Instr_SseReRg(op, ereg, dst));
         addInstr(env, X86Instr_SseReRg(reverse_op, eight, dst));
         addInstr(env, X86Instr_SseReRg(Xsse_OR, hi, dst));

         add_to_esp(env, 32);
         return dst;
      }
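      /* Worked example for the case above (illustration only): take one
         16-bit lane holding 0xAABB and a shift amount of 3.  The 'hi'
         copy computes ((0xAABB >> 8) << 8) << 3 = 0xAA00 << 3 = 0x5000,
         i.e. the upper byte 0xAA shifted left by 3 (giving 0x50) with
         zeroes below.  The 'dst' copy computes ((0xAABB << 8) << 3) >> 8
         = 0xD800 >> 8 = 0x00D8, the lower byte 0xBB shifted left by 3.
         ORing the two gives 0x50D8: each byte shifted independently. */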
      case Iop_ShlN16x8: op = Xsse_SHL16; goto do_SseShift;
      case Iop_ShlN32x4: op = Xsse_SHL32; goto do_SseShift;
      case Iop_ShlN64x2: op = Xsse_SHL64; goto do_SseShift;
      case Iop_SarN16x8: op = Xsse_SAR16; goto do_SseShift;
      case Iop_SarN32x4: op = Xsse_SAR32; goto do_SseShift;
      case Iop_ShrN16x8: op = Xsse_SHR16; goto do_SseShift;
      case Iop_ShrN32x4: op = Xsse_SHR32; goto do_SseShift;
      case Iop_ShrN64x2: op = Xsse_SHR64; goto do_SseShift;
      do_SseShift: {
         HReg      greg = iselVecExpr(env, e->Iex.Binop.arg1);
         X86RMI*   rmi  = iselIntExpr_RMI(env, e->Iex.Binop.arg2);
         X86AMode* esp0 = X86AMode_IR(0, hregX86_ESP());
         HReg      ereg = newVRegV(env);
         HReg      dst  = newVRegV(env);
         addInstr(env, X86Instr_Push(X86RMI_Imm(0)));
         addInstr(env, X86Instr_Push(X86RMI_Imm(0)));
         addInstr(env, X86Instr_Push(X86RMI_Imm(0)));
         addInstr(env, X86Instr_Push(rmi));
         addInstr(env, X86Instr_SseLdSt(True/*load*/, ereg, esp0));
         addInstr(env, mk_vMOVsd_RR(greg, dst));
         addInstr(env, X86Instr_SseReRg(op, ereg, dst));
         add_to_esp(env, 16);
         return dst;
      }
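      /* Note on the four pushes above: the shift count ends up as the low
         32 bits of a 128-bit stack slot whose upper 96 bits are zero, and
         is then loaded into an xmm register, because the SSE
         shift-by-register forms take their count from the low quadword of
         the E operand. */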
      case Iop_NarrowBin32to16x8:
         fn = (HWord)h_generic_calc_NarrowBin32to16x8;
         goto do_SseAssistedBinary;
      case Iop_NarrowBin16to8x16:
         fn = (HWord)h_generic_calc_NarrowBin16to8x16;
         goto do_SseAssistedBinary;
      do_SseAssistedBinary: {
         /* As with the amd64 case (where this is copied from) we
            generate pretty bad code. */
         HReg dst  = newVRegV(env);
         HReg argL = iselVecExpr(env, e->Iex.Binop.arg1);
         HReg argR = iselVecExpr(env, e->Iex.Binop.arg2);
         HReg argp = newVRegI(env);
         /* subl $112, %esp         -- make a space */
         sub_from_esp(env, 112);
         /* leal 48(%esp), %r_argp  -- point into it */
         addInstr(env, X86Instr_Lea32(X86AMode_IR(48, hregX86_ESP()),
                                      argp));
         /* andl $-16, %r_argp      -- 16-align the pointer */
         addInstr(env, X86Instr_Alu32R(Xalu_AND,
                                       X86RMI_Imm( ~(UInt)15 ),
                                       argp));
         /* Prepare 3 arg regs:
            leal 0(%r_argp), %eax
            leal 16(%r_argp), %edx
            leal 32(%r_argp), %ecx
         */
         addInstr(env, X86Instr_Lea32(X86AMode_IR(0, argp),
                                      hregX86_EAX()));
         addInstr(env, X86Instr_Lea32(X86AMode_IR(16, argp),
                                      hregX86_EDX()));
         addInstr(env, X86Instr_Lea32(X86AMode_IR(32, argp),
                                      hregX86_ECX()));
         /* Store the two args, at (%edx) and (%ecx):
            movupd  %argL, 0(%edx)
            movupd  %argR, 0(%ecx)
         */
         addInstr(env, X86Instr_SseLdSt(False/*!isLoad*/, argL,
                                        X86AMode_IR(0, hregX86_EDX())));
         addInstr(env, X86Instr_SseLdSt(False/*!isLoad*/, argR,
                                        X86AMode_IR(0, hregX86_ECX())));
         /* call the helper */
         addInstr(env, X86Instr_Call( Xcc_ALWAYS, (Addr32)fn,
                                      3, mk_RetLoc_simple(RLPri_None) ));
         /* fetch the result from memory, using %r_argp, which the
            register allocator will keep alive across the call. */
         addInstr(env, X86Instr_SseLdSt(True/*isLoad*/, dst,
                                        X86AMode_IR(0, argp)));
         /* and finally, clear the space */
         add_to_esp(env, 112);
         return dst;
      }
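      /* Layout used above: 112 bytes of stack are reserved; argp is
         %esp+48 rounded down to a 16-byte boundary; the helper receives
         three pointers in %eax/%edx/%ecx, namely the result buffer at
         argp+0, argL at argp+16 and argR at argp+32. */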
      } /* switch (e->Iex.Binop.op) */
   } /* if (e->tag == Iex_Binop) */
   if (e->tag == Iex_Triop) {
      IRTriop *triop = e->Iex.Triop.details;
      switch (triop->op) {

      case Iop_Add32Fx4: op = Xsse_ADDF; goto do_32Fx4_w_rm;
      case Iop_Sub32Fx4: op = Xsse_SUBF; goto do_32Fx4_w_rm;
      case Iop_Mul32Fx4: op = Xsse_MULF; goto do_32Fx4_w_rm;
      case Iop_Div32Fx4: op = Xsse_DIVF; goto do_32Fx4_w_rm;

      do_32Fx4_w_rm: {
         HReg argL = iselVecExpr(env, triop->arg2);
         HReg argR = iselVecExpr(env, triop->arg3);
         HReg dst  = newVRegV(env);
         addInstr(env, mk_vMOVsd_RR(argL, dst));
         /* XXXROUNDINGFIXME */
         /* set roundingmode here */
         addInstr(env, X86Instr_Sse32Fx4(op, argR, dst));
         return dst;
      }
      case Iop_Add64Fx2: op = Xsse_ADDF; goto do_64Fx2_w_rm;
      case Iop_Sub64Fx2: op = Xsse_SUBF; goto do_64Fx2_w_rm;
      case Iop_Mul64Fx2: op = Xsse_MULF; goto do_64Fx2_w_rm;
      case Iop_Div64Fx2: op = Xsse_DIVF; goto do_64Fx2_w_rm;

      do_64Fx2_w_rm: {
         HReg argL = iselVecExpr(env, triop->arg2);
         HReg argR = iselVecExpr(env, triop->arg3);
         HReg dst  = newVRegV(env);
         REQUIRE_SSE2;
         addInstr(env, mk_vMOVsd_RR(argL, dst));
         /* XXXROUNDINGFIXME */
         /* set roundingmode here */
         addInstr(env, X86Instr_Sse64Fx2(op, argR, dst));
         return dst;
      }

      } /* switch (triop->op) */
   } /* if (e->tag == Iex_Triop) */
   if (e->tag == Iex_ITE) { // VFD
      HReg r1  = iselVecExpr(env, e->Iex.ITE.iftrue);
      HReg r0  = iselVecExpr(env, e->Iex.ITE.iffalse);
      HReg dst = newVRegV(env);
      addInstr(env, mk_vMOVsd_RR(r1,dst));
      X86CondCode cc = iselCondCode(env, e->Iex.ITE.cond);
      addInstr(env, X86Instr_SseCMov(cc ^ 1, r0, dst));
      return dst;
   }
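   /* In the Iex_ITE case above, dst is preloaded with the 'iftrue' value
      and the SseCMov with the inverted condition (cc ^ 1 -- x86 condition
      codes negate by flipping the low bit) overwrites it with the
      'iffalse' value exactly when the guard evaluates to false. */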
   vec_fail:
   vex_printf("iselVecExpr (hwcaps = %s): can't reduce\n",
              LibVEX_ppVexHwCaps(VexArchX86,env->hwcaps));
   ppIRExpr(e);
   vpanic("iselVecExpr_wrk");

#  undef REQUIRE_SSE1
#  undef REQUIRE_SSE2
#  undef SSE2_OR_ABOVE
}


/*---------------------------------------------------------*/
/*--- ISEL: Statements                                   ---*/
/*---------------------------------------------------------*/

static void iselStmt ( ISelEnv* env, IRStmt* stmt )
{
   if (vex_traceflags & VEX_TRACE_VCODE) {
      vex_printf("\n-- ");
      ppIRStmt(stmt);
      vex_printf("\n");
   }

   switch (stmt->tag) {
      /* --------- STORE --------- */
      case Ist_Store: {
         IRType    tya = typeOfIRExpr(env->type_env, stmt->Ist.Store.addr);
         IRType    tyd = typeOfIRExpr(env->type_env, stmt->Ist.Store.data);
         IREndness end = stmt->Ist.Store.end;

         if (tya != Ity_I32 || end != Iend_LE)
            goto stmt_fail;

         if (tyd == Ity_I32) {
            X86AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr);
            X86RI* ri = iselIntExpr_RI(env, stmt->Ist.Store.data);
            addInstr(env, X86Instr_Alu32M(Xalu_MOV,ri,am));
            return;
         }
         if (tyd == Ity_I8 || tyd == Ity_I16) {
            X86AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr);
            HReg r = iselIntExpr_R(env, stmt->Ist.Store.data);
            addInstr(env, X86Instr_Store( toUChar(tyd==Ity_I8 ? 1 : 2),
                                          r, am ));
            return;
         }
         if (tyd == Ity_F64) {
            X86AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr);
            HReg r = iselDblExpr(env, stmt->Ist.Store.data);
            addInstr(env, X86Instr_FpLdSt(False/*store*/, 8, r, am));
            return;
         }
         if (tyd == Ity_F32) {
            X86AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr);
            HReg r = iselFltExpr(env, stmt->Ist.Store.data);
            addInstr(env, X86Instr_FpLdSt(False/*store*/, 4, r, am));
            return;
         }
         if (tyd == Ity_I64) {
            HReg vHi, vLo, rA;
            iselInt64Expr(&vHi, &vLo, env, stmt->Ist.Store.data);
            rA = iselIntExpr_R(env, stmt->Ist.Store.addr);
            addInstr(env, X86Instr_Alu32M(
                             Xalu_MOV, X86RI_Reg(vLo), X86AMode_IR(0, rA)));
            addInstr(env, X86Instr_Alu32M(
                             Xalu_MOV, X86RI_Reg(vHi), X86AMode_IR(4, rA)));
            return;
         }
         if (tyd == Ity_V128) {
            X86AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr);
            HReg r = iselVecExpr(env, stmt->Ist.Store.data);
            addInstr(env, X86Instr_SseLdSt(False/*store*/, r, am));
            return;
         }
         break;
      }
      /* --------- PUT --------- */
      case Ist_Put: {
         IRType ty = typeOfIRExpr(env->type_env, stmt->Ist.Put.data);
         if (ty == Ity_I32) {
            /* We're going to write to memory, so compute the RHS into an
               X86RI. */
            X86RI* ri = iselIntExpr_RI(env, stmt->Ist.Put.data);
            addInstr(env,
                     X86Instr_Alu32M(
                        Xalu_MOV,
                        ri,
                        X86AMode_IR(stmt->Ist.Put.offset,hregX86_EBP())
                     ));
            return;
         }
         if (ty == Ity_I8 || ty == Ity_I16) {
            HReg r = iselIntExpr_R(env, stmt->Ist.Put.data);
            addInstr(env, X86Instr_Store(
                             toUChar(ty==Ity_I8 ? 1 : 2),
                             r,
                             X86AMode_IR(stmt->Ist.Put.offset,
                                         hregX86_EBP())));
            return;
         }
         if (ty == Ity_I64) {
            HReg vHi, vLo;
            X86AMode* am  = X86AMode_IR(stmt->Ist.Put.offset, hregX86_EBP());
            X86AMode* am4 = advance4(am);
            iselInt64Expr(&vHi, &vLo, env, stmt->Ist.Put.data);
            addInstr(env, X86Instr_Alu32M( Xalu_MOV, X86RI_Reg(vLo), am ));
            addInstr(env, X86Instr_Alu32M( Xalu_MOV, X86RI_Reg(vHi), am4 ));
            return;
         }
         if (ty == Ity_V128) {
            HReg      vec = iselVecExpr(env, stmt->Ist.Put.data);
            X86AMode* am  = X86AMode_IR(stmt->Ist.Put.offset, hregX86_EBP());
            addInstr(env, X86Instr_SseLdSt(False/*store*/, vec, am));
            return;
         }
         if (ty == Ity_F32) {
            HReg f32 = iselFltExpr(env, stmt->Ist.Put.data);
            X86AMode* am = X86AMode_IR(stmt->Ist.Put.offset, hregX86_EBP());
            set_FPU_rounding_default(env); /* paranoia */
            addInstr(env, X86Instr_FpLdSt( False/*store*/, 4, f32, am ));
            return;
         }
         if (ty == Ity_F64) {
            HReg f64 = iselDblExpr(env, stmt->Ist.Put.data);
            X86AMode* am = X86AMode_IR(stmt->Ist.Put.offset, hregX86_EBP());
            set_FPU_rounding_default(env); /* paranoia */
            addInstr(env, X86Instr_FpLdSt( False/*store*/, 8, f64, am ));
            return;
         }
         break;
      }
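      /* Note: in Vex-generated x86 code the guest state pointer lives in
         %ebp, so every Put above is simply a store to offset(%ebp), as the
         X86AMode_IR(..., hregX86_EBP()) amodes show. */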
      /* --------- Indexed PUT --------- */
      case Ist_PutI: {
         IRPutI *puti = stmt->Ist.PutI.details;

         X86AMode* am
            = genGuestArrayOffset(
                 env, puti->descr,
                 puti->ix, puti->bias );

         IRType ty = typeOfIRExpr(env->type_env, puti->data);
         if (ty == Ity_F64) {
            HReg val = iselDblExpr(env, puti->data);
            addInstr(env, X86Instr_FpLdSt( False/*store*/, 8, val, am ));
            return;
         }
         if (ty == Ity_I8) {
            HReg r = iselIntExpr_R(env, puti->data);
            addInstr(env, X86Instr_Store( 1, r, am ));
            return;
         }
         if (ty == Ity_I32) {
            HReg r = iselIntExpr_R(env, puti->data);
            addInstr(env, X86Instr_Alu32M( Xalu_MOV, X86RI_Reg(r), am ));
            return;
         }
         if (ty == Ity_I64) {
            HReg rHi, rLo;
            X86AMode* am4 = advance4(am);
            iselInt64Expr(&rHi, &rLo, env, puti->data);
            addInstr(env, X86Instr_Alu32M( Xalu_MOV, X86RI_Reg(rLo), am ));
            addInstr(env, X86Instr_Alu32M( Xalu_MOV, X86RI_Reg(rHi), am4 ));
            return;
         }
         break;
      }
      /* --------- TMP --------- */
      case Ist_WrTmp: {
         IRTemp tmp = stmt->Ist.WrTmp.tmp;
         IRType ty = typeOfIRTemp(env->type_env, tmp);

         /* optimisation: if stmt->Ist.WrTmp.data is Add32(..,..),
            compute it into an AMode and then use LEA.  This usually
            produces fewer instructions, often because (for memcheck
            created IR) we get t = address-expression, (t is later used
            twice) and so doing this naturally turns address-expression
            back into an X86 amode. */
         if (ty == Ity_I32
             && stmt->Ist.WrTmp.data->tag == Iex_Binop
             && stmt->Ist.WrTmp.data->Iex.Binop.op == Iop_Add32) {
            X86AMode* am = iselIntExpr_AMode(env, stmt->Ist.WrTmp.data);
            HReg dst = lookupIRTemp(env, tmp);
            if (am->tag == Xam_IR && am->Xam.IR.imm == 0) {
               /* Hmm, iselIntExpr_AMode wimped out and just computed the
                  value into a register.  Just emit a normal reg-reg move
                  so reg-alloc can coalesce it away in the usual way. */
               HReg src = am->Xam.IR.reg;
               addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Reg(src), dst));
            } else {
               addInstr(env, X86Instr_Lea32(am,dst));
            }
            return;
         }
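         /* Illustration of the win: for IR like
            t = Add32(Add32(r1, Shl32(r2, 2)), 0x10), iselIntExpr_AMode can
            typically fold the whole tree into one amode, so the assignment
            becomes a single "leal 0x10(%r1,%r2,4), %t" rather than a chain
            of adds and shifts. */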
         if (ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8) {
            X86RMI* rmi = iselIntExpr_RMI(env, stmt->Ist.WrTmp.data);
            HReg dst = lookupIRTemp(env, tmp);
            addInstr(env, X86Instr_Alu32R(Xalu_MOV,rmi,dst));
            return;
         }
         if (ty == Ity_I64) {
            HReg rHi, rLo, dstHi, dstLo;
            iselInt64Expr(&rHi,&rLo, env, stmt->Ist.WrTmp.data);
            lookupIRTemp64( &dstHi, &dstLo, env, tmp);
            addInstr(env, mk_iMOVsd_RR(rHi,dstHi) );
            addInstr(env, mk_iMOVsd_RR(rLo,dstLo) );
            return;
         }
         if (ty == Ity_I1) {
            X86CondCode cond = iselCondCode(env, stmt->Ist.WrTmp.data);
            HReg dst = lookupIRTemp(env, tmp);
            addInstr(env, X86Instr_Set32(cond, dst));
            return;
         }
         if (ty == Ity_F64) {
            HReg dst = lookupIRTemp(env, tmp);
            HReg src = iselDblExpr(env, stmt->Ist.WrTmp.data);
            addInstr(env, X86Instr_FpUnary(Xfp_MOV,src,dst));
            return;
         }
         if (ty == Ity_F32) {
            HReg dst = lookupIRTemp(env, tmp);
            HReg src = iselFltExpr(env, stmt->Ist.WrTmp.data);
            addInstr(env, X86Instr_FpUnary(Xfp_MOV,src,dst));
            return;
         }
         if (ty == Ity_V128) {
            HReg dst = lookupIRTemp(env, tmp);
            HReg src = iselVecExpr(env, stmt->Ist.WrTmp.data);
            addInstr(env, mk_vMOVsd_RR(src,dst));
            return;
         }
         break;
      }
      /* --------- Call to DIRTY helper --------- */
      case Ist_Dirty: {
         IRDirty* d = stmt->Ist.Dirty.details;

         /* Figure out the return type, if any. */
         IRType retty = Ity_INVALID;
         if (d->tmp != IRTemp_INVALID)
            retty = typeOfIRTemp(env->type_env, d->tmp);

         Bool retty_ok = False;
         switch (retty) {
            case Ity_INVALID: /* function doesn't return anything */
            case Ity_I64: case Ity_I32: case Ity_I16: case Ity_I8:
            case Ity_V128:
               retty_ok = True; break;
            default:
               break;
         }
         if (!retty_ok)
            break; /* will go to stmt_fail: */

         /* Marshal args, do the call, and set the return value to
            0x555..555 if this is a conditional call that returns a value
            and the call is skipped. */
         UInt   addToSp = 0;
         RetLoc rloc    = mk_RetLoc_INVALID();
         doHelperCall( &addToSp, &rloc, env, d->guard, d->cee, retty, d->args );
         vassert(is_sane_RetLoc(rloc));

         /* Now figure out what to do with the returned value, if any. */
         switch (retty) {
            case Ity_INVALID:
               /* No return value.  Nothing to do. */
               vassert(d->tmp == IRTemp_INVALID);
               vassert(rloc.pri == RLPri_None);
               vassert(addToSp == 0);
               return;
            case Ity_I32: case Ity_I16: case Ity_I8: {
               /* The returned value is in %eax.  Park it in the register
                  associated with tmp. */
               vassert(rloc.pri == RLPri_Int);
               vassert(addToSp == 0);
               HReg dst = lookupIRTemp(env, d->tmp);
               addInstr(env, mk_iMOVsd_RR(hregX86_EAX(),dst) );
               return;
            }
            case Ity_I64: {
               /* The returned value is in %edx:%eax.  Park it in the
                  register-pair associated with tmp. */
               vassert(rloc.pri == RLPri_2Int);
               vassert(addToSp == 0);
               HReg dstHi, dstLo;
               lookupIRTemp64( &dstHi, &dstLo, env, d->tmp);
               addInstr(env, mk_iMOVsd_RR(hregX86_EDX(),dstHi) );
               addInstr(env, mk_iMOVsd_RR(hregX86_EAX(),dstLo) );
               return;
            }
            case Ity_V128: {
               /* The returned value is on the stack, and *retloc tells
                  us where.  Fish it off the stack and then move the
                  stack pointer upwards to clear it, as directed by
                  addToSp. */
               vassert(rloc.pri == RLPri_V128SpRel);
               vassert(addToSp >= 16);
               HReg dst = lookupIRTemp(env, d->tmp);
               X86AMode* am = X86AMode_IR(rloc.spOff, hregX86_ESP());
               addInstr(env, X86Instr_SseLdSt( True/*load*/, dst, am ));
               add_to_esp(env, addToSp);
               return;
            }
            default:
               /*NOTREACHED*/
               vassert(0);
         }
         break;
      }
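      /* Summary of the return-location convention used above: RLPri_None
         means no value, RLPri_Int means the value comes back in %eax,
         RLPri_2Int means %edx:%eax, and RLPri_V128SpRel means the callee
         left a 128-bit result rloc.spOff bytes above %esp. */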
      /* --------- MEM FENCE --------- */
      case Ist_MBE:
         switch (stmt->Ist.MBE.event) {
            case Imbe_Fence:
               addInstr(env, X86Instr_MFence(env->hwcaps));
               return;
            default:
               break;
         }
         break;
      /* --------- ACAS --------- */
      case Ist_CAS:
         if (stmt->Ist.CAS.details->oldHi == IRTemp_INVALID) {
            /* "normal" singleton CAS */
            UChar  sz;
            IRCAS* cas = stmt->Ist.CAS.details;
            IRType ty  = typeOfIRExpr(env->type_env, cas->dataLo);
            /* get: cas->expdLo into %eax, and cas->dataLo into %ebx */
            X86AMode* am = iselIntExpr_AMode(env, cas->addr);
            HReg rDataLo = iselIntExpr_R(env, cas->dataLo);
            HReg rExpdLo = iselIntExpr_R(env, cas->expdLo);
            HReg rOldLo  = lookupIRTemp(env, cas->oldLo);
            vassert(cas->expdHi == NULL);
            vassert(cas->dataHi == NULL);
            addInstr(env, mk_iMOVsd_RR(rExpdLo, rOldLo));
            addInstr(env, mk_iMOVsd_RR(rExpdLo, hregX86_EAX()));
            addInstr(env, mk_iMOVsd_RR(rDataLo, hregX86_EBX()));
            switch (ty) {
               case Ity_I32: sz = 4; break;
               case Ity_I16: sz = 2; break;
               case Ity_I8:  sz = 1; break;
               default: goto unhandled_cas;
            }
            addInstr(env, X86Instr_ACAS(am, sz));
            addInstr(env,
                     X86Instr_CMov32(Xcc_NZ,
                                     X86RM_Reg(hregX86_EAX()), rOldLo));
            return;
         } else {
            /* double CAS */
            IRCAS* cas = stmt->Ist.CAS.details;
            IRType ty  = typeOfIRExpr(env->type_env, cas->dataLo);
            /* only 32-bit allowed in this case */
            /* get: cas->expdLo into %eax, and cas->dataLo into %ebx */
            /* get: cas->expdHi into %edx, and cas->dataHi into %ecx */
            X86AMode* am = iselIntExpr_AMode(env, cas->addr);
            HReg rDataHi = iselIntExpr_R(env, cas->dataHi);
            HReg rDataLo = iselIntExpr_R(env, cas->dataLo);
            HReg rExpdHi = iselIntExpr_R(env, cas->expdHi);
            HReg rExpdLo = iselIntExpr_R(env, cas->expdLo);
            HReg rOldHi  = lookupIRTemp(env, cas->oldHi);
            HReg rOldLo  = lookupIRTemp(env, cas->oldLo);
            addInstr(env, mk_iMOVsd_RR(rExpdHi, rOldHi));
            addInstr(env, mk_iMOVsd_RR(rExpdLo, rOldLo));
            addInstr(env, mk_iMOVsd_RR(rExpdHi, hregX86_EDX()));
            addInstr(env, mk_iMOVsd_RR(rExpdLo, hregX86_EAX()));
            addInstr(env, mk_iMOVsd_RR(rDataHi, hregX86_ECX()));
            addInstr(env, mk_iMOVsd_RR(rDataLo, hregX86_EBX()));
            addInstr(env, X86Instr_DACAS(am));
            addInstr(env,
                     X86Instr_CMov32(Xcc_NZ,
                                     X86RM_Reg(hregX86_EDX()), rOldHi));
            addInstr(env,
                     X86Instr_CMov32(Xcc_NZ,
                                     X86RM_Reg(hregX86_EAX()), rOldLo));
            return;
         }
         unhandled_cas:
         goto stmt_fail;
      /* --------- INSTR MARK --------- */
      /* Doesn't generate any executable code ... */
      case Ist_IMark:
         return;

      /* --------- NO-OP --------- */
      /* Fairly self-explanatory, wouldn't you say? */
      case Ist_NoOp:
         return;
      /* --------- EXIT --------- */
      case Ist_Exit: {
         if (stmt->Ist.Exit.dst->tag != Ico_U32)
            vpanic("iselStmt(x86): Ist_Exit: dst is not a 32-bit value");

         X86CondCode cc    = iselCondCode(env, stmt->Ist.Exit.guard);
         X86AMode*   amEIP = X86AMode_IR(stmt->Ist.Exit.offsIP,
                                         hregX86_EBP());

         /* Case: boring transfer to known address */
         if (stmt->Ist.Exit.jk == Ijk_Boring) {
            if (env->chainingAllowed) {
               /* .. almost always true .. */
               /* Skip the event check at the dst if this is a forwards
                  edge. */
               Bool toFastEP
                  = ((Addr32)stmt->Ist.Exit.dst->Ico.U32) > env->max_ga;
               if (0) vex_printf("%s", toFastEP ? "Y" : ",");
               addInstr(env, X86Instr_XDirect(stmt->Ist.Exit.dst->Ico.U32,
                                              amEIP, cc, toFastEP));
            } else {
               /* .. very occasionally .. */
               /* We can't use chaining, so ask for an assisted transfer,
                  as that's the only alternative that is allowable. */
               HReg r = iselIntExpr_R(env, IRExpr_Const(stmt->Ist.Exit.dst));
               addInstr(env, X86Instr_XAssisted(r, amEIP, cc, Ijk_Boring));
            }
            return;
         }
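         /* Roughly: X86Instr_XDirect is a direct, later-patchable
            ("chainable") jump to a known guest address, whereas
            X86Instr_XAssisted returns to the dispatcher together with the
            jump kind, which is why it is the fallback whenever chaining is
            not allowed. */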
         /* Case: assisted transfer to arbitrary address */
         switch (stmt->Ist.Exit.jk) {
            /* Keep this list in sync with that in iselNext below */
            case Ijk_Sys_int128:
            case Ijk_Sys_int129:
            case Ijk_Sys_int130:
            case Ijk_Sys_int145:
            case Ijk_Sys_int210:
            case Ijk_Sys_syscall:
            case Ijk_Sys_sysenter:
            case Ijk_InvalICache:
            {
               HReg r = iselIntExpr_R(env, IRExpr_Const(stmt->Ist.Exit.dst));
               addInstr(env, X86Instr_XAssisted(r, amEIP, cc,
                                                stmt->Ist.Exit.jk));
               return;
            }
            default:
               break;
         }

         /* Do we ever expect to see any other kind? */
         goto stmt_fail;
      }

      default: break;
   } /* switch (stmt->tag) */

  stmt_fail:
   ppIRStmt(stmt);
   vpanic("iselStmt");
}
/*---------------------------------------------------------*/
/*--- ISEL: Basic block terminators (Nexts)             ---*/
/*---------------------------------------------------------*/

static void iselNext ( ISelEnv* env,
                       IRExpr* next, IRJumpKind jk, Int offsIP )
{
   if (vex_traceflags & VEX_TRACE_VCODE) {
      vex_printf( "\n-- PUT(%d) = ", offsIP);
      ppIRExpr( next );
      vex_printf( "; exit-");
      ppIRJumpKind(jk);
      vex_printf( "\n");
   }
   /* Case: boring transfer to known address */
   if (next->tag == Iex_Const) {
      IRConst* cdst = next->Iex.Const.con;
      vassert(cdst->tag == Ico_U32);
      if (jk == Ijk_Boring || jk == Ijk_Call) {
         /* Boring transfer to known address */
         X86AMode* amEIP = X86AMode_IR(offsIP, hregX86_EBP());
         if (env->chainingAllowed) {
            /* .. almost always true .. */
            /* Skip the event check at the dst if this is a forwards
               edge. */
            Bool toFastEP
               = ((Addr32)cdst->Ico.U32) > env->max_ga;
            if (0) vex_printf("%s", toFastEP ? "X" : ".");
            addInstr(env, X86Instr_XDirect(cdst->Ico.U32,
                                           amEIP, Xcc_ALWAYS,
                                           toFastEP));
         } else {
            /* .. very occasionally .. */
            /* We can't use chaining, so ask for an assisted transfer,
               as that's the only alternative that is allowable. */
            HReg r = iselIntExpr_R(env, next);
            addInstr(env, X86Instr_XAssisted(r, amEIP, Xcc_ALWAYS,
                                             Ijk_Boring));
         }
         return;
      }
   }
   /* Case: call/return (==boring) transfer to any address */
   switch (jk) {
      case Ijk_Boring: case Ijk_Ret: case Ijk_Call: {
         HReg r = iselIntExpr_R(env, next);
         X86AMode* amEIP = X86AMode_IR(offsIP, hregX86_EBP());
         if (env->chainingAllowed) {
            addInstr(env, X86Instr_XIndir(r, amEIP, Xcc_ALWAYS));
         } else {
            addInstr(env, X86Instr_XAssisted(r, amEIP, Xcc_ALWAYS,
                                             Ijk_Boring));
         }
         return;
      }
      default:
         break;
   }
   /* Case: assisted transfer to arbitrary address */
   switch (jk) {
      /* Keep this list in sync with that for Ist_Exit above */
      case Ijk_Sys_int128:
      case Ijk_Sys_int129:
      case Ijk_Sys_int130:
      case Ijk_Sys_int145:
      case Ijk_Sys_int210:
      case Ijk_Sys_syscall:
      case Ijk_Sys_sysenter:
      case Ijk_InvalICache:
      {
         HReg r = iselIntExpr_R(env, next);
         X86AMode* amEIP = X86AMode_IR(offsIP, hregX86_EBP());
         addInstr(env, X86Instr_XAssisted(r, amEIP, Xcc_ALWAYS, jk));
         return;
      }
      default:
         break;
   }

   vex_printf( "\n-- PUT(%d) = ", offsIP);
   ppIRExpr( next );
   vex_printf( "; exit-");
   ppIRJumpKind(jk);
   vex_printf( "\n");
   vassert(0); // are we expecting any other kind?
}
/*---------------------------------------------------------*/
/*--- Insn selector top-level                           ---*/
/*---------------------------------------------------------*/

/* Translate an entire SB to x86 code. */

HInstrArray* iselSB_X86 ( const IRSB* bb,
                          VexArch arch_host,
                          const VexArchInfo* archinfo_host,
                          const VexAbiInfo* vbi/*UNUSED*/,
                          Int offs_Host_EvC_Counter,
                          Int offs_Host_EvC_FailAddr,
                          Bool chainingAllowed,
                          Bool addProfInc,
                          Addr max_ga )
{
   Int      i, j;
   HReg     hreg, hregHI;
   ISelEnv* env;
   UInt     hwcaps_host = archinfo_host->hwcaps;
   X86AMode *amCounter, *amFailAddr;

   /* sanity ... */
   vassert(arch_host == VexArchX86);
   vassert(0 == (hwcaps_host
                 & ~(VEX_HWCAPS_X86_MMXEXT
                     | VEX_HWCAPS_X86_SSE1
                     | VEX_HWCAPS_X86_SSE2
                     | VEX_HWCAPS_X86_SSE3
                     | VEX_HWCAPS_X86_LZCNT)));
   /* Check that the host's endianness is as expected. */
   vassert(archinfo_host->endness == VexEndnessLE);

   /* Make up an initial environment to use. */
   env = LibVEX_Alloc_inline(sizeof(ISelEnv));
   env->vreg_ctr = 0;

   /* Set up output code array. */
   env->code = newHInstrArray();

   /* Copy BB's type env. */
   env->type_env = bb->tyenv;

   /* Make up an IRTemp -> virtual HReg mapping.  This doesn't
      change as we go along. */
   env->n_vregmap = bb->tyenv->types_used;
   env->vregmap   = LibVEX_Alloc_inline(env->n_vregmap * sizeof(HReg));
   env->vregmapHI = LibVEX_Alloc_inline(env->n_vregmap * sizeof(HReg));

   /* and finally ... */
   env->chainingAllowed = chainingAllowed;
   env->hwcaps          = hwcaps_host;
   env->max_ga          = max_ga;
   /* For each IR temporary, allocate a suitably-kinded virtual
      register. */
   j = 0;
   for (i = 0; i < env->n_vregmap; i++) {
      hregHI = hreg = INVALID_HREG;
      switch (bb->tyenv->types[i]) {
         case Ity_I1:
         case Ity_I8:
         case Ity_I16:
         case Ity_I32:  hreg   = mkHReg(True, HRcInt32,  0, j++); break;
         case Ity_I64:  hreg   = mkHReg(True, HRcInt32,  0, j++);
                        hregHI = mkHReg(True, HRcInt32,  0, j++); break;
         case Ity_F32:
         case Ity_F64:  hreg   = mkHReg(True, HRcFlt64,  0, j++); break;
         case Ity_V128: hreg   = mkHReg(True, HRcVec128, 0, j++); break;
         default: ppIRType(bb->tyenv->types[i]);
                  vpanic("iselBB: IRTemp type");
      }
      env->vregmap[i]   = hreg;
      env->vregmapHI[i] = hregHI;
   }
   env->vreg_ctr = j;
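   /* So a 64-bit integer IRTemp occupies two 32-bit virtual registers:
      the low half lives in vregmap[] and the high half in vregmapHI[],
      which is the pair that lookupIRTemp64 hands back to the statement
      selector above. */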
   /* The very first instruction must be an event check. */
   amCounter  = X86AMode_IR(offs_Host_EvC_Counter,  hregX86_EBP());
   amFailAddr = X86AMode_IR(offs_Host_EvC_FailAddr, hregX86_EBP());
   addInstr(env, X86Instr_EvCheck(amCounter, amFailAddr));

   /* Possibly a block counter increment (for profiling).  At this
      point we don't know the address of the counter, so just pretend
      it is zero.  It will have to be patched later, but before this
      translation is used, by a call to LibVEX_patchProfCtr. */
   if (addProfInc)
      addInstr(env, X86Instr_ProfInc());
   /* Ok, finally we can iterate over the statements. */
   for (i = 0; i < bb->stmts_used; i++)
      iselStmt(env, bb->stmts[i]);

   iselNext(env, bb->next, bb->jumpkind, bb->offsIP);

   /* record the number of vregs we used. */
   env->code->n_vregs = env->vreg_ctr;
   return env->code;
}
/*---------------------------------------------------------------*/
/*--- end                                     host_x86_isel.c ---*/
/*---------------------------------------------------------------*/