/*---------------------------------------------------------------*/
/*--- begin                                   host_x86_isel.c ---*/
/*---------------------------------------------------------------*/

/*
   This file is part of Valgrind, a dynamic binary instrumentation
   framework.

   Copyright (C) 2004-2017 OpenWorks LLP

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, see <http://www.gnu.org/licenses/>.

   The GNU General Public License is contained in the file COPYING.

   Neither the names of the U.S. Department of Energy nor the
   University of California nor the names of its contributors may be
   used to endorse or promote products derived from this software
   without prior written permission.
*/
#include "libvex_basictypes.h"
#include "libvex_ir.h"
#include "libvex.h"

#include "ir_match.h"
#include "main_util.h"
#include "main_globals.h"
#include "host_generic_regs.h"
#include "host_generic_simd64.h"
#include "host_generic_simd128.h"
#include "host_x86_defs.h"
/* TODO 21 Apr 2005:

   -- (Really an assembler issue) don't emit CMov32 as a cmov
      insn, since that's expensive on P4 and conditional branch
      is cheaper if (as we expect) the condition is highly predictable

   -- preserve xmm registers across function calls (by declaring them
      as trashed by call insns)

   -- preserve x87 ST stack discipline across function calls.  Sigh.

   -- Check doHelperCall: if a call is conditional, we cannot safely
      compute any regparm args directly to registers.  Hence, the
      fast-regparm marshalling should be restricted to unconditional
      calls only.
*/
/*---------------------------------------------------------*/
/*--- x87 control word stuff                            ---*/
/*---------------------------------------------------------*/

/* Vex-generated code expects to run with the FPU set as follows: all
   exceptions masked, round-to-nearest, precision = 53 bits.  This
   corresponds to a FPU control word value of 0x027F.

   Similarly the SSE control word (%mxcsr) should be 0x1F80.

   %fpucw and %mxcsr should have these values on entry to
   Vex-generated code, and those values should be unchanged at exit.
*/

#define DEFAULT_FPUCW 0x027F

/* debugging only, do not use */
/* define DEFAULT_FPUCW 0x037F */
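
/* For reference (annotation, not part of the original source): 0x027F
   breaks down as follows in the x87 control word -- bits 0..5 set = all
   six exception classes masked, PC (bits 9:8) = 10b = 53-bit precision,
   RC (bits 11:10) = 00b = round to nearest.  Likewise 0x1F80 in %mxcsr
   sets the six SSE exception-mask bits (7..12), with RC (14:13) = 00b
   and FZ/DAZ clear. */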
/*---------------------------------------------------------*/
/*--- misc helpers                                      ---*/
/*---------------------------------------------------------*/

/* These are duplicated in guest-x86/toIR.c */
static IRExpr* unop ( IROp op, IRExpr* a )
{
   return IRExpr_Unop(op, a);
}

static IRExpr* binop ( IROp op, IRExpr* a1, IRExpr* a2 )
{
   return IRExpr_Binop(op, a1, a2);
}

static IRExpr* bind ( Int binder )
{
   return IRExpr_Binder(binder);
}

static Bool isZeroU8 ( IRExpr* e )
{
   return e->tag == Iex_Const
          && e->Iex.Const.con->tag == Ico_U8
          && e->Iex.Const.con->Ico.U8 == 0;
}

static Bool isZeroU32 ( IRExpr* e )
{
   return e->tag == Iex_Const
          && e->Iex.Const.con->tag == Ico_U32
          && e->Iex.Const.con->Ico.U32 == 0;
}

//static Bool isZeroU64 ( IRExpr* e )
//{
//   return e->tag == Iex_Const
//          && e->Iex.Const.con->tag == Ico_U64
//          && e->Iex.Const.con->Ico.U64 == 0ULL;
//}
/*---------------------------------------------------------*/
/*--- ISelEnv                                           ---*/
/*---------------------------------------------------------*/

/* This carries around:

   - A mapping from IRTemp to IRType, giving the type of any IRTemp we
     might encounter.  This is computed before insn selection starts,
     and does not change.

   - A mapping from IRTemp to HReg.  This tells the insn selector
     which virtual register(s) are associated with each IRTemp
     temporary.  This is computed before insn selection starts, and
     does not change.  We expect this mapping to map precisely the
     same set of IRTemps as the type mapping does.

        - vregmap   holds the primary register for the IRTemp.
        - vregmapHI is only used for 64-bit integer-typed
             IRTemps.  It holds the identity of a second
             32-bit virtual HReg, which holds the high half
             of the value.

   - The code array, that is, the insns selected so far.

   - A counter, for generating new virtual registers.

   - The host subarchitecture we are selecting insns for.
     This is set at the start and does not change.

   - A Bool for indicating whether we may generate chain-me
     instructions for control flow transfers, or whether we must use
     XAssisted.

   - The maximum guest address of any guest insn in this block.
     Actually, the address of the highest-addressed byte from any insn
     in this block.  Is set at the start and does not change.  This is
     used for detecting jumps which are definitely forward-edges from
     this block, and therefore can be made (chained) to the fast entry
     point of the destination, thereby avoiding the destination's
     event check.

   Note, this is all (well, mostly) host-independent.
*/

typedef
   struct {
      /* Constant -- are set at the start and do not change. */
      IRTypeEnv*   type_env;

      HReg*        vregmap;
      HReg*        vregmapHI;
      Int          n_vregmap;

      UInt         hwcaps;

      Bool         chainingAllowed;
      Addr32       max_ga;

      /* These are modified as we go along. */
      HInstrArray* code;
      Int          vreg_ctr;
   }
   ISelEnv;
static HReg lookupIRTemp ( ISelEnv* env, IRTemp tmp )
{
   vassert(tmp < env->n_vregmap);
   return env->vregmap[tmp];
}

static void lookupIRTemp64 ( HReg* vrHI, HReg* vrLO, ISelEnv* env, IRTemp tmp )
{
   vassert(tmp < env->n_vregmap);
   vassert(! hregIsInvalid(env->vregmapHI[tmp]));
   *vrLO = env->vregmap[tmp];
   *vrHI = env->vregmapHI[tmp];
}

static void addInstr ( ISelEnv* env, X86Instr* instr )
{
   addHInstr(env->code, instr);
   if (vex_traceflags & VEX_TRACE_VCODE) {
      ppX86Instr(instr, False);
      vex_printf("\n");
   }
}

static HReg newVRegI ( ISelEnv* env )
{
   HReg reg = mkHReg(True/*virtual reg*/, HRcInt32, 0/*enc*/, env->vreg_ctr);
   env->vreg_ctr++;
   return reg;
}

static HReg newVRegF ( ISelEnv* env )
{
   HReg reg = mkHReg(True/*virtual reg*/, HRcFlt64, 0/*enc*/, env->vreg_ctr);
   env->vreg_ctr++;
   return reg;
}

static HReg newVRegV ( ISelEnv* env )
{
   HReg reg = mkHReg(True/*virtual reg*/, HRcVec128, 0/*enc*/, env->vreg_ctr);
   env->vreg_ctr++;
   return reg;
}
/*---------------------------------------------------------*/
/*--- ISEL: Forward declarations                        ---*/
/*---------------------------------------------------------*/

/* These are organised as iselXXX and iselXXX_wrk pairs.  The
   iselXXX_wrk do the real work, but are not to be called directly.
   For each XXX, iselXXX calls its iselXXX_wrk counterpart, then
   checks that all returned registers are virtual.  You should not
   call the _wrk version directly.
*/
static X86RMI*     iselIntExpr_RMI_wrk ( ISelEnv* env, const IRExpr* e );
static X86RMI*     iselIntExpr_RMI     ( ISelEnv* env, const IRExpr* e );

static X86RI*      iselIntExpr_RI_wrk  ( ISelEnv* env, const IRExpr* e );
static X86RI*      iselIntExpr_RI      ( ISelEnv* env, const IRExpr* e );

static X86RM*      iselIntExpr_RM_wrk  ( ISelEnv* env, const IRExpr* e );
static X86RM*      iselIntExpr_RM      ( ISelEnv* env, const IRExpr* e );

static HReg        iselIntExpr_R_wrk   ( ISelEnv* env, const IRExpr* e );
static HReg        iselIntExpr_R       ( ISelEnv* env, const IRExpr* e );

static X86AMode*   iselIntExpr_AMode_wrk ( ISelEnv* env, const IRExpr* e );
static X86AMode*   iselIntExpr_AMode     ( ISelEnv* env, const IRExpr* e );

static void        iselInt64Expr_wrk ( HReg* rHi, HReg* rLo,
                                       ISelEnv* env, const IRExpr* e );
static void        iselInt64Expr     ( HReg* rHi, HReg* rLo,
                                       ISelEnv* env, const IRExpr* e );

static X86CondCode iselCondCode_wrk ( ISelEnv* env, const IRExpr* e );
static X86CondCode iselCondCode     ( ISelEnv* env, const IRExpr* e );

static HReg        iselDblExpr_wrk ( ISelEnv* env, const IRExpr* e );
static HReg        iselDblExpr     ( ISelEnv* env, const IRExpr* e );

static HReg        iselFltExpr_wrk ( ISelEnv* env, const IRExpr* e );
static HReg        iselFltExpr     ( ISelEnv* env, const IRExpr* e );

static HReg        iselVecExpr_wrk ( ISelEnv* env, const IRExpr* e );
static HReg        iselVecExpr     ( ISelEnv* env, const IRExpr* e );
/*---------------------------------------------------------*/
/*--- ISEL: Misc helpers                                ---*/
/*---------------------------------------------------------*/

/* Make an int reg-reg move. */

static X86Instr* mk_iMOVsd_RR ( HReg src, HReg dst )
{
   vassert(hregClass(src) == HRcInt32);
   vassert(hregClass(dst) == HRcInt32);
   return X86Instr_Alu32R(Xalu_MOV, X86RMI_Reg(src), dst);
}

/* Make a vector reg-reg move. */

static X86Instr* mk_vMOVsd_RR ( HReg src, HReg dst )
{
   vassert(hregClass(src) == HRcVec128);
   vassert(hregClass(dst) == HRcVec128);
   return X86Instr_SseReRg(Xsse_MOV, src, dst);
}
/* Advance/retreat %esp by n. */

static void add_to_esp ( ISelEnv* env, Int n )
{
   vassert(n > 0 && n < 256 && (n%4) == 0);
   addInstr(env,
            X86Instr_Alu32R(Xalu_ADD, X86RMI_Imm(n), hregX86_ESP()));
}

static void sub_from_esp ( ISelEnv* env, Int n )
{
   vassert(n > 0 && n < 256 && (n%4) == 0);
   addInstr(env,
            X86Instr_Alu32R(Xalu_SUB, X86RMI_Imm(n), hregX86_ESP()));
}
/* Given an amode, return one which references 4 bytes further
   along. */

static X86AMode* advance4 ( X86AMode* am )
{
   X86AMode* am4 = dopyX86AMode(am);
   switch (am4->tag) {
      case Xam_IRRS:
         am4->Xam.IRRS.imm += 4; break;
      case Xam_IR:
         am4->Xam.IR.imm += 4; break;
      default:
         vpanic("advance4(x86,host)");
   }
   return am4;
}
/* Push an arg onto the host stack, in preparation for a call to a
   helper function of some kind.  Returns the number of 32-bit words
   pushed.  If we encounter an IRExpr_VECRET() then we expect that
   r_vecRetAddr will be a valid register, that holds the relevant
   address.
*/
static Int pushArg ( ISelEnv* env, IRExpr* arg, HReg r_vecRetAddr )
{
   if (UNLIKELY(arg->tag == Iex_VECRET)) {
      vassert(!hregIsInvalid(r_vecRetAddr));
      addInstr(env, X86Instr_Push(X86RMI_Reg(r_vecRetAddr)));
      return 1;
   }
   if (UNLIKELY(arg->tag == Iex_GSPTR)) {
      addInstr(env, X86Instr_Push(X86RMI_Reg(hregX86_EBP())));
      return 1;
   }
   /* Else it's a "normal" expression. */
   IRType arg_ty = typeOfIRExpr(env->type_env, arg);
   if (arg_ty == Ity_I32) {
      addInstr(env, X86Instr_Push(iselIntExpr_RMI(env, arg)));
      return 1;
   }
   if (arg_ty == Ity_I64) {
      HReg rHi, rLo;
      iselInt64Expr(&rHi, &rLo, env, arg);
      addInstr(env, X86Instr_Push(X86RMI_Reg(rHi)));
      addInstr(env, X86Instr_Push(X86RMI_Reg(rLo)));
      return 2;
   }
   vpanic("pushArg(x86): can't handle arg of this type");
}
/* Complete the call to a helper function, by calling the
   helper and clearing the args off the stack. */

static
void callHelperAndClearArgs ( ISelEnv* env, X86CondCode cc,
                              IRCallee* cee, Int n_arg_ws,
                              RetLoc rloc )
{
   /* Complication.  Need to decide which reg to use as the fn address
      pointer, in a way that doesn't trash regparm-passed
      parameters. */
   vassert(sizeof(void*) == 4);

   addInstr(env, X86Instr_Call( cc, (Addr)cee->addr,
                                cee->regparms, rloc));
   if (n_arg_ws > 0)
      add_to_esp(env, 4*n_arg_ws);
}
/* Used only in doHelperCall.  See big comment in doHelperCall re
   handling of regparm args.  This function figures out whether
   evaluation of an expression might require use of a fixed register.
   If in doubt return True (safe but suboptimal).
*/
static
Bool mightRequireFixedRegs ( IRExpr* e )
{
   if (UNLIKELY(is_IRExpr_VECRET_or_GSPTR(e))) {
      // These are always "safe" -- either a copy of %esp in some
      // arbitrary vreg, or a copy of %ebp, respectively.
      return False;
   }
   /* Else it's a "normal" expression. */
   switch (e->tag) {
      case Iex_RdTmp: case Iex_Const: case Iex_Get:
         return False;
      default:
         return True;
   }
}
/* Do a complete function call.  |guard| is a Ity_Bit expression
   indicating whether or not the call happens.  If guard==NULL, the
   call is unconditional.  |retloc| is set to indicate where the
   return value is after the call.  The caller (of this fn) must
   generate code to add |stackAdjustAfterCall| to the stack pointer
   after the call is done. */

static
void doHelperCall ( /*OUT*/UInt*   stackAdjustAfterCall,
                    /*OUT*/RetLoc* retloc,
                    ISelEnv* env,
                    IRExpr* guard,
                    IRCallee* cee, IRType retTy, IRExpr** args )
{
   X86CondCode cc;
   HReg        argregs[3];
   HReg        tmpregs[3];
   Bool        danger;
   Int         not_done_yet, n_args, n_arg_ws, stack_limit,
               i, argreg, argregX;

   /* Set default returns.  We'll update them later if needed. */
   *stackAdjustAfterCall = 0;
   *retloc               = mk_RetLoc_INVALID();

   /* These are used for cross-checking that IR-level constraints on
      the use of Iex_VECRET and Iex_GSPTR are observed. */
   UInt nVECRETs = 0;
   UInt nGSPTRs  = 0;

   /* Marshal args for a call, do the call, and clear the stack.
      Complexities to consider:

      * The return type can be I{64,32,16,8} or V128.  In the V128
        case, it is expected that |args| will contain the special
        node IRExpr_VECRET(), in which case this routine generates
        code to allocate space on the stack for the vector return
        value.  Since we are not passing any scalars on the stack, it
        is enough to preallocate the return space before marshalling
        any arguments, in this case.

        |args| may also contain IRExpr_GSPTR(), in which case the
        value in %ebp is passed as the corresponding argument.

      * If the callee claims regparmness of 1, 2 or 3, we must pass the
        first 1, 2 or 3 args in registers (EAX, EDX, and ECX
        respectively).  To keep things relatively simple, only args of
        type I32 may be passed as regparms -- just bomb out if anything
        else turns up.  Clearly this depends on the front ends not
        trying to pass any other types as regparms.
   */

   /* 16 Nov 2004: the regparm handling is complicated by the
      following problem.

      Consider a call to a function with two regparm parameters:
      f(e1,e2).  We need to compute e1 into %eax and e2 into %edx.
      Suppose code is first generated to compute e1 into %eax.  Then,
      code is generated to compute e2 into %edx.  Unfortunately, if
      the latter code sequence uses %eax, it will trash the value of
      e1 computed by the former sequence.  This could happen if (for
      example) e2 itself involved a function call.  In the code below,
      args are evaluated right-to-left, not left-to-right, but the
      principle and the problem are the same.

      One solution is to compute all regparm-bound args into vregs
      first, and once they are all done, move them to the relevant
      real regs.  This always gives correct code, but it also gives
      a bunch of vreg-to-rreg moves which are usually redundant but
      are hard for the register allocator to get rid of.

      A compromise is to first examine all regparm'd argument
      expressions.  If they are all so simple that it is clear
      they will be evaluated without use of any fixed registers,
      use the old compute-directly-to-fixed-target scheme.  If not,
      be safe and use the via-vregs scheme.

      Note this requires being able to examine an expression and
      determine whether or not evaluation of it might use a fixed
      register.  That requires knowledge of how the rest of this
      insn selector works.  Currently just the following 3 are
      regarded as safe -- hopefully they cover the majority of
      arguments in practice: IRExpr_Tmp IRExpr_Const IRExpr_Get.
   */
   vassert(cee->regparms >= 0 && cee->regparms <= 3);

   /* Count the number of args and also the VECRETs */
   n_args = n_arg_ws = 0;
   while (args[n_args]) {
      IRExpr* arg = args[n_args];
      n_args++;
      if (UNLIKELY(arg->tag == Iex_VECRET)) {
         nVECRETs++;
      } else if (UNLIKELY(arg->tag == Iex_GSPTR)) {
         nGSPTRs++;
      }
   }

   /* If this fails, the IR is ill-formed */
   vassert(nGSPTRs == 0 || nGSPTRs == 1);

   /* If we have a VECRET, allocate space on the stack for the return
      value, and record the stack pointer after that. */
   HReg r_vecRetAddr = INVALID_HREG;
   if (nVECRETs == 1) {
      vassert(retTy == Ity_V128 || retTy == Ity_V256);
      vassert(retTy != Ity_V256); // we don't handle that yet (if ever)
      r_vecRetAddr = newVRegI(env);
      sub_from_esp(env, 16);
      addInstr(env, mk_iMOVsd_RR( hregX86_ESP(), r_vecRetAddr ));
   } else {
      // If either of these fail, the IR is ill-formed
      vassert(retTy != Ity_V128 && retTy != Ity_V256);
      vassert(nVECRETs == 0);
   }

   not_done_yet = n_args;

   stack_limit = cee->regparms;
   /* ------ BEGIN marshall all arguments ------ */

   /* Push (R to L) the stack-passed args, [n_args-1 .. stack_limit] */
   for (i = n_args-1; i >= stack_limit; i--) {
      n_arg_ws += pushArg(env, args[i], r_vecRetAddr);
      not_done_yet--;
   }

   /* args [stack_limit-1 .. 0] and possibly %ebp are to be passed in
      registers. */

   if (cee->regparms > 0) {

      /* ------ BEGIN deal with regparms ------ */

      /* deal with regparms, not forgetting %ebp if needed. */
      argregs[0] = hregX86_EAX();
      argregs[1] = hregX86_EDX();
      argregs[2] = hregX86_ECX();
      tmpregs[0] = tmpregs[1] = tmpregs[2] = INVALID_HREG;

      argreg = cee->regparms;

      /* In keeping with big comment above, detect potential danger
         and use the via-vregs scheme if needed. */
      danger = False;
      for (i = stack_limit-1; i >= 0; i--) {
         if (mightRequireFixedRegs(args[i])) {
            danger = True;
            break;
         }
      }

      if (danger) {

         /* Move via temporaries */
         argregX = argreg;
         for (i = stack_limit-1; i >= 0; i--) {

            if (0) {
               vex_printf("x86 host: register param is complex: ");
               ppIRExpr(args[i]);
               vex_printf("\n");
            }

            IRExpr* arg = args[i];
            argreg--;
            vassert(argreg >= 0);
            if (UNLIKELY(arg->tag == Iex_VECRET)) {
               vassert(!hregIsInvalid(r_vecRetAddr));
               tmpregs[argreg] = r_vecRetAddr;
            }
            else if (UNLIKELY(arg->tag == Iex_GSPTR)) {
               tmpregs[argreg] = hregX86_EBP();
            } else {
               vassert(typeOfIRExpr(env->type_env, arg) == Ity_I32);
               tmpregs[argreg] = iselIntExpr_R(env, arg);
            }
            not_done_yet--;
         }
         for (i = stack_limit-1; i >= 0; i--) {
            argregX--;
            vassert(argregX >= 0);
            addInstr( env, mk_iMOVsd_RR( tmpregs[argregX], argregs[argregX] ) );
         }

      } else {
         /* It's safe to compute all regparm args directly into their
            target registers. */
         for (i = stack_limit-1; i >= 0; i--) {
            IRExpr* arg = args[i];
            argreg--;
            vassert(argreg >= 0);
            if (UNLIKELY(arg->tag == Iex_VECRET)) {
               vassert(!hregIsInvalid(r_vecRetAddr));
               addInstr(env, X86Instr_Alu32R(Xalu_MOV,
                                             X86RMI_Reg(r_vecRetAddr),
                                             argregs[argreg]));
            }
            else if (UNLIKELY(arg->tag == Iex_GSPTR)) {
               addInstr(env, mk_iMOVsd_RR(hregX86_EBP(), argregs[argreg]));
            } else {
               vassert(typeOfIRExpr(env->type_env, arg) == Ity_I32);
               addInstr(env, X86Instr_Alu32R(Xalu_MOV,
                                             iselIntExpr_RMI(env, arg),
                                             argregs[argreg]));
            }
            not_done_yet--;
         }
      }

      /* ------ END deal with regparms ------ */
   }

   vassert(not_done_yet == 0);

   /* ------ END marshall all arguments ------ */
   /* Now we can compute the condition.  We can't do it earlier
      because the argument computations could trash the condition
      codes.  Be a bit clever to handle the common case where the
      guard is 1:Bit. */
   cc = Xcc_ALWAYS;
   if (guard) {
      if (guard->tag == Iex_Const
          && guard->Iex.Const.con->tag == Ico_U1
          && guard->Iex.Const.con->Ico.U1 == True) {
         /* unconditional -- do nothing */
      } else {
         cc = iselCondCode( env, guard );
      }
   }

   /* Do final checks, set the return values, and generate the call
      instruction proper. */
   vassert(*stackAdjustAfterCall == 0);
   vassert(is_RetLoc_INVALID(*retloc));
   switch (retTy) {
      case Ity_INVALID:
         /* Function doesn't return a value. */
         *retloc = mk_RetLoc_simple(RLPri_None);
         break;
      case Ity_I64:
         *retloc = mk_RetLoc_simple(RLPri_2Int);
         break;
      case Ity_I32: case Ity_I16: case Ity_I8:
         *retloc = mk_RetLoc_simple(RLPri_Int);
         break;
      case Ity_V128:
         *retloc = mk_RetLoc_spRel(RLPri_V128SpRel, 0);
         *stackAdjustAfterCall = 16;
         break;
      case Ity_V256:
         *retloc = mk_RetLoc_spRel(RLPri_V256SpRel, 0);
         *stackAdjustAfterCall = 32;
         break;
      default:
         /* IR can denote other possible return types, but we don't
            handle those here. */
         vassert(0);
   }

   /* Finally, generate the call itself.  This needs the *retloc value
      set in the switch above, which is why it's at the end. */
   callHelperAndClearArgs( env, cc, cee, n_arg_ws, *retloc );
}
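
/* A typical (unconditional, I32-returning) use of doHelperCall, as seen
   in the Iex_CCall case further down -- shown here only as a sketch:

      UInt   addToSp = 0;
      RetLoc rloc    = mk_RetLoc_INVALID();
      doHelperCall(&addToSp, &rloc, env, NULL, cee, Ity_I32, args);
      vassert(rloc.pri == RLPri_Int && addToSp == 0);
      addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), dst));
*/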
/* Given a guest-state array descriptor, an index expression and a
   bias, generate an X86AMode holding the relevant guest state
   offset. */

static
X86AMode* genGuestArrayOffset ( ISelEnv* env, IRRegArray* descr,
                                IRExpr* off, Int bias )
{
   HReg tmp, roff;
   Int  elemSz = sizeofIRType(descr->elemTy);
   Int  nElems = descr->nElems;
   Int  shift  = 0;

   /* throw out any cases not generated by an x86 front end.  In
      theory there might be a day where we need to handle them -- if
      we ever run non-x86-guest on x86 host. */

   if (nElems != 8)
      vpanic("genGuestArrayOffset(x86 host)(1)");

   switch (elemSz) {
      case 1:  shift = 0; break;
      case 4:  shift = 2; break;
      case 8:  shift = 3; break;
      default: vpanic("genGuestArrayOffset(x86 host)(2)");
   }

   /* Compute off into a reg, %off.  Then return:

        movl %off, %tmp
        addl $bias, %tmp  (if bias != 0)
        andl $7, %tmp
        ... base(%ebp, %tmp, shift) ...
   */
   tmp  = newVRegI(env);
   roff = iselIntExpr_R(env, off);
   addInstr(env, mk_iMOVsd_RR(roff, tmp));
   if (bias != 0) {
      addInstr(env,
               X86Instr_Alu32R(Xalu_ADD, X86RMI_Imm(bias), tmp));
   }
   addInstr(env,
            X86Instr_Alu32R(Xalu_AND, X86RMI_Imm(7), tmp));
   return
      X86AMode_IRRS( descr->base, hregX86_EBP(), tmp, shift );
}
/* Mess with the FPU's rounding mode: set to the default rounding mode
   (DEFAULT_FPUCW). */
static
void set_FPU_rounding_default ( ISelEnv* env )
{
   /* pushl $DEFAULT_FPUCW
      fldcw 0(%esp)
      addl  $4, %esp
   */
   X86AMode* zero_esp = X86AMode_IR(0, hregX86_ESP());
   addInstr(env, X86Instr_Push(X86RMI_Imm(DEFAULT_FPUCW)));
   addInstr(env, X86Instr_FpLdCW(zero_esp));
   add_to_esp(env, 4);
}
/* Mess with the FPU's rounding mode: 'mode' is an I32-typed
   expression denoting a value in the range 0 .. 3, indicating a round
   mode encoded as per type IRRoundingMode.  Set the x87 FPU to have
   the same rounding.
*/
static
void set_FPU_rounding_mode ( ISelEnv* env, IRExpr* mode )
{
   HReg rrm  = iselIntExpr_R(env, mode);
   HReg rrm2 = newVRegI(env);
   X86AMode* zero_esp = X86AMode_IR(0, hregX86_ESP());

   /* movl  %rrm, %rrm2
      andl  $3, %rrm2   -- shouldn't be needed; paranoia
      shll  $10, %rrm2
      orl   $DEFAULT_FPUCW, %rrm2
      pushl %rrm2
      fldcw 0(%esp)
      addl  $4, %esp
   */
   addInstr(env, mk_iMOVsd_RR(rrm, rrm2));
   addInstr(env, X86Instr_Alu32R(Xalu_AND, X86RMI_Imm(3), rrm2));
   addInstr(env, X86Instr_Sh32(Xsh_SHL, 10, rrm2));
   addInstr(env, X86Instr_Alu32R(Xalu_OR, X86RMI_Imm(DEFAULT_FPUCW), rrm2));
   addInstr(env, X86Instr_Push(X86RMI_Reg(rrm2)));
   addInstr(env, X86Instr_FpLdCW(zero_esp));
   add_to_esp(env, 4);
}
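
/* Why the shift by 10 works (reference note, not in the original source):
   the IRRoundingMode encoding (0 = nearest, 1 = -inf, 2 = +inf, 3 = zero)
   coincides with the x87 RC field encoding, and RC lives in control-word
   bits 11:10.  Since DEFAULT_FPUCW has RC = 00, OR-ing (mode & 3) << 10
   into it yields a control word with the requested rounding mode and all
   other fields unchanged. */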
/* Generate !src into a new vector register, and be sure that the code
   is SSE1 compatible.  Amazing that Intel doesn't offer a less crappy
   way to do this.
*/
static HReg do_sse_Not128 ( ISelEnv* env, HReg src )
{
   HReg dst = newVRegV(env);
   /* Set dst to zero.  If dst contains a NaN then all hell might
      break loose after the comparison.  So, first zero it. */
   addInstr(env, X86Instr_SseReRg(Xsse_XOR, dst, dst));
   /* And now make it all 1s ... */
   addInstr(env, X86Instr_Sse32Fx4(Xsse_CMPEQF, dst, dst));
   /* Finally, xor 'src' into it. */
   addInstr(env, X86Instr_SseReRg(Xsse_XOR, src, dst));
   /* Doesn't that just totally suck? */
   return dst;
}
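
/* Rough equivalent of the three instructions above (annotation added,
   assuming the obvious encodings):
      xorps   %dst,%dst     ; dst = 0, so no NaNs can upset the compare
      cmpeqps %dst,%dst     ; each lane: 0 == 0, so dst = all ones
      xorps   %src,%dst     ; dst = ~src
   -- a bitwise NOT using only SSE1 operations. */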
/* Round an x87 FPU value to 53-bit-mantissa precision, to be used
   after most non-simple FPU operations (simple = +, -, *, / and
   sqrt).

   This could be done a lot more efficiently if needed, by loading
   zero and adding it to the value to be rounded (fldz ; faddp?).
*/
static void roundToF64 ( ISelEnv* env, HReg reg )
{
   X86AMode* zero_esp = X86AMode_IR(0, hregX86_ESP());
   sub_from_esp(env, 8);
   addInstr(env, X86Instr_FpLdSt(False/*store*/, 8, reg, zero_esp));
   addInstr(env, X86Instr_FpLdSt(True/*load*/, 8, reg, zero_esp));
   add_to_esp(env, 8);
}
/*---------------------------------------------------------*/
/*--- ISEL: Integer expressions (32/16/8 bit)           ---*/
/*---------------------------------------------------------*/

/* Select insns for an integer-typed expression, and add them to the
   code list.  Return a reg holding the result.  This reg will be a
   virtual register.  THE RETURNED REG MUST NOT BE MODIFIED.  If you
   want to modify it, ask for a new vreg, copy it in there, and modify
   the copy.  The register allocator will do its best to map both
   vregs to the same real register, so the copies will often disappear
   later in the game.

   This should handle expressions of 32, 16 and 8-bit type.  All
   results are returned in a 32-bit register.  For 16- and 8-bit
   expressions, the upper 16/24 bits are arbitrary, so you should mask
   or sign extend partial values if necessary.
*/

static HReg iselIntExpr_R ( ISelEnv* env, const IRExpr* e )
{
   HReg r = iselIntExpr_R_wrk(env, e);
   /* sanity checks ... */
#  if 0
   vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
#  endif
   vassert(hregClass(r) == HRcInt32);
   vassert(hregIsVirtual(r));
   return r;
}
849 static HReg
iselIntExpr_R_wrk ( ISelEnv
* env
, const IRExpr
* e
)
853 IRType ty
= typeOfIRExpr(env
->type_env
,e
);
854 vassert(ty
== Ity_I32
|| ty
== Ity_I16
|| ty
== Ity_I8
);
858 /* --------- TEMP --------- */
860 return lookupIRTemp(env
, e
->Iex
.RdTmp
.tmp
);
863 /* --------- LOAD --------- */
865 HReg dst
= newVRegI(env
);
866 X86AMode
* amode
= iselIntExpr_AMode ( env
, e
->Iex
.Load
.addr
);
868 /* We can't handle big-endian loads, nor load-linked. */
869 if (e
->Iex
.Load
.end
!= Iend_LE
)
873 addInstr(env
, X86Instr_Alu32R(Xalu_MOV
,
874 X86RMI_Mem(amode
), dst
) );
878 addInstr(env
, X86Instr_LoadEX(2,False
,amode
,dst
));
882 addInstr(env
, X86Instr_LoadEX(1,False
,amode
,dst
));
888 /* --------- TERNARY OP --------- */
890 IRTriop
*triop
= e
->Iex
.Triop
.details
;
891 /* C3210 flags following FPU partial remainder (fprem), both
892 IEEE compliant (PREM1) and non-IEEE compliant (PREM). */
893 if (triop
->op
== Iop_PRemC3210F64
894 || triop
->op
== Iop_PRem1C3210F64
) {
895 HReg junk
= newVRegF(env
);
896 HReg dst
= newVRegI(env
);
897 HReg srcL
= iselDblExpr(env
, triop
->arg2
);
898 HReg srcR
= iselDblExpr(env
, triop
->arg3
);
899 /* XXXROUNDINGFIXME */
900 /* set roundingmode here */
901 addInstr(env
, X86Instr_FpBinary(
902 e
->Iex
.Binop
.op
==Iop_PRemC3210F64
903 ? Xfp_PREM
: Xfp_PREM1
,
906 /* The previous pseudo-insn will have left the FPU's C3210
907 flags set correctly. So bag them. */
908 addInstr(env
, X86Instr_FpStSW_AX());
909 addInstr(env
, mk_iMOVsd_RR(hregX86_EAX(), dst
));
910 addInstr(env
, X86Instr_Alu32R(Xalu_AND
, X86RMI_Imm(0x4700), dst
));
   /* --------- BINARY OP --------- */
   case Iex_Binop: {
      X86AluOp   aluOp;
      X86ShiftOp shOp;

      /* Pattern: Sub32(0,x) */
      if (e->Iex.Binop.op == Iop_Sub32 && isZeroU32(e->Iex.Binop.arg1)) {
         HReg dst = newVRegI(env);
         HReg reg = iselIntExpr_R(env, e->Iex.Binop.arg2);
         addInstr(env, mk_iMOVsd_RR(reg,dst));
         addInstr(env, X86Instr_Unary32(Xun_NEG,dst));
         return dst;
      }

      /* Is it an addition or logical style op? */
      switch (e->Iex.Binop.op) {
         case Iop_Add8: case Iop_Add16: case Iop_Add32:
            aluOp = Xalu_ADD; break;
         case Iop_Sub8: case Iop_Sub16: case Iop_Sub32:
            aluOp = Xalu_SUB; break;
         case Iop_And8: case Iop_And16: case Iop_And32:
            aluOp = Xalu_AND; break;
         case Iop_Or8: case Iop_Or16: case Iop_Or32:
            aluOp = Xalu_OR; break;
         case Iop_Xor8: case Iop_Xor16: case Iop_Xor32:
            aluOp = Xalu_XOR; break;
         case Iop_Mul16: case Iop_Mul32:
            aluOp = Xalu_MUL; break;
         default:
            aluOp = Xalu_INVALID; break;
      }
      /* For commutative ops we assume any literal
         values are on the second operand. */
      if (aluOp != Xalu_INVALID) {
         HReg dst    = newVRegI(env);
         HReg reg    = iselIntExpr_R(env, e->Iex.Binop.arg1);
         X86RMI* rmi = iselIntExpr_RMI(env, e->Iex.Binop.arg2);
         addInstr(env, mk_iMOVsd_RR(reg,dst));
         addInstr(env, X86Instr_Alu32R(aluOp, rmi, dst));
         return dst;
      }
      /* Could do better here; forcing the first arg into a reg
         isn't always clever.
         -- t70 = Xor32(And32(Xor32(LDle:I32(Add32(t41,0xFFFFFFA0:I32)),
                       LDle:I32(Add32(t41,0xFFFFFFA4:I32))),LDle:I32(Add32(
                       t41,0xFFFFFFA8:I32))),LDle:I32(Add32(t41,0xFFFFFFA0:I32)))
            movl 0xFFFFFFA0(%vr41),%vr107
            movl 0xFFFFFFA4(%vr41),%vr108
            ...
            movl 0xFFFFFFA8(%vr41),%vr109
            ...
            movl 0xFFFFFFA0(%vr41),%vr110
            ...
      */
      /* Perhaps a shift op? */
      switch (e->Iex.Binop.op) {
         case Iop_Shl32: case Iop_Shl16: case Iop_Shl8:
            shOp = Xsh_SHL; break;
         case Iop_Shr32: case Iop_Shr16: case Iop_Shr8:
            shOp = Xsh_SHR; break;
         case Iop_Sar32: case Iop_Sar16: case Iop_Sar8:
            shOp = Xsh_SAR; break;
         default:
            shOp = Xsh_INVALID; break;
      }
      if (shOp != Xsh_INVALID) {
         HReg dst = newVRegI(env);

         /* regL = the value to be shifted */
         HReg regL = iselIntExpr_R(env, e->Iex.Binop.arg1);
         addInstr(env, mk_iMOVsd_RR(regL,dst));

         /* Do any necessary widening for 16/8 bit operands */
         switch (e->Iex.Binop.op) {
            case Iop_Shr8:
               addInstr(env, X86Instr_Alu32R(
                                Xalu_AND, X86RMI_Imm(0xFF), dst));
               break;
            case Iop_Shr16:
               addInstr(env, X86Instr_Alu32R(
                                Xalu_AND, X86RMI_Imm(0xFFFF), dst));
               break;
            case Iop_Sar8:
               addInstr(env, X86Instr_Sh32(Xsh_SHL, 24, dst));
               addInstr(env, X86Instr_Sh32(Xsh_SAR, 24, dst));
               break;
            case Iop_Sar16:
               addInstr(env, X86Instr_Sh32(Xsh_SHL, 16, dst));
               addInstr(env, X86Instr_Sh32(Xsh_SAR, 16, dst));
               break;
            default:
               break;
         }

         /* Now consider the shift amount.  If it's a literal, we
            can do a much better job than the general case. */
         if (e->Iex.Binop.arg2->tag == Iex_Const) {
            /* assert that the IR is well-typed */
            Int nshift;
            vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
            nshift = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
            vassert(nshift >= 0);
            if (nshift > 0)
               /* Can't allow nshift==0 since that means %cl */
               addInstr(env, X86Instr_Sh32( shOp, nshift, dst ));
         } else {
            /* General case; we have to force the amount into %cl. */
            HReg regR = iselIntExpr_R(env, e->Iex.Binop.arg2);
            addInstr(env, mk_iMOVsd_RR(regR,hregX86_ECX()));
            addInstr(env, X86Instr_Sh32(shOp, 0/* %cl */, dst));
         }
         return dst;
      }
      /* Handle misc other ops. */

      if (e->Iex.Binop.op == Iop_Max32U) {
         HReg src1 = iselIntExpr_R(env, e->Iex.Binop.arg1);
         HReg dst  = newVRegI(env);
         HReg src2 = iselIntExpr_R(env, e->Iex.Binop.arg2);
         addInstr(env, mk_iMOVsd_RR(src1,dst));
         addInstr(env, X86Instr_Alu32R(Xalu_CMP, X86RMI_Reg(src2), dst));
         addInstr(env, X86Instr_CMov32(Xcc_B, X86RM_Reg(src2), dst));
         return dst;
      }
      if (e->Iex.Binop.op == Iop_8HLto16) {
         HReg hi8  = newVRegI(env);
         HReg lo8  = newVRegI(env);
         HReg hi8s = iselIntExpr_R(env, e->Iex.Binop.arg1);
         HReg lo8s = iselIntExpr_R(env, e->Iex.Binop.arg2);
         addInstr(env, mk_iMOVsd_RR(hi8s, hi8));
         addInstr(env, mk_iMOVsd_RR(lo8s, lo8));
         addInstr(env, X86Instr_Sh32(Xsh_SHL, 8, hi8));
         addInstr(env, X86Instr_Alu32R(Xalu_AND, X86RMI_Imm(0xFF), lo8));
         addInstr(env, X86Instr_Alu32R(Xalu_OR, X86RMI_Reg(lo8), hi8));
         return hi8;
      }
      if (e->Iex.Binop.op == Iop_16HLto32) {
         HReg hi16  = newVRegI(env);
         HReg lo16  = newVRegI(env);
         HReg hi16s = iselIntExpr_R(env, e->Iex.Binop.arg1);
         HReg lo16s = iselIntExpr_R(env, e->Iex.Binop.arg2);
         addInstr(env, mk_iMOVsd_RR(hi16s, hi16));
         addInstr(env, mk_iMOVsd_RR(lo16s, lo16));
         addInstr(env, X86Instr_Sh32(Xsh_SHL, 16, hi16));
         addInstr(env, X86Instr_Alu32R(Xalu_AND, X86RMI_Imm(0xFFFF), lo16));
         addInstr(env, X86Instr_Alu32R(Xalu_OR, X86RMI_Reg(lo16), hi16));
         return hi16;
      }
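
      /* The Mull{S,U}{8,16} case below widens via shifts: both operands
         are shifted left so their top bit reaches bit 31, then shifted
         back down with SAR (signed) or SHR (unsigned), i.e. sign- or
         zero-extended to 32 bits.  A single 32-bit multiply of the
         extended values then equals the full widening product, since a
         16x16 (or 8x8) product always fits in 32 bits.  (Explanatory
         note; not in the original source.) */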
      if (e->Iex.Binop.op == Iop_MullS16 || e->Iex.Binop.op == Iop_MullS8
          || e->Iex.Binop.op == Iop_MullU16 || e->Iex.Binop.op == Iop_MullU8) {
         HReg a16   = newVRegI(env);
         HReg b16   = newVRegI(env);
         HReg a16s  = iselIntExpr_R(env, e->Iex.Binop.arg1);
         HReg b16s  = iselIntExpr_R(env, e->Iex.Binop.arg2);
         Int  shift = (e->Iex.Binop.op == Iop_MullS8
                       || e->Iex.Binop.op == Iop_MullU8)
                         ? 24 : 16;
         X86ShiftOp shr_op = (e->Iex.Binop.op == Iop_MullS8
                              || e->Iex.Binop.op == Iop_MullS16)
                                ? Xsh_SAR : Xsh_SHR;

         addInstr(env, mk_iMOVsd_RR(a16s, a16));
         addInstr(env, mk_iMOVsd_RR(b16s, b16));
         addInstr(env, X86Instr_Sh32(Xsh_SHL, shift, a16));
         addInstr(env, X86Instr_Sh32(Xsh_SHL, shift, b16));
         addInstr(env, X86Instr_Sh32(shr_op,  shift, a16));
         addInstr(env, X86Instr_Sh32(shr_op,  shift, b16));
         addInstr(env, X86Instr_Alu32R(Xalu_MUL, X86RMI_Reg(a16), b16));
         return b16;
      }
      if (e->Iex.Binop.op == Iop_CmpF64) {
         HReg fL  = iselDblExpr(env, e->Iex.Binop.arg1);
         HReg fR  = iselDblExpr(env, e->Iex.Binop.arg2);
         HReg dst = newVRegI(env);
         addInstr(env, X86Instr_FpCmp(fL,fR,dst));
         /* shift this right 8 bits so as to conform to CmpF64
            definition. */
         addInstr(env, X86Instr_Sh32(Xsh_SHR, 8, dst));
         return dst;
      }
      if (e->Iex.Binop.op == Iop_F64toI32S
          || e->Iex.Binop.op == Iop_F64toI16S) {
         Int  sz  = e->Iex.Binop.op == Iop_F64toI16S ? 2 : 4;
         HReg rf  = iselDblExpr(env, e->Iex.Binop.arg2);
         HReg dst = newVRegI(env);

         /* Used several times ... */
         X86AMode* zero_esp = X86AMode_IR(0, hregX86_ESP());

         /* rf now holds the value to be converted, and rrm holds the
            rounding mode value, encoded as per the IRRoundingMode
            enum.  The first thing to do is set the FPU's rounding
            mode accordingly. */

         /* Create a space for the format conversion. */
         sub_from_esp(env, 4);

         /* Set host rounding mode */
         set_FPU_rounding_mode( env, e->Iex.Binop.arg1 );

         /* gistw/l %rf, 0(%esp) */
         addInstr(env, X86Instr_FpLdStI(False/*store*/,
                                        toUChar(sz), rf, zero_esp));

         if (sz == 2) {
            /* movzwl 0(%esp), %dst */
            addInstr(env, X86Instr_LoadEX(2,False,zero_esp,dst));
         } else {
            /* movl 0(%esp), %dst */
            vassert(sz == 4);
            addInstr(env, X86Instr_Alu32R(
                             Xalu_MOV, X86RMI_Mem(zero_esp), dst));
         }

         /* Restore default FPU rounding. */
         set_FPU_rounding_default( env );

         add_to_esp(env, 4);
         return dst;
      }

      break;
   }
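
   /* Note on the F64->I conversion above (added annotation): the
      X86Instr_FpLdStI store form converts using the FPU's current
      rounding mode (x87 fist/fistp semantics), which is why the IR
      rounding mode is installed just beforehand and the default is
      restored afterwards. */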
   /* --------- UNARY OP --------- */
   case Iex_Unop: {

      /* 1Uto8(32to1(expr32)) */
      if (e->Iex.Unop.op == Iop_1Uto8) {
         DECLARE_PATTERN(p_32to1_then_1Uto8);
         DEFINE_PATTERN(p_32to1_then_1Uto8,
                        unop(Iop_1Uto8,unop(Iop_32to1,bind(0))));
         if (matchIRExpr(&mi,p_32to1_then_1Uto8,e)) {
            const IRExpr* expr32 = mi.bindee[0];
            HReg dst = newVRegI(env);
            HReg src = iselIntExpr_R(env, expr32);
            addInstr(env, mk_iMOVsd_RR(src,dst) );
            addInstr(env, X86Instr_Alu32R(Xalu_AND,
                                          X86RMI_Imm(1), dst));
            return dst;
         }
      }
      /* 8Uto32(LDle(expr32)) */
      if (e->Iex.Unop.op == Iop_8Uto32) {
         DECLARE_PATTERN(p_LDle8_then_8Uto32);
         DEFINE_PATTERN(p_LDle8_then_8Uto32,
                        unop(Iop_8Uto32,
                             IRExpr_Load(Iend_LE,Ity_I8,bind(0))) );
         if (matchIRExpr(&mi,p_LDle8_then_8Uto32,e)) {
            HReg dst = newVRegI(env);
            X86AMode* amode = iselIntExpr_AMode ( env, mi.bindee[0] );
            addInstr(env, X86Instr_LoadEX(1,False,amode,dst));
            return dst;
         }
      }

      /* 8Sto32(LDle(expr32)) */
      if (e->Iex.Unop.op == Iop_8Sto32) {
         DECLARE_PATTERN(p_LDle8_then_8Sto32);
         DEFINE_PATTERN(p_LDle8_then_8Sto32,
                        unop(Iop_8Sto32,
                             IRExpr_Load(Iend_LE,Ity_I8,bind(0))) );
         if (matchIRExpr(&mi,p_LDle8_then_8Sto32,e)) {
            HReg dst = newVRegI(env);
            X86AMode* amode = iselIntExpr_AMode ( env, mi.bindee[0] );
            addInstr(env, X86Instr_LoadEX(1,True,amode,dst));
            return dst;
         }
      }

      /* 16Uto32(LDle(expr32)) */
      if (e->Iex.Unop.op == Iop_16Uto32) {
         DECLARE_PATTERN(p_LDle16_then_16Uto32);
         DEFINE_PATTERN(p_LDle16_then_16Uto32,
                        unop(Iop_16Uto32,
                             IRExpr_Load(Iend_LE,Ity_I16,bind(0))) );
         if (matchIRExpr(&mi,p_LDle16_then_16Uto32,e)) {
            HReg dst = newVRegI(env);
            X86AMode* amode = iselIntExpr_AMode ( env, mi.bindee[0] );
            addInstr(env, X86Instr_LoadEX(2,False,amode,dst));
            return dst;
         }
      }
      /* 8Uto32(GET:I8) */
      if (e->Iex.Unop.op == Iop_8Uto32) {
         if (e->Iex.Unop.arg->tag == Iex_Get) {
            HReg dst;
            X86AMode* amode;
            vassert(e->Iex.Unop.arg->Iex.Get.ty == Ity_I8);
            dst = newVRegI(env);
            amode = X86AMode_IR(e->Iex.Unop.arg->Iex.Get.offset,
                                hregX86_EBP());
            addInstr(env, X86Instr_LoadEX(1,False,amode,dst));
            return dst;
         }
      }

      /* 16to32(GET:I16) */
      if (e->Iex.Unop.op == Iop_16Uto32) {
         if (e->Iex.Unop.arg->tag == Iex_Get) {
            HReg dst;
            X86AMode* amode;
            vassert(e->Iex.Unop.arg->Iex.Get.ty == Ity_I16);
            dst = newVRegI(env);
            amode = X86AMode_IR(e->Iex.Unop.arg->Iex.Get.offset,
                                hregX86_EBP());
            addInstr(env, X86Instr_LoadEX(2,False,amode,dst));
            return dst;
         }
      }
      switch (e->Iex.Unop.op) {
         case Iop_8Uto16:
         case Iop_8Uto32:
         case Iop_16Uto32: {
            HReg dst  = newVRegI(env);
            HReg src  = iselIntExpr_R(env, e->Iex.Unop.arg);
            UInt mask = e->Iex.Unop.op==Iop_16Uto32 ? 0xFFFF : 0xFF;
            addInstr(env, mk_iMOVsd_RR(src,dst) );
            addInstr(env, X86Instr_Alu32R(Xalu_AND,
                                          X86RMI_Imm(mask), dst));
            return dst;
         }
         case Iop_8Sto16:
         case Iop_8Sto32:
         case Iop_16Sto32: {
            HReg dst = newVRegI(env);
            HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
            UInt amt = e->Iex.Unop.op==Iop_16Sto32 ? 16 : 24;
            addInstr(env, mk_iMOVsd_RR(src,dst) );
            addInstr(env, X86Instr_Sh32(Xsh_SHL, amt, dst));
            addInstr(env, X86Instr_Sh32(Xsh_SAR, amt, dst));
            return dst;
         }
         case Iop_Not8:
         case Iop_Not16:
         case Iop_Not32: {
            HReg dst = newVRegI(env);
            HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
            addInstr(env, mk_iMOVsd_RR(src,dst) );
            addInstr(env, X86Instr_Unary32(Xun_NOT,dst));
            return dst;
         }
         case Iop_64HIto32: {
            HReg rHi, rLo;
            iselInt64Expr(&rHi,&rLo, env, e->Iex.Unop.arg);
            return rHi; /* and abandon rLo .. poor wee thing :-) */
         }
         case Iop_64to32: {
            HReg rHi, rLo;
            iselInt64Expr(&rHi,&rLo, env, e->Iex.Unop.arg);
            return rLo; /* similar stupid comment to the above ... */
         }
         case Iop_16HIto8:
         case Iop_32HIto16: {
            HReg dst  = newVRegI(env);
            HReg src  = iselIntExpr_R(env, e->Iex.Unop.arg);
            Int shift = e->Iex.Unop.op == Iop_16HIto8 ? 8 : 16;
            addInstr(env, mk_iMOVsd_RR(src,dst) );
            addInstr(env, X86Instr_Sh32(Xsh_SHR, shift, dst));
            return dst;
         }
         case Iop_1Uto8:
         case Iop_1Uto32: {
            HReg dst         = newVRegI(env);
            X86CondCode cond = iselCondCode(env, e->Iex.Unop.arg);
            addInstr(env, X86Instr_Set32(cond,dst));
            return dst;
         }
         case Iop_1Sto8:
         case Iop_1Sto16:
         case Iop_1Sto32: {
            /* could do better than this, but for now ... */
            HReg dst         = newVRegI(env);
            X86CondCode cond = iselCondCode(env, e->Iex.Unop.arg);
            addInstr(env, X86Instr_Set32(cond,dst));
            addInstr(env, X86Instr_Sh32(Xsh_SHL, 31, dst));
            addInstr(env, X86Instr_Sh32(Xsh_SAR, 31, dst));
            return dst;
         }
         case Iop_Ctz32: {
            /* Count trailing zeroes, implemented by x86 'bsfl' */
            HReg dst = newVRegI(env);
            HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
            addInstr(env, X86Instr_Bsfr32(True,src,dst));
            return dst;
         }
         case Iop_Clz32: {
            /* Count leading zeroes.  Do 'bsrl' to establish the index
               of the highest set bit, and subtract that value from
               31. */
            HReg tmp = newVRegI(env);
            HReg dst = newVRegI(env);
            HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
            addInstr(env, X86Instr_Bsfr32(False,src,tmp));
            addInstr(env, X86Instr_Alu32R(Xalu_MOV,
                                          X86RMI_Imm(31), dst));
            addInstr(env, X86Instr_Alu32R(Xalu_SUB,
                                          X86RMI_Reg(tmp), dst));
            return dst;
         }

         case Iop_CmpwNEZ32: {
            HReg dst = newVRegI(env);
            HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
            addInstr(env, mk_iMOVsd_RR(src,dst));
            addInstr(env, X86Instr_Unary32(Xun_NEG,dst));
            addInstr(env, X86Instr_Alu32R(Xalu_OR,
                                          X86RMI_Reg(src), dst));
            addInstr(env, X86Instr_Sh32(Xsh_SAR, 31, dst));
            return dst;
         }
         case Iop_Left8:
         case Iop_Left16:
         case Iop_Left32: {
            HReg dst = newVRegI(env);
            HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
            addInstr(env, mk_iMOVsd_RR(src, dst));
            addInstr(env, X86Instr_Unary32(Xun_NEG, dst));
            addInstr(env, X86Instr_Alu32R(Xalu_OR, X86RMI_Reg(src), dst));
            return dst;
         }
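
         /* CmpwNEZ32 and Left8/16/32 above both rely on the identity
            (src | -src): its sign bit is set exactly when src != 0.
            Left* returns that value directly; CmpwNEZ32 additionally
            does an arithmetic shift right by 31 to smear the sign bit,
            giving all-ones for nonzero inputs and zero otherwise.
            (Annotation, not in the original source.) */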
         case Iop_V128to32: {
            HReg      dst  = newVRegI(env);
            HReg      vec  = iselVecExpr(env, e->Iex.Unop.arg);
            X86AMode* esp0 = X86AMode_IR(0, hregX86_ESP());
            sub_from_esp(env, 16);
            addInstr(env, X86Instr_SseLdSt(False/*store*/, vec, esp0));
            addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(esp0), dst ));
            add_to_esp(env, 16);
            return dst;
         }

         /* ReinterpF32asI32(e) */
         /* Given an IEEE754 single, produce an I32 with the same bit
            pattern.  Keep stack 8-aligned even though only using 4
            bytes. */
         case Iop_ReinterpF32asI32: {
            HReg rf  = iselFltExpr(env, e->Iex.Unop.arg);
            HReg dst = newVRegI(env);
            X86AMode* zero_esp = X86AMode_IR(0, hregX86_ESP());
            set_FPU_rounding_default(env);
            sub_from_esp(env, 8);
            /* gstF %rf, 0(%esp) */
            addInstr(env,
                     X86Instr_FpLdSt(False/*store*/, 4, rf, zero_esp));
            /* movl 0(%esp), %dst */
            addInstr(env,
                     X86Instr_Alu32R(Xalu_MOV, X86RMI_Mem(zero_esp), dst));
            add_to_esp(env, 8);
            return dst;
         }

         case Iop_16to8:
         case Iop_32to8:
         case Iop_32to16:
            /* These are no-ops. */
            return iselIntExpr_R(env, e->Iex.Unop.arg);

         case Iop_GetMSBs8x8: {
            /* Note: the following assumes the helper is of
               signature
                  UInt fn ( ULong ), and is not a regparm fn.
            */
            HReg xLo, xHi;
            HReg dst = newVRegI(env);
            Addr fn  = (Addr)h_generic_calc_GetMSBs8x8;
            iselInt64Expr(&xHi, &xLo, env, e->Iex.Unop.arg);
            addInstr(env, X86Instr_Push(X86RMI_Reg(xHi)));
            addInstr(env, X86Instr_Push(X86RMI_Reg(xLo)));
            addInstr(env, X86Instr_Call( Xcc_ALWAYS, (Addr32)fn,
                                         0, mk_RetLoc_simple(RLPri_Int) ));
            add_to_esp(env, 2*4);
            addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), dst));
            return dst;
         }

         default:
            break;
      }
      break;
   }
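
   /* Argument-passing note for the GetMSBs8x8 helper call above
      (annotation): the 64-bit argument is pushed high half first, then
      low half, so the low word ends up at the lower address -- exactly
      the in-memory layout of a little-endian ULong parameter passed on
      the stack. */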
   /* --------- GET --------- */
   case Iex_Get: {
      if (ty == Ity_I32) {
         HReg dst = newVRegI(env);
         addInstr(env, X86Instr_Alu32R(
                          Xalu_MOV,
                          X86RMI_Mem(X86AMode_IR(e->Iex.Get.offset,
                                                 hregX86_EBP())),
                          dst));
         return dst;
      }
      if (ty == Ity_I8 || ty == Ity_I16) {
         HReg dst = newVRegI(env);
         addInstr(env, X86Instr_LoadEX(
                          toUChar(ty==Ity_I8 ? 1 : 2),
                          False,
                          X86AMode_IR(e->Iex.Get.offset,hregX86_EBP()),
                          dst));
         return dst;
      }
      break;
   }

   case Iex_GetI: {
      X86AMode* am
         = genGuestArrayOffset(
              env, e->Iex.GetI.descr,
                   e->Iex.GetI.ix, e->Iex.GetI.bias );
      HReg dst = newVRegI(env);
      if (ty == Ity_I8) {
         addInstr(env, X86Instr_LoadEX( 1, False, am, dst ));
         return dst;
      }
      if (ty == Ity_I32) {
         addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Mem(am), dst));
         return dst;
      }
      break;
   }
   /* --------- CCALL --------- */
   case Iex_CCall: {
      HReg dst = newVRegI(env);
      vassert(ty == e->Iex.CCall.retty);

      /* be very restrictive for now.  Only 32/64-bit ints allowed for
         args, and 32 bits for return type.  Don't forget to change
         the RetLoc if more return types are allowed in future. */
      if (e->Iex.CCall.retty != Ity_I32)
         goto irreducible;

      /* Marshal args, do the call, clear stack. */
      UInt   addToSp = 0;
      RetLoc rloc    = mk_RetLoc_INVALID();
      doHelperCall( &addToSp, &rloc, env, NULL/*guard*/,
                    e->Iex.CCall.cee, e->Iex.CCall.retty, e->Iex.CCall.args );
      vassert(is_sane_RetLoc(rloc));
      vassert(rloc.pri == RLPri_Int);
      vassert(addToSp == 0);

      addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), dst));
      return dst;
   }
   /* --------- LITERAL --------- */
   /* 32/16/8-bit literals */
   case Iex_Const: {
      X86RMI* rmi = iselIntExpr_RMI ( env, e );
      HReg    r   = newVRegI(env);
      addInstr(env, X86Instr_Alu32R(Xalu_MOV, rmi, r));
      return r;
   }
   /* --------- MULTIPLEX --------- */
   case Iex_ITE: { // VFD
      if ((ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8)
          && typeOfIRExpr(env->type_env,e->Iex.ITE.cond) == Ity_I1) {
         HReg   r1  = iselIntExpr_R(env, e->Iex.ITE.iftrue);
         X86RM* r0  = iselIntExpr_RM(env, e->Iex.ITE.iffalse);
         HReg   dst = newVRegI(env);
         addInstr(env, mk_iMOVsd_RR(r1,dst));
         X86CondCode cc = iselCondCode(env, e->Iex.ITE.cond);
         addInstr(env, X86Instr_CMov32(cc ^ 1, r0, dst));
         return dst;
      }
      break;
   }

   default:
      break;
   } /* switch (e->tag) */

   /* We get here if no pattern matched. */
  irreducible:
   ppIRExpr(e);
   vpanic("iselIntExpr_R: cannot reduce tree");
}
/*---------------------------------------------------------*/
/*--- ISEL: Integer expression auxiliaries              ---*/
/*---------------------------------------------------------*/

/* --------------------- AMODEs --------------------- */

/* Return an AMode which computes the value of the specified
   expression, possibly also adding insns to the code list as a
   result.  The expression may only be a 32-bit one.
*/

static Bool sane_AMode ( X86AMode* am )
{
   switch (am->tag) {
      case Xam_IR:
         return
            toBool( hregClass(am->Xam.IR.reg) == HRcInt32
                    && (hregIsVirtual(am->Xam.IR.reg)
                        || sameHReg(am->Xam.IR.reg, hregX86_EBP())) );
      case Xam_IRRS:
         return
            toBool( hregClass(am->Xam.IRRS.base) == HRcInt32
                    && hregIsVirtual(am->Xam.IRRS.base)
                    && hregClass(am->Xam.IRRS.index) == HRcInt32
                    && hregIsVirtual(am->Xam.IRRS.index) );
      default:
         vpanic("sane_AMode: unknown x86 amode tag");
   }
}

static X86AMode* iselIntExpr_AMode ( ISelEnv* env, const IRExpr* e )
{
   X86AMode* am = iselIntExpr_AMode_wrk(env, e);
   vassert(sane_AMode(am));
   return am;
}
/* DO NOT CALL THIS DIRECTLY ! */
static X86AMode* iselIntExpr_AMode_wrk ( ISelEnv* env, const IRExpr* e )
{
   IRType ty = typeOfIRExpr(env->type_env,e);
   vassert(ty == Ity_I32);

   /* Add32( Add32(expr1, Shl32(expr2, simm)), imm32 ) */
   if (e->tag == Iex_Binop
       && e->Iex.Binop.op == Iop_Add32
       && e->Iex.Binop.arg2->tag == Iex_Const
       && e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U32
       && e->Iex.Binop.arg1->tag == Iex_Binop
       && e->Iex.Binop.arg1->Iex.Binop.op == Iop_Add32
       && e->Iex.Binop.arg1->Iex.Binop.arg2->tag == Iex_Binop
       && e->Iex.Binop.arg1->Iex.Binop.arg2->Iex.Binop.op == Iop_Shl32
       && e->Iex.Binop.arg1
           ->Iex.Binop.arg2->Iex.Binop.arg2->tag == Iex_Const
       && e->Iex.Binop.arg1
           ->Iex.Binop.arg2->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8) {
      UInt shift = e->Iex.Binop.arg1
                    ->Iex.Binop.arg2->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
      UInt imm32 = e->Iex.Binop.arg2->Iex.Const.con->Ico.U32;
      if (shift == 1 || shift == 2 || shift == 3) {
         HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1->Iex.Binop.arg1);
         HReg r2 = iselIntExpr_R(env, e->Iex.Binop.arg1
                                       ->Iex.Binop.arg2->Iex.Binop.arg1 );
         return X86AMode_IRRS(imm32, r1, r2, shift);
      }
   }
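
   /* Worked example of the match above (annotation, not original text):
      Add32(Add32(t1, Shl32(t2, 0x2:I8)), 0x40:I32) becomes
      X86AMode_IRRS(0x40, r_t1, r_t2, 2), i.e. the amode 0x40(%t1,%t2,4),
      where r_t1/r_t2 stand for whatever vregs hold t1 and t2. */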
   /* Add32(expr1, Shl32(expr2, imm)) */
   if (e->tag == Iex_Binop
       && e->Iex.Binop.op == Iop_Add32
       && e->Iex.Binop.arg2->tag == Iex_Binop
       && e->Iex.Binop.arg2->Iex.Binop.op == Iop_Shl32
       && e->Iex.Binop.arg2->Iex.Binop.arg2->tag == Iex_Const
       && e->Iex.Binop.arg2->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8) {
      UInt shift = e->Iex.Binop.arg2->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
      if (shift == 1 || shift == 2 || shift == 3) {
         HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1);
         HReg r2 = iselIntExpr_R(env, e->Iex.Binop.arg2->Iex.Binop.arg1 );
         return X86AMode_IRRS(0, r1, r2, shift);
      }
   }

   /* Add32(expr, imm32) */
   if (e->tag == Iex_Binop
       && e->Iex.Binop.op == Iop_Add32
       && e->Iex.Binop.arg2->tag == Iex_Const
       && e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U32) {
      HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1);
      return X86AMode_IR(e->Iex.Binop.arg2->Iex.Const.con->Ico.U32, r1);
   }

   /* Doesn't match anything in particular.  Generate it into
      a register and use that. */
   {
      HReg r1 = iselIntExpr_R(env, e);
      return X86AMode_IR(0, r1);
   }
}
/* --------------------- RMIs --------------------- */

/* Similarly, calculate an expression into an X86RMI operand.  As with
   iselIntExpr_R, the expression can have type 32, 16 or 8 bits.  */

static X86RMI* iselIntExpr_RMI ( ISelEnv* env, const IRExpr* e )
{
   X86RMI* rmi = iselIntExpr_RMI_wrk(env, e);
   /* sanity checks ... */
   switch (rmi->tag) {
      case Xrmi_Imm:
         return rmi;
      case Xrmi_Reg:
         vassert(hregClass(rmi->Xrmi.Reg.reg) == HRcInt32);
         vassert(hregIsVirtual(rmi->Xrmi.Reg.reg));
         return rmi;
      case Xrmi_Mem:
         vassert(sane_AMode(rmi->Xrmi.Mem.am));
         return rmi;
      default:
         vpanic("iselIntExpr_RMI: unknown x86 RMI tag");
   }
}

/* DO NOT CALL THIS DIRECTLY ! */
static X86RMI* iselIntExpr_RMI_wrk ( ISelEnv* env, const IRExpr* e )
{
   IRType ty = typeOfIRExpr(env->type_env,e);
   vassert(ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8);

   /* special case: immediate */
   if (e->tag == Iex_Const) {
      UInt u;
      switch (e->Iex.Const.con->tag) {
         case Ico_U32: u = e->Iex.Const.con->Ico.U32; break;
         case Ico_U16: u = 0xFFFF & (e->Iex.Const.con->Ico.U16); break;
         case Ico_U8:  u = 0xFF & (e->Iex.Const.con->Ico.U8); break;
         default: vpanic("iselIntExpr_RMI.Iex_Const(x86h)");
      }
      return X86RMI_Imm(u);
   }

   /* special case: 32-bit GET */
   if (e->tag == Iex_Get && ty == Ity_I32) {
      return X86RMI_Mem(X86AMode_IR(e->Iex.Get.offset,
                                    hregX86_EBP()));
   }

   /* special case: 32-bit load from memory */
   if (e->tag == Iex_Load && ty == Ity_I32
       && e->Iex.Load.end == Iend_LE) {
      X86AMode* am = iselIntExpr_AMode(env, e->Iex.Load.addr);
      return X86RMI_Mem(am);
   }

   /* default case: calculate into a register and return that */
   {
      HReg r = iselIntExpr_R ( env, e );
      return X86RMI_Reg(r);
   }
}
/* --------------------- RIs --------------------- */

/* Calculate an expression into an X86RI operand.  As with
   iselIntExpr_R, the expression can have type 32, 16 or 8 bits. */

static X86RI* iselIntExpr_RI ( ISelEnv* env, const IRExpr* e )
{
   X86RI* ri = iselIntExpr_RI_wrk(env, e);
   /* sanity checks ... */
   switch (ri->tag) {
      case Xri_Imm:
         return ri;
      case Xri_Reg:
         vassert(hregClass(ri->Xri.Reg.reg) == HRcInt32);
         vassert(hregIsVirtual(ri->Xri.Reg.reg));
         return ri;
      default:
         vpanic("iselIntExpr_RI: unknown x86 RI tag");
   }
}

/* DO NOT CALL THIS DIRECTLY ! */
static X86RI* iselIntExpr_RI_wrk ( ISelEnv* env, const IRExpr* e )
{
   IRType ty = typeOfIRExpr(env->type_env,e);
   vassert(ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8);

   /* special case: immediate */
   if (e->tag == Iex_Const) {
      UInt u;
      switch (e->Iex.Const.con->tag) {
         case Ico_U32: u = e->Iex.Const.con->Ico.U32; break;
         case Ico_U16: u = 0xFFFF & (e->Iex.Const.con->Ico.U16); break;
         case Ico_U8:  u = 0xFF & (e->Iex.Const.con->Ico.U8); break;
         default: vpanic("iselIntExpr_RI.Iex_Const(x86h)");
      }
      return X86RI_Imm(u);
   }

   /* default case: calculate into a register and return that */
   {
      HReg r = iselIntExpr_R ( env, e );
      return X86RI_Reg(r);
   }
}
/* --------------------- RMs --------------------- */

/* Similarly, calculate an expression into an X86RM operand.  As with
   iselIntExpr_R, the expression can have type 32, 16 or 8 bits.  */

static X86RM* iselIntExpr_RM ( ISelEnv* env, const IRExpr* e )
{
   X86RM* rm = iselIntExpr_RM_wrk(env, e);
   /* sanity checks ... */
   switch (rm->tag) {
      case Xrm_Reg:
         vassert(hregClass(rm->Xrm.Reg.reg) == HRcInt32);
         vassert(hregIsVirtual(rm->Xrm.Reg.reg));
         return rm;
      case Xrm_Mem:
         vassert(sane_AMode(rm->Xrm.Mem.am));
         return rm;
      default:
         vpanic("iselIntExpr_RM: unknown x86 RM tag");
   }
}

/* DO NOT CALL THIS DIRECTLY ! */
static X86RM* iselIntExpr_RM_wrk ( ISelEnv* env, const IRExpr* e )
{
   IRType ty = typeOfIRExpr(env->type_env,e);
   vassert(ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8);

   /* special case: 32-bit GET */
   if (e->tag == Iex_Get && ty == Ity_I32) {
      return X86RM_Mem(X86AMode_IR(e->Iex.Get.offset,
                                   hregX86_EBP()));
   }

   /* special case: load from memory */

   /* default case: calculate into a register and return that */
   {
      HReg r = iselIntExpr_R ( env, e );
      return X86RM_Reg(r);
   }
}
/* --------------------- CONDCODE --------------------- */

/* Generate code to evaluate a bit-typed expression, returning the
   condition code which would correspond when the expression would
   notionally have returned 1. */

static X86CondCode iselCondCode ( ISelEnv* env, const IRExpr* e )
{
   /* Uh, there's nothing we can sanity check here, unfortunately. */
   return iselCondCode_wrk(env,e);
}

/* DO NOT CALL THIS DIRECTLY ! */
static X86CondCode iselCondCode_wrk ( ISelEnv* env, const IRExpr* e )
{
   MatchInfo mi;

   vassert(typeOfIRExpr(env->type_env,e) == Ity_I1);

   /* var */
   if (e->tag == Iex_RdTmp) {
      HReg r32 = lookupIRTemp(env, e->Iex.RdTmp.tmp);
      /* Test32 doesn't modify r32; so this is OK. */
      addInstr(env, X86Instr_Test32(1,X86RM_Reg(r32)));
      return Xcc_NZ;
   }
   /* Constant 1:Bit */
   if (e->tag == Iex_Const) {
      HReg r;
      vassert(e->Iex.Const.con->tag == Ico_U1);
      vassert(e->Iex.Const.con->Ico.U1 == True
              || e->Iex.Const.con->Ico.U1 == False);
      r = newVRegI(env);
      addInstr(env, X86Instr_Alu32R(Xalu_MOV,X86RMI_Imm(0),r));
      addInstr(env, X86Instr_Alu32R(Xalu_XOR,X86RMI_Reg(r),r));
      return e->Iex.Const.con->Ico.U1 ? Xcc_Z : Xcc_NZ;
   }
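
   /* Why this works (annotation, not in the original source): 'xorl
      %r,%r' always sets the Z flag, so Z is the "always true" condition
      and NZ the "always false" one; the preceding mov just gives the
      fresh vreg a defined value before it is xor'd with itself. */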
   /* Not1(...) */
   if (e->tag == Iex_Unop && e->Iex.Unop.op == Iop_Not1) {
      /* Generate code for the arg, and negate the test condition */
      return 1 ^ iselCondCode(env, e->Iex.Unop.arg);
   }

   /* --- patterns rooted at: 32to1 --- */

   if (e->tag == Iex_Unop
       && e->Iex.Unop.op == Iop_32to1) {
      X86RM* rm = iselIntExpr_RM(env, e->Iex.Unop.arg);
      addInstr(env, X86Instr_Test32(1,rm));
      return Xcc_NZ;
   }
1829 if (e
->tag
== Iex_Unop
1830 && e
->Iex
.Unop
.op
== Iop_CmpNEZ8
) {
1831 X86RM
* rm
= iselIntExpr_RM(env
, e
->Iex
.Unop
.arg
);
1832 addInstr(env
, X86Instr_Test32(0xFF,rm
));
1836 /* --- patterns rooted at: CmpNEZ16 --- */
1839 if (e
->tag
== Iex_Unop
1840 && e
->Iex
.Unop
.op
== Iop_CmpNEZ16
) {
1841 X86RM
* rm
= iselIntExpr_RM(env
, e
->Iex
.Unop
.arg
);
1842 addInstr(env
, X86Instr_Test32(0xFFFF,rm
));
   /* --- patterns rooted at: CmpNEZ32 --- */

   /* CmpNEZ32(And32(x,y)) */
   {
      DECLARE_PATTERN(p_CmpNEZ32_And32);
      DEFINE_PATTERN(p_CmpNEZ32_And32,
                     unop(Iop_CmpNEZ32, binop(Iop_And32, bind(0), bind(1))));
      if (matchIRExpr(&mi, p_CmpNEZ32_And32, e)) {
         HReg    r0   = iselIntExpr_R(env, mi.bindee[0]);
         X86RMI* rmi1 = iselIntExpr_RMI(env, mi.bindee[1]);
         HReg    tmp  = newVRegI(env);
         addInstr(env, mk_iMOVsd_RR(r0, tmp));
         addInstr(env, X86Instr_Alu32R(Xalu_AND,rmi1,tmp));
         return Xcc_NZ;
      }
   }

   /* CmpNEZ32(Or32(x,y)) */
   {
      DECLARE_PATTERN(p_CmpNEZ32_Or32);
      DEFINE_PATTERN(p_CmpNEZ32_Or32,
                     unop(Iop_CmpNEZ32, binop(Iop_Or32, bind(0), bind(1))));
      if (matchIRExpr(&mi, p_CmpNEZ32_Or32, e)) {
         HReg    r0   = iselIntExpr_R(env, mi.bindee[0]);
         X86RMI* rmi1 = iselIntExpr_RMI(env, mi.bindee[1]);
         HReg    tmp  = newVRegI(env);
         addInstr(env, mk_iMOVsd_RR(r0, tmp));
         addInstr(env, X86Instr_Alu32R(Xalu_OR,rmi1,tmp));
         return Xcc_NZ;
      }
   }
1879 if (e
->tag
== Iex_Unop
1880 && e
->Iex
.Unop
.op
== Iop_CmpNEZ32
1881 && e
->Iex
.Unop
.arg
->tag
== Iex_Get
) {
1882 X86AMode
* am
= X86AMode_IR(e
->Iex
.Unop
.arg
->Iex
.Get
.offset
,
1884 addInstr(env
, X86Instr_Alu32M(Xalu_CMP
, X86RI_Imm(0), am
));
1889 if (e
->tag
== Iex_Unop
1890 && e
->Iex
.Unop
.op
== Iop_CmpNEZ32
) {
1891 HReg r1
= iselIntExpr_R(env
, e
->Iex
.Unop
.arg
);
1892 X86RMI
* rmi2
= X86RMI_Imm(0);
1893 addInstr(env
, X86Instr_Alu32R(Xalu_CMP
,rmi2
,r1
));
   /* --- patterns rooted at: CmpNEZ64 --- */

   /* CmpNEZ64(Or64(x,y)) */
   {
      DECLARE_PATTERN(p_CmpNEZ64_Or64);
      DEFINE_PATTERN(p_CmpNEZ64_Or64,
                     unop(Iop_CmpNEZ64, binop(Iop_Or64, bind(0), bind(1))));
      if (matchIRExpr(&mi, p_CmpNEZ64_Or64, e)) {
         HReg    hi1, lo1, hi2, lo2;
         HReg    tmp  = newVRegI(env);
         iselInt64Expr( &hi1, &lo1, env, mi.bindee[0] );
         addInstr(env, mk_iMOVsd_RR(hi1, tmp));
         addInstr(env, X86Instr_Alu32R(Xalu_OR,X86RMI_Reg(lo1),tmp));
         iselInt64Expr( &hi2, &lo2, env, mi.bindee[1] );
         addInstr(env, X86Instr_Alu32R(Xalu_OR,X86RMI_Reg(hi2),tmp));
         addInstr(env, X86Instr_Alu32R(Xalu_OR,X86RMI_Reg(lo2),tmp));
         return Xcc_NZ;
      }
   }
   /* CmpNEZ64(x) */
   if (e->tag == Iex_Unop
       && e->Iex.Unop.op == Iop_CmpNEZ64) {
      HReg hi, lo;
      HReg tmp = newVRegI(env);
      iselInt64Expr( &hi, &lo, env, e->Iex.Unop.arg );
      addInstr(env, mk_iMOVsd_RR(hi, tmp));
      addInstr(env, X86Instr_Alu32R(Xalu_OR,X86RMI_Reg(lo), tmp));
      return Xcc_NZ;
   }
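
   /* Annotation: a 64-bit value is nonzero exactly when (hi | lo) is
      nonzero, so OR-ing the two halves together and testing NZ
      implements CmpNEZ64 without any branches. */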
   /* --- patterns rooted at: Cmp{EQ,NE}{8,16} --- */

   /* CmpEQ8 / CmpNE8 */
   if (e->tag == Iex_Binop
       && (e->Iex.Binop.op == Iop_CmpEQ8
           || e->Iex.Binop.op == Iop_CmpNE8
           || e->Iex.Binop.op == Iop_CasCmpEQ8
           || e->Iex.Binop.op == Iop_CasCmpNE8)) {
      if (isZeroU8(e->Iex.Binop.arg2)) {
         HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1);
         addInstr(env, X86Instr_Test32(0xFF,X86RM_Reg(r1)));
         switch (e->Iex.Binop.op) {
            case Iop_CmpEQ8: case Iop_CasCmpEQ8: return Xcc_Z;
            case Iop_CmpNE8: case Iop_CasCmpNE8: return Xcc_NZ;
            default: vpanic("iselCondCode(x86): CmpXX8(expr,0:I8)");
         }
      } else {
         HReg    r1   = iselIntExpr_R(env, e->Iex.Binop.arg1);
         X86RMI* rmi2 = iselIntExpr_RMI(env, e->Iex.Binop.arg2);
         HReg    r    = newVRegI(env);
         addInstr(env, mk_iMOVsd_RR(r1,r));
         addInstr(env, X86Instr_Alu32R(Xalu_XOR,rmi2,r));
         addInstr(env, X86Instr_Test32(0xFF,X86RM_Reg(r)));
         switch (e->Iex.Binop.op) {
            case Iop_CmpEQ8: case Iop_CasCmpEQ8: return Xcc_Z;
            case Iop_CmpNE8: case Iop_CasCmpNE8: return Xcc_NZ;
            default: vpanic("iselCondCode(x86): CmpXX8(expr,expr)");
         }
      }
   }
   /* CmpEQ16 / CmpNE16 */
   if (e->tag == Iex_Binop
       && (e->Iex.Binop.op == Iop_CmpEQ16
           || e->Iex.Binop.op == Iop_CmpNE16
           || e->Iex.Binop.op == Iop_CasCmpEQ16
           || e->Iex.Binop.op == Iop_CasCmpNE16
           || e->Iex.Binop.op == Iop_ExpCmpNE16)) {
      HReg    r1   = iselIntExpr_R(env, e->Iex.Binop.arg1);
      X86RMI* rmi2 = iselIntExpr_RMI(env, e->Iex.Binop.arg2);
      HReg    r    = newVRegI(env);
      addInstr(env, mk_iMOVsd_RR(r1,r));
      addInstr(env, X86Instr_Alu32R(Xalu_XOR,rmi2,r));
      addInstr(env, X86Instr_Test32(0xFFFF,X86RM_Reg(r)));
      switch (e->Iex.Binop.op) {
         case Iop_CmpEQ16: case Iop_CasCmpEQ16:
            return Xcc_Z;
         case Iop_CmpNE16: case Iop_CasCmpNE16: case Iop_ExpCmpNE16:
            return Xcc_NZ;
         default:
            vpanic("iselCondCode(x86): CmpXX16");
      }
   }
   /* CmpNE32(ccall, 32-bit constant) (--smc-check=all optimisation).
      Saves a "movl %eax, %tmp" compared to the default route. */
   if (e->tag == Iex_Binop
       && e->Iex.Binop.op == Iop_CmpNE32
       && e->Iex.Binop.arg1->tag == Iex_CCall
       && e->Iex.Binop.arg2->tag == Iex_Const) {
      IRExpr* cal = e->Iex.Binop.arg1;
      IRExpr* con = e->Iex.Binop.arg2;
      /* clone & partial-eval of generic Iex_CCall and Iex_Const cases */
      vassert(cal->Iex.CCall.retty == Ity_I32); /* else ill-typed IR */
      vassert(con->Iex.Const.con->tag == Ico_U32);
      /* Marshal args, do the call. */
      UInt   addToSp = 0;
      RetLoc rloc    = mk_RetLoc_INVALID();
      doHelperCall( &addToSp, &rloc, env, NULL/*guard*/,
                    cal->Iex.CCall.cee,
                    cal->Iex.CCall.retty, cal->Iex.CCall.args );
      vassert(is_sane_RetLoc(rloc));
      vassert(rloc.pri == RLPri_Int);
      vassert(addToSp == 0);

      addInstr(env, X86Instr_Alu32R(Xalu_CMP,
                                    X86RMI_Imm(con->Iex.Const.con->Ico.U32),
                                    hregX86_EAX()));
      return Xcc_NZ;
   }
   /* Cmp*32*(x,y) */
   if (e->tag == Iex_Binop
       && (e->Iex.Binop.op == Iop_CmpEQ32
           || e->Iex.Binop.op == Iop_CmpNE32
           || e->Iex.Binop.op == Iop_CmpLT32S
           || e->Iex.Binop.op == Iop_CmpLT32U
           || e->Iex.Binop.op == Iop_CmpLE32S
           || e->Iex.Binop.op == Iop_CmpLE32U
           || e->Iex.Binop.op == Iop_CasCmpEQ32
           || e->Iex.Binop.op == Iop_CasCmpNE32
           || e->Iex.Binop.op == Iop_ExpCmpNE32)) {
      HReg    r1   = iselIntExpr_R(env, e->Iex.Binop.arg1);
      X86RMI* rmi2 = iselIntExpr_RMI(env, e->Iex.Binop.arg2);
      addInstr(env, X86Instr_Alu32R(Xalu_CMP,rmi2,r1));
      switch (e->Iex.Binop.op) {
         case Iop_CmpEQ32: case Iop_CasCmpEQ32: return Xcc_Z;
         case Iop_CmpNE32:
         case Iop_CasCmpNE32: case Iop_ExpCmpNE32: return Xcc_NZ;
         case Iop_CmpLT32S: return Xcc_L;
         case Iop_CmpLT32U: return Xcc_B;
         case Iop_CmpLE32S: return Xcc_LE;
         case Iop_CmpLE32U: return Xcc_BE;
         default: vpanic("iselCondCode(x86): CmpXX32");
      }
   }
   /* Cmp*64*(x,y) */
   if (e->tag == Iex_Binop
       && (e->Iex.Binop.op == Iop_CmpNE64
           || e->Iex.Binop.op == Iop_CmpEQ64)) {
      HReg hi1, hi2, lo1, lo2;
      HReg tHi = newVRegI(env);
      HReg tLo = newVRegI(env);
      iselInt64Expr( &hi1, &lo1, env, e->Iex.Binop.arg1 );
      iselInt64Expr( &hi2, &lo2, env, e->Iex.Binop.arg2 );
      addInstr(env, mk_iMOVsd_RR(hi1, tHi));
      addInstr(env, X86Instr_Alu32R(Xalu_XOR,X86RMI_Reg(hi2), tHi));
      addInstr(env, mk_iMOVsd_RR(lo1, tLo));
      addInstr(env, X86Instr_Alu32R(Xalu_XOR,X86RMI_Reg(lo2), tLo));
      addInstr(env, X86Instr_Alu32R(Xalu_OR,X86RMI_Reg(tHi), tLo));
      switch (e->Iex.Binop.op) {
         case Iop_CmpNE64: return Xcc_NZ;
         case Iop_CmpEQ64: return Xcc_Z;
         default: vpanic("iselCondCode(x86): CmpXX64");
      }
   }
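
   /* Illustrative note: the sequence above computes
      tLo = (hi1 ^ hi2) | (lo1 ^ lo2), so the Z flag ends up set exactly
      when all 64 bits of the two operands agree.  For example, comparing
      0x0000000100000000 with 0x0000000000000001 gives hi-xor = 1 and
      lo-xor = 1; their OR is nonzero, so CmpNE64 yields Xcc_NZ and the
      Z condition required by CmpEQ64 fails, as expected. */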
   /* And1(x,y), Or1(x,y) */
   /* FIXME: We could (and probably should) do a lot better here.  If both args
      are in temps already then we can just emit a reg-reg And/Or directly,
      followed by the final Test. */
   if (e->tag == Iex_Binop
       && (e->Iex.Binop.op == Iop_And1 || e->Iex.Binop.op == Iop_Or1)) {
      // We could probably be cleverer about this.  In the meantime ..
      HReg x_as_32 = newVRegI(env);
      X86CondCode cc_x = iselCondCode(env, e->Iex.Binop.arg1);
      addInstr(env, X86Instr_Set32(cc_x, x_as_32));
      HReg y_as_32 = newVRegI(env);
      X86CondCode cc_y = iselCondCode(env, e->Iex.Binop.arg2);
      addInstr(env, X86Instr_Set32(cc_y, y_as_32));
      X86AluOp aop = e->Iex.Binop.op == Iop_And1 ? Xalu_AND : Xalu_OR;
      addInstr(env, X86Instr_Alu32R(aop, X86RMI_Reg(x_as_32), y_as_32));
      addInstr(env, X86Instr_Test32(1, X86RM_Reg(y_as_32)));
      return Xcc_NZ;
   }

   ppIRExpr(e);
   vpanic("iselCondCode");
}
/*---------------------------------------------------------*/
/*--- ISEL: Integer expressions (64 bit)                 ---*/
/*---------------------------------------------------------*/

/* Compute a 64-bit value into a register pair, which is returned as
   the first two parameters.  As with iselIntExpr_R, these may be
   either real or virtual regs; in any case they must not be changed
   by subsequent code emitted by the caller. */

static void iselInt64Expr ( HReg* rHi, HReg* rLo, ISelEnv* env,
                            const IRExpr* e )
{
   iselInt64Expr_wrk(rHi, rLo, env, e);
#  if 0
   vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
#  endif
   vassert(hregClass(*rHi) == HRcInt32);
   vassert(hregIsVirtual(*rHi));
   vassert(hregClass(*rLo) == HRcInt32);
   vassert(hregIsVirtual(*rLo));
}
/* DO NOT CALL THIS DIRECTLY ! */
static void iselInt64Expr_wrk ( HReg* rHi, HReg* rLo, ISelEnv* env,
                                const IRExpr* e )
{
   MatchInfo mi;
   HWord fn = 0; /* helper fn for most SIMD64 stuff */

   vassert(typeOfIRExpr(env->type_env,e) == Ity_I64);

   /* 64-bit literal */
   if (e->tag == Iex_Const) {
      ULong w64 = e->Iex.Const.con->Ico.U64;
      UInt  wHi = toUInt(w64 >> 32);
      UInt  wLo = toUInt(w64);
      HReg  tLo = newVRegI(env);
      HReg  tHi = newVRegI(env);
      vassert(e->Iex.Const.con->tag == Ico_U64);
      if (wLo == wHi) {
         /* Save a precious Int register in this special case. */
         addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(wLo), tLo));
         *rHi = tLo;
         *rLo = tLo;
      } else {
         addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(wHi), tHi));
         addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(wLo), tLo));
         *rHi = tHi;
         *rLo = tLo;
      }
      return;
   }
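
   /* Illustrative example: for the literal 0x1122334455667788, wHi is
      0x11223344 and wLo is 0x55667788, so the else-branch above emits two
      movl-immediates, one per half.  For a literal like 0x4242424242424242
      the halves coincide, and the wLo == wHi special case emits a single
      movl and hands back the same vreg for both halves. */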
   /* read 64-bit IRTemp */
   if (e->tag == Iex_RdTmp) {
      lookupIRTemp64( rHi, rLo, env, e->Iex.RdTmp.tmp);
      return;
   }

   /* 64-bit load */
   if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
      HReg     tLo, tHi;
      X86AMode *am0, *am4;
      vassert(e->Iex.Load.ty == Ity_I64);
      tLo = newVRegI(env);
      tHi = newVRegI(env);
      am0 = iselIntExpr_AMode(env, e->Iex.Load.addr);
      am4 = advance4(am0);
      addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(am0), tLo ));
      addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(am4), tHi ));
      *rHi = tHi;
      *rLo = tLo;
      return;
   }

   /* 64-bit GET */
   if (e->tag == Iex_Get) {
      X86AMode* am  = X86AMode_IR(e->Iex.Get.offset, hregX86_EBP());
      X86AMode* am4 = advance4(am);
      HReg tLo = newVRegI(env);
      HReg tHi = newVRegI(env);
      addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(am), tLo ));
      addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(am4), tHi ));
      *rHi = tHi;
      *rLo = tLo;
      return;
   }

   /* 64-bit GETI */
   if (e->tag == Iex_GetI) {
      X86AMode* am
         = genGuestArrayOffset( env, e->Iex.GetI.descr,
                                e->Iex.GetI.ix, e->Iex.GetI.bias );
      X86AMode* am4 = advance4(am);
      HReg tLo = newVRegI(env);
      HReg tHi = newVRegI(env);
      addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(am), tLo ));
      addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(am4), tHi ));
      *rHi = tHi;
      *rLo = tLo;
      return;
   }
   /* 64-bit ITE: ITE(g, expr, expr) */ // VFD
   if (e->tag == Iex_ITE) {
      HReg e0Lo, e0Hi, e1Lo, e1Hi;
      HReg tLo = newVRegI(env);
      HReg tHi = newVRegI(env);
      iselInt64Expr(&e0Hi, &e0Lo, env, e->Iex.ITE.iffalse);
      iselInt64Expr(&e1Hi, &e1Lo, env, e->Iex.ITE.iftrue);
      addInstr(env, mk_iMOVsd_RR(e1Hi, tHi));
      addInstr(env, mk_iMOVsd_RR(e1Lo, tLo));
      X86CondCode cc = iselCondCode(env, e->Iex.ITE.cond);
      /* This assumes the first cmov32 doesn't trash the condition
         codes, so they are still available for the second cmov32 */
      addInstr(env, X86Instr_CMov32(cc ^ 1, X86RM_Reg(e0Hi), tHi));
      addInstr(env, X86Instr_CMov32(cc ^ 1, X86RM_Reg(e0Lo), tLo));
      *rHi = tHi;
      *rLo = tLo;
      return;
   }
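
   /* Illustrative note: the result pair is seeded with the "iftrue"
      value, and both CMov32s use the inverted condition (cc ^ 1), so
      they overwrite the pair with the "iffalse" value exactly when the
      guard evaluates to false. */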
   /* --------- BINARY ops --------- */
   if (e->tag == Iex_Binop) {
      switch (e->Iex.Binop.op) {
         /* 32 x 32 -> 64 multiply */
         case Iop_MullU32:
         case Iop_MullS32: {
            /* get one operand into %eax, and the other into a R/M.
               Need to make an educated guess about which is better in
               which. */
            HReg   tLo    = newVRegI(env);
            HReg   tHi    = newVRegI(env);
            Bool   syned  = toBool(e->Iex.Binop.op == Iop_MullS32);
            X86RM* rmLeft = iselIntExpr_RM(env, e->Iex.Binop.arg1);
            HReg   rRight = iselIntExpr_R(env, e->Iex.Binop.arg2);
            addInstr(env, mk_iMOVsd_RR(rRight, hregX86_EAX()));
            addInstr(env, X86Instr_MulL(syned, rmLeft));
            /* Result is now in EDX:EAX.  Tell the caller. */
            addInstr(env, mk_iMOVsd_RR(hregX86_EDX(), tHi));
            addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), tLo));
            *rHi = tHi;
            *rLo = tLo;
            return;
         }

         /* 64 x 32 -> (32(rem),32(div)) division */
         case Iop_DivModU64to32:
         case Iop_DivModS64to32: {
            /* Get the 64-bit operand into edx:eax, and the other into
               any reg or memory. */
            HReg   sHi, sLo;
            HReg   tLo     = newVRegI(env);
            HReg   tHi     = newVRegI(env);
            Bool   syned   = toBool(e->Iex.Binop.op == Iop_DivModS64to32);
            X86RM* rmRight = iselIntExpr_RM(env, e->Iex.Binop.arg2);
            iselInt64Expr(&sHi,&sLo, env, e->Iex.Binop.arg1);
            addInstr(env, mk_iMOVsd_RR(sHi, hregX86_EDX()));
            addInstr(env, mk_iMOVsd_RR(sLo, hregX86_EAX()));
            addInstr(env, X86Instr_Div(syned, rmRight));
            addInstr(env, mk_iMOVsd_RR(hregX86_EDX(), tHi));
            addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), tLo));
            *rHi = tHi;
            *rLo = tLo;
            return;
         }
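
         /* Illustrative example (DivModU64to32): with arg1 = 100, so
            %edx:%eax holds 0:100, and arg2 = 7, the divl leaves the
            quotient 14 in %eax and the remainder 2 in %edx, so the
            returned pair is tHi:tLo = 2:14 -- remainder in the high
            half, quotient in the low half, as the IROp requires. */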
         /* Or64/And64/Xor64 */
         case Iop_Or64:
         case Iop_And64:
         case Iop_Xor64: {
            HReg xLo, xHi, yLo, yHi;
            HReg tLo = newVRegI(env);
            HReg tHi = newVRegI(env);
            X86AluOp op = e->Iex.Binop.op==Iop_Or64 ? Xalu_OR
                          : e->Iex.Binop.op==Iop_And64 ? Xalu_AND
                          : Xalu_XOR;
            iselInt64Expr(&xHi, &xLo, env, e->Iex.Binop.arg1);
            iselInt64Expr(&yHi, &yLo, env, e->Iex.Binop.arg2);
            addInstr(env, mk_iMOVsd_RR(xHi, tHi));
            addInstr(env, X86Instr_Alu32R(op, X86RMI_Reg(yHi), tHi));
            addInstr(env, mk_iMOVsd_RR(xLo, tLo));
            addInstr(env, X86Instr_Alu32R(op, X86RMI_Reg(yLo), tLo));
            *rHi = tHi;
            *rLo = tLo;
            return;
         }

         /* Add64/Sub64 */
         case Iop_Add64:
            if (e->Iex.Binop.arg2->tag == Iex_Const) {
               /* special case Add64(e, const) */
               ULong w64 = e->Iex.Binop.arg2->Iex.Const.con->Ico.U64;
               UInt  wHi = toUInt(w64 >> 32);
               UInt  wLo = toUInt(w64);
               HReg  tLo = newVRegI(env);
               HReg  tHi = newVRegI(env);
               HReg  xHi, xLo;
               vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U64);
               iselInt64Expr(&xHi, &xLo, env, e->Iex.Binop.arg1);
               addInstr(env, mk_iMOVsd_RR(xHi, tHi));
               addInstr(env, mk_iMOVsd_RR(xLo, tLo));
               addInstr(env, X86Instr_Alu32R(Xalu_ADD, X86RMI_Imm(wLo), tLo));
               addInstr(env, X86Instr_Alu32R(Xalu_ADC, X86RMI_Imm(wHi), tHi));
               *rHi = tHi;
               *rLo = tLo;
               return;
            }
            /* else fall through to the generic case */
         case Iop_Sub64: {
            HReg xLo, xHi, yLo, yHi;
            HReg tLo = newVRegI(env);
            HReg tHi = newVRegI(env);
            iselInt64Expr(&xHi, &xLo, env, e->Iex.Binop.arg1);
            addInstr(env, mk_iMOVsd_RR(xHi, tHi));
            addInstr(env, mk_iMOVsd_RR(xLo, tLo));
            iselInt64Expr(&yHi, &yLo, env, e->Iex.Binop.arg2);
            if (e->Iex.Binop.op==Iop_Add64) {
               addInstr(env, X86Instr_Alu32R(Xalu_ADD, X86RMI_Reg(yLo), tLo));
               addInstr(env, X86Instr_Alu32R(Xalu_ADC, X86RMI_Reg(yHi), tHi));
            } else {
               addInstr(env, X86Instr_Alu32R(Xalu_SUB, X86RMI_Reg(yLo), tLo));
               addInstr(env, X86Instr_Alu32R(Xalu_SBB, X86RMI_Reg(yHi), tHi));
            }
            *rHi = tHi;
            *rLo = tLo;
            return;
         }
         /* 32HLto64(e1,e2) */
         case Iop_32HLto64:
            *rHi = iselIntExpr_R(env, e->Iex.Binop.arg1);
            *rLo = iselIntExpr_R(env, e->Iex.Binop.arg2);
            return;

         /* 64-bit shifts */
         case Iop_Shl64: {
            /* We use the same ingenious scheme as gcc.  Put the value
               to be shifted into %hi:%lo, and the shift amount into
               %cl.  Then (dsts on right, a la ATT syntax):

               shldl %cl, %lo, %hi   -- make %hi be right for the
                                     -- shift amt %cl % 32
               shll  %cl, %lo        -- make %lo be right for the
                                     -- shift amt %cl % 32

               Now, if (shift amount % 64) is in the range 32 .. 63,
               we have to do a fixup, which puts the result low half
               into the result high half, and zeroes the low half:

               testl $32, %ecx
               cmovnz %lo, %hi
               movl $0, %tmp         -- sigh; need yet another reg
               cmovnz %tmp, %lo
            */
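            /* Illustrative worked example: shifting 0x00000000000000AB
               left by 40.  With %cl = 40, shldl/shll use %cl % 32 = 8,
               leaving %hi:%lo = 0x00000000:0x0000AB00.  Bit 5 of the
               amount is set (40 >= 32), so the fixup copies %lo into
               %hi and zeroes %lo, giving 0x0000AB00:0x00000000, which
               is indeed 0xAB << 40. */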
            HReg rAmt, sHi, sLo, tHi, tLo, tTemp;
            tLo   = newVRegI(env);
            tHi   = newVRegI(env);
            tTemp = newVRegI(env);
            rAmt  = iselIntExpr_R(env, e->Iex.Binop.arg2);
            iselInt64Expr(&sHi,&sLo, env, e->Iex.Binop.arg1);
            addInstr(env, mk_iMOVsd_RR(rAmt, hregX86_ECX()));
            addInstr(env, mk_iMOVsd_RR(sHi, tHi));
            addInstr(env, mk_iMOVsd_RR(sLo, tLo));
            /* Ok.  Now shift amt is in %ecx, and value is in tHi/tLo
               and those regs are legitimately modifiable. */
            addInstr(env, X86Instr_Sh3232(Xsh_SHL, 0/*%cl*/, tLo, tHi));
            addInstr(env, X86Instr_Sh32(Xsh_SHL, 0/*%cl*/, tLo));
            addInstr(env, X86Instr_Test32(32, X86RM_Reg(hregX86_ECX())));
            addInstr(env, X86Instr_CMov32(Xcc_NZ, X86RM_Reg(tLo), tHi));
            addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(0), tTemp));
            addInstr(env, X86Instr_CMov32(Xcc_NZ, X86RM_Reg(tTemp), tLo));
            *rHi = tHi;
            *rLo = tLo;
            return;
         }
         case Iop_Shr64: {
            /* We use the same ingenious scheme as gcc.  Put the value
               to be shifted into %hi:%lo, and the shift amount into
               %cl.  Then:

               shrdl %cl, %hi, %lo   -- make %lo be right for the
                                     -- shift amt %cl % 32
               shrl  %cl, %hi        -- make %hi be right for the
                                     -- shift amt %cl % 32

               Now, if (shift amount % 64) is in the range 32 .. 63,
               we have to do a fixup, which puts the result high half
               into the result low half, and zeroes the high half:

               testl $32, %ecx
               cmovnz %hi, %lo
               movl $0, %tmp         -- sigh; need yet another reg
               cmovnz %tmp, %hi
            */
            HReg rAmt, sHi, sLo, tHi, tLo, tTemp;
            tLo   = newVRegI(env);
            tHi   = newVRegI(env);
            tTemp = newVRegI(env);
            rAmt  = iselIntExpr_R(env, e->Iex.Binop.arg2);
            iselInt64Expr(&sHi,&sLo, env, e->Iex.Binop.arg1);
            addInstr(env, mk_iMOVsd_RR(rAmt, hregX86_ECX()));
            addInstr(env, mk_iMOVsd_RR(sHi, tHi));
            addInstr(env, mk_iMOVsd_RR(sLo, tLo));
            /* Ok.  Now shift amt is in %ecx, and value is in tHi/tLo
               and those regs are legitimately modifiable. */
            addInstr(env, X86Instr_Sh3232(Xsh_SHR, 0/*%cl*/, tHi, tLo));
            addInstr(env, X86Instr_Sh32(Xsh_SHR, 0/*%cl*/, tHi));
            addInstr(env, X86Instr_Test32(32, X86RM_Reg(hregX86_ECX())));
            addInstr(env, X86Instr_CMov32(Xcc_NZ, X86RM_Reg(tHi), tLo));
            addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(0), tTemp));
            addInstr(env, X86Instr_CMov32(Xcc_NZ, X86RM_Reg(tTemp), tHi));
            *rHi = tHi;
            *rLo = tLo;
            return;
         }
2401 /* gcc -O2 does the following. I don't know how it works, but it
2402 does work. Don't mess with it. This is hard to test because the
2403 x86 front end doesn't create Iop_Sar64 for any x86 instruction,
2404 so it's impossible to write a test program that feeds values
2405 through Iop_Sar64 and prints their results. The implementation
2406 here was tested by using psrlq on mmx registers -- that generates
2407 Iop_Shr64 -- and temporarily hacking the front end to generate
2408 Iop_Sar64 for that instruction instead.
2421 cmovne %r3, %r4 // = resLo
2422 cmovne %r2, %r3 // = resHi
2424 HReg amount
= iselIntExpr_R(env
, e
->Iex
.Binop
.arg2
);
2425 HReg srcHi
= INVALID_HREG
, srcLo
= INVALID_HREG
;
2426 iselInt64Expr(&srcHi
, &srcLo
, env
, e
->Iex
.Binop
.arg1
);
2427 HReg r1
= newVRegI(env
);
2428 HReg r2
= newVRegI(env
);
2429 HReg r3
= newVRegI(env
);
2430 HReg r4
= newVRegI(env
);
2431 addInstr(env
, mk_iMOVsd_RR(amount
, hregX86_ECX()));
2432 addInstr(env
, mk_iMOVsd_RR(srcHi
, r1
));
2433 addInstr(env
, mk_iMOVsd_RR(srcLo
, r2
));
2435 addInstr(env
, mk_iMOVsd_RR(r1
, r3
));
2436 addInstr(env
, X86Instr_Sh32(Xsh_SAR
, 0/*%cl*/, r3
));
2437 addInstr(env
, mk_iMOVsd_RR(r2
, r4
));
2438 addInstr(env
, X86Instr_Sh3232(Xsh_SHR
, 0/*%cl*/, r1
, r4
));
2439 addInstr(env
, mk_iMOVsd_RR(r3
, r2
));
2440 addInstr(env
, X86Instr_Sh32(Xsh_SAR
, 31, r2
));
2441 addInstr(env
, X86Instr_Alu32R(Xalu_AND
, X86RMI_Imm(32),
2443 addInstr(env
, X86Instr_CMov32(Xcc_NZ
, X86RM_Reg(r3
), r4
));
2444 addInstr(env
, X86Instr_CMov32(Xcc_NZ
, X86RM_Reg(r2
), r3
));
2451 /* Sigh, this is an almost exact copy of the F64 -> I32/I16
2452 case. Unfortunately I see no easy way to avoid the
2454 case Iop_F64toI64S
: {
2455 HReg rf
= iselDblExpr(env
, e
->Iex
.Binop
.arg2
);
2456 HReg tLo
= newVRegI(env
);
2457 HReg tHi
= newVRegI(env
);
2459 /* Used several times ... */
2460 /* Careful ... this sharing is only safe because
2461 zero_esp/four_esp do not hold any registers which the
2462 register allocator could attempt to swizzle later. */
2463 X86AMode
* zero_esp
= X86AMode_IR(0, hregX86_ESP());
2464 X86AMode
* four_esp
= X86AMode_IR(4, hregX86_ESP());
2466 /* rf now holds the value to be converted, and rrm holds
2467 the rounding mode value, encoded as per the
2468 IRRoundingMode enum. The first thing to do is set the
2469 FPU's rounding mode accordingly. */
2471 /* Create a space for the format conversion. */
2473 sub_from_esp(env
, 8);
2475 /* Set host rounding mode */
2476 set_FPU_rounding_mode( env
, e
->Iex
.Binop
.arg1
);
2478 /* gistll %rf, 0(%esp) */
2479 addInstr(env
, X86Instr_FpLdStI(False
/*store*/, 8, rf
, zero_esp
));
2481 /* movl 0(%esp), %dstLo */
2482 /* movl 4(%esp), %dstHi */
2483 addInstr(env
, X86Instr_Alu32R(
2484 Xalu_MOV
, X86RMI_Mem(zero_esp
), tLo
));
2485 addInstr(env
, X86Instr_Alu32R(
2486 Xalu_MOV
, X86RMI_Mem(four_esp
), tHi
));
2488 /* Restore default FPU rounding. */
2489 set_FPU_rounding_default( env
);
2500 fn
= (HWord
)h_generic_calc_Add8x8
; goto binnish
;
2502 fn
= (HWord
)h_generic_calc_Add16x4
; goto binnish
;
2504 fn
= (HWord
)h_generic_calc_Add32x2
; goto binnish
;
2507 fn
= (HWord
)h_generic_calc_Avg8Ux8
; goto binnish
;
2509 fn
= (HWord
)h_generic_calc_Avg16Ux4
; goto binnish
;
2512 fn
= (HWord
)h_generic_calc_CmpEQ8x8
; goto binnish
;
2514 fn
= (HWord
)h_generic_calc_CmpEQ16x4
; goto binnish
;
2516 fn
= (HWord
)h_generic_calc_CmpEQ32x2
; goto binnish
;
2519 fn
= (HWord
)h_generic_calc_CmpGT8Sx8
; goto binnish
;
2520 case Iop_CmpGT16Sx4
:
2521 fn
= (HWord
)h_generic_calc_CmpGT16Sx4
; goto binnish
;
2522 case Iop_CmpGT32Sx2
:
2523 fn
= (HWord
)h_generic_calc_CmpGT32Sx2
; goto binnish
;
2525 case Iop_InterleaveHI8x8
:
2526 fn
= (HWord
)h_generic_calc_InterleaveHI8x8
; goto binnish
;
2527 case Iop_InterleaveLO8x8
:
2528 fn
= (HWord
)h_generic_calc_InterleaveLO8x8
; goto binnish
;
2529 case Iop_InterleaveHI16x4
:
2530 fn
= (HWord
)h_generic_calc_InterleaveHI16x4
; goto binnish
;
2531 case Iop_InterleaveLO16x4
:
2532 fn
= (HWord
)h_generic_calc_InterleaveLO16x4
; goto binnish
;
2533 case Iop_InterleaveHI32x2
:
2534 fn
= (HWord
)h_generic_calc_InterleaveHI32x2
; goto binnish
;
2535 case Iop_InterleaveLO32x2
:
2536 fn
= (HWord
)h_generic_calc_InterleaveLO32x2
; goto binnish
;
2537 case Iop_CatOddLanes16x4
:
2538 fn
= (HWord
)h_generic_calc_CatOddLanes16x4
; goto binnish
;
2539 case Iop_CatEvenLanes16x4
:
2540 fn
= (HWord
)h_generic_calc_CatEvenLanes16x4
; goto binnish
;
2542 fn
= (HWord
)h_generic_calc_Perm8x8
; goto binnish
;
2545 fn
= (HWord
)h_generic_calc_Max8Ux8
; goto binnish
;
2547 fn
= (HWord
)h_generic_calc_Max16Sx4
; goto binnish
;
2549 fn
= (HWord
)h_generic_calc_Min8Ux8
; goto binnish
;
2551 fn
= (HWord
)h_generic_calc_Min16Sx4
; goto binnish
;
2554 fn
= (HWord
)h_generic_calc_Mul16x4
; goto binnish
;
2556 fn
= (HWord
)h_generic_calc_Mul32x2
; goto binnish
;
2557 case Iop_MulHi16Sx4
:
2558 fn
= (HWord
)h_generic_calc_MulHi16Sx4
; goto binnish
;
2559 case Iop_MulHi16Ux4
:
2560 fn
= (HWord
)h_generic_calc_MulHi16Ux4
; goto binnish
;
2563 fn
= (HWord
)h_generic_calc_QAdd8Sx8
; goto binnish
;
2565 fn
= (HWord
)h_generic_calc_QAdd16Sx4
; goto binnish
;
2567 fn
= (HWord
)h_generic_calc_QAdd8Ux8
; goto binnish
;
2569 fn
= (HWord
)h_generic_calc_QAdd16Ux4
; goto binnish
;
2571 case Iop_QNarrowBin32Sto16Sx4
:
2572 fn
= (HWord
)h_generic_calc_QNarrowBin32Sto16Sx4
; goto binnish
;
2573 case Iop_QNarrowBin16Sto8Sx8
:
2574 fn
= (HWord
)h_generic_calc_QNarrowBin16Sto8Sx8
; goto binnish
;
2575 case Iop_QNarrowBin16Sto8Ux8
:
2576 fn
= (HWord
)h_generic_calc_QNarrowBin16Sto8Ux8
; goto binnish
;
2577 case Iop_NarrowBin16to8x8
:
2578 fn
= (HWord
)h_generic_calc_NarrowBin16to8x8
; goto binnish
;
2579 case Iop_NarrowBin32to16x4
:
2580 fn
= (HWord
)h_generic_calc_NarrowBin32to16x4
; goto binnish
;
2583 fn
= (HWord
)h_generic_calc_QSub8Sx8
; goto binnish
;
2585 fn
= (HWord
)h_generic_calc_QSub16Sx4
; goto binnish
;
2587 fn
= (HWord
)h_generic_calc_QSub8Ux8
; goto binnish
;
2589 fn
= (HWord
)h_generic_calc_QSub16Ux4
; goto binnish
;
2592 fn
= (HWord
)h_generic_calc_Sub8x8
; goto binnish
;
2594 fn
= (HWord
)h_generic_calc_Sub16x4
; goto binnish
;
2596 fn
= (HWord
)h_generic_calc_Sub32x2
; goto binnish
;
2599 /* Note: the following assumes all helpers are of
2601 ULong fn ( ULong, ULong ), and they are
2602 not marked as regparm functions.
2604 HReg xLo
, xHi
, yLo
, yHi
;
2605 HReg tLo
= newVRegI(env
);
2606 HReg tHi
= newVRegI(env
);
2607 iselInt64Expr(&yHi
, &yLo
, env
, e
->Iex
.Binop
.arg2
);
2608 addInstr(env
, X86Instr_Push(X86RMI_Reg(yHi
)));
2609 addInstr(env
, X86Instr_Push(X86RMI_Reg(yLo
)));
2610 iselInt64Expr(&xHi
, &xLo
, env
, e
->Iex
.Binop
.arg1
);
2611 addInstr(env
, X86Instr_Push(X86RMI_Reg(xHi
)));
2612 addInstr(env
, X86Instr_Push(X86RMI_Reg(xLo
)));
2613 addInstr(env
, X86Instr_Call( Xcc_ALWAYS
, (Addr32
)fn
,
2614 0, mk_RetLoc_simple(RLPri_2Int
) ));
2615 add_to_esp(env
, 4*4);
2616 addInstr(env
, mk_iMOVsd_RR(hregX86_EDX(), tHi
));
2617 addInstr(env
, mk_iMOVsd_RR(hregX86_EAX(), tLo
));
2624 fn
= (HWord
)h_generic_calc_ShlN32x2
; goto shifty
;
2626 fn
= (HWord
)h_generic_calc_ShlN16x4
; goto shifty
;
2628 fn
= (HWord
)h_generic_calc_ShlN8x8
; goto shifty
;
2630 fn
= (HWord
)h_generic_calc_ShrN32x2
; goto shifty
;
2632 fn
= (HWord
)h_generic_calc_ShrN16x4
; goto shifty
;
2634 fn
= (HWord
)h_generic_calc_SarN32x2
; goto shifty
;
2636 fn
= (HWord
)h_generic_calc_SarN16x4
; goto shifty
;
2638 fn
= (HWord
)h_generic_calc_SarN8x8
; goto shifty
;
2640 /* Note: the following assumes all helpers are of
2642 ULong fn ( ULong, UInt ), and they are
2643 not marked as regparm functions.
2646 HReg tLo
= newVRegI(env
);
2647 HReg tHi
= newVRegI(env
);
2648 X86RMI
* y
= iselIntExpr_RMI(env
, e
->Iex
.Binop
.arg2
);
2649 addInstr(env
, X86Instr_Push(y
));
2650 iselInt64Expr(&xHi
, &xLo
, env
, e
->Iex
.Binop
.arg1
);
2651 addInstr(env
, X86Instr_Push(X86RMI_Reg(xHi
)));
2652 addInstr(env
, X86Instr_Push(X86RMI_Reg(xLo
)));
2653 addInstr(env
, X86Instr_Call( Xcc_ALWAYS
, (Addr32
)fn
,
2654 0, mk_RetLoc_simple(RLPri_2Int
) ));
2655 add_to_esp(env
, 3*4);
2656 addInstr(env
, mk_iMOVsd_RR(hregX86_EDX(), tHi
));
2657 addInstr(env
, mk_iMOVsd_RR(hregX86_EAX(), tLo
));
2666 } /* if (e->tag == Iex_Binop) */
2669 /* --------- UNARY ops --------- */
2670 if (e
->tag
== Iex_Unop
) {
2671 switch (e
->Iex
.Unop
.op
) {
2675 HReg tLo
= newVRegI(env
);
2676 HReg tHi
= newVRegI(env
);
2677 HReg src
= iselIntExpr_R(env
, e
->Iex
.Unop
.arg
);
2678 addInstr(env
, mk_iMOVsd_RR(src
,tHi
));
2679 addInstr(env
, mk_iMOVsd_RR(src
,tLo
));
2680 addInstr(env
, X86Instr_Sh32(Xsh_SAR
, 31, tHi
));
2688 HReg tLo
= newVRegI(env
);
2689 HReg tHi
= newVRegI(env
);
2690 HReg src
= iselIntExpr_R(env
, e
->Iex
.Unop
.arg
);
2691 addInstr(env
, mk_iMOVsd_RR(src
,tLo
));
2692 addInstr(env
, X86Instr_Alu32R(Xalu_MOV
, X86RMI_Imm(0), tHi
));
2700 HReg tLo
= newVRegI(env
);
2701 HReg tHi
= newVRegI(env
);
2702 HReg src
= iselIntExpr_R(env
, e
->Iex
.Unop
.arg
);
2703 addInstr(env
, mk_iMOVsd_RR(src
,tLo
));
2704 addInstr(env
, X86Instr_Alu32R(Xalu_AND
,
2705 X86RMI_Imm(0xFFFF), tLo
));
2706 addInstr(env
, X86Instr_Alu32R(Xalu_MOV
, X86RMI_Imm(0), tHi
));
2713 case Iop_V128HIto64
:
2714 case Iop_V128to64
: {
2715 Int off
= e
->Iex
.Unop
.op
==Iop_V128HIto64
? 8 : 0;
2716 HReg tLo
= newVRegI(env
);
2717 HReg tHi
= newVRegI(env
);
2718 HReg vec
= iselVecExpr(env
, e
->Iex
.Unop
.arg
);
2719 X86AMode
* esp0
= X86AMode_IR(0, hregX86_ESP());
2720 X86AMode
* espLO
= X86AMode_IR(off
, hregX86_ESP());
2721 X86AMode
* espHI
= X86AMode_IR(off
+4, hregX86_ESP());
2722 sub_from_esp(env
, 16);
2723 addInstr(env
, X86Instr_SseLdSt(False
/*store*/, vec
, esp0
));
2724 addInstr(env
, X86Instr_Alu32R( Xalu_MOV
,
2725 X86RMI_Mem(espLO
), tLo
));
2726 addInstr(env
, X86Instr_Alu32R( Xalu_MOV
,
2727 X86RMI_Mem(espHI
), tHi
));
2728 add_to_esp(env
, 16);
2734 /* could do better than this, but for now ... */
2736 HReg tLo
= newVRegI(env
);
2737 HReg tHi
= newVRegI(env
);
2738 X86CondCode cond
= iselCondCode(env
, e
->Iex
.Unop
.arg
);
2739 addInstr(env
, X86Instr_Set32(cond
,tLo
));
2740 addInstr(env
, X86Instr_Sh32(Xsh_SHL
, 31, tLo
));
2741 addInstr(env
, X86Instr_Sh32(Xsh_SAR
, 31, tLo
));
2742 addInstr(env
, mk_iMOVsd_RR(tLo
, tHi
));
2750 HReg tLo
= newVRegI(env
);
2751 HReg tHi
= newVRegI(env
);
2753 iselInt64Expr(&sHi
, &sLo
, env
, e
->Iex
.Unop
.arg
);
2754 addInstr(env
, mk_iMOVsd_RR(sHi
, tHi
));
2755 addInstr(env
, mk_iMOVsd_RR(sLo
, tLo
));
2756 addInstr(env
, X86Instr_Unary32(Xun_NOT
,tHi
));
2757 addInstr(env
, X86Instr_Unary32(Xun_NOT
,tLo
));
2766 HReg tLo
= newVRegI(env
);
2767 HReg tHi
= newVRegI(env
);
2769 iselInt64Expr(&yHi
, &yLo
, env
, e
->Iex
.Unop
.arg
);
2770 /* tLo = 0 - yLo, and set carry */
2771 addInstr(env
, X86Instr_Alu32R(Xalu_MOV
, X86RMI_Imm(0), tLo
));
2772 addInstr(env
, X86Instr_Alu32R(Xalu_SUB
, X86RMI_Reg(yLo
), tLo
));
2773 /* tHi = 0 - yHi - carry */
2774 addInstr(env
, X86Instr_Alu32R(Xalu_MOV
, X86RMI_Imm(0), tHi
));
2775 addInstr(env
, X86Instr_Alu32R(Xalu_SBB
, X86RMI_Reg(yHi
), tHi
));
2776 /* So now we have tHi:tLo = -arg. To finish off, or 'arg'
2777 back in, so as to give the final result
2778 tHi:tLo = arg | -arg. */
2779 addInstr(env
, X86Instr_Alu32R(Xalu_OR
, X86RMI_Reg(yLo
), tLo
));
2780 addInstr(env
, X86Instr_Alu32R(Xalu_OR
, X86RMI_Reg(yHi
), tHi
));
2786 /* --- patterns rooted at: CmpwNEZ64 --- */
2789 case Iop_CmpwNEZ64
: {
2791 DECLARE_PATTERN(p_CmpwNEZ64_Or64
);
2792 DEFINE_PATTERN(p_CmpwNEZ64_Or64
,
2793 unop(Iop_CmpwNEZ64
,binop(Iop_Or64
,bind(0),bind(1))));
2794 if (matchIRExpr(&mi
, p_CmpwNEZ64_Or64
, e
)) {
2795 /* CmpwNEZ64(Or64(x,y)) */
2796 HReg xHi
,xLo
,yHi
,yLo
;
2797 HReg xBoth
= newVRegI(env
);
2798 HReg merged
= newVRegI(env
);
2799 HReg tmp2
= newVRegI(env
);
2801 iselInt64Expr(&xHi
,&xLo
, env
, mi
.bindee
[0]);
2802 addInstr(env
, mk_iMOVsd_RR(xHi
,xBoth
));
2803 addInstr(env
, X86Instr_Alu32R(Xalu_OR
,
2804 X86RMI_Reg(xLo
),xBoth
));
2806 iselInt64Expr(&yHi
,&yLo
, env
, mi
.bindee
[1]);
2807 addInstr(env
, mk_iMOVsd_RR(yHi
,merged
));
2808 addInstr(env
, X86Instr_Alu32R(Xalu_OR
,
2809 X86RMI_Reg(yLo
),merged
));
2810 addInstr(env
, X86Instr_Alu32R(Xalu_OR
,
2811 X86RMI_Reg(xBoth
),merged
));
2813 /* tmp2 = (merged | -merged) >>s 31 */
2814 addInstr(env
, mk_iMOVsd_RR(merged
,tmp2
));
2815 addInstr(env
, X86Instr_Unary32(Xun_NEG
,tmp2
));
2816 addInstr(env
, X86Instr_Alu32R(Xalu_OR
,
2817 X86RMI_Reg(merged
), tmp2
));
2818 addInstr(env
, X86Instr_Sh32(Xsh_SAR
, 31, tmp2
));
2825 HReg tmp1
= newVRegI(env
);
2826 HReg tmp2
= newVRegI(env
);
2827 /* srcHi:srcLo = arg */
2828 iselInt64Expr(&srcHi
, &srcLo
, env
, e
->Iex
.Unop
.arg
);
2829 /* tmp1 = srcHi | srcLo */
2830 addInstr(env
, mk_iMOVsd_RR(srcHi
,tmp1
));
2831 addInstr(env
, X86Instr_Alu32R(Xalu_OR
,
2832 X86RMI_Reg(srcLo
), tmp1
));
2833 /* tmp2 = (tmp1 | -tmp1) >>s 31 */
2834 addInstr(env
, mk_iMOVsd_RR(tmp1
,tmp2
));
2835 addInstr(env
, X86Instr_Unary32(Xun_NEG
,tmp2
));
2836 addInstr(env
, X86Instr_Alu32R(Xalu_OR
,
2837 X86RMI_Reg(tmp1
), tmp2
));
2838 addInstr(env
, X86Instr_Sh32(Xsh_SAR
, 31, tmp2
));
2845 /* ReinterpF64asI64(e) */
2846 /* Given an IEEE754 double, produce an I64 with the same bit
2848 case Iop_ReinterpF64asI64
: {
2849 HReg rf
= iselDblExpr(env
, e
->Iex
.Unop
.arg
);
2850 HReg tLo
= newVRegI(env
);
2851 HReg tHi
= newVRegI(env
);
2852 X86AMode
* zero_esp
= X86AMode_IR(0, hregX86_ESP());
2853 X86AMode
* four_esp
= X86AMode_IR(4, hregX86_ESP());
2855 set_FPU_rounding_default(env
);
2857 sub_from_esp(env
, 8);
2858 /* gstD %rf, 0(%esp) */
2860 X86Instr_FpLdSt(False
/*store*/, 8, rf
, zero_esp
));
2861 /* movl 0(%esp), %tLo */
2863 X86Instr_Alu32R(Xalu_MOV
, X86RMI_Mem(zero_esp
), tLo
));
2864 /* movl 4(%esp), %tHi */
2866 X86Instr_Alu32R(Xalu_MOV
, X86RMI_Mem(four_esp
), tHi
));
2874 case Iop_CmpNEZ32x2
:
2875 fn
= (HWord
)h_generic_calc_CmpNEZ32x2
; goto unish
;
2876 case Iop_CmpNEZ16x4
:
2877 fn
= (HWord
)h_generic_calc_CmpNEZ16x4
; goto unish
;
2879 fn
= (HWord
)h_generic_calc_CmpNEZ8x8
; goto unish
;
2881 /* Note: the following assumes all helpers are of
2883 ULong fn ( ULong ), and they are
2884 not marked as regparm functions.
2887 HReg tLo
= newVRegI(env
);
2888 HReg tHi
= newVRegI(env
);
2889 iselInt64Expr(&xHi
, &xLo
, env
, e
->Iex
.Unop
.arg
);
2890 addInstr(env
, X86Instr_Push(X86RMI_Reg(xHi
)));
2891 addInstr(env
, X86Instr_Push(X86RMI_Reg(xLo
)));
2892 addInstr(env
, X86Instr_Call( Xcc_ALWAYS
, (Addr32
)fn
,
2893 0, mk_RetLoc_simple(RLPri_2Int
) ));
2894 add_to_esp(env
, 2*4);
2895 addInstr(env
, mk_iMOVsd_RR(hregX86_EDX(), tHi
));
2896 addInstr(env
, mk_iMOVsd_RR(hregX86_EAX(), tLo
));
2905 } /* if (e->tag == Iex_Unop) */
2908 /* --------- CCALL --------- */
2909 if (e
->tag
== Iex_CCall
) {
2910 HReg tLo
= newVRegI(env
);
2911 HReg tHi
= newVRegI(env
);
2913 /* Marshal args, do the call, clear stack. */
2915 RetLoc rloc
= mk_RetLoc_INVALID();
2916 doHelperCall( &addToSp
, &rloc
, env
, NULL
/*guard*/,
2918 e
->Iex
.CCall
.retty
, e
->Iex
.CCall
.args
);
2919 vassert(is_sane_RetLoc(rloc
));
2920 vassert(rloc
.pri
== RLPri_2Int
);
2921 vassert(addToSp
== 0);
2924 addInstr(env
, mk_iMOVsd_RR(hregX86_EDX(), tHi
));
2925 addInstr(env
, mk_iMOVsd_RR(hregX86_EAX(), tLo
));
2932 vpanic("iselInt64Expr");
/*---------------------------------------------------------*/
/*--- ISEL: Floating point expressions (32 bit)          ---*/
/*---------------------------------------------------------*/

/* Nothing interesting here; really just wrappers for
   64-bit stuff. */

static HReg iselFltExpr ( ISelEnv* env, const IRExpr* e )
{
   HReg r = iselFltExpr_wrk( env, e );
#  if 0
   vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
#  endif
   vassert(hregClass(r) == HRcFlt64); /* yes, really Flt64 */
   vassert(hregIsVirtual(r));
   return r;
}
2954 /* DO NOT CALL THIS DIRECTLY */
2955 static HReg
iselFltExpr_wrk ( ISelEnv
* env
, const IRExpr
* e
)
2957 IRType ty
= typeOfIRExpr(env
->type_env
,e
);
2958 vassert(ty
== Ity_F32
);
2960 if (e
->tag
== Iex_RdTmp
) {
2961 return lookupIRTemp(env
, e
->Iex
.RdTmp
.tmp
);
2964 if (e
->tag
== Iex_Load
&& e
->Iex
.Load
.end
== Iend_LE
) {
2966 HReg res
= newVRegF(env
);
2967 vassert(e
->Iex
.Load
.ty
== Ity_F32
);
2968 am
= iselIntExpr_AMode(env
, e
->Iex
.Load
.addr
);
2969 addInstr(env
, X86Instr_FpLdSt(True
/*load*/, 4, res
, am
));
2973 if (e
->tag
== Iex_Binop
2974 && e
->Iex
.Binop
.op
== Iop_F64toF32
) {
2975 /* Although the result is still held in a standard FPU register,
2976 we need to round it to reflect the loss of accuracy/range
2977 entailed in casting it to a 32-bit float. */
2978 HReg dst
= newVRegF(env
);
2979 HReg src
= iselDblExpr(env
, e
->Iex
.Binop
.arg2
);
2980 set_FPU_rounding_mode( env
, e
->Iex
.Binop
.arg1
);
2981 addInstr(env
, X86Instr_Fp64to32(src
,dst
));
2982 set_FPU_rounding_default( env
);
2986 if (e
->tag
== Iex_Get
) {
2987 X86AMode
* am
= X86AMode_IR( e
->Iex
.Get
.offset
,
2989 HReg res
= newVRegF(env
);
2990 addInstr(env
, X86Instr_FpLdSt( True
/*load*/, 4, res
, am
));
2994 if (e
->tag
== Iex_Unop
2995 && e
->Iex
.Unop
.op
== Iop_ReinterpI32asF32
) {
2996 /* Given an I32, produce an IEEE754 float with the same bit
2998 HReg dst
= newVRegF(env
);
2999 X86RMI
* rmi
= iselIntExpr_RMI(env
, e
->Iex
.Unop
.arg
);
3001 addInstr(env
, X86Instr_Push(rmi
));
3002 addInstr(env
, X86Instr_FpLdSt(
3003 True
/*load*/, 4, dst
,
3004 X86AMode_IR(0, hregX86_ESP())));
3009 if (e
->tag
== Iex_Binop
&& e
->Iex
.Binop
.op
== Iop_RoundF32toInt
) {
3010 HReg rf
= iselFltExpr(env
, e
->Iex
.Binop
.arg2
);
3011 HReg dst
= newVRegF(env
);
3013 /* rf now holds the value to be rounded. The first thing to do
3014 is set the FPU's rounding mode accordingly. */
3016 /* Set host rounding mode */
3017 set_FPU_rounding_mode( env
, e
->Iex
.Binop
.arg1
);
3019 /* grndint %rf, %dst */
3020 addInstr(env
, X86Instr_FpUnary(Xfp_ROUND
, rf
, dst
));
3022 /* Restore default FPU rounding. */
3023 set_FPU_rounding_default( env
);
3029 vpanic("iselFltExpr_wrk");
3033 /*---------------------------------------------------------*/
3034 /*--- ISEL: Floating point expressions (64 bit) ---*/
3035 /*---------------------------------------------------------*/
3037 /* Compute a 64-bit floating point value into a register, the identity
3038 of which is returned. As with iselIntExpr_R, the reg may be either
3039 real or virtual; in any case it must not be changed by subsequent
3040 code emitted by the caller. */
3042 /* IEEE 754 formats. From http://www.freesoft.org/CIE/RFC/1832/32.htm:
3044 Type S (1 bit) E (11 bits) F (52 bits)
3045 ---- --------- ----------- -----------
3046 signalling NaN u 2047 (max) .0uuuuu---u
3049 quiet NaN u 2047 (max) .1uuuuu---u
3051 negative infinity 1 2047 (max) .000000---0
3053 positive infinity 0 2047 (max) .000000---0
3055 negative zero 1 0 .000000---0
3057 positive zero 0 0 .000000---0
3060 static HReg
iselDblExpr ( ISelEnv
* env
, const IRExpr
* e
)
3062 HReg r
= iselDblExpr_wrk( env
, e
);
3064 vex_printf("\n"); ppIRExpr(e
); vex_printf("\n");
3066 vassert(hregClass(r
) == HRcFlt64
);
3067 vassert(hregIsVirtual(r
));
3071 /* DO NOT CALL THIS DIRECTLY */
3072 static HReg
iselDblExpr_wrk ( ISelEnv
* env
, const IRExpr
* e
)
3074 IRType ty
= typeOfIRExpr(env
->type_env
,e
);
3076 vassert(ty
== Ity_F64
);
3078 if (e
->tag
== Iex_RdTmp
) {
3079 return lookupIRTemp(env
, e
->Iex
.RdTmp
.tmp
);
3082 if (e
->tag
== Iex_Const
) {
3083 union { UInt u32x2
[2]; ULong u64
; Double f64
; } u
;
3084 HReg freg
= newVRegF(env
);
3085 vassert(sizeof(u
) == 8);
3086 vassert(sizeof(u
.u64
) == 8);
3087 vassert(sizeof(u
.f64
) == 8);
3088 vassert(sizeof(u
.u32x2
) == 8);
3090 if (e
->Iex
.Const
.con
->tag
== Ico_F64
) {
3091 u
.f64
= e
->Iex
.Const
.con
->Ico
.F64
;
3093 else if (e
->Iex
.Const
.con
->tag
== Ico_F64i
) {
3094 u
.u64
= e
->Iex
.Const
.con
->Ico
.F64i
;
3097 vpanic("iselDblExpr(x86): const");
3099 addInstr(env
, X86Instr_Push(X86RMI_Imm(u
.u32x2
[1])));
3100 addInstr(env
, X86Instr_Push(X86RMI_Imm(u
.u32x2
[0])));
3101 addInstr(env
, X86Instr_FpLdSt(True
/*load*/, 8, freg
,
3102 X86AMode_IR(0, hregX86_ESP())));
3107 if (e
->tag
== Iex_Load
&& e
->Iex
.Load
.end
== Iend_LE
) {
3109 HReg res
= newVRegF(env
);
3110 vassert(e
->Iex
.Load
.ty
== Ity_F64
);
3111 am
= iselIntExpr_AMode(env
, e
->Iex
.Load
.addr
);
3112 addInstr(env
, X86Instr_FpLdSt(True
/*load*/, 8, res
, am
));
3116 if (e
->tag
== Iex_Get
) {
3117 X86AMode
* am
= X86AMode_IR( e
->Iex
.Get
.offset
,
3119 HReg res
= newVRegF(env
);
3120 addInstr(env
, X86Instr_FpLdSt( True
/*load*/, 8, res
, am
));
3124 if (e
->tag
== Iex_GetI
) {
3126 = genGuestArrayOffset(
3127 env
, e
->Iex
.GetI
.descr
,
3128 e
->Iex
.GetI
.ix
, e
->Iex
.GetI
.bias
);
3129 HReg res
= newVRegF(env
);
3130 addInstr(env
, X86Instr_FpLdSt( True
/*load*/, 8, res
, am
));
3134 if (e
->tag
== Iex_Triop
) {
3135 X86FpOp fpop
= Xfp_INVALID
;
3136 IRTriop
*triop
= e
->Iex
.Triop
.details
;
3137 switch (triop
->op
) {
3138 case Iop_AddF64
: fpop
= Xfp_ADD
; break;
3139 case Iop_SubF64
: fpop
= Xfp_SUB
; break;
3140 case Iop_MulF64
: fpop
= Xfp_MUL
; break;
3141 case Iop_DivF64
: fpop
= Xfp_DIV
; break;
3142 case Iop_ScaleF64
: fpop
= Xfp_SCALE
; break;
3143 case Iop_Yl2xF64
: fpop
= Xfp_YL2X
; break;
3144 case Iop_Yl2xp1F64
: fpop
= Xfp_YL2XP1
; break;
3145 case Iop_AtanF64
: fpop
= Xfp_ATAN
; break;
3146 case Iop_PRemF64
: fpop
= Xfp_PREM
; break;
3147 case Iop_PRem1F64
: fpop
= Xfp_PREM1
; break;
3150 if (fpop
!= Xfp_INVALID
) {
3151 HReg res
= newVRegF(env
);
3152 HReg srcL
= iselDblExpr(env
, triop
->arg2
);
3153 HReg srcR
= iselDblExpr(env
, triop
->arg3
);
3154 /* XXXROUNDINGFIXME */
3155 /* set roundingmode here */
3156 addInstr(env
, X86Instr_FpBinary(fpop
,srcL
,srcR
,res
));
3157 if (fpop
!= Xfp_ADD
&& fpop
!= Xfp_SUB
3158 && fpop
!= Xfp_MUL
&& fpop
!= Xfp_DIV
)
3159 roundToF64(env
, res
);
3164 if (e
->tag
== Iex_Binop
&& e
->Iex
.Binop
.op
== Iop_RoundF64toInt
) {
3165 HReg rf
= iselDblExpr(env
, e
->Iex
.Binop
.arg2
);
3166 HReg dst
= newVRegF(env
);
3168 /* rf now holds the value to be rounded. The first thing to do
3169 is set the FPU's rounding mode accordingly. */
3171 /* Set host rounding mode */
3172 set_FPU_rounding_mode( env
, e
->Iex
.Binop
.arg1
);
3174 /* grndint %rf, %dst */
3175 addInstr(env
, X86Instr_FpUnary(Xfp_ROUND
, rf
, dst
));
3177 /* Restore default FPU rounding. */
3178 set_FPU_rounding_default( env
);
3183 if (e
->tag
== Iex_Binop
&& e
->Iex
.Binop
.op
== Iop_I64StoF64
) {
3184 HReg dst
= newVRegF(env
);
3186 iselInt64Expr( &rHi
, &rLo
, env
, e
->Iex
.Binop
.arg2
);
3187 addInstr(env
, X86Instr_Push(X86RMI_Reg(rHi
)));
3188 addInstr(env
, X86Instr_Push(X86RMI_Reg(rLo
)));
3190 /* Set host rounding mode */
3191 set_FPU_rounding_mode( env
, e
->Iex
.Binop
.arg1
);
3193 addInstr(env
, X86Instr_FpLdStI(
3194 True
/*load*/, 8, dst
,
3195 X86AMode_IR(0, hregX86_ESP())));
3197 /* Restore default FPU rounding. */
3198 set_FPU_rounding_default( env
);
3204 if (e
->tag
== Iex_Binop
) {
3205 X86FpOp fpop
= Xfp_INVALID
;
3206 switch (e
->Iex
.Binop
.op
) {
3207 case Iop_SinF64
: fpop
= Xfp_SIN
; break;
3208 case Iop_CosF64
: fpop
= Xfp_COS
; break;
3209 case Iop_TanF64
: fpop
= Xfp_TAN
; break;
3210 case Iop_2xm1F64
: fpop
= Xfp_2XM1
; break;
3211 case Iop_SqrtF64
: fpop
= Xfp_SQRT
; break;
3214 if (fpop
!= Xfp_INVALID
) {
3215 HReg res
= newVRegF(env
);
3216 HReg src
= iselDblExpr(env
, e
->Iex
.Binop
.arg2
);
3217 /* XXXROUNDINGFIXME */
3218 /* set roundingmode here */
3219 /* Note that X86Instr_FpUnary(Xfp_TAN,..) sets the condition
3220 codes. I don't think that matters, since this insn
3221 selector never generates such an instruction intervening
3222 between an flag-setting instruction and a flag-using
3224 addInstr(env
, X86Instr_FpUnary(fpop
,src
,res
));
3225 if (fpop
!= Xfp_SQRT
3226 && fpop
!= Xfp_NEG
&& fpop
!= Xfp_ABS
)
3227 roundToF64(env
, res
);
3232 if (e
->tag
== Iex_Unop
) {
3233 X86FpOp fpop
= Xfp_INVALID
;
3234 switch (e
->Iex
.Unop
.op
) {
3235 case Iop_NegF64
: fpop
= Xfp_NEG
; break;
3236 case Iop_AbsF64
: fpop
= Xfp_ABS
; break;
3239 if (fpop
!= Xfp_INVALID
) {
3240 HReg res
= newVRegF(env
);
3241 HReg src
= iselDblExpr(env
, e
->Iex
.Unop
.arg
);
3242 addInstr(env
, X86Instr_FpUnary(fpop
,src
,res
));
3243 /* No need to do roundToF64(env,res) for Xfp_NEG or Xfp_ABS,
3244 but might need to do that for other unary ops. */
3249 if (e
->tag
== Iex_Unop
) {
3250 switch (e
->Iex
.Unop
.op
) {
3251 case Iop_I32StoF64
: {
3252 HReg dst
= newVRegF(env
);
3253 HReg ri
= iselIntExpr_R(env
, e
->Iex
.Unop
.arg
);
3254 addInstr(env
, X86Instr_Push(X86RMI_Reg(ri
)));
3255 set_FPU_rounding_default(env
);
3256 addInstr(env
, X86Instr_FpLdStI(
3257 True
/*load*/, 4, dst
,
3258 X86AMode_IR(0, hregX86_ESP())));
3262 case Iop_ReinterpI64asF64
: {
3263 /* Given an I64, produce an IEEE754 double with the same
3265 HReg dst
= newVRegF(env
);
3267 iselInt64Expr( &rHi
, &rLo
, env
, e
->Iex
.Unop
.arg
);
3269 set_FPU_rounding_default(env
);
3270 addInstr(env
, X86Instr_Push(X86RMI_Reg(rHi
)));
3271 addInstr(env
, X86Instr_Push(X86RMI_Reg(rLo
)));
3272 addInstr(env
, X86Instr_FpLdSt(
3273 True
/*load*/, 8, dst
,
3274 X86AMode_IR(0, hregX86_ESP())));
3278 case Iop_F32toF64
: {
3279 /* this is a no-op */
3280 HReg res
= iselFltExpr(env
, e
->Iex
.Unop
.arg
);
3288 /* --------- MULTIPLEX --------- */
3289 if (e
->tag
== Iex_ITE
) { // VFD
3291 && typeOfIRExpr(env
->type_env
,e
->Iex
.ITE
.cond
) == Ity_I1
) {
3292 HReg r1
= iselDblExpr(env
, e
->Iex
.ITE
.iftrue
);
3293 HReg r0
= iselDblExpr(env
, e
->Iex
.ITE
.iffalse
);
3294 HReg dst
= newVRegF(env
);
3295 addInstr(env
, X86Instr_FpUnary(Xfp_MOV
,r1
,dst
));
3296 X86CondCode cc
= iselCondCode(env
, e
->Iex
.ITE
.cond
);
3297 addInstr(env
, X86Instr_FpCMov(cc
^ 1, r0
, dst
));
3303 vpanic("iselDblExpr_wrk");
3307 /*---------------------------------------------------------*/
3308 /*--- ISEL: SIMD (Vector) expressions, 128 bit. ---*/
3309 /*---------------------------------------------------------*/
3311 static HReg
iselVecExpr ( ISelEnv
* env
, const IRExpr
* e
)
3313 HReg r
= iselVecExpr_wrk( env
, e
);
3315 vex_printf("\n"); ppIRExpr(e
); vex_printf("\n");
3317 vassert(hregClass(r
) == HRcVec128
);
3318 vassert(hregIsVirtual(r
));
3323 /* DO NOT CALL THIS DIRECTLY */
3324 static HReg
iselVecExpr_wrk ( ISelEnv
* env
, const IRExpr
* e
)
3327 # define REQUIRE_SSE1 \
3328 do { if (env->hwcaps == 0/*baseline, no sse*/ \
3329 || env->hwcaps == VEX_HWCAPS_X86_MMXEXT /*Integer SSE*/) \
3333 # define REQUIRE_SSE2 \
3334 do { if (0 == (env->hwcaps & VEX_HWCAPS_X86_SSE2)) \
3338 # define SSE2_OR_ABOVE \
3339 (env->hwcaps & VEX_HWCAPS_X86_SSE2)
3341 HWord fn
= 0; /* address of helper fn, if required */
3343 Bool arg1isEReg
= False
;
3344 X86SseOp op
= Xsse_INVALID
;
3345 IRType ty
= typeOfIRExpr(env
->type_env
,e
);
3347 vassert(ty
== Ity_V128
);
3351 if (e
->tag
== Iex_RdTmp
) {
3352 return lookupIRTemp(env
, e
->Iex
.RdTmp
.tmp
);
3355 if (e
->tag
== Iex_Get
) {
3356 HReg dst
= newVRegV(env
);
3357 addInstr(env
, X86Instr_SseLdSt(
3360 X86AMode_IR(e
->Iex
.Get
.offset
, hregX86_EBP())
3366 if (e
->tag
== Iex_Load
&& e
->Iex
.Load
.end
== Iend_LE
) {
3367 HReg dst
= newVRegV(env
);
3368 X86AMode
* am
= iselIntExpr_AMode(env
, e
->Iex
.Load
.addr
);
3369 addInstr(env
, X86Instr_SseLdSt( True
/*load*/, dst
, am
));
3373 if (e
->tag
== Iex_Const
) {
3374 HReg dst
= newVRegV(env
);
3375 vassert(e
->Iex
.Const
.con
->tag
== Ico_V128
);
3376 addInstr(env
, X86Instr_SseConst(e
->Iex
.Const
.con
->Ico
.V128
, dst
));
3380 if (e
->tag
== Iex_Unop
) {
3382 if (SSE2_OR_ABOVE
) {
3383 /* 64UtoV128(LDle:I64(addr)) */
3384 DECLARE_PATTERN(p_zwiden_load64
);
3385 DEFINE_PATTERN(p_zwiden_load64
,
3387 IRExpr_Load(Iend_LE
,Ity_I64
,bind(0))));
3388 if (matchIRExpr(&mi
, p_zwiden_load64
, e
)) {
3389 X86AMode
* am
= iselIntExpr_AMode(env
, mi
.bindee
[0]);
3390 HReg dst
= newVRegV(env
);
3391 addInstr(env
, X86Instr_SseLdzLO(8, dst
, am
));
3396 switch (e
->Iex
.Unop
.op
) {
3399 HReg arg
= iselVecExpr(env
, e
->Iex
.Unop
.arg
);
3400 return do_sse_Not128(env
, arg
);
3403 case Iop_CmpNEZ64x2
: {
3404 /* We can use SSE2 instructions for this. */
3405 /* Ideally, we want to do a 64Ix2 comparison against zero of
3406 the operand. Problem is no such insn exists. Solution
3407 therefore is to do a 32Ix4 comparison instead, and bitwise-
3408 negate (NOT) the result. Let a,b,c,d be 32-bit lanes, and
3409 let the not'd result of this initial comparison be a:b:c:d.
3410 What we need to compute is (a|b):(a|b):(c|d):(c|d). So, use
3411 pshufd to create a value b:a:d:c, and OR that with a:b:c:d,
3412 giving the required result.
3414 The required selection sequence is 2,3,0,1, which
3415 according to Intel's documentation means the pshufd
3416 literal value is 0xB1, that is,
3417 (2 << 6) | (3 << 4) | (0 << 2) | (1 << 0)
3419 HReg arg
= iselVecExpr(env
, e
->Iex
.Unop
.arg
);
3420 HReg tmp
= newVRegV(env
);
3421 HReg dst
= newVRegV(env
);
3423 addInstr(env
, X86Instr_SseReRg(Xsse_XOR
, tmp
, tmp
));
3424 addInstr(env
, X86Instr_SseReRg(Xsse_CMPEQ32
, arg
, tmp
));
3425 tmp
= do_sse_Not128(env
, tmp
);
3426 addInstr(env
, X86Instr_SseShuf(0xB1, tmp
, dst
));
3427 addInstr(env
, X86Instr_SseReRg(Xsse_OR
, tmp
, dst
));
3431 case Iop_CmpNEZ32x4
: {
3432 /* Sigh, we have to generate lousy code since this has to
3433 work on SSE1 hosts */
3434 /* basically, the idea is: for each lane:
3435 movl lane, %r ; negl %r (now CF = lane==0 ? 0 : 1)
3436 sbbl %r, %r (now %r = 1Sto32(CF))
3441 X86AMode
* esp0
= X86AMode_IR(0, hregX86_ESP());
3442 HReg arg
= iselVecExpr(env
, e
->Iex
.Unop
.arg
);
3443 HReg dst
= newVRegV(env
);
3444 HReg r32
= newVRegI(env
);
3445 sub_from_esp(env
, 16);
3446 addInstr(env
, X86Instr_SseLdSt(False
/*store*/, arg
, esp0
));
3447 for (i
= 0; i
< 4; i
++) {
3448 am
= X86AMode_IR(i
*4, hregX86_ESP());
3449 addInstr(env
, X86Instr_Alu32R(Xalu_MOV
, X86RMI_Mem(am
), r32
));
3450 addInstr(env
, X86Instr_Unary32(Xun_NEG
, r32
));
3451 addInstr(env
, X86Instr_Alu32R(Xalu_SBB
, X86RMI_Reg(r32
), r32
));
3452 addInstr(env
, X86Instr_Alu32M(Xalu_MOV
, X86RI_Reg(r32
), am
));
3454 addInstr(env
, X86Instr_SseLdSt(True
/*load*/, dst
, esp0
));
3455 add_to_esp(env
, 16);
3459 case Iop_CmpNEZ8x16
:
3460 case Iop_CmpNEZ16x8
: {
3461 /* We can use SSE2 instructions for this. */
3463 HReg vec0
= newVRegV(env
);
3464 HReg vec1
= newVRegV(env
);
3465 HReg dst
= newVRegV(env
);
3467 = e
->Iex
.Unop
.op
==Iop_CmpNEZ16x8
? Xsse_CMPEQ16
3470 addInstr(env
, X86Instr_SseReRg(Xsse_XOR
, vec0
, vec0
));
3471 addInstr(env
, mk_vMOVsd_RR(vec0
, vec1
));
3472 addInstr(env
, X86Instr_Sse32Fx4(Xsse_CMPEQF
, vec1
, vec1
));
3473 /* defer arg computation to here so as to give CMPEQF as long
3474 as possible to complete */
3475 arg
= iselVecExpr(env
, e
->Iex
.Unop
.arg
);
3476 /* vec0 is all 0s; vec1 is all 1s */
3477 addInstr(env
, mk_vMOVsd_RR(arg
, dst
));
3478 /* 16x8 or 8x16 comparison == */
3479 addInstr(env
, X86Instr_SseReRg(cmpOp
, vec0
, dst
));
3481 addInstr(env
, X86Instr_SseReRg(Xsse_XOR
, vec1
, dst
));
3485 case Iop_RecipEst32Fx4
: op
= Xsse_RCPF
; goto do_32Fx4_unary
;
3486 case Iop_RSqrtEst32Fx4
: op
= Xsse_RSQRTF
; goto do_32Fx4_unary
;
3489 HReg arg
= iselVecExpr(env
, e
->Iex
.Unop
.arg
);
3490 HReg dst
= newVRegV(env
);
3491 addInstr(env
, X86Instr_Sse32Fx4(op
, arg
, dst
));
3495 case Iop_RecipEst32F0x4
: op
= Xsse_RCPF
; goto do_32F0x4_unary
;
3496 case Iop_RSqrtEst32F0x4
: op
= Xsse_RSQRTF
; goto do_32F0x4_unary
;
3497 case Iop_Sqrt32F0x4
: op
= Xsse_SQRTF
; goto do_32F0x4_unary
;
3500 /* A bit subtle. We have to copy the arg to the result
3501 register first, because actually doing the SSE scalar insn
3502 leaves the upper 3/4 of the destination register
3503 unchanged. Whereas the required semantics of these
3504 primops is that the upper 3/4 is simply copied in from the
3506 HReg arg
= iselVecExpr(env
, e
->Iex
.Unop
.arg
);
3507 HReg dst
= newVRegV(env
);
3508 addInstr(env
, mk_vMOVsd_RR(arg
, dst
));
3509 addInstr(env
, X86Instr_Sse32FLo(op
, arg
, dst
));
3513 case Iop_Sqrt64F0x2
: op
= Xsse_SQRTF
; goto do_64F0x2_unary
;
3516 /* A bit subtle. We have to copy the arg to the result
3517 register first, because actually doing the SSE scalar insn
3518 leaves the upper half of the destination register
3519 unchanged. Whereas the required semantics of these
3520 primops is that the upper half is simply copied in from the
3522 HReg arg
= iselVecExpr(env
, e
->Iex
.Unop
.arg
);
3523 HReg dst
= newVRegV(env
);
3525 addInstr(env
, mk_vMOVsd_RR(arg
, dst
));
3526 addInstr(env
, X86Instr_Sse64FLo(op
, arg
, dst
));
3530 case Iop_32UtoV128
: {
3531 HReg dst
= newVRegV(env
);
3532 X86AMode
* esp0
= X86AMode_IR(0, hregX86_ESP());
3533 X86RMI
* rmi
= iselIntExpr_RMI(env
, e
->Iex
.Unop
.arg
);
3534 addInstr(env
, X86Instr_Push(rmi
));
3535 addInstr(env
, X86Instr_SseLdzLO(4, dst
, esp0
));
3540 case Iop_64UtoV128
: {
3542 HReg dst
= newVRegV(env
);
3543 X86AMode
* esp0
= X86AMode_IR(0, hregX86_ESP());
3544 iselInt64Expr(&rHi
, &rLo
, env
, e
->Iex
.Unop
.arg
);
3545 addInstr(env
, X86Instr_Push(X86RMI_Reg(rHi
)));
3546 addInstr(env
, X86Instr_Push(X86RMI_Reg(rLo
)));
3547 addInstr(env
, X86Instr_SseLdzLO(8, dst
, esp0
));
3554 } /* switch (e->Iex.Unop.op) */
3555 } /* if (e->tag == Iex_Unop) */
3557 if (e
->tag
== Iex_Binop
) {
3558 switch (e
->Iex
.Binop
.op
) {
3563 case Iop_Sqrt32Fx4
: {
3564 /* :: (rmode, vec) -> vec */
3565 HReg arg
= iselVecExpr(env
, e
->Iex
.Binop
.arg2
);
3566 HReg dst
= newVRegV(env
);
3567 /* XXXROUNDINGFIXME */
3568 /* set roundingmode here */
3569 addInstr(env
, (e
->Iex
.Binop
.op
== Iop_Sqrt64Fx2
3570 ? X86Instr_Sse64Fx2
: X86Instr_Sse32Fx4
)
3571 (Xsse_SQRTF
, arg
, dst
));
3575 case Iop_SetV128lo32
: {
3576 HReg dst
= newVRegV(env
);
3577 HReg srcV
= iselVecExpr(env
, e
->Iex
.Binop
.arg1
);
3578 HReg srcI
= iselIntExpr_R(env
, e
->Iex
.Binop
.arg2
);
3579 X86AMode
* esp0
= X86AMode_IR(0, hregX86_ESP());
3580 sub_from_esp(env
, 16);
3581 addInstr(env
, X86Instr_SseLdSt(False
/*store*/, srcV
, esp0
));
3582 addInstr(env
, X86Instr_Alu32M(Xalu_MOV
, X86RI_Reg(srcI
), esp0
));
3583 addInstr(env
, X86Instr_SseLdSt(True
/*load*/, dst
, esp0
));
3584 add_to_esp(env
, 16);
3588 case Iop_SetV128lo64
: {
3589 HReg dst
= newVRegV(env
);
3590 HReg srcV
= iselVecExpr(env
, e
->Iex
.Binop
.arg1
);
3591 HReg srcIhi
, srcIlo
;
3592 X86AMode
* esp0
= X86AMode_IR(0, hregX86_ESP());
3593 X86AMode
* esp4
= advance4(esp0
);
3594 iselInt64Expr(&srcIhi
, &srcIlo
, env
, e
->Iex
.Binop
.arg2
);
3595 sub_from_esp(env
, 16);
3596 addInstr(env
, X86Instr_SseLdSt(False
/*store*/, srcV
, esp0
));
3597 addInstr(env
, X86Instr_Alu32M(Xalu_MOV
, X86RI_Reg(srcIlo
), esp0
));
3598 addInstr(env
, X86Instr_Alu32M(Xalu_MOV
, X86RI_Reg(srcIhi
), esp4
));
3599 addInstr(env
, X86Instr_SseLdSt(True
/*load*/, dst
, esp0
));
3600 add_to_esp(env
, 16);
3604 case Iop_64HLtoV128
: {
3605 HReg r3
, r2
, r1
, r0
;
3606 X86AMode
* esp0
= X86AMode_IR(0, hregX86_ESP());
3607 X86AMode
* esp4
= advance4(esp0
);
3608 X86AMode
* esp8
= advance4(esp4
);
3609 X86AMode
* esp12
= advance4(esp8
);
3610 HReg dst
= newVRegV(env
);
3611 /* do this via the stack (easy, convenient, etc) */
3612 sub_from_esp(env
, 16);
3613 /* Do the less significant 64 bits */
3614 iselInt64Expr(&r1
, &r0
, env
, e
->Iex
.Binop
.arg2
);
3615 addInstr(env
, X86Instr_Alu32M(Xalu_MOV
, X86RI_Reg(r0
), esp0
));
3616 addInstr(env
, X86Instr_Alu32M(Xalu_MOV
, X86RI_Reg(r1
), esp4
));
3617 /* Do the more significant 64 bits */
3618 iselInt64Expr(&r3
, &r2
, env
, e
->Iex
.Binop
.arg1
);
3619 addInstr(env
, X86Instr_Alu32M(Xalu_MOV
, X86RI_Reg(r2
), esp8
));
3620 addInstr(env
, X86Instr_Alu32M(Xalu_MOV
, X86RI_Reg(r3
), esp12
));
3621 /* Fetch result back from stack. */
3622 addInstr(env
, X86Instr_SseLdSt(True
/*load*/, dst
, esp0
));
3623 add_to_esp(env
, 16);
3627 case Iop_CmpEQ32Fx4
: op
= Xsse_CMPEQF
; goto do_32Fx4
;
3628 case Iop_CmpLT32Fx4
: op
= Xsse_CMPLTF
; goto do_32Fx4
;
3629 case Iop_CmpLE32Fx4
: op
= Xsse_CMPLEF
; goto do_32Fx4
;
3630 case Iop_CmpUN32Fx4
: op
= Xsse_CMPUNF
; goto do_32Fx4
;
3631 case Iop_Max32Fx4
: op
= Xsse_MAXF
; goto do_32Fx4
;
3632 case Iop_Min32Fx4
: op
= Xsse_MINF
; goto do_32Fx4
;
3635 HReg argL
= iselVecExpr(env
, e
->Iex
.Binop
.arg1
);
3636 HReg argR
= iselVecExpr(env
, e
->Iex
.Binop
.arg2
);
3637 HReg dst
= newVRegV(env
);
3638 addInstr(env
, mk_vMOVsd_RR(argL
, dst
));
3639 addInstr(env
, X86Instr_Sse32Fx4(op
, argR
, dst
));
3643 case Iop_CmpEQ64Fx2
: op
= Xsse_CMPEQF
; goto do_64Fx2
;
3644 case Iop_CmpLT64Fx2
: op
= Xsse_CMPLTF
; goto do_64Fx2
;
3645 case Iop_CmpLE64Fx2
: op
= Xsse_CMPLEF
; goto do_64Fx2
;
3646 case Iop_CmpUN64Fx2
: op
= Xsse_CMPUNF
; goto do_64Fx2
;
3647 case Iop_Max64Fx2
: op
= Xsse_MAXF
; goto do_64Fx2
;
3648 case Iop_Min64Fx2
: op
= Xsse_MINF
; goto do_64Fx2
;
3651 HReg argL
= iselVecExpr(env
, e
->Iex
.Binop
.arg1
);
3652 HReg argR
= iselVecExpr(env
, e
->Iex
.Binop
.arg2
);
3653 HReg dst
= newVRegV(env
);
3655 addInstr(env
, mk_vMOVsd_RR(argL
, dst
));
3656 addInstr(env
, X86Instr_Sse64Fx2(op
, argR
, dst
));
3660 case Iop_CmpEQ32F0x4
: op
= Xsse_CMPEQF
; goto do_32F0x4
;
3661 case Iop_CmpLT32F0x4
: op
= Xsse_CMPLTF
; goto do_32F0x4
;
3662 case Iop_CmpLE32F0x4
: op
= Xsse_CMPLEF
; goto do_32F0x4
;
3663 case Iop_CmpUN32F0x4
: op
= Xsse_CMPUNF
; goto do_32F0x4
;
3664 case Iop_Add32F0x4
: op
= Xsse_ADDF
; goto do_32F0x4
;
3665 case Iop_Div32F0x4
: op
= Xsse_DIVF
; goto do_32F0x4
;
3666 case Iop_Max32F0x4
: op
= Xsse_MAXF
; goto do_32F0x4
;
3667 case Iop_Min32F0x4
: op
= Xsse_MINF
; goto do_32F0x4
;
3668 case Iop_Mul32F0x4
: op
= Xsse_MULF
; goto do_32F0x4
;
3669 case Iop_Sub32F0x4
: op
= Xsse_SUBF
; goto do_32F0x4
;
3671 HReg argL
= iselVecExpr(env
, e
->Iex
.Binop
.arg1
);
3672 HReg argR
= iselVecExpr(env
, e
->Iex
.Binop
.arg2
);
3673 HReg dst
= newVRegV(env
);
3674 addInstr(env
, mk_vMOVsd_RR(argL
, dst
));
3675 addInstr(env
, X86Instr_Sse32FLo(op
, argR
, dst
));
3679 case Iop_CmpEQ64F0x2
: op
= Xsse_CMPEQF
; goto do_64F0x2
;
3680 case Iop_CmpLT64F0x2
: op
= Xsse_CMPLTF
; goto do_64F0x2
;
3681 case Iop_CmpLE64F0x2
: op
= Xsse_CMPLEF
; goto do_64F0x2
;
3682 case Iop_CmpUN64F0x2
: op
= Xsse_CMPUNF
; goto do_64F0x2
;
3683 case Iop_Add64F0x2
: op
= Xsse_ADDF
; goto do_64F0x2
;
3684 case Iop_Div64F0x2
: op
= Xsse_DIVF
; goto do_64F0x2
;
3685 case Iop_Max64F0x2
: op
= Xsse_MAXF
; goto do_64F0x2
;
3686 case Iop_Min64F0x2
: op
= Xsse_MINF
; goto do_64F0x2
;
3687 case Iop_Mul64F0x2
: op
= Xsse_MULF
; goto do_64F0x2
;
3688 case Iop_Sub64F0x2
: op
= Xsse_SUBF
; goto do_64F0x2
;
3690 HReg argL
= iselVecExpr(env
, e
->Iex
.Binop
.arg1
);
3691 HReg argR
= iselVecExpr(env
, e
->Iex
.Binop
.arg2
);
3692 HReg dst
= newVRegV(env
);
3694 addInstr(env
, mk_vMOVsd_RR(argL
, dst
));
3695 addInstr(env
, X86Instr_Sse64FLo(op
, argR
, dst
));
3699 case Iop_QNarrowBin32Sto16Sx8
:
3700 op
= Xsse_PACKSSD
; arg1isEReg
= True
; goto do_SseReRg
;
3701 case Iop_QNarrowBin16Sto8Sx16
:
3702 op
= Xsse_PACKSSW
; arg1isEReg
= True
; goto do_SseReRg
;
3703 case Iop_QNarrowBin16Sto8Ux16
:
3704 op
= Xsse_PACKUSW
; arg1isEReg
= True
; goto do_SseReRg
;
3706 case Iop_InterleaveHI8x16
:
3707 op
= Xsse_UNPCKHB
; arg1isEReg
= True
; goto do_SseReRg
;
3708 case Iop_InterleaveHI16x8
:
3709 op
= Xsse_UNPCKHW
; arg1isEReg
= True
; goto do_SseReRg
;
3710 case Iop_InterleaveHI32x4
:
3711 op
= Xsse_UNPCKHD
; arg1isEReg
= True
; goto do_SseReRg
;
3712 case Iop_InterleaveHI64x2
:
3713 op
= Xsse_UNPCKHQ
; arg1isEReg
= True
; goto do_SseReRg
;
3715 case Iop_InterleaveLO8x16
:
3716 op
= Xsse_UNPCKLB
; arg1isEReg
= True
; goto do_SseReRg
;
3717 case Iop_InterleaveLO16x8
:
3718 op
= Xsse_UNPCKLW
; arg1isEReg
= True
; goto do_SseReRg
;
3719 case Iop_InterleaveLO32x4
:
3720 op
= Xsse_UNPCKLD
; arg1isEReg
= True
; goto do_SseReRg
;
3721 case Iop_InterleaveLO64x2
:
3722 op
= Xsse_UNPCKLQ
; arg1isEReg
= True
; goto do_SseReRg
;
3724 case Iop_AndV128
: op
= Xsse_AND
; goto do_SseReRg
;
3725 case Iop_OrV128
: op
= Xsse_OR
; goto do_SseReRg
;
3726 case Iop_XorV128
: op
= Xsse_XOR
; goto do_SseReRg
;
3727 case Iop_Add8x16
: op
= Xsse_ADD8
; goto do_SseReRg
;
3728 case Iop_Add16x8
: op
= Xsse_ADD16
; goto do_SseReRg
;
3729 case Iop_Add32x4
: op
= Xsse_ADD32
; goto do_SseReRg
;
3730 case Iop_Add64x2
: op
= Xsse_ADD64
; goto do_SseReRg
;
3731 case Iop_QAdd8Sx16
: op
= Xsse_QADD8S
; goto do_SseReRg
;
3732 case Iop_QAdd16Sx8
: op
= Xsse_QADD16S
; goto do_SseReRg
;
3733 case Iop_QAdd8Ux16
: op
= Xsse_QADD8U
; goto do_SseReRg
;
3734 case Iop_QAdd16Ux8
: op
= Xsse_QADD16U
; goto do_SseReRg
;
3735 case Iop_Avg8Ux16
: op
= Xsse_AVG8U
; goto do_SseReRg
;
3736 case Iop_Avg16Ux8
: op
= Xsse_AVG16U
; goto do_SseReRg
;
3737 case Iop_CmpEQ8x16
: op
= Xsse_CMPEQ8
; goto do_SseReRg
;
3738 case Iop_CmpEQ16x8
: op
= Xsse_CMPEQ16
; goto do_SseReRg
;
3739 case Iop_CmpEQ32x4
: op
= Xsse_CMPEQ32
; goto do_SseReRg
;
3740 case Iop_CmpGT8Sx16
: op
= Xsse_CMPGT8S
; goto do_SseReRg
;
3741 case Iop_CmpGT16Sx8
: op
= Xsse_CMPGT16S
; goto do_SseReRg
;
3742 case Iop_CmpGT32Sx4
: op
= Xsse_CMPGT32S
; goto do_SseReRg
;
3743 case Iop_Max16Sx8
: op
= Xsse_MAX16S
; goto do_SseReRg
;
3744 case Iop_Max8Ux16
: op
= Xsse_MAX8U
; goto do_SseReRg
;
3745 case Iop_Min16Sx8
: op
= Xsse_MIN16S
; goto do_SseReRg
;
3746 case Iop_Min8Ux16
: op
= Xsse_MIN8U
; goto do_SseReRg
;
3747 case Iop_MulHi16Ux8
: op
= Xsse_MULHI16U
; goto do_SseReRg
;
3748 case Iop_MulHi16Sx8
: op
= Xsse_MULHI16S
; goto do_SseReRg
;
3749 case Iop_Mul16x8
: op
= Xsse_MUL16
; goto do_SseReRg
;
3750 case Iop_Sub8x16
: op
= Xsse_SUB8
; goto do_SseReRg
;
3751 case Iop_Sub16x8
: op
= Xsse_SUB16
; goto do_SseReRg
;
3752 case Iop_Sub32x4
: op
= Xsse_SUB32
; goto do_SseReRg
;
3753 case Iop_Sub64x2
: op
= Xsse_SUB64
; goto do_SseReRg
;
3754 case Iop_QSub8Sx16
: op
= Xsse_QSUB8S
; goto do_SseReRg
;
3755 case Iop_QSub16Sx8
: op
= Xsse_QSUB16S
; goto do_SseReRg
;
3756 case Iop_QSub8Ux16
: op
= Xsse_QSUB8U
; goto do_SseReRg
;
3757 case Iop_QSub16Ux8
: op
= Xsse_QSUB16U
; goto do_SseReRg
;
3759 HReg arg1
= iselVecExpr(env
, e
->Iex
.Binop
.arg1
);
3760 HReg arg2
= iselVecExpr(env
, e
->Iex
.Binop
.arg2
);
3761 HReg dst
= newVRegV(env
);
3762 if (op
!= Xsse_OR
&& op
!= Xsse_AND
&& op
!= Xsse_XOR
)
3765 addInstr(env
, mk_vMOVsd_RR(arg2
, dst
));
3766 addInstr(env
, X86Instr_SseReRg(op
, arg1
, dst
));
3768 addInstr(env
, mk_vMOVsd_RR(arg1
, dst
));
3769 addInstr(env
, X86Instr_SseReRg(op
, arg2
, dst
));
3774 case Iop_ShlN16x8
: op
= Xsse_SHL16
; goto do_SseShift
;
3775 case Iop_ShlN32x4
: op
= Xsse_SHL32
; goto do_SseShift
;
3776 case Iop_ShlN64x2
: op
= Xsse_SHL64
; goto do_SseShift
;
3777 case Iop_SarN16x8
: op
= Xsse_SAR16
; goto do_SseShift
;
3778 case Iop_SarN32x4
: op
= Xsse_SAR32
; goto do_SseShift
;
3779 case Iop_ShrN16x8
: op
= Xsse_SHR16
; goto do_SseShift
;
3780 case Iop_ShrN32x4
: op
= Xsse_SHR32
; goto do_SseShift
;
3781 case Iop_ShrN64x2
: op
= Xsse_SHR64
; goto do_SseShift
;
      do_SseShift: {
         HReg      greg = iselVecExpr(env, e->Iex.Binop.arg1);
         X86RMI*   rmi  = iselIntExpr_RMI(env, e->Iex.Binop.arg2);
         X86AMode* esp0 = X86AMode_IR(0, hregX86_ESP());
         HReg      ereg = newVRegV(env);
         HReg      dst  = newVRegV(env);
         addInstr(env, X86Instr_Push(X86RMI_Imm(0)));
         addInstr(env, X86Instr_Push(X86RMI_Imm(0)));
         addInstr(env, X86Instr_Push(X86RMI_Imm(0)));
         addInstr(env, X86Instr_Push(rmi));
         addInstr(env, X86Instr_SseLdSt(True/*load*/, ereg, esp0));
         addInstr(env, mk_vMOVsd_RR(greg, dst));
         addInstr(env, X86Instr_SseReRg(op, ereg, dst));
         add_to_esp(env, 16);
         return dst;
      }
      case Iop_NarrowBin32to16x8:
         fn = (HWord)h_generic_calc_NarrowBin32to16x8;
         goto do_SseAssistedBinary;
      case Iop_NarrowBin16to8x16:
         fn = (HWord)h_generic_calc_NarrowBin16to8x16;
         goto do_SseAssistedBinary;
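      /* do_SseAssistedBinary below calls out to a C helper: it carves a
         112-byte scratch area off the stack, 16-aligns a pointer into it
         (argp), and passes three pointers in %eax/%edx/%ecx -- the result
         slot at argp+0 and the two (unaligned-stored) operands at argp+16
         and argp+32 -- presumably using the 3-register-args convention,
         before reloading the result from argp. */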
      do_SseAssistedBinary: {
         /* As with the amd64 case (where this is copied from) we
            generate pretty bad code. */
         HReg dst  = newVRegV(env);
         HReg argL = iselVecExpr(env, e->Iex.Binop.arg1);
         HReg argR = iselVecExpr(env, e->Iex.Binop.arg2);
         HReg argp = newVRegI(env);
         /* subl $112, %esp -- make a space */
         sub_from_esp(env, 112);
         /* leal 48(%esp), %r_argp -- point into it */
         addInstr(env, X86Instr_Lea32(X86AMode_IR(48, hregX86_ESP()),
                                      argp));
         /* andl $-16, %r_argp -- 16-align the pointer */
         addInstr(env, X86Instr_Alu32R(Xalu_AND,
                                       X86RMI_Imm( ~(UInt)15 ),
                                       argp));
         /* Prepare 3 arg regs:
            leal 0(%r_argp), %eax
            leal 16(%r_argp), %edx
            leal 32(%r_argp), %ecx
         */
         addInstr(env, X86Instr_Lea32(X86AMode_IR(0, argp),
                                      hregX86_EAX()));
         addInstr(env, X86Instr_Lea32(X86AMode_IR(16, argp),
                                      hregX86_EDX()));
         addInstr(env, X86Instr_Lea32(X86AMode_IR(32, argp),
                                      hregX86_ECX()));
         /* Store the two args, at (%edx) and (%ecx):
            movupd %argL, 0(%edx)
            movupd %argR, 0(%ecx)
         */
         addInstr(env, X86Instr_SseLdSt(False/*!isLoad*/, argL,
                                        X86AMode_IR(0, hregX86_EDX())));
         addInstr(env, X86Instr_SseLdSt(False/*!isLoad*/, argR,
                                        X86AMode_IR(0, hregX86_ECX())));
         /* call the helper */
         addInstr(env, X86Instr_Call( Xcc_ALWAYS, (Addr32)fn,
                                      3, mk_RetLoc_simple(RLPri_None) ));
         /* fetch the result from memory, using %r_argp, which the
            register allocator will keep alive across the call. */
         addInstr(env, X86Instr_SseLdSt(True/*isLoad*/, dst,
                                        X86AMode_IR(0, argp)));
         /* and finally, clear the space */
         add_to_esp(env, 112);
         return dst;
      }

      default:
         break;
   } /* switch (e->Iex.Binop.op) */
   } /* if (e->tag == Iex_Binop) */
   if (e->tag == Iex_Triop) {
   IRTriop *triop = e->Iex.Triop.details;
   switch (triop->op) {

      case Iop_Add32Fx4: op = Xsse_ADDF; goto do_32Fx4_w_rm;
      case Iop_Sub32Fx4: op = Xsse_SUBF; goto do_32Fx4_w_rm;
      case Iop_Mul32Fx4: op = Xsse_MULF; goto do_32Fx4_w_rm;
      case Iop_Div32Fx4: op = Xsse_DIVF; goto do_32Fx4_w_rm;
      do_32Fx4_w_rm:
      {
         HReg argL = iselVecExpr(env, triop->arg2);
         HReg argR = iselVecExpr(env, triop->arg3);
         HReg dst  = newVRegV(env);
         addInstr(env, mk_vMOVsd_RR(argL, dst));
         /* XXXROUNDINGFIXME */
         /* set roundingmode here */
         addInstr(env, X86Instr_Sse32Fx4(op, argR, dst));
         return dst;
      }

      case Iop_Add64Fx2: op = Xsse_ADDF; goto do_64Fx2_w_rm;
      case Iop_Sub64Fx2: op = Xsse_SUBF; goto do_64Fx2_w_rm;
      case Iop_Mul64Fx2: op = Xsse_MULF; goto do_64Fx2_w_rm;
      case Iop_Div64Fx2: op = Xsse_DIVF; goto do_64Fx2_w_rm;
      do_64Fx2_w_rm:
      {
         HReg argL = iselVecExpr(env, triop->arg2);
         HReg argR = iselVecExpr(env, triop->arg3);
         HReg dst  = newVRegV(env);
         addInstr(env, mk_vMOVsd_RR(argL, dst));
         /* XXXROUNDINGFIXME */
         /* set roundingmode here */
         addInstr(env, X86Instr_Sse64Fx2(op, argR, dst));
         return dst;
      }

      default:
         break;
   } /* switch (triop->op) */
   } /* if (e->tag == Iex_Triop) */
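   /* Vector ITE (conditional select) is done without branches: copy the
      'iftrue' value into dst, then conditionally overwrite it with the
      'iffalse' value using an SSE conditional move guarded by the
      inverted condition (cc ^ 1). */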
   if (e->tag == Iex_ITE) { // VFD
      HReg r1  = iselVecExpr(env, e->Iex.ITE.iftrue);
      HReg r0  = iselVecExpr(env, e->Iex.ITE.iffalse);
      HReg dst = newVRegV(env);
      addInstr(env, mk_vMOVsd_RR(r1,dst));
      X86CondCode cc = iselCondCode(env, e->Iex.ITE.cond);
      addInstr(env, X86Instr_SseCMov(cc ^ 1, r0, dst));
      return dst;
   }
   vex_printf("iselVecExpr (hwcaps = %s): can't reduce\n",
              LibVEX_ppVexHwCaps(VexArchX86,env->hwcaps));
   vpanic("iselVecExpr_wrk");

#  undef REQUIRE_SSE1
#  undef REQUIRE_SSE2
#  undef SSE2_OR_ABOVE
}

/*---------------------------------------------------------*/
/*--- ISEL: Statements                                  ---*/
/*---------------------------------------------------------*/

static void iselStmt ( ISelEnv* env, IRStmt* stmt )
{
   if (vex_traceflags & VEX_TRACE_VCODE) {
      vex_printf("\n-- ");
      ppIRStmt(stmt);
      vex_printf("\n");
   }

   switch (stmt->tag) {

   /* --------- STORE --------- */
   case Ist_Store: {
      IRType    tya = typeOfIRExpr(env->type_env, stmt->Ist.Store.addr);
      IRType    tyd = typeOfIRExpr(env->type_env, stmt->Ist.Store.data);
      IREndness end = stmt->Ist.Store.end;

      if (tya != Ity_I32 || end != Iend_LE)
         goto stmt_fail;

      if (tyd == Ity_I32) {
         X86AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr);
         X86RI*    ri = iselIntExpr_RI(env, stmt->Ist.Store.data);
         addInstr(env, X86Instr_Alu32M(Xalu_MOV,ri,am));
         return;
      }
      if (tyd == Ity_I8 || tyd == Ity_I16) {
         X86AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr);
         HReg r = iselIntExpr_R(env, stmt->Ist.Store.data);
         addInstr(env, X86Instr_Store( toUChar(tyd==Ity_I8 ? 1 : 2),
                                       r, am ));
         return;
      }
      if (tyd == Ity_F64) {
         X86AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr);
         HReg r = iselDblExpr(env, stmt->Ist.Store.data);
         addInstr(env, X86Instr_FpLdSt(False/*store*/, 8, r, am));
         return;
      }
      if (tyd == Ity_F32) {
         X86AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr);
         HReg r = iselFltExpr(env, stmt->Ist.Store.data);
         addInstr(env, X86Instr_FpLdSt(False/*store*/, 4, r, am));
         return;
      }
      if (tyd == Ity_I64) {
         HReg vHi, vLo, rA;
         iselInt64Expr(&vHi, &vLo, env, stmt->Ist.Store.data);
         rA = iselIntExpr_R(env, stmt->Ist.Store.addr);
         addInstr(env, X86Instr_Alu32M(
                          Xalu_MOV, X86RI_Reg(vLo), X86AMode_IR(0, rA)));
         addInstr(env, X86Instr_Alu32M(
                          Xalu_MOV, X86RI_Reg(vHi), X86AMode_IR(4, rA)));
         return;
      }
      if (tyd == Ity_V128) {
         X86AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr);
         HReg r = iselVecExpr(env, stmt->Ist.Store.data);
         addInstr(env, X86Instr_SseLdSt(False/*store*/, r, am));
         return;
      }
      break;
   }

   /* --------- PUT --------- */
   case Ist_Put: {
      IRType ty = typeOfIRExpr(env->type_env, stmt->Ist.Put.data);
      if (ty == Ity_I32) {
         /* We're going to write to memory, so compute the RHS into an
            X86RI. */
         X86RI* ri = iselIntExpr_RI(env, stmt->Ist.Put.data);
         addInstr(env,
                  X86Instr_Alu32M(
                     Xalu_MOV,
                     ri,
                     X86AMode_IR(stmt->Ist.Put.offset,hregX86_EBP())
                  ));
         return;
      }
      if (ty == Ity_I8 || ty == Ity_I16) {
         HReg r = iselIntExpr_R(env, stmt->Ist.Put.data);
         addInstr(env, X86Instr_Store(
                          toUChar(ty==Ity_I8 ? 1 : 2),
                          r,
                          X86AMode_IR(stmt->Ist.Put.offset,
                                      hregX86_EBP())));
         return;
      }
      if (ty == Ity_I64) {
         HReg vHi, vLo;
         X86AMode* am  = X86AMode_IR(stmt->Ist.Put.offset, hregX86_EBP());
         X86AMode* am4 = advance4(am);
         iselInt64Expr(&vHi, &vLo, env, stmt->Ist.Put.data);
         addInstr(env, X86Instr_Alu32M( Xalu_MOV, X86RI_Reg(vLo), am ));
         addInstr(env, X86Instr_Alu32M( Xalu_MOV, X86RI_Reg(vHi), am4 ));
         return;
      }
      if (ty == Ity_V128) {
         HReg      vec = iselVecExpr(env, stmt->Ist.Put.data);
         X86AMode* am  = X86AMode_IR(stmt->Ist.Put.offset, hregX86_EBP());
         addInstr(env, X86Instr_SseLdSt(False/*store*/, vec, am));
         return;
      }
      if (ty == Ity_F32) {
         HReg f32 = iselFltExpr(env, stmt->Ist.Put.data);
         X86AMode* am = X86AMode_IR(stmt->Ist.Put.offset, hregX86_EBP());
         set_FPU_rounding_default(env); /* paranoia */
         addInstr(env, X86Instr_FpLdSt( False/*store*/, 4, f32, am ));
         return;
      }
      if (ty == Ity_F64) {
         HReg f64 = iselDblExpr(env, stmt->Ist.Put.data);
         X86AMode* am = X86AMode_IR(stmt->Ist.Put.offset, hregX86_EBP());
         set_FPU_rounding_default(env); /* paranoia */
         addInstr(env, X86Instr_FpLdSt( False/*store*/, 8, f64, am ));
         return;
      }
      break;
   }

   /* --------- Indexed PUT --------- */
   case Ist_PutI: {
      IRPutI *puti = stmt->Ist.PutI.details;

      X86AMode* am
         = genGuestArrayOffset(
              env, puti->descr,
              puti->ix, puti->bias );

      IRType ty = typeOfIRExpr(env->type_env, puti->data);
      if (ty == Ity_F64) {
         HReg val = iselDblExpr(env, puti->data);
         addInstr(env, X86Instr_FpLdSt( False/*store*/, 8, val, am ));
         return;
      }
      if (ty == Ity_I8) {
         HReg r = iselIntExpr_R(env, puti->data);
         addInstr(env, X86Instr_Store( 1, r, am ));
         return;
      }
      if (ty == Ity_I32) {
         HReg r = iselIntExpr_R(env, puti->data);
         addInstr(env, X86Instr_Alu32M( Xalu_MOV, X86RI_Reg(r), am ));
         return;
      }
      if (ty == Ity_I64) {
         HReg rHi, rLo;
         X86AMode* am4 = advance4(am);
         iselInt64Expr(&rHi, &rLo, env, puti->data);
         addInstr(env, X86Instr_Alu32M( Xalu_MOV, X86RI_Reg(rLo), am ));
         addInstr(env, X86Instr_Alu32M( Xalu_MOV, X86RI_Reg(rHi), am4 ));
         return;
      }
      break;
   }

   /* --------- TMP --------- */
   case Ist_WrTmp: {
      IRTemp tmp = stmt->Ist.WrTmp.tmp;
      IRType ty = typeOfIRTemp(env->type_env, tmp);

      /* optimisation: if stmt->Ist.WrTmp.data is Add32(..,..),
         compute it into an AMode and then use LEA.  This usually
         produces fewer instructions, often because (for memcheck
         created IR) we get t = address-expression, (t is later used
         twice) and so doing this naturally turns address-expression
         back into an X86 amode. */
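      /* For example, IR like "t3 = Add32(t1, 0x18)" is selected as a
         single "leal 0x18(%vr_t1), %vr_t3" rather than a move followed
         by an add (register names here are illustrative only). */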
      if (ty == Ity_I32
          && stmt->Ist.WrTmp.data->tag == Iex_Binop
          && stmt->Ist.WrTmp.data->Iex.Binop.op == Iop_Add32) {
         X86AMode* am = iselIntExpr_AMode(env, stmt->Ist.WrTmp.data);
         HReg dst = lookupIRTemp(env, tmp);
         if (am->tag == Xam_IR && am->Xam.IR.imm == 0) {
            /* Hmm, iselIntExpr_AMode wimped out and just computed the
               value into a register.  Just emit a normal reg-reg move
               so reg-alloc can coalesce it away in the usual way. */
            HReg src = am->Xam.IR.reg;
            addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Reg(src), dst));
         } else {
            addInstr(env, X86Instr_Lea32(am,dst));
         }
         return;
      }
      if (ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8) {
         X86RMI* rmi = iselIntExpr_RMI(env, stmt->Ist.WrTmp.data);
         HReg dst = lookupIRTemp(env, tmp);
         addInstr(env, X86Instr_Alu32R(Xalu_MOV,rmi,dst));
         return;
      }
      if (ty == Ity_I64) {
         HReg rHi, rLo, dstHi, dstLo;
         iselInt64Expr(&rHi,&rLo, env, stmt->Ist.WrTmp.data);
         lookupIRTemp64( &dstHi, &dstLo, env, tmp);
         addInstr(env, mk_iMOVsd_RR(rHi,dstHi) );
         addInstr(env, mk_iMOVsd_RR(rLo,dstLo) );
         return;
      }
      if (ty == Ity_I1) {
         X86CondCode cond = iselCondCode(env, stmt->Ist.WrTmp.data);
         HReg dst = lookupIRTemp(env, tmp);
         addInstr(env, X86Instr_Set32(cond, dst));
         return;
      }
      if (ty == Ity_F64) {
         HReg dst = lookupIRTemp(env, tmp);
         HReg src = iselDblExpr(env, stmt->Ist.WrTmp.data);
         addInstr(env, X86Instr_FpUnary(Xfp_MOV,src,dst));
         return;
      }
      if (ty == Ity_F32) {
         HReg dst = lookupIRTemp(env, tmp);
         HReg src = iselFltExpr(env, stmt->Ist.WrTmp.data);
         addInstr(env, X86Instr_FpUnary(Xfp_MOV,src,dst));
         return;
      }
      if (ty == Ity_V128) {
         HReg dst = lookupIRTemp(env, tmp);
         HReg src = iselVecExpr(env, stmt->Ist.WrTmp.data);
         addInstr(env, mk_vMOVsd_RR(src,dst));
         return;
      }
      break;
   }

   /* --------- Call to DIRTY helper --------- */
   case Ist_Dirty: {
      IRDirty* d = stmt->Ist.Dirty.details;

      /* Figure out the return type, if any. */
      IRType retty = Ity_INVALID;
      if (d->tmp != IRTemp_INVALID)
         retty = typeOfIRTemp(env->type_env, d->tmp);

      Bool retty_ok = False;
      switch (retty) {
         case Ity_INVALID: /* function doesn't return anything */
         case Ity_I64: case Ity_I32: case Ity_I16: case Ity_I8:
         case Ity_V128:
            retty_ok = True; break;
         default:
            break;
      }
      if (!retty_ok)
         break; /* will go to stmt_fail: */

      /* Marshal args, do the call, and set the return value to
         0x555..555 if this is a conditional call that returns a value
         and the call is skipped. */
      UInt   addToSp = 0;
      RetLoc rloc    = mk_RetLoc_INVALID();
      doHelperCall( &addToSp, &rloc, env, d->guard, d->cee, retty, d->args );
      vassert(is_sane_RetLoc(rloc));

      /* Now figure out what to do with the returned value, if any. */
      switch (retty) {
         case Ity_INVALID: {
            /* No return value.  Nothing to do. */
            vassert(d->tmp == IRTemp_INVALID);
            vassert(rloc.pri == RLPri_None);
            vassert(addToSp == 0);
            return;
         }
         case Ity_I32: case Ity_I16: case Ity_I8: {
            /* The returned value is in %eax.  Park it in the register
               associated with tmp. */
            vassert(rloc.pri == RLPri_Int);
            vassert(addToSp == 0);
            HReg dst = lookupIRTemp(env, d->tmp);
            addInstr(env, mk_iMOVsd_RR(hregX86_EAX(),dst) );
            return;
         }
         case Ity_I64: {
            /* The returned value is in %edx:%eax.  Park it in the
               register-pair associated with tmp. */
            vassert(rloc.pri == RLPri_2Int);
            vassert(addToSp == 0);
            HReg dstHi, dstLo;
            lookupIRTemp64( &dstHi, &dstLo, env, d->tmp);
            addInstr(env, mk_iMOVsd_RR(hregX86_EDX(),dstHi) );
            addInstr(env, mk_iMOVsd_RR(hregX86_EAX(),dstLo) );
            return;
         }
         case Ity_V128: {
            /* The returned value is on the stack, and *retloc tells
               us where.  Fish it off the stack and then move the
               stack pointer upwards to clear it, as directed by
               doRegisterAllocation. */
            vassert(rloc.pri == RLPri_V128SpRel);
            vassert(addToSp >= 16);
            HReg      dst = lookupIRTemp(env, d->tmp);
            X86AMode* am  = X86AMode_IR(rloc.spOff, hregX86_ESP());
            addInstr(env, X86Instr_SseLdSt( True/*load*/, dst, am ));
            add_to_esp(env, addToSp);
            return;
         }
         default:
            /*NOTREACHED*/
            vassert(0);
      }
      break;
   }

   /* --------- MEM FENCE --------- */
   case Ist_MBE:
      switch (stmt->Ist.MBE.event) {
         case Imbe_Fence:
            addInstr(env, X86Instr_MFence(env->hwcaps));
            return;
         default:
            break;
      }
      break;

   /* --------- ACAS --------- */
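   /* Both forms below presumably end up as lock cmpxchg / cmpxchg8b at
      the assembler level (X86Instr_ACAS / X86Instr_DACAS).  The expected
      value is copied into the 'old' temp up front; if the compare fails
      (ZF clear) the CMov32(Xcc_NZ, ...) afterwards overwrites it with the
      value actually observed in %eax (and %edx for the double case), so
      oldLo/oldHi always end up holding the prior memory contents. */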
   case Ist_CAS:
      if (stmt->Ist.CAS.details->oldHi == IRTemp_INVALID) {
         /* "normal" singleton CAS */
         UChar  sz;
         IRCAS* cas = stmt->Ist.CAS.details;
         IRType ty  = typeOfIRExpr(env->type_env, cas->dataLo);
         /* get: cas->expdLo into %eax, and cas->dataLo into %ebx */
         X86AMode* am = iselIntExpr_AMode(env, cas->addr);
         HReg rDataLo = iselIntExpr_R(env, cas->dataLo);
         HReg rExpdLo = iselIntExpr_R(env, cas->expdLo);
         HReg rOldLo  = lookupIRTemp(env, cas->oldLo);
         vassert(cas->expdHi == NULL);
         vassert(cas->dataHi == NULL);
         addInstr(env, mk_iMOVsd_RR(rExpdLo, rOldLo));
         addInstr(env, mk_iMOVsd_RR(rExpdLo, hregX86_EAX()));
         addInstr(env, mk_iMOVsd_RR(rDataLo, hregX86_EBX()));
         switch (ty) {
            case Ity_I32: sz = 4; break;
            case Ity_I16: sz = 2; break;
            case Ity_I8:  sz = 1; break;
            default: goto unhandled_cas;
         }
         addInstr(env, X86Instr_ACAS(am, sz));
         addInstr(env,
                  X86Instr_CMov32(Xcc_NZ,
                                  X86RM_Reg(hregX86_EAX()), rOldLo));
         return;
      } else {
         /* double CAS */
         IRCAS* cas = stmt->Ist.CAS.details;
         IRType ty  = typeOfIRExpr(env->type_env, cas->dataLo);
         /* only 32-bit allowed in this case */
         /* get: cas->expdLo into %eax, and cas->dataLo into %ebx */
         /* get: cas->expdHi into %edx, and cas->dataHi into %ecx */
         X86AMode* am = iselIntExpr_AMode(env, cas->addr);
         HReg rDataHi = iselIntExpr_R(env, cas->dataHi);
         HReg rDataLo = iselIntExpr_R(env, cas->dataLo);
         HReg rExpdHi = iselIntExpr_R(env, cas->expdHi);
         HReg rExpdLo = iselIntExpr_R(env, cas->expdLo);
         HReg rOldHi  = lookupIRTemp(env, cas->oldHi);
         HReg rOldLo  = lookupIRTemp(env, cas->oldLo);
         if (ty != Ity_I32)
            goto unhandled_cas;
         addInstr(env, mk_iMOVsd_RR(rExpdHi, rOldHi));
         addInstr(env, mk_iMOVsd_RR(rExpdLo, rOldLo));
         addInstr(env, mk_iMOVsd_RR(rExpdHi, hregX86_EDX()));
         addInstr(env, mk_iMOVsd_RR(rExpdLo, hregX86_EAX()));
         addInstr(env, mk_iMOVsd_RR(rDataHi, hregX86_ECX()));
         addInstr(env, mk_iMOVsd_RR(rDataLo, hregX86_EBX()));
         addInstr(env, X86Instr_DACAS(am));
         addInstr(env,
                  X86Instr_CMov32(Xcc_NZ,
                                  X86RM_Reg(hregX86_EDX()), rOldHi));
         addInstr(env,
                  X86Instr_CMov32(Xcc_NZ,
                                  X86RM_Reg(hregX86_EAX()), rOldLo));
         return;
      }
      unhandled_cas:
      break;

   /* --------- INSTR MARK --------- */
   /* Doesn't generate any executable code ... */
   case Ist_IMark:
      return;

   /* --------- NO-OP --------- */
   /* Fairly self-explanatory, wouldn't you say? */
   case Ist_NoOp:
      return;
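   /* Note on the transfers emitted below and in iselNext: XDirect is a
      direct jump to a known guest address, used when chaining is allowed;
      XIndir jumps to a computed target; and XAssisted hands the transfer
      to the run-time dispatcher together with the IRJumpKind, which is
      presumably how non-boring exits (syscalls, cache invalidation, and
      the like) get serviced.  This is a summary, not a specification. */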

   /* --------- EXIT --------- */
   case Ist_Exit: {
      if (stmt->Ist.Exit.dst->tag != Ico_U32)
         vpanic("iselStmt(x86): Ist_Exit: dst is not a 32-bit value");

      X86CondCode cc    = iselCondCode(env, stmt->Ist.Exit.guard);
      X86AMode*   amEIP = X86AMode_IR(stmt->Ist.Exit.offsIP,
                                      hregX86_EBP());

      /* Case: boring transfer to known address */
      if (stmt->Ist.Exit.jk == Ijk_Boring) {
         if (env->chainingAllowed) {
            /* .. almost always true .. */
            /* Skip the event check at the dst if this is a forwards
               edge. */
            Bool toFastEP
               = ((Addr32)stmt->Ist.Exit.dst->Ico.U32) > env->max_ga;
            if (0) vex_printf("%s", toFastEP ? "Y" : ",");
            addInstr(env, X86Instr_XDirect(stmt->Ist.Exit.dst->Ico.U32,
                                           amEIP, cc, toFastEP));
         } else {
            /* .. very occasionally .. */
            /* We can't use chaining, so ask for an assisted transfer,
               as that's the only alternative that is allowable. */
            HReg r = iselIntExpr_R(env, IRExpr_Const(stmt->Ist.Exit.dst));
            addInstr(env, X86Instr_XAssisted(r, amEIP, cc, Ijk_Boring));
         }
         return;
      }

      /* Case: assisted transfer to arbitrary address */
      switch (stmt->Ist.Exit.jk) {
         /* Keep this list in sync with that in iselNext below */
         case Ijk_Sys_int128:
         case Ijk_Sys_int129:
         case Ijk_Sys_int130:
         case Ijk_Sys_int145:
         case Ijk_Sys_int210:
         case Ijk_Sys_syscall:
         case Ijk_Sys_sysenter:
         case Ijk_InvalICache:
         {
            HReg r = iselIntExpr_R(env, IRExpr_Const(stmt->Ist.Exit.dst));
            addInstr(env, X86Instr_XAssisted(r, amEIP, cc,
                                             stmt->Ist.Exit.jk));
            return;
         }
         default:
            break;
      }

      /* Do we ever expect to see any other kind? */
      goto stmt_fail;
   }

   default: break;
   } /* switch (stmt->tag) */

  stmt_fail:
   ppIRStmt(stmt);
   vpanic("iselStmt");
}

/*---------------------------------------------------------*/
/*--- ISEL: Basic block terminators (Nexts)             ---*/
/*---------------------------------------------------------*/

static void iselNext ( ISelEnv* env,
                       IRExpr* next, IRJumpKind jk, Int offsIP )
{
   if (vex_traceflags & VEX_TRACE_VCODE) {
      vex_printf( "\n-- PUT(%d) = ", offsIP);
      ppIRExpr( next );
      vex_printf( "; exit-");
      ppIRJumpKind(jk);
      vex_printf( "\n");
   }

   /* Case: boring transfer to known address */
   if (next->tag == Iex_Const) {
      IRConst* cdst = next->Iex.Const.con;
      vassert(cdst->tag == Ico_U32);
      if (jk == Ijk_Boring || jk == Ijk_Call) {
         /* Boring transfer to known address */
         X86AMode* amEIP = X86AMode_IR(offsIP, hregX86_EBP());
         if (env->chainingAllowed) {
            /* .. almost always true .. */
            /* Skip the event check at the dst if this is a forwards
               edge. */
            Bool toFastEP
               = ((Addr32)cdst->Ico.U32) > env->max_ga;
            if (0) vex_printf("%s", toFastEP ? "X" : ".");
            addInstr(env, X86Instr_XDirect(cdst->Ico.U32,
                                           amEIP, Xcc_ALWAYS,
                                           toFastEP));
         } else {
            /* .. very occasionally .. */
            /* We can't use chaining, so ask for an assisted transfer,
               as that's the only alternative that is allowable. */
            HReg r = iselIntExpr_R(env, next);
            addInstr(env, X86Instr_XAssisted(r, amEIP, Xcc_ALWAYS,
                                             Ijk_Boring));
         }
         return;
      }
   }

   /* Case: call/return (==boring) transfer to any address */
   switch (jk) {
      case Ijk_Boring: case Ijk_Ret: case Ijk_Call: {
         HReg r = iselIntExpr_R(env, next);
         X86AMode* amEIP = X86AMode_IR(offsIP, hregX86_EBP());
         if (env->chainingAllowed) {
            addInstr(env, X86Instr_XIndir(r, amEIP, Xcc_ALWAYS));
         } else {
            addInstr(env, X86Instr_XAssisted(r, amEIP, Xcc_ALWAYS,
                                             Ijk_Boring));
         }
         return;
      }
      default:
         break;
   }

   /* Case: assisted transfer to arbitrary address */
   switch (jk) {
      /* Keep this list in sync with that for Ist_Exit above */
      case Ijk_Sys_int128:
      case Ijk_Sys_int129:
      case Ijk_Sys_int130:
      case Ijk_Sys_int145:
      case Ijk_Sys_int210:
      case Ijk_Sys_syscall:
      case Ijk_Sys_sysenter:
      case Ijk_InvalICache:
      {
         HReg r = iselIntExpr_R(env, next);
         X86AMode* amEIP = X86AMode_IR(offsIP, hregX86_EBP());
         addInstr(env, X86Instr_XAssisted(r, amEIP, Xcc_ALWAYS, jk));
         return;
      }
      default:
         break;
   }

   vex_printf( "\n-- PUT(%d) = ", offsIP);
   ppIRExpr( next );
   vex_printf( "; exit-");
   ppIRJumpKind(jk);
   vex_printf( "\n");
   vassert(0); // are we expecting any other kind?
}

/*---------------------------------------------------------*/
/*--- Insn selector top-level                           ---*/
/*---------------------------------------------------------*/

/* Translate an entire SB to x86 code. */

HInstrArray* iselSB_X86 ( const IRSB* bb,
                          VexArch arch_host,
                          const VexArchInfo* archinfo_host,
                          const VexAbiInfo* vbi /*UNUSED*/,
                          Int offs_Host_EvC_Counter,
                          Int offs_Host_EvC_FailAddr,
                          Bool chainingAllowed,
                          Bool addProfInc,
                          Addr max_ga )
{
   Int      i, j;
   HReg     hreg, hregHI;
   ISelEnv* env;
   UInt     hwcaps_host = archinfo_host->hwcaps;
   X86AMode *amCounter, *amFailAddr;

   /* sanity ... */
   vassert(arch_host == VexArchX86);
   vassert(0 == (hwcaps_host
                 & ~(VEX_HWCAPS_X86_MMXEXT
                     | VEX_HWCAPS_X86_SSE1
                     | VEX_HWCAPS_X86_SSE2
                     | VEX_HWCAPS_X86_SSE3
                     | VEX_HWCAPS_X86_LZCNT)));

   /* Check that the host's endianness is as expected. */
   vassert(archinfo_host->endness == VexEndnessLE);

   /* Make up an initial environment to use. */
   env = LibVEX_Alloc_inline(sizeof(ISelEnv));
   env->vreg_ctr = 0;

   /* Set up output code array. */
   env->code = newHInstrArray();

   /* Copy BB's type env. */
   env->type_env = bb->tyenv;

   /* Make up an IRTemp -> virtual HReg mapping.  This doesn't
      change as we go along. */
   env->n_vregmap = bb->tyenv->types_used;
   env->vregmap   = LibVEX_Alloc_inline(env->n_vregmap * sizeof(HReg));
   env->vregmapHI = LibVEX_Alloc_inline(env->n_vregmap * sizeof(HReg));

   /* and finally ... */
   env->chainingAllowed = chainingAllowed;
   env->hwcaps          = hwcaps_host;
   env->max_ga          = max_ga;

   /* For each IR temporary, allocate a suitably-kinded virtual
      register. */
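   /* Note that a 64-bit IR temp maps to a *pair* of 32-bit vregs: the
      low half goes in vregmap[] and the high half in vregmapHI[], which
      is why Ity_I64 below allocates two HRegs (j advances twice). */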
   j = 0;
   for (i = 0; i < env->n_vregmap; i++) {
      hregHI = hreg = INVALID_HREG;
      switch (bb->tyenv->types[i]) {
         case Ity_I1:
         case Ity_I8:
         case Ity_I16:
         case Ity_I32:  hreg   = mkHReg(True, HRcInt32,  0, j++); break;
         case Ity_I64:  hreg   = mkHReg(True, HRcInt32,  0, j++);
                        hregHI = mkHReg(True, HRcInt32,  0, j++); break;
         case Ity_F32:
         case Ity_F64:  hreg   = mkHReg(True, HRcFlt64,  0, j++); break;
         case Ity_V128: hreg   = mkHReg(True, HRcVec128, 0, j++); break;
         default: ppIRType(bb->tyenv->types[i]);
                  vpanic("iselBB: IRTemp type");
      }
      env->vregmap[i]   = hreg;
      env->vregmapHI[i] = hregHI;
   }
   env->vreg_ctr = j;

   /* The very first instruction must be an event check. */
   amCounter  = X86AMode_IR(offs_Host_EvC_Counter,  hregX86_EBP());
   amFailAddr = X86AMode_IR(offs_Host_EvC_FailAddr, hregX86_EBP());
   addInstr(env, X86Instr_EvCheck(amCounter, amFailAddr));

   /* Possibly a block counter increment (for profiling).  At this
      point we don't know the address of the counter, so just pretend
      it is zero.  It will have to be patched later, but before this
      translation is used, by a call to LibVEX_patchProfCtr. */
   if (addProfInc) {
      addInstr(env, X86Instr_ProfInc());
   }

   /* Ok, finally we can iterate over the statements. */
   for (i = 0; i < bb->stmts_used; i++)
      iselStmt(env, bb->stmts[i]);

   iselNext(env, bb->next, bb->jumpkind, bb->offsIP);

   /* record the number of vregs we used. */
   env->code->n_vregs = env->vreg_ctr;
   return env->code;
}

/*---------------------------------------------------------------*/
/*--- end                                     host_x86_isel.c ---*/
/*---------------------------------------------------------------*/