2 /*---------------------------------------------------------------*/
3 /*--- begin host_x86_isel.c ---*/
4 /*---------------------------------------------------------------*/
6 /*
7 This file is part of Valgrind, a dynamic binary instrumentation
8 framework.
10 Copyright (C) 2004-2017 OpenWorks LLP
11 info@open-works.net
13 This program is free software; you can redistribute it and/or
14 modify it under the terms of the GNU General Public License as
15 published by the Free Software Foundation; either version 2 of the
16 License, or (at your option) any later version.
18 This program is distributed in the hope that it will be useful, but
19 WITHOUT ANY WARRANTY; without even the implied warranty of
20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 General Public License for more details.
23 You should have received a copy of the GNU General Public License
24 along with this program; if not, see <http://www.gnu.org/licenses/>.
26 The GNU General Public License is contained in the file COPYING.
28 Neither the names of the U.S. Department of Energy nor the
29 University of California nor the names of its contributors may be
30 used to endorse or promote products derived from this software
31 without prior written permission.
34 #include "libvex_basictypes.h"
35 #include "libvex_ir.h"
36 #include "libvex.h"
38 #include "ir_match.h"
39 #include "main_util.h"
40 #include "main_globals.h"
41 #include "host_generic_regs.h"
42 #include "host_generic_simd64.h"
43 #include "host_generic_simd128.h"
44 #include "host_x86_defs.h"
46 /* TODO 21 Apr 2005:
48 -- (Really an assembler issue) don't emit CMov32 as a cmov
49 insn, since that's expensive on P4 and conditional branch
50 is cheaper if (as we expect) the condition is highly predictable
52 -- preserve xmm registers across function calls (by declaring them
53 as trashed by call insns)
55 -- preserve x87 ST stack discipline across function calls. Sigh.
57 -- Check doHelperCall: if a call is conditional, we cannot safely
58 compute any regparm args directly to registers. Hence, the
59 fast-regparm marshalling should be restricted to unconditional
60 calls only.
63 /*---------------------------------------------------------*/
64 /*--- x87 control word stuff ---*/
65 /*---------------------------------------------------------*/
67 /* Vex-generated code expects to run with the FPU set as follows: all
68 exceptions masked, round-to-nearest, precision = 53 bits. This
69 corresponds to an FPU control word value of 0x027F.
71 Similarly the SSE control word (%mxcsr) should be 0x1F80.
73 %fpucw and %mxcsr should have these values on entry to
74 Vex-generated code, and those values should be
75 unchanged at exit.
78 #define DEFAULT_FPUCW 0x027F
80 /* debugging only, do not use */
81 /* define DEFAULT_FPUCW 0x037F */
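/* For reference, per the IA-32 control-word layouts: 0x027F sets the
   six x87 exception mask bits (bits 0-5), PC = 10b (bits 8-9, 53-bit
   precision) and RC = 00b (bits 10-11, round to nearest); the 0x037F
   debug variant differs only in PC = 11b (64-bit extended precision).
   Likewise MXCSR 0x1F80 sets the six SSE exception mask bits
   (bits 7-12) with RC = 00b (bits 13-14, round to nearest). */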
84 /*---------------------------------------------------------*/
85 /*--- misc helpers ---*/
86 /*---------------------------------------------------------*/
88 /* These are duplicated in guest-x86/toIR.c */
89 static IRExpr* unop ( IROp op, IRExpr* a )
91 return IRExpr_Unop(op, a);
94 static IRExpr* binop ( IROp op, IRExpr* a1, IRExpr* a2 )
96 return IRExpr_Binop(op, a1, a2);
99 static IRExpr* bind ( Int binder )
101 return IRExpr_Binder(binder);
104 static Bool isZeroU8 ( IRExpr* e )
106 return e->tag == Iex_Const
107 && e->Iex.Const.con->tag == Ico_U8
108 && e->Iex.Const.con->Ico.U8 == 0;
111 static Bool isZeroU32 ( IRExpr* e )
113 return e->tag == Iex_Const
114 && e->Iex.Const.con->tag == Ico_U32
115 && e->Iex.Const.con->Ico.U32 == 0;
118 //static Bool isZeroU64 ( IRExpr* e )
120 // return e->tag == Iex_Const
121 // && e->Iex.Const.con->tag == Ico_U64
122 // && e->Iex.Const.con->Ico.U64 == 0ULL;
126 /*---------------------------------------------------------*/
127 /*--- ISelEnv ---*/
128 /*---------------------------------------------------------*/
130 /* This carries around:
132 - A mapping from IRTemp to IRType, giving the type of any IRTemp we
133 might encounter. This is computed before insn selection starts,
134 and does not change.
136 - A mapping from IRTemp to HReg. This tells the insn selector
137 which virtual register(s) are associated with each IRTemp
138 temporary. This is computed before insn selection starts, and
139 does not change. We expect this mapping to map precisely the
140 same set of IRTemps as the type mapping does.
142 - vregmap holds the primary register for the IRTemp.
143 - vregmapHI is only used for 64-bit integer-typed
144 IRTemps. It holds the identity of a second
145 32-bit virtual HReg, which holds the high half
146 of the value.
148 - The code array, that is, the insns selected so far.
150 - A counter, for generating new virtual registers.
152 - The host subarchitecture we are selecting insns for.
153 This is set at the start and does not change.
155 - A Bool for indicating whether we may generate chain-me
156 instructions for control flow transfers, or whether we must use
157 XAssisted.
159 - The maximum guest address of any guest insn in this block.
160 Actually, the address of the highest-addressed byte from any insn
161 in this block. Is set at the start and does not change. This is
162 used for detecting jumps which are definitely forward-edges from
163 this block, and therefore can be made (chained) to the fast entry
164 point of the destination, thereby avoiding the destination's
165 event check.
167 Note, this is all (well, mostly) host-independent.
170 typedef
171 struct {
172 /* Constants -- set at the start and do not change. */
173 IRTypeEnv* type_env;
175 HReg* vregmap;
176 HReg* vregmapHI;
177 Int n_vregmap;
179 UInt hwcaps;
181 Bool chainingAllowed;
182 Addr32 max_ga;
184 /* These are modified as we go along. */
185 HInstrArray* code;
186 Int vreg_ctr;
188 ISelEnv;
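/* Example of the vregmap/vregmapHI pairing described above: an
   Ity_I64 IRTemp t is represented by two 32-bit vregs, with
   vregmap[t] holding the low half and vregmapHI[t] the high half;
   lookupIRTemp64 below hands back both. */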
191 static HReg lookupIRTemp ( ISelEnv* env, IRTemp tmp )
193 vassert(tmp >= 0);
194 vassert(tmp < env->n_vregmap);
195 return env->vregmap[tmp];
198 static void lookupIRTemp64 ( HReg* vrHI, HReg* vrLO, ISelEnv* env, IRTemp tmp )
200 vassert(tmp >= 0);
201 vassert(tmp < env->n_vregmap);
202 vassert(! hregIsInvalid(env->vregmapHI[tmp]));
203 *vrLO = env->vregmap[tmp];
204 *vrHI = env->vregmapHI[tmp];
207 static void addInstr ( ISelEnv* env, X86Instr* instr )
209 addHInstr(env->code, instr);
210 if (vex_traceflags & VEX_TRACE_VCODE) {
211 ppX86Instr(instr, False);
212 vex_printf("\n");
216 static HReg newVRegI ( ISelEnv* env )
218 HReg reg = mkHReg(True/*virtual reg*/, HRcInt32, 0/*enc*/, env->vreg_ctr);
219 env->vreg_ctr++;
220 return reg;
223 static HReg newVRegF ( ISelEnv* env )
225 HReg reg = mkHReg(True/*virtual reg*/, HRcFlt64, 0/*enc*/, env->vreg_ctr);
226 env->vreg_ctr++;
227 return reg;
230 static HReg newVRegV ( ISelEnv* env )
232 HReg reg = mkHReg(True/*virtual reg*/, HRcVec128, 0/*enc*/, env->vreg_ctr);
233 env->vreg_ctr++;
234 return reg;
238 /*---------------------------------------------------------*/
239 /*--- ISEL: Forward declarations ---*/
240 /*---------------------------------------------------------*/
242 /* These are organised as iselXXX and iselXXX_wrk pairs. The
243 iselXXX_wrk do the real work, but are not to be called directly.
244 For each XXX, iselXXX calls its iselXXX_wrk counterpart, then
245 checks that all returned registers are virtual. You should not
246 call the _wrk version directly.
248 static X86RMI* iselIntExpr_RMI_wrk ( ISelEnv* env, const IRExpr* e );
249 static X86RMI* iselIntExpr_RMI ( ISelEnv* env, const IRExpr* e );
251 static X86RI* iselIntExpr_RI_wrk ( ISelEnv* env, const IRExpr* e );
252 static X86RI* iselIntExpr_RI ( ISelEnv* env, const IRExpr* e );
254 static X86RM* iselIntExpr_RM_wrk ( ISelEnv* env, const IRExpr* e );
255 static X86RM* iselIntExpr_RM ( ISelEnv* env, const IRExpr* e );
257 static HReg iselIntExpr_R_wrk ( ISelEnv* env, const IRExpr* e );
258 static HReg iselIntExpr_R ( ISelEnv* env, const IRExpr* e );
260 static X86AMode* iselIntExpr_AMode_wrk ( ISelEnv* env, const IRExpr* e );
261 static X86AMode* iselIntExpr_AMode ( ISelEnv* env, const IRExpr* e );
263 static void iselInt64Expr_wrk ( HReg* rHi, HReg* rLo,
264 ISelEnv* env, const IRExpr* e );
265 static void iselInt64Expr ( HReg* rHi, HReg* rLo,
266 ISelEnv* env, const IRExpr* e );
268 static X86CondCode iselCondCode_wrk ( ISelEnv* env, const IRExpr* e );
269 static X86CondCode iselCondCode ( ISelEnv* env, const IRExpr* e );
271 static HReg iselDblExpr_wrk ( ISelEnv* env, const IRExpr* e );
272 static HReg iselDblExpr ( ISelEnv* env, const IRExpr* e );
274 static HReg iselFltExpr_wrk ( ISelEnv* env, const IRExpr* e );
275 static HReg iselFltExpr ( ISelEnv* env, const IRExpr* e );
277 static HReg iselVecExpr_wrk ( ISelEnv* env, const IRExpr* e );
278 static HReg iselVecExpr ( ISelEnv* env, const IRExpr* e );
281 /*---------------------------------------------------------*/
282 /*--- ISEL: Misc helpers ---*/
283 /*---------------------------------------------------------*/
285 /* Make an int reg-reg move. */
287 static X86Instr* mk_iMOVsd_RR ( HReg src, HReg dst )
289 vassert(hregClass(src) == HRcInt32);
290 vassert(hregClass(dst) == HRcInt32);
291 return X86Instr_Alu32R(Xalu_MOV, X86RMI_Reg(src), dst);
295 /* Make a vector reg-reg move. */
297 static X86Instr* mk_vMOVsd_RR ( HReg src, HReg dst )
299 vassert(hregClass(src) == HRcVec128);
300 vassert(hregClass(dst) == HRcVec128);
301 return X86Instr_SseReRg(Xsse_MOV, src, dst);
304 /* Advance/retreat %esp by n. */
306 static void add_to_esp ( ISelEnv* env, Int n )
308 vassert(n > 0 && n < 256 && (n%4) == 0);
309 addInstr(env,
310 X86Instr_Alu32R(Xalu_ADD, X86RMI_Imm(n), hregX86_ESP()));
313 static void sub_from_esp ( ISelEnv* env, Int n )
315 vassert(n > 0 && n < 256 && (n%4) == 0);
316 addInstr(env,
317 X86Instr_Alu32R(Xalu_SUB, X86RMI_Imm(n), hregX86_ESP()));
321 /* Given an amode, return one which references 4 bytes further
322 along. */
324 static X86AMode* advance4 ( X86AMode* am )
326 X86AMode* am4 = dopyX86AMode(am);
327 switch (am4->tag) {
328 case Xam_IRRS:
329 am4->Xam.IRRS.imm += 4; break;
330 case Xam_IR:
331 am4->Xam.IR.imm += 4; break;
332 default:
333 vpanic("advance4(x86,host)");
335 return am4;
339 /* Push an arg onto the host stack, in preparation for a call to a
340 helper function of some kind. Returns the number of 32-bit words
341 pushed. If we encounter an IRExpr_VECRET() then we expect that
342 r_vecRetAddr will be a valid register, that holds the relevant
343 address.
345 static Int pushArg ( ISelEnv* env, IRExpr* arg, HReg r_vecRetAddr )
347 if (UNLIKELY(arg->tag == Iex_VECRET)) {
348 vassert(0); //ATC
349 vassert(!hregIsInvalid(r_vecRetAddr));
350 addInstr(env, X86Instr_Push(X86RMI_Reg(r_vecRetAddr)));
351 return 1;
353 if (UNLIKELY(arg->tag == Iex_GSPTR)) {
354 addInstr(env, X86Instr_Push(X86RMI_Reg(hregX86_EBP())));
355 return 1;
357 /* Else it's a "normal" expression. */
358 IRType arg_ty = typeOfIRExpr(env->type_env, arg);
359 if (arg_ty == Ity_I32) {
360 addInstr(env, X86Instr_Push(iselIntExpr_RMI(env, arg)));
361 return 1;
362 } else
363 if (arg_ty == Ity_I64) {
364 HReg rHi, rLo;
365 iselInt64Expr(&rHi, &rLo, env, arg);
366 addInstr(env, X86Instr_Push(X86RMI_Reg(rHi)));
367 addInstr(env, X86Instr_Push(X86RMI_Reg(rLo)));
368 return 2;
370 ppIRExpr(arg);
371 vpanic("pushArg(x86): can't handle arg of this type");
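/* Note on the I64 case above: pushing rHi first and rLo second means
   the low word ends up at the lower stack address, giving the
   little-endian in-memory layout a cdecl callee expects for a 64-bit
   argument passed on the stack. */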
375 /* Complete the call to a helper function, by calling the
376 helper and clearing the args off the stack. */
378 static
379 void callHelperAndClearArgs ( ISelEnv* env, X86CondCode cc,
380 IRCallee* cee, Int n_arg_ws,
381 RetLoc rloc )
383 /* Complication. Need to decide which reg to use as the fn address
384 pointer, in a way that doesn't trash regparm-passed
385 parameters. */
386 vassert(sizeof(void*) == 4);
388 addInstr(env, X86Instr_Call( cc, (Addr)cee->addr,
389 cee->regparms, rloc));
390 if (n_arg_ws > 0)
391 add_to_esp(env, 4*n_arg_ws);
395 /* Used only in doHelperCall. See big comment in doHelperCall re
396 handling of regparm args. This function figures out whether
397 evaluation of an expression might require use of a fixed register.
398 If in doubt return True (safe but suboptimal).
400 static
401 Bool mightRequireFixedRegs ( IRExpr* e )
403 if (UNLIKELY(is_IRExpr_VECRET_or_GSPTR(e))) {
404 // These are always "safe" -- either a copy of %esp in some
405 // arbitrary vreg, or a copy of %ebp, respectively.
406 return False;
408 /* Else it's a "normal" expression. */
409 switch (e->tag) {
410 case Iex_RdTmp: case Iex_Const: case Iex_Get:
411 return False;
412 default:
413 return True;
418 /* Do a complete function call. |guard| is a Ity_Bit expression
419 indicating whether or not the call happens. If guard==NULL, the
420 call is unconditional. |retloc| is set to indicate where the
421 return value is after the call. The caller (of this fn) must
422 generate code to add |stackAdjustAfterCall| to the stack pointer
423 after the call is done. */
425 static
426 void doHelperCall ( /*OUT*/UInt* stackAdjustAfterCall,
427 /*OUT*/RetLoc* retloc,
428 ISelEnv* env,
429 IRExpr* guard,
430 IRCallee* cee, IRType retTy, IRExpr** args )
432 X86CondCode cc;
433 HReg argregs[3];
434 HReg tmpregs[3];
435 Bool danger;
436 Int not_done_yet, n_args, n_arg_ws, stack_limit,
437 i, argreg, argregX;
439 /* Set default returns. We'll update them later if needed. */
440 *stackAdjustAfterCall = 0;
441 *retloc = mk_RetLoc_INVALID();
443 /* These are used for cross-checking that IR-level constraints on
444 the use of Iex_VECRET and Iex_GSPTR are observed. */
445 UInt nVECRETs = 0;
446 UInt nGSPTRs = 0;
448 /* Marshal args for a call, do the call, and clear the stack.
449 Complexities to consider:
451 * The return type can be I{64,32,16,8} or V128. In the V128
452 case, it is expected that |args| will contain the special
453 node IRExpr_VECRET(), in which case this routine generates
454 code to allocate space on the stack for the vector return
455 value. Since we are not passing any scalars on the stack, it
456 is enough to preallocate the return space before marshalling
457 any arguments, in this case.
459 |args| may also contain IRExpr_GSPTR(), in which case the
460 value in %ebp is passed as the corresponding argument.
462 * If the callee claims regparmness of 1, 2 or 3, we must pass the
463 first 1, 2 or 3 args in registers (EAX, EDX, and ECX
464 respectively). To keep things relatively simple, only args of
465 type I32 may be passed as regparms -- just bomb out if anything
466 else turns up. Clearly this depends on the front ends not
467 trying to pass any other types as regparms.
470 /* 16 Nov 2004: the regparm handling is complicated by the
471 following problem.
473 Consider a call to a function with two regparm parameters:
474 f(e1,e2). We need to compute e1 into %eax and e2 into %edx.
475 Suppose code is first generated to compute e1 into %eax. Then,
476 code is generated to compute e2 into %edx. Unfortunately, if
477 the latter code sequence uses %eax, it will trash the value of
478 e1 computed by the former sequence. This could happen if (for
479 example) e2 itself involved a function call. In the code below,
480 args are evaluated right-to-left, not left-to-right, but the
481 principle and the problem are the same.
483 One solution is to compute all regparm-bound args into vregs
484 first, and once they are all done, move them to the relevant
485 real regs. This always gives correct code, but it also gives
486 a bunch of vreg-to-rreg moves which are usually redundant but
487 are hard for the register allocator to get rid of.
489 A compromise is to first examine all regparm'd argument
490 expressions. If they are all so simple that it is clear
491 they will be evaluated without use of any fixed registers,
492 use the old compute-directly-to-fixed-target scheme. If not,
493 be safe and use the via-vregs scheme.
495 Note this requires being able to examine an expression and
496 determine whether or not evaluation of it might use a fixed
497 register. That requires knowledge of how the rest of this
498 insn selector works. Currently just the following 3 are
499 regarded as safe -- hopefully they cover the majority of
500 arguments in practice: IRExpr_Tmp IRExpr_Const IRExpr_Get.
502 vassert(cee->regparms >= 0 && cee->regparms <= 3);
504 /* Count the number of args, and also the VECRETs and GSPTRs */
505 n_args = n_arg_ws = 0;
506 while (args[n_args]) {
507 IRExpr* arg = args[n_args];
508 n_args++;
509 if (UNLIKELY(arg->tag == Iex_VECRET)) {
510 nVECRETs++;
511 } else if (UNLIKELY(arg->tag == Iex_GSPTR)) {
512 nGSPTRs++;
516 /* If this fails, the IR is ill-formed */
517 vassert(nGSPTRs == 0 || nGSPTRs == 1);
519 /* If we have a VECRET, allocate space on the stack for the return
520 value, and record the stack pointer after that. */
521 HReg r_vecRetAddr = INVALID_HREG;
522 if (nVECRETs == 1) {
523 vassert(retTy == Ity_V128 || retTy == Ity_V256);
524 vassert(retTy != Ity_V256); // we don't handle that yet (if ever)
525 r_vecRetAddr = newVRegI(env);
526 sub_from_esp(env, 16);
527 addInstr(env, mk_iMOVsd_RR( hregX86_ESP(), r_vecRetAddr ));
528 } else {
529 // If either of these fail, the IR is ill-formed
530 vassert(retTy != Ity_V128 && retTy != Ity_V256);
531 vassert(nVECRETs == 0);
534 not_done_yet = n_args;
536 stack_limit = cee->regparms;
538 /* ------ BEGIN marshall all arguments ------ */
540 /* Push (R to L) the stack-passed args, [n_args-1 .. stack_limit] */
541 for (i = n_args-1; i >= stack_limit; i--) {
542 n_arg_ws += pushArg(env, args[i], r_vecRetAddr);
543 not_done_yet--;
546 /* args [stack_limit-1 .. 0] and possibly %ebp are to be passed in
547 registers. */
549 if (cee->regparms > 0) {
551 /* ------ BEGIN deal with regparms ------ */
553 /* deal with regparms, not forgetting %ebp if needed. */
554 argregs[0] = hregX86_EAX();
555 argregs[1] = hregX86_EDX();
556 argregs[2] = hregX86_ECX();
557 tmpregs[0] = tmpregs[1] = tmpregs[2] = INVALID_HREG;
559 argreg = cee->regparms;
561 /* In keeping with big comment above, detect potential danger
562 and use the via-vregs scheme if needed. */
563 danger = False;
564 for (i = stack_limit-1; i >= 0; i--) {
565 if (mightRequireFixedRegs(args[i])) {
566 danger = True;
567 break;
571 if (danger) {
573 /* Move via temporaries */
574 argregX = argreg;
575 for (i = stack_limit-1; i >= 0; i--) {
577 if (0) {
578 vex_printf("x86 host: register param is complex: ");
579 ppIRExpr(args[i]);
580 vex_printf("\n");
583 IRExpr* arg = args[i];
584 argreg--;
585 vassert(argreg >= 0);
586 if (UNLIKELY(arg->tag == Iex_VECRET)) {
587 vassert(0); //ATC
589 else if (UNLIKELY(arg->tag == Iex_GSPTR)) {
590 vassert(0); //ATC
591 } else {
592 vassert(typeOfIRExpr(env->type_env, arg) == Ity_I32);
593 tmpregs[argreg] = iselIntExpr_R(env, arg);
595 not_done_yet--;
597 for (i = stack_limit-1; i >= 0; i--) {
598 argregX--;
599 vassert(argregX >= 0);
600 addInstr( env, mk_iMOVsd_RR( tmpregs[argregX], argregs[argregX] ) );
603 } else {
604 /* It's safe to compute all regparm args directly into their
605 target registers. */
606 for (i = stack_limit-1; i >= 0; i--) {
607 IRExpr* arg = args[i];
608 argreg--;
609 vassert(argreg >= 0);
610 if (UNLIKELY(arg->tag == Iex_VECRET)) {
611 vassert(!hregIsInvalid(r_vecRetAddr));
612 addInstr(env, X86Instr_Alu32R(Xalu_MOV,
613 X86RMI_Reg(r_vecRetAddr),
614 argregs[argreg]));
616 else if (UNLIKELY(arg->tag == Iex_GSPTR)) {
617 vassert(0); //ATC
618 } else {
619 vassert(typeOfIRExpr(env->type_env, arg) == Ity_I32);
620 addInstr(env, X86Instr_Alu32R(Xalu_MOV,
621 iselIntExpr_RMI(env, arg),
622 argregs[argreg]));
624 not_done_yet--;
629 /* ------ END deal with regparms ------ */
633 vassert(not_done_yet == 0);
635 /* ------ END marshall all arguments ------ */
637 /* Now we can compute the condition. We can't do it earlier
638 because the argument computations could trash the condition
639 codes. Be a bit clever to handle the common case where the
640 guard is 1:Bit. */
641 cc = Xcc_ALWAYS;
642 if (guard) {
643 if (guard->tag == Iex_Const
644 && guard->Iex.Const.con->tag == Ico_U1
645 && guard->Iex.Const.con->Ico.U1 == True) {
646 /* unconditional -- do nothing */
647 } else {
648 cc = iselCondCode( env, guard );
652 /* Do final checks, set the return values, and generate the call
653 instruction proper. */
654 vassert(*stackAdjustAfterCall == 0);
655 vassert(is_RetLoc_INVALID(*retloc));
656 switch (retTy) {
657 case Ity_INVALID:
658 /* Function doesn't return a value. */
659 *retloc = mk_RetLoc_simple(RLPri_None);
660 break;
661 case Ity_I64:
662 *retloc = mk_RetLoc_simple(RLPri_2Int);
663 break;
664 case Ity_I32: case Ity_I16: case Ity_I8:
665 *retloc = mk_RetLoc_simple(RLPri_Int);
666 break;
667 case Ity_V128:
668 *retloc = mk_RetLoc_spRel(RLPri_V128SpRel, 0);
669 *stackAdjustAfterCall = 16;
670 break;
671 case Ity_V256:
672 vassert(0); // ATC
673 *retloc = mk_RetLoc_spRel(RLPri_V256SpRel, 0);
674 *stackAdjustAfterCall = 32;
675 break;
676 default:
677 /* IR can denote other possible return types, but we don't
678 handle those here. */
679 vassert(0);
682 /* Finally, generate the call itself. This needs the *retloc value
683 set in the switch above, which is why it's at the end. */
684 callHelperAndClearArgs( env, cc, cee, n_arg_ws, *retloc );
688 /* Given a guest-state array descriptor, an index expression and a
689 bias, generate an X86AMode holding the relevant guest state
690 offset. */
692 static
693 X86AMode* genGuestArrayOffset ( ISelEnv* env, IRRegArray* descr,
694 IRExpr* off, Int bias )
696 HReg tmp, roff;
697 Int elemSz = sizeofIRType(descr->elemTy);
698 Int nElems = descr->nElems;
699 Int shift = 0;
701 /* throw out any cases not generated by an x86 front end. In
702 theory there might be a day where we need to handle them -- if
703 we ever run non-x86-guest on x86 host. */
705 if (nElems != 8)
706 vpanic("genGuestArrayOffset(x86 host)(1)");
708 switch (elemSz) {
709 case 1: shift = 0; break;
710 case 4: shift = 2; break;
711 case 8: shift = 3; break;
712 default: vpanic("genGuestArrayOffset(x86 host)(2)");
715 /* Compute off into a reg, %off. Then return:
717 movl %off, %tmp
718 addl $bias, %tmp (if bias != 0)
719 andl $7, %tmp
720 ... base(%ebp, %tmp, shift) ...
722 tmp = newVRegI(env);
723 roff = iselIntExpr_R(env, off);
724 addInstr(env, mk_iMOVsd_RR(roff, tmp));
725 if (bias != 0) {
726 addInstr(env,
727 X86Instr_Alu32R(Xalu_ADD, X86RMI_Imm(bias), tmp));
729 addInstr(env,
730 X86Instr_Alu32R(Xalu_AND, X86RMI_Imm(7), tmp));
731 return
732 X86AMode_IRRS( descr->base, hregX86_EBP(), tmp, shift );
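/* Worked example (hypothetical values): for an 8-entry array of F64s
   (elemSz 8, so shift 3), with the index already in some vreg %ix and
   bias 1, the code above amounts to
      movl %ix,%tmp ; addl $1,%tmp ; andl $7,%tmp
   and the returned amode is descr->base(%ebp,%tmp,8). */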
736 /* Mess with the FPU's rounding mode: set to the default rounding mode
737 (DEFAULT_FPUCW). */
738 static
739 void set_FPU_rounding_default ( ISelEnv* env )
741 /* pushl $DEFAULT_FPUCW
742 fldcw 0(%esp)
743 addl $4, %esp
745 X86AMode* zero_esp = X86AMode_IR(0, hregX86_ESP());
746 addInstr(env, X86Instr_Push(X86RMI_Imm(DEFAULT_FPUCW)));
747 addInstr(env, X86Instr_FpLdCW(zero_esp));
748 add_to_esp(env, 4);
752 /* Mess with the FPU's rounding mode: 'mode' is an I32-typed
753 expression denoting a value in the range 0 .. 3, indicating a round
754 mode encoded as per type IRRoundingMode. Set the x87 FPU to have
755 the same rounding.
757 static
758 void set_FPU_rounding_mode ( ISelEnv* env, IRExpr* mode )
760 HReg rrm = iselIntExpr_R(env, mode);
761 HReg rrm2 = newVRegI(env);
762 X86AMode* zero_esp = X86AMode_IR(0, hregX86_ESP());
764 /* movl %rrm, %rrm2
765 andl $3, %rrm2 -- shouldn't be needed; paranoia
766 shll $10, %rrm2
767 orl $DEFAULT_FPUCW, %rrm2
768 pushl %rrm2
769 fldcw 0(%esp)
770 addl $4, %esp
772 addInstr(env, mk_iMOVsd_RR(rrm, rrm2));
773 addInstr(env, X86Instr_Alu32R(Xalu_AND, X86RMI_Imm(3), rrm2));
774 addInstr(env, X86Instr_Sh32(Xsh_SHL, 10, rrm2));
775 addInstr(env, X86Instr_Alu32R(Xalu_OR, X86RMI_Imm(DEFAULT_FPUCW), rrm2));
776 addInstr(env, X86Instr_Push(X86RMI_Reg(rrm2)));
777 addInstr(env, X86Instr_FpLdCW(zero_esp));
778 add_to_esp(env, 4);
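/* The bare shift-by-10 above works because IRRoundingMode's encoding
   (0 = nearest, 1 = -inf, 2 = +inf, 3 = zero) matches the x87 RC
   field, which lives in bits 11:10 of the control word; the AND with
   3 is only the paranoia noted in the comment. */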
782 /* Generate !src into a new vector register, and be sure that the code
783 is SSE1 compatible. Amazing that Intel doesn't offer a less crappy
784 way to do this.
786 static HReg do_sse_Not128 ( ISelEnv* env, HReg src )
788 HReg dst = newVRegV(env);
789 /* Set dst to zero. If dst contains a NaN then all hell might
790 break loose after the comparison. So, first zero it. */
791 addInstr(env, X86Instr_SseReRg(Xsse_XOR, dst, dst));
792 /* And now make it all 1s ... */
793 addInstr(env, X86Instr_Sse32Fx4(Xsse_CMPEQF, dst, dst));
794 /* Finally, xor 'src' into it. */
795 addInstr(env, X86Instr_SseReRg(Xsse_XOR, src, dst));
796 /* Doesn't that just totally suck? */
797 return dst;
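/* In SSE1 terms the three instructions above are roughly
      xorps %dst,%dst      -- dst = 0, so no NaNs
      cmpeqps %dst,%dst    -- 0 == 0 in every lane, so dst = all ones
      xorps %src,%dst      -- dst = ~src
   (assuming Xsse_CMPEQF on Sse32Fx4 encodes cmpeqps). */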
801 /* Round an x87 FPU value to 53-bit-mantissa precision, to be used
802 after most non-simple FPU operations (simple = +, -, *, / and
803 sqrt).
805 This could be done a lot more efficiently if needed, by loading
806 zero and adding it to the value to be rounded (fldz ; faddp?).
808 static void roundToF64 ( ISelEnv* env, HReg reg )
810 X86AMode* zero_esp = X86AMode_IR(0, hregX86_ESP());
811 sub_from_esp(env, 8);
812 addInstr(env, X86Instr_FpLdSt(False/*store*/, 8, reg, zero_esp));
813 addInstr(env, X86Instr_FpLdSt(True/*load*/, 8, reg, zero_esp));
814 add_to_esp(env, 8);
818 /*---------------------------------------------------------*/
819 /*--- ISEL: Integer expressions (32/16/8 bit) ---*/
820 /*---------------------------------------------------------*/
822 /* Select insns for an integer-typed expression, and add them to the
823 code list. Return a reg holding the result. This reg will be a
824 virtual register. THE RETURNED REG MUST NOT BE MODIFIED. If you
825 want to modify it, ask for a new vreg, copy it in there, and modify
826 the copy. The register allocator will do its best to map both
827 vregs to the same real register, so the copies will often disappear
828 later in the game.
830 This should handle expressions of 32, 16 and 8-bit type. All
831 results are returned in a 32-bit register. For 16- and 8-bit
832 expressions, the upper 16/24 bits are arbitrary, so you should mask
833 or sign extend partial values if necessary.
836 static HReg iselIntExpr_R ( ISelEnv* env, const IRExpr* e )
838 HReg r = iselIntExpr_R_wrk(env, e);
839 /* sanity checks ... */
840 # if 0
841 vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
842 # endif
843 vassert(hregClass(r) == HRcInt32);
844 vassert(hregIsVirtual(r));
845 return r;
848 /* DO NOT CALL THIS DIRECTLY ! */
849 static HReg iselIntExpr_R_wrk ( ISelEnv* env, const IRExpr* e )
851 MatchInfo mi;
853 IRType ty = typeOfIRExpr(env->type_env,e);
854 vassert(ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8);
856 switch (e->tag) {
858 /* --------- TEMP --------- */
859 case Iex_RdTmp: {
860 return lookupIRTemp(env, e->Iex.RdTmp.tmp);
863 /* --------- LOAD --------- */
864 case Iex_Load: {
865 HReg dst = newVRegI(env);
866 X86AMode* amode = iselIntExpr_AMode ( env, e->Iex.Load.addr );
868 /* We can't handle big-endian loads, nor load-linked. */
869 if (e->Iex.Load.end != Iend_LE)
870 goto irreducible;
872 if (ty == Ity_I32) {
873 addInstr(env, X86Instr_Alu32R(Xalu_MOV,
874 X86RMI_Mem(amode), dst) );
875 return dst;
877 if (ty == Ity_I16) {
878 addInstr(env, X86Instr_LoadEX(2,False,amode,dst));
879 return dst;
881 if (ty == Ity_I8) {
882 addInstr(env, X86Instr_LoadEX(1,False,amode,dst));
883 return dst;
885 break;
888 /* --------- TERNARY OP --------- */
889 case Iex_Triop: {
890 IRTriop *triop = e->Iex.Triop.details;
891 /* C3210 flags following FPU partial remainder (fprem), both
892 IEEE compliant (PREM1) and non-IEEE compliant (PREM). */
893 if (triop->op == Iop_PRemC3210F64
894 || triop->op == Iop_PRem1C3210F64) {
895 HReg junk = newVRegF(env);
896 HReg dst = newVRegI(env);
897 HReg srcL = iselDblExpr(env, triop->arg2);
898 HReg srcR = iselDblExpr(env, triop->arg3);
899 /* XXXROUNDINGFIXME */
900 /* set roundingmode here */
901 addInstr(env, X86Instr_FpBinary(
902 triop->op==Iop_PRemC3210F64
903 ? Xfp_PREM : Xfp_PREM1,
904 srcL,srcR,junk
906 /* The previous pseudo-insn will have left the FPU's C3210
907 flags set correctly. So bag them. */
908 addInstr(env, X86Instr_FpStSW_AX());
909 addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), dst));
910 addInstr(env, X86Instr_Alu32R(Xalu_AND, X86RMI_Imm(0x4700), dst));
911 return dst;
914 break;
917 /* --------- BINARY OP --------- */
918 case Iex_Binop: {
919 X86AluOp aluOp;
920 X86ShiftOp shOp;
922 /* Pattern: Sub32(0,x) */
923 if (e->Iex.Binop.op == Iop_Sub32 && isZeroU32(e->Iex.Binop.arg1)) {
924 HReg dst = newVRegI(env);
925 HReg reg = iselIntExpr_R(env, e->Iex.Binop.arg2);
926 addInstr(env, mk_iMOVsd_RR(reg,dst));
927 addInstr(env, X86Instr_Unary32(Xun_NEG,dst));
928 return dst;
931 /* Is it an addition or logical style op? */
932 switch (e->Iex.Binop.op) {
933 case Iop_Add8: case Iop_Add16: case Iop_Add32:
934 aluOp = Xalu_ADD; break;
935 case Iop_Sub8: case Iop_Sub16: case Iop_Sub32:
936 aluOp = Xalu_SUB; break;
937 case Iop_And8: case Iop_And16: case Iop_And32:
938 aluOp = Xalu_AND; break;
939 case Iop_Or8: case Iop_Or16: case Iop_Or32:
940 aluOp = Xalu_OR; break;
941 case Iop_Xor8: case Iop_Xor16: case Iop_Xor32:
942 aluOp = Xalu_XOR; break;
943 case Iop_Mul16: case Iop_Mul32:
944 aluOp = Xalu_MUL; break;
945 default:
946 aluOp = Xalu_INVALID; break;
948 /* For commutative ops we assume any literal
949 values are on the second operand. */
950 if (aluOp != Xalu_INVALID) {
951 HReg dst = newVRegI(env);
952 HReg reg = iselIntExpr_R(env, e->Iex.Binop.arg1);
953 X86RMI* rmi = iselIntExpr_RMI(env, e->Iex.Binop.arg2);
954 addInstr(env, mk_iMOVsd_RR(reg,dst));
955 addInstr(env, X86Instr_Alu32R(aluOp, rmi, dst));
956 return dst;
958 /* Could do better here; forcing the first arg into a reg
959 isn't always clever.
960 -- t70 = Xor32(And32(Xor32(LDle:I32(Add32(t41,0xFFFFFFA0:I32)),
961 LDle:I32(Add32(t41,0xFFFFFFA4:I32))),LDle:I32(Add32(
962 t41,0xFFFFFFA8:I32))),LDle:I32(Add32(t41,0xFFFFFFA0:I32)))
963 movl 0xFFFFFFA0(%vr41),%vr107
964 movl 0xFFFFFFA4(%vr41),%vr108
965 movl %vr107,%vr106
966 xorl %vr108,%vr106
967 movl 0xFFFFFFA8(%vr41),%vr109
968 movl %vr106,%vr105
969 andl %vr109,%vr105
970 movl 0xFFFFFFA0(%vr41),%vr110
971 movl %vr105,%vr104
972 xorl %vr110,%vr104
973 movl %vr104,%vr70
976 /* Perhaps a shift op? */
977 switch (e->Iex.Binop.op) {
978 case Iop_Shl32: case Iop_Shl16: case Iop_Shl8:
979 shOp = Xsh_SHL; break;
980 case Iop_Shr32: case Iop_Shr16: case Iop_Shr8:
981 shOp = Xsh_SHR; break;
982 case Iop_Sar32: case Iop_Sar16: case Iop_Sar8:
983 shOp = Xsh_SAR; break;
984 default:
985 shOp = Xsh_INVALID; break;
987 if (shOp != Xsh_INVALID) {
988 HReg dst = newVRegI(env);
990 /* regL = the value to be shifted */
991 HReg regL = iselIntExpr_R(env, e->Iex.Binop.arg1);
992 addInstr(env, mk_iMOVsd_RR(regL,dst));
994 /* Do any necessary widening for 16/8 bit operands */
995 switch (e->Iex.Binop.op) {
996 case Iop_Shr8:
997 addInstr(env, X86Instr_Alu32R(
998 Xalu_AND, X86RMI_Imm(0xFF), dst));
999 break;
1000 case Iop_Shr16:
1001 addInstr(env, X86Instr_Alu32R(
1002 Xalu_AND, X86RMI_Imm(0xFFFF), dst));
1003 break;
1004 case Iop_Sar8:
1005 addInstr(env, X86Instr_Sh32(Xsh_SHL, 24, dst));
1006 addInstr(env, X86Instr_Sh32(Xsh_SAR, 24, dst));
1007 break;
1008 case Iop_Sar16:
1009 addInstr(env, X86Instr_Sh32(Xsh_SHL, 16, dst));
1010 addInstr(env, X86Instr_Sh32(Xsh_SAR, 16, dst));
1011 break;
1012 default: break;
1015 /* Now consider the shift amount. If it's a literal, we
1016 can do a much better job than the general case. */
1017 if (e->Iex.Binop.arg2->tag == Iex_Const) {
1018 /* assert that the IR is well-typed */
1019 Int nshift;
1020 vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
1021 nshift = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
1022 vassert(nshift >= 0);
1023 if (nshift > 0)
1024 /* Can't allow nshift==0 since that means %cl */
1025 addInstr(env, X86Instr_Sh32( shOp, nshift, dst ));
1026 } else {
1027 /* General case; we have to force the amount into %cl. */
1028 HReg regR = iselIntExpr_R(env, e->Iex.Binop.arg2);
1029 addInstr(env, mk_iMOVsd_RR(regR,hregX86_ECX()));
1030 addInstr(env, X86Instr_Sh32(shOp, 0/* %cl */, dst));
1032 return dst;
1035 /* Handle misc other ops. */
1037 if (e->Iex.Binop.op == Iop_Max32U) {
1038 HReg src1 = iselIntExpr_R(env, e->Iex.Binop.arg1);
1039 HReg dst = newVRegI(env);
1040 HReg src2 = iselIntExpr_R(env, e->Iex.Binop.arg2);
1041 addInstr(env, mk_iMOVsd_RR(src1,dst));
1042 addInstr(env, X86Instr_Alu32R(Xalu_CMP, X86RMI_Reg(src2), dst));
1043 addInstr(env, X86Instr_CMov32(Xcc_B, X86RM_Reg(src2), dst));
1044 return dst;
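/* The CMP sets flags for dst - src2, and Xcc_B (unsigned below)
   triggers the conditional move, so dst ends up holding the unsigned
   maximum of the two operands. */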
1047 if (e->Iex.Binop.op == Iop_8HLto16) {
1048 HReg hi8 = newVRegI(env);
1049 HReg lo8 = newVRegI(env);
1050 HReg hi8s = iselIntExpr_R(env, e->Iex.Binop.arg1);
1051 HReg lo8s = iselIntExpr_R(env, e->Iex.Binop.arg2);
1052 addInstr(env, mk_iMOVsd_RR(hi8s, hi8));
1053 addInstr(env, mk_iMOVsd_RR(lo8s, lo8));
1054 addInstr(env, X86Instr_Sh32(Xsh_SHL, 8, hi8));
1055 addInstr(env, X86Instr_Alu32R(Xalu_AND, X86RMI_Imm(0xFF), lo8));
1056 addInstr(env, X86Instr_Alu32R(Xalu_OR, X86RMI_Reg(lo8), hi8));
1057 return hi8;
1060 if (e->Iex.Binop.op == Iop_16HLto32) {
1061 HReg hi16 = newVRegI(env);
1062 HReg lo16 = newVRegI(env);
1063 HReg hi16s = iselIntExpr_R(env, e->Iex.Binop.arg1);
1064 HReg lo16s = iselIntExpr_R(env, e->Iex.Binop.arg2);
1065 addInstr(env, mk_iMOVsd_RR(hi16s, hi16));
1066 addInstr(env, mk_iMOVsd_RR(lo16s, lo16));
1067 addInstr(env, X86Instr_Sh32(Xsh_SHL, 16, hi16));
1068 addInstr(env, X86Instr_Alu32R(Xalu_AND, X86RMI_Imm(0xFFFF), lo16));
1069 addInstr(env, X86Instr_Alu32R(Xalu_OR, X86RMI_Reg(lo16), hi16));
1070 return hi16;
1073 if (e->Iex.Binop.op == Iop_MullS16 || e->Iex.Binop.op == Iop_MullS8
1074 || e->Iex.Binop.op == Iop_MullU16 || e->Iex.Binop.op == Iop_MullU8) {
1075 HReg a16 = newVRegI(env);
1076 HReg b16 = newVRegI(env);
1077 HReg a16s = iselIntExpr_R(env, e->Iex.Binop.arg1);
1078 HReg b16s = iselIntExpr_R(env, e->Iex.Binop.arg2);
1079 Int shift = (e->Iex.Binop.op == Iop_MullS8
1080 || e->Iex.Binop.op == Iop_MullU8)
1081 ? 24 : 16;
1082 X86ShiftOp shr_op = (e->Iex.Binop.op == Iop_MullS8
1083 || e->Iex.Binop.op == Iop_MullS16)
1084 ? Xsh_SAR : Xsh_SHR;
1086 addInstr(env, mk_iMOVsd_RR(a16s, a16));
1087 addInstr(env, mk_iMOVsd_RR(b16s, b16));
1088 addInstr(env, X86Instr_Sh32(Xsh_SHL, shift, a16));
1089 addInstr(env, X86Instr_Sh32(Xsh_SHL, shift, b16));
1090 addInstr(env, X86Instr_Sh32(shr_op, shift, a16));
1091 addInstr(env, X86Instr_Sh32(shr_op, shift, b16));
1092 addInstr(env, X86Instr_Alu32R(Xalu_MUL, X86RMI_Reg(a16), b16));
1093 return b16;
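/* Each 8/16-bit operand is widened in place: shift left to put it in
   the top of the 32-bit reg, then shift back down arithmetically
   (signed) or logically (unsigned). The 32x32->32 multiply then
   leaves the full 16/32-bit product in the low bits of b16. */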
1096 if (e->Iex.Binop.op == Iop_CmpF64) {
1097 HReg fL = iselDblExpr(env, e->Iex.Binop.arg1);
1098 HReg fR = iselDblExpr(env, e->Iex.Binop.arg2);
1099 HReg dst = newVRegI(env);
1100 addInstr(env, X86Instr_FpCmp(fL,fR,dst));
1101 /* shift this right 8 bits so as to conform to CmpF64
1102 definition. */
1103 addInstr(env, X86Instr_Sh32(Xsh_SHR, 8, dst));
1104 return dst;
1107 if (e->Iex.Binop.op == Iop_F64toI32S
1108 || e->Iex.Binop.op == Iop_F64toI16S) {
1109 Int sz = e->Iex.Binop.op == Iop_F64toI16S ? 2 : 4;
1110 HReg rf = iselDblExpr(env, e->Iex.Binop.arg2);
1111 HReg dst = newVRegI(env);
1113 /* Used several times ... */
1114 X86AMode* zero_esp = X86AMode_IR(0, hregX86_ESP());
1116 /* rf now holds the value to be converted, and rrm holds the
1117 rounding mode value, encoded as per the IRRoundingMode
1118 enum. The first thing to do is set the FPU's rounding
1119 mode accordingly. */
1121 /* Create a space for the format conversion. */
1122 /* subl $4, %esp */
1123 sub_from_esp(env, 4);
1125 /* Set host rounding mode */
1126 set_FPU_rounding_mode( env, e->Iex.Binop.arg1 );
1128 /* gistw/l %rf, 0(%esp) */
1129 addInstr(env, X86Instr_FpLdStI(False/*store*/,
1130 toUChar(sz), rf, zero_esp));
1132 if (sz == 2) {
1133 /* movzwl 0(%esp), %dst */
1134 addInstr(env, X86Instr_LoadEX(2,False,zero_esp,dst));
1135 } else {
1136 /* movl 0(%esp), %dst */
1137 vassert(sz == 4);
1138 addInstr(env, X86Instr_Alu32R(
1139 Xalu_MOV, X86RMI_Mem(zero_esp), dst));
1142 /* Restore default FPU rounding. */
1143 set_FPU_rounding_default( env );
1145 /* addl $4, %esp */
1146 add_to_esp(env, 4);
1147 return dst;
1150 break;
1153 /* --------- UNARY OP --------- */
1154 case Iex_Unop: {
1156 /* 1Uto8(32to1(expr32)) */
1157 if (e->Iex.Unop.op == Iop_1Uto8) {
1158 DECLARE_PATTERN(p_32to1_then_1Uto8);
1159 DEFINE_PATTERN(p_32to1_then_1Uto8,
1160 unop(Iop_1Uto8,unop(Iop_32to1,bind(0))));
1161 if (matchIRExpr(&mi,p_32to1_then_1Uto8,e)) {
1162 const IRExpr* expr32 = mi.bindee[0];
1163 HReg dst = newVRegI(env);
1164 HReg src = iselIntExpr_R(env, expr32);
1165 addInstr(env, mk_iMOVsd_RR(src,dst) );
1166 addInstr(env, X86Instr_Alu32R(Xalu_AND,
1167 X86RMI_Imm(1), dst));
1168 return dst;
1172 /* 8Uto32(LDle(expr32)) */
1173 if (e->Iex.Unop.op == Iop_8Uto32) {
1174 DECLARE_PATTERN(p_LDle8_then_8Uto32);
1175 DEFINE_PATTERN(p_LDle8_then_8Uto32,
1176 unop(Iop_8Uto32,
1177 IRExpr_Load(Iend_LE,Ity_I8,bind(0))) );
1178 if (matchIRExpr(&mi,p_LDle8_then_8Uto32,e)) {
1179 HReg dst = newVRegI(env);
1180 X86AMode* amode = iselIntExpr_AMode ( env, mi.bindee[0] );
1181 addInstr(env, X86Instr_LoadEX(1,False,amode,dst));
1182 return dst;
1186 /* 8Sto32(LDle(expr32)) */
1187 if (e->Iex.Unop.op == Iop_8Sto32) {
1188 DECLARE_PATTERN(p_LDle8_then_8Sto32);
1189 DEFINE_PATTERN(p_LDle8_then_8Sto32,
1190 unop(Iop_8Sto32,
1191 IRExpr_Load(Iend_LE,Ity_I8,bind(0))) );
1192 if (matchIRExpr(&mi,p_LDle8_then_8Sto32,e)) {
1193 HReg dst = newVRegI(env);
1194 X86AMode* amode = iselIntExpr_AMode ( env, mi.bindee[0] );
1195 addInstr(env, X86Instr_LoadEX(1,True,amode,dst));
1196 return dst;
1200 /* 16Uto32(LDle(expr32)) */
1201 if (e->Iex.Unop.op == Iop_16Uto32) {
1202 DECLARE_PATTERN(p_LDle16_then_16Uto32);
1203 DEFINE_PATTERN(p_LDle16_then_16Uto32,
1204 unop(Iop_16Uto32,
1205 IRExpr_Load(Iend_LE,Ity_I16,bind(0))) );
1206 if (matchIRExpr(&mi,p_LDle16_then_16Uto32,e)) {
1207 HReg dst = newVRegI(env);
1208 X86AMode* amode = iselIntExpr_AMode ( env, mi.bindee[0] );
1209 addInstr(env, X86Instr_LoadEX(2,False,amode,dst));
1210 return dst;
1214 /* 8Uto32(GET:I8) */
1215 if (e->Iex.Unop.op == Iop_8Uto32) {
1216 if (e->Iex.Unop.arg->tag == Iex_Get) {
1217 HReg dst;
1218 X86AMode* amode;
1219 vassert(e->Iex.Unop.arg->Iex.Get.ty == Ity_I8);
1220 dst = newVRegI(env);
1221 amode = X86AMode_IR(e->Iex.Unop.arg->Iex.Get.offset,
1222 hregX86_EBP());
1223 addInstr(env, X86Instr_LoadEX(1,False,amode,dst));
1224 return dst;
1228 /* 16Uto32(GET:I16) */
1229 if (e->Iex.Unop.op == Iop_16Uto32) {
1230 if (e->Iex.Unop.arg->tag == Iex_Get) {
1231 HReg dst;
1232 X86AMode* amode;
1233 vassert(e->Iex.Unop.arg->Iex.Get.ty == Ity_I16);
1234 dst = newVRegI(env);
1235 amode = X86AMode_IR(e->Iex.Unop.arg->Iex.Get.offset,
1236 hregX86_EBP());
1237 addInstr(env, X86Instr_LoadEX(2,False,amode,dst));
1238 return dst;
1242 switch (e->Iex.Unop.op) {
1243 case Iop_8Uto16:
1244 case Iop_8Uto32:
1245 case Iop_16Uto32: {
1246 HReg dst = newVRegI(env);
1247 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1248 UInt mask = e->Iex.Unop.op==Iop_16Uto32 ? 0xFFFF : 0xFF;
1249 addInstr(env, mk_iMOVsd_RR(src,dst) );
1250 addInstr(env, X86Instr_Alu32R(Xalu_AND,
1251 X86RMI_Imm(mask), dst));
1252 return dst;
1254 case Iop_8Sto16:
1255 case Iop_8Sto32:
1256 case Iop_16Sto32: {
1257 HReg dst = newVRegI(env);
1258 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1259 UInt amt = e->Iex.Unop.op==Iop_16Sto32 ? 16 : 24;
1260 addInstr(env, mk_iMOVsd_RR(src,dst) );
1261 addInstr(env, X86Instr_Sh32(Xsh_SHL, amt, dst));
1262 addInstr(env, X86Instr_Sh32(Xsh_SAR, amt, dst));
1263 return dst;
1265 case Iop_Not8:
1266 case Iop_Not16:
1267 case Iop_Not32: {
1268 HReg dst = newVRegI(env);
1269 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1270 addInstr(env, mk_iMOVsd_RR(src,dst) );
1271 addInstr(env, X86Instr_Unary32(Xun_NOT,dst));
1272 return dst;
1274 case Iop_64HIto32: {
1275 HReg rHi, rLo;
1276 iselInt64Expr(&rHi,&rLo, env, e->Iex.Unop.arg);
1277 return rHi; /* and abandon rLo .. poor wee thing :-) */
1279 case Iop_64to32: {
1280 HReg rHi, rLo;
1281 iselInt64Expr(&rHi,&rLo, env, e->Iex.Unop.arg);
1282 return rLo; /* similar stupid comment to the above ... */
1284 case Iop_16HIto8:
1285 case Iop_32HIto16: {
1286 HReg dst = newVRegI(env);
1287 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1288 Int shift = e->Iex.Unop.op == Iop_16HIto8 ? 8 : 16;
1289 addInstr(env, mk_iMOVsd_RR(src,dst) );
1290 addInstr(env, X86Instr_Sh32(Xsh_SHR, shift, dst));
1291 return dst;
1293 case Iop_1Uto32:
1294 case Iop_1Uto8: {
1295 HReg dst = newVRegI(env);
1296 X86CondCode cond = iselCondCode(env, e->Iex.Unop.arg);
1297 addInstr(env, X86Instr_Set32(cond,dst));
1298 return dst;
1300 case Iop_1Sto8:
1301 case Iop_1Sto16:
1302 case Iop_1Sto32: {
1303 /* could do better than this, but for now ... */
1304 HReg dst = newVRegI(env);
1305 X86CondCode cond = iselCondCode(env, e->Iex.Unop.arg);
1306 addInstr(env, X86Instr_Set32(cond,dst));
1307 addInstr(env, X86Instr_Sh32(Xsh_SHL, 31, dst));
1308 addInstr(env, X86Instr_Sh32(Xsh_SAR, 31, dst));
1309 return dst;
1311 case Iop_Ctz32: {
1312 /* Count trailing zeroes, implemented by x86 'bsfl' */
1313 HReg dst = newVRegI(env);
1314 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1315 addInstr(env, X86Instr_Bsfr32(True,src,dst));
1316 return dst;
1318 case Iop_Clz32: {
1319 /* Count leading zeroes. Do 'bsrl' to establish the index
1320 of the highest set bit, and subtract that value from
1321 31. */
1322 HReg tmp = newVRegI(env);
1323 HReg dst = newVRegI(env);
1324 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1325 addInstr(env, X86Instr_Bsfr32(False,src,tmp));
1326 addInstr(env, X86Instr_Alu32R(Xalu_MOV,
1327 X86RMI_Imm(31), dst));
1328 addInstr(env, X86Instr_Alu32R(Xalu_SUB,
1329 X86RMI_Reg(tmp), dst));
1330 return dst;
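/* i.e. clz = 31 - bsr(src). The zero-input case gets no special
   handling here: bsrl leaves its destination undefined when the
   source is zero, which appears acceptable since the IR op is
   likewise undefined for a zero argument. */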
1333 case Iop_CmpwNEZ32: {
1334 HReg dst = newVRegI(env);
1335 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1336 addInstr(env, mk_iMOVsd_RR(src,dst));
1337 addInstr(env, X86Instr_Unary32(Xun_NEG,dst));
1338 addInstr(env, X86Instr_Alu32R(Xalu_OR,
1339 X86RMI_Reg(src), dst));
1340 addInstr(env, X86Instr_Sh32(Xsh_SAR, 31, dst));
1341 return dst;
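/* This yields 0 when src == 0 and 0xFFFFFFFF otherwise: for nonzero
   src, src | -src always has bit 31 set, and the arithmetic shift by
   31 then smears that bit across the whole register. */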
1343 case Iop_Left8:
1344 case Iop_Left16:
1345 case Iop_Left32: {
1346 HReg dst = newVRegI(env);
1347 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1348 addInstr(env, mk_iMOVsd_RR(src, dst));
1349 addInstr(env, X86Instr_Unary32(Xun_NEG, dst));
1350 addInstr(env, X86Instr_Alu32R(Xalu_OR, X86RMI_Reg(src), dst));
1351 return dst;
1354 case Iop_V128to32: {
1355 HReg dst = newVRegI(env);
1356 HReg vec = iselVecExpr(env, e->Iex.Unop.arg);
1357 X86AMode* esp0 = X86AMode_IR(0, hregX86_ESP());
1358 sub_from_esp(env, 16);
1359 addInstr(env, X86Instr_SseLdSt(False/*store*/, vec, esp0));
1360 addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(esp0), dst ));
1361 add_to_esp(env, 16);
1362 return dst;
1365 /* ReinterpF32asI32(e) */
1366 /* Given an IEEE754 single, produce an I32 with the same bit
1367 pattern. Keep stack 8-aligned even though only using 4
1368 bytes. */
1369 case Iop_ReinterpF32asI32: {
1370 HReg rf = iselFltExpr(env, e->Iex.Unop.arg);
1371 HReg dst = newVRegI(env);
1372 X86AMode* zero_esp = X86AMode_IR(0, hregX86_ESP());
1373 /* paranoia */
1374 set_FPU_rounding_default(env);
1375 /* subl $8, %esp */
1376 sub_from_esp(env, 8);
1377 /* gstF %rf, 0(%esp) */
1378 addInstr(env,
1379 X86Instr_FpLdSt(False/*store*/, 4, rf, zero_esp));
1380 /* movl 0(%esp), %dst */
1381 addInstr(env,
1382 X86Instr_Alu32R(Xalu_MOV, X86RMI_Mem(zero_esp), dst));
1383 /* addl $8, %esp */
1384 add_to_esp(env, 8);
1385 return dst;
1388 case Iop_16to8:
1389 case Iop_32to8:
1390 case Iop_32to16:
1391 /* These are no-ops. */
1392 return iselIntExpr_R(env, e->Iex.Unop.arg);
1394 case Iop_GetMSBs8x8: {
1395 /* Note: the following assumes the helper is of
1396 signature
1397 UInt fn ( ULong ), and is not a regparm fn.
1399 HReg xLo, xHi;
1400 HReg dst = newVRegI(env);
1401 Addr fn = (Addr)h_generic_calc_GetMSBs8x8;
1402 iselInt64Expr(&xHi, &xLo, env, e->Iex.Unop.arg);
1403 addInstr(env, X86Instr_Push(X86RMI_Reg(xHi)));
1404 addInstr(env, X86Instr_Push(X86RMI_Reg(xLo)));
1405 addInstr(env, X86Instr_Call( Xcc_ALWAYS, (Addr32)fn,
1406 0, mk_RetLoc_simple(RLPri_Int) ));
1407 add_to_esp(env, 2*4);
1408 addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), dst));
1409 return dst;
1412 default:
1413 break;
1415 break;
1418 /* --------- GET --------- */
1419 case Iex_Get: {
1420 if (ty == Ity_I32) {
1421 HReg dst = newVRegI(env);
1422 addInstr(env, X86Instr_Alu32R(
1423 Xalu_MOV,
1424 X86RMI_Mem(X86AMode_IR(e->Iex.Get.offset,
1425 hregX86_EBP())),
1426 dst));
1427 return dst;
1429 if (ty == Ity_I8 || ty == Ity_I16) {
1430 HReg dst = newVRegI(env);
1431 addInstr(env, X86Instr_LoadEX(
1432 toUChar(ty==Ity_I8 ? 1 : 2),
1433 False,
1434 X86AMode_IR(e->Iex.Get.offset,hregX86_EBP()),
1435 dst));
1436 return dst;
1438 break;
1441 case Iex_GetI: {
1442 X86AMode* am
1443 = genGuestArrayOffset(
1444 env, e->Iex.GetI.descr,
1445 e->Iex.GetI.ix, e->Iex.GetI.bias );
1446 HReg dst = newVRegI(env);
1447 if (ty == Ity_I8) {
1448 addInstr(env, X86Instr_LoadEX( 1, False, am, dst ));
1449 return dst;
1451 if (ty == Ity_I32) {
1452 addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Mem(am), dst));
1453 return dst;
1455 break;
1458 /* --------- CCALL --------- */
1459 case Iex_CCall: {
1460 HReg dst = newVRegI(env);
1461 vassert(ty == e->Iex.CCall.retty);
1463 /* be very restrictive for now. Only 32/64-bit ints allowed for
1464 args, and 32 bits for return type. Don't forget to change
1465 the RetLoc if more return types are allowed in future. */
1466 if (e->Iex.CCall.retty != Ity_I32)
1467 goto irreducible;
1469 /* Marshal args, do the call, clear stack. */
1470 UInt addToSp = 0;
1471 RetLoc rloc = mk_RetLoc_INVALID();
1472 doHelperCall( &addToSp, &rloc, env, NULL/*guard*/,
1473 e->Iex.CCall.cee, e->Iex.CCall.retty, e->Iex.CCall.args );
1474 vassert(is_sane_RetLoc(rloc));
1475 vassert(rloc.pri == RLPri_Int);
1476 vassert(addToSp == 0);
1478 addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), dst));
1479 return dst;
1482 /* --------- LITERAL --------- */
1483 /* 32/16/8-bit literals */
1484 case Iex_Const: {
1485 X86RMI* rmi = iselIntExpr_RMI ( env, e );
1486 HReg r = newVRegI(env);
1487 addInstr(env, X86Instr_Alu32R(Xalu_MOV, rmi, r));
1488 return r;
1491 /* --------- MULTIPLEX --------- */
1492 case Iex_ITE: { // VFD
1493 if ((ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8)
1494 && typeOfIRExpr(env->type_env,e->Iex.ITE.cond) == Ity_I1) {
1495 HReg r1 = iselIntExpr_R(env, e->Iex.ITE.iftrue);
1496 X86RM* r0 = iselIntExpr_RM(env, e->Iex.ITE.iffalse);
1497 HReg dst = newVRegI(env);
1498 addInstr(env, mk_iMOVsd_RR(r1,dst));
1499 X86CondCode cc = iselCondCode(env, e->Iex.ITE.cond);
1500 addInstr(env, X86Instr_CMov32(cc ^ 1, r0, dst));
1501 return dst;
1503 break;
1506 default:
1507 break;
1508 } /* switch (e->tag) */
1510 /* We get here if no pattern matched. */
1511 irreducible:
1512 ppIRExpr(e);
1513 vpanic("iselIntExpr_R: cannot reduce tree");
1517 /*---------------------------------------------------------*/
1518 /*--- ISEL: Integer expression auxiliaries ---*/
1519 /*---------------------------------------------------------*/
1521 /* --------------------- AMODEs --------------------- */
1523 /* Return an AMode which computes the value of the specified
1524 expression, possibly also adding insns to the code list as a
1525 result. The expression may only be a 32-bit one.
1528 static Bool sane_AMode ( X86AMode* am )
1530 switch (am->tag) {
1531 case Xam_IR:
1532 return
1533 toBool( hregClass(am->Xam.IR.reg) == HRcInt32
1534 && (hregIsVirtual(am->Xam.IR.reg)
1535 || sameHReg(am->Xam.IR.reg, hregX86_EBP())) );
1536 case Xam_IRRS:
1537 return
1538 toBool( hregClass(am->Xam.IRRS.base) == HRcInt32
1539 && hregIsVirtual(am->Xam.IRRS.base)
1540 && hregClass(am->Xam.IRRS.index) == HRcInt32
1541 && hregIsVirtual(am->Xam.IRRS.index) );
1542 default:
1543 vpanic("sane_AMode: unknown x86 amode tag");
1547 static X86AMode* iselIntExpr_AMode ( ISelEnv* env, const IRExpr* e )
1549 X86AMode* am = iselIntExpr_AMode_wrk(env, e);
1550 vassert(sane_AMode(am));
1551 return am;
1554 /* DO NOT CALL THIS DIRECTLY ! */
1555 static X86AMode* iselIntExpr_AMode_wrk ( ISelEnv* env, const IRExpr* e )
1557 IRType ty = typeOfIRExpr(env->type_env,e);
1558 vassert(ty == Ity_I32);
1560 /* Add32( Add32(expr1, Shl32(expr2, simm)), imm32 ) */
1561 if (e->tag == Iex_Binop
1562 && e->Iex.Binop.op == Iop_Add32
1563 && e->Iex.Binop.arg2->tag == Iex_Const
1564 && e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U32
1565 && e->Iex.Binop.arg1->tag == Iex_Binop
1566 && e->Iex.Binop.arg1->Iex.Binop.op == Iop_Add32
1567 && e->Iex.Binop.arg1->Iex.Binop.arg2->tag == Iex_Binop
1568 && e->Iex.Binop.arg1->Iex.Binop.arg2->Iex.Binop.op == Iop_Shl32
1569 && e->Iex.Binop.arg1
1570 ->Iex.Binop.arg2->Iex.Binop.arg2->tag == Iex_Const
1571 && e->Iex.Binop.arg1
1572 ->Iex.Binop.arg2->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8) {
1573 UInt shift = e->Iex.Binop.arg1
1574 ->Iex.Binop.arg2->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
1575 UInt imm32 = e->Iex.Binop.arg2->Iex.Const.con->Ico.U32;
1576 if (shift == 1 || shift == 2 || shift == 3) {
1577 HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1->Iex.Binop.arg1);
1578 HReg r2 = iselIntExpr_R(env, e->Iex.Binop.arg1
1579 ->Iex.Binop.arg2->Iex.Binop.arg1 );
1580 return X86AMode_IRRS(imm32, r1, r2, shift);
1584 /* Add32(expr1, Shl32(expr2, imm)) */
1585 if (e->tag == Iex_Binop
1586 && e->Iex.Binop.op == Iop_Add32
1587 && e->Iex.Binop.arg2->tag == Iex_Binop
1588 && e->Iex.Binop.arg2->Iex.Binop.op == Iop_Shl32
1589 && e->Iex.Binop.arg2->Iex.Binop.arg2->tag == Iex_Const
1590 && e->Iex.Binop.arg2->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8) {
1591 UInt shift = e->Iex.Binop.arg2->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
1592 if (shift == 1 || shift == 2 || shift == 3) {
1593 HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1);
1594 HReg r2 = iselIntExpr_R(env, e->Iex.Binop.arg2->Iex.Binop.arg1 );
1595 return X86AMode_IRRS(0, r1, r2, shift);
1599 /* Add32(expr,i) */
1600 if (e->tag == Iex_Binop
1601 && e->Iex.Binop.op == Iop_Add32
1602 && e->Iex.Binop.arg2->tag == Iex_Const
1603 && e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U32) {
1604 HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1);
1605 return X86AMode_IR(e->Iex.Binop.arg2->Iex.Const.con->Ico.U32, r1);
1608 /* Doesn't match anything in particular. Generate it into
1609 a register and use that. */
1611 HReg r1 = iselIntExpr_R(env, e);
1612 return X86AMode_IR(0, r1);
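/* Illustration (hypothetical temps): an address expression such as
   Add32(Add32(t1, Shl32(t2, 0x2:I8)), 0x1C:I32) matches the first
   pattern above and folds into the single amode 0x1C(%t1,%t2,4),
   rather than being computed with separate shift and add insns. */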
1617 /* --------------------- RMIs --------------------- */
1619 /* Similarly, calculate an expression into an X86RMI operand. As with
1620 iselIntExpr_R, the expression can have type 32, 16 or 8 bits. */
1622 static X86RMI* iselIntExpr_RMI ( ISelEnv* env, const IRExpr* e )
1624 X86RMI* rmi = iselIntExpr_RMI_wrk(env, e);
1625 /* sanity checks ... */
1626 switch (rmi->tag) {
1627 case Xrmi_Imm:
1628 return rmi;
1629 case Xrmi_Reg:
1630 vassert(hregClass(rmi->Xrmi.Reg.reg) == HRcInt32);
1631 vassert(hregIsVirtual(rmi->Xrmi.Reg.reg));
1632 return rmi;
1633 case Xrmi_Mem:
1634 vassert(sane_AMode(rmi->Xrmi.Mem.am));
1635 return rmi;
1636 default:
1637 vpanic("iselIntExpr_RMI: unknown x86 RMI tag");
1641 /* DO NOT CALL THIS DIRECTLY ! */
1642 static X86RMI* iselIntExpr_RMI_wrk ( ISelEnv* env, const IRExpr* e )
1644 IRType ty = typeOfIRExpr(env->type_env,e);
1645 vassert(ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8);
1647 /* special case: immediate */
1648 if (e->tag == Iex_Const) {
1649 UInt u;
1650 switch (e->Iex.Const.con->tag) {
1651 case Ico_U32: u = e->Iex.Const.con->Ico.U32; break;
1652 case Ico_U16: u = 0xFFFF & (e->Iex.Const.con->Ico.U16); break;
1653 case Ico_U8: u = 0xFF & (e->Iex.Const.con->Ico.U8); break;
1654 default: vpanic("iselIntExpr_RMI.Iex_Const(x86h)");
1656 return X86RMI_Imm(u);
1659 /* special case: 32-bit GET */
1660 if (e->tag == Iex_Get && ty == Ity_I32) {
1661 return X86RMI_Mem(X86AMode_IR(e->Iex.Get.offset,
1662 hregX86_EBP()));
1665 /* special case: 32-bit load from memory */
1666 if (e->tag == Iex_Load && ty == Ity_I32
1667 && e->Iex.Load.end == Iend_LE) {
1668 X86AMode* am = iselIntExpr_AMode(env, e->Iex.Load.addr);
1669 return X86RMI_Mem(am);
1672 /* default case: calculate into a register and return that */
1674 HReg r = iselIntExpr_R ( env, e );
1675 return X86RMI_Reg(r);
1680 /* --------------------- RIs --------------------- */
1682 /* Calculate an expression into an X86RI operand. As with
1683 iselIntExpr_R, the expression can have type 32, 16 or 8 bits. */
1685 static X86RI* iselIntExpr_RI ( ISelEnv* env, const IRExpr* e )
1687 X86RI* ri = iselIntExpr_RI_wrk(env, e);
1688 /* sanity checks ... */
1689 switch (ri->tag) {
1690 case Xri_Imm:
1691 return ri;
1692 case Xri_Reg:
1693 vassert(hregClass(ri->Xri.Reg.reg) == HRcInt32);
1694 vassert(hregIsVirtual(ri->Xri.Reg.reg));
1695 return ri;
1696 default:
1697 vpanic("iselIntExpr_RI: unknown x86 RI tag");
1701 /* DO NOT CALL THIS DIRECTLY ! */
1702 static X86RI* iselIntExpr_RI_wrk ( ISelEnv* env, const IRExpr* e )
1704 IRType ty = typeOfIRExpr(env->type_env,e);
1705 vassert(ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8);
1707 /* special case: immediate */
1708 if (e->tag == Iex_Const) {
1709 UInt u;
1710 switch (e->Iex.Const.con->tag) {
1711 case Ico_U32: u = e->Iex.Const.con->Ico.U32; break;
1712 case Ico_U16: u = 0xFFFF & (e->Iex.Const.con->Ico.U16); break;
1713 case Ico_U8: u = 0xFF & (e->Iex.Const.con->Ico.U8); break;
1714 default: vpanic("iselIntExpr_RI.Iex_Const(x86h)");
1716 return X86RI_Imm(u);
1719 /* default case: calculate into a register and return that */
1721 HReg r = iselIntExpr_R ( env, e );
1722 return X86RI_Reg(r);
1727 /* --------------------- RMs --------------------- */
1729 /* Similarly, calculate an expression into an X86RM operand. As with
1730 iselIntExpr_R, the expression can have type 32, 16 or 8 bits. */
1732 static X86RM* iselIntExpr_RM ( ISelEnv* env, const IRExpr* e )
1734 X86RM* rm = iselIntExpr_RM_wrk(env, e);
1735 /* sanity checks ... */
1736 switch (rm->tag) {
1737 case Xrm_Reg:
1738 vassert(hregClass(rm->Xrm.Reg.reg) == HRcInt32);
1739 vassert(hregIsVirtual(rm->Xrm.Reg.reg));
1740 return rm;
1741 case Xrm_Mem:
1742 vassert(sane_AMode(rm->Xrm.Mem.am));
1743 return rm;
1744 default:
1745 vpanic("iselIntExpr_RM: unknown x86 RM tag");
1749 /* DO NOT CALL THIS DIRECTLY ! */
1750 static X86RM* iselIntExpr_RM_wrk ( ISelEnv* env, const IRExpr* e )
1752 IRType ty = typeOfIRExpr(env->type_env,e);
1753 vassert(ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8);
1755 /* special case: 32-bit GET */
1756 if (e->tag == Iex_Get && ty == Ity_I32) {
1757 return X86RM_Mem(X86AMode_IR(e->Iex.Get.offset,
1758 hregX86_EBP()));
1761 /* special case: load from memory */
1763 /* default case: calculate into a register and return that */
1765 HReg r = iselIntExpr_R ( env, e );
1766 return X86RM_Reg(r);
1771 /* --------------------- CONDCODE --------------------- */
1773 /* Generate code to evaluate a bit-typed expression, returning the
1774 condition code that corresponds to the expression notionally
1775 having returned 1. */
1777 static X86CondCode iselCondCode ( ISelEnv* env, const IRExpr* e )
1779 /* Uh, there's nothing we can sanity check here, unfortunately. */
1780 return iselCondCode_wrk(env,e);
1783 /* DO NOT CALL THIS DIRECTLY ! */
1784 static X86CondCode iselCondCode_wrk ( ISelEnv* env, const IRExpr* e )
1786 MatchInfo mi;
1788 vassert(e);
1789 vassert(typeOfIRExpr(env->type_env,e) == Ity_I1);
1791 /* var */
1792 if (e->tag == Iex_RdTmp) {
1793 HReg r32 = lookupIRTemp(env, e->Iex.RdTmp.tmp);
1794 /* Test32 doesn't modify r32; so this is OK. */
1795 addInstr(env, X86Instr_Test32(1,X86RM_Reg(r32)));
1796 return Xcc_NZ;
1799 /* Constant 1:Bit */
1800 if (e->tag == Iex_Const) {
1801 HReg r;
1802 vassert(e->Iex.Const.con->tag == Ico_U1);
1803 vassert(e->Iex.Const.con->Ico.U1 == True
1804 || e->Iex.Const.con->Ico.U1 == False);
1805 r = newVRegI(env);
1806 addInstr(env, X86Instr_Alu32R(Xalu_MOV,X86RMI_Imm(0),r));
1807 addInstr(env, X86Instr_Alu32R(Xalu_XOR,X86RMI_Reg(r),r));
1808 return e->Iex.Const.con->Ico.U1 ? Xcc_Z : Xcc_NZ;
1811 /* Not1(e) */
1812 if (e->tag == Iex_Unop && e->Iex.Unop.op == Iop_Not1) {
1813 /* Generate code for the arg, and negate the test condition */
1814 return 1 ^ iselCondCode(env, e->Iex.Unop.arg);
1817 /* --- patterns rooted at: 32to1 --- */
1819 if (e->tag == Iex_Unop
1820 && e->Iex.Unop.op == Iop_32to1) {
1821 X86RM* rm = iselIntExpr_RM(env, e->Iex.Unop.arg);
1822 addInstr(env, X86Instr_Test32(1,rm));
1823 return Xcc_NZ;
1826 /* --- patterns rooted at: CmpNEZ8 --- */
1828 /* CmpNEZ8(x) */
1829 if (e->tag == Iex_Unop
1830 && e->Iex.Unop.op == Iop_CmpNEZ8) {
1831 X86RM* rm = iselIntExpr_RM(env, e->Iex.Unop.arg);
1832 addInstr(env, X86Instr_Test32(0xFF,rm));
1833 return Xcc_NZ;
1836 /* --- patterns rooted at: CmpNEZ16 --- */
1838 /* CmpNEZ16(x) */
1839 if (e->tag == Iex_Unop
1840 && e->Iex.Unop.op == Iop_CmpNEZ16) {
1841 X86RM* rm = iselIntExpr_RM(env, e->Iex.Unop.arg);
1842 addInstr(env, X86Instr_Test32(0xFFFF,rm));
1843 return Xcc_NZ;
1846 /* --- patterns rooted at: CmpNEZ32 --- */
1848 /* CmpNEZ32(And32(x,y)) */
1850 DECLARE_PATTERN(p_CmpNEZ32_And32);
1851 DEFINE_PATTERN(p_CmpNEZ32_And32,
1852 unop(Iop_CmpNEZ32, binop(Iop_And32, bind(0), bind(1))));
1853 if (matchIRExpr(&mi, p_CmpNEZ32_And32, e)) {
1854 HReg r0 = iselIntExpr_R(env, mi.bindee[0]);
1855 X86RMI* rmi1 = iselIntExpr_RMI(env, mi.bindee[1]);
1856 HReg tmp = newVRegI(env);
1857 addInstr(env, mk_iMOVsd_RR(r0, tmp));
1858 addInstr(env, X86Instr_Alu32R(Xalu_AND,rmi1,tmp));
1859 return Xcc_NZ;
1863 /* CmpNEZ32(Or32(x,y)) */
1865 DECLARE_PATTERN(p_CmpNEZ32_Or32);
1866 DEFINE_PATTERN(p_CmpNEZ32_Or32,
1867 unop(Iop_CmpNEZ32, binop(Iop_Or32, bind(0), bind(1))));
1868 if (matchIRExpr(&mi, p_CmpNEZ32_Or32, e)) {
1869 HReg r0 = iselIntExpr_R(env, mi.bindee[0]);
1870 X86RMI* rmi1 = iselIntExpr_RMI(env, mi.bindee[1]);
1871 HReg tmp = newVRegI(env);
1872 addInstr(env, mk_iMOVsd_RR(r0, tmp));
1873 addInstr(env, X86Instr_Alu32R(Xalu_OR,rmi1,tmp));
1874 return Xcc_NZ;
1878 /* CmpNEZ32(GET(..):I32) */
1879 if (e->tag == Iex_Unop
1880 && e->Iex.Unop.op == Iop_CmpNEZ32
1881 && e->Iex.Unop.arg->tag == Iex_Get) {
1882 X86AMode* am = X86AMode_IR(e->Iex.Unop.arg->Iex.Get.offset,
1883 hregX86_EBP());
1884 addInstr(env, X86Instr_Alu32M(Xalu_CMP, X86RI_Imm(0), am));
1885 return Xcc_NZ;
1888 /* CmpNEZ32(x) */
1889 if (e->tag == Iex_Unop
1890 && e->Iex.Unop.op == Iop_CmpNEZ32) {
1891 HReg r1 = iselIntExpr_R(env, e->Iex.Unop.arg);
1892 X86RMI* rmi2 = X86RMI_Imm(0);
1893 addInstr(env, X86Instr_Alu32R(Xalu_CMP,rmi2,r1));
1894 return Xcc_NZ;
1897 /* --- patterns rooted at: CmpNEZ64 --- */
1899 /* CmpNEZ64(Or64(x,y)) */
1901 DECLARE_PATTERN(p_CmpNEZ64_Or64);
1902 DEFINE_PATTERN(p_CmpNEZ64_Or64,
1903 unop(Iop_CmpNEZ64, binop(Iop_Or64, bind(0), bind(1))));
1904 if (matchIRExpr(&mi, p_CmpNEZ64_Or64, e)) {
1905 HReg hi1, lo1, hi2, lo2;
1906 HReg tmp = newVRegI(env);
1907 iselInt64Expr( &hi1, &lo1, env, mi.bindee[0] );
1908 addInstr(env, mk_iMOVsd_RR(hi1, tmp));
1909 addInstr(env, X86Instr_Alu32R(Xalu_OR,X86RMI_Reg(lo1),tmp));
1910 iselInt64Expr( &hi2, &lo2, env, mi.bindee[1] );
1911 addInstr(env, X86Instr_Alu32R(Xalu_OR,X86RMI_Reg(hi2),tmp));
1912 addInstr(env, X86Instr_Alu32R(Xalu_OR,X86RMI_Reg(lo2),tmp));
1913 return Xcc_NZ;
1917 /* CmpNEZ64(x) */
1918 if (e->tag == Iex_Unop
1919 && e->Iex.Unop.op == Iop_CmpNEZ64) {
1920 HReg hi, lo;
1921 HReg tmp = newVRegI(env);
1922 iselInt64Expr( &hi, &lo, env, e->Iex.Unop.arg );
1923 addInstr(env, mk_iMOVsd_RR(hi, tmp));
1924 addInstr(env, X86Instr_Alu32R(Xalu_OR,X86RMI_Reg(lo), tmp));
1925 return Xcc_NZ;
1928 /* --- patterns rooted at: Cmp{EQ,NE}{8,16} --- */
1930 /* CmpEQ8 / CmpNE8 */
1931 if (e->tag == Iex_Binop
1932 && (e->Iex.Binop.op == Iop_CmpEQ8
1933 || e->Iex.Binop.op == Iop_CmpNE8
1934 || e->Iex.Binop.op == Iop_CasCmpEQ8
1935 || e->Iex.Binop.op == Iop_CasCmpNE8)) {
1936 if (isZeroU8(e->Iex.Binop.arg2)) {
1937 HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1);
1938 addInstr(env, X86Instr_Test32(0xFF,X86RM_Reg(r1)));
1939 switch (e->Iex.Binop.op) {
1940 case Iop_CmpEQ8: case Iop_CasCmpEQ8: return Xcc_Z;
1941 case Iop_CmpNE8: case Iop_CasCmpNE8: return Xcc_NZ;
1942 default: vpanic("iselCondCode(x86): CmpXX8(expr,0:I8)");
1944 } else {
1945 HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1);
1946 X86RMI* rmi2 = iselIntExpr_RMI(env, e->Iex.Binop.arg2);
1947 HReg r = newVRegI(env);
1948 addInstr(env, mk_iMOVsd_RR(r1,r));
1949 addInstr(env, X86Instr_Alu32R(Xalu_XOR,rmi2,r));
1950 addInstr(env, X86Instr_Test32(0xFF,X86RM_Reg(r)));
1951 switch (e->Iex.Binop.op) {
1952 case Iop_CmpEQ8: case Iop_CasCmpEQ8: return Xcc_Z;
1953 case Iop_CmpNE8: case Iop_CasCmpNE8: return Xcc_NZ;
1954 default: vpanic("iselCondCode(x86): CmpXX8(expr,expr)");
1959 /* CmpEQ16 / CmpNE16 */
1960 if (e->tag == Iex_Binop
1961 && (e->Iex.Binop.op == Iop_CmpEQ16
1962 || e->Iex.Binop.op == Iop_CmpNE16
1963 || e->Iex.Binop.op == Iop_CasCmpEQ16
1964 || e->Iex.Binop.op == Iop_CasCmpNE16
1965 || e->Iex.Binop.op == Iop_ExpCmpNE16)) {
1966 HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1);
1967 X86RMI* rmi2 = iselIntExpr_RMI(env, e->Iex.Binop.arg2);
1968 HReg r = newVRegI(env);
1969 addInstr(env, mk_iMOVsd_RR(r1,r));
1970 addInstr(env, X86Instr_Alu32R(Xalu_XOR,rmi2,r));
1971 addInstr(env, X86Instr_Test32(0xFFFF,X86RM_Reg(r)));
1972 switch (e->Iex.Binop.op) {
1973 case Iop_CmpEQ16: case Iop_CasCmpEQ16:
1974 return Xcc_Z;
1975 case Iop_CmpNE16: case Iop_CasCmpNE16: case Iop_ExpCmpNE16:
1976 return Xcc_NZ;
1977 default:
1978 vpanic("iselCondCode(x86): CmpXX16");
1982 /* CmpNE32(ccall, 32-bit constant) (--smc-check=all optimisation).
1983 Saves a "movl %eax, %tmp" compared to the default route. */
1984 if (e->tag == Iex_Binop
1985 && e->Iex.Binop.op == Iop_CmpNE32
1986 && e->Iex.Binop.arg1->tag == Iex_CCall
1987 && e->Iex.Binop.arg2->tag == Iex_Const) {
1988 IRExpr* cal = e->Iex.Binop.arg1;
1989 IRExpr* con = e->Iex.Binop.arg2;
1990 /* clone & partial-eval of generic Iex_CCall and Iex_Const cases */
1991 vassert(cal->Iex.CCall.retty == Ity_I32); /* else ill-typed IR */
1992 vassert(con->Iex.Const.con->tag == Ico_U32);
1993 /* Marshal args, do the call. */
1994 UInt addToSp = 0;
1995 RetLoc rloc = mk_RetLoc_INVALID();
1996 doHelperCall( &addToSp, &rloc, env, NULL/*guard*/,
1997 cal->Iex.CCall.cee,
1998 cal->Iex.CCall.retty, cal->Iex.CCall.args );
1999 vassert(is_sane_RetLoc(rloc));
2000 vassert(rloc.pri == RLPri_Int);
2001 vassert(addToSp == 0);
2002 /* */
2003 addInstr(env, X86Instr_Alu32R(Xalu_CMP,
2004 X86RMI_Imm(con->Iex.Const.con->Ico.U32),
2005 hregX86_EAX()));
2006 return Xcc_NZ;
2009 /* Cmp*32*(x,y) */
2010 if (e->tag == Iex_Binop
2011 && (e->Iex.Binop.op == Iop_CmpEQ32
2012 || e->Iex.Binop.op == Iop_CmpNE32
2013 || e->Iex.Binop.op == Iop_CmpLT32S
2014 || e->Iex.Binop.op == Iop_CmpLT32U
2015 || e->Iex.Binop.op == Iop_CmpLE32S
2016 || e->Iex.Binop.op == Iop_CmpLE32U
2017 || e->Iex.Binop.op == Iop_CasCmpEQ32
2018 || e->Iex.Binop.op == Iop_CasCmpNE32
2019 || e->Iex.Binop.op == Iop_ExpCmpNE32)) {
2020 HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1);
2021 X86RMI* rmi2 = iselIntExpr_RMI(env, e->Iex.Binop.arg2);
2022 addInstr(env, X86Instr_Alu32R(Xalu_CMP,rmi2,r1));
2023 switch (e->Iex.Binop.op) {
2024 case Iop_CmpEQ32: case Iop_CasCmpEQ32: return Xcc_Z;
2025 case Iop_CmpNE32:
2026 case Iop_CasCmpNE32: case Iop_ExpCmpNE32: return Xcc_NZ;
2027 case Iop_CmpLT32S: return Xcc_L;
2028 case Iop_CmpLT32U: return Xcc_B;
2029 case Iop_CmpLE32S: return Xcc_LE;
2030 case Iop_CmpLE32U: return Xcc_BE;
2031 default: vpanic("iselCondCode(x86): CmpXX32");
2035 /* CmpNE64 */
2036 if (e->tag == Iex_Binop
2037 && (e->Iex.Binop.op == Iop_CmpNE64
2038 || e->Iex.Binop.op == Iop_CmpEQ64)) {
2039 HReg hi1, hi2, lo1, lo2;
2040 HReg tHi = newVRegI(env);
2041 HReg tLo = newVRegI(env);
2042 iselInt64Expr( &hi1, &lo1, env, e->Iex.Binop.arg1 );
2043 iselInt64Expr( &hi2, &lo2, env, e->Iex.Binop.arg2 );
2044 addInstr(env, mk_iMOVsd_RR(hi1, tHi));
2045 addInstr(env, X86Instr_Alu32R(Xalu_XOR,X86RMI_Reg(hi2), tHi));
2046 addInstr(env, mk_iMOVsd_RR(lo1, tLo));
2047 addInstr(env, X86Instr_Alu32R(Xalu_XOR,X86RMI_Reg(lo2), tLo));
2048 addInstr(env, X86Instr_Alu32R(Xalu_OR,X86RMI_Reg(tHi), tLo));
2049 switch (e->Iex.Binop.op) {
2050 case Iop_CmpNE64: return Xcc_NZ;
2051 case Iop_CmpEQ64: return Xcc_Z;
2052 default: vpanic("iselCondCode(x86): CmpXX64");
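/* Rationale: hi1:lo1 == hi2:lo2 exactly when
   ((hi1 ^ hi2) | (lo1 ^ lo2)) == 0, and the zero-ness of that value
   is what the final OR leaves in the flags. */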
2056 /* And1(x,y), Or1(x,y) */
2057 /* FIXME: We could (and probably should) do a lot better here. If both args
2058 are in temps already then we can just emit a reg-reg And/Or directly,
2059 followed by the final Test. */
2060 if (e->tag == Iex_Binop
2061 && (e->Iex.Binop.op == Iop_And1 || e->Iex.Binop.op == Iop_Or1)) {
2062 // We could probably be cleverer about this. In the meantime ..
2063 HReg x_as_32 = newVRegI(env);
2064 X86CondCode cc_x = iselCondCode(env, e->Iex.Binop.arg1);
2065 addInstr(env, X86Instr_Set32(cc_x, x_as_32));
2066 HReg y_as_32 = newVRegI(env);
2067 X86CondCode cc_y = iselCondCode(env, e->Iex.Binop.arg2);
2068 addInstr(env, X86Instr_Set32(cc_y, y_as_32));
2069 X86AluOp aop = e->Iex.Binop.op == Iop_And1 ? Xalu_AND : Xalu_OR;
2070 addInstr(env, X86Instr_Alu32R(aop, X86RMI_Reg(x_as_32), y_as_32));
2071 addInstr(env, X86Instr_Test32(1, X86RM_Reg(y_as_32)));
2072 return Xcc_NZ;
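/* Each Set32 leaves the truth of its condition in bit 0 of the
   destination, so after the reg-reg AND/OR only bit 0 of y_as_32 is
   of interest; the trailing Test32(1,..) turns it back into a
   condition code. */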
2075 ppIRExpr(e);
2076 vpanic("iselCondCode");
2080 /*---------------------------------------------------------*/
2081 /*--- ISEL: Integer expressions (64 bit) ---*/
2082 /*---------------------------------------------------------*/
2084 /* Compute a 64-bit value into a register pair, which is returned as
2085 the first two parameters. As with iselIntExpr_R, these may be
2086 either real or virtual regs; in any case they must not be changed
2087 by subsequent code emitted by the caller. */
2089 static void iselInt64Expr ( HReg* rHi, HReg* rLo, ISelEnv* env,
2090 const IRExpr* e )
2092 iselInt64Expr_wrk(rHi, rLo, env, e);
2093 # if 0
2094 vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
2095 # endif
2096 vassert(hregClass(*rHi) == HRcInt32);
2097 vassert(hregIsVirtual(*rHi));
2098 vassert(hregClass(*rLo) == HRcInt32);
2099 vassert(hregIsVirtual(*rLo));
2102 /* DO NOT CALL THIS DIRECTLY ! */
2103 static void iselInt64Expr_wrk ( HReg* rHi, HReg* rLo, ISelEnv* env,
2104 const IRExpr* e )
2106 MatchInfo mi;
2107 HWord fn = 0; /* helper fn for most SIMD64 stuff */
2108 vassert(e);
2109 vassert(typeOfIRExpr(env->type_env,e) == Ity_I64);
2111 /* 64-bit literal */
2112 if (e->tag == Iex_Const) {
2113 ULong w64 = e->Iex.Const.con->Ico.U64;
2114 UInt wHi = toUInt(w64 >> 32);
2115 UInt wLo = toUInt(w64);
2116 HReg tLo = newVRegI(env);
2117 HReg tHi = newVRegI(env);
2118 vassert(e->Iex.Const.con->tag == Ico_U64);
2119 if (wLo == wHi) {
2120 /* Save a precious Int register in this special case. */
2121 addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(wLo), tLo));
2122 *rHi = tLo;
2123 *rLo = tLo;
2124 } else {
2125 addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(wHi), tHi));
2126 addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(wLo), tLo));
2127 *rHi = tHi;
2128 *rLo = tLo;
2130 return;
2133 /* read 64-bit IRTemp */
2134 if (e->tag == Iex_RdTmp) {
2135 lookupIRTemp64( rHi, rLo, env, e->Iex.RdTmp.tmp);
2136 return;
2139 /* 64-bit load */
2140 if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
2141 HReg tLo, tHi;
2142 X86AMode *am0, *am4;
2143 vassert(e->Iex.Load.ty == Ity_I64);
2144 tLo = newVRegI(env);
2145 tHi = newVRegI(env);
2146 am0 = iselIntExpr_AMode(env, e->Iex.Load.addr);
2147 am4 = advance4(am0);
2148 addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(am0), tLo ));
2149 addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(am4), tHi ));
2150 *rHi = tHi;
2151 *rLo = tLo;
2152 return;
2155 /* 64-bit GET */
2156 if (e->tag == Iex_Get) {
2157 X86AMode* am = X86AMode_IR(e->Iex.Get.offset, hregX86_EBP());
2158 X86AMode* am4 = advance4(am);
2159 HReg tLo = newVRegI(env);
2160 HReg tHi = newVRegI(env);
2161 addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(am), tLo ));
2162 addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(am4), tHi ));
2163 *rHi = tHi;
2164 *rLo = tLo;
2165 return;
2168 /* 64-bit GETI */
2169 if (e->tag == Iex_GetI) {
2170 X86AMode* am
2171 = genGuestArrayOffset( env, e->Iex.GetI.descr,
2172 e->Iex.GetI.ix, e->Iex.GetI.bias );
2173 X86AMode* am4 = advance4(am);
2174 HReg tLo = newVRegI(env);
2175 HReg tHi = newVRegI(env);
2176 addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(am), tLo ));
2177 addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(am4), tHi ));
2178 *rHi = tHi;
2179 *rLo = tLo;
2180 return;
2183 /* 64-bit ITE: ITE(g, expr, expr) */ // VFD
2184 if (e->tag == Iex_ITE) {
2185 HReg e0Lo, e0Hi, e1Lo, e1Hi;
2186 HReg tLo = newVRegI(env);
2187 HReg tHi = newVRegI(env);
2188 iselInt64Expr(&e0Hi, &e0Lo, env, e->Iex.ITE.iffalse);
2189 iselInt64Expr(&e1Hi, &e1Lo, env, e->Iex.ITE.iftrue);
2190 addInstr(env, mk_iMOVsd_RR(e1Hi, tHi));
2191 addInstr(env, mk_iMOVsd_RR(e1Lo, tLo));
2192 X86CondCode cc = iselCondCode(env, e->Iex.ITE.cond);
2193 /* This assumes the first cmov32 doesn't trash the condition
2194 codes, so they are still available for the second cmov32 */
2195 addInstr(env, X86Instr_CMov32(cc ^ 1, X86RM_Reg(e0Hi), tHi));
2196 addInstr(env, X86Instr_CMov32(cc ^ 1, X86RM_Reg(e0Lo), tLo));
2197 *rHi = tHi;
2198 *rLo = tLo;
2199 return;
2202 /* --------- BINARY ops --------- */
2203 if (e->tag == Iex_Binop) {
2204 switch (e->Iex.Binop.op) {
2205 /* 32 x 32 -> 64 multiply */
2206 case Iop_MullU32:
2207 case Iop_MullS32: {
2208 /* get one operand into %eax, and the other into a R/M.
2209 Need to make an educated guess about which operand goes
2210 where. */
2211 HReg tLo = newVRegI(env);
2212 HReg tHi = newVRegI(env);
2213 Bool syned = toBool(e->Iex.Binop.op == Iop_MullS32);
2214 X86RM* rmLeft = iselIntExpr_RM(env, e->Iex.Binop.arg1);
2215 HReg rRight = iselIntExpr_R(env, e->Iex.Binop.arg2);
2216 addInstr(env, mk_iMOVsd_RR(rRight, hregX86_EAX()));
2217 addInstr(env, X86Instr_MulL(syned, rmLeft));
2218 /* Result is now in EDX:EAX. Tell the caller. */
2219 addInstr(env, mk_iMOVsd_RR(hregX86_EDX(), tHi));
2220 addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), tLo));
2221 *rHi = tHi;
2222 *rLo = tLo;
2223 return;
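/* The one-operand MUL/IMUL forms always deposit their 64-bit product
   in %edx:%eax, hence the fixed copies out of those two registers. */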
2226 /* 64 x 32 -> (32(rem),32(div)) division */
2227 case Iop_DivModU64to32:
2228 case Iop_DivModS64to32: {
2229 /* Get the 64-bit operand into edx:eax, and the other into
2230 any old R/M. */
2231 HReg sHi, sLo;
2232 HReg tLo = newVRegI(env);
2233 HReg tHi = newVRegI(env);
2234 Bool syned = toBool(e->Iex.Binop.op == Iop_DivModS64to32);
2235 X86RM* rmRight = iselIntExpr_RM(env, e->Iex.Binop.arg2);
2236 iselInt64Expr(&sHi,&sLo, env, e->Iex.Binop.arg1);
2237 addInstr(env, mk_iMOVsd_RR(sHi, hregX86_EDX()));
2238 addInstr(env, mk_iMOVsd_RR(sLo, hregX86_EAX()));
2239 addInstr(env, X86Instr_Div(syned, rmRight));
2240 addInstr(env, mk_iMOVsd_RR(hregX86_EDX(), tHi));
2241 addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), tLo));
2242 *rHi = tHi;
2243 *rLo = tLo;
2244 return;
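/* DIV/IDIV take the 64-bit dividend in %edx:%eax and leave the
   quotient in %eax and the remainder in %edx, matching
   DivMod*64to32's remainder:quotient result layout. */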
2247 /* Or64/And64/Xor64 */
2248 case Iop_Or64:
2249 case Iop_And64:
2250 case Iop_Xor64: {
2251 HReg xLo, xHi, yLo, yHi;
2252 HReg tLo = newVRegI(env);
2253 HReg tHi = newVRegI(env);
2254 X86AluOp op = e->Iex.Binop.op==Iop_Or64 ? Xalu_OR
2255 : e->Iex.Binop.op==Iop_And64 ? Xalu_AND
2256 : Xalu_XOR;
2257 iselInt64Expr(&xHi, &xLo, env, e->Iex.Binop.arg1);
2258 iselInt64Expr(&yHi, &yLo, env, e->Iex.Binop.arg2);
2259 addInstr(env, mk_iMOVsd_RR(xHi, tHi));
2260 addInstr(env, X86Instr_Alu32R(op, X86RMI_Reg(yHi), tHi));
2261 addInstr(env, mk_iMOVsd_RR(xLo, tLo));
2262 addInstr(env, X86Instr_Alu32R(op, X86RMI_Reg(yLo), tLo));
2263 *rHi = tHi;
2264 *rLo = tLo;
2265 return;
2268 /* Add64/Sub64 */
2269 case Iop_Add64:
2270 if (e->Iex.Binop.arg2->tag == Iex_Const) {
2271 /* special case Add64(e, const) */
2272 ULong w64 = e->Iex.Binop.arg2->Iex.Const.con->Ico.U64;
2273 UInt wHi = toUInt(w64 >> 32);
2274 UInt wLo = toUInt(w64);
2275 HReg tLo = newVRegI(env);
2276 HReg tHi = newVRegI(env);
2277 HReg xLo, xHi;
2278 vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U64);
2279 iselInt64Expr(&xHi, &xLo, env, e->Iex.Binop.arg1);
2280 addInstr(env, mk_iMOVsd_RR(xHi, tHi));
2281 addInstr(env, mk_iMOVsd_RR(xLo, tLo));
2282 addInstr(env, X86Instr_Alu32R(Xalu_ADD, X86RMI_Imm(wLo), tLo));
2283 addInstr(env, X86Instr_Alu32R(Xalu_ADC, X86RMI_Imm(wHi), tHi));
2284 *rHi = tHi;
2285 *rLo = tLo;
2286 return;
2288 /* else fall through to the generic case */
2289 case Iop_Sub64: {
2290 HReg xLo, xHi, yLo, yHi;
2291 HReg tLo = newVRegI(env);
2292 HReg tHi = newVRegI(env);
2293 iselInt64Expr(&xHi, &xLo, env, e->Iex.Binop.arg1);
2294 addInstr(env, mk_iMOVsd_RR(xHi, tHi));
2295 addInstr(env, mk_iMOVsd_RR(xLo, tLo));
2296 iselInt64Expr(&yHi, &yLo, env, e->Iex.Binop.arg2);
2297 if (e->Iex.Binop.op==Iop_Add64) {
2298 addInstr(env, X86Instr_Alu32R(Xalu_ADD, X86RMI_Reg(yLo), tLo));
2299 addInstr(env, X86Instr_Alu32R(Xalu_ADC, X86RMI_Reg(yHi), tHi));
2300 } else {
2301 addInstr(env, X86Instr_Alu32R(Xalu_SUB, X86RMI_Reg(yLo), tLo));
2302 addInstr(env, X86Instr_Alu32R(Xalu_SBB, X86RMI_Reg(yHi), tHi));
2304 *rHi = tHi;
2305 *rLo = tLo;
2306 return;
2309 /* 32HLto64(e1,e2) */
2310 case Iop_32HLto64:
2311 *rHi = iselIntExpr_R(env, e->Iex.Binop.arg1);
2312 *rLo = iselIntExpr_R(env, e->Iex.Binop.arg2);
2313 return;
2315 /* 64-bit shifts */
2316 case Iop_Shl64: {
2317 /* We use the same ingenious scheme as gcc. Put the value
2318 to be shifted into %hi:%lo, and the shift amount into
2319 %cl. Then (dsts on right, a la ATT syntax):
2321 shldl %cl, %lo, %hi -- make %hi be right for the
2322 -- shift amt %cl % 32
2323 shll %cl, %lo -- make %lo be right for the
2324 -- shift amt %cl % 32
2326 Now, if (shift amount % 64) is in the range 32 .. 63,
2327 we have to do a fixup, which puts the result low half
2328 into the result high half, and zeroes the low half:
2330 testl $32, %ecx
2332 cmovnz %lo, %hi
2333 movl $0, %tmp -- sigh; need yet another reg
2334 cmovnz %tmp, %lo
2335 */
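/* Worked example: a shift by 40 has %cl % 32 == 8, so the shldl/shll
   pair leaves %hi = (hi << 8) | (lo >> 24) and %lo = lo << 8, which
   is not yet the 64-bit result.  Bit 5 of %ecx is set, so the two
   cmovnz's then promote %lo to the high half and zero the low half,
   giving lo << 8 in the top word -- that is, the original value
   shifted left by 40. */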
2336 HReg rAmt, sHi, sLo, tHi, tLo, tTemp;
2337 tLo = newVRegI(env);
2338 tHi = newVRegI(env);
2339 tTemp = newVRegI(env);
2340 rAmt = iselIntExpr_R(env, e->Iex.Binop.arg2);
2341 iselInt64Expr(&sHi,&sLo, env, e->Iex.Binop.arg1);
2342 addInstr(env, mk_iMOVsd_RR(rAmt, hregX86_ECX()));
2343 addInstr(env, mk_iMOVsd_RR(sHi, tHi));
2344 addInstr(env, mk_iMOVsd_RR(sLo, tLo));
2345 /* Ok. Now shift amt is in %ecx, and value is in tHi/tLo
2346 and those regs are legitimately modifiable. */
2347 addInstr(env, X86Instr_Sh3232(Xsh_SHL, 0/*%cl*/, tLo, tHi));
2348 addInstr(env, X86Instr_Sh32(Xsh_SHL, 0/*%cl*/, tLo));
2349 addInstr(env, X86Instr_Test32(32, X86RM_Reg(hregX86_ECX())));
2350 addInstr(env, X86Instr_CMov32(Xcc_NZ, X86RM_Reg(tLo), tHi));
2351 addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(0), tTemp));
2352 addInstr(env, X86Instr_CMov32(Xcc_NZ, X86RM_Reg(tTemp), tLo));
2353 *rHi = tHi;
2354 *rLo = tLo;
2355 return;
2358 case Iop_Shr64: {
2359 /* We use the same ingenious scheme as gcc. Put the value
2360 to be shifted into %hi:%lo, and the shift amount into
2361 %cl. Then:
2363 shrdl %cl, %hi, %lo -- make %lo be right for the
2364 -- shift amt %cl % 32
2365 shrl %cl, %hi -- make %hi be right for the
2366 -- shift amt %cl % 32
2368 Now, if (shift amount % 64) is in the range 32 .. 63,
2369 we have to do a fixup, which puts the result high half
2370 into the result low half, and zeroes the high half:
2372 testl $32, %ecx
2374 cmovnz %hi, %lo
2375 movl $0, %tmp -- sigh; need yet another reg
2376 cmovnz %tmp, %hi
2377 */
2378 HReg rAmt, sHi, sLo, tHi, tLo, tTemp;
2379 tLo = newVRegI(env);
2380 tHi = newVRegI(env);
2381 tTemp = newVRegI(env);
2382 rAmt = iselIntExpr_R(env, e->Iex.Binop.arg2);
2383 iselInt64Expr(&sHi,&sLo, env, e->Iex.Binop.arg1);
2384 addInstr(env, mk_iMOVsd_RR(rAmt, hregX86_ECX()));
2385 addInstr(env, mk_iMOVsd_RR(sHi, tHi));
2386 addInstr(env, mk_iMOVsd_RR(sLo, tLo));
2387 /* Ok. Now shift amt is in %ecx, and value is in tHi/tLo
2388 and those regs are legitimately modifiable. */
2389 addInstr(env, X86Instr_Sh3232(Xsh_SHR, 0/*%cl*/, tHi, tLo));
2390 addInstr(env, X86Instr_Sh32(Xsh_SHR, 0/*%cl*/, tHi));
2391 addInstr(env, X86Instr_Test32(32, X86RM_Reg(hregX86_ECX())));
2392 addInstr(env, X86Instr_CMov32(Xcc_NZ, X86RM_Reg(tHi), tLo));
2393 addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(0), tTemp));
2394 addInstr(env, X86Instr_CMov32(Xcc_NZ, X86RM_Reg(tTemp), tHi));
2395 *rHi = tHi;
2396 *rLo = tLo;
2397 return;
2400 case Iop_Sar64: {
2401 /* gcc -O2 does the following. I don't know how it works, but it
2402 does work. Don't mess with it. This is hard to test because the
2403 x86 front end doesn't create Iop_Sar64 for any x86 instruction,
2404 so it's impossible to write a test program that feeds values
2405 through Iop_Sar64 and prints their results. The implementation
2406 here was tested by using psrlq on mmx registers -- that generates
2407 Iop_Shr64 -- and temporarily hacking the front end to generate
2408 Iop_Sar64 for that instruction instead.
2410 movl %amount, %ecx
2411 movl %srcHi, %r1
2412 movl %srcLo, %r2
2414 movl %r1, %r3
2415 sarl %cl, %r3
2416 movl %r2, %r4
2417 shrdl %cl, %r1, %r4
2418 movl %r3, %r2
2419 sarl $31, %r2
2420 andl $32, %ecx
2421 cmovne %r3, %r4 // = resLo
2422 cmovne %r2, %r3 // = resHi
2423 */
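/* A sketch of why it works: with n = %cl, r3 ends up as
   srcHi >>s (n % 32), r4 as the low word of (srcHi:srcLo) >> (n % 32)
   (shrdl shifts bits of %r1 in at the top of %r4), and r2 as the
   sign of srcHi smeared across 32 bits.  For n < 32 that is already
   resHi:resLo = r3:r4.  For 32 <= n < 64 the andl sets NZ and the
   cmovne's patch in resLo = r3 = srcHi >>s (n-32) and resHi = r2,
   the all-sign-bits word, which is the correct arithmetic shift. */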
2424 HReg amount = iselIntExpr_R(env, e->Iex.Binop.arg2);
2425 HReg srcHi = INVALID_HREG, srcLo = INVALID_HREG;
2426 iselInt64Expr(&srcHi, &srcLo, env, e->Iex.Binop.arg1);
2427 HReg r1 = newVRegI(env);
2428 HReg r2 = newVRegI(env);
2429 HReg r3 = newVRegI(env);
2430 HReg r4 = newVRegI(env);
2431 addInstr(env, mk_iMOVsd_RR(amount, hregX86_ECX()));
2432 addInstr(env, mk_iMOVsd_RR(srcHi, r1));
2433 addInstr(env, mk_iMOVsd_RR(srcLo, r2));
2435 addInstr(env, mk_iMOVsd_RR(r1, r3));
2436 addInstr(env, X86Instr_Sh32(Xsh_SAR, 0/*%cl*/, r3));
2437 addInstr(env, mk_iMOVsd_RR(r2, r4));
2438 addInstr(env, X86Instr_Sh3232(Xsh_SHR, 0/*%cl*/, r1, r4));
2439 addInstr(env, mk_iMOVsd_RR(r3, r2));
2440 addInstr(env, X86Instr_Sh32(Xsh_SAR, 31, r2));
2441 addInstr(env, X86Instr_Alu32R(Xalu_AND, X86RMI_Imm(32),
2442 hregX86_ECX()));
2443 addInstr(env, X86Instr_CMov32(Xcc_NZ, X86RM_Reg(r3), r4));
2444 addInstr(env, X86Instr_CMov32(Xcc_NZ, X86RM_Reg(r2), r3));
2445 *rHi = r3;
2446 *rLo = r4;
2447 return;
2450 /* F64 -> I64 */
2451 /* Sigh, this is an almost exact copy of the F64 -> I32/I16
2452 case. Unfortunately I see no easy way to avoid the
2453 duplication. */
2454 case Iop_F64toI64S: {
2455 HReg rf = iselDblExpr(env, e->Iex.Binop.arg2);
2456 HReg tLo = newVRegI(env);
2457 HReg tHi = newVRegI(env);
2459 /* Used several times ... */
2460 /* Careful ... this sharing is only safe because
2461 zero_esp/four_esp do not hold any registers which the
2462 register allocator could attempt to swizzle later. */
2463 X86AMode* zero_esp = X86AMode_IR(0, hregX86_ESP());
2464 X86AMode* four_esp = X86AMode_IR(4, hregX86_ESP());
2466 /* rf now holds the value to be converted; the rounding mode,
2467 encoded as per the IRRoundingMode enum, is supplied as arg1.
2468 The first thing to do is set the FPU's rounding mode
2469 accordingly. */
2471 /* Create a space for the format conversion. */
2472 /* subl $8, %esp */
2473 sub_from_esp(env, 8);
2475 /* Set host rounding mode */
2476 set_FPU_rounding_mode( env, e->Iex.Binop.arg1 );
2478 /* gistll %rf, 0(%esp) */
2479 addInstr(env, X86Instr_FpLdStI(False/*store*/, 8, rf, zero_esp));
2481 /* movl 0(%esp), %dstLo */
2482 /* movl 4(%esp), %dstHi */
2483 addInstr(env, X86Instr_Alu32R(
2484 Xalu_MOV, X86RMI_Mem(zero_esp), tLo));
2485 addInstr(env, X86Instr_Alu32R(
2486 Xalu_MOV, X86RMI_Mem(four_esp), tHi));
2488 /* Restore default FPU rounding. */
2489 set_FPU_rounding_default( env );
2491 /* addl $8, %esp */
2492 add_to_esp(env, 8);
2494 *rHi = tHi;
2495 *rLo = tLo;
2496 return;
2499 case Iop_Add8x8:
2500 fn = (HWord)h_generic_calc_Add8x8; goto binnish;
2501 case Iop_Add16x4:
2502 fn = (HWord)h_generic_calc_Add16x4; goto binnish;
2503 case Iop_Add32x2:
2504 fn = (HWord)h_generic_calc_Add32x2; goto binnish;
2506 case Iop_Avg8Ux8:
2507 fn = (HWord)h_generic_calc_Avg8Ux8; goto binnish;
2508 case Iop_Avg16Ux4:
2509 fn = (HWord)h_generic_calc_Avg16Ux4; goto binnish;
2511 case Iop_CmpEQ8x8:
2512 fn = (HWord)h_generic_calc_CmpEQ8x8; goto binnish;
2513 case Iop_CmpEQ16x4:
2514 fn = (HWord)h_generic_calc_CmpEQ16x4; goto binnish;
2515 case Iop_CmpEQ32x2:
2516 fn = (HWord)h_generic_calc_CmpEQ32x2; goto binnish;
2518 case Iop_CmpGT8Sx8:
2519 fn = (HWord)h_generic_calc_CmpGT8Sx8; goto binnish;
2520 case Iop_CmpGT16Sx4:
2521 fn = (HWord)h_generic_calc_CmpGT16Sx4; goto binnish;
2522 case Iop_CmpGT32Sx2:
2523 fn = (HWord)h_generic_calc_CmpGT32Sx2; goto binnish;
2525 case Iop_InterleaveHI8x8:
2526 fn = (HWord)h_generic_calc_InterleaveHI8x8; goto binnish;
2527 case Iop_InterleaveLO8x8:
2528 fn = (HWord)h_generic_calc_InterleaveLO8x8; goto binnish;
2529 case Iop_InterleaveHI16x4:
2530 fn = (HWord)h_generic_calc_InterleaveHI16x4; goto binnish;
2531 case Iop_InterleaveLO16x4:
2532 fn = (HWord)h_generic_calc_InterleaveLO16x4; goto binnish;
2533 case Iop_InterleaveHI32x2:
2534 fn = (HWord)h_generic_calc_InterleaveHI32x2; goto binnish;
2535 case Iop_InterleaveLO32x2:
2536 fn = (HWord)h_generic_calc_InterleaveLO32x2; goto binnish;
2537 case Iop_CatOddLanes16x4:
2538 fn = (HWord)h_generic_calc_CatOddLanes16x4; goto binnish;
2539 case Iop_CatEvenLanes16x4:
2540 fn = (HWord)h_generic_calc_CatEvenLanes16x4; goto binnish;
2541 case Iop_Perm8x8:
2542 fn = (HWord)h_generic_calc_Perm8x8; goto binnish;
2544 case Iop_Max8Ux8:
2545 fn = (HWord)h_generic_calc_Max8Ux8; goto binnish;
2546 case Iop_Max16Sx4:
2547 fn = (HWord)h_generic_calc_Max16Sx4; goto binnish;
2548 case Iop_Min8Ux8:
2549 fn = (HWord)h_generic_calc_Min8Ux8; goto binnish;
2550 case Iop_Min16Sx4:
2551 fn = (HWord)h_generic_calc_Min16Sx4; goto binnish;
2553 case Iop_Mul16x4:
2554 fn = (HWord)h_generic_calc_Mul16x4; goto binnish;
2555 case Iop_Mul32x2:
2556 fn = (HWord)h_generic_calc_Mul32x2; goto binnish;
2557 case Iop_MulHi16Sx4:
2558 fn = (HWord)h_generic_calc_MulHi16Sx4; goto binnish;
2559 case Iop_MulHi16Ux4:
2560 fn = (HWord)h_generic_calc_MulHi16Ux4; goto binnish;
2562 case Iop_QAdd8Sx8:
2563 fn = (HWord)h_generic_calc_QAdd8Sx8; goto binnish;
2564 case Iop_QAdd16Sx4:
2565 fn = (HWord)h_generic_calc_QAdd16Sx4; goto binnish;
2566 case Iop_QAdd8Ux8:
2567 fn = (HWord)h_generic_calc_QAdd8Ux8; goto binnish;
2568 case Iop_QAdd16Ux4:
2569 fn = (HWord)h_generic_calc_QAdd16Ux4; goto binnish;
2571 case Iop_QNarrowBin32Sto16Sx4:
2572 fn = (HWord)h_generic_calc_QNarrowBin32Sto16Sx4; goto binnish;
2573 case Iop_QNarrowBin16Sto8Sx8:
2574 fn = (HWord)h_generic_calc_QNarrowBin16Sto8Sx8; goto binnish;
2575 case Iop_QNarrowBin16Sto8Ux8:
2576 fn = (HWord)h_generic_calc_QNarrowBin16Sto8Ux8; goto binnish;
2577 case Iop_NarrowBin16to8x8:
2578 fn = (HWord)h_generic_calc_NarrowBin16to8x8; goto binnish;
2579 case Iop_NarrowBin32to16x4:
2580 fn = (HWord)h_generic_calc_NarrowBin32to16x4; goto binnish;
2582 case Iop_QSub8Sx8:
2583 fn = (HWord)h_generic_calc_QSub8Sx8; goto binnish;
2584 case Iop_QSub16Sx4:
2585 fn = (HWord)h_generic_calc_QSub16Sx4; goto binnish;
2586 case Iop_QSub8Ux8:
2587 fn = (HWord)h_generic_calc_QSub8Ux8; goto binnish;
2588 case Iop_QSub16Ux4:
2589 fn = (HWord)h_generic_calc_QSub16Ux4; goto binnish;
2591 case Iop_Sub8x8:
2592 fn = (HWord)h_generic_calc_Sub8x8; goto binnish;
2593 case Iop_Sub16x4:
2594 fn = (HWord)h_generic_calc_Sub16x4; goto binnish;
2595 case Iop_Sub32x2:
2596 fn = (HWord)h_generic_calc_Sub32x2; goto binnish;
2598 binnish: {
2599 /* Note: the following assumes all helpers are of
2600 signature
2601 ULong fn ( ULong, ULong ), and they are
2602 not marked as regparm functions.
2603 */
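/* So the arguments go on the stack cdecl-style: y is pushed first,
   then x, leaving x (the first ULong parameter) at the lowest
   address.  The result comes back in %edx:%eax and the four pushed
   words are reclaimed below with add_to_esp(4*4). */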
2604 HReg xLo, xHi, yLo, yHi;
2605 HReg tLo = newVRegI(env);
2606 HReg tHi = newVRegI(env);
2607 iselInt64Expr(&yHi, &yLo, env, e->Iex.Binop.arg2);
2608 addInstr(env, X86Instr_Push(X86RMI_Reg(yHi)));
2609 addInstr(env, X86Instr_Push(X86RMI_Reg(yLo)));
2610 iselInt64Expr(&xHi, &xLo, env, e->Iex.Binop.arg1);
2611 addInstr(env, X86Instr_Push(X86RMI_Reg(xHi)));
2612 addInstr(env, X86Instr_Push(X86RMI_Reg(xLo)));
2613 addInstr(env, X86Instr_Call( Xcc_ALWAYS, (Addr32)fn,
2614 0, mk_RetLoc_simple(RLPri_2Int) ));
2615 add_to_esp(env, 4*4);
2616 addInstr(env, mk_iMOVsd_RR(hregX86_EDX(), tHi));
2617 addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), tLo));
2618 *rHi = tHi;
2619 *rLo = tLo;
2620 return;
2623 case Iop_ShlN32x2:
2624 fn = (HWord)h_generic_calc_ShlN32x2; goto shifty;
2625 case Iop_ShlN16x4:
2626 fn = (HWord)h_generic_calc_ShlN16x4; goto shifty;
2627 case Iop_ShlN8x8:
2628 fn = (HWord)h_generic_calc_ShlN8x8; goto shifty;
2629 case Iop_ShrN32x2:
2630 fn = (HWord)h_generic_calc_ShrN32x2; goto shifty;
2631 case Iop_ShrN16x4:
2632 fn = (HWord)h_generic_calc_ShrN16x4; goto shifty;
2633 case Iop_SarN32x2:
2634 fn = (HWord)h_generic_calc_SarN32x2; goto shifty;
2635 case Iop_SarN16x4:
2636 fn = (HWord)h_generic_calc_SarN16x4; goto shifty;
2637 case Iop_SarN8x8:
2638 fn = (HWord)h_generic_calc_SarN8x8; goto shifty;
2639 shifty: {
2640 /* Note: the following assumes all helpers are of
2641 signature
2642 ULong fn ( ULong, UInt ), and they are
2643 not marked as regparm functions.
2644 */
2645 HReg xLo, xHi;
2646 HReg tLo = newVRegI(env);
2647 HReg tHi = newVRegI(env);
2648 X86RMI* y = iselIntExpr_RMI(env, e->Iex.Binop.arg2);
2649 addInstr(env, X86Instr_Push(y));
2650 iselInt64Expr(&xHi, &xLo, env, e->Iex.Binop.arg1);
2651 addInstr(env, X86Instr_Push(X86RMI_Reg(xHi)));
2652 addInstr(env, X86Instr_Push(X86RMI_Reg(xLo)));
2653 addInstr(env, X86Instr_Call( Xcc_ALWAYS, (Addr32)fn,
2654 0, mk_RetLoc_simple(RLPri_2Int) ));
2655 add_to_esp(env, 3*4);
2656 addInstr(env, mk_iMOVsd_RR(hregX86_EDX(), tHi));
2657 addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), tLo));
2658 *rHi = tHi;
2659 *rLo = tLo;
2660 return;
2663 default:
2664 break;
2666 } /* if (e->tag == Iex_Binop) */
2669 /* --------- UNARY ops --------- */
2670 if (e->tag == Iex_Unop) {
2671 switch (e->Iex.Unop.op) {
2673 /* 32Sto64(e) */
2674 case Iop_32Sto64: {
2675 HReg tLo = newVRegI(env);
2676 HReg tHi = newVRegI(env);
2677 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
2678 addInstr(env, mk_iMOVsd_RR(src,tHi));
2679 addInstr(env, mk_iMOVsd_RR(src,tLo));
2680 addInstr(env, X86Instr_Sh32(Xsh_SAR, 31, tHi));
2681 *rHi = tHi;
2682 *rLo = tLo;
2683 return;
2686 /* 32Uto64(e) */
2687 case Iop_32Uto64: {
2688 HReg tLo = newVRegI(env);
2689 HReg tHi = newVRegI(env);
2690 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
2691 addInstr(env, mk_iMOVsd_RR(src,tLo));
2692 addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(0), tHi));
2693 *rHi = tHi;
2694 *rLo = tLo;
2695 return;
2698 /* 16Uto64(e) */
2699 case Iop_16Uto64: {
2700 HReg tLo = newVRegI(env);
2701 HReg tHi = newVRegI(env);
2702 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
2703 addInstr(env, mk_iMOVsd_RR(src,tLo));
2704 addInstr(env, X86Instr_Alu32R(Xalu_AND,
2705 X86RMI_Imm(0xFFFF), tLo));
2706 addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(0), tHi));
2707 *rHi = tHi;
2708 *rLo = tLo;
2709 return;
2712 /* V128{HI}to64 */
2713 case Iop_V128HIto64:
2714 case Iop_V128to64: {
2715 Int off = e->Iex.Unop.op==Iop_V128HIto64 ? 8 : 0;
2716 HReg tLo = newVRegI(env);
2717 HReg tHi = newVRegI(env);
2718 HReg vec = iselVecExpr(env, e->Iex.Unop.arg);
2719 X86AMode* esp0 = X86AMode_IR(0, hregX86_ESP());
2720 X86AMode* espLO = X86AMode_IR(off, hregX86_ESP());
2721 X86AMode* espHI = X86AMode_IR(off+4, hregX86_ESP());
2722 sub_from_esp(env, 16);
2723 addInstr(env, X86Instr_SseLdSt(False/*store*/, vec, esp0));
2724 addInstr(env, X86Instr_Alu32R( Xalu_MOV,
2725 X86RMI_Mem(espLO), tLo ));
2726 addInstr(env, X86Instr_Alu32R( Xalu_MOV,
2727 X86RMI_Mem(espHI), tHi ));
2728 add_to_esp(env, 16);
2729 *rHi = tHi;
2730 *rLo = tLo;
2731 return;
2734 /* could do better than this, but for now ... */
2735 case Iop_1Sto64: {
2736 HReg tLo = newVRegI(env);
2737 HReg tHi = newVRegI(env);
2738 X86CondCode cond = iselCondCode(env, e->Iex.Unop.arg);
2739 addInstr(env, X86Instr_Set32(cond,tLo));
2740 addInstr(env, X86Instr_Sh32(Xsh_SHL, 31, tLo));
2741 addInstr(env, X86Instr_Sh32(Xsh_SAR, 31, tLo));
2742 addInstr(env, mk_iMOVsd_RR(tLo, tHi));
2743 *rHi = tHi;
2744 *rLo = tLo;
2745 return;
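/* The SHL 31 / SAR 31 pair smears bit 0 of the Set32 result across
   the whole word, turning 0/1 into 0x00000000/0xFFFFFFFF, which is
   then duplicated into the high half. */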
2748 /* Not64(e) */
2749 case Iop_Not64: {
2750 HReg tLo = newVRegI(env);
2751 HReg tHi = newVRegI(env);
2752 HReg sHi, sLo;
2753 iselInt64Expr(&sHi, &sLo, env, e->Iex.Unop.arg);
2754 addInstr(env, mk_iMOVsd_RR(sHi, tHi));
2755 addInstr(env, mk_iMOVsd_RR(sLo, tLo));
2756 addInstr(env, X86Instr_Unary32(Xun_NOT,tHi));
2757 addInstr(env, X86Instr_Unary32(Xun_NOT,tLo));
2758 *rHi = tHi;
2759 *rLo = tLo;
2760 return;
2763 /* Left64(e) */
2764 case Iop_Left64: {
2765 HReg yLo, yHi;
2766 HReg tLo = newVRegI(env);
2767 HReg tHi = newVRegI(env);
2768 /* yHi:yLo = arg */
2769 iselInt64Expr(&yHi, &yLo, env, e->Iex.Unop.arg);
2770 /* tLo = 0 - yLo, and set carry */
2771 addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(0), tLo));
2772 addInstr(env, X86Instr_Alu32R(Xalu_SUB, X86RMI_Reg(yLo), tLo));
2773 /* tHi = 0 - yHi - carry */
2774 addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(0), tHi));
2775 addInstr(env, X86Instr_Alu32R(Xalu_SBB, X86RMI_Reg(yHi), tHi));
2776 /* So now we have tHi:tLo = -arg. To finish off, or 'arg'
2777 back in, so as to give the final result
2778 tHi:tLo = arg | -arg. */
2779 addInstr(env, X86Instr_Alu32R(Xalu_OR, X86RMI_Reg(yLo), tLo));
2780 addInstr(env, X86Instr_Alu32R(Xalu_OR, X86RMI_Reg(yHi), tHi));
2781 *rHi = tHi;
2782 *rLo = tLo;
2783 return;
2786 /* --- patterns rooted at: CmpwNEZ64 --- */
2788 /* CmpwNEZ64(e) */
2789 case Iop_CmpwNEZ64: {
2791 DECLARE_PATTERN(p_CmpwNEZ64_Or64);
2792 DEFINE_PATTERN(p_CmpwNEZ64_Or64,
2793 unop(Iop_CmpwNEZ64,binop(Iop_Or64,bind(0),bind(1))));
2794 if (matchIRExpr(&mi, p_CmpwNEZ64_Or64, e)) {
2795 /* CmpwNEZ64(Or64(x,y)) */
2796 HReg xHi,xLo,yHi,yLo;
2797 HReg xBoth = newVRegI(env);
2798 HReg merged = newVRegI(env);
2799 HReg tmp2 = newVRegI(env);
2801 iselInt64Expr(&xHi,&xLo, env, mi.bindee[0]);
2802 addInstr(env, mk_iMOVsd_RR(xHi,xBoth));
2803 addInstr(env, X86Instr_Alu32R(Xalu_OR,
2804 X86RMI_Reg(xLo),xBoth));
2806 iselInt64Expr(&yHi,&yLo, env, mi.bindee[1]);
2807 addInstr(env, mk_iMOVsd_RR(yHi,merged));
2808 addInstr(env, X86Instr_Alu32R(Xalu_OR,
2809 X86RMI_Reg(yLo),merged));
2810 addInstr(env, X86Instr_Alu32R(Xalu_OR,
2811 X86RMI_Reg(xBoth),merged));
2813 /* tmp2 = (merged | -merged) >>s 31 */
2814 addInstr(env, mk_iMOVsd_RR(merged,tmp2));
2815 addInstr(env, X86Instr_Unary32(Xun_NEG,tmp2));
2816 addInstr(env, X86Instr_Alu32R(Xalu_OR,
2817 X86RMI_Reg(merged), tmp2));
2818 addInstr(env, X86Instr_Sh32(Xsh_SAR, 31, tmp2));
2819 *rHi = tmp2;
2820 *rLo = tmp2;
2821 return;
2822 } else {
2823 /* CmpwNEZ64(e) */
2824 HReg srcLo, srcHi;
2825 HReg tmp1 = newVRegI(env);
2826 HReg tmp2 = newVRegI(env);
2827 /* srcHi:srcLo = arg */
2828 iselInt64Expr(&srcHi, &srcLo, env, e->Iex.Unop.arg);
2829 /* tmp1 = srcHi | srcLo */
2830 addInstr(env, mk_iMOVsd_RR(srcHi,tmp1));
2831 addInstr(env, X86Instr_Alu32R(Xalu_OR,
2832 X86RMI_Reg(srcLo), tmp1));
2833 /* tmp2 = (tmp1 | -tmp1) >>s 31 */
2834 addInstr(env, mk_iMOVsd_RR(tmp1,tmp2));
2835 addInstr(env, X86Instr_Unary32(Xun_NEG,tmp2));
2836 addInstr(env, X86Instr_Alu32R(Xalu_OR,
2837 X86RMI_Reg(tmp1), tmp2));
2838 addInstr(env, X86Instr_Sh32(Xsh_SAR, 31, tmp2));
2839 *rHi = tmp2;
2840 *rLo = tmp2;
2841 return;
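/* Both paths rely on the identity that, for a 32-bit x, (x | -x) has
   its sign bit set iff x != 0, so the arithmetic shift by 31 yields
   0 or all-ones as required. */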
2845 /* ReinterpF64asI64(e) */
2846 /* Given an IEEE754 double, produce an I64 with the same bit
2847 pattern. */
2848 case Iop_ReinterpF64asI64: {
2849 HReg rf = iselDblExpr(env, e->Iex.Unop.arg);
2850 HReg tLo = newVRegI(env);
2851 HReg tHi = newVRegI(env);
2852 X86AMode* zero_esp = X86AMode_IR(0, hregX86_ESP());
2853 X86AMode* four_esp = X86AMode_IR(4, hregX86_ESP());
2854 /* paranoia */
2855 set_FPU_rounding_default(env);
2856 /* subl $8, %esp */
2857 sub_from_esp(env, 8);
2858 /* gstD %rf, 0(%esp) */
2859 addInstr(env,
2860 X86Instr_FpLdSt(False/*store*/, 8, rf, zero_esp));
2861 /* movl 0(%esp), %tLo */
2862 addInstr(env,
2863 X86Instr_Alu32R(Xalu_MOV, X86RMI_Mem(zero_esp), tLo));
2864 /* movl 4(%esp), %tHi */
2865 addInstr(env,
2866 X86Instr_Alu32R(Xalu_MOV, X86RMI_Mem(four_esp), tHi));
2867 /* addl $8, %esp */
2868 add_to_esp(env, 8);
2869 *rHi = tHi;
2870 *rLo = tLo;
2871 return;
2874 case Iop_CmpNEZ32x2:
2875 fn = (HWord)h_generic_calc_CmpNEZ32x2; goto unish;
2876 case Iop_CmpNEZ16x4:
2877 fn = (HWord)h_generic_calc_CmpNEZ16x4; goto unish;
2878 case Iop_CmpNEZ8x8:
2879 fn = (HWord)h_generic_calc_CmpNEZ8x8; goto unish;
2880 unish: {
2881 /* Note: the following assumes all helpers are of
2882 signature
2883 ULong fn ( ULong ), and they are
2884 not marked as regparm functions.
2885 */
2886 HReg xLo, xHi;
2887 HReg tLo = newVRegI(env);
2888 HReg tHi = newVRegI(env);
2889 iselInt64Expr(&xHi, &xLo, env, e->Iex.Unop.arg);
2890 addInstr(env, X86Instr_Push(X86RMI_Reg(xHi)));
2891 addInstr(env, X86Instr_Push(X86RMI_Reg(xLo)));
2892 addInstr(env, X86Instr_Call( Xcc_ALWAYS, (Addr32)fn,
2893 0, mk_RetLoc_simple(RLPri_2Int) ));
2894 add_to_esp(env, 2*4);
2895 addInstr(env, mk_iMOVsd_RR(hregX86_EDX(), tHi));
2896 addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), tLo));
2897 *rHi = tHi;
2898 *rLo = tLo;
2899 return;
2902 default:
2903 break;
2905 } /* if (e->tag == Iex_Unop) */
2908 /* --------- CCALL --------- */
2909 if (e->tag == Iex_CCall) {
2910 HReg tLo = newVRegI(env);
2911 HReg tHi = newVRegI(env);
2913 /* Marshal args, do the call, clear stack. */
2914 UInt addToSp = 0;
2915 RetLoc rloc = mk_RetLoc_INVALID();
2916 doHelperCall( &addToSp, &rloc, env, NULL/*guard*/,
2917 e->Iex.CCall.cee,
2918 e->Iex.CCall.retty, e->Iex.CCall.args );
2919 vassert(is_sane_RetLoc(rloc));
2920 vassert(rloc.pri == RLPri_2Int);
2921 vassert(addToSp == 0);
2922 /* */
2924 addInstr(env, mk_iMOVsd_RR(hregX86_EDX(), tHi));
2925 addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), tLo));
2926 *rHi = tHi;
2927 *rLo = tLo;
2928 return;
2931 ppIRExpr(e);
2932 vpanic("iselInt64Expr");
2936 /*---------------------------------------------------------*/
2937 /*--- ISEL: Floating point expressions (32 bit) ---*/
2938 /*---------------------------------------------------------*/
2940 /* Nothing interesting here; really just wrappers for
2941 64-bit stuff. */
2943 static HReg iselFltExpr ( ISelEnv* env, const IRExpr* e )
2945 HReg r = iselFltExpr_wrk( env, e );
2946 # if 0
2947 vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
2948 # endif
2949 vassert(hregClass(r) == HRcFlt64); /* yes, really Flt64 */
2950 vassert(hregIsVirtual(r));
2951 return r;
2954 /* DO NOT CALL THIS DIRECTLY */
2955 static HReg iselFltExpr_wrk ( ISelEnv* env, const IRExpr* e )
2957 IRType ty = typeOfIRExpr(env->type_env,e);
2958 vassert(ty == Ity_F32);
2960 if (e->tag == Iex_RdTmp) {
2961 return lookupIRTemp(env, e->Iex.RdTmp.tmp);
2964 if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
2965 X86AMode* am;
2966 HReg res = newVRegF(env);
2967 vassert(e->Iex.Load.ty == Ity_F32);
2968 am = iselIntExpr_AMode(env, e->Iex.Load.addr);
2969 addInstr(env, X86Instr_FpLdSt(True/*load*/, 4, res, am));
2970 return res;
2973 if (e->tag == Iex_Binop
2974 && e->Iex.Binop.op == Iop_F64toF32) {
2975 /* Although the result is still held in a standard FPU register,
2976 we need to round it to reflect the loss of accuracy/range
2977 entailed in casting it to a 32-bit float. */
2978 HReg dst = newVRegF(env);
2979 HReg src = iselDblExpr(env, e->Iex.Binop.arg2);
2980 set_FPU_rounding_mode( env, e->Iex.Binop.arg1 );
2981 addInstr(env, X86Instr_Fp64to32(src,dst));
2982 set_FPU_rounding_default( env );
2983 return dst;
2986 if (e->tag == Iex_Get) {
2987 X86AMode* am = X86AMode_IR( e->Iex.Get.offset,
2988 hregX86_EBP() );
2989 HReg res = newVRegF(env);
2990 addInstr(env, X86Instr_FpLdSt( True/*load*/, 4, res, am ));
2991 return res;
2994 if (e->tag == Iex_Unop
2995 && e->Iex.Unop.op == Iop_ReinterpI32asF32) {
2996 /* Given an I32, produce an IEEE754 float with the same bit
2997 pattern. */
2998 HReg dst = newVRegF(env);
2999 X86RMI* rmi = iselIntExpr_RMI(env, e->Iex.Unop.arg);
3000 /* paranoia */
3001 addInstr(env, X86Instr_Push(rmi));
3002 addInstr(env, X86Instr_FpLdSt(
3003 True/*load*/, 4, dst,
3004 X86AMode_IR(0, hregX86_ESP())));
3005 add_to_esp(env, 4);
3006 return dst;
3009 if (e->tag == Iex_Binop && e->Iex.Binop.op == Iop_RoundF32toInt) {
3010 HReg rf = iselFltExpr(env, e->Iex.Binop.arg2);
3011 HReg dst = newVRegF(env);
3013 /* rf now holds the value to be rounded. The first thing to do
3014 is set the FPU's rounding mode accordingly. */
3016 /* Set host rounding mode */
3017 set_FPU_rounding_mode( env, e->Iex.Binop.arg1 );
3019 /* grndint %rf, %dst */
3020 addInstr(env, X86Instr_FpUnary(Xfp_ROUND, rf, dst));
3022 /* Restore default FPU rounding. */
3023 set_FPU_rounding_default( env );
3025 return dst;
3028 ppIRExpr(e);
3029 vpanic("iselFltExpr_wrk");
3033 /*---------------------------------------------------------*/
3034 /*--- ISEL: Floating point expressions (64 bit) ---*/
3035 /*---------------------------------------------------------*/
3037 /* Compute a 64-bit floating point value into a register, the identity
3038 of which is returned. As with iselIntExpr_R, the reg may be either
3039 real or virtual; in any case it must not be changed by subsequent
3040 code emitted by the caller. */
3042 /* IEEE 754 formats. From http://www.freesoft.org/CIE/RFC/1832/32.htm:
3044 Type                S (1 bit)  E (11 bits)  F (52 bits)
3045 ----                ---------  -----------  -----------
3046 signalling NaN      u          2047 (max)   .0uuuuu---u
3047                                             (with at least
3048                                              one 1 bit)
3049 quiet NaN           u          2047 (max)   .1uuuuu---u
3051 negative infinity   1          2047 (max)   .000000---0
3053 positive infinity   0          2047 (max)   .000000---0
3055 negative zero       1          0            .000000---0
3057 positive zero       0          0            .000000---0
3058 */
3060 static HReg iselDblExpr ( ISelEnv* env, const IRExpr* e )
3062 HReg r = iselDblExpr_wrk( env, e );
3063 # if 0
3064 vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
3065 # endif
3066 vassert(hregClass(r) == HRcFlt64);
3067 vassert(hregIsVirtual(r));
3068 return r;
3071 /* DO NOT CALL THIS DIRECTLY */
3072 static HReg iselDblExpr_wrk ( ISelEnv* env, const IRExpr* e )
3074 IRType ty = typeOfIRExpr(env->type_env,e);
3075 vassert(e);
3076 vassert(ty == Ity_F64);
3078 if (e->tag == Iex_RdTmp) {
3079 return lookupIRTemp(env, e->Iex.RdTmp.tmp);
3082 if (e->tag == Iex_Const) {
3083 union { UInt u32x2[2]; ULong u64; Double f64; } u;
3084 HReg freg = newVRegF(env);
3085 vassert(sizeof(u) == 8);
3086 vassert(sizeof(u.u64) == 8);
3087 vassert(sizeof(u.f64) == 8);
3088 vassert(sizeof(u.u32x2) == 8);
3090 if (e->Iex.Const.con->tag == Ico_F64) {
3091 u.f64 = e->Iex.Const.con->Ico.F64;
3093 else if (e->Iex.Const.con->tag == Ico_F64i) {
3094 u.u64 = e->Iex.Const.con->Ico.F64i;
3096 else
3097 vpanic("iselDblExpr(x86): const");
3099 addInstr(env, X86Instr_Push(X86RMI_Imm(u.u32x2[1])));
3100 addInstr(env, X86Instr_Push(X86RMI_Imm(u.u32x2[0])));
3101 addInstr(env, X86Instr_FpLdSt(True/*load*/, 8, freg,
3102 X86AMode_IR(0, hregX86_ESP())));
3103 add_to_esp(env, 8);
3104 return freg;
3107 if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
3108 X86AMode* am;
3109 HReg res = newVRegF(env);
3110 vassert(e->Iex.Load.ty == Ity_F64);
3111 am = iselIntExpr_AMode(env, e->Iex.Load.addr);
3112 addInstr(env, X86Instr_FpLdSt(True/*load*/, 8, res, am));
3113 return res;
3116 if (e->tag == Iex_Get) {
3117 X86AMode* am = X86AMode_IR( e->Iex.Get.offset,
3118 hregX86_EBP() );
3119 HReg res = newVRegF(env);
3120 addInstr(env, X86Instr_FpLdSt( True/*load*/, 8, res, am ));
3121 return res;
3124 if (e->tag == Iex_GetI) {
3125 X86AMode* am
3126 = genGuestArrayOffset(
3127 env, e->Iex.GetI.descr,
3128 e->Iex.GetI.ix, e->Iex.GetI.bias );
3129 HReg res = newVRegF(env);
3130 addInstr(env, X86Instr_FpLdSt( True/*load*/, 8, res, am ));
3131 return res;
3134 if (e->tag == Iex_Triop) {
3135 X86FpOp fpop = Xfp_INVALID;
3136 IRTriop *triop = e->Iex.Triop.details;
3137 switch (triop->op) {
3138 case Iop_AddF64: fpop = Xfp_ADD; break;
3139 case Iop_SubF64: fpop = Xfp_SUB; break;
3140 case Iop_MulF64: fpop = Xfp_MUL; break;
3141 case Iop_DivF64: fpop = Xfp_DIV; break;
3142 case Iop_ScaleF64: fpop = Xfp_SCALE; break;
3143 case Iop_Yl2xF64: fpop = Xfp_YL2X; break;
3144 case Iop_Yl2xp1F64: fpop = Xfp_YL2XP1; break;
3145 case Iop_AtanF64: fpop = Xfp_ATAN; break;
3146 case Iop_PRemF64: fpop = Xfp_PREM; break;
3147 case Iop_PRem1F64: fpop = Xfp_PREM1; break;
3148 default: break;
3150 if (fpop != Xfp_INVALID) {
3151 HReg res = newVRegF(env);
3152 HReg srcL = iselDblExpr(env, triop->arg2);
3153 HReg srcR = iselDblExpr(env, triop->arg3);
3154 /* XXXROUNDINGFIXME */
3155 /* set roundingmode here */
3156 addInstr(env, X86Instr_FpBinary(fpop,srcL,srcR,res));
3157 if (fpop != Xfp_ADD && fpop != Xfp_SUB
3158 && fpop != Xfp_MUL && fpop != Xfp_DIV)
3159 roundToF64(env, res);
3160 return res;
3164 if (e->tag == Iex_Binop && e->Iex.Binop.op == Iop_RoundF64toInt) {
3165 HReg rf = iselDblExpr(env, e->Iex.Binop.arg2);
3166 HReg dst = newVRegF(env);
3168 /* rf now holds the value to be rounded. The first thing to do
3169 is set the FPU's rounding mode accordingly. */
3171 /* Set host rounding mode */
3172 set_FPU_rounding_mode( env, e->Iex.Binop.arg1 );
3174 /* grndint %rf, %dst */
3175 addInstr(env, X86Instr_FpUnary(Xfp_ROUND, rf, dst));
3177 /* Restore default FPU rounding. */
3178 set_FPU_rounding_default( env );
3180 return dst;
3183 if (e->tag == Iex_Binop && e->Iex.Binop.op == Iop_I64StoF64) {
3184 HReg dst = newVRegF(env);
3185 HReg rHi,rLo;
3186 iselInt64Expr( &rHi, &rLo, env, e->Iex.Binop.arg2);
3187 addInstr(env, X86Instr_Push(X86RMI_Reg(rHi)));
3188 addInstr(env, X86Instr_Push(X86RMI_Reg(rLo)));
3190 /* Set host rounding mode */
3191 set_FPU_rounding_mode( env, e->Iex.Binop.arg1 );
3193 addInstr(env, X86Instr_FpLdStI(
3194 True/*load*/, 8, dst,
3195 X86AMode_IR(0, hregX86_ESP())));
3197 /* Restore default FPU rounding. */
3198 set_FPU_rounding_default( env );
3200 add_to_esp(env, 8);
3201 return dst;
3204 if (e->tag == Iex_Binop) {
3205 X86FpOp fpop = Xfp_INVALID;
3206 switch (e->Iex.Binop.op) {
3207 case Iop_SinF64: fpop = Xfp_SIN; break;
3208 case Iop_CosF64: fpop = Xfp_COS; break;
3209 case Iop_TanF64: fpop = Xfp_TAN; break;
3210 case Iop_2xm1F64: fpop = Xfp_2XM1; break;
3211 case Iop_SqrtF64: fpop = Xfp_SQRT; break;
3212 default: break;
3214 if (fpop != Xfp_INVALID) {
3215 HReg res = newVRegF(env);
3216 HReg src = iselDblExpr(env, e->Iex.Binop.arg2);
3217 /* XXXROUNDINGFIXME */
3218 /* set roundingmode here */
3219 /* Note that X86Instr_FpUnary(Xfp_TAN,..) sets the condition
3220 codes. I don't think that matters, since this insn
3221 selector never generates such an instruction intervening
3222 between a flag-setting instruction and a flag-using
3223 instruction. */
3224 addInstr(env, X86Instr_FpUnary(fpop,src,res));
3225 if (fpop != Xfp_SQRT
3226 && fpop != Xfp_NEG && fpop != Xfp_ABS)
3227 roundToF64(env, res);
3228 return res;
3232 if (e->tag == Iex_Unop) {
3233 X86FpOp fpop = Xfp_INVALID;
3234 switch (e->Iex.Unop.op) {
3235 case Iop_NegF64: fpop = Xfp_NEG; break;
3236 case Iop_AbsF64: fpop = Xfp_ABS; break;
3237 default: break;
3239 if (fpop != Xfp_INVALID) {
3240 HReg res = newVRegF(env);
3241 HReg src = iselDblExpr(env, e->Iex.Unop.arg);
3242 addInstr(env, X86Instr_FpUnary(fpop,src,res));
3243 /* No need to do roundToF64(env,res) for Xfp_NEG or Xfp_ABS,
3244 but might need to do that for other unary ops. */
3245 return res;
3249 if (e->tag == Iex_Unop) {
3250 switch (e->Iex.Unop.op) {
3251 case Iop_I32StoF64: {
3252 HReg dst = newVRegF(env);
3253 HReg ri = iselIntExpr_R(env, e->Iex.Unop.arg);
3254 addInstr(env, X86Instr_Push(X86RMI_Reg(ri)));
3255 set_FPU_rounding_default(env);
3256 addInstr(env, X86Instr_FpLdStI(
3257 True/*load*/, 4, dst,
3258 X86AMode_IR(0, hregX86_ESP())));
3259 add_to_esp(env, 4);
3260 return dst;
3262 case Iop_ReinterpI64asF64: {
3263 /* Given an I64, produce an IEEE754 double with the same
3264 bit pattern. */
3265 HReg dst = newVRegF(env);
3266 HReg rHi, rLo;
3267 iselInt64Expr( &rHi, &rLo, env, e->Iex.Unop.arg);
3268 /* paranoia */
3269 set_FPU_rounding_default(env);
3270 addInstr(env, X86Instr_Push(X86RMI_Reg(rHi)));
3271 addInstr(env, X86Instr_Push(X86RMI_Reg(rLo)));
3272 addInstr(env, X86Instr_FpLdSt(
3273 True/*load*/, 8, dst,
3274 X86AMode_IR(0, hregX86_ESP())));
3275 add_to_esp(env, 8);
3276 return dst;
3278 case Iop_F32toF64: {
3279 /* this is a no-op */
3280 HReg res = iselFltExpr(env, e->Iex.Unop.arg);
3281 return res;
3283 default:
3284 break;
3288 /* --------- MULTIPLEX --------- */
3289 if (e->tag == Iex_ITE) { // VFD
3290 if (ty == Ity_F64
3291 && typeOfIRExpr(env->type_env,e->Iex.ITE.cond) == Ity_I1) {
3292 HReg r1 = iselDblExpr(env, e->Iex.ITE.iftrue);
3293 HReg r0 = iselDblExpr(env, e->Iex.ITE.iffalse);
3294 HReg dst = newVRegF(env);
3295 addInstr(env, X86Instr_FpUnary(Xfp_MOV,r1,dst));
3296 X86CondCode cc = iselCondCode(env, e->Iex.ITE.cond);
3297 addInstr(env, X86Instr_FpCMov(cc ^ 1, r0, dst));
3298 return dst;
3302 ppIRExpr(e);
3303 vpanic("iselDblExpr_wrk");
3307 /*---------------------------------------------------------*/
3308 /*--- ISEL: SIMD (Vector) expressions, 128 bit. ---*/
3309 /*---------------------------------------------------------*/
3311 static HReg iselVecExpr ( ISelEnv* env, const IRExpr* e )
3313 HReg r = iselVecExpr_wrk( env, e );
3314 # if 0
3315 vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
3316 # endif
3317 vassert(hregClass(r) == HRcVec128);
3318 vassert(hregIsVirtual(r));
3319 return r;
3323 /* DO NOT CALL THIS DIRECTLY */
3324 static HReg iselVecExpr_wrk ( ISelEnv* env, const IRExpr* e )
3327 # define REQUIRE_SSE1 \
3328 do { if (env->hwcaps == 0/*baseline, no sse*/ \
3329 || env->hwcaps == VEX_HWCAPS_X86_MMXEXT /*Integer SSE*/) \
3330 goto vec_fail; \
3331 } while (0)
3333 # define REQUIRE_SSE2 \
3334 do { if (0 == (env->hwcaps & VEX_HWCAPS_X86_SSE2)) \
3335 goto vec_fail; \
3336 } while (0)
3338 # define SSE2_OR_ABOVE \
3339 (env->hwcaps & VEX_HWCAPS_X86_SSE2)
3341 HWord fn = 0; /* address of helper fn, if required */
3342 MatchInfo mi;
3343 Bool arg1isEReg = False;
3344 X86SseOp op = Xsse_INVALID;
3345 IRType ty = typeOfIRExpr(env->type_env,e);
3346 vassert(e);
3347 vassert(ty == Ity_V128);
3349 REQUIRE_SSE1;
3351 if (e->tag == Iex_RdTmp) {
3352 return lookupIRTemp(env, e->Iex.RdTmp.tmp);
3355 if (e->tag == Iex_Get) {
3356 HReg dst = newVRegV(env);
3357 addInstr(env, X86Instr_SseLdSt(
3358 True/*load*/,
3359 dst,
3360 X86AMode_IR(e->Iex.Get.offset, hregX86_EBP())
3363 return dst;
3366 if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
3367 HReg dst = newVRegV(env);
3368 X86AMode* am = iselIntExpr_AMode(env, e->Iex.Load.addr);
3369 addInstr(env, X86Instr_SseLdSt( True/*load*/, dst, am ));
3370 return dst;
3373 if (e->tag == Iex_Const) {
3374 HReg dst = newVRegV(env);
3375 vassert(e->Iex.Const.con->tag == Ico_V128);
3376 addInstr(env, X86Instr_SseConst(e->Iex.Const.con->Ico.V128, dst));
3377 return dst;
3380 if (e->tag == Iex_Unop) {
3382 if (SSE2_OR_ABOVE) {
3383 /* 64UtoV128(LDle:I64(addr)) */
3384 DECLARE_PATTERN(p_zwiden_load64);
3385 DEFINE_PATTERN(p_zwiden_load64,
3386 unop(Iop_64UtoV128,
3387 IRExpr_Load(Iend_LE,Ity_I64,bind(0))));
3388 if (matchIRExpr(&mi, p_zwiden_load64, e)) {
3389 X86AMode* am = iselIntExpr_AMode(env, mi.bindee[0]);
3390 HReg dst = newVRegV(env);
3391 addInstr(env, X86Instr_SseLdzLO(8, dst, am));
3392 return dst;
3396 switch (e->Iex.Unop.op) {
3398 case Iop_NotV128: {
3399 HReg arg = iselVecExpr(env, e->Iex.Unop.arg);
3400 return do_sse_Not128(env, arg);
3403 case Iop_CmpNEZ64x2: {
3404 /* We can use SSE2 instructions for this. */
3405 /* Ideally, we want to do a 64Ix2 comparison against zero of
3406 the operand. Problem is no such insn exists. Solution
3407 therefore is to do a 32Ix4 comparison instead, and bitwise-
3408 negate (NOT) the result. Let a,b,c,d be 32-bit lanes, and
3409 let the not'd result of this initial comparison be a:b:c:d.
3410 What we need to compute is (a|b):(a|b):(c|d):(c|d). So, use
3411 pshufd to create a value b:a:d:c, and OR that with a:b:c:d,
3412 giving the required result.
3414 The required selection sequence is 2,3,0,1, which
3415 according to Intel's documentation means the pshufd
3416 literal value is 0xB1, that is,
3417 (2 << 6) | (3 << 4) | (0 << 2) | (1 << 0)
3418 */
3419 HReg arg = iselVecExpr(env, e->Iex.Unop.arg);
3420 HReg tmp = newVRegV(env);
3421 HReg dst = newVRegV(env);
3422 REQUIRE_SSE2;
3423 addInstr(env, X86Instr_SseReRg(Xsse_XOR, tmp, tmp));
3424 addInstr(env, X86Instr_SseReRg(Xsse_CMPEQ32, arg, tmp));
3425 tmp = do_sse_Not128(env, tmp);
3426 addInstr(env, X86Instr_SseShuf(0xB1, tmp, dst));
3427 addInstr(env, X86Instr_SseReRg(Xsse_OR, tmp, dst));
3428 return dst;
3431 case Iop_CmpNEZ32x4: {
3432 /* Sigh, we have to generate lousy code since this has to
3433 work on SSE1 hosts */
3434 /* basically, the idea is: for each lane:
3435 movl lane, %r ; negl %r (now CF = lane==0 ? 0 : 1)
3436 sbbl %r, %r (now %r = 1Sto32(CF))
3437 movl %r, lane
3438 */
3439 Int i;
3440 X86AMode* am;
3441 X86AMode* esp0 = X86AMode_IR(0, hregX86_ESP());
3442 HReg arg = iselVecExpr(env, e->Iex.Unop.arg);
3443 HReg dst = newVRegV(env);
3444 HReg r32 = newVRegI(env);
3445 sub_from_esp(env, 16);
3446 addInstr(env, X86Instr_SseLdSt(False/*store*/, arg, esp0));
3447 for (i = 0; i < 4; i++) {
3448 am = X86AMode_IR(i*4, hregX86_ESP());
3449 addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Mem(am), r32));
3450 addInstr(env, X86Instr_Unary32(Xun_NEG, r32));
3451 addInstr(env, X86Instr_Alu32R(Xalu_SBB, X86RMI_Reg(r32), r32));
3452 addInstr(env, X86Instr_Alu32M(Xalu_MOV, X86RI_Reg(r32), am));
3454 addInstr(env, X86Instr_SseLdSt(True/*load*/, dst, esp0));
3455 add_to_esp(env, 16);
3456 return dst;
3459 case Iop_CmpNEZ8x16:
3460 case Iop_CmpNEZ16x8: {
3461 /* We can use SSE2 instructions for this. */
3462 HReg arg;
3463 HReg vec0 = newVRegV(env);
3464 HReg vec1 = newVRegV(env);
3465 HReg dst = newVRegV(env);
3466 X86SseOp cmpOp
3467 = e->Iex.Unop.op==Iop_CmpNEZ16x8 ? Xsse_CMPEQ16
3468 : Xsse_CMPEQ8;
3469 REQUIRE_SSE2;
3470 addInstr(env, X86Instr_SseReRg(Xsse_XOR, vec0, vec0));
3471 addInstr(env, mk_vMOVsd_RR(vec0, vec1));
3472 addInstr(env, X86Instr_Sse32Fx4(Xsse_CMPEQF, vec1, vec1));
3473 /* defer arg computation to here so as to give CMPEQF as long
3474 as possible to complete */
3475 arg = iselVecExpr(env, e->Iex.Unop.arg);
3476 /* vec0 is all 0s; vec1 is all 1s */
3477 addInstr(env, mk_vMOVsd_RR(arg, dst));
3478 /* 16x8 or 8x16 comparison == */
3479 addInstr(env, X86Instr_SseReRg(cmpOp, vec0, dst));
3480 /* invert result */
3481 addInstr(env, X86Instr_SseReRg(Xsse_XOR, vec1, dst));
3482 return dst;
3485 case Iop_RecipEst32Fx4: op = Xsse_RCPF; goto do_32Fx4_unary;
3486 case Iop_RSqrtEst32Fx4: op = Xsse_RSQRTF; goto do_32Fx4_unary;
3487 do_32Fx4_unary:
3489 HReg arg = iselVecExpr(env, e->Iex.Unop.arg);
3490 HReg dst = newVRegV(env);
3491 addInstr(env, X86Instr_Sse32Fx4(op, arg, dst));
3492 return dst;
3495 case Iop_RecipEst32F0x4: op = Xsse_RCPF; goto do_32F0x4_unary;
3496 case Iop_RSqrtEst32F0x4: op = Xsse_RSQRTF; goto do_32F0x4_unary;
3497 case Iop_Sqrt32F0x4: op = Xsse_SQRTF; goto do_32F0x4_unary;
3498 do_32F0x4_unary:
3500 /* A bit subtle. We have to copy the arg to the result
3501 register first, because actually doing the SSE scalar insn
3502 leaves the upper 3/4 of the destination register
3503 unchanged. Whereas the required semantics of these
3504 primops is that the upper 3/4 is simply copied in from the
3505 argument. */
3506 HReg arg = iselVecExpr(env, e->Iex.Unop.arg);
3507 HReg dst = newVRegV(env);
3508 addInstr(env, mk_vMOVsd_RR(arg, dst));
3509 addInstr(env, X86Instr_Sse32FLo(op, arg, dst));
3510 return dst;
3513 case Iop_Sqrt64F0x2: op = Xsse_SQRTF; goto do_64F0x2_unary;
3514 do_64F0x2_unary:
3516 /* A bit subtle. We have to copy the arg to the result
3517 register first, because actually doing the SSE scalar insn
3518 leaves the upper half of the destination register
3519 unchanged. Whereas the required semantics of these
3520 primops is that the upper half is simply copied in from the
3521 argument. */
3522 HReg arg = iselVecExpr(env, e->Iex.Unop.arg);
3523 HReg dst = newVRegV(env);
3524 REQUIRE_SSE2;
3525 addInstr(env, mk_vMOVsd_RR(arg, dst));
3526 addInstr(env, X86Instr_Sse64FLo(op, arg, dst));
3527 return dst;
3530 case Iop_32UtoV128: {
3531 HReg dst = newVRegV(env);
3532 X86AMode* esp0 = X86AMode_IR(0, hregX86_ESP());
3533 X86RMI* rmi = iselIntExpr_RMI(env, e->Iex.Unop.arg);
3534 addInstr(env, X86Instr_Push(rmi));
3535 addInstr(env, X86Instr_SseLdzLO(4, dst, esp0));
3536 add_to_esp(env, 4);
3537 return dst;
3540 case Iop_64UtoV128: {
3541 HReg rHi, rLo;
3542 HReg dst = newVRegV(env);
3543 X86AMode* esp0 = X86AMode_IR(0, hregX86_ESP());
3544 iselInt64Expr(&rHi, &rLo, env, e->Iex.Unop.arg);
3545 addInstr(env, X86Instr_Push(X86RMI_Reg(rHi)));
3546 addInstr(env, X86Instr_Push(X86RMI_Reg(rLo)));
3547 addInstr(env, X86Instr_SseLdzLO(8, dst, esp0));
3548 add_to_esp(env, 8);
3549 return dst;
3552 default:
3553 break;
3554 } /* switch (e->Iex.Unop.op) */
3555 } /* if (e->tag == Iex_Unop) */
3557 if (e->tag == Iex_Binop) {
3558 switch (e->Iex.Binop.op) {
3560 case Iop_Sqrt64Fx2:
3561 REQUIRE_SSE2;
3562 /* fallthrough */
3563 case Iop_Sqrt32Fx4: {
3564 /* :: (rmode, vec) -> vec */
3565 HReg arg = iselVecExpr(env, e->Iex.Binop.arg2);
3566 HReg dst = newVRegV(env);
3567 /* XXXROUNDINGFIXME */
3568 /* set roundingmode here */
3569 addInstr(env, (e->Iex.Binop.op == Iop_Sqrt64Fx2
3570 ? X86Instr_Sse64Fx2 : X86Instr_Sse32Fx4)
3571 (Xsse_SQRTF, arg, dst));
3572 return dst;
3575 case Iop_SetV128lo32: {
3576 HReg dst = newVRegV(env);
3577 HReg srcV = iselVecExpr(env, e->Iex.Binop.arg1);
3578 HReg srcI = iselIntExpr_R(env, e->Iex.Binop.arg2);
3579 X86AMode* esp0 = X86AMode_IR(0, hregX86_ESP());
3580 sub_from_esp(env, 16);
3581 addInstr(env, X86Instr_SseLdSt(False/*store*/, srcV, esp0));
3582 addInstr(env, X86Instr_Alu32M(Xalu_MOV, X86RI_Reg(srcI), esp0));
3583 addInstr(env, X86Instr_SseLdSt(True/*load*/, dst, esp0));
3584 add_to_esp(env, 16);
3585 return dst;
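/* Data flow of the stack round-trip above: the 16 bytes at (%esp)
   first hold all of srcV; the 32-bit store then overwrites only
   bytes 0..3 with srcI; the final 128-bit reload therefore yields
   srcV with its low lane replaced, which is exactly SetV128lo32. */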
3588 case Iop_SetV128lo64: {
3589 HReg dst = newVRegV(env);
3590 HReg srcV = iselVecExpr(env, e->Iex.Binop.arg1);
3591 HReg srcIhi, srcIlo;
3592 X86AMode* esp0 = X86AMode_IR(0, hregX86_ESP());
3593 X86AMode* esp4 = advance4(esp0);
3594 iselInt64Expr(&srcIhi, &srcIlo, env, e->Iex.Binop.arg2);
3595 sub_from_esp(env, 16);
3596 addInstr(env, X86Instr_SseLdSt(False/*store*/, srcV, esp0));
3597 addInstr(env, X86Instr_Alu32M(Xalu_MOV, X86RI_Reg(srcIlo), esp0));
3598 addInstr(env, X86Instr_Alu32M(Xalu_MOV, X86RI_Reg(srcIhi), esp4));
3599 addInstr(env, X86Instr_SseLdSt(True/*load*/, dst, esp0));
3600 add_to_esp(env, 16);
3601 return dst;
3604 case Iop_64HLtoV128: {
3605 HReg r3, r2, r1, r0;
3606 X86AMode* esp0 = X86AMode_IR(0, hregX86_ESP());
3607 X86AMode* esp4 = advance4(esp0);
3608 X86AMode* esp8 = advance4(esp4);
3609 X86AMode* esp12 = advance4(esp8);
3610 HReg dst = newVRegV(env);
3611 /* do this via the stack (easy, convenient, etc) */
3612 sub_from_esp(env, 16);
3613 /* Do the less significant 64 bits */
3614 iselInt64Expr(&r1, &r0, env, e->Iex.Binop.arg2);
3615 addInstr(env, X86Instr_Alu32M(Xalu_MOV, X86RI_Reg(r0), esp0));
3616 addInstr(env, X86Instr_Alu32M(Xalu_MOV, X86RI_Reg(r1), esp4));
3617 /* Do the more significant 64 bits */
3618 iselInt64Expr(&r3, &r2, env, e->Iex.Binop.arg1);
3619 addInstr(env, X86Instr_Alu32M(Xalu_MOV, X86RI_Reg(r2), esp8));
3620 addInstr(env, X86Instr_Alu32M(Xalu_MOV, X86RI_Reg(r3), esp12));
3621 /* Fetch result back from stack. */
3622 addInstr(env, X86Instr_SseLdSt(True/*load*/, dst, esp0));
3623 add_to_esp(env, 16);
3624 return dst;
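/* Resulting (little-endian) stack layout just before the reload:
      bytes  0.. 7 : arg2 (r1:r0, the less significant 64 bits)
      bytes  8..15 : arg1 (r3:r2, the more significant 64 bits)
   so the 128-bit load produces a vector whose low half is arg2 and
   whose high half is arg1, as 64HLtoV128 requires. */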
3627 case Iop_CmpEQ32Fx4: op = Xsse_CMPEQF; goto do_32Fx4;
3628 case Iop_CmpLT32Fx4: op = Xsse_CMPLTF; goto do_32Fx4;
3629 case Iop_CmpLE32Fx4: op = Xsse_CMPLEF; goto do_32Fx4;
3630 case Iop_CmpUN32Fx4: op = Xsse_CMPUNF; goto do_32Fx4;
3631 case Iop_Max32Fx4: op = Xsse_MAXF; goto do_32Fx4;
3632 case Iop_Min32Fx4: op = Xsse_MINF; goto do_32Fx4;
3633 do_32Fx4:
3635 HReg argL = iselVecExpr(env, e->Iex.Binop.arg1);
3636 HReg argR = iselVecExpr(env, e->Iex.Binop.arg2);
3637 HReg dst = newVRegV(env);
3638 addInstr(env, mk_vMOVsd_RR(argL, dst));
3639 addInstr(env, X86Instr_Sse32Fx4(op, argR, dst));
3640 return dst;
3643 case Iop_CmpEQ64Fx2: op = Xsse_CMPEQF; goto do_64Fx2;
3644 case Iop_CmpLT64Fx2: op = Xsse_CMPLTF; goto do_64Fx2;
3645 case Iop_CmpLE64Fx2: op = Xsse_CMPLEF; goto do_64Fx2;
3646 case Iop_CmpUN64Fx2: op = Xsse_CMPUNF; goto do_64Fx2;
3647 case Iop_Max64Fx2: op = Xsse_MAXF; goto do_64Fx2;
3648 case Iop_Min64Fx2: op = Xsse_MINF; goto do_64Fx2;
3649 do_64Fx2:
3651 HReg argL = iselVecExpr(env, e->Iex.Binop.arg1);
3652 HReg argR = iselVecExpr(env, e->Iex.Binop.arg2);
3653 HReg dst = newVRegV(env);
3654 REQUIRE_SSE2;
3655 addInstr(env, mk_vMOVsd_RR(argL, dst));
3656 addInstr(env, X86Instr_Sse64Fx2(op, argR, dst));
3657 return dst;
3660 case Iop_CmpEQ32F0x4: op = Xsse_CMPEQF; goto do_32F0x4;
3661 case Iop_CmpLT32F0x4: op = Xsse_CMPLTF; goto do_32F0x4;
3662 case Iop_CmpLE32F0x4: op = Xsse_CMPLEF; goto do_32F0x4;
3663 case Iop_CmpUN32F0x4: op = Xsse_CMPUNF; goto do_32F0x4;
3664 case Iop_Add32F0x4: op = Xsse_ADDF; goto do_32F0x4;
3665 case Iop_Div32F0x4: op = Xsse_DIVF; goto do_32F0x4;
3666 case Iop_Max32F0x4: op = Xsse_MAXF; goto do_32F0x4;
3667 case Iop_Min32F0x4: op = Xsse_MINF; goto do_32F0x4;
3668 case Iop_Mul32F0x4: op = Xsse_MULF; goto do_32F0x4;
3669 case Iop_Sub32F0x4: op = Xsse_SUBF; goto do_32F0x4;
3670 do_32F0x4: {
3671 HReg argL = iselVecExpr(env, e->Iex.Binop.arg1);
3672 HReg argR = iselVecExpr(env, e->Iex.Binop.arg2);
3673 HReg dst = newVRegV(env);
3674 addInstr(env, mk_vMOVsd_RR(argL, dst));
3675 addInstr(env, X86Instr_Sse32FLo(op, argR, dst));
3676 return dst;
3679 case Iop_CmpEQ64F0x2: op = Xsse_CMPEQF; goto do_64F0x2;
3680 case Iop_CmpLT64F0x2: op = Xsse_CMPLTF; goto do_64F0x2;
3681 case Iop_CmpLE64F0x2: op = Xsse_CMPLEF; goto do_64F0x2;
3682 case Iop_CmpUN64F0x2: op = Xsse_CMPUNF; goto do_64F0x2;
3683 case Iop_Add64F0x2: op = Xsse_ADDF; goto do_64F0x2;
3684 case Iop_Div64F0x2: op = Xsse_DIVF; goto do_64F0x2;
3685 case Iop_Max64F0x2: op = Xsse_MAXF; goto do_64F0x2;
3686 case Iop_Min64F0x2: op = Xsse_MINF; goto do_64F0x2;
3687 case Iop_Mul64F0x2: op = Xsse_MULF; goto do_64F0x2;
3688 case Iop_Sub64F0x2: op = Xsse_SUBF; goto do_64F0x2;
3689 do_64F0x2: {
3690 HReg argL = iselVecExpr(env, e->Iex.Binop.arg1);
3691 HReg argR = iselVecExpr(env, e->Iex.Binop.arg2);
3692 HReg dst = newVRegV(env);
3693 REQUIRE_SSE2;
3694 addInstr(env, mk_vMOVsd_RR(argL, dst));
3695 addInstr(env, X86Instr_Sse64FLo(op, argR, dst));
3696 return dst;
3699 case Iop_QNarrowBin32Sto16Sx8:
3700 op = Xsse_PACKSSD; arg1isEReg = True; goto do_SseReRg;
3701 case Iop_QNarrowBin16Sto8Sx16:
3702 op = Xsse_PACKSSW; arg1isEReg = True; goto do_SseReRg;
3703 case Iop_QNarrowBin16Sto8Ux16:
3704 op = Xsse_PACKUSW; arg1isEReg = True; goto do_SseReRg;
3706 case Iop_InterleaveHI8x16:
3707 op = Xsse_UNPCKHB; arg1isEReg = True; goto do_SseReRg;
3708 case Iop_InterleaveHI16x8:
3709 op = Xsse_UNPCKHW; arg1isEReg = True; goto do_SseReRg;
3710 case Iop_InterleaveHI32x4:
3711 op = Xsse_UNPCKHD; arg1isEReg = True; goto do_SseReRg;
3712 case Iop_InterleaveHI64x2:
3713 op = Xsse_UNPCKHQ; arg1isEReg = True; goto do_SseReRg;
3715 case Iop_InterleaveLO8x16:
3716 op = Xsse_UNPCKLB; arg1isEReg = True; goto do_SseReRg;
3717 case Iop_InterleaveLO16x8:
3718 op = Xsse_UNPCKLW; arg1isEReg = True; goto do_SseReRg;
3719 case Iop_InterleaveLO32x4:
3720 op = Xsse_UNPCKLD; arg1isEReg = True; goto do_SseReRg;
3721 case Iop_InterleaveLO64x2:
3722 op = Xsse_UNPCKLQ; arg1isEReg = True; goto do_SseReRg;
3724 case Iop_AndV128: op = Xsse_AND; goto do_SseReRg;
3725 case Iop_OrV128: op = Xsse_OR; goto do_SseReRg;
3726 case Iop_XorV128: op = Xsse_XOR; goto do_SseReRg;
3727 case Iop_Add8x16: op = Xsse_ADD8; goto do_SseReRg;
3728 case Iop_Add16x8: op = Xsse_ADD16; goto do_SseReRg;
3729 case Iop_Add32x4: op = Xsse_ADD32; goto do_SseReRg;
3730 case Iop_Add64x2: op = Xsse_ADD64; goto do_SseReRg;
3731 case Iop_QAdd8Sx16: op = Xsse_QADD8S; goto do_SseReRg;
3732 case Iop_QAdd16Sx8: op = Xsse_QADD16S; goto do_SseReRg;
3733 case Iop_QAdd8Ux16: op = Xsse_QADD8U; goto do_SseReRg;
3734 case Iop_QAdd16Ux8: op = Xsse_QADD16U; goto do_SseReRg;
3735 case Iop_Avg8Ux16: op = Xsse_AVG8U; goto do_SseReRg;
3736 case Iop_Avg16Ux8: op = Xsse_AVG16U; goto do_SseReRg;
3737 case Iop_CmpEQ8x16: op = Xsse_CMPEQ8; goto do_SseReRg;
3738 case Iop_CmpEQ16x8: op = Xsse_CMPEQ16; goto do_SseReRg;
3739 case Iop_CmpEQ32x4: op = Xsse_CMPEQ32; goto do_SseReRg;
3740 case Iop_CmpGT8Sx16: op = Xsse_CMPGT8S; goto do_SseReRg;
3741 case Iop_CmpGT16Sx8: op = Xsse_CMPGT16S; goto do_SseReRg;
3742 case Iop_CmpGT32Sx4: op = Xsse_CMPGT32S; goto do_SseReRg;
3743 case Iop_Max16Sx8: op = Xsse_MAX16S; goto do_SseReRg;
3744 case Iop_Max8Ux16: op = Xsse_MAX8U; goto do_SseReRg;
3745 case Iop_Min16Sx8: op = Xsse_MIN16S; goto do_SseReRg;
3746 case Iop_Min8Ux16: op = Xsse_MIN8U; goto do_SseReRg;
3747 case Iop_MulHi16Ux8: op = Xsse_MULHI16U; goto do_SseReRg;
3748 case Iop_MulHi16Sx8: op = Xsse_MULHI16S; goto do_SseReRg;
3749 case Iop_Mul16x8: op = Xsse_MUL16; goto do_SseReRg;
3750 case Iop_Sub8x16: op = Xsse_SUB8; goto do_SseReRg;
3751 case Iop_Sub16x8: op = Xsse_SUB16; goto do_SseReRg;
3752 case Iop_Sub32x4: op = Xsse_SUB32; goto do_SseReRg;
3753 case Iop_Sub64x2: op = Xsse_SUB64; goto do_SseReRg;
3754 case Iop_QSub8Sx16: op = Xsse_QSUB8S; goto do_SseReRg;
3755 case Iop_QSub16Sx8: op = Xsse_QSUB16S; goto do_SseReRg;
3756 case Iop_QSub8Ux16: op = Xsse_QSUB8U; goto do_SseReRg;
3757 case Iop_QSub16Ux8: op = Xsse_QSUB16U; goto do_SseReRg;
3758 do_SseReRg: {
3759 HReg arg1 = iselVecExpr(env, e->Iex.Binop.arg1);
3760 HReg arg2 = iselVecExpr(env, e->Iex.Binop.arg2);
3761 HReg dst = newVRegV(env);
3762 if (op != Xsse_OR && op != Xsse_AND && op != Xsse_XOR)
3763 REQUIRE_SSE2;
3764 if (arg1isEReg) {
3765 addInstr(env, mk_vMOVsd_RR(arg2, dst));
3766 addInstr(env, X86Instr_SseReRg(op, arg1, dst));
3767 } else {
3768 addInstr(env, mk_vMOVsd_RR(arg1, dst));
3769 addInstr(env, X86Instr_SseReRg(op, arg2, dst));
3771 return dst;
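/* The arg1isEReg flag controls operand routing for the
   non-commutative pack/unpack cases above: when it is set, arg2 is
   copied into dst and the instruction is applied with arg1 as the E
   (source) operand, e.g. for Iop_QNarrowBin16Sto8Sx16 roughly
      (register copy)  arg2 -> dst
      packsswb         arg1, dst
   For the commutative arithmetic and logical cases the flag is left
   False and the operands are taken in IR order. */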
3774 case Iop_ShlN16x8: op = Xsse_SHL16; goto do_SseShift;
3775 case Iop_ShlN32x4: op = Xsse_SHL32; goto do_SseShift;
3776 case Iop_ShlN64x2: op = Xsse_SHL64; goto do_SseShift;
3777 case Iop_SarN16x8: op = Xsse_SAR16; goto do_SseShift;
3778 case Iop_SarN32x4: op = Xsse_SAR32; goto do_SseShift;
3779 case Iop_ShrN16x8: op = Xsse_SHR16; goto do_SseShift;
3780 case Iop_ShrN32x4: op = Xsse_SHR32; goto do_SseShift;
3781 case Iop_ShrN64x2: op = Xsse_SHR64; goto do_SseShift;
3782 do_SseShift: {
3783 HReg greg = iselVecExpr(env, e->Iex.Binop.arg1);
3784 X86RMI* rmi = iselIntExpr_RMI(env, e->Iex.Binop.arg2);
3785 X86AMode* esp0 = X86AMode_IR(0, hregX86_ESP());
3786 HReg ereg = newVRegV(env);
3787 HReg dst = newVRegV(env);
3788 REQUIRE_SSE2;
3789 addInstr(env, X86Instr_Push(X86RMI_Imm(0)));
3790 addInstr(env, X86Instr_Push(X86RMI_Imm(0)));
3791 addInstr(env, X86Instr_Push(X86RMI_Imm(0)));
3792 addInstr(env, X86Instr_Push(rmi));
3793 addInstr(env, X86Instr_SseLdSt(True/*load*/, ereg, esp0));
3794 addInstr(env, mk_vMOVsd_RR(greg, dst));
3795 addInstr(env, X86Instr_SseReRg(op, ereg, dst));
3796 add_to_esp(env, 16);
3797 return dst;
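/* These shifts use the xmm-register-count form of PSLLW/PSRLD/etc,
   so the count is materialised as a 128-bit value first.  Roughly:
      pushl $0 ; pushl $0 ; pushl $0 ; pushl <count>
      movupd (%esp), ereg          -- bytes 0..3 = count, rest zero
      (register copy)  greg -> dst
      psllw/pslld/...  ereg, dst
      addl  $16, %esp
   The three zero pushes keep the bits above the count clear, since
   the hardware takes the shift amount from the whole low 64 bits. */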
3800 case Iop_NarrowBin32to16x8:
3801 fn = (HWord)h_generic_calc_NarrowBin32to16x8;
3802 goto do_SseAssistedBinary;
3803 case Iop_NarrowBin16to8x16:
3804 fn = (HWord)h_generic_calc_NarrowBin16to8x16;
3805 goto do_SseAssistedBinary;
3806 do_SseAssistedBinary: {
3807 /* As with the amd64 case (from which this is copied), we
3808 generate pretty bad code. */
3809 vassert(fn != 0);
3810 HReg dst = newVRegV(env);
3811 HReg argL = iselVecExpr(env, e->Iex.Binop.arg1);
3812 HReg argR = iselVecExpr(env, e->Iex.Binop.arg2);
3813 HReg argp = newVRegI(env);
3814 /* subl $112, %esp -- make a space */
3815 sub_from_esp(env, 112);
3816 /* leal 48(%esp), %r_argp -- point into it */
3817 addInstr(env, X86Instr_Lea32(X86AMode_IR(48, hregX86_ESP()),
3818 argp));
3819 /* andl $-16, %r_argp -- 16-align the pointer */
3820 addInstr(env, X86Instr_Alu32R(Xalu_AND,
3821 X86RMI_Imm( ~(UInt)15 ),
3822 argp));
3823 /* Prepare 3 arg regs:
3824 leal 0(%r_argp), %eax
3825 leal 16(%r_argp), %edx
3826 leal 32(%r_argp), %ecx
3828 addInstr(env, X86Instr_Lea32(X86AMode_IR(0, argp),
3829 hregX86_EAX()));
3830 addInstr(env, X86Instr_Lea32(X86AMode_IR(16, argp),
3831 hregX86_EDX()));
3832 addInstr(env, X86Instr_Lea32(X86AMode_IR(32, argp),
3833 hregX86_ECX()));
3834 /* Store the two args, at (%edx) and (%ecx):
3835 movupd %argL, 0(%edx)
3836 movupd %argR, 0(%ecx)
3838 addInstr(env, X86Instr_SseLdSt(False/*!isLoad*/, argL,
3839 X86AMode_IR(0, hregX86_EDX())));
3840 addInstr(env, X86Instr_SseLdSt(False/*!isLoad*/, argR,
3841 X86AMode_IR(0, hregX86_ECX())));
3842 /* call the helper */
3843 addInstr(env, X86Instr_Call( Xcc_ALWAYS, (Addr32)fn,
3844 3, mk_RetLoc_simple(RLPri_None) ));
3845 /* fetch the result from memory, using %r_argp, which the
3846 register allocator will keep alive across the call. */
3847 addInstr(env, X86Instr_SseLdSt(True/*isLoad*/, dst,
3848 X86AMode_IR(0, argp)));
3849 /* and finally, clear the space */
3850 add_to_esp(env, 112);
3851 return dst;
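/* The helpers reached here have roughly the shape
      void h_generic_calc_NarrowBin32to16x8 ( /*OUT*/V128* res,
                                              V128* argL, V128* argR );
   and are called with three register parameters, which is why the
   three pointers are set up in %eax, %edx and %ecx above.  The
   result area at 0(%r_argp) is read back with a 128-bit load once
   the call returns. */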
3854 default:
3855 break;
3856 } /* switch (e->Iex.Binop.op) */
3857 } /* if (e->tag == Iex_Binop) */
3860 if (e->tag == Iex_Triop) {
3861 IRTriop *triop = e->Iex.Triop.details;
3862 switch (triop->op) {
3864 case Iop_Add32Fx4: op = Xsse_ADDF; goto do_32Fx4_w_rm;
3865 case Iop_Sub32Fx4: op = Xsse_SUBF; goto do_32Fx4_w_rm;
3866 case Iop_Mul32Fx4: op = Xsse_MULF; goto do_32Fx4_w_rm;
3867 case Iop_Div32Fx4: op = Xsse_DIVF; goto do_32Fx4_w_rm;
3868 do_32Fx4_w_rm:
3870 HReg argL = iselVecExpr(env, triop->arg2);
3871 HReg argR = iselVecExpr(env, triop->arg3);
3872 HReg dst = newVRegV(env);
3873 addInstr(env, mk_vMOVsd_RR(argL, dst));
3874 /* XXXROUNDINGFIXME */
3875 /* set roundingmode here */
3876 addInstr(env, X86Instr_Sse32Fx4(op, argR, dst));
3877 return dst;
3880 case Iop_Add64Fx2: op = Xsse_ADDF; goto do_64Fx2_w_rm;
3881 case Iop_Sub64Fx2: op = Xsse_SUBF; goto do_64Fx2_w_rm;
3882 case Iop_Mul64Fx2: op = Xsse_MULF; goto do_64Fx2_w_rm;
3883 case Iop_Div64Fx2: op = Xsse_DIVF; goto do_64Fx2_w_rm;
3884 do_64Fx2_w_rm:
3886 HReg argL = iselVecExpr(env, triop->arg2);
3887 HReg argR = iselVecExpr(env, triop->arg3);
3888 HReg dst = newVRegV(env);
3889 REQUIRE_SSE2;
3890 addInstr(env, mk_vMOVsd_RR(argL, dst));
3891 /* XXXROUNDINGFIXME */
3892 /* set roundingmode here */
3893 addInstr(env, X86Instr_Sse64Fx2(op, argR, dst));
3894 return dst;
3897 default:
3898 break;
3899 } /* switch (triop->op) */
3900 } /* if (e->tag == Iex_Triop) */
3903 if (e->tag == Iex_ITE) { // VFD
3904 HReg r1 = iselVecExpr(env, e->Iex.ITE.iftrue);
3905 HReg r0 = iselVecExpr(env, e->Iex.ITE.iffalse);
3906 HReg dst = newVRegV(env);
3907 addInstr(env, mk_vMOVsd_RR(r1,dst));
3908 X86CondCode cc = iselCondCode(env, e->Iex.ITE.cond);
3909 addInstr(env, X86Instr_SseCMov(cc ^ 1, r0, dst));
3910 return dst;
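/* Note on "cc ^ 1": X86 condition codes come in complementary pairs
   that differ only in bit 0 (e.g. Xcc_Z/Xcc_NZ), so XORing with 1
   negates the condition.  dst starts out holding the iftrue value
   and is conditionally overwritten with iffalse when the guard is
   false. */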
3913 vec_fail:
3914 vex_printf("iselVecExpr (hwcaps = %s): can't reduce\n",
3915 LibVEX_ppVexHwCaps(VexArchX86,env->hwcaps));
3916 ppIRExpr(e);
3917 vpanic("iselVecExpr_wrk");
3919 # undef REQUIRE_SSE1
3920 # undef REQUIRE_SSE2
3921 # undef SSE2_OR_ABOVE
3925 /*---------------------------------------------------------*/
3926 /*--- ISEL: Statements ---*/
3927 /*---------------------------------------------------------*/
3929 static void iselStmt ( ISelEnv* env, IRStmt* stmt )
3931 if (vex_traceflags & VEX_TRACE_VCODE) {
3932 vex_printf("\n-- ");
3933 ppIRStmt(stmt);
3934 vex_printf("\n");
3937 switch (stmt->tag) {
3939 /* --------- STORE --------- */
3940 case Ist_Store: {
3941 IRType tya = typeOfIRExpr(env->type_env, stmt->Ist.Store.addr);
3942 IRType tyd = typeOfIRExpr(env->type_env, stmt->Ist.Store.data);
3943 IREndness end = stmt->Ist.Store.end;
3945 if (tya != Ity_I32 || end != Iend_LE)
3946 goto stmt_fail;
3948 if (tyd == Ity_I32) {
3949 X86AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr);
3950 X86RI* ri = iselIntExpr_RI(env, stmt->Ist.Store.data);
3951 addInstr(env, X86Instr_Alu32M(Xalu_MOV,ri,am));
3952 return;
3954 if (tyd == Ity_I8 || tyd == Ity_I16) {
3955 X86AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr);
3956 HReg r = iselIntExpr_R(env, stmt->Ist.Store.data);
3957 addInstr(env, X86Instr_Store( toUChar(tyd==Ity_I8 ? 1 : 2),
3958 r,am ));
3959 return;
3961 if (tyd == Ity_F64) {
3962 X86AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr);
3963 HReg r = iselDblExpr(env, stmt->Ist.Store.data);
3964 addInstr(env, X86Instr_FpLdSt(False/*store*/, 8, r, am));
3965 return;
3967 if (tyd == Ity_F32) {
3968 X86AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr);
3969 HReg r = iselFltExpr(env, stmt->Ist.Store.data);
3970 addInstr(env, X86Instr_FpLdSt(False/*store*/, 4, r, am));
3971 return;
3973 if (tyd == Ity_I64) {
3974 HReg vHi, vLo, rA;
3975 iselInt64Expr(&vHi, &vLo, env, stmt->Ist.Store.data);
3976 rA = iselIntExpr_R(env, stmt->Ist.Store.addr);
3977 addInstr(env, X86Instr_Alu32M(
3978 Xalu_MOV, X86RI_Reg(vLo), X86AMode_IR(0, rA)));
3979 addInstr(env, X86Instr_Alu32M(
3980 Xalu_MOV, X86RI_Reg(vHi), X86AMode_IR(4, rA)));
3981 return;
3983 if (tyd == Ity_V128) {
3984 X86AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr);
3985 HReg r = iselVecExpr(env, stmt->Ist.Store.data);
3986 addInstr(env, X86Instr_SseLdSt(False/*store*/, r, am));
3987 return;
3989 break;
3992 /* --------- PUT --------- */
3993 case Ist_Put: {
3994 IRType ty = typeOfIRExpr(env->type_env, stmt->Ist.Put.data);
3995 if (ty == Ity_I32) {
3996 /* We're going to write to memory, so compute the RHS into an
3997 X86RI. */
3998 X86RI* ri = iselIntExpr_RI(env, stmt->Ist.Put.data);
3999 addInstr(env,
4000 X86Instr_Alu32M(
4001 Xalu_MOV,
4003 X86AMode_IR(stmt->Ist.Put.offset,hregX86_EBP())
4005 return;
4007 if (ty == Ity_I8 || ty == Ity_I16) {
4008 HReg r = iselIntExpr_R(env, stmt->Ist.Put.data);
4009 addInstr(env, X86Instr_Store(
4010 toUChar(ty==Ity_I8 ? 1 : 2),
4012 X86AMode_IR(stmt->Ist.Put.offset,
4013 hregX86_EBP())));
4014 return;
4016 if (ty == Ity_I64) {
4017 HReg vHi, vLo;
4018 X86AMode* am = X86AMode_IR(stmt->Ist.Put.offset, hregX86_EBP());
4019 X86AMode* am4 = advance4(am);
4020 iselInt64Expr(&vHi, &vLo, env, stmt->Ist.Put.data);
4021 addInstr(env, X86Instr_Alu32M( Xalu_MOV, X86RI_Reg(vLo), am ));
4022 addInstr(env, X86Instr_Alu32M( Xalu_MOV, X86RI_Reg(vHi), am4 ));
4023 return;
4025 if (ty == Ity_V128) {
4026 HReg vec = iselVecExpr(env, stmt->Ist.Put.data);
4027 X86AMode* am = X86AMode_IR(stmt->Ist.Put.offset, hregX86_EBP());
4028 addInstr(env, X86Instr_SseLdSt(False/*store*/, vec, am));
4029 return;
4031 if (ty == Ity_F32) {
4032 HReg f32 = iselFltExpr(env, stmt->Ist.Put.data);
4033 X86AMode* am = X86AMode_IR(stmt->Ist.Put.offset, hregX86_EBP());
4034 set_FPU_rounding_default(env); /* paranoia */
4035 addInstr(env, X86Instr_FpLdSt( False/*store*/, 4, f32, am ));
4036 return;
4038 if (ty == Ity_F64) {
4039 HReg f64 = iselDblExpr(env, stmt->Ist.Put.data);
4040 X86AMode* am = X86AMode_IR(stmt->Ist.Put.offset, hregX86_EBP());
4041 set_FPU_rounding_default(env); /* paranoia */
4042 addInstr(env, X86Instr_FpLdSt( False/*store*/, 8, f64, am ));
4043 return;
4045 break;
4048 /* --------- Indexed PUT --------- */
4049 case Ist_PutI: {
4050 IRPutI *puti = stmt->Ist.PutI.details;
4052 X86AMode* am
4053 = genGuestArrayOffset(
4054 env, puti->descr,
4055 puti->ix, puti->bias );
4057 IRType ty = typeOfIRExpr(env->type_env, puti->data);
4058 if (ty == Ity_F64) {
4059 HReg val = iselDblExpr(env, puti->data);
4060 addInstr(env, X86Instr_FpLdSt( False/*store*/, 8, val, am ));
4061 return;
4063 if (ty == Ity_I8) {
4064 HReg r = iselIntExpr_R(env, puti->data);
4065 addInstr(env, X86Instr_Store( 1, r, am ));
4066 return;
4068 if (ty == Ity_I32) {
4069 HReg r = iselIntExpr_R(env, puti->data);
4070 addInstr(env, X86Instr_Alu32M( Xalu_MOV, X86RI_Reg(r), am ));
4071 return;
4073 if (ty == Ity_I64) {
4074 HReg rHi, rLo;
4075 X86AMode* am4 = advance4(am);
4076 iselInt64Expr(&rHi, &rLo, env, puti->data);
4077 addInstr(env, X86Instr_Alu32M( Xalu_MOV, X86RI_Reg(rLo), am ));
4078 addInstr(env, X86Instr_Alu32M( Xalu_MOV, X86RI_Reg(rHi), am4 ));
4079 return;
4081 break;
4084 /* --------- TMP --------- */
4085 case Ist_WrTmp: {
4086 IRTemp tmp = stmt->Ist.WrTmp.tmp;
4087 IRType ty = typeOfIRTemp(env->type_env, tmp);
4089 /* Optimisation: if stmt->Ist.WrTmp.data is Add32(..,..),
4090 compute it into an AMode and then use LEA. This usually
4091 produces fewer instructions, often because (for memcheck-
4092 created IR) we get t = address-expression (t is later used
4093 twice), and so doing this naturally turns the address
4094 expression back into an X86 amode. */
4095 if (ty == Ity_I32
4096 && stmt->Ist.WrTmp.data->tag == Iex_Binop
4097 && stmt->Ist.WrTmp.data->Iex.Binop.op == Iop_Add32) {
4098 X86AMode* am = iselIntExpr_AMode(env, stmt->Ist.WrTmp.data);
4099 HReg dst = lookupIRTemp(env, tmp);
4100 if (am->tag == Xam_IR && am->Xam.IR.imm == 0) {
4101 /* Hmm, iselIntExpr_AMode wimped out and just computed the
4102 value into a register. Just emit a normal reg-reg move
4103 so reg-alloc can coalesce it away in the usual way. */
4104 HReg src = am->Xam.IR.reg;
4105 addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Reg(src), dst));
4106 } else {
4107 addInstr(env, X86Instr_Lea32(am,dst));
4109 return;
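/* Illustrative example (invented temp names): for
      t3 = Add32(t1, 0x10:I32)
   iselIntExpr_AMode can produce the amode 16(%r_t1), and the WrTmp
   then becomes a single "leal 16(%r_t1), %r_t3" rather than a move
   followed by an add. */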
4112 if (ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8) {
4113 X86RMI* rmi = iselIntExpr_RMI(env, stmt->Ist.WrTmp.data);
4114 HReg dst = lookupIRTemp(env, tmp);
4115 addInstr(env, X86Instr_Alu32R(Xalu_MOV,rmi,dst));
4116 return;
4118 if (ty == Ity_I64) {
4119 HReg rHi, rLo, dstHi, dstLo;
4120 iselInt64Expr(&rHi,&rLo, env, stmt->Ist.WrTmp.data);
4121 lookupIRTemp64( &dstHi, &dstLo, env, tmp);
4122 addInstr(env, mk_iMOVsd_RR(rHi,dstHi) );
4123 addInstr(env, mk_iMOVsd_RR(rLo,dstLo) );
4124 return;
4126 if (ty == Ity_I1) {
4127 X86CondCode cond = iselCondCode(env, stmt->Ist.WrTmp.data);
4128 HReg dst = lookupIRTemp(env, tmp);
4129 addInstr(env, X86Instr_Set32(cond, dst));
4130 return;
4132 if (ty == Ity_F64) {
4133 HReg dst = lookupIRTemp(env, tmp);
4134 HReg src = iselDblExpr(env, stmt->Ist.WrTmp.data);
4135 addInstr(env, X86Instr_FpUnary(Xfp_MOV,src,dst));
4136 return;
4138 if (ty == Ity_F32) {
4139 HReg dst = lookupIRTemp(env, tmp);
4140 HReg src = iselFltExpr(env, stmt->Ist.WrTmp.data);
4141 addInstr(env, X86Instr_FpUnary(Xfp_MOV,src,dst));
4142 return;
4144 if (ty == Ity_V128) {
4145 HReg dst = lookupIRTemp(env, tmp);
4146 HReg src = iselVecExpr(env, stmt->Ist.WrTmp.data);
4147 addInstr(env, mk_vMOVsd_RR(src,dst));
4148 return;
4150 break;
4153 /* --------- Call to DIRTY helper --------- */
4154 case Ist_Dirty: {
4155 IRDirty* d = stmt->Ist.Dirty.details;
4157 /* Figure out the return type, if any. */
4158 IRType retty = Ity_INVALID;
4159 if (d->tmp != IRTemp_INVALID)
4160 retty = typeOfIRTemp(env->type_env, d->tmp);
4162 Bool retty_ok = False;
4163 switch (retty) {
4164 case Ity_INVALID: /* function doesn't return anything */
4165 case Ity_I64: case Ity_I32: case Ity_I16: case Ity_I8:
4166 case Ity_V128:
4167 retty_ok = True; break;
4168 default:
4169 break;
4171 if (!retty_ok)
4172 break; /* will go to stmt_fail: */
4174 /* Marshal args, do the call, and set the return value to
4175 0x555..555 if this is a conditional call that returns a value
4176 and the call is skipped. */
4177 UInt addToSp = 0;
4178 RetLoc rloc = mk_RetLoc_INVALID();
4179 doHelperCall( &addToSp, &rloc, env, d->guard, d->cee, retty, d->args );
4180 vassert(is_sane_RetLoc(rloc));
4182 /* Now figure out what to do with the returned value, if any. */
4183 switch (retty) {
4184 case Ity_INVALID: {
4185 /* No return value. Nothing to do. */
4186 vassert(d->tmp == IRTemp_INVALID);
4187 vassert(rloc.pri == RLPri_None);
4188 vassert(addToSp == 0);
4189 return;
4191 case Ity_I32: case Ity_I16: case Ity_I8: {
4192 /* The returned value is in %eax. Park it in the register
4193 associated with tmp. */
4194 vassert(rloc.pri == RLPri_Int);
4195 vassert(addToSp == 0);
4196 HReg dst = lookupIRTemp(env, d->tmp);
4197 addInstr(env, mk_iMOVsd_RR(hregX86_EAX(),dst) );
4198 return;
4200 case Ity_I64: {
4201 /* The returned value is in %edx:%eax. Park it in the
4202 register-pair associated with tmp. */
4203 vassert(rloc.pri == RLPri_2Int);
4204 vassert(addToSp == 0);
4205 HReg dstHi, dstLo;
4206 lookupIRTemp64( &dstHi, &dstLo, env, d->tmp);
4207 addInstr(env, mk_iMOVsd_RR(hregX86_EDX(),dstHi) );
4208 addInstr(env, mk_iMOVsd_RR(hregX86_EAX(),dstLo) );
4209 return;
4211 case Ity_V128: {
4212 /* The returned value is on the stack, and rloc tells
4213 us where. Fish it off the stack and then move the
4214 stack pointer upwards to clear it, as directed by
4215 doHelperCall. */
4216 vassert(rloc.pri == RLPri_V128SpRel);
4217 vassert(addToSp >= 16);
4218 HReg dst = lookupIRTemp(env, d->tmp);
4219 X86AMode* am = X86AMode_IR(rloc.spOff, hregX86_ESP());
4220 addInstr(env, X86Instr_SseLdSt( True/*load*/, dst, am ));
4221 add_to_esp(env, addToSp);
4222 return;
4224 default:
4225 /*NOTREACHED*/
4226 vassert(0);
4228 break;
4231 /* --------- MEM FENCE --------- */
4232 case Ist_MBE:
4233 switch (stmt->Ist.MBE.event) {
4234 case Imbe_Fence:
4235 addInstr(env, X86Instr_MFence(env->hwcaps));
4236 return;
4237 default:
4238 break;
4240 break;
4242 /* --------- ACAS --------- */
4243 case Ist_CAS:
4244 if (stmt->Ist.CAS.details->oldHi == IRTemp_INVALID) {
4245 /* "normal" singleton CAS */
4246 UChar sz;
4247 IRCAS* cas = stmt->Ist.CAS.details;
4248 IRType ty = typeOfIRExpr(env->type_env, cas->dataLo);
4249 /* get: cas->expdLo into %eax, and cas->dataLo into %ebx */
4250 X86AMode* am = iselIntExpr_AMode(env, cas->addr);
4251 HReg rDataLo = iselIntExpr_R(env, cas->dataLo);
4252 HReg rExpdLo = iselIntExpr_R(env, cas->expdLo);
4253 HReg rOldLo = lookupIRTemp(env, cas->oldLo);
4254 vassert(cas->expdHi == NULL);
4255 vassert(cas->dataHi == NULL);
4256 addInstr(env, mk_iMOVsd_RR(rExpdLo, rOldLo));
4257 addInstr(env, mk_iMOVsd_RR(rExpdLo, hregX86_EAX()));
4258 addInstr(env, mk_iMOVsd_RR(rDataLo, hregX86_EBX()));
4259 switch (ty) {
4260 case Ity_I32: sz = 4; break;
4261 case Ity_I16: sz = 2; break;
4262 case Ity_I8: sz = 1; break;
4263 default: goto unhandled_cas;
4265 addInstr(env, X86Instr_ACAS(am, sz));
4266 addInstr(env,
4267 X86Instr_CMov32(Xcc_NZ,
4268 X86RM_Reg(hregX86_EAX()), rOldLo));
4269 return;
4270 } else {
4271 /* double CAS */
4272 IRCAS* cas = stmt->Ist.CAS.details;
4273 IRType ty = typeOfIRExpr(env->type_env, cas->dataLo);
4274 /* only 32-bit allowed in this case */
4275 /* get: cas->expdLo into %eax, and cas->dataLo into %ebx */
4276 /* get: cas->expdHi into %edx, and cas->dataHi into %ecx */
4277 X86AMode* am = iselIntExpr_AMode(env, cas->addr);
4278 HReg rDataHi = iselIntExpr_R(env, cas->dataHi);
4279 HReg rDataLo = iselIntExpr_R(env, cas->dataLo);
4280 HReg rExpdHi = iselIntExpr_R(env, cas->expdHi);
4281 HReg rExpdLo = iselIntExpr_R(env, cas->expdLo);
4282 HReg rOldHi = lookupIRTemp(env, cas->oldHi);
4283 HReg rOldLo = lookupIRTemp(env, cas->oldLo);
4284 if (ty != Ity_I32)
4285 goto unhandled_cas;
4286 addInstr(env, mk_iMOVsd_RR(rExpdHi, rOldHi));
4287 addInstr(env, mk_iMOVsd_RR(rExpdLo, rOldLo));
4288 addInstr(env, mk_iMOVsd_RR(rExpdHi, hregX86_EDX()));
4289 addInstr(env, mk_iMOVsd_RR(rExpdLo, hregX86_EAX()));
4290 addInstr(env, mk_iMOVsd_RR(rDataHi, hregX86_ECX()));
4291 addInstr(env, mk_iMOVsd_RR(rDataLo, hregX86_EBX()));
4292 addInstr(env, X86Instr_DACAS(am));
4293 addInstr(env,
4294 X86Instr_CMov32(Xcc_NZ,
4295 X86RM_Reg(hregX86_EDX()), rOldHi));
4296 addInstr(env,
4297 X86Instr_CMov32(Xcc_NZ,
4298 X86RM_Reg(hregX86_EAX()), rOldLo));
4299 return;
4301 unhandled_cas:
4302 break;
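/* Both CAS forms above rely on the cmpxchg contract: the expected
   value sits in %eax (or %edx:%eax for the double case), the new
   value in %ebx (or %ecx:%ebx), and the compare-and-swap is emitted
   with a LOCK prefix.  If the comparison fails the hardware leaves
   the value it observed in %eax (resp. %edx:%eax), and the
   CMov32(Xcc_NZ, ...) instructions copy that observed value into the
   "old" temporaries; on success they are skipped, leaving the
   expected value that was copied in earlier. */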
4304 /* --------- INSTR MARK --------- */
4305 /* Doesn't generate any executable code ... */
4306 case Ist_IMark:
4307 return;
4309 /* --------- NO-OP --------- */
4310 /* Fairly self-explanatory, wouldn't you say? */
4311 case Ist_NoOp:
4312 return;
4314 /* --------- EXIT --------- */
4315 case Ist_Exit: {
4316 if (stmt->Ist.Exit.dst->tag != Ico_U32)
4317 vpanic("iselStmt(x86): Ist_Exit: dst is not a 32-bit value");
4319 X86CondCode cc = iselCondCode(env, stmt->Ist.Exit.guard);
4320 X86AMode* amEIP = X86AMode_IR(stmt->Ist.Exit.offsIP,
4321 hregX86_EBP());
4323 /* Case: boring transfer to known address */
4324 if (stmt->Ist.Exit.jk == Ijk_Boring) {
4325 if (env->chainingAllowed) {
4326 /* .. almost always true .. */
4327 /* Skip the event check at the dst if this is a forwards
4328 edge. */
4329 Bool toFastEP
4330 = ((Addr32)stmt->Ist.Exit.dst->Ico.U32) > env->max_ga;
4331 if (0) vex_printf("%s", toFastEP ? "Y" : ",");
4332 addInstr(env, X86Instr_XDirect(stmt->Ist.Exit.dst->Ico.U32,
4333 amEIP, cc, toFastEP));
4334 } else {
4335 /* .. very occasionally .. */
4336 /* We can't use chaining, so ask for an assisted transfer,
4337 as that's the only alternative that is allowable. */
4338 HReg r = iselIntExpr_R(env, IRExpr_Const(stmt->Ist.Exit.dst));
4339 addInstr(env, X86Instr_XAssisted(r, amEIP, cc, Ijk_Boring));
4341 return;
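/* XDirect produces a patchable direct jump that can later be chained
   straight to the destination translation; toFastEP selects the
   destination's fast entry point, which omits the event check.  Only
   forward edges (dst > env->max_ga) do so, which keeps the event
   check on any edge that could form a loop. */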
4344 /* Case: assisted transfer to arbitrary address */
4345 switch (stmt->Ist.Exit.jk) {
4346 /* Keep this list in sync with that in iselNext below */
4347 case Ijk_ClientReq:
4348 case Ijk_EmWarn:
4349 case Ijk_MapFail:
4350 case Ijk_NoDecode:
4351 case Ijk_NoRedir:
4352 case Ijk_SigSEGV:
4353 case Ijk_SigTRAP:
4354 case Ijk_Sys_int128:
4355 case Ijk_Sys_int129:
4356 case Ijk_Sys_int130:
4357 case Ijk_Sys_int145:
4358 case Ijk_Sys_int210:
4359 case Ijk_Sys_syscall:
4360 case Ijk_Sys_sysenter:
4361 case Ijk_InvalICache:
4362 case Ijk_Yield:
4364 HReg r = iselIntExpr_R(env, IRExpr_Const(stmt->Ist.Exit.dst));
4365 addInstr(env, X86Instr_XAssisted(r, amEIP, cc, stmt->Ist.Exit.jk));
4366 return;
4368 default:
4369 break;
4372 /* Do we ever expect to see any other kind? */
4373 goto stmt_fail;
4376 default: break;
4378 stmt_fail:
4379 ppIRStmt(stmt);
4380 vpanic("iselStmt");
4384 /*---------------------------------------------------------*/
4385 /*--- ISEL: Basic block terminators (Nexts) ---*/
4386 /*---------------------------------------------------------*/
4388 static void iselNext ( ISelEnv* env,
4389 IRExpr* next, IRJumpKind jk, Int offsIP )
4391 if (vex_traceflags & VEX_TRACE_VCODE) {
4392 vex_printf( "\n-- PUT(%d) = ", offsIP);
4393 ppIRExpr( next );
4394 vex_printf( "; exit-");
4395 ppIRJumpKind(jk);
4396 vex_printf( "\n");
4399 /* Case: boring transfer to known address */
4400 if (next->tag == Iex_Const) {
4401 IRConst* cdst = next->Iex.Const.con;
4402 vassert(cdst->tag == Ico_U32);
4403 if (jk == Ijk_Boring || jk == Ijk_Call) {
4404 /* Boring transfer to known address */
4405 X86AMode* amEIP = X86AMode_IR(offsIP, hregX86_EBP());
4406 if (env->chainingAllowed) {
4407 /* .. almost always true .. */
4408 /* Skip the event check at the dst if this is a forwards
4409 edge. */
4410 Bool toFastEP
4411 = ((Addr32)cdst->Ico.U32) > env->max_ga;
4412 if (0) vex_printf("%s", toFastEP ? "X" : ".");
4413 addInstr(env, X86Instr_XDirect(cdst->Ico.U32,
4414 amEIP, Xcc_ALWAYS,
4415 toFastEP));
4416 } else {
4417 /* .. very occasionally .. */
4418 /* We can't use chaining, so ask for an assisted transfer,
4419 as that's the only alternative that is allowable. */
4420 HReg r = iselIntExpr_R(env, next);
4421 addInstr(env, X86Instr_XAssisted(r, amEIP, Xcc_ALWAYS,
4422 Ijk_Boring));
4424 return;
4428 /* Case: call/return (==boring) transfer to any address */
4429 switch (jk) {
4430 case Ijk_Boring: case Ijk_Ret: case Ijk_Call: {
4431 HReg r = iselIntExpr_R(env, next);
4432 X86AMode* amEIP = X86AMode_IR(offsIP, hregX86_EBP());
4433 if (env->chainingAllowed) {
4434 addInstr(env, X86Instr_XIndir(r, amEIP, Xcc_ALWAYS));
4435 } else {
4436 addInstr(env, X86Instr_XAssisted(r, amEIP, Xcc_ALWAYS,
4437 Ijk_Boring));
4439 return;
4441 default:
4442 break;
4445 /* Case: assisted transfer to arbitrary address */
4446 switch (jk) {
4447 /* Keep this list in sync with that for Ist_Exit above */
4448 case Ijk_ClientReq:
4449 case Ijk_EmWarn:
4450 case Ijk_MapFail:
4451 case Ijk_NoDecode:
4452 case Ijk_NoRedir:
4453 case Ijk_SigSEGV:
4454 case Ijk_SigTRAP:
4455 case Ijk_Sys_int128:
4456 case Ijk_Sys_int129:
4457 case Ijk_Sys_int130:
4458 case Ijk_Sys_int145:
4459 case Ijk_Sys_int210:
4460 case Ijk_Sys_syscall:
4461 case Ijk_Sys_sysenter:
4462 case Ijk_InvalICache:
4463 case Ijk_Yield:
4465 HReg r = iselIntExpr_R(env, next);
4466 X86AMode* amEIP = X86AMode_IR(offsIP, hregX86_EBP());
4467 addInstr(env, X86Instr_XAssisted(r, amEIP, Xcc_ALWAYS, jk));
4468 return;
4470 default:
4471 break;
4474 vex_printf( "\n-- PUT(%d) = ", offsIP);
4475 ppIRExpr( next );
4476 vex_printf( "; exit-");
4477 ppIRJumpKind(jk);
4478 vex_printf( "\n");
4479 vassert(0); // are we expecting any other kind?
4483 /*---------------------------------------------------------*/
4484 /*--- Insn selector top-level ---*/
4485 /*---------------------------------------------------------*/
4487 /* Translate an entire SB to x86 code. */
4489 HInstrArray* iselSB_X86 ( const IRSB* bb,
4490 VexArch arch_host,
4491 const VexArchInfo* archinfo_host,
4492 const VexAbiInfo* vbi/*UNUSED*/,
4493 Int offs_Host_EvC_Counter,
4494 Int offs_Host_EvC_FailAddr,
4495 Bool chainingAllowed,
4496 Bool addProfInc,
4497 Addr max_ga )
4499 Int i, j;
4500 HReg hreg, hregHI;
4501 ISelEnv* env;
4502 UInt hwcaps_host = archinfo_host->hwcaps;
4503 X86AMode *amCounter, *amFailAddr;
4505 /* sanity ... */
4506 vassert(arch_host == VexArchX86);
4507 vassert(0 == (hwcaps_host
4508 & ~(VEX_HWCAPS_X86_MMXEXT
4509 | VEX_HWCAPS_X86_SSE1
4510 | VEX_HWCAPS_X86_SSE2
4511 | VEX_HWCAPS_X86_SSE3
4512 | VEX_HWCAPS_X86_LZCNT)));
4514 /* Check that the host's endianness is as expected. */
4515 vassert(archinfo_host->endness == VexEndnessLE);
4517 /* Make up an initial environment to use. */
4518 env = LibVEX_Alloc_inline(sizeof(ISelEnv));
4519 env->vreg_ctr = 0;
4521 /* Set up output code array. */
4522 env->code = newHInstrArray();
4524 /* Copy BB's type env. */
4525 env->type_env = bb->tyenv;
4527 /* Make up an IRTemp -> virtual HReg mapping. This doesn't
4528 change as we go along. */
4529 env->n_vregmap = bb->tyenv->types_used;
4530 env->vregmap = LibVEX_Alloc_inline(env->n_vregmap * sizeof(HReg));
4531 env->vregmapHI = LibVEX_Alloc_inline(env->n_vregmap * sizeof(HReg));
4533 /* and finally ... */
4534 env->chainingAllowed = chainingAllowed;
4535 env->hwcaps = hwcaps_host;
4536 env->max_ga = max_ga;
4538 /* For each IR temporary, allocate a suitably-kinded virtual
4539 register. */
4540 j = 0;
4541 for (i = 0; i < env->n_vregmap; i++) {
4542 hregHI = hreg = INVALID_HREG;
4543 switch (bb->tyenv->types[i]) {
4544 case Ity_I1:
4545 case Ity_I8:
4546 case Ity_I16:
4547 case Ity_I32: hreg = mkHReg(True, HRcInt32, 0, j++); break;
4548 case Ity_I64: hreg = mkHReg(True, HRcInt32, 0, j++);
4549 hregHI = mkHReg(True, HRcInt32, 0, j++); break;
4550 case Ity_F32:
4551 case Ity_F64: hreg = mkHReg(True, HRcFlt64, 0, j++); break;
4552 case Ity_V128: hreg = mkHReg(True, HRcVec128, 0, j++); break;
4553 default: ppIRType(bb->tyenv->types[i]);
4554 vpanic("iselBB: IRTemp type");
4556 env->vregmap[i] = hreg;
4557 env->vregmapHI[i] = hregHI;
4559 env->vreg_ctr = j;
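/* So, for example, an Ity_I64 temp is represented by a pair of
   32-bit virtual registers: vregmap[i] holds the low half and
   vregmapHI[i] the high half, which is why the 64-bit cases above
   (iselInt64Expr / lookupIRTemp64) always deal in register pairs. */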
4561 /* The very first instruction must be an event check. */
4562 amCounter = X86AMode_IR(offs_Host_EvC_Counter, hregX86_EBP());
4563 amFailAddr = X86AMode_IR(offs_Host_EvC_FailAddr, hregX86_EBP());
4564 addInstr(env, X86Instr_EvCheck(amCounter, amFailAddr));
4566 /* Possibly a block counter increment (for profiling). At this
4567 point we don't know the address of the counter, so just pretend
4568 it is zero. It will have to be patched later, but before this
4569 translation is used, by a call to LibVEX_patchProfCtr. */
4570 if (addProfInc) {
4571 addInstr(env, X86Instr_ProfInc());
4574 /* Ok, finally we can iterate over the statements. */
4575 for (i = 0; i < bb->stmts_used; i++)
4576 iselStmt(env, bb->stmts[i]);
4578 iselNext(env, bb->next, bb->jumpkind, bb->offsIP);
4580 /* record the number of vregs we used. */
4581 env->code->n_vregs = env->vreg_ctr;
4582 return env->code;
4586 /*---------------------------------------------------------------*/
4587 /*--- end host_x86_isel.c ---*/
4588 /*---------------------------------------------------------------*/