2 /*---------------------------------------------------------------*/
3 /*--- begin host_arm_isel.c ---*/
4 /*---------------------------------------------------------------*/
7 This file is part of Valgrind, a dynamic binary instrumentation
10 Copyright (C) 2004-2017 OpenWorks LLP
14 Copyright (C) 2010-2017 Samsung Electronics
15 contributed by Dmitry Zhurikhin <zhur@ispras.ru>
16 and Kirill Batuzov <batuzovk@ispras.ru>
18 This program is free software; you can redistribute it and/or
19 modify it under the terms of the GNU General Public License as
20 published by the Free Software Foundation; either version 2 of the
21 License, or (at your option) any later version.
23 This program is distributed in the hope that it will be useful, but
24 WITHOUT ANY WARRANTY; without even the implied warranty of
25 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
26 General Public License for more details.
28 You should have received a copy of the GNU General Public License
29 along with this program; if not, see <http://www.gnu.org/licenses/>.
31 The GNU General Public License is contained in the file COPYING.
34 #include "libvex_basictypes.h"
35 #include "libvex_ir.h"
39 #include "main_util.h"
40 #include "main_globals.h"
41 #include "host_generic_regs.h"
42 #include "host_generic_simd64.h" // for 32-bit SIMD helpers
43 #include "host_arm_defs.h"
46 /*---------------------------------------------------------*/
47 /*--- ARMvfp control word stuff ---*/
48 /*---------------------------------------------------------*/
50 /* Vex-generated code expects to run with the FPU set as follows: all
51 exceptions masked, round-to-nearest, non-vector mode, with the NZCV
52 flags cleared, and FZ (flush to zero) disabled. Curiously enough,
53 this corresponds to a FPSCR value of zero.
55 fpscr should therefore be zero on entry to Vex-generated code, and
56 should be unchanged at exit. (Or at least the bottom 28 bits
60 #define DEFAULT_FPSCR 0
63 /*---------------------------------------------------------*/
65 /*---------------------------------------------------------*/
67 /* This carries around:
69 - A mapping from IRTemp to IRType, giving the type of any IRTemp we
70 might encounter. This is computed before insn selection starts,
73 - A mapping from IRTemp to HReg. This tells the insn selector
74 which virtual register(s) are associated with each IRTemp
75 temporary. This is computed before insn selection starts, and
76 does not change. We expect this mapping to map precisely the
77 same set of IRTemps as the type mapping does.
79 - vregmap holds the primary register for the IRTemp.
80 - vregmapHI is only used for 64-bit integer-typed
81 IRTemps. It holds the identity of a second
82 32-bit virtual HReg, which holds the high half
85 - The code array, that is, the insns selected so far.
87 - A counter, for generating new virtual registers.
89 - The host hardware capabilities word. This is set at the start
92 - A Bool for indicating whether we may generate chain-me
93 instructions for control flow transfers, or whether we must use
96 - The maximum guest address of any guest insn in this block.
97 Actually, the address of the highest-addressed byte from any insn
98 in this block. Is set at the start and does not change. This is
99 used for detecting jumps which are definitely forward-edges from
100 this block, and therefore can be made (chained) to the fast entry
101 point of the destination, thereby avoiding the destination's
104 Note, this is all (well, mostly) host-independent.
109 /* Constant -- are set at the start and do not change. */
118 Bool chainingAllowed
;
121 /* These are modified as we go along. */
127 static HReg
lookupIRTemp ( ISelEnv
* env
, IRTemp tmp
)
129 vassert(tmp
< env
->n_vregmap
);
130 return env
->vregmap
[tmp
];
133 static void lookupIRTemp64 ( HReg
* vrHI
, HReg
* vrLO
, ISelEnv
* env
, IRTemp tmp
)
135 vassert(tmp
< env
->n_vregmap
);
136 vassert(! hregIsInvalid(env
->vregmapHI
[tmp
]));
137 *vrLO
= env
->vregmap
[tmp
];
138 *vrHI
= env
->vregmapHI
[tmp
];
141 static void addInstr ( ISelEnv
* env
, ARMInstr
* instr
)
143 addHInstr(env
->code
, instr
);
144 if (vex_traceflags
& VEX_TRACE_VCODE
) {
150 static HReg
newVRegI ( ISelEnv
* env
)
152 HReg reg
= mkHReg(True
/*virtual reg*/, HRcInt32
, 0/*enc*/, env
->vreg_ctr
);
157 static HReg
newVRegD ( ISelEnv
* env
)
159 HReg reg
= mkHReg(True
/*virtual reg*/, HRcFlt64
, 0/*enc*/, env
->vreg_ctr
);
164 static HReg
newVRegF ( ISelEnv
* env
)
166 HReg reg
= mkHReg(True
/*virtual reg*/, HRcFlt32
, 0/*enc*/, env
->vreg_ctr
);
171 static HReg
newVRegV ( ISelEnv
* env
)
173 HReg reg
= mkHReg(True
/*virtual reg*/, HRcVec128
, 0/*enc*/, env
->vreg_ctr
);
178 /* These are duplicated in guest_arm_toIR.c */
179 static IRExpr
* unop ( IROp op
, IRExpr
* a
)
181 return IRExpr_Unop(op
, a
);
184 static IRExpr
* binop ( IROp op
, IRExpr
* a1
, IRExpr
* a2
)
186 return IRExpr_Binop(op
, a1
, a2
);
189 static IRExpr
* bind ( Int binder
)
191 return IRExpr_Binder(binder
);
195 /*---------------------------------------------------------*/
196 /*--- ISEL: Forward declarations ---*/
197 /*---------------------------------------------------------*/
199 /* These are organised as iselXXX and iselXXX_wrk pairs. The
200 iselXXX_wrk do the real work, but are not to be called directly.
201 For each XXX, iselXXX calls its iselXXX_wrk counterpart, then
202 checks that all returned registers are virtual. You should not
203 call the _wrk version directly.
205 static ARMAMode1
* iselIntExpr_AMode1_wrk ( ISelEnv
* env
, IRExpr
* e
);
206 static ARMAMode1
* iselIntExpr_AMode1 ( ISelEnv
* env
, IRExpr
* e
);
208 static ARMAMode2
* iselIntExpr_AMode2_wrk ( ISelEnv
* env
, IRExpr
* e
);
209 static ARMAMode2
* iselIntExpr_AMode2 ( ISelEnv
* env
, IRExpr
* e
);
211 static ARMAModeV
* iselIntExpr_AModeV_wrk ( ISelEnv
* env
, IRExpr
* e
);
212 static ARMAModeV
* iselIntExpr_AModeV ( ISelEnv
* env
, IRExpr
* e
);
214 static ARMAModeN
* iselIntExpr_AModeN_wrk ( ISelEnv
* env
, IRExpr
* e
);
215 static ARMAModeN
* iselIntExpr_AModeN ( ISelEnv
* env
, IRExpr
* e
);
217 static ARMRI84
* iselIntExpr_RI84_wrk
218 ( /*OUT*/Bool
* didInv
, Bool mayInv
, ISelEnv
* env
, IRExpr
* e
);
219 static ARMRI84
* iselIntExpr_RI84
220 ( /*OUT*/Bool
* didInv
, Bool mayInv
, ISelEnv
* env
, IRExpr
* e
);
222 static ARMRI5
* iselIntExpr_RI5_wrk ( ISelEnv
* env
, IRExpr
* e
);
223 static ARMRI5
* iselIntExpr_RI5 ( ISelEnv
* env
, IRExpr
* e
);
225 static ARMCondCode
iselCondCode_wrk ( ISelEnv
* env
, IRExpr
* e
);
226 static ARMCondCode
iselCondCode ( ISelEnv
* env
, IRExpr
* e
);
228 static HReg
iselIntExpr_R_wrk ( ISelEnv
* env
, IRExpr
* e
);
229 static HReg
iselIntExpr_R ( ISelEnv
* env
, IRExpr
* e
);
231 static void iselInt64Expr_wrk ( HReg
* rHi
, HReg
* rLo
,
232 ISelEnv
* env
, const IRExpr
* e
);
233 static void iselInt64Expr ( HReg
* rHi
, HReg
* rLo
,
234 ISelEnv
* env
, const IRExpr
* e
);
236 static HReg
iselDblExpr_wrk ( ISelEnv
* env
, IRExpr
* e
);
237 static HReg
iselDblExpr ( ISelEnv
* env
, IRExpr
* e
);
239 static HReg
iselFltExpr_wrk ( ISelEnv
* env
, IRExpr
* e
);
240 static HReg
iselFltExpr ( ISelEnv
* env
, IRExpr
* e
);
242 static HReg
iselNeon64Expr_wrk ( ISelEnv
* env
, const IRExpr
* e
);
243 static HReg
iselNeon64Expr ( ISelEnv
* env
, const IRExpr
* e
);
245 static HReg
iselNeonExpr_wrk ( ISelEnv
* env
, const IRExpr
* e
);
246 static HReg
iselNeonExpr ( ISelEnv
* env
, const IRExpr
* e
);
248 /*---------------------------------------------------------*/
249 /*--- ISEL: Misc helpers ---*/
250 /*---------------------------------------------------------*/
252 static UInt
ROR32 ( UInt x
, UInt sh
) {
257 return (x
<< (32-sh
)) | (x
>> sh
);
260 /* Figure out if 'u' fits in the special shifter-operand 8x4 immediate
261 form, and if so return the components. */
262 static Bool
fitsIn8x4 ( /*OUT*/UInt
* u8
, /*OUT*/UInt
* u4
, UInt u
)
265 for (i
= 0; i
< 16; i
++) {
266 if (0 == (u
& 0xFFFFFF00)) {
277 /* Make a int reg-reg move. */
278 static ARMInstr
* mk_iMOVds_RR ( HReg dst
, HReg src
)
280 vassert(hregClass(src
) == HRcInt32
);
281 vassert(hregClass(dst
) == HRcInt32
);
282 return ARMInstr_Mov(dst
, ARMRI84_R(src
));
285 /* Set the VFP unit's rounding mode to default (round to nearest). */
286 static void set_VFP_rounding_default ( ISelEnv
* env
)
288 /* mov rTmp, #DEFAULT_FPSCR
291 HReg rTmp
= newVRegI(env
);
292 addInstr(env
, ARMInstr_Imm32(rTmp
, DEFAULT_FPSCR
));
293 addInstr(env
, ARMInstr_FPSCR(True
/*toFPSCR*/, rTmp
));
296 /* Mess with the VFP unit's rounding mode: 'mode' is an I32-typed
297 expression denoting a value in the range 0 .. 3, indicating a round
298 mode encoded as per type IRRoundingMode. Set FPSCR to have the
302 void set_VFP_rounding_mode ( ISelEnv
* env
, IRExpr
* mode
)
304 /* This isn't simple, because 'mode' carries an IR rounding
305 encoding, and we need to translate that to an ARMvfp one:
307 00 to nearest (the default)
316 Easy enough to do; just swap the two bits.
318 HReg irrm
= iselIntExpr_R(env
, mode
);
319 HReg tL
= newVRegI(env
);
320 HReg tR
= newVRegI(env
);
321 HReg t3
= newVRegI(env
);
323 tR = irrm >> 1; if we're lucky, these will issue together
330 addInstr(env
, ARMInstr_Shift(ARMsh_SHL
, tL
, irrm
, ARMRI5_I5(1)));
331 addInstr(env
, ARMInstr_Shift(ARMsh_SHR
, tR
, irrm
, ARMRI5_I5(1)));
332 addInstr(env
, ARMInstr_Alu(ARMalu_AND
, tL
, tL
, ARMRI84_I84(2,0)));
333 addInstr(env
, ARMInstr_Alu(ARMalu_AND
, tR
, tR
, ARMRI84_I84(1,0)));
334 addInstr(env
, ARMInstr_Alu(ARMalu_OR
, t3
, tL
, ARMRI84_R(tR
)));
335 addInstr(env
, ARMInstr_Shift(ARMsh_SHL
, t3
, t3
, ARMRI5_I5(22)));
336 addInstr(env
, ARMInstr_FPSCR(True
/*toFPSCR*/, t3
));
340 /*---------------------------------------------------------*/
341 /*--- ISEL: Function call helpers ---*/
342 /*---------------------------------------------------------*/
344 /* Used only in doHelperCall. See big comment in doHelperCall re
345 handling of register-parameter args. This function figures out
346 whether evaluation of an expression might require use of a fixed
347 register. If in doubt return True (safe but suboptimal).
350 Bool
mightRequireFixedRegs ( IRExpr
* e
)
352 if (UNLIKELY(is_IRExpr_VECRET_or_GSPTR(e
))) {
353 // These are always "safe" -- either a copy of r13(sp) in some
354 // arbitrary vreg, or a copy of r8, respectively.
357 /* Else it's a "normal" expression. */
359 case Iex_RdTmp
: case Iex_Const
: case Iex_Get
:
368 Bool
doHelperCallWithArgsOnStack ( /*OUT*/UInt
* stackAdjustAfterCall
,
369 /*OUT*/RetLoc
* retloc
,
372 IRCallee
* cee
, IRType retTy
, IRExpr
** args
)
374 /* This function deals just with the case where the arg sequence is:
375 VECRET followed by between 4 and 12 Ity_I32 values. So far no other
376 cases are necessary or supported. */
378 /* Check this matches the required format. */
379 if (args
[0] == NULL
|| args
[0]->tag
!= Iex_VECRET
)
383 UInt n_real_args
= 0;
384 for (i
= 1; args
[i
]; i
++) {
385 IRExpr
* arg
= args
[i
];
386 if (UNLIKELY(is_IRExpr_VECRET_or_GSPTR(arg
)))
388 IRType argTy
= typeOfIRExpr(env
->type_env
, arg
);
389 if (UNLIKELY(argTy
!= Ity_I32
))
394 /* We expect to pass at least some args on the stack. */
395 if (n_real_args
<= 3)
398 /* But not too many. */
399 if (n_real_args
> 12)
402 /* General rules for a call:
404 Args 1 .. 4 go in R0 .. R3. The rest are pushed R to L on the
405 stack; that is, arg 5 is at the lowest address, arg 6 at the
408 The stack is to be kept 8 aligned.
410 It appears (for unclear reasons) that the highest 3 words made
411 available when moving SP downwards are not to be used. For
412 example, if 5 args are to go on the stack, then SP must be moved
413 down 32 bytes, and the area at SP+20 .. SP+31 is not to be used
417 /* For this particular case, we use the following layout:
423 ------ original SP - 128
425 args words, between 1 and 11
426 ------ new SP = original_SP - 256
428 Using 256 bytes is overkill, but it is simple and good enough.
431 /* This should really be
432 HReg argVRegs[n_real_args];
433 but that makes it impossible to do 'goto's forward past.
434 Hence the following kludge. */
435 vassert(n_real_args
<= 12);
437 for (i
= 0; i
< 12; i
++)
438 argVRegs
[i
] = INVALID_HREG
;
440 /* Compute args into vregs. */
441 for (i
= 0; i
< n_real_args
; i
++) {
442 argVRegs
[i
] = iselIntExpr_R(env
, args
[i
+1]);
445 /* Now we can compute the condition. We can't do it earlier
446 because the argument computations could trash the condition
447 codes. Be a bit clever to handle the common case where the
449 ARMCondCode cc
= ARMcc_AL
;
451 if (guard
->tag
== Iex_Const
452 && guard
->Iex
.Const
.con
->tag
== Ico_U1
453 && guard
->Iex
.Const
.con
->Ico
.U1
== True
) {
454 /* unconditional -- do nothing */
457 cc
= iselCondCode( env
, guard
);
461 HReg r0
= hregARM_R0();
462 HReg sp
= hregARM_R13();
464 ARMRI84
* c256
= ARMRI84_I84(64, 15); // 64 `ror` (15 * 2)
466 addInstr(env
, ARMInstr_Alu(ARMalu_SUB
, r0
, sp
, ARMRI84_I84(128, 0)));
468 addInstr(env
, mk_iMOVds_RR(hregARM_R1(), argVRegs
[0]));
469 addInstr(env
, mk_iMOVds_RR(hregARM_R2(), argVRegs
[1]));
470 addInstr(env
, mk_iMOVds_RR(hregARM_R3(), argVRegs
[2]));
472 addInstr(env
, ARMInstr_Alu(ARMalu_SUB
, sp
, sp
, c256
));
474 for (i
= 3; i
< n_real_args
; i
++) {
475 addInstr(env
, ARMInstr_LdSt32(ARMcc_AL
, False
/*store*/, argVRegs
[i
],
476 ARMAMode1_RI(sp
, (i
-3) * 4)));
479 vassert(*stackAdjustAfterCall
== 0);
480 vassert(is_RetLoc_INVALID(*retloc
));
482 *stackAdjustAfterCall
= 256;
483 *retloc
= mk_RetLoc_spRel(RLPri_V128SpRel
, 128);
485 Addr32 target
= (Addr
)cee
->addr
;
486 addInstr(env
, ARMInstr_Call( cc
, target
, 4, *retloc
));
488 return True
; /* success */
495 /* Do a complete function call. |guard| is a Ity_Bit expression
496 indicating whether or not the call happens. If guard==NULL, the
497 call is unconditional. |retloc| is set to indicate where the
498 return value is after the call. The caller (of this fn) must
499 generate code to add |stackAdjustAfterCall| to the stack pointer
500 after the call is done. Returns True iff it managed to handle this
501 combination of arg/return types, else returns False. */
504 Bool
doHelperCall ( /*OUT*/UInt
* stackAdjustAfterCall
,
505 /*OUT*/RetLoc
* retloc
,
508 IRCallee
* cee
, IRType retTy
, IRExpr
** args
)
511 HReg argregs
[ARM_N_ARGREGS
];
512 HReg tmpregs
[ARM_N_ARGREGS
];
514 Int n_args
, i
, nextArgReg
;
517 vassert(ARM_N_ARGREGS
== 4);
519 /* Set default returns. We'll update them later if needed. */
520 *stackAdjustAfterCall
= 0;
521 *retloc
= mk_RetLoc_INVALID();
523 /* These are used for cross-checking that IR-level constraints on
524 the use of IRExpr_VECRET() and IRExpr_GSPTR() are observed. */
528 /* Marshal args for a call and do the call.
530 This function only deals with a tiny set of possibilities, which
531 cover all helpers in practice. The restrictions are that only
532 arguments in registers are supported, hence only ARM_N_REGPARMS
533 x 32 integer bits in total can be passed. In fact the only
534 supported arg types are I32 and I64.
536 The return type can be I{64,32} or V128. In the V128 case, it
537 is expected that |args| will contain the special node
538 IRExpr_VECRET(), in which case this routine generates code to
539 allocate space on the stack for the vector return value. Since
540 we are not passing any scalars on the stack, it is enough to
541 preallocate the return space before marshalling any arguments,
544 |args| may also contain IRExpr_GSPTR(), in which case the
545 value in r8 is passed as the corresponding argument.
547 Generating code which is both efficient and correct when
548 parameters are to be passed in registers is difficult, for the
549 reasons elaborated in detail in comments attached to
550 doHelperCall() in priv/host-x86/isel.c. Here, we use a variant
551 of the method described in those comments.
553 The problem is split into two cases: the fast scheme and the
554 slow scheme. In the fast scheme, arguments are computed
555 directly into the target (real) registers. This is only safe
556 when we can be sure that computation of each argument will not
557 trash any real registers set by computation of any other
560 In the slow scheme, all args are first computed into vregs, and
561 once they are all done, they are moved to the relevant real
562 regs. This always gives correct code, but it also gives a bunch
563 of vreg-to-rreg moves which are usually redundant but are hard
564 for the register allocator to get rid of.
566 To decide which scheme to use, all argument expressions are
567 first examined. If they are all so simple that it is clear they
568 will be evaluated without use of any fixed registers, use the
569 fast scheme, else use the slow scheme. Note also that only
570 unconditional calls may use the fast scheme, since having to
571 compute a condition expression could itself trash real
574 Note this requires being able to examine an expression and
575 determine whether or not evaluation of it might use a fixed
576 register. That requires knowledge of how the rest of this insn
577 selector works. Currently just the following 3 are regarded as
578 safe -- hopefully they cover the majority of arguments in
579 practice: IRExpr_Tmp IRExpr_Const IRExpr_Get.
582 /* Note that the cee->regparms field is meaningless on ARM hosts
583 (since there is only one calling convention) and so we always
587 for (i
= 0; args
[i
]; i
++) {
588 IRExpr
* arg
= args
[i
];
589 if (UNLIKELY(arg
->tag
== Iex_VECRET
)) {
591 } else if (UNLIKELY(arg
->tag
== Iex_GSPTR
)) {
597 /* If there are more than 4 args, we are going to have to pass
598 some via memory. Use a different function to (possibly) deal with
599 that; dealing with it here is too complex. */
600 if (n_args
> ARM_N_ARGREGS
) {
601 return doHelperCallWithArgsOnStack(stackAdjustAfterCall
, retloc
,
602 env
, guard
, cee
, retTy
, args
);
606 /* After this point we make no attempt to pass args on the stack,
607 and just give up if that case (which is OK because it never
608 happens). Even if there are for example only 3 args, it might
609 still be necessary to pass some of them on the stack if for example
610 two or more of them are 64-bit integers. */
612 argregs
[0] = hregARM_R0();
613 argregs
[1] = hregARM_R1();
614 argregs
[2] = hregARM_R2();
615 argregs
[3] = hregARM_R3();
617 tmpregs
[0] = tmpregs
[1] = tmpregs
[2] =
618 tmpregs
[3] = INVALID_HREG
;
620 /* First decide which scheme (slow or fast) is to be used. First
621 assume the fast scheme, and select slow if any contraindications
627 if (guard
->tag
== Iex_Const
628 && guard
->Iex
.Const
.con
->tag
== Ico_U1
629 && guard
->Iex
.Const
.con
->Ico
.U1
== True
) {
632 /* Not manifestly unconditional -- be conservative. */
638 for (i
= 0; i
< n_args
; i
++) {
639 if (mightRequireFixedRegs(args
[i
])) {
647 if (retTy
== Ity_V128
|| retTy
== Ity_V256
)
651 /* At this point the scheme to use has been established. Generate
652 code to get the arg values into the argument rregs. If we run
653 out of arg regs, give up. */
660 for (i
= 0; i
< n_args
; i
++) {
661 IRExpr
* arg
= args
[i
];
663 IRType aTy
= Ity_INVALID
;
664 if (LIKELY(!is_IRExpr_VECRET_or_GSPTR(arg
)))
665 aTy
= typeOfIRExpr(env
->type_env
, arg
);
667 if (nextArgReg
>= ARM_N_ARGREGS
)
668 return False
; /* out of argregs */
670 if (aTy
== Ity_I32
) {
671 addInstr(env
, mk_iMOVds_RR( argregs
[nextArgReg
],
672 iselIntExpr_R(env
, arg
) ));
675 else if (aTy
== Ity_I64
) {
676 /* 64-bit args must be passed in an a reg-pair of the form
677 n:n+1, where n is even. Hence either r0:r1 or r2:r3.
678 On a little-endian host, the less significant word is
679 passed in the lower-numbered register. */
680 if (nextArgReg
& 1) {
681 if (nextArgReg
>= ARM_N_ARGREGS
)
682 return False
; /* out of argregs */
683 addInstr(env
, ARMInstr_Imm32( argregs
[nextArgReg
], 0xAA ));
686 if (nextArgReg
+ 1 >= ARM_N_ARGREGS
)
687 return False
; /* out of argregs */
689 iselInt64Expr(&raHi
, &raLo
, env
, arg
);
690 addInstr(env
, mk_iMOVds_RR( argregs
[nextArgReg
], raLo
));
692 addInstr(env
, mk_iMOVds_RR( argregs
[nextArgReg
], raHi
));
695 else if (arg
->tag
== Iex_GSPTR
) {
697 addInstr(env
, mk_iMOVds_RR( argregs
[nextArgReg
],
701 else if (arg
->tag
== Iex_VECRET
) {
702 // If this happens, it denotes ill-formed IR
706 return False
; /* unhandled arg type */
709 /* Fast scheme only applies for unconditional calls. Hence: */
714 /* SLOW SCHEME; move via temporaries */
717 for (i
= 0; i
< n_args
; i
++) {
718 IRExpr
* arg
= args
[i
];
720 IRType aTy
= Ity_INVALID
;
721 if (LIKELY(!is_IRExpr_VECRET_or_GSPTR(arg
)))
722 aTy
= typeOfIRExpr(env
->type_env
, arg
);
724 if (nextArgReg
>= ARM_N_ARGREGS
)
725 return False
; /* out of argregs */
727 if (aTy
== Ity_I32
) {
728 tmpregs
[nextArgReg
] = iselIntExpr_R(env
, args
[i
]);
731 else if (aTy
== Ity_I64
) {
732 /* Same comment applies as in the Fast-scheme case. */
735 if (nextArgReg
+ 1 >= ARM_N_ARGREGS
)
736 return False
; /* out of argregs */
738 iselInt64Expr(&raHi
, &raLo
, env
, args
[i
]);
739 tmpregs
[nextArgReg
] = raLo
;
741 tmpregs
[nextArgReg
] = raHi
;
744 else if (arg
->tag
== Iex_GSPTR
) {
746 tmpregs
[nextArgReg
] = hregARM_R8();
749 else if (arg
->tag
== Iex_VECRET
) {
750 // If this happens, it denotes ill-formed IR
754 return False
; /* unhandled arg type */
757 /* Now we can compute the condition. We can't do it earlier
758 because the argument computations could trash the condition
759 codes. Be a bit clever to handle the common case where the
763 if (guard
->tag
== Iex_Const
764 && guard
->Iex
.Const
.con
->tag
== Ico_U1
765 && guard
->Iex
.Const
.con
->Ico
.U1
== True
) {
766 /* unconditional -- do nothing */
768 cc
= iselCondCode( env
, guard
);
772 /* Move the args to their final destinations. */
773 for (i
= 0; i
< nextArgReg
; i
++) {
774 if (hregIsInvalid(tmpregs
[i
])) { // Skip invalid regs
775 addInstr(env
, ARMInstr_Imm32( argregs
[i
], 0xAA ));
778 /* None of these insns, including any spill code that might
779 be generated, may alter the condition codes. */
780 addInstr( env
, mk_iMOVds_RR( argregs
[i
], tmpregs
[i
] ) );
785 /* Should be assured by checks above */
786 vassert(nextArgReg
<= ARM_N_ARGREGS
);
788 /* Do final checks, set the return values, and generate the call
789 instruction proper. */
790 vassert(nGSPTRs
== 0 || nGSPTRs
== 1);
791 vassert(nVECRETs
== ((retTy
== Ity_V128
|| retTy
== Ity_V256
) ? 1 : 0));
792 vassert(*stackAdjustAfterCall
== 0);
793 vassert(is_RetLoc_INVALID(*retloc
));
796 /* Function doesn't return a value. */
797 *retloc
= mk_RetLoc_simple(RLPri_None
);
800 *retloc
= mk_RetLoc_simple(RLPri_2Int
);
802 case Ity_I32
: case Ity_I16
: case Ity_I8
:
803 *retloc
= mk_RetLoc_simple(RLPri_Int
);
807 *retloc
= mk_RetLoc_spRel(RLPri_V128SpRel
, 0);
808 *stackAdjustAfterCall
= 16;
812 *retloc
= mk_RetLoc_spRel(RLPri_V256SpRel
, 0);
813 *stackAdjustAfterCall
= 32;
816 /* IR can denote other possible return types, but we don't
817 handle those here. */
821 /* Finally, generate the call itself. This needs the *retloc value
822 set in the switch above, which is why it's at the end. */
824 /* nextArgReg doles out argument registers. Since these are
825 assigned in the order r0, r1, r2, r3, its numeric value at this
826 point, which must be between 0 and 4 inclusive, is going to be
827 equal to the number of arg regs in use for the call. Hence bake
828 that number into the call (we'll need to know it when doing
829 register allocation, to know what regs the call reads.)
831 There is a bit of a twist -- harmless but worth recording.
832 Suppose the arg types are (Ity_I32, Ity_I64). Then we will have
833 the first arg in r0 and the second in r3:r2, but r1 isn't used.
834 We nevertheless have nextArgReg==4 and bake that into the call
835 instruction. This will mean the register allocator wil believe
836 this insn reads r1 when in fact it doesn't. But that's
837 harmless; it just artificially extends the live range of r1
838 unnecessarily. The best fix would be to put into the
839 instruction, a bitmask indicating which of r0/1/2/3 carry live
840 values. But that's too much hassle. */
842 target
= (Addr
)cee
->addr
;
843 addInstr(env
, ARMInstr_Call( cc
, target
, nextArgReg
, *retloc
));
845 return True
; /* success */
849 /*---------------------------------------------------------*/
850 /*--- ISEL: Integer expressions (32/16/8 bit) ---*/
851 /*---------------------------------------------------------*/
853 /* Select insns for an integer-typed expression, and add them to the
854 code list. Return a reg holding the result. This reg will be a
855 virtual register. THE RETURNED REG MUST NOT BE MODIFIED. If you
856 want to modify it, ask for a new vreg, copy it in there, and modify
857 the copy. The register allocator will do its best to map both
858 vregs to the same real register, so the copies will often disappear
861 This should handle expressions of 32, 16 and 8-bit type. All
862 results are returned in a 32-bit register. For 16- and 8-bit
863 expressions, the upper 16/24 bits are arbitrary, so you should mask
864 or sign extend partial values if necessary.
867 /* --------------------- AMode1 --------------------- */
869 /* Return an AMode1 which computes the value of the specified
870 expression, possibly also adding insns to the code list as a
871 result. The expression may only be a 32-bit one.
874 static Bool
sane_AMode1 ( ARMAMode1
* am
)
879 toBool( hregClass(am
->ARMam1
.RI
.reg
) == HRcInt32
880 && (hregIsVirtual(am
->ARMam1
.RI
.reg
)
881 || sameHReg(am
->ARMam1
.RI
.reg
, hregARM_R8()))
882 && am
->ARMam1
.RI
.simm13
>= -4095
883 && am
->ARMam1
.RI
.simm13
<= 4095 );
886 toBool( hregClass(am
->ARMam1
.RRS
.base
) == HRcInt32
887 && hregIsVirtual(am
->ARMam1
.RRS
.base
)
888 && hregClass(am
->ARMam1
.RRS
.index
) == HRcInt32
889 && hregIsVirtual(am
->ARMam1
.RRS
.index
)
890 && am
->ARMam1
.RRS
.shift
>= 0
891 && am
->ARMam1
.RRS
.shift
<= 3 );
893 vpanic("sane_AMode: unknown ARM AMode1 tag");
897 static ARMAMode1
* iselIntExpr_AMode1 ( ISelEnv
* env
, IRExpr
* e
)
899 ARMAMode1
* am
= iselIntExpr_AMode1_wrk(env
, e
);
900 vassert(sane_AMode1(am
));
904 static ARMAMode1
* iselIntExpr_AMode1_wrk ( ISelEnv
* env
, IRExpr
* e
)
906 IRType ty
= typeOfIRExpr(env
->type_env
,e
);
907 vassert(ty
== Ity_I32
);
909 /* FIXME: add RRS matching */
911 /* {Add32,Sub32}(expr,simm13) */
912 if (e
->tag
== Iex_Binop
913 && (e
->Iex
.Binop
.op
== Iop_Add32
|| e
->Iex
.Binop
.op
== Iop_Sub32
)
914 && e
->Iex
.Binop
.arg2
->tag
== Iex_Const
915 && e
->Iex
.Binop
.arg2
->Iex
.Const
.con
->tag
== Ico_U32
) {
916 Int simm
= (Int
)e
->Iex
.Binop
.arg2
->Iex
.Const
.con
->Ico
.U32
;
917 if (simm
>= -4095 && simm
<= 4095) {
919 if (e
->Iex
.Binop
.op
== Iop_Sub32
)
921 reg
= iselIntExpr_R(env
, e
->Iex
.Binop
.arg1
);
922 return ARMAMode1_RI(reg
, simm
);
926 /* Doesn't match anything in particular. Generate it into
927 a register and use that. */
929 HReg reg
= iselIntExpr_R(env
, e
);
930 return ARMAMode1_RI(reg
, 0);
936 /* --------------------- AMode2 --------------------- */
938 /* Return an AMode2 which computes the value of the specified
939 expression, possibly also adding insns to the code list as a
940 result. The expression may only be a 32-bit one.
943 static Bool
sane_AMode2 ( ARMAMode2
* am
)
948 toBool( hregClass(am
->ARMam2
.RI
.reg
) == HRcInt32
949 && hregIsVirtual(am
->ARMam2
.RI
.reg
)
950 && am
->ARMam2
.RI
.simm9
>= -255
951 && am
->ARMam2
.RI
.simm9
<= 255 );
954 toBool( hregClass(am
->ARMam2
.RR
.base
) == HRcInt32
955 && hregIsVirtual(am
->ARMam2
.RR
.base
)
956 && hregClass(am
->ARMam2
.RR
.index
) == HRcInt32
957 && hregIsVirtual(am
->ARMam2
.RR
.index
) );
959 vpanic("sane_AMode: unknown ARM AMode2 tag");
963 static ARMAMode2
* iselIntExpr_AMode2 ( ISelEnv
* env
, IRExpr
* e
)
965 ARMAMode2
* am
= iselIntExpr_AMode2_wrk(env
, e
);
966 vassert(sane_AMode2(am
));
970 static ARMAMode2
* iselIntExpr_AMode2_wrk ( ISelEnv
* env
, IRExpr
* e
)
972 IRType ty
= typeOfIRExpr(env
->type_env
,e
);
973 vassert(ty
== Ity_I32
);
975 /* FIXME: add RR matching */
977 /* {Add32,Sub32}(expr,simm8) */
978 if (e
->tag
== Iex_Binop
979 && (e
->Iex
.Binop
.op
== Iop_Add32
|| e
->Iex
.Binop
.op
== Iop_Sub32
)
980 && e
->Iex
.Binop
.arg2
->tag
== Iex_Const
981 && e
->Iex
.Binop
.arg2
->Iex
.Const
.con
->tag
== Ico_U32
) {
982 Int simm
= (Int
)e
->Iex
.Binop
.arg2
->Iex
.Const
.con
->Ico
.U32
;
983 if (simm
>= -255 && simm
<= 255) {
985 if (e
->Iex
.Binop
.op
== Iop_Sub32
)
987 reg
= iselIntExpr_R(env
, e
->Iex
.Binop
.arg1
);
988 return ARMAMode2_RI(reg
, simm
);
992 /* Doesn't match anything in particular. Generate it into
993 a register and use that. */
995 HReg reg
= iselIntExpr_R(env
, e
);
996 return ARMAMode2_RI(reg
, 0);
1002 /* --------------------- AModeV --------------------- */
1004 /* Return an AModeV which computes the value of the specified
1005 expression, possibly also adding insns to the code list as a
1006 result. The expression may only be a 32-bit one.
1009 static Bool
sane_AModeV ( ARMAModeV
* am
)
1011 return toBool( hregClass(am
->reg
) == HRcInt32
1012 && hregIsVirtual(am
->reg
)
1013 && am
->simm11
>= -1020 && am
->simm11
<= 1020
1014 && 0 == (am
->simm11
& 3) );
1017 static ARMAModeV
* iselIntExpr_AModeV ( ISelEnv
* env
, IRExpr
* e
)
1019 ARMAModeV
* am
= iselIntExpr_AModeV_wrk(env
, e
);
1020 vassert(sane_AModeV(am
));
1024 static ARMAModeV
* iselIntExpr_AModeV_wrk ( ISelEnv
* env
, IRExpr
* e
)
1026 IRType ty
= typeOfIRExpr(env
->type_env
,e
);
1027 vassert(ty
== Ity_I32
);
1029 /* {Add32,Sub32}(expr, simm8 << 2) */
1030 if (e
->tag
== Iex_Binop
1031 && (e
->Iex
.Binop
.op
== Iop_Add32
|| e
->Iex
.Binop
.op
== Iop_Sub32
)
1032 && e
->Iex
.Binop
.arg2
->tag
== Iex_Const
1033 && e
->Iex
.Binop
.arg2
->Iex
.Const
.con
->tag
== Ico_U32
) {
1034 Int simm
= (Int
)e
->Iex
.Binop
.arg2
->Iex
.Const
.con
->Ico
.U32
;
1035 if (simm
>= -1020 && simm
<= 1020 && 0 == (simm
& 3)) {
1037 if (e
->Iex
.Binop
.op
== Iop_Sub32
)
1039 reg
= iselIntExpr_R(env
, e
->Iex
.Binop
.arg1
);
1040 return mkARMAModeV(reg
, simm
);
1044 /* Doesn't match anything in particular. Generate it into
1045 a register and use that. */
1047 HReg reg
= iselIntExpr_R(env
, e
);
1048 return mkARMAModeV(reg
, 0);
1053 /* -------------------- AModeN -------------------- */
1055 static ARMAModeN
* iselIntExpr_AModeN ( ISelEnv
* env
, IRExpr
* e
)
1057 return iselIntExpr_AModeN_wrk(env
, e
);
1060 static ARMAModeN
* iselIntExpr_AModeN_wrk ( ISelEnv
* env
, IRExpr
* e
)
1062 HReg reg
= iselIntExpr_R(env
, e
);
1063 return mkARMAModeN_R(reg
);
1067 /* --------------------- RI84 --------------------- */
1069 /* Select instructions to generate 'e' into a RI84. If mayInv is
1070 true, then the caller will also accept an I84 form that denotes
1071 'not e'. In this case didInv may not be NULL, and *didInv is set
1072 to True. This complication is so as to allow generation of an RI84
1073 which is suitable for use in either an AND or BIC instruction,
1074 without knowing (before this call) which one.
1076 static ARMRI84
* iselIntExpr_RI84 ( /*OUT*/Bool
* didInv
, Bool mayInv
,
1077 ISelEnv
* env
, IRExpr
* e
)
1081 vassert(didInv
!= NULL
);
1082 ri
= iselIntExpr_RI84_wrk(didInv
, mayInv
, env
, e
);
1083 /* sanity checks ... */
1088 vassert(hregClass(ri
->ARMri84
.R
.reg
) == HRcInt32
);
1089 vassert(hregIsVirtual(ri
->ARMri84
.R
.reg
));
1092 vpanic("iselIntExpr_RI84: unknown arm RI84 tag");
1096 /* DO NOT CALL THIS DIRECTLY ! */
1097 static ARMRI84
* iselIntExpr_RI84_wrk ( /*OUT*/Bool
* didInv
, Bool mayInv
,
1098 ISelEnv
* env
, IRExpr
* e
)
1100 IRType ty
= typeOfIRExpr(env
->type_env
,e
);
1101 vassert(ty
== Ity_I32
|| ty
== Ity_I16
|| ty
== Ity_I8
);
1103 if (didInv
) *didInv
= False
;
1105 /* special case: immediate */
1106 if (e
->tag
== Iex_Const
) {
1107 UInt u
, u8
= 0x100, u4
= 0x10; /* both invalid */
1108 switch (e
->Iex
.Const
.con
->tag
) {
1109 case Ico_U32
: u
= e
->Iex
.Const
.con
->Ico
.U32
; break;
1110 case Ico_U16
: u
= 0xFFFF & (e
->Iex
.Const
.con
->Ico
.U16
); break;
1111 case Ico_U8
: u
= 0xFF & (e
->Iex
.Const
.con
->Ico
.U8
); break;
1112 default: vpanic("iselIntExpr_RI84.Iex_Const(armh)");
1114 if (fitsIn8x4(&u8
, &u4
, u
)) {
1115 return ARMRI84_I84( (UShort
)u8
, (UShort
)u4
);
1117 if (mayInv
&& fitsIn8x4(&u8
, &u4
, ~u
)) {
1120 return ARMRI84_I84( (UShort
)u8
, (UShort
)u4
);
1122 /* else fail, fall through to default case */
1125 /* default case: calculate into a register and return that */
1127 HReg r
= iselIntExpr_R ( env
, e
);
1128 return ARMRI84_R(r
);
1133 /* --------------------- RI5 --------------------- */
1135 /* Select instructions to generate 'e' into a RI5. */
1137 static ARMRI5
* iselIntExpr_RI5 ( ISelEnv
* env
, IRExpr
* e
)
1139 ARMRI5
* ri
= iselIntExpr_RI5_wrk(env
, e
);
1140 /* sanity checks ... */
1145 vassert(hregClass(ri
->ARMri5
.R
.reg
) == HRcInt32
);
1146 vassert(hregIsVirtual(ri
->ARMri5
.R
.reg
));
1149 vpanic("iselIntExpr_RI5: unknown arm RI5 tag");
1153 /* DO NOT CALL THIS DIRECTLY ! */
1154 static ARMRI5
* iselIntExpr_RI5_wrk ( ISelEnv
* env
, IRExpr
* e
)
1156 IRType ty
= typeOfIRExpr(env
->type_env
,e
);
1157 vassert(ty
== Ity_I32
|| ty
== Ity_I8
);
1159 /* special case: immediate */
1160 if (e
->tag
== Iex_Const
) {
1161 UInt u
; /* both invalid */
1162 switch (e
->Iex
.Const
.con
->tag
) {
1163 case Ico_U32
: u
= e
->Iex
.Const
.con
->Ico
.U32
; break;
1164 case Ico_U16
: u
= 0xFFFF & (e
->Iex
.Const
.con
->Ico
.U16
); break;
1165 case Ico_U8
: u
= 0xFF & (e
->Iex
.Const
.con
->Ico
.U8
); break;
1166 default: vpanic("iselIntExpr_RI5.Iex_Const(armh)");
1168 if (u
>= 1 && u
<= 31) {
1169 return ARMRI5_I5(u
);
1171 /* else fail, fall through to default case */
1174 /* default case: calculate into a register and return that */
1176 HReg r
= iselIntExpr_R ( env
, e
);
1182 /* ------------------- CondCode ------------------- */
1184 /* Generate code to evaluated a bit-typed expression, returning the
1185 condition code which would correspond when the expression would
1186 notionally have returned 1. */
1188 static ARMCondCode
iselCondCode ( ISelEnv
* env
, IRExpr
* e
)
1190 ARMCondCode cc
= iselCondCode_wrk(env
,e
);
1191 vassert(cc
!= ARMcc_NV
);
1195 static ARMCondCode
iselCondCode_wrk ( ISelEnv
* env
, IRExpr
* e
)
1198 vassert(typeOfIRExpr(env
->type_env
,e
) == Ity_I1
);
1201 if (e
->tag
== Iex_RdTmp
) {
1202 HReg rTmp
= lookupIRTemp(env
, e
->Iex
.RdTmp
.tmp
);
1203 /* CmpOrTst doesn't modify rTmp; so this is OK. */
1204 ARMRI84
* one
= ARMRI84_I84(1,0);
1205 addInstr(env
, ARMInstr_CmpOrTst(False
/*test*/, rTmp
, one
));
1210 if (e
->tag
== Iex_Unop
&& e
->Iex
.Unop
.op
== Iop_Not1
) {
1211 /* Generate code for the arg, and negate the test condition */
1212 return 1 ^ iselCondCode(env
, e
->Iex
.Unop
.arg
);
1215 /* --- patterns rooted at: 32to1 --- */
1217 if (e
->tag
== Iex_Unop
1218 && e
->Iex
.Unop
.op
== Iop_32to1
) {
1219 HReg rTmp
= iselIntExpr_R(env
, e
->Iex
.Unop
.arg
);
1220 ARMRI84
* one
= ARMRI84_I84(1,0);
1221 addInstr(env
, ARMInstr_CmpOrTst(False
/*test*/, rTmp
, one
));
1225 /* --- patterns rooted at: CmpNEZ8 --- */
1227 if (e
->tag
== Iex_Unop
1228 && e
->Iex
.Unop
.op
== Iop_CmpNEZ8
) {
1229 HReg r1
= iselIntExpr_R(env
, e
->Iex
.Unop
.arg
);
1230 ARMRI84
* xFF
= ARMRI84_I84(0xFF,0);
1231 addInstr(env
, ARMInstr_CmpOrTst(False
/*!isCmp*/, r1
, xFF
));
1235 /* --- patterns rooted at: CmpNEZ32 --- */
1237 if (e
->tag
== Iex_Unop
1238 && e
->Iex
.Unop
.op
== Iop_CmpNEZ32
) {
1239 HReg r1
= iselIntExpr_R(env
, e
->Iex
.Unop
.arg
);
1240 ARMRI84
* zero
= ARMRI84_I84(0,0);
1241 addInstr(env
, ARMInstr_CmpOrTst(True
/*isCmp*/, r1
, zero
));
1245 /* --- patterns rooted at: CmpNEZ64 --- */
1247 if (e
->tag
== Iex_Unop
1248 && e
->Iex
.Unop
.op
== Iop_CmpNEZ64
) {
1250 HReg tmp
= newVRegI(env
);
1251 ARMRI84
* zero
= ARMRI84_I84(0,0);
1252 iselInt64Expr(&tHi
, &tLo
, env
, e
->Iex
.Unop
.arg
);
1253 addInstr(env
, ARMInstr_Alu(ARMalu_OR
, tmp
, tHi
, ARMRI84_R(tLo
)));
1254 addInstr(env
, ARMInstr_CmpOrTst(True
/*isCmp*/, tmp
, zero
));
1258 /* --- Cmp*32*(x,y) --- */
1259 if (e
->tag
== Iex_Binop
1260 && (e
->Iex
.Binop
.op
== Iop_CmpEQ32
1261 || e
->Iex
.Binop
.op
== Iop_CmpNE32
1262 || e
->Iex
.Binop
.op
== Iop_CmpLT32S
1263 || e
->Iex
.Binop
.op
== Iop_CmpLT32U
1264 || e
->Iex
.Binop
.op
== Iop_CmpLE32S
1265 || e
->Iex
.Binop
.op
== Iop_CmpLE32U
)) {
1266 HReg argL
= iselIntExpr_R(env
, e
->Iex
.Binop
.arg1
);
1267 ARMRI84
* argR
= iselIntExpr_RI84(NULL
,False
,
1268 env
, e
->Iex
.Binop
.arg2
);
1269 addInstr(env
, ARMInstr_CmpOrTst(True
/*isCmp*/, argL
, argR
));
1270 switch (e
->Iex
.Binop
.op
) {
1271 case Iop_CmpEQ32
: return ARMcc_EQ
;
1272 case Iop_CmpNE32
: return ARMcc_NE
;
1273 case Iop_CmpLT32S
: return ARMcc_LT
;
1274 case Iop_CmpLT32U
: return ARMcc_LO
;
1275 case Iop_CmpLE32S
: return ARMcc_LE
;
1276 case Iop_CmpLE32U
: return ARMcc_LS
;
1277 default: vpanic("iselCondCode(arm): CmpXX32");
1282 /* Constant 1:Bit */
1283 if (e
->tag
== Iex_Const
) {
1285 vassert(e
->Iex
.Const
.con
->tag
== Ico_U1
);
1286 vassert(e
->Iex
.Const
.con
->Ico
.U1
== True
1287 || e
->Iex
.Const
.con
->Ico
.U1
== False
);
1289 addInstr(env
, ARMInstr_Imm32(r
, 0));
1290 addInstr(env
, ARMInstr_CmpOrTst(True
/*isCmp*/, r
, ARMRI84_R(r
)));
1291 return e
->Iex
.Const
.con
->Ico
.U1
? ARMcc_EQ
: ARMcc_NE
;
1294 /* --- And1(x,y), Or1(x,y) --- */
1295 /* FIXME: We could (and probably should) do a lot better here, by using the
1296 iselCondCode_C/_R scheme used in the amd64 insn selector. */
1297 if (e
->tag
== Iex_Binop
1298 && (e
->Iex
.Binop
.op
== Iop_And1
|| e
->Iex
.Binop
.op
== Iop_Or1
)) {
1299 HReg x_as_32
= newVRegI(env
);
1300 ARMCondCode cc_x
= iselCondCode(env
, e
->Iex
.Binop
.arg1
);
1301 addInstr(env
, ARMInstr_Mov(x_as_32
, ARMRI84_I84(0,0)));
1302 addInstr(env
, ARMInstr_CMov(cc_x
, x_as_32
, ARMRI84_I84(1,0)));
1304 HReg y_as_32
= newVRegI(env
);
1305 ARMCondCode cc_y
= iselCondCode(env
, e
->Iex
.Binop
.arg2
);
1306 addInstr(env
, ARMInstr_Mov(y_as_32
, ARMRI84_I84(0,0)));
1307 addInstr(env
, ARMInstr_CMov(cc_y
, y_as_32
, ARMRI84_I84(1,0)));
1309 HReg tmp
= newVRegI(env
);
1310 ARMAluOp aop
= e
->Iex
.Binop
.op
== Iop_And1
? ARMalu_AND
: ARMalu_OR
;
1311 addInstr(env
, ARMInstr_Alu(aop
, tmp
, x_as_32
, ARMRI84_R(y_as_32
)));
1313 ARMRI84
* one
= ARMRI84_I84(1,0);
1314 addInstr(env
, ARMInstr_CmpOrTst(False
/*test*/, tmp
, one
));
1318 // JRS 2013-Jan-03: this seems completely nonsensical
1319 /* --- CasCmpEQ* --- */
1320 /* Ist_Cas has a dummy argument to compare with, so comparison is
1322 //if (e->tag == Iex_Binop
1323 // && (e->Iex.Binop.op == Iop_CasCmpEQ32
1324 // || e->Iex.Binop.op == Iop_CasCmpEQ16
1325 // || e->Iex.Binop.op == Iop_CasCmpEQ8)) {
1330 vpanic("iselCondCode");
1334 /* --------------------- Reg --------------------- */
1336 static HReg
iselIntExpr_R ( ISelEnv
* env
, IRExpr
* e
)
1338 HReg r
= iselIntExpr_R_wrk(env
, e
);
1339 /* sanity checks ... */
1341 vex_printf("\n"); ppIRExpr(e
); vex_printf("\n");
1343 vassert(hregClass(r
) == HRcInt32
);
1344 vassert(hregIsVirtual(r
));
1348 /* DO NOT CALL THIS DIRECTLY ! */
1349 static HReg
iselIntExpr_R_wrk ( ISelEnv
* env
, IRExpr
* e
)
1351 IRType ty
= typeOfIRExpr(env
->type_env
,e
);
1352 vassert(ty
== Ity_I32
|| ty
== Ity_I16
|| ty
== Ity_I8
);
1356 /* --------- TEMP --------- */
1358 return lookupIRTemp(env
, e
->Iex
.RdTmp
.tmp
);
1361 /* --------- LOAD --------- */
1363 HReg dst
= newVRegI(env
);
1365 if (e
->Iex
.Load
.end
!= Iend_LE
)
1368 if (ty
== Ity_I32
) {
1369 ARMAMode1
* amode
= iselIntExpr_AMode1 ( env
, e
->Iex
.Load
.addr
);
1370 addInstr(env
, ARMInstr_LdSt32(ARMcc_AL
, True
/*isLoad*/, dst
, amode
));
1373 if (ty
== Ity_I16
) {
1374 ARMAMode2
* amode
= iselIntExpr_AMode2 ( env
, e
->Iex
.Load
.addr
);
1375 addInstr(env
, ARMInstr_LdSt16(ARMcc_AL
,
1376 True
/*isLoad*/, False
/*!signedLoad*/,
1381 ARMAMode1
* amode
= iselIntExpr_AMode1 ( env
, e
->Iex
.Load
.addr
);
1382 addInstr(env
, ARMInstr_LdSt8U(ARMcc_AL
, True
/*isLoad*/, dst
, amode
));
1388 //zz /* --------- TERNARY OP --------- */
1389 //zz case Iex_Triop: {
1390 //zz IRTriop *triop = e->Iex.Triop.details;
1391 //zz /* C3210 flags following FPU partial remainder (fprem), both
1392 //zz IEEE compliant (PREM1) and non-IEEE compliant (PREM). */
1393 //zz if (triop->op == Iop_PRemC3210F64
1394 //zz || triop->op == Iop_PRem1C3210F64) {
1395 //zz HReg junk = newVRegF(env);
1396 //zz HReg dst = newVRegI(env);
1397 //zz HReg srcL = iselDblExpr(env, triop->arg2);
1398 //zz HReg srcR = iselDblExpr(env, triop->arg3);
1399 //zz /* XXXROUNDINGFIXME */
1400 //zz /* set roundingmode here */
1401 //zz addInstr(env, X86Instr_FpBinary(
1402 //zz e->Iex.Binop.op==Iop_PRemC3210F64
1403 //zz ? Xfp_PREM : Xfp_PREM1,
1406 //zz /* The previous pseudo-insn will have left the FPU's C3210
1407 //zz flags set correctly. So bag them. */
1408 //zz addInstr(env, X86Instr_FpStSW_AX());
1409 //zz addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), dst));
1410 //zz addInstr(env, X86Instr_Alu32R(Xalu_AND, X86RMI_Imm(0x4700), dst));
1417 /* --------- BINARY OP --------- */
1420 ARMAluOp aop
= 0; /* invalid */
1421 ARMShiftOp sop
= 0; /* invalid */
1423 /* ADD/SUB/AND/OR/XOR */
1424 switch (e
->Iex
.Binop
.op
) {
1426 Bool didInv
= False
;
1427 HReg dst
= newVRegI(env
);
1428 HReg argL
= iselIntExpr_R(env
, e
->Iex
.Binop
.arg1
);
1429 ARMRI84
* argR
= iselIntExpr_RI84(&didInv
, True
/*mayInv*/,
1430 env
, e
->Iex
.Binop
.arg2
);
1431 addInstr(env
, ARMInstr_Alu(didInv
? ARMalu_BIC
: ARMalu_AND
,
1435 case Iop_Or32
: aop
= ARMalu_OR
; goto std_binop
;
1436 case Iop_Xor32
: aop
= ARMalu_XOR
; goto std_binop
;
1437 case Iop_Sub32
: aop
= ARMalu_SUB
; goto std_binop
;
1438 case Iop_Add32
: aop
= ARMalu_ADD
; goto std_binop
;
1440 HReg dst
= newVRegI(env
);
1441 HReg argL
= iselIntExpr_R(env
, e
->Iex
.Binop
.arg1
);
1442 ARMRI84
* argR
= iselIntExpr_RI84(NULL
, False
/*mayInv*/,
1443 env
, e
->Iex
.Binop
.arg2
);
1444 addInstr(env
, ARMInstr_Alu(aop
, dst
, argL
, argR
));
1451 switch (e
->Iex
.Binop
.op
) {
1452 case Iop_Shl32
: sop
= ARMsh_SHL
; goto sh_binop
;
1453 case Iop_Shr32
: sop
= ARMsh_SHR
; goto sh_binop
;
1454 case Iop_Sar32
: sop
= ARMsh_SAR
; goto sh_binop
;
1456 HReg dst
= newVRegI(env
);
1457 HReg argL
= iselIntExpr_R(env
, e
->Iex
.Binop
.arg1
);
1458 ARMRI5
* argR
= iselIntExpr_RI5(env
, e
->Iex
.Binop
.arg2
);
1459 addInstr(env
, ARMInstr_Shift(sop
, dst
, argL
, argR
));
1460 vassert(ty
== Ity_I32
); /* else the IR is ill-typed */
1467 if (e
->Iex
.Binop
.op
== Iop_Mul32
) {
1468 HReg argL
= iselIntExpr_R(env
, e
->Iex
.Binop
.arg1
);
1469 HReg argR
= iselIntExpr_R(env
, e
->Iex
.Binop
.arg2
);
1470 HReg dst
= newVRegI(env
);
1471 addInstr(env
, mk_iMOVds_RR(hregARM_R2(), argL
));
1472 addInstr(env
, mk_iMOVds_RR(hregARM_R3(), argR
));
1473 addInstr(env
, ARMInstr_Mul(ARMmul_PLAIN
));
1474 addInstr(env
, mk_iMOVds_RR(dst
, hregARM_R0()));
1478 /* Handle misc other ops. */
1480 if (e
->Iex
.Binop
.op
== Iop_Max32U
) {
1481 HReg argL
= iselIntExpr_R(env
, e
->Iex
.Binop
.arg1
);
1482 HReg argR
= iselIntExpr_R(env
, e
->Iex
.Binop
.arg2
);
1483 HReg dst
= newVRegI(env
);
1484 addInstr(env
, ARMInstr_CmpOrTst(True
/*isCmp*/, argL
,
1486 addInstr(env
, mk_iMOVds_RR(dst
, argL
));
1487 addInstr(env
, ARMInstr_CMov(ARMcc_LO
, dst
, ARMRI84_R(argR
)));
1491 if (e
->Iex
.Binop
.op
== Iop_CmpF64
) {
1492 HReg dL
= iselDblExpr(env
, e
->Iex
.Binop
.arg1
);
1493 HReg dR
= iselDblExpr(env
, e
->Iex
.Binop
.arg2
);
1494 HReg dst
= newVRegI(env
);
1495 /* Do the compare (FCMPD) and set NZCV in FPSCR. Then also do
1496 FMSTAT, so we can examine the results directly. */
1497 addInstr(env
, ARMInstr_VCmpD(dL
, dR
));
1498 /* Create in dst, the IRCmpF64Result encoded result. */
1499 addInstr(env
, ARMInstr_Imm32(dst
, 0));
1500 addInstr(env
, ARMInstr_CMov(ARMcc_EQ
, dst
, ARMRI84_I84(0x40,0))); //EQ
1501 addInstr(env
, ARMInstr_CMov(ARMcc_MI
, dst
, ARMRI84_I84(0x01,0))); //LT
1502 addInstr(env
, ARMInstr_CMov(ARMcc_GT
, dst
, ARMRI84_I84(0x00,0))); //GT
1503 addInstr(env
, ARMInstr_CMov(ARMcc_VS
, dst
, ARMRI84_I84(0x45,0))); //UN
1507 if (e
->Iex
.Binop
.op
== Iop_F64toI32S
1508 || e
->Iex
.Binop
.op
== Iop_F64toI32U
) {
1509 /* Wretched uglyness all round, due to having to deal
1510 with rounding modes. Oh well. */
1511 /* FIXME: if arg1 is a constant indicating round-to-zero,
1512 then we could skip all this arsing around with FPSCR and
1513 simply emit FTO{S,U}IZD. */
1514 Bool syned
= e
->Iex
.Binop
.op
== Iop_F64toI32S
;
1515 HReg valD
= iselDblExpr(env
, e
->Iex
.Binop
.arg2
);
1516 set_VFP_rounding_mode(env
, e
->Iex
.Binop
.arg1
);
1517 /* FTO{S,U}ID valF, valD */
1518 HReg valF
= newVRegF(env
);
1519 addInstr(env
, ARMInstr_VCvtID(False
/*!iToD*/, syned
,
1521 set_VFP_rounding_default(env
);
1522 /* VMOV dst, valF */
1523 HReg dst
= newVRegI(env
);
1524 addInstr(env
, ARMInstr_VXferS(False
/*!toS*/, valF
, dst
));
1528 if (e
->Iex
.Binop
.op
== Iop_GetElem8x8
1529 || e
->Iex
.Binop
.op
== Iop_GetElem16x4
1530 || e
->Iex
.Binop
.op
== Iop_GetElem32x2
) {
1531 if (env
->hwcaps
& VEX_HWCAPS_ARM_NEON
) {
1532 HReg res
= newVRegI(env
);
1533 HReg arg
= iselNeon64Expr(env
, e
->Iex
.Binop
.arg1
);
1535 if (e
->Iex
.Binop
.arg2
->tag
!= Iex_Const
||
1536 typeOfIRExpr(env
->type_env
, e
->Iex
.Binop
.arg2
) != Ity_I8
) {
1537 vpanic("ARM target supports GetElem with constant "
1538 "second argument only (neon)\n");
1540 index
= e
->Iex
.Binop
.arg2
->Iex
.Const
.con
->Ico
.U8
;
1541 switch (e
->Iex
.Binop
.op
) {
1542 case Iop_GetElem8x8
: vassert(index
< 8); size
= 0; break;
1543 case Iop_GetElem16x4
: vassert(index
< 4); size
= 1; break;
1544 case Iop_GetElem32x2
: vassert(index
< 2); size
= 2; break;
1545 default: vassert(0);
1547 addInstr(env
, ARMInstr_NUnaryS(ARMneon_GETELEMS
,
1548 mkARMNRS(ARMNRS_Reg
, res
, 0),
1549 mkARMNRS(ARMNRS_Scalar
, arg
, index
),
1555 if (e
->Iex
.Binop
.op
== Iop_GetElem32x2
1556 && e
->Iex
.Binop
.arg2
->tag
== Iex_Const
1557 && !(env
->hwcaps
& VEX_HWCAPS_ARM_NEON
)) {
1558 /* We may have to do GetElem32x2 on a non-NEON capable
1560 IRConst
* con
= e
->Iex
.Binop
.arg2
->Iex
.Const
.con
;
1561 vassert(con
->tag
== Ico_U8
); /* else IR is ill-typed */
1562 UInt index
= con
->Ico
.U8
;
1563 if (index
>= 0 && index
<= 1) {
1565 iselInt64Expr(&rHi
, &rLo
, env
, e
->Iex
.Binop
.arg1
);
1566 return index
== 0 ? rLo
: rHi
;
1570 if (e
->Iex
.Binop
.op
== Iop_GetElem8x16
1571 || e
->Iex
.Binop
.op
== Iop_GetElem16x8
1572 || e
->Iex
.Binop
.op
== Iop_GetElem32x4
) {
1573 if (env
->hwcaps
& VEX_HWCAPS_ARM_NEON
) {
1574 HReg res
= newVRegI(env
);
1575 HReg arg
= iselNeonExpr(env
, e
->Iex
.Binop
.arg1
);
1577 if (e
->Iex
.Binop
.arg2
->tag
!= Iex_Const
||
1578 typeOfIRExpr(env
->type_env
, e
->Iex
.Binop
.arg2
) != Ity_I8
) {
1579 vpanic("ARM target supports GetElem with constant "
1580 "second argument only (neon)\n");
1582 index
= e
->Iex
.Binop
.arg2
->Iex
.Const
.con
->Ico
.U8
;
1583 switch (e
->Iex
.Binop
.op
) {
1584 case Iop_GetElem8x16
: vassert(index
< 16); size
= 0; break;
1585 case Iop_GetElem16x8
: vassert(index
< 8); size
= 1; break;
1586 case Iop_GetElem32x4
: vassert(index
< 4); size
= 2; break;
1587 default: vassert(0);
1589 addInstr(env
, ARMInstr_NUnaryS(ARMneon_GETELEMS
,
1590 mkARMNRS(ARMNRS_Reg
, res
, 0),
1591 mkARMNRS(ARMNRS_Scalar
, arg
, index
),
1597 /* All cases involving host-side helper calls. */
1599 switch (e
->Iex
.Binop
.op
) {
1601 fn
= &h_generic_calc_Add16x2
; break;
1603 fn
= &h_generic_calc_Sub16x2
; break;
1605 fn
= &h_generic_calc_HAdd16Ux2
; break;
1607 fn
= &h_generic_calc_HAdd16Sx2
; break;
1609 fn
= &h_generic_calc_HSub16Ux2
; break;
1611 fn
= &h_generic_calc_HSub16Sx2
; break;
1613 fn
= &h_generic_calc_QAdd16Sx2
; break;
1615 fn
= &h_generic_calc_QAdd16Ux2
; break;
1617 fn
= &h_generic_calc_QSub16Sx2
; break;
1619 fn
= &h_generic_calc_Add8x4
; break;
1621 fn
= &h_generic_calc_Sub8x4
; break;
1623 fn
= &h_generic_calc_HAdd8Ux4
; break;
1625 fn
= &h_generic_calc_HAdd8Sx4
; break;
1627 fn
= &h_generic_calc_HSub8Ux4
; break;
1629 fn
= &h_generic_calc_HSub8Sx4
; break;
1631 fn
= &h_generic_calc_QAdd8Sx4
; break;
1633 fn
= &h_generic_calc_QAdd8Ux4
; break;
1635 fn
= &h_generic_calc_QSub8Sx4
; break;
1637 fn
= &h_generic_calc_QSub8Ux4
; break;
1639 fn
= &h_generic_calc_Sad8Ux4
; break;
1641 fn
= &h_generic_calc_QAdd32S
; break;
1643 fn
= &h_generic_calc_QSub32S
; break;
1645 fn
= &h_generic_calc_QSub16Ux2
; break;
1647 fn
= &h_calc_udiv32_w_arm_semantics
; break;
1649 fn
= &h_calc_sdiv32_w_arm_semantics
; break;
1655 HReg regL
= iselIntExpr_R(env
, e
->Iex
.Binop
.arg1
);
1656 HReg regR
= iselIntExpr_R(env
, e
->Iex
.Binop
.arg2
);
1657 HReg res
= newVRegI(env
);
1658 addInstr(env
, mk_iMOVds_RR(hregARM_R0(), regL
));
1659 addInstr(env
, mk_iMOVds_RR(hregARM_R1(), regR
));
1660 addInstr(env
, ARMInstr_Call( ARMcc_AL
, (Addr
)fn
,
1661 2, mk_RetLoc_simple(RLPri_Int
) ));
1662 addInstr(env
, mk_iMOVds_RR(res
, hregARM_R0()));
1669 /* --------- UNARY OP --------- */
1672 //zz /* 1Uto8(32to1(expr32)) */
1673 //zz if (e->Iex.Unop.op == Iop_1Uto8) {
1674 //zz DECLARE_PATTERN(p_32to1_then_1Uto8);
1675 //zz DEFINE_PATTERN(p_32to1_then_1Uto8,
1676 //zz unop(Iop_1Uto8,unop(Iop_32to1,bind(0))));
1677 //zz if (matchIRExpr(&mi,p_32to1_then_1Uto8,e)) {
1678 //zz const IRExpr* expr32 = mi.bindee[0];
1679 //zz HReg dst = newVRegI(env);
1680 //zz HReg src = iselIntExpr_R(env, expr32);
1681 //zz addInstr(env, mk_iMOVsd_RR(src,dst) );
1682 //zz addInstr(env, X86Instr_Alu32R(Xalu_AND,
1683 //zz X86RMI_Imm(1), dst));
1688 //zz /* 8Uto32(LDle(expr32)) */
1689 //zz if (e->Iex.Unop.op == Iop_8Uto32) {
1690 //zz DECLARE_PATTERN(p_LDle8_then_8Uto32);
1691 //zz DEFINE_PATTERN(p_LDle8_then_8Uto32,
1692 //zz unop(Iop_8Uto32,
1693 //zz IRExpr_Load(Iend_LE,Ity_I8,bind(0))) );
1694 //zz if (matchIRExpr(&mi,p_LDle8_then_8Uto32,e)) {
1695 //zz HReg dst = newVRegI(env);
1696 //zz X86AMode* amode = iselIntExpr_AMode ( env, mi.bindee[0] );
1697 //zz addInstr(env, X86Instr_LoadEX(1,False,amode,dst));
1702 //zz /* 8Sto32(LDle(expr32)) */
1703 //zz if (e->Iex.Unop.op == Iop_8Sto32) {
1704 //zz DECLARE_PATTERN(p_LDle8_then_8Sto32);
1705 //zz DEFINE_PATTERN(p_LDle8_then_8Sto32,
1706 //zz unop(Iop_8Sto32,
1707 //zz IRExpr_Load(Iend_LE,Ity_I8,bind(0))) );
1708 //zz if (matchIRExpr(&mi,p_LDle8_then_8Sto32,e)) {
1709 //zz HReg dst = newVRegI(env);
1710 //zz X86AMode* amode = iselIntExpr_AMode ( env, mi.bindee[0] );
1711 //zz addInstr(env, X86Instr_LoadEX(1,True,amode,dst));
1716 //zz /* 16Uto32(LDle(expr32)) */
1717 //zz if (e->Iex.Unop.op == Iop_16Uto32) {
1718 //zz DECLARE_PATTERN(p_LDle16_then_16Uto32);
1719 //zz DEFINE_PATTERN(p_LDle16_then_16Uto32,
1720 //zz unop(Iop_16Uto32,
1721 //zz IRExpr_Load(Iend_LE,Ity_I16,bind(0))) );
1722 //zz if (matchIRExpr(&mi,p_LDle16_then_16Uto32,e)) {
1723 //zz HReg dst = newVRegI(env);
1724 //zz X86AMode* amode = iselIntExpr_AMode ( env, mi.bindee[0] );
1725 //zz addInstr(env, X86Instr_LoadEX(2,False,amode,dst));
1730 //zz /* 8Uto32(GET:I8) */
1731 //zz if (e->Iex.Unop.op == Iop_8Uto32) {
1732 //zz if (e->Iex.Unop.arg->tag == Iex_Get) {
1734 //zz X86AMode* amode;
1735 //zz vassert(e->Iex.Unop.arg->Iex.Get.ty == Ity_I8);
1736 //zz dst = newVRegI(env);
1737 //zz amode = X86AMode_IR(e->Iex.Unop.arg->Iex.Get.offset,
1738 //zz hregX86_EBP());
1739 //zz addInstr(env, X86Instr_LoadEX(1,False,amode,dst));
1744 //zz /* 16to32(GET:I16) */
1745 //zz if (e->Iex.Unop.op == Iop_16Uto32) {
1746 //zz if (e->Iex.Unop.arg->tag == Iex_Get) {
1748 //zz X86AMode* amode;
1749 //zz vassert(e->Iex.Unop.arg->Iex.Get.ty == Ity_I16);
1750 //zz dst = newVRegI(env);
1751 //zz amode = X86AMode_IR(e->Iex.Unop.arg->Iex.Get.offset,
1752 //zz hregX86_EBP());
1753 //zz addInstr(env, X86Instr_LoadEX(2,False,amode,dst));
1758 switch (e
->Iex
.Unop
.op
) {
1760 HReg dst
= newVRegI(env
);
1761 HReg src
= iselIntExpr_R(env
, e
->Iex
.Unop
.arg
);
1762 addInstr(env
, ARMInstr_Alu(ARMalu_AND
,
1763 dst
, src
, ARMRI84_I84(0xFF,0)));
1766 //zz case Iop_8Uto16:
1767 //zz case Iop_8Uto32:
1768 //zz case Iop_16Uto32: {
1769 //zz HReg dst = newVRegI(env);
1770 //zz HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1771 //zz UInt mask = e->Iex.Unop.op==Iop_16Uto32 ? 0xFFFF : 0xFF;
1772 //zz addInstr(env, mk_iMOVsd_RR(src,dst) );
1773 //zz addInstr(env, X86Instr_Alu32R(Xalu_AND,
1774 //zz X86RMI_Imm(mask), dst));
1777 //zz case Iop_8Sto16:
1778 //zz case Iop_8Sto32:
1780 HReg dst
= newVRegI(env
);
1781 HReg src
= iselIntExpr_R(env
, e
->Iex
.Unop
.arg
);
1782 ARMRI5
* amt
= ARMRI5_I5(16);
1783 addInstr(env
, ARMInstr_Shift(ARMsh_SHL
, dst
, src
, amt
));
1784 addInstr(env
, ARMInstr_Shift(ARMsh_SHR
, dst
, dst
, amt
));
1789 HReg dst
= newVRegI(env
);
1790 HReg src
= iselIntExpr_R(env
, e
->Iex
.Unop
.arg
);
1791 ARMRI5
* amt
= ARMRI5_I5(e
->Iex
.Unop
.op
==Iop_16Sto32
? 16 : 24);
1792 addInstr(env
, ARMInstr_Shift(ARMsh_SHL
, dst
, src
, amt
));
1793 addInstr(env
, ARMInstr_Shift(ARMsh_SAR
, dst
, dst
, amt
));
1797 //zz case Iop_Not16:
1799 HReg dst
= newVRegI(env
);
1800 HReg src
= iselIntExpr_R(env
, e
->Iex
.Unop
.arg
);
1801 addInstr(env
, ARMInstr_Unary(ARMun_NOT
, dst
, src
));
1804 case Iop_64HIto32
: {
1806 iselInt64Expr(&rHi
,&rLo
, env
, e
->Iex
.Unop
.arg
);
1807 return rHi
; /* and abandon rLo .. poor wee thing :-) */
1811 iselInt64Expr(&rHi
,&rLo
, env
, e
->Iex
.Unop
.arg
);
1812 return rLo
; /* similar stupid comment to the above ... */
1816 if (env
->hwcaps
& VEX_HWCAPS_ARM_NEON
) {
1817 HReg tHi
= newVRegI(env
);
1818 HReg tLo
= newVRegI(env
);
1819 HReg tmp
= iselNeon64Expr(env
, e
->Iex
.Unop
.arg
);
1820 addInstr(env
, ARMInstr_VXferD(False
, tmp
, tHi
, tLo
));
1824 iselInt64Expr(&rHi
,&rLo
, env
, e
->Iex
.Unop
.arg
);
1830 /* 1Uto32(tmp). Since I1 values generated into registers
1831 are guaranteed to have value either only zero or one,
1832 we can simply return the value of the register in this
1834 if (e
->Iex
.Unop
.arg
->tag
== Iex_RdTmp
) {
1835 HReg dst
= lookupIRTemp(env
, e
->Iex
.Unop
.arg
->Iex
.RdTmp
.tmp
);
1838 /* else fall through */
1840 HReg dst
= newVRegI(env
);
1841 ARMCondCode cond
= iselCondCode(env
, e
->Iex
.Unop
.arg
);
1842 addInstr(env
, ARMInstr_Mov(dst
, ARMRI84_I84(0,0)));
1843 addInstr(env
, ARMInstr_CMov(cond
, dst
, ARMRI84_I84(1,0)));
1848 HReg dst
= newVRegI(env
);
1849 ARMCondCode cond
= iselCondCode(env
, e
->Iex
.Unop
.arg
);
1850 ARMRI5
* amt
= ARMRI5_I5(31);
1851 /* This is really rough. We could do much better here;
1852 perhaps mvn{cond} dst, #0 as the second insn?
1853 (same applies to 1Sto64) */
1854 addInstr(env
, ARMInstr_Mov(dst
, ARMRI84_I84(0,0)));
1855 addInstr(env
, ARMInstr_CMov(cond
, dst
, ARMRI84_I84(1,0)));
1856 addInstr(env
, ARMInstr_Shift(ARMsh_SHL
, dst
, dst
, amt
));
1857 addInstr(env
, ARMInstr_Shift(ARMsh_SAR
, dst
, dst
, amt
));
1862 //zz case Iop_1Sto8:
1863 //zz case Iop_1Sto16:
1864 //zz case Iop_1Sto32: {
1865 //zz /* could do better than this, but for now ... */
1866 //zz HReg dst = newVRegI(env);
1867 //zz X86CondCode cond = iselCondCode(env, e->Iex.Unop.arg);
1868 //zz addInstr(env, X86Instr_Set32(cond,dst));
1869 //zz addInstr(env, X86Instr_Sh32(Xsh_SHL, 31, dst));
1870 //zz addInstr(env, X86Instr_Sh32(Xsh_SAR, 31, dst));
1873 //zz case Iop_Ctz32: {
1874 //zz /* Count trailing zeroes, implemented by x86 'bsfl' */
1875 //zz HReg dst = newVRegI(env);
1876 //zz HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1877 //zz addInstr(env, X86Instr_Bsfr32(True,src,dst));
1881 /* Count leading zeroes; easy on ARM. */
1882 HReg dst
= newVRegI(env
);
1883 HReg src
= iselIntExpr_R(env
, e
->Iex
.Unop
.arg
);
1884 addInstr(env
, ARMInstr_Unary(ARMun_CLZ
, dst
, src
));
1888 case Iop_CmpwNEZ32
: {
1889 HReg dst
= newVRegI(env
);
1890 HReg src
= iselIntExpr_R(env
, e
->Iex
.Unop
.arg
);
1891 addInstr(env
, ARMInstr_Unary(ARMun_NEG
, dst
, src
));
1892 addInstr(env
, ARMInstr_Alu(ARMalu_OR
, dst
, dst
, ARMRI84_R(src
)));
1893 addInstr(env
, ARMInstr_Shift(ARMsh_SAR
, dst
, dst
, ARMRI5_I5(31)));
1898 HReg dst
= newVRegI(env
);
1899 HReg src
= iselIntExpr_R(env
, e
->Iex
.Unop
.arg
);
1900 addInstr(env
, ARMInstr_Unary(ARMun_NEG
, dst
, src
));
1901 addInstr(env
, ARMInstr_Alu(ARMalu_OR
, dst
, dst
, ARMRI84_R(src
)));
1905 //zz case Iop_V128to32: {
1906 //zz HReg dst = newVRegI(env);
1907 //zz HReg vec = iselVecExpr(env, e->Iex.Unop.arg);
1908 //zz X86AMode* esp0 = X86AMode_IR(0, hregX86_ESP());
1909 //zz sub_from_esp(env, 16);
1910 //zz addInstr(env, X86Instr_SseLdSt(False/*store*/, vec, esp0));
1911 //zz addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(esp0), dst ));
1912 //zz add_to_esp(env, 16);
1916 case Iop_ReinterpF32asI32
: {
1917 HReg dst
= newVRegI(env
);
1918 HReg src
= iselFltExpr(env
, e
->Iex
.Unop
.arg
);
1919 addInstr(env
, ARMInstr_VXferS(False
/*!toS*/, src
, dst
));
1924 //zz case Iop_16to8:
1927 /* These are no-ops. */
1928 return iselIntExpr_R(env
, e
->Iex
.Unop
.arg
);
1934 /* All Unop cases involving host-side helper calls. */
1936 switch (e
->Iex
.Unop
.op
) {
1937 case Iop_CmpNEZ16x2
:
1938 fn
= &h_generic_calc_CmpNEZ16x2
; break;
1940 fn
= &h_generic_calc_CmpNEZ8x4
; break;
1946 HReg arg
= iselIntExpr_R(env
, e
->Iex
.Unop
.arg
);
1947 HReg res
= newVRegI(env
);
1948 addInstr(env
, mk_iMOVds_RR(hregARM_R0(), arg
));
1949 addInstr(env
, ARMInstr_Call( ARMcc_AL
, (Addr
)fn
,
1950 1, mk_RetLoc_simple(RLPri_Int
) ));
1951 addInstr(env
, mk_iMOVds_RR(res
, hregARM_R0()));
1958 /* --------- GET --------- */
1961 && 0 == (e
->Iex
.Get
.offset
& 3)
1962 && e
->Iex
.Get
.offset
< 4096-4) {
1963 HReg dst
= newVRegI(env
);
1964 addInstr(env
, ARMInstr_LdSt32(
1965 ARMcc_AL
, True
/*isLoad*/,
1967 ARMAMode1_RI(hregARM_R8(), e
->Iex
.Get
.offset
)));
1970 //zz if (ty == Ity_I8 || ty == Ity_I16) {
1971 //zz HReg dst = newVRegI(env);
1972 //zz addInstr(env, X86Instr_LoadEX(
1973 //zz toUChar(ty==Ity_I8 ? 1 : 2),
1975 //zz X86AMode_IR(e->Iex.Get.offset,hregX86_EBP()),
1982 //zz case Iex_GetI: {
1984 //zz = genGuestArrayOffset(
1985 //zz env, e->Iex.GetI.descr,
1986 //zz e->Iex.GetI.ix, e->Iex.GetI.bias );
1987 //zz HReg dst = newVRegI(env);
1988 //zz if (ty == Ity_I8) {
1989 //zz addInstr(env, X86Instr_LoadEX( 1, False, am, dst ));
1992 //zz if (ty == Ity_I32) {
1993 //zz addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Mem(am), dst));
1999 /* --------- CCALL --------- */
2001 HReg dst
= newVRegI(env
);
2002 vassert(ty
== e
->Iex
.CCall
.retty
);
2004 /* be very restrictive for now. Only 32/64-bit ints allowed for
2005 args, and 32 bits for return type. Don't forget to change
2006 the RetLoc if more types are allowed in future. */
2007 if (e
->Iex
.CCall
.retty
!= Ity_I32
)
2010 /* Marshal args, do the call, clear stack. */
2012 RetLoc rloc
= mk_RetLoc_INVALID();
2013 Bool ok
= doHelperCall( &addToSp
, &rloc
, env
, NULL
/*guard*/,
2014 e
->Iex
.CCall
.cee
, e
->Iex
.CCall
.retty
,
2015 e
->Iex
.CCall
.args
);
2018 vassert(is_sane_RetLoc(rloc
));
2019 vassert(rloc
.pri
== RLPri_Int
);
2020 vassert(addToSp
== 0);
2021 addInstr(env
, mk_iMOVds_RR(dst
, hregARM_R0()));
2027 /* --------- LITERAL --------- */
2031 HReg dst
= newVRegI(env
);
2032 switch (e
->Iex
.Const
.con
->tag
) {
2033 case Ico_U32
: u
= e
->Iex
.Const
.con
->Ico
.U32
; break;
2034 case Ico_U16
: u
= 0xFFFF & (e
->Iex
.Const
.con
->Ico
.U16
); break;
2035 case Ico_U8
: u
= 0xFF & (e
->Iex
.Const
.con
->Ico
.U8
); break;
2036 default: ppIRExpr(e
); vpanic("iselIntExpr_R.Iex_Const(arm)");
2038 addInstr(env
, ARMInstr_Imm32(dst
, u
));
2042 /* --------- MULTIPLEX --------- */
2043 case Iex_ITE
: { // VFD
2044 /* ITE(ccexpr, iftrue, iffalse) */
2045 if (ty
== Ity_I32
) {
2047 HReg r1
= iselIntExpr_R(env
, e
->Iex
.ITE
.iftrue
);
2048 ARMRI84
* r0
= iselIntExpr_RI84(NULL
, False
, env
, e
->Iex
.ITE
.iffalse
);
2049 HReg dst
= newVRegI(env
);
2050 addInstr(env
, mk_iMOVds_RR(dst
, r1
));
2051 cc
= iselCondCode(env
, e
->Iex
.ITE
.cond
);
2052 addInstr(env
, ARMInstr_CMov(cc
^ 1, dst
, r0
));
2060 } /* switch (e->tag) */
2062 /* We get here if no pattern matched. */
2065 vpanic("iselIntExpr_R: cannot reduce tree");
2069 /* -------------------- 64-bit -------------------- */
2071 /* Compute a 64-bit value into a register pair, which is returned as
2072 the first two parameters. As with iselIntExpr_R, these may be
2073 either real or virtual regs; in any case they must not be changed
2074 by subsequent code emitted by the caller. */
2076 static void iselInt64Expr ( HReg
* rHi
, HReg
* rLo
, ISelEnv
* env
,
2079 iselInt64Expr_wrk(rHi
, rLo
, env
, e
);
2081 vex_printf("\n"); ppIRExpr(e
); vex_printf("\n");
2083 vassert(hregClass(*rHi
) == HRcInt32
);
2084 vassert(hregIsVirtual(*rHi
));
2085 vassert(hregClass(*rLo
) == HRcInt32
);
2086 vassert(hregIsVirtual(*rLo
));
2089 /* DO NOT CALL THIS DIRECTLY ! */
2090 static void iselInt64Expr_wrk ( HReg
* rHi
, HReg
* rLo
, ISelEnv
* env
,
2094 vassert(typeOfIRExpr(env
->type_env
,e
) == Ity_I64
);
2096 /* 64-bit literal */
2097 if (e
->tag
== Iex_Const
) {
2098 ULong w64
= e
->Iex
.Const
.con
->Ico
.U64
;
2099 UInt wHi
= toUInt(w64
>> 32);
2100 UInt wLo
= toUInt(w64
);
2101 HReg tHi
= newVRegI(env
);
2102 HReg tLo
= newVRegI(env
);
2103 vassert(e
->Iex
.Const
.con
->tag
== Ico_U64
);
2104 addInstr(env
, ARMInstr_Imm32(tHi
, wHi
));
2105 addInstr(env
, ARMInstr_Imm32(tLo
, wLo
));
2111 /* read 64-bit IRTemp */
2112 if (e
->tag
== Iex_RdTmp
) {
2113 if (env
->hwcaps
& VEX_HWCAPS_ARM_NEON
) {
2114 HReg tHi
= newVRegI(env
);
2115 HReg tLo
= newVRegI(env
);
2116 HReg tmp
= iselNeon64Expr(env
, e
);
2117 addInstr(env
, ARMInstr_VXferD(False
, tmp
, tHi
, tLo
));
2121 lookupIRTemp64( rHi
, rLo
, env
, e
->Iex
.RdTmp
.tmp
);
2127 if (e
->tag
== Iex_Load
&& e
->Iex
.Load
.end
== Iend_LE
) {
2129 vassert(e
->Iex
.Load
.ty
== Ity_I64
);
2130 rA
= iselIntExpr_R(env
, e
->Iex
.Load
.addr
);
2131 tHi
= newVRegI(env
);
2132 tLo
= newVRegI(env
);
2133 addInstr(env
, ARMInstr_LdSt32(ARMcc_AL
, True
/*isLoad*/,
2134 tHi
, ARMAMode1_RI(rA
, 4)));
2135 addInstr(env
, ARMInstr_LdSt32(ARMcc_AL
, True
/*isLoad*/,
2136 tLo
, ARMAMode1_RI(rA
, 0)));
2143 if (e
->tag
== Iex_Get
) {
2144 ARMAMode1
* am0
= ARMAMode1_RI(hregARM_R8(), e
->Iex
.Get
.offset
+ 0);
2145 ARMAMode1
* am4
= ARMAMode1_RI(hregARM_R8(), e
->Iex
.Get
.offset
+ 4);
2146 HReg tHi
= newVRegI(env
);
2147 HReg tLo
= newVRegI(env
);
2148 addInstr(env
, ARMInstr_LdSt32(ARMcc_AL
, True
/*isLoad*/, tHi
, am4
));
2149 addInstr(env
, ARMInstr_LdSt32(ARMcc_AL
, True
/*isLoad*/, tLo
, am0
));
2155 /* --------- BINARY ops --------- */
2156 if (e
->tag
== Iex_Binop
) {
2157 switch (e
->Iex
.Binop
.op
) {
2159 /* 32 x 32 -> 64 multiply */
2162 HReg argL
= iselIntExpr_R(env
, e
->Iex
.Binop
.arg1
);
2163 HReg argR
= iselIntExpr_R(env
, e
->Iex
.Binop
.arg2
);
2164 HReg tHi
= newVRegI(env
);
2165 HReg tLo
= newVRegI(env
);
2166 ARMMulOp mop
= e
->Iex
.Binop
.op
== Iop_MullS32
2167 ? ARMmul_SX
: ARMmul_ZX
;
2168 addInstr(env
, mk_iMOVds_RR(hregARM_R2(), argL
));
2169 addInstr(env
, mk_iMOVds_RR(hregARM_R3(), argR
));
2170 addInstr(env
, ARMInstr_Mul(mop
));
2171 addInstr(env
, mk_iMOVds_RR(tHi
, hregARM_R1()));
2172 addInstr(env
, mk_iMOVds_RR(tLo
, hregARM_R0()));
2179 HReg xLo
, xHi
, yLo
, yHi
;
2180 HReg tHi
= newVRegI(env
);
2181 HReg tLo
= newVRegI(env
);
2182 iselInt64Expr(&xHi
, &xLo
, env
, e
->Iex
.Binop
.arg1
);
2183 iselInt64Expr(&yHi
, &yLo
, env
, e
->Iex
.Binop
.arg2
);
2184 addInstr(env
, ARMInstr_Alu(ARMalu_OR
, tHi
, xHi
, ARMRI84_R(yHi
)));
2185 addInstr(env
, ARMInstr_Alu(ARMalu_OR
, tLo
, xLo
, ARMRI84_R(yLo
)));
2192 HReg xLo
, xHi
, yLo
, yHi
;
2193 HReg tHi
= newVRegI(env
);
2194 HReg tLo
= newVRegI(env
);
2195 iselInt64Expr(&xHi
, &xLo
, env
, e
->Iex
.Binop
.arg1
);
2196 iselInt64Expr(&yHi
, &yLo
, env
, e
->Iex
.Binop
.arg2
);
2197 addInstr(env
, ARMInstr_Alu(ARMalu_ADDS
, tLo
, xLo
, ARMRI84_R(yLo
)));
2198 addInstr(env
, ARMInstr_Alu(ARMalu_ADC
, tHi
, xHi
, ARMRI84_R(yHi
)));
2204 /* 32HLto64(e1,e2) */
2205 case Iop_32HLto64
: {
2206 *rHi
= iselIntExpr_R(env
, e
->Iex
.Binop
.arg1
);
2207 *rLo
= iselIntExpr_R(env
, e
->Iex
.Binop
.arg2
);
2216 /* --------- UNARY ops --------- */
2217 if (e
->tag
== Iex_Unop
) {
2218 switch (e
->Iex
.Unop
.op
) {
2220 /* ReinterpF64asI64 */
2221 case Iop_ReinterpF64asI64
: {
2222 HReg dstHi
= newVRegI(env
);
2223 HReg dstLo
= newVRegI(env
);
2224 HReg src
= iselDblExpr(env
, e
->Iex
.Unop
.arg
);
2225 addInstr(env
, ARMInstr_VXferD(False
/*!toD*/, src
, dstHi
, dstLo
));
2234 HReg tHi
= newVRegI(env
);
2235 HReg tLo
= newVRegI(env
);
2236 HReg zero
= newVRegI(env
);
2238 iselInt64Expr(&yHi
, &yLo
, env
, e
->Iex
.Unop
.arg
);
2240 addInstr(env
, ARMInstr_Imm32(zero
, 0));
2241 /* tLo = 0 - yLo, and set carry */
2242 addInstr(env
, ARMInstr_Alu(ARMalu_SUBS
,
2243 tLo
, zero
, ARMRI84_R(yLo
)));
2244 /* tHi = 0 - yHi - carry */
2245 addInstr(env
, ARMInstr_Alu(ARMalu_SBC
,
2246 tHi
, zero
, ARMRI84_R(yHi
)));
2247 /* So now we have tHi:tLo = -arg. To finish off, or 'arg'
2248 back in, so as to give the final result
2249 tHi:tLo = arg | -arg. */
2250 addInstr(env
, ARMInstr_Alu(ARMalu_OR
, tHi
, tHi
, ARMRI84_R(yHi
)));
2251 addInstr(env
, ARMInstr_Alu(ARMalu_OR
, tLo
, tLo
, ARMRI84_R(yLo
)));
2258 case Iop_CmpwNEZ64
: {
2260 HReg tmp1
= newVRegI(env
);
2261 HReg tmp2
= newVRegI(env
);
2262 /* srcHi:srcLo = arg */
2263 iselInt64Expr(&srcHi
, &srcLo
, env
, e
->Iex
.Unop
.arg
);
2264 /* tmp1 = srcHi | srcLo */
2265 addInstr(env
, ARMInstr_Alu(ARMalu_OR
,
2266 tmp1
, srcHi
, ARMRI84_R(srcLo
)));
2267 /* tmp2 = (tmp1 | -tmp1) >>s 31 */
2268 addInstr(env
, ARMInstr_Unary(ARMun_NEG
, tmp2
, tmp1
));
2269 addInstr(env
, ARMInstr_Alu(ARMalu_OR
,
2270 tmp2
, tmp2
, ARMRI84_R(tmp1
)));
2271 addInstr(env
, ARMInstr_Shift(ARMsh_SAR
,
2272 tmp2
, tmp2
, ARMRI5_I5(31)));
2279 HReg dst
= newVRegI(env
);
2280 ARMCondCode cond
= iselCondCode(env
, e
->Iex
.Unop
.arg
);
2281 ARMRI5
* amt
= ARMRI5_I5(31);
2282 /* This is really rough. We could do much better here;
2283 perhaps mvn{cond} dst, #0 as the second insn?
2284 (same applies to 1Sto32) */
2285 addInstr(env
, ARMInstr_Mov(dst
, ARMRI84_I84(0,0)));
2286 addInstr(env
, ARMInstr_CMov(cond
, dst
, ARMRI84_I84(1,0)));
2287 addInstr(env
, ARMInstr_Shift(ARMsh_SHL
, dst
, dst
, amt
));
2288 addInstr(env
, ARMInstr_Shift(ARMsh_SAR
, dst
, dst
, amt
));
2297 } /* if (e->tag == Iex_Unop) */
2299 /* --------- MULTIPLEX --------- */
2300 if (e
->tag
== Iex_ITE
) { // VFD
2302 HReg r1hi
, r1lo
, r0hi
, r0lo
, dstHi
, dstLo
;
2304 tyC
= typeOfIRExpr(env
->type_env
,e
->Iex
.ITE
.cond
);
2305 vassert(tyC
== Ity_I1
);
2306 iselInt64Expr(&r1hi
, &r1lo
, env
, e
->Iex
.ITE
.iftrue
);
2307 iselInt64Expr(&r0hi
, &r0lo
, env
, e
->Iex
.ITE
.iffalse
);
2308 dstHi
= newVRegI(env
);
2309 dstLo
= newVRegI(env
);
2310 addInstr(env
, mk_iMOVds_RR(dstHi
, r1hi
));
2311 addInstr(env
, mk_iMOVds_RR(dstLo
, r1lo
));
2312 cc
= iselCondCode(env
, e
->Iex
.ITE
.cond
);
2313 addInstr(env
, ARMInstr_CMov(cc
^ 1, dstHi
, ARMRI84_R(r0hi
)));
2314 addInstr(env
, ARMInstr_CMov(cc
^ 1, dstLo
, ARMRI84_R(r0lo
)));
2320 /* It is convenient sometimes to call iselInt64Expr even when we
2321 have NEON support (e.g. in do_helper_call we need 64-bit
2322 arguments as 2 x 32 regs). */
2323 if (env
->hwcaps
& VEX_HWCAPS_ARM_NEON
) {
2324 HReg tHi
= newVRegI(env
);
2325 HReg tLo
= newVRegI(env
);
2326 HReg tmp
= iselNeon64Expr(env
, e
);
2327 addInstr(env
, ARMInstr_VXferD(False
, tmp
, tHi
, tLo
));
2334 vpanic("iselInt64Expr");
2338 /*---------------------------------------------------------*/
2339 /*--- ISEL: Vector (NEON) expressions (64 or 128 bit) ---*/
2340 /*---------------------------------------------------------*/
2342 static HReg
iselNeon64Expr ( ISelEnv
* env
, const IRExpr
* e
)
2345 vassert(env
->hwcaps
& VEX_HWCAPS_ARM_NEON
);
2346 r
= iselNeon64Expr_wrk( env
, e
);
2347 vassert(hregClass(r
) == HRcFlt64
);
2348 vassert(hregIsVirtual(r
));
2352 /* DO NOT CALL THIS DIRECTLY */
2353 static HReg
iselNeon64Expr_wrk ( ISelEnv
* env
, const IRExpr
* e
)
2355 IRType ty
= typeOfIRExpr(env
->type_env
, e
);
2358 vassert(ty
== Ity_I64
);
2360 if (e
->tag
== Iex_RdTmp
) {
2361 return lookupIRTemp(env
, e
->Iex
.RdTmp
.tmp
);
2364 if (e
->tag
== Iex_Const
) {
2366 HReg res
= newVRegD(env
);
2367 iselInt64Expr(&rHi
, &rLo
, env
, e
);
2368 addInstr(env
, ARMInstr_VXferD(True
/*toD*/, res
, rHi
, rLo
));
2373 if (e
->tag
== Iex_Load
&& e
->Iex
.Load
.end
== Iend_LE
) {
2374 HReg res
= newVRegD(env
);
2375 ARMAModeN
* am
= iselIntExpr_AModeN(env
, e
->Iex
.Load
.addr
);
2376 vassert(ty
== Ity_I64
);
2377 addInstr(env
, ARMInstr_NLdStD(True
, res
, am
));
2382 if (e
->tag
== Iex_Get
) {
2383 HReg addr
= newVRegI(env
);
2384 HReg res
= newVRegD(env
);
2385 vassert(ty
== Ity_I64
);
2386 addInstr(env
, ARMInstr_Add32(addr
, hregARM_R8(), e
->Iex
.Get
.offset
));
2387 addInstr(env
, ARMInstr_NLdStD(True
, res
, mkARMAModeN_R(addr
)));
2391 /* --------- BINARY ops --------- */
2392 if (e
->tag
== Iex_Binop
) {
2393 switch (e
->Iex
.Binop
.op
) {
2395 /* 32 x 32 -> 64 multiply */
2399 HReg res
= newVRegD(env
);
2400 iselInt64Expr(&rHi
, &rLo
, env
, e
);
2401 addInstr(env
, ARMInstr_VXferD(True
/*toD*/, res
, rHi
, rLo
));
2406 HReg res
= newVRegD(env
);
2407 HReg argL
= iselNeon64Expr(env
, e
->Iex
.Binop
.arg1
);
2408 HReg argR
= iselNeon64Expr(env
, e
->Iex
.Binop
.arg2
);
2409 addInstr(env
, ARMInstr_NBinary(ARMneon_VAND
,
2410 res
, argL
, argR
, 4, False
));
2414 HReg res
= newVRegD(env
);
2415 HReg argL
= iselNeon64Expr(env
, e
->Iex
.Binop
.arg1
);
2416 HReg argR
= iselNeon64Expr(env
, e
->Iex
.Binop
.arg2
);
2417 addInstr(env
, ARMInstr_NBinary(ARMneon_VORR
,
2418 res
, argL
, argR
, 4, False
));
2422 HReg res
= newVRegD(env
);
2423 HReg argL
= iselNeon64Expr(env
, e
->Iex
.Binop
.arg1
);
2424 HReg argR
= iselNeon64Expr(env
, e
->Iex
.Binop
.arg2
);
2425 addInstr(env
, ARMInstr_NBinary(ARMneon_VXOR
,
2426 res
, argL
, argR
, 4, False
));
2430 /* 32HLto64(e1,e2) */
2431 case Iop_32HLto64
: {
2432 HReg rHi
= iselIntExpr_R(env
, e
->Iex
.Binop
.arg1
);
2433 HReg rLo
= iselIntExpr_R(env
, e
->Iex
.Binop
.arg2
);
2434 HReg res
= newVRegD(env
);
2435 addInstr(env
, ARMInstr_VXferD(True
/*toD*/, res
, rHi
, rLo
));
2443 HReg res
= newVRegD(env
);
2444 HReg argL
= iselNeon64Expr(env
, e
->Iex
.Binop
.arg1
);
2445 HReg argR
= iselNeon64Expr(env
, e
->Iex
.Binop
.arg2
);
2447 switch (e
->Iex
.Binop
.op
) {
2448 case Iop_Add8x8
: size
= 0; break;
2449 case Iop_Add16x4
: size
= 1; break;
2450 case Iop_Add32x2
: size
= 2; break;
2451 case Iop_Add64
: size
= 3; break;
2452 default: vassert(0);
2454 addInstr(env
, ARMInstr_NBinary(ARMneon_VADD
,
2455 res
, argL
, argR
, size
, False
));
2458 case Iop_Add32Fx2
: {
2459 HReg res
= newVRegD(env
);
2460 HReg argL
= iselNeon64Expr(env
, e
->Iex
.Binop
.arg1
);
2461 HReg argR
= iselNeon64Expr(env
, e
->Iex
.Binop
.arg2
);
2463 addInstr(env
, ARMInstr_NBinary(ARMneon_VADDFP
,
2464 res
, argL
, argR
, size
, False
));
2467 case Iop_RecipStep32Fx2
: {
2468 HReg res
= newVRegD(env
);
2469 HReg argL
= iselNeon64Expr(env
, e
->Iex
.Binop
.arg1
);
2470 HReg argR
= iselNeon64Expr(env
, e
->Iex
.Binop
.arg2
);
2472 addInstr(env
, ARMInstr_NBinary(ARMneon_VRECPS
,
2473 res
, argL
, argR
, size
, False
));
2476 case Iop_RSqrtStep32Fx2
: {
2477 HReg res
= newVRegD(env
);
2478 HReg argL
= iselNeon64Expr(env
, e
->Iex
.Binop
.arg1
);
2479 HReg argR
= iselNeon64Expr(env
, e
->Iex
.Binop
.arg2
);
2481 addInstr(env
, ARMInstr_NBinary(ARMneon_VRSQRTS
,
2482 res
, argL
, argR
, size
, False
));
2486 // These 6 verified 18 Apr 2013
2487 case Iop_InterleaveHI32x2
:
2488 case Iop_InterleaveLO32x2
:
2489 case Iop_InterleaveOddLanes8x8
:
2490 case Iop_InterleaveEvenLanes8x8
:
2491 case Iop_InterleaveOddLanes16x4
:
2492 case Iop_InterleaveEvenLanes16x4
: {
2493 HReg rD
= newVRegD(env
);
2494 HReg rM
= newVRegD(env
);
2495 HReg argL
= iselNeon64Expr(env
, e
->Iex
.Binop
.arg1
);
2496 HReg argR
= iselNeon64Expr(env
, e
->Iex
.Binop
.arg2
);
2498 Bool resRd
; // is the result in rD or rM ?
2499 switch (e
->Iex
.Binop
.op
) {
2500 case Iop_InterleaveOddLanes8x8
: resRd
= False
; size
= 0; break;
2501 case Iop_InterleaveEvenLanes8x8
: resRd
= True
; size
= 0; break;
2502 case Iop_InterleaveOddLanes16x4
: resRd
= False
; size
= 1; break;
2503 case Iop_InterleaveEvenLanes16x4
: resRd
= True
; size
= 1; break;
2504 case Iop_InterleaveHI32x2
: resRd
= False
; size
= 2; break;
2505 case Iop_InterleaveLO32x2
: resRd
= True
; size
= 2; break;
2506 default: vassert(0);
2508 addInstr(env
, ARMInstr_NUnary(ARMneon_COPY
, rM
, argL
, 4, False
));
2509 addInstr(env
, ARMInstr_NUnary(ARMneon_COPY
, rD
, argR
, 4, False
));
2510 addInstr(env
, ARMInstr_NDual(ARMneon_TRN
, rD
, rM
, size
, False
));
2511 return resRd
? rD
: rM
;
2514 // These 4 verified 18 Apr 2013
2515 case Iop_InterleaveHI8x8
:
2516 case Iop_InterleaveLO8x8
:
2517 case Iop_InterleaveHI16x4
:
2518 case Iop_InterleaveLO16x4
: {
2519 HReg rD
= newVRegD(env
);
2520 HReg rM
= newVRegD(env
);
2521 HReg argL
= iselNeon64Expr(env
, e
->Iex
.Binop
.arg1
);
2522 HReg argR
= iselNeon64Expr(env
, e
->Iex
.Binop
.arg2
);
2524 Bool resRd
; // is the result in rD or rM ?
2525 switch (e
->Iex
.Binop
.op
) {
2526 case Iop_InterleaveHI8x8
: resRd
= False
; size
= 0; break;
2527 case Iop_InterleaveLO8x8
: resRd
= True
; size
= 0; break;
2528 case Iop_InterleaveHI16x4
: resRd
= False
; size
= 1; break;
2529 case Iop_InterleaveLO16x4
: resRd
= True
; size
= 1; break;
2530 default: vassert(0);
2532 addInstr(env
, ARMInstr_NUnary(ARMneon_COPY
, rM
, argL
, 4, False
));
2533 addInstr(env
, ARMInstr_NUnary(ARMneon_COPY
, rD
, argR
, 4, False
));
2534 addInstr(env
, ARMInstr_NDual(ARMneon_ZIP
, rD
, rM
, size
, False
));
2535 return resRd
? rD
: rM
;
2538 // These 4 verified 18 Apr 2013
2539 case Iop_CatOddLanes8x8
:
2540 case Iop_CatEvenLanes8x8
:
2541 case Iop_CatOddLanes16x4
:
2542 case Iop_CatEvenLanes16x4
: {
2543 HReg rD
= newVRegD(env
);
2544 HReg rM
= newVRegD(env
);
2545 HReg argL
= iselNeon64Expr(env
, e
->Iex
.Binop
.arg1
);
2546 HReg argR
= iselNeon64Expr(env
, e
->Iex
.Binop
.arg2
);
2548 Bool resRd
; // is the result in rD or rM ?
2549 switch (e
->Iex
.Binop
.op
) {
2550 case Iop_CatOddLanes8x8
: resRd
= False
; size
= 0; break;
2551 case Iop_CatEvenLanes8x8
: resRd
= True
; size
= 0; break;
2552 case Iop_CatOddLanes16x4
: resRd
= False
; size
= 1; break;
2553 case Iop_CatEvenLanes16x4
: resRd
= True
; size
= 1; break;
2554 default: vassert(0);
2556 addInstr(env
, ARMInstr_NUnary(ARMneon_COPY
, rM
, argL
, 4, False
));
2557 addInstr(env
, ARMInstr_NUnary(ARMneon_COPY
, rD
, argR
, 4, False
));
2558 addInstr(env
, ARMInstr_NDual(ARMneon_UZP
, rD
, rM
, size
, False
));
2559 return resRd
? rD
: rM
;
2565 case Iop_QAdd64Ux1
: {
2566 HReg res
= newVRegD(env
);
2567 HReg argL
= iselNeon64Expr(env
, e
->Iex
.Binop
.arg1
);
2568 HReg argR
= iselNeon64Expr(env
, e
->Iex
.Binop
.arg2
);
2570 switch (e
->Iex
.Binop
.op
) {
2571 case Iop_QAdd8Ux8
: size
= 0; break;
2572 case Iop_QAdd16Ux4
: size
= 1; break;
2573 case Iop_QAdd32Ux2
: size
= 2; break;
2574 case Iop_QAdd64Ux1
: size
= 3; break;
2575 default: vassert(0);
2577 addInstr(env
, ARMInstr_NBinary(ARMneon_VQADDU
,
2578 res
, argL
, argR
, size
, False
));
2584 case Iop_QAdd64Sx1
: {
2585 HReg res
= newVRegD(env
);
2586 HReg argL
= iselNeon64Expr(env
, e
->Iex
.Binop
.arg1
);
2587 HReg argR
= iselNeon64Expr(env
, e
->Iex
.Binop
.arg2
);
2589 switch (e
->Iex
.Binop
.op
) {
2590 case Iop_QAdd8Sx8
: size
= 0; break;
2591 case Iop_QAdd16Sx4
: size
= 1; break;
2592 case Iop_QAdd32Sx2
: size
= 2; break;
2593 case Iop_QAdd64Sx1
: size
= 3; break;
2594 default: vassert(0);
2596 addInstr(env
, ARMInstr_NBinary(ARMneon_VQADDS
,
2597 res
, argL
, argR
, size
, False
));
2604 HReg res
= newVRegD(env
);
2605 HReg argL
= iselNeon64Expr(env
, e
->Iex
.Binop
.arg1
);
2606 HReg argR
= iselNeon64Expr(env
, e
->Iex
.Binop
.arg2
);
2608 switch (e
->Iex
.Binop
.op
) {
2609 case Iop_Sub8x8
: size
= 0; break;
2610 case Iop_Sub16x4
: size
= 1; break;
2611 case Iop_Sub32x2
: size
= 2; break;
2612 case Iop_Sub64
: size
= 3; break;
2613 default: vassert(0);
2615 addInstr(env
, ARMInstr_NBinary(ARMneon_VSUB
,
2616 res
, argL
, argR
, size
, False
));
2619 case Iop_Sub32Fx2
: {
2620 HReg res
= newVRegD(env
);
2621 HReg argL
= iselNeon64Expr(env
, e
->Iex
.Binop
.arg1
);
2622 HReg argR
= iselNeon64Expr(env
, e
->Iex
.Binop
.arg2
);
2624 addInstr(env
, ARMInstr_NBinary(ARMneon_VSUBFP
,
2625 res
, argL
, argR
, size
, False
));
2631 case Iop_QSub64Ux1
: {
2632 HReg res
= newVRegD(env
);
2633 HReg argL
= iselNeon64Expr(env
, e
->Iex
.Binop
.arg1
);
2634 HReg argR
= iselNeon64Expr(env
, e
->Iex
.Binop
.arg2
);
2636 switch (e
->Iex
.Binop
.op
) {
2637 case Iop_QSub8Ux8
: size
= 0; break;
2638 case Iop_QSub16Ux4
: size
= 1; break;
2639 case Iop_QSub32Ux2
: size
= 2; break;
2640 case Iop_QSub64Ux1
: size
= 3; break;
2641 default: vassert(0);
2643 addInstr(env
, ARMInstr_NBinary(ARMneon_VQSUBU
,
2644 res
, argL
, argR
, size
, False
));
2650 case Iop_QSub64Sx1
: {
2651 HReg res
= newVRegD(env
);
2652 HReg argL
= iselNeon64Expr(env
, e
->Iex
.Binop
.arg1
);
2653 HReg argR
= iselNeon64Expr(env
, e
->Iex
.Binop
.arg2
);
2655 switch (e
->Iex
.Binop
.op
) {
2656 case Iop_QSub8Sx8
: size
= 0; break;
2657 case Iop_QSub16Sx4
: size
= 1; break;
2658 case Iop_QSub32Sx2
: size
= 2; break;
2659 case Iop_QSub64Sx1
: size
= 3; break;
2660 default: vassert(0);
2662 addInstr(env
, ARMInstr_NBinary(ARMneon_VQSUBS
,
2663 res
, argL
, argR
, size
, False
));
2668 case Iop_Max32Ux2
: {
2669 HReg res
= newVRegD(env
);
2670 HReg argL
= iselNeon64Expr(env
, e
->Iex
.Binop
.arg1
);
2671 HReg argR
= iselNeon64Expr(env
, e
->Iex
.Binop
.arg2
);
2673 switch (e
->Iex
.Binop
.op
) {
2674 case Iop_Max8Ux8
: size
= 0; break;
2675 case Iop_Max16Ux4
: size
= 1; break;
2676 case Iop_Max32Ux2
: size
= 2; break;
2677 default: vassert(0);
2679 addInstr(env
, ARMInstr_NBinary(ARMneon_VMAXU
,
2680 res
, argL
, argR
, size
, False
));
2685 case Iop_Max32Sx2
: {
2686 HReg res
= newVRegD(env
);
2687 HReg argL
= iselNeon64Expr(env
, e
->Iex
.Binop
.arg1
);
2688 HReg argR
= iselNeon64Expr(env
, e
->Iex
.Binop
.arg2
);
2690 switch (e
->Iex
.Binop
.op
) {
2691 case Iop_Max8Sx8
: size
= 0; break;
2692 case Iop_Max16Sx4
: size
= 1; break;
2693 case Iop_Max32Sx2
: size
= 2; break;
2694 default: vassert(0);
2696 addInstr(env
, ARMInstr_NBinary(ARMneon_VMAXS
,
2697 res
, argL
, argR
, size
, False
));
2702 case Iop_Min32Ux2
: {
2703 HReg res
= newVRegD(env
);
2704 HReg argL
= iselNeon64Expr(env
, e
->Iex
.Binop
.arg1
);
2705 HReg argR
= iselNeon64Expr(env
, e
->Iex
.Binop
.arg2
);
2707 switch (e
->Iex
.Binop
.op
) {
2708 case Iop_Min8Ux8
: size
= 0; break;
2709 case Iop_Min16Ux4
: size
= 1; break;
2710 case Iop_Min32Ux2
: size
= 2; break;
2711 default: vassert(0);
2713 addInstr(env
, ARMInstr_NBinary(ARMneon_VMINU
,
2714 res
, argL
, argR
, size
, False
));
2719 case Iop_Min32Sx2
: {
2720 HReg res
= newVRegD(env
);
2721 HReg argL
= iselNeon64Expr(env
, e
->Iex
.Binop
.arg1
);
2722 HReg argR
= iselNeon64Expr(env
, e
->Iex
.Binop
.arg2
);
2724 switch (e
->Iex
.Binop
.op
) {
2725 case Iop_Min8Sx8
: size
= 0; break;
2726 case Iop_Min16Sx4
: size
= 1; break;
2727 case Iop_Min32Sx2
: size
= 2; break;
2728 default: vassert(0);
2730 addInstr(env
, ARMInstr_NBinary(ARMneon_VMINS
,
2731 res
, argL
, argR
, size
, False
));
2737 HReg res
= newVRegD(env
);
2738 HReg argL
= iselNeon64Expr(env
, e
->Iex
.Binop
.arg1
);
2739 HReg argR
= iselNeon64Expr(env
, e
->Iex
.Binop
.arg2
);
2740 HReg argR2
= newVRegD(env
);
2741 HReg zero
= newVRegD(env
);
2743 switch (e
->Iex
.Binop
.op
) {
2744 case Iop_Sar8x8
: size
= 0; break;
2745 case Iop_Sar16x4
: size
= 1; break;
2746 case Iop_Sar32x2
: size
= 2; break;
2747 case Iop_Sar64
: size
= 3; break;
2748 default: vassert(0);
2750 addInstr(env
, ARMInstr_NeonImm(zero
, ARMNImm_TI(0,0)));
2751 addInstr(env
, ARMInstr_NBinary(ARMneon_VSUB
,
2752 argR2
, zero
, argR
, size
, False
));
2753 addInstr(env
, ARMInstr_NShift(ARMneon_VSAL
,
2754 res
, argL
, argR2
, size
, False
));
2761 HReg res
= newVRegD(env
);
2762 HReg argL
= iselNeon64Expr(env
, e
->Iex
.Binop
.arg1
);
2763 HReg argR
= iselNeon64Expr(env
, e
->Iex
.Binop
.arg2
);
2765 switch (e
->Iex
.Binop
.op
) {
2766 case Iop_Sal8x8
: size
= 0; break;
2767 case Iop_Sal16x4
: size
= 1; break;
2768 case Iop_Sal32x2
: size
= 2; break;
2769 case Iop_Sal64x1
: size
= 3; break;
2770 default: vassert(0);
2772 addInstr(env
, ARMInstr_NShift(ARMneon_VSAL
,
2773 res
, argL
, argR
, size
, False
));
2779 HReg res
= newVRegD(env
);
2780 HReg argL
= iselNeon64Expr(env
, e
->Iex
.Binop
.arg1
);
2781 HReg argR
= iselNeon64Expr(env
, e
->Iex
.Binop
.arg2
);
2782 HReg argR2
= newVRegD(env
);
2783 HReg zero
= newVRegD(env
);
2785 switch (e
->Iex
.Binop
.op
) {
2786 case Iop_Shr8x8
: size
= 0; break;
2787 case Iop_Shr16x4
: size
= 1; break;
2788 case Iop_Shr32x2
: size
= 2; break;
2789 default: vassert(0);
2791 addInstr(env
, ARMInstr_NeonImm(zero
, ARMNImm_TI(0,0)));
2792 addInstr(env
, ARMInstr_NBinary(ARMneon_VSUB
,
2793 argR2
, zero
, argR
, size
, False
));
2794 addInstr(env
, ARMInstr_NShift(ARMneon_VSHL
,
2795 res
, argL
, argR2
, size
, False
));
2801 HReg res
= newVRegD(env
);
2802 HReg argL
= iselNeon64Expr(env
, e
->Iex
.Binop
.arg1
);
2803 HReg argR
= iselNeon64Expr(env
, e
->Iex
.Binop
.arg2
);
2805 switch (e
->Iex
.Binop
.op
) {
2806 case Iop_Shl8x8
: size
= 0; break;
2807 case Iop_Shl16x4
: size
= 1; break;
2808 case Iop_Shl32x2
: size
= 2; break;
2809 default: vassert(0);
2811 addInstr(env
, ARMInstr_NShift(ARMneon_VSHL
,
2812 res
, argL
, argR
, size
, False
));
2818 case Iop_QShl64x1
: {
2819 HReg res
= newVRegD(env
);
2820 HReg argL
= iselNeon64Expr(env
, e
->Iex
.Binop
.arg1
);
2821 HReg argR
= iselNeon64Expr(env
, e
->Iex
.Binop
.arg2
);
2823 switch (e
->Iex
.Binop
.op
) {
2824 case Iop_QShl8x8
: size
= 0; break;
2825 case Iop_QShl16x4
: size
= 1; break;
2826 case Iop_QShl32x2
: size
= 2; break;
2827 case Iop_QShl64x1
: size
= 3; break;
2828 default: vassert(0);
2830 addInstr(env
, ARMInstr_NShift(ARMneon_VQSHL
,
2831 res
, argL
, argR
, size
, False
));
2837 case Iop_QSal64x1
: {
2838 HReg res
= newVRegD(env
);
2839 HReg argL
= iselNeon64Expr(env
, e
->Iex
.Binop
.arg1
);
2840 HReg argR
= iselNeon64Expr(env
, e
->Iex
.Binop
.arg2
);
2842 switch (e
->Iex
.Binop
.op
) {
2843 case Iop_QSal8x8
: size
= 0; break;
2844 case Iop_QSal16x4
: size
= 1; break;
2845 case Iop_QSal32x2
: size
= 2; break;
2846 case Iop_QSal64x1
: size
= 3; break;
2847 default: vassert(0);
2849 addInstr(env
, ARMInstr_NShift(ARMneon_VQSAL
,
2850 res
, argL
, argR
, size
, False
));
2853 case Iop_QShlNsatUU8x8
:
2854 case Iop_QShlNsatUU16x4
:
2855 case Iop_QShlNsatUU32x2
:
2856 case Iop_QShlNsatUU64x1
: {
2857 HReg res
= newVRegD(env
);
2858 HReg argL
= iselNeon64Expr(env
, e
->Iex
.Binop
.arg1
);
2860 if (e
->Iex
.Binop
.arg2
->tag
!= Iex_Const
||
2861 typeOfIRExpr(env
->type_env
, e
->Iex
.Binop
.arg2
) != Ity_I8
) {
2862 vpanic("ARM target supports Iop_QShlNsatUUAxB with constant "
2863 "second argument only\n");
2865 imm
= e
->Iex
.Binop
.arg2
->Iex
.Const
.con
->Ico
.U8
;
2866 switch (e
->Iex
.Binop
.op
) {
2867 case Iop_QShlNsatUU8x8
: size
= 8 | imm
; break;
2868 case Iop_QShlNsatUU16x4
: size
= 16 | imm
; break;
2869 case Iop_QShlNsatUU32x2
: size
= 32 | imm
; break;
2870 case Iop_QShlNsatUU64x1
: size
= 64 | imm
; break;
2871 default: vassert(0);
2873 addInstr(env
, ARMInstr_NUnary(ARMneon_VQSHLNUU
,
2874 res
, argL
, size
, False
));
2877 case Iop_QShlNsatSU8x8
:
2878 case Iop_QShlNsatSU16x4
:
2879 case Iop_QShlNsatSU32x2
:
2880 case Iop_QShlNsatSU64x1
: {
2881 HReg res
= newVRegD(env
);
2882 HReg argL
= iselNeon64Expr(env
, e
->Iex
.Binop
.arg1
);
2884 if (e
->Iex
.Binop
.arg2
->tag
!= Iex_Const
||
2885 typeOfIRExpr(env
->type_env
, e
->Iex
.Binop
.arg2
) != Ity_I8
) {
2886 vpanic("ARM target supports Iop_QShlNsatSUAxB with constant "
2887 "second argument only\n");
2889 imm
= e
->Iex
.Binop
.arg2
->Iex
.Const
.con
->Ico
.U8
;
2890 switch (e
->Iex
.Binop
.op
) {
2891 case Iop_QShlNsatSU8x8
: size
= 8 | imm
; break;
2892 case Iop_QShlNsatSU16x4
: size
= 16 | imm
; break;
2893 case Iop_QShlNsatSU32x2
: size
= 32 | imm
; break;
2894 case Iop_QShlNsatSU64x1
: size
= 64 | imm
; break;
2895 default: vassert(0);
2897 addInstr(env
, ARMInstr_NUnary(ARMneon_VQSHLNUS
,
2898 res
, argL
, size
, False
));
2901 case Iop_QShlNsatSS8x8
:
2902 case Iop_QShlNsatSS16x4
:
2903 case Iop_QShlNsatSS32x2
:
2904 case Iop_QShlNsatSS64x1
: {
2905 HReg res
= newVRegD(env
);
2906 HReg argL
= iselNeon64Expr(env
, e
->Iex
.Binop
.arg1
);
2908 if (e
->Iex
.Binop
.arg2
->tag
!= Iex_Const
||
2909 typeOfIRExpr(env
->type_env
, e
->Iex
.Binop
.arg2
) != Ity_I8
) {
2910 vpanic("ARM target supports Iop_QShlNsatSSAxB with constant "
2911 "second argument only\n");
2913 imm
= e
->Iex
.Binop
.arg2
->Iex
.Const
.con
->Ico
.U8
;
2914 switch (e
->Iex
.Binop
.op
) {
2915 case Iop_QShlNsatSS8x8
: size
= 8 | imm
; break;
2916 case Iop_QShlNsatSS16x4
: size
= 16 | imm
; break;
2917 case Iop_QShlNsatSS32x2
: size
= 32 | imm
; break;
2918 case Iop_QShlNsatSS64x1
: size
= 64 | imm
; break;
2919 default: vassert(0);
2921 addInstr(env
, ARMInstr_NUnary(ARMneon_VQSHLNSS
,
2922 res
, argL
, size
, False
));
2929 HReg res
= newVRegD(env
);
2930 HReg tmp
= newVRegD(env
);
2931 HReg argL
= iselNeon64Expr(env
, e
->Iex
.Binop
.arg1
);
2932 HReg argR
= iselIntExpr_R(env
, e
->Iex
.Binop
.arg2
);
2933 HReg argR2
= newVRegI(env
);
2935 switch (e
->Iex
.Binop
.op
) {
2936 case Iop_ShrN8x8
: size
= 0; break;
2937 case Iop_ShrN16x4
: size
= 1; break;
2938 case Iop_ShrN32x2
: size
= 2; break;
2939 case Iop_Shr64
: size
= 3; break;
2940 default: vassert(0);
2942 addInstr(env
, ARMInstr_Unary(ARMun_NEG
, argR2
, argR
));
2943 addInstr(env
, ARMInstr_NUnary(ARMneon_DUP
, tmp
, argR2
, 0, False
));
2944 addInstr(env
, ARMInstr_NShift(ARMneon_VSHL
,
2945 res
, argL
, tmp
, size
, False
));
2952 HReg res
= newVRegD(env
);
2953 HReg tmp
= newVRegD(env
);
2954 HReg argL
= iselNeon64Expr(env
, e
->Iex
.Binop
.arg1
);
2955 /* special-case Shl64(x, imm8) since the Neon front
2956 end produces a lot of those for V{LD,ST}{1,2,3,4}. */
2957 if (e
->Iex
.Binop
.op
== Iop_Shl64
2958 && e
->Iex
.Binop
.arg2
->tag
== Iex_Const
) {
2959 vassert(e
->Iex
.Binop
.arg2
->Iex
.Const
.con
->tag
== Ico_U8
);
2960 Int nshift
= e
->Iex
.Binop
.arg2
->Iex
.Const
.con
->Ico
.U8
;
2961 if (nshift
>= 1 && nshift
<= 63) {
2962 addInstr(env
, ARMInstr_NShl64(res
, argL
, nshift
));
2965 /* else fall through to general case */
2967 HReg argR
= iselIntExpr_R(env
, e
->Iex
.Binop
.arg2
);
2969 switch (e
->Iex
.Binop
.op
) {
2970 case Iop_ShlN8x8
: size
= 0; break;
2971 case Iop_ShlN16x4
: size
= 1; break;
2972 case Iop_ShlN32x2
: size
= 2; break;
2973 case Iop_Shl64
: size
= 3; break;
2974 default: vassert(0);
2976 addInstr(env
, ARMInstr_NUnary(ARMneon_DUP
,
2977 tmp
, argR
, 0, False
));
2978 addInstr(env
, ARMInstr_NShift(ARMneon_VSHL
,
2979 res
, argL
, tmp
, size
, False
));
2986 HReg res
= newVRegD(env
);
2987 HReg tmp
= newVRegD(env
);
2988 HReg argL
= iselNeon64Expr(env
, e
->Iex
.Binop
.arg1
);
2989 HReg argR
= iselIntExpr_R(env
, e
->Iex
.Binop
.arg2
);
2990 HReg argR2
= newVRegI(env
);
2992 switch (e
->Iex
.Binop
.op
) {
2993 case Iop_SarN8x8
: size
= 0; break;
2994 case Iop_SarN16x4
: size
= 1; break;
2995 case Iop_SarN32x2
: size
= 2; break;
2996 case Iop_Sar64
: size
= 3; break;
2997 default: vassert(0);
2999 addInstr(env
, ARMInstr_Unary(ARMun_NEG
, argR2
, argR
));
3000 addInstr(env
, ARMInstr_NUnary(ARMneon_DUP
, tmp
, argR2
, 0, False
));
3001 addInstr(env
, ARMInstr_NShift(ARMneon_VSAL
,
3002 res
, argL
, tmp
, size
, False
));
3006 case Iop_CmpGT16Ux4
:
3007 case Iop_CmpGT32Ux2
: {
3008 HReg res
= newVRegD(env
);
3009 HReg argL
= iselNeon64Expr(env
, e
->Iex
.Binop
.arg1
);
3010 HReg argR
= iselNeon64Expr(env
, e
->Iex
.Binop
.arg2
);
3012 switch (e
->Iex
.Binop
.op
) {
3013 case Iop_CmpGT8Ux8
: size
= 0; break;
3014 case Iop_CmpGT16Ux4
: size
= 1; break;
3015 case Iop_CmpGT32Ux2
: size
= 2; break;
3016 default: vassert(0);
3018 addInstr(env
, ARMInstr_NBinary(ARMneon_VCGTU
,
3019 res
, argL
, argR
, size
, False
));
3023 case Iop_CmpGT16Sx4
:
3024 case Iop_CmpGT32Sx2
: {
3025 HReg res
= newVRegD(env
);
3026 HReg argL
= iselNeon64Expr(env
, e
->Iex
.Binop
.arg1
);
3027 HReg argR
= iselNeon64Expr(env
, e
->Iex
.Binop
.arg2
);
3029 switch (e
->Iex
.Binop
.op
) {
3030 case Iop_CmpGT8Sx8
: size
= 0; break;
3031 case Iop_CmpGT16Sx4
: size
= 1; break;
3032 case Iop_CmpGT32Sx2
: size
= 2; break;
3033 default: vassert(0);
3035 addInstr(env
, ARMInstr_NBinary(ARMneon_VCGTS
,
3036 res
, argL
, argR
, size
, False
));
3041 case Iop_CmpEQ32x2
: {
3042 HReg res
= newVRegD(env
);
3043 HReg argL
= iselNeon64Expr(env
, e
->Iex
.Binop
.arg1
);
3044 HReg argR
= iselNeon64Expr(env
, e
->Iex
.Binop
.arg2
);
3046 switch (e
->Iex
.Binop
.op
) {
3047 case Iop_CmpEQ8x8
: size
= 0; break;
3048 case Iop_CmpEQ16x4
: size
= 1; break;
3049 case Iop_CmpEQ32x2
: size
= 2; break;
3050 default: vassert(0);
3052 addInstr(env
, ARMInstr_NBinary(ARMneon_VCEQ
,
3053 res
, argL
, argR
, size
, False
));
3059 HReg res
= newVRegD(env
);
3060 HReg argL
= iselNeon64Expr(env
, e
->Iex
.Binop
.arg1
);
3061 HReg argR
= iselNeon64Expr(env
, e
->Iex
.Binop
.arg2
);
3063 switch(e
->Iex
.Binop
.op
) {
3064 case Iop_Mul8x8
: size
= 0; break;
3065 case Iop_Mul16x4
: size
= 1; break;
3066 case Iop_Mul32x2
: size
= 2; break;
3067 default: vassert(0);
3069 addInstr(env
, ARMInstr_NBinary(ARMneon_VMUL
,
3070 res
, argL
, argR
, size
, False
));
3073 case Iop_Mul32Fx2
: {
3074 HReg res
= newVRegD(env
);
3075 HReg argL
= iselNeon64Expr(env
, e
->Iex
.Binop
.arg1
);
3076 HReg argR
= iselNeon64Expr(env
, e
->Iex
.Binop
.arg2
);
3078 addInstr(env
, ARMInstr_NBinary(ARMneon_VMULFP
,
3079 res
, argL
, argR
, size
, False
));
3082 case Iop_QDMulHi16Sx4
:
3083 case Iop_QDMulHi32Sx2
: {
3084 HReg res
= newVRegD(env
);
3085 HReg argL
= iselNeon64Expr(env
, e
->Iex
.Binop
.arg1
);
3086 HReg argR
= iselNeon64Expr(env
, e
->Iex
.Binop
.arg2
);
3088 switch(e
->Iex
.Binop
.op
) {
3089 case Iop_QDMulHi16Sx4
: size
= 1; break;
3090 case Iop_QDMulHi32Sx2
: size
= 2; break;
3091 default: vassert(0);
3093 addInstr(env
, ARMInstr_NBinary(ARMneon_VQDMULH
,
3094 res
, argL
, argR
, size
, False
));
3098 case Iop_QRDMulHi16Sx4
:
3099 case Iop_QRDMulHi32Sx2
: {
3100 HReg res
= newVRegD(env
);
3101 HReg argL
= iselNeon64Expr(env
, e
->Iex
.Binop
.arg1
);
3102 HReg argR
= iselNeon64Expr(env
, e
->Iex
.Binop
.arg2
);
3104 switch(e
->Iex
.Binop
.op
) {
3105 case Iop_QRDMulHi16Sx4
: size
= 1; break;
3106 case Iop_QRDMulHi32Sx2
: size
= 2; break;
3107 default: vassert(0);
3109 addInstr(env
, ARMInstr_NBinary(ARMneon_VQRDMULH
,
3110 res
, argL
, argR
, size
, False
));
3116 case Iop_PwAdd32x2
: {
3117 HReg res
= newVRegD(env
);
3118 HReg argL
= iselNeon64Expr(env
, e
->Iex
.Binop
.arg1
);
3119 HReg argR
= iselNeon64Expr(env
, e
->Iex
.Binop
.arg2
);
3121 switch(e
->Iex
.Binop
.op
) {
3122 case Iop_PwAdd8x8
: size
= 0; break;
3123 case Iop_PwAdd16x4
: size
= 1; break;
3124 case Iop_PwAdd32x2
: size
= 2; break;
3125 default: vassert(0);
3127 addInstr(env
, ARMInstr_NBinary(ARMneon_VPADD
,
3128 res
, argL
, argR
, size
, False
));
3131 case Iop_PwAdd32Fx2
: {
3132 HReg res
= newVRegD(env
);
3133 HReg argL
= iselNeon64Expr(env
, e
->Iex
.Binop
.arg1
);
3134 HReg argR
= iselNeon64Expr(env
, e
->Iex
.Binop
.arg2
);
3136 addInstr(env
, ARMInstr_NBinary(ARMneon_VPADDFP
,
3137 res
, argL
, argR
, size
, False
));
3141 case Iop_PwMin16Ux4
:
3142 case Iop_PwMin32Ux2
: {
3143 HReg res
= newVRegD(env
);
3144 HReg argL
= iselNeon64Expr(env
, e
->Iex
.Binop
.arg1
);
3145 HReg argR
= iselNeon64Expr(env
, e
->Iex
.Binop
.arg2
);
3147 switch(e
->Iex
.Binop
.op
) {
3148 case Iop_PwMin8Ux8
: size
= 0; break;
3149 case Iop_PwMin16Ux4
: size
= 1; break;
3150 case Iop_PwMin32Ux2
: size
= 2; break;
3151 default: vassert(0);
3153 addInstr(env
, ARMInstr_NBinary(ARMneon_VPMINU
,
3154 res
, argL
, argR
, size
, False
));
3158 case Iop_PwMin16Sx4
:
3159 case Iop_PwMin32Sx2
: {
3160 HReg res
= newVRegD(env
);
3161 HReg argL
= iselNeon64Expr(env
, e
->Iex
.Binop
.arg1
);
3162 HReg argR
= iselNeon64Expr(env
, e
->Iex
.Binop
.arg2
);
3164 switch(e
->Iex
.Binop
.op
) {
3165 case Iop_PwMin8Sx8
: size
= 0; break;
3166 case Iop_PwMin16Sx4
: size
= 1; break;
3167 case Iop_PwMin32Sx2
: size
= 2; break;
3168 default: vassert(0);
3170 addInstr(env
, ARMInstr_NBinary(ARMneon_VPMINS
,
3171 res
, argL
, argR
, size
, False
));
3175 case Iop_PwMax16Ux4
:
3176 case Iop_PwMax32Ux2
: {
3177 HReg res
= newVRegD(env
);
3178 HReg argL
= iselNeon64Expr(env
, e
->Iex
.Binop
.arg1
);
3179 HReg argR
= iselNeon64Expr(env
, e
->Iex
.Binop
.arg2
);
3181 switch(e
->Iex
.Binop
.op
) {
3182 case Iop_PwMax8Ux8
: size
= 0; break;
3183 case Iop_PwMax16Ux4
: size
= 1; break;
3184 case Iop_PwMax32Ux2
: size
= 2; break;
3185 default: vassert(0);
3187 addInstr(env
, ARMInstr_NBinary(ARMneon_VPMAXU
,
3188 res
, argL
, argR
, size
, False
));
3192 case Iop_PwMax16Sx4
:
3193 case Iop_PwMax32Sx2
: {
3194 HReg res
= newVRegD(env
);
3195 HReg argL
= iselNeon64Expr(env
, e
->Iex
.Binop
.arg1
);
3196 HReg argR
= iselNeon64Expr(env
, e
->Iex
.Binop
.arg2
);
3198 switch(e
->Iex
.Binop
.op
) {
3199 case Iop_PwMax8Sx8
: size
= 0; break;
3200 case Iop_PwMax16Sx4
: size
= 1; break;
3201 case Iop_PwMax32Sx2
: size
= 2; break;
3202 default: vassert(0);
3204 addInstr(env
, ARMInstr_NBinary(ARMneon_VPMAXS
,
3205 res
, argL
, argR
, size
, False
));
3209 HReg res
= newVRegD(env
);
3210 HReg argL
= iselNeon64Expr(env
, e
->Iex
.Binop
.arg1
);
3211 HReg argR
= iselNeon64Expr(env
, e
->Iex
.Binop
.arg2
);
3212 addInstr(env
, ARMInstr_NBinary(ARMneon_VTBL
,
3213 res
, argL
, argR
, 0, False
));
3216 case Iop_PolynomialMul8x8
: {
3217 HReg res
= newVRegD(env
);
3218 HReg argL
= iselNeon64Expr(env
, e
->Iex
.Binop
.arg1
);
3219 HReg argR
= iselNeon64Expr(env
, e
->Iex
.Binop
.arg2
);
3221 addInstr(env
, ARMInstr_NBinary(ARMneon_VMULP
,
3222 res
, argL
, argR
, size
, False
));
3225 case Iop_Max32Fx2
: {
3226 HReg res
= newVRegD(env
);
3227 HReg argL
= iselNeon64Expr(env
, e
->Iex
.Binop
.arg1
);
3228 HReg argR
= iselNeon64Expr(env
, e
->Iex
.Binop
.arg2
);
3229 addInstr(env
, ARMInstr_NBinary(ARMneon_VMAXF
,
3230 res
, argL
, argR
, 2, False
));
3233 case Iop_Min32Fx2
: {
3234 HReg res
= newVRegD(env
);
3235 HReg argL
= iselNeon64Expr(env
, e
->Iex
.Binop
.arg1
);
3236 HReg argR
= iselNeon64Expr(env
, e
->Iex
.Binop
.arg2
);
3237 addInstr(env
, ARMInstr_NBinary(ARMneon_VMINF
,
3238 res
, argL
, argR
, 2, False
));
3241 case Iop_PwMax32Fx2
: {
3242 HReg res
= newVRegD(env
);
3243 HReg argL
= iselNeon64Expr(env
, e
->Iex
.Binop
.arg1
);
3244 HReg argR
= iselNeon64Expr(env
, e
->Iex
.Binop
.arg2
);
3245 addInstr(env
, ARMInstr_NBinary(ARMneon_VPMAXF
,
3246 res
, argL
, argR
, 2, False
));
3249 case Iop_PwMin32Fx2
: {
3250 HReg res
= newVRegD(env
);
3251 HReg argL
= iselNeon64Expr(env
, e
->Iex
.Binop
.arg1
);
3252 HReg argR
= iselNeon64Expr(env
, e
->Iex
.Binop
.arg2
);
3253 addInstr(env
, ARMInstr_NBinary(ARMneon_VPMINF
,
3254 res
, argL
, argR
, 2, False
));
3257 case Iop_CmpGT32Fx2
: {
3258 HReg res
= newVRegD(env
);
3259 HReg argL
= iselNeon64Expr(env
, e
->Iex
.Binop
.arg1
);
3260 HReg argR
= iselNeon64Expr(env
, e
->Iex
.Binop
.arg2
);
3261 addInstr(env
, ARMInstr_NBinary(ARMneon_VCGTF
,
3262 res
, argL
, argR
, 2, False
));
3265 case Iop_CmpGE32Fx2
: {
3266 HReg res
= newVRegD(env
);
3267 HReg argL
= iselNeon64Expr(env
, e
->Iex
.Binop
.arg1
);
3268 HReg argR
= iselNeon64Expr(env
, e
->Iex
.Binop
.arg2
);
3269 addInstr(env
, ARMInstr_NBinary(ARMneon_VCGEF
,
3270 res
, argL
, argR
, 2, False
));
3273 case Iop_CmpEQ32Fx2
: {
3274 HReg res
= newVRegD(env
);
3275 HReg argL
= iselNeon64Expr(env
, e
->Iex
.Binop
.arg1
);
3276 HReg argR
= iselNeon64Expr(env
, e
->Iex
.Binop
.arg2
);
3277 addInstr(env
, ARMInstr_NBinary(ARMneon_VCEQF
,
3278 res
, argL
, argR
, 2, False
));
3281 case Iop_F32ToFixed32Ux2_RZ
:
3282 case Iop_F32ToFixed32Sx2_RZ
:
3283 case Iop_Fixed32UToF32x2_RN
:
3284 case Iop_Fixed32SToF32x2_RN
: {
3285 HReg res
= newVRegD(env
);
3286 HReg arg
= iselNeon64Expr(env
, e
->Iex
.Binop
.arg1
);
3289 if (e
->Iex
.Binop
.arg2
->tag
!= Iex_Const
||
3290 typeOfIRExpr(env
->type_env
, e
->Iex
.Binop
.arg2
) != Ity_I8
) {
3291 vpanic("ARM supports FP <-> Fixed conversion with constant "
3292 "second argument less than 33 only\n");
3294 imm6
= e
->Iex
.Binop
.arg2
->Iex
.Const
.con
->Ico
.U8
;
3295 vassert(imm6
<= 32 && imm6
> 0);
3297 switch(e
->Iex
.Binop
.op
) {
3298 case Iop_F32ToFixed32Ux2_RZ
: op
= ARMneon_VCVTFtoFixedU
; break;
3299 case Iop_F32ToFixed32Sx2_RZ
: op
= ARMneon_VCVTFtoFixedS
; break;
3300 case Iop_Fixed32UToF32x2_RN
: op
= ARMneon_VCVTFixedUtoF
; break;
3301 case Iop_Fixed32SToF32x2_RN
: op
= ARMneon_VCVTFixedStoF
; break;
3302 default: vassert(0);
3304 addInstr(env
, ARMInstr_NUnary(op
, res
, arg
, imm6
, False
));
/* FIXME: is this here or not? */
3311 case Iop_VDup32x2: {
3312 HReg res = newVRegD(env);
3313 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3317 if (e->Iex.Binop.arg2->tag != Iex_Const ||
3318 typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
3319 vpanic("ARM supports Iop_VDup with constant "
3320 "second argument less than 16 only\n");
3322 index = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
3323 switch(e->Iex.Binop.op) {
3324 case Iop_VDup8x8: imm4 = (index << 1) + 1; break;
3325 case Iop_VDup16x4: imm4 = (index << 2) + 2; break;
3326 case Iop_VDup32x2: imm4 = (index << 3) + 4; break;
3327 default: vassert(0);
3330 vpanic("ARM supports Iop_VDup with constant "
3331 "second argument less than 16 only\n");
3333 addInstr(env, ARMInstr_NUnary(ARMneon_VDUP,
3334 res, argL, imm4, False));
3343 /* --------- UNARY ops --------- */
3344 if (e
->tag
== Iex_Unop
) {
3345 switch (e
->Iex
.Unop
.op
) {
3349 HReg rLo
= iselIntExpr_R(env
, e
->Iex
.Unop
.arg
);
3350 HReg rHi
= newVRegI(env
);
3351 HReg res
= newVRegD(env
);
3352 addInstr(env
, ARMInstr_Imm32(rHi
, 0));
3353 addInstr(env
, ARMInstr_VXferD(True
/*toD*/, res
, rHi
, rLo
));
3359 HReg rLo
= iselIntExpr_R(env
, e
->Iex
.Unop
.arg
);
3360 HReg rHi
= newVRegI(env
);
3361 addInstr(env
, mk_iMOVds_RR(rHi
, rLo
));
3362 addInstr(env
, ARMInstr_Shift(ARMsh_SAR
, rHi
, rHi
, ARMRI5_I5(31)));
3363 HReg res
= newVRegD(env
);
3364 addInstr(env
, ARMInstr_VXferD(True
/*toD*/, res
, rHi
, rLo
));
3368 /* The next 3 are pass-throughs */
3369 /* ReinterpF64asI64 */
3370 case Iop_ReinterpF64asI64
:
3376 HReg res
= newVRegD(env
);
3377 iselInt64Expr(&rHi
, &rLo
, env
, e
);
3378 addInstr(env
, ARMInstr_VXferD(True
/*toD*/, res
, rHi
, rLo
));
3383 DECLARE_PATTERN(p_veqz_8x8
);
3384 DECLARE_PATTERN(p_veqz_16x4
);
3385 DECLARE_PATTERN(p_veqz_32x2
);
3386 DECLARE_PATTERN(p_vcge_8sx8
);
3387 DECLARE_PATTERN(p_vcge_16sx4
);
3388 DECLARE_PATTERN(p_vcge_32sx2
);
3389 DECLARE_PATTERN(p_vcge_8ux8
);
3390 DECLARE_PATTERN(p_vcge_16ux4
);
3391 DECLARE_PATTERN(p_vcge_32ux2
);
3392 DEFINE_PATTERN(p_veqz_8x8
,
3393 unop(Iop_Not64
, unop(Iop_CmpNEZ8x8
, bind(0))));
3394 DEFINE_PATTERN(p_veqz_16x4
,
3395 unop(Iop_Not64
, unop(Iop_CmpNEZ16x4
, bind(0))));
3396 DEFINE_PATTERN(p_veqz_32x2
,
3397 unop(Iop_Not64
, unop(Iop_CmpNEZ32x2
, bind(0))));
3398 DEFINE_PATTERN(p_vcge_8sx8
,
3399 unop(Iop_Not64
, binop(Iop_CmpGT8Sx8
, bind(1), bind(0))));
3400 DEFINE_PATTERN(p_vcge_16sx4
,
3401 unop(Iop_Not64
, binop(Iop_CmpGT16Sx4
, bind(1), bind(0))));
3402 DEFINE_PATTERN(p_vcge_32sx2
,
3403 unop(Iop_Not64
, binop(Iop_CmpGT32Sx2
, bind(1), bind(0))));
3404 DEFINE_PATTERN(p_vcge_8ux8
,
3405 unop(Iop_Not64
, binop(Iop_CmpGT8Ux8
, bind(1), bind(0))));
3406 DEFINE_PATTERN(p_vcge_16ux4
,
3407 unop(Iop_Not64
, binop(Iop_CmpGT16Ux4
, bind(1), bind(0))));
3408 DEFINE_PATTERN(p_vcge_32ux2
,
3409 unop(Iop_Not64
, binop(Iop_CmpGT32Ux2
, bind(1), bind(0))));
3410 if (matchIRExpr(&mi
, p_veqz_8x8
, e
)) {
3411 HReg res
= newVRegD(env
);
3412 HReg arg
= iselNeon64Expr(env
, mi
.bindee
[0]);
3413 addInstr(env
, ARMInstr_NUnary(ARMneon_EQZ
, res
, arg
, 0, False
));
3415 } else if (matchIRExpr(&mi
, p_veqz_16x4
, e
)) {
3416 HReg res
= newVRegD(env
);
3417 HReg arg
= iselNeon64Expr(env
, mi
.bindee
[0]);
3418 addInstr(env
, ARMInstr_NUnary(ARMneon_EQZ
, res
, arg
, 1, False
));
3420 } else if (matchIRExpr(&mi
, p_veqz_32x2
, e
)) {
3421 HReg res
= newVRegD(env
);
3422 HReg arg
= iselNeon64Expr(env
, mi
.bindee
[0]);
3423 addInstr(env
, ARMInstr_NUnary(ARMneon_EQZ
, res
, arg
, 2, False
));
3425 } else if (matchIRExpr(&mi
, p_vcge_8sx8
, e
)) {
3426 HReg res
= newVRegD(env
);
3427 HReg argL
= iselNeon64Expr(env
, mi
.bindee
[0]);
3428 HReg argR
= iselNeon64Expr(env
, mi
.bindee
[1]);
3429 addInstr(env
, ARMInstr_NBinary(ARMneon_VCGES
,
3430 res
, argL
, argR
, 0, False
));
3432 } else if (matchIRExpr(&mi
, p_vcge_16sx4
, e
)) {
3433 HReg res
= newVRegD(env
);
3434 HReg argL
= iselNeon64Expr(env
, mi
.bindee
[0]);
3435 HReg argR
= iselNeon64Expr(env
, mi
.bindee
[1]);
3436 addInstr(env
, ARMInstr_NBinary(ARMneon_VCGES
,
3437 res
, argL
, argR
, 1, False
));
3439 } else if (matchIRExpr(&mi
, p_vcge_32sx2
, e
)) {
3440 HReg res
= newVRegD(env
);
3441 HReg argL
= iselNeon64Expr(env
, mi
.bindee
[0]);
3442 HReg argR
= iselNeon64Expr(env
, mi
.bindee
[1]);
3443 addInstr(env
, ARMInstr_NBinary(ARMneon_VCGES
,
3444 res
, argL
, argR
, 2, False
));
3446 } else if (matchIRExpr(&mi
, p_vcge_8ux8
, e
)) {
3447 HReg res
= newVRegD(env
);
3448 HReg argL
= iselNeon64Expr(env
, mi
.bindee
[0]);
3449 HReg argR
= iselNeon64Expr(env
, mi
.bindee
[1]);
3450 addInstr(env
, ARMInstr_NBinary(ARMneon_VCGEU
,
3451 res
, argL
, argR
, 0, False
));
3453 } else if (matchIRExpr(&mi
, p_vcge_16ux4
, e
)) {
3454 HReg res
= newVRegD(env
);
3455 HReg argL
= iselNeon64Expr(env
, mi
.bindee
[0]);
3456 HReg argR
= iselNeon64Expr(env
, mi
.bindee
[1]);
3457 addInstr(env
, ARMInstr_NBinary(ARMneon_VCGEU
,
3458 res
, argL
, argR
, 1, False
));
3460 } else if (matchIRExpr(&mi
, p_vcge_32ux2
, e
)) {
3461 HReg res
= newVRegD(env
);
3462 HReg argL
= iselNeon64Expr(env
, mi
.bindee
[0]);
3463 HReg argR
= iselNeon64Expr(env
, mi
.bindee
[1]);
3464 addInstr(env
, ARMInstr_NBinary(ARMneon_VCGEU
,
3465 res
, argL
, argR
, 2, False
));
3468 HReg res
= newVRegD(env
);
3469 HReg arg
= iselNeon64Expr(env
, e
->Iex
.Unop
.arg
);
3470 addInstr(env
, ARMInstr_NUnary(ARMneon_NOT
, res
, arg
, 4, False
));
3479 DECLARE_PATTERN(p_vdup_8x8
);
3480 DECLARE_PATTERN(p_vdup_16x4
);
3481 DECLARE_PATTERN(p_vdup_32x2
);
3482 DEFINE_PATTERN(p_vdup_8x8
,
3483 unop(Iop_Dup8x8
, binop(Iop_GetElem8x8
, bind(0), bind(1))));
3484 DEFINE_PATTERN(p_vdup_16x4
,
3485 unop(Iop_Dup16x4
, binop(Iop_GetElem16x4
, bind(0), bind(1))));
3486 DEFINE_PATTERN(p_vdup_32x2
,
3487 unop(Iop_Dup32x2
, binop(Iop_GetElem32x2
, bind(0), bind(1))));
3488 if (matchIRExpr(&mi
, p_vdup_8x8
, e
)) {
3491 if (mi
.bindee
[1]->tag
== Iex_Const
&&
3492 typeOfIRExpr(env
->type_env
, mi
.bindee
[1]) == Ity_I8
) {
3493 index
= mi
.bindee
[1]->Iex
.Const
.con
->Ico
.U8
;
3494 imm4
= (index
<< 1) + 1;
3496 res
= newVRegD(env
);
3497 arg
= iselNeon64Expr(env
, mi
.bindee
[0]);
3498 addInstr(env
, ARMInstr_NUnaryS(
3500 mkARMNRS(ARMNRS_Reg
, res
, 0),
3501 mkARMNRS(ARMNRS_Scalar
, arg
, index
),
3507 } else if (matchIRExpr(&mi
, p_vdup_16x4
, e
)) {
3510 if (mi
.bindee
[1]->tag
== Iex_Const
&&
3511 typeOfIRExpr(env
->type_env
, mi
.bindee
[1]) == Ity_I8
) {
3512 index
= mi
.bindee
[1]->Iex
.Const
.con
->Ico
.U8
;
3513 imm4
= (index
<< 2) + 2;
3515 res
= newVRegD(env
);
3516 arg
= iselNeon64Expr(env
, mi
.bindee
[0]);
3517 addInstr(env
, ARMInstr_NUnaryS(
3519 mkARMNRS(ARMNRS_Reg
, res
, 0),
3520 mkARMNRS(ARMNRS_Scalar
, arg
, index
),
3526 } else if (matchIRExpr(&mi
, p_vdup_32x2
, e
)) {
3529 if (mi
.bindee
[1]->tag
== Iex_Const
&&
3530 typeOfIRExpr(env
->type_env
, mi
.bindee
[1]) == Ity_I8
) {
3531 index
= mi
.bindee
[1]->Iex
.Const
.con
->Ico
.U8
;
3532 imm4
= (index
<< 3) + 4;
3534 res
= newVRegD(env
);
3535 arg
= iselNeon64Expr(env
, mi
.bindee
[0]);
3536 addInstr(env
, ARMInstr_NUnaryS(
3538 mkARMNRS(ARMNRS_Reg
, res
, 0),
3539 mkARMNRS(ARMNRS_Scalar
, arg
, index
),
3546 arg
= iselIntExpr_R(env
, e
->Iex
.Unop
.arg
);
3547 res
= newVRegD(env
);
3548 switch (e
->Iex
.Unop
.op
) {
3549 case Iop_Dup8x8
: size
= 0; break;
3550 case Iop_Dup16x4
: size
= 1; break;
3551 case Iop_Dup32x2
: size
= 2; break;
3552 default: vassert(0);
3554 addInstr(env
, ARMInstr_NUnary(ARMneon_DUP
, res
, arg
, size
, False
));
3560 HReg res
= newVRegD(env
);
3561 HReg arg
= iselNeon64Expr(env
, e
->Iex
.Unop
.arg
);
3563 switch(e
->Iex
.Binop
.op
) {
3564 case Iop_Abs8x8
: size
= 0; break;
3565 case Iop_Abs16x4
: size
= 1; break;
3566 case Iop_Abs32x2
: size
= 2; break;
3567 default: vassert(0);
3569 addInstr(env
, ARMInstr_NUnary(ARMneon_ABS
, res
, arg
, size
, False
));
3572 case Iop_Reverse8sIn64_x1
:
3573 case Iop_Reverse16sIn64_x1
:
3574 case Iop_Reverse32sIn64_x1
: {
3575 HReg res
= newVRegD(env
);
3576 HReg arg
= iselNeon64Expr(env
, e
->Iex
.Unop
.arg
);
3578 switch(e
->Iex
.Binop
.op
) {
3579 case Iop_Reverse8sIn64_x1
: size
= 0; break;
3580 case Iop_Reverse16sIn64_x1
: size
= 1; break;
3581 case Iop_Reverse32sIn64_x1
: size
= 2; break;
3582 default: vassert(0);
3584 addInstr(env
, ARMInstr_NUnary(ARMneon_REV64
,
3585 res
, arg
, size
, False
));
3588 case Iop_Reverse8sIn32_x2
:
3589 case Iop_Reverse16sIn32_x2
: {
3590 HReg res
= newVRegD(env
);
3591 HReg arg
= iselNeon64Expr(env
, e
->Iex
.Unop
.arg
);
3593 switch(e
->Iex
.Binop
.op
) {
3594 case Iop_Reverse8sIn32_x2
: size
= 0; break;
3595 case Iop_Reverse16sIn32_x2
: size
= 1; break;
3596 default: vassert(0);
3598 addInstr(env
, ARMInstr_NUnary(ARMneon_REV32
,
3599 res
, arg
, size
, False
));
3602 case Iop_Reverse8sIn16_x4
: {
3603 HReg res
= newVRegD(env
);
3604 HReg arg
= iselNeon64Expr(env
, e
->Iex
.Unop
.arg
);
3606 addInstr(env
, ARMInstr_NUnary(ARMneon_REV16
,
3607 res
, arg
, size
, False
));
3610 case Iop_CmpwNEZ64
: {
3611 HReg x_lsh
= newVRegD(env
);
3612 HReg x_rsh
= newVRegD(env
);
3613 HReg lsh_amt
= newVRegD(env
);
3614 HReg rsh_amt
= newVRegD(env
);
3615 HReg zero
= newVRegD(env
);
3616 HReg tmp
= newVRegD(env
);
3617 HReg tmp2
= newVRegD(env
);
3618 HReg res
= newVRegD(env
);
3619 HReg x
= newVRegD(env
);
3620 HReg arg
= iselNeon64Expr(env
, e
->Iex
.Unop
.arg
);
3621 addInstr(env
, ARMInstr_NUnary(ARMneon_EQZ
, tmp2
, arg
, 2, False
));
3622 addInstr(env
, ARMInstr_NUnary(ARMneon_NOT
, x
, tmp2
, 4, False
));
3623 addInstr(env
, ARMInstr_NeonImm(lsh_amt
, ARMNImm_TI(0, 32)));
3624 addInstr(env
, ARMInstr_NeonImm(zero
, ARMNImm_TI(0, 0)));
3625 addInstr(env
, ARMInstr_NBinary(ARMneon_VSUB
,
3626 rsh_amt
, zero
, lsh_amt
, 2, False
));
3627 addInstr(env
, ARMInstr_NShift(ARMneon_VSHL
,
3628 x_lsh
, x
, lsh_amt
, 3, False
));
3629 addInstr(env
, ARMInstr_NShift(ARMneon_VSHL
,
3630 x_rsh
, x
, rsh_amt
, 3, False
));
3631 addInstr(env
, ARMInstr_NBinary(ARMneon_VORR
,
3632 tmp
, x_lsh
, x_rsh
, 0, False
));
3633 addInstr(env
, ARMInstr_NBinary(ARMneon_VORR
,
3634 res
, tmp
, x
, 0, False
));
3638 case Iop_CmpNEZ16x4
:
3639 case Iop_CmpNEZ32x2
: {
3640 HReg res
= newVRegD(env
);
3641 HReg tmp
= newVRegD(env
);
3642 HReg arg
= iselNeon64Expr(env
, e
->Iex
.Unop
.arg
);
3644 switch (e
->Iex
.Unop
.op
) {
3645 case Iop_CmpNEZ8x8
: size
= 0; break;
3646 case Iop_CmpNEZ16x4
: size
= 1; break;
3647 case Iop_CmpNEZ32x2
: size
= 2; break;
3648 default: vassert(0);
3650 addInstr(env
, ARMInstr_NUnary(ARMneon_EQZ
, tmp
, arg
, size
, False
));
3651 addInstr(env
, ARMInstr_NUnary(ARMneon_NOT
, res
, tmp
, 4, False
));
3654 case Iop_NarrowUn16to8x8
:
3655 case Iop_NarrowUn32to16x4
:
3656 case Iop_NarrowUn64to32x2
: {
3657 HReg res
= newVRegD(env
);
3658 HReg arg
= iselNeonExpr(env
, e
->Iex
.Unop
.arg
);
3660 switch(e
->Iex
.Binop
.op
) {
3661 case Iop_NarrowUn16to8x8
: size
= 0; break;
3662 case Iop_NarrowUn32to16x4
: size
= 1; break;
3663 case Iop_NarrowUn64to32x2
: size
= 2; break;
3664 default: vassert(0);
3666 addInstr(env
, ARMInstr_NUnary(ARMneon_COPYN
,
3667 res
, arg
, size
, False
));
3670 case Iop_QNarrowUn16Sto8Sx8
:
3671 case Iop_QNarrowUn32Sto16Sx4
:
3672 case Iop_QNarrowUn64Sto32Sx2
: {
3673 HReg res
= newVRegD(env
);
3674 HReg arg
= iselNeonExpr(env
, e
->Iex
.Unop
.arg
);
3676 switch(e
->Iex
.Binop
.op
) {
3677 case Iop_QNarrowUn16Sto8Sx8
: size
= 0; break;
3678 case Iop_QNarrowUn32Sto16Sx4
: size
= 1; break;
3679 case Iop_QNarrowUn64Sto32Sx2
: size
= 2; break;
3680 default: vassert(0);
3682 addInstr(env
, ARMInstr_NUnary(ARMneon_COPYQNSS
,
3683 res
, arg
, size
, False
));
3686 case Iop_QNarrowUn16Sto8Ux8
:
3687 case Iop_QNarrowUn32Sto16Ux4
:
3688 case Iop_QNarrowUn64Sto32Ux2
: {
3689 HReg res
= newVRegD(env
);
3690 HReg arg
= iselNeonExpr(env
, e
->Iex
.Unop
.arg
);
3692 switch(e
->Iex
.Binop
.op
) {
3693 case Iop_QNarrowUn16Sto8Ux8
: size
= 0; break;
3694 case Iop_QNarrowUn32Sto16Ux4
: size
= 1; break;
3695 case Iop_QNarrowUn64Sto32Ux2
: size
= 2; break;
3696 default: vassert(0);
3698 addInstr(env
, ARMInstr_NUnary(ARMneon_COPYQNUS
,
3699 res
, arg
, size
, False
));
3702 case Iop_QNarrowUn16Uto8Ux8
:
3703 case Iop_QNarrowUn32Uto16Ux4
:
3704 case Iop_QNarrowUn64Uto32Ux2
: {
3705 HReg res
= newVRegD(env
);
3706 HReg arg
= iselNeonExpr(env
, e
->Iex
.Unop
.arg
);
3708 switch(e
->Iex
.Binop
.op
) {
3709 case Iop_QNarrowUn16Uto8Ux8
: size
= 0; break;
3710 case Iop_QNarrowUn32Uto16Ux4
: size
= 1; break;
3711 case Iop_QNarrowUn64Uto32Ux2
: size
= 2; break;
3712 default: vassert(0);
3714 addInstr(env
, ARMInstr_NUnary(ARMneon_COPYQNUU
,
3715 res
, arg
, size
, False
));
3718 case Iop_PwAddL8Sx8
:
3719 case Iop_PwAddL16Sx4
:
3720 case Iop_PwAddL32Sx2
: {
3721 HReg res
= newVRegD(env
);
3722 HReg arg
= iselNeon64Expr(env
, e
->Iex
.Unop
.arg
);
3724 switch(e
->Iex
.Binop
.op
) {
3725 case Iop_PwAddL8Sx8
: size
= 0; break;
3726 case Iop_PwAddL16Sx4
: size
= 1; break;
3727 case Iop_PwAddL32Sx2
: size
= 2; break;
3728 default: vassert(0);
3730 addInstr(env
, ARMInstr_NUnary(ARMneon_PADDLS
,
3731 res
, arg
, size
, False
));
3734 case Iop_PwAddL8Ux8
:
3735 case Iop_PwAddL16Ux4
:
3736 case Iop_PwAddL32Ux2
: {
3737 HReg res
= newVRegD(env
);
3738 HReg arg
= iselNeon64Expr(env
, e
->Iex
.Unop
.arg
);
3740 switch(e
->Iex
.Binop
.op
) {
3741 case Iop_PwAddL8Ux8
: size
= 0; break;
3742 case Iop_PwAddL16Ux4
: size
= 1; break;
3743 case Iop_PwAddL32Ux2
: size
= 2; break;
3744 default: vassert(0);
3746 addInstr(env
, ARMInstr_NUnary(ARMneon_PADDLU
,
3747 res
, arg
, size
, False
));
3751 HReg res
= newVRegD(env
);
3752 HReg arg
= iselNeon64Expr(env
, e
->Iex
.Unop
.arg
);
3754 addInstr(env
, ARMInstr_NUnary(ARMneon_CNT
,
3755 res
, arg
, size
, False
));
3761 HReg res
= newVRegD(env
);
3762 HReg arg
= iselNeon64Expr(env
, e
->Iex
.Unop
.arg
);
3764 switch(e
->Iex
.Binop
.op
) {
3765 case Iop_Clz8x8
: size
= 0; break;
3766 case Iop_Clz16x4
: size
= 1; break;
3767 case Iop_Clz32x2
: size
= 2; break;
3768 default: vassert(0);
3770 addInstr(env
, ARMInstr_NUnary(ARMneon_CLZ
,
3771 res
, arg
, size
, False
));
3777 HReg res
= newVRegD(env
);
3778 HReg arg
= iselNeon64Expr(env
, e
->Iex
.Unop
.arg
);
3780 switch(e
->Iex
.Binop
.op
) {
3781 case Iop_Cls8x8
: size
= 0; break;
3782 case Iop_Cls16x4
: size
= 1; break;
3783 case Iop_Cls32x2
: size
= 2; break;
3784 default: vassert(0);
3786 addInstr(env
, ARMInstr_NUnary(ARMneon_CLS
,
3787 res
, arg
, size
, False
));
3790 case Iop_F32toI32Sx2_RZ
: {
3791 HReg res
= newVRegD(env
);
3792 HReg arg
= iselNeon64Expr(env
, e
->Iex
.Unop
.arg
);
3793 addInstr(env
, ARMInstr_NUnary(ARMneon_VCVTFtoS
,
3794 res
, arg
, 2, False
));
3797 case Iop_F32toI32Ux2_RZ
: {
3798 HReg res
= newVRegD(env
);
3799 HReg arg
= iselNeon64Expr(env
, e
->Iex
.Unop
.arg
);
3800 addInstr(env
, ARMInstr_NUnary(ARMneon_VCVTFtoU
,
3801 res
, arg
, 2, False
));
3804 case Iop_I32StoF32x2_DEP
: {
3805 HReg res
= newVRegD(env
);
3806 HReg arg
= iselNeon64Expr(env
, e
->Iex
.Unop
.arg
);
3807 addInstr(env
, ARMInstr_NUnary(ARMneon_VCVTStoF
,
3808 res
, arg
, 2, False
));
3811 case Iop_I32UtoF32x2_DEP
: {
3812 HReg res
= newVRegD(env
);
3813 HReg arg
= iselNeon64Expr(env
, e
->Iex
.Unop
.arg
);
3814 addInstr(env
, ARMInstr_NUnary(ARMneon_VCVTUtoF
,
3815 res
, arg
, 2, False
));
3818 case Iop_F32toF16x4_DEP
: {
3819 HReg res
= newVRegD(env
);
3820 HReg arg
= iselNeonExpr(env
, e
->Iex
.Unop
.arg
);
3821 addInstr(env
, ARMInstr_NUnary(ARMneon_VCVTF32toF16
,
3822 res
, arg
, 2, False
));
3825 case Iop_RecipEst32Fx2
: {
3826 HReg res
= newVRegD(env
);
3827 HReg argL
= iselNeon64Expr(env
, e
->Iex
.Binop
.arg1
);
3828 addInstr(env
, ARMInstr_NUnary(ARMneon_VRECIPF
,
3829 res
, argL
, 0, False
));
3832 case Iop_RecipEst32Ux2
: {
3833 HReg res
= newVRegD(env
);
3834 HReg argL
= iselNeon64Expr(env
, e
->Iex
.Binop
.arg1
);
3835 addInstr(env
, ARMInstr_NUnary(ARMneon_VRECIP
,
3836 res
, argL
, 0, False
));
3839 case Iop_Abs32Fx2
: {
3840 DECLARE_PATTERN(p_vabd_32fx2
);
3841 DEFINE_PATTERN(p_vabd_32fx2
,
3846 if (matchIRExpr(&mi
, p_vabd_32fx2
, e
)) {
3847 HReg res
= newVRegD(env
);
3848 HReg argL
= iselNeon64Expr(env
, mi
.bindee
[0]);
3849 HReg argR
= iselNeon64Expr(env
, mi
.bindee
[1]);
3850 addInstr(env
, ARMInstr_NBinary(ARMneon_VABDFP
,
3851 res
, argL
, argR
, 0, False
));
3854 HReg res
= newVRegD(env
);
3855 HReg arg
= iselNeon64Expr(env
, e
->Iex
.Unop
.arg
);
3856 addInstr(env
, ARMInstr_NUnary(ARMneon_VABSFP
,
3857 res
, arg
, 0, False
));
3861 case Iop_RSqrtEst32Fx2
: {
3862 HReg res
= newVRegD(env
);
3863 HReg arg
= iselNeon64Expr(env
, e
->Iex
.Unop
.arg
);
3864 addInstr(env
, ARMInstr_NUnary(ARMneon_VRSQRTEFP
,
3865 res
, arg
, 0, False
));
3868 case Iop_RSqrtEst32Ux2
: {
3869 HReg res
= newVRegD(env
);
3870 HReg arg
= iselNeon64Expr(env
, e
->Iex
.Unop
.arg
);
3871 addInstr(env
, ARMInstr_NUnary(ARMneon_VRSQRTE
,
3872 res
, arg
, 0, False
));
3875 case Iop_Neg32Fx2
: {
3876 HReg res
= newVRegD(env
);
3877 HReg arg
= iselNeon64Expr(env
, e
->Iex
.Unop
.arg
);
3878 addInstr(env
, ARMInstr_NUnary(ARMneon_VNEGF
,
3879 res
, arg
, 0, False
));
3883 case Iop_V128HIto64
: {
3884 HReg src
= iselNeonExpr(env
, e
->Iex
.Unop
.arg
);
3885 HReg resLo
= newVRegD(env
);
3886 HReg resHi
= newVRegD(env
);
3887 addInstr(env
, ARMInstr_VXferQ(False
/*!toQ*/, src
, resHi
, resLo
));
3888 return e
->Iex
.Unop
.op
== Iop_V128HIto64
? resHi
: resLo
;
3893 } /* if (e->tag == Iex_Unop) */
3895 if (e
->tag
== Iex_Triop
) {
3896 IRTriop
*triop
= e
->Iex
.Triop
.details
;
3898 switch (triop
->op
) {
3900 HReg res
= newVRegD(env
);
3901 HReg argL
= iselNeon64Expr(env
, triop
->arg2
);
3902 HReg argR
= iselNeon64Expr(env
, triop
->arg1
);
3904 if (triop
->arg3
->tag
!= Iex_Const
||
3905 typeOfIRExpr(env
->type_env
, triop
->arg3
) != Ity_I8
) {
3906 vpanic("ARM target supports Iop_Extract64 with constant "
3907 "third argument less than 16 only\n");
3909 imm4
= triop
->arg3
->Iex
.Const
.con
->Ico
.U8
;
3911 vpanic("ARM target supports Iop_Extract64 with constant "
3912 "third argument less than 16 only\n");
3914 addInstr(env
, ARMInstr_NBinary(ARMneon_VEXT
,
3915 res
, argL
, argR
, imm4
, False
));
3918 case Iop_SetElem8x8
:
3919 case Iop_SetElem16x4
:
3920 case Iop_SetElem32x2
: {
3921 HReg res
= newVRegD(env
);
3922 HReg dreg
= iselNeon64Expr(env
, triop
->arg1
);
3923 HReg arg
= iselIntExpr_R(env
, triop
->arg3
);
3925 if (triop
->arg2
->tag
!= Iex_Const
||
3926 typeOfIRExpr(env
->type_env
, triop
->arg2
) != Ity_I8
) {
3927 vpanic("ARM target supports SetElem with constant "
3928 "second argument only\n");
3930 index
= triop
->arg2
->Iex
.Const
.con
->Ico
.U8
;
3931 switch (triop
->op
) {
3932 case Iop_SetElem8x8
: vassert(index
< 8); size
= 0; break;
3933 case Iop_SetElem16x4
: vassert(index
< 4); size
= 1; break;
3934 case Iop_SetElem32x2
: vassert(index
< 2); size
= 2; break;
3935 default: vassert(0);
3937 addInstr(env
, ARMInstr_NUnary(ARMneon_COPY
, res
, dreg
, 4, False
));
3938 addInstr(env
, ARMInstr_NUnaryS(ARMneon_SETELEM
,
3939 mkARMNRS(ARMNRS_Scalar
, res
, index
),
3940 mkARMNRS(ARMNRS_Reg
, arg
, 0),
3949 /* --------- MULTIPLEX --------- */
3950 if (e
->tag
== Iex_ITE
) { // VFD
3952 HReg res
= newVRegD(env
);
3953 iselInt64Expr(&rHi
, &rLo
, env
, e
);
3954 addInstr(env
, ARMInstr_VXferD(True
/*toD*/, res
, rHi
, rLo
));
3959 vpanic("iselNeon64Expr");
3963 static HReg
iselNeonExpr ( ISelEnv
* env
, const IRExpr
* e
)
3966 vassert(env
->hwcaps
& VEX_HWCAPS_ARM_NEON
);
3967 r
= iselNeonExpr_wrk( env
, e
);
3968 vassert(hregClass(r
) == HRcVec128
);
3969 vassert(hregIsVirtual(r
));
3973 /* DO NOT CALL THIS DIRECTLY */
3974 static HReg
iselNeonExpr_wrk ( ISelEnv
* env
, const IRExpr
* e
)
3976 IRType ty
= typeOfIRExpr(env
->type_env
, e
);
3979 vassert(ty
== Ity_V128
);
3981 if (e
->tag
== Iex_RdTmp
) {
3982 return lookupIRTemp(env
, e
->Iex
.RdTmp
.tmp
);
3985 if (e
->tag
== Iex_Const
) {
3986 /* At the moment there should be no 128-bit constants in IR for ARM
3987 generated during disassemble. They are represented as Iop_64HLtoV128
3988 binary operation and are handled among binary ops. */
3989 /* But zero can be created by valgrind internal optimizer */
3990 if (e
->Iex
.Const
.con
->Ico
.V128
== 0x0000) {
3991 HReg res
= newVRegV(env
);
3992 addInstr(env
, ARMInstr_NeonImm(res
, ARMNImm_TI(6, 0)));
3995 if (e
->Iex
.Const
.con
->Ico
.V128
== 0xFFFF) {
3996 HReg res
= newVRegV(env
);
3997 addInstr(env
, ARMInstr_NeonImm(res
, ARMNImm_TI(6, 255)));
4001 vpanic("128-bit constant is not implemented");
4004 if (e
->tag
== Iex_Load
) {
4005 HReg res
= newVRegV(env
);
4006 ARMAModeN
* am
= iselIntExpr_AModeN(env
, e
->Iex
.Load
.addr
);
4007 vassert(ty
== Ity_V128
);
4008 addInstr(env
, ARMInstr_NLdStQ(True
, res
, am
));
4012 if (e
->tag
== Iex_Get
) {
4013 HReg addr
= newVRegI(env
);
4014 HReg res
= newVRegV(env
);
4015 vassert(ty
== Ity_V128
);
4016 addInstr(env
, ARMInstr_Add32(addr
, hregARM_R8(), e
->Iex
.Get
.offset
));
4017 addInstr(env
, ARMInstr_NLdStQ(True
, res
, mkARMAModeN_R(addr
)));
4021 if (e
->tag
== Iex_Unop
) {
4022 switch (e
->Iex
.Unop
.op
) {
4024 DECLARE_PATTERN(p_veqz_8x16
);
4025 DECLARE_PATTERN(p_veqz_16x8
);
4026 DECLARE_PATTERN(p_veqz_32x4
);
4027 DECLARE_PATTERN(p_vcge_8sx16
);
4028 DECLARE_PATTERN(p_vcge_16sx8
);
4029 DECLARE_PATTERN(p_vcge_32sx4
);
4030 DECLARE_PATTERN(p_vcge_8ux16
);
4031 DECLARE_PATTERN(p_vcge_16ux8
);
4032 DECLARE_PATTERN(p_vcge_32ux4
);
4033 DEFINE_PATTERN(p_veqz_8x16
,
4034 unop(Iop_NotV128
, unop(Iop_CmpNEZ8x16
, bind(0))));
4035 DEFINE_PATTERN(p_veqz_16x8
,
4036 unop(Iop_NotV128
, unop(Iop_CmpNEZ16x8
, bind(0))));
4037 DEFINE_PATTERN(p_veqz_32x4
,
4038 unop(Iop_NotV128
, unop(Iop_CmpNEZ32x4
, bind(0))));
4039 DEFINE_PATTERN(p_vcge_8sx16
,
4040 unop(Iop_NotV128
, binop(Iop_CmpGT8Sx16
, bind(1), bind(0))));
4041 DEFINE_PATTERN(p_vcge_16sx8
,
4042 unop(Iop_NotV128
, binop(Iop_CmpGT16Sx8
, bind(1), bind(0))));
4043 DEFINE_PATTERN(p_vcge_32sx4
,
4044 unop(Iop_NotV128
, binop(Iop_CmpGT32Sx4
, bind(1), bind(0))));
4045 DEFINE_PATTERN(p_vcge_8ux16
,
4046 unop(Iop_NotV128
, binop(Iop_CmpGT8Ux16
, bind(1), bind(0))));
4047 DEFINE_PATTERN(p_vcge_16ux8
,
4048 unop(Iop_NotV128
, binop(Iop_CmpGT16Ux8
, bind(1), bind(0))));
4049 DEFINE_PATTERN(p_vcge_32ux4
,
4050 unop(Iop_NotV128
, binop(Iop_CmpGT32Ux4
, bind(1), bind(0))));
4051 if (matchIRExpr(&mi
, p_veqz_8x16
, e
)) {
4052 HReg res
= newVRegV(env
);
4053 HReg arg
= iselNeonExpr(env
, mi
.bindee
[0]);
4054 addInstr(env
, ARMInstr_NUnary(ARMneon_EQZ
, res
, arg
, 0, True
));
4056 } else if (matchIRExpr(&mi
, p_veqz_16x8
, e
)) {
4057 HReg res
= newVRegV(env
);
4058 HReg arg
= iselNeonExpr(env
, mi
.bindee
[0]);
4059 addInstr(env
, ARMInstr_NUnary(ARMneon_EQZ
, res
, arg
, 1, True
));
4061 } else if (matchIRExpr(&mi
, p_veqz_32x4
, e
)) {
4062 HReg res
= newVRegV(env
);
4063 HReg arg
= iselNeonExpr(env
, mi
.bindee
[0]);
4064 addInstr(env
, ARMInstr_NUnary(ARMneon_EQZ
, res
, arg
, 2, True
));
4066 } else if (matchIRExpr(&mi
, p_vcge_8sx16
, e
)) {
4067 HReg res
= newVRegV(env
);
4068 HReg argL
= iselNeonExpr(env
, mi
.bindee
[0]);
4069 HReg argR
= iselNeonExpr(env
, mi
.bindee
[1]);
4070 addInstr(env
, ARMInstr_NBinary(ARMneon_VCGES
,
4071 res
, argL
, argR
, 0, True
));
4073 } else if (matchIRExpr(&mi
, p_vcge_16sx8
, e
)) {
4074 HReg res
= newVRegV(env
);
4075 HReg argL
= iselNeonExpr(env
, mi
.bindee
[0]);
4076 HReg argR
= iselNeonExpr(env
, mi
.bindee
[1]);
4077 addInstr(env
, ARMInstr_NBinary(ARMneon_VCGES
,
4078 res
, argL
, argR
, 1, True
));
4080 } else if (matchIRExpr(&mi
, p_vcge_32sx4
, e
)) {
4081 HReg res
= newVRegV(env
);
4082 HReg argL
= iselNeonExpr(env
, mi
.bindee
[0]);
4083 HReg argR
= iselNeonExpr(env
, mi
.bindee
[1]);
4084 addInstr(env
, ARMInstr_NBinary(ARMneon_VCGES
,
4085 res
, argL
, argR
, 2, True
));
4087 } else if (matchIRExpr(&mi
, p_vcge_8ux16
, e
)) {
4088 HReg res
= newVRegV(env
);
4089 HReg argL
= iselNeonExpr(env
, mi
.bindee
[0]);
4090 HReg argR
= iselNeonExpr(env
, mi
.bindee
[1]);
4091 addInstr(env
, ARMInstr_NBinary(ARMneon_VCGEU
,
4092 res
, argL
, argR
, 0, True
));
4094 } else if (matchIRExpr(&mi
, p_vcge_16ux8
, e
)) {
4095 HReg res
= newVRegV(env
);
4096 HReg argL
= iselNeonExpr(env
, mi
.bindee
[0]);
4097 HReg argR
= iselNeonExpr(env
, mi
.bindee
[1]);
4098 addInstr(env
, ARMInstr_NBinary(ARMneon_VCGEU
,
4099 res
, argL
, argR
, 1, True
));
4101 } else if (matchIRExpr(&mi
, p_vcge_32ux4
, e
)) {
4102 HReg res
= newVRegV(env
);
4103 HReg argL
= iselNeonExpr(env
, mi
.bindee
[0]);
4104 HReg argR
= iselNeonExpr(env
, mi
.bindee
[1]);
4105 addInstr(env
, ARMInstr_NBinary(ARMneon_VCGEU
,
4106 res
, argL
, argR
, 2, True
));
4109 HReg res
= newVRegV(env
);
4110 HReg arg
= iselNeonExpr(env
, e
->Iex
.Unop
.arg
);
4111 addInstr(env
, ARMInstr_NUnary(ARMneon_NOT
, res
, arg
, 4, True
));
4120 DECLARE_PATTERN(p_vdup_8x16
);
4121 DECLARE_PATTERN(p_vdup_16x8
);
4122 DECLARE_PATTERN(p_vdup_32x4
);
4123 DEFINE_PATTERN(p_vdup_8x16
,
4124 unop(Iop_Dup8x16
, binop(Iop_GetElem8x8
, bind(0), bind(1))));
4125 DEFINE_PATTERN(p_vdup_16x8
,
4126 unop(Iop_Dup16x8
, binop(Iop_GetElem16x4
, bind(0), bind(1))));
4127 DEFINE_PATTERN(p_vdup_32x4
,
4128 unop(Iop_Dup32x4
, binop(Iop_GetElem32x2
, bind(0), bind(1))));
4129 if (matchIRExpr(&mi
, p_vdup_8x16
, e
)) {
4132 if (mi
.bindee
[1]->tag
== Iex_Const
&&
4133 typeOfIRExpr(env
->type_env
, mi
.bindee
[1]) == Ity_I8
) {
4134 index
= mi
.bindee
[1]->Iex
.Const
.con
->Ico
.U8
;
4135 imm4
= (index
<< 1) + 1;
4137 res
= newVRegV(env
);
4138 arg
= iselNeon64Expr(env
, mi
.bindee
[0]);
4139 addInstr(env
, ARMInstr_NUnaryS(
4141 mkARMNRS(ARMNRS_Reg
, res
, 0),
4142 mkARMNRS(ARMNRS_Scalar
, arg
, index
),
4148 } else if (matchIRExpr(&mi
, p_vdup_16x8
, e
)) {
4151 if (mi
.bindee
[1]->tag
== Iex_Const
&&
4152 typeOfIRExpr(env
->type_env
, mi
.bindee
[1]) == Ity_I8
) {
4153 index
= mi
.bindee
[1]->Iex
.Const
.con
->Ico
.U8
;
4154 imm4
= (index
<< 2) + 2;
4156 res
= newVRegV(env
);
4157 arg
= iselNeon64Expr(env
, mi
.bindee
[0]);
4158 addInstr(env
, ARMInstr_NUnaryS(
4160 mkARMNRS(ARMNRS_Reg
, res
, 0),
4161 mkARMNRS(ARMNRS_Scalar
, arg
, index
),
4167 } else if (matchIRExpr(&mi
, p_vdup_32x4
, e
)) {
4170 if (mi
.bindee
[1]->tag
== Iex_Const
&&
4171 typeOfIRExpr(env
->type_env
, mi
.bindee
[1]) == Ity_I8
) {
4172 index
= mi
.bindee
[1]->Iex
.Const
.con
->Ico
.U8
;
4173 imm4
= (index
<< 3) + 4;
4175 res
= newVRegV(env
);
4176 arg
= iselNeon64Expr(env
, mi
.bindee
[0]);
4177 addInstr(env
, ARMInstr_NUnaryS(
4179 mkARMNRS(ARMNRS_Reg
, res
, 0),
4180 mkARMNRS(ARMNRS_Scalar
, arg
, index
),
4187 arg
= iselIntExpr_R(env
, e
->Iex
.Unop
.arg
);
4188 res
= newVRegV(env
);
4189 switch (e
->Iex
.Unop
.op
) {
4190 case Iop_Dup8x16
: size
= 0; break;
4191 case Iop_Dup16x8
: size
= 1; break;
4192 case Iop_Dup32x4
: size
= 2; break;
4193 default: vassert(0);
4195 addInstr(env
, ARMInstr_NUnary(ARMneon_DUP
, res
, arg
, size
, True
));
4201 HReg res
= newVRegV(env
);
4202 HReg arg
= iselNeonExpr(env
, e
->Iex
.Unop
.arg
);
4204 switch(e
->Iex
.Binop
.op
) {
4205 case Iop_Abs8x16
: size
= 0; break;
4206 case Iop_Abs16x8
: size
= 1; break;
4207 case Iop_Abs32x4
: size
= 2; break;
4208 default: vassert(0);
4210 addInstr(env
, ARMInstr_NUnary(ARMneon_ABS
, res
, arg
, size
, True
));
4213 case Iop_Reverse8sIn64_x2
:
4214 case Iop_Reverse16sIn64_x2
:
4215 case Iop_Reverse32sIn64_x2
: {
4216 HReg res
= newVRegV(env
);
4217 HReg arg
= iselNeonExpr(env
, e
->Iex
.Unop
.arg
);
4219 switch(e
->Iex
.Binop
.op
) {
4220 case Iop_Reverse8sIn64_x2
: size
= 0; break;
4221 case Iop_Reverse16sIn64_x2
: size
= 1; break;
4222 case Iop_Reverse32sIn64_x2
: size
= 2; break;
4223 default: vassert(0);
4225 addInstr(env
, ARMInstr_NUnary(ARMneon_REV64
,
4226 res
, arg
, size
, True
));
4229 case Iop_Reverse8sIn32_x4
:
4230 case Iop_Reverse16sIn32_x4
: {
4231 HReg res
= newVRegV(env
);
4232 HReg arg
= iselNeonExpr(env
, e
->Iex
.Unop
.arg
);
4234 switch(e
->Iex
.Binop
.op
) {
4235 case Iop_Reverse8sIn32_x4
: size
= 0; break;
4236 case Iop_Reverse16sIn32_x4
: size
= 1; break;
4237 default: vassert(0);
4239 addInstr(env
, ARMInstr_NUnary(ARMneon_REV32
,
4240 res
, arg
, size
, True
));
4243 case Iop_Reverse8sIn16_x8
: {
4244 HReg res
= newVRegV(env
);
4245 HReg arg
= iselNeonExpr(env
, e
->Iex
.Unop
.arg
);
4247 addInstr(env
, ARMInstr_NUnary(ARMneon_REV16
,
4248 res
, arg
, size
, True
));
4251 case Iop_CmpNEZ64x2
: {
4252 HReg x_lsh
= newVRegV(env
);
4253 HReg x_rsh
= newVRegV(env
);
4254 HReg lsh_amt
= newVRegV(env
);
4255 HReg rsh_amt
= newVRegV(env
);
4256 HReg zero
= newVRegV(env
);
4257 HReg tmp
= newVRegV(env
);
4258 HReg tmp2
= newVRegV(env
);
4259 HReg res
= newVRegV(env
);
4260 HReg x
= newVRegV(env
);
4261 HReg arg
= iselNeonExpr(env
, e
->Iex
.Unop
.arg
);
4262 addInstr(env
, ARMInstr_NUnary(ARMneon_EQZ
, tmp2
, arg
, 2, True
));
4263 addInstr(env
, ARMInstr_NUnary(ARMneon_NOT
, x
, tmp2
, 4, True
));
4264 addInstr(env
, ARMInstr_NeonImm(lsh_amt
, ARMNImm_TI(0, 32)));
4265 addInstr(env
, ARMInstr_NeonImm(zero
, ARMNImm_TI(0, 0)));
4266 addInstr(env
, ARMInstr_NBinary(ARMneon_VSUB
,
4267 rsh_amt
, zero
, lsh_amt
, 2, True
));
4268 addInstr(env
, ARMInstr_NShift(ARMneon_VSHL
,
4269 x_lsh
, x
, lsh_amt
, 3, True
));
4270 addInstr(env
, ARMInstr_NShift(ARMneon_VSHL
,
4271 x_rsh
, x
, rsh_amt
, 3, True
));
4272 addInstr(env
, ARMInstr_NBinary(ARMneon_VORR
,
4273 tmp
, x_lsh
, x_rsh
, 0, True
));
4274 addInstr(env
, ARMInstr_NBinary(ARMneon_VORR
,
4275 res
, tmp
, x
, 0, True
));
4278 case Iop_CmpNEZ8x16
:
4279 case Iop_CmpNEZ16x8
:
4280 case Iop_CmpNEZ32x4
: {
4281 HReg res
= newVRegV(env
);
4282 HReg tmp
= newVRegV(env
);
4283 HReg arg
= iselNeonExpr(env
, e
->Iex
.Unop
.arg
);
4285 switch (e
->Iex
.Unop
.op
) {
4286 case Iop_CmpNEZ8x16
: size
= 0; break;
4287 case Iop_CmpNEZ16x8
: size
= 1; break;
4288 case Iop_CmpNEZ32x4
: size
= 2; break;
4289 default: vassert(0);
4291 addInstr(env
, ARMInstr_NUnary(ARMneon_EQZ
, tmp
, arg
, size
, True
));
4292 addInstr(env
, ARMInstr_NUnary(ARMneon_NOT
, res
, tmp
, 4, True
));
4295 case Iop_Widen8Uto16x8
:
4296 case Iop_Widen16Uto32x4
:
4297 case Iop_Widen32Uto64x2
: {
4298 HReg res
= newVRegV(env
);
4299 HReg arg
= iselNeon64Expr(env
, e
->Iex
.Unop
.arg
);
4301 switch (e
->Iex
.Unop
.op
) {
4302 case Iop_Widen8Uto16x8
: size
= 0; break;
4303 case Iop_Widen16Uto32x4
: size
= 1; break;
4304 case Iop_Widen32Uto64x2
: size
= 2; break;
4305 default: vassert(0);
4307 addInstr(env
, ARMInstr_NUnary(ARMneon_COPYLU
,
4308 res
, arg
, size
, True
));
4311 case Iop_Widen8Sto16x8
:
4312 case Iop_Widen16Sto32x4
:
4313 case Iop_Widen32Sto64x2
: {
4314 HReg res
= newVRegV(env
);
4315 HReg arg
= iselNeon64Expr(env
, e
->Iex
.Unop
.arg
);
4317 switch (e
->Iex
.Unop
.op
) {
4318 case Iop_Widen8Sto16x8
: size
= 0; break;
4319 case Iop_Widen16Sto32x4
: size
= 1; break;
4320 case Iop_Widen32Sto64x2
: size
= 2; break;
4321 default: vassert(0);
4323 addInstr(env
, ARMInstr_NUnary(ARMneon_COPYLS
,
4324 res
, arg
, size
, True
));
4327 case Iop_PwAddL8Sx16
:
4328 case Iop_PwAddL16Sx8
:
4329 case Iop_PwAddL32Sx4
: {
4330 HReg res
= newVRegV(env
);
4331 HReg arg
= iselNeonExpr(env
, e
->Iex
.Unop
.arg
);
4333 switch(e
->Iex
.Binop
.op
) {
4334 case Iop_PwAddL8Sx16
: size
= 0; break;
4335 case Iop_PwAddL16Sx8
: size
= 1; break;
4336 case Iop_PwAddL32Sx4
: size
= 2; break;
4337 default: vassert(0);
4339 addInstr(env
, ARMInstr_NUnary(ARMneon_PADDLS
,
4340 res
, arg
, size
, True
));
4343 case Iop_PwAddL8Ux16
:
4344 case Iop_PwAddL16Ux8
:
4345 case Iop_PwAddL32Ux4
: {
4346 HReg res
= newVRegV(env
);
4347 HReg arg
= iselNeonExpr(env
, e
->Iex
.Unop
.arg
);
4349 switch(e
->Iex
.Binop
.op
) {
4350 case Iop_PwAddL8Ux16
: size
= 0; break;
4351 case Iop_PwAddL16Ux8
: size
= 1; break;
4352 case Iop_PwAddL32Ux4
: size
= 2; break;
4353 default: vassert(0);
4355 addInstr(env
, ARMInstr_NUnary(ARMneon_PADDLU
,
4356 res
, arg
, size
, True
));
4360 HReg res
= newVRegV(env
);
4361 HReg arg
= iselNeonExpr(env
, e
->Iex
.Unop
.arg
);
4363 addInstr(env
, ARMInstr_NUnary(ARMneon_CNT
, res
, arg
, size
, True
));
4369 HReg res
= newVRegV(env
);
4370 HReg arg
= iselNeonExpr(env
, e
->Iex
.Unop
.arg
);
4372 switch(e
->Iex
.Binop
.op
) {
4373 case Iop_Clz8x16
: size
= 0; break;
4374 case Iop_Clz16x8
: size
= 1; break;
4375 case Iop_Clz32x4
: size
= 2; break;
4376 default: vassert(0);
4378 addInstr(env
, ARMInstr_NUnary(ARMneon_CLZ
, res
, arg
, size
, True
));
4384 HReg res
= newVRegV(env
);
4385 HReg arg
= iselNeonExpr(env
, e
->Iex
.Unop
.arg
);
4387 switch(e
->Iex
.Binop
.op
) {
4388 case Iop_Cls8x16
: size
= 0; break;
4389 case Iop_Cls16x8
: size
= 1; break;
4390 case Iop_Cls32x4
: size
= 2; break;
4391 default: vassert(0);
4393 addInstr(env
, ARMInstr_NUnary(ARMneon_CLS
, res
, arg
, size
, True
));
4396 case Iop_F32toI32Sx4_RZ
: {
4397 HReg res
= newVRegV(env
);
4398 HReg arg
= iselNeonExpr(env
, e
->Iex
.Unop
.arg
);
4399 addInstr(env
, ARMInstr_NUnary(ARMneon_VCVTFtoS
,
4400 res
, arg
, 2, True
));
4403 case Iop_F32toI32Ux4_RZ
: {
4404 HReg res
= newVRegV(env
);
4405 HReg arg
= iselNeonExpr(env
, e
->Iex
.Unop
.arg
);
4406 addInstr(env
, ARMInstr_NUnary(ARMneon_VCVTFtoU
,
4407 res
, arg
, 2, True
));
4410 case Iop_I32StoF32x4_DEP
: {
4411 HReg res
= newVRegV(env
);
4412 HReg arg
= iselNeonExpr(env
, e
->Iex
.Unop
.arg
);
4413 addInstr(env
, ARMInstr_NUnary(ARMneon_VCVTStoF
,
4414 res
, arg
, 2, True
));
4417 case Iop_I32UtoF32x4_DEP
: {
4418 HReg res
= newVRegV(env
);
4419 HReg arg
= iselNeonExpr(env
, e
->Iex
.Unop
.arg
);
4420 addInstr(env
, ARMInstr_NUnary(ARMneon_VCVTUtoF
,
4421 res
, arg
, 2, True
));
4424 case Iop_F16toF32x4
: {
4425 HReg res
= newVRegV(env
);
4426 HReg arg
= iselNeon64Expr(env
, e
->Iex
.Unop
.arg
);
4427 addInstr(env
, ARMInstr_NUnary(ARMneon_VCVTF16toF32
,
4428 res
, arg
, 2, True
));
4431 case Iop_RecipEst32Fx4
: {
4432 HReg res
= newVRegV(env
);
4433 HReg argL
= iselNeonExpr(env
, e
->Iex
.Unop
.arg
);
4434 addInstr(env
, ARMInstr_NUnary(ARMneon_VRECIPF
,
4435 res
, argL
, 0, True
));
4438 case Iop_RecipEst32Ux4
: {
4439 HReg res
= newVRegV(env
);
4440 HReg argL
= iselNeonExpr(env
, e
->Iex
.Unop
.arg
);
4441 addInstr(env
, ARMInstr_NUnary(ARMneon_VRECIP
,
4442 res
, argL
, 0, True
));
4445 case Iop_Abs32Fx4
: {
4446 HReg res
= newVRegV(env
);
4447 HReg argL
= iselNeonExpr(env
, e
->Iex
.Unop
.arg
);
4448 addInstr(env
, ARMInstr_NUnary(ARMneon_VABSFP
,
4449 res
, argL
, 0, True
));
4452 case Iop_RSqrtEst32Fx4
: {
4453 HReg res
= newVRegV(env
);
4454 HReg argL
= iselNeonExpr(env
, e
->Iex
.Unop
.arg
);
4455 addInstr(env
, ARMInstr_NUnary(ARMneon_VRSQRTEFP
,
4456 res
, argL
, 0, True
));
4459 case Iop_RSqrtEst32Ux4
: {
4460 HReg res
= newVRegV(env
);
4461 HReg argL
= iselNeonExpr(env
, e
->Iex
.Unop
.arg
);
4462 addInstr(env
, ARMInstr_NUnary(ARMneon_VRSQRTE
,
4463 res
, argL
, 0, True
));
4466 case Iop_Neg32Fx4
: {
4467 HReg res
= newVRegV(env
);
4468 HReg arg
= iselNeonExpr(env
, e
->Iex
.Unop
.arg
);
4469 addInstr(env
, ARMInstr_NUnary(ARMneon_VNEGF
,
4470 res
, arg
, 0, True
));
4479 if (e
->tag
== Iex_Binop
) {
4480 switch (e
->Iex
.Binop
.op
) {
4481 case Iop_64HLtoV128
: {
4482 /* Try to match into single "VMOV reg, imm" instruction */
4483 if (e
->Iex
.Binop
.arg1
->tag
== Iex_Const
&&
4484 e
->Iex
.Binop
.arg2
->tag
== Iex_Const
&&
4485 typeOfIRExpr(env
->type_env
, e
->Iex
.Binop
.arg1
) == Ity_I64
&&
4486 typeOfIRExpr(env
->type_env
, e
->Iex
.Binop
.arg2
) == Ity_I64
&&
4487 e
->Iex
.Binop
.arg1
->Iex
.Const
.con
->Ico
.U64
==
4488 e
->Iex
.Binop
.arg2
->Iex
.Const
.con
->Ico
.U64
) {
4489 ULong imm64
= e
->Iex
.Binop
.arg2
->Iex
.Const
.con
->Ico
.U64
;
4490 ARMNImm
*imm
= Imm64_to_ARMNImm(imm64
);
4492 HReg res
= newVRegV(env
);
4493 addInstr(env
, ARMInstr_NeonImm(res
, imm
));
4496 if ((imm64
>> 32) == 0LL &&
4497 (imm
= Imm64_to_ARMNImm(imm64
| (imm64
<< 32))) != NULL
) {
4498 HReg tmp1
= newVRegV(env
);
4499 HReg tmp2
= newVRegV(env
);
4500 HReg res
= newVRegV(env
);
4501 if (imm
->type
< 10) {
4502 addInstr(env
, ARMInstr_NeonImm(tmp1
, ARMNImm_TI(9,0x0f)));
4503 addInstr(env
, ARMInstr_NeonImm(tmp2
, imm
));
4504 addInstr(env
, ARMInstr_NBinary(ARMneon_VAND
,
4505 res
, tmp1
, tmp2
, 4, True
));
4509 if ((imm64
& 0xFFFFFFFFLL
) == 0LL &&
4510 (imm
= Imm64_to_ARMNImm(imm64
| (imm64
>> 32))) != NULL
) {
4511 HReg tmp1
= newVRegV(env
);
4512 HReg tmp2
= newVRegV(env
);
4513 HReg res
= newVRegV(env
);
4514 if (imm
->type
< 10) {
4515 addInstr(env
, ARMInstr_NeonImm(tmp1
, ARMNImm_TI(9,0xf0)));
4516 addInstr(env
, ARMInstr_NeonImm(tmp2
, imm
));
4517 addInstr(env
, ARMInstr_NBinary(ARMneon_VAND
,
4518 res
, tmp1
, tmp2
, 4, True
));
4523 /* Does not match "VMOV Reg, Imm" form. We'll have to do
4525 HReg dHi
= iselNeon64Expr(env
, e
->Iex
.Binop
.arg1
);
4526 HReg dLo
= iselNeon64Expr(env
, e
->Iex
.Binop
.arg2
);
4527 HReg res
= newVRegV(env
);
4528 addInstr(env
, ARMInstr_VXferQ(True
/*toQ*/, res
, dHi
, dLo
));
4532 HReg res
= newVRegV(env
);
4533 HReg argL
= iselNeonExpr(env
, e
->Iex
.Binop
.arg1
);
4534 HReg argR
= iselNeonExpr(env
, e
->Iex
.Binop
.arg2
);
4535 addInstr(env
, ARMInstr_NBinary(ARMneon_VAND
,
4536 res
, argL
, argR
, 4, True
));
4540 HReg res
= newVRegV(env
);
4541 HReg argL
= iselNeonExpr(env
, e
->Iex
.Binop
.arg1
);
4542 HReg argR
= iselNeonExpr(env
, e
->Iex
.Binop
.arg2
);
4543 addInstr(env
, ARMInstr_NBinary(ARMneon_VORR
,
4544 res
, argL
, argR
, 4, True
));
4548 HReg res
= newVRegV(env
);
4549 HReg argL
= iselNeonExpr(env
, e
->Iex
.Binop
.arg1
);
4550 HReg argR
= iselNeonExpr(env
, e
->Iex
.Binop
.arg2
);
4551 addInstr(env
, ARMInstr_NBinary(ARMneon_VXOR
,
4552 res
, argL
, argR
, 4, True
));
4560 FIXME: remove this if not used
4561 DECLARE_PATTERN(p_vrhadd_32sx4);
4562 ULong one = (1LL << 32) | 1LL;
4563 DEFINE_PATTERN(p_vrhadd_32sx4,
4584 HReg res
= newVRegV(env
);
4585 HReg argL
= iselNeonExpr(env
, e
->Iex
.Binop
.arg1
);
4586 HReg argR
= iselNeonExpr(env
, e
->Iex
.Binop
.arg2
);
4588 switch (e
->Iex
.Binop
.op
) {
4589 case Iop_Add8x16
: size
= 0; break;
4590 case Iop_Add16x8
: size
= 1; break;
4591 case Iop_Add32x4
: size
= 2; break;
4592 case Iop_Add64x2
: size
= 3; break;
4594 ppIROp(e
->Iex
.Binop
.op
);
4595 vpanic("Illegal element size in VADD");
4597 addInstr(env
, ARMInstr_NBinary(ARMneon_VADD
,
4598 res
, argL
, argR
, size
, True
));
4601 case Iop_RecipStep32Fx4
: {
4602 HReg res
= newVRegV(env
);
4603 HReg argL
= iselNeonExpr(env
, e
->Iex
.Binop
.arg1
);
4604 HReg argR
= iselNeonExpr(env
, e
->Iex
.Binop
.arg2
);
4606 addInstr(env
, ARMInstr_NBinary(ARMneon_VRECPS
,
4607 res
, argL
, argR
, size
, True
));
4610 case Iop_RSqrtStep32Fx4
: {
4611 HReg res
= newVRegV(env
);
4612 HReg argL
= iselNeonExpr(env
, e
->Iex
.Binop
.arg1
);
4613 HReg argR
= iselNeonExpr(env
, e
->Iex
.Binop
.arg2
);
4615 addInstr(env
, ARMInstr_NBinary(ARMneon_VRSQRTS
,
4616 res
, argL
, argR
, size
, True
));
4620 // These 6 verified 18 Apr 2013
4621 case Iop_InterleaveEvenLanes8x16
:
4622 case Iop_InterleaveOddLanes8x16
:
4623 case Iop_InterleaveEvenLanes16x8
:
4624 case Iop_InterleaveOddLanes16x8
:
4625 case Iop_InterleaveEvenLanes32x4
:
4626 case Iop_InterleaveOddLanes32x4
: {
4627 HReg rD
= newVRegV(env
);
4628 HReg rM
= newVRegV(env
);
4629 HReg argL
= iselNeonExpr(env
, e
->Iex
.Binop
.arg1
);
4630 HReg argR
= iselNeonExpr(env
, e
->Iex
.Binop
.arg2
);
4632 Bool resRd
; // is the result in rD or rM ?
4633 switch (e
->Iex
.Binop
.op
) {
4634 case Iop_InterleaveOddLanes8x16
: resRd
= False
; size
= 0; break;
4635 case Iop_InterleaveEvenLanes8x16
: resRd
= True
; size
= 0; break;
4636 case Iop_InterleaveOddLanes16x8
: resRd
= False
; size
= 1; break;
4637 case Iop_InterleaveEvenLanes16x8
: resRd
= True
; size
= 1; break;
4638 case Iop_InterleaveOddLanes32x4
: resRd
= False
; size
= 2; break;
4639 case Iop_InterleaveEvenLanes32x4
: resRd
= True
; size
= 2; break;
4640 default: vassert(0);
4642 addInstr(env
, ARMInstr_NUnary(ARMneon_COPY
, rM
, argL
, 4, True
));
4643 addInstr(env
, ARMInstr_NUnary(ARMneon_COPY
, rD
, argR
, 4, True
));
4644 addInstr(env
, ARMInstr_NDual(ARMneon_TRN
, rD
, rM
, size
, True
));
4645 return resRd
? rD
: rM
;
4648 // These 6 verified 18 Apr 2013
4649 case Iop_InterleaveHI8x16
:
4650 case Iop_InterleaveLO8x16
:
4651 case Iop_InterleaveHI16x8
:
4652 case Iop_InterleaveLO16x8
:
4653 case Iop_InterleaveHI32x4
:
4654 case Iop_InterleaveLO32x4
: {
4655 HReg rD
= newVRegV(env
);
4656 HReg rM
= newVRegV(env
);
4657 HReg argL
= iselNeonExpr(env
, e
->Iex
.Binop
.arg1
);
4658 HReg argR
= iselNeonExpr(env
, e
->Iex
.Binop
.arg2
);
4660 Bool resRd
; // is the result in rD or rM ?
4661 switch (e
->Iex
.Binop
.op
) {
4662 case Iop_InterleaveHI8x16
: resRd
= False
; size
= 0; break;
4663 case Iop_InterleaveLO8x16
: resRd
= True
; size
= 0; break;
4664 case Iop_InterleaveHI16x8
: resRd
= False
; size
= 1; break;
4665 case Iop_InterleaveLO16x8
: resRd
= True
; size
= 1; break;
4666 case Iop_InterleaveHI32x4
: resRd
= False
; size
= 2; break;
4667 case Iop_InterleaveLO32x4
: resRd
= True
; size
= 2; break;
4668 default: vassert(0);
4670 addInstr(env
, ARMInstr_NUnary(ARMneon_COPY
, rM
, argL
, 4, True
));
4671 addInstr(env
, ARMInstr_NUnary(ARMneon_COPY
, rD
, argR
, 4, True
));
4672 addInstr(env
, ARMInstr_NDual(ARMneon_ZIP
, rD
, rM
, size
, True
));
4673 return resRd
? rD
: rM
;
4676 // These 6 verified 18 Apr 2013
4677 case Iop_CatOddLanes8x16
:
4678 case Iop_CatEvenLanes8x16
:
4679 case Iop_CatOddLanes16x8
:
4680 case Iop_CatEvenLanes16x8
:
4681 case Iop_CatOddLanes32x4
:
4682 case Iop_CatEvenLanes32x4
: {
4683 HReg rD
= newVRegV(env
);
4684 HReg rM
= newVRegV(env
);
4685 HReg argL
= iselNeonExpr(env
, e
->Iex
.Binop
.arg1
);
4686 HReg argR
= iselNeonExpr(env
, e
->Iex
.Binop
.arg2
);
4688 Bool resRd
; // is the result in rD or rM ?
4689 switch (e
->Iex
.Binop
.op
) {
4690 case Iop_CatOddLanes8x16
: resRd
= False
; size
= 0; break;
4691 case Iop_CatEvenLanes8x16
: resRd
= True
; size
= 0; break;
4692 case Iop_CatOddLanes16x8
: resRd
= False
; size
= 1; break;
4693 case Iop_CatEvenLanes16x8
: resRd
= True
; size
= 1; break;
4694 case Iop_CatOddLanes32x4
: resRd
= False
; size
= 2; break;
4695 case Iop_CatEvenLanes32x4
: resRd
= True
; size
= 2; break;
4696 default: vassert(0);
4698 addInstr(env
, ARMInstr_NUnary(ARMneon_COPY
, rM
, argL
, 4, True
));
4699 addInstr(env
, ARMInstr_NUnary(ARMneon_COPY
, rD
, argR
, 4, True
));
4700 addInstr(env
, ARMInstr_NDual(ARMneon_UZP
, rD
, rM
, size
, True
));
4701 return resRd
? rD
: rM
;
4707 case Iop_QAdd64Ux2
: {
4708 HReg res
= newVRegV(env
);
4709 HReg argL
= iselNeonExpr(env
, e
->Iex
.Binop
.arg1
);
4710 HReg argR
= iselNeonExpr(env
, e
->Iex
.Binop
.arg2
);
4712 switch (e
->Iex
.Binop
.op
) {
4713 case Iop_QAdd8Ux16
: size
= 0; break;
4714 case Iop_QAdd16Ux8
: size
= 1; break;
4715 case Iop_QAdd32Ux4
: size
= 2; break;
4716 case Iop_QAdd64Ux2
: size
= 3; break;
4718 ppIROp(e
->Iex
.Binop
.op
);
4719 vpanic("Illegal element size in VQADDU");
4721 addInstr(env
, ARMInstr_NBinary(ARMneon_VQADDU
,
4722 res
, argL
, argR
, size
, True
));
4728 case Iop_QAdd64Sx2
: {
4729 HReg res
= newVRegV(env
);
4730 HReg argL
= iselNeonExpr(env
, e
->Iex
.Binop
.arg1
);
4731 HReg argR
= iselNeonExpr(env
, e
->Iex
.Binop
.arg2
);
4733 switch (e
->Iex
.Binop
.op
) {
4734 case Iop_QAdd8Sx16
: size
= 0; break;
4735 case Iop_QAdd16Sx8
: size
= 1; break;
4736 case Iop_QAdd32Sx4
: size
= 2; break;
4737 case Iop_QAdd64Sx2
: size
= 3; break;
4739 ppIROp(e
->Iex
.Binop
.op
);
4740 vpanic("Illegal element size in VQADDS");
4742 addInstr(env
, ARMInstr_NBinary(ARMneon_VQADDS
,
4743 res
, argL
, argR
, size
, True
));
4750 HReg res
= newVRegV(env
);
4751 HReg argL
= iselNeonExpr(env
, e
->Iex
.Binop
.arg1
);
4752 HReg argR
= iselNeonExpr(env
, e
->Iex
.Binop
.arg2
);
4754 switch (e
->Iex
.Binop
.op
) {
4755 case Iop_Sub8x16
: size
= 0; break;
4756 case Iop_Sub16x8
: size
= 1; break;
4757 case Iop_Sub32x4
: size
= 2; break;
4758 case Iop_Sub64x2
: size
= 3; break;
4760 ppIROp(e
->Iex
.Binop
.op
);
4761 vpanic("Illegal element size in VSUB");
4763 addInstr(env
, ARMInstr_NBinary(ARMneon_VSUB
,
4764 res
, argL
, argR
, size
, True
));
4770 case Iop_QSub64Ux2
: {
4771 HReg res
= newVRegV(env
);
4772 HReg argL
= iselNeonExpr(env
, e
->Iex
.Binop
.arg1
);
4773 HReg argR
= iselNeonExpr(env
, e
->Iex
.Binop
.arg2
);
4775 switch (e
->Iex
.Binop
.op
) {
4776 case Iop_QSub8Ux16
: size
= 0; break;
4777 case Iop_QSub16Ux8
: size
= 1; break;
4778 case Iop_QSub32Ux4
: size
= 2; break;
4779 case Iop_QSub64Ux2
: size
= 3; break;
4781 ppIROp(e
->Iex
.Binop
.op
);
4782 vpanic("Illegal element size in VQSUBU");
4784 addInstr(env
, ARMInstr_NBinary(ARMneon_VQSUBU
,
4785 res
, argL
, argR
, size
, True
));
4791 case Iop_QSub64Sx2
: {
4792 HReg res
= newVRegV(env
);
4793 HReg argL
= iselNeonExpr(env
, e
->Iex
.Binop
.arg1
);
4794 HReg argR
= iselNeonExpr(env
, e
->Iex
.Binop
.arg2
);
4796 switch (e
->Iex
.Binop
.op
) {
4797 case Iop_QSub8Sx16
: size
= 0; break;
4798 case Iop_QSub16Sx8
: size
= 1; break;
4799 case Iop_QSub32Sx4
: size
= 2; break;
4800 case Iop_QSub64Sx2
: size
= 3; break;
4802 ppIROp(e
->Iex
.Binop
.op
);
4803 vpanic("Illegal element size in VQSUBS");
4805 addInstr(env
, ARMInstr_NBinary(ARMneon_VQSUBS
,
4806 res
, argL
, argR
, size
, True
));
4811 case Iop_Max32Ux4
: {
4812 HReg res
= newVRegV(env
);
4813 HReg argL
= iselNeonExpr(env
, e
->Iex
.Binop
.arg1
);
4814 HReg argR
= iselNeonExpr(env
, e
->Iex
.Binop
.arg2
);
4816 switch (e
->Iex
.Binop
.op
) {
4817 case Iop_Max8Ux16
: size
= 0; break;
4818 case Iop_Max16Ux8
: size
= 1; break;
4819 case Iop_Max32Ux4
: size
= 2; break;
4820 default: vpanic("Illegal element size in VMAXU");
4822 addInstr(env
, ARMInstr_NBinary(ARMneon_VMAXU
,
4823 res
, argL
, argR
, size
, True
));
4828 case Iop_Max32Sx4
: {
4829 HReg res
= newVRegV(env
);
4830 HReg argL
= iselNeonExpr(env
, e
->Iex
.Binop
.arg1
);
4831 HReg argR
= iselNeonExpr(env
, e
->Iex
.Binop
.arg2
);
4833 switch (e
->Iex
.Binop
.op
) {
4834 case Iop_Max8Sx16
: size
= 0; break;
4835 case Iop_Max16Sx8
: size
= 1; break;
4836 case Iop_Max32Sx4
: size
= 2; break;
4837 default: vpanic("Illegal element size in VMAXU");
4839 addInstr(env
, ARMInstr_NBinary(ARMneon_VMAXS
,
4840 res
, argL
, argR
, size
, True
));
4845 case Iop_Min32Ux4
: {
4846 HReg res
= newVRegV(env
);
4847 HReg argL
= iselNeonExpr(env
, e
->Iex
.Binop
.arg1
);
4848 HReg argR
= iselNeonExpr(env
, e
->Iex
.Binop
.arg2
);
4850 switch (e
->Iex
.Binop
.op
) {
4851 case Iop_Min8Ux16
: size
= 0; break;
4852 case Iop_Min16Ux8
: size
= 1; break;
4853 case Iop_Min32Ux4
: size
= 2; break;
4854 default: vpanic("Illegal element size in VMAXU");
4856 addInstr(env
, ARMInstr_NBinary(ARMneon_VMINU
,
4857 res
, argL
, argR
, size
, True
));
4862 case Iop_Min32Sx4
: {
4863 HReg res
= newVRegV(env
);
4864 HReg argL
= iselNeonExpr(env
, e
->Iex
.Binop
.arg1
);
4865 HReg argR
= iselNeonExpr(env
, e
->Iex
.Binop
.arg2
);
4867 switch (e
->Iex
.Binop
.op
) {
4868 case Iop_Min8Sx16
: size
= 0; break;
4869 case Iop_Min16Sx8
: size
= 1; break;
4870 case Iop_Min32Sx4
: size
= 2; break;
4871 default: vpanic("Illegal element size in VMAXU");
4873 addInstr(env
, ARMInstr_NBinary(ARMneon_VMINS
,
4874 res
, argL
, argR
, size
, True
));
4881 HReg res
= newVRegV(env
);
4882 HReg argL
= iselNeonExpr(env
, e
->Iex
.Binop
.arg1
);
4883 HReg argR
= iselNeonExpr(env
, e
->Iex
.Binop
.arg2
);
4884 HReg argR2
= newVRegV(env
);
4885 HReg zero
= newVRegV(env
);
4887 switch (e
->Iex
.Binop
.op
) {
4888 case Iop_Sar8x16
: size
= 0; break;
4889 case Iop_Sar16x8
: size
= 1; break;
4890 case Iop_Sar32x4
: size
= 2; break;
4891 case Iop_Sar64x2
: size
= 3; break;
4892 default: vassert(0);
4894 addInstr(env
, ARMInstr_NeonImm(zero
, ARMNImm_TI(0,0)));
4895 addInstr(env
, ARMInstr_NBinary(ARMneon_VSUB
,
4896 argR2
, zero
, argR
, size
, True
));
4897 addInstr(env
, ARMInstr_NShift(ARMneon_VSAL
,
4898 res
, argL
, argR2
, size
, True
));
4905 HReg res
= newVRegV(env
);
4906 HReg argL
= iselNeonExpr(env
, e
->Iex
.Binop
.arg1
);
4907 HReg argR
= iselNeonExpr(env
, e
->Iex
.Binop
.arg2
);
4909 switch (e
->Iex
.Binop
.op
) {
4910 case Iop_Sal8x16
: size
= 0; break;
4911 case Iop_Sal16x8
: size
= 1; break;
4912 case Iop_Sal32x4
: size
= 2; break;
4913 case Iop_Sal64x2
: size
= 3; break;
4914 default: vassert(0);
4916 addInstr(env
, ARMInstr_NShift(ARMneon_VSAL
,
4917 res
, argL
, argR
, size
, True
));
4924 HReg res
= newVRegV(env
);
4925 HReg argL
= iselNeonExpr(env
, e
->Iex
.Binop
.arg1
);
4926 HReg argR
= iselNeonExpr(env
, e
->Iex
.Binop
.arg2
);
4927 HReg argR2
= newVRegV(env
);
4928 HReg zero
= newVRegV(env
);
4930 switch (e
->Iex
.Binop
.op
) {
4931 case Iop_Shr8x16
: size
= 0; break;
4932 case Iop_Shr16x8
: size
= 1; break;
4933 case Iop_Shr32x4
: size
= 2; break;
4934 case Iop_Shr64x2
: size
= 3; break;
4935 default: vassert(0);
4937 addInstr(env
, ARMInstr_NeonImm(zero
, ARMNImm_TI(0,0)));
4938 addInstr(env
, ARMInstr_NBinary(ARMneon_VSUB
,
4939 argR2
, zero
, argR
, size
, True
));
4940 addInstr(env
, ARMInstr_NShift(ARMneon_VSHL
,
4941 res
, argL
, argR2
, size
, True
));
4948 HReg res
= newVRegV(env
);
4949 HReg argL
= iselNeonExpr(env
, e
->Iex
.Binop
.arg1
);
4950 HReg argR
= iselNeonExpr(env
, e
->Iex
.Binop
.arg2
);
4952 switch (e
->Iex
.Binop
.op
) {
4953 case Iop_Shl8x16
: size
= 0; break;
4954 case Iop_Shl16x8
: size
= 1; break;
4955 case Iop_Shl32x4
: size
= 2; break;
4956 case Iop_Shl64x2
: size
= 3; break;
4957 default: vassert(0);
4959 addInstr(env
, ARMInstr_NShift(ARMneon_VSHL
,
4960 res
, argL
, argR
, size
, True
));
4966 case Iop_QShl64x2
: {
4967 HReg res
= newVRegV(env
);
4968 HReg argL
= iselNeonExpr(env
, e
->Iex
.Binop
.arg1
);
4969 HReg argR
= iselNeonExpr(env
, e
->Iex
.Binop
.arg2
);
4971 switch (e
->Iex
.Binop
.op
) {
4972 case Iop_QShl8x16
: size
= 0; break;
4973 case Iop_QShl16x8
: size
= 1; break;
4974 case Iop_QShl32x4
: size
= 2; break;
4975 case Iop_QShl64x2
: size
= 3; break;
4976 default: vassert(0);
4978 addInstr(env
, ARMInstr_NShift(ARMneon_VQSHL
,
4979 res
, argL
, argR
, size
, True
));
4985 case Iop_QSal64x2
: {
4986 HReg res
= newVRegV(env
);
4987 HReg argL
= iselNeonExpr(env
, e
->Iex
.Binop
.arg1
);
4988 HReg argR
= iselNeonExpr(env
, e
->Iex
.Binop
.arg2
);
4990 switch (e
->Iex
.Binop
.op
) {
4991 case Iop_QSal8x16
: size
= 0; break;
4992 case Iop_QSal16x8
: size
= 1; break;
4993 case Iop_QSal32x4
: size
= 2; break;
4994 case Iop_QSal64x2
: size
= 3; break;
4995 default: vassert(0);
4997 addInstr(env
, ARMInstr_NShift(ARMneon_VQSAL
,
4998 res
, argL
, argR
, size
, True
));
5001 case Iop_QShlNsatUU8x16
:
5002 case Iop_QShlNsatUU16x8
:
5003 case Iop_QShlNsatUU32x4
:
5004 case Iop_QShlNsatUU64x2
: {
5005 HReg res
= newVRegV(env
);
5006 HReg argL
= iselNeonExpr(env
, e
->Iex
.Binop
.arg1
);
5008 if (e
->Iex
.Binop
.arg2
->tag
!= Iex_Const
||
5009 typeOfIRExpr(env
->type_env
, e
->Iex
.Binop
.arg2
) != Ity_I8
) {
5010 vpanic("ARM target supports Iop_QShlNsatUUAxB with constant "
5011 "second argument only\n");
5013 imm
= e
->Iex
.Binop
.arg2
->Iex
.Const
.con
->Ico
.U8
;
5014 switch (e
->Iex
.Binop
.op
) {
5015 case Iop_QShlNsatUU8x16
: size
= 8 | imm
; break;
5016 case Iop_QShlNsatUU16x8
: size
= 16 | imm
; break;
5017 case Iop_QShlNsatUU32x4
: size
= 32 | imm
; break;
5018 case Iop_QShlNsatUU64x2
: size
= 64 | imm
; break;
5019 default: vassert(0);
5021 addInstr(env
, ARMInstr_NUnary(ARMneon_VQSHLNUU
,
5022 res
, argL
, size
, True
));
5025 case Iop_QShlNsatSU8x16
:
5026 case Iop_QShlNsatSU16x8
:
5027 case Iop_QShlNsatSU32x4
:
5028 case Iop_QShlNsatSU64x2
: {
5029 HReg res
= newVRegV(env
);
5030 HReg argL
= iselNeonExpr(env
, e
->Iex
.Binop
.arg1
);
5032 if (e
->Iex
.Binop
.arg2
->tag
!= Iex_Const
||
5033 typeOfIRExpr(env
->type_env
, e
->Iex
.Binop
.arg2
) != Ity_I8
) {
5034 vpanic("ARM target supports Iop_QShlNsatSUAxB with constant "
5035 "second argument only\n");
5037 imm
= e
->Iex
.Binop
.arg2
->Iex
.Const
.con
->Ico
.U8
;
5038 switch (e
->Iex
.Binop
.op
) {
5039 case Iop_QShlNsatSU8x16
: size
= 8 | imm
; break;
5040 case Iop_QShlNsatSU16x8
: size
= 16 | imm
; break;
5041 case Iop_QShlNsatSU32x4
: size
= 32 | imm
; break;
5042 case Iop_QShlNsatSU64x2
: size
= 64 | imm
; break;
5043 default: vassert(0);
5045 addInstr(env
, ARMInstr_NUnary(ARMneon_VQSHLNUS
,
5046 res
, argL
, size
, True
));
5049 case Iop_QShlNsatSS8x16
:
5050 case Iop_QShlNsatSS16x8
:
5051 case Iop_QShlNsatSS32x4
:
5052 case Iop_QShlNsatSS64x2
: {
5053 HReg res
= newVRegV(env
);
5054 HReg argL
= iselNeonExpr(env
, e
->Iex
.Binop
.arg1
);
5056 if (e
->Iex
.Binop
.arg2
->tag
!= Iex_Const
||
5057 typeOfIRExpr(env
->type_env
, e
->Iex
.Binop
.arg2
) != Ity_I8
) {
5058 vpanic("ARM target supports Iop_QShlNsatSSAxB with constant "
5059 "second argument only\n");
5061 imm
= e
->Iex
.Binop
.arg2
->Iex
.Const
.con
->Ico
.U8
;
5062 switch (e
->Iex
.Binop
.op
) {
5063 case Iop_QShlNsatSS8x16
: size
= 8 | imm
; break;
5064 case Iop_QShlNsatSS16x8
: size
= 16 | imm
; break;
5065 case Iop_QShlNsatSS32x4
: size
= 32 | imm
; break;
5066 case Iop_QShlNsatSS64x2
: size
= 64 | imm
; break;
5067 default: vassert(0);
5069 addInstr(env
, ARMInstr_NUnary(ARMneon_VQSHLNSS
,
5070 res
, argL
, size
, True
));
5076 case Iop_ShrN64x2
: {
5077 HReg res
= newVRegV(env
);
5078 HReg tmp
= newVRegV(env
);
5079 HReg argL
= iselNeonExpr(env
, e
->Iex
.Binop
.arg1
);
5080 HReg argR
= iselIntExpr_R(env
, e
->Iex
.Binop
.arg2
);
5081 HReg argR2
= newVRegI(env
);
5083 switch (e
->Iex
.Binop
.op
) {
5084 case Iop_ShrN8x16
: size
= 0; break;
5085 case Iop_ShrN16x8
: size
= 1; break;
5086 case Iop_ShrN32x4
: size
= 2; break;
5087 case Iop_ShrN64x2
: size
= 3; break;
5088 default: vassert(0);
5090 addInstr(env
, ARMInstr_Unary(ARMun_NEG
, argR2
, argR
));
5091 addInstr(env
, ARMInstr_NUnary(ARMneon_DUP
,
5092 tmp
, argR2
, 0, True
));
5093 addInstr(env
, ARMInstr_NShift(ARMneon_VSHL
,
5094 res
, argL
, tmp
, size
, True
));
5100 case Iop_ShlN64x2
: {
5101 HReg res
= newVRegV(env
);
5102 HReg tmp
= newVRegV(env
);
5103 HReg argL
= iselNeonExpr(env
, e
->Iex
.Binop
.arg1
);
5104 HReg argR
= iselIntExpr_R(env
, e
->Iex
.Binop
.arg2
);
5106 switch (e
->Iex
.Binop
.op
) {
5107 case Iop_ShlN8x16
: size
= 0; break;
5108 case Iop_ShlN16x8
: size
= 1; break;
5109 case Iop_ShlN32x4
: size
= 2; break;
5110 case Iop_ShlN64x2
: size
= 3; break;
5111 default: vassert(0);
5113 addInstr(env
, ARMInstr_NUnary(ARMneon_DUP
, tmp
, argR
, 0, True
));
5114 addInstr(env
, ARMInstr_NShift(ARMneon_VSHL
,
5115 res
, argL
, tmp
, size
, True
));
5121 case Iop_SarN64x2
: {
5122 HReg res
= newVRegV(env
);
5123 HReg tmp
= newVRegV(env
);
5124 HReg argL
= iselNeonExpr(env
, e
->Iex
.Binop
.arg1
);
5125 HReg argR
= iselIntExpr_R(env
, e
->Iex
.Binop
.arg2
);
5126 HReg argR2
= newVRegI(env
);
5128 switch (e
->Iex
.Binop
.op
) {
5129 case Iop_SarN8x16
: size
= 0; break;
5130 case Iop_SarN16x8
: size
= 1; break;
5131 case Iop_SarN32x4
: size
= 2; break;
5132 case Iop_SarN64x2
: size
= 3; break;
5133 default: vassert(0);
5135 addInstr(env
, ARMInstr_Unary(ARMun_NEG
, argR2
, argR
));
5136 addInstr(env
, ARMInstr_NUnary(ARMneon_DUP
, tmp
, argR2
, 0, True
));
5137 addInstr(env
, ARMInstr_NShift(ARMneon_VSAL
,
5138 res
, argL
, tmp
, size
, True
));
5141 case Iop_CmpGT8Ux16
:
5142 case Iop_CmpGT16Ux8
:
5143 case Iop_CmpGT32Ux4
: {
5144 HReg res
= newVRegV(env
);
5145 HReg argL
= iselNeonExpr(env
, e
->Iex
.Binop
.arg1
);
5146 HReg argR
= iselNeonExpr(env
, e
->Iex
.Binop
.arg2
);
5148 switch (e
->Iex
.Binop
.op
) {
5149 case Iop_CmpGT8Ux16
: size
= 0; break;
5150 case Iop_CmpGT16Ux8
: size
= 1; break;
5151 case Iop_CmpGT32Ux4
: size
= 2; break;
5152 default: vassert(0);
5154 addInstr(env
, ARMInstr_NBinary(ARMneon_VCGTU
,
5155 res
, argL
, argR
, size
, True
));
5158 case Iop_CmpGT8Sx16
:
5159 case Iop_CmpGT16Sx8
:
5160 case Iop_CmpGT32Sx4
: {
5161 HReg res
= newVRegV(env
);
5162 HReg argL
= iselNeonExpr(env
, e
->Iex
.Binop
.arg1
);
5163 HReg argR
= iselNeonExpr(env
, e
->Iex
.Binop
.arg2
);
5165 switch (e
->Iex
.Binop
.op
) {
5166 case Iop_CmpGT8Sx16
: size
= 0; break;
5167 case Iop_CmpGT16Sx8
: size
= 1; break;
5168 case Iop_CmpGT32Sx4
: size
= 2; break;
5169 default: vassert(0);
5171 addInstr(env
, ARMInstr_NBinary(ARMneon_VCGTS
,
5172 res
, argL
, argR
, size
, True
));
5177 case Iop_CmpEQ32x4
: {
5178 HReg res
= newVRegV(env
);
5179 HReg argL
= iselNeonExpr(env
, e
->Iex
.Binop
.arg1
);
5180 HReg argR
= iselNeonExpr(env
, e
->Iex
.Binop
.arg2
);
5182 switch (e
->Iex
.Binop
.op
) {
5183 case Iop_CmpEQ8x16
: size
= 0; break;
5184 case Iop_CmpEQ16x8
: size
= 1; break;
5185 case Iop_CmpEQ32x4
: size
= 2; break;
5186 default: vassert(0);
5188 addInstr(env
, ARMInstr_NBinary(ARMneon_VCEQ
,
5189 res
, argL
, argR
, size
, True
));
5195 HReg res
= newVRegV(env
);
5196 HReg argL
= iselNeonExpr(env
, e
->Iex
.Binop
.arg1
);
5197 HReg argR
= iselNeonExpr(env
, e
->Iex
.Binop
.arg2
);
5199 switch(e
->Iex
.Binop
.op
) {
5200 case Iop_Mul8x16
: size
= 0; break;
5201 case Iop_Mul16x8
: size
= 1; break;
5202 case Iop_Mul32x4
: size
= 2; break;
5203 default: vassert(0);
5205 addInstr(env
, ARMInstr_NBinary(ARMneon_VMUL
,
5206 res
, argL
, argR
, size
, True
));
5211 case Iop_Mull32Ux2
: {
5212 HReg res
= newVRegV(env
);
5213 HReg argL
= iselNeon64Expr(env
, e
->Iex
.Binop
.arg1
);
5214 HReg argR
= iselNeon64Expr(env
, e
->Iex
.Binop
.arg2
);
5216 switch(e
->Iex
.Binop
.op
) {
5217 case Iop_Mull8Ux8
: size
= 0; break;
5218 case Iop_Mull16Ux4
: size
= 1; break;
5219 case Iop_Mull32Ux2
: size
= 2; break;
5220 default: vassert(0);
5222 addInstr(env
, ARMInstr_NBinary(ARMneon_VMULLU
,
5223 res
, argL
, argR
, size
, True
));
5229 case Iop_Mull32Sx2
: {
5230 HReg res
= newVRegV(env
);
5231 HReg argL
= iselNeon64Expr(env
, e
->Iex
.Binop
.arg1
);
5232 HReg argR
= iselNeon64Expr(env
, e
->Iex
.Binop
.arg2
);
5234 switch(e
->Iex
.Binop
.op
) {
5235 case Iop_Mull8Sx8
: size
= 0; break;
5236 case Iop_Mull16Sx4
: size
= 1; break;
5237 case Iop_Mull32Sx2
: size
= 2; break;
5238 default: vassert(0);
5240 addInstr(env
, ARMInstr_NBinary(ARMneon_VMULLS
,
5241 res
, argL
, argR
, size
, True
));
5245 case Iop_QDMulHi16Sx8
:
5246 case Iop_QDMulHi32Sx4
: {
5247 HReg res
= newVRegV(env
);
5248 HReg argL
= iselNeonExpr(env
, e
->Iex
.Binop
.arg1
);
5249 HReg argR
= iselNeonExpr(env
, e
->Iex
.Binop
.arg2
);
5251 switch(e
->Iex
.Binop
.op
) {
5252 case Iop_QDMulHi16Sx8
: size
= 1; break;
5253 case Iop_QDMulHi32Sx4
: size
= 2; break;
5254 default: vassert(0);
5256 addInstr(env
, ARMInstr_NBinary(ARMneon_VQDMULH
,
5257 res
, argL
, argR
, size
, True
));
5261 case Iop_QRDMulHi16Sx8
:
5262 case Iop_QRDMulHi32Sx4
: {
5263 HReg res
= newVRegV(env
);
5264 HReg argL
= iselNeonExpr(env
, e
->Iex
.Binop
.arg1
);
5265 HReg argR
= iselNeonExpr(env
, e
->Iex
.Binop
.arg2
);
5267 switch(e
->Iex
.Binop
.op
) {
5268 case Iop_QRDMulHi16Sx8
: size
= 1; break;
5269 case Iop_QRDMulHi32Sx4
: size
= 2; break;
5270 default: vassert(0);
5272 addInstr(env
, ARMInstr_NBinary(ARMneon_VQRDMULH
,
5273 res
, argL
, argR
, size
, True
));
5277 case Iop_QDMull16Sx4
:
5278 case Iop_QDMull32Sx2
: {
5279 HReg res
= newVRegV(env
);
5280 HReg argL
= iselNeon64Expr(env
, e
->Iex
.Binop
.arg1
);
5281 HReg argR
= iselNeon64Expr(env
, e
->Iex
.Binop
.arg2
);
5283 switch(e
->Iex
.Binop
.op
) {
5284 case Iop_QDMull16Sx4
: size
= 1; break;
5285 case Iop_QDMull32Sx2
: size
= 2; break;
5286 default: vassert(0);
5288 addInstr(env
, ARMInstr_NBinary(ARMneon_VQDMULL
,
5289 res
, argL
, argR
, size
, True
));
5292 case Iop_PolynomialMul8x16
: {
5293 HReg res
= newVRegV(env
);
5294 HReg argL
= iselNeonExpr(env
, e
->Iex
.Binop
.arg1
);
5295 HReg argR
= iselNeonExpr(env
, e
->Iex
.Binop
.arg2
);
5297 addInstr(env
, ARMInstr_NBinary(ARMneon_VMULP
,
5298 res
, argL
, argR
, size
, True
));
5301 case Iop_Max32Fx4
: {
5302 HReg res
= newVRegV(env
);
5303 HReg argL
= iselNeonExpr(env
, e
->Iex
.Binop
.arg1
);
5304 HReg argR
= iselNeonExpr(env
, e
->Iex
.Binop
.arg2
);
5305 addInstr(env
, ARMInstr_NBinary(ARMneon_VMAXF
,
5306 res
, argL
, argR
, 2, True
));
5309 case Iop_Min32Fx4
: {
5310 HReg res
= newVRegV(env
);
5311 HReg argL
= iselNeonExpr(env
, e
->Iex
.Binop
.arg1
);
5312 HReg argR
= iselNeonExpr(env
, e
->Iex
.Binop
.arg2
);
5313 addInstr(env
, ARMInstr_NBinary(ARMneon_VMINF
,
5314 res
, argL
, argR
, 2, True
));
5317 case Iop_PwMax32Fx4
: {
5318 HReg res
= newVRegV(env
);
5319 HReg argL
= iselNeonExpr(env
, e
->Iex
.Binop
.arg1
);
5320 HReg argR
= iselNeonExpr(env
, e
->Iex
.Binop
.arg2
);
5321 addInstr(env
, ARMInstr_NBinary(ARMneon_VPMAXF
,
5322 res
, argL
, argR
, 2, True
));
5325 case Iop_PwMin32Fx4
: {
5326 HReg res
= newVRegV(env
);
5327 HReg argL
= iselNeonExpr(env
, e
->Iex
.Binop
.arg1
);
5328 HReg argR
= iselNeonExpr(env
, e
->Iex
.Binop
.arg2
);
5329 addInstr(env
, ARMInstr_NBinary(ARMneon_VPMINF
,
5330 res
, argL
, argR
, 2, True
));
5333 case Iop_CmpGT32Fx4
: {
5334 HReg res
= newVRegV(env
);
5335 HReg argL
= iselNeonExpr(env
, e
->Iex
.Binop
.arg1
);
5336 HReg argR
= iselNeonExpr(env
, e
->Iex
.Binop
.arg2
);
5337 addInstr(env
, ARMInstr_NBinary(ARMneon_VCGTF
,
5338 res
, argL
, argR
, 2, True
));
5341 case Iop_CmpGE32Fx4
: {
5342 HReg res
= newVRegV(env
);
5343 HReg argL
= iselNeonExpr(env
, e
->Iex
.Binop
.arg1
);
5344 HReg argR
= iselNeonExpr(env
, e
->Iex
.Binop
.arg2
);
5345 addInstr(env
, ARMInstr_NBinary(ARMneon_VCGEF
,
5346 res
, argL
, argR
, 2, True
));
5349 case Iop_CmpEQ32Fx4
: {
5350 HReg res
= newVRegV(env
);
5351 HReg argL
= iselNeonExpr(env
, e
->Iex
.Binop
.arg1
);
5352 HReg argR
= iselNeonExpr(env
, e
->Iex
.Binop
.arg2
);
5353 addInstr(env
, ARMInstr_NBinary(ARMneon_VCEQF
,
5354 res
, argL
, argR
, 2, True
));
5358 case Iop_PolynomialMull8x8
: {
5359 HReg res
= newVRegV(env
);
5360 HReg argL
= iselNeon64Expr(env
, e
->Iex
.Binop
.arg1
);
5361 HReg argR
= iselNeon64Expr(env
, e
->Iex
.Binop
.arg2
);
5363 addInstr(env
, ARMInstr_NBinary(ARMneon_VMULLP
,
5364 res
, argL
, argR
, size
, True
));
5367 case Iop_F32ToFixed32Ux4_RZ
:
5368 case Iop_F32ToFixed32Sx4_RZ
:
5369 case Iop_Fixed32UToF32x4_RN
:
5370 case Iop_Fixed32SToF32x4_RN
: {
5371 HReg res
= newVRegV(env
);
5372 HReg arg
= iselNeonExpr(env
, e
->Iex
.Binop
.arg1
);
5375 if (e
->Iex
.Binop
.arg2
->tag
!= Iex_Const
||
5376 typeOfIRExpr(env
->type_env
, e
->Iex
.Binop
.arg2
) != Ity_I8
) {
5377 vpanic("ARM supports FP <-> Fixed conversion with constant "
5378 "second argument less than 33 only\n");
5380 imm6
= e
->Iex
.Binop
.arg2
->Iex
.Const
.con
->Ico
.U8
;
5381 vassert(imm6
<= 32 && imm6
> 0);
5383 switch(e
->Iex
.Binop
.op
) {
5384 case Iop_F32ToFixed32Ux4_RZ
: op
= ARMneon_VCVTFtoFixedU
; break;
5385 case Iop_F32ToFixed32Sx4_RZ
: op
= ARMneon_VCVTFtoFixedS
; break;
5386 case Iop_Fixed32UToF32x4_RN
: op
= ARMneon_VCVTFixedUtoF
; break;
5387 case Iop_Fixed32SToF32x4_RN
: op
= ARMneon_VCVTFixedStoF
; break;
5388 default: vassert(0);
5390 addInstr(env
, ARMInstr_NUnary(op
, res
, arg
, imm6
, True
));
5394 FIXME remove if not used
5397 case Iop_VDup32x4: {
5398 HReg res = newVRegV(env);
5399 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
5402 if (e->Iex.Binop.arg2->tag != Iex_Const ||
5403 typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
5404 vpanic("ARM supports Iop_VDup with constant "
5405 "second argument less than 16 only\n");
5407 index = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
5408 switch(e->Iex.Binop.op) {
5409 case Iop_VDup8x16: imm4 = (index << 1) + 1; break;
5410 case Iop_VDup16x8: imm4 = (index << 2) + 2; break;
5411 case Iop_VDup32x4: imm4 = (index << 3) + 4; break;
5412 default: vassert(0);
5415 vpanic("ARM supports Iop_VDup with constant "
5416 "second argument less than 16 only\n");
5418 addInstr(env, ARMInstr_NUnary(ARMneon_VDUP,
5419 res, argL, imm4, True));
5425 case Iop_PwAdd32x4
: {
5426 HReg res
= newVRegV(env
);
5427 HReg argL
= iselNeonExpr(env
, e
->Iex
.Binop
.arg1
);
5428 HReg argR
= iselNeonExpr(env
, e
->Iex
.Binop
.arg2
);
5430 switch(e
->Iex
.Binop
.op
) {
5431 case Iop_PwAdd8x16
: size
= 0; break;
5432 case Iop_PwAdd16x8
: size
= 1; break;
5433 case Iop_PwAdd32x4
: size
= 2; break;
5434 default: vassert(0);
5436 addInstr(env
, ARMInstr_NBinary(ARMneon_VPADD
,
5437 res
, argL
, argR
, size
, True
));
5446 if (e
->tag
== Iex_Triop
) {
5447 IRTriop
*triop
= e
->Iex
.Triop
.details
;
5449 switch (triop
->op
) {
5450 case Iop_SliceV128
: {
5451 HReg res
= newVRegV(env
);
5452 HReg argL
= iselNeonExpr(env
, triop
->arg2
);
5453 HReg argR
= iselNeonExpr(env
, triop
->arg1
);
5455 if (triop
->arg3
->tag
!= Iex_Const
||
5456 typeOfIRExpr(env
->type_env
, triop
->arg3
) != Ity_I8
) {
5457 vpanic("ARM target supports Iop_ExtractV128 with constant "
5458 "third argument less than 16 only\n");
5460 imm4
= triop
->arg3
->Iex
.Const
.con
->Ico
.U8
;
5462 vpanic("ARM target supports Iop_ExtractV128 with constant "
5463 "third argument less than 16 only\n");
5465 addInstr(env
, ARMInstr_NBinary(ARMneon_VEXT
,
5466 res
, argL
, argR
, imm4
, True
));
5471 case Iop_Add32Fx4
: {
5472 HReg res
= newVRegV(env
);
5473 HReg argL
= iselNeonExpr(env
, triop
->arg2
);
5474 HReg argR
= iselNeonExpr(env
, triop
->arg3
);
5476 ARMNeonBinOp op
= ARMneon_INVALID
;
5477 switch (triop
->op
) {
5478 case Iop_Mul32Fx4
: op
= ARMneon_VMULFP
; break;
5479 case Iop_Sub32Fx4
: op
= ARMneon_VSUBFP
; break;
5480 case Iop_Add32Fx4
: op
= ARMneon_VADDFP
; break;
5481 default: vassert(0);
5483 addInstr(env
, ARMInstr_NBinary(op
, res
, argL
, argR
, size
, True
));
5491 if (e
->tag
== Iex_ITE
) { // VFD
5493 HReg r1
= iselNeonExpr(env
, e
->Iex
.ITE
.iftrue
);
5494 HReg r0
= iselNeonExpr(env
, e
->Iex
.ITE
.iffalse
);
5495 HReg dst
= newVRegV(env
);
5496 addInstr(env
, ARMInstr_NUnary(ARMneon_COPY
, dst
, r1
, 4, True
));
5497 cc
= iselCondCode(env
, e
->Iex
.ITE
.cond
);
5498 addInstr(env
, ARMInstr_NCMovQ(cc
^ 1, dst
, r0
));
5502 /* neon_expr_bad: */
5504 vpanic("iselNeonExpr_wrk");
5507 /*---------------------------------------------------------*/
5508 /*--- ISEL: Floating point expressions (64 bit) ---*/
5509 /*---------------------------------------------------------*/
5511 /* Compute a 64-bit floating point value into a register, the identity
5512 of which is returned. As with iselIntExpr_R, the reg may be either
5513 real or virtual; in any case it must not be changed by subsequent
5514 code emitted by the caller. */
5516 static HReg
iselDblExpr ( ISelEnv
* env
, IRExpr
* e
)
5518 HReg r
= iselDblExpr_wrk( env
, e
);
5520 vex_printf("\n"); ppIRExpr(e
); vex_printf("\n");
5522 vassert(hregClass(r
) == HRcFlt64
);
5523 vassert(hregIsVirtual(r
));
5527 /* DO NOT CALL THIS DIRECTLY */
5528 static HReg
iselDblExpr_wrk ( ISelEnv
* env
, IRExpr
* e
)
5530 IRType ty
= typeOfIRExpr(env
->type_env
,e
);
5532 vassert(ty
== Ity_F64
);
5534 if (e
->tag
== Iex_RdTmp
) {
5535 return lookupIRTemp(env
, e
->Iex
.RdTmp
.tmp
);
5538 if (e
->tag
== Iex_Const
) {
5539 /* Just handle the zero case. */
5540 IRConst
* con
= e
->Iex
.Const
.con
;
5541 if (con
->tag
== Ico_F64i
&& con
->Ico
.F64i
== 0ULL) {
5542 HReg z32
= newVRegI(env
);
5543 HReg dst
= newVRegD(env
);
5544 addInstr(env
, ARMInstr_Imm32(z32
, 0));
5545 addInstr(env
, ARMInstr_VXferD(True
/*toD*/, dst
, z32
, z32
));
5550 if (e
->tag
== Iex_Load
&& e
->Iex
.Load
.end
== Iend_LE
) {
5552 HReg res
= newVRegD(env
);
5553 vassert(e
->Iex
.Load
.ty
== Ity_F64
);
5554 am
= iselIntExpr_AModeV(env
, e
->Iex
.Load
.addr
);
5555 addInstr(env
, ARMInstr_VLdStD(True
/*isLoad*/, res
, am
));
5559 if (e
->tag
== Iex_Get
) {
5560 // XXX This won't work if offset > 1020 or is not 0 % 4.
5561 // In which case we'll have to generate more longwinded code.
5562 ARMAModeV
* am
= mkARMAModeV(hregARM_R8(), e
->Iex
.Get
.offset
);
5563 HReg res
= newVRegD(env
);
5564 addInstr(env
, ARMInstr_VLdStD(True
/*isLoad*/, res
, am
));
5568 if (e
->tag
== Iex_Unop
) {
5569 switch (e
->Iex
.Unop
.op
) {
5570 case Iop_ReinterpI64asF64
: {
5571 if (env
->hwcaps
& VEX_HWCAPS_ARM_NEON
) {
5572 return iselNeon64Expr(env
, e
->Iex
.Unop
.arg
);
5575 HReg dst
= newVRegD(env
);
5576 iselInt64Expr(&srcHi
, &srcLo
, env
, e
->Iex
.Unop
.arg
);
5577 addInstr(env
, ARMInstr_VXferD(True
/*toD*/, dst
, srcHi
, srcLo
));
5582 HReg src
= iselDblExpr(env
, e
->Iex
.Unop
.arg
);
5583 HReg dst
= newVRegD(env
);
5584 addInstr(env
, ARMInstr_VUnaryD(ARMvfpu_NEG
, dst
, src
));
5588 HReg src
= iselDblExpr(env
, e
->Iex
.Unop
.arg
);
5589 HReg dst
= newVRegD(env
);
5590 addInstr(env
, ARMInstr_VUnaryD(ARMvfpu_ABS
, dst
, src
));
5593 case Iop_F32toF64
: {
5594 HReg src
= iselFltExpr(env
, e
->Iex
.Unop
.arg
);
5595 HReg dst
= newVRegD(env
);
5596 addInstr(env
, ARMInstr_VCvtSD(True
/*sToD*/, dst
, src
));
5600 case Iop_I32StoF64
: {
5601 HReg src
= iselIntExpr_R(env
, e
->Iex
.Unop
.arg
);
5602 HReg f32
= newVRegF(env
);
5603 HReg dst
= newVRegD(env
);
5604 Bool syned
= e
->Iex
.Unop
.op
== Iop_I32StoF64
;
5606 addInstr(env
, ARMInstr_VXferS(True
/*toS*/, f32
, src
));
5607 /* FSITOD dst, f32 */
5608 addInstr(env
, ARMInstr_VCvtID(True
/*iToD*/, syned
,
5617 if (e
->tag
== Iex_Binop
) {
5618 switch (e
->Iex
.Binop
.op
) {
5620 /* first arg is rounding mode; we ignore it. */
5621 HReg src
= iselDblExpr(env
, e
->Iex
.Binop
.arg2
);
5622 HReg dst
= newVRegD(env
);
5623 addInstr(env
, ARMInstr_VUnaryD(ARMvfpu_SQRT
, dst
, src
));
5626 case Iop_RoundF64toInt
: {
5627 /* We can only generate this on a >= V8 capable target. But
5628 that's OK since we should only be asked to generate for V8
5629 capable guests, and we assume here that host == guest. */
5630 if (VEX_ARM_ARCHLEVEL(env
->hwcaps
) >= 8) {
5631 HReg src
= iselDblExpr(env
, e
->Iex
.Binop
.arg2
);
5632 HReg dst
= newVRegD(env
);
5633 set_VFP_rounding_mode(env
, e
->Iex
.Binop
.arg1
);
5634 addInstr(env
, ARMInstr_VRIntR(True
/*isF64*/, dst
, src
));
5635 set_VFP_rounding_default(env
);
5638 /* not a V8 target, so we can't select insns for this. */
5642 case Iop_MinNumF64
: {
5643 /* Same comments regarding V8 support as for Iop_RoundF64toInt. */
5644 if (VEX_ARM_ARCHLEVEL(env
->hwcaps
) >= 8) {
5645 HReg srcL
= iselDblExpr(env
, e
->Iex
.Binop
.arg1
);
5646 HReg srcR
= iselDblExpr(env
, e
->Iex
.Binop
.arg2
);
5647 HReg dst
= newVRegD(env
);
5648 Bool isMax
= e
->Iex
.Binop
.op
== Iop_MaxNumF64
;
5649 addInstr(env
, ARMInstr_VMinMaxNum(
5650 True
/*isF64*/, isMax
, dst
, srcL
, srcR
));
5653 /* not a V8 target, so we can't select insns for this. */
5661 if (e
->tag
== Iex_Triop
) {
5662 IRTriop
*triop
= e
->Iex
.Triop
.details
;
5664 switch (triop
->op
) {
5669 ARMVfpOp op
= 0; /*INVALID*/
5670 HReg argL
= iselDblExpr(env
, triop
->arg2
);
5671 HReg argR
= iselDblExpr(env
, triop
->arg3
);
5672 HReg dst
= newVRegD(env
);
5673 switch (triop
->op
) {
5674 case Iop_DivF64
: op
= ARMvfp_DIV
; break;
5675 case Iop_MulF64
: op
= ARMvfp_MUL
; break;
5676 case Iop_AddF64
: op
= ARMvfp_ADD
; break;
5677 case Iop_SubF64
: op
= ARMvfp_SUB
; break;
5678 default: vassert(0);
5680 addInstr(env
, ARMInstr_VAluD(op
, dst
, argL
, argR
));
5688 if (e
->tag
== Iex_ITE
) { // VFD
5690 && typeOfIRExpr(env
->type_env
,e
->Iex
.ITE
.cond
) == Ity_I1
) {
5691 HReg r1
= iselDblExpr(env
, e
->Iex
.ITE
.iftrue
);
5692 HReg r0
= iselDblExpr(env
, e
->Iex
.ITE
.iffalse
);
5693 HReg dst
= newVRegD(env
);
5694 addInstr(env
, ARMInstr_VUnaryD(ARMvfpu_COPY
, dst
, r1
));
5695 ARMCondCode cc
= iselCondCode(env
, e
->Iex
.ITE
.cond
);
5696 addInstr(env
, ARMInstr_VCMovD(cc
^ 1, dst
, r0
));
5702 vpanic("iselDblExpr_wrk");
5706 /*---------------------------------------------------------*/
5707 /*--- ISEL: Floating point expressions (32 bit) ---*/
5708 /*---------------------------------------------------------*/
5710 /* Compute a 32-bit floating point value into a register, the identity
5711 of which is returned. As with iselIntExpr_R, the reg may be either
5712 real or virtual; in any case it must not be changed by subsequent
5713 code emitted by the caller. */
5715 static HReg
iselFltExpr ( ISelEnv
* env
, IRExpr
* e
)
5717 HReg r
= iselFltExpr_wrk( env
, e
);
5719 vex_printf("\n"); ppIRExpr(e
); vex_printf("\n");
5721 vassert(hregClass(r
) == HRcFlt32
);
5722 vassert(hregIsVirtual(r
));
5726 /* DO NOT CALL THIS DIRECTLY */
5727 static HReg
iselFltExpr_wrk ( ISelEnv
* env
, IRExpr
* e
)
5729 IRType ty
= typeOfIRExpr(env
->type_env
,e
);
5731 vassert(ty
== Ity_F32
);
5733 if (e
->tag
== Iex_RdTmp
) {
5734 return lookupIRTemp(env
, e
->Iex
.RdTmp
.tmp
);
5737 if (e
->tag
== Iex_Load
&& e
->Iex
.Load
.end
== Iend_LE
) {
5739 HReg res
= newVRegF(env
);
5740 vassert(e
->Iex
.Load
.ty
== Ity_F32
);
5741 am
= iselIntExpr_AModeV(env
, e
->Iex
.Load
.addr
);
5742 addInstr(env
, ARMInstr_VLdStS(True
/*isLoad*/, res
, am
));
5746 if (e
->tag
== Iex_Get
) {
5747 // XXX This won't work if offset > 1020 or is not 0 % 4.
5748 // In which case we'll have to generate more longwinded code.
5749 ARMAModeV
* am
= mkARMAModeV(hregARM_R8(), e
->Iex
.Get
.offset
);
5750 HReg res
= newVRegF(env
);
5751 addInstr(env
, ARMInstr_VLdStS(True
/*isLoad*/, res
, am
));
5755 if (e
->tag
== Iex_Unop
) {
5756 switch (e
->Iex
.Unop
.op
) {
5757 case Iop_ReinterpI32asF32
: {
5758 HReg dst
= newVRegF(env
);
5759 HReg src
= iselIntExpr_R(env
, e
->Iex
.Unop
.arg
);
5760 addInstr(env
, ARMInstr_VXferS(True
/*toS*/, dst
, src
));
5764 HReg src
= iselFltExpr(env
, e
->Iex
.Unop
.arg
);
5765 HReg dst
= newVRegF(env
);
5766 addInstr(env
, ARMInstr_VUnaryS(ARMvfpu_NEG
, dst
, src
));
5770 HReg src
= iselFltExpr(env
, e
->Iex
.Unop
.arg
);
5771 HReg dst
= newVRegF(env
);
5772 addInstr(env
, ARMInstr_VUnaryS(ARMvfpu_ABS
, dst
, src
));
5780 if (e
->tag
== Iex_Binop
) {
5781 switch (e
->Iex
.Binop
.op
) {
5783 /* first arg is rounding mode; we ignore it. */
5784 HReg src
= iselFltExpr(env
, e
->Iex
.Binop
.arg2
);
5785 HReg dst
= newVRegF(env
);
5786 addInstr(env
, ARMInstr_VUnaryS(ARMvfpu_SQRT
, dst
, src
));
5789 case Iop_F64toF32
: {
5790 HReg valD
= iselDblExpr(env
, e
->Iex
.Binop
.arg2
);
5791 set_VFP_rounding_mode(env
, e
->Iex
.Binop
.arg1
);
5792 HReg valS
= newVRegF(env
);
5793 /* FCVTSD valS, valD */
5794 addInstr(env
, ARMInstr_VCvtSD(False
/*!sToD*/, valS
, valD
));
5795 set_VFP_rounding_default(env
);
5798 case Iop_RoundF32toInt
: {
5799 /* We can only generate this on a >= V8 capable target. But
5800 that's OK since we should only be asked to generate for V8
5801 capable guests, and we assume here that host == guest. */
5802 if (VEX_ARM_ARCHLEVEL(env
->hwcaps
) >= 8) {
5803 HReg src
= iselFltExpr(env
, e
->Iex
.Binop
.arg2
);
5804 HReg dst
= newVRegF(env
);
5805 set_VFP_rounding_mode(env
, e
->Iex
.Binop
.arg1
);
5806 addInstr(env
, ARMInstr_VRIntR(False
/*!isF64*/, dst
, src
));
5807 set_VFP_rounding_default(env
);
5810 /* not a V8 target, so we can't select insns for this. */
5814 case Iop_MinNumF32
: {
5815 /* Same comments regarding V8 support as for Iop_RoundF32toInt. */
5816 if (VEX_ARM_ARCHLEVEL(env
->hwcaps
) >= 8) {
5817 HReg srcL
= iselFltExpr(env
, e
->Iex
.Binop
.arg1
);
5818 HReg srcR
= iselFltExpr(env
, e
->Iex
.Binop
.arg2
);
5819 HReg dst
= newVRegF(env
);
5820 Bool isMax
= e
->Iex
.Binop
.op
== Iop_MaxNumF32
;
5821 addInstr(env
, ARMInstr_VMinMaxNum(
5822 False
/*!isF64*/, isMax
, dst
, srcL
, srcR
));
5825 /* not a V8 target, so we can't select insns for this. */
5833 if (e
->tag
== Iex_Triop
) {
5834 IRTriop
*triop
= e
->Iex
.Triop
.details
;
5836 switch (triop
->op
) {
5841 ARMVfpOp op
= 0; /*INVALID*/
5842 HReg argL
= iselFltExpr(env
, triop
->arg2
);
5843 HReg argR
= iselFltExpr(env
, triop
->arg3
);
5844 HReg dst
= newVRegF(env
);
5845 switch (triop
->op
) {
5846 case Iop_DivF32
: op
= ARMvfp_DIV
; break;
5847 case Iop_MulF32
: op
= ARMvfp_MUL
; break;
5848 case Iop_AddF32
: op
= ARMvfp_ADD
; break;
5849 case Iop_SubF32
: op
= ARMvfp_SUB
; break;
5850 default: vassert(0);
5852 addInstr(env
, ARMInstr_VAluS(op
, dst
, argL
, argR
));
5860 if (e
->tag
== Iex_ITE
) { // VFD
5862 && typeOfIRExpr(env
->type_env
,e
->Iex
.ITE
.cond
) == Ity_I1
) {
5864 HReg r1
= iselFltExpr(env
, e
->Iex
.ITE
.iftrue
);
5865 HReg r0
= iselFltExpr(env
, e
->Iex
.ITE
.iffalse
);
5866 HReg dst
= newVRegF(env
);
5867 addInstr(env
, ARMInstr_VUnaryS(ARMvfpu_COPY
, dst
, r1
));
5868 cc
= iselCondCode(env
, e
->Iex
.ITE
.cond
);
5869 addInstr(env
, ARMInstr_VCMovS(cc
^ 1, dst
, r0
));
5875 vpanic("iselFltExpr_wrk");
5879 /*---------------------------------------------------------*/
5880 /*--- ISEL: Statements ---*/
5881 /*---------------------------------------------------------*/
5883 static void iselStmt ( ISelEnv
* env
, IRStmt
* stmt
)
5885 if (vex_traceflags
& VEX_TRACE_VCODE
) {
5886 vex_printf("\n-- ");
5890 switch (stmt
->tag
) {
5892 /* --------- STORE --------- */
5893 /* little-endian write to memory */
5895 IRType tya
= typeOfIRExpr(env
->type_env
, stmt
->Ist
.Store
.addr
);
5896 IRType tyd
= typeOfIRExpr(env
->type_env
, stmt
->Ist
.Store
.data
);
5897 IREndness end
= stmt
->Ist
.Store
.end
;
5899 if (tya
!= Ity_I32
|| end
!= Iend_LE
)
5902 if (tyd
== Ity_I32
) {
5903 HReg rD
= iselIntExpr_R(env
, stmt
->Ist
.Store
.data
);
5904 ARMAMode1
* am
= iselIntExpr_AMode1(env
, stmt
->Ist
.Store
.addr
);
5905 addInstr(env
, ARMInstr_LdSt32(ARMcc_AL
, False
/*!isLoad*/, rD
, am
));
5908 if (tyd
== Ity_I16
) {
5909 HReg rD
= iselIntExpr_R(env
, stmt
->Ist
.Store
.data
);
5910 ARMAMode2
* am
= iselIntExpr_AMode2(env
, stmt
->Ist
.Store
.addr
);
5911 addInstr(env
, ARMInstr_LdSt16(ARMcc_AL
,
5913 False
/*!isSignedLoad*/, rD
, am
));
5916 if (tyd
== Ity_I8
) {
5917 HReg rD
= iselIntExpr_R(env
, stmt
->Ist
.Store
.data
);
5918 ARMAMode1
* am
= iselIntExpr_AMode1(env
, stmt
->Ist
.Store
.addr
);
5919 addInstr(env
, ARMInstr_LdSt8U(ARMcc_AL
, False
/*!isLoad*/, rD
, am
));
5922 if (tyd
== Ity_I64
) {
5923 if (env
->hwcaps
& VEX_HWCAPS_ARM_NEON
) {
5924 HReg dD
= iselNeon64Expr(env
, stmt
->Ist
.Store
.data
);
5925 ARMAModeN
* am
= iselIntExpr_AModeN(env
, stmt
->Ist
.Store
.addr
);
5926 addInstr(env
, ARMInstr_NLdStD(False
, dD
, am
));
5928 HReg rDhi
, rDlo
, rA
;
5929 iselInt64Expr(&rDhi
, &rDlo
, env
, stmt
->Ist
.Store
.data
);
5930 rA
= iselIntExpr_R(env
, stmt
->Ist
.Store
.addr
);
5931 addInstr(env
, ARMInstr_LdSt32(ARMcc_AL
, False
/*!load*/, rDhi
,
5932 ARMAMode1_RI(rA
,4)));
5933 addInstr(env
, ARMInstr_LdSt32(ARMcc_AL
, False
/*!load*/, rDlo
,
5934 ARMAMode1_RI(rA
,0)));
5938 if (tyd
== Ity_F64
) {
5939 HReg dD
= iselDblExpr(env
, stmt
->Ist
.Store
.data
);
5940 ARMAModeV
* am
= iselIntExpr_AModeV(env
, stmt
->Ist
.Store
.addr
);
5941 addInstr(env
, ARMInstr_VLdStD(False
/*!isLoad*/, dD
, am
));
5944 if (tyd
== Ity_F32
) {
5945 HReg fD
= iselFltExpr(env
, stmt
->Ist
.Store
.data
);
5946 ARMAModeV
* am
= iselIntExpr_AModeV(env
, stmt
->Ist
.Store
.addr
);
5947 addInstr(env
, ARMInstr_VLdStS(False
/*!isLoad*/, fD
, am
));
5950 if (tyd
== Ity_V128
) {
5951 HReg qD
= iselNeonExpr(env
, stmt
->Ist
.Store
.data
);
5952 ARMAModeN
* am
= iselIntExpr_AModeN(env
, stmt
->Ist
.Store
.addr
);
5953 addInstr(env
, ARMInstr_NLdStQ(False
, qD
, am
));
5960 /* --------- CONDITIONAL STORE --------- */
5961 /* conditional little-endian write to memory */
5963 IRStoreG
* sg
= stmt
->Ist
.StoreG
.details
;
5964 IRType tya
= typeOfIRExpr(env
->type_env
, sg
->addr
);
5965 IRType tyd
= typeOfIRExpr(env
->type_env
, sg
->data
);
5966 IREndness end
= sg
->end
;
5968 if (tya
!= Ity_I32
|| end
!= Iend_LE
)
5974 HReg rD
= iselIntExpr_R(env
, sg
->data
);
5975 ARMAMode1
* am
= iselIntExpr_AMode1(env
, sg
->addr
);
5976 ARMCondCode cc
= iselCondCode(env
, sg
->guard
);
5977 addInstr(env
, (tyd
== Ity_I32
? ARMInstr_LdSt32
: ARMInstr_LdSt8U
)
5978 (cc
, False
/*!isLoad*/, rD
, am
));
5982 HReg rD
= iselIntExpr_R(env
, sg
->data
);
5983 ARMAMode2
* am
= iselIntExpr_AMode2(env
, sg
->addr
);
5984 ARMCondCode cc
= iselCondCode(env
, sg
->guard
);
5985 addInstr(env
, ARMInstr_LdSt16(cc
,
5987 False
/*!isSignedLoad*/, rD
, am
));
5996 /* --------- CONDITIONAL LOAD --------- */
5997 /* conditional little-endian load from memory */
5999 IRLoadG
* lg
= stmt
->Ist
.LoadG
.details
;
6000 IRType tya
= typeOfIRExpr(env
->type_env
, lg
->addr
);
6001 IREndness end
= lg
->end
;
6003 if (tya
!= Ity_I32
|| end
!= Iend_LE
)
6008 case ILGop_Ident32
: {
6009 HReg rAlt
= iselIntExpr_R(env
, lg
->alt
);
6010 ARMAMode1
* am
= iselIntExpr_AMode1(env
, lg
->addr
);
6011 HReg rD
= lookupIRTemp(env
, lg
->dst
);
6012 addInstr(env
, mk_iMOVds_RR(rD
, rAlt
));
6013 ARMCondCode cc
= iselCondCode(env
, lg
->guard
);
6014 addInstr(env
, (lg
->cvt
== ILGop_Ident32
? ARMInstr_LdSt32
6016 (cc
, True
/*isLoad*/, rD
, am
));
6021 case ILGop_8Sto32
: {
6022 HReg rAlt
= iselIntExpr_R(env
, lg
->alt
);
6023 ARMAMode2
* am
= iselIntExpr_AMode2(env
, lg
->addr
);
6024 HReg rD
= lookupIRTemp(env
, lg
->dst
);
6025 addInstr(env
, mk_iMOVds_RR(rD
, rAlt
));
6026 ARMCondCode cc
= iselCondCode(env
, lg
->guard
);
6027 if (lg
->cvt
== ILGop_8Sto32
) {
6028 addInstr(env
, ARMInstr_Ld8S(cc
, rD
, am
));
6030 vassert(lg
->cvt
== ILGop_16Sto32
|| lg
->cvt
== ILGop_16Uto32
);
6031 Bool sx
= lg
->cvt
== ILGop_16Sto32
;
6032 addInstr(env
, ARMInstr_LdSt16(cc
, True
/*isLoad*/, sx
, rD
, am
));
6042 /* --------- PUT --------- */
6043 /* write guest state, fixed offset */
6045 IRType tyd
= typeOfIRExpr(env
->type_env
, stmt
->Ist
.Put
.data
);
6047 if (tyd
== Ity_I32
) {
6048 HReg rD
= iselIntExpr_R(env
, stmt
->Ist
.Put
.data
);
6049 ARMAMode1
* am
= ARMAMode1_RI(hregARM_R8(), stmt
->Ist
.Put
.offset
);
6050 addInstr(env
, ARMInstr_LdSt32(ARMcc_AL
, False
/*!isLoad*/, rD
, am
));
6053 if (tyd
== Ity_I64
) {
6054 if (env
->hwcaps
& VEX_HWCAPS_ARM_NEON
) {
6055 HReg addr
= newVRegI(env
);
6056 HReg qD
= iselNeon64Expr(env
, stmt
->Ist
.Put
.data
);
6057 addInstr(env
, ARMInstr_Add32(addr
, hregARM_R8(),
6058 stmt
->Ist
.Put
.offset
));
6059 addInstr(env
, ARMInstr_NLdStD(False
, qD
, mkARMAModeN_R(addr
)));
6062 ARMAMode1
* am0
= ARMAMode1_RI(hregARM_R8(),
6063 stmt
->Ist
.Put
.offset
+ 0);
6064 ARMAMode1
* am4
= ARMAMode1_RI(hregARM_R8(),
6065 stmt
->Ist
.Put
.offset
+ 4);
6066 iselInt64Expr(&rDhi
, &rDlo
, env
, stmt
->Ist
.Put
.data
);
6067 addInstr(env
, ARMInstr_LdSt32(ARMcc_AL
, False
/*!isLoad*/,
6069 addInstr(env
, ARMInstr_LdSt32(ARMcc_AL
, False
/*!isLoad*/,
6074 if (tyd
== Ity_F64
) {
6075 // XXX This won't work if offset > 1020 or is not 0 % 4.
6076 // In which case we'll have to generate more longwinded code.
6077 ARMAModeV
* am
= mkARMAModeV(hregARM_R8(), stmt
->Ist
.Put
.offset
);
6078 HReg rD
= iselDblExpr(env
, stmt
->Ist
.Put
.data
);
6079 addInstr(env
, ARMInstr_VLdStD(False
/*!isLoad*/, rD
, am
));
6082 if (tyd
== Ity_F32
) {
6083 // XXX This won't work if offset > 1020 or is not 0 % 4.
6084 // In which case we'll have to generate more longwinded code.
6085 ARMAModeV
* am
= mkARMAModeV(hregARM_R8(), stmt
->Ist
.Put
.offset
);
6086 HReg rD
= iselFltExpr(env
, stmt
->Ist
.Put
.data
);
6087 addInstr(env
, ARMInstr_VLdStS(False
/*!isLoad*/, rD
, am
));
6090 if (tyd
== Ity_V128
) {
6091 HReg addr
= newVRegI(env
);
6092 HReg qD
= iselNeonExpr(env
, stmt
->Ist
.Put
.data
);
6093 addInstr(env
, ARMInstr_Add32(addr
, hregARM_R8(),
6094 stmt
->Ist
.Put
.offset
));
6095 addInstr(env
, ARMInstr_NLdStQ(False
, qD
, mkARMAModeN_R(addr
)));
6101 /* --------- TMP --------- */
6102 /* assign value to temporary */
6104 IRTemp tmp
= stmt
->Ist
.WrTmp
.tmp
;
6105 IRType ty
= typeOfIRTemp(env
->type_env
, tmp
);
6107 if (ty
== Ity_I32
|| ty
== Ity_I16
|| ty
== Ity_I8
) {
6108 ARMRI84
* ri84
= iselIntExpr_RI84(NULL
, False
,
6109 env
, stmt
->Ist
.WrTmp
.data
);
6110 HReg dst
= lookupIRTemp(env
, tmp
);
6111 addInstr(env
, ARMInstr_Mov(dst
,ri84
));
6115 /* Here, we are generating a I1 value into a 32 bit register.
6116 Make sure the value in the register is only zero or one,
6117 but no other. This allows optimisation of the
6118 1Uto32(tmp:I1) case, by making it simply a copy of the
6119 register holding 'tmp'. The point being that the value in
6120 the register holding 'tmp' can only have been created
6122 HReg dst
= lookupIRTemp(env
, tmp
);
6123 ARMCondCode cond
= iselCondCode(env
, stmt
->Ist
.WrTmp
.data
);
6124 addInstr(env
, ARMInstr_Mov(dst
, ARMRI84_I84(0,0)));
6125 addInstr(env
, ARMInstr_CMov(cond
, dst
, ARMRI84_I84(1,0)));
6128 if (ty
== Ity_I64
) {
6129 if (env
->hwcaps
& VEX_HWCAPS_ARM_NEON
) {
6130 HReg src
= iselNeon64Expr(env
, stmt
->Ist
.WrTmp
.data
);
6131 HReg dst
= lookupIRTemp(env
, tmp
);
6132 addInstr(env
, ARMInstr_NUnary(ARMneon_COPY
, dst
, src
, 4, False
));
6134 HReg rHi
, rLo
, dstHi
, dstLo
;
6135 iselInt64Expr(&rHi
,&rLo
, env
, stmt
->Ist
.WrTmp
.data
);
6136 lookupIRTemp64( &dstHi
, &dstLo
, env
, tmp
);
6137 addInstr(env
, mk_iMOVds_RR(dstHi
, rHi
) );
6138 addInstr(env
, mk_iMOVds_RR(dstLo
, rLo
) );
6142 if (ty
== Ity_F64
) {
6143 HReg src
= iselDblExpr(env
, stmt
->Ist
.WrTmp
.data
);
6144 HReg dst
= lookupIRTemp(env
, tmp
);
6145 addInstr(env
, ARMInstr_VUnaryD(ARMvfpu_COPY
, dst
, src
));
6148 if (ty
== Ity_F32
) {
6149 HReg src
= iselFltExpr(env
, stmt
->Ist
.WrTmp
.data
);
6150 HReg dst
= lookupIRTemp(env
, tmp
);
6151 addInstr(env
, ARMInstr_VUnaryS(ARMvfpu_COPY
, dst
, src
));
6154 if (ty
== Ity_V128
) {
6155 HReg src
= iselNeonExpr(env
, stmt
->Ist
.WrTmp
.data
);
6156 HReg dst
= lookupIRTemp(env
, tmp
);
6157 addInstr(env
, ARMInstr_NUnary(ARMneon_COPY
, dst
, src
, 4, True
));
6163 /* --------- Call to DIRTY helper --------- */
6164 /* call complex ("dirty") helper function */
6166 IRDirty
* d
= stmt
->Ist
.Dirty
.details
;
6168 /* Figure out the return type, if any. */
6169 IRType retty
= Ity_INVALID
;
6170 if (d
->tmp
!= IRTemp_INVALID
)
6171 retty
= typeOfIRTemp(env
->type_env
, d
->tmp
);
6173 Bool retty_ok
= False
;
6175 case Ity_INVALID
: /* function doesn't return anything */
6176 case Ity_I64
: case Ity_I32
: case Ity_I16
: case Ity_I8
:
6178 retty_ok
= True
; break;
6183 break; /* will go to stmt_fail: */
6185 /* Marshal args, do the call, and set the return value to 0x555..555
6186 if this is a conditional call that returns a value and the
6189 RetLoc rloc
= mk_RetLoc_INVALID();
6190 Bool ok
= doHelperCall( &addToSp
, &rloc
, env
,
6191 d
->guard
, d
->cee
, retty
, d
->args
);
6192 if (!ok
) goto stmt_fail
;
6193 vassert(is_sane_RetLoc(rloc
));
6195 /* Now figure out what to do with the returned value, if any. */
6198 /* No return value. Nothing to do. */
6199 vassert(d
->tmp
== IRTemp_INVALID
);
6200 vassert(rloc
.pri
== RLPri_None
);
6201 vassert(addToSp
== 0);
6205 vassert(rloc
.pri
== RLPri_2Int
);
6206 vassert(addToSp
== 0);
6207 if (env
->hwcaps
& VEX_HWCAPS_ARM_NEON
) {
6208 HReg tmp
= lookupIRTemp(env
, d
->tmp
);
6209 addInstr(env
, ARMInstr_VXferD(True
, tmp
, hregARM_R1(),
6213 /* The returned value is in r1:r0. Park it in the
6214 register-pair associated with tmp. */
6215 lookupIRTemp64( &dstHi
, &dstLo
, env
, d
->tmp
);
6216 addInstr(env
, mk_iMOVds_RR(dstHi
, hregARM_R1()) );
6217 addInstr(env
, mk_iMOVds_RR(dstLo
, hregARM_R0()) );
6221 case Ity_I32
: case Ity_I16
: case Ity_I8
: {
6222 vassert(rloc
.pri
== RLPri_Int
);
6223 vassert(addToSp
== 0);
6224 /* The returned value is in r0. Park it in the register
6225 associated with tmp. */
6226 HReg dst
= lookupIRTemp(env
, d
->tmp
);
6227 addInstr(env
, mk_iMOVds_RR(dst
, hregARM_R0()) );
6231 /* The returned value is on the stack, and *retloc tells
6232 us where. Fish it off the stack and then move the
6233 stack pointer upwards to clear it, as directed by
6235 vassert(rloc
.pri
== RLPri_V128SpRel
);
6236 vassert(rloc
.spOff
< 256); // else ARMRI84_I84(_,0) can't encode it
6237 vassert(addToSp
>= 16);
6238 vassert(addToSp
<= 256);
6239 /* Both the stack delta and the offset must be at least 8-aligned.
6240 If that isn't so, doHelperCall() has generated bad code. */
6241 vassert(0 == (rloc
.spOff
% 8));
6242 vassert(0 == (addToSp
% 8));
6243 HReg dst
= lookupIRTemp(env
, d
->tmp
);
6244 HReg tmp
= newVRegI(env
);
6245 HReg sp
= hregARM_R13();
6246 addInstr(env
, ARMInstr_Alu(ARMalu_ADD
,
6247 tmp
, sp
, ARMRI84_I84(rloc
.spOff
,0)));
6248 ARMAModeN
* am
= mkARMAModeN_R(tmp
);
6249 /* This load could be done with its effective address 0 % 8,
6250 because that's the best stack alignment that we can be
6252 addInstr(env
, ARMInstr_NLdStQ(True
/*load*/, dst
, am
));
6255 = addToSp
== 256 ? ARMRI84_I84(64, 15) // 64 `ror` (15 * 2)
6256 : ARMRI84_I84(addToSp
, 0);
6257 addInstr(env
, ARMInstr_Alu(ARMalu_ADD
, sp
, sp
, spAdj
));
6267 /* --------- Load Linked and Store Conditional --------- */
6269 if (stmt
->Ist
.LLSC
.storedata
== NULL
) {
6271 IRTemp res
= stmt
->Ist
.LLSC
.result
;
6272 IRType ty
= typeOfIRTemp(env
->type_env
, res
);
6273 if (ty
== Ity_I32
|| ty
== Ity_I16
|| ty
== Ity_I8
) {
6275 HReg r_dst
= lookupIRTemp(env
, res
);
6276 HReg raddr
= iselIntExpr_R(env
, stmt
->Ist
.LLSC
.addr
);
6278 case Ity_I8
: szB
= 1; break;
6279 case Ity_I16
: szB
= 2; break;
6280 case Ity_I32
: szB
= 4; break;
6281 default: vassert(0);
6283 addInstr(env
, mk_iMOVds_RR(hregARM_R4(), raddr
));
6284 addInstr(env
, ARMInstr_LdrEX(szB
));
6285 addInstr(env
, mk_iMOVds_RR(r_dst
, hregARM_R2()));
6288 if (ty
== Ity_I64
) {
6289 HReg raddr
= iselIntExpr_R(env
, stmt
->Ist
.LLSC
.addr
);
6290 addInstr(env
, mk_iMOVds_RR(hregARM_R4(), raddr
));
6291 addInstr(env
, ARMInstr_LdrEX(8));
6292 /* Result is in r3:r2. On a non-NEON capable CPU, we must
6293 move it into a result register pair. On a NEON capable
6294 CPU, the result register will be a 64 bit NEON
6295 register, so we must move it there instead. */
6296 if (env
->hwcaps
& VEX_HWCAPS_ARM_NEON
) {
6297 HReg dst
= lookupIRTemp(env
, res
);
6298 addInstr(env
, ARMInstr_VXferD(True
, dst
, hregARM_R3(),
6301 HReg r_dst_hi
, r_dst_lo
;
6302 lookupIRTemp64(&r_dst_hi
, &r_dst_lo
, env
, res
);
6303 addInstr(env
, mk_iMOVds_RR(r_dst_lo
, hregARM_R2()));
6304 addInstr(env
, mk_iMOVds_RR(r_dst_hi
, hregARM_R3()));
6312 IRType tyd
= typeOfIRExpr(env
->type_env
, stmt
->Ist
.LLSC
.storedata
);
6313 if (tyd
== Ity_I32
|| tyd
== Ity_I16
|| tyd
== Ity_I8
) {
6315 HReg rD
= iselIntExpr_R(env
, stmt
->Ist
.LLSC
.storedata
);
6316 HReg rA
= iselIntExpr_R(env
, stmt
->Ist
.LLSC
.addr
);
6318 case Ity_I8
: szB
= 1; break;
6319 case Ity_I16
: szB
= 2; break;
6320 case Ity_I32
: szB
= 4; break;
6321 default: vassert(0);
6323 addInstr(env
, mk_iMOVds_RR(hregARM_R2(), rD
));
6324 addInstr(env
, mk_iMOVds_RR(hregARM_R4(), rA
));
6325 addInstr(env
, ARMInstr_StrEX(szB
));
6327 vassert(tyd
== Ity_I64
);
6328 /* This is really ugly. There is no is/is-not NEON
6329 decision akin to the case for LL, because iselInt64Expr
6330 fudges this for us, and always gets the result into two
6331 GPRs even if this means moving it from a NEON
6334 iselInt64Expr(&rDhi
, &rDlo
, env
, stmt
->Ist
.LLSC
.storedata
);
6335 HReg rA
= iselIntExpr_R(env
, stmt
->Ist
.LLSC
.addr
);
6336 addInstr(env
, mk_iMOVds_RR(hregARM_R2(), rDlo
));
6337 addInstr(env
, mk_iMOVds_RR(hregARM_R3(), rDhi
));
6338 addInstr(env
, mk_iMOVds_RR(hregARM_R4(), rA
));
6339 addInstr(env
, ARMInstr_StrEX(8));
6341 /* now r0 is 1 if failed, 0 if success. Change to IR
6342 conventions (0 is fail, 1 is success). Also transfer
6344 IRTemp res
= stmt
->Ist
.LLSC
.result
;
6345 IRType ty
= typeOfIRTemp(env
->type_env
, res
);
6346 HReg r_res
= lookupIRTemp(env
, res
);
6347 ARMRI84
* one
= ARMRI84_I84(1,0);
6348 vassert(ty
== Ity_I1
);
6349 addInstr(env
, ARMInstr_Alu(ARMalu_XOR
, r_res
, hregARM_R0(), one
));
6350 /* And be conservative -- mask off all but the lowest bit */
6351 addInstr(env
, ARMInstr_Alu(ARMalu_AND
, r_res
, r_res
, one
));
6357 /* --------- MEM FENCE --------- */
6359 switch (stmt
->Ist
.MBE
.event
) {
6361 addInstr(env
, ARMInstr_MFence());
6363 case Imbe_CancelReservation
:
6364 addInstr(env
, ARMInstr_CLREX());
6371 /* --------- INSTR MARK --------- */
6372 /* Doesn't generate any executable code ... */
6376 /* --------- NO-OP --------- */
6380 /* --------- EXIT --------- */
6382 if (stmt
->Ist
.Exit
.dst
->tag
!= Ico_U32
)
6383 vpanic("isel_arm: Ist_Exit: dst is not a 32-bit value");
6385 ARMCondCode cc
= iselCondCode(env
, stmt
->Ist
.Exit
.guard
);
6386 ARMAMode1
* amR15T
= ARMAMode1_RI(hregARM_R8(),
6387 stmt
->Ist
.Exit
.offsIP
);
6389 /* Case: boring transfer to known address */
6390 if (stmt
->Ist
.Exit
.jk
== Ijk_Boring
6391 || stmt
->Ist
.Exit
.jk
== Ijk_Call
6392 || stmt
->Ist
.Exit
.jk
== Ijk_Ret
) {
6393 if (env
->chainingAllowed
) {
6394 /* .. almost always true .. */
6395 /* Skip the event check at the dst if this is a forwards
6398 = stmt
->Ist
.Exit
.dst
->Ico
.U32
> env
->max_ga
;
6399 if (0) vex_printf("%s", toFastEP
? "Y" : ",");
6400 addInstr(env
, ARMInstr_XDirect(stmt
->Ist
.Exit
.dst
->Ico
.U32
,
6401 amR15T
, cc
, toFastEP
));
6403 /* .. very occasionally .. */
6404 /* We can't use chaining, so ask for an assisted transfer,
6405 as that's the only alternative that is allowable. */
6406 HReg r
= iselIntExpr_R(env
, IRExpr_Const(stmt
->Ist
.Exit
.dst
));
6407 addInstr(env
, ARMInstr_XAssisted(r
, amR15T
, cc
, Ijk_Boring
));
6412 /* Case: assisted transfer to arbitrary address */
6413 switch (stmt
->Ist
.Exit
.jk
) {
6414 /* Keep this list in sync with that in iselNext below */
6418 case Ijk_Sys_syscall
:
6419 case Ijk_InvalICache
:
6422 HReg r
= iselIntExpr_R(env
, IRExpr_Const(stmt
->Ist
.Exit
.dst
));
6423 addInstr(env
, ARMInstr_XAssisted(r
, amR15T
, cc
,
6424 stmt
->Ist
.Exit
.jk
));
6431 /* Do we ever expect to see any other kind? */
6443 /*---------------------------------------------------------*/
6444 /*--- ISEL: Basic block terminators (Nexts) ---*/
6445 /*---------------------------------------------------------*/
/* Terminate the superblock: generate code to transfer control to the
   guest address computed by `next`, with jump kind `jk`.  `offsIP` is
   the guest-state offset of the instruction pointer; it is turned into
   an R8-relative amode (amR15T) at which the XDirect/XIndir/XAssisted
   instructions store the destination.
   NOTE(review): this extraction has dropped a number of interior
   source lines (the embedded original numbering jumps, e.g. 6449,
   6452, 6454-6457, 6468-6469, 6473-6475, 6481-6486, 6488, 6494,
   6496-6503, 6507-6509, 6512-6513, 6517-6522, 6526-6531 are absent)
   -- verify every gap against the upstream file before relying on
   this text. */
6447 static void iselNext ( ISelEnv
* env
,
6448 IRExpr
* next
, IRJumpKind jk
, Int offsIP
)
/* Optional tracing of the final PUT(IP)/exit when vcode tracing is on. */
6450 if (vex_traceflags
& VEX_TRACE_VCODE
) {
6451 vex_printf( "\n-- PUT(%d) = ", offsIP
);
6453 vex_printf( "; exit-");
6458 /* Case: boring transfer to known address */
6459 if (next
->tag
== Iex_Const
) {
6460 IRConst
* cdst
= next
->Iex
.Const
.con
;
/* ARM guest addresses are 32 bits, so the constant must be Ico_U32. */
6461 vassert(cdst
->tag
== Ico_U32
);
6462 if (jk
== Ijk_Boring
|| jk
== Ijk_Call
) {
6463 /* Boring transfer to known address */
6464 ARMAMode1
* amR15T
= ARMAMode1_RI(hregARM_R8(), offsIP
);
/* Chaining allowed: emit a directly-patchable jump (XDirect). */
6465 if (env
->chainingAllowed
) {
6466 /* .. almost always true .. */
6467 /* Skip the event check at the dst if this is a forwards
/* toFastEP is True for a forwards edge, i.e. when the destination
   is above the maximum guest address of this SB (cdst > max_ga).
   NOTE(review): the declaration of toFastEP (orig. lines 6468-6469)
   is missing from this extraction. */
6470 = cdst
->Ico
.U32
> env
->max_ga
;
6471 if (0) vex_printf("%s", toFastEP
? "X" : ".");
6472 addInstr(env
, ARMInstr_XDirect(cdst
->Ico
.U32
,
6476 /* .. very occasionally .. */
6477 /* We can't use chaining, so ask for an assisted transfer,
6478 as that's the only alternative that is allowable. */
6479 HReg r
= iselIntExpr_R(env
, next
);
6480 addInstr(env
, ARMInstr_XAssisted(r
, amR15T
, ARMcc_AL
,
6487 /* Case: call/return (==boring) transfer to any address */
/* NOTE(review): the enclosing switch (jk) statement (orig. line
   6488) is not visible in this extraction. */
6489 case Ijk_Boring
: case Ijk_Ret
: case Ijk_Call
: {
6490 HReg r
= iselIntExpr_R(env
, next
);
6491 ARMAMode1
* amR15T
= ARMAMode1_RI(hregARM_R8(), offsIP
);
/* Chainable indirect jump if chaining is on, otherwise an assisted
   transfer via the dispatcher. */
6492 if (env
->chainingAllowed
) {
6493 addInstr(env
, ARMInstr_XIndir(r
, amR15T
, ARMcc_AL
));
6495 addInstr(env
, ARMInstr_XAssisted(r
, amR15T
, ARMcc_AL
,
6504 /* Case: assisted transfer to arbitrary address */
6506 /* Keep this list in sync with that for Ist_Exit above */
/* Jump kinds the run-time dispatcher must handle itself: always
   emitted as XAssisted, carrying `jk` so the dispatcher can act. */
6510 case Ijk_Sys_syscall
:
6511 case Ijk_InvalICache
:
6514 HReg r
= iselIntExpr_R(env
, next
);
6515 ARMAMode1
* amR15T
= ARMAMode1_RI(hregARM_R8(), offsIP
);
6516 addInstr(env
, ARMInstr_XAssisted(r
, amR15T
, ARMcc_AL
, jk
));
/* Unhandled jump kind: print diagnostics then assert.
   NOTE(review): presumably inside a trace/diagnostic guard whose
   surrounding lines (orig. 6517-6522, 6524, 6526-6527) are missing
   here -- confirm against upstream. */
6523 vex_printf( "\n-- PUT(%d) = ", offsIP
);
6525 vex_printf( "; exit-");
6528 vassert(0); // are we expecting any other kind?
6532 /*---------------------------------------------------------*/
6533 /*--- Insn selector top-level ---*/
6534 /*---------------------------------------------------------*/
6536 /* Translate an entire SB to arm code. */
/* Top-level entry point: translate an entire IR superblock `bb` into
   an array of ARM host instructions, using virtual registers that a
   later register-allocation pass will map to real ones.
   NOTE(review): this extraction has dropped several source lines
   (the embedded original numbering jumps, e.g. 6539, 6545-6550,
   6553-6554, 6556, 6559, 6562, 6565, 6568-6569, 6572, 6575, 6581,
   6586, 6588-6589, 6593-6595, 6597, 6600, 6603-6604, 6610, 6613-6615,
   6620, 6625, 6627-6628, 6632, 6634, 6637-6640 are absent), including
   some trailing parameters of this signature, local declarations
   (env, i, j, hreg, hregHI), and the final `return` -- verify every
   gap against the upstream file. */
6538 HInstrArray
* iselSB_ARM ( const IRSB
* bb
,
6540 const VexArchInfo
* archinfo_host
,
6541 const VexAbiInfo
* vbi
/*UNUSED*/,
6542 Int offs_Host_EvC_Counter
,
6543 Int offs_Host_EvC_FailAddr
,
6544 Bool chainingAllowed
,
6551 UInt hwcaps_host
= archinfo_host
->hwcaps
;
6552 ARMAMode1
*amCounter
, *amFailAddr
;
/* Sanity: this selector only handles the ARM host architecture. */
6555 vassert(arch_host
== VexArchARM
);
6557 /* Check that the host's endianness is as expected. */
6558 vassert(archinfo_host
->endness
== VexEndnessLE
);
6560 /* guard against unexpected space regressions */
6561 vassert(sizeof(ARMInstr
) <= 28);
6563 /* hwcaps should not change from one ISEL call to another. */
6564 arm_hwcaps
= hwcaps_host
; // JRS 2012 Mar 31: FIXME (RM)
6566 /* Make up an initial environment to use. */
6567 env
= LibVEX_Alloc_inline(sizeof(ISelEnv
));
6570 /* Set up output code array. */
6571 env
->code
= newHInstrArray();
6573 /* Copy BB's type env. */
6574 env
->type_env
= bb
->tyenv
;
6576 /* Make up an IRTemp -> virtual HReg mapping. This doesn't
6577 change as we go along. */
6578 env
->n_vregmap
= bb
->tyenv
->types_used
;
6579 env
->vregmap
= LibVEX_Alloc_inline(env
->n_vregmap
* sizeof(HReg
));
/* vregmapHI holds the high half of 64-bit temps that must live in a
   register pair (see the non-NEON path below); INVALID_HREG otherwise. */
6580 env
->vregmapHI
= LibVEX_Alloc_inline(env
->n_vregmap
* sizeof(HReg
));
6582 /* and finally ... */
6583 env
->chainingAllowed
= chainingAllowed
;
6584 env
->hwcaps
= hwcaps_host
;
6585 env
->max_ga
= max_ga
;
6587 /* For each IR temporary, allocate a suitably-kinded virtual
/* j is the running virtual-register index; one vreg (or a pair) is
   allocated per IRTemp, keyed by its IR type. */
6590 for (i
= 0; i
< env
->n_vregmap
; i
++) {
6591 hregHI
= hreg
= INVALID_HREG
;
6592 switch (bb
->tyenv
->types
[i
]) {
/* NOTE(review): the Ity_I1/I8/I16 case labels (orig. lines
   6593-6595) are missing from this extraction. */
6596 case Ity_I32
: hreg
= mkHReg(True
, HRcInt32
, 0, j
++); break;
/* NOTE(review): the Ity_I64 case label (orig. line 6597) is
   missing; with NEON a 64-bit temp gets a single D register,
   otherwise (the hregHI/hreg pair below) two 32-bit registers. */
6598 if (hwcaps_host
& VEX_HWCAPS_ARM_NEON
) {
6599 hreg
= mkHReg(True
, HRcFlt64
, 0, j
++);
6601 hregHI
= mkHReg(True
, HRcInt32
, 0, j
++);
6602 hreg
= mkHReg(True
, HRcInt32
, 0, j
++);
6605 case Ity_F32
: hreg
= mkHReg(True
, HRcFlt32
, 0, j
++); break;
6606 case Ity_F64
: hreg
= mkHReg(True
, HRcFlt64
, 0, j
++); break;
6607 case Ity_V128
: hreg
= mkHReg(True
, HRcVec128
, 0, j
++); break;
/* Any other IR type is a bug in the front end: dump it and die. */
6608 default: ppIRType(bb
->tyenv
->types
[i
]);
6609 vpanic("iselBB: IRTemp type");
6611 env
->vregmap
[i
] = hreg
;
6612 env
->vregmapHI
[i
] = hregHI
;
6616 /* The very first instruction must be an event check. */
6617 amCounter
= ARMAMode1_RI(hregARM_R8(), offs_Host_EvC_Counter
);
6618 amFailAddr
= ARMAMode1_RI(hregARM_R8(), offs_Host_EvC_FailAddr
);
6619 addInstr(env
, ARMInstr_EvCheck(amCounter
, amFailAddr
));
6621 /* Possibly a block counter increment (for profiling). At this
6622 point we don't know the address of the counter, so just pretend
6623 it is zero. It will have to be patched later, but before this
6624 translation is used, by a call to LibVEX_patchProfCtr. */
6626 addInstr(env
, ARMInstr_ProfInc());
6629 /* Ok, finally we can iterate over the statements. */
6630 for (i
= 0; i
< bb
->stmts_used
; i
++)
6631 iselStmt(env
, bb
->stmts
[i
]);
/* Emit the block terminator (see iselNext above). */
6633 iselNext(env
, bb
->next
, bb
->jumpkind
, bb
->offsIP
);
6635 /* record the number of vregs we used. */
6636 env
->code
->n_vregs
= env
->vreg_ctr
;
/* NOTE(review): the `return env->code;` and closing brace (orig.
   lines 6637-6639) are missing from this extraction. */
6641 /*---------------------------------------------------------------*/
6642 /*--- end host_arm_isel.c ---*/
6643 /*---------------------------------------------------------------*/