Add a test program for the membarrier() system call
[valgrind.git] / VEX / priv / host_arm64_isel.c
blob50f9205d1d5d22c9ff949c9a05f90eeb64362c8b
2 /*---------------------------------------------------------------*/
3 /*--- begin host_arm64_isel.c ---*/
4 /*---------------------------------------------------------------*/
6 /*
7 This file is part of Valgrind, a dynamic binary instrumentation
8 framework.
10 Copyright (C) 2013-2017 OpenWorks
11 info@open-works.net
13 This program is free software; you can redistribute it and/or
14 modify it under the terms of the GNU General Public License as
15 published by the Free Software Foundation; either version 2 of the
16 License, or (at your option) any later version.
18 This program is distributed in the hope that it will be useful, but
19 WITHOUT ANY WARRANTY; without even the implied warranty of
20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 General Public License for more details.
23 You should have received a copy of the GNU General Public License
24 along with this program; if not, write to the Free Software
25 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
26 02110-1301, USA.
28 The GNU General Public License is contained in the file COPYING.
31 #include "libvex_basictypes.h"
32 #include "libvex_ir.h"
33 #include "libvex.h"
34 #include "ir_match.h"
36 #include "main_util.h"
37 #include "main_globals.h"
38 #include "host_generic_regs.h"
39 #include "host_generic_simd64.h" // for 32-bit SIMD helpers
40 #include "host_arm64_defs.h"
43 /*---------------------------------------------------------*/
44 /*--- ISelEnv ---*/
45 /*---------------------------------------------------------*/
47 /* This carries around:
49 - A mapping from IRTemp to IRType, giving the type of any IRTemp we
50 might encounter. This is computed before insn selection starts,
51 and does not change.
53 - A mapping from IRTemp to HReg. This tells the insn selector
54 which virtual register is associated with each IRTemp temporary.
55 This is computed before insn selection starts, and does not
56 change. We expect this mapping to map precisely the same set of
57 IRTemps as the type mapping does.
59 |vregmap| holds the primary register for the IRTemp.
60 |vregmapHI| is only used for 128-bit integer-typed
61 IRTemps. It holds the identity of a second
62 64-bit virtual HReg, which holds the high half
63 of the value.
65 - The code array, that is, the insns selected so far.
67 - A counter, for generating new virtual registers.
69 - The host hardware capabilities word. This is set at the start
70 and does not change.
72 - A Bool for indicating whether we may generate chain-me
73 instructions for control flow transfers, or whether we must use
74 XAssisted.
76 - The maximum guest address of any guest insn in this block.
77 Actually, the address of the highest-addressed byte from any insn
78 in this block. Is set at the start and does not change. This is
79 used for detecting jumps which are definitely forward-edges from
80 this block, and therefore can be made (chained) to the fast entry
81 point of the destination, thereby avoiding the destination's
82 event check.
84 - An IRExpr*, which may be NULL, holding the IR expression (an
85 IRRoundingMode-encoded value) to which the FPU's rounding mode
86 was most recently set. Setting to NULL is always safe. Used to
87 avoid redundant settings of the FPU's rounding mode, as
88 described in set_FPCR_rounding_mode below.
90 Note, this is all (well, mostly) host-independent.
typedef
   struct {
      /* Constant -- are set at the start and do not change. */
      IRTypeEnv*   type_env;    /* IRTemp -> IRType map for this block */

      HReg*        vregmap;     /* IRTemp -> primary virtual register */
      HReg*        vregmapHI;   /* second (high-half) vreg, used only for
                                   128-bit integer-typed IRTemps */
      Int          n_vregmap;   /* number of entries in the two maps */

      UInt         hwcaps;      /* host hardware capabilities word */

      Bool         chainingAllowed; /* may we generate chain-me transfers? */
      Addr64       max_ga;      /* highest guest insn address in this block */

      /* These are modified as we go along. */
      HInstrArray* code;        /* the insns selected so far */
      Int          vreg_ctr;    /* counter for allocating new vregs */

      IRExpr*      previous_rm; /* last rounding-mode expr written to FPCR,
                                   or NULL; see set_FPCR_rounding_mode */
   }
   ISelEnv;
115 static HReg lookupIRTemp ( ISelEnv* env, IRTemp tmp )
117 vassert(tmp >= 0);
118 vassert(tmp < env->n_vregmap);
119 return env->vregmap[tmp];
122 static void lookupIRTempPair ( HReg* vrHI, HReg* vrLO,
123 ISelEnv* env, IRTemp tmp )
125 vassert(tmp >= 0);
126 vassert(tmp < env->n_vregmap);
127 vassert(! hregIsInvalid(env->vregmapHI[tmp]));
128 *vrLO = env->vregmap[tmp];
129 *vrHI = env->vregmapHI[tmp];
132 static void addInstr ( ISelEnv* env, ARM64Instr* instr )
134 addHInstr(env->code, instr);
135 if (vex_traceflags & VEX_TRACE_VCODE) {
136 ppARM64Instr(instr);
137 vex_printf("\n");
141 static HReg newVRegI ( ISelEnv* env )
143 HReg reg = mkHReg(True/*virtual reg*/, HRcInt64, 0, env->vreg_ctr);
144 env->vreg_ctr++;
145 return reg;
148 static HReg newVRegD ( ISelEnv* env )
150 HReg reg = mkHReg(True/*virtual reg*/, HRcFlt64, 0, env->vreg_ctr);
151 env->vreg_ctr++;
152 return reg;
155 static HReg newVRegV ( ISelEnv* env )
157 HReg reg = mkHReg(True/*virtual reg*/, HRcVec128, 0, env->vreg_ctr);
158 env->vreg_ctr++;
159 return reg;
163 /*---------------------------------------------------------*/
164 /*--- ISEL: Forward declarations ---*/
165 /*---------------------------------------------------------*/
167 /* These are organised as iselXXX and iselXXX_wrk pairs. The
168 iselXXX_wrk do the real work, but are not to be called directly.
169 For each XXX, iselXXX calls its iselXXX_wrk counterpart, then
170 checks that all returned registers are virtual. You should not
171 call the _wrk version directly.
173 Because some forms of ARM64 memory amodes are implicitly scaled by
174 the access size, iselIntExpr_AMode takes an IRType which tells it
175 the type of the access for which the amode is to be used. This
176 type needs to be correct, else you'll get incorrect code.
178 static ARM64AMode* iselIntExpr_AMode_wrk ( ISelEnv* env,
179 IRExpr* e, IRType dty );
180 static ARM64AMode* iselIntExpr_AMode ( ISelEnv* env,
181 IRExpr* e, IRType dty );
183 static ARM64RIA* iselIntExpr_RIA_wrk ( ISelEnv* env, IRExpr* e );
184 static ARM64RIA* iselIntExpr_RIA ( ISelEnv* env, IRExpr* e );
186 static ARM64RIL* iselIntExpr_RIL_wrk ( ISelEnv* env, IRExpr* e );
187 static ARM64RIL* iselIntExpr_RIL ( ISelEnv* env, IRExpr* e );
189 static ARM64RI6* iselIntExpr_RI6_wrk ( ISelEnv* env, IRExpr* e );
190 static ARM64RI6* iselIntExpr_RI6 ( ISelEnv* env, IRExpr* e );
192 static ARM64CondCode iselCondCode_wrk ( ISelEnv* env, IRExpr* e );
193 static ARM64CondCode iselCondCode ( ISelEnv* env, IRExpr* e );
195 static HReg iselIntExpr_R_wrk ( ISelEnv* env, IRExpr* e );
196 static HReg iselIntExpr_R ( ISelEnv* env, IRExpr* e );
198 static void iselInt128Expr_wrk ( /*OUT*/HReg* rHi, HReg* rLo,
199 ISelEnv* env, IRExpr* e );
200 static void iselInt128Expr ( /*OUT*/HReg* rHi, HReg* rLo,
201 ISelEnv* env, IRExpr* e );
203 static HReg iselDblExpr_wrk ( ISelEnv* env, IRExpr* e );
204 static HReg iselDblExpr ( ISelEnv* env, IRExpr* e );
206 static HReg iselFltExpr_wrk ( ISelEnv* env, IRExpr* e );
207 static HReg iselFltExpr ( ISelEnv* env, IRExpr* e );
209 static HReg iselF16Expr_wrk ( ISelEnv* env, IRExpr* e );
210 static HReg iselF16Expr ( ISelEnv* env, IRExpr* e );
212 static HReg iselV128Expr_wrk ( ISelEnv* env, IRExpr* e );
213 static HReg iselV128Expr ( ISelEnv* env, IRExpr* e );
215 static void iselV256Expr_wrk ( /*OUT*/HReg* rHi, HReg* rLo,
216 ISelEnv* env, IRExpr* e );
217 static void iselV256Expr ( /*OUT*/HReg* rHi, HReg* rLo,
218 ISelEnv* env, IRExpr* e );
220 static ARM64RIL* mb_mkARM64RIL_I ( ULong imm64 );
223 /*---------------------------------------------------------*/
224 /*--- ISEL: Misc helpers ---*/
225 /*---------------------------------------------------------*/
227 /* Generate an amode suitable for a 64-bit sized access relative to
228 the baseblock register (X21). This generates an RI12 amode, which
229 means its scaled by the access size, which is why the access size
230 -- 64 bit -- is stated explicitly here. Consequently |off| needs
231 to be divisible by 8. */
232 static ARM64AMode* mk_baseblock_64bit_access_amode ( UInt off )
234 vassert(off < (8 << 12)); /* otherwise it's unrepresentable */
235 vassert((off & 7) == 0); /* ditto */
236 return ARM64AMode_RI12(hregARM64_X21(), off >> 3, 8/*scale*/);
239 /* Ditto, for 32 bit accesses. */
240 static ARM64AMode* mk_baseblock_32bit_access_amode ( UInt off )
242 vassert(off < (4 << 12)); /* otherwise it's unrepresentable */
243 vassert((off & 3) == 0); /* ditto */
244 return ARM64AMode_RI12(hregARM64_X21(), off >> 2, 4/*scale*/);
247 /* Ditto, for 16 bit accesses. */
248 static ARM64AMode* mk_baseblock_16bit_access_amode ( UInt off )
250 vassert(off < (2 << 12)); /* otherwise it's unrepresentable */
251 vassert((off & 1) == 0); /* ditto */
252 return ARM64AMode_RI12(hregARM64_X21(), off >> 1, 2/*scale*/);
255 /* Ditto, for 8 bit accesses. */
256 static ARM64AMode* mk_baseblock_8bit_access_amode ( UInt off )
258 vassert(off < (1 << 12)); /* otherwise it's unrepresentable */
259 return ARM64AMode_RI12(hregARM64_X21(), off >> 0, 1/*scale*/);
262 static HReg mk_baseblock_128bit_access_addr ( ISelEnv* env, UInt off )
264 vassert(off < (1<<12));
265 HReg r = newVRegI(env);
266 addInstr(env, ARM64Instr_Arith(r, hregARM64_X21(),
267 ARM64RIA_I12(off,0), True/*isAdd*/));
268 return r;
271 static HReg get_baseblock_register ( void )
273 return hregARM64_X21();
276 /* Generate code to zero extend a 32 bit value in 'src' to 64 bits, in
277 a new register, and return the new register. */
278 static HReg widen_z_32_to_64 ( ISelEnv* env, HReg src )
280 HReg dst = newVRegI(env);
281 ARM64RIL* mask = ARM64RIL_I13(1, 0, 31); /* encodes 0xFFFFFFFF */
282 addInstr(env, ARM64Instr_Logic(dst, src, mask, ARM64lo_AND));
283 return dst;
286 /* Generate code to sign extend a 16 bit value in 'src' to 64 bits, in
287 a new register, and return the new register. */
288 static HReg widen_s_16_to_64 ( ISelEnv* env, HReg src )
290 HReg dst = newVRegI(env);
291 ARM64RI6* n48 = ARM64RI6_I6(48);
292 addInstr(env, ARM64Instr_Shift(dst, src, n48, ARM64sh_SHL));
293 addInstr(env, ARM64Instr_Shift(dst, dst, n48, ARM64sh_SAR));
294 return dst;
297 /* Generate code to zero extend a 16 bit value in 'src' to 64 bits, in
298 a new register, and return the new register. */
299 static HReg widen_z_16_to_64 ( ISelEnv* env, HReg src )
301 HReg dst = newVRegI(env);
302 ARM64RIL* mask = ARM64RIL_I13(1, 0, 15); /* encodes 0xFFFF */
303 addInstr(env, ARM64Instr_Logic(dst, src, mask, ARM64lo_AND));
304 return dst;
307 /* Generate code to sign extend a 32 bit value in 'src' to 64 bits, in
308 a new register, and return the new register. */
309 static HReg widen_s_32_to_64 ( ISelEnv* env, HReg src )
311 HReg dst = newVRegI(env);
312 ARM64RI6* n32 = ARM64RI6_I6(32);
313 addInstr(env, ARM64Instr_Shift(dst, src, n32, ARM64sh_SHL));
314 addInstr(env, ARM64Instr_Shift(dst, dst, n32, ARM64sh_SAR));
315 return dst;
318 /* Generate code to sign extend a 8 bit value in 'src' to 64 bits, in
319 a new register, and return the new register. */
320 static HReg widen_s_8_to_64 ( ISelEnv* env, HReg src )
322 HReg dst = newVRegI(env);
323 ARM64RI6* n56 = ARM64RI6_I6(56);
324 addInstr(env, ARM64Instr_Shift(dst, src, n56, ARM64sh_SHL));
325 addInstr(env, ARM64Instr_Shift(dst, dst, n56, ARM64sh_SAR));
326 return dst;
329 static HReg widen_z_8_to_64 ( ISelEnv* env, HReg src )
331 HReg dst = newVRegI(env);
332 ARM64RIL* mask = ARM64RIL_I13(1, 0, 7); /* encodes 0xFF */
333 addInstr(env, ARM64Instr_Logic(dst, src, mask, ARM64lo_AND));
334 return dst;
337 /* Is this IRExpr_Const(IRConst_U64(0)) ? */
338 static Bool isZeroU64 ( IRExpr* e ) {
339 if (e->tag != Iex_Const) return False;
340 IRConst* con = e->Iex.Const.con;
341 vassert(con->tag == Ico_U64);
342 return con->Ico.U64 == 0;
346 /*---------------------------------------------------------*/
347 /*--- ISEL: FP rounding mode helpers ---*/
348 /*---------------------------------------------------------*/
350 /* Set the FP rounding mode: 'mode' is an I32-typed expression
351 denoting a value in the range 0 .. 3, indicating a round mode
352 encoded as per type IRRoundingMode -- the first four values only
353 (Irrm_NEAREST, Irrm_NegINF, Irrm_PosINF, Irrm_ZERO). Set the ARM64
354 FSCR to have the same rounding.
356 For speed & simplicity, we're setting the *entire* FPCR here.
358 Setting the rounding mode is expensive. So this function tries to
359 avoid repeatedly setting the rounding mode to the same thing by
360 first comparing 'mode' to the 'mode' tree supplied in the previous
361 call to this function, if any. (The previous value is stored in
362 env->previous_rm.) If 'mode' is a single IR temporary 't' and
363 env->previous_rm is also just 't', then the setting is skipped.
365 This is safe because of the SSA property of IR: an IR temporary can
366 only be defined once and so will have the same value regardless of
367 where it appears in the block. Cool stuff, SSA.
369 A safety condition: all attempts to set the RM must be aware of
370 this mechanism - by being routed through the functions here.
372 Of course this only helps if blocks where the RM is set more than
373 once and it is set to the same value each time, *and* that value is
374 held in the same IR temporary each time. In order to assure the
375 latter as much as possible, the IR optimiser takes care to do CSE
376 on any block with any sign of floating point activity.
/* Set the FPCR rounding mode from |mode|, an I32-typed IRRoundingMode
   expression (values 0..3 only).  Emits nothing if |mode| is the same
   IR temporary as on the previous call (safe because of SSA: a temp
   has one value for the whole block).  Sets the *entire* FPCR, with
   all non-rounding bits zero. */
static
void set_FPCR_rounding_mode ( ISelEnv* env, IRExpr* mode )
{
   vassert(typeOfIRExpr(env->type_env,mode) == Ity_I32);

   /* Do we need to do anything? */
   if (env->previous_rm
       && env->previous_rm->tag == Iex_RdTmp
       && mode->tag == Iex_RdTmp
       && env->previous_rm->Iex.RdTmp.tmp == mode->Iex.RdTmp.tmp) {
      /* no - setting it to what it was before.  */
      vassert(typeOfIRExpr(env->type_env, env->previous_rm) == Ity_I32);
      return;
   }

   /* No luck - we better set it, and remember what we set it to. */
   env->previous_rm = mode;

   /* Only supporting the rounding-mode bits - the rest of FPCR is set
      to zero - so we can set the whole register at once (faster). */

   /* This isn't simple, because 'mode' carries an IR rounding
      encoding, and we need to translate that to an ARM64 FP one:
      The IR encoding:
         00  to nearest (the default)
         10  to +infinity
         01  to -infinity
         11  to zero
      The ARM64 FP encoding:
         00  to nearest
         01  to +infinity
         10  to -infinity
         11  to zero
      Easy enough to do; just swap the two bits.
   */
   HReg irrm = iselIntExpr_R(env, mode);
   HReg tL   = newVRegI(env);
   HReg tR   = newVRegI(env);
   HReg t3   = newVRegI(env);
   /* tL = irrm << 1;
      tR = irrm >> 1;  if we're lucky, these will issue together
      tL &= 2;
      tR &= 1;         ditto
      t3 = tL | tR;
      t3 <<= 22;
      fmxr fpscr, t3
      NOTE(review): "fmxr fpscr" is the ARM32 mnemonic; the actual
      write is done by ARM64Instr_FPCR below.  The <<22 places the
      two swapped bits at FPCR.RMode -- TODO confirm bit position
      against the ARMv8 ARM. */
   ARM64RIL* ril_one = mb_mkARM64RIL_I(1);
   ARM64RIL* ril_two = mb_mkARM64RIL_I(2);
   vassert(ril_one && ril_two);
   addInstr(env, ARM64Instr_Shift(tL, irrm, ARM64RI6_I6(1), ARM64sh_SHL));
   addInstr(env, ARM64Instr_Shift(tR, irrm, ARM64RI6_I6(1), ARM64sh_SHR));
   addInstr(env, ARM64Instr_Logic(tL, tL, ril_two, ARM64lo_AND));
   addInstr(env, ARM64Instr_Logic(tR, tR, ril_one, ARM64lo_AND));
   addInstr(env, ARM64Instr_Logic(t3, tL, ARM64RIL_R(tR), ARM64lo_OR));
   addInstr(env, ARM64Instr_Shift(t3, t3, ARM64RI6_I6(22), ARM64sh_SHL));
   addInstr(env, ARM64Instr_FPCR(True/*toFPCR*/, t3));
}
438 /*---------------------------------------------------------*/
439 /*--- ISEL: Function call helpers ---*/
440 /*---------------------------------------------------------*/
442 /* Used only in doHelperCall. See big comment in doHelperCall re
443 handling of register-parameter args. This function figures out
444 whether evaluation of an expression might require use of a fixed
445 register. If in doubt return True (safe but suboptimal).
447 static
448 Bool mightRequireFixedRegs ( IRExpr* e )
450 if (UNLIKELY(is_IRExpr_VECRET_or_GSPTR(e))) {
451 // These are always "safe" -- either a copy of SP in some
452 // arbitrary vreg, or a copy of x21, respectively.
453 return False;
455 /* Else it's a "normal" expression. */
456 switch (e->tag) {
457 case Iex_RdTmp: case Iex_Const: case Iex_Get:
458 return False;
459 default:
460 return True;
465 /* Do a complete function call. |guard| is a Ity_Bit expression
466 indicating whether or not the call happens. If guard==NULL, the
467 call is unconditional. |retloc| is set to indicate where the
468 return value is after the call. The caller (of this fn) must
469 generate code to add |stackAdjustAfterCall| to the stack pointer
470 after the call is done. Returns True iff it managed to handle this
471 combination of arg/return types, else returns False. */
/* Do a complete helper-function call.  |guard| is an Ity_Bit guard
   expression, or NULL for an unconditional call.  On success, sets
   |*retloc| to where the return value will be, and
   |*stackAdjustAfterCall| to the SP adjustment the caller must emit
   after the call.  Returns True iff this arg/return-type combination
   was handled. */
static
Bool doHelperCall ( /*OUT*/UInt*   stackAdjustAfterCall,
                    /*OUT*/RetLoc* retloc,
                    ISelEnv* env,
                    IRExpr* guard,
                    IRCallee* cee, IRType retTy, IRExpr** args )
{
   ARM64CondCode cc;
   HReg          argregs[ARM64_N_ARGREGS];
   HReg          tmpregs[ARM64_N_ARGREGS];
   Bool          go_fast;
   Int           n_args, i, nextArgReg;
   Addr64        target;

   vassert(ARM64_N_ARGREGS == 8);

   /* Set default returns.  We'll update them later if needed. */
   *stackAdjustAfterCall = 0;
   *retloc               = mk_RetLoc_INVALID();

   /* These are used for cross-checking that IR-level constraints on
      the use of IRExpr_VECRET() and IRExpr_GSPTR() are observed. */
   UInt nVECRETs = 0;
   UInt nGSPTRs  = 0;

   /* Marshal args for a call and do the call.

      This function only deals with a tiny set of possibilities, which
      cover all helpers in practice.  The restrictions are that only
      arguments in registers are supported, hence only
      ARM64_N_REGPARMS x 64 integer bits in total can be passed.  In
      fact the only supported arg type is I64.

      The return type can be I{64,32} or V128.  In the V128 case, it
      is expected that |args| will contain the special node
      IRExpr_VECRET(), in which case this routine generates code to
      allocate space on the stack for the vector return value.  Since
      we are not passing any scalars on the stack, it is enough to
      preallocate the return space before marshalling any arguments,
      in this case.

      |args| may also contain IRExpr_GSPTR(), in which case the
      value in x21 is passed as the corresponding argument.

      Generating code which is both efficient and correct when
      parameters are to be passed in registers is difficult, for the
      reasons elaborated in detail in comments attached to
      doHelperCall() in priv/host-x86/isel.c.  Here, we use a variant
      of the method described in those comments.

      The problem is split into two cases: the fast scheme and the
      slow scheme.  In the fast scheme, arguments are computed
      directly into the target (real) registers.  This is only safe
      when we can be sure that computation of each argument will not
      trash any real registers set by computation of any other
      argument.

      In the slow scheme, all args are first computed into vregs, and
      once they are all done, they are moved to the relevant real
      regs.  This always gives correct code, but it also gives a bunch
      of vreg-to-rreg moves which are usually redundant but are hard
      for the register allocator to get rid of.

      To decide which scheme to use, all argument expressions are
      first examined.  If they are all so simple that it is clear they
      will be evaluated without use of any fixed registers, use the
      fast scheme, else use the slow scheme.  Note also that only
      unconditional calls may use the fast scheme, since having to
      compute a condition expression could itself trash real
      registers.

      Note this requires being able to examine an expression and
      determine whether or not evaluation of it might use a fixed
      register.  That requires knowledge of how the rest of this insn
      selector works.  Currently just the following 3 are regarded as
      safe -- hopefully they cover the majority of arguments in
      practice: IRExpr_Tmp IRExpr_Const IRExpr_Get.
   */

   /* Note that the cee->regparms field is meaningless on ARM64 hosts
      (since there is only one calling convention) and so we always
      ignore it. */
   n_args = 0;
   for (i = 0; args[i]; i++) {
      IRExpr* arg = args[i];
      if (UNLIKELY(arg->tag == Iex_VECRET)) {
         nVECRETs++;
      } else if (UNLIKELY(arg->tag == Iex_GSPTR)) {
         nGSPTRs++;
      }
      n_args++;
   }

   /* If this fails, the IR is ill-formed */
   vassert(nGSPTRs == 0 || nGSPTRs == 1);

   /* If we have a VECRET, allocate space on the stack for the return
      value, and record the stack pointer after that. */
   HReg r_vecRetAddr = INVALID_HREG;
   if (nVECRETs == 1) {
      vassert(retTy == Ity_V128 || retTy == Ity_V256);
      vassert(retTy != Ity_V256); // we don't handle that yet (if ever)
      r_vecRetAddr = newVRegI(env);
      addInstr(env, ARM64Instr_AddToSP(-16));
      addInstr(env, ARM64Instr_FromSP(r_vecRetAddr));
   } else {
      // If either of these fail, the IR is ill-formed
      vassert(retTy != Ity_V128 && retTy != Ity_V256);
      vassert(nVECRETs == 0);
   }

   /* The AAPCS64 integer argument registers, in order. */
   argregs[0] = hregARM64_X0();
   argregs[1] = hregARM64_X1();
   argregs[2] = hregARM64_X2();
   argregs[3] = hregARM64_X3();
   argregs[4] = hregARM64_X4();
   argregs[5] = hregARM64_X5();
   argregs[6] = hregARM64_X6();
   argregs[7] = hregARM64_X7();

   tmpregs[0] = tmpregs[1] = tmpregs[2] = tmpregs[3] = INVALID_HREG;
   tmpregs[4] = tmpregs[5] = tmpregs[6] = tmpregs[7] = INVALID_HREG;

   /* First decide which scheme (slow or fast) is to be used.  First
      assume the fast scheme, and select slow if any contraindications
      (wow) appear. */

   go_fast = True;

   /* A non-trivially-true guard forces the slow scheme: computing the
      condition later must not trash already-loaded real arg regs. */
   if (guard) {
      if (guard->tag == Iex_Const
          && guard->Iex.Const.con->tag == Ico_U1
          && guard->Iex.Const.con->Ico.U1 == True) {
         /* unconditional */
      } else {
         /* Not manifestly unconditional -- be conservative. */
         go_fast = False;
      }
   }

   if (go_fast) {
      for (i = 0; i < n_args; i++) {
         if (mightRequireFixedRegs(args[i])) {
            go_fast = False;
            break;
         }
      }
   }

   if (go_fast) {
      if (retTy == Ity_V128 || retTy == Ity_V256)
         go_fast = False;
   }

   /* At this point the scheme to use has been established.  Generate
      code to get the arg values into the argument rregs.  If we run
      out of arg regs, give up. */

   if (go_fast) {

      /* FAST SCHEME */
      nextArgReg = 0;

      for (i = 0; i < n_args; i++) {
         IRExpr* arg = args[i];

         IRType  aTy = Ity_INVALID;
         if (LIKELY(!is_IRExpr_VECRET_or_GSPTR(arg)))
            aTy = typeOfIRExpr(env->type_env, args[i]);

         if (nextArgReg >= ARM64_N_ARGREGS)
            return False; /* out of argregs */

         if (aTy == Ity_I64) {
            addInstr(env, ARM64Instr_MovI( argregs[nextArgReg],
                                           iselIntExpr_R(env, args[i]) ));
            nextArgReg++;
         }
         else if (arg->tag == Iex_GSPTR) {
            vassert(0); //ATC
            addInstr(env, ARM64Instr_MovI( argregs[nextArgReg],
                                           hregARM64_X21() ));
            nextArgReg++;
         }
         else if (arg->tag == Iex_VECRET) {
            // because of the go_fast logic above, we can't get here,
            // since vector return values makes us use the slow path
            // instead.
            vassert(0);
         }
         else
            return False; /* unhandled arg type */
      }

      /* Fast scheme only applies for unconditional calls.  Hence: */
      cc = ARM64cc_AL;

   } else {

      /* SLOW SCHEME; move via temporaries */
      nextArgReg = 0;

      for (i = 0; i < n_args; i++) {
         IRExpr* arg = args[i];

         IRType  aTy = Ity_INVALID;
         if (LIKELY(!is_IRExpr_VECRET_or_GSPTR(arg)))
            aTy = typeOfIRExpr(env->type_env, args[i]);

         if (nextArgReg >= ARM64_N_ARGREGS)
            return False; /* out of argregs */

         if (aTy == Ity_I64) {
            tmpregs[nextArgReg] = iselIntExpr_R(env, args[i]);
            nextArgReg++;
         }
         else if (arg->tag == Iex_GSPTR) {
            vassert(0); //ATC
            tmpregs[nextArgReg] = hregARM64_X21();
            nextArgReg++;
         }
         else if (arg->tag == Iex_VECRET) {
            vassert(!hregIsInvalid(r_vecRetAddr));
            tmpregs[nextArgReg] = r_vecRetAddr;
            nextArgReg++;
         }
         else
            return False; /* unhandled arg type */
      }

      /* Now we can compute the condition.  We can't do it earlier
         because the argument computations could trash the condition
         codes.  Be a bit clever to handle the common case where the
         guard is 1:Bit. */
      cc = ARM64cc_AL;
      if (guard) {
         if (guard->tag == Iex_Const
             && guard->Iex.Const.con->tag == Ico_U1
             && guard->Iex.Const.con->Ico.U1 == True) {
            /* unconditional -- do nothing */
         } else {
            cc = iselCondCode( env, guard );
         }
      }

      /* Move the args to their final destinations. */
      for (i = 0; i < nextArgReg; i++) {
         vassert(!(hregIsInvalid(tmpregs[i])));
         /* None of these insns, including any spill code that might
            be generated, may alter the condition codes. */
         addInstr( env, ARM64Instr_MovI( argregs[i], tmpregs[i] ) );
      }

   }

   /* Should be assured by checks above */
   vassert(nextArgReg <= ARM64_N_ARGREGS);

   /* Do final checks, set the return values, and generate the call
      instruction proper. */
   vassert(nGSPTRs == 0 || nGSPTRs == 1);
   vassert(nVECRETs == ((retTy == Ity_V128 || retTy == Ity_V256) ? 1 : 0));
   vassert(*stackAdjustAfterCall == 0);
   vassert(is_RetLoc_INVALID(*retloc));
   switch (retTy) {
      case Ity_INVALID:
         /* Function doesn't return a value. */
         *retloc = mk_RetLoc_simple(RLPri_None);
         break;
      case Ity_I64: case Ity_I32: case Ity_I16: case Ity_I8:
         *retloc = mk_RetLoc_simple(RLPri_Int);
         break;
      case Ity_V128:
         *retloc = mk_RetLoc_spRel(RLPri_V128SpRel, 0);
         *stackAdjustAfterCall = 16;
         break;
      case Ity_V256:
         vassert(0); // ATC
         *retloc = mk_RetLoc_spRel(RLPri_V256SpRel, 0);
         *stackAdjustAfterCall = 32;
         break;
      default:
         /* IR can denote other possible return types, but we don't
            handle those here. */
         vassert(0);
   }

   /* Finally, generate the call itself.  This needs the *retloc value
      set in the switch above, which is why it's at the end. */

   /* nextArgReg doles out argument registers.  Since these are
      assigned in the order x0 .. x7, its numeric value at this point,
      which must be between 0 and 8 inclusive, is going to be equal to
      the number of arg regs in use for the call.  Hence bake that
      number into the call (we'll need to know it when doing register
      allocation, to know what regs the call reads.) */

   target = (Addr)cee->addr;
   addInstr(env, ARM64Instr_Call( cc, target, nextArgReg, *retloc ));

   return True; /* success */
}
778 /*---------------------------------------------------------*/
779 /*--- ISEL: Integer expressions (64/32 bit) ---*/
780 /*---------------------------------------------------------*/
782 /* Select insns for an integer-typed expression, and add them to the
783 code list. Return a reg holding the result. This reg will be a
784 virtual register. THE RETURNED REG MUST NOT BE MODIFIED. If you
785 want to modify it, ask for a new vreg, copy it in there, and modify
786 the copy. The register allocator will do its best to map both
787 vregs to the same real register, so the copies will often disappear
788 later in the game.
790 This should handle expressions of 64- and 32-bit type. All results
791 are returned in a 64-bit register. For 32-bit expressions, the
792 upper 32 bits are arbitrary, so you should mask or sign extend
793 partial values if necessary.
796 /* --------------------- AMode --------------------- */
798 /* Return an AMode which computes the value of the specified
799 expression, possibly also adding insns to the code list as a
800 result. The expression may only be a 64-bit one.
803 static Bool isValidScale ( UChar scale )
805 switch (scale) {
806 case 1: case 2: case 4: case 8: /* case 16: ??*/ return True;
807 default: return False;
/* Sanity-check an amode produced by iselIntExpr_AMode_wrk: the base
   (and index) registers must be 64-bit-integer-class virtual regs,
   and any immediate must lie in its encodable range. */
static Bool sane_AMode ( ARM64AMode* am )
{
   switch (am->tag) {
      case ARM64am_RI9:
         /* reg + simm9, unscaled: immediate must fit in 9 signed bits */
         return
            toBool( hregClass(am->ARM64am.RI9.reg) == HRcInt64
                    && (hregIsVirtual(am->ARM64am.RI9.reg)
                        /* || sameHReg(am->ARM64am.RI9.reg,
                                       hregARM64_X21()) */ )
                    && am->ARM64am.RI9.simm9 >= -256
                    && am->ARM64am.RI9.simm9 <= 255 );
      case ARM64am_RI12:
         /* reg + uimm12, scaled by the access size */
         return
            toBool( hregClass(am->ARM64am.RI12.reg) == HRcInt64
                    && (hregIsVirtual(am->ARM64am.RI12.reg)
                        /* || sameHReg(am->ARM64am.RI12.reg,
                                       hregARM64_X21()) */ )
                    && am->ARM64am.RI12.uimm12 < 4096
                    && isValidScale(am->ARM64am.RI12.szB) );
      case ARM64am_RR:
         /* reg + reg */
         return
            toBool( hregClass(am->ARM64am.RR.base) == HRcInt64
                    && hregIsVirtual(am->ARM64am.RR.base)
                    && hregClass(am->ARM64am.RR.index) == HRcInt64
                    && hregIsVirtual(am->ARM64am.RR.index) );
      default:
         vpanic("sane_AMode: unknown ARM64 AMode1 tag");
   }
}
841 static
842 ARM64AMode* iselIntExpr_AMode ( ISelEnv* env, IRExpr* e, IRType dty )
844 ARM64AMode* am = iselIntExpr_AMode_wrk(env, e, dty);
845 vassert(sane_AMode(am));
846 return am;
/* DO NOT CALL THIS DIRECTLY!  Worker for iselIntExpr_AMode.  Tries,
   in order: reg+simm9 (unscaled), reg+uimm12 (scaled by the access
   size implied by |dty|), reg+reg, and finally falls back to
   computing |e| into a register and using a zero offset. */
static
ARM64AMode* iselIntExpr_AMode_wrk ( ISelEnv* env, IRExpr* e, IRType dty )
{
   IRType ty = typeOfIRExpr(env->type_env,e);
   vassert(ty == Ity_I64);

   /* log2 of the access size in bytes, from |dty| */
   ULong szBbits = 0;
   switch (dty) {
      case Ity_I64: szBbits = 3; break;
      case Ity_I32: szBbits = 2; break;
      case Ity_I16: szBbits = 1; break;
      case Ity_I8:  szBbits = 0; break;
      default: vassert(0);
   }

   /* {Add64,Sub64}(expr,simm9).  We don't care about |dty| here since
      we're going to create an amode suitable for LDU* or STU*
      instructions, which use unscaled immediate offsets.  */
   if (e->tag == Iex_Binop
       && (e->Iex.Binop.op == Iop_Add64 || e->Iex.Binop.op == Iop_Sub64)
       && e->Iex.Binop.arg2->tag == Iex_Const
       && e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U64) {
      Long simm = (Long)e->Iex.Binop.arg2->Iex.Const.con->Ico.U64;
      if (simm >= -255 && simm <= 255) {
         /* Although the gating condition might seem to be
               simm >= -256 && simm <= 255
            we will need to negate simm in the case where the op is Sub64.
            Hence limit the lower value to -255 in order that its negation
            is representable. */
         HReg reg = iselIntExpr_R(env, e->Iex.Binop.arg1);
         if (e->Iex.Binop.op == Iop_Sub64) simm = -simm;
         return ARM64AMode_RI9(reg, (Int)simm);
      }
   }

   /* Add64(expr, uimm12 * transfer-size) */
   if (e->tag == Iex_Binop
       && e->Iex.Binop.op == Iop_Add64
       && e->Iex.Binop.arg2->tag == Iex_Const
       && e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U64) {
      ULong uimm = e->Iex.Binop.arg2->Iex.Const.con->Ico.U64;
      ULong szB  = 1 << szBbits;
      if (0 == (uimm & (szB-1)) /* "uimm is szB-aligned" */
          && (uimm >> szBbits) < 4096) {
         HReg reg = iselIntExpr_R(env, e->Iex.Binop.arg1);
         return ARM64AMode_RI12(reg, (UInt)(uimm >> szBbits), (UChar)szB);
      }
   }

   /* Add64(expr1, expr2) */
   if (e->tag == Iex_Binop
       && e->Iex.Binop.op == Iop_Add64) {
      HReg reg1 = iselIntExpr_R(env, e->Iex.Binop.arg1);
      HReg reg2 = iselIntExpr_R(env, e->Iex.Binop.arg2);
      return ARM64AMode_RR(reg1, reg2);
   }

   /* Doesn't match anything in particular.  Generate it into
      a register and use that. */
   HReg reg = iselIntExpr_R(env, e);
   return ARM64AMode_RI9(reg, 0);
}
913 /* --------------------- RIA --------------------- */
915 /* Select instructions to generate 'e' into a RIA. */
/* Public wrapper for iselIntExpr_RIA_wrk: selects |e| into a
   reg-or-arith-immediate operand, then sanity-checks the result
   (immediate fits in 12 bits with shift 0 or 12; register is a
   virtual 64-bit integer reg). */
static ARM64RIA* iselIntExpr_RIA ( ISelEnv* env, IRExpr* e )
{
   ARM64RIA* ri = iselIntExpr_RIA_wrk(env, e);
   /* sanity checks ... */
   switch (ri->tag) {
      case ARM64riA_I12:
         vassert(ri->ARM64riA.I12.imm12 < 4096);
         vassert(ri->ARM64riA.I12.shift == 0 || ri->ARM64riA.I12.shift == 12);
         return ri;
      case ARM64riA_R:
         vassert(hregClass(ri->ARM64riA.R.reg) == HRcInt64);
         vassert(hregIsVirtual(ri->ARM64riA.R.reg));
         return ri;
      default:
         vpanic("iselIntExpr_RIA: unknown arm RIA tag");
   }
}
/* DO NOT CALL THIS DIRECTLY ! */
/* Worker for iselIntExpr_RIA: produce an I12 immediate operand when
   the constant fits in bits [11:0] or [23:12], else evaluate |e|
   into a register. */
static ARM64RIA* iselIntExpr_RIA_wrk ( ISelEnv* env, IRExpr* e )
{
   IRType ty = typeOfIRExpr(env->type_env,e);
   vassert(ty == Ity_I64 || ty == Ity_I32);

   /* special case: immediate */
   if (e->tag == Iex_Const) {
      ULong u = 0xF000000ULL; /* invalid */
      switch (e->Iex.Const.con->tag) {
         case Ico_U64: u = e->Iex.Const.con->Ico.U64; break;
         case Ico_U32: u = e->Iex.Const.con->Ico.U32; break;
         default: vpanic("iselIntExpr_RIA.Iex_Const(arm64)");
      }
      /* fits in imm12, shift 0? */
      if (0 == (u & ~(0xFFFULL << 0)))
         return ARM64RIA_I12((UShort)((u >> 0) & 0xFFFULL), 0);
      /* fits in imm12, shift 12? */
      if (0 == (u & ~(0xFFFULL << 12)))
         return ARM64RIA_I12((UShort)((u >> 12) & 0xFFFULL), 12);
      /* else fail, fall through to default case */
   }

   /* default case: calculate into a register and return that */
   {
      HReg r = iselIntExpr_R ( env, e );
      return ARM64RIA_R(r);
   }
}
964 /* --------------------- RIL --------------------- */
966 /* Select instructions to generate 'e' into a RIL. At this point we
967 have to deal with the strange bitfield-immediate encoding for logic
968 instructions. */
971 // The following four functions
972 // CountLeadingZeros CountTrailingZeros CountSetBits isImmLogical
973 // are copied, with modifications, from
974 // https://github.com/armvixl/vixl/blob/master/src/a64/assembler-a64.cc
975 // which has the following copyright notice:
977 Copyright 2013, ARM Limited
978 All rights reserved.
980 Redistribution and use in source and binary forms, with or without
981 modification, are permitted provided that the following conditions are met:
983 * Redistributions of source code must retain the above copyright notice,
984 this list of conditions and the following disclaimer.
985 * Redistributions in binary form must reproduce the above copyright notice,
986 this list of conditions and the following disclaimer in the documentation
987 and/or other materials provided with the distribution.
988 * Neither the name of ARM Limited nor the names of its contributors may be
989 used to endorse or promote products derived from this software without
990 specific prior written permission.
992 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND
993 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
994 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
995 DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
996 FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
997 DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
998 SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
999 CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
1000 OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
1001 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
1004 static Int CountLeadingZeros(ULong value, Int width)
1006 vassert(width == 32 || width == 64);
1007 Int count = 0;
1008 ULong bit_test = 1ULL << (width - 1);
1009 while ((count < width) && ((bit_test & value) == 0)) {
1010 count++;
1011 bit_test >>= 1;
1013 return count;
1016 static Int CountTrailingZeros(ULong value, Int width)
1018 vassert(width == 32 || width == 64);
1019 Int count = 0;
1020 while ((count < width) && (((value >> count) & 1) == 0)) {
1021 count++;
1023 return count;
1026 static Int CountSetBits(ULong value, Int width)
1028 // TODO: Other widths could be added here, as the implementation already
1029 // supports them.
1030 vassert(width == 32 || width == 64);
1032 // Mask out unused bits to ensure that they are not counted.
1033 value &= (0xffffffffffffffffULL >> (64-width));
1035 // Add up the set bits.
1036 // The algorithm works by adding pairs of bit fields together iteratively,
1037 // where the size of each bit field doubles each time.
1038 // An example for an 8-bit value:
1039 // Bits: h g f e d c b a
1040 // \ | \ | \ | \ |
1041 // value = h+g f+e d+c b+a
1042 // \ | \ |
1043 // value = h+g+f+e d+c+b+a
1044 // \ |
1045 // value = h+g+f+e+d+c+b+a
1046 value = ((value >> 1) & 0x5555555555555555ULL)
1047 + (value & 0x5555555555555555ULL);
1048 value = ((value >> 2) & 0x3333333333333333ULL)
1049 + (value & 0x3333333333333333ULL);
1050 value = ((value >> 4) & 0x0f0f0f0f0f0f0f0fULL)
1051 + (value & 0x0f0f0f0f0f0f0f0fULL);
1052 value = ((value >> 8) & 0x00ff00ff00ff00ffULL)
1053 + (value & 0x00ff00ff00ff00ffULL);
1054 value = ((value >> 16) & 0x0000ffff0000ffffULL)
1055 + (value & 0x0000ffff0000ffffULL);
1056 value = ((value >> 32) & 0x00000000ffffffffULL)
1057 + (value & 0x00000000ffffffffULL);
1059 return value;
static Bool isImmLogical ( /*OUT*/UInt* n,
                           /*OUT*/UInt* imm_s, /*OUT*/UInt* imm_r,
                           ULong value, UInt width )
{
  // Test if a given value can be encoded in the immediate field of a
  // logical instruction.

  // If it can be encoded, the function returns true, and values
  // pointed to by n, imm_s and imm_r are updated with immediates
  // encoded in the format required by the corresponding fields in the
  // logical instruction. If it can not be encoded, the function
  // returns false, and the values pointed to by n, imm_s and imm_r
  // are undefined.
  vassert(n != NULL && imm_s != NULL && imm_r != NULL);
  vassert(width == 32 || width == 64);

  // Logical immediates are encoded using parameters n, imm_s and imm_r using
  // the following table:
  //
  //  N   imms    immr    size        S             R
  //  1  ssssss  rrrrrr    64    UInt(ssssss)  UInt(rrrrrr)
  //  0  0sssss  xrrrrr    32    UInt(sssss)   UInt(rrrrr)
  //  0  10ssss  xxrrrr    16    UInt(ssss)    UInt(rrrr)
  //  0  110sss  xxxrrr     8    UInt(sss)     UInt(rrr)
  //  0  1110ss  xxxxrr     4    UInt(ss)      UInt(rr)
  //  0  11110s  xxxxxr     2    UInt(s)       UInt(r)
  // (s bits must not be all set)
  //
  // A pattern is constructed of size bits, where the least significant S+1
  // bits are set. The pattern is rotated right by R, and repeated across a
  // 32 or 64-bit value, depending on destination register width.
  //
  // To test if an arbitrary immediate can be encoded using this scheme, an
  // iterative algorithm is used.
  //
  // TODO: This code does not consider using X/W register overlap to support
  // 64-bit immediates where the top 32-bits are zero, and the bottom 32-bits
  // are an encodable logical immediate.

  // 1. If the value has all set or all clear bits, it can't be encoded.
  if ((value == 0) || (value == 0xffffffffffffffffULL) ||
      ((width == 32) && (value == 0xffffffff))) {
    return False;
  }

  UInt lead_zero  = CountLeadingZeros(value, width);
  UInt lead_one   = CountLeadingZeros(~value, width);
  UInt trail_zero = CountTrailingZeros(value, width);
  UInt trail_one  = CountTrailingZeros(~value, width);
  UInt set_bits   = CountSetBits(value, width);

  // The fixed bits in the immediate s field.
  // If width == 64 (X reg), start at 0xFFFFFF80.
  // If width == 32 (W reg), start at 0xFFFFFFC0, as the iteration for 64-bit
  // widths won't be executed.
  Int imm_s_fixed = (width == 64) ? -128 : -64;
  Int imm_s_mask = 0x3F;

  for (;;) {
    // 2. If the value is two bits wide, it can be encoded.
    if (width == 2) {
      *n = 0;
      *imm_s = 0x3C;
      *imm_r = (value & 3) - 1;
      return True;
    }

    // The output fields are written speculatively here; they are only
    // meaningful if one of the success tests in steps 3/4 below fires
    // on this iteration.
    *n = (width == 64) ? 1 : 0;
    *imm_s = ((imm_s_fixed | (set_bits - 1)) & imm_s_mask);
    if ((lead_zero + set_bits) == width) {
      *imm_r = 0;
    } else {
      *imm_r = (lead_zero > 0) ? (width - trail_zero) : lead_one;
    }

    // 3. If the sum of leading zeros, trailing zeros and set bits is equal to
    //    the bit width of the value, it can be encoded.
    if (lead_zero + trail_zero + set_bits == width) {
      return True;
    }

    // 4. If the sum of leading ones, trailing ones and unset bits in the
    //    value is equal to the bit width of the value, it can be encoded.
    if (lead_one + trail_one + (width - set_bits) == width) {
      return True;
    }

    // 5. If the most-significant half of the bitwise value is equal to the
    //    least-significant half, return to step 2 using the least-significant
    //    half of the value.
    ULong mask = (1ULL << (width >> 1)) - 1;
    if ((value & mask) == ((value >> (width >> 1)) & mask)) {
      width >>= 1;
      set_bits >>= 1;
      imm_s_fixed >>= 1;
      continue;
    }

    // 6. Otherwise, the value can't be encoded.
    return False;
  }
}
1166 /* Create a RIL for the given immediate, if it is representable, or
1167 return NULL if not. */
1169 static ARM64RIL* mb_mkARM64RIL_I ( ULong imm64 )
1171 UInt n = 0, imm_s = 0, imm_r = 0;
1172 Bool ok = isImmLogical(&n, &imm_s, &imm_r, imm64, 64);
1173 if (!ok) return NULL;
1174 vassert(n < 2 && imm_s < 64 && imm_r < 64);
1175 return ARM64RIL_I13(n, imm_r, imm_s);
1178 /* So, finally .. */
1180 static ARM64RIL* iselIntExpr_RIL ( ISelEnv* env, IRExpr* e )
1182 ARM64RIL* ri = iselIntExpr_RIL_wrk(env, e);
1183 /* sanity checks ... */
1184 switch (ri->tag) {
1185 case ARM64riL_I13:
1186 vassert(ri->ARM64riL.I13.bitN < 2);
1187 vassert(ri->ARM64riL.I13.immR < 64);
1188 vassert(ri->ARM64riL.I13.immS < 64);
1189 return ri;
1190 case ARM64riL_R:
1191 vassert(hregClass(ri->ARM64riL.R.reg) == HRcInt64);
1192 vassert(hregIsVirtual(ri->ARM64riL.R.reg));
1193 return ri;
1194 default:
1195 vpanic("iselIntExpr_RIL: unknown arm RIL tag");
1199 /* DO NOT CALL THIS DIRECTLY ! */
1200 static ARM64RIL* iselIntExpr_RIL_wrk ( ISelEnv* env, IRExpr* e )
1202 IRType ty = typeOfIRExpr(env->type_env,e);
1203 vassert(ty == Ity_I64 || ty == Ity_I32);
1205 /* special case: immediate */
1206 if (e->tag == Iex_Const) {
1207 ARM64RIL* maybe = NULL;
1208 if (ty == Ity_I64) {
1209 vassert(e->Iex.Const.con->tag == Ico_U64);
1210 maybe = mb_mkARM64RIL_I(e->Iex.Const.con->Ico.U64);
1211 } else {
1212 vassert(ty == Ity_I32);
1213 vassert(e->Iex.Const.con->tag == Ico_U32);
1214 UInt u32 = e->Iex.Const.con->Ico.U32;
1215 ULong u64 = (ULong)u32;
1216 /* First try with 32 leading zeroes. */
1217 maybe = mb_mkARM64RIL_I(u64);
1218 /* If that doesn't work, try with 2 copies, since it doesn't
1219 matter what winds up in the upper 32 bits. */
1220 if (!maybe) {
1221 maybe = mb_mkARM64RIL_I((u64 << 32) | u64);
1224 if (maybe) return maybe;
1225 /* else fail, fall through to default case */
1228 /* default case: calculate into a register and return that */
1230 HReg r = iselIntExpr_R ( env, e );
1231 return ARM64RIL_R(r);
1236 /* --------------------- RI6 --------------------- */
1238 /* Select instructions to generate 'e' into a RI6. */
1240 static ARM64RI6* iselIntExpr_RI6 ( ISelEnv* env, IRExpr* e )
1242 ARM64RI6* ri = iselIntExpr_RI6_wrk(env, e);
1243 /* sanity checks ... */
1244 switch (ri->tag) {
1245 case ARM64ri6_I6:
1246 vassert(ri->ARM64ri6.I6.imm6 < 64);
1247 vassert(ri->ARM64ri6.I6.imm6 > 0);
1248 return ri;
1249 case ARM64ri6_R:
1250 vassert(hregClass(ri->ARM64ri6.R.reg) == HRcInt64);
1251 vassert(hregIsVirtual(ri->ARM64ri6.R.reg));
1252 return ri;
1253 default:
1254 vpanic("iselIntExpr_RI6: unknown arm RI6 tag");
1258 /* DO NOT CALL THIS DIRECTLY ! */
1259 static ARM64RI6* iselIntExpr_RI6_wrk ( ISelEnv* env, IRExpr* e )
1261 IRType ty = typeOfIRExpr(env->type_env,e);
1262 vassert(ty == Ity_I64 || ty == Ity_I8);
1264 /* special case: immediate */
1265 if (e->tag == Iex_Const) {
1266 switch (e->Iex.Const.con->tag) {
1267 case Ico_U8: {
1268 UInt u = e->Iex.Const.con->Ico.U8;
1269 if (u > 0 && u < 64)
1270 return ARM64RI6_I6(u);
1271 break;
1272 default:
1273 break;
1276 /* else fail, fall through to default case */
1279 /* default case: calculate into a register and return that */
1281 HReg r = iselIntExpr_R ( env, e );
1282 return ARM64RI6_R(r);
1287 /* ------------------- CondCode ------------------- */
1289 /* Generate code to evaluated a bit-typed expression, returning the
1290 condition code which would correspond when the expression would
1291 notionally have returned 1. */
1293 static ARM64CondCode iselCondCode ( ISelEnv* env, IRExpr* e )
1295 ARM64CondCode cc = iselCondCode_wrk(env,e);
1296 vassert(cc != ARM64cc_NV);
1297 return cc;
/* DO NOT CALL THIS DIRECTLY: worker for iselCondCode above. */
static ARM64CondCode iselCondCode_wrk ( ISelEnv* env, IRExpr* e )
{
   vassert(e);
   vassert(typeOfIRExpr(env->type_env,e) == Ity_I1);

   /* var */
   if (e->tag == Iex_RdTmp) {
      HReg rTmp = lookupIRTemp(env, e->Iex.RdTmp.tmp);
      /* Cmp doesn't modify rTmp; so this is OK. */
      ARM64RIL* one = mb_mkARM64RIL_I(1);
      vassert(one);
      // TST rTmp, #1 : NE <=> the I1 value is 1
      addInstr(env, ARM64Instr_Test(rTmp, one));
      return ARM64cc_NE;
   }

   /* Not1(e) */
   if (e->tag == Iex_Unop && e->Iex.Unop.op == Iop_Not1) {
      /* Generate code for the arg, and negate the test condition */
      ARM64CondCode cc = iselCondCode(env, e->Iex.Unop.arg);
      if (cc == ARM64cc_AL || cc == ARM64cc_NV) {
         return ARM64cc_AL;
      } else {
         // ARM64 condition codes come in even/odd pairs where flipping
         // the bottom bit inverts the condition (EQ<->NE, CS<->CC, ...);
         // AL/NV are excluded above.
         return 1 ^ cc;
      }
   }

   /* --- patterns rooted at: 64to1 --- */

   if (e->tag == Iex_Unop
       && e->Iex.Unop.op == Iop_64to1) {
      HReg rTmp = iselIntExpr_R(env, e->Iex.Unop.arg);
      ARM64RIL* one = mb_mkARM64RIL_I(1);
      vassert(one); /* '1' must be representable */
      addInstr(env, ARM64Instr_Test(rTmp, one));
      return ARM64cc_NE;
   }

   /* --- patterns rooted at: CmpNEZ8 --- */

   if (e->tag == Iex_Unop
       && e->Iex.Unop.op == Iop_CmpNEZ8) {
      HReg r1 = iselIntExpr_R(env, e->Iex.Unop.arg);
      // Test only the low 8 bits.  0xFF is an encodable logical
      // immediate, so this cannot yield NULL.
      ARM64RIL* xFF = mb_mkARM64RIL_I(0xFF);
      addInstr(env, ARM64Instr_Test(r1, xFF));
      return ARM64cc_NE;
   }

   /* --- patterns rooted at: CmpNEZ16 --- */

   if (e->tag == Iex_Unop
       && e->Iex.Unop.op == Iop_CmpNEZ16) {
      HReg r1 = iselIntExpr_R(env, e->Iex.Unop.arg);
      // Test only the low 16 bits; 0xFFFF is likewise encodable.
      ARM64RIL* xFFFF = mb_mkARM64RIL_I(0xFFFF);
      addInstr(env, ARM64Instr_Test(r1, xFFFF));
      return ARM64cc_NE;
   }

   /* --- patterns rooted at: CmpNEZ64 --- */

   if (e->tag == Iex_Unop
       && e->Iex.Unop.op == Iop_CmpNEZ64) {
      HReg r1 = iselIntExpr_R(env, e->Iex.Unop.arg);
      // Compare against zero; NE <=> nonzero.
      ARM64RIA* zero = ARM64RIA_I12(0,0);
      addInstr(env, ARM64Instr_Cmp(r1, zero, True/*is64*/));
      return ARM64cc_NE;
   }

   /* --- patterns rooted at: CmpNEZ32 --- */

   if (e->tag == Iex_Unop
       && e->Iex.Unop.op == Iop_CmpNEZ32) {
      HReg r1 = iselIntExpr_R(env, e->Iex.Unop.arg);
      ARM64RIA* zero = ARM64RIA_I12(0,0);
      // 32-bit compare ignores the upper half of r1.
      addInstr(env, ARM64Instr_Cmp(r1, zero, False/*!is64*/));
      return ARM64cc_NE;
   }

   /* --- Cmp*64*(x,y) --- */
   if (e->tag == Iex_Binop
       && (e->Iex.Binop.op == Iop_CmpEQ64
           || e->Iex.Binop.op == Iop_CmpNE64
           || e->Iex.Binop.op == Iop_CmpLT64S
           || e->Iex.Binop.op == Iop_CmpLT64U
           || e->Iex.Binop.op == Iop_CmpLE64S
           || e->Iex.Binop.op == Iop_CmpLE64U
           || e->Iex.Binop.op == Iop_CasCmpEQ64)) {
      HReg      argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
      ARM64RIA* argR = iselIntExpr_RIA(env, e->Iex.Binop.arg2);
      addInstr(env, ARM64Instr_Cmp(argL, argR, True/*is64*/));
      // Map the IR comparison to the condition that holds after CMP.
      switch (e->Iex.Binop.op) {
         case Iop_CmpEQ64: case Iop_CasCmpEQ64: return ARM64cc_EQ;
         case Iop_CmpNE64:  return ARM64cc_NE;
         case Iop_CmpLT64S: return ARM64cc_LT;
         case Iop_CmpLT64U: return ARM64cc_CC;
         case Iop_CmpLE64S: return ARM64cc_LE;
         case Iop_CmpLE64U: return ARM64cc_LS;
         default: vpanic("iselCondCode(arm64): CmpXX64");
      }
   }

   /* --- Cmp*32*(x,y) --- */
   if (e->tag == Iex_Binop
       && (e->Iex.Binop.op == Iop_CmpEQ32
           || e->Iex.Binop.op == Iop_CmpNE32
           || e->Iex.Binop.op == Iop_CmpLT32S
           || e->Iex.Binop.op == Iop_CmpLT32U
           || e->Iex.Binop.op == Iop_CmpLE32S
           || e->Iex.Binop.op == Iop_CmpLE32U
           || e->Iex.Binop.op == Iop_CasCmpEQ32)) {
      HReg      argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
      ARM64RIA* argR = iselIntExpr_RIA(env, e->Iex.Binop.arg2);
      addInstr(env, ARM64Instr_Cmp(argL, argR, False/*!is64*/));
      switch (e->Iex.Binop.op) {
         case Iop_CmpEQ32: case Iop_CasCmpEQ32: return ARM64cc_EQ;
         case Iop_CmpNE32:  return ARM64cc_NE;
         case Iop_CmpLT32S: return ARM64cc_LT;
         case Iop_CmpLT32U: return ARM64cc_CC;
         case Iop_CmpLE32S: return ARM64cc_LE;
         case Iop_CmpLE32U: return ARM64cc_LS;
         default: vpanic("iselCondCode(arm64): CmpXX32");
      }
   }

   /* --- Cmp*16*(x,y) --- */
   if (e->tag == Iex_Binop
       && (e->Iex.Binop.op == Iop_CasCmpEQ16)) {
      HReg argL  = iselIntExpr_R(env, e->Iex.Binop.arg1);
      HReg argR  = iselIntExpr_R(env, e->Iex.Binop.arg2);
      // No 16-bit compare exists; zero-extend both operands and do a
      // 64-bit compare instead.
      HReg argL2 = widen_z_16_to_64(env, argL);
      HReg argR2 = widen_z_16_to_64(env, argR);
      addInstr(env, ARM64Instr_Cmp(argL2, ARM64RIA_R(argR2), True/*is64*/));
      switch (e->Iex.Binop.op) {
         case Iop_CasCmpEQ16: return ARM64cc_EQ;
         default: vpanic("iselCondCode(arm64): CmpXX16");
      }
   }

   /* --- Cmp*8*(x,y) --- */
   if (e->tag == Iex_Binop
       && (e->Iex.Binop.op == Iop_CasCmpEQ8)) {
      HReg argL  = iselIntExpr_R(env, e->Iex.Binop.arg1);
      HReg argR  = iselIntExpr_R(env, e->Iex.Binop.arg2);
      // Same trick as for 16 bits: widen to 64 and compare there.
      HReg argL2 = widen_z_8_to_64(env, argL);
      HReg argR2 = widen_z_8_to_64(env, argR);
      addInstr(env, ARM64Instr_Cmp(argL2, ARM64RIA_R(argR2), True/*is64*/));
      switch (e->Iex.Binop.op) {
         case Iop_CasCmpEQ8: return ARM64cc_EQ;
         default: vpanic("iselCondCode(arm64): CmpXX8");
      }
   }

   ppIRExpr(e);
   vpanic("iselCondCode");
}
1456 /* --------------------- Reg --------------------- */
1458 static HReg iselIntExpr_R ( ISelEnv* env, IRExpr* e )
1460 HReg r = iselIntExpr_R_wrk(env, e);
1461 /* sanity checks ... */
1462 # if 0
1463 vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
1464 # endif
1465 vassert(hregClass(r) == HRcInt64);
1466 vassert(hregIsVirtual(r));
1467 return r;
1470 /* DO NOT CALL THIS DIRECTLY ! */
1471 static HReg iselIntExpr_R_wrk ( ISelEnv* env, IRExpr* e )
1473 IRType ty = typeOfIRExpr(env->type_env,e);
1474 vassert(ty == Ity_I64 || ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8);
1476 switch (e->tag) {
1478 /* --------- TEMP --------- */
1479 case Iex_RdTmp: {
1480 return lookupIRTemp(env, e->Iex.RdTmp.tmp);
1483 /* --------- LOAD --------- */
1484 case Iex_Load: {
1485 HReg dst = newVRegI(env);
1487 if (e->Iex.Load.end != Iend_LE)
1488 goto irreducible;
1490 if (ty == Ity_I64) {
1491 ARM64AMode* amode = iselIntExpr_AMode ( env, e->Iex.Load.addr, ty );
1492 addInstr(env, ARM64Instr_LdSt64(True/*isLoad*/, dst, amode));
1493 return dst;
1495 if (ty == Ity_I32) {
1496 ARM64AMode* amode = iselIntExpr_AMode ( env, e->Iex.Load.addr, ty );
1497 addInstr(env, ARM64Instr_LdSt32(True/*isLoad*/, dst, amode));
1498 return dst;
1500 if (ty == Ity_I16) {
1501 ARM64AMode* amode = iselIntExpr_AMode ( env, e->Iex.Load.addr, ty );
1502 addInstr(env, ARM64Instr_LdSt16(True/*isLoad*/, dst, amode));
1503 return dst;
1505 if (ty == Ity_I8) {
1506 ARM64AMode* amode = iselIntExpr_AMode ( env, e->Iex.Load.addr, ty );
1507 addInstr(env, ARM64Instr_LdSt8(True/*isLoad*/, dst, amode));
1508 return dst;
1510 break;
1513 /* --------- BINARY OP --------- */
1514 case Iex_Binop: {
1516 ARM64LogicOp lop = 0; /* invalid */
1517 ARM64ShiftOp sop = 0; /* invalid */
1519 /* Special-case 0-x into a Neg instruction. Not because it's
1520 particularly useful but more so as to give value flow using
1521 this instruction, so as to check its assembly correctness for
1522 implementation of Left32/Left64. */
1523 switch (e->Iex.Binop.op) {
1524 case Iop_Sub64:
1525 if (isZeroU64(e->Iex.Binop.arg1)) {
1526 HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
1527 HReg dst = newVRegI(env);
1528 addInstr(env, ARM64Instr_Unary(dst, argR, ARM64un_NEG));
1529 return dst;
1531 break;
1532 default:
1533 break;
1536 /* ADD/SUB */
1537 switch (e->Iex.Binop.op) {
1538 case Iop_Add64: case Iop_Add32:
1539 case Iop_Sub64: case Iop_Sub32: {
1540 Bool isAdd = e->Iex.Binop.op == Iop_Add64
1541 || e->Iex.Binop.op == Iop_Add32;
1542 HReg dst = newVRegI(env);
1543 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1544 ARM64RIA* argR = iselIntExpr_RIA(env, e->Iex.Binop.arg2);
1545 addInstr(env, ARM64Instr_Arith(dst, argL, argR, isAdd));
1546 return dst;
1548 default:
1549 break;
1552 /* AND/OR/XOR */
1553 switch (e->Iex.Binop.op) {
1554 case Iop_And64: case Iop_And32: lop = ARM64lo_AND; goto log_binop;
1555 case Iop_Or64: case Iop_Or32: lop = ARM64lo_OR; goto log_binop;
1556 case Iop_Xor64: case Iop_Xor32: lop = ARM64lo_XOR; goto log_binop;
1557 log_binop: {
1558 HReg dst = newVRegI(env);
1559 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1560 ARM64RIL* argR = iselIntExpr_RIL(env, e->Iex.Binop.arg2);
1561 addInstr(env, ARM64Instr_Logic(dst, argL, argR, lop));
1562 return dst;
1564 default:
1565 break;
1568 /* SHL/SHR/SAR */
1569 switch (e->Iex.Binop.op) {
1570 case Iop_Shr64: sop = ARM64sh_SHR; goto sh_binop;
1571 case Iop_Sar64: sop = ARM64sh_SAR; goto sh_binop;
1572 case Iop_Shl64: case Iop_Shl32: sop = ARM64sh_SHL; goto sh_binop;
1573 sh_binop: {
1574 HReg dst = newVRegI(env);
1575 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1576 ARM64RI6* argR = iselIntExpr_RI6(env, e->Iex.Binop.arg2);
1577 addInstr(env, ARM64Instr_Shift(dst, argL, argR, sop));
1578 return dst;
1580 case Iop_Shr32:
1581 case Iop_Sar32: {
1582 Bool zx = e->Iex.Binop.op == Iop_Shr32;
1583 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1584 ARM64RI6* argR = iselIntExpr_RI6(env, e->Iex.Binop.arg2);
1585 HReg dst = zx ? widen_z_32_to_64(env, argL)
1586 : widen_s_32_to_64(env, argL);
1587 addInstr(env, ARM64Instr_Shift(dst, dst, argR, ARM64sh_SHR));
1588 return dst;
1590 default: break;
1593 /* MUL */
1594 if (e->Iex.Binop.op == Iop_Mul64 || e->Iex.Binop.op == Iop_Mul32) {
1595 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1596 HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
1597 HReg dst = newVRegI(env);
1598 addInstr(env, ARM64Instr_Mul(dst, argL, argR, ARM64mul_PLAIN));
1599 return dst;
1602 /* MULL */
1603 if (e->Iex.Binop.op == Iop_MullU32 || e->Iex.Binop.op == Iop_MullS32) {
1604 Bool isS = e->Iex.Binop.op == Iop_MullS32;
1605 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1606 HReg extL = (isS ? widen_s_32_to_64 : widen_z_32_to_64)(env, argL);
1607 HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
1608 HReg extR = (isS ? widen_s_32_to_64 : widen_z_32_to_64)(env, argR);
1609 HReg dst = newVRegI(env);
1610 addInstr(env, ARM64Instr_Mul(dst, extL, extR, ARM64mul_PLAIN));
1611 return dst;
1614 /* Handle misc other ops. */
1616 if (e->Iex.Binop.op == Iop_Max32U) {
1617 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1618 HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
1619 HReg dst = newVRegI(env);
1620 addInstr(env, ARM64Instr_Cmp(argL, ARM64RIA_R(argR), False/*!is64*/));
1621 addInstr(env, ARM64Instr_CSel(dst, argL, argR, ARM64cc_CS));
1622 return dst;
1625 if (e->Iex.Binop.op == Iop_32HLto64) {
1626 HReg hi32s = iselIntExpr_R(env, e->Iex.Binop.arg1);
1627 HReg lo32s = iselIntExpr_R(env, e->Iex.Binop.arg2);
1628 HReg lo32 = widen_z_32_to_64(env, lo32s);
1629 HReg hi32 = newVRegI(env);
1630 addInstr(env, ARM64Instr_Shift(hi32, hi32s, ARM64RI6_I6(32),
1631 ARM64sh_SHL));
1632 addInstr(env, ARM64Instr_Logic(hi32, hi32, ARM64RIL_R(lo32),
1633 ARM64lo_OR));
1634 return hi32;
1637 if (e->Iex.Binop.op == Iop_CmpF64 || e->Iex.Binop.op == Iop_CmpF32) {
1638 Bool isD = e->Iex.Binop.op == Iop_CmpF64;
1639 HReg dL = (isD ? iselDblExpr : iselFltExpr)(env, e->Iex.Binop.arg1);
1640 HReg dR = (isD ? iselDblExpr : iselFltExpr)(env, e->Iex.Binop.arg2);
1641 HReg dst = newVRegI(env);
1642 HReg imm = newVRegI(env);
1643 /* Do the compare (FCMP), which sets NZCV in PSTATE. Then
1644 create in dst, the IRCmpF64Result encoded result. */
1645 addInstr(env, (isD ? ARM64Instr_VCmpD : ARM64Instr_VCmpS)(dL, dR));
1646 addInstr(env, ARM64Instr_Imm64(dst, 0));
1647 addInstr(env, ARM64Instr_Imm64(imm, 0x40)); // 0x40 = Ircr_EQ
1648 addInstr(env, ARM64Instr_CSel(dst, imm, dst, ARM64cc_EQ));
1649 addInstr(env, ARM64Instr_Imm64(imm, 0x01)); // 0x01 = Ircr_LT
1650 addInstr(env, ARM64Instr_CSel(dst, imm, dst, ARM64cc_MI));
1651 addInstr(env, ARM64Instr_Imm64(imm, 0x00)); // 0x00 = Ircr_GT
1652 addInstr(env, ARM64Instr_CSel(dst, imm, dst, ARM64cc_GT));
1653 addInstr(env, ARM64Instr_Imm64(imm, 0x45)); // 0x45 = Ircr_UN
1654 addInstr(env, ARM64Instr_CSel(dst, imm, dst, ARM64cc_VS));
1655 return dst;
1658 { /* local scope */
1659 ARM64CvtOp cvt_op = ARM64cvt_INVALID;
1660 Bool srcIsD = False;
1661 switch (e->Iex.Binop.op) {
1662 case Iop_F64toI64S:
1663 cvt_op = ARM64cvt_F64_I64S; srcIsD = True; break;
1664 case Iop_F64toI64U:
1665 cvt_op = ARM64cvt_F64_I64U; srcIsD = True; break;
1666 case Iop_F64toI32S:
1667 cvt_op = ARM64cvt_F64_I32S; srcIsD = True; break;
1668 case Iop_F64toI32U:
1669 cvt_op = ARM64cvt_F64_I32U; srcIsD = True; break;
1670 case Iop_F32toI32S:
1671 cvt_op = ARM64cvt_F32_I32S; srcIsD = False; break;
1672 case Iop_F32toI32U:
1673 cvt_op = ARM64cvt_F32_I32U; srcIsD = False; break;
1674 case Iop_F32toI64S:
1675 cvt_op = ARM64cvt_F32_I64S; srcIsD = False; break;
1676 case Iop_F32toI64U:
1677 cvt_op = ARM64cvt_F32_I64U; srcIsD = False; break;
1678 default:
1679 break;
1681 if (cvt_op != ARM64cvt_INVALID) {
1682 /* This is all a bit dodgy, because we can't handle a
1683 non-constant (not-known-at-JIT-time) rounding mode
1684 indication. That's because there's no instruction
1685 AFAICS that does this conversion but rounds according to
1686 FPCR.RM, so we have to bake the rounding mode into the
1687 instruction right now. But that should be OK because
1688 (1) the front end attaches a literal Irrm_ value to the
1689 conversion binop, and (2) iropt will never float that
1690 off via CSE, into a literal. Hence we should always
1691 have an Irrm_ value as the first arg. */
1692 IRExpr* arg1 = e->Iex.Binop.arg1;
1693 if (arg1->tag != Iex_Const) goto irreducible;
1694 IRConst* arg1con = arg1->Iex.Const.con;
1695 vassert(arg1con->tag == Ico_U32); // else ill-typed IR
1696 UInt irrm = arg1con->Ico.U32;
1697 /* Find the ARM-encoded equivalent for |irrm|. */
1698 UInt armrm = 4; /* impossible */
1699 switch (irrm) {
1700 case Irrm_NEAREST: armrm = 0; break;
1701 case Irrm_NegINF: armrm = 2; break;
1702 case Irrm_PosINF: armrm = 1; break;
1703 case Irrm_ZERO: armrm = 3; break;
1704 default: goto irreducible;
1706 HReg src = (srcIsD ? iselDblExpr : iselFltExpr)
1707 (env, e->Iex.Binop.arg2);
1708 HReg dst = newVRegI(env);
1709 addInstr(env, ARM64Instr_VCvtF2I(cvt_op, dst, src, armrm));
1710 return dst;
1712 } /* local scope */
1714 /* All cases involving host-side helper calls. */
1715 void* fn = NULL;
1716 switch (e->Iex.Binop.op) {
1717 case Iop_DivU32:
1718 fn = &h_calc_udiv32_w_arm_semantics; break;
1719 case Iop_DivS32:
1720 fn = &h_calc_sdiv32_w_arm_semantics; break;
1721 case Iop_DivU64:
1722 fn = &h_calc_udiv64_w_arm_semantics; break;
1723 case Iop_DivS64:
1724 fn = &h_calc_sdiv64_w_arm_semantics; break;
1725 default:
1726 break;
1729 if (fn) {
1730 HReg regL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1731 HReg regR = iselIntExpr_R(env, e->Iex.Binop.arg2);
1732 HReg res = newVRegI(env);
1733 addInstr(env, ARM64Instr_MovI(hregARM64_X0(), regL));
1734 addInstr(env, ARM64Instr_MovI(hregARM64_X1(), regR));
1735 addInstr(env, ARM64Instr_Call( ARM64cc_AL, (Addr)fn,
1736 2, mk_RetLoc_simple(RLPri_Int) ));
1737 addInstr(env, ARM64Instr_MovI(res, hregARM64_X0()));
1738 return res;
1741 break;
1744 /* --------- UNARY OP --------- */
1745 case Iex_Unop: {
1747 switch (e->Iex.Unop.op) {
1748 case Iop_16Uto64: {
1749 /* This probably doesn't occur often enough to be worth
1750 rolling the extension into the load. */
1751 IRExpr* arg = e->Iex.Unop.arg;
1752 HReg src = iselIntExpr_R(env, arg);
1753 HReg dst = widen_z_16_to_64(env, src);
1754 return dst;
1756 case Iop_32Uto64: {
1757 IRExpr* arg = e->Iex.Unop.arg;
1758 if (arg->tag == Iex_Load) {
1759 /* This correctly zero extends because _LdSt32 is
1760 defined to do a zero extending load. */
1761 HReg dst = newVRegI(env);
1762 ARM64AMode* am
1763 = iselIntExpr_AMode(env, arg->Iex.Load.addr, Ity_I32);
1764 addInstr(env, ARM64Instr_LdSt32(True/*isLoad*/, dst, am));
1765 return dst;
1767 /* else be lame and mask it */
1768 HReg src = iselIntExpr_R(env, arg);
1769 HReg dst = widen_z_32_to_64(env, src);
1770 return dst;
1772 case Iop_8Uto32: /* Just freeload on the 8Uto64 case */
1773 case Iop_8Uto64: {
1774 IRExpr* arg = e->Iex.Unop.arg;
1775 if (arg->tag == Iex_Load) {
1776 /* This correctly zero extends because _LdSt8 is
1777 defined to do a zero extending load. */
1778 HReg dst = newVRegI(env);
1779 ARM64AMode* am
1780 = iselIntExpr_AMode(env, arg->Iex.Load.addr, Ity_I8);
1781 addInstr(env, ARM64Instr_LdSt8(True/*isLoad*/, dst, am));
1782 return dst;
1784 /* else be lame and mask it */
1785 HReg src = iselIntExpr_R(env, arg);
1786 HReg dst = widen_z_8_to_64(env, src);
1787 return dst;
1789 case Iop_128HIto64: {
1790 HReg rHi, rLo;
1791 iselInt128Expr(&rHi,&rLo, env, e->Iex.Unop.arg);
1792 return rHi; /* and abandon rLo */
1794 case Iop_8Sto32: case Iop_8Sto64: {
1795 IRExpr* arg = e->Iex.Unop.arg;
1796 HReg src = iselIntExpr_R(env, arg);
1797 HReg dst = widen_s_8_to_64(env, src);
1798 return dst;
1800 case Iop_16Sto32: case Iop_16Sto64: {
1801 IRExpr* arg = e->Iex.Unop.arg;
1802 HReg src = iselIntExpr_R(env, arg);
1803 HReg dst = widen_s_16_to_64(env, src);
1804 return dst;
1806 case Iop_32Sto64: {
1807 IRExpr* arg = e->Iex.Unop.arg;
1808 HReg src = iselIntExpr_R(env, arg);
1809 HReg dst = widen_s_32_to_64(env, src);
1810 return dst;
1812 case Iop_Not32:
1813 case Iop_Not64: {
1814 HReg dst = newVRegI(env);
1815 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1816 addInstr(env, ARM64Instr_Unary(dst, src, ARM64un_NOT));
1817 return dst;
1819 case Iop_Clz64: {
1820 HReg dst = newVRegI(env);
1821 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1822 addInstr(env, ARM64Instr_Unary(dst, src, ARM64un_CLZ));
1823 return dst;
1825 case Iop_Left32:
1826 case Iop_Left64: {
1827 /* Left64(src) = src | -src. Left32 can use the same
1828 implementation since in that case we don't care what
1829 the upper 32 bits become. */
1830 HReg dst = newVRegI(env);
1831 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1832 addInstr(env, ARM64Instr_Unary(dst, src, ARM64un_NEG));
1833 addInstr(env, ARM64Instr_Logic(dst, dst, ARM64RIL_R(src),
1834 ARM64lo_OR));
1835 return dst;
1837 case Iop_CmpwNEZ64: {
1838 /* CmpwNEZ64(src) = (src == 0) ? 0...0 : 1...1
1839 = Left64(src) >>s 63 */
1840 HReg dst = newVRegI(env);
1841 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1842 addInstr(env, ARM64Instr_Unary(dst, src, ARM64un_NEG));
1843 addInstr(env, ARM64Instr_Logic(dst, dst, ARM64RIL_R(src),
1844 ARM64lo_OR));
1845 addInstr(env, ARM64Instr_Shift(dst, dst, ARM64RI6_I6(63),
1846 ARM64sh_SAR));
1847 return dst;
1849 case Iop_CmpwNEZ32: {
1850 /* CmpwNEZ32(src) = CmpwNEZ64(src & 0xFFFFFFFF)
1851 = Left64(src & 0xFFFFFFFF) >>s 63 */
1852 HReg dst = newVRegI(env);
1853 HReg pre = iselIntExpr_R(env, e->Iex.Unop.arg);
1854 HReg src = widen_z_32_to_64(env, pre);
1855 addInstr(env, ARM64Instr_Unary(dst, src, ARM64un_NEG));
1856 addInstr(env, ARM64Instr_Logic(dst, dst, ARM64RIL_R(src),
1857 ARM64lo_OR));
1858 addInstr(env, ARM64Instr_Shift(dst, dst, ARM64RI6_I6(63),
1859 ARM64sh_SAR));
1860 return dst;
1862 case Iop_V128to64: case Iop_V128HIto64: {
1863 HReg dst = newVRegI(env);
1864 HReg src = iselV128Expr(env, e->Iex.Unop.arg);
1865 UInt laneNo = (e->Iex.Unop.op == Iop_V128HIto64) ? 1 : 0;
1866 addInstr(env, ARM64Instr_VXfromQ(dst, src, laneNo));
1867 return dst;
1869 case Iop_ReinterpF64asI64: {
1870 HReg dst = newVRegI(env);
1871 HReg src = iselDblExpr(env, e->Iex.Unop.arg);
1872 addInstr(env, ARM64Instr_VXfromDorS(dst, src, True/*fromD*/));
1873 return dst;
1875 case Iop_ReinterpF32asI32: {
1876 HReg dst = newVRegI(env);
1877 HReg src = iselFltExpr(env, e->Iex.Unop.arg);
1878 addInstr(env, ARM64Instr_VXfromDorS(dst, src, False/*!fromD*/));
1879 return dst;
1881 case Iop_1Sto16:
1882 case Iop_1Sto32:
1883 case Iop_1Sto64: {
1884 /* As with the iselStmt case for 'tmp:I1 = expr', we could
1885 do a lot better here if it ever became necessary. */
1886 HReg zero = newVRegI(env);
1887 HReg one = newVRegI(env);
1888 HReg dst = newVRegI(env);
1889 addInstr(env, ARM64Instr_Imm64(zero, 0));
1890 addInstr(env, ARM64Instr_Imm64(one, 1));
1891 ARM64CondCode cc = iselCondCode(env, e->Iex.Unop.arg);
1892 addInstr(env, ARM64Instr_CSel(dst, one, zero, cc));
1893 addInstr(env, ARM64Instr_Shift(dst, dst, ARM64RI6_I6(63),
1894 ARM64sh_SHL));
1895 addInstr(env, ARM64Instr_Shift(dst, dst, ARM64RI6_I6(63),
1896 ARM64sh_SAR));
1897 return dst;
1899 case Iop_NarrowUn16to8x8:
1900 case Iop_NarrowUn32to16x4:
1901 case Iop_NarrowUn64to32x2:
1902 case Iop_QNarrowUn16Sto8Sx8:
1903 case Iop_QNarrowUn32Sto16Sx4:
1904 case Iop_QNarrowUn64Sto32Sx2:
1905 case Iop_QNarrowUn16Uto8Ux8:
1906 case Iop_QNarrowUn32Uto16Ux4:
1907 case Iop_QNarrowUn64Uto32Ux2:
1908 case Iop_QNarrowUn16Sto8Ux8:
1909 case Iop_QNarrowUn32Sto16Ux4:
1910 case Iop_QNarrowUn64Sto32Ux2:
1912 HReg src = iselV128Expr(env, e->Iex.Unop.arg);
1913 HReg tmp = newVRegV(env);
1914 HReg dst = newVRegI(env);
1915 UInt dszBlg2 = 3; /* illegal */
1916 ARM64VecNarrowOp op = ARM64vecna_INVALID;
1917 switch (e->Iex.Unop.op) {
1918 case Iop_NarrowUn16to8x8:
1919 dszBlg2 = 0; op = ARM64vecna_XTN; break;
1920 case Iop_NarrowUn32to16x4:
1921 dszBlg2 = 1; op = ARM64vecna_XTN; break;
1922 case Iop_NarrowUn64to32x2:
1923 dszBlg2 = 2; op = ARM64vecna_XTN; break;
1924 case Iop_QNarrowUn16Sto8Sx8:
1925 dszBlg2 = 0; op = ARM64vecna_SQXTN; break;
1926 case Iop_QNarrowUn32Sto16Sx4:
1927 dszBlg2 = 1; op = ARM64vecna_SQXTN; break;
1928 case Iop_QNarrowUn64Sto32Sx2:
1929 dszBlg2 = 2; op = ARM64vecna_SQXTN; break;
1930 case Iop_QNarrowUn16Uto8Ux8:
1931 dszBlg2 = 0; op = ARM64vecna_UQXTN; break;
1932 case Iop_QNarrowUn32Uto16Ux4:
1933 dszBlg2 = 1; op = ARM64vecna_UQXTN; break;
1934 case Iop_QNarrowUn64Uto32Ux2:
1935 dszBlg2 = 2; op = ARM64vecna_UQXTN; break;
1936 case Iop_QNarrowUn16Sto8Ux8:
1937 dszBlg2 = 0; op = ARM64vecna_SQXTUN; break;
1938 case Iop_QNarrowUn32Sto16Ux4:
1939 dszBlg2 = 1; op = ARM64vecna_SQXTUN; break;
1940 case Iop_QNarrowUn64Sto32Ux2:
1941 dszBlg2 = 2; op = ARM64vecna_SQXTUN; break;
1942 default:
1943 vassert(0);
1945 addInstr(env, ARM64Instr_VNarrowV(op, dszBlg2, tmp, src));
1946 addInstr(env, ARM64Instr_VXfromQ(dst, tmp, 0/*laneNo*/));
1947 return dst;
1949 case Iop_1Uto64: {
1950 /* 1Uto64(tmp). */
1951 HReg dst = newVRegI(env);
1952 if (e->Iex.Unop.arg->tag == Iex_RdTmp) {
1953 ARM64RIL* one = mb_mkARM64RIL_I(1);
1954 HReg src = lookupIRTemp(env, e->Iex.Unop.arg->Iex.RdTmp.tmp);
1955 vassert(one);
1956 addInstr(env, ARM64Instr_Logic(dst, src, one, ARM64lo_AND));
1957 } else {
1958 /* CLONE-01 */
1959 HReg zero = newVRegI(env);
1960 HReg one = newVRegI(env);
1961 addInstr(env, ARM64Instr_Imm64(zero, 0));
1962 addInstr(env, ARM64Instr_Imm64(one, 1));
1963 ARM64CondCode cc = iselCondCode(env, e->Iex.Unop.arg);
1964 addInstr(env, ARM64Instr_CSel(dst, one, zero, cc));
1966 return dst;
1968 case Iop_64to32:
1969 case Iop_64to16:
1970 case Iop_64to8:
1971 /* These are no-ops. */
1972 return iselIntExpr_R(env, e->Iex.Unop.arg);
1974 default:
1975 break;
1978 break;
1981 /* --------- GET --------- */
1982 case Iex_Get: {
1983 if (ty == Ity_I64
1984 && 0 == (e->Iex.Get.offset & 7) && e->Iex.Get.offset < (8<<12)-8) {
1985 HReg dst = newVRegI(env);
1986 ARM64AMode* am
1987 = mk_baseblock_64bit_access_amode(e->Iex.Get.offset);
1988 addInstr(env, ARM64Instr_LdSt64(True/*isLoad*/, dst, am));
1989 return dst;
1991 if (ty == Ity_I32
1992 && 0 == (e->Iex.Get.offset & 3) && e->Iex.Get.offset < (4<<12)-4) {
1993 HReg dst = newVRegI(env);
1994 ARM64AMode* am
1995 = mk_baseblock_32bit_access_amode(e->Iex.Get.offset);
1996 addInstr(env, ARM64Instr_LdSt32(True/*isLoad*/, dst, am));
1997 return dst;
1999 if (ty == Ity_I16
2000 && 0 == (e->Iex.Get.offset & 1) && e->Iex.Get.offset < (2<<12)-2) {
2001 HReg dst = newVRegI(env);
2002 ARM64AMode* am
2003 = mk_baseblock_16bit_access_amode(e->Iex.Get.offset);
2004 addInstr(env, ARM64Instr_LdSt16(True/*isLoad*/, dst, am));
2005 return dst;
2007 if (ty == Ity_I8
2008 /* && no alignment check */ && e->Iex.Get.offset < (1<<12)-1) {
2009 HReg dst = newVRegI(env);
2010 ARM64AMode* am
2011 = mk_baseblock_8bit_access_amode(e->Iex.Get.offset);
2012 addInstr(env, ARM64Instr_LdSt8(True/*isLoad*/, dst, am));
2013 return dst;
2015 break;
2018 /* --------- CCALL --------- */
2019 case Iex_CCall: {
2020 HReg dst = newVRegI(env);
2021 vassert(ty == e->Iex.CCall.retty);
2023 /* be very restrictive for now. Only 64-bit ints allowed for
2024 args, and 64 bits for return type. Don't forget to change
2025 the RetLoc if more types are allowed in future. */
2026 if (e->Iex.CCall.retty != Ity_I64)
2027 goto irreducible;
2029 /* Marshal args, do the call, clear stack. */
2030 UInt addToSp = 0;
2031 RetLoc rloc = mk_RetLoc_INVALID();
2032 Bool ok = doHelperCall( &addToSp, &rloc, env, NULL/*guard*/,
2033 e->Iex.CCall.cee, e->Iex.CCall.retty,
2034 e->Iex.CCall.args );
2035 /* */
2036 if (ok) {
2037 vassert(is_sane_RetLoc(rloc));
2038 vassert(rloc.pri == RLPri_Int);
2039 vassert(addToSp == 0);
2040 addInstr(env, ARM64Instr_MovI(dst, hregARM64_X0()));
2041 return dst;
2043 /* else fall through; will hit the irreducible: label */
2046 /* --------- LITERAL --------- */
2047 /* 64-bit literals */
2048 case Iex_Const: {
2049 ULong u = 0;
2050 HReg dst = newVRegI(env);
2051 switch (e->Iex.Const.con->tag) {
2052 case Ico_U64: u = e->Iex.Const.con->Ico.U64; break;
2053 case Ico_U32: u = e->Iex.Const.con->Ico.U32; break;
2054 case Ico_U16: u = e->Iex.Const.con->Ico.U16; break;
2055 case Ico_U8: u = e->Iex.Const.con->Ico.U8; break;
2056 default: ppIRExpr(e); vpanic("iselIntExpr_R.Iex_Const(arm64)");
2058 addInstr(env, ARM64Instr_Imm64(dst, u));
2059 return dst;
2062 /* --------- MULTIPLEX --------- */
2063 case Iex_ITE: {
2064 /* ITE(ccexpr, iftrue, iffalse) */
2065 if (ty == Ity_I64 || ty == Ity_I32) {
2066 ARM64CondCode cc;
2067 HReg r1 = iselIntExpr_R(env, e->Iex.ITE.iftrue);
2068 HReg r0 = iselIntExpr_R(env, e->Iex.ITE.iffalse);
2069 HReg dst = newVRegI(env);
2070 cc = iselCondCode(env, e->Iex.ITE.cond);
2071 addInstr(env, ARM64Instr_CSel(dst, r1, r0, cc));
2072 return dst;
2074 break;
2077 default:
2078 break;
2079 } /* switch (e->tag) */
2081 /* We get here if no pattern matched. */
2082 irreducible:
2083 ppIRExpr(e);
2084 vpanic("iselIntExpr_R: cannot reduce tree");
2088 /*---------------------------------------------------------*/
2089 /*--- ISEL: Integer expressions (128 bit) ---*/
2090 /*---------------------------------------------------------*/
2092 /* Compute a 128-bit value into a register pair, which is returned as
2093 the first two parameters. As with iselIntExpr_R, these may be
2094 either real or virtual regs; in any case they must not be changed
2095 by subsequent code emitted by the caller. */
2097 static void iselInt128Expr ( HReg* rHi, HReg* rLo,
2098 ISelEnv* env, IRExpr* e )
2100 iselInt128Expr_wrk(rHi, rLo, env, e);
2101 # if 0
2102 vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
2103 # endif
2104 vassert(hregClass(*rHi) == HRcInt64);
2105 vassert(hregIsVirtual(*rHi));
2106 vassert(hregClass(*rLo) == HRcInt64);
2107 vassert(hregIsVirtual(*rLo));
2110 /* DO NOT CALL THIS DIRECTLY ! */
2111 static void iselInt128Expr_wrk ( HReg* rHi, HReg* rLo,
2112 ISelEnv* env, IRExpr* e )
2114 vassert(e);
2115 vassert(typeOfIRExpr(env->type_env,e) == Ity_I128);
2117 /* --------- BINARY ops --------- */
2118 if (e->tag == Iex_Binop) {
2119 switch (e->Iex.Binop.op) {
2120 /* 64 x 64 -> 128 multiply */
2121 case Iop_MullU64:
2122 case Iop_MullS64: {
2123 Bool syned = toBool(e->Iex.Binop.op == Iop_MullS64);
2124 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
2125 HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
2126 HReg dstLo = newVRegI(env);
2127 HReg dstHi = newVRegI(env);
2128 addInstr(env, ARM64Instr_Mul(dstLo, argL, argR,
2129 ARM64mul_PLAIN));
2130 addInstr(env, ARM64Instr_Mul(dstHi, argL, argR,
2131 syned ? ARM64mul_SX : ARM64mul_ZX));
2132 *rHi = dstHi;
2133 *rLo = dstLo;
2134 return;
2136 /* 64HLto128(e1,e2) */
2137 case Iop_64HLto128:
2138 *rHi = iselIntExpr_R(env, e->Iex.Binop.arg1);
2139 *rLo = iselIntExpr_R(env, e->Iex.Binop.arg2);
2140 return;
2141 default:
2142 break;
2144 } /* if (e->tag == Iex_Binop) */
2146 ppIRExpr(e);
2147 vpanic("iselInt128Expr(arm64)");
2151 /*---------------------------------------------------------*/
2152 /*--- ISEL: Vector expressions (128 bit) ---*/
2153 /*---------------------------------------------------------*/
2155 static HReg iselV128Expr ( ISelEnv* env, IRExpr* e )
2157 HReg r = iselV128Expr_wrk( env, e );
2158 vassert(hregClass(r) == HRcVec128);
2159 vassert(hregIsVirtual(r));
2160 return r;
2163 /* DO NOT CALL THIS DIRECTLY */
2164 static HReg iselV128Expr_wrk ( ISelEnv* env, IRExpr* e )
2166 IRType ty = typeOfIRExpr(env->type_env, e);
2167 vassert(e);
2168 vassert(ty == Ity_V128);
2170 if (e->tag == Iex_RdTmp) {
2171 return lookupIRTemp(env, e->Iex.RdTmp.tmp);
2174 if (e->tag == Iex_Const) {
2175 /* Only a very limited range of constants is handled. */
2176 vassert(e->Iex.Const.con->tag == Ico_V128);
2177 UShort con = e->Iex.Const.con->Ico.V128;
2178 HReg res = newVRegV(env);
2179 switch (con) {
2180 case 0x0000: case 0x000F: case 0x003F: case 0x00FF: case 0xFFFF:
2181 addInstr(env, ARM64Instr_VImmQ(res, con));
2182 return res;
2183 case 0x00F0:
2184 addInstr(env, ARM64Instr_VImmQ(res, 0x000F));
2185 addInstr(env, ARM64Instr_VExtV(res, res, res, 12));
2186 return res;
2187 case 0x0F00:
2188 addInstr(env, ARM64Instr_VImmQ(res, 0x000F));
2189 addInstr(env, ARM64Instr_VExtV(res, res, res, 8));
2190 return res;
2191 case 0x0FF0:
2192 addInstr(env, ARM64Instr_VImmQ(res, 0x00FF));
2193 addInstr(env, ARM64Instr_VExtV(res, res, res, 12));
2194 return res;
2195 case 0x0FFF:
2196 addInstr(env, ARM64Instr_VImmQ(res, 0x000F));
2197 addInstr(env, ARM64Instr_VExtV(res, res, res, 4));
2198 addInstr(env, ARM64Instr_VUnaryV(ARM64vecu_NOT, res, res));
2199 return res;
2200 case 0xF000:
2201 addInstr(env, ARM64Instr_VImmQ(res, 0x000F));
2202 addInstr(env, ARM64Instr_VExtV(res, res, res, 4));
2203 return res;
2204 case 0xFF00:
2205 addInstr(env, ARM64Instr_VImmQ(res, 0x00FF));
2206 addInstr(env, ARM64Instr_VExtV(res, res, res, 8));
2207 return res;
2208 default:
2209 break;
2211 /* Unhandled */
2212 goto v128_expr_bad;
2215 if (e->tag == Iex_Load) {
2216 HReg res = newVRegV(env);
2217 HReg rN = iselIntExpr_R(env, e->Iex.Load.addr);
2218 vassert(ty == Ity_V128);
2219 addInstr(env, ARM64Instr_VLdStQ(True/*isLoad*/, res, rN));
2220 return res;
2223 if (e->tag == Iex_Get) {
2224 UInt offs = (UInt)e->Iex.Get.offset;
2225 if (offs < (1<<12)) {
2226 HReg addr = mk_baseblock_128bit_access_addr(env, offs);
2227 HReg res = newVRegV(env);
2228 vassert(ty == Ity_V128);
2229 addInstr(env, ARM64Instr_VLdStQ(True/*isLoad*/, res, addr));
2230 return res;
2232 goto v128_expr_bad;
2235 if (e->tag == Iex_Unop) {
2237 /* Iop_ZeroHIXXofV128 cases */
2238 UShort imm16 = 0;
2239 switch (e->Iex.Unop.op) {
2240 case Iop_ZeroHI64ofV128: imm16 = 0x00FF; break;
2241 case Iop_ZeroHI96ofV128: imm16 = 0x000F; break;
2242 case Iop_ZeroHI112ofV128: imm16 = 0x0003; break;
2243 case Iop_ZeroHI120ofV128: imm16 = 0x0001; break;
2244 default: break;
2246 if (imm16 != 0) {
2247 HReg src = iselV128Expr(env, e->Iex.Unop.arg);
2248 HReg imm = newVRegV(env);
2249 HReg res = newVRegV(env);
2250 addInstr(env, ARM64Instr_VImmQ(imm, imm16));
2251 addInstr(env, ARM64Instr_VBinV(ARM64vecb_AND, res, src, imm));
2252 return res;
2255 /* Other cases */
2256 switch (e->Iex.Unop.op) {
2257 case Iop_NotV128:
2258 case Iop_Abs64Fx2: case Iop_Abs32Fx4:
2259 case Iop_Neg64Fx2: case Iop_Neg32Fx4:
2260 case Iop_Abs64x2: case Iop_Abs32x4:
2261 case Iop_Abs16x8: case Iop_Abs8x16:
2262 case Iop_Cls32x4: case Iop_Cls16x8: case Iop_Cls8x16:
2263 case Iop_Clz32x4: case Iop_Clz16x8: case Iop_Clz8x16:
2264 case Iop_Cnt8x16:
2265 case Iop_Reverse1sIn8_x16:
2266 case Iop_Reverse8sIn16_x8:
2267 case Iop_Reverse8sIn32_x4: case Iop_Reverse16sIn32_x4:
2268 case Iop_Reverse8sIn64_x2: case Iop_Reverse16sIn64_x2:
2269 case Iop_Reverse32sIn64_x2:
2270 case Iop_RecipEst32Ux4:
2271 case Iop_RSqrtEst32Ux4:
2272 case Iop_RecipEst64Fx2: case Iop_RecipEst32Fx4:
2273 case Iop_RSqrtEst64Fx2: case Iop_RSqrtEst32Fx4:
2275 HReg res = newVRegV(env);
2276 HReg arg = iselV128Expr(env, e->Iex.Unop.arg);
2277 Bool setRM = False;
2278 ARM64VecUnaryOp op = ARM64vecu_INVALID;
2279 switch (e->Iex.Unop.op) {
2280 case Iop_NotV128: op = ARM64vecu_NOT; break;
2281 case Iop_Abs64Fx2: op = ARM64vecu_FABS64x2; break;
2282 case Iop_Abs32Fx4: op = ARM64vecu_FABS32x4; break;
2283 case Iop_Neg64Fx2: op = ARM64vecu_FNEG64x2; break;
2284 case Iop_Neg32Fx4: op = ARM64vecu_FNEG32x4; break;
2285 case Iop_Abs64x2: op = ARM64vecu_ABS64x2; break;
2286 case Iop_Abs32x4: op = ARM64vecu_ABS32x4; break;
2287 case Iop_Abs16x8: op = ARM64vecu_ABS16x8; break;
2288 case Iop_Abs8x16: op = ARM64vecu_ABS8x16; break;
2289 case Iop_Cls32x4: op = ARM64vecu_CLS32x4; break;
2290 case Iop_Cls16x8: op = ARM64vecu_CLS16x8; break;
2291 case Iop_Cls8x16: op = ARM64vecu_CLS8x16; break;
2292 case Iop_Clz32x4: op = ARM64vecu_CLZ32x4; break;
2293 case Iop_Clz16x8: op = ARM64vecu_CLZ16x8; break;
2294 case Iop_Clz8x16: op = ARM64vecu_CLZ8x16; break;
2295 case Iop_Cnt8x16: op = ARM64vecu_CNT8x16; break;
2296 case Iop_Reverse1sIn8_x16: op = ARM64vecu_RBIT; break;
2297 case Iop_Reverse8sIn16_x8: op = ARM64vecu_REV1616B; break;
2298 case Iop_Reverse8sIn32_x4: op = ARM64vecu_REV3216B; break;
2299 case Iop_Reverse16sIn32_x4: op = ARM64vecu_REV328H; break;
2300 case Iop_Reverse8sIn64_x2: op = ARM64vecu_REV6416B; break;
2301 case Iop_Reverse16sIn64_x2: op = ARM64vecu_REV648H; break;
2302 case Iop_Reverse32sIn64_x2: op = ARM64vecu_REV644S; break;
2303 case Iop_RecipEst32Ux4: op = ARM64vecu_URECPE32x4; break;
2304 case Iop_RSqrtEst32Ux4: op = ARM64vecu_URSQRTE32x4; break;
2305 case Iop_RecipEst64Fx2: setRM = True;
2306 op = ARM64vecu_FRECPE64x2; break;
2307 case Iop_RecipEst32Fx4: setRM = True;
2308 op = ARM64vecu_FRECPE32x4; break;
2309 case Iop_RSqrtEst64Fx2: setRM = True;
2310 op = ARM64vecu_FRSQRTE64x2; break;
2311 case Iop_RSqrtEst32Fx4: setRM = True;
2312 op = ARM64vecu_FRSQRTE32x4; break;
2313 default: vassert(0);
2315 if (setRM) {
2316 // This is a bit of a kludge. We should do rm properly for
2317 // these recip-est insns, but that would require changing the
2318 // primop's type to take an rmode.
2319 set_FPCR_rounding_mode(env, IRExpr_Const(
2320 IRConst_U32(Irrm_NEAREST)));
2322 addInstr(env, ARM64Instr_VUnaryV(op, res, arg));
2323 return res;
2325 case Iop_CmpNEZ8x16:
2326 case Iop_CmpNEZ16x8:
2327 case Iop_CmpNEZ32x4:
2328 case Iop_CmpNEZ64x2: {
2329 HReg arg = iselV128Expr(env, e->Iex.Unop.arg);
2330 HReg zero = newVRegV(env);
2331 HReg res = newVRegV(env);
2332 ARM64VecBinOp cmp = ARM64vecb_INVALID;
2333 switch (e->Iex.Unop.op) {
2334 case Iop_CmpNEZ64x2: cmp = ARM64vecb_CMEQ64x2; break;
2335 case Iop_CmpNEZ32x4: cmp = ARM64vecb_CMEQ32x4; break;
2336 case Iop_CmpNEZ16x8: cmp = ARM64vecb_CMEQ16x8; break;
2337 case Iop_CmpNEZ8x16: cmp = ARM64vecb_CMEQ8x16; break;
2338 default: vassert(0);
2340 // This is pretty feeble. Better: use CMP against zero
2341 // and avoid the extra instruction and extra register.
2342 addInstr(env, ARM64Instr_VImmQ(zero, 0x0000));
2343 addInstr(env, ARM64Instr_VBinV(cmp, res, arg, zero));
2344 addInstr(env, ARM64Instr_VUnaryV(ARM64vecu_NOT, res, res));
2345 return res;
2347 case Iop_V256toV128_0:
2348 case Iop_V256toV128_1: {
2349 HReg vHi, vLo;
2350 iselV256Expr(&vHi, &vLo, env, e->Iex.Unop.arg);
2351 return (e->Iex.Unop.op == Iop_V256toV128_1) ? vHi : vLo;
2353 case Iop_64UtoV128: {
2354 HReg res = newVRegV(env);
2355 HReg arg = iselIntExpr_R(env, e->Iex.Unop.arg);
2356 addInstr(env, ARM64Instr_VQfromX(res, arg));
2357 return res;
2359 case Iop_Widen8Sto16x8: {
2360 HReg res = newVRegV(env);
2361 HReg arg = iselIntExpr_R(env, e->Iex.Unop.arg);
2362 addInstr(env, ARM64Instr_VQfromX(res, arg));
2363 addInstr(env, ARM64Instr_VBinV(ARM64vecb_ZIP18x16, res, res, res));
2364 addInstr(env, ARM64Instr_VShiftImmV(ARM64vecshi_SSHR16x8,
2365 res, res, 8));
2366 return res;
2368 case Iop_Widen16Sto32x4: {
2369 HReg res = newVRegV(env);
2370 HReg arg = iselIntExpr_R(env, e->Iex.Unop.arg);
2371 addInstr(env, ARM64Instr_VQfromX(res, arg));
2372 addInstr(env, ARM64Instr_VBinV(ARM64vecb_ZIP116x8, res, res, res));
2373 addInstr(env, ARM64Instr_VShiftImmV(ARM64vecshi_SSHR32x4,
2374 res, res, 16));
2375 return res;
2377 case Iop_Widen32Sto64x2: {
2378 HReg res = newVRegV(env);
2379 HReg arg = iselIntExpr_R(env, e->Iex.Unop.arg);
2380 addInstr(env, ARM64Instr_VQfromX(res, arg));
2381 addInstr(env, ARM64Instr_VBinV(ARM64vecb_ZIP132x4, res, res, res));
2382 addInstr(env, ARM64Instr_VShiftImmV(ARM64vecshi_SSHR64x2,
2383 res, res, 32));
2384 return res;
2386 /* ... */
2387 default:
2388 break;
2389 } /* switch on the unop */
2390 } /* if (e->tag == Iex_Unop) */
2392 if (e->tag == Iex_Binop) {
2393 switch (e->Iex.Binop.op) {
2394 case Iop_Sqrt32Fx4:
2395 case Iop_Sqrt64Fx2: {
2396 HReg arg = iselV128Expr(env, e->Iex.Binop.arg2);
2397 HReg res = newVRegV(env);
2398 set_FPCR_rounding_mode(env, e->Iex.Binop.arg1);
2399 ARM64VecUnaryOp op
2400 = e->Iex.Binop.op == Iop_Sqrt32Fx4
2401 ? ARM64vecu_FSQRT32x4 : ARM64vecu_FSQRT64x2;
2402 addInstr(env, ARM64Instr_VUnaryV(op, res, arg));
2403 return res;
2405 case Iop_64HLtoV128: {
2406 HReg res = newVRegV(env);
2407 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
2408 HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
2409 addInstr(env, ARM64Instr_VQfromXX(res, argL, argR));
2410 return res;
2412 /* -- Cases where we can generate a simple three-reg instruction. -- */
2413 case Iop_AndV128:
2414 case Iop_OrV128:
2415 case Iop_XorV128:
2416 case Iop_Max32Ux4: case Iop_Max16Ux8: case Iop_Max8Ux16:
2417 case Iop_Min32Ux4: case Iop_Min16Ux8: case Iop_Min8Ux16:
2418 case Iop_Max32Sx4: case Iop_Max16Sx8: case Iop_Max8Sx16:
2419 case Iop_Min32Sx4: case Iop_Min16Sx8: case Iop_Min8Sx16:
2420 case Iop_Add64x2: case Iop_Add32x4:
2421 case Iop_Add16x8: case Iop_Add8x16:
2422 case Iop_Sub64x2: case Iop_Sub32x4:
2423 case Iop_Sub16x8: case Iop_Sub8x16:
2424 case Iop_Mul32x4: case Iop_Mul16x8: case Iop_Mul8x16:
2425 case Iop_CmpEQ64x2: case Iop_CmpEQ32x4:
2426 case Iop_CmpEQ16x8: case Iop_CmpEQ8x16:
2427 case Iop_CmpGT64Ux2: case Iop_CmpGT32Ux4:
2428 case Iop_CmpGT16Ux8: case Iop_CmpGT8Ux16:
2429 case Iop_CmpGT64Sx2: case Iop_CmpGT32Sx4:
2430 case Iop_CmpGT16Sx8: case Iop_CmpGT8Sx16:
2431 case Iop_CmpEQ64Fx2: case Iop_CmpEQ32Fx4:
2432 case Iop_CmpLE64Fx2: case Iop_CmpLE32Fx4:
2433 case Iop_CmpLT64Fx2: case Iop_CmpLT32Fx4:
2434 case Iop_Perm8x16:
2435 case Iop_InterleaveLO64x2: case Iop_CatEvenLanes32x4:
2436 case Iop_CatEvenLanes16x8: case Iop_CatEvenLanes8x16:
2437 case Iop_InterleaveHI64x2: case Iop_CatOddLanes32x4:
2438 case Iop_CatOddLanes16x8: case Iop_CatOddLanes8x16:
2439 case Iop_InterleaveHI32x4:
2440 case Iop_InterleaveHI16x8: case Iop_InterleaveHI8x16:
2441 case Iop_InterleaveLO32x4:
2442 case Iop_InterleaveLO16x8: case Iop_InterleaveLO8x16:
2443 case Iop_PolynomialMul8x16:
2444 case Iop_QAdd64Sx2: case Iop_QAdd32Sx4:
2445 case Iop_QAdd16Sx8: case Iop_QAdd8Sx16:
2446 case Iop_QAdd64Ux2: case Iop_QAdd32Ux4:
2447 case Iop_QAdd16Ux8: case Iop_QAdd8Ux16:
2448 case Iop_QSub64Sx2: case Iop_QSub32Sx4:
2449 case Iop_QSub16Sx8: case Iop_QSub8Sx16:
2450 case Iop_QSub64Ux2: case Iop_QSub32Ux4:
2451 case Iop_QSub16Ux8: case Iop_QSub8Ux16:
2452 case Iop_QDMulHi32Sx4: case Iop_QDMulHi16Sx8:
2453 case Iop_QRDMulHi32Sx4: case Iop_QRDMulHi16Sx8:
2454 case Iop_Sh8Sx16: case Iop_Sh16Sx8:
2455 case Iop_Sh32Sx4: case Iop_Sh64Sx2:
2456 case Iop_Sh8Ux16: case Iop_Sh16Ux8:
2457 case Iop_Sh32Ux4: case Iop_Sh64Ux2:
2458 case Iop_Rsh8Sx16: case Iop_Rsh16Sx8:
2459 case Iop_Rsh32Sx4: case Iop_Rsh64Sx2:
2460 case Iop_Rsh8Ux16: case Iop_Rsh16Ux8:
2461 case Iop_Rsh32Ux4: case Iop_Rsh64Ux2:
2462 case Iop_Max64Fx2: case Iop_Max32Fx4:
2463 case Iop_Min64Fx2: case Iop_Min32Fx4:
2464 case Iop_RecipStep64Fx2: case Iop_RecipStep32Fx4:
2465 case Iop_RSqrtStep64Fx2: case Iop_RSqrtStep32Fx4:
2467 HReg res = newVRegV(env);
2468 HReg argL = iselV128Expr(env, e->Iex.Binop.arg1);
2469 HReg argR = iselV128Expr(env, e->Iex.Binop.arg2);
2470 Bool sw = False;
2471 Bool setRM = False;
2472 ARM64VecBinOp op = ARM64vecb_INVALID;
2473 switch (e->Iex.Binop.op) {
2474 case Iop_AndV128: op = ARM64vecb_AND; break;
2475 case Iop_OrV128: op = ARM64vecb_ORR; break;
2476 case Iop_XorV128: op = ARM64vecb_XOR; break;
2477 case Iop_Max32Ux4: op = ARM64vecb_UMAX32x4; break;
2478 case Iop_Max16Ux8: op = ARM64vecb_UMAX16x8; break;
2479 case Iop_Max8Ux16: op = ARM64vecb_UMAX8x16; break;
2480 case Iop_Min32Ux4: op = ARM64vecb_UMIN32x4; break;
2481 case Iop_Min16Ux8: op = ARM64vecb_UMIN16x8; break;
2482 case Iop_Min8Ux16: op = ARM64vecb_UMIN8x16; break;
2483 case Iop_Max32Sx4: op = ARM64vecb_SMAX32x4; break;
2484 case Iop_Max16Sx8: op = ARM64vecb_SMAX16x8; break;
2485 case Iop_Max8Sx16: op = ARM64vecb_SMAX8x16; break;
2486 case Iop_Min32Sx4: op = ARM64vecb_SMIN32x4; break;
2487 case Iop_Min16Sx8: op = ARM64vecb_SMIN16x8; break;
2488 case Iop_Min8Sx16: op = ARM64vecb_SMIN8x16; break;
2489 case Iop_Add64x2: op = ARM64vecb_ADD64x2; break;
2490 case Iop_Add32x4: op = ARM64vecb_ADD32x4; break;
2491 case Iop_Add16x8: op = ARM64vecb_ADD16x8; break;
2492 case Iop_Add8x16: op = ARM64vecb_ADD8x16; break;
2493 case Iop_Sub64x2: op = ARM64vecb_SUB64x2; break;
2494 case Iop_Sub32x4: op = ARM64vecb_SUB32x4; break;
2495 case Iop_Sub16x8: op = ARM64vecb_SUB16x8; break;
2496 case Iop_Sub8x16: op = ARM64vecb_SUB8x16; break;
2497 case Iop_Mul32x4: op = ARM64vecb_MUL32x4; break;
2498 case Iop_Mul16x8: op = ARM64vecb_MUL16x8; break;
2499 case Iop_Mul8x16: op = ARM64vecb_MUL8x16; break;
2500 case Iop_CmpEQ64x2: op = ARM64vecb_CMEQ64x2; break;
2501 case Iop_CmpEQ32x4: op = ARM64vecb_CMEQ32x4; break;
2502 case Iop_CmpEQ16x8: op = ARM64vecb_CMEQ16x8; break;
2503 case Iop_CmpEQ8x16: op = ARM64vecb_CMEQ8x16; break;
2504 case Iop_CmpGT64Ux2: op = ARM64vecb_CMHI64x2; break;
2505 case Iop_CmpGT32Ux4: op = ARM64vecb_CMHI32x4; break;
2506 case Iop_CmpGT16Ux8: op = ARM64vecb_CMHI16x8; break;
2507 case Iop_CmpGT8Ux16: op = ARM64vecb_CMHI8x16; break;
2508 case Iop_CmpGT64Sx2: op = ARM64vecb_CMGT64x2; break;
2509 case Iop_CmpGT32Sx4: op = ARM64vecb_CMGT32x4; break;
2510 case Iop_CmpGT16Sx8: op = ARM64vecb_CMGT16x8; break;
2511 case Iop_CmpGT8Sx16: op = ARM64vecb_CMGT8x16; break;
2512 case Iop_CmpEQ64Fx2: op = ARM64vecb_FCMEQ64x2; break;
2513 case Iop_CmpEQ32Fx4: op = ARM64vecb_FCMEQ32x4; break;
2514 case Iop_CmpLE64Fx2: op = ARM64vecb_FCMGE64x2; sw = True; break;
2515 case Iop_CmpLE32Fx4: op = ARM64vecb_FCMGE32x4; sw = True; break;
2516 case Iop_CmpLT64Fx2: op = ARM64vecb_FCMGT64x2; sw = True; break;
2517 case Iop_CmpLT32Fx4: op = ARM64vecb_FCMGT32x4; sw = True; break;
2518 case Iop_Perm8x16: op = ARM64vecb_TBL1; break;
2519 case Iop_InterleaveLO64x2: op = ARM64vecb_UZP164x2; sw = True;
2520 break;
2521 case Iop_CatEvenLanes32x4: op = ARM64vecb_UZP132x4; sw = True;
2522 break;
2523 case Iop_CatEvenLanes16x8: op = ARM64vecb_UZP116x8; sw = True;
2524 break;
2525 case Iop_CatEvenLanes8x16: op = ARM64vecb_UZP18x16; sw = True;
2526 break;
2527 case Iop_InterleaveHI64x2: op = ARM64vecb_UZP264x2; sw = True;
2528 break;
2529 case Iop_CatOddLanes32x4: op = ARM64vecb_UZP232x4; sw = True;
2530 break;
2531 case Iop_CatOddLanes16x8: op = ARM64vecb_UZP216x8; sw = True;
2532 break;
2533 case Iop_CatOddLanes8x16: op = ARM64vecb_UZP28x16; sw = True;
2534 break;
2535 case Iop_InterleaveHI32x4: op = ARM64vecb_ZIP232x4; sw = True;
2536 break;
2537 case Iop_InterleaveHI16x8: op = ARM64vecb_ZIP216x8; sw = True;
2538 break;
2539 case Iop_InterleaveHI8x16: op = ARM64vecb_ZIP28x16; sw = True;
2540 break;
2541 case Iop_InterleaveLO32x4: op = ARM64vecb_ZIP132x4; sw = True;
2542 break;
2543 case Iop_InterleaveLO16x8: op = ARM64vecb_ZIP116x8; sw = True;
2544 break;
2545 case Iop_InterleaveLO8x16: op = ARM64vecb_ZIP18x16; sw = True;
2546 break;
2547 case Iop_PolynomialMul8x16: op = ARM64vecb_PMUL8x16; break;
2548 case Iop_QAdd64Sx2: op = ARM64vecb_SQADD64x2; break;
2549 case Iop_QAdd32Sx4: op = ARM64vecb_SQADD32x4; break;
2550 case Iop_QAdd16Sx8: op = ARM64vecb_SQADD16x8; break;
2551 case Iop_QAdd8Sx16: op = ARM64vecb_SQADD8x16; break;
2552 case Iop_QAdd64Ux2: op = ARM64vecb_UQADD64x2; break;
2553 case Iop_QAdd32Ux4: op = ARM64vecb_UQADD32x4; break;
2554 case Iop_QAdd16Ux8: op = ARM64vecb_UQADD16x8; break;
2555 case Iop_QAdd8Ux16: op = ARM64vecb_UQADD8x16; break;
2556 case Iop_QSub64Sx2: op = ARM64vecb_SQSUB64x2; break;
2557 case Iop_QSub32Sx4: op = ARM64vecb_SQSUB32x4; break;
2558 case Iop_QSub16Sx8: op = ARM64vecb_SQSUB16x8; break;
2559 case Iop_QSub8Sx16: op = ARM64vecb_SQSUB8x16; break;
2560 case Iop_QSub64Ux2: op = ARM64vecb_UQSUB64x2; break;
2561 case Iop_QSub32Ux4: op = ARM64vecb_UQSUB32x4; break;
2562 case Iop_QSub16Ux8: op = ARM64vecb_UQSUB16x8; break;
2563 case Iop_QSub8Ux16: op = ARM64vecb_UQSUB8x16; break;
2564 case Iop_QDMulHi32Sx4: op = ARM64vecb_SQDMULH32x4; break;
2565 case Iop_QDMulHi16Sx8: op = ARM64vecb_SQDMULH16x8; break;
2566 case Iop_QRDMulHi32Sx4: op = ARM64vecb_SQRDMULH32x4; break;
2567 case Iop_QRDMulHi16Sx8: op = ARM64vecb_SQRDMULH16x8; break;
2568 case Iop_Sh8Sx16: op = ARM64vecb_SSHL8x16; break;
2569 case Iop_Sh16Sx8: op = ARM64vecb_SSHL16x8; break;
2570 case Iop_Sh32Sx4: op = ARM64vecb_SSHL32x4; break;
2571 case Iop_Sh64Sx2: op = ARM64vecb_SSHL64x2; break;
2572 case Iop_Sh8Ux16: op = ARM64vecb_USHL8x16; break;
2573 case Iop_Sh16Ux8: op = ARM64vecb_USHL16x8; break;
2574 case Iop_Sh32Ux4: op = ARM64vecb_USHL32x4; break;
2575 case Iop_Sh64Ux2: op = ARM64vecb_USHL64x2; break;
2576 case Iop_Rsh8Sx16: op = ARM64vecb_SRSHL8x16; break;
2577 case Iop_Rsh16Sx8: op = ARM64vecb_SRSHL16x8; break;
2578 case Iop_Rsh32Sx4: op = ARM64vecb_SRSHL32x4; break;
2579 case Iop_Rsh64Sx2: op = ARM64vecb_SRSHL64x2; break;
2580 case Iop_Rsh8Ux16: op = ARM64vecb_URSHL8x16; break;
2581 case Iop_Rsh16Ux8: op = ARM64vecb_URSHL16x8; break;
2582 case Iop_Rsh32Ux4: op = ARM64vecb_URSHL32x4; break;
2583 case Iop_Rsh64Ux2: op = ARM64vecb_URSHL64x2; break;
2584 case Iop_Max64Fx2: op = ARM64vecb_FMAX64x2; break;
2585 case Iop_Max32Fx4: op = ARM64vecb_FMAX32x4; break;
2586 case Iop_Min64Fx2: op = ARM64vecb_FMIN64x2; break;
2587 case Iop_Min32Fx4: op = ARM64vecb_FMIN32x4; break;
2588 case Iop_RecipStep64Fx2: setRM = True;
2589 op = ARM64vecb_FRECPS64x2; break;
2590 case Iop_RecipStep32Fx4: setRM = True;
2591 op = ARM64vecb_FRECPS32x4; break;
2592 case Iop_RSqrtStep64Fx2: setRM = True;
2593 op = ARM64vecb_FRSQRTS64x2; break;
2594 case Iop_RSqrtStep32Fx4: setRM = True;
2595 op = ARM64vecb_FRSQRTS32x4; break;
2596 default: vassert(0);
2598 if (setRM) {
2599 // This is a bit of a kludge. We should do rm properly for
2600 // these recip-step insns, but that would require changing the
2601 // primop's type to take an rmode.
2602 set_FPCR_rounding_mode(env, IRExpr_Const(
2603 IRConst_U32(Irrm_NEAREST)));
2605 if (sw) {
2606 addInstr(env, ARM64Instr_VBinV(op, res, argR, argL));
2607 } else {
2608 addInstr(env, ARM64Instr_VBinV(op, res, argL, argR));
2610 return res;
2612 /* -- These only have 2 operand instructions, so we have to first move
2613 the first argument into a new register, for modification. -- */
2614 case Iop_QAddExtUSsatSS8x16: case Iop_QAddExtUSsatSS16x8:
2615 case Iop_QAddExtUSsatSS32x4: case Iop_QAddExtUSsatSS64x2:
2616 case Iop_QAddExtSUsatUU8x16: case Iop_QAddExtSUsatUU16x8:
2617 case Iop_QAddExtSUsatUU32x4: case Iop_QAddExtSUsatUU64x2:
2619 HReg res = newVRegV(env);
2620 HReg argL = iselV128Expr(env, e->Iex.Binop.arg1);
2621 HReg argR = iselV128Expr(env, e->Iex.Binop.arg2);
2622 ARM64VecModifyOp op = ARM64vecmo_INVALID;
2623 switch (e->Iex.Binop.op) {
2624 /* In the following 8 cases, the US - SU switching is intended.
2625 See comments on the libvex_ir.h for details. Also in the
2626 ARM64 front end, where used these primops are generated. */
2627 case Iop_QAddExtUSsatSS8x16: op = ARM64vecmo_SUQADD8x16; break;
2628 case Iop_QAddExtUSsatSS16x8: op = ARM64vecmo_SUQADD16x8; break;
2629 case Iop_QAddExtUSsatSS32x4: op = ARM64vecmo_SUQADD32x4; break;
2630 case Iop_QAddExtUSsatSS64x2: op = ARM64vecmo_SUQADD64x2; break;
2631 case Iop_QAddExtSUsatUU8x16: op = ARM64vecmo_USQADD8x16; break;
2632 case Iop_QAddExtSUsatUU16x8: op = ARM64vecmo_USQADD16x8; break;
2633 case Iop_QAddExtSUsatUU32x4: op = ARM64vecmo_USQADD32x4; break;
2634 case Iop_QAddExtSUsatUU64x2: op = ARM64vecmo_USQADD64x2; break;
2635 default: vassert(0);
2637 /* The order of the operands is important. Although this is
2638 basically addition, the two operands are extended differently,
2639 making it important to get them into the correct registers in
2640 the instruction. */
2641 addInstr(env, ARM64Instr_VMov(16, res, argR));
2642 addInstr(env, ARM64Instr_VModifyV(op, res, argL));
2643 return res;
2645 /* -- Shifts by an immediate. -- */
2646 case Iop_ShrN64x2: case Iop_ShrN32x4:
2647 case Iop_ShrN16x8: case Iop_ShrN8x16:
2648 case Iop_SarN64x2: case Iop_SarN32x4:
2649 case Iop_SarN16x8: case Iop_SarN8x16:
2650 case Iop_ShlN64x2: case Iop_ShlN32x4:
2651 case Iop_ShlN16x8: case Iop_ShlN8x16:
2652 case Iop_QShlNsatUU64x2: case Iop_QShlNsatUU32x4:
2653 case Iop_QShlNsatUU16x8: case Iop_QShlNsatUU8x16:
2654 case Iop_QShlNsatSS64x2: case Iop_QShlNsatSS32x4:
2655 case Iop_QShlNsatSS16x8: case Iop_QShlNsatSS8x16:
2656 case Iop_QShlNsatSU64x2: case Iop_QShlNsatSU32x4:
2657 case Iop_QShlNsatSU16x8: case Iop_QShlNsatSU8x16:
2659 IRExpr* argL = e->Iex.Binop.arg1;
2660 IRExpr* argR = e->Iex.Binop.arg2;
2661 if (argR->tag == Iex_Const && argR->Iex.Const.con->tag == Ico_U8) {
2662 UInt amt = argR->Iex.Const.con->Ico.U8;
2663 UInt limLo = 0;
2664 UInt limHi = 0;
2665 ARM64VecShiftImmOp op = ARM64vecshi_INVALID;
2666 /* Establish the instruction to use. */
2667 switch (e->Iex.Binop.op) {
2668 case Iop_ShrN64x2: op = ARM64vecshi_USHR64x2; break;
2669 case Iop_ShrN32x4: op = ARM64vecshi_USHR32x4; break;
2670 case Iop_ShrN16x8: op = ARM64vecshi_USHR16x8; break;
2671 case Iop_ShrN8x16: op = ARM64vecshi_USHR8x16; break;
2672 case Iop_SarN64x2: op = ARM64vecshi_SSHR64x2; break;
2673 case Iop_SarN32x4: op = ARM64vecshi_SSHR32x4; break;
2674 case Iop_SarN16x8: op = ARM64vecshi_SSHR16x8; break;
2675 case Iop_SarN8x16: op = ARM64vecshi_SSHR8x16; break;
2676 case Iop_ShlN64x2: op = ARM64vecshi_SHL64x2; break;
2677 case Iop_ShlN32x4: op = ARM64vecshi_SHL32x4; break;
2678 case Iop_ShlN16x8: op = ARM64vecshi_SHL16x8; break;
2679 case Iop_ShlN8x16: op = ARM64vecshi_SHL8x16; break;
2680 case Iop_QShlNsatUU64x2: op = ARM64vecshi_UQSHL64x2; break;
2681 case Iop_QShlNsatUU32x4: op = ARM64vecshi_UQSHL32x4; break;
2682 case Iop_QShlNsatUU16x8: op = ARM64vecshi_UQSHL16x8; break;
2683 case Iop_QShlNsatUU8x16: op = ARM64vecshi_UQSHL8x16; break;
2684 case Iop_QShlNsatSS64x2: op = ARM64vecshi_SQSHL64x2; break;
2685 case Iop_QShlNsatSS32x4: op = ARM64vecshi_SQSHL32x4; break;
2686 case Iop_QShlNsatSS16x8: op = ARM64vecshi_SQSHL16x8; break;
2687 case Iop_QShlNsatSS8x16: op = ARM64vecshi_SQSHL8x16; break;
2688 case Iop_QShlNsatSU64x2: op = ARM64vecshi_SQSHLU64x2; break;
2689 case Iop_QShlNsatSU32x4: op = ARM64vecshi_SQSHLU32x4; break;
2690 case Iop_QShlNsatSU16x8: op = ARM64vecshi_SQSHLU16x8; break;
2691 case Iop_QShlNsatSU8x16: op = ARM64vecshi_SQSHLU8x16; break;
2692 default: vassert(0);
2694 /* Establish the shift limits, for sanity check purposes only. */
2695 switch (e->Iex.Binop.op) {
2696 case Iop_ShrN64x2: limLo = 1; limHi = 64; break;
2697 case Iop_ShrN32x4: limLo = 1; limHi = 32; break;
2698 case Iop_ShrN16x8: limLo = 1; limHi = 16; break;
2699 case Iop_ShrN8x16: limLo = 1; limHi = 8; break;
2700 case Iop_SarN64x2: limLo = 1; limHi = 64; break;
2701 case Iop_SarN32x4: limLo = 1; limHi = 32; break;
2702 case Iop_SarN16x8: limLo = 1; limHi = 16; break;
2703 case Iop_SarN8x16: limLo = 1; limHi = 8; break;
2704 case Iop_ShlN64x2: limLo = 0; limHi = 63; break;
2705 case Iop_ShlN32x4: limLo = 0; limHi = 31; break;
2706 case Iop_ShlN16x8: limLo = 0; limHi = 15; break;
2707 case Iop_ShlN8x16: limLo = 0; limHi = 7; break;
2708 case Iop_QShlNsatUU64x2: limLo = 0; limHi = 63; break;
2709 case Iop_QShlNsatUU32x4: limLo = 0; limHi = 31; break;
2710 case Iop_QShlNsatUU16x8: limLo = 0; limHi = 15; break;
2711 case Iop_QShlNsatUU8x16: limLo = 0; limHi = 7; break;
2712 case Iop_QShlNsatSS64x2: limLo = 0; limHi = 63; break;
2713 case Iop_QShlNsatSS32x4: limLo = 0; limHi = 31; break;
2714 case Iop_QShlNsatSS16x8: limLo = 0; limHi = 15; break;
2715 case Iop_QShlNsatSS8x16: limLo = 0; limHi = 7; break;
2716 case Iop_QShlNsatSU64x2: limLo = 0; limHi = 63; break;
2717 case Iop_QShlNsatSU32x4: limLo = 0; limHi = 31; break;
2718 case Iop_QShlNsatSU16x8: limLo = 0; limHi = 15; break;
2719 case Iop_QShlNsatSU8x16: limLo = 0; limHi = 7; break;
2720 default: vassert(0);
2722 /* For left shifts, the allowable amt values are
2723 0 .. lane_bits-1. For right shifts the allowable
2724 values are 1 .. lane_bits. */
2725 if (op != ARM64vecshi_INVALID && amt >= limLo && amt <= limHi) {
2726 HReg src = iselV128Expr(env, argL);
2727 HReg dst = newVRegV(env);
2728 addInstr(env, ARM64Instr_VShiftImmV(op, dst, src, amt));
2729 return dst;
2731 /* Special case some no-op shifts that the arm64 front end
2732 throws at us. We can't generate any instructions for these,
2733 but we don't need to either. */
2734 switch (e->Iex.Binop.op) {
2735 case Iop_ShrN64x2: case Iop_ShrN32x4:
2736 case Iop_ShrN16x8: case Iop_ShrN8x16:
2737 if (amt == 0) {
2738 return iselV128Expr(env, argL);
2740 break;
2741 default:
2742 break;
2744 /* otherwise unhandled */
2746 /* else fall out; this is unhandled */
2747 break;
2749 /* -- Saturating narrowing by an immediate -- */
2750 /* uu */
2751 case Iop_QandQShrNnarrow16Uto8Ux8:
2752 case Iop_QandQShrNnarrow32Uto16Ux4:
2753 case Iop_QandQShrNnarrow64Uto32Ux2:
2754 /* ss */
2755 case Iop_QandQSarNnarrow16Sto8Sx8:
2756 case Iop_QandQSarNnarrow32Sto16Sx4:
2757 case Iop_QandQSarNnarrow64Sto32Sx2:
2758 /* su */
2759 case Iop_QandQSarNnarrow16Sto8Ux8:
2760 case Iop_QandQSarNnarrow32Sto16Ux4:
2761 case Iop_QandQSarNnarrow64Sto32Ux2:
2762 /* ruu */
2763 case Iop_QandQRShrNnarrow16Uto8Ux8:
2764 case Iop_QandQRShrNnarrow32Uto16Ux4:
2765 case Iop_QandQRShrNnarrow64Uto32Ux2:
2766 /* rss */
2767 case Iop_QandQRSarNnarrow16Sto8Sx8:
2768 case Iop_QandQRSarNnarrow32Sto16Sx4:
2769 case Iop_QandQRSarNnarrow64Sto32Sx2:
2770 /* rsu */
2771 case Iop_QandQRSarNnarrow16Sto8Ux8:
2772 case Iop_QandQRSarNnarrow32Sto16Ux4:
2773 case Iop_QandQRSarNnarrow64Sto32Ux2:
2775 IRExpr* argL = e->Iex.Binop.arg1;
2776 IRExpr* argR = e->Iex.Binop.arg2;
2777 if (argR->tag == Iex_Const && argR->Iex.Const.con->tag == Ico_U8) {
2778 UInt amt = argR->Iex.Const.con->Ico.U8;
2779 UInt limit = 0;
2780 ARM64VecShiftImmOp op = ARM64vecshi_INVALID;
2781 switch (e->Iex.Binop.op) {
2782 /* uu */
2783 case Iop_QandQShrNnarrow64Uto32Ux2:
2784 op = ARM64vecshi_UQSHRN2SD; limit = 64; break;
2785 case Iop_QandQShrNnarrow32Uto16Ux4:
2786 op = ARM64vecshi_UQSHRN4HS; limit = 32; break;
2787 case Iop_QandQShrNnarrow16Uto8Ux8:
2788 op = ARM64vecshi_UQSHRN8BH; limit = 16; break;
2789 /* ss */
2790 case Iop_QandQSarNnarrow64Sto32Sx2:
2791 op = ARM64vecshi_SQSHRN2SD; limit = 64; break;
2792 case Iop_QandQSarNnarrow32Sto16Sx4:
2793 op = ARM64vecshi_SQSHRN4HS; limit = 32; break;
2794 case Iop_QandQSarNnarrow16Sto8Sx8:
2795 op = ARM64vecshi_SQSHRN8BH; limit = 16; break;
2796 /* su */
2797 case Iop_QandQSarNnarrow64Sto32Ux2:
2798 op = ARM64vecshi_SQSHRUN2SD; limit = 64; break;
2799 case Iop_QandQSarNnarrow32Sto16Ux4:
2800 op = ARM64vecshi_SQSHRUN4HS; limit = 32; break;
2801 case Iop_QandQSarNnarrow16Sto8Ux8:
2802 op = ARM64vecshi_SQSHRUN8BH; limit = 16; break;
2803 /* ruu */
2804 case Iop_QandQRShrNnarrow64Uto32Ux2:
2805 op = ARM64vecshi_UQRSHRN2SD; limit = 64; break;
2806 case Iop_QandQRShrNnarrow32Uto16Ux4:
2807 op = ARM64vecshi_UQRSHRN4HS; limit = 32; break;
2808 case Iop_QandQRShrNnarrow16Uto8Ux8:
2809 op = ARM64vecshi_UQRSHRN8BH; limit = 16; break;
2810 /* rss */
2811 case Iop_QandQRSarNnarrow64Sto32Sx2:
2812 op = ARM64vecshi_SQRSHRN2SD; limit = 64; break;
2813 case Iop_QandQRSarNnarrow32Sto16Sx4:
2814 op = ARM64vecshi_SQRSHRN4HS; limit = 32; break;
2815 case Iop_QandQRSarNnarrow16Sto8Sx8:
2816 op = ARM64vecshi_SQRSHRN8BH; limit = 16; break;
2817 /* rsu */
2818 case Iop_QandQRSarNnarrow64Sto32Ux2:
2819 op = ARM64vecshi_SQRSHRUN2SD; limit = 64; break;
2820 case Iop_QandQRSarNnarrow32Sto16Ux4:
2821 op = ARM64vecshi_SQRSHRUN4HS; limit = 32; break;
2822 case Iop_QandQRSarNnarrow16Sto8Ux8:
2823 op = ARM64vecshi_SQRSHRUN8BH; limit = 16; break;
2824 /**/
2825 default:
2826 vassert(0);
2828 if (op != ARM64vecshi_INVALID && amt >= 1 && amt <= limit) {
2829 HReg src = iselV128Expr(env, argL);
2830 HReg dst = newVRegV(env);
2831 HReg fpsr = newVRegI(env);
2832 /* Clear FPSR.Q, do the operation, and return both its
2833 result and the new value of FPSR.Q. We can simply
2834 zero out FPSR since all the other bits have no relevance
2835 in VEX generated code. */
2836 addInstr(env, ARM64Instr_Imm64(fpsr, 0));
2837 addInstr(env, ARM64Instr_FPSR(True/*toFPSR*/, fpsr));
2838 addInstr(env, ARM64Instr_VShiftImmV(op, dst, src, amt));
2839 addInstr(env, ARM64Instr_FPSR(False/*!toFPSR*/, fpsr));
2840 addInstr(env, ARM64Instr_Shift(fpsr, fpsr, ARM64RI6_I6(27),
2841 ARM64sh_SHR));
2842 ARM64RIL* ril_one = mb_mkARM64RIL_I(1);
2843 vassert(ril_one);
2844 addInstr(env, ARM64Instr_Logic(fpsr,
2845 fpsr, ril_one, ARM64lo_AND));
2846 /* Now we have: the main (shift) result in the bottom half
2847 of |dst|, and the Q bit at the bottom of |fpsr|.
2848 Combining them with a "InterleaveLO64x2" style operation
2849 produces a 128 bit value, dst[63:0]:fpsr[63:0],
2850 which is what we want. */
2851 HReg scratch = newVRegV(env);
2852 addInstr(env, ARM64Instr_VQfromX(scratch, fpsr));
2853 addInstr(env, ARM64Instr_VBinV(ARM64vecb_UZP164x2,
2854 dst, dst, scratch));
2855 return dst;
2858 /* else fall out; this is unhandled */
2859 break;
2862 // Use Iop_SliceV128 in preference to Iop_ShlV128 and Iop_ShrV128,
2863 // as it is in some ways more general and often leads to better
2864 // code overall.
2865 case Iop_ShlV128:
2866 case Iop_ShrV128: {
2867 Bool isSHR = e->Iex.Binop.op == Iop_ShrV128;
2868 /* This is tricky. Generate an EXT instruction with zeroes in
2869 the high operand (shift right) or low operand (shift left).
2870 Note that we can only slice in the EXT instruction at a byte
2871 level of granularity, so the shift amount needs careful
2872 checking. */
2873 IRExpr* argL = e->Iex.Binop.arg1;
2874 IRExpr* argR = e->Iex.Binop.arg2;
2875 if (argR->tag == Iex_Const && argR->Iex.Const.con->tag == Ico_U8) {
2876 UInt amt = argR->Iex.Const.con->Ico.U8;
2877 Bool amtOK = False;
2878 switch (amt) {
2879 case 0x08: case 0x10: case 0x18: case 0x20: case 0x28:
2880 case 0x30: case 0x38: case 0x40: case 0x48: case 0x50:
2881 case 0x58: case 0x60: case 0x68: case 0x70: case 0x78:
2882 amtOK = True; break;
2884 /* We could also deal with amt==0 by copying the source to
2885 the destination, but there's no need for that so far. */
2886 if (amtOK) {
2887 HReg src = iselV128Expr(env, argL);
2888 HReg srcZ = newVRegV(env);
2889 addInstr(env, ARM64Instr_VImmQ(srcZ, 0x0000));
2890 UInt immB = amt / 8;
2891 vassert(immB >= 1 && immB <= 15);
2892 HReg dst = newVRegV(env);
2893 if (isSHR) {
2894 addInstr(env, ARM64Instr_VExtV(dst, src/*lo*/, srcZ/*hi*/,
2895 immB));
2896 } else {
2897 addInstr(env, ARM64Instr_VExtV(dst, srcZ/*lo*/, src/*hi*/,
2898 16 - immB));
2900 return dst;
2903 /* else fall out; this is unhandled */
2904 break;
2907 case Iop_PolynomialMull8x8:
2908 case Iop_Mull32Ux2:
2909 case Iop_Mull16Ux4:
2910 case Iop_Mull8Ux8:
2911 case Iop_Mull32Sx2:
2912 case Iop_Mull16Sx4:
2913 case Iop_Mull8Sx8:
2914 case Iop_QDMull32Sx2:
2915 case Iop_QDMull16Sx4:
2917 HReg iSrcL = iselIntExpr_R(env, e->Iex.Binop.arg1);
2918 HReg iSrcR = iselIntExpr_R(env, e->Iex.Binop.arg2);
2919 HReg vSrcL = newVRegV(env);
2920 HReg vSrcR = newVRegV(env);
2921 HReg dst = newVRegV(env);
2922 ARM64VecBinOp op = ARM64vecb_INVALID;
2923 switch (e->Iex.Binop.op) {
2924 case Iop_PolynomialMull8x8: op = ARM64vecb_PMULL8x8; break;
2925 case Iop_Mull32Ux2: op = ARM64vecb_UMULL2DSS; break;
2926 case Iop_Mull16Ux4: op = ARM64vecb_UMULL4SHH; break;
2927 case Iop_Mull8Ux8: op = ARM64vecb_UMULL8HBB; break;
2928 case Iop_Mull32Sx2: op = ARM64vecb_SMULL2DSS; break;
2929 case Iop_Mull16Sx4: op = ARM64vecb_SMULL4SHH; break;
2930 case Iop_Mull8Sx8: op = ARM64vecb_SMULL8HBB; break;
2931 case Iop_QDMull32Sx2: op = ARM64vecb_SQDMULL2DSS; break;
2932 case Iop_QDMull16Sx4: op = ARM64vecb_SQDMULL4SHH; break;
2933 default: vassert(0);
2935 addInstr(env, ARM64Instr_VQfromXX(vSrcL, iSrcL, iSrcL));
2936 addInstr(env, ARM64Instr_VQfromXX(vSrcR, iSrcR, iSrcR));
2937 addInstr(env, ARM64Instr_VBinV(op, dst, vSrcL, vSrcR));
2938 return dst;
2941 /* ... */
2942 default:
2943 break;
2944 } /* switch on the binop */
2945 } /* if (e->tag == Iex_Binop) */
2947 if (e->tag == Iex_Triop) {
2948 IRTriop* triop = e->Iex.Triop.details;
2949 ARM64VecBinOp vecbop = ARM64vecb_INVALID;
2950 switch (triop->op) {
2951 case Iop_Add64Fx2: vecbop = ARM64vecb_FADD64x2; break;
2952 case Iop_Sub64Fx2: vecbop = ARM64vecb_FSUB64x2; break;
2953 case Iop_Mul64Fx2: vecbop = ARM64vecb_FMUL64x2; break;
2954 case Iop_Div64Fx2: vecbop = ARM64vecb_FDIV64x2; break;
2955 case Iop_Add32Fx4: vecbop = ARM64vecb_FADD32x4; break;
2956 case Iop_Sub32Fx4: vecbop = ARM64vecb_FSUB32x4; break;
2957 case Iop_Mul32Fx4: vecbop = ARM64vecb_FMUL32x4; break;
2958 case Iop_Div32Fx4: vecbop = ARM64vecb_FDIV32x4; break;
2959 default: break;
2961 if (vecbop != ARM64vecb_INVALID) {
2962 HReg argL = iselV128Expr(env, triop->arg2);
2963 HReg argR = iselV128Expr(env, triop->arg3);
2964 HReg dst = newVRegV(env);
2965 set_FPCR_rounding_mode(env, triop->arg1);
2966 addInstr(env, ARM64Instr_VBinV(vecbop, dst, argL, argR));
2967 return dst;
2970 if (triop->op == Iop_SliceV128) {
2971 /* Note that, compared to ShlV128/ShrV128 just above, the shift
2972 amount here is in bytes, not bits. */
2973 IRExpr* argHi = triop->arg1;
2974 IRExpr* argLo = triop->arg2;
2975 IRExpr* argAmt = triop->arg3;
2976 if (argAmt->tag == Iex_Const && argAmt->Iex.Const.con->tag == Ico_U8) {
2977 UInt amt = argAmt->Iex.Const.con->Ico.U8;
2978 Bool amtOK = amt >= 1 && amt <= 15;
2979 /* We could also deal with amt==0 by copying argLO to
2980 the destination, but there's no need for that so far. */
2981 if (amtOK) {
2982 HReg srcHi = iselV128Expr(env, argHi);
2983 HReg srcLo = iselV128Expr(env, argLo);
2984 HReg dst = newVRegV(env);
2985 addInstr(env, ARM64Instr_VExtV(dst, srcLo, srcHi, amt));
2986 return dst;
2989 /* else fall out; this is unhandled */
2992 } /* if (e->tag == Iex_Triop) */
2994 v128_expr_bad:
2995 ppIRExpr(e);
2996 vpanic("iselV128Expr_wrk");
3000 /*---------------------------------------------------------*/
3001 /*--- ISEL: Floating point expressions (64 bit) ---*/
3002 /*---------------------------------------------------------*/
3004 /* Compute a 64-bit floating point value into a register, the identity
3005 of which is returned. As with iselIntExpr_R, the reg may be either
3006 real or virtual; in any case it must not be changed by subsequent
3007 code emitted by the caller. */
3009 static HReg iselDblExpr ( ISelEnv* env, IRExpr* e )
3011 HReg r = iselDblExpr_wrk( env, e );
3012 # if 0
3013 vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
3014 # endif
3015 vassert(hregClass(r) == HRcFlt64);
3016 vassert(hregIsVirtual(r));
3017 return r;
/* DO NOT CALL THIS DIRECTLY.  Worker for iselDblExpr: selects ARM64
   instructions to compute a 64-bit FP (Ity_F64) expression into a
   D register and returns that register. */
static HReg iselDblExpr_wrk ( ISelEnv* env, IRExpr* e )
{
   IRType ty = typeOfIRExpr(env->type_env,e);
   vassert(e);
   vassert(ty == Ity_F64);

   /* Temporary: hand back the register already bound to it. */
   if (e->tag == Iex_RdTmp) {
      return lookupIRTemp(env, e->Iex.RdTmp.tmp);
   }

   /* F64 constants: materialise the 64-bit pattern in an integer
      register, then transfer it to a D register. */
   if (e->tag == Iex_Const) {
      IRConst* con = e->Iex.Const.con;
      if (con->tag == Ico_F64i) {
         HReg src = newVRegI(env);
         HReg dst = newVRegD(env);
         addInstr(env, ARM64Instr_Imm64(src, con->Ico.F64i));
         addInstr(env, ARM64Instr_VDfromX(dst, src));
         return dst;
      }
      if (con->tag == Ico_F64) {
         HReg src = newVRegI(env);
         HReg dst = newVRegD(env);
         /* Reinterpret the double's bits as a ULong via a union, so
            as to avoid any numeric (value-changing) conversion. */
         union { Double d64; ULong u64; } u;
         vassert(sizeof(u) == 8);
         u.d64 = con->Ico.F64;
         addInstr(env, ARM64Instr_Imm64(src, u.u64));
         addInstr(env, ARM64Instr_VDfromX(dst, src));
         return dst;
      }
   }

   /* 64-bit FP load; only little-endian is handled. */
   if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
      vassert(e->Iex.Load.ty == Ity_F64);
      HReg addr = iselIntExpr_R(env, e->Iex.Load.addr);
      HReg res = newVRegD(env);
      addInstr(env, ARM64Instr_VLdStD(True/*isLoad*/, res, addr, 0));
      return res;
   }

   /* Guest-state read, provided the offset is expressible in
      VLdStD's immediate form (8-aligned, < 32768). */
   if (e->tag == Iex_Get) {
      Int offs = e->Iex.Get.offset;
      if (offs >= 0 && offs < 32768 && 0 == (offs & 7)) {
         HReg rD = newVRegD(env);
         HReg rN = get_baseblock_register();
         addInstr(env, ARM64Instr_VLdStD(True/*isLoad*/, rD, rN, offs));
         return rD;
      }
   }

   if (e->tag == Iex_Unop) {
      switch (e->Iex.Unop.op) {
         case Iop_NegF64: {
            HReg src = iselDblExpr(env, e->Iex.Unop.arg);
            HReg dst = newVRegD(env);
            addInstr(env, ARM64Instr_VUnaryD(ARM64fpu_NEG, dst, src));
            return dst;
         }
         case Iop_AbsF64: {
            HReg src = iselDblExpr(env, e->Iex.Unop.arg);
            HReg dst = newVRegD(env);
            addInstr(env, ARM64Instr_VUnaryD(ARM64fpu_ABS, dst, src));
            return dst;
         }
         case Iop_F32toF64: {
            /* Widening conversion; exact, hence no rounding mode. */
            HReg src = iselFltExpr(env, e->Iex.Unop.arg);
            HReg dst = newVRegD(env);
            addInstr(env, ARM64Instr_VCvtSD(True/*sToD*/, dst, src));
            return dst;
         }
         case Iop_F16toF64: {
            /* Widening conversion; exact, hence no rounding mode. */
            HReg src = iselF16Expr(env, e->Iex.Unop.arg);
            HReg dst = newVRegD(env);
            addInstr(env, ARM64Instr_VCvtHD(True/*hToD*/, dst, src));
            return dst;
         }
         case Iop_I32UtoF64:
         case Iop_I32StoF64: {
            /* Rounding mode is not involved here, since the
               conversion can always be done without loss of
               precision. */
            HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
            HReg dst = newVRegD(env);
            Bool syned = e->Iex.Unop.op == Iop_I32StoF64;
            ARM64CvtOp cvt_op = syned ? ARM64cvt_F64_I32S : ARM64cvt_F64_I32U;
            addInstr(env, ARM64Instr_VCvtI2F(cvt_op, dst, src));
            return dst;
         }
         default:
            break;
      }
   }

   if (e->tag == Iex_Binop) {
      switch (e->Iex.Binop.op) {
         /* Unary FP operations that carry an explicit IR rounding
            mode in arg1; arg2 is the operand. */
         case Iop_RoundF64toInt:
         case Iop_SqrtF64:
         case Iop_RecpExpF64: {
            HReg src = iselDblExpr(env, e->Iex.Binop.arg2);
            HReg dst = newVRegD(env);
            set_FPCR_rounding_mode(env, e->Iex.Binop.arg1);
            ARM64FpUnaryOp op = ARM64fpu_INVALID;
            switch (e->Iex.Binop.op) {
               case Iop_RoundF64toInt: op = ARM64fpu_RINT; break;
               case Iop_SqrtF64: op = ARM64fpu_SQRT; break;
               case Iop_RecpExpF64: op = ARM64fpu_RECPX; break;
               default: vassert(0);
            }
            addInstr(env, ARM64Instr_VUnaryD(op, dst, src));
            return dst;
         }
         case Iop_I64StoF64:
         case Iop_I64UtoF64: {
            /* 64-bit int -> F64 can be inexact, so the rounding mode
               (arg1) must be installed first. */
            ARM64CvtOp cvt_op = e->Iex.Binop.op == Iop_I64StoF64
                                   ? ARM64cvt_F64_I64S : ARM64cvt_F64_I64U;
            HReg srcI = iselIntExpr_R(env, e->Iex.Binop.arg2);
            set_FPCR_rounding_mode(env, e->Iex.Binop.arg1);
            HReg dstS = newVRegD(env);
            addInstr(env, ARM64Instr_VCvtI2F(cvt_op, dstS, srcI));
            return dstS;
         }
         default:
            break;
      }
   }

   /* Triops: the scalar FP arithmetic ops, with the rounding mode
      in arg1 and the operands in arg2/arg3. */
   if (e->tag == Iex_Triop) {
      IRTriop* triop = e->Iex.Triop.details;
      ARM64FpBinOp dblop = ARM64fpb_INVALID;
      switch (triop->op) {
         case Iop_DivF64: dblop = ARM64fpb_DIV; break;
         case Iop_MulF64: dblop = ARM64fpb_MUL; break;
         case Iop_SubF64: dblop = ARM64fpb_SUB; break;
         case Iop_AddF64: dblop = ARM64fpb_ADD; break;
         default: break;
      }
      if (dblop != ARM64fpb_INVALID) {
         HReg argL = iselDblExpr(env, triop->arg2);
         HReg argR = iselDblExpr(env, triop->arg3);
         HReg dst = newVRegD(env);
         set_FPCR_rounding_mode(env, triop->arg1);
         addInstr(env, ARM64Instr_VBinD(dblop, dst, argL, argR));
         return dst;
      }
   }

   /* ITE(ccexpr, iftrue, iffalse) -> FP conditional select. */
   if (e->tag == Iex_ITE) {
      ARM64CondCode cc;
      HReg r1 = iselDblExpr(env, e->Iex.ITE.iftrue);
      HReg r0 = iselDblExpr(env, e->Iex.ITE.iffalse);
      HReg dst = newVRegD(env);
      cc = iselCondCode(env, e->Iex.ITE.cond);
      addInstr(env, ARM64Instr_VFCSel(dst, r1, r0, cc, True/*64-bit*/));
      return dst;
   }

   ppIRExpr(e);
   vpanic("iselDblExpr_wrk");
}
3182 /*---------------------------------------------------------*/
3183 /*--- ISEL: Floating point expressions (32 bit) ---*/
3184 /*---------------------------------------------------------*/
3186 /* Compute a 32-bit floating point value into a register, the identity
3187 of which is returned. As with iselIntExpr_R, the reg may be either
3188 real or virtual; in any case it must not be changed by subsequent
3189 code emitted by the caller. Values are generated into HRcFlt64
3190 registers despite the values themselves being Ity_F32s. */
3192 static HReg iselFltExpr ( ISelEnv* env, IRExpr* e )
3194 HReg r = iselFltExpr_wrk( env, e );
3195 # if 0
3196 vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
3197 # endif
3198 vassert(hregClass(r) == HRcFlt64);
3199 vassert(hregIsVirtual(r));
3200 return r;
/* DO NOT CALL THIS DIRECTLY.  Worker for iselFltExpr: selects ARM64
   instructions to compute a 32-bit FP (Ity_F32) expression; the
   result nonetheless lives in an HRcFlt64-class register. */
static HReg iselFltExpr_wrk ( ISelEnv* env, IRExpr* e )
{
   IRType ty = typeOfIRExpr(env->type_env,e);
   vassert(e);
   vassert(ty == Ity_F32);

   /* Temporary: hand back the register already bound to it. */
   if (e->tag == Iex_RdTmp) {
      return lookupIRTemp(env, e->Iex.RdTmp.tmp);
   }

   if (e->tag == Iex_Const) {
      /* This is something of a kludge.  Since a 32 bit floating point
         zero is just .. all zeroes, just create a 64 bit zero word
         and transfer it.  This avoids having to create a SfromW
         instruction for this specific case. */
      IRConst* con = e->Iex.Const.con;
      if (con->tag == Ico_F32i && con->Ico.F32i == 0) {
         HReg src = newVRegI(env);
         HReg dst = newVRegD(env);
         addInstr(env, ARM64Instr_Imm64(src, 0));
         addInstr(env, ARM64Instr_VDfromX(dst, src));
         return dst;
      }
      if (con->tag == Ico_F32) {
         HReg src = newVRegI(env);
         HReg dst = newVRegD(env);
         /* Bit-copy the F32 into the low 32 bits of an integer
            register (union avoids a value-changing conversion),
            then move it across to the FP register file. */
         union { Float f32; UInt u32; } u;
         vassert(sizeof(u) == 4);
         u.f32 = con->Ico.F32;
         addInstr(env, ARM64Instr_Imm64(src, (ULong)u.u32));
         addInstr(env, ARM64Instr_VDfromX(dst, src));
         return dst;
      }
   }

   /* 32-bit FP load; only little-endian is handled. */
   if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
      vassert(e->Iex.Load.ty == Ity_F32);
      HReg addr = iselIntExpr_R(env, e->Iex.Load.addr);
      HReg res = newVRegD(env);
      addInstr(env, ARM64Instr_VLdStS(True/*isLoad*/, res, addr, 0));
      return res;
   }

   /* Guest-state read, provided the offset fits VLdStS's immediate
      form (4-aligned, < 16384). */
   if (e->tag == Iex_Get) {
      Int offs = e->Iex.Get.offset;
      if (offs >= 0 && offs < 16384 && 0 == (offs & 3)) {
         HReg rD = newVRegD(env);
         HReg rN = get_baseblock_register();
         addInstr(env, ARM64Instr_VLdStS(True/*isLoad*/, rD, rN, offs));
         return rD;
      }
   }

   if (e->tag == Iex_Unop) {
      switch (e->Iex.Unop.op) {
         case Iop_NegF32: {
            HReg src = iselFltExpr(env, e->Iex.Unop.arg);
            HReg dst = newVRegD(env);
            addInstr(env, ARM64Instr_VUnaryS(ARM64fpu_NEG, dst, src));
            return dst;
         }
         case Iop_AbsF32: {
            HReg src = iselFltExpr(env, e->Iex.Unop.arg);
            HReg dst = newVRegD(env);
            addInstr(env, ARM64Instr_VUnaryS(ARM64fpu_ABS, dst, src));
            return dst;
         }
         case Iop_F16toF32: {
            /* Widening conversion; exact, hence no rounding mode. */
            HReg src = iselF16Expr(env, e->Iex.Unop.arg);
            HReg dst = newVRegD(env);
            addInstr(env, ARM64Instr_VCvtHS(True/*hToS*/, dst, src));
            return dst;
         }
         default:
            break;
      }
   }

   if (e->tag == Iex_Binop) {
      switch (e->Iex.Binop.op) {
         /* Unary FP operations that carry an explicit IR rounding
            mode in arg1; arg2 is the operand. */
         case Iop_RoundF32toInt:
         case Iop_SqrtF32:
         case Iop_RecpExpF32: {
            HReg src = iselFltExpr(env, e->Iex.Binop.arg2);
            HReg dst = newVRegD(env);
            set_FPCR_rounding_mode(env, e->Iex.Binop.arg1);
            ARM64FpUnaryOp op = ARM64fpu_INVALID;
            switch (e->Iex.Binop.op) {
               case Iop_RoundF32toInt: op = ARM64fpu_RINT; break;
               case Iop_SqrtF32: op = ARM64fpu_SQRT; break;
               case Iop_RecpExpF32: op = ARM64fpu_RECPX; break;
               default: vassert(0);
            }
            addInstr(env, ARM64Instr_VUnaryS(op, dst, src));
            return dst;
         }
         case Iop_F64toF32: {
            /* Narrowing conversion; may round, so honour arg1. */
            HReg srcD = iselDblExpr(env, e->Iex.Binop.arg2);
            set_FPCR_rounding_mode(env, e->Iex.Binop.arg1);
            HReg dstS = newVRegD(env);
            addInstr(env, ARM64Instr_VCvtSD(False/*!sToD*/, dstS, srcD));
            return dstS;
         }
         case Iop_I32UtoF32:
         case Iop_I32StoF32:
         case Iop_I64UtoF32:
         case Iop_I64StoF32: {
            /* Int -> F32 may be inexact, so the rounding mode (arg1)
               must be installed before converting. */
            ARM64CvtOp cvt_op = ARM64cvt_INVALID;
            switch (e->Iex.Binop.op) {
               case Iop_I32UtoF32: cvt_op = ARM64cvt_F32_I32U; break;
               case Iop_I32StoF32: cvt_op = ARM64cvt_F32_I32S; break;
               case Iop_I64UtoF32: cvt_op = ARM64cvt_F32_I64U; break;
               case Iop_I64StoF32: cvt_op = ARM64cvt_F32_I64S; break;
               default: vassert(0);
            }
            HReg srcI = iselIntExpr_R(env, e->Iex.Binop.arg2);
            set_FPCR_rounding_mode(env, e->Iex.Binop.arg1);
            HReg dstS = newVRegD(env);
            addInstr(env, ARM64Instr_VCvtI2F(cvt_op, dstS, srcI));
            return dstS;
         }
         default:
            break;
      }
   }

   /* Triops: the scalar FP arithmetic ops, with the rounding mode
      in arg1 and the operands in arg2/arg3. */
   if (e->tag == Iex_Triop) {
      IRTriop* triop = e->Iex.Triop.details;
      ARM64FpBinOp sglop = ARM64fpb_INVALID;
      switch (triop->op) {
         case Iop_DivF32: sglop = ARM64fpb_DIV; break;
         case Iop_MulF32: sglop = ARM64fpb_MUL; break;
         case Iop_SubF32: sglop = ARM64fpb_SUB; break;
         case Iop_AddF32: sglop = ARM64fpb_ADD; break;
         default: break;
      }
      if (sglop != ARM64fpb_INVALID) {
         HReg argL = iselFltExpr(env, triop->arg2);
         HReg argR = iselFltExpr(env, triop->arg3);
         HReg dst = newVRegD(env);
         set_FPCR_rounding_mode(env, triop->arg1);
         addInstr(env, ARM64Instr_VBinS(sglop, dst, argL, argR));
         return dst;
      }
   }

   /* ITE(ccexpr, iftrue, iffalse) -> FP conditional select. */
   if (e->tag == Iex_ITE) {
      ARM64CondCode cc;
      HReg r1 = iselFltExpr(env, e->Iex.ITE.iftrue);
      HReg r0 = iselFltExpr(env, e->Iex.ITE.iffalse);
      HReg dst = newVRegD(env);
      cc = iselCondCode(env, e->Iex.ITE.cond);
      addInstr(env, ARM64Instr_VFCSel(dst, r1, r0, cc, False/*!64-bit*/));
      return dst;
   }

   ppIRExpr(e);
   vpanic("iselFltExpr_wrk");
}
3366 /*---------------------------------------------------------*/
3367 /*--- ISEL: Floating point expressions (16 bit) ---*/
3368 /*---------------------------------------------------------*/
3370 /* Compute a 16-bit floating point value into a register, the identity
3371 of which is returned. As with iselIntExpr_R, the reg may be either
3372 real or virtual; in any case it must not be changed by subsequent
3373 code emitted by the caller. Values are generated into HRcFlt64
3374 registers despite the values themselves being Ity_F16s. */
3376 static HReg iselF16Expr ( ISelEnv* env, IRExpr* e )
3378 HReg r = iselF16Expr_wrk( env, e );
3379 # if 0
3380 vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
3381 # endif
3382 vassert(hregClass(r) == HRcFlt64);
3383 vassert(hregIsVirtual(r));
3384 return r;
3387 /* DO NOT CALL THIS DIRECTLY */
3388 static HReg iselF16Expr_wrk ( ISelEnv* env, IRExpr* e )
3390 IRType ty = typeOfIRExpr(env->type_env,e);
3391 vassert(e);
3392 vassert(ty == Ity_F16);
3394 if (e->tag == Iex_Get) {
3395 Int offs = e->Iex.Get.offset;
3396 if (offs >= 0 && offs < 8192 && 0 == (offs & 1)) {
3397 HReg rD = newVRegD(env);
3398 HReg rN = get_baseblock_register();
3399 addInstr(env, ARM64Instr_VLdStH(True/*isLoad*/, rD, rN, offs));
3400 return rD;
3404 if (e->tag == Iex_Binop) {
3405 switch (e->Iex.Binop.op) {
3406 case Iop_F32toF16: {
3407 HReg srcS = iselFltExpr(env, e->Iex.Binop.arg2);
3408 set_FPCR_rounding_mode(env, e->Iex.Binop.arg1);
3409 HReg dstH = newVRegD(env);
3410 addInstr(env, ARM64Instr_VCvtHS(False/*!hToS*/, dstH, srcS));
3411 return dstH;
3413 case Iop_F64toF16: {
3414 HReg srcD = iselDblExpr(env, e->Iex.Binop.arg2);
3415 set_FPCR_rounding_mode(env, e->Iex.Binop.arg1);
3416 HReg dstH = newVRegD(env);
3417 addInstr(env, ARM64Instr_VCvtHD(False/*!hToD*/, dstH, srcD));
3418 return dstH;
3420 default:
3421 break;
3425 ppIRExpr(e);
3426 vpanic("iselF16Expr_wrk");
3430 /*---------------------------------------------------------*/
3431 /*--- ISEL: Vector expressions (256 bit) ---*/
3432 /*---------------------------------------------------------*/
3434 static void iselV256Expr ( /*OUT*/HReg* rHi, HReg* rLo,
3435 ISelEnv* env, IRExpr* e )
3437 iselV256Expr_wrk( rHi, rLo, env, e );
3438 vassert(hregClass(*rHi) == HRcVec128);
3439 vassert(hregClass(*rLo) == HRcVec128);
3440 vassert(hregIsVirtual(*rHi));
3441 vassert(hregIsVirtual(*rLo));
/* DO NOT CALL THIS DIRECTLY.  Worker for iselV256Expr: computes a
   256-bit value as a pair of V128 registers (*rHi, *rLo). */
static void iselV256Expr_wrk ( /*OUT*/HReg* rHi, /*OUT*/HReg* rLo,
                               ISelEnv* env, IRExpr* e )
{
   vassert(e);
   IRType ty = typeOfIRExpr(env->type_env,e);
   vassert(ty == Ity_V256);

   /* read 256-bit IRTemp */
   if (e->tag == Iex_RdTmp) {
      lookupIRTempPair( rHi, rLo, env, e->Iex.RdTmp.tmp);
      return;
   }

   if (e->tag == Iex_Binop) {
      switch (e->Iex.Binop.op) {
         case Iop_V128HLtoV256: {
            /* Simple concatenation: arg1 is the high half. */
            *rHi = iselV128Expr(env, e->Iex.Binop.arg1);
            *rLo = iselV128Expr(env, e->Iex.Binop.arg2);
            return;
         }
         /* Saturating vector shifts that also report saturation:
            the low V128 is the shift result, the high V128 carries
            the FPSR.QC flag in its least significant bit. */
         case Iop_QandSQsh64x2:
         case Iop_QandSQsh32x4:
         case Iop_QandSQsh16x8:
         case Iop_QandSQsh8x16:
         case Iop_QandUQsh64x2:
         case Iop_QandUQsh32x4:
         case Iop_QandUQsh16x8:
         case Iop_QandUQsh8x16:
         case Iop_QandSQRsh64x2:
         case Iop_QandSQRsh32x4:
         case Iop_QandSQRsh16x8:
         case Iop_QandSQRsh8x16:
         case Iop_QandUQRsh64x2:
         case Iop_QandUQRsh32x4:
         case Iop_QandUQRsh16x8:
         case Iop_QandUQRsh8x16:
         {
            HReg argL = iselV128Expr(env, e->Iex.Binop.arg1);
            HReg argR = iselV128Expr(env, e->Iex.Binop.arg2);
            HReg fpsr = newVRegI(env);
            HReg resHi = newVRegV(env);
            HReg resLo = newVRegV(env);
            ARM64VecBinOp op = ARM64vecb_INVALID;
            switch (e->Iex.Binop.op) {
               case Iop_QandSQsh64x2: op = ARM64vecb_SQSHL64x2; break;
               case Iop_QandSQsh32x4: op = ARM64vecb_SQSHL32x4; break;
               case Iop_QandSQsh16x8: op = ARM64vecb_SQSHL16x8; break;
               case Iop_QandSQsh8x16: op = ARM64vecb_SQSHL8x16; break;
               case Iop_QandUQsh64x2: op = ARM64vecb_UQSHL64x2; break;
               case Iop_QandUQsh32x4: op = ARM64vecb_UQSHL32x4; break;
               case Iop_QandUQsh16x8: op = ARM64vecb_UQSHL16x8; break;
               case Iop_QandUQsh8x16: op = ARM64vecb_UQSHL8x16; break;
               case Iop_QandSQRsh64x2: op = ARM64vecb_SQRSHL64x2; break;
               case Iop_QandSQRsh32x4: op = ARM64vecb_SQRSHL32x4; break;
               case Iop_QandSQRsh16x8: op = ARM64vecb_SQRSHL16x8; break;
               case Iop_QandSQRsh8x16: op = ARM64vecb_SQRSHL8x16; break;
               case Iop_QandUQRsh64x2: op = ARM64vecb_UQRSHL64x2; break;
               case Iop_QandUQRsh32x4: op = ARM64vecb_UQRSHL32x4; break;
               case Iop_QandUQRsh16x8: op = ARM64vecb_UQRSHL16x8; break;
               case Iop_QandUQRsh8x16: op = ARM64vecb_UQRSHL8x16; break;
               default: vassert(0);
            }
            /* Clear FPSR.Q, do the operation, and return both its result
               and the new value of FPSR.Q.  We can simply zero out FPSR
               since all the other bits have no relevance in VEX generated
               code. */
            addInstr(env, ARM64Instr_Imm64(fpsr, 0));
            addInstr(env, ARM64Instr_FPSR(True/*toFPSR*/, fpsr));
            addInstr(env, ARM64Instr_VBinV(op, resLo, argL, argR));
            addInstr(env, ARM64Instr_FPSR(False/*!toFPSR*/, fpsr));
            /* Move the QC flag (bit 27 of FPSR) down to bit 0 and
               isolate it. */
            addInstr(env, ARM64Instr_Shift(fpsr, fpsr, ARM64RI6_I6(27),
                                                       ARM64sh_SHR));
            ARM64RIL* ril_one = mb_mkARM64RIL_I(1);
            vassert(ril_one);
            addInstr(env, ARM64Instr_Logic(fpsr, fpsr, ril_one, ARM64lo_AND));
            /* Now we have: the main (shift) result in |resLo|, and the
               Q bit at the bottom of |fpsr|. */
            addInstr(env, ARM64Instr_VQfromX(resHi, fpsr));
            *rHi = resHi;
            *rLo = resLo;
            return;
         }
         /* ... */
         default:
            break;
      } /* switch on the binop */
   } /* if (e->tag == Iex_Binop) */

   ppIRExpr(e);
   vpanic("iselV256Expr_wrk");
}
3539 /*---------------------------------------------------------*/
3540 /*--- ISEL: Statements ---*/
3541 /*---------------------------------------------------------*/
3543 static void iselStmt ( ISelEnv* env, IRStmt* stmt )
3545 if (vex_traceflags & VEX_TRACE_VCODE) {
3546 vex_printf("\n-- ");
3547 ppIRStmt(stmt);
3548 vex_printf("\n");
3550 switch (stmt->tag) {
3552 /* --------- STORE --------- */
3553 /* little-endian write to memory */
3554 case Ist_Store: {
3555 IRType tya = typeOfIRExpr(env->type_env, stmt->Ist.Store.addr);
3556 IRType tyd = typeOfIRExpr(env->type_env, stmt->Ist.Store.data);
3557 IREndness end = stmt->Ist.Store.end;
3559 if (tya != Ity_I64 || end != Iend_LE)
3560 goto stmt_fail;
3562 if (tyd == Ity_I64) {
3563 HReg rD = iselIntExpr_R(env, stmt->Ist.Store.data);
3564 ARM64AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr, tyd);
3565 addInstr(env, ARM64Instr_LdSt64(False/*!isLoad*/, rD, am));
3566 return;
3568 if (tyd == Ity_I32) {
3569 HReg rD = iselIntExpr_R(env, stmt->Ist.Store.data);
3570 ARM64AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr, tyd);
3571 addInstr(env, ARM64Instr_LdSt32(False/*!isLoad*/, rD, am));
3572 return;
3574 if (tyd == Ity_I16) {
3575 HReg rD = iselIntExpr_R(env, stmt->Ist.Store.data);
3576 ARM64AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr, tyd);
3577 addInstr(env, ARM64Instr_LdSt16(False/*!isLoad*/, rD, am));
3578 return;
3580 if (tyd == Ity_I8) {
3581 HReg rD = iselIntExpr_R(env, stmt->Ist.Store.data);
3582 ARM64AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr, tyd);
3583 addInstr(env, ARM64Instr_LdSt8(False/*!isLoad*/, rD, am));
3584 return;
3586 if (tyd == Ity_V128) {
3587 HReg qD = iselV128Expr(env, stmt->Ist.Store.data);
3588 HReg addr = iselIntExpr_R(env, stmt->Ist.Store.addr);
3589 addInstr(env, ARM64Instr_VLdStQ(False/*!isLoad*/, qD, addr));
3590 return;
3592 if (tyd == Ity_F64) {
3593 HReg dD = iselDblExpr(env, stmt->Ist.Store.data);
3594 HReg addr = iselIntExpr_R(env, stmt->Ist.Store.addr);
3595 addInstr(env, ARM64Instr_VLdStD(False/*!isLoad*/, dD, addr, 0));
3596 return;
3598 if (tyd == Ity_F32) {
3599 HReg sD = iselFltExpr(env, stmt->Ist.Store.data);
3600 HReg addr = iselIntExpr_R(env, stmt->Ist.Store.addr);
3601 addInstr(env, ARM64Instr_VLdStS(False/*!isLoad*/, sD, addr, 0));
3602 return;
3604 break;
3607 /* --------- PUT --------- */
3608 /* write guest state, fixed offset */
3609 case Ist_Put: {
3610 IRType tyd = typeOfIRExpr(env->type_env, stmt->Ist.Put.data);
3611 UInt offs = (UInt)stmt->Ist.Put.offset;
3612 if (tyd == Ity_I64 && 0 == (offs & 7) && offs < (8<<12)) {
3613 HReg rD = iselIntExpr_R(env, stmt->Ist.Put.data);
3614 ARM64AMode* am = mk_baseblock_64bit_access_amode(offs);
3615 addInstr(env, ARM64Instr_LdSt64(False/*!isLoad*/, rD, am));
3616 return;
3618 if (tyd == Ity_I32 && 0 == (offs & 3) && offs < (4<<12)) {
3619 HReg rD = iselIntExpr_R(env, stmt->Ist.Put.data);
3620 ARM64AMode* am = mk_baseblock_32bit_access_amode(offs);
3621 addInstr(env, ARM64Instr_LdSt32(False/*!isLoad*/, rD, am));
3622 return;
3624 if (tyd == Ity_I16 && 0 == (offs & 1) && offs < (2<<12)) {
3625 HReg rD = iselIntExpr_R(env, stmt->Ist.Put.data);
3626 ARM64AMode* am = mk_baseblock_16bit_access_amode(offs);
3627 addInstr(env, ARM64Instr_LdSt16(False/*!isLoad*/, rD, am));
3628 return;
3630 if (tyd == Ity_I8 && offs < (1<<12)) {
3631 HReg rD = iselIntExpr_R(env, stmt->Ist.Put.data);
3632 ARM64AMode* am = mk_baseblock_8bit_access_amode(offs);
3633 addInstr(env, ARM64Instr_LdSt8(False/*!isLoad*/, rD, am));
3634 return;
3636 if (tyd == Ity_V128 && offs < (1<<12)) {
3637 HReg qD = iselV128Expr(env, stmt->Ist.Put.data);
3638 HReg addr = mk_baseblock_128bit_access_addr(env, offs);
3639 addInstr(env, ARM64Instr_VLdStQ(False/*!isLoad*/, qD, addr));
3640 return;
3642 if (tyd == Ity_F64 && 0 == (offs & 7) && offs < (8<<12)) {
3643 HReg dD = iselDblExpr(env, stmt->Ist.Put.data);
3644 HReg bbp = get_baseblock_register();
3645 addInstr(env, ARM64Instr_VLdStD(False/*!isLoad*/, dD, bbp, offs));
3646 return;
3648 if (tyd == Ity_F32 && 0 == (offs & 3) && offs < (4<<12)) {
3649 HReg sD = iselFltExpr(env, stmt->Ist.Put.data);
3650 HReg bbp = get_baseblock_register();
3651 addInstr(env, ARM64Instr_VLdStS(False/*!isLoad*/, sD, bbp, offs));
3652 return;
3654 if (tyd == Ity_F16 && 0 == (offs & 1) && offs < (2<<12)) {
3655 HReg hD = iselF16Expr(env, stmt->Ist.Put.data);
3656 HReg bbp = get_baseblock_register();
3657 addInstr(env, ARM64Instr_VLdStH(False/*!isLoad*/, hD, bbp, offs));
3658 return;
3661 break;
3664 /* --------- TMP --------- */
3665 /* assign value to temporary */
3666 case Ist_WrTmp: {
3667 IRTemp tmp = stmt->Ist.WrTmp.tmp;
3668 IRType ty = typeOfIRTemp(env->type_env, tmp);
3670 if (ty == Ity_I64 || ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8) {
3671 /* We could do a lot better here. But for the time being: */
3672 HReg dst = lookupIRTemp(env, tmp);
3673 HReg rD = iselIntExpr_R(env, stmt->Ist.WrTmp.data);
3674 addInstr(env, ARM64Instr_MovI(dst, rD));
3675 return;
3677 if (ty == Ity_I1) {
3678 /* Here, we are generating a I1 value into a 64 bit register.
3679 Make sure the value in the register is only zero or one,
3680 but no other. This allows optimisation of the
3681 1Uto64(tmp:I1) case, by making it simply a copy of the
3682 register holding 'tmp'. The point being that the value in
3683 the register holding 'tmp' can only have been created
3684 here. LATER: that seems dangerous; safer to do 'tmp & 1'
3685 in that case. Also, could do this just with a single CINC
3686 insn. */
3687 /* CLONE-01 */
3688 HReg zero = newVRegI(env);
3689 HReg one = newVRegI(env);
3690 HReg dst = lookupIRTemp(env, tmp);
3691 addInstr(env, ARM64Instr_Imm64(zero, 0));
3692 addInstr(env, ARM64Instr_Imm64(one, 1));
3693 ARM64CondCode cc = iselCondCode(env, stmt->Ist.WrTmp.data);
3694 addInstr(env, ARM64Instr_CSel(dst, one, zero, cc));
3695 return;
3697 if (ty == Ity_F64) {
3698 HReg src = iselDblExpr(env, stmt->Ist.WrTmp.data);
3699 HReg dst = lookupIRTemp(env, tmp);
3700 addInstr(env, ARM64Instr_VMov(8, dst, src));
3701 return;
3703 if (ty == Ity_F32) {
3704 HReg src = iselFltExpr(env, stmt->Ist.WrTmp.data);
3705 HReg dst = lookupIRTemp(env, tmp);
3706 addInstr(env, ARM64Instr_VMov(8/*yes, really*/, dst, src));
3707 return;
3709 if (ty == Ity_V128) {
3710 HReg src = iselV128Expr(env, stmt->Ist.WrTmp.data);
3711 HReg dst = lookupIRTemp(env, tmp);
3712 addInstr(env, ARM64Instr_VMov(16, dst, src));
3713 return;
3715 if (ty == Ity_V256) {
3716 HReg srcHi, srcLo, dstHi, dstLo;
3717 iselV256Expr(&srcHi,&srcLo, env, stmt->Ist.WrTmp.data);
3718 lookupIRTempPair( &dstHi, &dstLo, env, tmp);
3719 addInstr(env, ARM64Instr_VMov(16, dstHi, srcHi));
3720 addInstr(env, ARM64Instr_VMov(16, dstLo, srcLo));
3721 return;
3723 break;
3726 /* --------- Call to DIRTY helper --------- */
3727 /* call complex ("dirty") helper function */
3728 case Ist_Dirty: {
3729 IRDirty* d = stmt->Ist.Dirty.details;
3731 /* Figure out the return type, if any. */
3732 IRType retty = Ity_INVALID;
3733 if (d->tmp != IRTemp_INVALID)
3734 retty = typeOfIRTemp(env->type_env, d->tmp);
3736 Bool retty_ok = False;
3737 switch (retty) {
3738 case Ity_INVALID: /* function doesn't return anything */
3739 case Ity_I64: case Ity_I32: case Ity_I16: case Ity_I8:
3740 case Ity_V128:
3741 retty_ok = True; break;
3742 default:
3743 break;
3745 if (!retty_ok)
3746 break; /* will go to stmt_fail: */
3748 /* Marshal args, do the call, and set the return value to 0x555..555
3749 if this is a conditional call that returns a value and the
3750 call is skipped. */
3751 UInt addToSp = 0;
3752 RetLoc rloc = mk_RetLoc_INVALID();
3753 doHelperCall( &addToSp, &rloc, env, d->guard, d->cee, retty, d->args );
3754 vassert(is_sane_RetLoc(rloc));
3756 /* Now figure out what to do with the returned value, if any. */
3757 switch (retty) {
3758 case Ity_INVALID: {
3759 /* No return value. Nothing to do. */
3760 vassert(d->tmp == IRTemp_INVALID);
3761 vassert(rloc.pri == RLPri_None);
3762 vassert(addToSp == 0);
3763 return;
3765 case Ity_I64: case Ity_I32: case Ity_I16: case Ity_I8: {
3766 vassert(rloc.pri == RLPri_Int);
3767 vassert(addToSp == 0);
3768 /* The returned value is in x0. Park it in the register
3769 associated with tmp. */
3770 HReg dst = lookupIRTemp(env, d->tmp);
3771 addInstr(env, ARM64Instr_MovI(dst, hregARM64_X0()) );
3772 return;
3774 case Ity_V128: {
3775 /* The returned value is on the stack, and *retloc tells
3776 us where. Fish it off the stack and then move the
3777 stack pointer upwards to clear it, as directed by
3778 doHelperCall. */
3779 vassert(rloc.pri == RLPri_V128SpRel);
3780 vassert(rloc.spOff < 256); // stay sane
3781 vassert(addToSp >= 16); // ditto
3782 vassert(addToSp < 256); // ditto
3783 HReg dst = lookupIRTemp(env, d->tmp);
3784 HReg tmp = newVRegI(env); // the address of the returned value
3785 addInstr(env, ARM64Instr_FromSP(tmp)); // tmp = SP
3786 addInstr(env, ARM64Instr_Arith(tmp, tmp,
3787 ARM64RIA_I12((UShort)rloc.spOff, 0),
3788 True/*isAdd*/ ));
3789 addInstr(env, ARM64Instr_VLdStQ(True/*isLoad*/, dst, tmp));
3790 addInstr(env, ARM64Instr_AddToSP(addToSp));
3791 return;
3793 default:
3794 /*NOTREACHED*/
3795 vassert(0);
3797 break;
3800 /* --------- Load Linked and Store Conditional --------- */
3801 case Ist_LLSC: {
3802 if (stmt->Ist.LLSC.storedata == NULL) {
3803 /* LL */
3804 IRTemp res = stmt->Ist.LLSC.result;
3805 IRType ty = typeOfIRTemp(env->type_env, res);
3806 if (ty == Ity_I64 || ty == Ity_I32
3807 || ty == Ity_I16 || ty == Ity_I8) {
3808 Int szB = 0;
3809 HReg r_dst = lookupIRTemp(env, res);
3810 HReg raddr = iselIntExpr_R(env, stmt->Ist.LLSC.addr);
3811 switch (ty) {
3812 case Ity_I8: szB = 1; break;
3813 case Ity_I16: szB = 2; break;
3814 case Ity_I32: szB = 4; break;
3815 case Ity_I64: szB = 8; break;
3816 default: vassert(0);
3818 addInstr(env, ARM64Instr_MovI(hregARM64_X4(), raddr));
3819 addInstr(env, ARM64Instr_LdrEX(szB));
3820 addInstr(env, ARM64Instr_MovI(r_dst, hregARM64_X2()));
3821 return;
3823 goto stmt_fail;
3824 } else {
3825 /* SC */
3826 IRType tyd = typeOfIRExpr(env->type_env, stmt->Ist.LLSC.storedata);
3827 if (tyd == Ity_I64 || tyd == Ity_I32
3828 || tyd == Ity_I16 || tyd == Ity_I8) {
3829 Int szB = 0;
3830 HReg rD = iselIntExpr_R(env, stmt->Ist.LLSC.storedata);
3831 HReg rA = iselIntExpr_R(env, stmt->Ist.LLSC.addr);
3832 switch (tyd) {
3833 case Ity_I8: szB = 1; break;
3834 case Ity_I16: szB = 2; break;
3835 case Ity_I32: szB = 4; break;
3836 case Ity_I64: szB = 8; break;
3837 default: vassert(0);
3839 addInstr(env, ARM64Instr_MovI(hregARM64_X2(), rD));
3840 addInstr(env, ARM64Instr_MovI(hregARM64_X4(), rA));
3841 addInstr(env, ARM64Instr_StrEX(szB));
3842 } else {
3843 goto stmt_fail;
3845 /* now r0 is 1 if failed, 0 if success. Change to IR
3846 conventions (0 is fail, 1 is success). Also transfer
3847 result to r_res. */
3848 IRTemp res = stmt->Ist.LLSC.result;
3849 IRType ty = typeOfIRTemp(env->type_env, res);
3850 HReg r_res = lookupIRTemp(env, res);
3851 ARM64RIL* one = mb_mkARM64RIL_I(1);
3852 vassert(ty == Ity_I1);
3853 vassert(one);
3854 addInstr(env, ARM64Instr_Logic(r_res, hregARM64_X0(), one,
3855 ARM64lo_XOR));
3856 /* And be conservative -- mask off all but the lowest bit. */
3857 addInstr(env, ARM64Instr_Logic(r_res, r_res, one,
3858 ARM64lo_AND));
3859 return;
3861 break;
3864 /* --------- ACAS --------- */
3865 case Ist_CAS: {
3866 if (stmt->Ist.CAS.details->oldHi == IRTemp_INVALID) {
3867 /* "normal" singleton CAS */
3868 UChar sz;
3869 IRCAS* cas = stmt->Ist.CAS.details;
3870 IRType ty = typeOfIRExpr(env->type_env, cas->dataLo);
3871 switch (ty) {
3872 case Ity_I64: sz = 8; break;
3873 case Ity_I32: sz = 4; break;
3874 case Ity_I16: sz = 2; break;
3875 case Ity_I8: sz = 1; break;
3876 default: goto unhandled_cas;
3878 HReg rAddr = iselIntExpr_R(env, cas->addr);
3879 HReg rExpd = iselIntExpr_R(env, cas->expdLo);
3880 HReg rData = iselIntExpr_R(env, cas->dataLo);
3881 vassert(cas->expdHi == NULL);
3882 vassert(cas->dataHi == NULL);
3883 addInstr(env, ARM64Instr_MovI(hregARM64_X3(), rAddr));
3884 addInstr(env, ARM64Instr_MovI(hregARM64_X5(), rExpd));
3885 addInstr(env, ARM64Instr_MovI(hregARM64_X7(), rData));
3886 addInstr(env, ARM64Instr_CAS(sz));
3887 /* Now we have the lowest szB bytes of x1 are either equal to
3888 the lowest szB bytes of x5, indicating success, or they
3889 aren't, indicating failure. The IR semantics actually
3890 require us to return the old value at the location,
3891 regardless of success or failure, but in the case of
3892 failure it's not clear how to do this, since
3893 ARM64Instr_CAS can't provide that. Instead we'll just
3894 return the relevant bit of x1, since that's at least
3895 guaranteed to be different from the lowest bits of x5 on
3896 failure. */
3897 HReg rResult = hregARM64_X1();
3898 switch (sz) {
3899 case 8: break;
3900 case 4: rResult = widen_z_32_to_64(env, rResult); break;
3901 case 2: rResult = widen_z_16_to_64(env, rResult); break;
3902 case 1: rResult = widen_z_8_to_64(env, rResult); break;
3903 default: vassert(0);
3905 // "old" in this case is interpreted somewhat liberally, per
3906 // the previous comment.
3907 HReg rOld = lookupIRTemp(env, cas->oldLo);
3908 addInstr(env, ARM64Instr_MovI(rOld, rResult));
3909 return;
3911 unhandled_cas:
3912 break;
3915 /* --------- MEM FENCE --------- */
3916 case Ist_MBE:
3917 switch (stmt->Ist.MBE.event) {
3918 case Imbe_Fence:
3919 addInstr(env, ARM64Instr_MFence());
3920 return;
3921 case Imbe_CancelReservation:
3922 addInstr(env, ARM64Instr_ClrEX());
3923 return;
3924 default:
3925 break;
3927 break;
3929 /* --------- INSTR MARK --------- */
3930 /* Doesn't generate any executable code ... */
3931 case Ist_IMark:
3932 return;
3934 /* --------- ABI HINT --------- */
3935 /* These have no meaning (denotation in the IR) and so we ignore
3936 them ... if any actually made it this far. */
3937 case Ist_AbiHint:
3938 return;
3940 /* --------- NO-OP --------- */
3941 case Ist_NoOp:
3942 return;
3944 /* --------- EXIT --------- */
3945 case Ist_Exit: {
3946 if (stmt->Ist.Exit.dst->tag != Ico_U64)
3947 vpanic("isel_arm: Ist_Exit: dst is not a 64-bit value");
3949 ARM64CondCode cc
3950 = iselCondCode(env, stmt->Ist.Exit.guard);
3951 ARM64AMode* amPC
3952 = mk_baseblock_64bit_access_amode(stmt->Ist.Exit.offsIP);
3954 /* Case: boring transfer to known address */
3955 if (stmt->Ist.Exit.jk == Ijk_Boring) {
3956 if (env->chainingAllowed) {
3957 /* .. almost always true .. */
3958 /* Skip the event check at the dst if this is a forwards
3959 edge. */
3960 Bool toFastEP
3961 = ((Addr64)stmt->Ist.Exit.dst->Ico.U64) > env->max_ga;
3962 if (0) vex_printf("%s", toFastEP ? "Y" : ",");
3963 addInstr(env, ARM64Instr_XDirect(stmt->Ist.Exit.dst->Ico.U64,
3964 amPC, cc, toFastEP));
3965 } else {
3966 /* .. very occasionally .. */
3967 /* We can't use chaining, so ask for an assisted transfer,
3968 as that's the only alternative that is allowable. */
3969 HReg r = iselIntExpr_R(env, IRExpr_Const(stmt->Ist.Exit.dst));
3970 addInstr(env, ARM64Instr_XAssisted(r, amPC, cc, Ijk_Boring));
3972 return;
3975 /* Case: assisted transfer to arbitrary address */
3976 switch (stmt->Ist.Exit.jk) {
3977 /* Keep this list in sync with that for iselNext below */
3978 case Ijk_ClientReq:
3979 case Ijk_NoDecode:
3980 case Ijk_NoRedir:
3981 case Ijk_Sys_syscall:
3982 case Ijk_InvalICache:
3983 case Ijk_FlushDCache:
3984 case Ijk_SigTRAP:
3985 case Ijk_Yield: {
3986 HReg r = iselIntExpr_R(env, IRExpr_Const(stmt->Ist.Exit.dst));
3987 addInstr(env, ARM64Instr_XAssisted(r, amPC, cc,
3988 stmt->Ist.Exit.jk));
3989 return;
3991 default:
3992 break;
3995 /* Do we ever expect to see any other kind? */
3996 goto stmt_fail;
3999 default: break;
4001 stmt_fail:
4002 ppIRStmt(stmt);
4003 vpanic("iselStmt");
4007 /*---------------------------------------------------------*/
4008 /*--- ISEL: Basic block terminators (Nexts) ---*/
4009 /*---------------------------------------------------------*/
4011 static void iselNext ( ISelEnv* env,
4012 IRExpr* next, IRJumpKind jk, Int offsIP )
4014 if (vex_traceflags & VEX_TRACE_VCODE) {
4015 vex_printf( "\n-- PUT(%d) = ", offsIP);
4016 ppIRExpr( next );
4017 vex_printf( "; exit-");
4018 ppIRJumpKind(jk);
4019 vex_printf( "\n");
4022 /* Case: boring transfer to known address */
4023 if (next->tag == Iex_Const) {
4024 IRConst* cdst = next->Iex.Const.con;
4025 vassert(cdst->tag == Ico_U64);
4026 if (jk == Ijk_Boring || jk == Ijk_Call) {
4027 /* Boring transfer to known address */
4028 ARM64AMode* amPC = mk_baseblock_64bit_access_amode(offsIP);
4029 if (env->chainingAllowed) {
4030 /* .. almost always true .. */
4031 /* Skip the event check at the dst if this is a forwards
4032 edge. */
4033 Bool toFastEP
4034 = ((Addr64)cdst->Ico.U64) > env->max_ga;
4035 if (0) vex_printf("%s", toFastEP ? "X" : ".");
4036 addInstr(env, ARM64Instr_XDirect(cdst->Ico.U64,
4037 amPC, ARM64cc_AL,
4038 toFastEP));
4039 } else {
4040 /* .. very occasionally .. */
4041 /* We can't use chaining, so ask for an assisted transfer,
4042 as that's the only alternative that is allowable. */
4043 HReg r = iselIntExpr_R(env, next);
4044 addInstr(env, ARM64Instr_XAssisted(r, amPC, ARM64cc_AL,
4045 Ijk_Boring));
4047 return;
4051 /* Case: call/return (==boring) transfer to any address */
4052 switch (jk) {
4053 case Ijk_Boring: case Ijk_Ret: case Ijk_Call: {
4054 HReg r = iselIntExpr_R(env, next);
4055 ARM64AMode* amPC = mk_baseblock_64bit_access_amode(offsIP);
4056 if (env->chainingAllowed) {
4057 addInstr(env, ARM64Instr_XIndir(r, amPC, ARM64cc_AL));
4058 } else {
4059 addInstr(env, ARM64Instr_XAssisted(r, amPC, ARM64cc_AL,
4060 Ijk_Boring));
4062 return;
4064 default:
4065 break;
4068 /* Case: assisted transfer to arbitrary address */
4069 switch (jk) {
4070 /* Keep this list in sync with that for Ist_Exit above */
4071 case Ijk_ClientReq:
4072 case Ijk_NoDecode:
4073 case Ijk_NoRedir:
4074 case Ijk_Sys_syscall:
4075 case Ijk_InvalICache:
4076 case Ijk_FlushDCache:
4077 case Ijk_SigTRAP:
4078 case Ijk_Yield:
4080 HReg r = iselIntExpr_R(env, next);
4081 ARM64AMode* amPC = mk_baseblock_64bit_access_amode(offsIP);
4082 addInstr(env, ARM64Instr_XAssisted(r, amPC, ARM64cc_AL, jk));
4083 return;
4085 default:
4086 break;
4089 vex_printf( "\n-- PUT(%d) = ", offsIP);
4090 ppIRExpr( next );
4091 vex_printf( "; exit-");
4092 ppIRJumpKind(jk);
4093 vex_printf( "\n");
4094 vassert(0); // are we expecting any other kind?
4098 /*---------------------------------------------------------*/
4099 /*--- Insn selector top-level ---*/
4100 /*---------------------------------------------------------*/
4102 /* Translate an entire SB to arm64 code. */
4104 HInstrArray* iselSB_ARM64 ( const IRSB* bb,
4105 VexArch arch_host,
4106 const VexArchInfo* archinfo_host,
4107 const VexAbiInfo* vbi/*UNUSED*/,
4108 Int offs_Host_EvC_Counter,
4109 Int offs_Host_EvC_FailAddr,
4110 Bool chainingAllowed,
4111 Bool addProfInc,
4112 Addr max_ga )
4114 Int i, j;
4115 HReg hreg, hregHI;
4116 ISelEnv* env;
4117 UInt hwcaps_host = archinfo_host->hwcaps;
4118 ARM64AMode *amCounter, *amFailAddr;
4120 /* sanity ... */
4121 vassert(arch_host == VexArchARM64);
4123 /* Check that the host's endianness is as expected. */
4124 vassert(archinfo_host->endness == VexEndnessLE);
4126 /* guard against unexpected space regressions */
4127 vassert(sizeof(ARM64Instr) <= 32);
4129 /* Make up an initial environment to use. */
4130 env = LibVEX_Alloc_inline(sizeof(ISelEnv));
4131 env->vreg_ctr = 0;
4133 /* Set up output code array. */
4134 env->code = newHInstrArray();
4136 /* Copy BB's type env. */
4137 env->type_env = bb->tyenv;
4139 /* Make up an IRTemp -> virtual HReg mapping. This doesn't
4140 change as we go along. */
4141 env->n_vregmap = bb->tyenv->types_used;
4142 env->vregmap = LibVEX_Alloc_inline(env->n_vregmap * sizeof(HReg));
4143 env->vregmapHI = LibVEX_Alloc_inline(env->n_vregmap * sizeof(HReg));
4145 /* and finally ... */
4146 env->chainingAllowed = chainingAllowed;
4147 env->hwcaps = hwcaps_host;
4148 env->previous_rm = NULL;
4149 env->max_ga = max_ga;
4151 /* For each IR temporary, allocate a suitably-kinded virtual
4152 register. */
4153 j = 0;
4154 for (i = 0; i < env->n_vregmap; i++) {
4155 hregHI = hreg = INVALID_HREG;
4156 switch (bb->tyenv->types[i]) {
4157 case Ity_I1:
4158 case Ity_I8: case Ity_I16: case Ity_I32: case Ity_I64:
4159 hreg = mkHReg(True, HRcInt64, 0, j++);
4160 break;
4161 case Ity_I128:
4162 hreg = mkHReg(True, HRcInt64, 0, j++);
4163 hregHI = mkHReg(True, HRcInt64, 0, j++);
4164 break;
4165 case Ity_F16: // we'll use HRcFlt64 regs for F16 too
4166 case Ity_F32: // we'll use HRcFlt64 regs for F32 too
4167 case Ity_F64:
4168 hreg = mkHReg(True, HRcFlt64, 0, j++);
4169 break;
4170 case Ity_V128:
4171 hreg = mkHReg(True, HRcVec128, 0, j++);
4172 break;
4173 case Ity_V256:
4174 hreg = mkHReg(True, HRcVec128, 0, j++);
4175 hregHI = mkHReg(True, HRcVec128, 0, j++);
4176 break;
4177 default:
4178 ppIRType(bb->tyenv->types[i]);
4179 vpanic("iselBB(arm64): IRTemp type");
4181 env->vregmap[i] = hreg;
4182 env->vregmapHI[i] = hregHI;
4184 env->vreg_ctr = j;
4186 /* The very first instruction must be an event check. */
4187 amCounter = ARM64AMode_RI9(hregARM64_X21(), offs_Host_EvC_Counter);
4188 amFailAddr = ARM64AMode_RI9(hregARM64_X21(), offs_Host_EvC_FailAddr);
4189 addInstr(env, ARM64Instr_EvCheck(amCounter, amFailAddr));
4191 /* Possibly a block counter increment (for profiling). At this
4192 point we don't know the address of the counter, so just pretend
4193 it is zero. It will have to be patched later, but before this
4194 translation is used, by a call to LibVEX_patchProfCtr. */
4195 if (addProfInc) {
4196 addInstr(env, ARM64Instr_ProfInc());
4199 /* Ok, finally we can iterate over the statements. */
4200 for (i = 0; i < bb->stmts_used; i++)
4201 iselStmt(env, bb->stmts[i]);
4203 iselNext(env, bb->next, bb->jumpkind, bb->offsIP);
4205 /* record the number of vregs we used. */
4206 env->code->n_vregs = env->vreg_ctr;
4207 return env->code;
4211 /*---------------------------------------------------------------*/
4212 /*--- end host_arm64_isel.c ---*/
4213 /*---------------------------------------------------------------*/