Bug 497723 - forgot to restore callgrind output cleanup
[valgrind.git] / VEX / priv / host_ppc_isel.c
blobff87ae7ed66e9592dc31133c028059e5218f9129
3 /*---------------------------------------------------------------*/
4 /*--- begin host_ppc_isel.c ---*/
5 /*---------------------------------------------------------------*/
7 /*
8 This file is part of Valgrind, a dynamic binary instrumentation
9 framework.
11 Copyright (C) 2004-2017 OpenWorks LLP
12 info@open-works.net
14 This program is free software; you can redistribute it and/or
15 modify it under the terms of the GNU General Public License as
16 published by the Free Software Foundation; either version 2 of the
17 License, or (at your option) any later version.
19 This program is distributed in the hope that it will be useful, but
20 WITHOUT ANY WARRANTY; without even the implied warranty of
21 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
22 General Public License for more details.
24 You should have received a copy of the GNU General Public License
25 along with this program; if not, see <http://www.gnu.org/licenses/>.
27 The GNU General Public License is contained in the file COPYING.
29 Neither the names of the U.S. Department of Energy nor the
30 University of California nor the names of its contributors may be
31 used to endorse or promote products derived from this software
32 without prior written permission.
35 #include "libvex_basictypes.h"
36 #include "libvex_ir.h"
37 #include "libvex.h"
39 #include "ir_match.h"
40 #include "main_util.h"
41 #include "main_globals.h"
42 #include "host_generic_regs.h"
43 #include "host_generic_simd64.h"
44 #include "host_ppc_defs.h"
/* Integer (GPR) register class for the host word size: HRcInt64 when
   generating 64-bit code, HRcInt32 when generating 32-bit code. */
#define HRcGPR(is64bit) ((is64bit) ? HRcInt64 : HRcInt32)
50 /*---------------------------------------------------------*/
51 /*--- Register Usage Conventions ---*/
52 /*---------------------------------------------------------*/
54 Integer Regs
55 ------------
56 GPR0 Reserved
57 GPR1 Stack Pointer
58 GPR2 not used - TOC pointer
59 GPR3:10 Allocateable
60 GPR11 if mode64: not used - calls by ptr / env ptr for some langs
61 GPR12 if mode64: not used - exceptions / global linkage code
62 GPR13 not used - Thread-specific pointer
63 GPR14:28 Allocateable
64 GPR29 Unused by us (reserved for the dispatcher)
65 GPR30 AltiVec temp spill register
66 GPR31 GuestStatePointer
68 Of Allocateable regs:
69 if (mode64)
70 GPR3:10 Caller-saved regs
71 else
72 GPR3:12 Caller-saved regs
73 GPR14:29 Callee-saved regs
75 GPR3 [Return | Parameter] - carrying reg
76 GPR4:10 Parameter-carrying regs
79 Floating Point Regs
80 -------------------
81 FPR0:31 Allocateable
83 FPR0 Caller-saved - scratch reg
84 if (mode64)
85 FPR1:13 Caller-saved - param & return regs
86 else
87 FPR1:8 Caller-saved - param & return regs
88 FPR9:13 Caller-saved regs
89 FPR14:31 Callee-saved regs
92 Vector Regs (on processors with the VMX feature)
93 -----------
94 VR0-VR1 Volatile scratch registers
95 VR2-VR13 Volatile vector parameters registers
96 VR14-VR19 Volatile scratch registers
97 VR20-VR31 Non-volatile registers
98 VRSAVE Non-volatile 32-bit register
102 /*---------------------------------------------------------*/
103 /*--- PPC FP Status & Control Register Conventions ---*/
104 /*---------------------------------------------------------*/
106 Vex-generated code expects to run with the FPU set as follows: all
107 exceptions masked. The rounding mode is set appropriately before
108 each floating point insn emitted (or left unchanged if known to be
109 correct already). There are a few fp insns (fmr,fneg,fabs,fnabs),
110 which are unaffected by the rm and so the rounding mode is not set
111 prior to them.
113 At least on MPC7447A (Mac Mini), frsqrte is also not affected by
114 rounding mode. At some point the ppc docs get sufficiently vague
115 that the only way to find out is to write test programs.
117 /* Notes on the FP instruction set, 6 Feb 06.
119 What exns -> CR1 ? Sets FPRF ? Observes RM ?
120 -------------------------------------------------------------
122 fmr[.] if . n n
123 fneg[.] if . n n
124 fabs[.] if . n n
125 fnabs[.] if . n n
127 fadd[.] if . y y
128 fadds[.] if . y y
129 fcfid[.] (Si64->dbl) if . y y
130 fcfidU[.] (Ui64->dbl) if . y y
131 fcfids[.] (Si64->sngl) if . Y Y
132 fcfidus[.] (Ui64->sngl) if . Y Y
133 fcmpo (cmp, result n n n
134 fcmpu to crfD) n n n
135 fctid[.] (dbl->i64) if . ->undef y
136 fctidz[.] (dbl->i64) if . ->undef rounds-to-zero
137 fctiw[.] (dbl->i32) if . ->undef y
138 fctiwz[.] (dbl->i32) if . ->undef rounds-to-zero
139 fdiv[.] if . y y
140 fdivs[.] if . y y
141 fmadd[.] if . y y
142 fmadds[.] if . y y
143 fmsub[.] if . y y
144 fmsubs[.] if . y y
145 fmul[.] if . y y
146 fmuls[.] if . y y
148 (note: for fnm*, rounding happens before final negation)
149 fnmadd[.] if . y y
150 fnmadds[.] if . y y
151 fnmsub[.] if . y y
152 fnmsubs[.] if . y y
154 fre[.] if . y y
155 fres[.] if . y y
157 frsqrte[.] if . y apparently not
159 fsqrt[.] if . y y
160 fsqrts[.] if . y y
161 fsub[.] if . y y
162 fsubs[.] if . y y
165 fpscr: bits 30-31 (ibm) is RM
166 24-29 (ibm) are exnmasks/non-IEEE bit, all zero
167 15-19 (ibm) is FPRF: class, <, =, >, UNord
169 ppc fe(guest) makes fpscr read as all zeros except RM (and maybe FPRF
170 in future)
172 mcrfs - move fpscr field to CR field
173 mtfsfi[.] - 4 bit imm moved to fpscr field
174 mtfsf[.] - move frS[low 1/2] to fpscr but using 8-bit field mask
175 mtfsb1[.] - set given fpscr bit
176 mtfsb0[.] - clear given fpscr bit
177 mffs[.] - move all fpscr to frD[low 1/2]
179 For [.] presumably cr1 is set with exn summary bits, as per
180 main FP insns
182 A single precision store truncates/denormalises the in-register value,
183 but does not round it. This is so that flds followed by fsts is
184 always the identity.
188 /*---------------------------------------------------------*/
189 /*--- misc helpers ---*/
190 /*---------------------------------------------------------*/
192 /* These are duplicated in guest-ppc/toIR.c */
193 static IRExpr* unop ( IROp op, IRExpr* a )
195 return IRExpr_Unop(op, a);
198 static IRExpr* mkU32 ( UInt i )
200 return IRExpr_Const(IRConst_U32(i));
203 static IRExpr* bind ( Int binder )
205 return IRExpr_Binder(binder);
208 static Bool isZeroU8 ( IRExpr* e )
210 return e->tag == Iex_Const
211 && e->Iex.Const.con->tag == Ico_U8
212 && e->Iex.Const.con->Ico.U8 == 0;
216 /*---------------------------------------------------------*/
217 /*--- ISelEnv ---*/
218 /*---------------------------------------------------------*/
220 /* This carries around:
222 - A mapping from IRTemp to IRType, giving the type of any IRTemp we
223 might encounter. This is computed before insn selection starts,
224 and does not change.
226 - A mapping from IRTemp to HReg. This tells the insn selector
227 which virtual register(s) are associated with each IRTemp
228 temporary. This is computed before insn selection starts, and
229 does not change. We expect this mapping to map precisely the
230 same set of IRTemps as the type mapping does.
232 - vregmapLo holds the primary register for the IRTemp.
233 - vregmapMedLo holds the secondary register for the IRTemp,
234 if any is needed. That's only for Ity_I64 temps
235 in 32 bit mode or Ity_I128 temps in 64-bit mode.
236 - vregmapMedHi is only for dealing with Ity_I128 temps in
237 32 bit mode. It holds bits 95:64 (Intel numbering)
238 of the IRTemp.
239 - vregmapHi is also only for dealing with Ity_I128 temps
240 in 32 bit mode. It holds the most significant bits
241 (127:96 in Intel numbering) of the IRTemp.
243 - The code array, that is, the insns selected so far.
245 - A counter, for generating new virtual registers.
247 - The host subarchitecture we are selecting insns for.
248 This is set at the start and does not change.
250 - A Bool to tell us if the host is 32 or 64bit.
251 This is set at the start and does not change.
253 - An IRExpr*, which may be NULL, holding the IR expression (an
254 IRRoundingMode-encoded value) to which the FPU's rounding mode
255 was most recently set. Setting to NULL is always safe. Used to
256 avoid redundant settings of the FPU's rounding mode, as
257 described in set_FPU_rounding_mode below.
259 - A VexMiscInfo*, needed for knowing how to generate
260 function calls for this target.
262 - The maximum guest address of any guest insn in this block.
263 Actually, the address of the highest-addressed byte from any
264 insn in this block. Is set at the start and does not change.
265 This is used for detecting jumps which are definitely
266 forward-edges from this block, and therefore can be made
267 (chained) to the fast entry point of the destination, thereby
268 avoiding the destination's event check.
271 typedef
272 struct {
273 /* Constant -- are set at the start and do not change. */
274 IRTypeEnv* type_env;
275 // 64-bit mode 32-bit mode
276 HReg* vregmapLo; // Low 64-bits [63:0] Low 32-bits [31:0]
277 HReg* vregmapMedLo; // high 64-bits[127:64] Next 32-bits [63:32]
278 HReg* vregmapMedHi; // unused Next 32-bits [95:64]
279 HReg* vregmapHi; // unused highest 32-bits [127:96]
280 Int n_vregmap;
282 /* 27 Jan 06: Not currently used, but should be */
283 UInt hwcaps;
285 Bool mode64;
287 const VexAbiInfo* vbi; // unused
289 Bool chainingAllowed;
290 Addr64 max_ga;
292 /* These are modified as we go along. */
293 HInstrArray* code;
294 Int vreg_ctr;
296 IRExpr* previous_rm;
298 ISelEnv;
301 static HReg lookupIRTemp ( ISelEnv* env, IRTemp tmp )
303 vassert(tmp < env->n_vregmap);
304 return env->vregmapLo[tmp];
307 static void lookupIRTempPair ( HReg* vrHI, HReg* vrLO,
308 ISelEnv* env, IRTemp tmp )
310 vassert(tmp < env->n_vregmap);
311 vassert(! hregIsInvalid(env->vregmapMedLo[tmp]));
312 *vrLO = env->vregmapLo[tmp];
313 *vrHI = env->vregmapMedLo[tmp];
316 /* Only for used in 32-bit mode */
317 static void lookupIRTempQuad ( HReg* vrHi, HReg* vrMedHi, HReg* vrMedLo,
318 HReg* vrLo, ISelEnv* env, IRTemp tmp )
320 vassert(!env->mode64);
321 vassert(tmp < env->n_vregmap);
322 vassert(! hregIsInvalid(env->vregmapMedLo[tmp]));
323 *vrHi = env->vregmapHi[tmp];
324 *vrMedHi = env->vregmapMedHi[tmp];
325 *vrMedLo = env->vregmapMedLo[tmp];
326 *vrLo = env->vregmapLo[tmp];
329 static void addInstr ( ISelEnv* env, PPCInstr* instr )
331 addHInstr(env->code, instr);
332 if (vex_traceflags & VEX_TRACE_VCODE) {
333 ppPPCInstr(instr, env->mode64);
334 vex_printf("\n");
338 static HReg newVRegI ( ISelEnv* env )
340 HReg reg
341 = mkHReg(True/*vreg*/, HRcGPR(env->mode64), 0/*enc*/, env->vreg_ctr);
342 env->vreg_ctr++;
343 return reg;
346 static HReg newVRegF ( ISelEnv* env )
348 HReg reg = mkHReg(True/*vreg*/, HRcFlt64, 0/*enc*/, env->vreg_ctr);
349 env->vreg_ctr++;
350 return reg;
353 static HReg newVRegV ( ISelEnv* env )
355 HReg reg = mkHReg(True/*vreg*/, HRcVec128, 0/*enc*/, env->vreg_ctr);
356 env->vreg_ctr++;
357 return reg;
361 /*---------------------------------------------------------*/
362 /*--- ISEL: Forward declarations ---*/
363 /*---------------------------------------------------------*/
365 /* These are organised as iselXXX and iselXXX_wrk pairs. The
366 iselXXX_wrk do the real work, but are not to be called directly.
367 For each XXX, iselXXX calls its iselXXX_wrk counterpart, then
368 checks that all returned registers are virtual. You should not
369 call the _wrk version directly.
371 'Word' refers to the size of the native machine word, that is,
372 32-bit int in 32-bit mode and 64-bit int in 64-bit mode. '2Word'
373 therefore refers to a double-width (64/128-bit) quantity in two
374 integer registers.
376 /* 32-bit mode: compute an I8/I16/I32 into a GPR.
377 64-bit mode: compute an I8/I16/I32/I64 into a GPR. */
378 static HReg iselWordExpr_R_wrk ( ISelEnv* env, const IRExpr* e,
379 IREndness IEndianess );
380 static HReg iselWordExpr_R ( ISelEnv* env, const IRExpr* e,
381 IREndness IEndianess );
383 /* 32-bit mode: Compute an I8/I16/I32 into a RH
384 (reg-or-halfword-immediate).
385 64-bit mode: Compute an I8/I16/I32/I64 into a RH
386 (reg-or-halfword-immediate).
387 It's important to specify whether the immediate is to be regarded
388 as signed or not. If yes, this will never return -32768 as an
389 immediate; this guaranteed that all signed immediates that are
390 return can have their sign inverted if need be.
392 static PPCRH* iselWordExpr_RH_wrk ( ISelEnv* env,
393 Bool syned, const IRExpr* e,
394 IREndness IEndianess );
395 static PPCRH* iselWordExpr_RH ( ISelEnv* env,
396 Bool syned, const IRExpr* e,
397 IREndness IEndianess );
399 /* 32-bit mode: compute an I32 into a RI (reg or 32-bit immediate).
400 64-bit mode: compute an I64 into a RI (reg or 64-bit immediate). */
401 static PPCRI* iselWordExpr_RI_wrk ( ISelEnv* env, const IRExpr* e,
402 IREndness IEndianess );
403 static PPCRI* iselWordExpr_RI ( ISelEnv* env, const IRExpr* e,
404 IREndness IEndianess );
406 /* In 32 bit mode ONLY, compute an I8 into a
407 reg-or-5-bit-unsigned-immediate, the latter being an immediate in
408 the range 1 .. 31 inclusive. Used for doing shift amounts. */
409 static PPCRH* iselWordExpr_RH5u_wrk ( ISelEnv* env, const IRExpr* e,
410 IREndness IEndianess );
411 static PPCRH* iselWordExpr_RH5u ( ISelEnv* env, const IRExpr* e,
412 IREndness IEndianess );
414 /* In 64-bit mode ONLY, compute an I8 into a
415 reg-or-6-bit-unsigned-immediate, the latter being an immediate in
416 the range 1 .. 63 inclusive. Used for doing shift amounts. */
417 static PPCRH* iselWordExpr_RH6u_wrk ( ISelEnv* env, const IRExpr* e,
418 IREndness IEndianess );
419 static PPCRH* iselWordExpr_RH6u ( ISelEnv* env, const IRExpr* e,
420 IREndness IEndianess );
422 /* 32-bit mode: compute an I32 into an AMode.
423 64-bit mode: compute an I64 into an AMode.
425 Requires to know (xferTy) the type of data to be loaded/stored
426 using this amode. That is so that, for 64-bit code generation, any
427 PPCAMode_IR returned will have an index (immediate offset) field
428 that is guaranteed to be 4-aligned, if there is any chance that the
429 amode is to be used in ld/ldu/lda/std/stdu.
431 Since there are no such restrictions on 32-bit insns, xferTy is
432 ignored for 32-bit code generation. */
433 static PPCAMode* iselWordExpr_AMode_wrk ( ISelEnv* env, const IRExpr* e,
434 IRType xferTy,
435 IREndness IEndianess );
436 static PPCAMode* iselWordExpr_AMode ( ISelEnv* env, const IRExpr* e,
437 IRType xferTy,
438 IREndness IEndianess );
440 static void iselInt128Expr_to_32x4_wrk ( HReg* rHi, HReg* rMedHi,
441 HReg* rMedLo, HReg* rLo,
442 ISelEnv* env, const IRExpr* e,
443 IREndness IEndianess );
444 static void iselInt128Expr_to_32x4 ( HReg* rHi, HReg* rMedHi,
445 HReg* rMedLo, HReg* rLo,
446 ISelEnv* env, const IRExpr* e,
447 IREndness IEndianess );
450 /* 32-bit mode ONLY: compute an I64 into a GPR pair. */
451 static void iselInt64Expr_wrk ( HReg* rHi, HReg* rLo,
452 ISelEnv* env, const IRExpr* e,
453 IREndness IEndianess );
454 static void iselInt64Expr ( HReg* rHi, HReg* rLo,
455 ISelEnv* env, const IRExpr* e,
456 IREndness IEndianess );
458 /* 64-bit mode ONLY: compute an I128 into a GPR64 pair. */
459 static void iselInt128Expr_wrk ( HReg* rHi, HReg* rLo,
460 ISelEnv* env, const IRExpr* e,
461 IREndness IEndianess );
463 static void iselInt128Expr ( HReg* rHi, HReg* rLo,
464 ISelEnv* env, const IRExpr* e,
465 IREndness IEndianess );
467 static PPCCondCode iselCondCode_wrk ( ISelEnv* env, const IRExpr* e,
468 IREndness IEndianess );
469 static PPCCondCode iselCondCode ( ISelEnv* env, const IRExpr* e,
470 IREndness IEndianess );
472 static HReg iselDblExpr_wrk ( ISelEnv* env, const IRExpr* e,
473 IREndness IEndianess );
474 static HReg iselDblExpr ( ISelEnv* env, const IRExpr* e,
475 IREndness IEndianess );
477 static HReg iselFltExpr_wrk ( ISelEnv* env, const IRExpr* e,
478 IREndness IEndianess );
479 static HReg iselFltExpr ( ISelEnv* env, const IRExpr* e,
480 IREndness IEndianess );
482 static HReg iselVecExpr_wrk ( ISelEnv* env, const IRExpr* e,
483 IREndness IEndianess );
484 static HReg iselVecExpr ( ISelEnv* env, const IRExpr* e,
485 IREndness IEndianess );
487 /* 64-bit mode ONLY. */
488 static HReg iselDfp32Expr_wrk ( ISelEnv* env, const IRExpr* e,
489 IREndness IEndianess );
490 static HReg iselDfp32Expr ( ISelEnv* env, const IRExpr* e,
491 IREndness IEndianess );
492 static HReg iselDfp64Expr_wrk ( ISelEnv* env, const IRExpr* e,
493 IREndness IEndianess );
494 static HReg iselDfp64Expr ( ISelEnv* env, const IRExpr* e,
495 IREndness IEndianess );
496 static HReg iselFp128Expr_wrk ( ISelEnv* env, const IRExpr* e,
497 IREndness IEndianess);
498 static HReg iselFp128Expr ( ISelEnv* env, const IRExpr* e,
499 IREndness IEndianess);
501 /* 64-bit mode ONLY: compute an D128 into a GPR64 pair. */
502 static void iselDfp128Expr_wrk ( HReg* rHi, HReg* rLo, ISelEnv* env,
503 const IRExpr* e, IREndness IEndianess );
504 static void iselDfp128Expr ( HReg* rHi, HReg* rLo, ISelEnv* env,
505 const IRExpr* e, IREndness IEndianess );
507 /*---------------------------------------------------------*/
508 /*--- ISEL: Misc helpers ---*/
509 /*---------------------------------------------------------*/
511 /* Make an int reg-reg move. */
513 static PPCInstr* mk_iMOVds_RR ( HReg r_dst, HReg r_src )
515 vassert(hregClass(r_dst) == hregClass(r_src));
516 vassert(hregClass(r_src) == HRcInt32 ||
517 hregClass(r_src) == HRcInt64);
518 return PPCInstr_Alu(Palu_OR, r_dst, r_src, PPCRH_Reg(r_src));
521 /* Advance/retreat %r1 by n. */
523 static void add_to_sp ( ISelEnv* env, UInt n )
525 HReg sp = StackFramePtr(env->mode64);
526 vassert(n <= 1024 && (n%16) == 0);
527 addInstr(env, PPCInstr_Alu( Palu_ADD, sp, sp,
528 PPCRH_Imm(True,toUShort(n)) ));
531 static void sub_from_sp ( ISelEnv* env, UInt n )
533 HReg sp = StackFramePtr(env->mode64);
534 vassert(n <= 1024 && (n%16) == 0);
535 addInstr(env, PPCInstr_Alu( Palu_SUB, sp, sp,
536 PPCRH_Imm(True,toUShort(n)) ));
540 returns a quadword aligned address on the stack
541 - copies SP, adds 16bytes, aligns to quadword.
542 use sub_from_sp(32) before calling this,
543 as expects to have 32 bytes to play with.
545 static HReg get_sp_aligned16 ( ISelEnv* env )
547 HReg r = newVRegI(env);
548 HReg align16 = newVRegI(env);
549 addInstr(env, mk_iMOVds_RR(r, StackFramePtr(env->mode64)));
550 // add 16
551 addInstr(env, PPCInstr_Alu( Palu_ADD, r, r,
552 PPCRH_Imm(True,toUShort(16)) ));
553 // mask to quadword
554 addInstr(env,
555 PPCInstr_LI(align16, 0xFFFFFFFFFFFFFFF0ULL, env->mode64));
556 addInstr(env, PPCInstr_Alu(Palu_AND, r,r, PPCRH_Reg(align16)));
557 return r;
562 /* Load 2*I32 regs to fp reg */
563 static HReg mk_LoadRR32toFPR ( ISelEnv* env,
564 HReg r_srcHi, HReg r_srcLo )
566 HReg fr_dst = newVRegF(env);
567 PPCAMode *am_addr0, *am_addr1;
569 vassert(!env->mode64);
570 vassert(hregClass(r_srcHi) == HRcInt32);
571 vassert(hregClass(r_srcLo) == HRcInt32);
573 sub_from_sp( env, 16 ); // Move SP down 16 bytes
574 am_addr0 = PPCAMode_IR( 0, StackFramePtr(env->mode64) );
575 am_addr1 = PPCAMode_IR( 4, StackFramePtr(env->mode64) );
577 // store hi,lo as Ity_I32's
578 addInstr(env, PPCInstr_Store( 4, am_addr0, r_srcHi, env->mode64 ));
579 addInstr(env, PPCInstr_Store( 4, am_addr1, r_srcLo, env->mode64 ));
581 // load as float
582 addInstr(env, PPCInstr_FpLdSt(True/*load*/, 8, fr_dst, am_addr0));
584 add_to_sp( env, 16 ); // Reset SP
585 return fr_dst;
588 /* Load I64 reg to fp reg */
589 static HReg mk_LoadR64toFPR ( ISelEnv* env, HReg r_src )
591 HReg fr_dst = newVRegF(env);
592 PPCAMode *am_addr0;
594 vassert(env->mode64);
595 vassert(hregClass(r_src) == HRcInt64);
597 sub_from_sp( env, 16 ); // Move SP down 16 bytes
598 am_addr0 = PPCAMode_IR( 0, StackFramePtr(env->mode64) );
600 // store as Ity_I64
601 addInstr(env, PPCInstr_Store( 8, am_addr0, r_src, env->mode64 ));
603 // load as float
604 addInstr(env, PPCInstr_FpLdSt(True/*load*/, 8, fr_dst, am_addr0));
606 add_to_sp( env, 16 ); // Reset SP
607 return fr_dst;
611 /* Given an amode, return one which references 4 bytes further
612 along. */
614 static PPCAMode* advance4 ( ISelEnv* env, PPCAMode* am )
616 PPCAMode* am4 = dopyPPCAMode( am );
617 if (am4->tag == Pam_IR
618 && am4->Pam.IR.index + 4 <= 32767) {
619 am4->Pam.IR.index += 4;
620 } else {
621 vpanic("advance4(ppc,host)");
623 return am4;
627 /* Given a guest-state array descriptor, an index expression and a
628 bias, generate a PPCAMode pointing at the relevant piece of
629 guest state. */
630 static
631 PPCAMode* genGuestArrayOffset ( ISelEnv* env, IRRegArray* descr,
632 IRExpr* off, Int bias, IREndness IEndianess )
634 HReg rtmp, roff;
635 Int elemSz = sizeofIRType(descr->elemTy);
636 Int nElems = descr->nElems;
637 Int shift = 0;
639 /* MAX is somewhat arbitrarily, needs to be at least
640 3 times the size of VexGuestPPC64State */
641 #define MAX 6500
643 /* Throw out any cases we don't need. In theory there might be a
644 day where we need to handle others, but not today. */
646 if (nElems != 16 && nElems != 32)
647 vpanic("genGuestArrayOffset(ppc host)(1)");
649 switch (elemSz) {
650 case 4: shift = 2; break;
651 case 8: shift = 3; break;
652 default: vpanic("genGuestArrayOffset(ppc host)(2)");
655 if (bias < -100 || bias > 100) /* somewhat arbitrarily */
656 vpanic("genGuestArrayOffset(ppc host)(3)");
657 if (descr->base < 0 || descr->base > MAX) { /* somewhat arbitrarily */
658 vex_printf("ERROR: descr->base = %d, is greater then maximum = %d\n",
659 descr->base, MAX);
660 vpanic("genGuestArrayOffset(ppc host)(4)");
663 /* Compute off into a reg, %off. Then return:
665 addi %tmp, %off, bias (if bias != 0)
666 andi %tmp, nElems-1
667 sldi %tmp, shift
668 addi %tmp, %tmp, base
669 ... Baseblockptr + %tmp ...
671 roff = iselWordExpr_R(env, off, IEndianess);
672 rtmp = newVRegI(env);
673 addInstr(env, PPCInstr_Alu(
674 Palu_ADD,
675 rtmp, roff,
676 PPCRH_Imm(True/*signed*/, toUShort(bias))));
677 addInstr(env, PPCInstr_Alu(
678 Palu_AND,
679 rtmp, rtmp,
680 PPCRH_Imm(False/*unsigned*/, toUShort(nElems-1))));
681 addInstr(env, PPCInstr_Shft(
682 Pshft_SHL,
683 env->mode64 ? False : True/*F:64-bit, T:32-bit shift*/,
684 rtmp, rtmp,
685 PPCRH_Imm(False/*unsigned*/, toUShort(shift))));
686 addInstr(env, PPCInstr_Alu(
687 Palu_ADD,
688 rtmp, rtmp,
689 PPCRH_Imm(True/*signed*/, toUShort(descr->base))));
690 return
691 PPCAMode_RR( GuestStatePtr(env->mode64), rtmp );
692 #undef MAX
696 /*---------------------------------------------------------*/
697 /*--- ISEL: Function call helpers ---*/
698 /*---------------------------------------------------------*/
700 /* Used only in doHelperCall. See big comment in doHelperCall re
701 handling of register-parameter args. This function figures out
702 whether evaluation of an expression might require use of a fixed
703 register. If in doubt return True (safe but suboptimal).
705 static
706 Bool mightRequireFixedRegs ( IRExpr* e )
708 switch (e->tag) {
709 case Iex_RdTmp: case Iex_Const: case Iex_Get:
710 return False;
711 default:
712 return True;
717 /* Do a complete function call. |guard| is a Ity_Bit expression
718 indicating whether or not the call happens. If guard==NULL, the
719 call is unconditional. |retloc| is set to indicate where the
720 return value is after the call. The caller (of this fn) must
721 generate code to add |stackAdjustAfterCall| to the stack pointer
722 after the call is done. */
724 static
725 void doHelperCall ( /*OUT*/UInt* stackAdjustAfterCall,
726 /*OUT*/RetLoc* retloc,
727 ISelEnv* env,
728 IRExpr* guard,
729 IRCallee* cee, IRType retTy, IRExpr** args,
730 IREndness IEndianess)
732 PPCCondCode cc;
733 HReg argregs[PPC_N_REGPARMS];
734 HReg tmpregs[PPC_N_REGPARMS];
735 Bool go_fast;
736 Int n_args, i, argreg;
737 UInt argiregs;
738 Bool mode64 = env->mode64;
740 /* Set default returns. We'll update them later if needed. */
741 *stackAdjustAfterCall = 0;
742 *retloc = mk_RetLoc_INVALID();
744 /* These are used for cross-checking that IR-level constraints on
745 the use of IRExpr_VECRET() and IRExpr_GSPTR() are observed. */
746 UInt nVECRETs = 0;
747 UInt nGSPTRs = 0;
749 /* Marshal args for a call and do the call.
751 This function only deals with a tiny set of possibilities, which
752 cover all helpers in practice. The restrictions are that only
753 arguments in registers are supported, hence only PPC_N_REGPARMS x
754 (mode32:32 | mode64:64) integer bits in total can be passed.
755 In fact the only supported arg type is (mode32:I32 | mode64:I64).
757 The return type can be I{64,32,16,8} or V{128,256}. In the
758 latter two cases, it is expected that |args| will contain the
759 special node IRExpr_VECRET(), in which case this routine
760 generates code to allocate space on the stack for the vector
761 return value. Since we are not passing any scalars on the
762 stack, it is enough to preallocate the return space before
763 marshalling any arguments, in this case.
765 |args| may also contain IRExpr_GSPTR(), in which case the value
766 in the guest state pointer register is passed as the
767 corresponding argument.
769 Generating code which is both efficient and correct when
770 parameters are to be passed in registers is difficult, for the
771 reasons elaborated in detail in comments attached to
772 doHelperCall() in priv/host-x86/isel.c. Here, we use a variant
773 of the method described in those comments.
775 The problem is split into two cases: the fast scheme and the
776 slow scheme. In the fast scheme, arguments are computed
777 directly into the target (real) registers. This is only safe
778 when we can be sure that computation of each argument will not
779 trash any real registers set by computation of any other
780 argument.
782 In the slow scheme, all args are first computed into vregs, and
783 once they are all done, they are moved to the relevant real
784 regs. This always gives correct code, but it also gives a bunch
785 of vreg-to-rreg moves which are usually redundant but are hard
786 for the register allocator to get rid of.
788 To decide which scheme to use, all argument expressions are
789 first examined. If they are all so simple that it is clear they
790 will be evaluated without use of any fixed registers, use the
791 fast scheme, else use the slow scheme. Note also that only
792 unconditional calls may use the fast scheme, since having to
793 compute a condition expression could itself trash real
794 registers.
796 Note this requires being able to examine an expression and
797 determine whether or not evaluation of it might use a fixed
798 register. That requires knowledge of how the rest of this insn
799 selector works. Currently just the following 3 are regarded as
800 safe -- hopefully they cover the majority of arguments in
801 practice: IRExpr_Tmp IRExpr_Const IRExpr_Get.
804 /* Note that the cee->regparms field is meaningless on PPC32/64 host
805 (since there is only one calling convention) and so we always
806 ignore it. */
808 n_args = 0;
809 for (i = 0; args[i]; i++)
810 n_args++;
812 if (n_args > PPC_N_REGPARMS) {
813 vpanic("doHelperCall(PPC): cannot currently handle > 8 args");
814 // PPC_N_REGPARMS
817 /* This is kind of stupid .. the arrays are sized as PPC_N_REGPARMS
818 but we then assume that that value is 8. */
819 vassert(PPC_N_REGPARMS == 8);
821 argregs[0] = hregPPC_GPR3(mode64);
822 argregs[1] = hregPPC_GPR4(mode64);
823 argregs[2] = hregPPC_GPR5(mode64);
824 argregs[3] = hregPPC_GPR6(mode64);
825 argregs[4] = hregPPC_GPR7(mode64);
826 argregs[5] = hregPPC_GPR8(mode64);
827 argregs[6] = hregPPC_GPR9(mode64);
828 argregs[7] = hregPPC_GPR10(mode64);
829 argiregs = 0;
831 tmpregs[0] = tmpregs[1] = tmpregs[2] =
832 tmpregs[3] = tmpregs[4] = tmpregs[5] =
833 tmpregs[6] = tmpregs[7] = INVALID_HREG;
835 /* First decide which scheme (slow or fast) is to be used. First
836 assume the fast scheme, and select slow if any contraindications
837 (wow) appear. */
839 go_fast = True;
841 /* We'll need space on the stack for the return value. Avoid
842 possible complications with nested calls by using the slow
843 scheme. */
844 if (retTy == Ity_V128 || retTy == Ity_V256)
845 go_fast = False;
847 if (go_fast && guard) {
848 if (guard->tag == Iex_Const
849 && guard->Iex.Const.con->tag == Ico_U1
850 && guard->Iex.Const.con->Ico.U1 == True) {
851 /* unconditional */
852 } else {
853 /* Not manifestly unconditional -- be conservative. */
854 go_fast = False;
858 if (go_fast) {
859 for (i = 0; i < n_args; i++) {
860 IRExpr* arg = args[i];
861 if (UNLIKELY(arg->tag == Iex_GSPTR)) {
862 /* that's OK */
864 else if (UNLIKELY(arg->tag == Iex_VECRET)) {
865 /* This implies ill-formed IR, since if the IR was
866 well-formed, the return-type test above would have
867 filtered it out. */
868 vpanic("doHelperCall(PPC): invalid IR");
870 else if (mightRequireFixedRegs(arg)) {
871 go_fast = False;
872 break;
877 /* At this point the scheme to use has been established. Generate
878 code to get the arg values into the argument rregs. */
880 if (go_fast) {
882 /* FAST SCHEME */
883 argreg = 0;
885 for (i = 0; i < n_args; i++) {
886 IRExpr* arg = args[i];
887 vassert(argreg < PPC_N_REGPARMS);
889 if (arg->tag == Iex_GSPTR) {
890 argiregs |= (1 << (argreg+3));
891 addInstr(env, mk_iMOVds_RR( argregs[argreg],
892 GuestStatePtr(mode64) ));
893 argreg++;
894 } else {
895 vassert(arg->tag != Iex_VECRET);
896 IRType ty = typeOfIRExpr(env->type_env, arg);
897 vassert(ty == Ity_I32 || ty == Ity_I64);
898 if (!mode64) {
899 if (ty == Ity_I32) {
900 argiregs |= (1 << (argreg+3));
901 addInstr(env,
902 mk_iMOVds_RR( argregs[argreg],
903 iselWordExpr_R(env, arg,
904 IEndianess) ));
905 } else { // Ity_I64 in 32-bit mode
906 HReg rHi, rLo;
907 if ((argreg%2) == 1)
908 // ppc32 ELF abi spec for passing LONG_LONG
909 argreg++; // XXX: odd argreg => even rN
910 vassert(argreg < PPC_N_REGPARMS-1);
911 iselInt64Expr(&rHi,&rLo, env, arg, IEndianess);
912 argiregs |= (1 << (argreg+3));
913 addInstr(env, mk_iMOVds_RR( argregs[argreg++], rHi ));
914 argiregs |= (1 << (argreg+3));
915 addInstr(env, mk_iMOVds_RR( argregs[argreg], rLo));
917 } else { // mode64
918 argiregs |= (1 << (argreg+3));
919 addInstr(env, mk_iMOVds_RR( argregs[argreg],
920 iselWordExpr_R(env, arg,
921 IEndianess) ));
923 argreg++;
924 } /* if (arg == IRExprP__BBPR) */
927 /* Fast scheme only applies for unconditional calls. Hence: */
928 cc = mk_PPCCondCode( Pct_ALWAYS, Pcf_NONE );
930 } else {
932 /* SLOW SCHEME; move via temporaries */
933 argreg = 0;
935 /* If we have a vector return type, allocate a place for it on
936 the stack and record its address. Rather than figure out the
937 complexities of PPC{32,64} ELF ABI stack frame layout, simply
938 drop the SP by 1024 and allocate the return point in the
939 middle. I think this should comfortably clear any ABI
940 mandated register save areas. Note that it doesn't maintain
941 the backchain as it should, since we're not doing st{d,w}u to
942 adjust the SP, but .. that doesn't seem to be a big deal.
943 Since we're not expecting to have to unwind out of here. */
944 HReg r_vecRetAddr = INVALID_HREG;
945 if (retTy == Ity_V128) {
946 r_vecRetAddr = newVRegI(env);
947 sub_from_sp(env, 512);
948 addInstr(env, mk_iMOVds_RR( r_vecRetAddr, StackFramePtr(mode64) ));
949 sub_from_sp(env, 512);
951 else if (retTy == Ity_V256) {
952 vassert(0); //ATC
953 r_vecRetAddr = newVRegI(env);
954 sub_from_sp(env, 512);
955 addInstr(env, mk_iMOVds_RR( r_vecRetAddr, StackFramePtr(mode64) ));
956 sub_from_sp(env, 512);
959 vassert(n_args >= 0 && n_args <= 8);
960 for (i = 0; i < n_args; i++) {
961 IRExpr* arg = args[i];
962 vassert(argreg < PPC_N_REGPARMS);
963 if (UNLIKELY(arg->tag == Iex_GSPTR)) {
964 tmpregs[argreg] = newVRegI(env);
965 addInstr(env, mk_iMOVds_RR( tmpregs[argreg],
966 GuestStatePtr(mode64) ));
967 nGSPTRs++;
969 else if (UNLIKELY(arg->tag == Iex_VECRET)) {
970 /* We stashed the address of the return slot earlier, so just
971 retrieve it now. */
972 vassert(!hregIsInvalid(r_vecRetAddr));
973 tmpregs[i] = r_vecRetAddr;
974 nVECRETs++;
976 else {
977 IRType ty = typeOfIRExpr(env->type_env, arg);
978 vassert(ty == Ity_I32 || ty == Ity_I64);
979 if (!mode64) {
980 if (ty == Ity_I32) {
981 tmpregs[argreg] = iselWordExpr_R(env, arg, IEndianess);
982 } else { // Ity_I64 in 32-bit mode
983 HReg rHi, rLo;
984 if ((argreg%2) == 1)
985 // ppc32 ELF abi spec for passing LONG_LONG
986 argreg++; // XXX: odd argreg => even rN
987 vassert(argreg < PPC_N_REGPARMS-1);
988 iselInt64Expr(&rHi,&rLo, env, arg, IEndianess);
989 tmpregs[argreg++] = rHi;
990 tmpregs[argreg] = rLo;
992 } else { // mode64
993 tmpregs[argreg] = iselWordExpr_R(env, arg, IEndianess);
996 argreg++;
999 /* Now we can compute the condition. We can't do it earlier
1000 because the argument computations could trash the condition
1001 codes. Be a bit clever to handle the common case where the
1002 guard is 1:Bit. */
1003 cc = mk_PPCCondCode( Pct_ALWAYS, Pcf_NONE );
1004 if (guard) {
1005 if (guard->tag == Iex_Const
1006 && guard->Iex.Const.con->tag == Ico_U1
1007 && guard->Iex.Const.con->Ico.U1 == True) {
1008 /* unconditional -- do nothing */
1009 } else {
1010 cc = iselCondCode( env, guard, IEndianess );
1014 /* Move the args to their final destinations. */
1015 for (i = 0; i < argreg; i++) {
1016 if (hregIsInvalid(tmpregs[i])) // Skip invalid regs
1017 continue;
1018 /* None of these insns, including any spill code that might
1019 be generated, may alter the condition codes. */
1020 argiregs |= (1 << (i+3));
1021 addInstr( env, mk_iMOVds_RR( argregs[i], tmpregs[i] ) );
1026 /* Do final checks, set the return values, and generate the call
1027 instruction proper. */
1028 if (retTy == Ity_V128 || retTy == Ity_V256) {
1029 vassert(nVECRETs == 1);
1030 } else {
1031 vassert(nVECRETs == 0);
1034 vassert(nGSPTRs == 0 || nGSPTRs == 1);
1036 vassert(*stackAdjustAfterCall == 0);
1037 vassert(is_RetLoc_INVALID(*retloc));
1038 switch (retTy) {
1039 case Ity_INVALID:
1040 /* Function doesn't return a value. */
1041 *retloc = mk_RetLoc_simple(RLPri_None);
1042 break;
1043 case Ity_I64:
1044 *retloc = mk_RetLoc_simple(mode64 ? RLPri_Int : RLPri_2Int);
1045 break;
1046 case Ity_I32: case Ity_I16: case Ity_I8:
1047 *retloc = mk_RetLoc_simple(RLPri_Int);
1048 break;
1049 case Ity_V128:
1050 /* Result is 512 bytes up the stack, and after it has been
1051 retrieved, adjust SP upwards by 1024. */
1052 *retloc = mk_RetLoc_spRel(RLPri_V128SpRel, 512);
1053 *stackAdjustAfterCall = 1024;
1054 break;
1055 case Ity_V256:
1056 vassert(0); // ATC
1057 /* Ditto */
1058 *retloc = mk_RetLoc_spRel(RLPri_V256SpRel, 512);
1059 *stackAdjustAfterCall = 1024;
1060 break;
1061 default:
1062 /* IR can denote other possible return types, but we don't
1063 handle those here. */
1064 vassert(0);
1067 /* Finally, generate the call itself. This needs the *retloc value
1068 set in the switch above, which is why it's at the end. */
1070 Addr64 target = mode64 ? (Addr)cee->addr
1071 : toUInt((Addr)(cee->addr));
1072 addInstr(env, PPCInstr_Call( cc, target, argiregs, *retloc ));
1076 /*---------------------------------------------------------*/
1077 /*--- ISEL: FP rounding mode helpers ---*/
1078 /*---------------------------------------------------------*/
1080 ///* Set FPU's rounding mode to the default */
1081 //static
1082 //void set_FPU_rounding_default ( ISelEnv* env )
1084 // HReg fr_src = newVRegF(env);
1085 // HReg r_src = newVRegI(env);
1087 // /* Default rounding mode = 0x0
1088 // Only supporting the rounding-mode bits - the rest of FPSCR is 0x0
1089 // - so we can set the whole register at once (faster)
1090 // note: upper 32 bits ignored by FpLdFPSCR
1091 // */
1092 // addInstr(env, PPCInstr_LI(r_src, 0x0, env->mode64));
1093 // if (env->mode64) {
1094 // fr_src = mk_LoadR64toFPR( env, r_src ); // 1*I64 -> F64
1095 // } else {
1096 // fr_src = mk_LoadRR32toFPR( env, r_src, r_src ); // 2*I32 -> F64
1097 // }
1098 // addInstr(env, PPCInstr_FpLdFPSCR( fr_src ));
1101 /* Convert IR rounding mode to PPC encoding */
1102 static HReg roundModeIRtoPPC ( ISelEnv* env, HReg r_rmIR )
1105 rounding mode | PPC | IR
1106 -----------------------------------------------
1107 to nearest, ties to even | 000 | 000
1108 to zero | 001 | 011
1109 to +infinity | 010 | 010
1110 to -infinity | 011 | 001
1111 +++++ Below are the extended rounding modes for decimal floating point +++++
1112 to nearest, ties away from 0 | 100 | 100
1113 to nearest, ties toward 0 | 101 | 111
1114 to away from 0 | 110 | 110
1115 to prepare for shorter precision | 111 | 101
1117 HReg r_rmPPC = newVRegI(env);
1118 HReg r_tmp1 = newVRegI(env);
1119 HReg r_tmp2 = newVRegI(env);
1121 vassert(hregClass(r_rmIR) == HRcGPR(env->mode64));
1123 // r_rmPPC = XOR(r_rmIR, r_rmIR << 1) & 3
1125 // slwi tmp1, r_rmIR, 1
1126 // xor tmp1, r_rmIR, tmp1
1127 // andi r_rmPPC, tmp1, 3
1129 addInstr(env, PPCInstr_Shft(Pshft_SHL, True/*32bit shift*/,
1130 r_tmp1, r_rmIR, PPCRH_Imm(False,1)));
1132 addInstr( env, PPCInstr_Alu( Palu_AND,
1133 r_tmp2, r_tmp1, PPCRH_Imm( False, 3 ) ) );
1135 addInstr( env, PPCInstr_Alu( Palu_XOR,
1136 r_rmPPC, r_rmIR, PPCRH_Reg( r_tmp2 ) ) );
1138 return r_rmPPC;
1142 /* Set the FPU's rounding mode: 'mode' is an I32-typed expression
1143 denoting a value in the range 0 .. 7, indicating a round mode
1144 encoded as per type IRRoundingMode. Set the PPC FPSCR to have the
1145 same rounding. When the dfp_rm arg is True, set the decimal
1146 floating point rounding mode bits (29:31); otherwise, set the
1147 binary floating point rounding mode bits (62:63).
1149 For speed & simplicity, we're setting the *entire* FPSCR here.
1151 Setting the rounding mode is expensive. So this function tries to
1152 avoid repeatedly setting the rounding mode to the same thing by
1153 first comparing 'mode' to the 'mode' tree supplied in the previous
1154 call to this function, if any. (The previous value is stored in
1155 env->previous_rm.) If 'mode' is a single IR temporary 't' and
1156 env->previous_rm is also just 't', then the setting is skipped.
1158 This is safe because of the SSA property of IR: an IR temporary can
1159 only be defined once and so will have the same value regardless of
1160 where it appears in the block. Cool stuff, SSA.
1162 A safety condition: all attempts to set the RM must be aware of
1163 this mechanism - by being routed through the functions here.
1165 Of course this only helps if blocks where the RM is set more than
1166 once and it is set to the same value each time, *and* that value is
1167 held in the same IR temporary each time. In order to assure the
1168 latter as much as possible, the IR optimiser takes care to do CSE
1169 on any block with any sign of floating point activity.
1171 static
1172 void _set_FPU_rounding_mode ( ISelEnv* env, IRExpr* mode, Bool dfp_rm,
1173 IREndness IEndianess )
1175 HReg fr_src = newVRegF(env);
1176 HReg r_src;
1178 vassert(typeOfIRExpr(env->type_env,mode) == Ity_I32);
1180 /* Do we need to do anything? */
1181 if (env->previous_rm
1182 && env->previous_rm->tag == Iex_RdTmp
1183 && mode->tag == Iex_RdTmp
1184 && env->previous_rm->Iex.RdTmp.tmp == mode->Iex.RdTmp.tmp) {
1185 /* no - setting it to what it was before. */
1186 vassert(typeOfIRExpr(env->type_env, env->previous_rm) == Ity_I32);
1187 return;
1190 /* No luck - we better set it, and remember what we set it to. */
1191 env->previous_rm = mode;
1193 /* Only supporting the rounding-mode bits - the rest of FPSCR is
1194 0x0 - so we can set the whole register at once (faster). */
1196 // Resolve rounding mode and convert to PPC representation
1197 r_src = roundModeIRtoPPC( env, iselWordExpr_R(env, mode, IEndianess) );
1199 // gpr -> fpr
1200 if (env->mode64) {
1201 if (dfp_rm) {
1202 HReg r_tmp1 = newVRegI( env );
1203 addInstr( env,
1204 PPCInstr_Shft( Pshft_SHL, False/*64bit shift*/,
1205 r_tmp1, r_src, PPCRH_Imm( False, 32 ) ) );
1206 fr_src = mk_LoadR64toFPR( env, r_tmp1 );
1207 } else {
1208 fr_src = mk_LoadR64toFPR( env, r_src ); // 1*I64 -> F64
1210 } else {
1211 if (dfp_rm) {
1212 HReg r_zero = newVRegI( env );
1213 addInstr( env, PPCInstr_LI( r_zero, 0, env->mode64 ) );
1214 fr_src = mk_LoadRR32toFPR( env, r_src, r_zero );
1215 } else {
1216 fr_src = mk_LoadRR32toFPR( env, r_src, r_src ); // 2*I32 -> F64
1220 // Move to FPSCR
1221 addInstr(env, PPCInstr_FpLdFPSCR( fr_src, dfp_rm ));
1224 static void set_FPU_rounding_mode ( ISelEnv* env, IRExpr* mode,
1225 IREndness IEndianess )
1227 _set_FPU_rounding_mode(env, mode, False, IEndianess);
1230 static void set_FPU_DFP_rounding_mode ( ISelEnv* env, IRExpr* mode,
1231 IREndness IEndianess )
1233 _set_FPU_rounding_mode(env, mode, True, IEndianess);
1236 static
1237 Bool FPU_rounding_mode_isOdd (IRExpr* mode) {
1238 /* If the rounding mode is set to odd, the the expr must be a constant U8
1239 * value equal to 8. Otherwise, it must be a bin op expressiong that
1240 * calculates the value.
1243 if (mode->tag != Iex_Const)
1244 return False;
1246 vassert(mode->Iex.Const.con->tag == Ico_U32);
1247 vassert(mode->Iex.Const.con->Ico.U32 == 0x8);
1248 return True;
1251 /*---------------------------------------------------------*/
1252 /*--- ISEL: vector helpers ---*/
1253 /*---------------------------------------------------------*/
1255 /* Generate all-zeroes into a new vector register.
1257 static HReg generate_zeroes_V128 ( ISelEnv* env )
1259 HReg dst = newVRegV(env);
1260 addInstr(env, PPCInstr_AvBinary(Pav_XOR, dst, dst, dst));
1261 return dst;
1264 /* Generate all-ones into a new vector register.
1266 static HReg generate_ones_V128 ( ISelEnv* env )
1268 HReg dst = newVRegV(env);
1269 PPCVI5s * src = PPCVI5s_Imm(-1);
1270 addInstr(env, PPCInstr_AvSplat(8, dst, src));
1271 return dst;
1276 Generates code for AvSplat
1277 - takes in IRExpr* of type 8|16|32
1278 returns vector reg of duplicated lanes of input
1279 - uses AvSplat(imm) for imms up to simm6.
1280 otherwise must use store reg & load vector
1282 static HReg mk_AvDuplicateRI( ISelEnv* env, IRExpr* e, IREndness IEndianess )
1284 HReg r_src;
1285 HReg dst = newVRegV(env);
1286 PPCRI* ri = iselWordExpr_RI(env, e, IEndianess);
1287 IRType ty = typeOfIRExpr(env->type_env,e);
1288 UInt sz = (ty == Ity_I8) ? 8 : (ty == Ity_I16) ? 16 : 32;
1289 vassert(ty == Ity_I8 || ty == Ity_I16 || ty == Ity_I32);
1291 /* special case: immediate */
1292 if (ri->tag == Pri_Imm) {
1293 Int simm32 = (Int)ri->Pri.Imm;
1295 /* figure out if it's do-able with imm splats. */
1296 if (simm32 >= -32 && simm32 <= 31) {
1297 Char simm6 = (Char)simm32;
1298 if (simm6 > 15) { /* 16:31 inclusive */
1299 HReg v1 = newVRegV(env);
1300 HReg v2 = newVRegV(env);
1301 addInstr(env, PPCInstr_AvSplat(sz, v1, PPCVI5s_Imm(-16)));
1302 addInstr(env, PPCInstr_AvSplat(sz, v2, PPCVI5s_Imm(simm6-16)));
1303 addInstr(env,
1304 (sz== 8) ? PPCInstr_AvBin8x16(Pav_SUBU, dst, v2, v1) :
1305 (sz==16) ? PPCInstr_AvBin16x8(Pav_SUBU, dst, v2, v1)
1306 : PPCInstr_AvBin32x4(Pav_SUBU, dst, v2, v1) );
1307 return dst;
1309 if (simm6 < -16) { /* -32:-17 inclusive */
1310 HReg v1 = newVRegV(env);
1311 HReg v2 = newVRegV(env);
1312 addInstr(env, PPCInstr_AvSplat(sz, v1, PPCVI5s_Imm(-16)));
1313 addInstr(env, PPCInstr_AvSplat(sz, v2, PPCVI5s_Imm(simm6+16)));
1314 addInstr(env,
1315 (sz== 8) ? PPCInstr_AvBin8x16(Pav_ADDU, dst, v2, v1) :
1316 (sz==16) ? PPCInstr_AvBin16x8(Pav_ADDU, dst, v2, v1)
1317 : PPCInstr_AvBin32x4(Pav_ADDU, dst, v2, v1) );
1318 return dst;
1320 /* simplest form: -16:15 inclusive */
1321 addInstr(env, PPCInstr_AvSplat(sz, dst, PPCVI5s_Imm(simm6)));
1322 return dst;
1325 /* no luck; use the Slow way. */
1326 r_src = newVRegI(env);
1327 addInstr(env, PPCInstr_LI(r_src, (Long)simm32, env->mode64));
1329 else {
1330 r_src = ri->Pri.Reg;
1334 /* Store r_src multiple times (sz dependent); then load the dest vector. */
1335 HReg r_aligned16;
1336 PPCAMode *am_offset, *am_offset_zero;
1338 sub_from_sp( env, 32 ); // Move SP down
1339 /* Get a 16-aligned address within our stack space */
1340 r_aligned16 = get_sp_aligned16( env );
1342 Int i;
1343 Int stride = (sz == 8) ? 1 : (sz == 16) ? 2 : 4;
1344 UChar num_bytes_to_store = stride;
1345 am_offset_zero = PPCAMode_IR( 0, r_aligned16 );
1346 am_offset = am_offset_zero;
1347 for (i = 0; i < 16; i+=stride, am_offset = PPCAMode_IR( i, r_aligned16)) {
1348 addInstr(env, PPCInstr_Store( num_bytes_to_store, am_offset, r_src, env->mode64 ));
1351 /* Effectively splat the r_src value to dst */
1352 addInstr(env, PPCInstr_AvLdSt( True/*ld*/, 16, dst, am_offset_zero ) );
1353 add_to_sp( env, 32 ); // Reset SP
1355 return dst;
1360 /* for each lane of vSrc: lane == nan ? laneX = all 1's : all 0's */
1361 static HReg isNan ( ISelEnv* env, HReg vSrc, IREndness IEndianess )
1363 HReg zeros, msk_exp, msk_mnt, expt, mnts, vIsNan;
1365 vassert(hregClass(vSrc) == HRcVec128);
1367 zeros = mk_AvDuplicateRI(env, mkU32(0), IEndianess);
1368 msk_exp = mk_AvDuplicateRI(env, mkU32(0x7F800000), IEndianess);
1369 msk_mnt = mk_AvDuplicateRI(env, mkU32(0x7FFFFF), IEndianess);
1370 expt = newVRegV(env);
1371 mnts = newVRegV(env);
1372 vIsNan = newVRegV(env);
1374 /* 32bit float => sign(1) | exponent(8) | mantissa(23)
1375 nan => exponent all ones, mantissa > 0 */
1377 addInstr(env, PPCInstr_AvBinary(Pav_AND, expt, vSrc, msk_exp));
1378 addInstr(env, PPCInstr_AvBin32x4(Pav_CMPEQU, expt, expt, msk_exp));
1379 addInstr(env, PPCInstr_AvBinary(Pav_AND, mnts, vSrc, msk_mnt));
1380 addInstr(env, PPCInstr_AvBin32x4(Pav_CMPGTU, mnts, mnts, zeros));
1381 addInstr(env, PPCInstr_AvBinary(Pav_AND, vIsNan, expt, mnts));
1382 return vIsNan;
1386 /*---------------------------------------------------------*/
1387 /*--- ISEL: Integer expressions (64/32/16/8 bit) ---*/
1388 /*---------------------------------------------------------*/
1390 /* Select insns for an integer-typed expression, and add them to the
1391 code list. Return a reg holding the result. This reg will be a
1392 virtual register. THE RETURNED REG MUST NOT BE MODIFIED. If you
1393 want to modify it, ask for a new vreg, copy it in there, and modify
1394 the copy. The register allocator will do its best to map both
1395 vregs to the same real register, so the copies will often disappear
1396 later in the game.
1398 This should handle expressions of 64, 32, 16 and 8-bit type.
1399 All results are returned in a (mode64 ? 64bit : 32bit) register.
1400 For 16- and 8-bit expressions, the upper (32/48/56 : 16/24) bits
1401 are arbitrary, so you should mask or sign extend partial values
1402 if necessary.
1405 static HReg iselWordExpr_R ( ISelEnv* env, const IRExpr* e,
1406 IREndness IEndianess )
1408 HReg r = iselWordExpr_R_wrk(env, e, IEndianess);
1409 /* sanity checks ... */
1410 # if 0
1411 vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
1412 # endif
1414 vassert(hregClass(r) == HRcGPR(env->mode64));
1415 vassert(hregIsVirtual(r));
1416 return r;
1419 /* DO NOT CALL THIS DIRECTLY ! */
1420 static HReg iselWordExpr_R_wrk ( ISelEnv* env, const IRExpr* e,
1421 IREndness IEndianess )
1423 Bool mode64 = env->mode64;
1424 MatchInfo mi;
1425 DECLARE_PATTERN(p_32to1_then_1Uto8);
1427 IRType ty = typeOfIRExpr(env->type_env,e);
1428 vassert(ty == Ity_I8 || ty == Ity_I16 ||
1429 ty == Ity_I32 || ((ty == Ity_I64) && mode64));
1431 switch (e->tag) {
1433 /* --------- TEMP --------- */
1434 case Iex_RdTmp:
1435 return lookupIRTemp(env, e->Iex.RdTmp.tmp);
1437 /* --------- LOAD --------- */
1438 case Iex_Load: {
1439 HReg r_dst;
1440 PPCAMode* am_addr;
1441 if (e->Iex.Load.end != IEndianess)
1442 goto irreducible;
1443 r_dst = newVRegI(env);
1444 am_addr = iselWordExpr_AMode( env, e->Iex.Load.addr, ty/*of xfer*/,
1445 IEndianess );
1446 addInstr(env, PPCInstr_Load( toUChar(sizeofIRType(ty)),
1447 r_dst, am_addr, mode64 ));
1448 return r_dst;
1449 /*NOTREACHED*/
1452 /* --------- BINARY OP --------- */
1453 case Iex_Binop: {
1454 PPCAluOp aluOp;
1455 PPCShftOp shftOp;
1457 /* Is it an addition or logical style op? */
1458 switch (e->Iex.Binop.op) {
1459 case Iop_Add8: case Iop_Add16: case Iop_Add32: case Iop_Add64:
1460 aluOp = Palu_ADD; break;
1461 case Iop_Sub8: case Iop_Sub16: case Iop_Sub32: case Iop_Sub64:
1462 aluOp = Palu_SUB; break;
1463 case Iop_And8: case Iop_And16: case Iop_And32: case Iop_And64:
1464 aluOp = Palu_AND; break;
1465 case Iop_Or8: case Iop_Or16: case Iop_Or32: case Iop_Or64:
1466 aluOp = Palu_OR; break;
1467 case Iop_Xor8: case Iop_Xor16: case Iop_Xor32: case Iop_Xor64:
1468 aluOp = Palu_XOR; break;
1469 default:
1470 aluOp = Palu_INVALID; break;
1472 /* For commutative ops we assume any literal
1473 values are on the second operand. */
1474 if (aluOp != Palu_INVALID) {
1475 HReg r_dst = newVRegI(env);
1476 HReg r_srcL = iselWordExpr_R(env, e->Iex.Binop.arg1, IEndianess);
1477 PPCRH* ri_srcR = NULL;
1478 /* get right arg into an RH, in the appropriate way */
1479 switch (aluOp) {
1480 case Palu_ADD: case Palu_SUB:
1481 ri_srcR = iselWordExpr_RH(env, True/*signed*/,
1482 e->Iex.Binop.arg2, IEndianess);
1483 break;
1484 case Palu_AND: case Palu_OR: case Palu_XOR:
1485 ri_srcR = iselWordExpr_RH(env, False/*signed*/,
1486 e->Iex.Binop.arg2, IEndianess);
1487 break;
1488 default:
1489 vpanic("iselWordExpr_R_wrk-aluOp-arg2");
1491 addInstr(env, PPCInstr_Alu(aluOp, r_dst, r_srcL, ri_srcR));
1492 return r_dst;
1495 /* a shift? */
1496 switch (e->Iex.Binop.op) {
1497 case Iop_Shl8: case Iop_Shl16: case Iop_Shl32: case Iop_Shl64:
1498 shftOp = Pshft_SHL; break;
1499 case Iop_Shr8: case Iop_Shr16: case Iop_Shr32: case Iop_Shr64:
1500 shftOp = Pshft_SHR; break;
1501 case Iop_Sar8: case Iop_Sar16: case Iop_Sar32: case Iop_Sar64:
1502 shftOp = Pshft_SAR; break;
1503 default:
1504 shftOp = Pshft_INVALID; break;
1506 /* we assume any literal values are on the second operand. */
1507 if (shftOp != Pshft_INVALID) {
1508 HReg r_dst = newVRegI(env);
1509 HReg r_srcL = iselWordExpr_R(env, e->Iex.Binop.arg1, IEndianess);
1510 PPCRH* ri_srcR = NULL;
1511 /* get right arg into an RH, in the appropriate way */
1512 switch (shftOp) {
1513 case Pshft_SHL: case Pshft_SHR: case Pshft_SAR:
1514 if (!mode64)
1515 ri_srcR = iselWordExpr_RH5u(env, e->Iex.Binop.arg2, IEndianess);
1516 else
1517 ri_srcR = iselWordExpr_RH6u(env, e->Iex.Binop.arg2, IEndianess);
1518 break;
1519 default:
1520 vpanic("iselIntExpr_R_wrk-shftOp-arg2");
1522 /* widen the left arg if needed */
1523 if (shftOp == Pshft_SHR || shftOp == Pshft_SAR) {
1524 if (ty == Ity_I8 || ty == Ity_I16) {
1525 PPCRH* amt = PPCRH_Imm(False,
1526 toUShort(ty == Ity_I8 ? 24 : 16));
1527 HReg tmp = newVRegI(env);
1528 addInstr(env, PPCInstr_Shft(Pshft_SHL,
1529 True/*32bit shift*/,
1530 tmp, r_srcL, amt));
1531 addInstr(env, PPCInstr_Shft(shftOp,
1532 True/*32bit shift*/,
1533 tmp, tmp, amt));
1534 r_srcL = tmp;
1537 /* Only 64 expressions need 64bit shifts,
1538 32bit shifts are fine for all others */
1539 if (ty == Ity_I64) {
1540 vassert(mode64);
1541 addInstr(env, PPCInstr_Shft(shftOp, False/*64bit shift*/,
1542 r_dst, r_srcL, ri_srcR));
1543 } else {
1544 addInstr(env, PPCInstr_Shft(shftOp, True/*32bit shift*/,
1545 r_dst, r_srcL, ri_srcR));
1547 return r_dst;
1550 /* How about a div? */
1551 if (e->Iex.Binop.op == Iop_DivS32 ||
1552 e->Iex.Binop.op == Iop_DivU32 ||
1553 e->Iex.Binop.op == Iop_DivS32E ||
1554 e->Iex.Binop.op == Iop_DivU32E) {
1555 Bool syned = toBool((e->Iex.Binop.op == Iop_DivS32) || (e->Iex.Binop.op == Iop_DivS32E));
1556 HReg r_dst = newVRegI(env);
1557 HReg r_srcL = iselWordExpr_R(env, e->Iex.Binop.arg1, IEndianess);
1558 HReg r_srcR = iselWordExpr_R(env, e->Iex.Binop.arg2, IEndianess);
1559 addInstr( env,
1560 PPCInstr_Div( ( ( e->Iex.Binop.op == Iop_DivU32E )
1561 || ( e->Iex.Binop.op == Iop_DivS32E ) ) ? True
1562 : False,
1563 syned,
1564 True/*32bit div*/,
1565 r_dst,
1566 r_srcL,
1567 r_srcR ) );
1568 return r_dst;
1570 if (e->Iex.Binop.op == Iop_DivS64 ||
1571 e->Iex.Binop.op == Iop_DivU64 || e->Iex.Binop.op == Iop_DivS64E
1572 || e->Iex.Binop.op == Iop_DivU64E ) {
1573 Bool syned = toBool((e->Iex.Binop.op == Iop_DivS64) ||(e->Iex.Binop.op == Iop_DivS64E));
1574 HReg r_dst = newVRegI(env);
1575 HReg r_srcL = iselWordExpr_R(env, e->Iex.Binop.arg1, IEndianess);
1576 HReg r_srcR = iselWordExpr_R(env, e->Iex.Binop.arg2, IEndianess);
1577 vassert(mode64);
1578 addInstr( env,
1579 PPCInstr_Div( ( ( e->Iex.Binop.op == Iop_DivS64E )
1580 || ( e->Iex.Binop.op
1581 == Iop_DivU64E ) ) ? True
1582 : False,
1583 syned,
1584 False/*64bit div*/,
1585 r_dst,
1586 r_srcL,
1587 r_srcR ) );
1588 return r_dst;
1591 /* No? Anyone for a mul? */
1592 if (e->Iex.Binop.op == Iop_Mul32
1593 || e->Iex.Binop.op == Iop_Mul64) {
1594 Bool syned = False;
1595 Bool sz32 = (e->Iex.Binop.op != Iop_Mul64);
1596 HReg r_dst = newVRegI(env);
1597 HReg r_srcL = iselWordExpr_R(env, e->Iex.Binop.arg1, IEndianess);
1598 HReg r_srcR = iselWordExpr_R(env, e->Iex.Binop.arg2, IEndianess);
1599 addInstr(env, PPCInstr_MulL(syned, False/*lo32*/, sz32,
1600 r_dst, r_srcL, r_srcR));
1601 return r_dst;
1604 /* 32 x 32 -> 64 multiply */
1605 if (mode64
1606 && (e->Iex.Binop.op == Iop_MullU32
1607 || e->Iex.Binop.op == Iop_MullS32)) {
1608 HReg tLo = newVRegI(env);
1609 HReg tHi = newVRegI(env);
1610 HReg r_dst = newVRegI(env);
1611 Bool syned = toBool(e->Iex.Binop.op == Iop_MullS32);
1612 HReg r_srcL = iselWordExpr_R(env, e->Iex.Binop.arg1, IEndianess);
1613 HReg r_srcR = iselWordExpr_R(env, e->Iex.Binop.arg2, IEndianess);
1614 addInstr(env, PPCInstr_MulL(False/*signedness irrelevant*/,
1615 False/*lo32*/, True/*32bit mul*/,
1616 tLo, r_srcL, r_srcR));
1617 addInstr(env, PPCInstr_MulL(syned,
1618 True/*hi32*/, True/*32bit mul*/,
1619 tHi, r_srcL, r_srcR));
1620 addInstr(env, PPCInstr_Shft(Pshft_SHL, False/*64bit shift*/,
1621 r_dst, tHi, PPCRH_Imm(False,32)));
1622 addInstr(env, PPCInstr_Alu(Palu_OR,
1623 r_dst, r_dst, PPCRH_Reg(tLo)));
1624 return r_dst;
1627 /* El-mutanto 3-way compare? */
1628 if (e->Iex.Binop.op == Iop_CmpORD32S
1629 || e->Iex.Binop.op == Iop_CmpORD32U) {
1630 Bool syned = toBool(e->Iex.Binop.op == Iop_CmpORD32S);
1631 HReg dst = newVRegI(env);
1632 HReg srcL = iselWordExpr_R(env, e->Iex.Binop.arg1, IEndianess);
1633 PPCRH* srcR = iselWordExpr_RH(env, syned, e->Iex.Binop.arg2,
1634 IEndianess);
1635 addInstr(env, PPCInstr_Cmp(syned, True/*32bit cmp*/,
1636 7/*cr*/, srcL, srcR));
1637 addInstr(env, PPCInstr_MfCR(dst));
1638 addInstr(env, PPCInstr_Alu(Palu_AND, dst, dst,
1639 PPCRH_Imm(False,7<<1)));
1640 return dst;
1643 if (e->Iex.Binop.op == Iop_CmpORD64S
1644 || e->Iex.Binop.op == Iop_CmpORD64U) {
1645 Bool syned = toBool(e->Iex.Binop.op == Iop_CmpORD64S);
1646 HReg dst = newVRegI(env);
1647 HReg srcL = iselWordExpr_R(env, e->Iex.Binop.arg1, IEndianess);
1648 PPCRH* srcR = iselWordExpr_RH(env, syned, e->Iex.Binop.arg2,
1649 IEndianess);
1650 vassert(mode64);
1651 addInstr(env, PPCInstr_Cmp(syned, False/*64bit cmp*/,
1652 7/*cr*/, srcL, srcR));
1653 addInstr(env, PPCInstr_MfCR(dst));
1654 addInstr(env, PPCInstr_Alu(Palu_AND, dst, dst,
1655 PPCRH_Imm(False,7<<1)));
1656 return dst;
1659 if (e->Iex.Binop.op == Iop_Max32U) {
1660 HReg r1 = iselWordExpr_R(env, e->Iex.Binop.arg1, IEndianess);
1661 HReg r2 = iselWordExpr_R(env, e->Iex.Binop.arg2, IEndianess);
1662 HReg rdst = newVRegI(env);
1663 PPCCondCode cc = mk_PPCCondCode( Pct_TRUE, Pcf_7LT );
1664 addInstr(env, mk_iMOVds_RR(rdst, r1));
1665 addInstr(env, PPCInstr_Cmp(False/*unsigned*/, True/*32bit cmp*/,
1666 7/*cr*/, rdst, PPCRH_Reg(r2)));
1667 addInstr(env, PPCInstr_CMov(cc, rdst, PPCRI_Reg(r2)));
1668 return rdst;
1671 if (e->Iex.Binop.op == Iop_32HLto64) {
1672 HReg r_Hi = iselWordExpr_R(env, e->Iex.Binop.arg1, IEndianess);
1673 HReg r_Lo = iselWordExpr_R(env, e->Iex.Binop.arg2, IEndianess);
1674 HReg r_Tmp = newVRegI(env);
1675 HReg r_dst = newVRegI(env);
1676 HReg msk = newVRegI(env);
1677 vassert(mode64);
1678 /* r_dst = OR( r_Hi<<32, r_Lo ) */
1679 addInstr(env, PPCInstr_Shft(Pshft_SHL, False/*64bit shift*/,
1680 r_dst, r_Hi, PPCRH_Imm(False,32)));
1681 addInstr(env, PPCInstr_LI(msk, 0xFFFFFFFF, mode64));
1682 addInstr(env, PPCInstr_Alu( Palu_AND, r_Tmp, r_Lo,
1683 PPCRH_Reg(msk) ));
1684 addInstr(env, PPCInstr_Alu( Palu_OR, r_dst, r_dst,
1685 PPCRH_Reg(r_Tmp) ));
1686 return r_dst;
1689 if ((e->Iex.Binop.op == Iop_CmpF64) ||
1690 (e->Iex.Binop.op == Iop_CmpD64) ||
1691 (e->Iex.Binop.op == Iop_CmpD128)) {
1692 HReg fr_srcL;
1693 HReg fr_srcL_lo;
1694 HReg fr_srcR;
1695 HReg fr_srcR_lo;
1697 HReg r_ccPPC = newVRegI(env);
1698 HReg r_ccIR = newVRegI(env);
1699 HReg r_ccIR_b0 = newVRegI(env);
1700 HReg r_ccIR_b2 = newVRegI(env);
1701 HReg r_ccIR_b6 = newVRegI(env);
1703 if (e->Iex.Binop.op == Iop_CmpF64) {
1704 fr_srcL = iselDblExpr(env, e->Iex.Binop.arg1, IEndianess);
1705 fr_srcR = iselDblExpr(env, e->Iex.Binop.arg2, IEndianess);
1706 addInstr(env, PPCInstr_FpCmp(r_ccPPC, fr_srcL, fr_srcR));
1708 } else if (e->Iex.Binop.op == Iop_CmpD64) {
1709 fr_srcL = iselDfp64Expr(env, e->Iex.Binop.arg1, IEndianess);
1710 fr_srcR = iselDfp64Expr(env, e->Iex.Binop.arg2, IEndianess);
1711 addInstr(env, PPCInstr_Dfp64Cmp(r_ccPPC, fr_srcL, fr_srcR));
1713 } else { // e->Iex.Binop.op == Iop_CmpD128
1714 iselDfp128Expr(&fr_srcL, &fr_srcL_lo, env, e->Iex.Binop.arg1,
1715 IEndianess);
1716 iselDfp128Expr(&fr_srcR, &fr_srcR_lo, env, e->Iex.Binop.arg2,
1717 IEndianess);
1718 addInstr(env, PPCInstr_Dfp128Cmp(r_ccPPC, fr_srcL, fr_srcL_lo,
1719 fr_srcR, fr_srcR_lo));
1722 /* Map compare result from PPC to IR,
1723 conforming to CmpF64 definition. */
1725 FP cmp result | PPC | IR
1726 --------------------------
1727 UN | 0x1 | 0x45
1728 EQ | 0x2 | 0x40
1729 GT | 0x4 | 0x00
1730 LT | 0x8 | 0x01
1733 // r_ccIR_b0 = r_ccPPC[0] | r_ccPPC[3]
1734 addInstr(env, PPCInstr_Shft(Pshft_SHR, True/*32bit shift*/,
1735 r_ccIR_b0, r_ccPPC,
1736 PPCRH_Imm(False,0x3)));
1737 addInstr(env, PPCInstr_Alu(Palu_OR, r_ccIR_b0,
1738 r_ccPPC, PPCRH_Reg(r_ccIR_b0)));
1739 addInstr(env, PPCInstr_Alu(Palu_AND, r_ccIR_b0,
1740 r_ccIR_b0, PPCRH_Imm(False,0x1)));
1742 // r_ccIR_b2 = r_ccPPC[0]
1743 addInstr(env, PPCInstr_Shft(Pshft_SHL, True/*32bit shift*/,
1744 r_ccIR_b2, r_ccPPC,
1745 PPCRH_Imm(False,0x2)));
1746 addInstr(env, PPCInstr_Alu(Palu_AND, r_ccIR_b2,
1747 r_ccIR_b2, PPCRH_Imm(False,0x4)));
1749 // r_ccIR_b6 = r_ccPPC[0] | r_ccPPC[1]
1750 addInstr(env, PPCInstr_Shft(Pshft_SHR, True/*32bit shift*/,
1751 r_ccIR_b6, r_ccPPC,
1752 PPCRH_Imm(False,0x1)));
1753 addInstr(env, PPCInstr_Alu(Palu_OR, r_ccIR_b6,
1754 r_ccPPC, PPCRH_Reg(r_ccIR_b6)));
1755 addInstr(env, PPCInstr_Shft(Pshft_SHL, True/*32bit shift*/,
1756 r_ccIR_b6, r_ccIR_b6,
1757 PPCRH_Imm(False,0x6)));
1758 addInstr(env, PPCInstr_Alu(Palu_AND, r_ccIR_b6,
1759 r_ccIR_b6, PPCRH_Imm(False,0x40)));
1761 // r_ccIR = r_ccIR_b0 | r_ccIR_b2 | r_ccIR_b6
1762 addInstr(env, PPCInstr_Alu(Palu_OR, r_ccIR,
1763 r_ccIR_b0, PPCRH_Reg(r_ccIR_b2)));
1764 addInstr(env, PPCInstr_Alu(Palu_OR, r_ccIR,
1765 r_ccIR, PPCRH_Reg(r_ccIR_b6)));
1766 return r_ccIR;
1769 if ( e->Iex.Binop.op == Iop_F64toI32S ||
1770 e->Iex.Binop.op == Iop_F64toI32U ) {
1771 /* This works in both mode64 and mode32. */
1772 HReg r1 = StackFramePtr(env->mode64);
1773 PPCAMode* zero_r1 = PPCAMode_IR( 0, r1 );
1774 HReg fsrc = iselDblExpr(env, e->Iex.Binop.arg2, IEndianess);
1775 HReg ftmp = newVRegF(env);
1776 HReg idst = newVRegI(env);
1778 /* Set host rounding mode */
1779 set_FPU_rounding_mode( env, e->Iex.Binop.arg1, IEndianess );
1781 sub_from_sp( env, 16 );
1782 addInstr(env, PPCInstr_FpCftI(False/*F->I*/, True/*int32*/,
1783 e->Iex.Binop.op == Iop_F64toI32S ? True/*syned*/
1784 : False,
1785 True/*flt64*/,
1786 ftmp, fsrc));
1787 addInstr(env, PPCInstr_FpSTFIW(r1, ftmp));
1788 addInstr(env, PPCInstr_Load(4, idst, zero_r1, mode64));
1790 /* in 64-bit mode we need to sign-widen idst. */
1791 if (mode64)
1792 addInstr(env, PPCInstr_Unary(Pun_EXTSW, idst, idst));
1794 add_to_sp( env, 16 );
1796 ///* Restore default FPU rounding. */
1797 //set_FPU_rounding_default( env );
1798 return idst;
1801 if (e->Iex.Binop.op == Iop_F64toI64S || e->Iex.Binop.op == Iop_F64toI64U ) {
1802 if (mode64) {
1803 HReg r1 = StackFramePtr(env->mode64);
1804 PPCAMode* zero_r1 = PPCAMode_IR( 0, r1 );
1805 HReg fsrc = iselDblExpr(env, e->Iex.Binop.arg2,
1806 IEndianess);
1807 HReg idst = newVRegI(env);
1808 HReg ftmp = newVRegF(env);
1810 /* Set host rounding mode */
1811 set_FPU_rounding_mode( env, e->Iex.Binop.arg1, IEndianess );
1813 sub_from_sp( env, 16 );
1814 addInstr(env, PPCInstr_FpCftI(False/*F->I*/, False/*int64*/,
1815 ( e->Iex.Binop.op == Iop_F64toI64S ) ? True
1816 : False,
1817 True, ftmp, fsrc));
1818 addInstr(env, PPCInstr_FpLdSt(False/*store*/, 8, ftmp, zero_r1));
1819 addInstr(env, PPCInstr_Load(8, idst, zero_r1, True/*mode64*/));
1820 add_to_sp( env, 16 );
1822 ///* Restore default FPU rounding. */
1823 //set_FPU_rounding_default( env );
1824 return idst;
1828 if (e->Iex.Binop.op == Iop_D64toI64S ) {
1829 HReg r1 = StackFramePtr(env->mode64);
1830 PPCAMode* zero_r1 = PPCAMode_IR( 0, r1 );
1831 HReg fr_src = iselDfp64Expr(env, e->Iex.Binop.arg2, IEndianess);
1832 HReg idst = newVRegI(env);
1833 HReg ftmp = newVRegF(env);
1835 /* Set host rounding mode */
1836 set_FPU_DFP_rounding_mode( env, e->Iex.Binop.arg1, IEndianess );
1837 addInstr(env, PPCInstr_Dfp64Unary(Pfp_DCTFIX, ftmp, fr_src));
1838 sub_from_sp( env, 16 );
1839 addInstr(env, PPCInstr_FpLdSt(False/*store*/, 8, ftmp, zero_r1));
1840 addInstr(env, PPCInstr_Load(8, idst, zero_r1, mode64));
1842 add_to_sp( env, 16 );
1844 ///* Restore default FPU rounding. */
1845 //set_FPU_rounding_default( env );
1846 return idst;
1849 if (e->Iex.Binop.op == Iop_D128toI64S ) {
1850 PPCFpOp fpop = Pfp_DCTFIXQ;
1851 HReg r_srcHi = newVRegF(env);
1852 HReg r_srcLo = newVRegF(env);
1853 HReg idst = newVRegI(env);
1854 HReg ftmp = newVRegF(env);
1855 PPCAMode* zero_r1 = PPCAMode_IR( 0, StackFramePtr(env->mode64) );
1857 set_FPU_DFP_rounding_mode( env, e->Iex.Binop.arg1, IEndianess );
1858 iselDfp128Expr(&r_srcHi, &r_srcLo, env, e->Iex.Binop.arg2,
1859 IEndianess);
1860 addInstr(env, PPCInstr_DfpD128toD64(fpop, ftmp, r_srcHi, r_srcLo));
1862 // put the D64 result into an integer register
1863 sub_from_sp( env, 16 );
1864 addInstr(env, PPCInstr_FpLdSt(False/*store*/, 8, ftmp, zero_r1));
1865 addInstr(env, PPCInstr_Load(8, idst, zero_r1, True/*mode64*/));
1866 add_to_sp( env, 16 );
1867 return idst;
1869 break;
1872 /* --------- UNARY OP --------- */
1873 case Iex_Unop: {
1874 IROp op_unop = e->Iex.Unop.op;
1876 /* 1Uto8(32to1(expr32)) */
1877 DEFINE_PATTERN(p_32to1_then_1Uto8,
1878 unop(Iop_1Uto8,unop(Iop_32to1,bind(0))));
1879 if (matchIRExpr(&mi,p_32to1_then_1Uto8,e)) {
1880 const IRExpr* expr32 = mi.bindee[0];
1881 HReg r_dst = newVRegI(env);
1882 HReg r_src = iselWordExpr_R(env, expr32, IEndianess);
1883 addInstr(env, PPCInstr_Alu(Palu_AND, r_dst,
1884 r_src, PPCRH_Imm(False,1)));
1885 return r_dst;
1888 /* 16Uto32(LDbe:I16(expr32)) */
1890 DECLARE_PATTERN(p_LDbe16_then_16Uto32);
1891 DEFINE_PATTERN(p_LDbe16_then_16Uto32,
1892 unop(Iop_16Uto32,
1893 IRExpr_Load(IEndianess,Ity_I16,bind(0))) );
1894 if (matchIRExpr(&mi,p_LDbe16_then_16Uto32,e)) {
1895 HReg r_dst = newVRegI(env);
1896 PPCAMode* amode
1897 = iselWordExpr_AMode( env, mi.bindee[0], Ity_I16/*xfer*/,
1898 IEndianess );
1899 addInstr(env, PPCInstr_Load(2,r_dst,amode, mode64));
1900 return r_dst;
1904 switch (op_unop) {
1905 case Iop_8Uto16:
1906 case Iop_8Uto32:
1907 case Iop_8Uto64:
1908 case Iop_16Uto32:
1909 case Iop_16Uto64: {
1910 HReg r_dst = newVRegI(env);
1911 HReg r_src = iselWordExpr_R(env, e->Iex.Unop.arg, IEndianess);
1912 UShort mask = toUShort(op_unop==Iop_16Uto64 ? 0xFFFF :
1913 op_unop==Iop_16Uto32 ? 0xFFFF : 0xFF);
1914 addInstr(env, PPCInstr_Alu(Palu_AND,r_dst,r_src,
1915 PPCRH_Imm(False,mask)));
1916 return r_dst;
1918 case Iop_32Uto64: {
1919 HReg r_dst = newVRegI(env);
1920 HReg r_src = iselWordExpr_R(env, e->Iex.Unop.arg, IEndianess);
1921 vassert(mode64);
1922 addInstr(env,
1923 PPCInstr_Shft(Pshft_SHL, False/*64bit shift*/,
1924 r_dst, r_src, PPCRH_Imm(False,32)));
1925 addInstr(env,
1926 PPCInstr_Shft(Pshft_SHR, False/*64bit shift*/,
1927 r_dst, r_dst, PPCRH_Imm(False,32)));
1928 return r_dst;
1930 case Iop_8Sto16:
1931 case Iop_8Sto32:
1932 case Iop_16Sto32: {
1933 HReg r_dst = newVRegI(env);
1934 HReg r_src = iselWordExpr_R(env, e->Iex.Unop.arg, IEndianess);
1935 UShort amt = toUShort(op_unop==Iop_16Sto32 ? 16 : 24);
1936 addInstr(env,
1937 PPCInstr_Shft(Pshft_SHL, True/*32bit shift*/,
1938 r_dst, r_src, PPCRH_Imm(False,amt)));
1939 addInstr(env,
1940 PPCInstr_Shft(Pshft_SAR, True/*32bit shift*/,
1941 r_dst, r_dst, PPCRH_Imm(False,amt)));
1942 return r_dst;
1944 case Iop_8Sto64:
1945 case Iop_16Sto64: {
1946 HReg r_dst = newVRegI(env);
1947 HReg r_src = iselWordExpr_R(env, e->Iex.Unop.arg, IEndianess);
1948 UShort amt = toUShort(op_unop==Iop_8Sto64 ? 56 : 48);
1949 vassert(mode64);
1950 addInstr(env,
1951 PPCInstr_Shft(Pshft_SHL, False/*64bit shift*/,
1952 r_dst, r_src, PPCRH_Imm(False,amt)));
1953 addInstr(env,
1954 PPCInstr_Shft(Pshft_SAR, False/*64bit shift*/,
1955 r_dst, r_dst, PPCRH_Imm(False,amt)));
1956 return r_dst;
1958 case Iop_32Sto64: {
1959 HReg r_dst = newVRegI(env);
1960 HReg r_src = iselWordExpr_R(env, e->Iex.Unop.arg, IEndianess);
1961 vassert(mode64);
1962 /* According to the IBM docs, in 64 bit mode, srawi r,r,0
1963 sign extends the lower 32 bits into the upper 32 bits. */
1964 addInstr(env,
1965 PPCInstr_Shft(Pshft_SAR, True/*32bit shift*/,
1966 r_dst, r_src, PPCRH_Imm(False,0)));
1967 return r_dst;
1969 case Iop_Not8:
1970 case Iop_Not16:
1971 case Iop_Not32:
1972 case Iop_Not64: {
1973 if (op_unop == Iop_Not64) vassert(mode64);
1974 HReg r_dst = newVRegI(env);
1975 HReg r_src = iselWordExpr_R(env, e->Iex.Unop.arg, IEndianess);
1976 addInstr(env, PPCInstr_Unary(Pun_NOT,r_dst,r_src));
1977 return r_dst;
1979 case Iop_64HIto32: {
1980 if (!mode64) {
1981 HReg rHi, rLo;
1982 iselInt64Expr(&rHi,&rLo, env, e->Iex.Unop.arg, IEndianess);
1983 return rHi; /* and abandon rLo .. poor wee thing :-) */
1984 } else {
1985 HReg r_dst = newVRegI(env);
1986 HReg r_src = iselWordExpr_R(env, e->Iex.Unop.arg, IEndianess);
1987 addInstr(env,
1988 PPCInstr_Shft(Pshft_SHR, False/*64bit shift*/,
1989 r_dst, r_src, PPCRH_Imm(False,32)));
1990 return r_dst;
1993 case Iop_64to32: {
1994 if (!mode64) {
1995 HReg rHi, rLo;
1996 iselInt64Expr(&rHi,&rLo, env, e->Iex.Unop.arg, IEndianess);
1997 return rLo; /* similar stupid comment to the above ... */
1998 } else {
1999 /* This is a no-op. */
2000 return iselWordExpr_R(env, e->Iex.Unop.arg, IEndianess);
2003 case Iop_64to16: {
2004 if (mode64) { /* This is a no-op. */
2005 return iselWordExpr_R(env, e->Iex.Unop.arg, IEndianess);
2007 break; /* evidently not used in 32-bit mode */
2009 case Iop_16HIto8:
2010 case Iop_32HIto16: {
2011 HReg r_dst = newVRegI(env);
2012 HReg r_src = iselWordExpr_R(env, e->Iex.Unop.arg, IEndianess);
2013 UShort shift = toUShort(op_unop == Iop_16HIto8 ? 8 : 16);
2014 addInstr(env,
2015 PPCInstr_Shft(Pshft_SHR, True/*32bit shift*/,
2016 r_dst, r_src, PPCRH_Imm(False,shift)));
2017 return r_dst;
2019 case Iop_128HIto64:
2020 if (mode64) {
2021 HReg rHi, rLo;
2022 iselInt128Expr(&rHi,&rLo, env, e->Iex.Unop.arg, IEndianess);
2023 return rHi; /* and abandon rLo .. poor wee thing :-) */
2025 break;
2026 case Iop_128to64:
2027 if (mode64) {
2028 HReg rHi, rLo;
2029 iselInt128Expr(&rHi,&rLo, env, e->Iex.Unop.arg, IEndianess);
2030 return rLo; /* similar stupid comment to the above ... */
2032 break;
2033 case Iop_1Uto64:
2034 case Iop_1Uto32:
2035 case Iop_1Uto8:
2036 if ((op_unop != Iop_1Uto64) || mode64) {
2037 HReg r_dst = newVRegI(env);
2038 PPCCondCode cond = iselCondCode(env, e->Iex.Unop.arg, IEndianess);
2039 addInstr(env, PPCInstr_Set(cond,r_dst));
2040 return r_dst;
2042 break;
2043 case Iop_1Sto8:
2044 case Iop_1Sto16:
2045 case Iop_1Sto32: {
2046 /* could do better than this, but for now ... */
2047 HReg r_dst = newVRegI(env);
2048 PPCCondCode cond = iselCondCode(env, e->Iex.Unop.arg, IEndianess);
2049 addInstr(env, PPCInstr_Set(cond,r_dst));
2050 addInstr(env,
2051 PPCInstr_Shft(Pshft_SHL, True/*32bit shift*/,
2052 r_dst, r_dst, PPCRH_Imm(False,31)));
2053 addInstr(env,
2054 PPCInstr_Shft(Pshft_SAR, True/*32bit shift*/,
2055 r_dst, r_dst, PPCRH_Imm(False,31)));
2056 return r_dst;
2058 case Iop_1Sto64:
2059 if (mode64) {
2060 /* could do better than this, but for now ... */
2061 HReg r_dst = newVRegI(env);
2062 PPCCondCode cond = iselCondCode(env, e->Iex.Unop.arg, IEndianess);
2063 addInstr(env, PPCInstr_Set(cond,r_dst));
2064 addInstr(env, PPCInstr_Shft(Pshft_SHL, False/*64bit shift*/,
2065 r_dst, r_dst, PPCRH_Imm(False,63)));
2066 addInstr(env, PPCInstr_Shft(Pshft_SAR, False/*64bit shift*/,
2067 r_dst, r_dst, PPCRH_Imm(False,63)));
2068 return r_dst;
2070 break;
2072 case Iop_Clz32: case Iop_ClzNat32:
2073 case Iop_Clz64: case Iop_ClzNat64: {
2074 // cntlz is available even in the most basic (earliest) ppc
2075 // variants, so it's safe to generate it unconditionally.
2076 HReg r_src, r_dst;
2077 PPCUnaryOp op_clz = (op_unop == Iop_Clz32 || op_unop == Iop_ClzNat32)
2078 ? Pun_CLZ32 : Pun_CLZ64;
2079 if ((op_unop == Iop_Clz64 || op_unop == Iop_ClzNat64) && !mode64)
2080 goto irreducible;
2081 /* Count leading zeroes. */
2082 r_dst = newVRegI(env);
2083 r_src = iselWordExpr_R(env, e->Iex.Unop.arg, IEndianess);
2084 addInstr(env, PPCInstr_Unary(op_clz,r_dst,r_src));
2085 return r_dst;
2088 //case Iop_Ctz32:
2089 case Iop_CtzNat32:
2090 //case Iop_Ctz64:
2091 case Iop_CtzNat64:
2093 // Generate code using Clz, because we can't assume the host has
2094 // Ctz. In particular, part of the fix for bug 386945 involves
2095 // creating a Ctz in ir_opt.c from smaller fragments.
2096 PPCUnaryOp op_clz = Pun_CLZ64;
2097 Int WS = 64;
2098 if (op_unop == Iop_Ctz32 || op_unop == Iop_CtzNat32) {
2099 op_clz = Pun_CLZ32;
2100 WS = 32;
2102 /* Compute ctz(arg) = wordsize - clz(~arg & (arg - 1)), thusly:
2103 t1 = arg - 1
2104 t2 = not arg
2105 t2 = t2 & t1
2106 t2 = clz t2
2107 t1 = WS
2108 t2 = t1 - t2
2109 // result in t2
2111 HReg arg = iselWordExpr_R(env, e->Iex.Unop.arg, IEndianess);
2112 HReg t1 = newVRegI(env);
2113 HReg t2 = newVRegI(env);
2114 addInstr(env, PPCInstr_Alu(Palu_SUB, t1, arg, PPCRH_Imm(True, 1)));
2115 addInstr(env, PPCInstr_Unary(Pun_NOT, t2, arg));
2116 addInstr(env, PPCInstr_Alu(Palu_AND, t2, t2, PPCRH_Reg(t1)));
2117 addInstr(env, PPCInstr_Unary(op_clz, t2, t2));
2118 addInstr(env, PPCInstr_LI(t1, WS, False/*!64-bit imm*/));
2119 addInstr(env, PPCInstr_Alu(Palu_SUB, t2, t1, PPCRH_Reg(t2)));
2120 return t2;
2123 case Iop_PopCount64: {
2124 // popcnt{x,d} is only available in later arch revs (ISA 3.0,
2125 // maybe) so it's not really correct to emit it here without a caps
2126 // check for the host.
2127 if (mode64) {
2128 HReg r_dst = newVRegI(env);
2129 HReg r_src = iselWordExpr_R(env, e->Iex.Unop.arg, IEndianess);
2130 addInstr(env, PPCInstr_Unary(Pun_POP64, r_dst, r_src));
2131 return r_dst;
2133 // We don't expect to be required to handle this in 32-bit mode.
2134 break;
2137 case Iop_PopCount32: {
         // Similar comment as for PopCount64 just above applies -- we
         // really should have a caps check here.
2141 HReg r_dst = newVRegI(env);
2142 // This actually generates popcntw, which in 64 bit mode does a
2143 // 32-bit count individually for both low and high halves of the
2144 // word. Per the comment at the top of iselIntExpr_R, in the 64
2145 // bit mode case, the user of this result is required to ignore
2146 // the upper 32 bits of the result. In 32 bit mode this is all
2147 // moot. It is however unclear from the PowerISA 3.0 docs that
2148 // the instruction exists in 32 bit mode; however our own front
2149 // end (guest_ppc_toIR.c) accepts it, so I guess it does exist.
2150 HReg r_src = iselWordExpr_R(env, e->Iex.Unop.arg, IEndianess);
2151 addInstr(env, PPCInstr_Unary(Pun_POP32, r_dst, r_src));
2152 return r_dst;
2155 case Iop_Reverse8sIn32_x1: {
2156 // A bit of a mouthful, but simply .. 32-bit byte swap.
2157 // This is pretty rubbish code. We could do vastly better if
2158 // rotates, and better, rotate-inserts, were allowed. Note that
2159 // even on a 64 bit target, the right shifts must be done as 32-bit
2160 // so as to introduce zero bits in the right places. So it seems
2161 // simplest to do the whole sequence in 32-bit insns.
2163 r = <argument> // working temporary, initial byte order ABCD
2164 Mask = 00FF00FF
2165 nMask = not Mask
2166 tHi = and r, Mask
2167 tHi = shl tHi, 8
2168 tLo = and r, nMask
2169 tLo = shr tLo, 8
2170 r = or tHi, tLo // now r has order BADC
2171 and repeat for 16 bit chunks ..
2172 Mask = 0000FFFF
2173 nMask = not Mask
2174 tHi = and r, Mask
2175 tHi = shl tHi, 16
2176 tLo = and r, nMask
2177 tLo = shr tLo, 16
2178 r = or tHi, tLo // now r has order DCBA
2180 HReg r_src = iselWordExpr_R(env, e->Iex.Unop.arg, IEndianess);
2181 HReg rr = newVRegI(env);
2182 HReg rMask = newVRegI(env);
2183 HReg rnMask = newVRegI(env);
2184 HReg rtHi = newVRegI(env);
2185 HReg rtLo = newVRegI(env);
2186 // Copy r_src since we need to modify it
2187 addInstr(env, mk_iMOVds_RR(rr, r_src));
2188 // Swap within 16-bit lanes
2189 addInstr(env, PPCInstr_LI(rMask, 0x00FF00FFULL,
2190 False/* !64bit imm*/));
2191 addInstr(env, PPCInstr_Unary(Pun_NOT, rnMask, rMask));
2192 addInstr(env, PPCInstr_Alu(Palu_AND, rtHi, rr, PPCRH_Reg(rMask)));
2193 addInstr(env, PPCInstr_Shft(Pshft_SHL, True/*32 bit shift*/,
2194 rtHi, rtHi,
2195 PPCRH_Imm(False/*!signed imm*/, 8)));
2196 addInstr(env, PPCInstr_Alu(Palu_AND, rtLo, rr, PPCRH_Reg(rnMask)));
2197 addInstr(env, PPCInstr_Shft(Pshft_SHR, True/*32 bit shift*/,
2198 rtLo, rtLo,
2199 PPCRH_Imm(False/*!signed imm*/, 8)));
2200 addInstr(env, PPCInstr_Alu(Palu_OR, rr, rtHi, PPCRH_Reg(rtLo)));
2201 // And now swap the two 16-bit chunks
2202 addInstr(env, PPCInstr_LI(rMask, 0x0000FFFFULL,
2203 False/* !64bit imm*/));
2204 addInstr(env, PPCInstr_Unary(Pun_NOT, rnMask, rMask));
2205 addInstr(env, PPCInstr_Alu(Palu_AND, rtHi, rr, PPCRH_Reg(rMask)));
2206 addInstr(env, PPCInstr_Shft(Pshft_SHL, True/*32 bit shift*/,
2207 rtHi, rtHi,
2208 PPCRH_Imm(False/*!signed imm*/, 16)));
2209 addInstr(env, PPCInstr_Alu(Palu_AND, rtLo, rr, PPCRH_Reg(rnMask)));
2210 addInstr(env, PPCInstr_Shft(Pshft_SHR, True/*32 bit shift*/,
2211 rtLo, rtLo,
2212 PPCRH_Imm(False/*!signed imm*/, 16)));
2213 addInstr(env, PPCInstr_Alu(Palu_OR, rr, rtHi, PPCRH_Reg(rtLo)));
2214 return rr;
2217 case Iop_Reverse8sIn64_x1: {
2218 /* See Iop_Reverse8sIn32_x1, but extended to 64bit.
2219 Can only be used in 64bit mode. */
2220 vassert (mode64);
2222 HReg r_src = iselWordExpr_R(env, e->Iex.Unop.arg, IEndianess);
2223 HReg rr = newVRegI(env);
2224 HReg rMask = newVRegI(env);
2225 HReg rnMask = newVRegI(env);
2226 HReg rtHi = newVRegI(env);
2227 HReg rtLo = newVRegI(env);
2229 // Copy r_src since we need to modify it
2230 addInstr(env, mk_iMOVds_RR(rr, r_src));
2232 // r = (r & 0x00FF00FF00FF00FF) << 8 | (r & 0xFF00FF00FF00FF00) >> 8
2233 addInstr(env, PPCInstr_LI(rMask, 0x00FF00FF00FF00FFULL,
2234 True/* 64bit imm*/));
2235 addInstr(env, PPCInstr_Unary(Pun_NOT, rnMask, rMask));
2236 addInstr(env, PPCInstr_Alu(Palu_AND, rtHi, rr, PPCRH_Reg(rMask)));
2237 addInstr(env, PPCInstr_Shft(Pshft_SHL, False/*64 bit shift*/,
2238 rtHi, rtHi,
2239 PPCRH_Imm(False/*!signed imm*/, 8)));
2240 addInstr(env, PPCInstr_Alu(Palu_AND, rtLo, rr, PPCRH_Reg(rnMask)));
2241 addInstr(env, PPCInstr_Shft(Pshft_SHR, False/*64 bit shift*/,
2242 rtLo, rtLo,
2243 PPCRH_Imm(False/*!signed imm*/, 8)));
2244 addInstr(env, PPCInstr_Alu(Palu_OR, rr, rtHi, PPCRH_Reg(rtLo)));
2246 // r = (r & 0x0000FFFF0000FFFF) << 16 | (r & 0xFFFF0000FFFF0000) >> 16
2247 addInstr(env, PPCInstr_LI(rMask, 0x0000FFFF0000FFFFULL,
2248 True/* !64bit imm*/));
2249 addInstr(env, PPCInstr_Unary(Pun_NOT, rnMask, rMask));
2250 addInstr(env, PPCInstr_Alu(Palu_AND, rtHi, rr, PPCRH_Reg(rMask)));
2251 addInstr(env, PPCInstr_Shft(Pshft_SHL, False/*64 bit shift*/,
2252 rtHi, rtHi,
2253 PPCRH_Imm(False/*!signed imm*/, 16)));
2254 addInstr(env, PPCInstr_Alu(Palu_AND, rtLo, rr, PPCRH_Reg(rnMask)));
2255 addInstr(env, PPCInstr_Shft(Pshft_SHR, False/*64 bit shift*/,
2256 rtLo, rtLo,
2257 PPCRH_Imm(False/*!signed imm*/, 16)));
2258 addInstr(env, PPCInstr_Alu(Palu_OR, rr, rtHi, PPCRH_Reg(rtLo)));
2260 // r = (r & 0x00000000FFFFFFFF) << 32 | (r & 0xFFFFFFFF00000000) >> 32
2261 /* We don't need to mask anymore, just two more shifts and an or. */
2262 addInstr(env, mk_iMOVds_RR(rtLo, rr));
2263 addInstr(env, PPCInstr_Shft(Pshft_SHL, False/*64 bit shift*/,
2264 rtLo, rtLo,
2265 PPCRH_Imm(False/*!signed imm*/, 32)));
2266 addInstr(env, PPCInstr_Shft(Pshft_SHR, False/*64 bit shift*/,
2267 rr, rr,
2268 PPCRH_Imm(False/*!signed imm*/, 32)));
2269 addInstr(env, PPCInstr_Alu(Palu_OR, rr, rr, PPCRH_Reg(rtLo)));
2271 return rr;
2274 case Iop_Left8:
2275 case Iop_Left16:
2276 case Iop_Left32:
2277 case Iop_Left64: {
2278 HReg r_src, r_dst;
2279 if (op_unop == Iop_Left64 && !mode64)
2280 goto irreducible;
2281 r_dst = newVRegI(env);
2282 r_src = iselWordExpr_R(env, e->Iex.Unop.arg, IEndianess);
2283 addInstr(env, PPCInstr_Unary(Pun_NEG,r_dst,r_src));
2284 addInstr(env, PPCInstr_Alu(Palu_OR, r_dst, r_dst, PPCRH_Reg(r_src)));
2285 return r_dst;
2288 case Iop_CmpwNEZ32: {
2289 HReg r_dst = newVRegI(env);
2290 HReg r_src = iselWordExpr_R(env, e->Iex.Unop.arg, IEndianess);
2291 addInstr(env, PPCInstr_Unary(Pun_NEG,r_dst,r_src));
2292 addInstr(env, PPCInstr_Alu(Palu_OR, r_dst, r_dst, PPCRH_Reg(r_src)));
2293 addInstr(env, PPCInstr_Shft(Pshft_SAR, True/*32bit shift*/,
2294 r_dst, r_dst, PPCRH_Imm(False, 31)));
2295 return r_dst;
2298 case Iop_CmpwNEZ64: {
2299 HReg r_dst = newVRegI(env);
2300 HReg r_src = iselWordExpr_R(env, e->Iex.Unop.arg, IEndianess);
2301 if (!mode64) goto irreducible;
2302 addInstr(env, PPCInstr_Unary(Pun_NEG,r_dst,r_src));
2303 addInstr(env, PPCInstr_Alu(Palu_OR, r_dst, r_dst, PPCRH_Reg(r_src)));
2304 addInstr(env, PPCInstr_Shft(Pshft_SAR, False/*64bit shift*/,
2305 r_dst, r_dst, PPCRH_Imm(False, 63)));
2306 return r_dst;
2309 case Iop_V128to32: {
2310 HReg r_aligned16;
2311 HReg dst = newVRegI(env);
2312 HReg vec = iselVecExpr(env, e->Iex.Unop.arg, IEndianess);
2313 PPCAMode *am_off0, *am_off_word0;
2314 sub_from_sp( env, 32 ); // Move SP down 32 bytes
2316 // get a quadword aligned address within our stack space
2317 r_aligned16 = get_sp_aligned16( env );
2318 am_off0 = PPCAMode_IR( 0, r_aligned16 );
2320 /* Note that the store below (done via PPCInstr_AvLdSt) uses
2321 * stvx, which stores the vector in proper LE format,
2322 * with byte zero (far right byte of the register in LE format)
2323 * stored at the lowest memory address. Therefore, to obtain
2324 * integer word zero, we need to use that lowest memory address
2325 * as the base for the load.
2327 if (IEndianess == Iend_LE)
2328 am_off_word0 = am_off0;
2329 else
2330 am_off_word0 = PPCAMode_IR( 12,r_aligned16 );
2332 // store vec, load low word to dst
2333 addInstr(env,
2334 PPCInstr_AvLdSt( False/*store*/, 16, vec, am_off0 ));
2335 addInstr(env,
2336 PPCInstr_Load( 4, dst, am_off_word0, mode64 ));
2338 add_to_sp( env, 32 ); // Reset SP
2339 return dst;
2342 case Iop_V128to64:
2343 case Iop_V128HIto64:
2344 if (mode64) {
2345 HReg r_aligned16;
2346 HReg dst = newVRegI(env);
2347 HReg vec = iselVecExpr(env, e->Iex.Unop.arg, IEndianess);
2348 PPCAMode *am_off0, *am_off8, *am_off_arg;
2349 sub_from_sp( env, 32 ); // Move SP down 32 bytes
2351 // get a quadword aligned address within our stack space
2352 r_aligned16 = get_sp_aligned16( env );
2353 am_off0 = PPCAMode_IR( 0, r_aligned16 );
2354 am_off8 = PPCAMode_IR( 8 ,r_aligned16 );
2356 // store vec, load low word or high to dst
2357 addInstr(env,
2358 PPCInstr_AvLdSt( False/*store*/, 16, vec, am_off0 ));
2359 if (IEndianess == Iend_LE) {
2360 if (op_unop == Iop_V128HIto64)
2361 am_off_arg = am_off8;
2362 else
2363 am_off_arg = am_off0;
2364 } else {
2365 if (op_unop == Iop_V128HIto64)
2366 am_off_arg = am_off0;
2367 else
2368 am_off_arg = am_off8;
2370 addInstr(env,
2371 PPCInstr_Load(
2372 8, dst,
2373 am_off_arg,
2374 mode64 ));
2376 add_to_sp( env, 32 ); // Reset SP
2377 return dst;
2379 break;
2380 case Iop_16to8:
2381 case Iop_32to8:
2382 case Iop_32to16:
2383 case Iop_64to8:
2384 /* These are no-ops. */
2385 return iselWordExpr_R(env, e->Iex.Unop.arg, IEndianess);
2387 /* ReinterpF64asI64(e) */
2388 /* Given an IEEE754 double, produce an I64 with the same bit
2389 pattern. */
2390 case Iop_ReinterpF64asI64:
2391 if (mode64) {
2392 PPCAMode *am_addr;
2393 HReg fr_src = iselDblExpr(env, e->Iex.Unop.arg, IEndianess);
2394 HReg r_dst = newVRegI(env);
2396 sub_from_sp( env, 16 ); // Move SP down 16 bytes
2397 am_addr = PPCAMode_IR( 0, StackFramePtr(mode64) );
2399 // store as F64
2400 addInstr(env, PPCInstr_FpLdSt( False/*store*/, 8,
2401 fr_src, am_addr ));
2402 // load as Ity_I64
2403 addInstr(env, PPCInstr_Load( 8, r_dst, am_addr, mode64 ));
2405 add_to_sp( env, 16 ); // Reset SP
2406 return r_dst;
2408 break;
2410 /* ReinterpF32asI32(e) */
2411 /* Given an IEEE754 float, produce an I32 with the same bit
2412 pattern. */
2413 case Iop_ReinterpF32asI32: {
2414 /* I believe this generates correct code for both 32- and
2415 64-bit hosts. */
2416 PPCAMode *am_addr;
2417 HReg fr_src = iselFltExpr(env, e->Iex.Unop.arg, IEndianess);
2418 HReg r_dst = newVRegI(env);
2420 sub_from_sp( env, 16 ); // Move SP down 16 bytes
2421 am_addr = PPCAMode_IR( 0, StackFramePtr(mode64) );
2423 // store as F32
2424 addInstr(env, PPCInstr_FpLdSt( False/*store*/, 4,
2425 fr_src, am_addr ));
2426 // load as Ity_I32
2427 addInstr(env, PPCInstr_Load( 4, r_dst, am_addr, mode64 ));
2429 add_to_sp( env, 16 ); // Reset SP
2430 return r_dst;
2432 break;
2434 case Iop_ReinterpD64asI64:
2435 if (mode64) {
2436 PPCAMode *am_addr;
2437 HReg fr_src = iselDfp64Expr(env, e->Iex.Unop.arg, IEndianess);
2438 HReg r_dst = newVRegI(env);
2440 sub_from_sp( env, 16 ); // Move SP down 16 bytes
2441 am_addr = PPCAMode_IR( 0, StackFramePtr(mode64) );
2443 // store as D64
2444 addInstr(env, PPCInstr_FpLdSt( False/*store*/, 8,
2445 fr_src, am_addr ));
2446 // load as Ity_I64
2447 addInstr(env, PPCInstr_Load( 8, r_dst, am_addr, mode64 ));
2448 add_to_sp( env, 16 ); // Reset SP
2449 return r_dst;
2451 break;
2453 case Iop_BCDtoDPB: {
2454 /* the following is only valid in 64 bit mode */
2455 if (!mode64) break;
2457 PPCCondCode cc;
2458 UInt argiregs;
2459 HReg argregs[1];
2460 HReg r_dst = newVRegI(env);
2461 Int argreg;
2463 argiregs = 0;
2464 argreg = 0;
2465 argregs[0] = hregPPC_GPR3(mode64);
2467 argiregs |= (1 << (argreg+3));
2468 addInstr(env, mk_iMOVds_RR( argregs[argreg++],
2469 iselWordExpr_R(env, e->Iex.Unop.arg,
2470 IEndianess) ) );
2472 cc = mk_PPCCondCode( Pct_ALWAYS, Pcf_NONE );
2473 if (IEndianess == Iend_LE) {
2474 addInstr(env, PPCInstr_Call( cc, (Addr)h_calc_BCDtoDPB,
2475 argiregs,
2476 mk_RetLoc_simple(RLPri_Int)) );
2477 } else {
2478 HWord* fdescr;
2479 fdescr = (HWord*)h_calc_BCDtoDPB;
2480 addInstr(env, PPCInstr_Call( cc, (Addr64)(fdescr[0]),
2481 argiregs,
2482 mk_RetLoc_simple(RLPri_Int)) );
2485 addInstr(env, mk_iMOVds_RR(r_dst, argregs[0]));
2486 return r_dst;
2489 case Iop_DPBtoBCD: {
2490 /* the following is only valid in 64 bit mode */
2491 if (!mode64) break;
2493 PPCCondCode cc;
2494 UInt argiregs;
2495 HReg argregs[1];
2496 HReg r_dst = newVRegI(env);
2497 Int argreg;
2499 argiregs = 0;
2500 argreg = 0;
2501 argregs[0] = hregPPC_GPR3(mode64);
2503 argiregs |= (1 << (argreg+3));
2504 addInstr(env, mk_iMOVds_RR( argregs[argreg++],
2505 iselWordExpr_R(env, e->Iex.Unop.arg,
2506 IEndianess) ) );
2508 cc = mk_PPCCondCode( Pct_ALWAYS, Pcf_NONE );
2510 if (IEndianess == Iend_LE) {
2511 addInstr(env, PPCInstr_Call( cc, (Addr)h_calc_DPBtoBCD,
2512 argiregs,
2513 mk_RetLoc_simple(RLPri_Int) ) );
2514 } else {
2515 HWord* fdescr;
2516 fdescr = (HWord*)h_calc_DPBtoBCD;
2517 addInstr(env, PPCInstr_Call( cc, (Addr64)(fdescr[0]),
2518 argiregs,
2519 mk_RetLoc_simple(RLPri_Int) ) );
2522 addInstr(env, mk_iMOVds_RR(r_dst, argregs[0]));
2523 return r_dst;
2525 case Iop_F32toF16x4_DEP: {
2526 HReg vdst = newVRegV(env); /* V128 */
2527 HReg dst = newVRegI(env); /* I64*/
2528 HReg r0 = newVRegI(env); /* I16*/
2529 HReg r1 = newVRegI(env); /* I16*/
2530 HReg r2 = newVRegI(env); /* I16*/
2531 HReg r3 = newVRegI(env); /* I16*/
2532 HReg vsrc = iselVecExpr(env, e->Iex.Unop.arg, IEndianess);
2533 PPCAMode *am_off0, *am_off2, *am_off4, *am_off6, *am_off8;
2534 PPCAMode *am_off10, *am_off12, *am_off14;
2535 HReg r_aligned16;
2537 sub_from_sp( env, 32 ); // Move SP down
2539 /* issue instruction */
2540 addInstr(env, PPCInstr_AvUnary(Pav_F32toF16x4, vdst, vsrc));
2542 /* Get a quadword aligned address within our stack space */
2543 r_aligned16 = get_sp_aligned16( env );
2544 am_off0 = PPCAMode_IR( 0, r_aligned16 );
2545 am_off2 = PPCAMode_IR( 2, r_aligned16 );
2546 am_off4 = PPCAMode_IR( 4, r_aligned16 );
2547 am_off6 = PPCAMode_IR( 6, r_aligned16 );
2548 am_off8 = PPCAMode_IR( 8, r_aligned16 );
2549 am_off10 = PPCAMode_IR( 10, r_aligned16 );
2550 am_off12 = PPCAMode_IR( 12, r_aligned16 );
2551 am_off14 = PPCAMode_IR( 14, r_aligned16 );
2553 /* Store v128 result to stack. */
2554 addInstr(env, PPCInstr_AvLdSt(False/*store*/, 16, vdst, am_off0));
2556 /* fetch four I16 from V128, store into contiguous I64 via stack, */
2557 if (IEndianess == Iend_LE) {
2558 addInstr(env, PPCInstr_Load( 2, r3, am_off12, mode64));
2559 addInstr(env, PPCInstr_Load( 2, r2, am_off8, mode64));
2560 addInstr(env, PPCInstr_Load( 2, r1, am_off4, mode64));
2561 addInstr(env, PPCInstr_Load( 2, r0, am_off0, mode64));
2562 } else {
2563 addInstr(env, PPCInstr_Load( 2, r0, am_off14, mode64));
2564 addInstr(env, PPCInstr_Load( 2, r1, am_off10, mode64));
2565 addInstr(env, PPCInstr_Load( 2, r2, am_off6, mode64));
2566 addInstr(env, PPCInstr_Load( 2, r3, am_off2, mode64));
2569 /* store in contiguous 64-bit values */
2570 addInstr(env, PPCInstr_Store( 2, am_off6, r3, mode64));
2571 addInstr(env, PPCInstr_Store( 2, am_off4, r2, mode64));
2572 addInstr(env, PPCInstr_Store( 2, am_off2, r1, mode64));
2573 addInstr(env, PPCInstr_Store( 2, am_off0, r0, mode64));
2575 /* Fetch I64 */
2576 addInstr(env, PPCInstr_Load(8, dst, am_off0, mode64));
2578 add_to_sp( env, 32 ); // Reset SP
2579 return dst;
2582 default:
2583 break;
2586 switch (e->Iex.Unop.op) {
2587 case Iop_ExtractExpD64: {
2589 HReg fr_dst = newVRegI(env);
2590 HReg fr_src = iselDfp64Expr(env, e->Iex.Unop.arg, IEndianess);
2591 HReg tmp = newVRegF(env);
2592 PPCAMode* zero_r1 = PPCAMode_IR( 0, StackFramePtr(env->mode64) );
2593 addInstr(env, PPCInstr_Dfp64Unary(Pfp_DXEX, tmp, fr_src));
2595 // put the D64 result into a integer register
2596 sub_from_sp( env, 16 );
2597 addInstr(env, PPCInstr_FpLdSt(False/*store*/, 8, tmp, zero_r1));
2598 addInstr(env, PPCInstr_Load(8, fr_dst, zero_r1, env->mode64));
2599 add_to_sp( env, 16 );
2600 return fr_dst;
2602 case Iop_ExtractExpD128: {
2603 HReg fr_dst = newVRegI(env);
2604 HReg r_srcHi;
2605 HReg r_srcLo;
2606 HReg tmp = newVRegF(env);
2607 PPCAMode* zero_r1 = PPCAMode_IR( 0, StackFramePtr(env->mode64) );
2609 iselDfp128Expr(&r_srcHi, &r_srcLo, env, e->Iex.Unop.arg,
2610 IEndianess);
2611 addInstr(env, PPCInstr_ExtractExpD128(Pfp_DXEXQ, tmp,
2612 r_srcHi, r_srcLo));
2614 sub_from_sp( env, 16 );
2615 addInstr(env, PPCInstr_FpLdSt(False/*store*/, 8, tmp, zero_r1));
2616 addInstr(env, PPCInstr_Load(8, fr_dst, zero_r1, env->mode64));
2617 add_to_sp( env, 16 );
2618 return fr_dst;
2620 default:
2621 break;
2624 break;
2627 /* --------- GET --------- */
2628 case Iex_Get: {
2629 if (ty == Ity_I8 || ty == Ity_I16 ||
2630 ty == Ity_I32 || ((ty == Ity_I64) && mode64)) {
2631 HReg r_dst = newVRegI(env);
2632 PPCAMode* am_addr = PPCAMode_IR( e->Iex.Get.offset,
2633 GuestStatePtr(mode64) );
2634 addInstr(env, PPCInstr_Load( toUChar(sizeofIRType(ty)),
2635 r_dst, am_addr, mode64 ));
2636 return r_dst;
2638 break;
2641 case Iex_GetI: {
2642 PPCAMode* src_am
2643 = genGuestArrayOffset( env, e->Iex.GetI.descr,
2644 e->Iex.GetI.ix, e->Iex.GetI.bias,
2645 IEndianess );
2646 HReg r_dst = newVRegI(env);
2647 if (mode64 && ty == Ity_I64) {
2648 addInstr(env, PPCInstr_Load( toUChar(8),
2649 r_dst, src_am, mode64 ));
2650 return r_dst;
2652 if ((!mode64) && ty == Ity_I32) {
2653 addInstr(env, PPCInstr_Load( toUChar(4),
2654 r_dst, src_am, mode64 ));
2655 return r_dst;
2657 break;
2660 /* --------- CCALL --------- */
2661 case Iex_CCall: {
2662 vassert(ty == e->Iex.CCall.retty); /* well-formedness of IR */
2664 /* be very restrictive for now. Only 32/64-bit ints allowed for
2665 args, and 32 bits or host machine word for return type. */
2666 if (!(ty == Ity_I32 || (mode64 && ty == Ity_I64)))
2667 goto irreducible;
2669 /* Marshal args, do the call, clear stack. */
2670 UInt addToSp = 0;
2671 RetLoc rloc = mk_RetLoc_INVALID();
2672 doHelperCall( &addToSp, &rloc, env, NULL/*guard*/,
2673 e->Iex.CCall.cee, e->Iex.CCall.retty, e->Iex.CCall.args,
2674 IEndianess );
2675 vassert(is_sane_RetLoc(rloc));
2676 vassert(rloc.pri == RLPri_Int);
2677 vassert(addToSp == 0);
      /* GPR3 now holds the integer return value of the helper call
         (rloc.pri was asserted to be RLPri_Int just above). */
2680 HReg r_dst = newVRegI(env);
2681 addInstr(env, mk_iMOVds_RR(r_dst, hregPPC_GPR3(mode64)));
2682 return r_dst;
2685 /* --------- LITERAL --------- */
2686 /* 32/16/8-bit literals */
2687 case Iex_Const: {
2688 Long l;
2689 HReg r_dst = newVRegI(env);
2690 IRConst* con = e->Iex.Const.con;
2691 switch (con->tag) {
2692 case Ico_U64: if (!mode64) goto irreducible;
2693 l = (Long) con->Ico.U64; break;
2694 case Ico_U32: l = (Long)(Int) con->Ico.U32; break;
2695 case Ico_U16: l = (Long)(Int)(Short)con->Ico.U16; break;
2696 case Ico_U8: l = (Long)(Int)(Char )con->Ico.U8; break;
2697 default: vpanic("iselIntExpr_R.const(ppc)");
2699 addInstr(env, PPCInstr_LI(r_dst, (ULong)l, mode64));
2700 return r_dst;
2703 /* --------- MULTIPLEX --------- */
2704 case Iex_ITE: { // VFD
2705 if ((ty == Ity_I8 || ty == Ity_I16 ||
2706 ty == Ity_I32 || ((ty == Ity_I64) && mode64)) &&
2707 typeOfIRExpr(env->type_env,e->Iex.ITE.cond) == Ity_I1) {
2708 PPCRI* r1 = iselWordExpr_RI(env, e->Iex.ITE.iftrue, IEndianess);
2709 HReg r0 = iselWordExpr_R(env, e->Iex.ITE.iffalse, IEndianess);
2710 HReg r_dst = newVRegI(env);
2711 addInstr(env, mk_iMOVds_RR(r_dst,r0));
2712 PPCCondCode cc = iselCondCode(env, e->Iex.ITE.cond, IEndianess);
2713 addInstr(env, PPCInstr_CMov(cc, r_dst, r1));
2714 return r_dst;
2716 break;
2719 default:
2720 break;
2721 } /* switch (e->tag) */
2724 /* We get here if no pattern matched. */
2725 irreducible:
2726 ppIRExpr(e);
2727 vpanic("iselIntExpr_R(ppc): cannot reduce tree");
2731 /*---------------------------------------------------------*/
2732 /*--- ISEL: Integer expression auxiliaries ---*/
2733 /*---------------------------------------------------------*/
2735 /* --------------------- AMODEs --------------------- */
/* Return an AMode which computes the value of the specified
   expression, possibly also adding insns to the code list as a
   result.  The expression may only be a word-size one.
*/
2742 static Bool uInt_fits_in_16_bits ( UInt u )
2744 /* Is u the same as the sign-extend of its lower 16 bits? */
2745 UInt v = u & 0xFFFF;
2747 v = (Int)(v << 16) >> 16; /* sign extend */
2749 return u == v;
2752 static Bool uLong_fits_in_16_bits ( ULong u )
2754 /* Is u the same as the sign-extend of its lower 16 bits? */
2755 ULong v = u & 0xFFFFULL;
2757 v = (Long)(v << 48) >> 48; /* sign extend */
2759 return u == v;
2762 static Bool uLong_is_4_aligned ( ULong u )
2764 return toBool((u & 3ULL) == 0);
2767 static Bool sane_AMode ( ISelEnv* env, PPCAMode* am )
2769 Bool mode64 = env->mode64;
2770 switch (am->tag) {
2771 case Pam_IR:
2772 /* Using uInt_fits_in_16_bits in 64-bit mode seems a bit bogus,
2773 somehow, but I think it's OK. */
2774 return toBool( hregClass(am->Pam.IR.base) == HRcGPR(mode64) &&
2775 hregIsVirtual(am->Pam.IR.base) &&
2776 uInt_fits_in_16_bits(am->Pam.IR.index) );
2777 case Pam_RR:
2778 return toBool( hregClass(am->Pam.RR.base) == HRcGPR(mode64) &&
2779 hregIsVirtual(am->Pam.RR.base) &&
2780 hregClass(am->Pam.RR.index) == HRcGPR(mode64) &&
2781 hregIsVirtual(am->Pam.RR.index) );
2782 default:
2783 vpanic("sane_AMode: unknown ppc amode tag");
2787 static
2788 PPCAMode* iselWordExpr_AMode ( ISelEnv* env, const IRExpr* e, IRType xferTy,
2789 IREndness IEndianess )
2791 PPCAMode* am = iselWordExpr_AMode_wrk(env, e, xferTy, IEndianess);
2792 vassert(sane_AMode(env, am));
2793 return am;
2796 /* DO NOT CALL THIS DIRECTLY ! */
2797 static PPCAMode* iselWordExpr_AMode_wrk ( ISelEnv* env, const IRExpr* e,
2798 IRType xferTy, IREndness IEndianess )
2800 IRType ty = typeOfIRExpr(env->type_env,e);
2802 if (env->mode64) {
2804 /* If the data load/store type is I32 or I64, this amode might
2805 be destined for use in ld/ldu/lwa/st/stu. In which case
2806 insist that if it comes out as an _IR, the immediate must
2807 have its bottom two bits be zero. This does assume that for
2808 any other type (I8/I16/I128/F32/F64/V128) the amode will not
2809 be parked in any such instruction. But that seems a
2810 reasonable assumption. */
2811 Bool aligned4imm = toBool(xferTy == Ity_I32 || xferTy == Ity_I64);
2813 vassert(ty == Ity_I64);
2815 /* Add64(expr,i), where i == sign-extend of (i & 0xFFFF) */
2816 if (e->tag == Iex_Binop
2817 && e->Iex.Binop.op == Iop_Add64
2818 && e->Iex.Binop.arg2->tag == Iex_Const
2819 && e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U64
2820 && (aligned4imm ? uLong_is_4_aligned(e->Iex.Binop.arg2
2821 ->Iex.Const.con->Ico.U64)
2822 : True)
2823 && uLong_fits_in_16_bits(e->Iex.Binop.arg2
2824 ->Iex.Const.con->Ico.U64)) {
2825 return PPCAMode_IR( (Int)e->Iex.Binop.arg2->Iex.Const.con->Ico.U64,
2826 iselWordExpr_R(env, e->Iex.Binop.arg1,
2827 IEndianess) );
2830 /* Add64(expr,expr) */
2831 if (e->tag == Iex_Binop
2832 && e->Iex.Binop.op == Iop_Add64) {
2833 HReg r_base = iselWordExpr_R(env, e->Iex.Binop.arg1, IEndianess);
2834 HReg r_idx = iselWordExpr_R(env, e->Iex.Binop.arg2, IEndianess);
2835 return PPCAMode_RR( r_idx, r_base );
2838 } else {
2840 vassert(ty == Ity_I32);
2842 /* Add32(expr,i), where i == sign-extend of (i & 0xFFFF) */
2843 if (e->tag == Iex_Binop
2844 && e->Iex.Binop.op == Iop_Add32
2845 && e->Iex.Binop.arg2->tag == Iex_Const
2846 && e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U32
2847 && uInt_fits_in_16_bits(e->Iex.Binop.arg2
2848 ->Iex.Const.con->Ico.U32)) {
2849 return PPCAMode_IR( (Int)e->Iex.Binop.arg2->Iex.Const.con->Ico.U32,
2850 iselWordExpr_R(env, e->Iex.Binop.arg1,
2851 IEndianess) );
2854 /* Add32(expr,expr) */
2855 if (e->tag == Iex_Binop
2856 && e->Iex.Binop.op == Iop_Add32) {
2857 HReg r_base = iselWordExpr_R(env, e->Iex.Binop.arg1, IEndianess);
2858 HReg r_idx = iselWordExpr_R(env, e->Iex.Binop.arg2, IEndianess);
2859 return PPCAMode_RR( r_idx, r_base );
2864 /* Doesn't match anything in particular. Generate it into
2865 a register and use that. */
2866 return PPCAMode_IR( 0, iselWordExpr_R(env,e,IEndianess) );
2870 /* --------------------- RH --------------------- */
2872 /* Compute an I8/I16/I32 (and I64, in 64-bit mode) into a RH
2873 (reg-or-halfword-immediate). It's important to specify whether the
2874 immediate is to be regarded as signed or not. If yes, this will
2875 never return -32768 as an immediate; this guaranteed that all
2876 signed immediates that are return can have their sign inverted if
2877 need be. */
2879 static PPCRH* iselWordExpr_RH ( ISelEnv* env, Bool syned, const IRExpr* e,
2880 IREndness IEndianess )
2882 PPCRH* ri = iselWordExpr_RH_wrk(env, syned, e, IEndianess);
2883 /* sanity checks ... */
2884 switch (ri->tag) {
2885 case Prh_Imm:
2886 vassert(ri->Prh.Imm.syned == syned);
2887 if (syned)
2888 vassert(ri->Prh.Imm.imm16 != 0x8000);
2889 return ri;
2890 case Prh_Reg:
2891 vassert(hregClass(ri->Prh.Reg.reg) == HRcGPR(env->mode64));
2892 vassert(hregIsVirtual(ri->Prh.Reg.reg));
2893 return ri;
2894 default:
2895 vpanic("iselIntExpr_RH: unknown ppc RH tag");
2899 /* DO NOT CALL THIS DIRECTLY ! */
2900 static PPCRH* iselWordExpr_RH_wrk ( ISelEnv* env, Bool syned, const IRExpr* e,
2901 IREndness IEndianess )
2903 ULong u;
2904 Long l;
2905 IRType ty = typeOfIRExpr(env->type_env,e);
2906 vassert(ty == Ity_I8 || ty == Ity_I16 ||
2907 ty == Ity_I32 || ((ty == Ity_I64) && env->mode64));
2909 /* special case: immediate */
2910 if (e->tag == Iex_Const) {
2911 IRConst* con = e->Iex.Const.con;
2912 /* What value are we aiming to generate? */
2913 switch (con->tag) {
2914 /* Note: Not sign-extending - we carry 'syned' around */
2915 case Ico_U64: vassert(env->mode64);
2916 u = con->Ico.U64; break;
2917 case Ico_U32: u = 0xFFFFFFFF & con->Ico.U32; break;
2918 case Ico_U16: u = 0x0000FFFF & con->Ico.U16; break;
2919 case Ico_U8: u = 0x000000FF & con->Ico.U8; break;
2920 default: vpanic("iselIntExpr_RH.Iex_Const(ppch)");
2922 l = (Long)u;
2923 /* Now figure out if it's representable. */
2924 if (!syned && u <= 65535) {
2925 return PPCRH_Imm(False/*unsigned*/, toUShort(u & 0xFFFF));
2927 if (syned && l >= -32767 && l <= 32767) {
2928 return PPCRH_Imm(True/*signed*/, toUShort(u & 0xFFFF));
2930 /* no luck; use the Slow Way. */
2933 /* default case: calculate into a register and return that */
2934 return PPCRH_Reg( iselWordExpr_R ( env, e, IEndianess ) );
2938 /* --------------------- RIs --------------------- */
2940 /* Calculate an expression into an PPCRI operand. As with
2941 iselIntExpr_R, the expression can have type 32, 16 or 8 bits, or,
2942 in 64-bit mode, 64 bits. */
2944 static PPCRI* iselWordExpr_RI ( ISelEnv* env, const IRExpr* e,
2945 IREndness IEndianess )
2947 PPCRI* ri = iselWordExpr_RI_wrk(env, e, IEndianess);
2948 /* sanity checks ... */
2949 switch (ri->tag) {
2950 case Pri_Imm:
2951 return ri;
2952 case Pri_Reg:
2953 vassert(hregClass(ri->Pri.Reg) == HRcGPR(env->mode64));
2954 vassert(hregIsVirtual(ri->Pri.Reg));
2955 return ri;
2956 default:
2957 vpanic("iselIntExpr_RI: unknown ppc RI tag");
2961 /* DO NOT CALL THIS DIRECTLY ! */
2962 static PPCRI* iselWordExpr_RI_wrk ( ISelEnv* env, const IRExpr* e,
2963 IREndness IEndianess )
2965 Long l;
2966 IRType ty = typeOfIRExpr(env->type_env,e);
2967 vassert(ty == Ity_I8 || ty == Ity_I16 ||
2968 ty == Ity_I32 || ((ty == Ity_I64) && env->mode64));
2970 /* special case: immediate */
2971 if (e->tag == Iex_Const) {
2972 IRConst* con = e->Iex.Const.con;
2973 switch (con->tag) {
2974 case Ico_U64: vassert(env->mode64);
2975 l = (Long) con->Ico.U64; break;
2976 case Ico_U32: l = (Long)(Int) con->Ico.U32; break;
2977 case Ico_U16: l = (Long)(Int)(Short)con->Ico.U16; break;
2978 case Ico_U8: l = (Long)(Int)(Char )con->Ico.U8; break;
2979 default: vpanic("iselIntExpr_RI.Iex_Const(ppch)");
2981 return PPCRI_Imm((ULong)l);
2984 /* default case: calculate into a register and return that */
2985 return PPCRI_Reg( iselWordExpr_R ( env, e, IEndianess ) );
2989 /* --------------------- RH5u --------------------- */
2991 /* Compute an I8 into a reg-or-5-bit-unsigned-immediate, the latter
2992 being an immediate in the range 1 .. 31 inclusive. Used for doing
2993 shift amounts. Only used in 32-bit mode. */
2995 static PPCRH* iselWordExpr_RH5u ( ISelEnv* env, const IRExpr* e,
2996 IREndness IEndianess )
2998 PPCRH* ri;
2999 vassert(!env->mode64);
3000 ri = iselWordExpr_RH5u_wrk(env, e, IEndianess);
3001 /* sanity checks ... */
3002 switch (ri->tag) {
3003 case Prh_Imm:
3004 vassert(ri->Prh.Imm.imm16 >= 1 && ri->Prh.Imm.imm16 <= 31);
3005 vassert(!ri->Prh.Imm.syned);
3006 return ri;
3007 case Prh_Reg:
3008 vassert(hregClass(ri->Prh.Reg.reg) == HRcGPR(env->mode64));
3009 vassert(hregIsVirtual(ri->Prh.Reg.reg));
3010 return ri;
3011 default:
3012 vpanic("iselIntExpr_RH5u: unknown ppc RI tag");
3016 /* DO NOT CALL THIS DIRECTLY ! */
3017 static PPCRH* iselWordExpr_RH5u_wrk ( ISelEnv* env, const IRExpr* e,
3018 IREndness IEndianess )
3020 IRType ty = typeOfIRExpr(env->type_env,e);
3021 vassert(ty == Ity_I8);
3023 /* special case: immediate */
3024 if (e->tag == Iex_Const
3025 && e->Iex.Const.con->tag == Ico_U8
3026 && e->Iex.Const.con->Ico.U8 >= 1
3027 && e->Iex.Const.con->Ico.U8 <= 31) {
3028 return PPCRH_Imm(False/*unsigned*/, e->Iex.Const.con->Ico.U8);
3031 /* default case: calculate into a register and return that */
3032 return PPCRH_Reg( iselWordExpr_R ( env, e, IEndianess ) );
3036 /* --------------------- RH6u --------------------- */
3038 /* Compute an I8 into a reg-or-6-bit-unsigned-immediate, the latter
3039 being an immediate in the range 1 .. 63 inclusive. Used for doing
3040 shift amounts. Only used in 64-bit mode. */
3042 static PPCRH* iselWordExpr_RH6u ( ISelEnv* env, const IRExpr* e,
3043 IREndness IEndianess )
3045 PPCRH* ri;
3046 vassert(env->mode64);
3047 ri = iselWordExpr_RH6u_wrk(env, e, IEndianess);
3048 /* sanity checks ... */
3049 switch (ri->tag) {
3050 case Prh_Imm:
3051 vassert(ri->Prh.Imm.imm16 >= 1 && ri->Prh.Imm.imm16 <= 63);
3052 vassert(!ri->Prh.Imm.syned);
3053 return ri;
3054 case Prh_Reg:
3055 vassert(hregClass(ri->Prh.Reg.reg) == HRcGPR(env->mode64));
3056 vassert(hregIsVirtual(ri->Prh.Reg.reg));
3057 return ri;
3058 default:
3059 vpanic("iselIntExpr_RH6u: unknown ppc64 RI tag");
3063 /* DO NOT CALL THIS DIRECTLY ! */
3064 static PPCRH* iselWordExpr_RH6u_wrk ( ISelEnv* env, const IRExpr* e,
3065 IREndness IEndianess )
3067 IRType ty = typeOfIRExpr(env->type_env,e);
3068 vassert(ty == Ity_I8);
3070 /* special case: immediate */
3071 if (e->tag == Iex_Const
3072 && e->Iex.Const.con->tag == Ico_U8
3073 && e->Iex.Const.con->Ico.U8 >= 1
3074 && e->Iex.Const.con->Ico.U8 <= 63) {
3075 return PPCRH_Imm(False/*unsigned*/, e->Iex.Const.con->Ico.U8);
3078 /* default case: calculate into a register and return that */
3079 return PPCRH_Reg( iselWordExpr_R ( env, e, IEndianess ) );
3083 /* --------------------- CONDCODE --------------------- */
3085 /* Generate code to evaluated a bit-typed expression, returning the
3086 condition code which would correspond when the expression would
3087 notionally have returned 1. */
3089 static PPCCondCode iselCondCode ( ISelEnv* env, const IRExpr* e,
3090 IREndness IEndianess )
3092 /* Uh, there's nothing we can sanity check here, unfortunately. */
3093 return iselCondCode_wrk(env,e, IEndianess);
3096 /* DO NOT CALL THIS DIRECTLY ! */
3097 static PPCCondCode iselCondCode_wrk ( ISelEnv* env, const IRExpr* e,
3098 IREndness IEndianess )
3100 vassert(e);
3101 vassert(typeOfIRExpr(env->type_env,e) == Ity_I1);
3103 /* Constant 1:Bit */
3104 if (e->tag == Iex_Const) {
3105 // Make a compare that will always be true (or always false):
3106 vassert(e->Iex.Const.con->Ico.U1 == True || e->Iex.Const.con->Ico.U1 == False);
3107 HReg r_zero = newVRegI(env);
3108 addInstr(env, PPCInstr_LI(r_zero, 0, env->mode64));
3109 addInstr(env, PPCInstr_Cmp(False/*unsigned*/, True/*32bit cmp*/,
3110 7/*cr*/, r_zero, PPCRH_Reg(r_zero)));
3111 return mk_PPCCondCode( e->Iex.Const.con->Ico.U1 ? Pct_TRUE : Pct_FALSE,
3112 Pcf_7EQ );
3115 /* Not1(...) */
3116 if (e->tag == Iex_Unop && e->Iex.Unop.op == Iop_Not1) {
3117 /* Generate code for the arg, and negate the test condition */
3118 PPCCondCode cond = iselCondCode(env, e->Iex.Unop.arg, IEndianess);
3119 cond.test = invertCondTest(cond.test);
3120 return cond;
3123 /* --- patterns rooted at: 32to1 or 64to1 --- */
3125 /* 32to1, 64to1 */
3126 if (e->tag == Iex_Unop &&
3127 (e->Iex.Unop.op == Iop_32to1 || e->Iex.Unop.op == Iop_64to1)) {
3128 HReg src = iselWordExpr_R(env, e->Iex.Unop.arg, IEndianess);
3129 HReg tmp = newVRegI(env);
3130 /* could do better, probably -- andi. */
3131 addInstr(env, PPCInstr_Alu(Palu_AND, tmp,
3132 src, PPCRH_Imm(False,1)));
3133 addInstr(env, PPCInstr_Cmp(False/*unsigned*/, True/*32bit cmp*/,
3134 7/*cr*/, tmp, PPCRH_Imm(False,1)));
3135 return mk_PPCCondCode( Pct_TRUE, Pcf_7EQ );
3138 /* --- patterns rooted at: CmpNEZ8 --- */
3140 /* CmpNEZ8(x) */
3141 /* Note this cloned as CmpNE8(x,0) below. */
3142 /* could do better -- andi. */
3143 if (e->tag == Iex_Unop
3144 && e->Iex.Unop.op == Iop_CmpNEZ8) {
3145 HReg arg = iselWordExpr_R(env, e->Iex.Unop.arg, IEndianess);
3146 HReg tmp = newVRegI(env);
3147 addInstr(env, PPCInstr_Alu(Palu_AND, tmp, arg,
3148 PPCRH_Imm(False,0xFF)));
3149 addInstr(env, PPCInstr_Cmp(False/*unsigned*/, True/*32bit cmp*/,
3150 7/*cr*/, tmp, PPCRH_Imm(False,0)));
3151 return mk_PPCCondCode( Pct_FALSE, Pcf_7EQ );
3154 /* --- patterns rooted at: CmpNEZ32 --- */
3156 /* CmpNEZ32(x) */
3157 if (e->tag == Iex_Unop
3158 && e->Iex.Unop.op == Iop_CmpNEZ32) {
3159 HReg r1 = iselWordExpr_R(env, e->Iex.Unop.arg, IEndianess);
3160 addInstr(env, PPCInstr_Cmp(False/*unsigned*/, True/*32bit cmp*/,
3161 7/*cr*/, r1, PPCRH_Imm(False,0)));
3162 return mk_PPCCondCode( Pct_FALSE, Pcf_7EQ );
3165 /* --- patterns rooted at: Cmp*32* --- */
3167 /* Cmp*32*(x,y) */
3168 if (e->tag == Iex_Binop
3169 && (e->Iex.Binop.op == Iop_CmpEQ32
3170 || e->Iex.Binop.op == Iop_CmpNE32
3171 || e->Iex.Binop.op == Iop_CmpLT32S
3172 || e->Iex.Binop.op == Iop_CmpLT32U
3173 || e->Iex.Binop.op == Iop_CmpLE32S
3174 || e->Iex.Binop.op == Iop_CmpLE32U)) {
3175 Bool syned = (e->Iex.Binop.op == Iop_CmpLT32S ||
3176 e->Iex.Binop.op == Iop_CmpLE32S);
3177 HReg r1 = iselWordExpr_R(env, e->Iex.Binop.arg1, IEndianess);
3178 PPCRH* ri2 = iselWordExpr_RH(env, syned, e->Iex.Binop.arg2, IEndianess);
3179 addInstr(env, PPCInstr_Cmp(syned, True/*32bit cmp*/,
3180 7/*cr*/, r1, ri2));
3182 switch (e->Iex.Binop.op) {
3183 case Iop_CmpEQ32: return mk_PPCCondCode( Pct_TRUE, Pcf_7EQ );
3184 case Iop_CmpNE32: return mk_PPCCondCode( Pct_FALSE, Pcf_7EQ );
3185 case Iop_CmpLT32U: case Iop_CmpLT32S:
3186 return mk_PPCCondCode( Pct_TRUE, Pcf_7LT );
3187 case Iop_CmpLE32U: case Iop_CmpLE32S:
3188 return mk_PPCCondCode( Pct_FALSE, Pcf_7GT );
3189 default: vpanic("iselCondCode(ppc): CmpXX32");
3193 /* --- patterns rooted at: CmpNEZ64 --- */
3195 /* CmpNEZ64 */
3196 if (e->tag == Iex_Unop
3197 && e->Iex.Unop.op == Iop_CmpNEZ64) {
3198 if (!env->mode64) {
3199 HReg hi, lo;
3200 HReg tmp = newVRegI(env);
3201 iselInt64Expr( &hi, &lo, env, e->Iex.Unop.arg, IEndianess );
3202 addInstr(env, PPCInstr_Alu(Palu_OR, tmp, lo, PPCRH_Reg(hi)));
3203 addInstr(env, PPCInstr_Cmp(False/*sign*/, True/*32bit cmp*/,
3204 7/*cr*/, tmp,PPCRH_Imm(False,0)));
3205 return mk_PPCCondCode( Pct_FALSE, Pcf_7EQ );
3206 } else { // mode64
3207 HReg r_src = iselWordExpr_R(env, e->Iex.Unop.arg, IEndianess);
3208 addInstr(env, PPCInstr_Cmp(False/*sign*/, False/*64bit cmp*/,
3209 7/*cr*/, r_src,PPCRH_Imm(False,0)));
3210 return mk_PPCCondCode( Pct_FALSE, Pcf_7EQ );
3214 /* --- patterns rooted at: Cmp*64* --- */
3216 /* Cmp*64*(x,y) */
3217 if (e->tag == Iex_Binop
3218 && (e->Iex.Binop.op == Iop_CmpEQ64
3219 || e->Iex.Binop.op == Iop_CmpNE64
3220 || e->Iex.Binop.op == Iop_CmpLT64S
3221 || e->Iex.Binop.op == Iop_CmpLT64U
3222 || e->Iex.Binop.op == Iop_CmpLE64S
3223 || e->Iex.Binop.op == Iop_CmpLE64U)) {
3224 Bool syned = (e->Iex.Binop.op == Iop_CmpLT64S ||
3225 e->Iex.Binop.op == Iop_CmpLE64S);
3226 HReg r1 = iselWordExpr_R(env, e->Iex.Binop.arg1, IEndianess);
3227 PPCRH* ri2 = iselWordExpr_RH(env, syned, e->Iex.Binop.arg2, IEndianess);
3228 vassert(env->mode64);
3229 addInstr(env, PPCInstr_Cmp(syned, False/*64bit cmp*/,
3230 7/*cr*/, r1, ri2));
3232 switch (e->Iex.Binop.op) {
3233 case Iop_CmpEQ64: return mk_PPCCondCode( Pct_TRUE, Pcf_7EQ );
3234 case Iop_CmpNE64: return mk_PPCCondCode( Pct_FALSE, Pcf_7EQ );
3235 case Iop_CmpLT64U: case Iop_CmpLT64S:
3236 return mk_PPCCondCode( Pct_TRUE, Pcf_7LT );
3237 case Iop_CmpLE64U: case Iop_CmpLE64S:
3238 return mk_PPCCondCode( Pct_FALSE, Pcf_7GT );
3239 default: vpanic("iselCondCode(ppc): CmpXX64");
3243 /* --- patterns rooted at: CmpNE8 --- */
3245 /* CmpNE8(x,0) */
3246 /* Note this is a direct copy of CmpNEZ8 above. */
3247 /* could do better -- andi. */
3248 if (e->tag == Iex_Binop
3249 && e->Iex.Binop.op == Iop_CmpNE8
3250 && isZeroU8(e->Iex.Binop.arg2)) {
3251 HReg arg = iselWordExpr_R(env, e->Iex.Binop.arg1, IEndianess);
3252 HReg tmp = newVRegI(env);
3253 addInstr(env, PPCInstr_Alu(Palu_AND, tmp, arg,
3254 PPCRH_Imm(False,0xFF)));
3255 addInstr(env, PPCInstr_Cmp(False/*unsigned*/, True/*32bit cmp*/,
3256 7/*cr*/, tmp, PPCRH_Imm(False,0)));
3257 return mk_PPCCondCode( Pct_FALSE, Pcf_7EQ );
3260 /* var */
3261 if (e->tag == Iex_RdTmp) {
3262 HReg r_src = lookupIRTemp(env, e->Iex.RdTmp.tmp);
3263 HReg src_masked = newVRegI(env);
3264 addInstr(env,
3265 PPCInstr_Alu(Palu_AND, src_masked,
3266 r_src, PPCRH_Imm(False,1)));
3267 addInstr(env,
3268 PPCInstr_Cmp(False/*unsigned*/, True/*32bit cmp*/,
3269 7/*cr*/, src_masked, PPCRH_Imm(False,1)));
3270 return mk_PPCCondCode( Pct_TRUE, Pcf_7EQ );
3273 /* --- And1(x,y), Or1(x,y) --- */
3274 /* FIXME: We could (and probably should) do a lot better here, by using the
3275 iselCondCode_C/_R scheme used in the amd64 insn selector. */
3276 if (e->tag == Iex_Binop
3277 && (e->Iex.Binop.op == Iop_And1 || e->Iex.Binop.op == Iop_Or1)) {
3278 HReg x_as_int = newVRegI(env);
3279 PPCCondCode cc_x = iselCondCode(env, e->Iex.Binop.arg1, IEndianess);
3280 addInstr(env, PPCInstr_Set(cc_x, x_as_int));
3282 HReg y_as_int = newVRegI(env);
3283 PPCCondCode cc_y = iselCondCode(env, e->Iex.Binop.arg2, IEndianess);
3284 addInstr(env, PPCInstr_Set(cc_y, y_as_int));
3286 HReg tmp = newVRegI(env);
3287 PPCAluOp op = e->Iex.Binop.op == Iop_And1 ? Palu_AND : Palu_OR;
3288 addInstr(env, PPCInstr_Alu(op, tmp, x_as_int, PPCRH_Reg(y_as_int)));
3290 addInstr(env, PPCInstr_Alu(Palu_AND, tmp, tmp, PPCRH_Imm(False,1)));
3291 addInstr(env, PPCInstr_Cmp(False/*unsigned*/, True/*32bit cmp*/,
3292 7/*cr*/, tmp, PPCRH_Imm(False,1)));
3293 return mk_PPCCondCode( Pct_TRUE, Pcf_7EQ );
3296 vex_printf("iselCondCode(ppc): No such tag(%u)\n", e->tag);
3297 ppIRExpr(e);
3298 vpanic("iselCondCode(ppc)");
3302 /*---------------------------------------------------------*/
3303 /*--- ISEL: Integer expressions (128 bit) ---*/
3304 /*---------------------------------------------------------*/
3306 /* 64-bit mode ONLY: compute a 128-bit value into a register pair,
3307 which is returned as the first two parameters. As with
3308 iselWordExpr_R, these may be either real or virtual regs; in any
3309 case they must not be changed by subsequent code emitted by the
3310 caller. */
3312 static void iselInt128Expr ( HReg* rHi, HReg* rLo, ISelEnv* env,
3313 const IRExpr* e, IREndness IEndianess )
3315 vassert(env->mode64);
3316 iselInt128Expr_wrk(rHi, rLo, env, e, IEndianess);
3317 # if 0
3318 vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
3319 # endif
3320 vassert(hregClass(*rHi) == HRcGPR(env->mode64));
3321 vassert(hregIsVirtual(*rHi));
3322 vassert(hregClass(*rLo) == HRcGPR(env->mode64));
3323 vassert(hregIsVirtual(*rLo));
3326 /* DO NOT CALL THIS DIRECTLY ! */
3327 static void iselInt128Expr_wrk ( HReg* rHi, HReg* rLo, ISelEnv* env,
3328 const IRExpr* e, IREndness IEndianess )
3330 Bool mode64 = env->mode64;
3332 vassert(e);
3333 vassert(typeOfIRExpr(env->type_env,e) == Ity_I128);
3335 /* read 128-bit IRTemp */
3336 if (e->tag == Iex_RdTmp) {
3337 lookupIRTempPair( rHi, rLo, env, e->Iex.RdTmp.tmp);
3338 return;
3341 /* 128-bit GET */
3342 if (e->tag == Iex_Get) {
3343 PPCAMode* am_addr = PPCAMode_IR( e->Iex.Get.offset,
3344 GuestStatePtr(mode64) );
3345 PPCAMode* am_addr4 = advance4(env, am_addr);
3346 HReg tLo = newVRegI(env);
3347 HReg tHi = newVRegI(env);
3349 addInstr(env, PPCInstr_Load( 8, tHi, am_addr, mode64));
3350 addInstr(env, PPCInstr_Load( 8, tLo, am_addr4, mode64));
3351 *rHi = tHi;
3352 *rLo = tLo;
3353 return;
3356 /* --------- BINARY ops --------- */
3357 if (e->tag == Iex_Binop) {
3358 switch (e->Iex.Binop.op) {
3359 /* 64 x 64 -> 128 multiply */
3360 case Iop_MullU64:
3361 case Iop_MullS64: {
3362 HReg tLo = newVRegI(env);
3363 HReg tHi = newVRegI(env);
3364 Bool syned = toBool(e->Iex.Binop.op == Iop_MullS64);
3365 HReg r_srcL = iselWordExpr_R(env, e->Iex.Binop.arg1, IEndianess);
3366 HReg r_srcR = iselWordExpr_R(env, e->Iex.Binop.arg2, IEndianess);
3367 addInstr(env, PPCInstr_MulL(False/*signedness irrelevant*/,
3368 False/*lo64*/, False/*64bit mul*/,
3369 tLo, r_srcL, r_srcR));
3370 addInstr(env, PPCInstr_MulL(syned,
3371 True/*hi64*/, False/*64bit mul*/,
3372 tHi, r_srcL, r_srcR));
3373 *rHi = tHi;
3374 *rLo = tLo;
3375 return;
3378 /* 64HLto128(e1,e2) */
3379 case Iop_64HLto128:
3380 *rHi = iselWordExpr_R(env, e->Iex.Binop.arg1, IEndianess);
3381 *rLo = iselWordExpr_R(env, e->Iex.Binop.arg2, IEndianess);
3382 return;
3384 case Iop_D128toI128S: {
3385 HReg srcHi = INVALID_HREG;
3386 HReg srcLo = INVALID_HREG;
3387 HReg dstLo = newVRegI(env);
3388 HReg dstHi = newVRegI(env);
3389 HReg tmp = newVRegV(env);
3390 PPCAMode* am_addr;
3391 PPCAMode* am_addr4;
3393 /* Get the DF128 value, store in two 64-bit halves */
3394 iselDfp128Expr( &srcHi, &srcLo, env, e->Iex.Binop.arg2, IEndianess );
3396 sub_from_sp( env, 16 ); // Move SP down 16 bytes
3397 am_addr = PPCAMode_IR( 0, StackFramePtr(mode64) );
3398 am_addr4 = advance4(env, am_addr);
3400 addInstr(env, PPCInstr_XFormUnary994(Px_DFPTOIQS, tmp, srcHi, srcLo));
3402 // store the result in the VSR
3403 addInstr(env, PPCInstr_AvLdSt( False/*store*/, 16, tmp, am_addr ));
3405 // load the two Ity_64 values
3406 addInstr(env, PPCInstr_Load( 8, dstHi, am_addr, mode64 ));
3407 addInstr(env, PPCInstr_Load( 8, dstLo, am_addr4, mode64 ));
3409 *rHi = dstHi;
3410 *rLo = dstLo;
3412 add_to_sp( env, 16 ); // Reset SP
3413 return;
3416 default:
3417 break;
3419 } /* if (e->tag == Iex_Binop) */
3422 /* --------- UNARY ops --------- */
3423 if (e->tag == Iex_Unop) {
3424 switch (e->Iex.Unop.op) {
3425 case Iop_ReinterpV128asI128:
3426 case Iop_ReinterpF128asI128: {
3427 HReg src;
3428 HReg dstLo = newVRegI(env);
3429 HReg dstHi = newVRegI(env);
3430 PPCAMode* am_addr;
3431 PPCAMode* am_addr4;
3433 if (e->Iex.Unop.op == Iop_ReinterpF128asI128)
3434 src = iselFp128Expr(env, e->Iex.Unop.arg, IEndianess);
3435 else
3436 src = iselVecExpr(env, e->Iex.Unop.arg, IEndianess);
3438 sub_from_sp( env, 16 ); // Move SP down 16 bytes
3439 am_addr = PPCAMode_IR( 0, StackFramePtr(mode64) );
3440 am_addr4 = advance4(env, am_addr);
3442 // store the Ity_F128 value
3443 addInstr(env, PPCInstr_AvLdSt( False/*store*/, 16, src, am_addr ));
3445 // load the two Ity_64 values
3446 addInstr(env, PPCInstr_Load( 8, dstHi, am_addr, mode64 ));
3447 addInstr(env, PPCInstr_Load( 8, dstLo, am_addr4, mode64 ));
3449 *rHi = dstHi;
3450 *rLo = dstLo;
3451 add_to_sp( env, 16 ); // Reset SP
3452 return;
3454 default:
3455 break;
3457 } /* if (e->tag == Iex_Unop) */
3459 vex_printf("iselInt128Expr(ppc64): No such tag(%u)\n", e->tag);
3460 ppIRExpr(e);
3461 vpanic("iselInt128Expr(ppc64)");
3465 /*---------------------------------------------------------*/
3466 /*--- ISEL: Integer expressions (64 bit) ---*/
3467 /*---------------------------------------------------------*/
3469 /* 32-bit mode ONLY: compute a 128-bit value into a register quad */
3470 static void iselInt128Expr_to_32x4 ( HReg* rHi, HReg* rMedHi, HReg* rMedLo,
3471 HReg* rLo, ISelEnv* env, const IRExpr* e,
3472 IREndness IEndianess )
3474 vassert(!env->mode64);
3475 iselInt128Expr_to_32x4_wrk(rHi, rMedHi, rMedLo, rLo, env, e, IEndianess);
3476 # if 0
3477 vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
3478 # endif
3479 vassert(hregClass(*rHi) == HRcInt32);
3480 vassert(hregIsVirtual(*rHi));
3481 vassert(hregClass(*rMedHi) == HRcInt32);
3482 vassert(hregIsVirtual(*rMedHi));
3483 vassert(hregClass(*rMedLo) == HRcInt32);
3484 vassert(hregIsVirtual(*rMedLo));
3485 vassert(hregClass(*rLo) == HRcInt32);
3486 vassert(hregIsVirtual(*rLo));
3489 static void iselInt128Expr_to_32x4_wrk ( HReg* rHi, HReg* rMedHi,
3490 HReg* rMedLo, HReg* rLo,
3491 ISelEnv* env, const IRExpr* e,
3492 IREndness IEndianess )
3494 vassert(e);
3495 vassert(typeOfIRExpr(env->type_env,e) == Ity_I128);
3497 /* read 128-bit IRTemp */
3498 if (e->tag == Iex_RdTmp) {
3499 lookupIRTempQuad( rHi, rMedHi, rMedLo, rLo, env, e->Iex.RdTmp.tmp);
3500 return;
3503 if (e->tag == Iex_Binop) {
3505 IROp op_binop = e->Iex.Binop.op;
3506 switch (op_binop) {
3507 case Iop_64HLto128:
3508 iselInt64Expr(rHi, rMedHi, env, e->Iex.Binop.arg1, IEndianess);
3509 iselInt64Expr(rMedLo, rLo, env, e->Iex.Binop.arg2, IEndianess);
3510 return;
3511 default:
3512 vex_printf("iselInt128Expr_to_32x4_wrk: Binop case 0x%x not found\n",
3513 op_binop);
3514 break;
3518 vex_printf("iselInt128Expr_to_32x4_wrk: e->tag 0x%x not found\n", e->tag);
3519 return;
3522 /* 32-bit mode ONLY: compute a 64-bit value into a register pair,
3523 which is returned as the first two parameters. As with
3524 iselIntExpr_R, these may be either real or virtual regs; in any
3525 case they must not be changed by subsequent code emitted by the
3526 caller. */
3528 static void iselInt64Expr ( HReg* rHi, HReg* rLo,
3529 ISelEnv* env, const IRExpr* e,
3530 IREndness IEndianess )
3532 vassert(!env->mode64);
3533 iselInt64Expr_wrk(rHi, rLo, env, e, IEndianess);
3534 # if 0
3535 vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
3536 # endif
3537 vassert(hregClass(*rHi) == HRcInt32);
3538 vassert(hregIsVirtual(*rHi));
3539 vassert(hregClass(*rLo) == HRcInt32);
3540 vassert(hregIsVirtual(*rLo));
3543 /* DO NOT CALL THIS DIRECTLY ! */
3544 static void iselInt64Expr_wrk ( HReg* rHi, HReg* rLo,
3545 ISelEnv* env, const IRExpr* e,
3546 IREndness IEndianess )
3548 vassert(e);
3549 vassert(typeOfIRExpr(env->type_env,e) == Ity_I64);
3551 /* 64-bit load */
3552 if (e->tag == Iex_Load && e->Iex.Load.end == IEndianess) {
3553 HReg tLo = newVRegI(env);
3554 HReg tHi = newVRegI(env);
3555 HReg r_addr = iselWordExpr_R(env, e->Iex.Load.addr, IEndianess);
3556 vassert(!env->mode64);
3557 addInstr(env, PPCInstr_Load( 4/*byte-load*/,
3558 tHi, PPCAMode_IR( 0, r_addr ),
3559 False/*32-bit insn please*/) );
3560 addInstr(env, PPCInstr_Load( 4/*byte-load*/,
3561 tLo, PPCAMode_IR( 4, r_addr ),
3562 False/*32-bit insn please*/) );
3563 *rHi = tHi;
3564 *rLo = tLo;
3565 return;
3568 /* 64-bit literal */
3569 if (e->tag == Iex_Const) {
3570 ULong w64 = e->Iex.Const.con->Ico.U64;
3571 UInt wHi = ((UInt)(w64 >> 32)) & 0xFFFFFFFF;
3572 UInt wLo = ((UInt)w64) & 0xFFFFFFFF;
3573 HReg tLo = newVRegI(env);
3574 HReg tHi = newVRegI(env);
3575 vassert(e->Iex.Const.con->tag == Ico_U64);
3576 addInstr(env, PPCInstr_LI(tHi, (Long)(Int)wHi, False/*mode32*/));
3577 addInstr(env, PPCInstr_LI(tLo, (Long)(Int)wLo, False/*mode32*/));
3578 *rHi = tHi;
3579 *rLo = tLo;
3580 return;
3583 /* read 64-bit IRTemp */
3584 if (e->tag == Iex_RdTmp) {
3585 lookupIRTempPair( rHi, rLo, env, e->Iex.RdTmp.tmp);
3586 return;
3589 /* 64-bit GET */
3590 if (e->tag == Iex_Get) {
3591 PPCAMode* am_addr = PPCAMode_IR( e->Iex.Get.offset,
3592 GuestStatePtr(False/*mode32*/) );
3593 PPCAMode* am_addr4 = advance4(env, am_addr);
3594 HReg tLo = newVRegI(env);
3595 HReg tHi = newVRegI(env);
3596 addInstr(env, PPCInstr_Load( 4, tHi, am_addr, False/*mode32*/ ));
3597 addInstr(env, PPCInstr_Load( 4, tLo, am_addr4, False/*mode32*/ ));
3598 *rHi = tHi;
3599 *rLo = tLo;
3600 return;
3603 /* --------- CCALL --------- */
3604 if(e->tag == Iex_CCall) {
3605 IRType ty = typeOfIRExpr(env->type_env,e);
3606 Bool mode64 = env->mode64;
3608 vassert(ty == e->Iex.CCall.retty); /* well-formedness of IR */
3610 /* be very restrictive for now. Only 32-bit ints allowed for
3611 args, and 32 bits or host machine word for return type. */
3612 vassert(!(ty == Ity_I32 || (mode64 && ty == Ity_I64)));
3614 /* Marshal args, do the call, clear stack. */
3615 UInt addToSp = 0;
3616 RetLoc rloc = mk_RetLoc_INVALID();
3617 doHelperCall( &addToSp, &rloc, env, NULL/*guard*/,
3618 e->Iex.CCall.cee, e->Iex.CCall.retty, e->Iex.CCall.args,
3619 IEndianess );
3620 vassert(is_sane_RetLoc(rloc));
3622 vassert(rloc.pri == RLPri_2Int);
3623 vassert(addToSp == 0);
3625 /* GPR3 now holds the destination address from Pin_Goto */
3626 HReg r_dst = newVRegI(env);
3627 addInstr(env, mk_iMOVds_RR(r_dst, hregPPC_GPR3(mode64)));
3628 *rHi = r_dst;
3629 *rLo = r_dst;
3630 return;
3633 /* 64-bit ITE */
3634 if (e->tag == Iex_ITE) { // VFD
3635 HReg e0Lo, e0Hi, eXLo, eXHi;
3636 iselInt64Expr(&eXHi, &eXLo, env, e->Iex.ITE.iftrue, IEndianess);
3637 iselInt64Expr(&e0Hi, &e0Lo, env, e->Iex.ITE.iffalse, IEndianess);
3638 HReg tLo = newVRegI(env);
3639 HReg tHi = newVRegI(env);
3640 addInstr(env, mk_iMOVds_RR(tHi,e0Hi));
3641 addInstr(env, mk_iMOVds_RR(tLo,e0Lo));
3642 PPCCondCode cc = iselCondCode(env, e->Iex.ITE.cond, IEndianess);
3643 addInstr(env, PPCInstr_CMov(cc,tHi,PPCRI_Reg(eXHi)));
3644 addInstr(env, PPCInstr_CMov(cc,tLo,PPCRI_Reg(eXLo)));
3645 *rHi = tHi;
3646 *rLo = tLo;
3647 return;
3650 /* --------- BINARY ops --------- */
3651 if (e->tag == Iex_Binop) {
3652 IROp op_binop = e->Iex.Binop.op;
3653 switch (op_binop) {
3654 /* 32 x 32 -> 64 multiply */
3655 case Iop_MullU32:
3656 case Iop_MullS32: {
3657 HReg tLo = newVRegI(env);
3658 HReg tHi = newVRegI(env);
3659 Bool syned = toBool(op_binop == Iop_MullS32);
3660 HReg r_srcL = iselWordExpr_R(env, e->Iex.Binop.arg1,
3661 IEndianess);
3662 HReg r_srcR = iselWordExpr_R(env, e->Iex.Binop.arg2,
3663 IEndianess);
3664 addInstr(env, PPCInstr_MulL(False/*signedness irrelevant*/,
3665 False/*lo32*/, True/*32bit mul*/,
3666 tLo, r_srcL, r_srcR));
3667 addInstr(env, PPCInstr_MulL(syned,
3668 True/*hi32*/, True/*32bit mul*/,
3669 tHi, r_srcL, r_srcR));
3670 *rHi = tHi;
3671 *rLo = tLo;
3672 return;
3675 /* Or64/And64/Xor64 */
3676 case Iop_Or64:
3677 case Iop_And64:
3678 case Iop_Xor64: {
3679 HReg xLo, xHi, yLo, yHi;
3680 HReg tLo = newVRegI(env);
3681 HReg tHi = newVRegI(env);
3682 PPCAluOp op = (op_binop == Iop_Or64) ? Palu_OR :
3683 (op_binop == Iop_And64) ? Palu_AND : Palu_XOR;
3684 iselInt64Expr(&xHi, &xLo, env, e->Iex.Binop.arg1, IEndianess);
3685 iselInt64Expr(&yHi, &yLo, env, e->Iex.Binop.arg2, IEndianess);
3686 addInstr(env, PPCInstr_Alu(op, tHi, xHi, PPCRH_Reg(yHi)));
3687 addInstr(env, PPCInstr_Alu(op, tLo, xLo, PPCRH_Reg(yLo)));
3688 *rHi = tHi;
3689 *rLo = tLo;
3690 return;
3693 /* Add64 */
3694 case Iop_Add64: {
3695 HReg xLo, xHi, yLo, yHi;
3696 HReg tLo = newVRegI(env);
3697 HReg tHi = newVRegI(env);
3698 iselInt64Expr(&xHi, &xLo, env, e->Iex.Binop.arg1, IEndianess);
3699 iselInt64Expr(&yHi, &yLo, env, e->Iex.Binop.arg2, IEndianess);
3700 addInstr(env, PPCInstr_AddSubC( True/*add*/, True /*set carry*/,
3701 tLo, xLo, yLo));
3702 addInstr(env, PPCInstr_AddSubC( True/*add*/, False/*read carry*/,
3703 tHi, xHi, yHi));
3704 *rHi = tHi;
3705 *rLo = tLo;
3706 return;
3709 /* 32HLto64(e1,e2) */
3710 case Iop_32HLto64:
3711 *rHi = iselWordExpr_R(env, e->Iex.Binop.arg1, IEndianess);
3712 *rLo = iselWordExpr_R(env, e->Iex.Binop.arg2, IEndianess);
3713 return;
3715 /* F64toI64[S|U] */
3716 case Iop_F64toI64S: case Iop_F64toI64U: {
3717 HReg tLo = newVRegI(env);
3718 HReg tHi = newVRegI(env);
3719 HReg r1 = StackFramePtr(env->mode64);
3720 PPCAMode* zero_r1 = PPCAMode_IR( 0, r1 );
3721 PPCAMode* four_r1 = PPCAMode_IR( 4, r1 );
3722 HReg fsrc = iselDblExpr(env, e->Iex.Binop.arg2,
3723 IEndianess);
3724 HReg ftmp = newVRegF(env);
3726 vassert(!env->mode64);
3727 /* Set host rounding mode */
3728 set_FPU_rounding_mode( env, e->Iex.Binop.arg1, IEndianess );
3730 sub_from_sp( env, 16 );
3731 addInstr(env, PPCInstr_FpCftI(False/*F->I*/, False/*int64*/,
3732 (op_binop == Iop_F64toI64S) ? True : False,
3733 True, ftmp, fsrc));
3734 addInstr(env, PPCInstr_FpLdSt(False/*store*/, 8, ftmp, zero_r1));
3735 addInstr(env, PPCInstr_Load(4, tHi, zero_r1, False/*mode32*/));
3736 addInstr(env, PPCInstr_Load(4, tLo, four_r1, False/*mode32*/));
3737 add_to_sp( env, 16 );
3739 ///* Restore default FPU rounding. */
3740 //set_FPU_rounding_default( env );
3741 *rHi = tHi;
3742 *rLo = tLo;
3743 return;
3745 case Iop_D64toI64S: {
3746 HReg tLo = newVRegI(env);
3747 HReg tHi = newVRegI(env);
3748 HReg r1 = StackFramePtr(env->mode64);
3749 PPCAMode* zero_r1 = PPCAMode_IR( 0, r1 );
3750 PPCAMode* four_r1 = PPCAMode_IR( 4, r1 );
3751 HReg fr_src = iselDfp64Expr(env, e->Iex.Binop.arg2, IEndianess);
3752 HReg tmp = newVRegF(env);
3754 vassert(!env->mode64);
3755 set_FPU_DFP_rounding_mode( env, e->Iex.Binop.arg1, IEndianess );
3756 addInstr(env, PPCInstr_Dfp64Unary(Pfp_DCTFIX, tmp, fr_src));
3758 sub_from_sp( env, 16 );
3759 addInstr(env, PPCInstr_FpLdSt(False/*store*/, 8, tmp, zero_r1));
3760 addInstr(env, PPCInstr_Load(4, tHi, zero_r1, False/*mode32*/));
3761 addInstr(env, PPCInstr_Load(4, tLo, four_r1, False/*mode32*/));
3762 add_to_sp( env, 16 );
3763 *rHi = tHi;
3764 *rLo = tLo;
3765 return;
3767 case Iop_D128toI64S: {
3768 PPCFpOp fpop = Pfp_DCTFIXQ;
3769 HReg r_srcHi = newVRegF(env);
3770 HReg r_srcLo = newVRegF(env);
3771 HReg tLo = newVRegI(env);
3772 HReg tHi = newVRegI(env);
3773 HReg ftmp = newVRegF(env);
3774 PPCAMode* zero_r1 = PPCAMode_IR( 0, StackFramePtr(env->mode64) );
3775 PPCAMode* four_r1 = PPCAMode_IR( 4, StackFramePtr(env->mode64) );
3777 set_FPU_DFP_rounding_mode( env, e->Iex.Binop.arg1, IEndianess );
3778 iselDfp128Expr(&r_srcHi, &r_srcLo, env, e->Iex.Binop.arg2,
3779 IEndianess);
3780 addInstr(env, PPCInstr_DfpD128toD64(fpop, ftmp, r_srcHi, r_srcLo));
3782 // put the D64 result into an integer register pair
3783 sub_from_sp( env, 16 );
3784 addInstr(env, PPCInstr_FpLdSt(False/*store*/, 8, ftmp, zero_r1));
3785 addInstr(env, PPCInstr_Load(4, tHi, zero_r1, False/*mode32*/));
3786 addInstr(env, PPCInstr_Load(4, tLo, four_r1, False/*mode32*/));
3787 add_to_sp( env, 16 );
3788 *rHi = tHi;
3789 *rLo = tLo;
3790 return;
3792 default:
3793 break;
3795 } /* if (e->tag == Iex_Binop) */
3798 /* --------- UNARY ops --------- */
3799 if (e->tag == Iex_Unop) {
3800 switch (e->Iex.Unop.op) {
3802 /* CmpwNEZ64(e) */
3803 case Iop_CmpwNEZ64: {
3804 HReg argHi, argLo;
3805 HReg tmp1 = newVRegI(env);
3806 HReg tmp2 = newVRegI(env);
3807 iselInt64Expr(&argHi, &argLo, env, e->Iex.Unop.arg, IEndianess);
3808 /* tmp1 = argHi | argLo */
3809 addInstr(env, PPCInstr_Alu(Palu_OR, tmp1, argHi, PPCRH_Reg(argLo)));
3810 /* tmp2 = (tmp1 | -tmp1) >>s 31 */
3811 addInstr(env, PPCInstr_Unary(Pun_NEG,tmp2,tmp1));
3812 addInstr(env, PPCInstr_Alu(Palu_OR, tmp2, tmp2, PPCRH_Reg(tmp1)));
3813 addInstr(env, PPCInstr_Shft(Pshft_SAR, True/*32bit shift*/,
3814 tmp2, tmp2, PPCRH_Imm(False, 31)));
3815 *rHi = tmp2;
3816 *rLo = tmp2; /* yes, really tmp2 */
3817 return;
3820 /* Left64 */
3821 case Iop_Left64: {
3822 HReg argHi, argLo;
3823 HReg zero32 = newVRegI(env);
3824 HReg resHi = newVRegI(env);
3825 HReg resLo = newVRegI(env);
3826 iselInt64Expr(&argHi, &argLo, env, e->Iex.Unop.arg, IEndianess);
3827 vassert(env->mode64 == False);
3828 addInstr(env, PPCInstr_LI(zero32, 0, env->mode64));
3829 /* resHi:resLo = - argHi:argLo */
3830 addInstr(env, PPCInstr_AddSubC( False/*sub*/, True/*set carry*/,
3831 resLo, zero32, argLo ));
3832 addInstr(env, PPCInstr_AddSubC( False/*sub*/, False/*read carry*/,
3833 resHi, zero32, argHi ));
3834 /* resHi:resLo |= srcHi:srcLo */
3835 addInstr(env, PPCInstr_Alu(Palu_OR, resLo, resLo, PPCRH_Reg(argLo)));
3836 addInstr(env, PPCInstr_Alu(Palu_OR, resHi, resHi, PPCRH_Reg(argHi)));
3837 *rHi = resHi;
3838 *rLo = resLo;
3839 return;
3842 /* 32Sto64(e) */
3843 case Iop_32Sto64: {
3844 HReg tHi = newVRegI(env);
3845 HReg src = iselWordExpr_R(env, e->Iex.Unop.arg, IEndianess);
3846 addInstr(env, PPCInstr_Shft(Pshft_SAR, True/*32bit shift*/,
3847 tHi, src, PPCRH_Imm(False,31)));
3848 *rHi = tHi;
3849 *rLo = src;
3850 return;
3852 case Iop_ExtractExpD64: {
3853 HReg tmp = newVRegF(env);
3854 HReg fr_src = iselDfp64Expr(env, e->Iex.Unop.arg, IEndianess);
3855 HReg tLo = newVRegI(env);
3856 HReg tHi = newVRegI(env);
3857 PPCAMode* zero_r1 = PPCAMode_IR( 0, StackFramePtr(env->mode64) );
3858 PPCAMode* four_r1 = PPCAMode_IR( 4, StackFramePtr(env->mode64) );
3860 addInstr(env, PPCInstr_Dfp64Unary(Pfp_DXEX, tmp, fr_src));
3862 // put the D64 result into a integer register pair
3863 sub_from_sp( env, 16 );
3864 addInstr(env, PPCInstr_FpLdSt(False/*store*/, 8, tmp, zero_r1));
3865 addInstr(env, PPCInstr_Load(4, tHi, zero_r1, False/*mode32*/));
3866 addInstr(env, PPCInstr_Load(4, tLo, four_r1, False/*mode32*/));
3867 add_to_sp( env, 16 );
3868 *rHi = tHi;
3869 *rLo = tLo;
3870 return;
3872 case Iop_ExtractExpD128: {
3873 HReg r_srcHi;
3874 HReg r_srcLo;
3875 HReg tmp = newVRegF(env);
3876 HReg tLo = newVRegI(env);
3877 HReg tHi = newVRegI(env);
3878 PPCAMode* zero_r1 = PPCAMode_IR( 0, StackFramePtr(env->mode64) );
3879 PPCAMode* four_r1 = PPCAMode_IR( 4, StackFramePtr(env->mode64) );
3881 iselDfp128Expr(&r_srcHi, &r_srcLo, env, e->Iex.Unop.arg, IEndianess);
3882 addInstr(env, PPCInstr_ExtractExpD128(Pfp_DXEXQ, tmp,
3883 r_srcHi, r_srcLo));
3885 // put the D64 result into a integer register pair
3886 sub_from_sp( env, 16 );
3887 addInstr(env, PPCInstr_FpLdSt(False/*store*/, 8, tmp, zero_r1));
3888 addInstr(env, PPCInstr_Load(4, tHi, zero_r1, False/*mode32*/));
3889 addInstr(env, PPCInstr_Load(4, tLo, four_r1, False/*mode32*/));
3890 add_to_sp( env, 16 );
3891 *rHi = tHi;
3892 *rLo = tLo;
3893 return;
3896 /* 32Uto64(e) */
3897 case Iop_32Uto64: {
3898 HReg tHi = newVRegI(env);
3899 HReg tLo = iselWordExpr_R(env, e->Iex.Unop.arg, IEndianess);
3900 addInstr(env, PPCInstr_LI(tHi, 0, False/*mode32*/));
3901 *rHi = tHi;
3902 *rLo = tLo;
3903 return;
3906 case Iop_128to64: {
3907 /* Narrow, return the low 64-bit half as a 32-bit
3908 * register pair */
3909 HReg r_Hi = INVALID_HREG;
3910 HReg r_MedHi = INVALID_HREG;
3911 HReg r_MedLo = INVALID_HREG;
3912 HReg r_Lo = INVALID_HREG;
3914 iselInt128Expr_to_32x4(&r_Hi, &r_MedHi, &r_MedLo, &r_Lo,
3915 env, e->Iex.Unop.arg, IEndianess);
3916 *rHi = r_MedLo;
3917 *rLo = r_Lo;
3918 return;
3921 case Iop_128HIto64: {
3922 /* Narrow, return the high 64-bit half as a 32-bit
3923 * register pair */
3924 HReg r_Hi = INVALID_HREG;
3925 HReg r_MedHi = INVALID_HREG;
3926 HReg r_MedLo = INVALID_HREG;
3927 HReg r_Lo = INVALID_HREG;
3929 iselInt128Expr_to_32x4(&r_Hi, &r_MedHi, &r_MedLo, &r_Lo,
3930 env, e->Iex.Unop.arg, IEndianess);
3931 *rHi = r_Hi;
3932 *rLo = r_MedHi;
3933 return;
3936 /* V128{HI}to64 */
3937 case Iop_V128HIto64:
3938 case Iop_V128to64: {
3939 HReg r_aligned16;
3940 Int off = e->Iex.Unop.op==Iop_V128HIto64 ? 0 : 8;
3941 HReg tLo = newVRegI(env);
3942 HReg tHi = newVRegI(env);
3943 HReg vec = iselVecExpr(env, e->Iex.Unop.arg, IEndianess);
3944 PPCAMode *am_off0, *am_offLO, *am_offHI;
3945 sub_from_sp( env, 32 ); // Move SP down 32 bytes
3947 // get a quadword aligned address within our stack space
3948 r_aligned16 = get_sp_aligned16( env );
3949 am_off0 = PPCAMode_IR( 0, r_aligned16 );
3950 am_offHI = PPCAMode_IR( off, r_aligned16 );
3951 am_offLO = PPCAMode_IR( off+4, r_aligned16 );
3953 // store as Vec128
3954 addInstr(env,
3955 PPCInstr_AvLdSt( False/*store*/, 16, vec, am_off0 ));
3957 // load hi,lo words (of hi/lo half of vec) as Ity_I32's
3958 addInstr(env,
3959 PPCInstr_Load( 4, tHi, am_offHI, False/*mode32*/ ));
3960 addInstr(env,
3961 PPCInstr_Load( 4, tLo, am_offLO, False/*mode32*/ ));
3963 add_to_sp( env, 32 ); // Reset SP
3964 *rHi = tHi;
3965 *rLo = tLo;
3966 return;
3969 /* could do better than this, but for now ... */
3970 case Iop_1Sto64: {
3971 HReg tLo = newVRegI(env);
3972 HReg tHi = newVRegI(env);
3973 PPCCondCode cond = iselCondCode(env, e->Iex.Unop.arg, IEndianess);
3974 addInstr(env, PPCInstr_Set(cond,tLo));
3975 addInstr(env, PPCInstr_Shft(Pshft_SHL, True/*32bit shift*/,
3976 tLo, tLo, PPCRH_Imm(False,31)));
3977 addInstr(env, PPCInstr_Shft(Pshft_SAR, True/*32bit shift*/,
3978 tLo, tLo, PPCRH_Imm(False,31)));
3979 addInstr(env, mk_iMOVds_RR(tHi, tLo));
3980 *rHi = tHi;
3981 *rLo = tLo;
3982 return;
3985 case Iop_Not64: {
3986 HReg xLo, xHi;
3987 HReg tmpLo = newVRegI(env);
3988 HReg tmpHi = newVRegI(env);
3989 iselInt64Expr(&xHi, &xLo, env, e->Iex.Unop.arg, IEndianess);
3990 addInstr(env, PPCInstr_Unary(Pun_NOT,tmpLo,xLo));
3991 addInstr(env, PPCInstr_Unary(Pun_NOT,tmpHi,xHi));
3992 *rHi = tmpHi;
3993 *rLo = tmpLo;
3994 return;
3997 /* ReinterpF64asI64(e) */
3998 /* Given an IEEE754 double, produce an I64 with the same bit
3999 pattern. */
4000 case Iop_ReinterpF64asI64: {
4001 PPCAMode *am_addr0, *am_addr1;
4002 HReg fr_src = iselDblExpr(env, e->Iex.Unop.arg, IEndianess);
4003 HReg r_dstLo = newVRegI(env);
4004 HReg r_dstHi = newVRegI(env);
4006 sub_from_sp( env, 16 ); // Move SP down 16 bytes
4007 am_addr0 = PPCAMode_IR( 0, StackFramePtr(False/*mode32*/) );
4008 am_addr1 = PPCAMode_IR( 4, StackFramePtr(False/*mode32*/) );
4010 // store as F64
4011 addInstr(env, PPCInstr_FpLdSt( False/*store*/, 8,
4012 fr_src, am_addr0 ));
4014 // load hi,lo as Ity_I32's
4015 addInstr(env, PPCInstr_Load( 4, r_dstHi,
4016 am_addr0, False/*mode32*/ ));
4017 addInstr(env, PPCInstr_Load( 4, r_dstLo,
4018 am_addr1, False/*mode32*/ ));
4019 *rHi = r_dstHi;
4020 *rLo = r_dstLo;
4022 add_to_sp( env, 16 ); // Reset SP
4023 return;
4026 case Iop_ReinterpD64asI64: {
4027 HReg fr_src = iselDfp64Expr(env, e->Iex.Unop.arg, IEndianess);
4028 PPCAMode *am_addr0, *am_addr1;
4029 HReg r_dstLo = newVRegI(env);
4030 HReg r_dstHi = newVRegI(env);
4033 sub_from_sp( env, 16 ); // Move SP down 16 bytes
4034 am_addr0 = PPCAMode_IR( 0, StackFramePtr(False/*mode32*/) );
4035 am_addr1 = PPCAMode_IR( 4, StackFramePtr(False/*mode32*/) );
4037 // store as D64
4038 addInstr(env, PPCInstr_FpLdSt( False/*store*/, 8,
4039 fr_src, am_addr0 ));
4041 // load hi,lo as Ity_I32's
4042 addInstr(env, PPCInstr_Load( 4, r_dstHi,
4043 am_addr0, False/*mode32*/ ));
4044 addInstr(env, PPCInstr_Load( 4, r_dstLo,
4045 am_addr1, False/*mode32*/ ));
4046 *rHi = r_dstHi;
4047 *rLo = r_dstLo;
4049 add_to_sp( env, 16 ); // Reset SP
4051 return;
4054 case Iop_BCDtoDPB: {
4055 PPCCondCode cc;
4056 UInt argiregs;
4057 HReg argregs[2];
4058 Int argreg;
4059 HReg tLo = newVRegI(env);
4060 HReg tHi = newVRegI(env);
4061 HReg tmpHi;
4062 HReg tmpLo;
4063 Bool mode64 = env->mode64;
4065 argregs[0] = hregPPC_GPR3(mode64);
4066 argregs[1] = hregPPC_GPR4(mode64);
4068 argiregs = 0;
4069 argreg = 0;
4071 iselInt64Expr( &tmpHi, &tmpLo, env, e->Iex.Unop.arg, IEndianess );
4073 argiregs |= ( 1 << (argreg+3 ) );
4074 addInstr( env, mk_iMOVds_RR( argregs[argreg++], tmpHi ) );
4076 argiregs |= ( 1 << (argreg+3 ) );
4077 addInstr( env, mk_iMOVds_RR( argregs[argreg], tmpLo ) );
4079 cc = mk_PPCCondCode( Pct_ALWAYS, Pcf_NONE );
4081 if (IEndianess == Iend_LE) {
4082 addInstr( env, PPCInstr_Call( cc, (Addr)h_calc_BCDtoDPB,
4083 argiregs,
4084 mk_RetLoc_simple(RLPri_2Int) ) );
4085 } else {
4086 Addr64 target;
4087 target = mode64 ? (Addr)h_calc_BCDtoDPB :
4088 toUInt( (Addr)h_calc_BCDtoDPB );
4089 addInstr( env, PPCInstr_Call( cc, target,
4090 argiregs,
4091 mk_RetLoc_simple(RLPri_2Int) ) );
4094 addInstr( env, mk_iMOVds_RR( tHi, argregs[argreg-1] ) );
4095 addInstr( env, mk_iMOVds_RR( tLo, argregs[argreg] ) );
4097 *rHi = tHi;
4098 *rLo = tLo;
4099 return;
4102 case Iop_DPBtoBCD: {
4103 PPCCondCode cc;
4104 UInt argiregs;
4105 HReg argregs[2];
4106 Int argreg;
4107 HReg tLo = newVRegI(env);
4108 HReg tHi = newVRegI(env);
4109 HReg tmpHi;
4110 HReg tmpLo;
4111 Bool mode64 = env->mode64;
4113 argregs[0] = hregPPC_GPR3(mode64);
4114 argregs[1] = hregPPC_GPR4(mode64);
4116 argiregs = 0;
4117 argreg = 0;
4119 iselInt64Expr(&tmpHi, &tmpLo, env, e->Iex.Unop.arg, IEndianess);
4121 argiregs |= (1 << (argreg+3));
4122 addInstr(env, mk_iMOVds_RR( argregs[argreg++], tmpHi ));
4124 argiregs |= (1 << (argreg+3));
4125 addInstr(env, mk_iMOVds_RR( argregs[argreg], tmpLo));
4127 cc = mk_PPCCondCode( Pct_ALWAYS, Pcf_NONE );
4129 if (IEndianess == Iend_LE) {
4130 addInstr(env, PPCInstr_Call( cc, (Addr)h_calc_DPBtoBCD,
4131 argiregs,
4132 mk_RetLoc_simple(RLPri_2Int) ) );
4133 } else {
4134 Addr64 target;
4135 target = mode64 ? (Addr)h_calc_DPBtoBCD :
4136 toUInt( (Addr)h_calc_DPBtoBCD );
4137 addInstr(env, PPCInstr_Call( cc, target, argiregs,
4138 mk_RetLoc_simple(RLPri_2Int) ) );
4141 addInstr(env, mk_iMOVds_RR(tHi, argregs[argreg-1]));
4142 addInstr(env, mk_iMOVds_RR(tLo, argregs[argreg]));
4144 *rHi = tHi;
4145 *rLo = tLo;
4146 return;
4149 default:
4150 break;
4152 } /* if (e->tag == Iex_Unop) */
4154 vex_printf("iselInt64Expr(ppc): No such tag(%u)\n", e->tag);
4155 ppIRExpr(e);
4156 vpanic("iselInt64Expr(ppc)");
4160 /*---------------------------------------------------------*/
4161 /*--- ISEL: Floating point expressions (32 bit) ---*/
4162 /*---------------------------------------------------------*/
4164 /* Nothing interesting here; really just wrappers for
4165 64-bit stuff. */
4167 static HReg iselFltExpr ( ISelEnv* env, const IRExpr* e, IREndness IEndianess )
4169 HReg r = iselFltExpr_wrk( env, e, IEndianess );
4170 # if 0
4171 vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
4172 # endif
4173 vassert(hregClass(r) == HRcFlt64); /* yes, really Flt64 */
4174 vassert(hregIsVirtual(r));
4175 return r;
4178 /* DO NOT CALL THIS DIRECTLY */
4179 static HReg iselFltExpr_wrk ( ISelEnv* env, const IRExpr* e,
4180 IREndness IEndianess )
4182 Bool mode64 = env->mode64;
4184 IRType ty = typeOfIRExpr(env->type_env,e);
4185 vassert(ty == Ity_F32);
4187 if (e->tag == Iex_RdTmp) {
4188 return lookupIRTemp(env, e->Iex.RdTmp.tmp);
4191 if (e->tag == Iex_Load && e->Iex.Load.end == IEndianess) {
4192 PPCAMode* am_addr;
4193 HReg r_dst = newVRegF(env);
4194 vassert(e->Iex.Load.ty == Ity_F32);
4195 am_addr = iselWordExpr_AMode(env, e->Iex.Load.addr, Ity_F32/*xfer*/,
4196 IEndianess);
4197 addInstr(env, PPCInstr_FpLdSt(True/*load*/, 4, r_dst, am_addr));
4198 return r_dst;
4201 if (e->tag == Iex_Get) {
4202 HReg r_dst = newVRegF(env);
4203 PPCAMode* am_addr = PPCAMode_IR( e->Iex.Get.offset,
4204 GuestStatePtr(env->mode64) );
4205 addInstr(env, PPCInstr_FpLdSt( True/*load*/, 4, r_dst, am_addr ));
4206 return r_dst;
4209 if (e->tag == Iex_Unop && e->Iex.Unop.op == Iop_TruncF64asF32) {
4210 /* This is quite subtle. The only way to do the relevant
4211 truncation is to do a single-precision store and then a
4212 double precision load to get it back into a register. The
4213 problem is, if the data is then written to memory a second
4214 time, as in
4216 STbe(...) = TruncF64asF32(...)
4218 then will the second truncation further alter the value? The
4219 answer is no: flds (as generated here) followed by fsts
4220 (generated for the STbe) is the identity function on 32-bit
4221 floats, so we are safe.
4223 Another upshot of this is that if iselStmt can see the
4224 entirety of
4226 STbe(...) = TruncF64asF32(arg)
4228 then it can short circuit having to deal with TruncF64asF32
4229 individually; instead just compute arg into a 64-bit FP
4230 register and do 'fsts' (since that itself does the
4231 truncation).
4233 We generate pretty poor code here (should be ok both for
4234 32-bit and 64-bit mode); but it is expected that for the most
4235 part the latter optimisation will apply and hence this code
4236 will not often be used.
4238 HReg fsrc = iselDblExpr(env, e->Iex.Unop.arg, IEndianess);
4239 HReg fdst = newVRegF(env);
4240 PPCAMode* zero_r1 = PPCAMode_IR( 0, StackFramePtr(env->mode64) );
4242 sub_from_sp( env, 16 );
4243 // store as F32, hence truncating
4244 addInstr(env, PPCInstr_FpLdSt( False/*store*/, 4,
4245 fsrc, zero_r1 ));
4246 // and reload. Good huh?! (sigh)
4247 addInstr(env, PPCInstr_FpLdSt( True/*load*/, 4,
4248 fdst, zero_r1 ));
4249 add_to_sp( env, 16 );
4250 return fdst;
4253 if (e->tag == Iex_Binop && e->Iex.Binop.op == Iop_I64UtoF32) {
4254 if (mode64) {
4255 HReg fdst = newVRegF(env);
4256 HReg isrc = iselWordExpr_R(env, e->Iex.Binop.arg2, IEndianess);
4257 HReg r1 = StackFramePtr(env->mode64);
4258 PPCAMode* zero_r1 = PPCAMode_IR( 0, r1 );
4260 /* Set host rounding mode */
4261 set_FPU_rounding_mode( env, e->Iex.Binop.arg1, IEndianess );
4263 sub_from_sp( env, 16 );
4265 addInstr(env, PPCInstr_Store(8, zero_r1, isrc, True/*mode64*/));
4266 addInstr(env, PPCInstr_FpLdSt(True/*load*/, 8, fdst, zero_r1));
4267 addInstr(env, PPCInstr_FpCftI(True/*I->F*/, False/*int64*/,
4268 False, False,
4269 fdst, fdst));
4271 add_to_sp( env, 16 );
4273 ///* Restore default FPU rounding. */
4274 //set_FPU_rounding_default( env );
4275 return fdst;
4276 } else {
4277 /* 32-bit mode */
4278 HReg fdst = newVRegF(env);
4279 HReg isrcHi, isrcLo;
4280 HReg r1 = StackFramePtr(env->mode64);
4281 PPCAMode* zero_r1 = PPCAMode_IR( 0, r1 );
4282 PPCAMode* four_r1 = PPCAMode_IR( 4, r1 );
4284 iselInt64Expr(&isrcHi, &isrcLo, env, e->Iex.Binop.arg2, IEndianess);
4286 /* Set host rounding mode */
4287 set_FPU_rounding_mode( env, e->Iex.Binop.arg1, IEndianess );
4289 sub_from_sp( env, 16 );
4291 addInstr(env, PPCInstr_Store(4, zero_r1, isrcHi, False/*mode32*/));
4292 addInstr(env, PPCInstr_Store(4, four_r1, isrcLo, False/*mode32*/));
4293 addInstr(env, PPCInstr_FpLdSt(True/*load*/, 8, fdst, zero_r1));
4294 addInstr(env, PPCInstr_FpCftI(True/*I->F*/, False/*int64*/,
4295 False, False,
4296 fdst, fdst));
4298 add_to_sp( env, 16 );
4300 ///* Restore default FPU rounding. */
4301 //set_FPU_rounding_default( env );
4302 return fdst;
4307 vex_printf("iselFltExpr(ppc): No such tag(%u)\n", e->tag);
4308 ppIRExpr(e);
4309 vpanic("iselFltExpr_wrk(ppc)");
4313 /*---------------------------------------------------------*/
4314 /*--- ISEL: Floating point expressions (64 bit) ---*/
4315 /*---------------------------------------------------------*/
/* Compute a 64-bit floating point value into a register, the identity
   of which is returned.  The iselDblExpr wrapper asserts that the
   returned register is virtual and of class HRcFlt64; in any case it
   must not be changed by subsequent code emitted by the caller. */
/* IEEE 754 formats.  From http://www.freesoft.org/CIE/RFC/1832/32.htm:

   Type                  S (1 bit)   E (11 bits)   F (52 bits)
   ----                  ---------   -----------   -----------
   signalling NaN        u           2047 (max)    .0uuuuu---u
                                                   (with at least
                                                    one 1 bit)
   quiet NaN             u           2047 (max)    .1uuuuu---u

   negative infinity     1           2047 (max)    .000000---0

   positive infinity     0           2047 (max)    .000000---0

   negative zero         1           0             .000000---0

   positive zero         0           0             .000000---0
*/
4340 static HReg iselDblExpr ( ISelEnv* env, const IRExpr* e, IREndness IEndianess )
4342 HReg r = iselDblExpr_wrk( env, e, IEndianess );
4343 # if 0
4344 vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
4345 # endif
4346 vassert(hregClass(r) == HRcFlt64);
4347 vassert(hregIsVirtual(r));
4348 return r;
4351 /* DO NOT CALL THIS DIRECTLY */
4352 static HReg iselDblExpr_wrk ( ISelEnv* env, const IRExpr* e,
4353 IREndness IEndianess )
4355 Bool mode64 = env->mode64;
4356 IRType ty = typeOfIRExpr(env->type_env,e);
4357 vassert(e);
4358 vassert(ty == Ity_F64);
4360 if (e->tag == Iex_RdTmp) {
4361 return lookupIRTemp(env, e->Iex.RdTmp.tmp);
4364 /* --------- LITERAL --------- */
4365 if (e->tag == Iex_Const) {
4366 union { UInt u32x2[2]; ULong u64; Double f64; } u;
4367 vassert(sizeof(u) == 8);
4368 vassert(sizeof(u.u64) == 8);
4369 vassert(sizeof(u.f64) == 8);
4370 vassert(sizeof(u.u32x2) == 8);
4372 if (e->Iex.Const.con->tag == Ico_F64) {
4373 u.f64 = e->Iex.Const.con->Ico.F64;
4375 else if (e->Iex.Const.con->tag == Ico_F64i) {
4376 u.u64 = e->Iex.Const.con->Ico.F64i;
4378 else
4379 vpanic("iselDblExpr(ppc): const");
4381 if (!mode64) {
4382 HReg r_srcHi = newVRegI(env);
4383 HReg r_srcLo = newVRegI(env);
4384 addInstr(env, PPCInstr_LI(r_srcHi, u.u32x2[0], mode64));
4385 addInstr(env, PPCInstr_LI(r_srcLo, u.u32x2[1], mode64));
4386 return mk_LoadRR32toFPR( env, r_srcHi, r_srcLo );
4387 } else { // mode64
4388 HReg r_src = newVRegI(env);
4389 addInstr(env, PPCInstr_LI(r_src, u.u64, mode64));
4390 return mk_LoadR64toFPR( env, r_src ); // 1*I64 -> F64
4394 /* --------- LOAD --------- */
4395 if (e->tag == Iex_Load && e->Iex.Load.end == IEndianess) {
4396 HReg r_dst = newVRegF(env);
4397 PPCAMode* am_addr;
4398 vassert(e->Iex.Load.ty == Ity_F64);
4399 am_addr = iselWordExpr_AMode(env, e->Iex.Load.addr, Ity_F64/*xfer*/,
4400 IEndianess);
4401 addInstr(env, PPCInstr_FpLdSt(True/*load*/, 8, r_dst, am_addr));
4402 return r_dst;
4405 /* --------- GET --------- */
4406 if (e->tag == Iex_Get) {
4407 HReg r_dst = newVRegF(env);
4408 PPCAMode* am_addr = PPCAMode_IR( e->Iex.Get.offset,
4409 GuestStatePtr(mode64) );
4410 addInstr(env, PPCInstr_FpLdSt( True/*load*/, 8, r_dst, am_addr ));
4411 return r_dst;
4414 /* --------- OPS --------- */
4415 if (e->tag == Iex_Qop) {
4416 PPCFpOp fpop = Pfp_INVALID;
4417 switch (e->Iex.Qop.details->op) {
4418 case Iop_MAddF64: fpop = Pfp_MADDD; break;
4419 case Iop_MAddF64r32: fpop = Pfp_MADDS; break;
4420 case Iop_MSubF64: fpop = Pfp_MSUBD; break;
4421 case Iop_MSubF64r32: fpop = Pfp_MSUBS; break;
4422 default: break;
4424 if (fpop != Pfp_INVALID) {
4425 HReg r_dst = newVRegF(env);
4426 HReg r_srcML = iselDblExpr(env, e->Iex.Qop.details->arg2,
4427 IEndianess);
4428 HReg r_srcMR = iselDblExpr(env, e->Iex.Qop.details->arg3,
4429 IEndianess);
4430 HReg r_srcAcc = iselDblExpr(env, e->Iex.Qop.details->arg4,
4431 IEndianess);
4432 set_FPU_rounding_mode( env, e->Iex.Qop.details->arg1, IEndianess );
4433 addInstr(env, PPCInstr_FpMulAcc(fpop, r_dst,
4434 r_srcML, r_srcMR, r_srcAcc));
4435 return r_dst;
4439 if (e->tag == Iex_Triop) {
4440 IRTriop *triop = e->Iex.Triop.details;
4441 PPCFpOp fpop = Pfp_INVALID;
4442 switch (triop->op) {
4443 case Iop_AddF64: fpop = Pfp_ADDD; break;
4444 case Iop_SubF64: fpop = Pfp_SUBD; break;
4445 case Iop_MulF64: fpop = Pfp_MULD; break;
4446 case Iop_DivF64: fpop = Pfp_DIVD; break;
4447 case Iop_AddF64r32: fpop = Pfp_ADDS; break;
4448 case Iop_SubF64r32: fpop = Pfp_SUBS; break;
4449 case Iop_MulF64r32: fpop = Pfp_MULS; break;
4450 case Iop_DivF64r32: fpop = Pfp_DIVS; break;
4451 default: break;
4453 if (fpop != Pfp_INVALID) {
4454 HReg r_dst = newVRegF(env);
4455 HReg r_srcL = iselDblExpr(env, triop->arg2, IEndianess);
4456 HReg r_srcR = iselDblExpr(env, triop->arg3, IEndianess);
4457 set_FPU_rounding_mode( env, triop->arg1, IEndianess );
4458 addInstr(env, PPCInstr_FpBinary(fpop, r_dst, r_srcL, r_srcR));
4459 return r_dst;
4463 if (e->tag == Iex_Binop) {
4464 PPCFpOp fpop = Pfp_INVALID;
4465 switch (e->Iex.Binop.op) {
4466 case Iop_SqrtF64: fpop = Pfp_SQRT; break;
4467 default: break;
4469 if (fpop == Pfp_SQRT) {
4470 HReg fr_dst = newVRegF(env);
4471 HReg fr_src = iselDblExpr(env, e->Iex.Binop.arg2, IEndianess);
4472 set_FPU_rounding_mode( env, e->Iex.Binop.arg1, IEndianess );
4473 addInstr(env, PPCInstr_FpUnary(fpop, fr_dst, fr_src));
4474 return fr_dst;
4478 if (e->tag == Iex_Binop) {
4480 if (e->Iex.Binop.op == Iop_F128toF64) {
4481 HReg fr_dst = newVRegF(env);
4482 HReg fr_src = iselFp128Expr(env, e->Iex.Binop.arg2, IEndianess);
4483 HReg tmp = newVRegV(env);
4484 PPCAMode* zero_r1 = PPCAMode_IR( 0, StackFramePtr(env->mode64) );
4485 PPCAMode* eight_r1 = PPCAMode_IR( 8, StackFramePtr(env->mode64) );
4486 PPCFpOp fpop = Pfp_INVALID;
4488 if (FPU_rounding_mode_isOdd(e->Iex.Binop.arg1)) {
4489 /* use rounding mode specified by RN. Issue inst with R0 = 0 */
4490 fpop = Pfp_FPQTODRNDODD;
4491 } else {
4492 set_FPU_rounding_mode( env, e->Iex.Binop.arg1, IEndianess );
4493 fpop = Pfp_FPQTOD;
4496 addInstr(env, PPCInstr_Fp128Unary(fpop, tmp, fr_src));
4498 /* result is in a 128-bit vector register, move to 64-bit reg to
4499 * match the Iop specification. The result will get moved back
4500 * to a 128-bit register and stored once the value is returned.
4502 sub_from_sp( env, 16 );
4503 addInstr(env, PPCInstr_AvLdSt(False/*store*/, 16, tmp, zero_r1));
4504 if (IEndianess == Iend_LE)
4505 addInstr(env, PPCInstr_FpLdSt(True/*load*/, 8, fr_dst, eight_r1));
4506 else
4507 /* High 64-bits stored at lower address */
4508 addInstr(env, PPCInstr_FpLdSt(True/*load*/, 8, fr_dst, zero_r1));
4510 add_to_sp( env, 16 );
4512 return fr_dst;
4515 if (e->Iex.Binop.op == Iop_RoundF64toF32) {
4516 HReg r_dst = newVRegF(env);
4517 HReg r_src = iselDblExpr(env, e->Iex.Binop.arg2, IEndianess);
4518 set_FPU_rounding_mode( env, e->Iex.Binop.arg1, IEndianess );
4519 addInstr(env, PPCInstr_FpRSP(r_dst, r_src));
4520 //set_FPU_rounding_default( env );
4521 return r_dst;
4524 if (e->Iex.Binop.op == Iop_I64StoF64 || e->Iex.Binop.op == Iop_I64UtoF64) {
4525 if (mode64) {
4526 HReg fdst = newVRegF(env);
4527 HReg isrc = iselWordExpr_R(env, e->Iex.Binop.arg2, IEndianess);
4528 HReg r1 = StackFramePtr(env->mode64);
4529 PPCAMode* zero_r1 = PPCAMode_IR( 0, r1 );
4531 /* Set host rounding mode */
4532 set_FPU_rounding_mode( env, e->Iex.Binop.arg1, IEndianess );
4534 sub_from_sp( env, 16 );
4536 addInstr(env, PPCInstr_Store(8, zero_r1, isrc, True/*mode64*/));
4537 addInstr(env, PPCInstr_FpLdSt(True/*load*/, 8, fdst, zero_r1));
4538 addInstr(env, PPCInstr_FpCftI(True/*I->F*/, False/*int64*/,
4539 e->Iex.Binop.op == Iop_I64StoF64,
4540 True/*fdst is 64 bit*/,
4541 fdst, fdst));
4543 add_to_sp( env, 16 );
4545 ///* Restore default FPU rounding. */
4546 //set_FPU_rounding_default( env );
4547 return fdst;
4548 } else {
4549 /* 32-bit mode */
4550 HReg fdst = newVRegF(env);
4551 HReg isrcHi, isrcLo;
4552 HReg r1 = StackFramePtr(env->mode64);
4553 PPCAMode* zero_r1 = PPCAMode_IR( 0, r1 );
4554 PPCAMode* four_r1 = PPCAMode_IR( 4, r1 );
4556 iselInt64Expr(&isrcHi, &isrcLo, env, e->Iex.Binop.arg2,
4557 IEndianess);
4559 /* Set host rounding mode */
4560 set_FPU_rounding_mode( env, e->Iex.Binop.arg1, IEndianess );
4562 sub_from_sp( env, 16 );
4564 addInstr(env, PPCInstr_Store(4, zero_r1, isrcHi, False/*mode32*/));
4565 addInstr(env, PPCInstr_Store(4, four_r1, isrcLo, False/*mode32*/));
4566 addInstr(env, PPCInstr_FpLdSt(True/*load*/, 8, fdst, zero_r1));
4567 addInstr(env, PPCInstr_FpCftI(True/*I->F*/, False/*int64*/,
4568 e->Iex.Binop.op == Iop_I64StoF64,
4569 True/*fdst is 64 bit*/,
4570 fdst, fdst));
4572 add_to_sp( env, 16 );
4574 ///* Restore default FPU rounding. */
4575 //set_FPU_rounding_default( env );
4576 return fdst;
4582 if (e->tag == Iex_Unop) {
4583 PPCFpOp fpop = Pfp_INVALID;
4584 switch (e->Iex.Unop.op) {
4585 case Iop_NegF64: fpop = Pfp_NEG; break;
4586 case Iop_AbsF64: fpop = Pfp_ABS; break;
4587 case Iop_RSqrtEst5GoodF64: fpop = Pfp_RSQRTE; break;
4588 case Iop_RoundF64toF64_NegINF: fpop = Pfp_FRIM; break;
4589 case Iop_RoundF64toF64_PosINF: fpop = Pfp_FRIP; break;
4590 case Iop_RoundF64toF64_NEAREST: fpop = Pfp_FRIN; break;
4591 case Iop_RoundF64toF64_ZERO: fpop = Pfp_FRIZ; break;
4592 default: break;
4594 if (fpop != Pfp_INVALID) {
4595 HReg fr_dst = newVRegF(env);
4596 HReg fr_src = iselDblExpr(env, e->Iex.Unop.arg, IEndianess);
4597 addInstr(env, PPCInstr_FpUnary(fpop, fr_dst, fr_src));
4598 return fr_dst;
4602 if (e->tag == Iex_Unop) {
4603 switch (e->Iex.Unop.op) {
4604 case Iop_F128HItoF64:
4605 case Iop_F128LOtoF64:
4607 /* put upper/lower 64-bits of F128 into an F64. */
4608 HReg r_aligned16;
4609 HReg fdst = newVRegF(env);
4610 HReg fsrc = iselFp128Expr(env, e->Iex.Unop.arg, IEndianess);
4611 PPCAMode *am_off0, *am_off8, *am_off_arg;
4612 sub_from_sp( env, 32 ); // Move SP down 32 bytes
4614 // get a quadword aligned address within our stack space
4615 r_aligned16 = get_sp_aligned16( env );
4616 am_off0 = PPCAMode_IR( 0, r_aligned16 );
4617 am_off8 = PPCAMode_IR( 8 ,r_aligned16 );
4619 /* store 128-bit floating point value to memory, load low word
4620 * or high to 64-bit destination floating point register
4622 addInstr(env, PPCInstr_AvLdSt(False/*store*/, 16, fsrc, am_off0));
4623 if (IEndianess == Iend_LE) {
4624 if (e->Iex.Binop.op == Iop_F128HItoF64)
4625 am_off_arg = am_off8;
4626 else
4627 am_off_arg = am_off0;
4628 } else {
4629 if (e->Iex.Binop.op == Iop_F128HItoF64)
4630 am_off_arg = am_off0;
4631 else
4632 am_off_arg = am_off8;
4634 addInstr(env,
4635 PPCInstr_FpLdSt( True /*load*/,
4636 8, fdst,
4637 am_off_arg ));
4638 add_to_sp( env, 32 ); // Reset SP
4639 return fdst;
4641 case Iop_ReinterpI64asF64: {
4642 /* Given an I64, produce an IEEE754 double with the same
4643 bit pattern. */
4644 if (!mode64) {
4645 HReg r_srcHi, r_srcLo;
4646 iselInt64Expr( &r_srcHi, &r_srcLo, env, e->Iex.Unop.arg,
4647 IEndianess);
4648 return mk_LoadRR32toFPR( env, r_srcHi, r_srcLo );
4649 } else {
4650 HReg r_src = iselWordExpr_R(env, e->Iex.Unop.arg, IEndianess);
4651 return mk_LoadR64toFPR( env, r_src );
4655 case Iop_F32toF64: {
4656 if (e->Iex.Unop.arg->tag == Iex_Unop &&
4657 e->Iex.Unop.arg->Iex.Unop.op == Iop_ReinterpI32asF32 ) {
4658 e = e->Iex.Unop.arg;
4660 HReg src = iselWordExpr_R(env, e->Iex.Unop.arg, IEndianess);
4661 HReg fr_dst = newVRegF(env);
4662 PPCAMode *am_addr;
4664 sub_from_sp( env, 16 ); // Move SP down 16 bytes
4665 am_addr = PPCAMode_IR( 0, StackFramePtr(env->mode64) );
4667 // store src as Ity_I32's
4668 addInstr(env, PPCInstr_Store( 4, am_addr, src, env->mode64 ));
4670 // load single precision float, but the end results loads into a
4671 // 64-bit FP register -- i.e., F64.
4672 addInstr(env, PPCInstr_FpLdSt(True/*load*/, 4, fr_dst, am_addr));
4674 add_to_sp( env, 16 ); // Reset SP
4675 return fr_dst;
4679 /* this is a no-op */
4680 HReg res = iselFltExpr(env, e->Iex.Unop.arg, IEndianess);
4681 return res;
4683 default:
4684 break;
4688 /* --------- MULTIPLEX --------- */
4689 if (e->tag == Iex_ITE) { // VFD
4690 if (ty == Ity_F64
4691 && typeOfIRExpr(env->type_env,e->Iex.ITE.cond) == Ity_I1) {
4692 HReg fr1 = iselDblExpr(env, e->Iex.ITE.iftrue, IEndianess);
4693 HReg fr0 = iselDblExpr(env, e->Iex.ITE.iffalse, IEndianess);
4694 HReg fr_dst = newVRegF(env);
4695 addInstr(env, PPCInstr_FpUnary( Pfp_MOV, fr_dst, fr0 ));
4696 PPCCondCode cc = iselCondCode(env, e->Iex.ITE.cond, IEndianess);
4697 addInstr(env, PPCInstr_FpCMov( cc, fr_dst, fr1 ));
4698 return fr_dst;
4702 vex_printf("iselDblExpr(ppc): No such tag(%u)\n", e->tag);
4703 ppIRExpr(e);
4704 vpanic("iselDblExpr_wrk(ppc)");
4707 static HReg iselDfp32Expr(ISelEnv* env, const IRExpr* e, IREndness IEndianess)
4709 HReg r = iselDfp32Expr_wrk( env, e, IEndianess );
4710 vassert(hregClass(r) == HRcFlt64);
4711 vassert( hregIsVirtual(r) );
4712 return r;
4715 /* DO NOT CALL THIS DIRECTLY */
4716 static HReg iselDfp32Expr_wrk(ISelEnv* env, const IRExpr* e,
4717 IREndness IEndianess)
4719 Bool mode64 = env->mode64;
4720 IRType ty = typeOfIRExpr( env->type_env, e );
4722 vassert( e );
4723 vassert( ty == Ity_D32 );
4725 /* --------- GET --------- */
4726 if (e->tag == Iex_Get) {
4727 HReg r_dst = newVRegF( env );
4728 PPCAMode* am_addr = PPCAMode_IR( e->Iex.Get.offset,
4729 GuestStatePtr(mode64) );
4730 addInstr( env, PPCInstr_FpLdSt( True/*load*/, 8, r_dst, am_addr ) );
4731 return r_dst;
4734 /* --------- LOAD --------- */
4735 if (e->tag == Iex_Load && e->Iex.Load.end == IEndianess) {
4736 PPCAMode* am_addr;
4737 HReg r_dst = newVRegF(env);
4738 vassert(e->Iex.Load.ty == Ity_D32);
4739 am_addr = iselWordExpr_AMode(env, e->Iex.Load.addr, Ity_D32/*xfer*/,
4740 IEndianess);
4741 addInstr(env, PPCInstr_FpLdSt(True/*load*/, 4, r_dst, am_addr));
4742 return r_dst;
4745 /* --------- OPS --------- */
4746 if (e->tag == Iex_Binop) {
4747 if (e->Iex.Binop.op == Iop_D64toD32) {
4748 HReg fr_dst = newVRegF(env);
4749 HReg fr_src = iselDfp64Expr(env, e->Iex.Binop.arg2, IEndianess);
4750 set_FPU_DFP_rounding_mode( env, e->Iex.Binop.arg1, IEndianess );
4751 addInstr(env, PPCInstr_Dfp64Unary(Pfp_DRSP, fr_dst, fr_src));
4752 return fr_dst;
4756 ppIRExpr( e );
4757 vpanic( "iselDfp32Expr_wrk(ppc)" );
4760 static HReg iselFp128Expr( ISelEnv* env, const IRExpr* e, IREndness IEndianess )
4762 HReg r = iselFp128Expr_wrk( env, e, IEndianess );
4763 vassert(hregClass(r) == HRcVec128);
4764 vassert(hregIsVirtual(r));
4765 return r;
4768 /* DO NOT CALL THIS DIRECTLY */
4769 static HReg iselFp128Expr_wrk( ISelEnv* env, const IRExpr* e,
4770 IREndness IEndianess)
4772 Bool mode64 = env->mode64;
4773 PPCFpOp fpop = Pfp_INVALID;
4774 IRType ty = typeOfIRExpr(env->type_env,e);
4776 vassert(e);
4777 vassert( ty == Ity_F128 );
4779 /* read 128-bit IRTemp */
4780 if (e->tag == Iex_RdTmp) {
4781 return lookupIRTemp(env, e->Iex.RdTmp.tmp);
4784 if (e->tag == Iex_Get) {
4785 /* Guest state vectors are 16byte aligned,
4786 so don't need to worry here */
4787 HReg dst = newVRegV(env);
4789 addInstr(env,
4790 PPCInstr_AvLdSt( True/*load*/, 16, dst,
4791 PPCAMode_IR( e->Iex.Get.offset,
4792 GuestStatePtr(mode64) )));
4793 return dst;
4796 if (e->tag == Iex_Unop) {
4797 switch (e->Iex.Unop.op) {
4798 case Iop_TruncF128toI64S:
4799 fpop = Pfp_TRUNCFPQTOISD; goto do_Un_F128;
4800 case Iop_TruncF128toI32S:
4801 fpop = Pfp_TRUNCFPQTOISW; goto do_Un_F128;
4802 case Iop_TruncF128toI64U:
4803 fpop = Pfp_TRUNCFPQTOIUD; goto do_Un_F128;
4804 case Iop_TruncF128toI32U:
4805 fpop = Pfp_TRUNCFPQTOIUW; goto do_Un_F128;
4806 case Iop_TruncF128toI128U:
4807 fpop = Pfp_TRUNCFPQTOIUQ; goto do_Un_F128;
4808 case Iop_TruncF128toI128S:
4809 fpop = Pfp_TRUNCFPQTOISQ; goto do_Un_F128;
4811 do_Un_F128: {
4812 HReg r_dst = newVRegV(env);
4813 HReg r_src = iselFp128Expr(env, e->Iex.Unop.arg, IEndianess);
4814 addInstr(env, PPCInstr_Fp128Unary(fpop, r_dst, r_src));
4815 return r_dst;
4818 case Iop_F64toF128: {
4819 fpop = Pfp_FPDTOQ;
4820 HReg r_dst = newVRegV(env);
4821 HReg r_src = iselDblExpr(env, e->Iex.Unop.arg, IEndianess);
4822 HReg v128tmp = newVRegV(env);
4823 PPCAMode* zero_r1 = PPCAMode_IR( 0, StackFramePtr(env->mode64) );
4825 /* value is in 64-bit float reg, need to move to 128-bit vector reg */
4826 sub_from_sp( env, 16 );
4827 addInstr(env, PPCInstr_FpLdSt(False/*store*/, 8, r_src, zero_r1));
4828 addInstr(env, PPCInstr_AvLdSt(True/*load*/, 16, v128tmp, zero_r1));
4829 add_to_sp( env, 16 );
4831 addInstr(env, PPCInstr_Fp128Unary(fpop, r_dst, v128tmp));
4832 return r_dst;
4835 case Iop_I64StoF128:
4836 fpop = Pfp_IDSTOQ; goto do_Un_int_F128;
4837 case Iop_I64UtoF128:
4838 fpop = Pfp_IDUTOQ; goto do_Un_int_F128;
4840 do_Un_int_F128: {
4841 HReg r_dst = newVRegV(env);
4842 HReg tmp = newVRegV(env);
4843 HReg r_src = iselWordExpr_R(env, e->Iex.Unop.arg, IEndianess);
4844 PPCAMode *am_offhi, *am_offlo;
4845 HReg r_aligned16;
4847 /* source is in a 64-bit integer reg, move to 128-bit float reg
4848 * do this via the stack (easy, convenient, etc).
4850 sub_from_sp( env, 32 ); // Move SP down
4852 /* Get a quadword aligned address within our stack space */
4853 r_aligned16 = get_sp_aligned16( env );
4855 am_offlo = PPCAMode_IR( 0, r_aligned16 );
4856 am_offhi = PPCAMode_IR( 8, r_aligned16 );
4858 /* Inst only uses the upper 64-bit of the source */
4859 addInstr(env, PPCInstr_Load(8, r_src, am_offhi, mode64));
4861 /* Fetch result back from stack. */
4862 addInstr(env, PPCInstr_AvLdSt(True/*load*/, 16, tmp, am_offlo));
4864 add_to_sp( env, 32 ); // Reset SP
4866 addInstr(env, PPCInstr_Fp128Unary(fpop, r_dst, tmp));
4867 return r_dst;
4870 case Iop_ReinterpI128asF128:
4872 PPCAMode* am_addr;
4873 PPCAMode* am_addr4;
4874 HReg rHi = INVALID_HREG;
4875 HReg rLo = INVALID_HREG;
4876 HReg dst = newVRegV(env);
4878 iselInt128Expr(&rHi,&rLo, env, e->Iex.Unop.arg, IEndianess);
4880 sub_from_sp( env, 16 ); // Move SP down 16 bytes
4881 am_addr = PPCAMode_IR( 0, StackFramePtr(mode64) );
4882 am_addr4 = advance4(env, am_addr);
4884 // store the two 64-bit pars
4885 addInstr(env, PPCInstr_Store( 8, am_addr, rHi, mode64 ));
4886 addInstr(env, PPCInstr_Store( 8, am_addr4, rLo, mode64 ));
4888 // load as Ity_F128
4889 addInstr(env, PPCInstr_AvLdSt( True/*fetch*/, 16, dst, am_addr ));
4891 add_to_sp( env, 16 ); // Reset SP
4892 return dst;
4895 default:
4896 break;
4897 } /* switch (e->Iex.Unop.op) */
4898 } /* if (e->tag == Iex_Unop) */
4900 if (e->tag == Iex_Binop) {
4901 switch (e->Iex.Binop.op) {
4903 case Iop_F64HLtoF128:
4905 HReg dst = newVRegV(env);
4906 HReg r_src_hi = iselDblExpr(env, e->Iex.Binop.arg1, IEndianess);
4907 HReg r_src_lo = iselDblExpr(env, e->Iex.Binop.arg2, IEndianess);
4908 PPCAMode *am_offhi, *am_offlo;
4909 HReg r_aligned16;
4911 /* do this via the stack (easy, convenient, etc) */
4912 sub_from_sp( env, 16 ); // Move SP down
4914 /* Get a quadword aligned address within our stack space */
4915 r_aligned16 = get_sp_aligned16( env );
4917 am_offlo = PPCAMode_IR( 0, r_aligned16 );
4918 am_offhi = PPCAMode_IR( 8, r_aligned16 );
4920 addInstr(env, PPCInstr_FpLdSt(False/*store*/, 8,
4921 r_src_lo, am_offlo));
4922 addInstr(env, PPCInstr_FpLdSt(False/*store*/, 8,
4923 r_src_hi, am_offhi));
4925 /* Fetch result back from stack. */
4926 addInstr(env, PPCInstr_AvLdSt(True/*load*/, 16,
4927 dst, am_offlo));
4929 add_to_sp( env, 16 ); // Reset SP
4930 return dst;
4932 case Iop_F128toI128S:
4934 HReg dst = newVRegV(env);
4935 HReg r_src = iselFp128Expr(env, e->Iex.Binop.arg2, IEndianess);
4936 PPCRI* rm = iselWordExpr_RI(env, e->Iex.Binop.arg1, IEndianess);
4937 /* Note: rm is a set of three bit fields that specify the
4938 * rounding mode and which of the two instructions to issue.
4940 addInstr(env, PPCInstr_AvBinaryInt(Pav_F128toI128S, dst,
4941 r_src, rm));
4942 return dst;
4944 case Iop_RndF128:
4946 HReg dst = newVRegV(env);
4947 HReg r_src = iselFp128Expr(env, e->Iex.Binop.arg2, IEndianess);
4948 PPCRI* rm = iselWordExpr_RI(env, e->Iex.Binop.arg1, IEndianess);
4949 /* Note: rm is a set of three bit fields that specify the
4950 * rounding mode and which of the two instructions to issue.
4952 addInstr(env, PPCInstr_AvBinaryInt(Pav_ROUNDFPQ, dst,
4953 r_src, rm));
4954 return dst;
4956 case Iop_SqrtF128:
4957 if (FPU_rounding_mode_isOdd(e->Iex.Binop.arg1)) {
4958 /* use rounding mode specified by RN. Issue inst with R0 = 0 */
4959 fpop = Pfp_FPSQRTQRNDODD;
4960 goto do_Bin_F128;
4961 } else {
4962 set_FPU_rounding_mode( env, e->Iex.Binop.arg1, IEndianess );
4963 fpop = Pfp_FPSQRTQ;
4964 goto do_Bin_F128;
4966 case Iop_F128toF32:
4967 if (FPU_rounding_mode_isOdd(e->Iex.Binop.arg1)) {
4968 /* use rounding mode specified by RN. Issue inst with R0 = 0 */
4969 fpop = Pfp_FPQTOWRNDODD;
4970 goto do_Bin_F128;
4971 } else {
4972 set_FPU_rounding_mode( env, e->Iex.Binop.arg1, IEndianess );
4973 fpop = Pfp_FPQTOW;
4974 goto do_Bin_F128;
4976 do_Bin_F128: {
4977 HReg r_dst = newVRegV(env);
4978 HReg r_src = iselFp128Expr(env, e->Iex.Binop.arg2, IEndianess);
4979 addInstr(env, PPCInstr_Fp128Unary(fpop, r_dst, r_src));
4980 return r_dst;
4983 case Iop_I128StoF128:
4984 fpop = Pfp_IQSTOQ; goto do_Un_I128_F128_DFP_conversions;
4985 case Iop_I128UtoF128:
4986 fpop = Pfp_IQUTOQ; goto do_Un_I128_F128_DFP_conversions;
4987 do_Un_I128_F128_DFP_conversions: {
4988 PPCAMode* am_addr;
4989 PPCAMode* am_addr4;
4990 HReg rHi, rLo;
4991 HReg r_tmp = newVRegV(env);
4992 HReg r_dst = newVRegV(env);
4994 iselInt128Expr(&rHi,&rLo, env, e->Iex.Binop.arg2, IEndianess);
4996 /* Set host rounding mode for the conversion instruction */
4997 set_FPU_rounding_mode( env, e->Iex.Binop.arg1, IEndianess );
4999 sub_from_sp( env, 16 );
5001 am_addr = PPCAMode_IR( 0, StackFramePtr(mode64) );
5002 am_addr4 = advance4(env, am_addr);
5004 // store the two 64-bit halfs of the I128
5005 addInstr(env, PPCInstr_Store( 8, am_addr, rHi, mode64 ));
5006 addInstr(env, PPCInstr_Store( 8, am_addr4, rLo, mode64 ));
5008 /* Fetch the I128 into an V128 register */
5009 addInstr(env, PPCInstr_AvLdSt( True/*fetch*/, 16, r_tmp, am_addr ));
5010 addInstr(env, PPCInstr_Fp128Unary(fpop, r_dst, r_tmp));
5012 add_to_sp( env, 16 ); // Reset SP
5014 return r_dst;
5017 default:
5018 break;
5019 } /* switch (e->Iex.Binop.op) */
5020 } /* if (e->tag == Iex_Binop) */
5022 if (e->tag == Iex_Triop) {
5023 IRTriop *triop = e->Iex.Triop.details;
5025 switch (triop->op) {
5026 case Iop_AddF128:
5027 if (FPU_rounding_mode_isOdd(triop->arg1)) {
5028 /* use rounding mode specified by RN. Issue inst with R0 = 0 */
5029 fpop = Pfp_FPADDQRNDODD; goto do_Tri_F128;
5030 } else {
5031 set_FPU_rounding_mode( env, triop->arg1, IEndianess );
5032 fpop = Pfp_FPADDQ; goto do_Tri_F128;
5034 case Iop_SubF128:
5035 if (FPU_rounding_mode_isOdd(triop->arg1)) {
5036 /* use rounding mode specified by RN. Issue inst with R0 = 0 */
5037 fpop = Pfp_FPSUBQRNDODD; goto do_Tri_F128;
5038 } else {
5039 set_FPU_rounding_mode( env, triop->arg1, IEndianess );
5040 fpop = Pfp_FPSUBQ; goto do_Tri_F128;
5042 case Iop_MulF128:
5043 if (FPU_rounding_mode_isOdd(triop->arg1)) {
5044 /* use rounding mode specified by RN. Issue inst with R0 = 0 */
5045 fpop = Pfp_FPMULQRNDODD; goto do_Tri_F128;
5046 } else {
5047 set_FPU_rounding_mode( env, triop->arg1, IEndianess );
5048 fpop = Pfp_FPMULQ; goto do_Tri_F128;
5050 case Iop_DivF128:
5051 if (FPU_rounding_mode_isOdd(triop->arg1)) {
5052 /* use rounding mode specified by RN. Issue inst with R0 = 0 */
5053 fpop = Pfp_FPDIVQRNDODD; goto do_Tri_F128;
5054 } else {
5055 set_FPU_rounding_mode( env, triop->arg1, IEndianess );
5056 fpop = Pfp_FPDIVQ; goto do_Tri_F128;
5058 case Iop_MAddF128:
5059 if (FPU_rounding_mode_isOdd(triop->arg1)) {
5060 /* use rounding mode specified by RN. Issue inst with R0 = 0 */
5061 fpop = Pfp_FPMULADDQRNDODD; goto do_Tri_F128;
5062 } else {
5063 set_FPU_rounding_mode( env, triop->arg1, IEndianess );
5064 fpop = Pfp_FPMULADDQ; goto do_Tri_F128;
5067 do_Tri_F128: {
5068 HReg r_dst = newVRegV(env);
5069 HReg r_srcL = iselFp128Expr(env, triop->arg2, IEndianess);
5070 HReg r_srcR = iselFp128Expr(env, triop->arg3, IEndianess);
5072 addInstr(env, PPCInstr_Fp128Binary(fpop, r_dst, r_srcL, r_srcR));
5073 return r_dst;
5076 default:
5077 break;
5078 } /* switch (e->Iex.Triop.op) */
5080 } /* if (e->tag == Iex_Trinop) */
5082 if (e->tag == Iex_Qop) {
5083 IRQop *qop = e->Iex.Qop.details;
5085 switch (qop->op) {
5086 case Iop_MAddF128:
5087 if (FPU_rounding_mode_isOdd(qop->arg1)) {
5088 /* use rounding mode specified by RN. Issue inst with R0 = 0 */
5089 fpop = Pfp_FPMULADDQRNDODD; goto do_Quad_F128;
5090 } else {
5091 set_FPU_rounding_mode( env, qop->arg1, IEndianess );
5092 fpop = Pfp_FPMULADDQ; goto do_Quad_F128;
5094 case Iop_MSubF128:
5095 if (FPU_rounding_mode_isOdd(qop->arg1)) {
5096 /* use rounding mode specified by RN. Issue inst with R0 = 0 */
5097 fpop = Pfp_FPMULSUBQRNDODD; goto do_Quad_F128;
5098 } else {
5099 set_FPU_rounding_mode( env, qop->arg1, IEndianess );
5100 fpop = Pfp_FPMULSUBQ; goto do_Quad_F128;
5102 case Iop_NegMAddF128:
5103 if (FPU_rounding_mode_isOdd(qop->arg1)) {
5104 /* use rounding mode specified by RN. Issue inst with R0 = 0 */
5105 fpop = Pfp_FPNEGMULADDQRNDODD; goto do_Quad_F128;
5106 } else {
5107 set_FPU_rounding_mode( env, qop->arg1, IEndianess );
5108 fpop = Pfp_FPNEGMULADDQ; goto do_Quad_F128;
5110 case Iop_NegMSubF128:
5111 if (FPU_rounding_mode_isOdd(qop->arg1)) {
5112 /* use rounding mode specified by RN. Issue inst with R0 = 0 */
5113 fpop = Pfp_FPNEGMULSUBQRNDODD; goto do_Quad_F128;
5114 } else {
5115 set_FPU_rounding_mode( env, qop->arg1, IEndianess );
5116 fpop = Pfp_FPNEGMULSUBQ; goto do_Quad_F128;
5119 do_Quad_F128: {
5120 HReg r_dst = iselFp128Expr(env, qop->arg3,
5121 IEndianess);
5122 HReg r_srcL = iselFp128Expr(env, qop->arg2,
5123 IEndianess);
5124 HReg r_srcR = iselFp128Expr(env, qop->arg4,
5125 IEndianess);
5127 addInstr(env, PPCInstr_Fp128Ternary(fpop, r_dst, r_srcL, r_srcR));
5128 return r_dst;
5131 default:
5132 break;
5134 } /* if (e->tag == Iex_Qop) */
5136 ppIRExpr( e );
5137 vpanic( "iselFp128Expr(ppc64)" );
5140 static HReg iselDfp64Expr(ISelEnv* env, const IRExpr* e, IREndness IEndianess)
5142 HReg r = iselDfp64Expr_wrk( env, e, IEndianess );
5143 vassert(hregClass(r) == HRcFlt64);
5144 vassert( hregIsVirtual(r) );
5145 return r;
5148 /* DO NOT CALL THIS DIRECTLY */
5149 static HReg iselDfp64Expr_wrk(ISelEnv* env, const IRExpr* e,
5150 IREndness IEndianess)
5152 Bool mode64 = env->mode64;
5153 IRType ty = typeOfIRExpr( env->type_env, e );
5154 HReg r_dstHi, r_dstLo;
5156 vassert( e );
5157 vassert( ty == Ity_D64 );
5159 if (e->tag == Iex_RdTmp) {
5160 return lookupIRTemp( env, e->Iex.RdTmp.tmp );
5163 /* --------- GET --------- */
5164 if (e->tag == Iex_Get) {
5165 HReg r_dst = newVRegF( env );
5166 PPCAMode* am_addr = PPCAMode_IR( e->Iex.Get.offset,
5167 GuestStatePtr(mode64) );
5168 addInstr( env, PPCInstr_FpLdSt( True/*load*/, 8, r_dst, am_addr ) );
5169 return r_dst;
5172 if (e->tag == Iex_Load && e->Iex.Load.end == IEndianess) {
5173 PPCAMode* am_addr;
5174 HReg r_dst = newVRegF(env);
5175 vassert(e->Iex.Load.ty == Ity_D64);
5176 am_addr = iselWordExpr_AMode(env, e->Iex.Load.addr, Ity_D64/*xfer*/,
5177 IEndianess);
5178 addInstr(env, PPCInstr_FpLdSt(True/*load*/, 8, r_dst, am_addr));
5179 return r_dst;
5182 /* --------- OPS --------- */
5183 if (e->tag == Iex_Qop) {
5184 HReg r_dst = newVRegF( env );
5185 return r_dst;
5188 if (e->tag == Iex_Unop) {
5189 HReg fr_dst = newVRegF(env);
5190 switch (e->Iex.Unop.op) {
5191 case Iop_ReinterpI64asD64: {
5192 /* Given an I64, produce an IEEE754 DFP with the same
5193 bit pattern. */
5194 if (!mode64) {
5195 HReg r_srcHi, r_srcLo;
5196 iselInt64Expr( &r_srcHi, &r_srcLo, env, e->Iex.Unop.arg,
5197 IEndianess);
5198 return mk_LoadRR32toFPR( env, r_srcHi, r_srcLo );
5199 } else {
5200 HReg r_src = iselWordExpr_R(env, e->Iex.Unop.arg, IEndianess);
5201 return mk_LoadR64toFPR( env, r_src );
5204 case Iop_D32toD64: {
5205 HReg fr_src = iselDfp32Expr(env, e->Iex.Unop.arg, IEndianess);
5206 addInstr(env, PPCInstr_Dfp64Unary(Pfp_DCTDP, fr_dst, fr_src));
5207 return fr_dst;
5209 case Iop_D128HItoD64:
5210 iselDfp128Expr( &r_dstHi, &r_dstLo, env, e->Iex.Unop.arg,
5211 IEndianess );
5212 return r_dstHi;
5213 case Iop_D128LOtoD64:
5214 iselDfp128Expr( &r_dstHi, &r_dstLo, env, e->Iex.Unop.arg,
5215 IEndianess );
5216 return r_dstLo;
5217 case Iop_InsertExpD64: {
5218 HReg fr_srcL = iselDblExpr(env, e->Iex.Binop.arg1, IEndianess);
5219 HReg fr_srcR = iselDblExpr(env, e->Iex.Binop.arg2, IEndianess);
5221 addInstr(env, PPCInstr_Dfp64Binary(Pfp_DIEX, fr_dst, fr_srcL,
5222 fr_srcR));
5223 return fr_dst;
5225 default:
5226 vex_printf( "ERROR: iselDfp64Expr_wrk, UNKNOWN unop case %d\n",
5227 (Int)e->Iex.Unop.op );
5231 if (e->tag == Iex_Binop) {
5232 PPCFpOp fpop = Pfp_INVALID;
5233 HReg fr_dst = newVRegF(env);
5235 switch (e->Iex.Binop.op) {
5236 case Iop_D128toD64: fpop = Pfp_DRDPQ; break;
5237 case Iop_D64toD32: fpop = Pfp_DRSP; break;
5238 case Iop_I64StoD64: fpop = Pfp_DCFFIX; break;
5239 case Iop_RoundD64toInt: fpop = Pfp_DRINTN; break;
5240 default: break;
5242 if (fpop == Pfp_DRDPQ) {
5243 HReg r_srcHi = newVRegF(env);
5244 HReg r_srcLo = newVRegF(env);
5246 set_FPU_DFP_rounding_mode( env, e->Iex.Binop.arg1, IEndianess );
5247 iselDfp128Expr(&r_srcHi, &r_srcLo, env, e->Iex.Binop.arg2,
5248 IEndianess);
5249 addInstr(env, PPCInstr_DfpD128toD64(fpop, fr_dst, r_srcHi, r_srcLo));
5250 return fr_dst;
5252 } else if (fpop == Pfp_DRINTN) {
5253 HReg fr_src = newVRegF(env);
5254 PPCRI* r_rmc = iselWordExpr_RI(env, e->Iex.Binop.arg1, IEndianess);
5256 /* NOTE, this IOP takes a DFP value and rounds to the
5257 * neares floating point integer value, i.e. fractional part
5258 * is zero. The result is a decimal floating point number.
5259 * the INT in the name is a bit misleading.
5261 fr_src = iselDfp64Expr(env, e->Iex.Binop.arg2, IEndianess);
5262 addInstr(env, PPCInstr_DfpRound(fr_dst, fr_src, r_rmc));
5263 return fr_dst;
5265 } else if (fpop == Pfp_DRSP) {
5266 HReg fr_src = iselDfp64Expr(env, e->Iex.Binop.arg2, IEndianess);
5267 set_FPU_DFP_rounding_mode( env, e->Iex.Binop.arg1, IEndianess );
5268 addInstr(env, PPCInstr_Dfp64Unary(fpop, fr_dst, fr_src));
5269 return fr_dst;
5271 } else if (fpop == Pfp_DCFFIX) {
5272 HReg fr_src = newVRegF(env);
5273 PPCAMode* zero_r1 = PPCAMode_IR( 0, StackFramePtr(env->mode64) );
5275 set_FPU_DFP_rounding_mode( env, e->Iex.Binop.arg1, IEndianess );
5276 sub_from_sp( env, 16 );
5278 // put the I64 value into a floating point register
5279 if (mode64) {
5280 HReg tmp = iselWordExpr_R(env, e->Iex.Binop.arg2, IEndianess);
5282 addInstr(env, PPCInstr_Store(8, zero_r1, tmp, True/*mode64*/));
5283 } else {
5284 HReg tmpHi, tmpLo;
5285 PPCAMode* four_r1 = PPCAMode_IR( 4, StackFramePtr(env->mode64) );
5287 iselInt64Expr(&tmpHi, &tmpLo, env, e->Iex.Binop.arg2,
5288 IEndianess);
5289 addInstr(env, PPCInstr_Store(4, zero_r1, tmpHi, False/*mode32*/));
5290 addInstr(env, PPCInstr_Store(4, four_r1, tmpLo, False/*mode32*/));
5293 addInstr(env, PPCInstr_FpLdSt(True/*load*/, 8, fr_src, zero_r1));
5294 addInstr(env, PPCInstr_Dfp64Unary(fpop, fr_dst, fr_src));
5295 add_to_sp( env, 16 );
5296 return fr_dst;
5299 switch (e->Iex.Binop.op) {
5300 /* shift instructions D64, I32 -> D64 */
5301 case Iop_ShlD64: fpop = Pfp_DSCLI; break;
5302 case Iop_ShrD64: fpop = Pfp_DSCRI; break;
5303 default: break;
5305 if (fpop != Pfp_INVALID) {
5306 HReg fr_src = iselDfp64Expr(env, e->Iex.Binop.arg1, IEndianess);
5307 PPCRI* shift = iselWordExpr_RI(env, e->Iex.Binop.arg2, IEndianess);
5309 /* shift value must be an immediate value */
5310 vassert(shift->tag == Pri_Imm);
5312 addInstr(env, PPCInstr_DfpShift(fpop, fr_dst, fr_src, shift));
5313 return fr_dst;
5316 switch (e->Iex.Binop.op) {
5317 case Iop_InsertExpD64:
5318 fpop = Pfp_DIEX;
5319 break;
5320 default: break;
5322 if (fpop != Pfp_INVALID) {
5323 HReg fr_srcL = newVRegF(env);
5324 HReg fr_srcR = iselDfp64Expr(env, e->Iex.Binop.arg2, IEndianess);
5325 PPCAMode* zero_r1 = PPCAMode_IR( 0, StackFramePtr(env->mode64) );
5326 sub_from_sp( env, 16 );
5328 if (env->mode64) {
5329 // put the I64 value into a floating point reg
5330 HReg tmp = iselWordExpr_R(env, e->Iex.Binop.arg1, IEndianess);
5332 addInstr(env, PPCInstr_Store(8, zero_r1, tmp, True/*mode64*/));
5333 } else {
5334 // put the I64 register pair into a floating point reg
5335 HReg tmpHi;
5336 HReg tmpLo;
5337 PPCAMode* four_r1 = PPCAMode_IR( 4, StackFramePtr(env->mode64) );
5339 iselInt64Expr(&tmpHi, &tmpLo, env, e->Iex.Binop.arg1,
5340 IEndianess);
5341 addInstr(env, PPCInstr_Store(4, zero_r1, tmpHi, False/*!mode64*/));
5342 addInstr(env, PPCInstr_Store(4, four_r1, tmpLo, False/*!mode64*/));
5344 addInstr(env, PPCInstr_FpLdSt(True/*load*/, 8, fr_srcL, zero_r1));
5345 addInstr(env, PPCInstr_Dfp64Binary(fpop, fr_dst, fr_srcL,
5346 fr_srcR));
5347 add_to_sp( env, 16 );
5348 return fr_dst;
5352 if (e->tag == Iex_Triop) {
5353 IRTriop *triop = e->Iex.Triop.details;
5354 PPCFpOp fpop = Pfp_INVALID;
5356 switch (triop->op) {
5357 case Iop_AddD64:
5358 fpop = Pfp_DFPADD;
5359 break;
5360 case Iop_SubD64:
5361 fpop = Pfp_DFPSUB;
5362 break;
5363 case Iop_MulD64:
5364 fpop = Pfp_DFPMUL;
5365 break;
5366 case Iop_DivD64:
5367 fpop = Pfp_DFPDIV;
5368 break;
5369 default:
5370 break;
5372 if (fpop != Pfp_INVALID) {
5373 HReg r_dst = newVRegF( env );
5374 HReg r_srcL = iselDfp64Expr( env, triop->arg2, IEndianess );
5375 HReg r_srcR = iselDfp64Expr( env, triop->arg3, IEndianess );
5377 set_FPU_DFP_rounding_mode( env, triop->arg1, IEndianess );
5378 addInstr( env, PPCInstr_Dfp64Binary( fpop, r_dst, r_srcL, r_srcR ) );
5379 return r_dst;
5382 switch (triop->op) {
5383 case Iop_QuantizeD64: fpop = Pfp_DQUA; break;
5384 case Iop_SignificanceRoundD64: fpop = Pfp_RRDTR; break;
5385 default: break;
5387 if (fpop == Pfp_DQUA) {
5388 HReg r_dst = newVRegF(env);
5389 HReg r_srcL = iselDfp64Expr(env, triop->arg2, IEndianess);
5390 HReg r_srcR = iselDfp64Expr(env, triop->arg3, IEndianess);
5391 PPCRI* rmc = iselWordExpr_RI(env, triop->arg1, IEndianess);
5392 addInstr(env, PPCInstr_DfpQuantize(fpop, r_dst, r_srcL, r_srcR,
5393 rmc));
5394 return r_dst;
5396 } else if (fpop == Pfp_RRDTR) {
5397 HReg r_dst = newVRegF(env);
5398 HReg r_srcL = newVRegF(env);
5399 HReg r_srcR = iselDfp64Expr(env, triop->arg3, IEndianess);
5400 PPCRI* rmc = iselWordExpr_RI(env, triop->arg1, IEndianess);
5401 PPCAMode* zero_r1 = PPCAMode_IR( 0, StackFramePtr(env->mode64) );
5402 HReg i8_val = iselWordExpr_R(env, triop->arg2, IEndianess);
5404 /* Move I8 to float register to issue instruction */
5405 sub_from_sp( env, 16 );
5406 if (mode64)
5407 addInstr(env, PPCInstr_Store(8, zero_r1, i8_val, True/*mode64*/));
5408 else
5409 addInstr(env, PPCInstr_Store(4, zero_r1, i8_val, False/*mode32*/));
5411 addInstr(env, PPCInstr_FpLdSt(True/*load*/, 8, r_srcL, zero_r1));
5412 add_to_sp( env, 16 );
5414 // will set TE and RMC when issuing instruction
5415 addInstr(env, PPCInstr_DfpQuantize(fpop, r_dst, r_srcL, r_srcR, rmc));
5416 return r_dst;
5420 ppIRExpr( e );
5421 vpanic( "iselDfp64Expr_wrk(ppc)" );
5424 static void iselDfp128Expr(HReg* rHi, HReg* rLo, ISelEnv* env, const IRExpr* e,
5425 IREndness IEndianess)
5427 iselDfp128Expr_wrk( rHi, rLo, env, e, IEndianess );
5428 vassert( hregIsVirtual(*rHi) );
5429 vassert( hregIsVirtual(*rLo) );
5432 /* DO NOT CALL THIS DIRECTLY */
5433 static void iselDfp128Expr_wrk(HReg* rHi, HReg *rLo, ISelEnv* env,
5434 const IRExpr* e, IREndness IEndianess)
5436 vassert( e );
5437 vassert( typeOfIRExpr(env->type_env,e) == Ity_D128 );
5439 /* read 128-bit IRTemp */
5440 if (e->tag == Iex_RdTmp) {
5441 lookupIRTempPair( rHi, rLo, env, e->Iex.RdTmp.tmp );
5442 return;
5445 if (e->tag == Iex_Unop) {
5446 HReg r_dstHi = newVRegF(env);
5447 HReg r_dstLo = newVRegF(env);
5449 if (e->Iex.Unop.op == Iop_I64StoD128) {
5450 HReg fr_src = newVRegF(env);
5451 PPCAMode* zero_r1 = PPCAMode_IR( 0, StackFramePtr(env->mode64) );
5453 // put the I64 value into a floating point reg
5454 if (env->mode64) {
5455 HReg tmp = iselWordExpr_R(env, e->Iex.Unop.arg, IEndianess);
5456 addInstr(env, PPCInstr_Store(8, zero_r1, tmp, True/*mode64*/));
5457 } else {
5458 HReg tmpHi, tmpLo;
5459 PPCAMode* four_r1 = PPCAMode_IR( 4, StackFramePtr(env->mode64) );
5461 iselInt64Expr(&tmpHi, &tmpLo, env, e->Iex.Unop.arg,
5462 IEndianess);
5463 addInstr(env, PPCInstr_Store(4, zero_r1, tmpHi, False/*mode32*/));
5464 addInstr(env, PPCInstr_Store(4, four_r1, tmpLo, False/*mode32*/));
5467 addInstr(env, PPCInstr_FpLdSt(True/*load*/, 8, fr_src, zero_r1));
5468 addInstr(env, PPCInstr_DfpI64StoD128(Pfp_DCFFIXQ, r_dstHi, r_dstLo,
5469 fr_src));
5472 if (e->Iex.Unop.op == Iop_D64toD128) {
5473 HReg r_src = iselDfp64Expr(env, e->Iex.Unop.arg, IEndianess);
5475 /* Source is 64bit, result is 128 bit. High 64bit source arg,
5476 * is ignored by the instruction. Set high arg to r_src just
5477 * to meet the vassert tests.
5479 addInstr(env, PPCInstr_Dfp128Unary(Pfp_DCTQPQ, r_dstHi, r_dstLo,
5480 r_src, r_src));
5482 *rHi = r_dstHi;
5483 *rLo = r_dstLo;
5484 return;
5487 /* --------- OPS --------- */
5488 if (e->tag == Iex_Binop) {
5489 HReg r_srcHi;
5490 HReg r_srcLo;
5492 switch (e->Iex.Binop.op) {
5493 case Iop_D64HLtoD128:
5494 r_srcHi = iselDfp64Expr( env, e->Iex.Binop.arg1, IEndianess );
5495 r_srcLo = iselDfp64Expr( env, e->Iex.Binop.arg2, IEndianess );
5496 *rHi = r_srcHi;
5497 *rLo = r_srcLo;
5498 return;
5499 break;
5500 case Iop_D128toD64: {
5501 PPCFpOp fpop = Pfp_DRDPQ;
5502 HReg fr_dst = newVRegF(env);
5504 set_FPU_DFP_rounding_mode( env, e->Iex.Binop.arg1, IEndianess );
5505 iselDfp128Expr(&r_srcHi, &r_srcLo, env, e->Iex.Binop.arg2,
5506 IEndianess);
5507 addInstr(env, PPCInstr_DfpD128toD64(fpop, fr_dst, r_srcHi, r_srcLo));
5509 /* Need to meet the interface spec but the result is
5510 * just 64-bits so send the result back in both halfs.
5512 *rHi = fr_dst;
5513 *rLo = fr_dst;
5514 return;
5516 case Iop_ShlD128:
5517 case Iop_ShrD128: {
5518 HReg fr_dst_hi = newVRegF(env);
5519 HReg fr_dst_lo = newVRegF(env);
5520 PPCRI* shift = iselWordExpr_RI(env, e->Iex.Binop.arg2, IEndianess);
5521 PPCFpOp fpop = Pfp_DSCLIQ; /* fix later if necessary */
5523 iselDfp128Expr(&r_srcHi, &r_srcLo, env, e->Iex.Binop.arg1,
5524 IEndianess);
5526 if (e->Iex.Binop.op == Iop_ShrD128)
5527 fpop = Pfp_DSCRIQ;
5529 addInstr(env, PPCInstr_DfpShift128(fpop, fr_dst_hi, fr_dst_lo,
5530 r_srcHi, r_srcLo, shift));
5532 *rHi = fr_dst_hi;
5533 *rLo = fr_dst_lo;
5534 return;
5536 case Iop_RoundD128toInt: {
5537 HReg r_dstHi = newVRegF(env);
5538 HReg r_dstLo = newVRegF(env);
5539 PPCRI* r_rmc = iselWordExpr_RI(env, e->Iex.Binop.arg1, IEndianess);
5541 // will set R and RMC when issuing instruction
5542 iselDfp128Expr(&r_srcHi, &r_srcLo, env, e->Iex.Binop.arg2,
5543 IEndianess);
5545 addInstr(env, PPCInstr_DfpRound128(r_dstHi, r_dstLo,
5546 r_srcHi, r_srcLo, r_rmc));
5547 *rHi = r_dstHi;
5548 *rLo = r_dstLo;
5549 return;
5551 case Iop_InsertExpD128: {
5552 HReg r_dstHi = newVRegF(env);
5553 HReg r_dstLo = newVRegF(env);
5554 HReg r_srcL = newVRegF(env);
5555 PPCAMode* zero_r1 = PPCAMode_IR( 0, StackFramePtr(env->mode64) );
5556 r_srcHi = newVRegF(env);
5557 r_srcLo = newVRegF(env);
5559 iselDfp128Expr(&r_srcHi, &r_srcLo, env, e->Iex.Binop.arg2,
5560 IEndianess);
5562 /* Move I64 to float register to issue instruction */
5563 if (env->mode64) {
5564 HReg tmp = iselWordExpr_R(env, e->Iex.Binop.arg1, IEndianess);
5565 addInstr(env, PPCInstr_Store(8, zero_r1, tmp, True/*mode64*/));
5566 } else {
5567 HReg tmpHi, tmpLo;
5568 PPCAMode* four_r1 = PPCAMode_IR( 4, StackFramePtr(env->mode64) );
5570 iselInt64Expr(&tmpHi, &tmpLo, env, e->Iex.Unop.arg,
5571 IEndianess);
5572 addInstr(env, PPCInstr_Store(4, zero_r1, tmpHi, False/*mode32*/));
5573 addInstr(env, PPCInstr_Store(4, four_r1, tmpLo, False/*mode32*/));
5576 addInstr(env, PPCInstr_FpLdSt(True/*load*/, 8, r_srcL, zero_r1));
5577 addInstr(env, PPCInstr_InsertExpD128(Pfp_DIEXQ,
5578 r_dstHi, r_dstLo,
5579 r_srcL, r_srcHi, r_srcLo));
5580 *rHi = r_dstHi;
5581 *rLo = r_dstLo;
5582 return;
5585 case Iop_I128StoD128: {
5586 HReg tmpF128 = newVRegV(env);
5587 HReg FdstHi = newVRegF(env);
5588 HReg FdstLo = newVRegF(env);
5589 HReg srcLo = newVRegI(env);
5590 HReg srcHi = newVRegI(env);
5591 PPCAMode* am_addr;
5592 PPCAMode* am_addr4;
5594 set_FPU_DFP_rounding_mode( env, e->Iex.Binop.arg1, IEndianess );
5596 // Get the I128 value, store into a VSR register
5597 iselInt128Expr(&srcHi, &srcLo, env, e->Iex.Binop.arg2, IEndianess);
5599 sub_from_sp( env, 16 ); // Move SP down 16 bytes
5600 am_addr = PPCAMode_IR( 0, StackFramePtr(env->mode64) );
5601 am_addr4 = advance4(env, am_addr);
5603 addInstr(env, PPCInstr_Store( 8, am_addr, srcHi, env->mode64 ));
5604 addInstr(env, PPCInstr_Store( 8, am_addr4, srcLo, env->mode64 ));
5606 // load as Ity_F128
5607 addInstr(env, PPCInstr_AvLdSt( True/*fetch*/, 16, tmpF128, am_addr ));
5609 // do conversion
5610 addInstr( env, PPCInstr_XFormUnary994( Px_IQSTODFP, FdstHi, FdstLo,
5611 tmpF128 ) );
5613 *rHi = FdstHi;
5614 *rLo = FdstLo;
5615 add_to_sp( env, 16 ); // Reset SP
5616 return;
5619 default:
5620 vex_printf( "ERROR: iselDfp128Expr_wrk, UNKNOWN binop case %d\n",
5621 (Int)e->Iex.Binop.op );
5622 break;
5626 if (e->tag == Iex_Triop) {
5627 IRTriop *triop = e->Iex.Triop.details;
5628 PPCFpOp fpop = Pfp_INVALID;
5629 HReg r_dstHi = newVRegF(env);
5630 HReg r_dstLo = newVRegF(env);
5632 switch (triop->op) {
5633 case Iop_AddD128:
5634 fpop = Pfp_DFPADDQ;
5635 break;
5636 case Iop_SubD128:
5637 fpop = Pfp_DFPSUBQ;
5638 break;
5639 case Iop_MulD128:
5640 fpop = Pfp_DFPMULQ;
5641 break;
5642 case Iop_DivD128:
5643 fpop = Pfp_DFPDIVQ;
5644 break;
5645 default:
5646 break;
5649 if (fpop != Pfp_INVALID) {
5650 HReg r_srcRHi = newVRegV( env );
5651 HReg r_srcRLo = newVRegV( env );
5653 /* dst will be used to pass in the left operand and get the result. */
5654 iselDfp128Expr( &r_dstHi, &r_dstLo, env, triop->arg2, IEndianess );
5655 iselDfp128Expr( &r_srcRHi, &r_srcRLo, env, triop->arg3, IEndianess );
5656 set_FPU_DFP_rounding_mode( env, triop->arg1, IEndianess );
5657 addInstr( env,
5658 PPCInstr_Dfp128Binary( fpop, r_dstHi, r_dstLo,
5659 r_srcRHi, r_srcRLo ) );
5660 *rHi = r_dstHi;
5661 *rLo = r_dstLo;
5662 return;
5664 switch (triop->op) {
5665 case Iop_QuantizeD128: fpop = Pfp_DQUAQ; break;
5666 case Iop_SignificanceRoundD128: fpop = Pfp_DRRNDQ; break;
5667 default: break;
5669 if (fpop == Pfp_DQUAQ) {
5670 HReg r_srcHi = newVRegF(env);
5671 HReg r_srcLo = newVRegF(env);
5672 PPCRI* rmc = iselWordExpr_RI(env, triop->arg1, IEndianess);
5674 /* dst will be used to pass in the left operand and get the result */
5675 iselDfp128Expr(&r_dstHi, &r_dstLo, env, triop->arg2, IEndianess);
5676 iselDfp128Expr(&r_srcHi, &r_srcLo, env, triop->arg3, IEndianess);
5678 // will set RMC when issuing instruction
5679 addInstr(env, PPCInstr_DfpQuantize128(fpop, r_dstHi, r_dstLo,
5680 r_srcHi, r_srcLo, rmc));
5681 *rHi = r_dstHi;
5682 *rLo = r_dstLo;
5683 return;
5685 } else if (fpop == Pfp_DRRNDQ) {
5686 HReg r_srcHi = newVRegF(env);
5687 HReg r_srcLo = newVRegF(env);
5688 PPCRI* rmc = iselWordExpr_RI(env, triop->arg1, IEndianess);
5689 PPCAMode* zero_r1 = PPCAMode_IR( 0, StackFramePtr(env->mode64) );
5690 PPCAMode* four_r1 = PPCAMode_IR( 4, StackFramePtr(env->mode64) );
5691 HReg i8_val = iselWordExpr_R(env, triop->arg2, IEndianess);
5692 HReg r_zero = newVRegI( env );
5694 iselDfp128Expr(&r_srcHi, &r_srcLo, env, triop->arg3, IEndianess);
5696 /* dst will be used to pass in the left operand and get the result */
5697 /* Move I8 to float register to issue instruction. Note, the
5698 * instruction only looks at the bottom 6 bits so we really don't
5699 * have to clear the upper bits since the iselWordExpr_R sets the
5700 * bottom 8-bits.
5702 sub_from_sp( env, 16 );
5704 if (env->mode64)
5705 addInstr(env, PPCInstr_Store(4, four_r1, i8_val, True/*mode64*/));
5706 else
5707 addInstr(env, PPCInstr_Store(4, four_r1, i8_val, False/*mode32*/));
5709 /* Have to write to the upper bits to ensure they have been
5710 * initialized. The instruction ignores all but the lower 6-bits.
5712 addInstr( env, PPCInstr_LI( r_zero, 0, env->mode64 ) );
5713 addInstr(env, PPCInstr_FpLdSt(True/*load*/, 8, r_dstHi, zero_r1));
5714 addInstr(env, PPCInstr_FpLdSt(True/*load*/, 8, r_dstLo, zero_r1));
5716 add_to_sp( env, 16 );
5718 // will set RMC when issuing instruction
5719 addInstr(env, PPCInstr_DfpQuantize128(fpop, r_dstHi, r_dstLo,
5720 r_srcHi, r_srcLo, rmc));
5721 *rHi = r_dstHi;
5722 *rLo = r_dstLo;
5723 return;
5727 ppIRExpr( e );
5728 vpanic( "iselDfp128Expr(ppc64)" );
5732 /*---------------------------------------------------------*/
5733 /*--- ISEL: SIMD (Vector) expressions, 128 bit. ---*/
5734 /*---------------------------------------------------------*/
5736 static HReg iselVecExpr ( ISelEnv* env, const IRExpr* e, IREndness IEndianess )
5738 HReg r = iselVecExpr_wrk( env, e, IEndianess );
5739 # if 0
5740 vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
5741 # endif
5742 vassert(hregClass(r) == HRcVec128);
5743 vassert(hregIsVirtual(r));
5744 return r;
5747 /* DO NOT CALL THIS DIRECTLY */
5748 static HReg iselVecExpr_wrk ( ISelEnv* env, const IRExpr* e,
5749 IREndness IEndianess )
5751 Bool mode64 = env->mode64;
5752 PPCAvOp op = Pav_INVALID;
5753 PPCAvFpOp fpop = Pavfp_INVALID;
5754 PPCAvOpBin128 opav128 = Pav_INVALIDBinary128;
5755 PPCAvOpTri128 optri128 = Pav_INVALIDTri128;
5756 IRType ty = typeOfIRExpr(env->type_env,e);
5757 vassert(e);
5758 vassert(ty == Ity_V128);
5760 if (e->tag == Iex_ITE) {
5761 HReg r1 = iselVecExpr( env, e->Iex.ITE.iftrue, IEndianess );
5762 HReg r0 = iselVecExpr( env, e->Iex.ITE.iffalse, IEndianess );
5763 HReg r_dst = newVRegV(env);
5765 // Use OR operator to do move r1 to r_dst
5766 addInstr(env, PPCInstr_AvBinary( Pav_OR, r_dst, r0, r0));
5767 PPCCondCode cc = iselCondCode(env, e->Iex.ITE.cond, IEndianess);
5768 addInstr(env, PPCInstr_AvCMov(cc, r_dst, r1));
5769 return r_dst;
5772 if (e->tag == Iex_RdTmp) {
5773 return lookupIRTemp(env, e->Iex.RdTmp.tmp);
5776 if (e->tag == Iex_Get) {
5777 /* Guest state vectors are 16byte aligned,
5778 so don't need to worry here */
5779 HReg dst = newVRegV(env);
5780 addInstr(env,
5781 PPCInstr_AvLdSt( True/*load*/, 16, dst,
5782 PPCAMode_IR( e->Iex.Get.offset,
5783 GuestStatePtr(mode64) )));
5784 return dst;
5787 if (e->tag == Iex_Load && e->Iex.Load.end == IEndianess) {
5788 /* Need to be able to do V128 unaligned loads. The BE unaligned load
5789 * can be accomplised using the following code sequece from the ISA.
5790 * It uses the lvx instruction that does two aligned loads and then
5791 * permute the data to store the required data as if it had been an
5792 * unaligned load.
5794 * lvx Vhi,0,Rb # load MSQ, using the unaligned address in Rb
5795 * lvsl Vp, 0,Rb # Set permute control vector
5796 * addi Rb,Rb,15 # Address of LSQ
5797 * lvx Vlo,0,Rb # load LSQ
5798 * vperm Vt,Vhi,Vlo,Vp # align the data as requested
5801 HReg Vhi = newVRegV(env);
5802 HReg Vlo = newVRegV(env);
5803 HReg Vp = newVRegV(env);
5804 HReg v_dst = newVRegV(env);
5805 HReg rB;
5806 HReg rB_plus_15 = newVRegI(env);
5808 vassert(e->Iex.Load.ty == Ity_V128);
5809 rB = iselWordExpr_R( env, e->Iex.Load.addr, IEndianess );
5811 // lvx Vhi, 0, Rb
5812 addInstr(env, PPCInstr_AvLdSt( True/*load*/, 16, Vhi,
5813 PPCAMode_IR(0, rB)) );
5815 if (IEndianess == Iend_LE)
5816 // lvsr Vp, 0, Rb
5817 addInstr(env, PPCInstr_AvSh( False/*right shift*/, Vp,
5818 PPCAMode_IR(0, rB)) );
5819 else
5820 // lvsl Vp, 0, Rb
5821 addInstr(env, PPCInstr_AvSh( True/*left shift*/, Vp,
5822 PPCAMode_IR(0, rB)) );
5824 // addi Rb_plus_15, Rb, 15
5825 addInstr(env, PPCInstr_Alu( Palu_ADD, rB_plus_15,
5826 rB, PPCRH_Imm(True, toUShort(15))) );
5828 // lvx Vlo, 0, Rb_plus_15
5829 addInstr(env, PPCInstr_AvLdSt( True/*load*/, 16, Vlo,
5830 PPCAMode_IR(0, rB_plus_15)) );
5832 if (IEndianess == Iend_LE)
5833 // vperm Vt, Vhi, Vlo, Vp
5834 addInstr(env, PPCInstr_AvPerm( v_dst, Vlo, Vhi, Vp ));
5835 else
5836 // vperm Vt, Vhi, Vlo, Vp
5837 addInstr(env, PPCInstr_AvPerm( v_dst, Vhi, Vlo, Vp ));
5839 return v_dst;
5842 if (e->tag == Iex_Unop) {
5843 switch (e->Iex.Unop.op) {
5845 case Iop_F16toF64x2:
5847 HReg dst = newVRegV(env);
5848 HReg arg = iselVecExpr(env, e->Iex.Unop.arg, IEndianess);
5849 /* Note: PPC only coverts the 16-bt value in the upper word
5850 * to a 64-bit value stored in the upper word. The
5851 * contents of the lower word is undefined.
5853 addInstr(env, PPCInstr_AvUnary(Pav_F16toF64x2, dst, arg));
5854 return dst;
5857 case Iop_F64toF16x2_DEP:
5859 HReg dst = newVRegV(env);
5860 HReg arg = iselVecExpr(env, e->Iex.Unop.arg, IEndianess);
5861 /* Note: PPC only coverts the 64-bt value in the upper 64-bit of V128
5862 * to a 16-bit value stored in the upper 64-bits of the result
5863 * V128. The contents of the lower 64-bits is undefined.
5865 addInstr(env, PPCInstr_AvUnary(Pav_F64toF16x2, dst, arg));
5866 return dst;
5869 case Iop_F16toF32x4:
5871 HReg src = newVRegV(env);
5872 HReg dst = newVRegV(env);
5873 HReg arg = iselWordExpr_R(env, e->Iex.Unop.arg, IEndianess);
5874 PPCAMode *am_off0, *am_off8;
5875 HReg r_aligned16;
5877 vassert(mode64);
5878 /* need to put I64 src into upper 64-bits of vector register,
5879 use stack */
5880 sub_from_sp( env, 32 ); // Move SP down
5882 /* Get a quadword aligned address within our stack space */
5883 r_aligned16 = get_sp_aligned16( env );
5884 am_off0 = PPCAMode_IR( 0, r_aligned16 );
5885 am_off8 = PPCAMode_IR( 8, r_aligned16 );
5887 /* Store I64 to stack */
5889 if (IEndianess == Iend_LE) {
5890 addInstr(env, PPCInstr_Store( 8, am_off8, arg, mode64 ));
5891 } else {
5892 addInstr(env, PPCInstr_Store( 8, am_off0, arg, mode64 ));
5895 /* Fetch new v128 src back from stack. */
5896 addInstr(env, PPCInstr_AvLdSt(True/*ld*/, 16, src, am_off0));
5898 /* issue instruction */
5899 addInstr(env, PPCInstr_AvUnary(Pav_F16toF32x4, dst, src));
5900 add_to_sp( env, 32 ); // Reset SP
5902 return dst;
5905 case Iop_F32toF16x4_DEP:
5907 HReg dst = newVRegI(env);
5908 HReg tmp = newVRegV(env);
5909 HReg arg = iselVecExpr(env, e->Iex.Unop.arg, IEndianess);
5910 PPCAMode *am_off0, *am_off8;
5911 HReg r_aligned16;
5913 /* Instruction returns a V128, the Iop_F32toF16x4 needs to return
5914 * I64. Move the upper 64-bits from the instruction to an I64 via
5915 * the stack and return it.
5917 sub_from_sp( env, 32 ); // Move SP down
5919 addInstr(env, PPCInstr_AvUnary(Pav_F32toF16x4, tmp, arg));
5921 /* Get a quadword aligned address within our stack space */
5922 r_aligned16 = get_sp_aligned16( env );
5923 am_off0 = PPCAMode_IR( 0, r_aligned16 );
5924 am_off8 = PPCAMode_IR( 8, r_aligned16 );
5926 /* Store v128 tmp to stack. */
5927 addInstr(env, PPCInstr_AvLdSt(False/*store*/, 16, tmp, am_off0));
5929 /* Fetch I64 from stack */
5930 if (IEndianess == Iend_LE) {
5931 addInstr(env, PPCInstr_Load( 8, dst, am_off8, mode64 ));
5932 } else {
5933 addInstr(env, PPCInstr_Load( 8, dst, am_off0, mode64 ));
5936 add_to_sp( env, 32 ); // Reset SP
5937 return dst;
5940 case Iop_NotV128: {
5941 HReg arg = iselVecExpr(env, e->Iex.Unop.arg, IEndianess);
5942 HReg dst = newVRegV(env);
5943 addInstr(env, PPCInstr_AvUnary(Pav_NOT, dst, arg));
5944 return dst;
5947 case Iop_CmpNEZ8x16: {
5948 HReg arg = iselVecExpr(env, e->Iex.Unop.arg, IEndianess);
5949 HReg zero = newVRegV(env);
5950 HReg dst = newVRegV(env);
5951 addInstr(env, PPCInstr_AvBinary(Pav_XOR, zero, zero, zero));
5952 addInstr(env, PPCInstr_AvBin8x16(Pav_CMPEQU, dst, arg, zero));
5953 addInstr(env, PPCInstr_AvUnary(Pav_NOT, dst, dst));
5954 return dst;
5957 case Iop_CmpNEZ16x8: {
5958 HReg arg = iselVecExpr(env, e->Iex.Unop.arg, IEndianess);
5959 HReg zero = newVRegV(env);
5960 HReg dst = newVRegV(env);
5961 addInstr(env, PPCInstr_AvBinary(Pav_XOR, zero, zero, zero));
5962 addInstr(env, PPCInstr_AvBin16x8(Pav_CMPEQU, dst, arg, zero));
5963 addInstr(env, PPCInstr_AvUnary(Pav_NOT, dst, dst));
5964 return dst;
5967 case Iop_CmpNEZ32x4: {
5968 HReg arg = iselVecExpr(env, e->Iex.Unop.arg, IEndianess);
5969 HReg zero = newVRegV(env);
5970 HReg dst = newVRegV(env);
5971 addInstr(env, PPCInstr_AvBinary(Pav_XOR, zero, zero, zero));
5972 addInstr(env, PPCInstr_AvBin32x4(Pav_CMPEQU, dst, arg, zero));
5973 addInstr(env, PPCInstr_AvUnary(Pav_NOT, dst, dst));
5974 return dst;
5977 case Iop_CmpNEZ64x2: {
5978 HReg arg = iselVecExpr(env, e->Iex.Unop.arg, IEndianess);
5979 HReg zero = newVRegV(env);
5980 HReg dst = newVRegV(env);
5981 addInstr(env, PPCInstr_AvBinary(Pav_XOR, zero, zero, zero));
5982 addInstr(env, PPCInstr_AvBin64x2(Pav_CMPEQU, dst, arg, zero));
5983 addInstr(env, PPCInstr_AvUnary(Pav_NOT, dst, dst));
5984 return dst;
5987 case Iop_RecipEst32Fx4: fpop = Pavfp_RCPF; goto do_32Fx4_unary;
5988 case Iop_RSqrtEst32Fx4: fpop = Pavfp_RSQRTF; goto do_32Fx4_unary;
5989 case Iop_Log2_32Fx4: fpop = Pavfp_Log2; goto do_32Fx4_unary;
5990 case Iop_Exp2_32Fx4: fpop = Pavfp_Exp2; goto do_32Fx4_unary;
5991 case Iop_I32UtoF32x4_DEP: fpop = Pavfp_CVTU2F; goto do_32Fx4_unary;
5992 case Iop_I32StoF32x4_DEP: fpop = Pavfp_CVTS2F; goto do_32Fx4_unary;
5993 case Iop_QF32toI32Ux4_RZ: fpop = Pavfp_QCVTF2U; goto do_32Fx4_unary;
5994 case Iop_QF32toI32Sx4_RZ: fpop = Pavfp_QCVTF2S; goto do_32Fx4_unary;
5995 case Iop_RoundF32x4_RM: fpop = Pavfp_ROUNDM; goto do_32Fx4_unary;
5996 case Iop_RoundF32x4_RP: fpop = Pavfp_ROUNDP; goto do_32Fx4_unary;
5997 case Iop_RoundF32x4_RN: fpop = Pavfp_ROUNDN; goto do_32Fx4_unary;
5998 case Iop_RoundF32x4_RZ: fpop = Pavfp_ROUNDZ; goto do_32Fx4_unary;
5999 do_32Fx4_unary:
6001 HReg arg = iselVecExpr(env, e->Iex.Unop.arg, IEndianess);
6002 HReg dst = newVRegV(env);
6003 addInstr(env, PPCInstr_AvUn32Fx4(fpop, dst, arg));
6004 return dst;
6007 case Iop_32UtoV128: {
6008 HReg r_aligned16, r_zeros;
6009 HReg r_src = iselWordExpr_R(env, e->Iex.Unop.arg, IEndianess);
6010 HReg dst = newVRegV(env);
6011 PPCAMode *am_off0, *am_off4, *am_off8, *am_off12;
6012 sub_from_sp( env, 32 ); // Move SP down
6014 /* Get a quadword aligned address within our stack space */
6015 r_aligned16 = get_sp_aligned16( env );
6016 am_off0 = PPCAMode_IR( 0, r_aligned16 );
6017 am_off4 = PPCAMode_IR( 4, r_aligned16 );
6018 am_off8 = PPCAMode_IR( 8, r_aligned16 );
6019 am_off12 = PPCAMode_IR( 12, r_aligned16 );
6021 /* Store zeros */
6022 r_zeros = newVRegI(env);
6023 addInstr(env, PPCInstr_LI(r_zeros, 0x0, mode64));
6024 if (IEndianess == Iend_LE)
6025 addInstr(env, PPCInstr_Store( 4, am_off0, r_src, mode64 ));
6026 else
6027 addInstr(env, PPCInstr_Store( 4, am_off0, r_zeros, mode64 ));
6028 addInstr(env, PPCInstr_Store( 4, am_off4, r_zeros, mode64 ));
6029 addInstr(env, PPCInstr_Store( 4, am_off8, r_zeros, mode64 ));
6031 /* Store r_src in low word of quadword-aligned mem */
6032 if (IEndianess == Iend_LE)
6033 addInstr(env, PPCInstr_Store( 4, am_off12, r_zeros, mode64 ));
6034 else
6035 addInstr(env, PPCInstr_Store( 4, am_off12, r_src, mode64 ));
6037 /* Load word into low word of quadword vector reg */
6038 if (IEndianess == Iend_LE)
6039 addInstr(env, PPCInstr_AvLdSt( True/*ld*/, 4, dst, am_off0 ));
6040 else
6041 addInstr(env, PPCInstr_AvLdSt( True/*ld*/, 4, dst, am_off12 ));
6043 add_to_sp( env, 32 ); // Reset SP
6044 return dst;
6047 case Iop_Dup8x16:
6048 case Iop_Dup16x8:
6049 case Iop_Dup32x4:
6050 return mk_AvDuplicateRI(env, e->Iex.Unop.arg, IEndianess);
6052 case Iop_CipherSV128: op = Pav_CIPHERSUBV128; goto do_AvCipherV128Un;
6053 do_AvCipherV128Un: {
6054 HReg arg = iselVecExpr(env, e->Iex.Unop.arg, IEndianess);
6055 HReg dst = newVRegV(env);
6056 addInstr(env, PPCInstr_AvCipherV128Unary(op, dst, arg));
6057 return dst;
6060 case Iop_Clz8x16: op = Pav_ZEROCNTBYTE; goto do_zerocnt;
6061 case Iop_Clz16x8: op = Pav_ZEROCNTHALF; goto do_zerocnt;
6062 case Iop_Clz32x4: op = Pav_ZEROCNTWORD; goto do_zerocnt;
6063 case Iop_Clz64x2: op = Pav_ZEROCNTDBL; goto do_zerocnt;
6064 case Iop_Ctz8x16: op = Pav_TRAILINGZEROCNTBYTE; goto do_zerocnt;
6065 case Iop_Ctz16x8: op = Pav_TRAILINGZEROCNTHALF; goto do_zerocnt;
6066 case Iop_Ctz32x4: op = Pav_TRAILINGZEROCNTWORD; goto do_zerocnt;
6067 case Iop_Ctz64x2: op = Pav_TRAILINGZEROCNTDBL; goto do_zerocnt;
6068 case Iop_PwBitMtxXpose64x2: op = Pav_BITMTXXPOSE; goto do_zerocnt;
6069 do_zerocnt:
6071 HReg arg = iselVecExpr(env, e->Iex.Unop.arg, IEndianess);
6072 HReg dst = newVRegV(env);
6073 addInstr(env, PPCInstr_AvUnary(op, dst, arg));
6074 return dst;
6077 /* BCD Iops */
6078 case Iop_BCD128toI128S:
6080 HReg dst = newVRegV(env);
6081 HReg arg = iselVecExpr(env, e->Iex.Unop.arg, IEndianess);
6082 addInstr(env, PPCInstr_AvUnary( Pav_BCD128toI128S, dst, arg ) );
6083 return dst;
6086 case Iop_MulI128by10: op = Pav_MulI128by10; goto do_MulI128;
6087 case Iop_MulI128by10Carry: op = Pav_MulI128by10Carry; goto do_MulI128;
6088 do_MulI128: {
6089 HReg dst = newVRegV(env);
6090 HReg arg = iselVecExpr(env, e->Iex.Unop.arg, IEndianess);
6091 addInstr(env, PPCInstr_AvUnary(op, dst, arg));
6092 return dst;
6095 case Iop_ReinterpI128asV128: {
6096 PPCAMode* am_addr;
6097 PPCAMode* am_addr4;
6098 HReg rHi, rLo;
6099 HReg dst = newVRegV(env);
6101 iselInt128Expr(&rHi,&rLo, env, e->Iex.Unop.arg, IEndianess);
6103 sub_from_sp( env, 16 ); // Move SP down 16 bytes
6104 am_addr = PPCAMode_IR( 0, StackFramePtr(mode64) );
6105 am_addr4 = advance4(env, am_addr);
6107 // store the two 64-bit parts
6108 addInstr(env, PPCInstr_Store( 8, am_addr, rHi, mode64 ));
6109 addInstr(env, PPCInstr_Store( 8, am_addr4, rLo, mode64 ));
6111 // load as Ity_V128
6112 addInstr(env, PPCInstr_AvLdSt( True/*fetch*/, 16, dst, am_addr ));
6114 add_to_sp( env, 16 ); // Reset SP
6115 return dst;
6118 default:
6119 break;
6120 } /* switch (e->Iex.Unop.op) */
6121 } /* if (e->tag == Iex_Unop) */
6123 if (e->tag == Iex_Binop) {
6124 switch (e->Iex.Binop.op) {
6126 case Iop_64HLtoV128: {
6127 if (!mode64) {
6128 HReg r3, r2, r1, r0, r_aligned16;
6129 PPCAMode *am_off0, *am_off4, *am_off8, *am_off12;
6130 HReg dst = newVRegV(env);
6131 /* do this via the stack (easy, convenient, etc) */
6132 sub_from_sp( env, 32 ); // Move SP down
6134 // get a quadword aligned address within our stack space
6135 r_aligned16 = get_sp_aligned16( env );
6136 am_off0 = PPCAMode_IR( 0, r_aligned16 );
6137 am_off4 = PPCAMode_IR( 4, r_aligned16 );
6138 am_off8 = PPCAMode_IR( 8, r_aligned16 );
6139 am_off12 = PPCAMode_IR( 12, r_aligned16 );
6141 /* Do the less significant 64 bits */
6142 iselInt64Expr(&r1, &r0, env, e->Iex.Binop.arg2, IEndianess);
6143 addInstr(env, PPCInstr_Store( 4, am_off12, r0, mode64 ));
6144 addInstr(env, PPCInstr_Store( 4, am_off8, r1, mode64 ));
6145 /* Do the more significant 64 bits */
6146 iselInt64Expr(&r3, &r2, env, e->Iex.Binop.arg1, IEndianess);
6147 addInstr(env, PPCInstr_Store( 4, am_off4, r2, mode64 ));
6148 addInstr(env, PPCInstr_Store( 4, am_off0, r3, mode64 ));
6150 /* Fetch result back from stack. */
6151 addInstr(env, PPCInstr_AvLdSt(True/*ld*/, 16, dst, am_off0));
6153 add_to_sp( env, 32 ); // Reset SP
6154 return dst;
6155 } else {
6156 HReg rHi = iselWordExpr_R(env, e->Iex.Binop.arg1, IEndianess);
6157 HReg rLo = iselWordExpr_R(env, e->Iex.Binop.arg2, IEndianess);
6158 HReg dst = newVRegV(env);
6159 HReg r_aligned16;
6160 PPCAMode *am_off0, *am_off8;
6161 /* do this via the stack (easy, convenient, etc) */
6162 sub_from_sp( env, 32 ); // Move SP down
6164 // get a quadword aligned address within our stack space
6165 r_aligned16 = get_sp_aligned16( env );
6166 am_off0 = PPCAMode_IR( 0, r_aligned16 );
6167 am_off8 = PPCAMode_IR( 8, r_aligned16 );
6169 /* Store 2*I64 to stack */
6170 if (IEndianess == Iend_LE) {
6171 addInstr(env, PPCInstr_Store( 8, am_off0, rLo, mode64 ));
6172 addInstr(env, PPCInstr_Store( 8, am_off8, rHi, mode64 ));
6173 } else {
6174 addInstr(env, PPCInstr_Store( 8, am_off0, rHi, mode64 ));
6175 addInstr(env, PPCInstr_Store( 8, am_off8, rLo, mode64 ));
6177 /* Fetch result back from stack. */
6178 addInstr(env, PPCInstr_AvLdSt(True/*ld*/, 16, dst, am_off0));
6180 add_to_sp( env, 32 ); // Reset SP
6181 return dst;
6185 case Iop_Max32Fx4: fpop = Pavfp_MAXF; goto do_32Fx4;
6186 case Iop_Min32Fx4: fpop = Pavfp_MINF; goto do_32Fx4;
6187 case Iop_CmpEQ32Fx4: fpop = Pavfp_CMPEQF; goto do_32Fx4;
6188 case Iop_CmpGT32Fx4: fpop = Pavfp_CMPGTF; goto do_32Fx4;
6189 case Iop_CmpGE32Fx4: fpop = Pavfp_CMPGEF; goto do_32Fx4;
6190 do_32Fx4:
6192 HReg argL = iselVecExpr(env, e->Iex.Binop.arg1, IEndianess);
6193 HReg argR = iselVecExpr(env, e->Iex.Binop.arg2, IEndianess);
6194 HReg dst = newVRegV(env);
6195 addInstr(env, PPCInstr_AvBin32Fx4(fpop, dst, argL, argR));
6196 return dst;
6199 case Iop_CmpLE32Fx4: {
6200 HReg argL = iselVecExpr(env, e->Iex.Binop.arg1, IEndianess);
6201 HReg argR = iselVecExpr(env, e->Iex.Binop.arg2, IEndianess);
6202 HReg dst = newVRegV(env);
6204 /* stay consistent with native ppc compares:
6205 if a left/right lane holds a nan, return zeros for that lane
6206 so: le == NOT(gt OR isNan)
6208 HReg isNanLR = newVRegV(env);
6209 HReg isNanL = isNan(env, argL, IEndianess);
6210 HReg isNanR = isNan(env, argR, IEndianess);
6211 addInstr(env, PPCInstr_AvBinary(Pav_OR, isNanLR,
6212 isNanL, isNanR));
6214 addInstr(env, PPCInstr_AvBin32Fx4(Pavfp_CMPGTF, dst,
6215 argL, argR));
6216 addInstr(env, PPCInstr_AvBinary(Pav_OR, dst, dst, isNanLR));
6217 addInstr(env, PPCInstr_AvUnary(Pav_NOT, dst, dst));
6218 return dst;
6221 case Iop_AndV128: op = Pav_AND; goto do_AvBin;
6222 case Iop_OrV128: op = Pav_OR; goto do_AvBin;
6223 case Iop_XorV128: op = Pav_XOR; goto do_AvBin;
6224 do_AvBin: {
6225 HReg arg1 = iselVecExpr(env, e->Iex.Binop.arg1, IEndianess);
6226 HReg arg2 = iselVecExpr(env, e->Iex.Binop.arg2, IEndianess);
6227 HReg dst = newVRegV(env);
6228 addInstr(env, PPCInstr_AvBinary(op, dst, arg1, arg2));
6229 return dst;
6232 case Iop_Shl8x16: op = Pav_SHL; goto do_AvBin8x16;
6233 case Iop_Shr8x16: op = Pav_SHR; goto do_AvBin8x16;
6234 case Iop_Sar8x16: op = Pav_SAR; goto do_AvBin8x16;
6235 case Iop_Rol8x16: op = Pav_ROTL; goto do_AvBin8x16;
6236 case Iop_InterleaveHI8x16: op = Pav_MRGHI; goto do_AvBin8x16;
6237 case Iop_InterleaveLO8x16: op = Pav_MRGLO; goto do_AvBin8x16;
6238 case Iop_Add8x16: op = Pav_ADDU; goto do_AvBin8x16;
6239 case Iop_QAdd8Ux16: op = Pav_QADDU; goto do_AvBin8x16;
6240 case Iop_QAdd8Sx16: op = Pav_QADDS; goto do_AvBin8x16;
6241 case Iop_Sub8x16: op = Pav_SUBU; goto do_AvBin8x16;
6242 case Iop_QSub8Ux16: op = Pav_QSUBU; goto do_AvBin8x16;
6243 case Iop_QSub8Sx16: op = Pav_QSUBS; goto do_AvBin8x16;
6244 case Iop_Avg8Ux16: op = Pav_AVGU; goto do_AvBin8x16;
6245 case Iop_Avg8Sx16: op = Pav_AVGS; goto do_AvBin8x16;
6246 case Iop_Max8Ux16: op = Pav_MAXU; goto do_AvBin8x16;
6247 case Iop_Max8Sx16: op = Pav_MAXS; goto do_AvBin8x16;
6248 case Iop_Min8Ux16: op = Pav_MINU; goto do_AvBin8x16;
6249 case Iop_Min8Sx16: op = Pav_MINS; goto do_AvBin8x16;
6250 case Iop_MullEven8Ux16: op = Pav_OMULU; goto do_AvBin8x16;
6251 case Iop_MullEven8Sx16: op = Pav_OMULS; goto do_AvBin8x16;
6252 case Iop_CmpEQ8x16: op = Pav_CMPEQU; goto do_AvBin8x16;
6253 case Iop_CmpGT8Ux16: op = Pav_CMPGTU; goto do_AvBin8x16;
6254 case Iop_CmpGT8Sx16: op = Pav_CMPGTS; goto do_AvBin8x16;
6255 case Iop_PolynomialMulAdd8x16: op = Pav_POLYMULADD; goto do_AvBin8x16;
6256 do_AvBin8x16: {
6257 HReg arg1 = iselVecExpr(env, e->Iex.Binop.arg1, IEndianess);
6258 HReg arg2 = iselVecExpr(env, e->Iex.Binop.arg2, IEndianess);
6259 HReg dst = newVRegV(env);
6260 addInstr(env, PPCInstr_AvBin8x16(op, dst, arg1, arg2));
6261 return dst;
6264 case Iop_Shl16x8: op = Pav_SHL; goto do_AvBin16x8;
6265 case Iop_Shr16x8: op = Pav_SHR; goto do_AvBin16x8;
6266 case Iop_Sar16x8: op = Pav_SAR; goto do_AvBin16x8;
6267 case Iop_Rol16x8: op = Pav_ROTL; goto do_AvBin16x8;
6268 case Iop_NarrowBin16to8x16: op = Pav_PACKUU; goto do_AvBin16x8;
6269 case Iop_QNarrowBin16Uto8Ux16: op = Pav_QPACKUU; goto do_AvBin16x8;
6270 case Iop_QNarrowBin16Sto8Sx16: op = Pav_QPACKSS; goto do_AvBin16x8;
6271 case Iop_InterleaveHI16x8: op = Pav_MRGHI; goto do_AvBin16x8;
6272 case Iop_InterleaveLO16x8: op = Pav_MRGLO; goto do_AvBin16x8;
6273 case Iop_Add16x8: op = Pav_ADDU; goto do_AvBin16x8;
6274 case Iop_QAdd16Ux8: op = Pav_QADDU; goto do_AvBin16x8;
6275 case Iop_QAdd16Sx8: op = Pav_QADDS; goto do_AvBin16x8;
6276 case Iop_Sub16x8: op = Pav_SUBU; goto do_AvBin16x8;
6277 case Iop_QSub16Ux8: op = Pav_QSUBU; goto do_AvBin16x8;
6278 case Iop_QSub16Sx8: op = Pav_QSUBS; goto do_AvBin16x8;
6279 case Iop_Avg16Ux8: op = Pav_AVGU; goto do_AvBin16x8;
6280 case Iop_Avg16Sx8: op = Pav_AVGS; goto do_AvBin16x8;
6281 case Iop_Max16Ux8: op = Pav_MAXU; goto do_AvBin16x8;
6282 case Iop_Max16Sx8: op = Pav_MAXS; goto do_AvBin16x8;
6283 case Iop_Min16Ux8: op = Pav_MINU; goto do_AvBin16x8;
6284 case Iop_Min16Sx8: op = Pav_MINS; goto do_AvBin16x8;
6285 case Iop_MullEven16Ux8: op = Pav_OMULU; goto do_AvBin16x8;
6286 case Iop_MullEven16Sx8: op = Pav_OMULS; goto do_AvBin16x8;
6287 case Iop_CmpEQ16x8: op = Pav_CMPEQU; goto do_AvBin16x8;
6288 case Iop_CmpGT16Ux8: op = Pav_CMPGTU; goto do_AvBin16x8;
6289 case Iop_CmpGT16Sx8: op = Pav_CMPGTS; goto do_AvBin16x8;
6290 case Iop_PolynomialMulAdd16x8: op = Pav_POLYMULADD; goto do_AvBin16x8;
6291 do_AvBin16x8: {
6292 HReg arg1 = iselVecExpr(env, e->Iex.Binop.arg1, IEndianess);
6293 HReg arg2 = iselVecExpr(env, e->Iex.Binop.arg2, IEndianess);
6294 HReg dst = newVRegV(env);
6295 addInstr(env, PPCInstr_AvBin16x8(op, dst, arg1, arg2));
6296 return dst;
6299 case Iop_Shl32x4: op = Pav_SHL; goto do_AvBin32x4;
6300 case Iop_Shr32x4: op = Pav_SHR; goto do_AvBin32x4;
6301 case Iop_Sar32x4: op = Pav_SAR; goto do_AvBin32x4;
6302 case Iop_Rol32x4: op = Pav_ROTL; goto do_AvBin32x4;
6303 case Iop_NarrowBin32to16x8: op = Pav_PACKUU; goto do_AvBin32x4;
6304 case Iop_QNarrowBin32Uto16Ux8: op = Pav_QPACKUU; goto do_AvBin32x4;
6305 case Iop_QNarrowBin32Sto16Sx8: op = Pav_QPACKSS; goto do_AvBin32x4;
6306 case Iop_InterleaveHI32x4: op = Pav_MRGHI; goto do_AvBin32x4;
6307 case Iop_InterleaveLO32x4: op = Pav_MRGLO; goto do_AvBin32x4;
6308 case Iop_Add32x4: op = Pav_ADDU; goto do_AvBin32x4;
6309 case Iop_QAdd32Ux4: op = Pav_QADDU; goto do_AvBin32x4;
6310 case Iop_QAdd32Sx4: op = Pav_QADDS; goto do_AvBin32x4;
6311 case Iop_Sub32x4: op = Pav_SUBU; goto do_AvBin32x4;
6312 case Iop_QSub32Ux4: op = Pav_QSUBU; goto do_AvBin32x4;
6313 case Iop_QSub32Sx4: op = Pav_QSUBS; goto do_AvBin32x4;
6314 case Iop_Avg32Ux4: op = Pav_AVGU; goto do_AvBin32x4;
6315 case Iop_Avg32Sx4: op = Pav_AVGS; goto do_AvBin32x4;
6316 case Iop_Max32Ux4: op = Pav_MAXU; goto do_AvBin32x4;
6317 case Iop_Max32Sx4: op = Pav_MAXS; goto do_AvBin32x4;
6318 case Iop_Min32Ux4: op = Pav_MINU; goto do_AvBin32x4;
6319 case Iop_Min32Sx4: op = Pav_MINS; goto do_AvBin32x4;
6320 case Iop_Mul32x4: op = Pav_MULU; goto do_AvBin32x4;
6321 case Iop_MullEven32Ux4: op = Pav_OMULU; goto do_AvBin32x4;
6322 case Iop_MullEven32Sx4: op = Pav_OMULS; goto do_AvBin32x4;
6323 case Iop_CmpEQ32x4: op = Pav_CMPEQU; goto do_AvBin32x4;
6324 case Iop_CmpGT32Ux4: op = Pav_CMPGTU; goto do_AvBin32x4;
6325 case Iop_CmpGT32Sx4: op = Pav_CMPGTS; goto do_AvBin32x4;
6326 case Iop_CatOddLanes32x4: op = Pav_CATODD; goto do_AvBin32x4;
6327 case Iop_CatEvenLanes32x4: op = Pav_CATEVEN; goto do_AvBin32x4;
6328 case Iop_PolynomialMulAdd32x4: op = Pav_POLYMULADD; goto do_AvBin32x4;
6329 do_AvBin32x4: {
6330 HReg arg1 = iselVecExpr(env, e->Iex.Binop.arg1, IEndianess);
6331 HReg arg2 = iselVecExpr(env, e->Iex.Binop.arg2, IEndianess);
6332 HReg dst = newVRegV(env);
6333 addInstr(env, PPCInstr_AvBin32x4(op, dst, arg1, arg2));
6334 return dst;
6337 case Iop_Shl64x2: op = Pav_SHL; goto do_AvBin64x2;
6338 case Iop_Shr64x2: op = Pav_SHR; goto do_AvBin64x2;
6339 case Iop_Sar64x2: op = Pav_SAR; goto do_AvBin64x2;
6340 case Iop_Rol64x2: op = Pav_ROTL; goto do_AvBin64x2;
6341 case Iop_NarrowBin64to32x4: op = Pav_PACKUU; goto do_AvBin64x2;
6342 case Iop_QNarrowBin64Sto32Sx4: op = Pav_QPACKSS; goto do_AvBin64x2;
6343 case Iop_QNarrowBin64Uto32Ux4: op = Pav_QPACKUU; goto do_AvBin64x2;
6344 case Iop_InterleaveHI64x2: op = Pav_MRGHI; goto do_AvBin64x2;
6345 case Iop_InterleaveLO64x2: op = Pav_MRGLO; goto do_AvBin64x2;
6346 case Iop_Add64x2: op = Pav_ADDU; goto do_AvBin64x2;
6347 case Iop_Sub64x2: op = Pav_SUBU; goto do_AvBin64x2;
6348 case Iop_Max64Ux2: op = Pav_MAXU; goto do_AvBin64x2;
6349 case Iop_Max64Sx2: op = Pav_MAXS; goto do_AvBin64x2;
6350 case Iop_Min64Ux2: op = Pav_MINU; goto do_AvBin64x2;
6351 case Iop_Min64Sx2: op = Pav_MINS; goto do_AvBin64x2;
6352 case Iop_CmpEQ64x2: op = Pav_CMPEQU; goto do_AvBin64x2;
6353 case Iop_CmpGT64Ux2: op = Pav_CMPGTU; goto do_AvBin64x2;
6354 case Iop_CmpGT64Sx2: op = Pav_CMPGTS; goto do_AvBin64x2;
6355 case Iop_PolynomialMulAdd64x2: op = Pav_POLYMULADD; goto do_AvBin64x2;
6356 do_AvBin64x2: {
6357 HReg arg1 = iselVecExpr(env, e->Iex.Binop.arg1, IEndianess);
6358 HReg arg2 = iselVecExpr(env, e->Iex.Binop.arg2, IEndianess);
6359 HReg dst = newVRegV(env);
6360 addInstr(env, PPCInstr_AvBin64x2(op, dst, arg1, arg2));
6361 return dst;
6364 case Iop_ShlN8x16: op = Pav_SHL; goto do_AvShift8x16;
6365 case Iop_SarN8x16: op = Pav_SAR; goto do_AvShift8x16;
6366 do_AvShift8x16: {
6367 HReg r_src = iselVecExpr(env, e->Iex.Binop.arg1, IEndianess);
6368 HReg dst = newVRegV(env);
6369 HReg v_shft = mk_AvDuplicateRI(env, e->Iex.Binop.arg2, IEndianess);
6370 addInstr(env, PPCInstr_AvBin8x16(op, dst, r_src, v_shft));
6371 return dst;
6374 case Iop_ShlN16x8: op = Pav_SHL; goto do_AvShift16x8;
6375 case Iop_ShrN16x8: op = Pav_SHR; goto do_AvShift16x8;
6376 case Iop_SarN16x8: op = Pav_SAR; goto do_AvShift16x8;
6377 do_AvShift16x8: {
6378 HReg r_src = iselVecExpr(env, e->Iex.Binop.arg1, IEndianess);
6379 HReg dst = newVRegV(env);
6380 HReg v_shft = mk_AvDuplicateRI(env, e->Iex.Binop.arg2, IEndianess);
6381 addInstr(env, PPCInstr_AvBin16x8(op, dst, r_src, v_shft));
6382 return dst;
6385 case Iop_ShlN32x4: op = Pav_SHL; goto do_AvShift32x4;
6386 case Iop_ShrN32x4: op = Pav_SHR; goto do_AvShift32x4;
6387 case Iop_SarN32x4: op = Pav_SAR; goto do_AvShift32x4;
6388 do_AvShift32x4: {
6389 HReg r_src = iselVecExpr(env, e->Iex.Binop.arg1, IEndianess);
6390 HReg dst = newVRegV(env);
6391 HReg v_shft = mk_AvDuplicateRI(env, e->Iex.Binop.arg2, IEndianess);
6392 addInstr(env, PPCInstr_AvBin32x4(op, dst, r_src, v_shft));
6393 return dst;
6396 case Iop_ShlN64x2: op = Pav_SHL; goto do_AvShift64x2;
6397 case Iop_ShrN64x2: op = Pav_SHR; goto do_AvShift64x2;
6398 case Iop_SarN64x2: op = Pav_SAR; goto do_AvShift64x2;
6399 do_AvShift64x2: {
6400 HReg r_src = iselVecExpr(env, e->Iex.Binop.arg1, IEndianess);
6401 HReg dst = newVRegV(env);
6402 HReg v_shft = mk_AvDuplicateRI(env, e->Iex.Binop.arg2, IEndianess);
6403 addInstr(env, PPCInstr_AvBin64x2(op, dst, r_src, v_shft));
6404 return dst;
6407 case Iop_ShrV128: op = Pav_SHR; goto do_AvShiftV128;
6408 case Iop_ShlV128: op = Pav_SHL; goto do_AvShiftV128;
6409 do_AvShiftV128: {
6410 HReg dst = newVRegV(env);
6411 HReg r_src = iselVecExpr(env, e->Iex.Binop.arg1, IEndianess);
6412 HReg v_shft = mk_AvDuplicateRI(env, e->Iex.Binop.arg2, IEndianess);
6413 /* Note: shift value gets masked by 127 */
6414 addInstr(env, PPCInstr_AvBinary(op, dst, r_src, v_shft));
6415 return dst;
6418 case Iop_Perm8x16: {
6419 HReg dst = newVRegV(env);
6420 HReg v_src = iselVecExpr(env, e->Iex.Binop.arg1, IEndianess);
6421 HReg v_ctl = iselVecExpr(env, e->Iex.Binop.arg2, IEndianess);
6422 addInstr(env, PPCInstr_AvPerm(dst, v_src, v_src, v_ctl));
6423 return dst;
6426 case Iop_CipherV128: op = Pav_CIPHERV128; goto do_AvCipherV128;
6427 case Iop_CipherLV128: op = Pav_CIPHERLV128; goto do_AvCipherV128;
6428 case Iop_NCipherV128: op = Pav_NCIPHERV128; goto do_AvCipherV128;
6429 case Iop_NCipherLV128:op = Pav_NCIPHERLV128; goto do_AvCipherV128;
6430 do_AvCipherV128: {
6431 HReg arg1 = iselVecExpr(env, e->Iex.Binop.arg1, IEndianess);
6432 HReg arg2 = iselVecExpr(env, e->Iex.Binop.arg2, IEndianess);
6433 HReg dst = newVRegV(env);
6434 addInstr(env, PPCInstr_AvCipherV128Binary(op, dst, arg1, arg2));
6435 return dst;
6438 case Iop_SHA256:op = Pav_SHA256; goto do_AvHashV128;
6439 case Iop_SHA512:op = Pav_SHA512; goto do_AvHashV128;
6440 do_AvHashV128: {
6441 HReg arg1 = iselVecExpr(env, e->Iex.Binop.arg1, IEndianess);
6442 HReg dst = newVRegV(env);
6443 PPCRI* s_field = iselWordExpr_RI(env, e->Iex.Binop.arg2, IEndianess);
6444 addInstr(env, PPCInstr_AvHashV128Binary(op, dst, arg1, s_field));
6445 return dst;
6448 /* BCD Iops */
6449 case Iop_I128StoBCD128:
6451 HReg dst = newVRegV(env);
6452 HReg arg = iselVecExpr(env, e->Iex.Binop.arg1, IEndianess);
6453 PPCRI* ps = iselWordExpr_RI(env, e->Iex.Binop.arg2, IEndianess);
6455 addInstr(env, PPCInstr_AvBinaryInt( Pav_I128StoBCD128, dst, arg,
6456 ps ) );
6457 return dst;
6460 case Iop_MulI128by10E: op = Pav_MulI128by10E; goto do_MulI128E;
6461 case Iop_MulI128by10ECarry: op = Pav_MulI128by10ECarry; goto do_MulI128E;
6462 do_MulI128E: {
6463 HReg dst = newVRegV(env);
6464 HReg argL = iselVecExpr(env, e->Iex.Binop.arg1, IEndianess);
6465 HReg argR = iselVecExpr(env, e->Iex.Binop.arg2, IEndianess);
6466 addInstr(env, PPCInstr_AvBinary(op, dst, argL, argR));
6467 return dst;
6470 case Iop_BCDAdd:op = Pav_BCDAdd; goto do_AvBCDV128;
6471 case Iop_BCDSub:op = Pav_BCDSub; goto do_AvBCDV128;
6472 do_AvBCDV128: {
6473 HReg arg1 = iselVecExpr(env, e->Iex.Binop.arg1, IEndianess);
6474 HReg arg2 = iselVecExpr(env, e->Iex.Binop.arg2, IEndianess);
6475 HReg dst = newVRegV(env);
6476 addInstr(env, PPCInstr_AvBCDV128Binary(op, dst, arg1, arg2));
6477 return dst;
6480 case Iop_DivU128: opav128 = Pav_DivU128; goto do_IntArithBinaryI128;
6481 case Iop_DivS128: opav128 = Pav_DivS128; goto do_IntArithBinaryI128;
6482 case Iop_DivU128E: opav128 = Pav_DivU128E; goto do_IntArithBinaryI128;
6483 case Iop_DivS128E: opav128 = Pav_DivS128E; goto do_IntArithBinaryI128;
6484 case Iop_ModU128: opav128 = Pav_ModU128; goto do_IntArithBinaryI128;
6485 case Iop_ModS128: opav128 = Pav_ModS128; goto do_IntArithBinaryI128;
6486 do_IntArithBinaryI128: {
6487 HReg arg1 = iselVecExpr(env, e->Iex.Binop.arg1, IEndianess);
6488 HReg arg2 = iselVecExpr(env, e->Iex.Binop.arg2, IEndianess);
6489 HReg dst = newVRegV(env);
6490 addInstr(env, PPCInstr_AvBinaryInt128(opav128, dst, arg1, arg2));
6491 return dst;
6494 default:
6495 break;
6496 } /* switch (e->Iex.Binop.op) */
6497 } /* if (e->tag == Iex_Binop) */
6499 if (e->tag == Iex_Triop) {
6500 IRTriop *triop = e->Iex.Triop.details;
6501 switch (triop->op) {
6502 case Iop_Add32Fx4: fpop = Pavfp_ADDF; goto do_32Fx4_with_rm;
6503 case Iop_Sub32Fx4: fpop = Pavfp_SUBF; goto do_32Fx4_with_rm;
6504 case Iop_Mul32Fx4: fpop = Pavfp_MULF; goto do_32Fx4_with_rm;
6505 do_32Fx4_with_rm:
6507 HReg argL = iselVecExpr(env, triop->arg2, IEndianess);
6508 HReg argR = iselVecExpr(env, triop->arg3, IEndianess);
6509 HReg dst = newVRegV(env);
6510 /* FIXME: this is bogus, in the sense that Altivec ignores
6511 FPSCR.RM, at least for some FP operations. So setting the
6512 RM is pointless. This is only really correct in the case
6513 where the RM is known, at JIT time, to be Irrm_NEAREST,
6514 since -- at least for Altivec FP add/sub/mul -- the
6515 emitted insn is hardwired to round to nearest. */
6516 set_FPU_rounding_mode(env, triop->arg1, IEndianess);
6517 addInstr(env, PPCInstr_AvBin32Fx4(fpop, dst, argL, argR));
6518 return dst;
6521 case Iop_2xMultU64Add128CarryOut:
6522 optri128 = Pav_2xMultU64Add128CarryOut; goto do_IntArithTrinaryI128;
6523 do_IntArithTrinaryI128: {
6524 HReg arg1 = iselVecExpr(env, triop->arg1, IEndianess);
6525 HReg arg2 = iselVecExpr(env, triop->arg2, IEndianess);
6526 HReg arg3 = iselVecExpr(env, triop->arg3, IEndianess);
6527 HReg dst = newVRegV(env);
6528 addInstr(env, PPCInstr_AvTernaryInt128(optri128, dst, arg1, arg2,
6529 arg3));
6530 return dst;
6533 default:
6534 break;
6535 } /* switch (e->Iex.Triop.op) */
6536 } /* if (e->tag == Iex_Trinop) */
6539 if (e->tag == Iex_Const ) {
6540 vassert(e->Iex.Const.con->tag == Ico_V128);
6541 if (e->Iex.Const.con->Ico.V128 == 0x0000) {
6542 return generate_zeroes_V128(env);
6544 else if (e->Iex.Const.con->Ico.V128 == 0xffff) {
6545 return generate_ones_V128(env);
6549 vex_printf("iselVecExpr(ppc) (subarch = %s): can't reduce\n",
6550 LibVEX_ppVexHwCaps(mode64 ? VexArchPPC64 : VexArchPPC32,
6551 env->hwcaps));
6552 ppIRExpr(e);
6553 vpanic("iselVecExpr_wrk(ppc)");
6557 /*---------------------------------------------------------*/
6558 /*--- ISEL: Statements ---*/
6559 /*---------------------------------------------------------*/
6561 static void iselStmt ( ISelEnv* env, IRStmt* stmt, IREndness IEndianess )
6563 Bool mode64 = env->mode64;
6564 if (vex_traceflags & VEX_TRACE_VCODE) {
6565 vex_printf("\n -- ");
6566 ppIRStmt(stmt);
6567 vex_printf("\n");
6570 switch (stmt->tag) {
6572 /* --------- STORE --------- */
6573 case Ist_Store: {
6574 IRType tya = typeOfIRExpr(env->type_env, stmt->Ist.Store.addr);
6575 IRType tyd = typeOfIRExpr(env->type_env, stmt->Ist.Store.data);
6576 IREndness end = stmt->Ist.Store.end;
6578 if (end != IEndianess)
6579 goto stmt_fail;
6580 if (!mode64 && (tya != Ity_I32))
6581 goto stmt_fail;
6582 if (mode64 && (tya != Ity_I64))
6583 goto stmt_fail;
6585 if (tyd == Ity_I8 || tyd == Ity_I16 || tyd == Ity_I32 ||
6586 (mode64 && (tyd == Ity_I64))) {
6587 PPCAMode* am_addr
6588 = iselWordExpr_AMode(env, stmt->Ist.Store.addr, tyd/*of xfer*/,
6589 IEndianess);
6590 HReg r_src = iselWordExpr_R(env, stmt->Ist.Store.data, IEndianess);
6591 addInstr(env, PPCInstr_Store( toUChar(sizeofIRType(tyd)),
6592 am_addr, r_src, mode64 ));
6593 return;
6595 if (tyd == Ity_F64) {
6596 PPCAMode* am_addr
6597 = iselWordExpr_AMode(env, stmt->Ist.Store.addr, tyd/*of xfer*/,
6598 IEndianess);
6599 HReg fr_src = iselDblExpr(env, stmt->Ist.Store.data, IEndianess);
6600 addInstr(env,
6601 PPCInstr_FpLdSt(False/*store*/, 8, fr_src, am_addr));
6602 return;
6604 if (tyd == Ity_F32) {
6605 PPCAMode* am_addr
6606 = iselWordExpr_AMode(env, stmt->Ist.Store.addr, tyd/*of xfer*/,
6607 IEndianess);
6608 HReg fr_src = iselFltExpr(env, stmt->Ist.Store.data, IEndianess);
6609 addInstr(env,
6610 PPCInstr_FpLdSt(False/*store*/, 4, fr_src, am_addr));
6611 return;
6613 if (tyd == Ity_D64) {
6614 PPCAMode* am_addr
6615 = iselWordExpr_AMode(env, stmt->Ist.Store.addr, tyd/*of xfer*/,
6616 IEndianess);
6617 HReg fr_src = iselDfp64Expr(env, stmt->Ist.Store.data, IEndianess);
6618 addInstr(env,
6619 PPCInstr_FpLdSt(False/*store*/, 8, fr_src, am_addr));
6620 return;
6622 if (tyd == Ity_D32) {
6623 PPCAMode* am_addr
6624 = iselWordExpr_AMode(env, stmt->Ist.Store.addr, tyd/*of xfer*/,
6625 IEndianess);
6626 HReg fr_src = iselDfp32Expr(env, stmt->Ist.Store.data, IEndianess);
6627 addInstr(env,
6628 PPCInstr_FpLdSt(False/*store*/, 4, fr_src, am_addr));
6629 return;
6631 if (tyd == Ity_V128) {
6632 PPCAMode* am_addr
6633 = iselWordExpr_AMode(env, stmt->Ist.Store.addr, tyd/*of xfer*/,
6634 IEndianess);
6635 HReg v_src = iselVecExpr(env, stmt->Ist.Store.data, IEndianess);
6636 addInstr(env,
6637 PPCInstr_AvLdSt(False/*store*/, 16, v_src, am_addr));
6638 return;
6640 if (tyd == Ity_I64 && !mode64) {
6641 /* Just calculate the address in the register. Life is too
6642 short to arse around trying and possibly failing to adjust
6643 the offset in a 'reg+offset' style amode. */
6644 HReg rHi32, rLo32;
6645 HReg r_addr = iselWordExpr_R(env, stmt->Ist.Store.addr, IEndianess);
6646 iselInt64Expr( &rHi32, &rLo32, env, stmt->Ist.Store.data,
6647 IEndianess );
6648 addInstr(env, PPCInstr_Store( 4/*byte-store*/,
6649 PPCAMode_IR( 0, r_addr ),
6650 rHi32,
6651 False/*32-bit insn please*/) );
6652 addInstr(env, PPCInstr_Store( 4/*byte-store*/,
6653 PPCAMode_IR( 4, r_addr ),
6654 rLo32,
6655 False/*32-bit insn please*/) );
6656 return;
6658 break;
6661 /* --------- PUT --------- */
6662 case Ist_Put: {
6663 IRType ty = typeOfIRExpr(env->type_env, stmt->Ist.Put.data);
6664 if (ty == Ity_I8 || ty == Ity_I16 ||
6665 ty == Ity_I32 || ((ty == Ity_I64) && mode64)) {
6666 HReg r_src = iselWordExpr_R(env, stmt->Ist.Put.data, IEndianess);
6667 PPCAMode* am_addr = PPCAMode_IR( stmt->Ist.Put.offset,
6668 GuestStatePtr(mode64) );
6669 addInstr(env, PPCInstr_Store( toUChar(sizeofIRType(ty)),
6670 am_addr, r_src, mode64 ));
6671 return;
6673 if (!mode64 && ty == Ity_I64) {
6674 HReg rHi, rLo;
6675 PPCAMode* am_addr = PPCAMode_IR( stmt->Ist.Put.offset,
6676 GuestStatePtr(mode64) );
6677 PPCAMode* am_addr4 = advance4(env, am_addr);
6678 iselInt64Expr(&rHi,&rLo, env, stmt->Ist.Put.data, IEndianess);
6679 addInstr(env, PPCInstr_Store( 4, am_addr, rHi, mode64 ));
6680 addInstr(env, PPCInstr_Store( 4, am_addr4, rLo, mode64 ));
6681 return;
6683 if (ty == Ity_I128) {
6684 HReg rHi, rLo;
6685 PPCAMode* am_addr = PPCAMode_IR( stmt->Ist.Put.offset,
6686 GuestStatePtr(mode64) );
6687 PPCAMode* am_addr4 = advance4(env, am_addr);
6689 iselInt128Expr(&rHi,&rLo, env, stmt->Ist.Put.data, IEndianess);
6690 addInstr(env, PPCInstr_Store( 4, am_addr, rHi, mode64 ));
6691 addInstr(env, PPCInstr_Store( 4, am_addr4, rLo, mode64 ));
6692 return;
6694 if (ty == Ity_F128) {
6695 /* Guest state vectors are 16byte aligned,
6696 so don't need to worry here */
6697 HReg v_src = iselFp128Expr(env, stmt->Ist.Put.data, IEndianess);
6699 PPCAMode* am_addr = PPCAMode_IR( stmt->Ist.Put.offset,
6700 GuestStatePtr(mode64) );
6701 addInstr(env,
6702 PPCInstr_AvLdSt(False/*store*/, 16, v_src, am_addr));
6703 return;
6705 if (ty == Ity_V128) {
6706 /* Guest state vectors are 16byte aligned,
6707 so don't need to worry here */
6708 HReg v_src = iselVecExpr(env, stmt->Ist.Put.data, IEndianess);
6709 PPCAMode* am_addr = PPCAMode_IR( stmt->Ist.Put.offset,
6710 GuestStatePtr(mode64) );
6711 addInstr(env,
6712 PPCInstr_AvLdSt(False/*store*/, 16, v_src, am_addr));
6713 return;
6715 if (ty == Ity_F64) {
6716 HReg fr_src = iselDblExpr(env, stmt->Ist.Put.data, IEndianess);
6717 PPCAMode* am_addr = PPCAMode_IR( stmt->Ist.Put.offset,
6718 GuestStatePtr(mode64) );
6719 addInstr(env, PPCInstr_FpLdSt( False/*store*/, 8,
6720 fr_src, am_addr ));
6721 return;
6723 if (ty == Ity_D32) {
6724 /* The 32-bit value is stored in a 64-bit register */
6725 HReg fr_src = iselDfp32Expr( env, stmt->Ist.Put.data, IEndianess );
6726 PPCAMode* am_addr = PPCAMode_IR( stmt->Ist.Put.offset,
6727 GuestStatePtr(mode64) );
6728 addInstr( env, PPCInstr_FpLdSt( False/*store*/, 8,
6729 fr_src, am_addr ) );
6730 return;
6732 if (ty == Ity_D64) {
6733 HReg fr_src = iselDfp64Expr( env, stmt->Ist.Put.data, IEndianess );
6734 PPCAMode* am_addr = PPCAMode_IR( stmt->Ist.Put.offset,
6735 GuestStatePtr(mode64) );
6736 addInstr( env, PPCInstr_FpLdSt( False/*store*/, 8, fr_src, am_addr ) );
6737 return;
6739 break;
6742 /* --------- Indexed PUT --------- */
6743 case Ist_PutI: {
6744 IRPutI *puti = stmt->Ist.PutI.details;
6746 PPCAMode* dst_am
6747 = genGuestArrayOffset(
6748 env, puti->descr,
6749 puti->ix, puti->bias,
6750 IEndianess );
6751 IRType ty = typeOfIRExpr(env->type_env, puti->data);
6752 if (mode64 && ty == Ity_I64) {
6753 HReg r_src = iselWordExpr_R(env, puti->data, IEndianess);
6754 addInstr(env, PPCInstr_Store( toUChar(8),
6755 dst_am, r_src, mode64 ));
6756 return;
6758 if ((!mode64) && ty == Ity_I32) {
6759 HReg r_src = iselWordExpr_R(env, puti->data, IEndianess);
6760 addInstr(env, PPCInstr_Store( toUChar(4),
6761 dst_am, r_src, mode64 ));
6762 return;
6764 break;
6767 /* --------- TMP --------- */
6768 case Ist_WrTmp: {
6769 IRTemp tmp = stmt->Ist.WrTmp.tmp;
6770 IRType ty = typeOfIRTemp(env->type_env, tmp);
6771 if (ty == Ity_I8 || ty == Ity_I16 ||
6772 ty == Ity_I32 || ((ty == Ity_I64) && mode64)) {
6773 HReg r_dst = lookupIRTemp(env, tmp);
6774 HReg r_src = iselWordExpr_R(env, stmt->Ist.WrTmp.data, IEndianess);
6775 addInstr(env, mk_iMOVds_RR( r_dst, r_src ));
6776 return;
6778 if (!mode64 && ty == Ity_I64) {
6779 HReg r_srcHi, r_srcLo, r_dstHi, r_dstLo;
6781 iselInt64Expr(&r_srcHi,&r_srcLo, env, stmt->Ist.WrTmp.data,
6782 IEndianess);
6783 lookupIRTempPair( &r_dstHi, &r_dstLo, env, tmp);
6784 addInstr(env, mk_iMOVds_RR(r_dstHi, r_srcHi) );
6785 addInstr(env, mk_iMOVds_RR(r_dstLo, r_srcLo) );
6786 return;
6788 if (mode64 && ty == Ity_I128) {
6789 HReg r_srcHi, r_srcLo, r_dstHi, r_dstLo;
6790 iselInt128Expr(&r_srcHi,&r_srcLo, env, stmt->Ist.WrTmp.data,
6791 IEndianess);
6792 lookupIRTempPair( &r_dstHi, &r_dstLo, env, tmp);
6793 addInstr(env, mk_iMOVds_RR(r_dstHi, r_srcHi) );
6794 addInstr(env, mk_iMOVds_RR(r_dstLo, r_srcLo) );
6795 return;
6797 if (!mode64 && ty == Ity_I128) {
6798 HReg r_srcHi = INVALID_HREG;
6799 HReg r_srcMedHi = INVALID_HREG;
6800 HReg r_srcMedLo = INVALID_HREG;
6801 HReg r_srcLo = INVALID_HREG;
6802 HReg r_dstHi, r_dstMedHi, r_dstMedLo, r_dstLo;
6804 iselInt128Expr_to_32x4(&r_srcHi, &r_srcMedHi,
6805 &r_srcMedLo, &r_srcLo,
6806 env, stmt->Ist.WrTmp.data, IEndianess);
6808 lookupIRTempQuad( &r_dstHi, &r_dstMedHi, &r_dstMedLo,
6809 &r_dstLo, env, tmp);
6811 addInstr(env, mk_iMOVds_RR(r_dstHi, r_srcHi) );
6812 addInstr(env, mk_iMOVds_RR(r_dstMedHi, r_srcMedHi) );
6813 addInstr(env, mk_iMOVds_RR(r_dstMedLo, r_srcMedLo) );
6814 addInstr(env, mk_iMOVds_RR(r_dstLo, r_srcLo) );
6815 return;
6817 if (ty == Ity_I1) {
6818 PPCCondCode cond = iselCondCode(env, stmt->Ist.WrTmp.data,
6819 IEndianess);
6820 HReg r_dst = lookupIRTemp(env, tmp);
6821 addInstr(env, PPCInstr_Set(cond, r_dst));
6822 return;
6824 if (ty == Ity_F64) {
6825 HReg fr_dst = lookupIRTemp(env, tmp);
6826 HReg fr_src = iselDblExpr(env, stmt->Ist.WrTmp.data, IEndianess);
6827 addInstr(env, PPCInstr_FpUnary(Pfp_MOV, fr_dst, fr_src));
6828 return;
6830 if (ty == Ity_F32) {
6831 HReg fr_dst = lookupIRTemp(env, tmp);
6832 HReg fr_src = iselFltExpr(env, stmt->Ist.WrTmp.data, IEndianess);
6833 addInstr(env, PPCInstr_FpUnary(Pfp_MOV, fr_dst, fr_src));
6834 return;
6836 if (ty == Ity_D32) {
6837 HReg fr_dst = lookupIRTemp(env, tmp);
6838 HReg fr_src = iselDfp32Expr(env, stmt->Ist.WrTmp.data, IEndianess);
6839 addInstr(env, PPCInstr_Dfp64Unary(Pfp_MOV, fr_dst, fr_src));
6840 return;
6842 if (ty == Ity_F128) {
6843 HReg v_dst = lookupIRTemp(env, tmp);
6844 HReg v_src = iselFp128Expr(env, stmt->Ist.WrTmp.data, IEndianess);
6845 addInstr(env, PPCInstr_AvUnary(Pav_MOV, v_dst, v_src));
6846 return;
6848 if (ty == Ity_V128) {
6849 HReg v_dst = lookupIRTemp(env, tmp);
6850 HReg v_src = iselVecExpr(env, stmt->Ist.WrTmp.data, IEndianess);
6851 addInstr(env, PPCInstr_AvUnary(Pav_MOV, v_dst, v_src));
6852 return;
6854 if (ty == Ity_D64) {
6855 HReg fr_dst = lookupIRTemp( env, tmp );
6856 HReg fr_src = iselDfp64Expr( env, stmt->Ist.WrTmp.data, IEndianess );
6857 addInstr( env, PPCInstr_Dfp64Unary( Pfp_MOV, fr_dst, fr_src ) );
6858 return;
6860 if (ty == Ity_D128) {
6861 HReg fr_srcHi, fr_srcLo, fr_dstHi, fr_dstLo;
6862 // lookupDfp128IRTempPair( &fr_dstHi, &fr_dstLo, env, tmp );
6863 lookupIRTempPair( &fr_dstHi, &fr_dstLo, env, tmp );
6864 iselDfp128Expr( &fr_srcHi, &fr_srcLo, env, stmt->Ist.WrTmp.data,
6865 IEndianess );
6866 addInstr( env, PPCInstr_Dfp64Unary( Pfp_MOV, fr_dstHi, fr_srcHi ) );
6867 addInstr( env, PPCInstr_Dfp64Unary( Pfp_MOV, fr_dstLo, fr_srcLo ) );
6868 return;
6870 break;
6873 /* --------- Load Linked or Store Conditional --------- */
6874 case Ist_LLSC: {
6875 IRTemp res = stmt->Ist.LLSC.result;
6876 IRType tyRes = typeOfIRTemp(env->type_env, res);
6877 IRType tyAddr = typeOfIRExpr(env->type_env, stmt->Ist.LLSC.addr);
6879 if (stmt->Ist.LLSC.end != IEndianess)
6880 goto stmt_fail;
6881 if (!mode64 && (tyAddr != Ity_I32))
6882 goto stmt_fail;
6883 if (mode64 && (tyAddr != Ity_I64))
6884 goto stmt_fail;
6886 if (stmt->Ist.LLSC.storedata == NULL) {
6887 /* LL */
6888 HReg r_addr = iselWordExpr_R( env, stmt->Ist.LLSC.addr, IEndianess );
6889 HReg r_dst = lookupIRTemp(env, res);
6890 if (tyRes == Ity_I8) {
6891 addInstr(env, PPCInstr_LoadL( 1, r_dst, r_addr, mode64 ));
6892 return;
6894 if (tyRes == Ity_I16) {
6895 addInstr(env, PPCInstr_LoadL( 2, r_dst, r_addr, mode64 ));
6896 return;
6898 if (tyRes == Ity_I32) {
6899 addInstr(env, PPCInstr_LoadL( 4, r_dst, r_addr, mode64 ));
6900 return;
6902 if (tyRes == Ity_I64 && mode64) {
6903 addInstr(env, PPCInstr_LoadL( 8, r_dst, r_addr, mode64 ));
6904 return;
6906 /* fallthru */;
6907 } else {
6908 /* SC */
6909 HReg r_res = lookupIRTemp(env, res); /* :: Ity_I1 */
6910 HReg r_a = iselWordExpr_R(env, stmt->Ist.LLSC.addr, IEndianess);
6911 HReg r_src = iselWordExpr_R(env, stmt->Ist.LLSC.storedata,
6912 IEndianess);
6913 HReg r_tmp = newVRegI(env);
6914 IRType tyData = typeOfIRExpr(env->type_env,
6915 stmt->Ist.LLSC.storedata);
6916 vassert(tyRes == Ity_I1);
6917 if (tyData == Ity_I8 || tyData == Ity_I16 || tyData == Ity_I32 ||
6918 (tyData == Ity_I64 && mode64)) {
6919 int size = 0;
6921 if (tyData == Ity_I64)
6922 size = 8;
6923 else if (tyData == Ity_I32)
6924 size = 4;
6925 else if (tyData == Ity_I16)
6926 size = 2;
6927 else if (tyData == Ity_I8)
6928 size = 1;
6930 addInstr(env, PPCInstr_StoreC( size,
6931 r_a, r_src, mode64 ));
6932 addInstr(env, PPCInstr_MfCR( r_tmp ));
6933 addInstr(env, PPCInstr_Shft(
6934 Pshft_SHR,
6935 env->mode64 ? False : True
6936 /*F:64-bit, T:32-bit shift*/,
6937 r_tmp, r_tmp,
6938 PPCRH_Imm(False/*unsigned*/, 29)));
6939 /* Probably unnecessary, since the IR dest type is Ity_I1,
6940 and so we are entitled to leave whatever junk we like
6941 drifting round in the upper 31 or 63 bits of r_res.
6942 However, for the sake of conservativeness .. */
6943 addInstr(env, PPCInstr_Alu(
6944 Palu_AND,
6945 r_res, r_tmp,
6946 PPCRH_Imm(False/*signed*/, 1)));
6947 return;
6949 /* fallthru */
6951 goto stmt_fail;
6952 /*NOTREACHED*/
6955 /* --------- Call to DIRTY helper --------- */
6956 case Ist_Dirty: {
6957 IRDirty* d = stmt->Ist.Dirty.details;
6959 /* Figure out the return type, if any. */
6960 IRType retty = Ity_INVALID;
6961 if (d->tmp != IRTemp_INVALID)
6962 retty = typeOfIRTemp(env->type_env, d->tmp);
6964 /* Throw out any return types we don't know about. The set of
6965 acceptable return types is the same in both 32- and 64-bit
6966 mode, so we don't need to inspect mode64 to make a
6967 decision. */
6968 Bool retty_ok = False;
6969 switch (retty) {
6970 case Ity_INVALID: /* function doesn't return anything */
6971 case Ity_V128:
6972 case Ity_I64: case Ity_I32: case Ity_I16: case Ity_I8:
6973 retty_ok = True; break;
6974 default:
6975 break;
6977 if (!retty_ok)
6978 break; /* will go to stmt_fail: */
6980 /* Marshal args, do the call, clear stack, set the return value
6981 to 0x555..555 if this is a conditional call that returns a
6982 value and the call is skipped. */
6983 UInt addToSp = 0;
6984 RetLoc rloc = mk_RetLoc_INVALID();
6985 doHelperCall( &addToSp, &rloc, env, d->guard, d->cee, retty, d->args,
6986 IEndianess );
6987 vassert(is_sane_RetLoc(rloc));
6989 /* Now figure out what to do with the returned value, if any. */
6990 switch (retty) {
6991 case Ity_INVALID: {
6992 /* No return value. Nothing to do. */
6993 vassert(d->tmp == IRTemp_INVALID);
6994 vassert(rloc.pri == RLPri_None);
6995 vassert(addToSp == 0);
6996 return;
6998 case Ity_I32: case Ity_I16: case Ity_I8: {
6999 /* The returned value is in %r3. Park it in the register
7000 associated with tmp. */
7001 HReg r_dst = lookupIRTemp(env, d->tmp);
7002 addInstr(env, mk_iMOVds_RR(r_dst, hregPPC_GPR3(mode64)));
7003 vassert(rloc.pri == RLPri_Int);
7004 vassert(addToSp == 0);
7005 return;
7007 case Ity_I64:
7008 if (mode64) {
7009 /* The returned value is in %r3. Park it in the register
7010 associated with tmp. */
7011 HReg r_dst = lookupIRTemp(env, d->tmp);
7012 addInstr(env, mk_iMOVds_RR(r_dst, hregPPC_GPR3(mode64)));
7013 vassert(rloc.pri == RLPri_Int);
7014 vassert(addToSp == 0);
7015 } else {
7016 /* The returned value is in %r3:%r4. Park it in the
7017 register-pair associated with tmp. */
7018 HReg r_dstHi = INVALID_HREG;
7019 HReg r_dstLo = INVALID_HREG;
7020 lookupIRTempPair( &r_dstHi, &r_dstLo, env, d->tmp);
7021 addInstr(env, mk_iMOVds_RR(r_dstHi, hregPPC_GPR3(mode64)));
7022 addInstr(env, mk_iMOVds_RR(r_dstLo, hregPPC_GPR4(mode64)));
7023 vassert(rloc.pri == RLPri_2Int);
7024 vassert(addToSp == 0);
7026 return;
7027 case Ity_V128: {
7028 /* The returned value is on the stack, and *retloc tells
7029 us where. Fish it off the stack and then move the
7030 stack pointer upwards to clear it, as directed by
7031 doHelperCall. */
7032 vassert(rloc.pri == RLPri_V128SpRel);
7033 vassert(addToSp >= 16);
7034 HReg dst = lookupIRTemp(env, d->tmp);
7035 PPCAMode* am = PPCAMode_IR(rloc.spOff, StackFramePtr(mode64));
7036 addInstr(env, PPCInstr_AvLdSt( True/*load*/, 16, dst, am ));
7037 add_to_sp(env, addToSp);
7038 return;
7040 default:
7041 /*NOTREACHED*/
7042 vassert(0);
7046 /* --------- MEM FENCE --------- */
7047 case Ist_MBE:
7048 switch (stmt->Ist.MBE.event) {
7049 case Imbe_Fence:
7050 addInstr(env, PPCInstr_MFence());
7051 return;
7052 default:
7053 break;
7055 break;
7057 /* --------- INSTR MARK --------- */
7058 /* Doesn't generate any executable code ... */
7059 case Ist_IMark:
7060 return;
7062 /* --------- ABI HINT --------- */
7063 /* These have no meaning (denotation in the IR) and so we ignore
7064 them ... if any actually made it this far. */
7065 case Ist_AbiHint:
7066 return;
7068 /* --------- NO-OP --------- */
7069 /* Fairly self-explanatory, wouldn't you say? */
7070 case Ist_NoOp:
7071 return;
7073 /* --------- EXIT --------- */
7074 case Ist_Exit: {
7075 IRConst* dst = stmt->Ist.Exit.dst;
7076 if (!mode64 && dst->tag != Ico_U32)
7077 vpanic("iselStmt(ppc): Ist_Exit: dst is not a 32-bit value");
7078 if (mode64 && dst->tag != Ico_U64)
7079 vpanic("iselStmt(ppc64): Ist_Exit: dst is not a 64-bit value");
7081 PPCCondCode cc = iselCondCode(env, stmt->Ist.Exit.guard, IEndianess);
7082 PPCAMode* amCIA = PPCAMode_IR(stmt->Ist.Exit.offsIP,
7083 hregPPC_GPR31(mode64));
7085 /* Case: boring transfer to known address */
7086 if (stmt->Ist.Exit.jk == Ijk_Boring
7087 || stmt->Ist.Exit.jk == Ijk_Call
7088 /* || stmt->Ist.Exit.jk == Ijk_Ret */) {
7089 if (env->chainingAllowed) {
7090 /* .. almost always true .. */
7091 /* Skip the event check at the dst if this is a forwards
7092 edge. */
7093 Bool toFastEP
7094 = mode64
7095 ? (((Addr64)stmt->Ist.Exit.dst->Ico.U64) > (Addr64)env->max_ga)
7096 : (((Addr32)stmt->Ist.Exit.dst->Ico.U32) > (Addr32)env->max_ga);
7097 if (0) vex_printf("%s", toFastEP ? "Y" : ",");
7098 addInstr(env, PPCInstr_XDirect(
7099 mode64 ? (Addr64)stmt->Ist.Exit.dst->Ico.U64
7100 : (Addr64)stmt->Ist.Exit.dst->Ico.U32,
7101 amCIA, cc, toFastEP));
7102 } else {
7103 /* .. very occasionally .. */
7104 /* We can't use chaining, so ask for an assisted transfer,
7105 as that's the only alternative that is allowable. */
7106 HReg r = iselWordExpr_R(env, IRExpr_Const(stmt->Ist.Exit.dst),
7107 IEndianess);
7108 addInstr(env, PPCInstr_XAssisted(r, amCIA, cc, Ijk_Boring));
7110 return;
7113 /* Case: assisted transfer to arbitrary address */
7114 switch (stmt->Ist.Exit.jk) {
7115 /* Keep this list in sync with that in iselNext below */
7116 case Ijk_ClientReq:
7117 case Ijk_EmFail:
7118 case Ijk_EmWarn:
7119 case Ijk_NoDecode:
7120 case Ijk_NoRedir:
7121 case Ijk_SigBUS:
7122 case Ijk_SigTRAP:
7123 case Ijk_Sys_syscall:
7124 case Ijk_InvalICache:
7126 HReg r = iselWordExpr_R(env, IRExpr_Const(stmt->Ist.Exit.dst),
7127 IEndianess);
7128 addInstr(env, PPCInstr_XAssisted(r, amCIA, cc,
7129 stmt->Ist.Exit.jk));
7130 return;
7132 default:
7133 break;
7136 /* Do we ever expect to see any other kind? */
7137 goto stmt_fail;
7140 default: break;
7142 stmt_fail:
7143 ppIRStmt(stmt);
7144 vpanic("iselStmt(ppc)");
7148 /*---------------------------------------------------------*/
7149 /*--- ISEL: Basic block terminators (Nexts) ---*/
7150 /*---------------------------------------------------------*/
7152 static void iselNext ( ISelEnv* env,
7153 IRExpr* next, IRJumpKind jk, Int offsIP,
7154 IREndness IEndianess)
7156 if (vex_traceflags & VEX_TRACE_VCODE) {
7157 vex_printf( "\n-- PUT(%d) = ", offsIP);
7158 ppIRExpr( next );
7159 vex_printf( "; exit-");
7160 ppIRJumpKind(jk);
7161 vex_printf( "\n");
7164 PPCCondCode always = mk_PPCCondCode( Pct_ALWAYS, Pcf_NONE );
7166 /* Case: boring transfer to known address */
7167 if (next->tag == Iex_Const) {
7168 IRConst* cdst = next->Iex.Const.con;
7169 vassert(cdst->tag == (env->mode64 ? Ico_U64 :Ico_U32));
7170 if (jk == Ijk_Boring || jk == Ijk_Call) {
7171 /* Boring transfer to known address */
7172 PPCAMode* amCIA = PPCAMode_IR(offsIP, hregPPC_GPR31(env->mode64));
7173 if (env->chainingAllowed) {
7174 /* .. almost always true .. */
7175 /* Skip the event check at the dst if this is a forwards
7176 edge. */
7177 Bool toFastEP
7178 = env->mode64
7179 ? (((Addr64)cdst->Ico.U64) > (Addr64)env->max_ga)
7180 : (((Addr32)cdst->Ico.U32) > (Addr32)env->max_ga);
7181 if (0) vex_printf("%s", toFastEP ? "X" : ".");
7182 addInstr(env, PPCInstr_XDirect(
7183 env->mode64 ? (Addr64)cdst->Ico.U64
7184 : (Addr64)cdst->Ico.U32,
7185 amCIA, always, toFastEP));
7186 } else {
7187 /* .. very occasionally .. */
7188 /* We can't use chaining, so ask for an assisted transfer,
7189 as that's the only alternative that is allowable. */
7190 HReg r = iselWordExpr_R(env, next, IEndianess);
7191 addInstr(env, PPCInstr_XAssisted(r, amCIA, always,
7192 Ijk_Boring));
7194 return;
7198 /* Case: call/return (==boring) transfer to any address */
7199 switch (jk) {
7200 case Ijk_Boring: case Ijk_Ret: case Ijk_Call: {
7201 HReg r = iselWordExpr_R(env, next, IEndianess);
7202 PPCAMode* amCIA = PPCAMode_IR(offsIP, hregPPC_GPR31(env->mode64));
7203 if (env->chainingAllowed) {
7204 addInstr(env, PPCInstr_XIndir(r, amCIA, always));
7205 } else {
7206 addInstr(env, PPCInstr_XAssisted(r, amCIA, always,
7207 Ijk_Boring));
7209 return;
7211 default:
7212 break;
7215 /* Case: assisted transfer to arbitrary address */
7216 switch (jk) {
7217 /* Keep this list in sync with that for Ist_Exit above */
7218 case Ijk_ClientReq:
7219 case Ijk_EmFail:
7220 case Ijk_EmWarn:
7221 case Ijk_NoDecode:
7222 case Ijk_NoRedir:
7223 case Ijk_SigBUS:
7224 case Ijk_SigTRAP:
7225 case Ijk_Sys_syscall:
7226 case Ijk_InvalICache:
7228 HReg r = iselWordExpr_R(env, next, IEndianess);
7229 PPCAMode* amCIA = PPCAMode_IR(offsIP, hregPPC_GPR31(env->mode64));
7230 addInstr(env, PPCInstr_XAssisted(r, amCIA, always, jk));
7231 return;
7233 default:
7234 break;
7237 vex_printf( "\n-- PUT(%d) = ", offsIP);
7238 ppIRExpr( next );
7239 vex_printf( "; exit-");
7240 ppIRJumpKind(jk);
7241 vex_printf( "\n");
7242 vassert(0); // are we expecting any other kind?
7246 /*---------------------------------------------------------*/
7247 /*--- Insn selector top-level ---*/
7248 /*---------------------------------------------------------*/
7250 /* Translate an entire SB to ppc code. */
7251 HInstrArray* iselSB_PPC ( const IRSB* bb,
7252 VexArch arch_host,
7253 const VexArchInfo* archinfo_host,
7254 const VexAbiInfo* vbi,
7255 Int offs_Host_EvC_Counter,
7256 Int offs_Host_EvC_FailAddr,
7257 Bool chainingAllowed,
7258 Bool addProfInc,
7259 Addr max_ga)
7262 Int i, j;
7263 HReg hregLo, hregMedLo, hregMedHi, hregHi;
7264 ISelEnv* env;
7265 UInt hwcaps_host = archinfo_host->hwcaps;
7266 Bool mode64 = False;
7267 UInt mask32, mask64;
7268 PPCAMode *amCounter, *amFailAddr;
7269 IREndness IEndianess;
7271 vassert(arch_host == VexArchPPC32 || arch_host == VexArchPPC64);
7272 mode64 = arch_host == VexArchPPC64;
7274 /* do some sanity checks,
7275 * Note: no 32-bit support for ISA 3.0, ISA 3.1
7277 mask32 = VEX_HWCAPS_PPC32_F | VEX_HWCAPS_PPC32_V
7278 | VEX_HWCAPS_PPC32_FX | VEX_HWCAPS_PPC32_GX | VEX_HWCAPS_PPC32_VX
7279 | VEX_HWCAPS_PPC32_DFP | VEX_HWCAPS_PPC32_ISA2_07;
7281 mask64 = VEX_HWCAPS_PPC64_V | VEX_HWCAPS_PPC64_FX
7282 | VEX_HWCAPS_PPC64_GX | VEX_HWCAPS_PPC64_VX | VEX_HWCAPS_PPC64_DFP
7283 | VEX_HWCAPS_PPC64_ISA2_07 | VEX_HWCAPS_PPC64_ISA3_0
7284 | VEX_HWCAPS_PPC64_ISA3_1;
7286 if (mode64) {
7287 vassert((hwcaps_host & mask32) == 0);
7288 } else {
7289 vassert((hwcaps_host & mask64) == 0);
7292 /* Check that the host's endianness is as expected. */
7293 vassert((archinfo_host->endness == VexEndnessBE) ||
7294 (archinfo_host->endness == VexEndnessLE));
7296 if (archinfo_host->endness == VexEndnessBE)
7297 IEndianess = Iend_BE;
7298 else
7299 IEndianess = Iend_LE;
7301 /* Make up an initial environment to use. */
7302 env = LibVEX_Alloc_inline(sizeof(ISelEnv));
7303 env->vreg_ctr = 0;
7305 /* Are we being ppc32 or ppc64? */
7306 env->mode64 = mode64;
7308 /* Set up output code array. */
7309 env->code = newHInstrArray();
7311 /* Copy BB's type env. */
7312 env->type_env = bb->tyenv;
7314 /* Make up an IRTemp -> virtual HReg mapping. This doesn't
7315 * change as we go along.
7317 * vregmap2 and vregmap3 are only used in 32 bit mode
7318 * for supporting I128 in 32-bit mode
7320 env->n_vregmap = bb->tyenv->types_used;
7321 env->vregmapLo = LibVEX_Alloc_inline(env->n_vregmap * sizeof(HReg));
7322 env->vregmapMedLo = LibVEX_Alloc_inline(env->n_vregmap * sizeof(HReg));
7323 if (mode64) {
7324 env->vregmapMedHi = NULL;
7325 env->vregmapHi = NULL;
7326 } else {
7327 env->vregmapMedHi = LibVEX_Alloc_inline(env->n_vregmap * sizeof(HReg));
7328 env->vregmapHi = LibVEX_Alloc_inline(env->n_vregmap * sizeof(HReg));
7331 /* and finally ... */
7332 env->chainingAllowed = chainingAllowed;
7333 env->max_ga = max_ga;
7334 env->hwcaps = hwcaps_host;
7335 env->previous_rm = NULL;
7336 env->vbi = vbi;
7338 /* For each IR temporary, allocate a suitably-kinded virtual
7339 register. */
7340 j = 0;
7341 for (i = 0; i < env->n_vregmap; i++) {
7342 hregLo = hregMedLo = hregMedHi = hregHi = INVALID_HREG;
7343 switch (bb->tyenv->types[i]) {
7344 case Ity_I1:
7345 case Ity_I8:
7346 case Ity_I16:
7347 case Ity_I32:
7348 if (mode64) {
7349 hregLo = mkHReg(True, HRcInt64, 0, j++);
7350 } else {
7351 hregLo = mkHReg(True, HRcInt32, 0, j++);
7353 break;
7354 case Ity_I64:
7355 if (mode64) {
7356 hregLo = mkHReg(True, HRcInt64, 0, j++);
7357 } else {
7358 hregLo = mkHReg(True, HRcInt32, 0, j++);
7359 hregMedLo = mkHReg(True, HRcInt32, 0, j++);
7361 break;
7362 case Ity_I128:
7363 if (mode64) {
7364 hregLo = mkHReg(True, HRcInt64, 0, j++);
7365 hregMedLo = mkHReg(True, HRcInt64, 0, j++);
7366 } else {
7367 hregLo = mkHReg(True, HRcInt32, 0, j++);
7368 hregMedLo = mkHReg(True, HRcInt32, 0, j++);
7369 hregMedHi = mkHReg(True, HRcInt32, 0, j++);
7370 hregHi = mkHReg(True, HRcInt32, 0, j++);
7372 break;
7373 case Ity_F32:
7374 case Ity_F64:
7375 hregLo = mkHReg(True, HRcFlt64, 0, j++);
7376 break;
7377 case Ity_F128:
7378 case Ity_V128:
7379 hregLo = mkHReg(True, HRcVec128, 0, j++);
7380 break;
7381 case Ity_D32:
7382 case Ity_D64:
7383 hregLo = mkHReg(True, HRcFlt64, 0, j++);
7384 break;
7385 case Ity_D128:
7386 hregLo = mkHReg(True, HRcFlt64, 0, j++);
7387 hregMedLo = mkHReg(True, HRcFlt64, 0, j++);
7388 break;
7389 default:
7390 ppIRType(bb->tyenv->types[i]);
7391 vpanic("iselBB(ppc): IRTemp type");
7393 env->vregmapLo[i] = hregLo;
7394 env->vregmapMedLo[i] = hregMedLo;
7395 if (!mode64) {
7396 env->vregmapMedHi[i] = hregMedHi;
7397 env->vregmapHi[i] = hregHi;
7400 env->vreg_ctr = j;
7402 /* The very first instruction must be an event check. */
7403 amCounter = PPCAMode_IR(offs_Host_EvC_Counter, hregPPC_GPR31(mode64));
7404 amFailAddr = PPCAMode_IR(offs_Host_EvC_FailAddr, hregPPC_GPR31(mode64));
7405 addInstr(env, PPCInstr_EvCheck(amCounter, amFailAddr));
7407 /* Possibly a block counter increment (for profiling). At this
7408 point we don't know the address of the counter, so just pretend
7409 it is zero. It will have to be patched later, but before this
7410 translation is used, by a call to LibVEX_patchProfCtr. */
7411 if (addProfInc) {
7412 addInstr(env, PPCInstr_ProfInc());
7415 /* Ok, finally we can iterate over the statements. */
7416 for (i = 0; i < bb->stmts_used; i++)
7417 iselStmt(env, bb->stmts[i], IEndianess);
7419 iselNext(env, bb->next, bb->jumpkind, bb->offsIP, IEndianess);
7421 /* record the number of vregs we used. */
7422 env->code->n_vregs = env->vreg_ctr;
7423 return env->code;
7427 /*---------------------------------------------------------------*/
7428 /*--- end host_ppc_isel.c ---*/
7429 /*---------------------------------------------------------------*/