/* -*- mode: C; c-basic-offset: 3; -*- */

/*--------------------------------------------------------------------*/
/*--- begin                                     guest_arm64_toIR.c ---*/
/*--------------------------------------------------------------------*/

/*
   This file is part of Valgrind, a dynamic binary instrumentation
   framework.

   Copyright (C) 2013-2017 OpenWorks

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
   02110-1301, USA.

   The GNU General Public License is contained in the file COPYING.
*/
/* KNOWN LIMITATIONS 2014-Nov-16

   * Correctness: FMAXNM and FMINNM are implemented the same as
     FMAX/FMIN.

     Also, FP comparison "unordered" is implemented as a normal FP
     comparison.

     Both should be fixed.  They behave incorrectly in the presence of
     NaNs.

     FMULX is treated the same as FMUL.  That's also not correct.

   * Floating multiply-add (etc) insns are split into a multiply and
     an add, and so suffer double rounding; hence sometimes the
     least significant mantissa bit is incorrect.  Fix: use the IR
     multiply-add IROps instead.

   * FRINTA, FRINTN are kludged: they just round to nearest, with no
     special handling for the "ties" case.  FRINTX might be dubious too.

   * Ditto FCVTXN.  "Round to odd" (force the result's least
     significant mantissa bit to 1 whenever the result is inexact) is
     not honoured; this implementation just rounds to nearest.
*/
/* "Special" instructions.

   This instruction decoder can decode four special instructions
   which mean nothing natively (are no-ops as far as regs/mem are
   concerned) but have meaning for supporting Valgrind.  A special
   instruction is flagged by a 16-byte preamble:

      93CC0D8C 93CC358C 93CCCD8C 93CCF58C
      (ror x12, x12, #3;   ror x12, x12, #13
       ror x12, x12, #51;  ror x12, x12, #61)

   Following that, one of the following 4 are allowed
   (standard interpretation in parentheses):

      AA0A014A (orr x10,x10,x10)   X3 = client_request ( X4 )
      AA0B016B (orr x11,x11,x11)   X3 = guest_NRADDR
      AA0C018C (orr x12,x12,x12)   branch-and-link-to-noredir X8
      AA090129 (orr x9,x9,x9)      IR injection

   Any other bytes following the 16-byte preamble are illegal and
   constitute a failure in instruction decoding.  This all assumes
   that the preamble will never occur except in specific code
   fragments designed for Valgrind to catch.
*/
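/* Illustrative sketch only (modelled on what valgrind.h-style client
   code does; the exact macro plumbing there is not reproduced here):
   the preamble plus one marker insn might be emitted as inline asm
   along these lines. */
#if 0
   __asm__ __volatile__(
      "ror x12, x12, #3  \n\t"   /* 93CC0D8C */
      "ror x12, x12, #13 \n\t"   /* 93CC358C */
      "ror x12, x12, #51 \n\t"   /* 93CCCD8C */
      "ror x12, x12, #61 \n\t"   /* 93CCF58C */
      "orr x11, x11, x11 \n\t"   /* AA0B016B: X3 = guest_NRADDR */
      : : : "x3", "x12", "memory");
#endif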
/* Translates ARM64 code to IR. */

#include "libvex_basictypes.h"
#include "libvex_ir.h"
#include "libvex.h"
#include "libvex_guest_arm64.h"

#include "main_util.h"
#include "main_globals.h"
#include "guest_generic_bb_to_IR.h"
#include "guest_arm64_defs.h"
/*------------------------------------------------------------*/
/*--- Globals                                               ---*/
/*------------------------------------------------------------*/

/* These are set at the start of the translation of an instruction, so
   that we don't have to pass them around endlessly.  CONST means does
   not change during translation of the instruction. */

/* CONST: what is the host's endianness?  We need to know this in
   order to do sub-register accesses to the SIMD/FP registers
   correctly. */
static VexEndness host_endness;

/* CONST: The guest address for the instruction currently being
   translated. */
static Addr64 guest_PC_curr_instr;

/* MOD: The IRSB* into which we're generating code. */
static IRSB* irsb;
/*------------------------------------------------------------*/
/*--- Debugging output                                      ---*/
/*------------------------------------------------------------*/

#define DIP(format, args...)           \
   if (vex_traceflags & VEX_TRACE_FE)  \
      vex_printf(format, ## args)

#define DIS(buf, format, args...)      \
   if (vex_traceflags & VEX_TRACE_FE)  \
      vex_sprintf(buf, format, ## args)
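/* Illustrative call (the real sites appear in the decoders, later in
   this file; 'dd', 'nn' and 'uimm12' are hypothetical locals here):
      DIP("add %s, %s, #%llu\n", nameIReg64orSP(dd),
          nameIReg64orSP(nn), (unsigned long long)uimm12);
   Nothing is printed unless VEX_TRACE_FE is set. */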
/*------------------------------------------------------------*/
/*--- Helper bits and pieces for deconstructing the         ---*/
/*--- arm insn stream.                                      ---*/
/*------------------------------------------------------------*/

/* Do a little-endian load of a 32-bit word, regardless of the
   endianness of the underlying host. */
static inline UInt getUIntLittleEndianly ( const UChar* p )
{
   UInt w = 0;
   w = (w << 8) | p[3];
   w = (w << 8) | p[2];
   w = (w << 8) | p[1];
   w = (w << 8) | p[0];
   return w;
}
/* Sign extend a N-bit value up to 64 bits, by copying
   bit N-1 into all higher positions. */
static ULong sx_to_64 ( ULong x, UInt n )
{
   vassert(n > 1 && n < 64);
   /* Shift bit N-1 into the sign position, then arithmetically
      shift it back down, smearing it into bits 63:N. */
   Long r = (Long)(x << (64 - n));
   return (ULong)(r >> (64 - n));
}
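/* Example: sx_to_64(0x80, 8) == 0xFFFFFFFFFFFFFF80ULL, since bit 7
   is set, whereas sx_to_64(0x7F, 8) == 0x7F. */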
//ZZ /* Do a little-endian load of a 16-bit word, regardless of the
//ZZ    endianness of the underlying host. */
//ZZ static inline UShort getUShortLittleEndianly ( UChar* p )
//ZZ {
//ZZ    UShort w = 0;
//ZZ    w = (w << 8) | p[1];
//ZZ    w = (w << 8) | p[0];
//ZZ    return w;
//ZZ }
//ZZ
//ZZ static UInt ROR32 ( UInt x, UInt sh ) {
//ZZ    vassert(sh >= 0 && sh < 32);
//ZZ    if (sh == 0) return x;
//ZZ    return (x << (32-sh)) | (x >> sh);
//ZZ }
//ZZ
//ZZ static Int popcount32 ( UInt x )
//ZZ {
//ZZ    Int res = 0, i;
//ZZ    for (i = 0; i < 32; i++) { res += (x & 1); x >>= 1; }
//ZZ    return res;
//ZZ }
//ZZ
//ZZ static UInt setbit32 ( UInt x, Int ix, UInt b )
//ZZ {
//ZZ    UInt mask = 1 << ix;
//ZZ    x &= ~mask;
//ZZ    x |= ((b << ix) & mask);
//ZZ    return x;
//ZZ }
#define BITS2(_b1,_b0)                                            \
   (((_b1) << 1) | (_b0))

#define BITS3(_b2,_b1,_b0)                                        \
   (((_b2) << 2) | ((_b1) << 1) | (_b0))

#define BITS4(_b3,_b2,_b1,_b0)                                    \
   (((_b3) << 3) | ((_b2) << 2) | ((_b1) << 1) | (_b0))

#define BITS8(_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0)                    \
   ((BITS4((_b7),(_b6),(_b5),(_b4)) << 4)                         \
    | BITS4((_b3),(_b2),(_b1),(_b0)))

#define BITS5(_b4,_b3,_b2,_b1,_b0)                                \
   (BITS8(0,0,0,(_b4),(_b3),(_b2),(_b1),(_b0)))
#define BITS6(_b5,_b4,_b3,_b2,_b1,_b0)                            \
   (BITS8(0,0,(_b5),(_b4),(_b3),(_b2),(_b1),(_b0)))
#define BITS7(_b6,_b5,_b4,_b3,_b2,_b1,_b0)                        \
   (BITS8(0,(_b6),(_b5),(_b4),(_b3),(_b2),(_b1),(_b0)))

#define BITS9(_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0)                \
   (((_b8) << 8)                                                  \
    | BITS8((_b7),(_b6),(_b5),(_b4),(_b3),(_b2),(_b1),(_b0)))

#define BITS10(_b9,_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0)           \
   (((_b9) << 9) | ((_b8) << 8)                                   \
    | BITS8((_b7),(_b6),(_b5),(_b4),(_b3),(_b2),(_b1),(_b0)))

#define BITS11(_b10,_b9,_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0)      \
   (((_b10) << 10)                                                \
    | BITS10(_b9,_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0))

#define BITS12(_b11,_b10,_b9,_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0) \
   (((_b11) << 11)                                                \
    | BITS11(_b10,_b9,_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0))
#define X00 BITS2(0,0)
#define X01 BITS2(0,1)
#define X10 BITS2(1,0)
#define X11 BITS2(1,1)

// produces _uint[_bMax:_bMin]
#define SLICE_UInt(_uint,_bMax,_bMin)                     \
   (( ((UInt)(_uint)) >> (_bMin))                         \
    & (UInt)((1ULL << ((_bMax) - (_bMin) + 1)) - 1ULL))
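/* Illustrative only (hypothetical field layout, not a statement about
   the real A64 encoding tables): a decoder clause built from these
   macros looks like

      if (SLICE_UInt(insn, 30, 24) == BITS7(0,0,1,0,0,0,1)) {
         UInt rd = SLICE_UInt(insn, 4, 0);   // destination register
         UInt rn = SLICE_UInt(insn, 9, 5);   // first source register
         ...
      }
*/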
/*------------------------------------------------------------*/
/*--- Helper bits and pieces for creating IR fragments.    ---*/
/*------------------------------------------------------------*/

static IRExpr* mkV128 ( UShort w )
{
   return IRExpr_Const(IRConst_V128(w));
}

static IRExpr* mkU64 ( ULong i )
{
   return IRExpr_Const(IRConst_U64(i));
}

static IRExpr* mkU32 ( UInt i )
{
   return IRExpr_Const(IRConst_U32(i));
}

static IRExpr* mkU16 ( UInt i )
{
   vassert(i < 65536);
   return IRExpr_Const(IRConst_U16(i));
}

static IRExpr* mkU8 ( UInt i )
{
   vassert(i < 256);
   return IRExpr_Const(IRConst_U8( (UChar)i ));
}

static IRExpr* mkexpr ( IRTemp tmp )
{
   return IRExpr_RdTmp(tmp);
}

static IRExpr* unop ( IROp op, IRExpr* a )
{
   return IRExpr_Unop(op, a);
}

static IRExpr* binop ( IROp op, IRExpr* a1, IRExpr* a2 )
{
   return IRExpr_Binop(op, a1, a2);
}

static IRExpr* triop ( IROp op, IRExpr* a1, IRExpr* a2, IRExpr* a3 )
{
   return IRExpr_Triop(op, a1, a2, a3);
}

static IRExpr* loadLE ( IRType ty, IRExpr* addr )
{
   return IRExpr_Load(Iend_LE, ty, addr);
}

/* Add a statement to the list held by "irsb". */
static void stmt ( IRStmt* st )
{
   addStmtToIRSB( irsb, st );
}

static void assign ( IRTemp dst, IRExpr* e )
{
   stmt( IRStmt_WrTmp(dst, e) );
}

static void storeLE ( IRExpr* addr, IRExpr* data )
{
   stmt( IRStmt_Store(Iend_LE, addr, data) );
}
//ZZ static void storeGuardedLE ( IRExpr* addr, IRExpr* data, IRTemp guardT )
//ZZ {
//ZZ    if (guardT == IRTemp_INVALID) {
//ZZ       /* unconditional */
//ZZ       storeLE(addr, data);
//ZZ    } else {
//ZZ       stmt( IRStmt_StoreG(Iend_LE, addr, data,
//ZZ                           binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0))) );
//ZZ    }
//ZZ }
//ZZ
//ZZ static void loadGuardedLE ( IRTemp dst, IRLoadGOp cvt,
//ZZ                             IRExpr* addr, IRExpr* alt,
//ZZ                             IRTemp guardT /* :: Ity_I32, 0 or 1 */ )
//ZZ {
//ZZ    if (guardT == IRTemp_INVALID) {
//ZZ       /* unconditional */
//ZZ       IRExpr* loaded = NULL;
//ZZ       switch (cvt) {
//ZZ          case ILGop_Ident32:
//ZZ             loaded = loadLE(Ity_I32, addr); break;
//ZZ          case ILGop_8Uto32:
//ZZ             loaded = unop(Iop_8Uto32, loadLE(Ity_I8, addr)); break;
//ZZ          case ILGop_8Sto32:
//ZZ             loaded = unop(Iop_8Sto32, loadLE(Ity_I8, addr)); break;
//ZZ          case ILGop_16Uto32:
//ZZ             loaded = unop(Iop_16Uto32, loadLE(Ity_I16, addr)); break;
//ZZ          case ILGop_16Sto32:
//ZZ             loaded = unop(Iop_16Sto32, loadLE(Ity_I16, addr)); break;
//ZZ          default:
//ZZ             vassert(0);
//ZZ       }
//ZZ       vassert(loaded != NULL);
//ZZ       assign(dst, loaded);
//ZZ    } else {
//ZZ       /* Generate a guarded load into 'dst', but apply 'cvt' to the
//ZZ          loaded data before putting the data in 'dst'.  If the load
//ZZ          does not take place, 'alt' is placed directly in 'dst'. */
//ZZ       stmt( IRStmt_LoadG(Iend_LE, cvt, dst, addr, alt,
//ZZ                          binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0))) );
//ZZ    }
//ZZ }
/* Generate a new temporary of the given type. */
static IRTemp newTemp ( IRType ty )
{
   vassert(isPlausibleIRType(ty));
   return newIRTemp( irsb->tyenv, ty );
}

/* This is used in many places, so the brevity is an advantage. */
static IRTemp newTempV128(void)
{
   return newTemp(Ity_V128);
}

/* Initialise V128 temporaries en masse. */
static
void newTempsV128_2(IRTemp* t1, IRTemp* t2)
{
   vassert(t1 && *t1 == IRTemp_INVALID);
   vassert(t2 && *t2 == IRTemp_INVALID);
   *t1 = newTempV128();
   *t2 = newTempV128();
}

static
void newTempsV128_3(IRTemp* t1, IRTemp* t2, IRTemp* t3)
{
   vassert(t1 && *t1 == IRTemp_INVALID);
   vassert(t2 && *t2 == IRTemp_INVALID);
   vassert(t3 && *t3 == IRTemp_INVALID);
   *t1 = newTempV128();
   *t2 = newTempV128();
   *t3 = newTempV128();
}

static
void newTempsV128_4(IRTemp* t1, IRTemp* t2, IRTemp* t3, IRTemp* t4)
{
   vassert(t1 && *t1 == IRTemp_INVALID);
   vassert(t2 && *t2 == IRTemp_INVALID);
   vassert(t3 && *t3 == IRTemp_INVALID);
   vassert(t4 && *t4 == IRTemp_INVALID);
   *t1 = newTempV128();
   *t2 = newTempV128();
   *t3 = newTempV128();
   *t4 = newTempV128();
}

static
void newTempsV128_7(IRTemp* t1, IRTemp* t2, IRTemp* t3,
                    IRTemp* t4, IRTemp* t5, IRTemp* t6, IRTemp* t7)
{
   vassert(t1 && *t1 == IRTemp_INVALID);
   vassert(t2 && *t2 == IRTemp_INVALID);
   vassert(t3 && *t3 == IRTemp_INVALID);
   vassert(t4 && *t4 == IRTemp_INVALID);
   vassert(t5 && *t5 == IRTemp_INVALID);
   vassert(t6 && *t6 == IRTemp_INVALID);
   vassert(t7 && *t7 == IRTemp_INVALID);
   *t1 = newTempV128();
   *t2 = newTempV128();
   *t3 = newTempV128();
   *t4 = newTempV128();
   *t5 = newTempV128();
   *t6 = newTempV128();
   *t7 = newTempV128();
}
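/* Typical use in the SIMD decoders (illustrative):
      IRTemp res = IRTemp_INVALID, rm = IRTemp_INVALID;
      newTempsV128_2(&res, &rm);
   The temps must start out as IRTemp_INVALID; the vasserts above
   catch accidental double-initialisation. */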
//ZZ /* Produces a value in 0 .. 3, which is encoded as per the type
//ZZ    IRRoundingMode. */
//ZZ static IRExpr* /* :: Ity_I32 */ get_FAKE_roundingmode ( void )
//ZZ {
//ZZ    return mkU32(Irrm_NEAREST);
//ZZ }
//ZZ
//ZZ /* Generate an expression for SRC rotated right by ROT. */
//ZZ static IRExpr* genROR32( IRTemp src, Int rot )
//ZZ {
//ZZ    vassert(rot >= 0 && rot < 32);
//ZZ    if (rot == 0)
//ZZ       return mkexpr(src);
//ZZ    return binop(Iop_Or32,
//ZZ                 binop(Iop_Shl32, mkexpr(src), mkU8(32 - rot)),
//ZZ                 binop(Iop_Shr32, mkexpr(src), mkU8(rot)));
//ZZ }
//ZZ
//ZZ static IRExpr* mkU128 ( ULong i )
//ZZ {
//ZZ    return binop(Iop_64HLtoV128, mkU64(i), mkU64(i));
//ZZ }
//ZZ
//ZZ /* Generate a 4-aligned version of the given expression if
//ZZ    the given condition is true.  Else return it unchanged. */
//ZZ static IRExpr* align4if ( IRExpr* e, Bool b )
//ZZ {
//ZZ    if (b)
//ZZ       return binop(Iop_And32, e, mkU32(~3));
//ZZ    return e;
//ZZ }
/* Other IR construction helpers. */
static IROp mkAND ( IRType ty ) {
   switch (ty) {
      case Ity_I32: return Iop_And32;
      case Ity_I64: return Iop_And64;
      default: vpanic("mkAND");
   }
}

static IROp mkOR ( IRType ty ) {
   switch (ty) {
      case Ity_I32: return Iop_Or32;
      case Ity_I64: return Iop_Or64;
      default: vpanic("mkOR");
   }
}

static IROp mkXOR ( IRType ty ) {
   switch (ty) {
      case Ity_I32: return Iop_Xor32;
      case Ity_I64: return Iop_Xor64;
      default: vpanic("mkXOR");
   }
}

static IROp mkSHL ( IRType ty ) {
   switch (ty) {
      case Ity_I32: return Iop_Shl32;
      case Ity_I64: return Iop_Shl64;
      default: vpanic("mkSHL");
   }
}

static IROp mkSHR ( IRType ty ) {
   switch (ty) {
      case Ity_I32: return Iop_Shr32;
      case Ity_I64: return Iop_Shr64;
      default: vpanic("mkSHR");
   }
}

static IROp mkSAR ( IRType ty ) {
   switch (ty) {
      case Ity_I32: return Iop_Sar32;
      case Ity_I64: return Iop_Sar64;
      default: vpanic("mkSAR");
   }
}

static IROp mkNOT ( IRType ty ) {
   switch (ty) {
      case Ity_I32: return Iop_Not32;
      case Ity_I64: return Iop_Not64;
      default: vpanic("mkNOT");
   }
}

static IROp mkADD ( IRType ty ) {
   switch (ty) {
      case Ity_I32: return Iop_Add32;
      case Ity_I64: return Iop_Add64;
      default: vpanic("mkADD");
   }
}

static IROp mkSUB ( IRType ty ) {
   switch (ty) {
      case Ity_I32: return Iop_Sub32;
      case Ity_I64: return Iop_Sub64;
      default: vpanic("mkSUB");
   }
}

static IROp mkADDF ( IRType ty ) {
   switch (ty) {
      case Ity_F32: return Iop_AddF32;
      case Ity_F64: return Iop_AddF64;
      default: vpanic("mkADDF");
   }
}

static IROp mkSUBF ( IRType ty ) {
   switch (ty) {
      case Ity_F32: return Iop_SubF32;
      case Ity_F64: return Iop_SubF64;
      default: vpanic("mkSUBF");
   }
}

static IROp mkMULF ( IRType ty ) {
   switch (ty) {
      case Ity_F32: return Iop_MulF32;
      case Ity_F64: return Iop_MulF64;
      default: vpanic("mkMULF");
   }
}

static IROp mkDIVF ( IRType ty ) {
   switch (ty) {
      case Ity_F32: return Iop_DivF32;
      case Ity_F64: return Iop_DivF64;
      default: vpanic("mkDIVF");
   }
}

static IROp mkNEGF ( IRType ty ) {
   switch (ty) {
      case Ity_F32: return Iop_NegF32;
      case Ity_F64: return Iop_NegF64;
      default: vpanic("mkNEGF");
   }
}

static IROp mkABSF ( IRType ty ) {
   switch (ty) {
      case Ity_F32: return Iop_AbsF32;
      case Ity_F64: return Iop_AbsF64;
      default: vpanic("mkABSF");
   }
}

static IROp mkSQRTF ( IRType ty ) {
   switch (ty) {
      case Ity_F32: return Iop_SqrtF32;
      case Ity_F64: return Iop_SqrtF64;
      default: vpanic("mkSQRTF");
   }
}
static IROp mkVecADD ( UInt size ) {
   const IROp ops[4]
      = { Iop_Add8x16, Iop_Add16x8, Iop_Add32x4, Iop_Add64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecQADDU ( UInt size ) {
   const IROp ops[4]
      = { Iop_QAdd8Ux16, Iop_QAdd16Ux8, Iop_QAdd32Ux4, Iop_QAdd64Ux2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecQADDS ( UInt size ) {
   const IROp ops[4]
      = { Iop_QAdd8Sx16, Iop_QAdd16Sx8, Iop_QAdd32Sx4, Iop_QAdd64Sx2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecQADDEXTSUSATUU ( UInt size ) {
   const IROp ops[4]
      = { Iop_QAddExtSUsatUU8x16, Iop_QAddExtSUsatUU16x8,
          Iop_QAddExtSUsatUU32x4, Iop_QAddExtSUsatUU64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecQADDEXTUSSATSS ( UInt size ) {
   const IROp ops[4]
      = { Iop_QAddExtUSsatSS8x16, Iop_QAddExtUSsatSS16x8,
          Iop_QAddExtUSsatSS32x4, Iop_QAddExtUSsatSS64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecSUB ( UInt size ) {
   const IROp ops[4]
      = { Iop_Sub8x16, Iop_Sub16x8, Iop_Sub32x4, Iop_Sub64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecQSUBU ( UInt size ) {
   const IROp ops[4]
      = { Iop_QSub8Ux16, Iop_QSub16Ux8, Iop_QSub32Ux4, Iop_QSub64Ux2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecQSUBS ( UInt size ) {
   const IROp ops[4]
      = { Iop_QSub8Sx16, Iop_QSub16Sx8, Iop_QSub32Sx4, Iop_QSub64Sx2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecSARN ( UInt size ) {
   const IROp ops[4]
      = { Iop_SarN8x16, Iop_SarN16x8, Iop_SarN32x4, Iop_SarN64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecSHRN ( UInt size ) {
   const IROp ops[4]
      = { Iop_ShrN8x16, Iop_ShrN16x8, Iop_ShrN32x4, Iop_ShrN64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecSHLN ( UInt size ) {
   const IROp ops[4]
      = { Iop_ShlN8x16, Iop_ShlN16x8, Iop_ShlN32x4, Iop_ShlN64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecCATEVENLANES ( UInt size ) {
   const IROp ops[4]
      = { Iop_CatEvenLanes8x16, Iop_CatEvenLanes16x8,
          Iop_CatEvenLanes32x4, Iop_InterleaveLO64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecCATODDLANES ( UInt size ) {
   const IROp ops[4]
      = { Iop_CatOddLanes8x16, Iop_CatOddLanes16x8,
          Iop_CatOddLanes32x4, Iop_InterleaveHI64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecINTERLEAVELO ( UInt size ) {
   const IROp ops[4]
      = { Iop_InterleaveLO8x16, Iop_InterleaveLO16x8,
          Iop_InterleaveLO32x4, Iop_InterleaveLO64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecINTERLEAVEHI ( UInt size ) {
   const IROp ops[4]
      = { Iop_InterleaveHI8x16, Iop_InterleaveHI16x8,
          Iop_InterleaveHI32x4, Iop_InterleaveHI64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecMAXU ( UInt size ) {
   const IROp ops[4]
      = { Iop_Max8Ux16, Iop_Max16Ux8, Iop_Max32Ux4, Iop_Max64Ux2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecMAXS ( UInt size ) {
   const IROp ops[4]
      = { Iop_Max8Sx16, Iop_Max16Sx8, Iop_Max32Sx4, Iop_Max64Sx2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecMINU ( UInt size ) {
   const IROp ops[4]
      = { Iop_Min8Ux16, Iop_Min16Ux8, Iop_Min32Ux4, Iop_Min64Ux2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecMINS ( UInt size ) {
   const IROp ops[4]
      = { Iop_Min8Sx16, Iop_Min16Sx8, Iop_Min32Sx4, Iop_Min64Sx2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecMUL ( UInt size ) {
   const IROp ops[4]
      = { Iop_Mul8x16, Iop_Mul16x8, Iop_Mul32x4, Iop_INVALID };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecMULLU ( UInt sizeNarrow ) {
   const IROp ops[4]
      = { Iop_Mull8Ux8, Iop_Mull16Ux4, Iop_Mull32Ux2, Iop_INVALID };
   vassert(sizeNarrow < 3);
   return ops[sizeNarrow];
}

static IROp mkVecMULLS ( UInt sizeNarrow ) {
   const IROp ops[4]
      = { Iop_Mull8Sx8, Iop_Mull16Sx4, Iop_Mull32Sx2, Iop_INVALID };
   vassert(sizeNarrow < 3);
   return ops[sizeNarrow];
}

static IROp mkVecQDMULLS ( UInt sizeNarrow ) {
   const IROp ops[4]
      = { Iop_INVALID, Iop_QDMull16Sx4, Iop_QDMull32Sx2, Iop_INVALID };
   vassert(sizeNarrow < 3);
   return ops[sizeNarrow];
}

static IROp mkVecCMPEQ ( UInt size ) {
   const IROp ops[4]
      = { Iop_CmpEQ8x16, Iop_CmpEQ16x8, Iop_CmpEQ32x4, Iop_CmpEQ64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecCMPGTU ( UInt size ) {
   const IROp ops[4]
      = { Iop_CmpGT8Ux16, Iop_CmpGT16Ux8, Iop_CmpGT32Ux4, Iop_CmpGT64Ux2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecCMPGTS ( UInt size ) {
   const IROp ops[4]
      = { Iop_CmpGT8Sx16, Iop_CmpGT16Sx8, Iop_CmpGT32Sx4, Iop_CmpGT64Sx2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecABS ( UInt size ) {
   const IROp ops[4]
      = { Iop_Abs8x16, Iop_Abs16x8, Iop_Abs32x4, Iop_Abs64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecZEROHIxxOFV128 ( UInt size ) {
   const IROp ops[4]
      = { Iop_ZeroHI120ofV128, Iop_ZeroHI112ofV128,
          Iop_ZeroHI96ofV128, Iop_ZeroHI64ofV128 };
   vassert(size < 4);
   return ops[size];
}
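/* In the mkVec* helpers above and below, 'size' selects the lane
   width of a 128-bit vector: 0 -> 8x16, 1 -> 16x8, 2 -> 32x4,
   3 -> 64x2; so, for example, mkVecADD(2) == Iop_Add32x4.  The
   'sizeNarrow' variants index the narrow (pre-widening) lane size
   the same way. */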
static IRExpr* mkU ( IRType ty, ULong imm ) {
   switch (ty) {
      case Ity_I32: return mkU32((UInt)(imm & 0xFFFFFFFFULL));
      case Ity_I64: return mkU64(imm);
      default: vpanic("mkU");
   }
}
static IROp mkVecQDMULHIS ( UInt size ) {
   const IROp ops[4]
      = { Iop_INVALID, Iop_QDMulHi16Sx8, Iop_QDMulHi32Sx4, Iop_INVALID };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecQRDMULHIS ( UInt size ) {
   const IROp ops[4]
      = { Iop_INVALID, Iop_QRDMulHi16Sx8, Iop_QRDMulHi32Sx4, Iop_INVALID };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecQANDUQSH ( UInt size ) {
   const IROp ops[4]
      = { Iop_QandUQsh8x16, Iop_QandUQsh16x8,
          Iop_QandUQsh32x4, Iop_QandUQsh64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecQANDSQSH ( UInt size ) {
   const IROp ops[4]
      = { Iop_QandSQsh8x16, Iop_QandSQsh16x8,
          Iop_QandSQsh32x4, Iop_QandSQsh64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecQANDUQRSH ( UInt size ) {
   const IROp ops[4]
      = { Iop_QandUQRsh8x16, Iop_QandUQRsh16x8,
          Iop_QandUQRsh32x4, Iop_QandUQRsh64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecQANDSQRSH ( UInt size ) {
   const IROp ops[4]
      = { Iop_QandSQRsh8x16, Iop_QandSQRsh16x8,
          Iop_QandSQRsh32x4, Iop_QandSQRsh64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecSHU ( UInt size ) {
   const IROp ops[4]
      = { Iop_Sh8Ux16, Iop_Sh16Ux8, Iop_Sh32Ux4, Iop_Sh64Ux2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecSHS ( UInt size ) {
   const IROp ops[4]
      = { Iop_Sh8Sx16, Iop_Sh16Sx8, Iop_Sh32Sx4, Iop_Sh64Sx2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecRSHU ( UInt size ) {
   const IROp ops[4]
      = { Iop_Rsh8Ux16, Iop_Rsh16Ux8, Iop_Rsh32Ux4, Iop_Rsh64Ux2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecRSHS ( UInt size ) {
   const IROp ops[4]
      = { Iop_Rsh8Sx16, Iop_Rsh16Sx8, Iop_Rsh32Sx4, Iop_Rsh64Sx2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecNARROWUN ( UInt sizeNarrow ) {
   const IROp ops[4]
      = { Iop_NarrowUn16to8x8, Iop_NarrowUn32to16x4,
          Iop_NarrowUn64to32x2, Iop_INVALID };
   vassert(sizeNarrow < 4);
   return ops[sizeNarrow];
}

static IROp mkVecQNARROWUNSU ( UInt sizeNarrow ) {
   const IROp ops[4]
      = { Iop_QNarrowUn16Sto8Ux8, Iop_QNarrowUn32Sto16Ux4,
          Iop_QNarrowUn64Sto32Ux2, Iop_INVALID };
   vassert(sizeNarrow < 4);
   return ops[sizeNarrow];
}

static IROp mkVecQNARROWUNSS ( UInt sizeNarrow ) {
   const IROp ops[4]
      = { Iop_QNarrowUn16Sto8Sx8, Iop_QNarrowUn32Sto16Sx4,
          Iop_QNarrowUn64Sto32Sx2, Iop_INVALID };
   vassert(sizeNarrow < 4);
   return ops[sizeNarrow];
}

static IROp mkVecQNARROWUNUU ( UInt sizeNarrow ) {
   const IROp ops[4]
      = { Iop_QNarrowUn16Uto8Ux8, Iop_QNarrowUn32Uto16Ux4,
          Iop_QNarrowUn64Uto32Ux2, Iop_INVALID };
   vassert(sizeNarrow < 4);
   return ops[sizeNarrow];
}

static IROp mkVecQANDqshrNNARROWUU ( UInt sizeNarrow ) {
   const IROp ops[4]
      = { Iop_QandQShrNnarrow16Uto8Ux8, Iop_QandQShrNnarrow32Uto16Ux4,
          Iop_QandQShrNnarrow64Uto32Ux2, Iop_INVALID };
   vassert(sizeNarrow < 4);
   return ops[sizeNarrow];
}

static IROp mkVecQANDqsarNNARROWSS ( UInt sizeNarrow ) {
   const IROp ops[4]
      = { Iop_QandQSarNnarrow16Sto8Sx8, Iop_QandQSarNnarrow32Sto16Sx4,
          Iop_QandQSarNnarrow64Sto32Sx2, Iop_INVALID };
   vassert(sizeNarrow < 4);
   return ops[sizeNarrow];
}

static IROp mkVecQANDqsarNNARROWSU ( UInt sizeNarrow ) {
   const IROp ops[4]
      = { Iop_QandQSarNnarrow16Sto8Ux8, Iop_QandQSarNnarrow32Sto16Ux4,
          Iop_QandQSarNnarrow64Sto32Ux2, Iop_INVALID };
   vassert(sizeNarrow < 4);
   return ops[sizeNarrow];
}

static IROp mkVecQANDqrshrNNARROWUU ( UInt sizeNarrow ) {
   const IROp ops[4]
      = { Iop_QandQRShrNnarrow16Uto8Ux8, Iop_QandQRShrNnarrow32Uto16Ux4,
          Iop_QandQRShrNnarrow64Uto32Ux2, Iop_INVALID };
   vassert(sizeNarrow < 4);
   return ops[sizeNarrow];
}

static IROp mkVecQANDqrsarNNARROWSS ( UInt sizeNarrow ) {
   const IROp ops[4]
      = { Iop_QandQRSarNnarrow16Sto8Sx8, Iop_QandQRSarNnarrow32Sto16Sx4,
          Iop_QandQRSarNnarrow64Sto32Sx2, Iop_INVALID };
   vassert(sizeNarrow < 4);
   return ops[sizeNarrow];
}

static IROp mkVecQANDqrsarNNARROWSU ( UInt sizeNarrow ) {
   const IROp ops[4]
      = { Iop_QandQRSarNnarrow16Sto8Ux8, Iop_QandQRSarNnarrow32Sto16Ux4,
          Iop_QandQRSarNnarrow64Sto32Ux2, Iop_INVALID };
   vassert(sizeNarrow < 4);
   return ops[sizeNarrow];
}

static IROp mkVecQSHLNSATUU ( UInt size ) {
   const IROp ops[4]
      = { Iop_QShlNsatUU8x16, Iop_QShlNsatUU16x8,
          Iop_QShlNsatUU32x4, Iop_QShlNsatUU64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecQSHLNSATSS ( UInt size ) {
   const IROp ops[4]
      = { Iop_QShlNsatSS8x16, Iop_QShlNsatSS16x8,
          Iop_QShlNsatSS32x4, Iop_QShlNsatSS64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecQSHLNSATSU ( UInt size ) {
   const IROp ops[4]
      = { Iop_QShlNsatSU8x16, Iop_QShlNsatSU16x8,
          Iop_QShlNsatSU32x4, Iop_QShlNsatSU64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecADDF ( UInt size ) {
   const IROp ops[4]
      = { Iop_INVALID, Iop_INVALID, Iop_Add32Fx4, Iop_Add64Fx2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecMAXF ( UInt size ) {
   const IROp ops[4]
      = { Iop_INVALID, Iop_INVALID, Iop_Max32Fx4, Iop_Max64Fx2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecMINF ( UInt size ) {
   const IROp ops[4]
      = { Iop_INVALID, Iop_INVALID, Iop_Min32Fx4, Iop_Min64Fx2 };
   vassert(size < 4);
   return ops[size];
}
/* Generate IR to create 'arg rotated right by imm', for sane values
   of 'ty' and 'imm'. */
static IRTemp mathROR ( IRType ty, IRTemp arg, UInt imm )
{
   UInt w = 0;
   if (ty == Ity_I64) {
      w = 64;
   } else {
      vassert(ty == Ity_I32);
      w = 32;
   }
   vassert(w != 0);
   vassert(imm < w);
   if (imm == 0) {
      return arg;
   }
   IRTemp res = newTemp(ty);
   assign(res, binop(mkOR(ty),
                     binop(mkSHL(ty), mkexpr(arg), mkU8(w - imm)),
                     binop(mkSHR(ty), mkexpr(arg), mkU8(imm)) ));
   return res;
}
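/* Example: for ty == Ity_I32 and imm == 8, the generated IR computes
   (arg << 24) | (arg >>u 8), i.e. a right-rotation by 8 bits. */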
/* Generate IR to set the returned temp to either all-zeroes or
   all ones, as a copy of arg<imm>. */
static IRTemp mathREPLICATE ( IRType ty, IRTemp arg, UInt imm )
{
   UInt w = 0;
   if (ty == Ity_I64) {
      w = 64;
   } else {
      vassert(ty == Ity_I32);
      w = 32;
   }
   vassert(w != 0);
   vassert(imm < w);
   IRTemp res = newTemp(ty);
   assign(res, binop(mkSAR(ty),
                     binop(mkSHL(ty), mkexpr(arg), mkU8(w - 1 - imm)),
                     mkU8(w - 1)));
   return res;
}
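/* Example: mathREPLICATE(Ity_I32, arg, 31) shifts bit 31 into the
   sign position (a shift of zero in this case) and then
   arithmetically shifts right by 31, yielding 0xFFFFFFFF if arg<31>
   is set and 0 otherwise. */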
/* U-widen 8/16/32/64 bit int expr to 64. */
static IRExpr* widenUto64 ( IRType srcTy, IRExpr* e )
{
   switch (srcTy) {
      case Ity_I64: return e;
      case Ity_I32: return unop(Iop_32Uto64, e);
      case Ity_I16: return unop(Iop_16Uto64, e);
      case Ity_I8:  return unop(Iop_8Uto64, e);
      default: vpanic("widenUto64(arm64)");
   }
}

/* Narrow 64 bit int expr to 8/16/32/64.  Clearly only some
   of these combinations make sense. */
static IRExpr* narrowFrom64 ( IRType dstTy, IRExpr* e )
{
   switch (dstTy) {
      case Ity_I64: return e;
      case Ity_I32: return unop(Iop_64to32, e);
      case Ity_I16: return unop(Iop_64to16, e);
      case Ity_I8:  return unop(Iop_64to8, e);
      default: vpanic("narrowFrom64(arm64)");
   }
}
/*------------------------------------------------------------*/
/*--- Helpers for accessing guest registers.               ---*/
/*------------------------------------------------------------*/

#define OFFB_X0       offsetof(VexGuestARM64State,guest_X0)
#define OFFB_X1       offsetof(VexGuestARM64State,guest_X1)
#define OFFB_X2       offsetof(VexGuestARM64State,guest_X2)
#define OFFB_X3       offsetof(VexGuestARM64State,guest_X3)
#define OFFB_X4       offsetof(VexGuestARM64State,guest_X4)
#define OFFB_X5       offsetof(VexGuestARM64State,guest_X5)
#define OFFB_X6       offsetof(VexGuestARM64State,guest_X6)
#define OFFB_X7       offsetof(VexGuestARM64State,guest_X7)
#define OFFB_X8       offsetof(VexGuestARM64State,guest_X8)
#define OFFB_X9       offsetof(VexGuestARM64State,guest_X9)
#define OFFB_X10      offsetof(VexGuestARM64State,guest_X10)
#define OFFB_X11      offsetof(VexGuestARM64State,guest_X11)
#define OFFB_X12      offsetof(VexGuestARM64State,guest_X12)
#define OFFB_X13      offsetof(VexGuestARM64State,guest_X13)
#define OFFB_X14      offsetof(VexGuestARM64State,guest_X14)
#define OFFB_X15      offsetof(VexGuestARM64State,guest_X15)
#define OFFB_X16      offsetof(VexGuestARM64State,guest_X16)
#define OFFB_X17      offsetof(VexGuestARM64State,guest_X17)
#define OFFB_X18      offsetof(VexGuestARM64State,guest_X18)
#define OFFB_X19      offsetof(VexGuestARM64State,guest_X19)
#define OFFB_X20      offsetof(VexGuestARM64State,guest_X20)
#define OFFB_X21      offsetof(VexGuestARM64State,guest_X21)
#define OFFB_X22      offsetof(VexGuestARM64State,guest_X22)
#define OFFB_X23      offsetof(VexGuestARM64State,guest_X23)
#define OFFB_X24      offsetof(VexGuestARM64State,guest_X24)
#define OFFB_X25      offsetof(VexGuestARM64State,guest_X25)
#define OFFB_X26      offsetof(VexGuestARM64State,guest_X26)
#define OFFB_X27      offsetof(VexGuestARM64State,guest_X27)
#define OFFB_X28      offsetof(VexGuestARM64State,guest_X28)
#define OFFB_X29      offsetof(VexGuestARM64State,guest_X29)
#define OFFB_X30      offsetof(VexGuestARM64State,guest_X30)

#define OFFB_XSP      offsetof(VexGuestARM64State,guest_XSP)
#define OFFB_PC       offsetof(VexGuestARM64State,guest_PC)

#define OFFB_CC_OP    offsetof(VexGuestARM64State,guest_CC_OP)
#define OFFB_CC_DEP1  offsetof(VexGuestARM64State,guest_CC_DEP1)
#define OFFB_CC_DEP2  offsetof(VexGuestARM64State,guest_CC_DEP2)
#define OFFB_CC_NDEP  offsetof(VexGuestARM64State,guest_CC_NDEP)

#define OFFB_TPIDR_EL0 offsetof(VexGuestARM64State,guest_TPIDR_EL0)
#define OFFB_NRADDR   offsetof(VexGuestARM64State,guest_NRADDR)

#define OFFB_Q0       offsetof(VexGuestARM64State,guest_Q0)
#define OFFB_Q1       offsetof(VexGuestARM64State,guest_Q1)
#define OFFB_Q2       offsetof(VexGuestARM64State,guest_Q2)
#define OFFB_Q3       offsetof(VexGuestARM64State,guest_Q3)
#define OFFB_Q4       offsetof(VexGuestARM64State,guest_Q4)
#define OFFB_Q5       offsetof(VexGuestARM64State,guest_Q5)
#define OFFB_Q6       offsetof(VexGuestARM64State,guest_Q6)
#define OFFB_Q7       offsetof(VexGuestARM64State,guest_Q7)
#define OFFB_Q8       offsetof(VexGuestARM64State,guest_Q8)
#define OFFB_Q9       offsetof(VexGuestARM64State,guest_Q9)
#define OFFB_Q10      offsetof(VexGuestARM64State,guest_Q10)
#define OFFB_Q11      offsetof(VexGuestARM64State,guest_Q11)
#define OFFB_Q12      offsetof(VexGuestARM64State,guest_Q12)
#define OFFB_Q13      offsetof(VexGuestARM64State,guest_Q13)
#define OFFB_Q14      offsetof(VexGuestARM64State,guest_Q14)
#define OFFB_Q15      offsetof(VexGuestARM64State,guest_Q15)
#define OFFB_Q16      offsetof(VexGuestARM64State,guest_Q16)
#define OFFB_Q17      offsetof(VexGuestARM64State,guest_Q17)
#define OFFB_Q18      offsetof(VexGuestARM64State,guest_Q18)
#define OFFB_Q19      offsetof(VexGuestARM64State,guest_Q19)
#define OFFB_Q20      offsetof(VexGuestARM64State,guest_Q20)
#define OFFB_Q21      offsetof(VexGuestARM64State,guest_Q21)
#define OFFB_Q22      offsetof(VexGuestARM64State,guest_Q22)
#define OFFB_Q23      offsetof(VexGuestARM64State,guest_Q23)
#define OFFB_Q24      offsetof(VexGuestARM64State,guest_Q24)
#define OFFB_Q25      offsetof(VexGuestARM64State,guest_Q25)
#define OFFB_Q26      offsetof(VexGuestARM64State,guest_Q26)
#define OFFB_Q27      offsetof(VexGuestARM64State,guest_Q27)
#define OFFB_Q28      offsetof(VexGuestARM64State,guest_Q28)
#define OFFB_Q29      offsetof(VexGuestARM64State,guest_Q29)
#define OFFB_Q30      offsetof(VexGuestARM64State,guest_Q30)
#define OFFB_Q31      offsetof(VexGuestARM64State,guest_Q31)

#define OFFB_FPCR     offsetof(VexGuestARM64State,guest_FPCR)
#define OFFB_QCFLAG   offsetof(VexGuestARM64State,guest_QCFLAG)

#define OFFB_CMSTART  offsetof(VexGuestARM64State,guest_CMSTART)
#define OFFB_CMLEN    offsetof(VexGuestARM64State,guest_CMLEN)

#define OFFB_LLSC_SIZE offsetof(VexGuestARM64State,guest_LLSC_SIZE)
#define OFFB_LLSC_ADDR offsetof(VexGuestARM64State,guest_LLSC_ADDR)
#define OFFB_LLSC_DATA offsetof(VexGuestARM64State,guest_LLSC_DATA)
/* ---------------- Integer registers ---------------- */

static Int offsetIReg64 ( UInt iregNo )
{
   /* Do we care about endianness here?  We do if sub-parts of integer
      registers are accessed. */
   switch (iregNo) {
      case 0:  return OFFB_X0;
      case 1:  return OFFB_X1;
      case 2:  return OFFB_X2;
      case 3:  return OFFB_X3;
      case 4:  return OFFB_X4;
      case 5:  return OFFB_X5;
      case 6:  return OFFB_X6;
      case 7:  return OFFB_X7;
      case 8:  return OFFB_X8;
      case 9:  return OFFB_X9;
      case 10: return OFFB_X10;
      case 11: return OFFB_X11;
      case 12: return OFFB_X12;
      case 13: return OFFB_X13;
      case 14: return OFFB_X14;
      case 15: return OFFB_X15;
      case 16: return OFFB_X16;
      case 17: return OFFB_X17;
      case 18: return OFFB_X18;
      case 19: return OFFB_X19;
      case 20: return OFFB_X20;
      case 21: return OFFB_X21;
      case 22: return OFFB_X22;
      case 23: return OFFB_X23;
      case 24: return OFFB_X24;
      case 25: return OFFB_X25;
      case 26: return OFFB_X26;
      case 27: return OFFB_X27;
      case 28: return OFFB_X28;
      case 29: return OFFB_X29;
      case 30: return OFFB_X30;
      default: vassert(0);
   }
}
static Int offsetIReg64orSP ( UInt iregNo )
{
   return iregNo == 31  ? OFFB_XSP  : offsetIReg64(iregNo);
}

static const HChar* nameIReg64orZR ( UInt iregNo )
{
   vassert(iregNo < 32);
   static const HChar* names[32]
      = { "x0",  "x1",  "x2",  "x3",  "x4",  "x5",  "x6",  "x7",
          "x8",  "x9",  "x10", "x11", "x12", "x13", "x14", "x15",
          "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
          "x24", "x25", "x26", "x27", "x28", "x29", "x30", "xzr" };
   return names[iregNo];
}

static const HChar* nameIReg64orSP ( UInt iregNo )
{
   if (iregNo == 31) {
      return "sp";
   }
   vassert(iregNo < 31);
   return nameIReg64orZR(iregNo);
}
static IRExpr* getIReg64orSP ( UInt iregNo )
{
   vassert(iregNo < 32);
   return IRExpr_Get( offsetIReg64orSP(iregNo), Ity_I64 );
}

static IRExpr* getIReg64orZR ( UInt iregNo )
{
   if (iregNo == 31) {
      return mkU64(0);
   }
   vassert(iregNo < 31);
   return IRExpr_Get( offsetIReg64orSP(iregNo), Ity_I64 );
}

static void putIReg64orSP ( UInt iregNo, IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I64);
   stmt( IRStmt_Put(offsetIReg64orSP(iregNo), e) );
}

static void putIReg64orZR ( UInt iregNo, IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I64);
   if (iregNo == 31) {
      return;
   }
   vassert(iregNo < 31);
   stmt( IRStmt_Put(offsetIReg64orSP(iregNo), e) );
}
static const HChar* nameIReg32orZR ( UInt iregNo )
{
   vassert(iregNo < 32);
   static const HChar* names[32]
      = { "w0",  "w1",  "w2",  "w3",  "w4",  "w5",  "w6",  "w7",
          "w8",  "w9",  "w10", "w11", "w12", "w13", "w14", "w15",
          "w16", "w17", "w18", "w19", "w20", "w21", "w22", "w23",
          "w24", "w25", "w26", "w27", "w28", "w29", "w30", "wzr" };
   return names[iregNo];
}

static const HChar* nameIReg32orSP ( UInt iregNo )
{
   if (iregNo == 31) {
      return "wsp";
   }
   vassert(iregNo < 31);
   return nameIReg32orZR(iregNo);
}

static IRExpr* getIReg32orSP ( UInt iregNo )
{
   vassert(iregNo < 32);
   return unop(Iop_64to32,
               IRExpr_Get( offsetIReg64orSP(iregNo), Ity_I64 ));
}

static IRExpr* getIReg32orZR ( UInt iregNo )
{
   if (iregNo == 31) {
      return mkU32(0);
   }
   vassert(iregNo < 31);
   return unop(Iop_64to32,
               IRExpr_Get( offsetIReg64orSP(iregNo), Ity_I64 ));
}

static void putIReg32orSP ( UInt iregNo, IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32);
   stmt( IRStmt_Put(offsetIReg64orSP(iregNo), unop(Iop_32Uto64, e)) );
}

static void putIReg32orZR ( UInt iregNo, IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32);
   if (iregNo == 31) {
      return;
   }
   vassert(iregNo < 31);
   stmt( IRStmt_Put(offsetIReg64orSP(iregNo), unop(Iop_32Uto64, e)) );
}
static const HChar* nameIRegOrSP ( Bool is64, UInt iregNo )
{
   vassert(is64 == True || is64 == False);
   return is64 ? nameIReg64orSP(iregNo) : nameIReg32orSP(iregNo);
}

static const HChar* nameIRegOrZR ( Bool is64, UInt iregNo )
{
   vassert(is64 == True || is64 == False);
   return is64 ? nameIReg64orZR(iregNo) : nameIReg32orZR(iregNo);
}

static IRExpr* getIRegOrZR ( Bool is64, UInt iregNo )
{
   vassert(is64 == True || is64 == False);
   return is64 ? getIReg64orZR(iregNo) : getIReg32orZR(iregNo);
}

static void putIRegOrZR ( Bool is64, UInt iregNo, IRExpr* e )
{
   vassert(is64 == True || is64 == False);
   if (is64) putIReg64orZR(iregNo, e); else putIReg32orZR(iregNo, e);
}

static void putPC ( IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I64);
   stmt( IRStmt_Put(OFFB_PC, e) );
}
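/* Register number 31 means XZR/WZR or SP depending on the
   instruction, so the decoders must pick the right accessor pair.
   Illustratively:
      putIReg64orZR(31, mkU64(1));  // write discarded (xzr)
      putIReg64orSP(31, mkU64(1));  // updates the guest stack pointer
*/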
/* ---------------- Vector (Q) registers ---------------- */

static Int offsetQReg128 ( UInt qregNo )
{
   /* We don't care about endianness at this point.  It only becomes
      relevant when dealing with sections of these registers. */
   switch (qregNo) {
      case 0:  return OFFB_Q0;
      case 1:  return OFFB_Q1;
      case 2:  return OFFB_Q2;
      case 3:  return OFFB_Q3;
      case 4:  return OFFB_Q4;
      case 5:  return OFFB_Q5;
      case 6:  return OFFB_Q6;
      case 7:  return OFFB_Q7;
      case 8:  return OFFB_Q8;
      case 9:  return OFFB_Q9;
      case 10: return OFFB_Q10;
      case 11: return OFFB_Q11;
      case 12: return OFFB_Q12;
      case 13: return OFFB_Q13;
      case 14: return OFFB_Q14;
      case 15: return OFFB_Q15;
      case 16: return OFFB_Q16;
      case 17: return OFFB_Q17;
      case 18: return OFFB_Q18;
      case 19: return OFFB_Q19;
      case 20: return OFFB_Q20;
      case 21: return OFFB_Q21;
      case 22: return OFFB_Q22;
      case 23: return OFFB_Q23;
      case 24: return OFFB_Q24;
      case 25: return OFFB_Q25;
      case 26: return OFFB_Q26;
      case 27: return OFFB_Q27;
      case 28: return OFFB_Q28;
      case 29: return OFFB_Q29;
      case 30: return OFFB_Q30;
      case 31: return OFFB_Q31;
      default: vassert(0);
   }
}
/* Write to a complete Qreg. */
static void putQReg128 ( UInt qregNo, IRExpr* e )
{
   vassert(qregNo < 32);
   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_V128);
   stmt( IRStmt_Put(offsetQReg128(qregNo), e) );
}

/* Read a complete Qreg. */
static IRExpr* getQReg128 ( UInt qregNo )
{
   vassert(qregNo < 32);
   return IRExpr_Get(offsetQReg128(qregNo), Ity_V128);
}
/* Produce the IR type for some sub-part of a vector.  For 32- and 64-
   bit sub-parts we can choose either integer or float types, and
   choose float on the basis that that is the common use case and so
   will give least interference with Put-to-Get forwarding later
   on. */
static IRType preferredVectorSubTypeFromSize ( UInt szB )
{
   switch (szB) {
      case 1:  return Ity_I8;
      case 2:  return Ity_I16;
      case 4:  return Ity_I32; //Ity_F32;
      case 8:  return Ity_F64;
      case 16: return Ity_V128;
      default: vassert(0);
   }
}
/* Find the offset of the laneNo'th lane of type laneTy in the given
   Qreg.  Since the host is little-endian, the least significant lane
   has the lowest offset. */
static Int offsetQRegLane ( UInt qregNo, IRType laneTy, UInt laneNo )
{
   vassert(host_endness == VexEndnessLE);
   Int base = offsetQReg128(qregNo);
   /* Since the host is little-endian, the least significant lane
      will be at the lowest address. */
   /* Restrict this to known types, so as to avoid silently accepting
      stupid types. */
   UInt laneSzB = 0;
   switch (laneTy) {
      case Ity_I8:                 laneSzB = 1;  break;
      case Ity_F16: case Ity_I16:  laneSzB = 2;  break;
      case Ity_F32: case Ity_I32:  laneSzB = 4;  break;
      case Ity_F64: case Ity_I64:  laneSzB = 8;  break;
      case Ity_V128:               laneSzB = 16; break;
      default: break;
   }
   vassert(laneSzB > 0);
   UInt minOff = laneNo * laneSzB;
   UInt maxOff = minOff + laneSzB - 1;
   vassert(maxOff < 16);
   return base + minOff;
}
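/* Example: offsetQRegLane(7, Ity_I32, 3) == offsetQReg128(7) + 12,
   the topmost 32-bit lane of q7, given the little-endian layout. */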
/* Put to the least significant lane of a Qreg. */
static void putQRegLO ( UInt qregNo, IRExpr* e )
{
   IRType ty  = typeOfIRExpr(irsb->tyenv, e);
   Int    off = offsetQRegLane(qregNo, ty, 0);
   switch (ty) {
      case Ity_I8:  case Ity_I16: case Ity_I32: case Ity_I64:
      case Ity_F16: case Ity_F32: case Ity_F64: case Ity_V128:
         break;
      default:
         vassert(0); // Other cases are probably invalid
   }
   stmt(IRStmt_Put(off, e));
}
/* Get from the least significant lane of a Qreg. */
static IRExpr* getQRegLO ( UInt qregNo, IRType ty )
{
   Int off = offsetQRegLane(qregNo, ty, 0);
   switch (ty) {
      case Ity_I8:
      case Ity_F16: case Ity_I16:
      case Ity_I32: case Ity_I64:
      case Ity_F32: case Ity_F64: case Ity_V128:
         break;
      default:
         vassert(0); // Other cases are ATC
   }
   return IRExpr_Get(off, ty);
}
static const HChar* nameQRegLO ( UInt qregNo, IRType laneTy )
{
   static const HChar* namesQ[32]
      = { "q0",  "q1",  "q2",  "q3",  "q4",  "q5",  "q6",  "q7",
          "q8",  "q9",  "q10", "q11", "q12", "q13", "q14", "q15",
          "q16", "q17", "q18", "q19", "q20", "q21", "q22", "q23",
          "q24", "q25", "q26", "q27", "q28", "q29", "q30", "q31" };
   static const HChar* namesD[32]
      = { "d0",  "d1",  "d2",  "d3",  "d4",  "d5",  "d6",  "d7",
          "d8",  "d9",  "d10", "d11", "d12", "d13", "d14", "d15",
          "d16", "d17", "d18", "d19", "d20", "d21", "d22", "d23",
          "d24", "d25", "d26", "d27", "d28", "d29", "d30", "d31" };
   static const HChar* namesS[32]
      = { "s0",  "s1",  "s2",  "s3",  "s4",  "s5",  "s6",  "s7",
          "s8",  "s9",  "s10", "s11", "s12", "s13", "s14", "s15",
          "s16", "s17", "s18", "s19", "s20", "s21", "s22", "s23",
          "s24", "s25", "s26", "s27", "s28", "s29", "s30", "s31" };
   static const HChar* namesH[32]
      = { "h0",  "h1",  "h2",  "h3",  "h4",  "h5",  "h6",  "h7",
          "h8",  "h9",  "h10", "h11", "h12", "h13", "h14", "h15",
          "h16", "h17", "h18", "h19", "h20", "h21", "h22", "h23",
          "h24", "h25", "h26", "h27", "h28", "h29", "h30", "h31" };
   static const HChar* namesB[32]
      = { "b0",  "b1",  "b2",  "b3",  "b4",  "b5",  "b6",  "b7",
          "b8",  "b9",  "b10", "b11", "b12", "b13", "b14", "b15",
          "b16", "b17", "b18", "b19", "b20", "b21", "b22", "b23",
          "b24", "b25", "b26", "b27", "b28", "b29", "b30", "b31" };
   vassert(qregNo < 32);
   switch (sizeofIRType(laneTy)) {
      case 1:  return namesB[qregNo];
      case 2:  return namesH[qregNo];
      case 4:  return namesS[qregNo];
      case 8:  return namesD[qregNo];
      case 16: return namesQ[qregNo];
      default: vassert(0);
   }
}

static const HChar* nameQReg128 ( UInt qregNo )
{
   return nameQRegLO(qregNo, Ity_V128);
}
/* Find the offset of the most significant half (8 bytes) of the given
   Qreg.  This requires knowing the endianness of the host. */
static Int offsetQRegHI64 ( UInt qregNo )
{
   return offsetQRegLane(qregNo, Ity_I64, 1);
}

static IRExpr* getQRegHI64 ( UInt qregNo )
{
   return IRExpr_Get(offsetQRegHI64(qregNo), Ity_I64);
}

static void putQRegHI64 ( UInt qregNo, IRExpr* e )
{
   IRType ty  = typeOfIRExpr(irsb->tyenv, e);
   Int    off = offsetQRegHI64(qregNo);
   switch (ty) {
      case Ity_I64: case Ity_F64:
         break;
      default:
         vassert(0); // Other cases are plain wrong
   }
   stmt(IRStmt_Put(off, e));
}

/* Put to a specified lane of a Qreg. */
static void putQRegLane ( UInt qregNo, UInt laneNo, IRExpr* e )
{
   IRType laneTy = typeOfIRExpr(irsb->tyenv, e);
   Int    off    = offsetQRegLane(qregNo, laneTy, laneNo);
   switch (laneTy) {
      case Ity_F64: case Ity_I64:
      case Ity_I32: case Ity_F32:
      case Ity_I16: case Ity_F16:
      case Ity_I8:
         break;
      default:
         vassert(0); // Other cases are ATC
   }
   stmt(IRStmt_Put(off, e));
}

/* Get from a specified lane of a Qreg. */
static IRExpr* getQRegLane ( UInt qregNo, UInt laneNo, IRType laneTy )
{
   Int off = offsetQRegLane(qregNo, laneTy, laneNo);
   switch (laneTy) {
      case Ity_I64: case Ity_I32: case Ity_I16: case Ity_I8:
      case Ity_F64: case Ity_F32: case Ity_F16:
         break;
      default:
         vassert(0); // Other cases are ATC
   }
   return IRExpr_Get(off, laneTy);
}
//ZZ /* ---------------- Misc registers ---------------- */
//ZZ
//ZZ static void putMiscReg32 ( UInt    gsoffset,
//ZZ                            IRExpr* e, /* :: Ity_I32 */
//ZZ                            IRTemp  guardT /* :: Ity_I32, 0 or 1 */)
//ZZ {
//ZZ    switch (gsoffset) {
//ZZ       case OFFB_FPSCR:   break;
//ZZ       case OFFB_QFLAG32: break;
//ZZ       case OFFB_GEFLAG0: break;
//ZZ       case OFFB_GEFLAG1: break;
//ZZ       case OFFB_GEFLAG2: break;
//ZZ       case OFFB_GEFLAG3: break;
//ZZ       default: vassert(0); /* awaiting more cases */
//ZZ    }
//ZZ    vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32);
//ZZ
//ZZ    if (guardT == IRTemp_INVALID) {
//ZZ       /* unconditional write */
//ZZ       stmt(IRStmt_Put(gsoffset, e));
//ZZ    } else {
//ZZ       stmt(IRStmt_Put(
//ZZ          gsoffset,
//ZZ          IRExpr_ITE( binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0)),
//ZZ                      e, IRExpr_Get(gsoffset, Ity_I32) )
//ZZ       ));
//ZZ    }
//ZZ }
//ZZ
//ZZ static IRTemp get_ITSTATE ( void )
//ZZ {
//ZZ    ASSERT_IS_THUMB;
//ZZ    IRTemp t = newTemp(Ity_I32);
//ZZ    assign(t, IRExpr_Get( OFFB_ITSTATE, Ity_I32));
//ZZ    return t;
//ZZ }
//ZZ
//ZZ static void put_ITSTATE ( IRTemp t )
//ZZ {
//ZZ    ASSERT_IS_THUMB;
//ZZ    stmt( IRStmt_Put( OFFB_ITSTATE, mkexpr(t)) );
//ZZ }
//ZZ
//ZZ static IRTemp get_QFLAG32 ( void )
//ZZ {
//ZZ    IRTemp t = newTemp(Ity_I32);
//ZZ    assign(t, IRExpr_Get( OFFB_QFLAG32, Ity_I32));
//ZZ    return t;
//ZZ }
//ZZ
//ZZ static void put_QFLAG32 ( IRTemp t, IRTemp condT )
//ZZ {
//ZZ    putMiscReg32( OFFB_QFLAG32, mkexpr(t), condT );
//ZZ }
//ZZ
//ZZ /* Stickily set the 'Q' flag (APSR bit 27) of the APSR (Application Program
//ZZ    Status Register) to indicate that overflow or saturation occurred.
//ZZ    Nb: t must be zero to denote no saturation, and any nonzero
//ZZ    value to indicate saturation. */
//ZZ static void or_into_QFLAG32 ( IRExpr* e, IRTemp condT )
//ZZ {
//ZZ    IRTemp old = get_QFLAG32();
//ZZ    IRTemp nyu = newTemp(Ity_I32);
//ZZ    assign(nyu, binop(Iop_Or32, mkexpr(old), e) );
//ZZ    put_QFLAG32(nyu, condT);
//ZZ }
/* ---------------- FPCR stuff ---------------- */

/* Generate IR to get hold of the rounding mode bits in FPCR, and
   convert them to IR format.  Bind the final result to the
   returned temp. */
static IRTemp /* :: Ity_I32 */ mk_get_IR_rounding_mode ( void )
{
   /* The ARMvfp encoding for rounding mode bits is:
         00  to nearest
         01  to +infinity
         10  to -infinity
         11  to zero
      We need to convert that to the IR encoding:
         00  to nearest (the default)
         01  to -infinity
         10  to +infinity
         11  to zero
      Which can be done by swapping bits 0 and 1.
      The rmode bits are at 23:22 in FPSCR.
   */
   IRTemp armEncd = newTemp(Ity_I32);
   IRTemp swapped = newTemp(Ity_I32);
   /* Fish FPCR[23:22] out, and slide to bottom.  Doesn't matter that
      we don't zero out bits 24 and above, since the assignment to
      'swapped' will mask them out anyway. */
   assign(armEncd,
          binop(Iop_Shr32, IRExpr_Get(OFFB_FPCR, Ity_I32), mkU8(22)));
   /* Now swap them. */
   assign(swapped,
          binop(Iop_Or32,
                binop(Iop_And32,
                      binop(Iop_Shl32, mkexpr(armEncd), mkU8(1)),
                      mkU32(2)),
                binop(Iop_And32,
                      binop(Iop_Shr32, mkexpr(armEncd), mkU8(1)),
                      mkU32(1))
         ));
   return swapped;
}
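/* The same conversion in scalar form (illustrative only):
      UInt armRM = (fpcr >> 22) & 3;                       // FPCR.RMode
      UInt irRM  = ((armRM << 1) & 2) | ((armRM >> 1) & 1);
   so ARM 01 (to +inf) becomes IR 10, and ARM 10 (to -inf) becomes
   IR 01, as required. */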
/*------------------------------------------------------------*/
/*--- Helpers for flag handling and conditional insns      ---*/
/*------------------------------------------------------------*/

static const HChar* nameARM64Condcode ( ARM64Condcode cond )
{
   switch (cond) {
      case ARM64CondEQ: return "eq";
      case ARM64CondNE: return "ne";
      case ARM64CondCS: return "cs";  // or 'hs'
      case ARM64CondCC: return "cc";  // or 'lo'
      case ARM64CondMI: return "mi";
      case ARM64CondPL: return "pl";
      case ARM64CondVS: return "vs";
      case ARM64CondVC: return "vc";
      case ARM64CondHI: return "hi";
      case ARM64CondLS: return "ls";
      case ARM64CondGE: return "ge";
      case ARM64CondLT: return "lt";
      case ARM64CondGT: return "gt";
      case ARM64CondLE: return "le";
      case ARM64CondAL: return "al";
      case ARM64CondNV: return "nv";
      default: vpanic("name_ARM64Condcode");
   }
}

/* and a handy shorthand for it */
static const HChar* nameCC ( ARM64Condcode cond ) {
   return nameARM64Condcode(cond);
}
/* Build IR to calculate some particular condition from stored
   CC_OP/CC_DEP1/CC_DEP2/CC_NDEP.  Returns an expression of type
   Ity_I64, suitable for narrowing.  Although the return type is
   Ity_I64, the returned value is either 0 or 1.  'cond' must be
   :: Ity_I64 and must denote the condition to compute in
   bits 7:4, and be zero everywhere else. */
static IRExpr* mk_arm64g_calculate_condition_dyn ( IRExpr* cond )
{
   vassert(typeOfIRExpr(irsb->tyenv, cond) == Ity_I64);
   /* And 'cond' had better produce a value in which only bits 7:4 are
      nonzero.  However, obviously we can't assert for that. */

   /* So what we're constructing for the first argument is
      "(cond << 4) | stored-operation".
      However, as per comments above, 'cond' must be supplied
      pre-shifted to this function.

      This pairing scheme requires that the ARM64_CC_OP_ values all fit
      in 4 bits.  Hence we are passing a (COND, OP) pair in the lowest
      8 bits of the first argument. */
   IRExpr** args
      = mkIRExprVec_4(
           binop(Iop_Or64, IRExpr_Get(OFFB_CC_OP, Ity_I64), cond),
           IRExpr_Get(OFFB_CC_DEP1, Ity_I64),
           IRExpr_Get(OFFB_CC_DEP2, Ity_I64),
           IRExpr_Get(OFFB_CC_NDEP, Ity_I64)
        );
   IRExpr* call
      = mkIRExprCCall(
           Ity_I64,
           0/*regparm*/,
           "arm64g_calculate_condition", &arm64g_calculate_condition,
           args
        );
   /* Exclude the requested condition, OP and NDEP from definedness
      checking.  We're only interested in DEP1 and DEP2. */
   call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
   return call;
}
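/* Example of the packing: to compute NE against a stored SUB64
   thunk, the helper's first argument ends up as
   (ARM64CondNE << 4) | ARM64G_CC_OP_SUB64, with the condition in
   bits 7:4 and the operation in bits 3:0. */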
/* Build IR to calculate some particular condition from stored
   CC_OP/CC_DEP1/CC_DEP2/CC_NDEP.  Returns an expression of type
   Ity_I64, suitable for narrowing.  Although the return type is
   Ity_I64, the returned value is either 0 or 1. */
static IRExpr* mk_arm64g_calculate_condition ( ARM64Condcode cond )
{
   /* First arg is "(cond << 4) | condition".  This requires that the
      ARM64_CC_OP_ values all fit in 4 bits.  Hence we are passing a
      (COND, OP) pair in the lowest 8 bits of the first argument. */
   vassert(cond >= 0 && cond <= 15);
   return mk_arm64g_calculate_condition_dyn( mkU64(cond << 4) );
}
/* Build IR to calculate just the carry flag from stored
   CC_OP/CC_DEP1/CC_DEP2/CC_NDEP.  Returns an expression ::
   Ity_I64. */
static IRExpr* mk_arm64g_calculate_flag_c ( void )
{
   IRExpr** args
      = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP,   Ity_I64),
                       IRExpr_Get(OFFB_CC_DEP1, Ity_I64),
                       IRExpr_Get(OFFB_CC_DEP2, Ity_I64),
                       IRExpr_Get(OFFB_CC_NDEP, Ity_I64) );
   IRExpr* call
      = mkIRExprCCall(
           Ity_I64,
           0/*regparm*/,
           "arm64g_calculate_flag_c", &arm64g_calculate_flag_c,
           args
        );
   /* Exclude OP and NDEP from definedness checking.  We're only
      interested in DEP1 and DEP2. */
   call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
   return call;
}
//ZZ /* Build IR to calculate just the overflow flag from stored
//ZZ    CC_OP/CC_DEP1/CC_DEP2/CC_NDEP.  Returns an expression ::
//ZZ    Ity_I32. */
//ZZ static IRExpr* mk_armg_calculate_flag_v ( void )
//ZZ {
//ZZ    IRExpr** args
//ZZ       = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP,   Ity_I32),
//ZZ                        IRExpr_Get(OFFB_CC_DEP1, Ity_I32),
//ZZ                        IRExpr_Get(OFFB_CC_DEP2, Ity_I32),
//ZZ                        IRExpr_Get(OFFB_CC_NDEP, Ity_I32) );
//ZZ    IRExpr* call
//ZZ       = mkIRExprCCall(
//ZZ            Ity_I32,
//ZZ            0/*regparm*/,
//ZZ            "armg_calculate_flag_v", &armg_calculate_flag_v,
//ZZ            args
//ZZ         );
//ZZ    /* Exclude OP and NDEP from definedness checking.  We're only
//ZZ       interested in DEP1 and DEP2. */
//ZZ    call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
//ZZ    return call;
//ZZ }
/* Build IR to calculate N Z C V in bits 31:28 of the
   returned word. */
static IRExpr* mk_arm64g_calculate_flags_nzcv ( void )
{
   IRExpr** args
      = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP,   Ity_I64),
                       IRExpr_Get(OFFB_CC_DEP1, Ity_I64),
                       IRExpr_Get(OFFB_CC_DEP2, Ity_I64),
                       IRExpr_Get(OFFB_CC_NDEP, Ity_I64) );
   IRExpr* call
      = mkIRExprCCall(
           Ity_I64,
           0/*regparm*/,
           "arm64g_calculate_flags_nzcv", &arm64g_calculate_flags_nzcv,
           args
        );
   /* Exclude OP and NDEP from definedness checking.  We're only
      interested in DEP1 and DEP2. */
   call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
   return call;
}
/* Build IR to set the flags thunk, in the most general case. */
static
void setFlags_D1_D2_ND ( UInt cc_op,
                         IRTemp t_dep1, IRTemp t_dep2, IRTemp t_ndep )
{
   vassert(typeOfIRTemp(irsb->tyenv, t_dep1) == Ity_I64);
   vassert(typeOfIRTemp(irsb->tyenv, t_dep2) == Ity_I64);
   vassert(typeOfIRTemp(irsb->tyenv, t_ndep) == Ity_I64);
   vassert(cc_op >= ARM64G_CC_OP_COPY && cc_op < ARM64G_CC_OP_NUMBER);
   stmt( IRStmt_Put( OFFB_CC_OP,   mkU64(cc_op) ));
   stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(t_dep1) ));
   stmt( IRStmt_Put( OFFB_CC_DEP2, mkexpr(t_dep2) ));
   stmt( IRStmt_Put( OFFB_CC_NDEP, mkexpr(t_ndep) ));
}
/* Build IR to set the flags thunk after ADD or SUB. */
static
void setFlags_ADD_SUB ( Bool is64, Bool isSUB, IRTemp argL, IRTemp argR )
{
   IRTemp argL64 = IRTemp_INVALID;
   IRTemp argR64 = IRTemp_INVALID;
   IRTemp z64    = newTemp(Ity_I64);
   if (is64) {
      argL64 = argL;
      argR64 = argR;
   } else {
      argL64 = newTemp(Ity_I64);
      argR64 = newTemp(Ity_I64);
      assign(argL64, unop(Iop_32Uto64, mkexpr(argL)));
      assign(argR64, unop(Iop_32Uto64, mkexpr(argR)));
   }
   assign(z64, mkU64(0));
   UInt cc_op = ARM64G_CC_OP_NUMBER;
   /**/ if ( isSUB &&  is64) { cc_op = ARM64G_CC_OP_SUB64; }
   else if ( isSUB && !is64) { cc_op = ARM64G_CC_OP_SUB32; }
   else if (!isSUB &&  is64) { cc_op = ARM64G_CC_OP_ADD64; }
   else if (!isSUB && !is64) { cc_op = ARM64G_CC_OP_ADD32; }
   else { vassert(0); }
   setFlags_D1_D2_ND(cc_op, argL64, argR64, z64);
}
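/* Illustrative use from an ADDS-style decoder (hypothetical locals):
      IRTemp argL = newTemp(Ity_I64), argR = newTemp(Ity_I64);
      assign(argL, getIReg64orZR(nn));
      assign(argR, mkU64(imm));
      setFlags_ADD_SUB(True/*is64*/, False/*isSUB*/, argL, argR);
   NZCV itself is only computed later, on demand, from the thunk. */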
/* Build IR to set the flags thunk after ADC or SBC. */
static
void setFlags_ADC_SBC ( Bool is64, Bool isSBC,
                        IRTemp argL, IRTemp argR, IRTemp oldC )
{
   IRTemp argL64 = IRTemp_INVALID;
   IRTemp argR64 = IRTemp_INVALID;
   IRTemp oldC64 = IRTemp_INVALID;
   if (is64) {
      argL64 = argL;
      argR64 = argR;
      oldC64 = oldC;
   } else {
      argL64 = newTemp(Ity_I64);
      argR64 = newTemp(Ity_I64);
      oldC64 = newTemp(Ity_I64);
      assign(argL64, unop(Iop_32Uto64, mkexpr(argL)));
      assign(argR64, unop(Iop_32Uto64, mkexpr(argR)));
      assign(oldC64, unop(Iop_32Uto64, mkexpr(oldC)));
   }
   UInt cc_op = ARM64G_CC_OP_NUMBER;
   /**/ if ( isSBC &&  is64) { cc_op = ARM64G_CC_OP_SBC64; }
   else if ( isSBC && !is64) { cc_op = ARM64G_CC_OP_SBC32; }
   else if (!isSBC &&  is64) { cc_op = ARM64G_CC_OP_ADC64; }
   else if (!isSBC && !is64) { cc_op = ARM64G_CC_OP_ADC32; }
   else { vassert(0); }
   setFlags_D1_D2_ND(cc_op, argL64, argR64, oldC64);
}
/* Build IR to set the flags thunk after ADD or SUB, if the given
   condition evaluates to True at run time.  If not, the flags are set
   to the specified NZCV value. */
static
void setFlags_ADD_SUB_conditionally (
        Bool is64, Bool isSUB,
        IRTemp cond, IRTemp argL, IRTemp argR, UInt nzcv
     )
{
   /* Generate IR as follows:
        CC_OP   = ITE(cond, OP_{ADD,SUB}{32,64}, OP_COPY)
        CC_DEP1 = ITE(cond, argL64, nzcv << 28)
        CC_DEP2 = ITE(cond, argR64, 0)
        CC_NDEP = 0
   */

   IRTemp z64 = newTemp(Ity_I64);
   assign(z64, mkU64(0));

   /* Establish the operation and operands for the True case. */
   IRTemp t_dep1 = IRTemp_INVALID;
   IRTemp t_dep2 = IRTemp_INVALID;
   UInt   t_op   = ARM64G_CC_OP_NUMBER;
   /**/ if ( isSUB &&  is64) { t_op = ARM64G_CC_OP_SUB64; }
   else if ( isSUB && !is64) { t_op = ARM64G_CC_OP_SUB32; }
   else if (!isSUB &&  is64) { t_op = ARM64G_CC_OP_ADD64; }
   else if (!isSUB && !is64) { t_op = ARM64G_CC_OP_ADD32; }
   else { vassert(0); }

   if (is64) {
      t_dep1 = argL;
      t_dep2 = argR;
   } else {
      t_dep1 = newTemp(Ity_I64);
      t_dep2 = newTemp(Ity_I64);
      assign(t_dep1, unop(Iop_32Uto64, mkexpr(argL)));
      assign(t_dep2, unop(Iop_32Uto64, mkexpr(argR)));
   }

   /* Establish the operation and operands for the False case. */
   IRTemp f_dep1 = newTemp(Ity_I64);
   IRTemp f_dep2 = z64;
   UInt   f_op   = ARM64G_CC_OP_COPY;
   assign(f_dep1, mkU64(nzcv << 28));

   /* Final thunk values */
   IRTemp dep1 = newTemp(Ity_I64);
   IRTemp dep2 = newTemp(Ity_I64);
   IRTemp op   = newTemp(Ity_I64);

   assign(op,   IRExpr_ITE(mkexpr(cond), mkU64(t_op),    mkU64(f_op)));
   assign(dep1, IRExpr_ITE(mkexpr(cond), mkexpr(t_dep1), mkexpr(f_dep1)));
   assign(dep2, IRExpr_ITE(mkexpr(cond), mkexpr(t_dep2), mkexpr(f_dep2)));

   /* finally .. */
   stmt( IRStmt_Put( OFFB_CC_OP,   mkexpr(op) ));
   stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(dep1) ));
   stmt( IRStmt_Put( OFFB_CC_DEP2, mkexpr(dep2) ));
   stmt( IRStmt_Put( OFFB_CC_NDEP, mkexpr(z64) ));
}
/* Build IR to set the flags thunk after AND/OR/XOR or variants thereof. */
static
void setFlags_LOGIC ( Bool is64, IRTemp res )
{
   IRTemp res64 = IRTemp_INVALID;
   IRTemp z64   = newTemp(Ity_I64);
   UInt   cc_op = ARM64G_CC_OP_NUMBER;
   if (is64) {
      res64 = res;
      cc_op = ARM64G_CC_OP_LOGIC64;
   } else {
      res64 = newTemp(Ity_I64);
      assign(res64, unop(Iop_32Uto64, mkexpr(res)));
      cc_op = ARM64G_CC_OP_LOGIC32;
   }
   assign(z64, mkU64(0));
   setFlags_D1_D2_ND(cc_op, res64, z64, z64);
}
/* Build IR to set the flags thunk to a given NZCV value.  NZCV is
   located in bits 31:28 of the supplied value. */
static
void setFlags_COPY ( IRTemp nzcv_28x0 )
{
   IRTemp z64 = newTemp(Ity_I64);
   assign(z64, mkU64(0));
   setFlags_D1_D2_ND(ARM64G_CC_OP_COPY, nzcv_28x0, z64, z64);
}
//ZZ /* Minor variant of the above that sets NDEP to zero (if it
//ZZ    sets it at all) */
//ZZ static void setFlags_D1_D2 ( UInt cc_op, IRTemp t_dep1,
//ZZ                              IRTemp t_dep2,
//ZZ                              IRTemp guardT /* :: Ity_I32, 0 or 1 */ )
//ZZ {
//ZZ    IRTemp z32 = newTemp(Ity_I32);
//ZZ    assign( z32, mkU32(0) );
//ZZ    setFlags_D1_D2_ND( cc_op, t_dep1, t_dep2, z32, guardT );
//ZZ }
//ZZ
//ZZ /* Minor variant of the above that sets DEP2 to zero (if it
//ZZ    sets it at all) */
//ZZ static void setFlags_D1_ND ( UInt cc_op, IRTemp t_dep1,
//ZZ                              IRTemp t_ndep,
//ZZ                              IRTemp guardT /* :: Ity_I32, 0 or 1 */ )
//ZZ {
//ZZ    IRTemp z32 = newTemp(Ity_I32);
//ZZ    assign( z32, mkU32(0) );
//ZZ    setFlags_D1_D2_ND( cc_op, t_dep1, z32, t_ndep, guardT );
//ZZ }
//ZZ
//ZZ /* Minor variant of the above that sets DEP2 and NDEP to zero (if it
//ZZ    sets them at all) */
//ZZ static void setFlags_D1 ( UInt cc_op, IRTemp t_dep1,
//ZZ                           IRTemp guardT /* :: Ity_I32, 0 or 1 */ )
//ZZ {
//ZZ    IRTemp z32 = newTemp(Ity_I32);
//ZZ    assign( z32, mkU32(0) );
//ZZ    setFlags_D1_D2_ND( cc_op, t_dep1, z32, z32, guardT );
//ZZ }
/*------------------------------------------------------------*/
/*--- Misc math helpers                                    ---*/
/*------------------------------------------------------------*/

/* Generate IR for ((x & mask) >>u sh) | ((x << sh) & mask) */
static IRTemp math_SWAPHELPER ( IRTemp x, ULong mask, Int sh )
{
   IRTemp maskT = newTemp(Ity_I64);
   IRTemp res   = newTemp(Ity_I64);
   vassert(sh >= 1 && sh <= 63);
   assign(maskT, mkU64(mask));
   assign( res,
           binop(Iop_Or64,
                 binop(Iop_Shr64,
                       binop(Iop_And64,mkexpr(x),mkexpr(maskT)),
                       mkU8(sh)),
                 binop(Iop_And64,
                       binop(Iop_Shl64,mkexpr(x),mkU8(sh)),
                       mkexpr(maskT))
                 )
           );
   return res;
}
/* Generates byte swaps within 32-bit lanes. */
static IRTemp math_UINTSWAP64 ( IRTemp src )
{
   IRTemp res;
   res = math_SWAPHELPER(src, 0xFF00FF00FF00FF00ULL, 8);
   res = math_SWAPHELPER(res, 0xFFFF0000FFFF0000ULL, 16);
   return res;
}

/* Generates byte swaps within 16-bit lanes. */
static IRTemp math_USHORTSWAP64 ( IRTemp src )
{
   IRTemp res;
   res = math_SWAPHELPER(src, 0xFF00FF00FF00FF00ULL, 8);
   return res;
}

/* Generates a 64-bit byte swap. */
static IRTemp math_BYTESWAP64 ( IRTemp src )
{
   IRTemp res;
   res = math_SWAPHELPER(src, 0xFF00FF00FF00FF00ULL, 8);
   res = math_SWAPHELPER(res, 0xFFFF0000FFFF0000ULL, 16);
   res = math_SWAPHELPER(res, 0xFFFFFFFF00000000ULL, 32);
   return res;
}
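/* Worked example for math_BYTESWAP64: 0x0102030405060708
   -> 0x0201040306050807 (swap adjacent bytes)
   -> 0x0403020108070605 (swap 16-bit pairs)
   -> 0x0807060504030201 (swap 32-bit halves), the full reversal. */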
/* Generates a 64-bit bit swap. */
static IRTemp math_BITSWAP64 ( IRTemp src )
{
   IRTemp res;
   res = math_SWAPHELPER(src, 0xAAAAAAAAAAAAAAAAULL, 1);
   res = math_SWAPHELPER(res, 0xCCCCCCCCCCCCCCCCULL, 2);
   res = math_SWAPHELPER(res, 0xF0F0F0F0F0F0F0F0ULL, 4);
   return math_BYTESWAP64(res);
}
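/* The first three swaps reverse the bit order within each byte
   (adjacent bits, then bit pairs, then nibbles); the trailing
   math_BYTESWAP64 then reverses the bytes, completing a full 64-bit
   bit reversal. */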
/* Duplicates the bits at the bottom of the given word to fill the
   whole word.  src :: Ity_I64 is assumed to have zeroes everywhere
   except for the bottom bits. */
static IRTemp math_DUP_TO_64 ( IRTemp src, IRType srcTy )
{
   if (srcTy == Ity_I8) {
      IRTemp t16 = newTemp(Ity_I64);
      assign(t16, binop(Iop_Or64, mkexpr(src),
                                  binop(Iop_Shl64, mkexpr(src), mkU8(8))));
      IRTemp t32 = newTemp(Ity_I64);
      assign(t32, binop(Iop_Or64, mkexpr(t16),
                                  binop(Iop_Shl64, mkexpr(t16), mkU8(16))));
      IRTemp t64 = newTemp(Ity_I64);
      assign(t64, binop(Iop_Or64, mkexpr(t32),
                                  binop(Iop_Shl64, mkexpr(t32), mkU8(32))));
      return t64;
   }
   if (srcTy == Ity_I16) {
      IRTemp t32 = newTemp(Ity_I64);
      assign(t32, binop(Iop_Or64, mkexpr(src),
                                  binop(Iop_Shl64, mkexpr(src), mkU8(16))));
      IRTemp t64 = newTemp(Ity_I64);
      assign(t64, binop(Iop_Or64, mkexpr(t32),
                                  binop(Iop_Shl64, mkexpr(t32), mkU8(32))));
      return t64;
   }
   if (srcTy == Ity_I32) {
      IRTemp t64 = newTemp(Ity_I64);
      assign(t64, binop(Iop_Or64, mkexpr(src),
                                  binop(Iop_Shl64, mkexpr(src), mkU8(32))));
      return t64;
   }
   if (srcTy == Ity_I64) {
      return src;
   }
   vassert(0);
}

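/* Scalar analogue of math_DUP_TO_64, illustrative only: each
   OR-with-shift doubles the replicated width, so an 8-bit element
   0xAB becomes 0xABAB, then 0xABABABAB, then 0xABABABABABABABABULL. */
#if 0
static ULong example_DUP_TO_64 ( ULong src, UInt srcBits /* 8,16,32,64 */ )
{
   ULong x = src;
   if (srcBits == 8)  { x |= x << 8;  x |= x << 16; x |= x << 32; return x; }
   if (srcBits == 16) { x |= x << 16; x |= x << 32; return x; }
   if (srcBits == 32) { x |= x << 32; return x; }
   return x; /* 64 bit: identity */
}
#endif
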
/* Duplicates the src element exactly so as to fill a V128 value. */
static IRTemp math_DUP_TO_V128 ( IRTemp src, IRType srcTy )
{
   IRTemp res = newTempV128();
   if (srcTy == Ity_F64) {
      IRTemp i64 = newTemp(Ity_I64);
      assign(i64, unop(Iop_ReinterpF64asI64, mkexpr(src)));
      assign(res, binop(Iop_64HLtoV128, mkexpr(i64), mkexpr(i64)));
      return res;
   }
   if (srcTy == Ity_F32) {
      IRTemp i64a = newTemp(Ity_I64);
      assign(i64a, unop(Iop_32Uto64, unop(Iop_ReinterpF32asI32, mkexpr(src))));
      IRTemp i64b = newTemp(Ity_I64);
      assign(i64b, binop(Iop_Or64, binop(Iop_Shl64, mkexpr(i64a), mkU8(32)),
                                   mkexpr(i64a)));
      assign(res, binop(Iop_64HLtoV128, mkexpr(i64b), mkexpr(i64b)));
      return res;
   }
   if (srcTy == Ity_I64) {
      assign(res, binop(Iop_64HLtoV128, mkexpr(src), mkexpr(src)));
      return res;
   }
   if (srcTy == Ity_I32 || srcTy == Ity_I16 || srcTy == Ity_I8) {
      IRTemp t1 = newTemp(Ity_I64);
      assign(t1, widenUto64(srcTy, mkexpr(src)));
      IRTemp t2 = math_DUP_TO_64(t1, srcTy);
      assign(res, binop(Iop_64HLtoV128, mkexpr(t2), mkexpr(t2)));
      return res;
   }
   vassert(0);
}

/* |fullWidth| is a full V128 width result.  Depending on bitQ,
   zero out the upper half. */
static IRExpr* math_MAYBE_ZERO_HI64 ( UInt bitQ, IRTemp fullWidth )
{
   if (bitQ == 1) return mkexpr(fullWidth);
   if (bitQ == 0) return unop(Iop_ZeroHI64ofV128, mkexpr(fullWidth));
   vassert(0);
}

/* The same, but from an expression instead. */
static IRExpr* math_MAYBE_ZERO_HI64_fromE ( UInt bitQ, IRExpr* fullWidth )
{
   IRTemp fullWidthT = newTempV128();
   assign(fullWidthT, fullWidth);
   return math_MAYBE_ZERO_HI64(bitQ, fullWidthT);
}

/*------------------------------------------------------------*/
/*--- FP comparison helpers                                ---*/
/*------------------------------------------------------------*/

/* irRes :: Ity_I32 holds a floating point comparison result encoded
   as an IRCmpF64Result.  Generate code to convert it to an
   ARM64-encoded (N,Z,C,V) group in the lowest 4 bits of an I64 value.
   Assign a new temp to hold that value, and return the temp. */
static
IRTemp mk_convert_IRCmpF64Result_to_NZCV ( IRTemp irRes32 )
{
   IRTemp ix       = newTemp(Ity_I64);
   IRTemp termL    = newTemp(Ity_I64);
   IRTemp termR    = newTemp(Ity_I64);
   IRTemp nzcv     = newTemp(Ity_I64);
   IRTemp irRes    = newTemp(Ity_I64);

   /* This is where the fun starts.  We have to convert 'irRes' from
      an IR-convention return result (IRCmpF64Result) to an
      ARM-encoded (N,Z,C,V) group.  The final result is in the bottom
      4 bits of 'nzcv'. */
   /* Map compare result from IR to ARM(nzcv) */
   /*
      FP cmp result | IR   | ARM(nzcv)
      --------------------------------
      UN              0x45   0011
      LT              0x01   1000
      GT              0x00   0010
      EQ              0x40   0110
   */
   /* Now since you're probably wondering WTF ..

      ix fishes the useful bits out of the IR value, bits 6 and 0, and
      places them side by side, giving a number which is 0, 1, 2 or 3.

      termL is a sequence cooked up by GNU superopt.  It converts ix
         into an almost correct value NZCV value (incredibly), except
         for the case of UN, where it produces 0100 instead of the
         required 0011.

      termR is therefore a correction term, also computed from ix.  It
         is 1 in the UN case and 0 for LT, GT and EQ.  Hence, to get
         the final correct value, we subtract termR from termL.

      Don't take my word for it.  There's a test program at the bottom
      of guest_arm_toIR.c, to try this out with.
   */
   assign(irRes, unop(Iop_32Uto64, mkexpr(irRes32)));

   assign(
      ix,
      binop(Iop_Or64,
            binop(Iop_And64,
                  binop(Iop_Shr64, mkexpr(irRes), mkU8(5)),
                  mkU64(3)),
            binop(Iop_And64, mkexpr(irRes), mkU64(1))));

   assign(
      termL,
      binop(Iop_Add64,
            binop(Iop_Shr64,
                  binop(Iop_Sub64,
                        binop(Iop_Shl64,
                              binop(Iop_Xor64, mkexpr(ix), mkU64(1)),
                              mkU8(62)),
                        mkU64(1)),
                  mkU8(61)),
            mkU64(1)));

   assign(
      termR,
      binop(Iop_And64,
            binop(Iop_And64,
                  mkexpr(ix),
                  binop(Iop_Shr64, mkexpr(ix), mkU8(1))),
            mkU64(1)));

   assign(nzcv, binop(Iop_Sub64, mkexpr(termL), mkexpr(termR)));
   return nzcv;
}

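/* A scalar restatement of the ix/termL/termR computation above, so
   the four cases in the table can be checked by hand.  It mirrors
   the IR exactly; illustrative only, and not referenced anywhere. */
#if 0
static ULong example_IRCmpF64_to_NZCV ( UInt irRes32 )
{
   ULong irRes = (ULong)irRes32;
   ULong ix    = ((irRes >> 5) & 3) | (irRes & 1);
   ULong termL = ((((ix ^ 1) << 62) - 1) >> 61) + 1;
   ULong termR = ix & (ix >> 1) & 1;
   return termL - termR;
   /* UN (0x45) -> 0011, LT (0x01) -> 1000,
      GT (0x00) -> 0010, EQ (0x40) -> 0110 */
}
#endif
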
/*------------------------------------------------------------*/
/*--- Data processing (immediate)                          ---*/
/*------------------------------------------------------------*/

/* Helper functions for supporting "DecodeBitMasks" */

static ULong dbm_ROR ( Int width, ULong x, Int rot )
{
   vassert(width > 0 && width <= 64);
   vassert(rot >= 0 && rot < width);
   if (rot == 0) return x;
   ULong res = x >> rot;
   res |= (x << (width - rot));
   if (width < 64)
      res &= ((1ULL << width) - 1);
   return res;
}

static ULong dbm_RepTo64( Int esize, ULong x )
{
   switch (esize) {
      case 64:
         return x;
      case 32:
         x &= 0xFFFFFFFF; x |= (x << 32);
         return x;
      case 16:
         x &= 0xFFFF; x |= (x << 16); x |= (x << 32);
         return x;
      case 8:
         x &= 0xFF; x |= (x << 8); x |= (x << 16); x |= (x << 32);
         return x;
      case 4:
         x &= 0xF; x |= (x << 4); x |= (x << 8);
         x |= (x << 16); x |= (x << 32);
         return x;
      case 2:
         x &= 0x3; x |= (x << 2); x |= (x << 4); x |= (x << 8);
         x |= (x << 16); x |= (x << 32);
         return x;
      default:
         break;
   }
   vpanic("dbm_RepTo64");
   /*NOTREACHED*/
   return 0;
}

static Int dbm_highestSetBit ( ULong x )
{
   Int i;
   for (i = 63; i >= 0; i--) {
      if (x & (1ULL << i))
         return i;
   }
   vassert(x == 0);
   return -1;
}

static
Bool dbm_DecodeBitMasks ( /*OUT*/ULong* wmask, /*OUT*/ULong* tmask,
                          ULong immN, ULong imms, ULong immr, Bool immediate,
                          UInt M /*32 or 64*/)
{
   vassert(immN < (1ULL << 1));
   vassert(imms < (1ULL << 6));
   vassert(immr < (1ULL << 6));
   vassert(immediate == False || immediate == True);
   vassert(M == 32 || M == 64);

   Int len = dbm_highestSetBit( ((immN << 6) & 64) | ((~imms) & 63) );
   if (len < 1) { /* printf("fail1\n"); */ return False; }
   vassert(len <= 6);
   vassert(M >= (1 << len));

   vassert(len >= 1 && len <= 6);
   ULong levels = // (zeroes(6 - len) << (6-len)) | ones(len);
                  (1 << len) - 1;
   vassert(levels >= 1 && levels <= 63);

   if (immediate && ((imms & levels) == levels)) {
      /* printf("fail2 imms %llu levels %llu len %d\n", imms, levels, len); */
      return False;
   }

   ULong S = imms & levels;
   ULong R = immr & levels;
   ULong diff = S - R;

   Int esize = 1 << len;
   vassert(2 <= esize && esize <= 64);

   /* Be careful of these (1ULL << (S+1)) - 1 expressions, and the
      same below with d.  S can be 63 in which case we have an out of
      range and hence undefined shift. */
   vassert(S >= 0 && S <= 63);
   vassert(esize >= (S+1));
   ULong elem_s = // Zeroes(esize-(S+1)):Ones(S+1)
                  //(1ULL << (S+1)) - 1;
                  ((1ULL << S) - 1) + (1ULL << S);

   Int d = // diff<len-1:0>
           diff & ((1 << len)-1);
   vassert(esize >= (d+1));
   vassert(d >= 0 && d <= 63);

   ULong elem_d = // Zeroes(esize-(d+1)):Ones(d+1)
                  //(1ULL << (d+1)) - 1;
                  ((1ULL << d) - 1) + (1ULL << d);

   if (esize != 64) vassert(elem_s < (1ULL << esize));
   if (esize != 64) vassert(elem_d < (1ULL << esize));

   if (wmask) *wmask = dbm_RepTo64(esize, dbm_ROR(esize, elem_s, R));
   if (tmask) *tmask = dbm_RepTo64(esize, elem_d);

   return True;
}

static
Bool dis_ARM64_data_processing_immediate(/*MB_OUT*/DisResult* dres,
                                         UInt insn)
{
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))

   /* insn[28:23]
      10000x PC-rel addressing
      10001x Add/subtract (immediate)
      100100 Logical (immediate)
      100101 Move Wide (immediate)
      100110 Bitfield
      100111 Extract
   */

   /* ------------------ ADD/SUB{,S} imm12 ------------------ */
   if (INSN(28,24) == BITS5(1,0,0,0,1)) {
      Bool is64   = INSN(31,31) == 1;
      Bool isSub  = INSN(30,30) == 1;
      Bool setCC  = INSN(29,29) == 1;
      UInt sh     = INSN(23,22);
      UInt uimm12 = INSN(21,10);
      UInt nn     = INSN(9,5);
      UInt dd     = INSN(4,0);
      const HChar* nm = isSub ? "sub" : "add";
      if (sh >= 2) {
         /* Invalid; fall through */
      } else {
         vassert(sh <= 1);
         uimm12 <<= (12 * sh);
         if (is64) {
            IRTemp argL  = newTemp(Ity_I64);
            IRTemp argR  = newTemp(Ity_I64);
            IRTemp res   = newTemp(Ity_I64);
            assign(argL, getIReg64orSP(nn));
            assign(argR, mkU64(uimm12));
            assign(res,  binop(isSub ? Iop_Sub64 : Iop_Add64,
                               mkexpr(argL), mkexpr(argR)));
            if (setCC) {
               putIReg64orZR(dd, mkexpr(res));
               setFlags_ADD_SUB(True/*is64*/, isSub, argL, argR);
               DIP("%ss %s, %s, 0x%x\n",
                   nm, nameIReg64orZR(dd), nameIReg64orSP(nn), uimm12);
            } else {
               putIReg64orSP(dd, mkexpr(res));
               DIP("%s %s, %s, 0x%x\n",
                   nm, nameIReg64orSP(dd), nameIReg64orSP(nn), uimm12);
            }
         } else {
            IRTemp argL  = newTemp(Ity_I32);
            IRTemp argR  = newTemp(Ity_I32);
            IRTemp res   = newTemp(Ity_I32);
            assign(argL, getIReg32orSP(nn));
            assign(argR, mkU32(uimm12));
            assign(res,  binop(isSub ? Iop_Sub32 : Iop_Add32,
                               mkexpr(argL), mkexpr(argR)));
            if (setCC) {
               putIReg32orZR(dd, mkexpr(res));
               setFlags_ADD_SUB(False/*!is64*/, isSub, argL, argR);
               DIP("%ss %s, %s, 0x%x\n",
                   nm, nameIReg32orZR(dd), nameIReg32orSP(nn), uimm12);
            } else {
               putIReg32orSP(dd, mkexpr(res));
               DIP("%s %s, %s, 0x%x\n",
                   nm, nameIReg32orSP(dd), nameIReg32orSP(nn), uimm12);
            }
         }
         return True;
      }
   }

   /* -------------------- ADR/ADRP -------------------- */
   if (INSN(28,24) == BITS5(1,0,0,0,0)) {
      UInt  bP    = INSN(31,31);
      UInt  immLo = INSN(30,29);
      UInt  immHi = INSN(23,5);
      UInt  rD    = INSN(4,0);
      ULong uimm  = (immHi << 2) | immLo;
      ULong simm  = sx_to_64(uimm, 21);
      ULong val;
      if (bP) {
         val = (guest_PC_curr_instr & 0xFFFFFFFFFFFFF000ULL) + (simm << 12);
      } else {
         val = guest_PC_curr_instr + simm;
      }
      putIReg64orZR(rD, mkU64(val));
      DIP("adr%s %s, 0x%llx\n", bP ? "p" : "", nameIReg64orZR(rD), val);
      return True;
   }

   /* -------------------- LOGIC(imm) -------------------- */
   if (INSN(28,23) == BITS6(1,0,0,1,0,0)) {
      /* 31 30 28     22 21   15   9  4
         sf op  100100 N  immr imms Rn Rd
           op=00: AND  Rd|SP, Rn, #imm
           op=01: ORR  Rd|SP, Rn, #imm
           op=10: EOR  Rd|SP, Rn, #imm
           op=11: ANDS Rd|ZR, Rn, #imm
      */
      Bool  is64 = INSN(31,31) == 1;
      UInt  op   = INSN(30,29);
      UInt  N    = INSN(22,22);
      UInt  immR = INSN(21,16);
      UInt  immS = INSN(15,10);
      UInt  nn   = INSN(9,5);
      UInt  dd   = INSN(4,0);
      ULong imm  = 0;
      Bool  ok;
      if (N == 1 && !is64)
         goto after_logic_imm; /* not allowed; fall through */
      ok = dbm_DecodeBitMasks(&imm, NULL,
                              N, immS, immR, True, is64 ? 64 : 32);
      if (!ok)
         goto after_logic_imm;

      const HChar* names[4] = { "and", "orr", "eor", "ands" };
      const IROp   ops64[4] = { Iop_And64, Iop_Or64, Iop_Xor64, Iop_And64 };
      const IROp   ops32[4] = { Iop_And32, Iop_Or32, Iop_Xor32, Iop_And32 };

      vassert(op < 4);
      if (is64) {
         IRExpr* argL = getIReg64orZR(nn);
         IRExpr* argR = mkU64(imm);
         IRTemp  res  = newTemp(Ity_I64);
         assign(res, binop(ops64[op], argL, argR));
         if (op < 3) {
            putIReg64orSP(dd, mkexpr(res));
            DIP("%s %s, %s, 0x%llx\n", names[op],
                nameIReg64orSP(dd), nameIReg64orZR(nn), imm);
         } else {
            putIReg64orZR(dd, mkexpr(res));
            setFlags_LOGIC(True/*is64*/, res);
            DIP("%s %s, %s, 0x%llx\n", names[op],
                nameIReg64orZR(dd), nameIReg64orZR(nn), imm);
         }
      } else {
         IRExpr* argL = getIReg32orZR(nn);
         IRExpr* argR = mkU32((UInt)imm);
         IRTemp  res  = newTemp(Ity_I32);
         assign(res, binop(ops32[op], argL, argR));
         if (op < 3) {
            putIReg32orSP(dd, mkexpr(res));
            DIP("%s %s, %s, 0x%x\n", names[op],
                nameIReg32orSP(dd), nameIReg32orZR(nn), (UInt)imm);
         } else {
            putIReg32orZR(dd, mkexpr(res));
            setFlags_LOGIC(False/*!is64*/, res);
            DIP("%s %s, %s, 0x%x\n", names[op],
                nameIReg32orZR(dd), nameIReg32orZR(nn), (UInt)imm);
         }
      }
      return True;
   }
   after_logic_imm:

   /* -------------------- MOV{Z,N,K} -------------------- */
   if (INSN(28,23) == BITS6(1,0,0,1,0,1)) {
      /* 31 30 28      20    4
         sf 10 100 101 hw imm16 Rd   MOV(Z) Rd, (imm16 << (16*hw))
         sf 00 100 101 hw imm16 Rd   MOV(N) Rd, ~(imm16 << (16*hw))
         sf 11 100 101 hw imm16 Rd   MOV(K) Rd, (imm16 << (16*hw))
      */
      Bool is64   = INSN(31,31) == 1;
      UInt subopc = INSN(30,29);
      UInt hw     = INSN(22,21);
      UInt imm16  = INSN(20,5);
      UInt dd     = INSN(4,0);
      if (subopc == BITS2(0,1) || (!is64 && hw >= 2)) {
         /* invalid; fall through */
      } else {
         ULong imm64 = ((ULong)imm16) << (16 * hw);
         if (!is64)
            vassert(imm64 < 0x100000000ULL);
         switch (subopc) {
            case BITS2(1,0): // MOVZ
               putIRegOrZR(is64, dd, is64 ? mkU64(imm64) : mkU32((UInt)imm64));
               DIP("movz %s, 0x%llx\n", nameIRegOrZR(is64, dd), imm64);
               break;
            case BITS2(0,0): // MOVN
               imm64 = ~imm64;
               if (!is64)
                  imm64 &= 0xFFFFFFFFULL;
               putIRegOrZR(is64, dd, is64 ? mkU64(imm64) : mkU32((UInt)imm64));
               DIP("movn %s, 0x%llx\n", nameIRegOrZR(is64, dd), imm64);
               break;
            case BITS2(1,1): // MOVK
               /* This is more complex.  We are inserting a slice into
                  the destination register, so we need to have the old
                  value of it. */
               if (is64) {
                  IRTemp old = newTemp(Ity_I64);
                  assign(old, getIReg64orZR(dd));
                  ULong mask = 0xFFFFULL << (16 * hw);
                  IRExpr* res
                     = binop(Iop_Or64,
                             binop(Iop_And64, mkexpr(old), mkU64(~mask)),
                             mkU64(imm64));
                  putIReg64orZR(dd, res);
                  DIP("movk %s, 0x%x, lsl %u\n",
                      nameIReg64orZR(dd), imm16, 16*hw);
               } else {
                  IRTemp old = newTemp(Ity_I32);
                  assign(old, getIReg32orZR(dd));
                  vassert(hw <= 1);
                  UInt mask = ((UInt)0xFFFF) << (16 * hw);
                  IRExpr* res
                     = binop(Iop_Or32,
                             binop(Iop_And32, mkexpr(old), mkU32(~mask)),
                             mkU32((UInt)imm64));
                  putIReg32orZR(dd, res);
                  DIP("movk %s, 0x%x, lsl %u\n",
                      nameIReg32orZR(dd), imm16, 16*hw);
               }
               break;
            default:
               vassert(0);
         }
         return True;
      }
   }

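   /* A scalar restatement of the MOVK insertion above, illustrative
      only: with mask = 0xFFFFULL << (16 * hw), the update is
         new = (old & ~mask) | (((ULong)imm16) << (16 * hw));
      e.g. old=0xFFFFFFFFFFFFFFFF, imm16=0x1234, hw=1
           gives 0xFFFFFFFF1234FFFF. */
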
   /* -------------------- {U,S,}BFM -------------------- */
   /*    30 28     22 21   15   9  4

      sf 10 100110 N  immr imms nn dd
         UBFM Wd, Wn, #immr, #imms   when sf=0, N=0, immr[5]=0, imms[5]=0
         UBFM Xd, Xn, #immr, #imms   when sf=1, N=1

      sf 00 100110 N  immr imms nn dd
         SBFM Wd, Wn, #immr, #imms   when sf=0, N=0, immr[5]=0, imms[5]=0
         SBFM Xd, Xn, #immr, #imms   when sf=1, N=1

      sf 01 100110 N  immr imms nn dd
         BFM Wd, Wn, #immr, #imms   when sf=0, N=0, immr[5]=0, imms[5]=0
         BFM Xd, Xn, #immr, #imms   when sf=1, N=1
   */
   if (INSN(28,23) == BITS6(1,0,0,1,1,0)) {
      UInt sf     = INSN(31,31);
      UInt opc    = INSN(30,29);
      UInt N      = INSN(22,22);
      UInt immR   = INSN(21,16);
      UInt immS   = INSN(15,10);
      UInt nn     = INSN(9,5);
      UInt dd     = INSN(4,0);
      Bool inZero = False;
      Bool extend = False;
      const HChar* nm = "???";
      /* skip invalid combinations */
      switch (opc) {
         case BITS2(0,0):
            inZero = True;  extend = True;  nm = "sbfm"; break;
         case BITS2(0,1):
            inZero = False; extend = False; nm = "bfm"; break;
         case BITS2(1,0):
            inZero = True;  extend = False; nm = "ubfm"; break;
         case BITS2(1,1):
            goto after_bfm; /* invalid */
         default:
            vassert(0);
      }
      if (sf == 1 && N != 1) goto after_bfm;
      if (sf == 0 && (N != 0 || ((immR >> 5) & 1) != 0
                              || ((immS >> 5) & 1) != 0)) goto after_bfm;
      ULong wmask = 0, tmask = 0;
      Bool ok = dbm_DecodeBitMasks(&wmask, &tmask,
                                   N, immS, immR, False, sf == 1 ? 64 : 32);
      if (!ok) goto after_bfm; /* hmmm */

      Bool   is64 = sf == 1;
      IRType ty   = is64 ? Ity_I64 : Ity_I32;

      IRTemp dst = newTemp(ty);
      IRTemp src = newTemp(ty);
      IRTemp bot = newTemp(ty);
      IRTemp top = newTemp(ty);
      IRTemp res = newTemp(ty);
      assign(dst, inZero ? mkU(ty,0) : getIRegOrZR(is64, dd));
      assign(src, getIRegOrZR(is64, nn));
      /* perform bitfield move on low bits */
      assign(bot, binop(mkOR(ty),
                        binop(mkAND(ty), mkexpr(dst), mkU(ty, ~wmask)),
                        binop(mkAND(ty), mkexpr(mathROR(ty, src, immR)),
                              mkU(ty, wmask))));
      /* determine extension bits (sign, zero or dest register) */
      assign(top, mkexpr(extend ? mathREPLICATE(ty, src, immS) : dst));
      /* combine extension bits and result bits */
      assign(res, binop(mkOR(ty),
                        binop(mkAND(ty), mkexpr(top), mkU(ty, ~tmask)),
                        binop(mkAND(ty), mkexpr(bot), mkU(ty, tmask))));
      putIRegOrZR(is64, dd, mkexpr(res));
      DIP("%s %s, %s, immR=%u, immS=%u\n",
          nm, nameIRegOrZR(is64, dd), nameIRegOrZR(is64, nn), immR, immS);
      return True;
   }
   after_bfm:

   /* ---------------------- EXTR ---------------------- */
   /*   30 28     22 20 15   9 4
      1 00 100111 10 m  imm6 n d  EXTR Xd, Xn, Xm, #imm6
      0 00 100111 00 m  imm6 n d  EXTR Wd, Wn, Wm, #imm6 when #imm6 < 32
   */
   if (INSN(30,23) == BITS8(0,0,1,0,0,1,1,1) && INSN(21,21) == 0) {
      Bool is64  = INSN(31,31) == 1;
      UInt mm    = INSN(20,16);
      UInt imm6  = INSN(15,10);
      UInt nn    = INSN(9,5);
      UInt dd    = INSN(4,0);
      Bool valid = True;
      if (INSN(31,31) != INSN(22,22))
         valid = False;
      if (!is64 && imm6 >= 32)
         valid = False;
      if (!valid) goto after_extr;
      IRType ty    = is64 ? Ity_I64 : Ity_I32;
      IRTemp srcHi = newTemp(ty);
      IRTemp srcLo = newTemp(ty);
      IRTemp res   = newTemp(ty);
      assign(srcHi, getIRegOrZR(is64, nn));
      assign(srcLo, getIRegOrZR(is64, mm));
      if (imm6 == 0) {
         assign(res, mkexpr(srcLo));
      } else {
         UInt szBits = 8 * sizeofIRType(ty);
         vassert(imm6 > 0 && imm6 < szBits);
         assign(res, binop(mkOR(ty),
                           binop(mkSHL(ty), mkexpr(srcHi), mkU8(szBits-imm6)),
                           binop(mkSHR(ty), mkexpr(srcLo), mkU8(imm6))));
      }
      putIRegOrZR(is64, dd, mkexpr(res));
      DIP("extr %s, %s, %s, #%u\n",
          nameIRegOrZR(is64,dd),
          nameIRegOrZR(is64,nn), nameIRegOrZR(is64,mm), imm6);
      return True;
   }
   after_extr:

   vex_printf("ARM64 front end: data_processing_immediate\n");
   return False;
#  undef INSN
}

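/* Scalar model of the 64-bit EXTR computation above (illustrative
   only): pick 64 bits out of the 128-bit concatenation Xn:Xm,
   starting at bit imm6 of the low half. */
#if 0
static ULong example_EXTR64 ( ULong hi, ULong lo, UInt imm6 /* 0 .. 63 */ )
{
   if (imm6 == 0) return lo;  /* avoids the undefined shift by 64 */
   return (hi << (64 - imm6)) | (lo >> imm6);
}
#endif
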
/*------------------------------------------------------------*/
/*--- Data processing (register) instructions              ---*/
/*------------------------------------------------------------*/

static const HChar* nameSH ( UInt sh ) {
   switch (sh) {
      case 0: return "lsl";
      case 1: return "lsr";
      case 2: return "asr";
      case 3: return "ror";
      default: vassert(0);
   }
}

/* Generate IR to get a register value, possibly shifted by an
   immediate.  Returns either a 32- or 64-bit temporary holding the
   result.  After the shift, the value can optionally be NOT-ed
   too.

   sh_how coding: 00=SHL, 01=SHR, 10=SAR, 11=ROR.  sh_amt may only be
   in the range 0 to (is64 ? 64 : 32)-1.  For some instructions, ROR
   isn't allowed, but it's the job of the caller to check that.
*/
static IRTemp getShiftedIRegOrZR ( Bool is64,
                                   UInt sh_how, UInt sh_amt, UInt regNo,
                                   Bool invertResult )
{
   vassert(sh_how < 4);
   vassert(sh_amt < (is64 ? 64 : 32));
   IRType ty = is64 ? Ity_I64 : Ity_I32;
   IRTemp t0 = newTemp(ty);
   assign(t0, getIRegOrZR(is64, regNo));
   IRTemp t1 = newTemp(ty);
   switch (sh_how) {
      case BITS2(0,0):
         assign(t1, binop(mkSHL(ty), mkexpr(t0), mkU8(sh_amt)));
         break;
      case BITS2(0,1):
         assign(t1, binop(mkSHR(ty), mkexpr(t0), mkU8(sh_amt)));
         break;
      case BITS2(1,0):
         assign(t1, binop(mkSAR(ty), mkexpr(t0), mkU8(sh_amt)));
         break;
      case BITS2(1,1):
         assign(t1, mkexpr(mathROR(ty, t0, sh_amt)));
         break;
      default:
         vassert(0);
   }
   if (invertResult) {
      IRTemp t2 = newTemp(ty);
      assign(t2, unop(mkNOT(ty), mkexpr(t1)));
      return t2;
   }
   return t1;
}

static
Bool dis_ARM64_data_processing_register(/*MB_OUT*/DisResult* dres,
                                        UInt insn)
{
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))

   /* ------------------- ADD/SUB(reg) ------------------- */
   /* x==0 => 32 bit op      x==1 => 64 bit op
      sh: 00=LSL, 01=LSR, 10=ASR, 11=ROR(NOT ALLOWED)

      31 30 29 28    23 21 20 15   9  4
      x  0  0  01011 sh 0  Rm imm6 Rn Rd   ADD  Rd,Rn, sh(Rm,imm6)
      x  0  1  01011 sh 0  Rm imm6 Rn Rd   ADDS Rd,Rn, sh(Rm,imm6)
      x  1  0  01011 sh 0  Rm imm6 Rn Rd   SUB  Rd,Rn, sh(Rm,imm6)
      x  1  1  01011 sh 0  Rm imm6 Rn Rd   SUBS Rd,Rn, sh(Rm,imm6)
   */
   if (INSN(28,24) == BITS5(0,1,0,1,1) && INSN(21,21) == 0) {
      UInt   bX    = INSN(31,31);
      UInt   bOP   = INSN(30,30); /* 0: ADD, 1: SUB */
      UInt   bS    = INSN(29,29); /* set flags? */
      UInt   sh    = INSN(23,22);
      UInt   rM    = INSN(20,16);
      UInt   imm6  = INSN(15,10);
      UInt   rN    = INSN(9,5);
      UInt   rD    = INSN(4,0);
      Bool   isSUB = bOP == 1;
      Bool   is64  = bX == 1;
      IRType ty    = is64 ? Ity_I64 : Ity_I32;
      if ((!is64 && imm6 > 31) || sh == BITS2(1,1)) {
         /* invalid; fall through */
      } else {
         IRTemp argL = newTemp(ty);
         assign(argL, getIRegOrZR(is64, rN));
         IRTemp argR = getShiftedIRegOrZR(is64, sh, imm6, rM, False);
         IROp   op   = isSUB ? mkSUB(ty) : mkADD(ty);
         IRTemp res  = newTemp(ty);
         assign(res, binop(op, mkexpr(argL), mkexpr(argR)));
         if (rD != 31) putIRegOrZR(is64, rD, mkexpr(res));
         if (bS) {
            setFlags_ADD_SUB(is64, isSUB, argL, argR);
         }
         DIP("%s%s %s, %s, %s, %s #%u\n",
             bOP ? "sub" : "add", bS ? "s" : "",
             nameIRegOrZR(is64, rD), nameIRegOrZR(is64, rN),
             nameIRegOrZR(is64, rM), nameSH(sh), imm6);
         return True;
      }
   }

   /* ------------------- ADC/SBC(reg) ------------------- */
   /* x==0 => 32 bit op      x==1 => 64 bit op

      31 30 29 28    23 21 20 15     9  4
      x  0  0  11010 00 0  Rm 000000 Rn Rd   ADC  Rd,Rn,Rm
      x  0  1  11010 00 0  Rm 000000 Rn Rd   ADCS Rd,Rn,Rm
      x  1  0  11010 00 0  Rm 000000 Rn Rd   SBC  Rd,Rn,Rm
      x  1  1  11010 00 0  Rm 000000 Rn Rd   SBCS Rd,Rn,Rm
   */
   if (INSN(28,21) == BITS8(1,1,0,1,0,0,0,0) && INSN(15,10) == 0 ) {
      UInt   bX    = INSN(31,31);
      UInt   bOP   = INSN(30,30); /* 0: ADC, 1: SBC */
      UInt   bS    = INSN(29,29); /* set flags */
      UInt   rM    = INSN(20,16);
      UInt   rN    = INSN(9,5);
      UInt   rD    = INSN(4,0);

      Bool   isSUB = bOP == 1;
      Bool   is64  = bX == 1;
      IRType ty    = is64 ? Ity_I64 : Ity_I32;

      IRTemp oldC = newTemp(ty);
      assign(oldC,
             is64 ? mk_arm64g_calculate_flag_c()
                  : unop(Iop_64to32, mk_arm64g_calculate_flag_c()) );

      IRTemp argL = newTemp(ty);
      assign(argL, getIRegOrZR(is64, rN));
      IRTemp argR = newTemp(ty);
      assign(argR, getIRegOrZR(is64, rM));

      IROp   op   = isSUB ? mkSUB(ty) : mkADD(ty);
      IRTemp res  = newTemp(ty);
      if (isSUB) {
         IRExpr* one = is64 ? mkU64(1) : mkU32(1);
         IROp xorOp = is64 ? Iop_Xor64 : Iop_Xor32;
         assign(res,
                binop(op,
                      binop(op, mkexpr(argL), mkexpr(argR)),
                      binop(xorOp, mkexpr(oldC), one)));
      } else {
         assign(res,
                binop(op,
                      binop(op, mkexpr(argL), mkexpr(argR)),
                      mkexpr(oldC)));
      }

      if (rD != 31) putIRegOrZR(is64, rD, mkexpr(res));

      if (bS) {
         setFlags_ADC_SBC(is64, isSUB, argL, argR, oldC);
      }

      DIP("%s%s %s, %s, %s\n",
          bOP ? "sbc" : "adc", bS ? "s" : "",
          nameIRegOrZR(is64, rD), nameIRegOrZR(is64, rN),
          nameIRegOrZR(is64, rM));
      return True;
   }

   /* -------------------- LOGIC(reg) -------------------- */
   /* x==0 => 32 bit op      x==1 => 64 bit op
      N==0 => inv? is no-op (no inversion)
      N==1 => inv? is NOT
      sh: 00=LSL, 01=LSR, 10=ASR, 11=ROR

      31 30 28    23 21 20 15   9  4
      x  00 01010 sh N  Rm imm6 Rn Rd  AND  Rd,Rn, inv?(sh(Rm,imm6))
      x  01 01010 sh N  Rm imm6 Rn Rd  ORR  Rd,Rn, inv?(sh(Rm,imm6))
      x  10 01010 sh N  Rm imm6 Rn Rd  EOR  Rd,Rn, inv?(sh(Rm,imm6))
      x  11 01010 sh N  Rm imm6 Rn Rd  ANDS Rd,Rn, inv?(sh(Rm,imm6))
      With N=1, the names are: BIC ORN EON BICS
   */
   if (INSN(28,24) == BITS5(0,1,0,1,0)) {
      UInt   bX   = INSN(31,31);
      UInt   sh   = INSN(23,22);
      UInt   bN   = INSN(21,21);
      UInt   rM   = INSN(20,16);
      UInt   imm6 = INSN(15,10);
      UInt   rN   = INSN(9,5);
      UInt   rD   = INSN(4,0);
      Bool   is64 = bX == 1;
      IRType ty   = is64 ? Ity_I64 : Ity_I32;
      if (!is64 && imm6 > 31) {
         /* invalid; fall through */
      } else {
         IRTemp argL = newTemp(ty);
         assign(argL, getIRegOrZR(is64, rN));
         IRTemp argR = getShiftedIRegOrZR(is64, sh, imm6, rM, bN == 1);
         IROp   op   = Iop_INVALID;
         switch (INSN(30,29)) {
            case BITS2(0,0): case BITS2(1,1): op = mkAND(ty); break;
            case BITS2(0,1):                  op = mkOR(ty);  break;
            case BITS2(1,0):                  op = mkXOR(ty); break;
            default: vassert(0);
         }
         IRTemp res = newTemp(ty);
         assign(res, binop(op, mkexpr(argL), mkexpr(argR)));
         if (INSN(30,29) == BITS2(1,1)) {
            setFlags_LOGIC(is64, res);
         }
         putIRegOrZR(is64, rD, mkexpr(res));

         static const HChar* names_op[8]
            = { "and", "orr", "eor", "ands", "bic", "orn", "eon", "bics" };
         vassert(((bN << 2) | INSN(30,29)) < 8);
         const HChar* nm_op = names_op[(bN << 2) | INSN(30,29)];
         /* Special-case the printing of "MOV" */
         if (rN == 31/*zr*/ && sh == 0/*LSL*/ && imm6 == 0 && bN == 0) {
            DIP("mov %s, %s\n", nameIRegOrZR(is64, rD),
                                nameIRegOrZR(is64, rM));
         } else {
            DIP("%s %s, %s, %s, %s #%u\n", nm_op,
                nameIRegOrZR(is64, rD), nameIRegOrZR(is64, rN),
                nameIRegOrZR(is64, rM), nameSH(sh), imm6);
         }
         return True;
      }
   }

   /* -------------------- {U,S}MULH -------------------- */
   /* 31       23 22 20 15     9  4
      10011011 1  10 Rm 011111 Rn Rd   UMULH Xd,Xn,Xm
      10011011 0  10 Rm 011111 Rn Rd   SMULH Xd,Xn,Xm
   */
   if (INSN(31,24) == BITS8(1,0,0,1,1,0,1,1)
       && INSN(22,21) == BITS2(1,0) && INSN(15,10) == BITS6(0,1,1,1,1,1)) {
      Bool isU = INSN(23,23) == 1;
      UInt mm  = INSN(20,16);
      UInt nn  = INSN(9,5);
      UInt dd  = INSN(4,0);
      putIReg64orZR(dd, unop(Iop_128HIto64,
                             binop(isU ? Iop_MullU64 : Iop_MullS64,
                                   getIReg64orZR(nn), getIReg64orZR(mm))));
      DIP("%cmulh %s, %s, %s\n",
          isU ? 'u' : 's',
          nameIReg64orZR(dd), nameIReg64orZR(nn), nameIReg64orZR(mm));
      return True;
   }

   /* -------------------- M{ADD,SUB} -------------------- */
   /* 31 30           20 15 14 9 4
      sf 00 11011 000 m  0  a  n r   MADD Rd,Rn,Rm,Ra   d = a+m*n
      sf 00 11011 000 m  1  a  n r   MSUB Rd,Rn,Rm,Ra   d = a-m*n
   */
   if (INSN(30,21) == BITS10(0,0,1,1,0,1,1,0,0,0)) {
      Bool is64  = INSN(31,31) == 1;
      UInt mm    = INSN(20,16);
      Bool isAdd = INSN(15,15) == 0;
      UInt aa    = INSN(14,10);
      UInt nn    = INSN(9,5);
      UInt dd    = INSN(4,0);
      if (is64) {
         putIReg64orZR(
            dd,
            binop(isAdd ? Iop_Add64 : Iop_Sub64,
                  getIReg64orZR(aa),
                  binop(Iop_Mul64, getIReg64orZR(mm), getIReg64orZR(nn))));
      } else {
         putIReg32orZR(
            dd,
            binop(isAdd ? Iop_Add32 : Iop_Sub32,
                  getIReg32orZR(aa),
                  binop(Iop_Mul32, getIReg32orZR(mm), getIReg32orZR(nn))));
      }
      DIP("%s %s, %s, %s, %s\n",
          isAdd ? "madd" : "msub",
          nameIRegOrZR(is64, dd), nameIRegOrZR(is64, nn),
          nameIRegOrZR(is64, mm), nameIRegOrZR(is64, aa));
      return True;
   }

   /* ---------------- CS{EL,INC,INV,NEG} ---------------- */
   /* 31 30 28        20 15   11 9  4
      sf 00 1101 0100 mm cond 00 nn dd   CSEL  Rd,Rn,Rm
      sf 00 1101 0100 mm cond 01 nn dd   CSINC Rd,Rn,Rm
      sf 10 1101 0100 mm cond 00 nn dd   CSINV Rd,Rn,Rm
      sf 10 1101 0100 mm cond 01 nn dd   CSNEG Rd,Rn,Rm
      In all cases, the operation is: Rd = if cond then Rn else OP(Rm)
   */
   if (INSN(29,21) == BITS9(0, 1,1,0,1, 0,1,0,0) && INSN(11,11) == 0) {
      Bool    is64 = INSN(31,31) == 1;
      UInt    b30  = INSN(30,30);
      UInt    mm   = INSN(20,16);
      UInt    cond = INSN(15,12);
      UInt    b10  = INSN(10,10);
      UInt    nn   = INSN(9,5);
      UInt    dd   = INSN(4,0);
      UInt    op   = (b30 << 1) | b10; /* 00=id 01=inc 10=inv 11=neg */
      IRType  ty   = is64 ? Ity_I64 : Ity_I32;
      IRExpr* argL = getIRegOrZR(is64, nn);
      IRExpr* argR = getIRegOrZR(is64, mm);
      switch (op) {
         case BITS2(0,0):
            break;
         case BITS2(0,1):
            argR = binop(mkADD(ty), argR, mkU(ty,1));
            break;
         case BITS2(1,0):
            argR = unop(mkNOT(ty), argR);
            break;
         case BITS2(1,1):
            argR = binop(mkSUB(ty), mkU(ty,0), argR);
            break;
         default:
            vassert(0);
      }
      putIRegOrZR(
         is64, dd,
         IRExpr_ITE(unop(Iop_64to1, mk_arm64g_calculate_condition(cond)),
                    argL, argR)
      );
      const HChar* op_nm[4] = { "csel", "csinc", "csinv", "csneg" };
      DIP("%s %s, %s, %s, %s\n", op_nm[op],
          nameIRegOrZR(is64, dd), nameIRegOrZR(is64, nn),
          nameIRegOrZR(is64, mm), nameCC(cond));
      return True;
   }

   /* -------------- ADD/SUB(extended reg) -------------- */
   /*     28         20 15  12   9 4
      000 01011 00 1 m  opt imm3 n d   ADD  Wd|SP, Wn|SP, Wm ext&lsld
      100 01011 00 1 m  opt imm3 n d   ADD  Xd|SP, Xn|SP, Rm ext&lsld

      001 01011 00 1 m  opt imm3 n d   ADDS Wd,    Wn|SP, Wm ext&lsld
      101 01011 00 1 m  opt imm3 n d   ADDS Xd,    Xn|SP, Rm ext&lsld

      010 01011 00 1 m  opt imm3 n d   SUB  Wd|SP, Wn|SP, Wm ext&lsld
      110 01011 00 1 m  opt imm3 n d   SUB  Xd|SP, Xn|SP, Rm ext&lsld

      011 01011 00 1 m  opt imm3 n d   SUBS Wd,    Wn|SP, Wm ext&lsld
      111 01011 00 1 m  opt imm3 n d   SUBS Xd,    Xn|SP, Rm ext&lsld

      The 'm' operand is extended per opt, thusly:

        000   Xm & 0xFF           UXTB
        001   Xm & 0xFFFF         UXTH
        010   Xm & (2^32)-1       UXTW
        011   Xm                  UXTX

        100   Xm sx from bit 7    SXTB
        101   Xm sx from bit 15   SXTH
        110   Xm sx from bit 31   SXTW
        111   Xm                  SXTX

      In the 64 bit case (bit31 == 1), UXTX and SXTX are the identity
      operation on Xm.  In the 32 bit case, UXTW, UXTX, SXTW and SXTX
      are the identity operation on Wm.

      After extension, the value is shifted left by imm3 bits, which
      may only be in the range 0 .. 4 inclusive.
   */
   if (INSN(28,21) == BITS8(0,1,0,1,1,0,0,1) && INSN(12,10) <= 4) {
      Bool is64  = INSN(31,31) == 1;
      Bool isSub = INSN(30,30) == 1;
      Bool setCC = INSN(29,29) == 1;
      UInt mm    = INSN(20,16);
      UInt opt   = INSN(15,13);
      UInt imm3  = INSN(12,10);
      UInt nn    = INSN(9,5);
      UInt dd    = INSN(4,0);
      const HChar* nameExt[8] = { "uxtb", "uxth", "uxtw", "uxtx",
                                  "sxtb", "sxth", "sxtw", "sxtx" };
      /* Do almost the same thing in the 32- and 64-bit cases. */
      IRTemp xN = newTemp(Ity_I64);
      IRTemp xM = newTemp(Ity_I64);
      assign(xN, getIReg64orSP(nn));
      assign(xM, getIReg64orZR(mm));
      IRExpr* xMw  = mkexpr(xM); /* "xM widened" */
      Int     shSX = 0;
      /* widen Xm .. */
      switch (opt) {
         case BITS3(0,0,0): // UXTB
            xMw = binop(Iop_And64, xMw, mkU64(0xFF)); break;
         case BITS3(0,0,1): // UXTH
            xMw = binop(Iop_And64, xMw, mkU64(0xFFFF)); break;
         case BITS3(0,1,0): // UXTW -- noop for the 32bit case
            if (is64) {
               xMw = unop(Iop_32Uto64, unop(Iop_64to32, xMw));
            }
            break;
         case BITS3(0,1,1): // UXTX -- always a noop
            break;
         case BITS3(1,0,0): // SXTB
            shSX = 56; goto sxTo64;
         case BITS3(1,0,1): // SXTH
            shSX = 48; goto sxTo64;
         case BITS3(1,1,0): // SXTW -- noop for the 32bit case
            if (is64) {
               shSX = 32; goto sxTo64;
            }
            break;
         case BITS3(1,1,1): // SXTX -- always a noop
            break;
         sxTo64:
            vassert(shSX >= 32);
            xMw = binop(Iop_Sar64, binop(Iop_Shl64, xMw, mkU8(shSX)),
                        mkU8(shSX));
            break;
         default:
            vassert(0);
      }
      /* and now shift */
      IRTemp argL = xN;
      IRTemp argR = newTemp(Ity_I64);
      assign(argR, binop(Iop_Shl64, xMw, mkU8(imm3)));
      IRTemp res = newTemp(Ity_I64);
      assign(res, binop(isSub ? Iop_Sub64 : Iop_Add64,
                        mkexpr(argL), mkexpr(argR)));
      if (is64) {
         if (setCC) {
            putIReg64orZR(dd, mkexpr(res));
            setFlags_ADD_SUB(True/*is64*/, isSub, argL, argR);
         } else {
            putIReg64orSP(dd, mkexpr(res));
         }
      } else {
         if (setCC) {
            IRTemp argL32 = newTemp(Ity_I32);
            IRTemp argR32 = newTemp(Ity_I32);
            putIReg32orZR(dd, unop(Iop_64to32, mkexpr(res)));
            assign(argL32, unop(Iop_64to32, mkexpr(argL)));
            assign(argR32, unop(Iop_64to32, mkexpr(argR)));
            setFlags_ADD_SUB(False/*!is64*/, isSub, argL32, argR32);
         } else {
            putIReg32orSP(dd, unop(Iop_64to32, mkexpr(res)));
         }
      }
      DIP("%s%s %s, %s, %s %s lsl %u\n",
          isSub ? "sub" : "add", setCC ? "s" : "",
          setCC ? nameIRegOrZR(is64, dd) : nameIRegOrSP(is64, dd),
          nameIRegOrSP(is64, nn), nameIRegOrSP(is64, mm),
          nameExt[opt], imm3);
      return True;
   }

   /* ---------------- CCMP/CCMN(imm) ---------------- */
   /* Bizarrely, these appear in the "data processing register"
      category, even though they are operations against an
      immediate. */
   /* 31 29        20   15   11 9    3
      sf 1 111010010 imm5 cond 10 Rn 0 nzcv   CCMP Rn, #imm5, #nzcv, cond
      sf 0 111010010 imm5 cond 10 Rn 0 nzcv   CCMN Rn, #imm5, #nzcv, cond

      Operation is:
         (CCMP) flags = if cond then flags-after-sub(Rn,imm5) else nzcv
         (CCMN) flags = if cond then flags-after-add(Rn,imm5) else nzcv
   */
   if (INSN(29,21) == BITS9(1,1,1,0,1,0,0,1,0)
       && INSN(11,10) == BITS2(1,0) && INSN(4,4) == 0) {
      Bool is64  = INSN(31,31) == 1;
      Bool isSUB = INSN(30,30) == 1;
      UInt imm5  = INSN(20,16);
      UInt cond  = INSN(15,12);
      UInt nn    = INSN(9,5);
      UInt nzcv  = INSN(3,0);

      IRTemp condT = newTemp(Ity_I1);
      assign(condT, unop(Iop_64to1, mk_arm64g_calculate_condition(cond)));

      IRType ty   = is64 ? Ity_I64 : Ity_I32;
      IRTemp argL = newTemp(ty);
      IRTemp argR = newTemp(ty);

      if (is64) {
         assign(argL, getIReg64orZR(nn));
         assign(argR, mkU64(imm5));
      } else {
         assign(argL, getIReg32orZR(nn));
         assign(argR, mkU32(imm5));
      }
      setFlags_ADD_SUB_conditionally(is64, isSUB, condT, argL, argR, nzcv);

      DIP("ccm%c %s, #%u, #%u, %s\n",
          isSUB ? 'p' : 'n', nameIRegOrZR(is64, nn),
          imm5, nzcv, nameCC(cond));
      return True;
   }

   /* ---------------- CCMP/CCMN(reg) ---------------- */
   /* 31 29        20 15   11 9    3
      sf 1 111010010 Rm cond 00 Rn 0 nzcv   CCMP Rn, Rm, #nzcv, cond
      sf 0 111010010 Rm cond 00 Rn 0 nzcv   CCMN Rn, Rm, #nzcv, cond

      Operation is:
         (CCMP) flags = if cond then flags-after-sub(Rn,Rm) else nzcv
         (CCMN) flags = if cond then flags-after-add(Rn,Rm) else nzcv
   */
   if (INSN(29,21) == BITS9(1,1,1,0,1,0,0,1,0)
       && INSN(11,10) == BITS2(0,0) && INSN(4,4) == 0) {
      Bool is64  = INSN(31,31) == 1;
      Bool isSUB = INSN(30,30) == 1;
      UInt mm    = INSN(20,16);
      UInt cond  = INSN(15,12);
      UInt nn    = INSN(9,5);
      UInt nzcv  = INSN(3,0);

      IRTemp condT = newTemp(Ity_I1);
      assign(condT, unop(Iop_64to1, mk_arm64g_calculate_condition(cond)));

      IRType ty   = is64 ? Ity_I64 : Ity_I32;
      IRTemp argL = newTemp(ty);
      IRTemp argR = newTemp(ty);

      if (is64) {
         assign(argL, getIReg64orZR(nn));
         assign(argR, getIReg64orZR(mm));
      } else {
         assign(argL, getIReg32orZR(nn));
         assign(argR, getIReg32orZR(mm));
      }
      setFlags_ADD_SUB_conditionally(is64, isSUB, condT, argL, argR, nzcv);

      DIP("ccm%c %s, %s, #%u, %s\n",
          isSUB ? 'p' : 'n', nameIRegOrZR(is64, nn),
          nameIRegOrZR(is64, mm), nzcv, nameCC(cond));
      return True;
   }

   /* -------------- REV/REV16/REV32/RBIT -------------- */
   /* 31 30 28       20    15   11 9 4

      1  10 11010110 00000 0000 11 n d    (1) REV   Xd, Xn
      0  10 11010110 00000 0000 10 n d    (2) REV   Wd, Wn

      1  10 11010110 00000 0000 00 n d    (3) RBIT  Xd, Xn
      0  10 11010110 00000 0000 00 n d    (4) RBIT  Wd, Wn

      1  10 11010110 00000 0000 01 n d    (5) REV16 Xd, Xn
      0  10 11010110 00000 0000 01 n d    (6) REV16 Wd, Wn

      1  10 11010110 00000 0000 10 n d    (7) REV32 Xd, Xn
   */
   if (INSN(30,21) == BITS10(1,0,1,1,0,1,0,1,1,0)
       && INSN(20,12) == BITS9(0,0,0,0,0,0,0,0,0)) {
      UInt b31 = INSN(31,31);
      UInt opc = INSN(11,10);

      UInt ix = 0;
      /**/ if (b31 == 1 && opc == BITS2(1,1)) ix = 1;
      else if (b31 == 0 && opc == BITS2(1,0)) ix = 2;
      else if (b31 == 1 && opc == BITS2(0,0)) ix = 3;
      else if (b31 == 0 && opc == BITS2(0,0)) ix = 4;
      else if (b31 == 1 && opc == BITS2(0,1)) ix = 5;
      else if (b31 == 0 && opc == BITS2(0,1)) ix = 6;
      else if (b31 == 1 && opc == BITS2(1,0)) ix = 7;
      if (ix >= 1 && ix <= 7) {
         Bool   is64 = ix == 1 || ix == 3 || ix == 5 || ix == 7;
         UInt   nn   = INSN(9,5);
         UInt   dd   = INSN(4,0);
         IRTemp src  = newTemp(Ity_I64);
         IRTemp dst  = IRTemp_INVALID;
         IRTemp (*math)(IRTemp) = NULL;
         switch (ix) {
            case 1: case 2: math = math_BYTESWAP64;   break;
            case 3: case 4: math = math_BITSWAP64;    break;
            case 5: case 6: math = math_USHORTSWAP64; break;
            case 7:         math = math_UINTSWAP64;   break;
            default: vassert(0);
         }
         const HChar* names[7]
           = { "rev", "rev", "rbit", "rbit", "rev16", "rev16", "rev32" };
         const HChar* nm = names[ix-1];
         if (ix == 6) {
            /* This has to be special cased, since the logic below doesn't
               handle it correctly. */
            assign(src, getIReg64orZR(nn));
            dst = math(src);
            putIReg64orZR(dd,
                          unop(Iop_32Uto64, unop(Iop_64to32, mkexpr(dst))));
         } else if (is64) {
            assign(src, getIReg64orZR(nn));
            dst = math(src);
            putIReg64orZR(dd, mkexpr(dst));
         } else {
            assign(src, binop(Iop_Shl64, getIReg64orZR(nn), mkU8(32)));
            dst = math(src);
            putIReg32orZR(dd, unop(Iop_64to32, mkexpr(dst)));
         }
         DIP("%s %s, %s\n", nm,
             nameIRegOrZR(is64,dd), nameIRegOrZR(is64,nn));
         return True;
      }
      /* else fall through */
   }

   /* -------------------- CLZ/CLS -------------------- */
   /*    30 28   24   20    15      9 4
      sf 10 1101 0110 00000 00010 0 n d    CLZ Rd, Rn
      sf 10 1101 0110 00000 00010 1 n d    CLS Rd, Rn
   */
   if (INSN(30,21) == BITS10(1,0,1,1,0,1,0,1,1,0)
       && INSN(20,11) == BITS10(0,0,0,0,0,0,0,0,1,0)) {
      Bool   is64  = INSN(31,31) == 1;
      Bool   isCLS = INSN(10,10) == 1;
      UInt   nn    = INSN(9,5);
      UInt   dd    = INSN(4,0);
      IRTemp src   = newTemp(Ity_I64);
      IRTemp srcZ  = newTemp(Ity_I64);
      IRTemp dst   = newTemp(Ity_I64);
      /* Get the argument, widened out to 64 bit */
      if (is64) {
         assign(src, getIReg64orZR(nn));
      } else {
         assign(src, binop(Iop_Shl64,
                           unop(Iop_32Uto64, getIReg32orZR(nn)), mkU8(32)));
      }
      /* If this is CLS, mash the arg around accordingly */
      if (isCLS) {
         IRExpr* one = mkU8(1);
         assign(srcZ,
         binop(Iop_Xor64,
               binop(Iop_Shl64, mkexpr(src), one),
               binop(Iop_Shl64, binop(Iop_Shr64, mkexpr(src), one), one)));
      } else {
         assign(srcZ, mkexpr(src));
      }
      /* And compute CLZ. */
      if (is64) {
         assign(dst, IRExpr_ITE(binop(Iop_CmpEQ64, mkexpr(srcZ), mkU64(0)),
                                mkU64(isCLS ? 63 : 64),
                                unop(Iop_Clz64, mkexpr(srcZ))));
         putIReg64orZR(dd, mkexpr(dst));
      } else {
         assign(dst, IRExpr_ITE(binop(Iop_CmpEQ64, mkexpr(srcZ), mkU64(0)),
                                mkU64(isCLS ? 31 : 32),
                                unop(Iop_Clz64, mkexpr(srcZ))));
         putIReg32orZR(dd, unop(Iop_64to32, mkexpr(dst)));
      }
      DIP("cl%c %s, %s\n", isCLS ? 's' : 'z',
          nameIRegOrZR(is64, dd), nameIRegOrZR(is64, nn));
      return True;
   }

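   /* Scalar restatement of the CLS mangling above (illustrative only):
      srcZ = (src << 1) ^ ((src >> 1) << 1) sets bit i exactly when
      bits i and i-1 of src differ, so its leading-zero count equals
      the number of copies of the sign bit below bit 63.  E.g. for
      src = 0x0000000000000001, srcZ = 2 and CLS = CLZ(srcZ) = 62. */
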
   /* ------------------ LSLV/LSRV/ASRV/RORV ------------------ */
   /*    30 28        20 15   11 9 4
      sf 00 1101 0110 m  0010 00 n d   LSLV  Rd,Rn,Rm
      sf 00 1101 0110 m  0010 01 n d   LSRV  Rd,Rn,Rm
      sf 00 1101 0110 m  0010 10 n d   ASRV  Rd,Rn,Rm
      sf 00 1101 0110 m  0010 11 n d   RORV  Rd,Rn,Rm
   */
   if (INSN(30,21) == BITS10(0,0,1,1,0,1,0,1,1,0)
       && INSN(15,12) == BITS4(0,0,1,0)) {
      Bool   is64 = INSN(31,31) == 1;
      UInt   mm   = INSN(20,16);
      UInt   op   = INSN(11,10);
      UInt   nn   = INSN(9,5);
      UInt   dd   = INSN(4,0);
      IRType ty   = is64 ? Ity_I64 : Ity_I32;
      IRTemp srcL = newTemp(ty);
      IRTemp srcR = newTemp(Ity_I64);
      IRTemp res  = newTemp(ty);
      IROp   iop  = Iop_INVALID;
      assign(srcL, getIRegOrZR(is64, nn));
      assign(srcR, binop(Iop_And64, getIReg64orZR(mm),
                                    mkU64(is64 ? 63 : 31)));
      if (op < 3) {
         // LSLV, LSRV, ASRV
         switch (op) {
            case BITS2(0,0): iop = mkSHL(ty); break;
            case BITS2(0,1): iop = mkSHR(ty); break;
            case BITS2(1,0): iop = mkSAR(ty); break;
            default: vassert(0);
         }
         assign(res, binop(iop, mkexpr(srcL),
                                unop(Iop_64to8, mkexpr(srcR))));
      } else {
         // RORV
         IROp opSHL = mkSHL(ty);
         IROp opSHR = mkSHR(ty);
         IROp opOR  = mkOR(ty);
         IRExpr* width = mkU64(is64 ? 64: 32);
         assign(
            res,
            IRExpr_ITE(
               binop(Iop_CmpEQ64, mkexpr(srcR), mkU64(0)),
               mkexpr(srcL),
               binop(opOR,
                     binop(opSHL,
                           mkexpr(srcL),
                           unop(Iop_64to8, binop(Iop_Sub64, width,
                                                 mkexpr(srcR)))),
                     binop(opSHR,
                           mkexpr(srcL), unop(Iop_64to8, mkexpr(srcR))))
         ));
      }
      putIRegOrZR(is64, dd, mkexpr(res));
      vassert(op < 4);
      const HChar* names[4] = { "lslv", "lsrv", "asrv", "rorv" };
      DIP("%s %s, %s, %s\n",
          names[op], nameIRegOrZR(is64,dd),
                     nameIRegOrZR(is64,nn), nameIRegOrZR(is64,mm));
      return True;
   }

   /* -------------------- SDIV/UDIV -------------------- */
   /*    30 28        20 15    10 9 4
      sf 00 1101 0110 m  00001  1 n d  SDIV Rd,Rn,Rm
      sf 00 1101 0110 m  00001  0 n d  UDIV Rd,Rn,Rm
   */
   if (INSN(30,21) == BITS10(0,0,1,1,0,1,0,1,1,0)
       && INSN(15,11) == BITS5(0,0,0,0,1)) {
      Bool is64 = INSN(31,31) == 1;
      UInt mm   = INSN(20,16);
      Bool isS  = INSN(10,10) == 1;
      UInt nn   = INSN(9,5);
      UInt dd   = INSN(4,0);
      if (isS) {
         putIRegOrZR(is64, dd, binop(is64 ? Iop_DivS64 : Iop_DivS32,
                                     getIRegOrZR(is64, nn),
                                     getIRegOrZR(is64, mm)));
      } else {
         putIRegOrZR(is64, dd, binop(is64 ? Iop_DivU64 : Iop_DivU32,
                                     getIRegOrZR(is64, nn),
                                     getIRegOrZR(is64, mm)));
      }
      DIP("%cdiv %s, %s, %s\n", isS ? 's' : 'u',
          nameIRegOrZR(is64, dd),
          nameIRegOrZR(is64, nn), nameIRegOrZR(is64, mm));
      return True;
   }

   /* ------------------ {S,U}M{ADD,SUB}L ------------------ */
   /* 31        23  20 15 14 9 4
      1001 1011 101 m  0  a  n d   UMADDL Xd,Wn,Wm,Xa
      1001 1011 001 m  0  a  n d   SMADDL Xd,Wn,Wm,Xa
      1001 1011 101 m  1  a  n d   UMSUBL Xd,Wn,Wm,Xa
      1001 1011 001 m  1  a  n d   SMSUBL Xd,Wn,Wm,Xa
      with operation
         Xd = Xa +/- (Wn *u/s Wm)
   */
   if (INSN(31,24) == BITS8(1,0,0,1,1,0,1,1) && INSN(22,21) == BITS2(0,1)) {
      Bool   isU   = INSN(23,23) == 1;
      UInt   mm    = INSN(20,16);
      Bool   isAdd = INSN(15,15) == 0;
      UInt   aa    = INSN(14,10);
      UInt   nn    = INSN(9,5);
      UInt   dd    = INSN(4,0);
      IRTemp wN    = newTemp(Ity_I32);
      IRTemp wM    = newTemp(Ity_I32);
      IRTemp xA    = newTemp(Ity_I64);
      IRTemp muld  = newTemp(Ity_I64);
      IRTemp res   = newTemp(Ity_I64);
      assign(wN, getIReg32orZR(nn));
      assign(wM, getIReg32orZR(mm));
      assign(xA, getIReg64orZR(aa));
      assign(muld, binop(isU ? Iop_MullU32 : Iop_MullS32,
                         mkexpr(wN), mkexpr(wM)));
      assign(res, binop(isAdd ? Iop_Add64 : Iop_Sub64,
                        mkexpr(xA), mkexpr(muld)));
      putIReg64orZR(dd, mkexpr(res));
      DIP("%cm%sl %s, %s, %s, %s\n", isU ? 'u' : 's', isAdd ? "add" : "sub",
          nameIReg64orZR(dd), nameIReg32orZR(nn),
          nameIReg32orZR(mm), nameIReg64orZR(aa));
      return True;
   }

   /* -------------------- CRC32/CRC32C -------------------- */
   /* 31 30           20 15   11 9 4
      sf 00 1101 0110 m  0100 sz n d   CRC32<sz>  Wd, Wn, Wm|Xm
      sf 00 1101 0110 m  0101 sz n d   CRC32C<sz> Wd, Wn, Wm|Xm
   */
   if (INSN(30,21) == BITS10(0,0,1,1,0,1,0,1,1,0)
       && INSN(15,13) == BITS3(0,1,0)) {
      UInt bitSF = INSN(31,31);
      UInt mm    = INSN(20,16);
      UInt bitC  = INSN(12,12);
      UInt sz    = INSN(11,10);
      UInt nn    = INSN(9,5);
      UInt dd    = INSN(4,0);
      vassert(sz >= 0 && sz <= 3);
      if ((bitSF == 0 && sz <= BITS2(1,0))
          || (bitSF == 1 && sz == BITS2(1,1))) {
         UInt ix = (bitC == 1 ? 4 : 0) | sz;
         void* helpers[8]
            = { &arm64g_calc_crc32b,   &arm64g_calc_crc32h,
                &arm64g_calc_crc32w,   &arm64g_calc_crc32x,
                &arm64g_calc_crc32cb,  &arm64g_calc_crc32ch,
                &arm64g_calc_crc32cw,  &arm64g_calc_crc32cx };
         const HChar* hNames[8]
            = { "arm64g_calc_crc32b",  "arm64g_calc_crc32h",
                "arm64g_calc_crc32w",  "arm64g_calc_crc32x",
                "arm64g_calc_crc32cb", "arm64g_calc_crc32ch",
                "arm64g_calc_crc32cw", "arm64g_calc_crc32cx" };
         const HChar* iNames[8]
            = { "crc32b",  "crc32h",  "crc32w",  "crc32x",
                "crc32cb", "crc32ch", "crc32cw", "crc32cx" };

         IRTemp srcN = newTemp(Ity_I64);
         assign(srcN, unop(Iop_32Uto64, unop(Iop_64to32, getIReg64orZR(nn))));

         IRTemp  srcM = newTemp(Ity_I64);
         IRExpr* at64 = getIReg64orZR(mm);
         switch (sz) {
            case BITS2(0,0):
               assign(srcM, binop(Iop_And64, at64, mkU64(0xFF))); break;
            case BITS2(0,1):
               assign(srcM, binop(Iop_And64, at64, mkU64(0xFFFF))); break;
            case BITS2(1,0):
               assign(srcM, binop(Iop_And64, at64, mkU64(0xFFFFFFFF))); break;
            case BITS2(1,1):
               assign(srcM, at64); break;
            default:
               vassert(0);
         }

         vassert(ix >= 0 && ix <= 7);

         putIReg64orZR(
            dd,
            unop(Iop_32Uto64,
                 unop(Iop_64to32,
                      mkIRExprCCall(Ity_I64, 0/*regparm*/,
                                    hNames[ix], helpers[ix],
                                    mkIRExprVec_2(mkexpr(srcN),
                                                  mkexpr(srcM))))));

         DIP("%s %s, %s, %s\n", iNames[ix],
             nameIReg32orZR(dd),
             nameIReg32orZR(nn), nameIRegOrZR(bitSF == 1, mm));
         return True;
      }
      /* fall through */
   }

   vex_printf("ARM64 front end: data_processing_register\n");
   return False;
#  undef INSN
}

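/* The CRC helpers themselves live in guest_arm64_helpers.c.  Purely
   as a reference sketch (not their actual implementation): a generic
   bit-at-a-time update over one byte, using the reflected polynomial
   0xEDB88320 for CRC32 and 0x82F63B78 for CRC32C. */
#if 0
static UInt example_crc32_byte ( UInt acc, UChar b, UInt poly )
{
   Int i;
   acc ^= b;
   for (i = 0; i < 8; i++)
      acc = (acc >> 1) ^ ((acc & 1) ? poly : 0);
   return acc;
}
#endif
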
/*------------------------------------------------------------*/
/*--- Math helpers for vector interleave/deinterleave      ---*/
/*------------------------------------------------------------*/

#define EX(_tmp) \
           mkexpr(_tmp)
#define SL(_hi128,_lo128,_nbytes) \
           ( (_nbytes) == 0 \
                ? (_lo128) \
                : triop(Iop_SliceV128,(_hi128),(_lo128),mkU8(_nbytes)) )
#define ROR(_v128,_nbytes) \
           SL((_v128),(_v128),(_nbytes))
#define ROL(_v128,_nbytes) \
           SL((_v128),(_v128),16-(_nbytes))
#define SHR(_v128,_nbytes) \
           binop(Iop_ShrV128,(_v128),mkU8(8*(_nbytes)))
#define SHL(_v128,_nbytes) \
           binop(Iop_ShlV128,(_v128),mkU8(8*(_nbytes)))
#define ILO64x2(_argL,_argR) \
           binop(Iop_InterleaveLO64x2,(_argL),(_argR))
#define IHI64x2(_argL,_argR) \
           binop(Iop_InterleaveHI64x2,(_argL),(_argR))
#define ILO32x4(_argL,_argR) \
           binop(Iop_InterleaveLO32x4,(_argL),(_argR))
#define IHI32x4(_argL,_argR) \
           binop(Iop_InterleaveHI32x4,(_argL),(_argR))
#define ILO16x8(_argL,_argR) \
           binop(Iop_InterleaveLO16x8,(_argL),(_argR))
#define IHI16x8(_argL,_argR) \
           binop(Iop_InterleaveHI16x8,(_argL),(_argR))
#define ILO8x16(_argL,_argR) \
           binop(Iop_InterleaveLO8x16,(_argL),(_argR))
#define IHI8x16(_argL,_argR) \
           binop(Iop_InterleaveHI8x16,(_argL),(_argR))
#define CEV32x4(_argL,_argR) \
           binop(Iop_CatEvenLanes32x4,(_argL),(_argR))
#define COD32x4(_argL,_argR) \
           binop(Iop_CatOddLanes32x4,(_argL),(_argR))
#define COD16x8(_argL,_argR) \
           binop(Iop_CatOddLanes16x8,(_argL),(_argR))
#define COD8x16(_argL,_argR) \
           binop(Iop_CatOddLanes8x16,(_argL),(_argR))
#define CEV8x16(_argL,_argR) \
           binop(Iop_CatEvenLanes8x16,(_argL),(_argR))
#define AND(_arg1,_arg2) \
           binop(Iop_AndV128,(_arg1),(_arg2))
#define OR2(_arg1,_arg2) \
           binop(Iop_OrV128,(_arg1),(_arg2))
#define OR3(_arg1,_arg2,_arg3) \
           binop(Iop_OrV128,(_arg1),binop(Iop_OrV128,(_arg2),(_arg3)))
#define OR4(_arg1,_arg2,_arg3,_arg4) \
           binop(Iop_OrV128, \
                 binop(Iop_OrV128,(_arg1),(_arg2)), \
                 binop(Iop_OrV128,(_arg3),(_arg4)))

/* Do interleaving for 1 128 bit vector, for ST1 insns. */
static
void math_INTERLEAVE1_128( /*OUTx1*/ IRTemp* i0,
                           UInt laneSzBlg2, IRTemp u0 )
{
   assign(*i0, mkexpr(u0));
   return;
}

/* Do interleaving for 2 128 bit vectors, for ST2 insns. */
static
void math_INTERLEAVE2_128( /*OUTx2*/ IRTemp* i0, IRTemp* i1,
                           UInt laneSzBlg2, IRTemp u0, IRTemp u1 )
{
   /* This is pretty easy, since we have primitives directly to
      hand. */
   if (laneSzBlg2 == 3) {
      // 64x2
      // u1 == B1 B0, u0 == A1 A0
      // i1 == B1 A1, i0 == B0 A0
      assign(*i0, binop(Iop_InterleaveLO64x2, mkexpr(u1), mkexpr(u0)));
      assign(*i1, binop(Iop_InterleaveHI64x2, mkexpr(u1), mkexpr(u0)));
      return;
   }
   if (laneSzBlg2 == 2) {
      // 32x4
      // u1 == B3 B2 B1 B0, u0 == A3 A2 A1 A0,
      // i1 == B3 A3 B2 A2, i0 == B1 A1 B0 A0
      assign(*i0, binop(Iop_InterleaveLO32x4, mkexpr(u1), mkexpr(u0)));
      assign(*i1, binop(Iop_InterleaveHI32x4, mkexpr(u1), mkexpr(u0)));
      return;
   }
   if (laneSzBlg2 == 1) {
      // 16x8
      // u1 == B{7..0}, u0 == A{7..0}
      // i0 == B3 A3 B2 A2 B1 A1 B0 A0
      // i1 == B7 A7 B6 A6 B5 A5 B4 A4
      assign(*i0, binop(Iop_InterleaveLO16x8, mkexpr(u1), mkexpr(u0)));
      assign(*i1, binop(Iop_InterleaveHI16x8, mkexpr(u1), mkexpr(u0)));
      return;
   }
   if (laneSzBlg2 == 0) {
      // 8x16
      // u1 == B{f..0}, u0 == A{f..0}
      // i0 == B7 A7 B6 A6 B5 A5 B4 A4 B3 A3 B2 A2 B1 A1 B0 A0
      // i1 == Bf Af Be Ae Bd Ad Bc Ac Bb Ab Ba Aa B9 A9 B8 A8
      assign(*i0, binop(Iop_InterleaveLO8x16, mkexpr(u1), mkexpr(u0)));
      assign(*i1, binop(Iop_InterleaveHI8x16, mkexpr(u1), mkexpr(u0)));
      return;
   }
   /*NOTREACHED*/
   vassert(0);
}

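/* What the 2-way interleave amounts to, restated on byte arrays
   (illustrative only): ST2 wants memory order A0 B0 A1 B1 .., which
   is exactly what the InterleaveLO/HI pairs above produce. */
#if 0
static void example_interleave2 ( UChar* dst /* 32 bytes */,
                                  const UChar* a, const UChar* b /* 16 each */ )
{
   UInt i;
   for (i = 0; i < 16; i++) {
      dst[2*i + 0] = a[i];
      dst[2*i + 1] = b[i];
   }
}
#endif
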
/* Do interleaving for 3 128 bit vectors, for ST3 insns. */
static
void math_INTERLEAVE3_128(
        /*OUTx3*/ IRTemp* i0, IRTemp* i1, IRTemp* i2,
        UInt laneSzBlg2,
        IRTemp u0, IRTemp u1, IRTemp u2 )
{
   if (laneSzBlg2 == 3) {
      // 64x2
      // u2 == C1 C0, u1 == B1 B0, u0 == A1 A0
      // i2 == C1 B1, i1 == A1 C0, i0 == B0 A0,
      assign(*i2, IHI64x2( EX(u2), EX(u1) ));
      assign(*i1, ILO64x2( ROR(EX(u0),8), EX(u2) ));
      assign(*i0, ILO64x2( EX(u1), EX(u0) ));
      return;
   }

   if (laneSzBlg2 == 2) {
      // 32x4
      // u2 == C3 C2 C1 C0, u1 == B3 B2 B1 B0, u0 == A3 A2 A1 A0
      // p2 == C3 C2 B3 B2, p1 == A3 A2 C1 C0, p0 == B1 B0 A1 A0
      // i2 == C3 B3 A3 C2, i1 == B2 A2 C1 B1, i0 == A1 C0 B0 A0
      IRTemp p0    = newTempV128();
      IRTemp p1    = newTempV128();
      IRTemp p2    = newTempV128();
      IRTemp c1100 = newTempV128();
      IRTemp c0011 = newTempV128();
      IRTemp c0110 = newTempV128();
      assign(c1100, mkV128(0xFF00));
      assign(c0011, mkV128(0x00FF));
      assign(c0110, mkV128(0x0FF0));
      // First interleave them at 64x2 granularity,
      // generating partial ("p") values.
      math_INTERLEAVE3_128(&p0, &p1, &p2, 3, u0, u1, u2);
      // And more shuffling around for the final answer
      assign(*i2, OR2( AND( IHI32x4(EX(p2), ROL(EX(p2),8)), EX(c1100) ),
                       AND( IHI32x4(ROR(EX(p1),4), EX(p2)), EX(c0011) ) ));
      assign(*i1, OR3( SHL(EX(p2),12),
                       AND(EX(p1),EX(c0110)),
                       SHR(EX(p0),12) ));
      assign(*i0, OR2( AND( ILO32x4(EX(p0),ROL(EX(p1),4)), EX(c1100) ),
                       AND( ILO32x4(ROR(EX(p0),8),EX(p0)), EX(c0011) ) ));
      return;
   }

   if (laneSzBlg2 == 1) {
      // 16x8
      // u2 == C7 C6 C5 C4 C3 C2 C1 C0
      // u1 == B7 B6 B5 B4 B3 B2 B1 B0
      // u0 == A7 A6 A5 A4 A3 A2 A1 A0
      //
      // p2 == C7 C6 B7 B6 A7 A6 C5 C4
      // p1 == B5 B4 A5 A4 C3 C2 B3 B2
      // p0 == A3 A2 C1 C0 B1 B0 A1 A0
      //
      // i2 == C7 B7 A7 C6 B6 A6 C5 B5
      // i1 == A5 C4 B4 A4 C3 B3 A3 C2
      // i0 == B2 A2 C1 B1 A1 C0 B0 A0
      IRTemp p0    = newTempV128();
      IRTemp p1    = newTempV128();
      IRTemp p2    = newTempV128();
      IRTemp c1000 = newTempV128();
      IRTemp c0100 = newTempV128();
      IRTemp c0010 = newTempV128();
      IRTemp c0001 = newTempV128();
      assign(c1000, mkV128(0xF000));
      assign(c0100, mkV128(0x0F00));
      assign(c0010, mkV128(0x00F0));
      assign(c0001, mkV128(0x000F));
      // First interleave them at 32x4 granularity,
      // generating partial ("p") values.
      math_INTERLEAVE3_128(&p0, &p1, &p2, 2, u0, u1, u2);
      // And more shuffling around for the final answer
      assign(*i2,
             OR4( AND( IHI16x8( EX(p2),        ROL(EX(p2),4) ), EX(c1000) ),
                  AND( IHI16x8( ROL(EX(p2),6), EX(p2)        ), EX(c0100) ),
                  AND( IHI16x8( ROL(EX(p2),2), ROL(EX(p2),6) ), EX(c0010) ),
                  AND( ILO16x8( ROR(EX(p2),2), ROL(EX(p1),2) ), EX(c0001) )
      ));
      assign(*i1,
             OR4( AND( IHI16x8( ROL(EX(p1),4), ROR(EX(p2),2) ), EX(c1000) ),
                  AND( IHI16x8( EX(p1),        ROL(EX(p1),4) ), EX(c0100) ),
                  AND( IHI16x8( ROL(EX(p1),4), ROL(EX(p1),8) ), EX(c0010) ),
                  AND( IHI16x8( ROR(EX(p0),6), ROL(EX(p1),4) ), EX(c0001) )
      ));
      assign(*i0,
             OR4( AND( IHI16x8( ROR(EX(p1),2), ROL(EX(p0),2) ), EX(c1000) ),
                  AND( IHI16x8( ROL(EX(p0),2), ROL(EX(p0),6) ), EX(c0100) ),
                  AND( IHI16x8( ROL(EX(p0),8), ROL(EX(p0),2) ), EX(c0010) ),
                  AND( IHI16x8( ROL(EX(p0),4), ROL(EX(p0),8) ), EX(c0001) )
      ));
      return;
   }

   if (laneSzBlg2 == 0) {
      // 8x16.  It doesn't seem worth the hassle of first doing a
      // 16x8 interleave, so just generate all 24 partial results
      // directly.
      //
      // u2 == Cf .. C0, u1 == Bf .. B0, u0 == Af .. A0
      // i2 == Cf Bf Af Ce .. Bb Ab Ca
      // i1 == Ba Aa C9 B9 .. A6 C5 B5
      // i0 == A5 C4 B4 A4 .. C0 B0 A0

      IRTemp i2_FEDC = newTempV128(); IRTemp i2_BA98 = newTempV128();
      IRTemp i2_7654 = newTempV128(); IRTemp i2_3210 = newTempV128();
      IRTemp i1_FEDC = newTempV128(); IRTemp i1_BA98 = newTempV128();
      IRTemp i1_7654 = newTempV128(); IRTemp i1_3210 = newTempV128();
      IRTemp i0_FEDC = newTempV128(); IRTemp i0_BA98 = newTempV128();
      IRTemp i0_7654 = newTempV128(); IRTemp i0_3210 = newTempV128();
      IRTemp i2_hi64 = newTempV128(); IRTemp i2_lo64 = newTempV128();
      IRTemp i1_hi64 = newTempV128(); IRTemp i1_lo64 = newTempV128();
      IRTemp i0_hi64 = newTempV128(); IRTemp i0_lo64 = newTempV128();

      // eg XXXX(qqq, CC, 0xF, BB, 0xA) sets qqq to be a vector
      // of the form 14 bytes junk : CC[0xF] : BB[0xA]
      //
#     define XXXX(_tempName,_srcVec1,_srcShift1,_srcVec2,_srcShift2) \
         IRTemp t_##_tempName = newTempV128(); \
         assign(t_##_tempName, \
                ILO8x16( ROR(EX(_srcVec1),(_srcShift1)), \
                         ROR(EX(_srcVec2),(_srcShift2)) ) )

      // Let CC, BB, AA be (handy) aliases of u2, u1, u0 respectively
      IRTemp CC = u2; IRTemp BB = u1; IRTemp AA = u0;

      // The slicing and reassembly are done as interleavedly as possible,
      // so as to minimise the demand for registers in the back end, which
      // was observed to be a problem in testing.

      XXXX(CfBf, CC, 0xf, BB, 0xf); // i2[15:14]
      XXXX(AfCe, AA, 0xf, CC, 0xe);
      assign(i2_FEDC, ILO16x8(EX(t_CfBf), EX(t_AfCe)));

      XXXX(BeAe, BB, 0xe, AA, 0xe);
      XXXX(CdBd, CC, 0xd, BB, 0xd);
      assign(i2_BA98, ILO16x8(EX(t_BeAe), EX(t_CdBd)));
      assign(i2_hi64, ILO32x4(EX(i2_FEDC), EX(i2_BA98)));

      XXXX(AdCc, AA, 0xd, CC, 0xc);
      XXXX(BcAc, BB, 0xc, AA, 0xc);
      assign(i2_7654, ILO16x8(EX(t_AdCc), EX(t_BcAc)));

      XXXX(CbBb, CC, 0xb, BB, 0xb);
      XXXX(AbCa, AA, 0xb, CC, 0xa); // i2[1:0]
      assign(i2_3210, ILO16x8(EX(t_CbBb), EX(t_AbCa)));
      assign(i2_lo64, ILO32x4(EX(i2_7654), EX(i2_3210)));
      assign(*i2, ILO64x2(EX(i2_hi64), EX(i2_lo64)));

      XXXX(BaAa, BB, 0xa, AA, 0xa); // i1[15:14]
      XXXX(C9B9, CC, 0x9, BB, 0x9);
      assign(i1_FEDC, ILO16x8(EX(t_BaAa), EX(t_C9B9)));

      XXXX(A9C8, AA, 0x9, CC, 0x8);
      XXXX(B8A8, BB, 0x8, AA, 0x8);
      assign(i1_BA98, ILO16x8(EX(t_A9C8), EX(t_B8A8)));
      assign(i1_hi64, ILO32x4(EX(i1_FEDC), EX(i1_BA98)));

      XXXX(C7B7, CC, 0x7, BB, 0x7);
      XXXX(A7C6, AA, 0x7, CC, 0x6);
      assign(i1_7654, ILO16x8(EX(t_C7B7), EX(t_A7C6)));

      XXXX(B6A6, BB, 0x6, AA, 0x6);
      XXXX(C5B5, CC, 0x5, BB, 0x5); // i1[1:0]
      assign(i1_3210, ILO16x8(EX(t_B6A6), EX(t_C5B5)));
      assign(i1_lo64, ILO32x4(EX(i1_7654), EX(i1_3210)));
      assign(*i1, ILO64x2(EX(i1_hi64), EX(i1_lo64)));

      XXXX(A5C4, AA, 0x5, CC, 0x4); // i0[15:14]
      XXXX(B4A4, BB, 0x4, AA, 0x4);
      assign(i0_FEDC, ILO16x8(EX(t_A5C4), EX(t_B4A4)));

      XXXX(C3B3, CC, 0x3, BB, 0x3);
      XXXX(A3C2, AA, 0x3, CC, 0x2);
      assign(i0_BA98, ILO16x8(EX(t_C3B3), EX(t_A3C2)));
      assign(i0_hi64, ILO32x4(EX(i0_FEDC), EX(i0_BA98)));

      XXXX(B2A2, BB, 0x2, AA, 0x2);
      XXXX(C1B1, CC, 0x1, BB, 0x1);
      assign(i0_7654, ILO16x8(EX(t_B2A2), EX(t_C1B1)));

      XXXX(A1C0, AA, 0x1, CC, 0x0);
      XXXX(B0A0, BB, 0x0, AA, 0x0); // i0[1:0]
      assign(i0_3210, ILO16x8(EX(t_A1C0), EX(t_B0A0)));
      assign(i0_lo64, ILO32x4(EX(i0_7654), EX(i0_3210)));
      assign(*i0, ILO64x2(EX(i0_hi64), EX(i0_lo64)));

#     undef XXXX
      return;
   }

   /*NOTREACHED*/
   vassert(0);
}

/* Do interleaving for 4 128 bit vectors, for ST4 insns. */
static
void math_INTERLEAVE4_128(
        /*OUTx4*/ IRTemp* i0, IRTemp* i1, IRTemp* i2, IRTemp* i3,
        UInt laneSzBlg2,
        IRTemp u0, IRTemp u1, IRTemp u2, IRTemp u3 )
{
   if (laneSzBlg2 == 3) {
      // 64x2
      assign(*i0, ILO64x2(EX(u1), EX(u0)));
      assign(*i1, ILO64x2(EX(u3), EX(u2)));
      assign(*i2, IHI64x2(EX(u1), EX(u0)));
      assign(*i3, IHI64x2(EX(u3), EX(u2)));
      return;
   }
   if (laneSzBlg2 == 2) {
      // 32x4
      // First, interleave at the 64-bit lane size.
      IRTemp p0 = newTempV128();
      IRTemp p1 = newTempV128();
      IRTemp p2 = newTempV128();
      IRTemp p3 = newTempV128();
      math_INTERLEAVE4_128(&p0, &p1, &p2, &p3, 3, u0, u1, u2, u3);
      // And interleave (cat) at the 32 bit size.
      assign(*i0, CEV32x4(EX(p1), EX(p0)));
      assign(*i1, COD32x4(EX(p1), EX(p0)));
      assign(*i2, CEV32x4(EX(p3), EX(p2)));
      assign(*i3, COD32x4(EX(p3), EX(p2)));
      return;
   }
   if (laneSzBlg2 == 1) {
      // 16x8
      // First, interleave at the 32-bit lane size.
      IRTemp p0 = newTempV128();
      IRTemp p1 = newTempV128();
      IRTemp p2 = newTempV128();
      IRTemp p3 = newTempV128();
      math_INTERLEAVE4_128(&p0, &p1, &p2, &p3, 2, u0, u1, u2, u3);
      // And rearrange within each vector, to get the right 16 bit lanes.
      assign(*i0, COD16x8(EX(p0), SHL(EX(p0), 2)));
      assign(*i1, COD16x8(EX(p1), SHL(EX(p1), 2)));
      assign(*i2, COD16x8(EX(p2), SHL(EX(p2), 2)));
      assign(*i3, COD16x8(EX(p3), SHL(EX(p3), 2)));
      return;
   }
   if (laneSzBlg2 == 0) {
      // 8x16
      // First, interleave at the 16-bit lane size.
      IRTemp p0 = newTempV128();
      IRTemp p1 = newTempV128();
      IRTemp p2 = newTempV128();
      IRTemp p3 = newTempV128();
      math_INTERLEAVE4_128(&p0, &p1, &p2, &p3, 1, u0, u1, u2, u3);
      // And rearrange within each vector, to get the right 8 bit lanes.
      assign(*i0, IHI32x4(COD8x16(EX(p0),EX(p0)), CEV8x16(EX(p0),EX(p0))));
      assign(*i1, IHI32x4(COD8x16(EX(p1),EX(p1)), CEV8x16(EX(p1),EX(p1))));
      assign(*i2, IHI32x4(COD8x16(EX(p2),EX(p2)), CEV8x16(EX(p2),EX(p2))));
      assign(*i3, IHI32x4(COD8x16(EX(p3),EX(p3)), CEV8x16(EX(p3),EX(p3))));
      return;
   }
   /*NOTREACHED*/
   vassert(0);
}

/* Do deinterleaving for 1 128 bit vector, for LD1 insns. */
static
void math_DEINTERLEAVE1_128( /*OUTx1*/ IRTemp* u0,
                             UInt laneSzBlg2, IRTemp i0 )
{
   assign(*u0, mkexpr(i0));
   return;
}

/* Do deinterleaving for 2 128 bit vectors, for LD2 insns. */
static
void math_DEINTERLEAVE2_128( /*OUTx2*/ IRTemp* u0, IRTemp* u1,
                             UInt laneSzBlg2, IRTemp i0, IRTemp i1 )
{
   /* This is pretty easy, since we have primitives directly to
      hand. */
   if (laneSzBlg2 == 3) {
      // 64x2
      // i1 == B1 A1, i0 == B0 A0
      // u1 == B1 B0, u0 == A1 A0
      assign(*u0, binop(Iop_InterleaveLO64x2, mkexpr(i1), mkexpr(i0)));
      assign(*u1, binop(Iop_InterleaveHI64x2, mkexpr(i1), mkexpr(i0)));
      return;
   }
   if (laneSzBlg2 == 2) {
      // 32x4
      // i1 == B3 A3 B2 A2, i0 == B1 A1 B0 A0
      // u1 == B3 B2 B1 B0, u0 == A3 A2 A1 A0,
      assign(*u0, binop(Iop_CatEvenLanes32x4, mkexpr(i1), mkexpr(i0)));
      assign(*u1, binop(Iop_CatOddLanes32x4, mkexpr(i1), mkexpr(i0)));
      return;
   }
   if (laneSzBlg2 == 1) {
      // 16x8
      // i0 == B3 A3 B2 A2 B1 A1 B0 A0
      // i1 == B7 A7 B6 A6 B5 A5 B4 A4
      // u1 == B{7..0}, u0 == A{7..0}
      assign(*u0, binop(Iop_CatEvenLanes16x8, mkexpr(i1), mkexpr(i0)));
      assign(*u1, binop(Iop_CatOddLanes16x8, mkexpr(i1), mkexpr(i0)));
      return;
   }
   if (laneSzBlg2 == 0) {
      // 8x16
      // i0 == B7 A7 B6 A6 B5 A5 B4 A4 B3 A3 B2 A2 B1 A1 B0 A0
      // i1 == Bf Af Be Ae Bd Ad Bc Ac Bb Ab Ba Aa B9 A9 B8 A8
      // u1 == B{f..0}, u0 == A{f..0}
      assign(*u0, binop(Iop_CatEvenLanes8x16, mkexpr(i1), mkexpr(i0)));
      assign(*u1, binop(Iop_CatOddLanes8x16, mkexpr(i1), mkexpr(i0)));
      return;
   }
   /*NOTREACHED*/
   vassert(0);
}

/* Do deinterleaving for 3 128 bit vectors, for LD3 insns. */
static
void math_DEINTERLEAVE3_128(
        /*OUTx3*/ IRTemp* u0, IRTemp* u1, IRTemp* u2,
        UInt laneSzBlg2,
        IRTemp i0, IRTemp i1, IRTemp i2 )
{
   if (laneSzBlg2 == 3) {
      // 64x2
      // i2 == C1 B1, i1 == A1 C0, i0 == B0 A0,
      // u2 == C1 C0, u1 == B1 B0, u0 == A1 A0
      assign(*u2, ILO64x2( ROL(EX(i2),8), EX(i1)        ));
      assign(*u1, ILO64x2( EX(i2),        ROL(EX(i0),8) ));
      assign(*u0, ILO64x2( ROL(EX(i1),8), EX(i0)        ));
      return;
   }

   if (laneSzBlg2 == 2) {
      // 32x4
      // i2 == C3 B3 A3 C2, i1 == B2 A2 C1 B1, i0 == A1 C0 B0 A0
      // p2 == C3 C2 B3 B2, p1 == A3 A2 C1 C0, p0 == B1 B0 A1 A0
      // u2 == C3 C2 C1 C0, u1 == B3 B2 B1 B0, u0 == A3 A2 A1 A0
      IRTemp t_a1c0b0a0 = newTempV128();
      IRTemp t_a2c1b1a1 = newTempV128();
      IRTemp t_a3c2b2a2 = newTempV128();
      IRTemp t_a0c3b3a3 = newTempV128();
      IRTemp p0 = newTempV128();
      IRTemp p1 = newTempV128();
      IRTemp p2 = newTempV128();
      // Compute some intermediate values.
      assign(t_a1c0b0a0, EX(i0));
      assign(t_a2c1b1a1, SL(EX(i1),EX(i0),3*4));
      assign(t_a3c2b2a2, SL(EX(i2),EX(i1),2*4));
      assign(t_a0c3b3a3, SL(EX(i0),EX(i2),1*4));
      // First deinterleave into lane-pairs
      assign(p0, ILO32x4(EX(t_a2c1b1a1),EX(t_a1c0b0a0)));
      assign(p1, ILO64x2(ILO32x4(EX(t_a0c3b3a3), EX(t_a3c2b2a2)),
                         IHI32x4(EX(t_a2c1b1a1), EX(t_a1c0b0a0))));
      assign(p2, ILO32x4(ROR(EX(t_a0c3b3a3),1*4), ROR(EX(t_a3c2b2a2),1*4)));
      // Then deinterleave at 64x2 granularity.
      math_DEINTERLEAVE3_128(u0, u1, u2, 3, p0, p1, p2);
      return;
   }

   if (laneSzBlg2 == 1) {
      // 16x8
      // u2 == C7 C6 C5 C4 C3 C2 C1 C0
      // u1 == B7 B6 B5 B4 B3 B2 B1 B0
      // u0 == A7 A6 A5 A4 A3 A2 A1 A0
      //
      // i2 == C7 B7 A7 C6 B6 A6 C5 B5
      // i1 == A5 C4 B4 A4 C3 B3 A3 C2
      // i0 == B2 A2 C1 B1 A1 C0 B0 A0
      //
      // p2 == C7 C6 B7 B6 A7 A6 C5 C4
      // p1 == B5 B4 A5 A4 C3 C2 B3 B2
      // p0 == A3 A2 C1 C0 B1 B0 A1 A0
      //
      IRTemp s0, s1, s2, s3, t0, t1, t2, t3, p0, p1, p2, c00111111;
      s0 = s1 = s2 = s3
         = t0 = t1 = t2 = t3 = p0 = p1 = p2 = c00111111 = IRTemp_INVALID;
      newTempsV128_4(&s0, &s1, &s2, &s3);
      newTempsV128_4(&t0, &t1, &t2, &t3);
      newTempsV128_4(&p0, &p1, &p2, &c00111111);

      // s0 == b2a2 c1b1a1 c0b0a0
      // s1 == b4a4 c3b3a3 c2b2a2
      // s2 == b6a6 c5b5a5 c4b4a4
      // s3 == b0a0 c7b7a7 c6b6a6
      assign(s0, EX(i0));
      assign(s1, SL(EX(i1),EX(i0),6*2));
      assign(s2, SL(EX(i2),EX(i1),4*2));
      assign(s3, SL(EX(i0),EX(i2),2*2));

      // t0 == 0 0 c1c0 b1b0 a1a0
      // t1 == 0 0 c3c2 b3b2 a3a2
      // t2 == 0 0 c5c4 b5b4 a5a4
      // t3 == 0 0 c7c6 b7b6 a7a6
      assign(c00111111, mkV128(0x0FFF));
      assign(t0, AND( ILO16x8( ROR(EX(s0),3*2), EX(s0)), EX(c00111111)));
      assign(t1, AND( ILO16x8( ROR(EX(s1),3*2), EX(s1)), EX(c00111111)));
      assign(t2, AND( ILO16x8( ROR(EX(s2),3*2), EX(s2)), EX(c00111111)));
      assign(t3, AND( ILO16x8( ROR(EX(s3),3*2), EX(s3)), EX(c00111111)));

      assign(p0, OR2(EX(t0),        SHL(EX(t1),6*2)));
      assign(p1, OR2(SHL(EX(t2),4*2), SHR(EX(t1),2*2)));
      assign(p2, OR2(SHL(EX(t3),2*2), SHR(EX(t2),4*2)));

      // Then deinterleave at 32x4 granularity.
      math_DEINTERLEAVE3_128(u0, u1, u2, 2, p0, p1, p2);
      return;
   }

   if (laneSzBlg2 == 0) {
      // 8x16.  This is the same scheme as for 16x8, with twice the
      // number of intermediate values.
      //
      // u2 == C{f..0}
      // u1 == B{f..0}
      // u0 == A{f..0}
      //
      // i2 == CBA{f} CBA{e} CBA{d} CBA{c} CBA{b} C{a}
      // i1 == BA{a} CBA{9} CBA{8} CBA{7} CBA{6} CB{5}
      // i0 == A{5} CBA{4} CBA{3} CBA{2} CBA{1} CBA{0}
      //
      // p2 == C{fe} B{fe} A{fe} C{dc} B{dc} A{dc} C{ba} B{ba}
      // p1 == A{ba} C{98} B{98} A{98} C{76} B{76} A{76} C{54}
      // p0 == B{54} A{54} C{32} B{32} A{32} C{10} B{10} A{10}
      //
      IRTemp s0, s1, s2, s3, s4, s5, s6, s7,
             t0, t1, t2, t3, t4, t5, t6, t7, p0, p1, p2, cMASK;
      s0 = s1 = s2 = s3 = s4 = s5 = s6 = s7
         = t0 = t1 = t2 = t3 = t4 = t5 = t6 = t7 = p0 = p1 = p2 = cMASK
         = IRTemp_INVALID;
      newTempsV128_4(&s0, &s1, &s2, &s3);
      newTempsV128_4(&s4, &s5, &s6, &s7);
      newTempsV128_4(&t0, &t1, &t2, &t3);
      newTempsV128_4(&t4, &t5, &t6, &t7);
      newTempsV128_4(&p0, &p1, &p2, &cMASK);

      // s0 == A{5} CBA{4} CBA{3} CBA{2} CBA{1} CBA{0}
      // s1 == A{7} CBA{6} CBA{5} CBA{4} CBA{3} CBA{2}
      // s2 == A{9} CBA{8} CBA{7} CBA{6} CBA{5} CBA{4}
      // s3 == A{b} CBA{a} CBA{9} CBA{8} CBA{7} CBA{6}
      // s4 == A{d} CBA{c} CBA{b} CBA{a} CBA{9} CBA{8}
      // s5 == A{f} CBA{e} CBA{d} CBA{c} CBA{b} CBA{a}
      // s6 == A{1} CBA{0} CBA{f} CBA{e} CBA{d} CBA{c}
      // s7 == A{3} CBA{2} CBA{1} CBA{0} CBA{f} CBA{e}
      assign(s0, SL(EX(i1),EX(i0), 0));
      assign(s1, SL(EX(i1),EX(i0), 6));
      assign(s2, SL(EX(i1),EX(i0),12));
      assign(s3, SL(EX(i2),EX(i1), 2));
      assign(s4, SL(EX(i2),EX(i1), 8));
      assign(s5, SL(EX(i2),EX(i1),14));
      assign(s6, SL(EX(i0),EX(i2), 4));
      assign(s7, SL(EX(i0),EX(i2),10));

      // t0 == 0--(ten)--0 C1 C0 B1 B0 A1 A0
      // t1 == 0--(ten)--0 C3 C2 B3 B2 A3 A2
      // t2 == 0--(ten)--0 C5 C4 B5 B4 A5 A4
      // t3 == 0--(ten)--0 C7 C6 B7 B6 A7 A6
      // t4 == 0--(ten)--0 C9 C8 B9 B8 A9 A8
      // t5 == 0--(ten)--0 Cb Ca Bb Ba Ab Aa
      // t6 == 0--(ten)--0 Cd Cc Bd Bc Ad Ac
      // t7 == 0--(ten)--0 Cf Ce Bf Be Af Ae
      assign(cMASK, mkV128(0x003F));
      assign(t0, AND( ILO8x16( ROR(EX(s0),3), EX(s0)), EX(cMASK)));
      assign(t1, AND( ILO8x16( ROR(EX(s1),3), EX(s1)), EX(cMASK)));
      assign(t2, AND( ILO8x16( ROR(EX(s2),3), EX(s2)), EX(cMASK)));
      assign(t3, AND( ILO8x16( ROR(EX(s3),3), EX(s3)), EX(cMASK)));
      assign(t4, AND( ILO8x16( ROR(EX(s4),3), EX(s4)), EX(cMASK)));
      assign(t5, AND( ILO8x16( ROR(EX(s5),3), EX(s5)), EX(cMASK)));
      assign(t6, AND( ILO8x16( ROR(EX(s6),3), EX(s6)), EX(cMASK)));
      assign(t7, AND( ILO8x16( ROR(EX(s7),3), EX(s7)), EX(cMASK)));

      assign(p0, OR3( SHL(EX(t2),12), SHL(EX(t1),6), EX(t0) ));
      assign(p1, OR4( SHL(EX(t5),14), SHL(EX(t4),8),
                      SHL(EX(t3),2), SHR(EX(t2),4) ));
      assign(p2, OR3( SHL(EX(t7),10), SHL(EX(t6),4), SHR(EX(t5),2) ));

      // Then deinterleave at 16x8 granularity.
      math_DEINTERLEAVE3_128(u0, u1, u2, 1, p0, p1, p2);
      return;
   }

   /*NOTREACHED*/
   vassert(0);
}

/* Do deinterleaving for 4 128 bit vectors, for LD4 insns. */
static
void math_DEINTERLEAVE4_128(
        /*OUTx4*/ IRTemp* u0, IRTemp* u1, IRTemp* u2, IRTemp* u3,
        UInt laneSzBlg2,
        IRTemp i0, IRTemp i1, IRTemp i2, IRTemp i3 )
{
   if (laneSzBlg2 == 3) {
      // 64x2
      assign(*u0, ILO64x2(EX(i2), EX(i0)));
      assign(*u1, IHI64x2(EX(i2), EX(i0)));
      assign(*u2, ILO64x2(EX(i3), EX(i1)));
      assign(*u3, IHI64x2(EX(i3), EX(i1)));
      return;
   }
   if (laneSzBlg2 == 2) {
      // 32x4
      IRTemp p0 = newTempV128();
      IRTemp p2 = newTempV128();
      IRTemp p1 = newTempV128();
      IRTemp p3 = newTempV128();
      assign(p0, ILO32x4(EX(i1), EX(i0)));
      assign(p1, IHI32x4(EX(i1), EX(i0)));
      assign(p2, ILO32x4(EX(i3), EX(i2)));
      assign(p3, IHI32x4(EX(i3), EX(i2)));
      // And now do what we did for the 64-bit case.
      math_DEINTERLEAVE4_128(u0, u1, u2, u3, 3, p0, p1, p2, p3);
      return;
   }
   if (laneSzBlg2 == 1) {
      // 16x8
      // Deinterleave into 32-bit chunks, then do as the 32-bit case.
      IRTemp p0 = newTempV128();
      IRTemp p1 = newTempV128();
      IRTemp p2 = newTempV128();
      IRTemp p3 = newTempV128();
      assign(p0, IHI16x8(EX(i0), SHL(EX(i0), 8)));
      assign(p1, IHI16x8(EX(i1), SHL(EX(i1), 8)));
      assign(p2, IHI16x8(EX(i2), SHL(EX(i2), 8)));
      assign(p3, IHI16x8(EX(i3), SHL(EX(i3), 8)));
      // From here on is like the 32 bit case.
      math_DEINTERLEAVE4_128(u0, u1, u2, u3, 2, p0, p1, p2, p3);
      return;
   }
   if (laneSzBlg2 == 0) {
      // 8x16
      // Deinterleave into 16-bit chunks, then do as the 16-bit case.
      IRTemp p0 = newTempV128();
      IRTemp p1 = newTempV128();
      IRTemp p2 = newTempV128();
      IRTemp p3 = newTempV128();
      assign(p0, IHI64x2( IHI8x16(EX(i0),ROL(EX(i0),4)),
                          ILO8x16(EX(i0),ROL(EX(i0),4)) ));
      assign(p1, IHI64x2( IHI8x16(EX(i1),ROL(EX(i1),4)),
                          ILO8x16(EX(i1),ROL(EX(i1),4)) ));
      assign(p2, IHI64x2( IHI8x16(EX(i2),ROL(EX(i2),4)),
                          ILO8x16(EX(i2),ROL(EX(i2),4)) ));
      assign(p3, IHI64x2( IHI8x16(EX(i3),ROL(EX(i3),4)),
                          ILO8x16(EX(i3),ROL(EX(i3),4)) ));
      // From here on is like the 16 bit case.
      math_DEINTERLEAVE4_128(u0, u1, u2, u3, 1, p0, p1, p2, p3);
      return;
   }
   /*NOTREACHED*/
   vassert(0);
}

/* Wrappers that use the full-width (de)interleavers to do half-width
   (de)interleaving.  The scheme is to clone each input lane in the
   lower half of each incoming value, do a full width (de)interleave
   at the next lane size up, and remove every other lane of the
   result.  The returned values may have any old junk in the upper
   64 bits -- the caller must ignore that. */
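
/* To illustrate the scheme for 8-bit lanes: cloning turns the
   low-half bytes "dcba" into "ddccbbaa"; a full-width interleave is
   then done at the 16-bit lane size; and dropping every other 8-bit
   lane of the result leaves the desired half-width interleaving in
   the low 64 bits. */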
/* Helper function -- get doubling and narrowing operations. */
static
void math_get_doubler_and_halver ( /*OUT*/IROp* doubler,
                                   /*OUT*/IROp* halver,
                                   UInt laneSzBlg2 )
{
   switch (laneSzBlg2) {
      case 2:
         *doubler = Iop_InterleaveLO32x4; *halver = Iop_CatEvenLanes32x4;
         break;
      case 1:
         *doubler = Iop_InterleaveLO16x8; *halver = Iop_CatEvenLanes16x8;
         break;
      case 0:
         *doubler = Iop_InterleaveLO8x16; *halver = Iop_CatEvenLanes8x16;
         break;
      default:
         vassert(0);
   }
}
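
/* For instance, with 8-bit lanes (laneSzBlg2 == 0), the doubler
   Iop_InterleaveLO8x16 applied to (x,x) duplicates each byte of x's
   low half, and the halver Iop_CatEvenLanes8x16 applied to (y,y)
   keeps only the even-numbered byte lanes of y, which undoes the
   duplication. */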
/* Do interleaving for 1 64 bit vector, for ST1 insns. */
static
void math_INTERLEAVE1_64( /*OUTx1*/ IRTemp* i0,
                          UInt laneSzBlg2, IRTemp u0 )
{
   assign(*i0, mkexpr(u0));
}

/* Do interleaving for 2 64 bit vectors, for ST2 insns. */
static
void math_INTERLEAVE2_64( /*OUTx2*/ IRTemp* i0, IRTemp* i1,
                          UInt laneSzBlg2, IRTemp u0, IRTemp u1 )
{
   if (laneSzBlg2 == 3) {
      // 1x64, degenerate case
      assign(*i0, EX(u0));
      assign(*i1, EX(u1));
      return;
   }

   vassert(laneSzBlg2 >= 0 && laneSzBlg2 <= 2);
   IROp doubler = Iop_INVALID, halver = Iop_INVALID;
   math_get_doubler_and_halver(&doubler, &halver, laneSzBlg2);

   IRTemp du0 = newTempV128();
   IRTemp du1 = newTempV128();
   assign(du0, binop(doubler, EX(u0), EX(u0)));
   assign(du1, binop(doubler, EX(u1), EX(u1)));
   IRTemp di0 = newTempV128();
   IRTemp di1 = newTempV128();
   math_INTERLEAVE2_128(&di0, &di1, laneSzBlg2 + 1, du0, du1);
   assign(*i0, binop(halver, EX(di0), EX(di0)));
   assign(*i1, binop(halver, EX(di1), EX(di1)));
}

/* Do interleaving for 3 64 bit vectors, for ST3 insns. */
static
void math_INTERLEAVE3_64(
        /*OUTx3*/ IRTemp* i0, IRTemp* i1, IRTemp* i2,
        UInt laneSzBlg2,
        IRTemp u0, IRTemp u1, IRTemp u2 )
{
   if (laneSzBlg2 == 3) {
      // 1x64, degenerate case
      assign(*i0, EX(u0));
      assign(*i1, EX(u1));
      assign(*i2, EX(u2));
      return;
   }

   vassert(laneSzBlg2 >= 0 && laneSzBlg2 <= 2);
   IROp doubler = Iop_INVALID, halver = Iop_INVALID;
   math_get_doubler_and_halver(&doubler, &halver, laneSzBlg2);

   IRTemp du0 = newTempV128();
   IRTemp du1 = newTempV128();
   IRTemp du2 = newTempV128();
   assign(du0, binop(doubler, EX(u0), EX(u0)));
   assign(du1, binop(doubler, EX(u1), EX(u1)));
   assign(du2, binop(doubler, EX(u2), EX(u2)));
   IRTemp di0 = newTempV128();
   IRTemp di1 = newTempV128();
   IRTemp di2 = newTempV128();
   math_INTERLEAVE3_128(&di0, &di1, &di2, laneSzBlg2 + 1, du0, du1, du2);
   assign(*i0, binop(halver, EX(di0), EX(di0)));
   assign(*i1, binop(halver, EX(di1), EX(di1)));
   assign(*i2, binop(halver, EX(di2), EX(di2)));
}

/* Do interleaving for 4 64 bit vectors, for ST4 insns. */
static
void math_INTERLEAVE4_64(
        /*OUTx4*/ IRTemp* i0, IRTemp* i1, IRTemp* i2, IRTemp* i3,
        UInt laneSzBlg2,
        IRTemp u0, IRTemp u1, IRTemp u2, IRTemp u3 )
{
   if (laneSzBlg2 == 3) {
      // 1x64, degenerate case
      assign(*i0, EX(u0));
      assign(*i1, EX(u1));
      assign(*i2, EX(u2));
      assign(*i3, EX(u3));
      return;
   }

   vassert(laneSzBlg2 >= 0 && laneSzBlg2 <= 2);
   IROp doubler = Iop_INVALID, halver = Iop_INVALID;
   math_get_doubler_and_halver(&doubler, &halver, laneSzBlg2);

   IRTemp du0 = newTempV128();
   IRTemp du1 = newTempV128();
   IRTemp du2 = newTempV128();
   IRTemp du3 = newTempV128();
   assign(du0, binop(doubler, EX(u0), EX(u0)));
   assign(du1, binop(doubler, EX(u1), EX(u1)));
   assign(du2, binop(doubler, EX(u2), EX(u2)));
   assign(du3, binop(doubler, EX(u3), EX(u3)));
   IRTemp di0 = newTempV128();
   IRTemp di1 = newTempV128();
   IRTemp di2 = newTempV128();
   IRTemp di3 = newTempV128();
   math_INTERLEAVE4_128(&di0, &di1, &di2, &di3,
                        laneSzBlg2 + 1, du0, du1, du2, du3);
   assign(*i0, binop(halver, EX(di0), EX(di0)));
   assign(*i1, binop(halver, EX(di1), EX(di1)));
   assign(*i2, binop(halver, EX(di2), EX(di2)));
   assign(*i3, binop(halver, EX(di3), EX(di3)));
}

/* Do deinterleaving for 1 64 bit vector, for LD1 insns. */
static
void math_DEINTERLEAVE1_64( /*OUTx1*/ IRTemp* u0,
                            UInt laneSzBlg2, IRTemp i0 )
{
   assign(*u0, mkexpr(i0));
}

/* Do deinterleaving for 2 64 bit vectors, for LD2 insns. */
static
void math_DEINTERLEAVE2_64( /*OUTx2*/ IRTemp* u0, IRTemp* u1,
                            UInt laneSzBlg2, IRTemp i0, IRTemp i1 )
{
   if (laneSzBlg2 == 3) {
      // 1x64, degenerate case
      assign(*u0, EX(i0));
      assign(*u1, EX(i1));
      return;
   }

   vassert(laneSzBlg2 >= 0 && laneSzBlg2 <= 2);
   IROp doubler = Iop_INVALID, halver = Iop_INVALID;
   math_get_doubler_and_halver(&doubler, &halver, laneSzBlg2);

   IRTemp di0 = newTempV128();
   IRTemp di1 = newTempV128();
   assign(di0, binop(doubler, EX(i0), EX(i0)));
   assign(di1, binop(doubler, EX(i1), EX(i1)));

   IRTemp du0 = newTempV128();
   IRTemp du1 = newTempV128();
   math_DEINTERLEAVE2_128(&du0, &du1, laneSzBlg2 + 1, di0, di1);
   assign(*u0, binop(halver, EX(du0), EX(du0)));
   assign(*u1, binop(halver, EX(du1), EX(du1)));
}

/* Do deinterleaving for 3 64 bit vectors, for LD3 insns. */
static
void math_DEINTERLEAVE3_64(
        /*OUTx3*/ IRTemp* u0, IRTemp* u1, IRTemp* u2,
        UInt laneSzBlg2,
        IRTemp i0, IRTemp i1, IRTemp i2 )
{
   if (laneSzBlg2 == 3) {
      // 1x64, degenerate case
      assign(*u0, EX(i0));
      assign(*u1, EX(i1));
      assign(*u2, EX(i2));
      return;
   }

   vassert(laneSzBlg2 >= 0 && laneSzBlg2 <= 2);
   IROp doubler = Iop_INVALID, halver = Iop_INVALID;
   math_get_doubler_and_halver(&doubler, &halver, laneSzBlg2);

   IRTemp di0 = newTempV128();
   IRTemp di1 = newTempV128();
   IRTemp di2 = newTempV128();
   assign(di0, binop(doubler, EX(i0), EX(i0)));
   assign(di1, binop(doubler, EX(i1), EX(i1)));
   assign(di2, binop(doubler, EX(i2), EX(i2)));
   IRTemp du0 = newTempV128();
   IRTemp du1 = newTempV128();
   IRTemp du2 = newTempV128();
   math_DEINTERLEAVE3_128(&du0, &du1, &du2, laneSzBlg2 + 1, di0, di1, di2);
   assign(*u0, binop(halver, EX(du0), EX(du0)));
   assign(*u1, binop(halver, EX(du1), EX(du1)));
   assign(*u2, binop(halver, EX(du2), EX(du2)));
}

/* Do deinterleaving for 4 64 bit vectors, for LD4 insns. */
static
void math_DEINTERLEAVE4_64(
        /*OUTx4*/ IRTemp* u0, IRTemp* u1, IRTemp* u2, IRTemp* u3,
        UInt laneSzBlg2,
        IRTemp i0, IRTemp i1, IRTemp i2, IRTemp i3 )
{
   if (laneSzBlg2 == 3) {
      // 1x64, degenerate case
      assign(*u0, EX(i0));
      assign(*u1, EX(i1));
      assign(*u2, EX(i2));
      assign(*u3, EX(i3));
      return;
   }

   vassert(laneSzBlg2 >= 0 && laneSzBlg2 <= 2);
   IROp doubler = Iop_INVALID, halver = Iop_INVALID;
   math_get_doubler_and_halver(&doubler, &halver, laneSzBlg2);

   IRTemp di0 = newTempV128();
   IRTemp di1 = newTempV128();
   IRTemp di2 = newTempV128();
   IRTemp di3 = newTempV128();
   assign(di0, binop(doubler, EX(i0), EX(i0)));
   assign(di1, binop(doubler, EX(i1), EX(i1)));
   assign(di2, binop(doubler, EX(i2), EX(i2)));
   assign(di3, binop(doubler, EX(i3), EX(i3)));
   IRTemp du0 = newTempV128();
   IRTemp du1 = newTempV128();
   IRTemp du2 = newTempV128();
   IRTemp du3 = newTempV128();
   math_DEINTERLEAVE4_128(&du0, &du1, &du2, &du3,
                          laneSzBlg2 + 1, di0, di1, di2, di3);
   assign(*u0, binop(halver, EX(du0), EX(du0)));
   assign(*u1, binop(halver, EX(du1), EX(du1)));
   assign(*u2, binop(halver, EX(du2), EX(du2)));
   assign(*u3, binop(halver, EX(du3), EX(du3)));
}

/*------------------------------------------------------------*/
/*--- Load and Store instructions                          ---*/
/*------------------------------------------------------------*/

/* Generate the EA for a "reg + reg" style amode.  This is done from
   parts of the insn, but for sanity checking's sake it takes the whole
   insn.  This appears to depend on insn[15:12], with opt=insn[15:13]
   and S=insn[12]:

   The possible forms, along with their opt:S values, are:
      011:0   Xn|SP + Xm
      111:0   Xn|SP + Xm
      011:1   Xn|SP + Xm * transfer_szB
      111:1   Xn|SP + Xm * transfer_szB
      010:0   Xn|SP + 32Uto64(Wm)
      010:1   Xn|SP + 32Uto64(Wm) * transfer_szB
      110:0   Xn|SP + 32Sto64(Wm)
      110:1   Xn|SP + 32Sto64(Wm) * transfer_szB

   Rm is insn[20:16].  Rn is insn[9:5].  Rt is insn[4:0].  Log2 of
   the transfer size is insn[23,31,30].  For integer loads/stores,
   insn[23] is zero, hence szLg2 can be at most 3 in such cases.

   If the decoding fails, it returns IRTemp_INVALID.

   isInt is True iff this decoding is for transfers to/from integer
   registers.  If False it is for transfers to/from vector registers.
*/
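
/* For example, "ldr x1, [x2, x3, lsl #3]" is the opt:S == 011:1 form:
   the EA is X2 + (X3 << 3), the shift amount being szLg2 for the
   8-byte transfer. */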
static IRTemp gen_indexed_EA ( /*OUT*/HChar* buf, UInt insn, Bool isInt )
{
   UInt optS  = SLICE_UInt(insn, 15, 12);
   UInt mm    = SLICE_UInt(insn, 20, 16);
   UInt nn    = SLICE_UInt(insn, 9, 5);
   UInt szLg2 = (isInt ? 0 : (SLICE_UInt(insn, 23, 23) << 2))
                | SLICE_UInt(insn, 31, 30); // Log2 of the size

   buf[0] = 0;

   /* Sanity checks, that this really is a load/store insn. */
   if (SLICE_UInt(insn, 11, 10) != BITS2(1,0))
      goto fail;

   if (isInt
       && SLICE_UInt(insn, 29, 21) != BITS9(1,1,1,0,0,0,0,1,1)/*LDR*/
       && SLICE_UInt(insn, 29, 21) != BITS9(1,1,1,0,0,0,0,0,1)/*STR*/
       && SLICE_UInt(insn, 29, 21) != BITS9(1,1,1,0,0,0,1,0,1)/*LDRSbhw Xt*/
       && SLICE_UInt(insn, 29, 21) != BITS9(1,1,1,0,0,0,1,1,1))/*LDRSbhw Wt*/
      goto fail;

   if (!isInt
       && SLICE_UInt(insn, 29, 24) != BITS6(1,1,1,1,0,0)) /*LDR/STR*/
      goto fail;

   /* Throw out non-verified but possibly valid cases. */
   switch (szLg2) {
      case BITS3(0,0,0): break; //  8 bit, valid for both int and vec
      case BITS3(0,0,1): break; // 16 bit, valid for both int and vec
      case BITS3(0,1,0): break; // 32 bit, valid for both int and vec
      case BITS3(0,1,1): break; // 64 bit, valid for both int and vec
      case BITS3(1,0,0): // can only ever be valid for the vector case
         if (isInt) goto fail; else break;
      case BITS3(1,0,1): // these sizes are never valid
      case BITS3(1,1,0):
      case BITS3(1,1,1): goto fail;

      default: vassert(0);
   }

   IRExpr* rhs = NULL;
   switch (optS) {
      case BITS4(1,1,1,0): goto fail; //ATC
      case BITS4(0,1,1,0):
         rhs = getIReg64orZR(mm);
         vex_sprintf(buf, "[%s, %s]",
                     nameIReg64orZR(nn), nameIReg64orZR(mm));
         break;
      case BITS4(1,1,1,1): goto fail; //ATC
      case BITS4(0,1,1,1):
         rhs = binop(Iop_Shl64, getIReg64orZR(mm), mkU8(szLg2));
         vex_sprintf(buf, "[%s, %s lsl %u]",
                     nameIReg64orZR(nn), nameIReg64orZR(mm), szLg2);
         break;
      case BITS4(0,1,0,0):
         rhs = unop(Iop_32Uto64, getIReg32orZR(mm));
         vex_sprintf(buf, "[%s, %s uxtx]",
                     nameIReg64orZR(nn), nameIReg32orZR(mm));
         break;
      case BITS4(0,1,0,1):
         rhs = binop(Iop_Shl64,
                     unop(Iop_32Uto64, getIReg32orZR(mm)), mkU8(szLg2));
         vex_sprintf(buf, "[%s, %s uxtx, lsl %u]",
                     nameIReg64orZR(nn), nameIReg32orZR(mm), szLg2);
         break;
      case BITS4(1,1,0,0):
         rhs = unop(Iop_32Sto64, getIReg32orZR(mm));
         vex_sprintf(buf, "[%s, %s sxtx]",
                     nameIReg64orZR(nn), nameIReg32orZR(mm));
         break;
      case BITS4(1,1,0,1):
         rhs = binop(Iop_Shl64,
                     unop(Iop_32Sto64, getIReg32orZR(mm)), mkU8(szLg2));
         vex_sprintf(buf, "[%s, %s sxtx, lsl %u]",
                     nameIReg64orZR(nn), nameIReg32orZR(mm), szLg2);
         break;
      /* The rest appear to be genuinely invalid */
      default:
         goto fail;
   }

   vassert(rhs);
   IRTemp res = newTemp(Ity_I64);
   assign(res, binop(Iop_Add64, getIReg64orSP(nn), rhs));
   return res;

  fail:
   vex_printf("gen_indexed_EA: unhandled case optS == 0x%x\n", optS);
   return IRTemp_INVALID;
}

/* Generate an 8/16/32/64 bit integer store to ADDR for the lowest
   bits of DATAE :: Ity_I64. */
static void gen_narrowing_store ( UInt szB, IRTemp addr, IRExpr* dataE )
{
   IRExpr* addrE = mkexpr(addr);
   switch (szB) {
      case 8:
         storeLE(addrE, dataE);
         break;
      case 4:
         storeLE(addrE, unop(Iop_64to32, dataE));
         break;
      case 2:
         storeLE(addrE, unop(Iop_64to16, dataE));
         break;
      case 1:
         storeLE(addrE, unop(Iop_64to8, dataE));
         break;
      default:
         vassert(0);
   }
}

/* Generate an 8/16/32/64 bit unsigned widening load from ADDR,
   placing the result in an Ity_I64 temporary. */
static IRTemp gen_zwidening_load ( UInt szB, IRTemp addr )
{
   IRTemp  res   = newTemp(Ity_I64);
   IRExpr* addrE = mkexpr(addr);
   switch (szB) {
      case 8:
         assign(res, loadLE(Ity_I64,addrE));
         break;
      case 4:
         assign(res, unop(Iop_32Uto64, loadLE(Ity_I32,addrE)));
         break;
      case 2:
         assign(res, unop(Iop_16Uto64, loadLE(Ity_I16,addrE)));
         break;
      case 1:
         assign(res, unop(Iop_8Uto64, loadLE(Ity_I8,addrE)));
         break;
      default:
         vassert(0);
   }
   return res;
}
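
/* For example, gen_zwidening_load(4, addr) produces a 32-bit
   little-endian load whose result is zero extended (Iop_32Uto64)
   into the returned 64-bit temporary. */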
/* Generate a "standard 7" name, from bitQ and size.  But also
   allow ".1d" since that's occasionally useful. */
static
const HChar* nameArr_Q_SZ ( UInt bitQ, UInt size )
{
   vassert(bitQ <= 1 && size <= 3);
   const HChar* nms[8]
      = { "8b", "4h", "2s", "1d", "16b", "8h", "4s", "2d" };
   UInt ix = (bitQ << 2) | size;
   vassert(ix < 8);
   return nms[ix];
}
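
/* For example, bitQ == 0 && size == 1 gives "4h" (four 16-bit lanes
   in 64 bits), and bitQ == 1 && size == 3 gives "2d". */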
static
Bool dis_ARM64_load_store(/*MB_OUT*/DisResult* dres, UInt insn,
                          const VexAbiInfo* abiinfo)
{
# define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))

   /* ------------ LDR,STR (immediate, uimm12) ----------- */
   /* uimm12 is scaled by the transfer size

      31 29  26    21    9  4
      |  |   |     |     |  |
      11 111 00100 imm12 nn tt    STR  Xt, [Xn|SP, #imm12 * 8]
      11 111 00101 imm12 nn tt    LDR  Xt, [Xn|SP, #imm12 * 8]

      10 111 00100 imm12 nn tt    STR  Wt, [Xn|SP, #imm12 * 4]
      10 111 00101 imm12 nn tt    LDR  Wt, [Xn|SP, #imm12 * 4]

      01 111 00100 imm12 nn tt    STRH Wt, [Xn|SP, #imm12 * 2]
      01 111 00101 imm12 nn tt    LDRH Wt, [Xn|SP, #imm12 * 2]

      00 111 00100 imm12 nn tt    STRB Wt, [Xn|SP, #imm12 * 1]
      00 111 00101 imm12 nn tt    LDRB Wt, [Xn|SP, #imm12 * 1]
   */
   if (INSN(29,23) == BITS7(1,1,1,0,0,1,0)) {
      UInt   szLg2 = INSN(31,30);
      UInt   szB   = 1 << szLg2;
      Bool   isLD  = INSN(22,22) == 1;
      UInt   offs  = INSN(21,10) * szB;
      UInt   nn    = INSN(9,5);
      UInt   tt    = INSN(4,0);
      IRTemp ta    = newTemp(Ity_I64);
      assign(ta, binop(Iop_Add64, getIReg64orSP(nn), mkU64(offs)));
      if (nn == 31) { /* FIXME generate stack alignment check */ }
      if (isLD) {
         putIReg64orZR(tt, mkexpr(gen_zwidening_load(szB, ta)));
      } else {
         gen_narrowing_store(szB, ta, getIReg64orZR(tt));
      }
      const HChar* ld_name[4] = { "ldrb", "ldrh", "ldr", "ldr" };
      const HChar* st_name[4] = { "strb", "strh", "str", "str" };
      DIP("%s %s, [%s, #%u]\n",
          (isLD ? ld_name : st_name)[szLg2], nameIRegOrZR(szB == 8, tt),
          nameIReg64orSP(nn), offs);
      return True;
   }

   /* ------------ LDUR,STUR (immediate, simm9) ----------- */
   /*
      31 29  26      20   11 9  4
      |  |   |       |    |  |  |
      (at-Rn-then-Rn=EA)  |  |  |
      sz 111 00000 0 imm9 01 Rn Rt   STR Rt, [Xn|SP], #simm9
      sz 111 00001 0 imm9 01 Rn Rt   LDR Rt, [Xn|SP], #simm9

      (at-EA-then-Rn=EA)
      sz 111 00000 0 imm9 11 Rn Rt   STR Rt, [Xn|SP, #simm9]!
      sz 111 00001 0 imm9 11 Rn Rt   LDR Rt, [Xn|SP, #simm9]!

      (at-Rn)
      sz 111 00000 0 imm9 00 Rn Rt   STR Rt, [Xn|SP, #simm9]
      sz 111 00001 0 imm9 00 Rn Rt   LDR Rt, [Xn|SP, #simm9]

      simm9 is unscaled.

      The case 'wback && Rn == Rt && Rt != 31' is disallowed.  In the
      load case this is because it would create two competing values
      for Rt.  In the store case the reason is unclear, but the spec
      disallows it anyway.

      Stores are narrowing, loads are unsigned widening.  sz encodes
      the transfer size in the normal way: 00=1, 01=2, 10=4, 11=8.
   */
   if ((INSN(29,21) & BITS9(1,1,1, 1,1,1,1,0, 1))
                   == BITS9(1,1,1, 0,0,0,0,0, 0)) {
      UInt szLg2  = INSN(31,30);
      UInt szB    = 1 << szLg2;
      Bool isLoad = INSN(22,22) == 1;
      UInt imm9   = INSN(20,12);
      UInt nn     = INSN(9,5);
      UInt tt     = INSN(4,0);
      Bool wBack  = INSN(10,10) == 1;
      UInt how    = INSN(11,10);
      if (how == BITS2(1,0) || (wBack && nn == tt && tt != 31)) {
         /* undecodable; fall through */
      } else {
         if (nn == 31) { /* FIXME generate stack alignment check */ }

         // Compute the transfer address TA and the writeback address WA.
         IRTemp tRN = newTemp(Ity_I64);
         assign(tRN, getIReg64orSP(nn));
         IRTemp tEA = newTemp(Ity_I64);
         Long simm9 = (Long)sx_to_64(imm9, 9);
         assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm9)));

         IRTemp tTA = newTemp(Ity_I64);
         IRTemp tWA = newTemp(Ity_I64);
         switch (how) {
            case BITS2(0,1):
               assign(tTA, mkexpr(tRN)); assign(tWA, mkexpr(tEA)); break;
            case BITS2(1,1):
               assign(tTA, mkexpr(tEA)); assign(tWA, mkexpr(tEA)); break;
            case BITS2(0,0):
               assign(tTA, mkexpr(tEA)); /* tWA is unused */ break;
            default:
               vassert(0); /* NOTREACHED */
         }

         /* Normally rN would be updated after the transfer.  However, in
            the special cases typified by
               str x30, [sp,#-16]!
               str w1, [sp,#-32]!
            it is necessary to update SP before the transfer, (1)
            because Memcheck will otherwise complain about a write
            below the stack pointer, and (2) because the segfault
            stack extension mechanism will otherwise extend the stack
            only down to SP before the instruction, which might not be
            far enough, if the -16/-32 bit takes the actual access
            address to the next page.
         */
         Bool earlyWBack
           = wBack && simm9 < 0 && (szB == 8 || szB == 4)
             && how == BITS2(1,1) && nn == 31 && !isLoad;

         if (wBack && earlyWBack)
            putIReg64orSP(nn, mkexpr(tEA));

         if (isLoad) {
            putIReg64orZR(tt, mkexpr(gen_zwidening_load(szB, tTA)));
         } else {
            gen_narrowing_store(szB, tTA, getIReg64orZR(tt));
         }

         if (wBack && !earlyWBack)
            putIReg64orSP(nn, mkexpr(tEA));

         const HChar* ld_name[4] = { "ldurb", "ldurh", "ldur", "ldur" };
         const HChar* st_name[4] = { "sturb", "sturh", "stur", "stur" };
         const HChar* fmt_str = NULL;
         switch (how) {
            case BITS2(0,1):
               fmt_str = "%s %s, [%s], #%lld (at-Rn-then-Rn=EA)\n";
               break;
            case BITS2(1,1):
               fmt_str = "%s %s, [%s, #%lld]! (at-EA-then-Rn=EA)\n";
               break;
            case BITS2(0,0):
               fmt_str = "%s %s, [%s, #%lld] (at-Rn)\n";
               break;
            default:
               vassert(0);
         }
         DIP(fmt_str, (isLoad ? ld_name : st_name)[szLg2],
                      nameIRegOrZR(szB == 8, tt),
                      nameIReg64orSP(nn), simm9);
         return True;
      }
   }

   /* -------- LDP,STP (immediate, simm7) (INT REGS) -------- */
   /* L==1    => mm==LD
      L==0    => mm==ST
      x==0    => 32 bit transfers, and zero extended loads
      x==1    => 64 bit transfers
      simm7 is scaled by the (single-register) transfer size

      (at-Rn-then-Rn=EA)
      x0 101 0001 L imm7 Rt2 Rn Rt1  mmP Rt1,Rt2, [Xn|SP], #imm

      (at-EA-then-Rn=EA)
      x0 101 0011 L imm7 Rt2 Rn Rt1  mmP Rt1,Rt2, [Xn|SP, #imm]!

      (at-EA)
      x0 101 0010 L imm7 Rt2 Rn Rt1  mmP Rt1,Rt2, [Xn|SP, #imm]
   */
   UInt insn_30_23 = INSN(30,23);
   if (insn_30_23 == BITS8(0,1,0,1,0,0,0,1)
       || insn_30_23 == BITS8(0,1,0,1,0,0,1,1)
       || insn_30_23 == BITS8(0,1,0,1,0,0,1,0)) {
      UInt bL     = INSN(22,22);
      UInt bX     = INSN(31,31);
      UInt bWBack = INSN(23,23);
      UInt rT1    = INSN(4,0);
      UInt rN     = INSN(9,5);
      UInt rT2    = INSN(14,10);
      Long simm7  = (Long)sx_to_64(INSN(21,15), 7);
      if ((bWBack && (rT1 == rN || rT2 == rN) && rN != 31)
          || (bL && rT1 == rT2)) {
         /* undecodable; fall through */
      } else {
         if (rN == 31) { /* FIXME generate stack alignment check */ }

         // Compute the transfer address TA and the writeback address WA.
         IRTemp tRN = newTemp(Ity_I64);
         assign(tRN, getIReg64orSP(rN));
         IRTemp tEA = newTemp(Ity_I64);
         simm7 = (bX ? 8 : 4) * simm7;
         assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm7)));

         IRTemp tTA = newTemp(Ity_I64);
         IRTemp tWA = newTemp(Ity_I64);
         switch (INSN(24,23)) {
            case BITS2(0,1):
               assign(tTA, mkexpr(tRN)); assign(tWA, mkexpr(tEA)); break;
            case BITS2(1,1):
               assign(tTA, mkexpr(tEA)); assign(tWA, mkexpr(tEA)); break;
            case BITS2(1,0):
               assign(tTA, mkexpr(tEA)); /* tWA is unused */ break;
            default:
               vassert(0); /* NOTREACHED */
         }

         /* Normally rN would be updated after the transfer.  However, in
            the special case typified by
               stp x29, x30, [sp,#-112]!
            it is necessary to update SP before the transfer, (1)
            because Memcheck will otherwise complain about a write
            below the stack pointer, and (2) because the segfault
            stack extension mechanism will otherwise extend the stack
            only down to SP before the instruction, which might not be
            far enough, if the -112 bit takes the actual access
            address to the next page.
         */
         Bool earlyWBack
           = bWBack && simm7 < 0
             && INSN(24,23) == BITS2(1,1) && rN == 31 && bL == 0;

         if (bWBack && earlyWBack)
            putIReg64orSP(rN, mkexpr(tEA));

         /**/ if (bL == 1 && bX == 1) {
            // 64 bit load
            putIReg64orZR(rT1, loadLE(Ity_I64,
                                      binop(Iop_Add64,mkexpr(tTA),mkU64(0))));
            putIReg64orZR(rT2, loadLE(Ity_I64,
                                      binop(Iop_Add64,mkexpr(tTA),mkU64(8))));
         } else if (bL == 1 && bX == 0) {
            // 32 bit load
            putIReg32orZR(rT1, loadLE(Ity_I32,
                                      binop(Iop_Add64,mkexpr(tTA),mkU64(0))));
            putIReg32orZR(rT2, loadLE(Ity_I32,
                                      binop(Iop_Add64,mkexpr(tTA),mkU64(4))));
         } else if (bL == 0 && bX == 1) {
            // 64 bit store
            storeLE(binop(Iop_Add64,mkexpr(tTA),mkU64(0)),
                    getIReg64orZR(rT1));
            storeLE(binop(Iop_Add64,mkexpr(tTA),mkU64(8)),
                    getIReg64orZR(rT2));
         } else {
            vassert(bL == 0 && bX == 0);
            // 32 bit store
            storeLE(binop(Iop_Add64,mkexpr(tTA),mkU64(0)),
                    getIReg32orZR(rT1));
            storeLE(binop(Iop_Add64,mkexpr(tTA),mkU64(4)),
                    getIReg32orZR(rT2));
         }

         if (bWBack && !earlyWBack)
            putIReg64orSP(rN, mkexpr(tEA));

         const HChar* fmt_str = NULL;
         switch (INSN(24,23)) {
            case BITS2(0,1):
               fmt_str = "%sp %s, %s, [%s], #%lld (at-Rn-then-Rn=EA)\n";
               break;
            case BITS2(1,1):
               fmt_str = "%sp %s, %s, [%s, #%lld]! (at-EA-then-Rn=EA)\n";
               break;
            case BITS2(1,0):
               fmt_str = "%sp %s, %s, [%s, #%lld] (at-Rn)\n";
               break;
            default:
               vassert(0);
         }
         DIP(fmt_str, bL == 0 ? "st" : "ld",
                      nameIRegOrZR(bX == 1, rT1),
                      nameIRegOrZR(bX == 1, rT2),
                      nameIReg64orSP(rN), simm7);
         return True;
      }
   }

   /* -------- LDPSW (immediate, simm7) (INT REGS) -------- */
   /* Does 32 bit transfers which are sign extended to 64 bits.
      simm7 is scaled by the (single-register) transfer size

      (at-Rn-then-Rn=EA)
      01 101 0001 1 imm7 Rt2 Rn Rt1  LDPSW Rt1,Rt2, [Xn|SP], #imm

      (at-EA-then-Rn=EA)
      01 101 0011 1 imm7 Rt2 Rn Rt1  LDPSW Rt1,Rt2, [Xn|SP, #imm]!

      (at-EA)
      01 101 0010 1 imm7 Rt2 Rn Rt1  LDPSW Rt1,Rt2, [Xn|SP, #imm]
   */
   UInt insn_31_22 = INSN(31,22);
   if (insn_31_22 == BITS10(0,1,1,0,1,0,0,0,1,1)
       || insn_31_22 == BITS10(0,1,1,0,1,0,0,1,1,1)
       || insn_31_22 == BITS10(0,1,1,0,1,0,0,1,0,1)) {
      UInt bWBack = INSN(23,23);
      UInt rT1    = INSN(4,0);
      UInt rN     = INSN(9,5);
      UInt rT2    = INSN(14,10);
      Long simm7  = (Long)sx_to_64(INSN(21,15), 7);
      if ((bWBack && (rT1 == rN || rT2 == rN) && rN != 31)
          || (rT1 == rT2)) {
         /* undecodable; fall through */
      } else {
         if (rN == 31) { /* FIXME generate stack alignment check */ }

         // Compute the transfer address TA and the writeback address WA.
         IRTemp tRN = newTemp(Ity_I64);
         assign(tRN, getIReg64orSP(rN));
         IRTemp tEA = newTemp(Ity_I64);
         simm7 = 4 * simm7;
         assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm7)));

         IRTemp tTA = newTemp(Ity_I64);
         IRTemp tWA = newTemp(Ity_I64);
         switch (INSN(24,23)) {
            case BITS2(0,1):
               assign(tTA, mkexpr(tRN)); assign(tWA, mkexpr(tEA)); break;
            case BITS2(1,1):
               assign(tTA, mkexpr(tEA)); assign(tWA, mkexpr(tEA)); break;
            case BITS2(1,0):
               assign(tTA, mkexpr(tEA)); /* tWA is unused */ break;
            default:
               vassert(0); /* NOTREACHED */
         }

         // 32 bit load, sign extended to 64 bits
         putIReg64orZR(rT1, unop(Iop_32Sto64,
                                 loadLE(Ity_I32, binop(Iop_Add64,
                                                       mkexpr(tTA),
                                                       mkU64(0)))));
         putIReg64orZR(rT2, unop(Iop_32Sto64,
                                 loadLE(Ity_I32, binop(Iop_Add64,
                                                       mkexpr(tTA),
                                                       mkU64(4)))));
         if (bWBack)
            putIReg64orSP(rN, mkexpr(tEA));

         const HChar* fmt_str = NULL;
         switch (INSN(24,23)) {
            case BITS2(0,1):
               fmt_str = "ldpsw %s, %s, [%s], #%lld (at-Rn-then-Rn=EA)\n";
               break;
            case BITS2(1,1):
               fmt_str = "ldpsw %s, %s, [%s, #%lld]! (at-EA-then-Rn=EA)\n";
               break;
            case BITS2(1,0):
               fmt_str = "ldpsw %s, %s, [%s, #%lld] (at-Rn)\n";
               break;
            default:
               vassert(0);
         }
         DIP(fmt_str, nameIReg64orZR(rT1),
                      nameIReg64orZR(rT2),
                      nameIReg64orSP(rN), simm7);
         return True;
      }
   }

   /* ---------------- LDR (literal, int reg) ---------------- */
   /* 31 29      23    4
      00 011 000 imm19 Rt   LDR   Wt, [PC + sxTo64(imm19 << 2)]
      01 011 000 imm19 Rt   LDR   Xt, [PC + sxTo64(imm19 << 2)]
      10 011 000 imm19 Rt   LDRSW Xt, [PC + sxTo64(imm19 << 2)]
      11 011 000 imm19 Rt   prefetch  [PC + sxTo64(imm19 << 2)]
      Just handles the first two cases for now.
   */
   if (INSN(29,24) == BITS6(0,1,1,0,0,0) && INSN(31,31) == 0) {
      UInt  imm19 = INSN(23,5);
      UInt  rT    = INSN(4,0);
      UInt  bX    = INSN(30,30);
      ULong ea    = guest_PC_curr_instr + sx_to_64(imm19 << 2, 21);
      if (bX) {
         putIReg64orZR(rT, loadLE(Ity_I64, mkU64(ea)));
      } else {
         putIReg32orZR(rT, loadLE(Ity_I32, mkU64(ea)));
      }
      DIP("ldr %s, 0x%llx (literal)\n", nameIRegOrZR(bX == 1, rT), ea);
      return True;
   }

   /* -------------- {LD,ST}R (integer register) --------------- */
   /* 31 29        20 15     12 11 9  4
      |  |         |  |      |  |  |  |
      11 111000011 Rm option S  10 Rn Rt  LDR  Xt, [Xn|SP, R<m>{ext/sh}]
      10 111000011 Rm option S  10 Rn Rt  LDR  Wt, [Xn|SP, R<m>{ext/sh}]
      01 111000011 Rm option S  10 Rn Rt  LDRH Wt, [Xn|SP, R<m>{ext/sh}]
      00 111000011 Rm option S  10 Rn Rt  LDRB Wt, [Xn|SP, R<m>{ext/sh}]

      11 111000001 Rm option S  10 Rn Rt  STR  Xt, [Xn|SP, R<m>{ext/sh}]
      10 111000001 Rm option S  10 Rn Rt  STR  Wt, [Xn|SP, R<m>{ext/sh}]
      01 111000001 Rm option S  10 Rn Rt  STRH Wt, [Xn|SP, R<m>{ext/sh}]
      00 111000001 Rm option S  10 Rn Rt  STRB Wt, [Xn|SP, R<m>{ext/sh}]
   */
   if (INSN(29,23) == BITS7(1,1,1,0,0,0,0)
       && INSN(21,21) == 1 && INSN(11,10) == BITS2(1,0)) {
      HChar  dis_buf[64];
      UInt   szLg2 = INSN(31,30);
      Bool   isLD  = INSN(22,22) == 1;
      UInt   tt    = INSN(4,0);
      IRTemp ea    = gen_indexed_EA(dis_buf, insn, True/*to/from int regs*/);
      if (ea != IRTemp_INVALID) {
         switch (szLg2) {
            case 3: /* 64 bit */
               if (isLD) {
                  putIReg64orZR(tt, loadLE(Ity_I64, mkexpr(ea)));
                  DIP("ldr %s, %s\n", nameIReg64orZR(tt), dis_buf);
               } else {
                  storeLE(mkexpr(ea), getIReg64orZR(tt));
                  DIP("str %s, %s\n", nameIReg64orZR(tt), dis_buf);
               }
               break;
            case 2: /* 32 bit */
               if (isLD) {
                  putIReg32orZR(tt, loadLE(Ity_I32, mkexpr(ea)));
                  DIP("ldr %s, %s\n", nameIReg32orZR(tt), dis_buf);
               } else {
                  storeLE(mkexpr(ea), getIReg32orZR(tt));
                  DIP("str %s, %s\n", nameIReg32orZR(tt), dis_buf);
               }
               break;
            case 1: /* 16 bit */
               if (isLD) {
                  putIReg64orZR(tt, unop(Iop_16Uto64,
                                         loadLE(Ity_I16, mkexpr(ea))));
                  DIP("ldruh %s, %s\n", nameIReg32orZR(tt), dis_buf);
               } else {
                  storeLE(mkexpr(ea), unop(Iop_64to16, getIReg64orZR(tt)));
                  DIP("strh %s, %s\n", nameIReg32orZR(tt), dis_buf);
               }
               break;
            case 0: /* 8 bit */
               if (isLD) {
                  putIReg64orZR(tt, unop(Iop_8Uto64,
                                         loadLE(Ity_I8, mkexpr(ea))));
                  DIP("ldrub %s, %s\n", nameIReg32orZR(tt), dis_buf);
               } else {
                  storeLE(mkexpr(ea), unop(Iop_64to8, getIReg64orZR(tt)));
                  DIP("strb %s, %s\n", nameIReg32orZR(tt), dis_buf);
               }
               break;
            default:
               vassert(0);
         }
         return True;
      }
   }

   /* -------------- LDRS{B,H,W} (uimm12) -------------- */
   /* 31 29  26  23 21    9 4
      10 111 001 10 imm12 n t   LDRSW Xt, [Xn|SP, #pimm12 * 4]
      01 111 001 1x imm12 n t   LDRSH Rt, [Xn|SP, #pimm12 * 2]
      00 111 001 1x imm12 n t   LDRSB Rt, [Xn|SP, #pimm12 * 1]
      where
         Rt is Wt when x==1, Xt when x==0
   */
   if (INSN(29,23) == BITS7(1,1,1,0,0,1,1)) {
      /* Further checks on bits 31:30 and 22 */
      Bool valid = False;
      switch ((INSN(31,30) << 1) | INSN(22,22)) {
         case BITS3(1,0,0):
         case BITS3(0,1,0): case BITS3(0,1,1):
         case BITS3(0,0,0): case BITS3(0,0,1):
            valid = True;
            break;
      }
      if (valid) {
         UInt    szLg2 = INSN(31,30);
         UInt    bitX  = INSN(22,22);
         UInt    imm12 = INSN(21,10);
         UInt    nn    = INSN(9,5);
         UInt    tt    = INSN(4,0);
         UInt    szB   = 1 << szLg2;
         IRExpr* ea    = binop(Iop_Add64,
                               getIReg64orSP(nn), mkU64(imm12 * szB));
         switch (szLg2) {
            case 2:
               vassert(bitX == 0);
               putIReg64orZR(tt, unop(Iop_32Sto64, loadLE(Ity_I32, ea)));
               DIP("ldrsw %s, [%s, #%u]\n", nameIReg64orZR(tt),
                   nameIReg64orSP(nn), imm12 * szB);
               break;
            case 1:
               if (bitX == 1) {
                  putIReg32orZR(tt, unop(Iop_16Sto32, loadLE(Ity_I16, ea)));
               } else {
                  putIReg64orZR(tt, unop(Iop_16Sto64, loadLE(Ity_I16, ea)));
               }
               DIP("ldrsh %s, [%s, #%u]\n",
                   nameIRegOrZR(bitX == 0, tt),
                   nameIReg64orSP(nn), imm12 * szB);
               break;
            case 0:
               if (bitX == 1) {
                  putIReg32orZR(tt, unop(Iop_8Sto32, loadLE(Ity_I8, ea)));
               } else {
                  putIReg64orZR(tt, unop(Iop_8Sto64, loadLE(Ity_I8, ea)));
               }
               DIP("ldrsb %s, [%s, #%u]\n",
                   nameIRegOrZR(bitX == 0, tt),
                   nameIReg64orSP(nn), imm12 * szB);
               break;
            default:
               vassert(0);
         }
         return True;
      }
      /* else fall through */
   }

   /* -------------- LDRS{B,H,W} (simm9, upd) -------------- */
   /* (at-Rn-then-Rn=EA)
      31 29      23 21 20   11 9 4
      00 111 000 1x 0  imm9 01 n t  LDRSB Rt, [Xn|SP], #simm9
      01 111 000 1x 0  imm9 01 n t  LDRSH Rt, [Xn|SP], #simm9
      10 111 000 10 0  imm9 01 n t  LDRSW Xt, [Xn|SP], #simm9

      (at-EA-then-Rn=EA)
      00 111 000 1x 0  imm9 11 n t  LDRSB Rt, [Xn|SP, #simm9]!
      01 111 000 1x 0  imm9 11 n t  LDRSH Rt, [Xn|SP, #simm9]!
      10 111 000 10 0  imm9 11 n t  LDRSW Xt, [Xn|SP, #simm9]!
      where
         Rt is Wt when x==1, Xt when x==0
         transfer-at-Rn when [11]==0, at EA when [11]==1
   */
   if (INSN(29,23) == BITS7(1,1,1,0,0,0,1)
       && INSN(21,21) == 0 && INSN(10,10) == 1) {
      /* Further checks on bits 31:30 and 22 */
      Bool valid = False;
      switch ((INSN(31,30) << 1) | INSN(22,22)) {
         case BITS3(1,0,0):                    // LDRSW Xt
         case BITS3(0,1,0): case BITS3(0,1,1): // LDRSH Xt, Wt
         case BITS3(0,0,0): case BITS3(0,0,1): // LDRSB Xt, Wt
            valid = True;
            break;
      }
      if (valid) {
         UInt   szLg2 = INSN(31,30);
         UInt   imm9  = INSN(20,12);
         Bool   atRN  = INSN(11,11) == 0;
         UInt   nn    = INSN(9,5);
         UInt   tt    = INSN(4,0);
         IRTemp tRN   = newTemp(Ity_I64);
         IRTemp tEA   = newTemp(Ity_I64);
         IRTemp tTA   = IRTemp_INVALID;
         ULong  simm9 = sx_to_64(imm9, 9);
         Bool   is64  = INSN(22,22) == 0;
         assign(tRN, getIReg64orSP(nn));
         assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm9)));
         tTA = atRN ? tRN : tEA;
         HChar ch = '?';
         /* There are 5 cases:
               byte     load,           SX to 64
               byte     load, SX to 32, ZX to 64
               halfword load,           SX to 64
               halfword load, SX to 32, ZX to 64
               word     load,           SX to 64
            The ifs below handle them in the listed order.
         */
         if (szLg2 == 0) {
            ch = 'b';
            if (is64) {
               putIReg64orZR(tt, unop(Iop_8Sto64,
                                      loadLE(Ity_I8, mkexpr(tTA))));
            } else {
               putIReg32orZR(tt, unop(Iop_8Sto32,
                                      loadLE(Ity_I8, mkexpr(tTA))));
            }
         }
         else if (szLg2 == 1) {
            ch = 'h';
            if (is64) {
               putIReg64orZR(tt, unop(Iop_16Sto64,
                                      loadLE(Ity_I16, mkexpr(tTA))));
            } else {
               putIReg32orZR(tt, unop(Iop_16Sto32,
                                      loadLE(Ity_I16, mkexpr(tTA))));
            }
         }
         else if (szLg2 == 2 && is64) {
            ch = 'w';
            putIReg64orZR(tt, unop(Iop_32Sto64,
                                   loadLE(Ity_I32, mkexpr(tTA))));
         }
         else vassert(0);
         putIReg64orSP(nn, mkexpr(tEA));
         DIP(atRN ? "ldrs%c %s, [%s], #%llu\n" : "ldrs%c %s, [%s, #%llu]!\n",
             ch, nameIRegOrZR(is64, tt), nameIReg64orSP(nn), simm9);
         return True;
      }
      /* else fall through */
   }

   /* -------------- LDRS{B,H,W} (simm9, noUpd) -------------- */
   /* 31 29      23 21 20   11 9 4
      00 111 000 1x 0  imm9 00 n t  LDURSB Rt, [Xn|SP, #simm9]
      01 111 000 1x 0  imm9 00 n t  LDURSH Rt, [Xn|SP, #simm9]
      10 111 000 10 0  imm9 00 n t  LDURSW Xt, [Xn|SP, #simm9]
      where
         Rt is Wt when x==1, Xt when x==0
   */
   if (INSN(29,23) == BITS7(1,1,1,0,0,0,1)
       && INSN(21,21) == 0 && INSN(11,10) == BITS2(0,0)) {
      /* Further checks on bits 31:30 and 22 */
      Bool valid = False;
      switch ((INSN(31,30) << 1) | INSN(22,22)) {
         case BITS3(1,0,0):                    // LDURSW Xt
         case BITS3(0,1,0): case BITS3(0,1,1): // LDURSH Xt, Wt
         case BITS3(0,0,0): case BITS3(0,0,1): // LDURSB Xt, Wt
            valid = True;
            break;
      }
      if (valid) {
         UInt   szLg2 = INSN(31,30);
         UInt   imm9  = INSN(20,12);
         UInt   nn    = INSN(9,5);
         UInt   tt    = INSN(4,0);
         IRTemp tRN   = newTemp(Ity_I64);
         IRTemp tEA   = newTemp(Ity_I64);
         ULong  simm9 = sx_to_64(imm9, 9);
         Bool   is64  = INSN(22,22) == 0;
         assign(tRN, getIReg64orSP(nn));
         assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm9)));
         HChar ch = '?';
         /* There are 5 cases:
               byte     load,           SX to 64
               byte     load, SX to 32, ZX to 64
               halfword load,           SX to 64
               halfword load, SX to 32, ZX to 64
               word     load,           SX to 64
            The ifs below handle them in the listed order.
         */
         if (szLg2 == 0) {
            ch = 'b';
            if (is64) {
               putIReg64orZR(tt, unop(Iop_8Sto64,
                                      loadLE(Ity_I8, mkexpr(tEA))));
            } else {
               putIReg32orZR(tt, unop(Iop_8Sto32,
                                      loadLE(Ity_I8, mkexpr(tEA))));
            }
         }
         else if (szLg2 == 1) {
            ch = 'h';
            if (is64) {
               putIReg64orZR(tt, unop(Iop_16Sto64,
                                      loadLE(Ity_I16, mkexpr(tEA))));
            } else {
               putIReg32orZR(tt, unop(Iop_16Sto32,
                                      loadLE(Ity_I16, mkexpr(tEA))));
            }
         }
         else if (szLg2 == 2 && is64) {
            ch = 'w';
            putIReg64orZR(tt, unop(Iop_32Sto64,
                                   loadLE(Ity_I32, mkexpr(tEA))));
         }
         else vassert(0);
         DIP("ldurs%c %s, [%s, #%lld]\n",
             ch, nameIRegOrZR(is64, tt), nameIReg64orSP(nn), (Long)simm9);
         return True;
      }
      /* else fall through */
   }

   /* -------- LDP,STP (immediate, simm7) (FP&VEC) -------- */
   /* L==1    => mm==LD
      L==0    => mm==ST
      sz==00  => 32 bit (S) transfers
      sz==01  => 64 bit (D) transfers
      sz==10  => 128 bit (Q) transfers
      sz==11  isn't allowed
      simm7 is scaled by the (single-register) transfer size

      31 29  26   22 21   14 9 4

      sz 101 1000 L  imm7 t2 n t1   mmNP SDQt1, SDQt2, [Xn|SP, #imm]
                                    (at-EA, with nontemporal hint)

      sz 101 1001 L  imm7 t2 n t1   mmP SDQt1, SDQt2, [Xn|SP], #imm
                                    (at-Rn-then-Rn=EA)

      sz 101 1010 L  imm7 t2 n t1   mmP SDQt1, SDQt2, [Xn|SP, #imm]
                                    (at-EA)

      sz 101 1011 L  imm7 t2 n t1   mmP SDQt1, SDQt2, [Xn|SP, #imm]!
                                    (at-EA-then-Rn=EA)
   */
   if (INSN(29,25) == BITS5(1,0,1,1,0)) {
      UInt szSlg2 = INSN(31,30); // log2 of the xfer size in 32-bit units
      Bool isLD   = INSN(22,22) == 1;
      Bool wBack  = INSN(23,23) == 1;
      Long simm7  = (Long)sx_to_64(INSN(21,15), 7);
      UInt tt2    = INSN(14,10);
      UInt nn     = INSN(9,5);
      UInt tt1    = INSN(4,0);
      if (szSlg2 == BITS2(1,1) || (isLD && tt1 == tt2)) {
         /* undecodable; fall through */
      } else {
         if (nn == 31) { /* FIXME generate stack alignment check */ }

         // Compute the transfer address TA and the writeback address WA.
         UInt   szB = 4 << szSlg2; /* szB is the per-register size */
         IRTemp tRN = newTemp(Ity_I64);
         assign(tRN, getIReg64orSP(nn));
         IRTemp tEA = newTemp(Ity_I64);
         simm7 = szB * simm7;
         assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm7)));

         IRTemp tTA = newTemp(Ity_I64);
         IRTemp tWA = newTemp(Ity_I64);
         switch (INSN(24,23)) {
            case BITS2(0,1):
               assign(tTA, mkexpr(tRN)); assign(tWA, mkexpr(tEA)); break;
            case BITS2(1,1):
               assign(tTA, mkexpr(tEA)); assign(tWA, mkexpr(tEA)); break;
            case BITS2(1,0):
            case BITS2(0,0):
               assign(tTA, mkexpr(tEA)); /* tWA is unused */ break;
            default:
               vassert(0); /* NOTREACHED */
         }

         IRType ty = Ity_INVALID;
         switch (szB) {
            case 4:  ty = Ity_F32;  break;
            case 8:  ty = Ity_F64;  break;
            case 16: ty = Ity_V128; break;
            default: vassert(0);
         }

         /* Normally rN would be updated after the transfer.  However, in
            the special cases typified by
               stp q0, q1, [sp,#-512]!
               stp d0, d1, [sp,#-512]!
               stp s0, s1, [sp,#-512]!
            it is necessary to update SP before the transfer, (1)
            because Memcheck will otherwise complain about a write
            below the stack pointer, and (2) because the segfault
            stack extension mechanism will otherwise extend the stack
            only down to SP before the instruction, which might not be
            far enough, if the -512 bit takes the actual access
            address to the next page.
         */
         Bool earlyWBack
           = wBack && simm7 < 0
             && INSN(24,23) == BITS2(1,1) && nn == 31 && !isLD;

         if (wBack && earlyWBack)
            putIReg64orSP(nn, mkexpr(tEA));

         if (isLD) {
            if (szB < 16) {
               putQReg128(tt1, mkV128(0x0000));
            }
            putQRegLO(tt1,
                      loadLE(ty, binop(Iop_Add64, mkexpr(tTA), mkU64(0))));
            if (szB < 16) {
               putQReg128(tt2, mkV128(0x0000));
            }
            putQRegLO(tt2,
                      loadLE(ty, binop(Iop_Add64, mkexpr(tTA), mkU64(szB))));
         } else {
            storeLE(binop(Iop_Add64, mkexpr(tTA), mkU64(0)),
                    getQRegLO(tt1, ty));
            storeLE(binop(Iop_Add64, mkexpr(tTA), mkU64(szB)),
                    getQRegLO(tt2, ty));
         }

         if (wBack && !earlyWBack)
            putIReg64orSP(nn, mkexpr(tEA));

         const HChar* fmt_str = NULL;
         switch (INSN(24,23)) {
            case BITS2(0,1):
               fmt_str = "%sp %s, %s, [%s], #%lld (at-Rn-then-Rn=EA)\n";
               break;
            case BITS2(1,1):
               fmt_str = "%sp %s, %s, [%s, #%lld]! (at-EA-then-Rn=EA)\n";
               break;
            case BITS2(1,0):
               fmt_str = "%sp %s, %s, [%s, #%lld] (at-Rn)\n";
               break;
            case BITS2(0,0):
               fmt_str = "%snp %s, %s, [%s, #%lld] (at-Rn)\n";
               break;
            default:
               vassert(0);
         }
         DIP(fmt_str, isLD ? "ld" : "st",
                      nameQRegLO(tt1, ty), nameQRegLO(tt2, ty),
                      nameIReg64orSP(nn), simm7);
         return True;
      }
   }

   /* -------------- {LD,ST}R (vector register) --------------- */
   /* 31 29     23  20 15     12 11 9  4
      |  |      |   |  |      |  |  |  |
      00 111100 011 Rm option S  10 Rn Rt  LDR Bt, [Xn|SP, R<m>{ext/sh}]
      01 111100 011 Rm option S  10 Rn Rt  LDR Ht, [Xn|SP, R<m>{ext/sh}]
      10 111100 011 Rm option S  10 Rn Rt  LDR St, [Xn|SP, R<m>{ext/sh}]
      11 111100 011 Rm option S  10 Rn Rt  LDR Dt, [Xn|SP, R<m>{ext/sh}]
      00 111100 111 Rm option S  10 Rn Rt  LDR Qt, [Xn|SP, R<m>{ext/sh}]

      00 111100 001 Rm option S  10 Rn Rt  STR Bt, [Xn|SP, R<m>{ext/sh}]
      01 111100 001 Rm option S  10 Rn Rt  STR Ht, [Xn|SP, R<m>{ext/sh}]
      10 111100 001 Rm option S  10 Rn Rt  STR St, [Xn|SP, R<m>{ext/sh}]
      11 111100 001 Rm option S  10 Rn Rt  STR Dt, [Xn|SP, R<m>{ext/sh}]
      00 111100 101 Rm option S  10 Rn Rt  STR Qt, [Xn|SP, R<m>{ext/sh}]
   */
   if (INSN(29,24) == BITS6(1,1,1,1,0,0)
       && INSN(21,21) == 1 && INSN(11,10) == BITS2(1,0)) {
      HChar  dis_buf[64];
      UInt   szLg2 = (INSN(23,23) << 2) | INSN(31,30);
      Bool   isLD  = INSN(22,22) == 1;
      UInt   tt    = INSN(4,0);
      if (szLg2 > 4) goto after_LDR_STR_vector_register;
      IRTemp ea    = gen_indexed_EA(dis_buf, insn, False/*to/from vec regs*/);
      if (ea == IRTemp_INVALID) goto after_LDR_STR_vector_register;
      switch (szLg2) {
         case 0: /* 8 bit */
            if (isLD) {
               putQReg128(tt, mkV128(0x0000));
               putQRegLO(tt, loadLE(Ity_I8, mkexpr(ea)));
               DIP("ldr %s, %s\n", nameQRegLO(tt, Ity_I8), dis_buf);
            } else {
               storeLE(mkexpr(ea), getQRegLO(tt, Ity_I8));
               DIP("str %s, %s\n", nameQRegLO(tt, Ity_I8), dis_buf);
            }
            break;
         case 1: /* 16 bit */
            if (isLD) {
               putQReg128(tt, mkV128(0x0000));
               putQRegLO(tt, loadLE(Ity_I16, mkexpr(ea)));
               DIP("ldr %s, %s\n", nameQRegLO(tt, Ity_I16), dis_buf);
            } else {
               storeLE(mkexpr(ea), getQRegLO(tt, Ity_I16));
               DIP("str %s, %s\n", nameQRegLO(tt, Ity_I16), dis_buf);
            }
            break;
         case 2: /* 32 bit */
            if (isLD) {
               putQReg128(tt, mkV128(0x0000));
               putQRegLO(tt, loadLE(Ity_I32, mkexpr(ea)));
               DIP("ldr %s, %s\n", nameQRegLO(tt, Ity_I32), dis_buf);
            } else {
               storeLE(mkexpr(ea), getQRegLO(tt, Ity_I32));
               DIP("str %s, %s\n", nameQRegLO(tt, Ity_I32), dis_buf);
            }
            break;
         case 3: /* 64 bit */
            if (isLD) {
               putQReg128(tt, mkV128(0x0000));
               putQRegLO(tt, loadLE(Ity_I64, mkexpr(ea)));
               DIP("ldr %s, %s\n", nameQRegLO(tt, Ity_I64), dis_buf);
            } else {
               storeLE(mkexpr(ea), getQRegLO(tt, Ity_I64));
               DIP("str %s, %s\n", nameQRegLO(tt, Ity_I64), dis_buf);
            }
            break;
         case 4: /* 128 bit */
            if (isLD) {
               putQReg128(tt, loadLE(Ity_V128, mkexpr(ea)));
               DIP("ldr %s, %s\n", nameQReg128(tt), dis_buf);
            } else {
               storeLE(mkexpr(ea), getQReg128(tt));
               DIP("str %s, %s\n", nameQReg128(tt), dis_buf);
            }
            break;
         default:
            vassert(0);
      }
      return True;
   }
  after_LDR_STR_vector_register:

   /* ---------- LDRS{B,H,W} (integer register, SX) ---------- */
   /* 31 29      22 20 15  12 11 9  4
      |  |       |  |  |   |  |  |  |
      10 1110001 01 Rm opt S 10 Rn Rt  LDRSW Xt, [Xn|SP, R<m>{ext/sh}]

      01 1110001 01 Rm opt S 10 Rn Rt  LDRSH Xt, [Xn|SP, R<m>{ext/sh}]
      01 1110001 11 Rm opt S 10 Rn Rt  LDRSH Wt, [Xn|SP, R<m>{ext/sh}]

      00 1110001 01 Rm opt S 10 Rn Rt  LDRSB Xt, [Xn|SP, R<m>{ext/sh}]
      00 1110001 11 Rm opt S 10 Rn Rt  LDRSB Wt, [Xn|SP, R<m>{ext/sh}]
   */
   if (INSN(29,23) == BITS7(1,1,1,0,0,0,1)
       && INSN(21,21) == 1 && INSN(11,10) == BITS2(1,0)) {
      HChar  dis_buf[64];
      UInt   szLg2  = INSN(31,30);
      Bool   sxTo64 = INSN(22,22) == 0; // else sx to 32 and zx to 64
      UInt   tt     = INSN(4,0);
      if (szLg2 == 3) goto after_LDRS_integer_register;
      IRTemp ea     = gen_indexed_EA(dis_buf, insn, True/*to/from int regs*/);
      if (ea == IRTemp_INVALID) goto after_LDRS_integer_register;
      /* Enumerate the 5 variants explicitly. */
      if (szLg2 == 2/*32 bit*/ && sxTo64) {
         putIReg64orZR(tt, unop(Iop_32Sto64, loadLE(Ity_I32, mkexpr(ea))));
         DIP("ldrsw %s, %s\n", nameIReg64orZR(tt), dis_buf);
         return True;
      }
      else
      if (szLg2 == 1/*16 bit*/) {
         if (sxTo64) {
            putIReg64orZR(tt, unop(Iop_16Sto64, loadLE(Ity_I16, mkexpr(ea))));
            DIP("ldrsh %s, %s\n", nameIReg64orZR(tt), dis_buf);
         } else {
            putIReg32orZR(tt, unop(Iop_16Sto32, loadLE(Ity_I16, mkexpr(ea))));
            DIP("ldrsh %s, %s\n", nameIReg32orZR(tt), dis_buf);
         }
         return True;
      }
      else
      if (szLg2 == 0/*8 bit*/) {
         if (sxTo64) {
            putIReg64orZR(tt, unop(Iop_8Sto64, loadLE(Ity_I8, mkexpr(ea))));
            DIP("ldrsb %s, %s\n", nameIReg64orZR(tt), dis_buf);
         } else {
            putIReg32orZR(tt, unop(Iop_8Sto32, loadLE(Ity_I8, mkexpr(ea))));
            DIP("ldrsb %s, %s\n", nameIReg32orZR(tt), dis_buf);
         }
         return True;
      }
      /* else it's an invalid combination */
   }
  after_LDRS_integer_register:

   /* -------- LDR/STR (immediate, SIMD&FP, unsigned offset) -------- */
   /* This is the Unsigned offset variant only.  The Post-Index and
      Pre-Index variants are below.

      31 29      23 21    9 4
      00 111 101 01 imm12 n t   LDR Bt, [Xn|SP + imm12 * 1]
      01 111 101 01 imm12 n t   LDR Ht, [Xn|SP + imm12 * 2]
      10 111 101 01 imm12 n t   LDR St, [Xn|SP + imm12 * 4]
      11 111 101 01 imm12 n t   LDR Dt, [Xn|SP + imm12 * 8]
      00 111 101 11 imm12 n t   LDR Qt, [Xn|SP + imm12 * 16]

      00 111 101 00 imm12 n t   STR Bt, [Xn|SP + imm12 * 1]
      01 111 101 00 imm12 n t   STR Ht, [Xn|SP + imm12 * 2]
      10 111 101 00 imm12 n t   STR St, [Xn|SP + imm12 * 4]
      11 111 101 00 imm12 n t   STR Dt, [Xn|SP + imm12 * 8]
      00 111 101 10 imm12 n t   STR Qt, [Xn|SP + imm12 * 16]
   */
   if (INSN(29,24) == BITS6(1,1,1,1,0,1)
       && ((INSN(23,23) << 2) | INSN(31,30)) <= 4) {
      UInt   szLg2  = (INSN(23,23) << 2) | INSN(31,30);
      Bool   isLD   = INSN(22,22) == 1;
      UInt   pimm12 = INSN(21,10) << szLg2;
      UInt   nn     = INSN(9,5);
      UInt   tt     = INSN(4,0);
      IRTemp tEA    = newTemp(Ity_I64);
      IRType ty     = preferredVectorSubTypeFromSize(1 << szLg2);
      assign(tEA, binop(Iop_Add64, getIReg64orSP(nn), mkU64(pimm12)));
      if (isLD) {
         if (szLg2 < 4) {
            putQReg128(tt, mkV128(0x0000));
         }
         putQRegLO(tt, loadLE(ty, mkexpr(tEA)));
      } else {
         storeLE(mkexpr(tEA), getQRegLO(tt, ty));
      }
      DIP("%s %s, [%s, #%u]\n",
          isLD ? "ldr" : "str",
          nameQRegLO(tt, ty), nameIReg64orSP(nn), pimm12);
      return True;
   }

   /* -------- LDR/STR (immediate, SIMD&FP, pre/post index) -------- */
   /* These are the Post-Index and Pre-Index variants.

      31 29      23 21 20   11 9 4
      (at-Rn-then-Rn=EA)
      00 111 100 01 0  imm9 01 n t   LDR Bt, [Xn|SP], #simm
      01 111 100 01 0  imm9 01 n t   LDR Ht, [Xn|SP], #simm
      10 111 100 01 0  imm9 01 n t   LDR St, [Xn|SP], #simm
      11 111 100 01 0  imm9 01 n t   LDR Dt, [Xn|SP], #simm
      00 111 100 11 0  imm9 01 n t   LDR Qt, [Xn|SP], #simm

      (at-EA-then-Rn=EA)
      00 111 100 01 0  imm9 11 n t   LDR Bt, [Xn|SP, #simm]!
      01 111 100 01 0  imm9 11 n t   LDR Ht, [Xn|SP, #simm]!
      10 111 100 01 0  imm9 11 n t   LDR St, [Xn|SP, #simm]!
      11 111 100 01 0  imm9 11 n t   LDR Dt, [Xn|SP, #simm]!
      00 111 100 11 0  imm9 11 n t   LDR Qt, [Xn|SP, #simm]!

      Stores are the same except with bit 22 set to 0.
   */
   if (INSN(29,24) == BITS6(1,1,1,1,0,0)
       && ((INSN(23,23) << 2) | INSN(31,30)) <= 4
       && INSN(21,21) == 0 && INSN(10,10) == 1) {
      UInt   szLg2 = (INSN(23,23) << 2) | INSN(31,30);
      Bool   isLD  = INSN(22,22) == 1;
      UInt   imm9  = INSN(20,12);
      Bool   atRN  = INSN(11,11) == 0;
      UInt   nn    = INSN(9,5);
      UInt   tt    = INSN(4,0);
      IRTemp tRN   = newTemp(Ity_I64);
      IRTemp tEA   = newTemp(Ity_I64);
      IRTemp tTA   = IRTemp_INVALID;
      IRType ty    = preferredVectorSubTypeFromSize(1 << szLg2);
      ULong  simm9 = sx_to_64(imm9, 9);
      assign(tRN, getIReg64orSP(nn));
      assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm9)));
      tTA = atRN ? tRN : tEA;

      /* Do early writeback for the cases typified by
            str d10, [sp, #-128]!
         for the same reasons as described in a similar comment in the
         "LDP,STP (immediate, simm7) (FP&VEC)" case just above.
      */
      Bool earlyWBack
        = !atRN && !isLD && (ty == Ity_F64 || ty == Ity_V128)
          && nn == 31 && ((Long)simm9) < 0;

      if (earlyWBack)
         putIReg64orSP(nn, mkexpr(tEA));

      if (isLD) {
         if (szLg2 < 4) {
            putQReg128(tt, mkV128(0x0000));
         }
         putQRegLO(tt, loadLE(ty, mkexpr(tTA)));
      } else {
         storeLE(mkexpr(tTA), getQRegLO(tt, ty));
      }

      if (!earlyWBack)
         putIReg64orSP(nn, mkexpr(tEA));

      DIP(atRN ? "%s %s, [%s], #%lld\n" : "%s %s, [%s, #%lld]!\n",
          isLD ? "ldr" : "str",
          nameQRegLO(tt, ty), nameIReg64orSP(nn), (Long)simm9);
      return True;
   }

   /* -------- LDUR/STUR (unscaled offset, SIMD&FP) -------- */
   /* 31 29      23 21 20   11 9 4
      00 111 100 01 0  imm9 00 n t   LDR Bt, [Xn|SP, #simm]
      01 111 100 01 0  imm9 00 n t   LDR Ht, [Xn|SP, #simm]
      10 111 100 01 0  imm9 00 n t   LDR St, [Xn|SP, #simm]
      11 111 100 01 0  imm9 00 n t   LDR Dt, [Xn|SP, #simm]
      00 111 100 11 0  imm9 00 n t   LDR Qt, [Xn|SP, #simm]

      00 111 100 00 0  imm9 00 n t   STR Bt, [Xn|SP, #simm]
      01 111 100 00 0  imm9 00 n t   STR Ht, [Xn|SP, #simm]
      10 111 100 00 0  imm9 00 n t   STR St, [Xn|SP, #simm]
      11 111 100 00 0  imm9 00 n t   STR Dt, [Xn|SP, #simm]
      00 111 100 10 0  imm9 00 n t   STR Qt, [Xn|SP, #simm]
   */
   if (INSN(29,24) == BITS6(1,1,1,1,0,0)
       && ((INSN(23,23) << 2) | INSN(31,30)) <= 4
       && INSN(21,21) == 0 && INSN(11,10) == BITS2(0,0)) {
      UInt   szLg2 = (INSN(23,23) << 2) | INSN(31,30);
      Bool   isLD  = INSN(22,22) == 1;
      UInt   imm9  = INSN(20,12);
      UInt   nn    = INSN(9,5);
      UInt   tt    = INSN(4,0);
      ULong  simm9 = sx_to_64(imm9, 9);
      IRTemp tEA   = newTemp(Ity_I64);
      IRType ty    = preferredVectorSubTypeFromSize(1 << szLg2);
      assign(tEA, binop(Iop_Add64, getIReg64orSP(nn), mkU64(simm9)));
      if (isLD) {
         if (szLg2 < 4) {
            putQReg128(tt, mkV128(0x0000));
         }
         putQRegLO(tt, loadLE(ty, mkexpr(tEA)));
      } else {
         storeLE(mkexpr(tEA), getQRegLO(tt, ty));
      }
      DIP("%s %s, [%s, #%lld]\n",
          isLD ? "ldur" : "stur",
          nameQRegLO(tt, ty), nameIReg64orSP(nn), (Long)simm9);
      return True;
   }

   /* ---------------- LDR (literal, SIMD&FP) ---------------- */
   /* 31 29      23    4
      00 011 100 imm19 t    LDR St, [PC + sxTo64(imm19 << 2)]
      01 011 100 imm19 t    LDR Dt, [PC + sxTo64(imm19 << 2)]
      10 011 100 imm19 t    LDR Qt, [PC + sxTo64(imm19 << 2)]
   */
   if (INSN(29,24) == BITS6(0,1,1,1,0,0) && INSN(31,30) < BITS2(1,1)) {
      UInt   szB   = 4 << INSN(31,30);
      UInt   imm19 = INSN(23,5);
      UInt   tt    = INSN(4,0);
      ULong  ea    = guest_PC_curr_instr + sx_to_64(imm19 << 2, 21);
      IRType ty    = preferredVectorSubTypeFromSize(szB);
      putQReg128(tt, mkV128(0x0000));
      putQRegLO(tt, loadLE(ty, mkU64(ea)));
      DIP("ldr %s, 0x%llx (literal)\n", nameQRegLO(tt, ty), ea);
      return True;
   }

   /* ------ LD1/ST1 (multiple 1-elem structs to/from 1 reg  ------ */
   /* ------ LD2/ST2 (multiple 2-elem structs to/from 2 regs ------ */
   /* ------ LD3/ST3 (multiple 3-elem structs to/from 3 regs ------ */
   /* ------ LD4/ST4 (multiple 4-elem structs to/from 4 regs ------ */
   /* 31 29  26   22 21 20    15   11 9 4

      0q 001 1000 L  0  00000 0000 sz n t  xx4 {Vt..t+3.T}, [Xn|SP]
      0q 001 1001 L  0  m     0000 sz n t  xx4 {Vt..t+3.T}, [Xn|SP], step

      0q 001 1000 L  0  00000 0100 sz n t  xx3 {Vt..t+2.T}, [Xn|SP]
      0q 001 1001 L  0  m     0100 sz n t  xx3 {Vt..t+2.T}, [Xn|SP], step

      0q 001 1000 L  0  00000 1000 sz n t  xx2 {Vt..t+1.T}, [Xn|SP]
      0q 001 1001 L  0  m     1000 sz n t  xx2 {Vt..t+1.T}, [Xn|SP], step

      0q 001 1000 L  0  00000 0111 sz n t  xx1 {Vt.T},      [Xn|SP]
      0q 001 1001 L  0  m     0111 sz n t  xx1 {Vt.T},      [Xn|SP], step

      T    = defined by Q and sz in the normal way
      step = if m == 11111 then transfer-size else Xm
      xx   = case L of 1 -> LD ; 0 -> ST
   */
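   /* So, for example, "ld2 {v0.4s, v1.4s}, [x1], #32" has q == 1,
      L == 1, insn[15:12] == 1000 (two registers) and sz == 10 (32-bit
      lanes); since m == 11111, the post-increment step is the
      transfer size, 32 bytes. */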
5837 if (INSN(31,31) == 0 && INSN(29,24) == BITS6(0,0,1,1,0,0)
5838 && INSN(21,21) == 0) {
5839 Bool bitQ
= INSN(30,30);
5840 Bool isPX
= INSN(23,23) == 1;
5841 Bool isLD
= INSN(22,22) == 1;
5842 UInt mm
= INSN(20,16);
5843 UInt opc
= INSN(15,12);
5844 UInt sz
= INSN(11,10);
5845 UInt nn
= INSN(9,5);
5846 UInt tt
= INSN(4,0);
5847 Bool isQ
= bitQ
== 1;
5848 Bool is1d
= sz
== BITS2(1,1) && !isQ
;
5851 case BITS4(0,0,0,0): nRegs
= 4; break;
5852 case BITS4(0,1,0,0): nRegs
= 3; break;
5853 case BITS4(1,0,0,0): nRegs
= 2; break;
5854 case BITS4(0,1,1,1): nRegs
= 1; break;
5858 /* The combination insn[23] == 0 && insn[20:16] != 0 is not allowed.
5859 If we see it, set nRegs to 0 so as to cause the next conditional
5861 if (!isPX
&& mm
!= 0)
5864 if (nRegs
== 1 /* .1d is allowed */
5865 || (nRegs
>= 2 && nRegs
<= 4 && !is1d
) /* .1d is not allowed */) {
5867 UInt xferSzB
= (isQ
? 16 : 8) * nRegs
;
5869 /* Generate the transfer address (TA) and if necessary the
5870 writeback address (WB) */
5871 IRTemp tTA
= newTemp(Ity_I64
);
5872 assign(tTA
, getIReg64orSP(nn
));
5873 if (nn
== 31) { /* FIXME generate stack alignment check */ }
5874 IRTemp tWB
= IRTemp_INVALID
;
5876 tWB
= newTemp(Ity_I64
);
5877 assign(tWB
, binop(Iop_Add64
,
5879 mm
== BITS5(1,1,1,1,1) ? mkU64(xferSzB
)
5880 : getIReg64orZR(mm
)));
5883 /* -- BEGIN generate the transfers -- */
5885 IRTemp u0
, u1
, u2
, u3
, i0
, i1
, i2
, i3
;
5886 u0
= u1
= u2
= u3
= i0
= i1
= i2
= i3
= IRTemp_INVALID
;
5888 case 4: u3
= newTempV128(); i3
= newTempV128(); /* fallthru */
5889 case 3: u2
= newTempV128(); i2
= newTempV128(); /* fallthru */
5890 case 2: u1
= newTempV128(); i1
= newTempV128(); /* fallthru */
5891 case 1: u0
= newTempV128(); i0
= newTempV128(); break;
5892 default: vassert(0);
5895 /* -- Multiple 128 or 64 bit stores -- */
5898 case 4: assign(u3
, getQReg128((tt
+3) % 32)); /* fallthru */
5899 case 3: assign(u2
, getQReg128((tt
+2) % 32)); /* fallthru */
5900 case 2: assign(u1
, getQReg128((tt
+1) % 32)); /* fallthru */
5901 case 1: assign(u0
, getQReg128((tt
+0) % 32)); break;
5902 default: vassert(0);
5905 case 4: (isQ
? math_INTERLEAVE4_128
: math_INTERLEAVE4_64
)
5906 (&i0
, &i1
, &i2
, &i3
, sz
, u0
, u1
, u2
, u3
);
5908 case 3: (isQ
? math_INTERLEAVE3_128
: math_INTERLEAVE3_64
)
5909 (&i0
, &i1
, &i2
, sz
, u0
, u1
, u2
);
5911 case 2: (isQ
? math_INTERLEAVE2_128
: math_INTERLEAVE2_64
)
5912 (&i0
, &i1
, sz
, u0
, u1
);
5914 case 1: (isQ
? math_INTERLEAVE1_128
: math_INTERLEAVE1_64
)
5917 default: vassert(0);
5919 # define MAYBE_NARROW_TO_64(_expr) \
5920 (isQ ? (_expr) : unop(Iop_V128to64,(_expr)))
5921 UInt step
= isQ
? 16 : 8;
5923 case 4: storeLE( binop(Iop_Add64
, mkexpr(tTA
), mkU64(3*step
)),
5924 MAYBE_NARROW_TO_64(mkexpr(i3
)) );
5926 case 3: storeLE( binop(Iop_Add64
, mkexpr(tTA
), mkU64(2*step
)),
5927 MAYBE_NARROW_TO_64(mkexpr(i2
)) );
5929 case 2: storeLE( binop(Iop_Add64
, mkexpr(tTA
), mkU64(1*step
)),
5930 MAYBE_NARROW_TO_64(mkexpr(i1
)) );
5932 case 1: storeLE( binop(Iop_Add64
, mkexpr(tTA
), mkU64(0*step
)),
5933 MAYBE_NARROW_TO_64(mkexpr(i0
)) );
5935 default: vassert(0);
5937 # undef MAYBE_NARROW_TO_64
5940 /* -- Multiple 128 or 64 bit loads -- */
5942 UInt step
= isQ
? 16 : 8;
5943 IRType loadTy
= isQ
? Ity_V128
: Ity_I64
;
5944 # define MAYBE_WIDEN_FROM_64(_expr) \
5945 (isQ ? (_expr) : unop(Iop_64UtoV128,(_expr)))
5948 assign(i3
, MAYBE_WIDEN_FROM_64(
5950 binop(Iop_Add64
, mkexpr(tTA
),
5951 mkU64(3 * step
)))));
5954 assign(i2
, MAYBE_WIDEN_FROM_64(
5956 binop(Iop_Add64
, mkexpr(tTA
),
5957 mkU64(2 * step
)))));
5960 assign(i1
, MAYBE_WIDEN_FROM_64(
5962 binop(Iop_Add64
, mkexpr(tTA
),
5963 mkU64(1 * step
)))));
5966 assign(i0
, MAYBE_WIDEN_FROM_64(
5968 binop(Iop_Add64
, mkexpr(tTA
),
5969 mkU64(0 * step
)))));
5974 # undef MAYBE_WIDEN_FROM_64
5976 case 4: (isQ
? math_DEINTERLEAVE4_128
: math_DEINTERLEAVE4_64
)
5977 (&u0
, &u1
, &u2
, &u3
, sz
, i0
,i1
,i2
,i3
);
5979 case 3: (isQ
? math_DEINTERLEAVE3_128
: math_DEINTERLEAVE3_64
)
5980 (&u0
, &u1
, &u2
, sz
, i0
, i1
, i2
);
5982 case 2: (isQ
? math_DEINTERLEAVE2_128
: math_DEINTERLEAVE2_64
)
5983 (&u0
, &u1
, sz
, i0
, i1
);
5985 case 1: (isQ
? math_DEINTERLEAVE1_128
: math_DEINTERLEAVE1_64
)
5988 default: vassert(0);
5991 case 4: putQReg128( (tt
+3) % 32,
5992 math_MAYBE_ZERO_HI64(bitQ
, u3
));
5994 case 3: putQReg128( (tt
+2) % 32,
5995 math_MAYBE_ZERO_HI64(bitQ
, u2
));
5997 case 2: putQReg128( (tt
+1) % 32,
5998 math_MAYBE_ZERO_HI64(bitQ
, u1
));
6000 case 1: putQReg128( (tt
+0) % 32,
6001 math_MAYBE_ZERO_HI64(bitQ
, u0
));
6003 default: vassert(0);
6007 /* -- END generate the transfers -- */
6009 /* Do the writeback, if necessary */
6011 putIReg64orSP(nn
, mkexpr(tWB
));
6015 pxStr
[0] = pxStr
[sizeof(pxStr
)-1] = 0;
6017 if (mm
== BITS5(1,1,1,1,1))
6018 vex_sprintf(pxStr
, ", #%u", xferSzB
);
6020 vex_sprintf(pxStr
, ", %s", nameIReg64orZR(mm
));
6022 const HChar
* arr
= nameArr_Q_SZ(bitQ
, sz
);
6023 DIP("%s%u {v%u.%s .. v%u.%s}, [%s]%s\n",
6024 isLD
? "ld" : "st", nRegs
,
6025 (tt
+0) % 32, arr
, (tt
+nRegs
-1) % 32, arr
, nameIReg64orSP(nn
),
6030 /* else fall through */
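
   /* Worked example (illustrative; not part of the original sources): for
      "st2 {v0.4s, v1.4s}, [x0]" the interleave step above arranges the
      32 stored bytes as
         v0.s[0] v1.s[0] v0.s[1] v1.s[1] v0.s[2] v1.s[2] v0.s[3] v1.s[3]
      and the ld2 direction performs the inverse deinterleave -- exactly
      what the math_INTERLEAVEn_* / math_DEINTERLEAVEn_* calls compute. */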
   /* ------ LD1/ST1 (multiple 1-elem structs to/from 2 regs ------ */
   /* ------ LD1/ST1 (multiple 1-elem structs to/from 3 regs ------ */
   /* ------ LD1/ST1 (multiple 1-elem structs to/from 4 regs ------ */
   /* 31 29  26   22 21 20    15   11 9 4

      0q 001 1000 L  0  00000 0010 sz n t  xx1 {Vt..t+3.T}, [Xn|SP]
      0q 001 1001 L  0  m     0010 sz n t  xx1 {Vt..t+3.T}, [Xn|SP], step

      0q 001 1000 L  0  00000 0110 sz n t  xx1 {Vt..t+2.T}, [Xn|SP]
      0q 001 1001 L  0  m     0110 sz n t  xx1 {Vt..t+2.T}, [Xn|SP], step

      0q 001 1000 L  0  00000 1010 sz n t  xx1 {Vt..t+1.T}, [Xn|SP]
      0q 001 1001 L  0  m     1010 sz n t  xx1 {Vt..t+1.T}, [Xn|SP], step

      T    = defined by Q and sz in the normal way
      step = if m == 11111 then transfer-size else Xm
      xx   = case L of 1 -> LD ; 0 -> ST
   */
   if (INSN(31,31) == 0 && INSN(29,24) == BITS6(0,0,1,1,0,0)
       && INSN(21,21) == 0) {
      Bool bitQ  = INSN(30,30);
      Bool isPX  = INSN(23,23) == 1;
      Bool isLD  = INSN(22,22) == 1;
      UInt mm    = INSN(20,16);
      UInt opc   = INSN(15,12);
      UInt sz    = INSN(11,10);
      UInt nn    = INSN(9,5);
      UInt tt    = INSN(4,0);
      Bool isQ   = bitQ == 1;
      UInt nRegs = 0;
      switch (opc) {
         case BITS4(0,0,1,0): nRegs = 4; break;
         case BITS4(0,1,1,0): nRegs = 3; break;
         case BITS4(1,0,1,0): nRegs = 2; break;
         default: break;
      }

      /* The combination insn[23] == 0 && insn[20:16] != 0 is not allowed.
         If we see it, set nRegs to 0 so as to cause the next conditional
         to fail. */
      if (!isPX && mm != 0)
         nRegs = 0;

      if (nRegs >= 2 && nRegs <= 4) {

         UInt xferSzB = (isQ ? 16 : 8) * nRegs;

         /* Generate the transfer address (TA) and if necessary the
            writeback address (WB) */
         IRTemp tTA = newTemp(Ity_I64);
         assign(tTA, getIReg64orSP(nn));
         if (nn == 31) { /* FIXME generate stack alignment check */ }
         IRTemp tWB = IRTemp_INVALID;
         if (isPX) {
            tWB = newTemp(Ity_I64);
            assign(tWB, binop(Iop_Add64,
                              mkexpr(tTA),
                              mm == BITS5(1,1,1,1,1) ? mkU64(xferSzB)
                                                     : getIReg64orZR(mm)));
         }

         /* -- BEGIN generate the transfers -- */

         IRTemp u0, u1, u2, u3;
         u0 = u1 = u2 = u3 = IRTemp_INVALID;
         switch (nRegs) {
            case 4: u3 = newTempV128(); /* fallthru */
            case 3: u2 = newTempV128(); /* fallthru */
            case 2: u1 = newTempV128();
                    u0 = newTempV128(); break;
            default: vassert(0);
         }

         /* -- Multiple 128 or 64 bit stores -- */
         if (!isLD) {
            switch (nRegs) {
               case 4: assign(u3, getQReg128((tt+3) % 32)); /* fallthru */
               case 3: assign(u2, getQReg128((tt+2) % 32)); /* fallthru */
               case 2: assign(u1, getQReg128((tt+1) % 32));
                       assign(u0, getQReg128((tt+0) % 32)); break;
               default: vassert(0);
            }
#           define MAYBE_NARROW_TO_64(_expr) \
                      (isQ ? (_expr) : unop(Iop_V128to64,(_expr)))
            UInt step = isQ ? 16 : 8;
            switch (nRegs) {
               case 4:  storeLE( binop(Iop_Add64, mkexpr(tTA), mkU64(3*step)),
                                 MAYBE_NARROW_TO_64(mkexpr(u3)) );
                        /* fallthru */
               case 3:  storeLE( binop(Iop_Add64, mkexpr(tTA), mkU64(2*step)),
                                 MAYBE_NARROW_TO_64(mkexpr(u2)) );
                        /* fallthru */
               case 2:  storeLE( binop(Iop_Add64, mkexpr(tTA), mkU64(1*step)),
                                 MAYBE_NARROW_TO_64(mkexpr(u1)) );
                        storeLE( binop(Iop_Add64, mkexpr(tTA), mkU64(0*step)),
                                 MAYBE_NARROW_TO_64(mkexpr(u0)) );
                        break;
               default: vassert(0);
            }
#           undef MAYBE_NARROW_TO_64
         }

         /* -- Multiple 128 or 64 bit loads -- */
         else /* isLD */ {
            UInt   step   = isQ ? 16 : 8;
            IRType loadTy = isQ ? Ity_V128 : Ity_I64;
#           define MAYBE_WIDEN_FROM_64(_expr) \
                      (isQ ? (_expr) : unop(Iop_64UtoV128,(_expr)))
            switch (nRegs) {
               case 4:
                  assign(u3, MAYBE_WIDEN_FROM_64(
                                loadLE(loadTy,
                                       binop(Iop_Add64, mkexpr(tTA),
                                                        mkU64(3 * step)))));
                  /* fallthru */
               case 3:
                  assign(u2, MAYBE_WIDEN_FROM_64(
                                loadLE(loadTy,
                                       binop(Iop_Add64, mkexpr(tTA),
                                                        mkU64(2 * step)))));
                  /* fallthru */
               case 2:
                  assign(u1, MAYBE_WIDEN_FROM_64(
                                loadLE(loadTy,
                                       binop(Iop_Add64, mkexpr(tTA),
                                                        mkU64(1 * step)))));
                  assign(u0, MAYBE_WIDEN_FROM_64(
                                loadLE(loadTy,
                                       binop(Iop_Add64, mkexpr(tTA),
                                                        mkU64(0 * step)))));
                  break;
               default:
                  vassert(0);
            }
#           undef MAYBE_WIDEN_FROM_64
            switch (nRegs) {
               case 4:  putQReg128( (tt+3) % 32,
                                    math_MAYBE_ZERO_HI64(bitQ, u3));
                        /* fallthru */
               case 3:  putQReg128( (tt+2) % 32,
                                    math_MAYBE_ZERO_HI64(bitQ, u2));
                        /* fallthru */
               case 2:  putQReg128( (tt+1) % 32,
                                    math_MAYBE_ZERO_HI64(bitQ, u1));
                        putQReg128( (tt+0) % 32,
                                    math_MAYBE_ZERO_HI64(bitQ, u0));
                        break;
               default: vassert(0);
            }
         }

         /* -- END generate the transfers -- */

         /* Do the writeback, if necessary */
         if (isPX) {
            putIReg64orSP(nn, mkexpr(tWB));
         }

         HChar pxStr[20];
         pxStr[0] = pxStr[sizeof(pxStr)-1] = 0;
         if (isPX) {
            if (mm == BITS5(1,1,1,1,1))
               vex_sprintf(pxStr, ", #%u", xferSzB);
            else
               vex_sprintf(pxStr, ", %s", nameIReg64orZR(mm));
         }
         const HChar* arr = nameArr_Q_SZ(bitQ, sz);
         DIP("%s1 {v%u.%s .. v%u.%s}, [%s]%s\n",
             isLD ? "ld" : "st",
             (tt+0) % 32, arr, (tt+nRegs-1) % 32, arr, nameIReg64orSP(nn),
             pxStr);

         return True;
      }
      /* else fall through */
   }
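
   /* Illustrative contrast with the interleaving forms above:
      "ld1 {v0.16b, v1.16b}, [x0]" fills v0 from bytes [x0 .. x0+15] and
      v1 from [x0+16 .. x0+31] with no rearrangement at all, which is why
      this decoder needs no (de)interleave calls. */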
   /* ---------- LD1R (single structure, replicate) ---------- */
   /* ---------- LD2R (single structure, replicate) ---------- */
   /* ---------- LD3R (single structure, replicate) ---------- */
   /* ---------- LD4R (single structure, replicate) ---------- */
   /* 31 29       22 20    15    11 9 4
      0q 001 1010 10 00000 110 0 sz n t  LD1R {Vt.T}, [Xn|SP]
      0q 001 1011 10 m     110 0 sz n t  LD1R {Vt.T}, [Xn|SP], step

      0q 001 1010 11 00000 110 0 sz n t  LD2R {Vt..t+1.T}, [Xn|SP]
      0q 001 1011 11 m     110 0 sz n t  LD2R {Vt..t+1.T}, [Xn|SP], step

      0q 001 1010 10 00000 111 0 sz n t  LD3R {Vt..t+2.T}, [Xn|SP]
      0q 001 1011 10 m     111 0 sz n t  LD3R {Vt..t+2.T}, [Xn|SP], step

      0q 001 1010 11 00000 111 0 sz n t  LD4R {Vt..t+3.T}, [Xn|SP]
      0q 001 1011 11 m     111 0 sz n t  LD4R {Vt..t+3.T}, [Xn|SP], step

      step = if m == 11111 then transfer-size else Xm
   */
   if (INSN(31,31) == 0 && INSN(29,24) == BITS6(0,0,1,1,0,1)
       && INSN(22,22) == 1 && INSN(15,14) == BITS2(1,1)
       && INSN(12,12) == 0) {
      UInt bitQ  = INSN(30,30);
      Bool isPX  = INSN(23,23) == 1;
      UInt nRegs = ((INSN(13,13) << 1) | INSN(21,21)) + 1;
      UInt mm    = INSN(20,16);
      UInt sz    = INSN(11,10);
      UInt nn    = INSN(9,5);
      UInt tt    = INSN(4,0);

      /* The combination insn[23] == 0 && insn[20:16] != 0 is not allowed. */
      if (isPX || mm == 0) {

         IRType ty = integerIRTypeOfSize(1 << sz);

         UInt laneSzB = 1 << sz;
         UInt xferSzB = laneSzB * nRegs;

         /* Generate the transfer address (TA) and if necessary the
            writeback address (WB) */
         IRTemp tTA = newTemp(Ity_I64);
         assign(tTA, getIReg64orSP(nn));
         if (nn == 31) { /* FIXME generate stack alignment check */ }
         IRTemp tWB = IRTemp_INVALID;
         if (isPX) {
            tWB = newTemp(Ity_I64);
            assign(tWB, binop(Iop_Add64,
                              mkexpr(tTA),
                              mm == BITS5(1,1,1,1,1) ? mkU64(xferSzB)
                                                     : getIReg64orZR(mm)));
         }

         /* Do the writeback, if necessary */
         if (isPX) {
            putIReg64orSP(nn, mkexpr(tWB));
         }

         IRTemp e0, e1, e2, e3, v0, v1, v2, v3;
         e0 = e1 = e2 = e3 = v0 = v1 = v2 = v3 = IRTemp_INVALID;
         switch (nRegs) {
            case 4:
               e3 = newTemp(ty);
               assign(e3, loadLE(ty, binop(Iop_Add64, mkexpr(tTA),
                                                      mkU64(3 * laneSzB))));
               v3 = math_DUP_TO_V128(e3, ty);
               putQReg128((tt+3) % 32, math_MAYBE_ZERO_HI64(bitQ, v3));
               /* fallthrough */
            case 3:
               e2 = newTemp(ty);
               assign(e2, loadLE(ty, binop(Iop_Add64, mkexpr(tTA),
                                                      mkU64(2 * laneSzB))));
               v2 = math_DUP_TO_V128(e2, ty);
               putQReg128((tt+2) % 32, math_MAYBE_ZERO_HI64(bitQ, v2));
               /* fallthrough */
            case 2:
               e1 = newTemp(ty);
               assign(e1, loadLE(ty, binop(Iop_Add64, mkexpr(tTA),
                                                      mkU64(1 * laneSzB))));
               v1 = math_DUP_TO_V128(e1, ty);
               putQReg128((tt+1) % 32, math_MAYBE_ZERO_HI64(bitQ, v1));
               /* fallthrough */
            case 1:
               e0 = newTemp(ty);
               assign(e0, loadLE(ty, binop(Iop_Add64, mkexpr(tTA),
                                                      mkU64(0 * laneSzB))));
               v0 = math_DUP_TO_V128(e0, ty);
               putQReg128((tt+0) % 32, math_MAYBE_ZERO_HI64(bitQ, v0));
               break;
            default:
               vassert(0);
         }

         HChar pxStr[20];
         pxStr[0] = pxStr[sizeof(pxStr)-1] = 0;
         if (isPX) {
            if (mm == BITS5(1,1,1,1,1))
               vex_sprintf(pxStr, ", #%u", xferSzB);
            else
               vex_sprintf(pxStr, ", %s", nameIReg64orZR(mm));
         }
         const HChar* arr = nameArr_Q_SZ(bitQ, sz);
         DIP("ld%ur {v%u.%s .. v%u.%s}, [%s]%s\n",
             nRegs,
             (tt+0) % 32, arr, (tt+nRegs-1) % 32, arr, nameIReg64orSP(nn),
             pxStr);

         return True;
      }
      /* else fall through */
   }
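
   /* Illustrative example: "ld1r {v0.4s}, [x0]" loads one 32-bit element
      from [x0] and replicates it into all four lanes of v0; that
      replication is what math_DUP_TO_V128 generates above. */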
   /* ------ LD1/ST1 (single structure, to/from one lane) ------ */
   /* ------ LD2/ST2 (single structure, to/from one lane) ------ */
   /* ------ LD3/ST3 (single structure, to/from one lane) ------ */
   /* ------ LD4/ST4 (single structure, to/from one lane) ------ */
   /* 31 29       22 21 20    15    11 9 4
      0q 001 1010 L  0  00000 xx0 S sz n t  op1 {Vt.T}[ix], [Xn|SP]
      0q 001 1011 L  0  m     xx0 S sz n t  op1 {Vt.T}[ix], [Xn|SP], step

      0q 001 1010 L  1  00000 xx0 S sz n t  op2 {Vt..t+1.T}[ix], [Xn|SP]
      0q 001 1011 L  1  m     xx0 S sz n t  op2 {Vt..t+1.T}[ix], [Xn|SP], step

      0q 001 1010 L  0  00000 xx1 S sz n t  op3 {Vt..t+2.T}[ix], [Xn|SP]
      0q 001 1011 L  0  m     xx1 S sz n t  op3 {Vt..t+2.T}[ix], [Xn|SP], step

      0q 001 1010 L  1  00000 xx1 S sz n t  op4 {Vt..t+3.T}[ix], [Xn|SP]
      0q 001 1011 L  1  m     xx1 S sz n t  op4 {Vt..t+3.T}[ix], [Xn|SP], step

      step = if m == 11111 then transfer-size else Xm
      op   = case L of 1 -> LD ; 0 -> ST

      laneszB,ix = case xx:q:S:sz of 00:b:b:bb -> 1, bbbb
                                     01:b:b:b0 -> 2, bbb
                                     10:b:b:00 -> 4, bb
                                     10:b:0:01 -> 8, b
   */
   if (INSN(31,31) == 0 && INSN(29,24) == BITS6(0,0,1,1,0,1)) {
      UInt bitQ  = INSN(30,30);
      Bool isPX  = INSN(23,23) == 1;
      Bool isLD  = INSN(22,22) == 1;
      UInt nRegs = ((INSN(13,13) << 1) | INSN(21,21)) + 1;
      UInt mm    = INSN(20,16);
      UInt xx    = INSN(15,14);
      UInt bitS  = INSN(12,12);
      UInt sz    = INSN(11,10);
      UInt nn    = INSN(9,5);
      UInt tt    = INSN(4,0);

      Bool valid = True;

      /* The combination insn[23] == 0 && insn[20:16] != 0 is not allowed. */
      if (!isPX && mm != 0)
         valid = False;

      UInt laneSzB = 0;  /* invalid */
      UInt ix      = 16; /* invalid */

      UInt xx_q_S_sz = (xx << 4) | (bitQ << 3) | (bitS << 2) | sz;
      switch (xx_q_S_sz) {
         case 0x00: case 0x01: case 0x02: case 0x03:
         case 0x04: case 0x05: case 0x06: case 0x07:
         case 0x08: case 0x09: case 0x0A: case 0x0B:
         case 0x0C: case 0x0D: case 0x0E: case 0x0F:
            laneSzB = 1; ix = xx_q_S_sz & 0xF;
            break;
         case 0x10: case 0x12: case 0x14: case 0x16:
         case 0x18: case 0x1A: case 0x1C: case 0x1E:
            laneSzB = 2; ix = (xx_q_S_sz >> 1) & 7;
            break;
         case 0x20: case 0x24: case 0x28: case 0x2C:
            laneSzB = 4; ix = (xx_q_S_sz >> 2) & 3;
            break;
         case 0x21: case 0x29:
            laneSzB = 8; ix = (xx_q_S_sz >> 3) & 1;
            break;
         default:
            break;
      }

      if (valid && laneSzB != 0) {

         IRType ty      = integerIRTypeOfSize(laneSzB);
         UInt   xferSzB = laneSzB * nRegs;

         /* Generate the transfer address (TA) and if necessary the
            writeback address (WB) */
         IRTemp tTA = newTemp(Ity_I64);
         assign(tTA, getIReg64orSP(nn));
         if (nn == 31) { /* FIXME generate stack alignment check */ }
         IRTemp tWB = IRTemp_INVALID;
         if (isPX) {
            tWB = newTemp(Ity_I64);
            assign(tWB, binop(Iop_Add64,
                              mkexpr(tTA),
                              mm == BITS5(1,1,1,1,1) ? mkU64(xferSzB)
                                                     : getIReg64orZR(mm)));
         }

         /* Do the writeback, if necessary */
         if (isPX) {
            putIReg64orSP(nn, mkexpr(tWB));
         }

         switch (nRegs) {
            case 4: {
               IRExpr* addr
                  = binop(Iop_Add64, mkexpr(tTA), mkU64(3 * laneSzB));
               if (isLD)
                  putQRegLane((tt+3) % 32, ix, loadLE(ty, addr));
               else
                  storeLE(addr, getQRegLane((tt+3) % 32, ix, ty));
            }
            /* fallthrough */
            case 3: {
               IRExpr* addr
                  = binop(Iop_Add64, mkexpr(tTA), mkU64(2 * laneSzB));
               if (isLD)
                  putQRegLane((tt+2) % 32, ix, loadLE(ty, addr));
               else
                  storeLE(addr, getQRegLane((tt+2) % 32, ix, ty));
            }
            /* fallthrough */
            case 2: {
               IRExpr* addr
                  = binop(Iop_Add64, mkexpr(tTA), mkU64(1 * laneSzB));
               if (isLD)
                  putQRegLane((tt+1) % 32, ix, loadLE(ty, addr));
               else
                  storeLE(addr, getQRegLane((tt+1) % 32, ix, ty));
            }
            /* fallthrough */
            case 1: {
               IRExpr* addr
                  = binop(Iop_Add64, mkexpr(tTA), mkU64(0 * laneSzB));
               if (isLD)
                  putQRegLane((tt+0) % 32, ix, loadLE(ty, addr));
               else
                  storeLE(addr, getQRegLane((tt+0) % 32, ix, ty));
               break;
            }
            default:
               vassert(0);
         }

         HChar pxStr[20];
         pxStr[0] = pxStr[sizeof(pxStr)-1] = 0;
         if (isPX) {
            if (mm == BITS5(1,1,1,1,1))
               vex_sprintf(pxStr, ", #%u", xferSzB);
            else
               vex_sprintf(pxStr, ", %s", nameIReg64orZR(mm));
         }
         const HChar* arr = nameArr_Q_SZ(bitQ, sz);
         DIP("%s%u {v%u.%s .. v%u.%s}[%u], [%s]%s\n",
             isLD ? "ld" : "st", nRegs,
             (tt+0) % 32, arr, (tt+nRegs-1) % 32, arr,
             ix, nameIReg64orSP(nn), pxStr);

         return True;
      }
      /* else fall through */
   }
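
   /* Illustrative example: "ld3 {v4.h, v5.h, v6.h}[7], [x0]" loads three
      consecutive 16-bit elements into lane 7 of v4, v5 and v6, leaving
      all other lanes of those registers unchanged -- hence the
      putQRegLane/getQRegLane accesses above rather than whole-register
      reads and writes. */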
   /* ------------------ LD{,A}X{R,RH,RB} ------------------ */
   /* ------------------ ST{,L}X{R,RH,RB} ------------------ */
   /* 31 29     23  20    14 9 4
      sz 001000 010 11111 0 11111 n t   LDX{R,RH,RB}  Rt, [Xn|SP]
      sz 001000 010 11111 1 11111 n t   LDAX{R,RH,RB} Rt, [Xn|SP]
      sz 001000 000 s     0 11111 n t   STX{R,RH,RB}  Ws, Rt, [Xn|SP]
      sz 001000 000 s     1 11111 n t   STLX{R,RH,RB} Ws, Rt, [Xn|SP]
   */
   /* For the "standard" implementation we pass through the LL and SC to
      the host.  For the "fallback" implementation, for details see
        https://bugs.kde.org/show_bug.cgi?id=344524 and
        https://bugs.kde.org/show_bug.cgi?id=369459,
      but in short:

      LoadLinked(addr)
        gs.LLsize = load_size // 1, 2, 4 or 8
        gs.LLaddr = addr
        gs.LLdata = zeroExtend(*addr)

      StoreCond(addr, data)
        tmp_LLsize = gs.LLsize
        gs.LLsize = 0 // "no transaction"
        if tmp_LLsize != store_size        -> fail
        if addr != gs.LLaddr               -> fail
        if zeroExtend(*addr) != gs.LLdata  -> fail
        cas_ok = CAS(store_size, addr, gs.LLdata -> data)
        if !cas_ok                         -> fail
        succeed

      When thread scheduled
        gs.LLsize = 0 // "no transaction"
        (coregrind/m_scheduler/scheduler.c, run_thread_for_a_while()
         has to do this bit)
   */
   if (INSN(29,23) == BITS7(0,0,1,0,0,0,0)
       && (INSN(23,21) & BITS3(1,0,1)) == BITS3(0,0,0)
       && INSN(14,10) == BITS5(1,1,1,1,1)) {
      UInt szBlg2     = INSN(31,30);
      Bool isLD       = INSN(22,22) == 1;
      Bool isAcqOrRel = INSN(15,15) == 1;
      UInt ss         = INSN(20,16);
      UInt nn         = INSN(9,5);
      UInt tt         = INSN(4,0);

      vassert(szBlg2 < 4);
      UInt   szB = 1 << szBlg2; /* 1, 2, 4 or 8 */
      IRType ty  = integerIRTypeOfSize(szB);
      const HChar* suffix[4] = { "rb", "rh", "r", "r" };

      IRTemp ea = newTemp(Ity_I64);
      assign(ea, getIReg64orSP(nn));
      /* FIXME generate check that ea is szB-aligned */

      if (isLD && ss == BITS5(1,1,1,1,1)) {
         IRTemp res = newTemp(ty);
         if (abiinfo->guest__use_fallback_LLSC) {
            // Do the load first so we don't update any guest state
            // if it faults.
            IRTemp loaded_data64 = newTemp(Ity_I64);
            assign(loaded_data64, widenUto64(ty, loadLE(ty, mkexpr(ea))));
            stmt( IRStmt_Put( OFFB_LLSC_DATA, mkexpr(loaded_data64) ));
            stmt( IRStmt_Put( OFFB_LLSC_ADDR, mkexpr(ea) ));
            stmt( IRStmt_Put( OFFB_LLSC_SIZE, mkU64(szB) ));
            putIReg64orZR(tt, mkexpr(loaded_data64));
         } else {
            stmt(IRStmt_LLSC(Iend_LE, res, mkexpr(ea), NULL/*LL*/));
            putIReg64orZR(tt, widenUto64(ty, mkexpr(res)));
         }
         if (isAcqOrRel) {
            stmt(IRStmt_MBE(Imbe_Fence));
         }
         DIP("ld%sx%s %s, [%s] %s\n", isAcqOrRel ? "a" : "", suffix[szBlg2],
             nameIRegOrZR(szB == 8, tt), nameIReg64orSP(nn),
             abiinfo->guest__use_fallback_LLSC
                ? "(fallback implementation)" : "");
         return True;
      }
      if (!isLD) {
         if (isAcqOrRel) {
            stmt(IRStmt_MBE(Imbe_Fence));
         }
         IRExpr* data = narrowFrom64(ty, getIReg64orZR(tt));
         if (abiinfo->guest__use_fallback_LLSC) {
            // This is really ugly, since we don't have any way to do
            // proper if-then-else.  First, set up as if the SC failed,
            // and jump forwards if it really has failed.

            // Continuation address
            IRConst* nia = IRConst_U64(guest_PC_curr_instr + 4);

            // "the SC failed".  Any non-zero value means failure.
            putIReg64orZR(ss, mkU64(1));

            IRTemp tmp_LLsize = newTemp(Ity_I64);
            assign(tmp_LLsize, IRExpr_Get(OFFB_LLSC_SIZE, Ity_I64));
            stmt( IRStmt_Put( OFFB_LLSC_SIZE, mkU64(0) // "no transaction"
            ));

            // Fail if no or wrong-size transaction
            vassert(szB == 8 || szB == 4 || szB == 2 || szB == 1);
            stmt( IRStmt_Exit(
                     binop(Iop_CmpNE64, mkexpr(tmp_LLsize), mkU64(szB)),
                     Ijk_Boring, nia, OFFB_PC
            ));

            // Fail if the address doesn't match the LL address
            stmt( IRStmt_Exit(
                      binop(Iop_CmpNE64, mkexpr(ea),
                                         IRExpr_Get(OFFB_LLSC_ADDR, Ity_I64)),
                      Ijk_Boring, nia, OFFB_PC
            ));

            // Fail if the data doesn't match the LL data
            IRTemp llsc_data64 = newTemp(Ity_I64);
            assign(llsc_data64, IRExpr_Get(OFFB_LLSC_DATA, Ity_I64));
            stmt( IRStmt_Exit(
                      binop(Iop_CmpNE64, widenUto64(ty, loadLE(ty, mkexpr(ea))),
                                         mkexpr(llsc_data64)),
                      Ijk_Boring, nia, OFFB_PC
            ));

            // Try to CAS the new value in.
            IRTemp old  = newTemp(ty);
            IRTemp expd = newTemp(ty);
            assign(expd, narrowFrom64(ty, mkexpr(llsc_data64)));
            stmt( IRStmt_CAS(mkIRCAS(/*oldHi*/IRTemp_INVALID, old,
                                     Iend_LE, mkexpr(ea),
                                     /*expdHi*/NULL, mkexpr(expd),
                                     /*dataHi*/NULL, data
            )));

            // Fail if the CAS failed (viz, old != expd)
            stmt( IRStmt_Exit(
                      binop(Iop_CmpNE64,
                            widenUto64(ty, mkexpr(old)),
                            widenUto64(ty, mkexpr(expd))),
                      Ijk_Boring, nia, OFFB_PC
            ));

            // Otherwise we succeeded (!)
            putIReg64orZR(ss, mkU64(0));
         } else {
            IRTemp res = newTemp(Ity_I1);
            stmt(IRStmt_LLSC(Iend_LE, res, mkexpr(ea), data));
            /* IR semantics: res is 1 if store succeeds, 0 if it fails.
               Need to set rS to 1 on failure, 0 on success. */
            putIReg64orZR(ss, binop(Iop_Xor64, unop(Iop_1Uto64, mkexpr(res)),
                                               mkU64(1)));
         }
         DIP("st%sx%s %s, %s, [%s] %s\n", isAcqOrRel ? "a" : "", suffix[szBlg2],
             nameIRegOrZR(False, ss),
             nameIRegOrZR(szB == 8, tt), nameIReg64orSP(nn),
             abiinfo->guest__use_fallback_LLSC
                ? "(fallback implementation)" : "");
         return True;
      }
      /* else fall through */
   }
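
   /* A sketch (illustrative, not from the original sources) of the guest
      idiom this machinery supports -- an atomic increment built from
      LDXR/STXR, which under the fallback scheme becomes the Put/Exit/CAS
      sequence generated above:

         retry: ldxr x1, [x0]
                add  x1, x1, #1
                stxr w2, x1, [x0]    // w2 == 0 iff the store succeeded
                cbnz w2, retry
   */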
   /* ------------------ LDA{R,RH,RB} ------------------ */
   /* ------------------ STL{R,RH,RB} ------------------ */
   /* 31 29     23  20    14    9 4
      sz 001000 110 11111 1 11111 n t   LDAR<sz> Rt, [Xn|SP]
      sz 001000 100 11111 1 11111 n t   STLR<sz> Rt, [Xn|SP]
   */
   if (INSN(29,23) == BITS7(0,0,1,0,0,0,1)
       && INSN(21,10) == BITS12(0,1,1,1,1,1,1,1,1,1,1,1)) {
      UInt szBlg2 = INSN(31,30);
      Bool isLD   = INSN(22,22) == 1;
      UInt nn     = INSN(9,5);
      UInt tt     = INSN(4,0);

      vassert(szBlg2 < 4);
      UInt   szB = 1 << szBlg2; /* 1, 2, 4 or 8 */
      IRType ty  = integerIRTypeOfSize(szB);
      const HChar* suffix[4] = { "rb", "rh", "r", "r" };

      IRTemp ea = newTemp(Ity_I64);
      assign(ea, getIReg64orSP(nn));
      /* FIXME generate check that ea is szB-aligned */

      if (isLD) {
         IRTemp res = newTemp(ty);
         assign(res, loadLE(ty, mkexpr(ea)));
         putIReg64orZR(tt, widenUto64(ty, mkexpr(res)));
         stmt(IRStmt_MBE(Imbe_Fence));
         DIP("lda%s %s, [%s]\n", suffix[szBlg2],
             nameIRegOrZR(szB == 8, tt), nameIReg64orSP(nn));
      } else {
         stmt(IRStmt_MBE(Imbe_Fence));
         IRExpr* data = narrowFrom64(ty, getIReg64orZR(tt));
         storeLE(mkexpr(ea), data);
         DIP("stl%s %s, [%s]\n", suffix[szBlg2],
             nameIRegOrZR(szB == 8, tt), nameIReg64orSP(nn));
      }
      return True;
   }
   /* The PRFM cases that follow possibly allow Rt values (the
      prefetch operation) which are not allowed by the documentation.
      This should be looked into. */
   /* ------------------ PRFM (immediate) ------------------ */
   /* 31           21    9 4
      11 111 00110 imm12 n t   PRFM pfrop=Rt, [Xn|SP, #pimm]
   */
   if (INSN(31,22) == BITS10(1,1,1,1,1,0,0,1,1,0)) {
      UInt imm12 = INSN(21,10);
      UInt nn    = INSN(9,5);
      UInt tt    = INSN(4,0);
      /* Generating any IR here is pointless, except for documentation
         purposes, as it will get optimised away later. */
      IRTemp ea = newTemp(Ity_I64);
      assign(ea, binop(Iop_Add64, getIReg64orSP(nn), mkU64(imm12 * 8)));
      DIP("prfm prfop=%u, [%s, #%u]\n", tt, nameIReg64orSP(nn), imm12 * 8);
      return True;
   }

   /* ------------------ PRFM (register) ------------------ */
   /* 31 29      22 20 15  12 11 9 4
      11 1110001 01 Rm opt S  10 Rn Rt    PRFM pfrop=Rt, [Xn|SP, R<m>{ext/sh}]
   */
   if (INSN(31,21) == BITS11(1,1,1,1,1,0,0,0,1,0,1)
       && INSN(11,10) == BITS2(1,0)) {
      HChar  dis_buf[64];
      UInt   tt = INSN(4,0);
      IRTemp ea = gen_indexed_EA(dis_buf, insn, True/*to/from int regs*/);
      if (ea != IRTemp_INVALID) {
         /* No actual code to generate. */
         DIP("prfm prfop=%u, %s\n", tt, dis_buf);
         return True;
      }
   }

   /* ------------------ PRFM (unscaled offset) ------------------ */
   /* 31 29      22 20   11 9 4
      11 1110001 00 imm9 00 Rn Rt    PRFM pfrop=Rt, [Xn|SP, #simm]
   */
   if (INSN(31,21) == BITS11(1,1, 1,1,1,0,0,0,1, 0,0)
       && INSN(11,10) == BITS2(0,0)) {
      ULong  imm9   = INSN(20,12);
      UInt   nn     = INSN(9,5);
      UInt   tt     = INSN(4,0);
      ULong  offset = sx_to_64(imm9, 9);
      IRTemp ea     = newTemp(Ity_I64);
      assign(ea, binop(Iop_Add64, getIReg64orSP(nn), mkU64(offset)));
      /* No actual code to generate. */
      DIP("prfum prfop=%u, [%s, #0x%llx]\n", tt, nameIReg64orSP(nn), offset);
      return True;
   }

   vex_printf("ARM64 front end: load_store\n");
   return False;
#  undef INSN
}

/*------------------------------------------------------------*/
/*--- Control flow and misc instructions                   ---*/
/*------------------------------------------------------------*/

static
Bool dis_ARM64_branch_etc(/*MB_OUT*/DisResult* dres, UInt insn,
                          const VexArchInfo* archinfo,
                          const VexAbiInfo* abiinfo)
{
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))

   /* ---------------------- B cond ----------------------- */
   /* 31        24    4 3
      0101010 0 imm19 0 cond */
   if (INSN(31,24) == BITS8(0,1,0,1,0,1,0,0) && INSN(4,4) == 0) {
      UInt  cond   = INSN(3,0);
      ULong uimm64 = INSN(23,5) << 2;
      Long  simm64 = (Long)sx_to_64(uimm64, 21);
      vassert(dres->whatNext    == Dis_Continue);
      vassert(dres->len         == 4);
      vassert(dres->continueAt  == 0);
      vassert(dres->jk_StopHere == Ijk_INVALID);
      stmt( IRStmt_Exit(unop(Iop_64to1, mk_arm64g_calculate_condition(cond)),
                        Ijk_Boring,
                        IRConst_U64(guest_PC_curr_instr + simm64),
                        OFFB_PC) );
      putPC(mkU64(guest_PC_curr_instr + 4));
      dres->whatNext    = Dis_StopHere;
      dres->jk_StopHere = Ijk_Boring;
      DIP("b.%s 0x%llx\n", nameCC(cond), guest_PC_curr_instr + simm64);
      return True;
   }
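
   /* Sketch (illustrative) of the generated IR's shape for "b.eq .+0x20"
      at guest address A -- a conditional side-exit followed by an
      unconditional block end at the fall-through address:

         if (<eq holds>) { PC = A+0x20; exit (Ijk_Boring) }
         PC = A+4
         <end of block, Ijk_Boring>
   */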
   /* -------------------- B{L} uncond -------------------- */
   if (INSN(30,26) == BITS5(0,0,1,0,1)) {
      /* 000101 imm26  B  (PC + sxTo64(imm26 << 2))
         100101 imm26  BL (PC + sxTo64(imm26 << 2))
      */
      UInt  bLink  = INSN(31,31);
      ULong uimm64 = INSN(25,0) << 2;
      Long  simm64 = (Long)sx_to_64(uimm64, 28);
      if (bLink) {
         putIReg64orSP(30, mkU64(guest_PC_curr_instr + 4));
      }
      putPC(mkU64(guest_PC_curr_instr + simm64));
      dres->whatNext    = Dis_StopHere;
      dres->jk_StopHere = Ijk_Call;
      DIP("b%s 0x%llx\n", bLink == 1 ? "l" : "",
          guest_PC_curr_instr + simm64);
      return True;
   }

   /* --------------------- B{L} reg --------------------- */
   /* 31      24 22 20    15     9  4
      1101011 00 10 11111 000000 nn 00000  RET  Rn
      1101011 00 01 11111 000000 nn 00000  CALL Rn
      1101011 00 00 11111 000000 nn 00000  JMP  Rn
   */
   if (INSN(31,23) == BITS9(1,1,0,1,0,1,1,0,0)
       && INSN(20,16) == BITS5(1,1,1,1,1)
       && INSN(15,10) == BITS6(0,0,0,0,0,0)
       && INSN(4,0) == BITS5(0,0,0,0,0)) {
      UInt branch_type = INSN(22,21);
      UInt nn          = INSN(9,5);
      if (branch_type == BITS2(1,0) /* RET */) {
         putPC(getIReg64orZR(nn));
         dres->whatNext    = Dis_StopHere;
         dres->jk_StopHere = Ijk_Ret;
         DIP("ret %s\n", nameIReg64orZR(nn));
         return True;
      }
      if (branch_type == BITS2(0,1) /* CALL */) {
         IRTemp dst = newTemp(Ity_I64);
         assign(dst, getIReg64orZR(nn));
         putIReg64orSP(30, mkU64(guest_PC_curr_instr + 4));
         putPC(mkexpr(dst));
         dres->whatNext    = Dis_StopHere;
         dres->jk_StopHere = Ijk_Call;
         DIP("blr %s\n", nameIReg64orZR(nn));
         return True;
      }
      if (branch_type == BITS2(0,0) /* JMP */) {
         putPC(getIReg64orZR(nn));
         dres->whatNext    = Dis_StopHere;
         dres->jk_StopHere = Ijk_Boring;
         DIP("jmp %s\n", nameIReg64orZR(nn));
         return True;
      }
   }
   /* -------------------- CB{N}Z -------------------- */
   /* sf 011 010 1 imm19 Rt   CBNZ Xt|Wt, (PC + sxTo64(imm19 << 2))
      sf 011 010 0 imm19 Rt   CBZ  Xt|Wt, (PC + sxTo64(imm19 << 2))
   */
   if (INSN(30,25) == BITS6(0,1,1,0,1,0)) {
      Bool    is64   = INSN(31,31) == 1;
      Bool    bIfZ   = INSN(24,24) == 0;
      ULong   uimm64 = INSN(23,5) << 2;
      UInt    rT     = INSN(4,0);
      Long    simm64 = (Long)sx_to_64(uimm64, 21);
      IRExpr* cond   = NULL;
      if (is64) {
         cond = binop(bIfZ ? Iop_CmpEQ64 : Iop_CmpNE64,
                      getIReg64orZR(rT), mkU64(0));
      } else {
         cond = binop(bIfZ ? Iop_CmpEQ32 : Iop_CmpNE32,
                      getIReg32orZR(rT), mkU32(0));
      }
      stmt( IRStmt_Exit(cond,
                        Ijk_Boring,
                        IRConst_U64(guest_PC_curr_instr + simm64),
                        OFFB_PC) );
      putPC(mkU64(guest_PC_curr_instr + 4));
      dres->whatNext    = Dis_StopHere;
      dres->jk_StopHere = Ijk_Boring;
      DIP("cb%sz %s, 0x%llx\n",
          bIfZ ? "" : "n", nameIRegOrZR(is64, rT),
          guest_PC_curr_instr + simm64);
      return True;
   }
   /* -------------------- TB{N}Z -------------------- */
   /* 31 30      24 23  18    5 4
      b5 011 011 1  b40 imm14 t  TBNZ Xt, #(b5:b40), (PC + sxTo64(imm14 << 2))
      b5 011 011 0  b40 imm14 t  TBZ  Xt, #(b5:b40), (PC + sxTo64(imm14 << 2))
   */
   if (INSN(30,25) == BITS6(0,1,1,0,1,1)) {
      UInt  b5     = INSN(31,31);
      Bool  bIfZ   = INSN(24,24) == 0;
      UInt  b40    = INSN(23,19);
      UInt  imm14  = INSN(18,5);
      UInt  tt     = INSN(4,0);
      UInt  bitNo  = (b5 << 5) | b40;
      ULong uimm64 = imm14 << 2;
      Long  simm64 = sx_to_64(uimm64, 16);
      IRExpr* cond
         = binop(bIfZ ? Iop_CmpEQ64 : Iop_CmpNE64,
                 binop(Iop_And64,
                       binop(Iop_Shr64, getIReg64orZR(tt), mkU8(bitNo)),
                       mkU64(1)),
                 mkU64(0));
      stmt( IRStmt_Exit(cond,
                        Ijk_Boring,
                        IRConst_U64(guest_PC_curr_instr + simm64),
                        OFFB_PC) );
      putPC(mkU64(guest_PC_curr_instr + 4));
      dres->whatNext    = Dis_StopHere;
      dres->jk_StopHere = Ijk_Boring;
      DIP("tb%sz %s, #%u, 0x%llx\n",
          bIfZ ? "" : "n", nameIReg64orZR(tt), bitNo,
          guest_PC_curr_instr + simm64);
      return True;
   }
   /* -------------------- SVC -------------------- */
   /* 11010100 000 imm16 000 01
      Don't bother with anything except the imm16==0 case.
   */
   if (INSN(31,0) == 0xD4000001) {
      putPC(mkU64(guest_PC_curr_instr + 4));
      dres->whatNext    = Dis_StopHere;
      dres->jk_StopHere = Ijk_Sys_syscall;
      DIP("svc #0\n");
      return True;
   }
   /* ------------------ M{SR,RS} ------------------ */
   /* ---- Cases for TPIDR_EL0 ----
      0xD51BD0 010 Rt   MSR tpidr_el0, rT
      0xD53BD0 010 Rt   MRS rT, tpidr_el0
   */
   if (   (INSN(31,0) & 0xFFFFFFE0) == 0xD51BD040 /*MSR*/
       || (INSN(31,0) & 0xFFFFFFE0) == 0xD53BD040 /*MRS*/) {
      Bool toSys = INSN(21,21) == 0;
      UInt tt    = INSN(4,0);
      if (toSys) {
         stmt( IRStmt_Put( OFFB_TPIDR_EL0, getIReg64orZR(tt)) );
         DIP("msr tpidr_el0, %s\n", nameIReg64orZR(tt));
      } else {
         putIReg64orZR(tt, IRExpr_Get( OFFB_TPIDR_EL0, Ity_I64 ));
         DIP("mrs %s, tpidr_el0\n", nameIReg64orZR(tt));
      }
      return True;
   }
   /* ---- Cases for FPCR ----
      0xD51B44 000 Rt  MSR fpcr, rT
      0xD53B44 000 Rt  MRS rT, fpcr
   */
   if (   (INSN(31,0) & 0xFFFFFFE0) == 0xD51B4400 /*MSR*/
       || (INSN(31,0) & 0xFFFFFFE0) == 0xD53B4400 /*MRS*/) {
      Bool toSys = INSN(21,21) == 0;
      UInt tt    = INSN(4,0);
      if (toSys) {
         stmt( IRStmt_Put( OFFB_FPCR, getIReg32orZR(tt)) );
         DIP("msr fpcr, %s\n", nameIReg64orZR(tt));
      } else {
         putIReg32orZR(tt, IRExpr_Get(OFFB_FPCR, Ity_I32));
         DIP("mrs %s, fpcr\n", nameIReg64orZR(tt));
      }
      return True;
   }
   /* ---- Cases for FPSR ----
      0xD51B44 001 Rt  MSR fpsr, rT
      0xD53B44 001 Rt  MRS rT, fpsr
      The only part of this we model is FPSR.QC.  All other bits
      are ignored when writing to it and RAZ when reading from it.
   */
   if (   (INSN(31,0) & 0xFFFFFFE0) == 0xD51B4420 /*MSR*/
       || (INSN(31,0) & 0xFFFFFFE0) == 0xD53B4420 /*MRS*/) {
      Bool toSys = INSN(21,21) == 0;
      UInt tt    = INSN(4,0);
      if (toSys) {
         /* Just deal with FPSR.QC.  Make up a V128 value which is
            zero if Xt[27] is zero and any other value if Xt[27] is
            nonzero. */
         IRTemp qc64 = newTemp(Ity_I64);
         assign(qc64, binop(Iop_And64,
                            binop(Iop_Shr64, getIReg64orZR(tt), mkU8(27)),
                            mkU64(1)));
         IRExpr* qcV128 = binop(Iop_64HLtoV128, mkexpr(qc64), mkexpr(qc64));
         stmt( IRStmt_Put( OFFB_QCFLAG, qcV128 ) );
         DIP("msr fpsr, %s\n", nameIReg64orZR(tt));
      } else {
         /* Generate a value which is all zeroes except for bit 27,
            which must be zero if QCFLAG is all zeroes and one otherwise. */
         IRTemp qcV128 = newTempV128();
         assign(qcV128, IRExpr_Get( OFFB_QCFLAG, Ity_V128 ));
         IRTemp qc64 = newTemp(Ity_I64);
         assign(qc64, binop(Iop_Or64, unop(Iop_V128HIto64, mkexpr(qcV128)),
                                      unop(Iop_V128to64,   mkexpr(qcV128))));
         IRExpr* res = binop(Iop_Shl64,
                             unop(Iop_1Uto64,
                                  binop(Iop_CmpNE64, mkexpr(qc64), mkU64(0))),
                             mkU8(27));
         putIReg64orZR(tt, res);
         DIP("mrs %s, fpsr\n", nameIReg64orZR(tt));
      }
      return True;
   }
   /* ---- Cases for NZCV ----
      D51B42 000 Rt  MSR nzcv, rT
      D53B42 000 Rt  MRS rT, nzcv
      The only parts of NZCV that actually exist are bits 31:28, which
      are the N Z C and V bits themselves.  Hence the flags thunk provides
      all the state we need.
   */
   if (   (INSN(31,0) & 0xFFFFFFE0) == 0xD51B4200 /*MSR*/
       || (INSN(31,0) & 0xFFFFFFE0) == 0xD53B4200 /*MRS*/) {
      Bool toSys = INSN(21,21) == 0;
      UInt tt    = INSN(4,0);
      if (toSys) {
         IRTemp t = newTemp(Ity_I64);
         assign(t, binop(Iop_And64, getIReg64orZR(tt), mkU64(0xF0000000ULL)));
         setFlags_COPY(t);
         DIP("msr %s, nzcv\n", nameIReg32orZR(tt));
      } else {
         IRTemp res = newTemp(Ity_I64);
         assign(res, mk_arm64g_calculate_flags_nzcv());
         putIReg32orZR(tt, unop(Iop_64to32, mkexpr(res)));
         DIP("mrs %s, nzcv\n", nameIReg64orZR(tt));
      }
      return True;
   }
   /* ---- Cases for DCZID_EL0 ----
      Don't support arbitrary reads and writes to this register.  Just
      return the value 16, which indicates that the DC ZVA instruction
      is not permitted, so we don't have to emulate it.
      D5 3B 00 111 Rt  MRS rT, dczid_el0
   */
   if ((INSN(31,0) & 0xFFFFFFE0) == 0xD53B00E0) {
      UInt tt = INSN(4,0);
      putIReg64orZR(tt, mkU64(1<<4));
      DIP("mrs %s, dczid_el0 (FAKED)\n", nameIReg64orZR(tt));
      return True;
   }
   /* ---- Cases for CTR_EL0 ----
      We just handle reads, and make up a value from the D and I line
      sizes in the VexArchInfo we are given, and patch in the following
      fields that the Foundation model gives ("natively"):
      CWG = 0b0100, ERG = 0b0100, L1Ip = 0b11
      D5 3B 00 001 Rt  MRS rT, ctr_el0
   */
   if ((INSN(31,0) & 0xFFFFFFE0) == 0xD53B0020) {
      UInt tt = INSN(4,0);
      /* Need to generate a value from dMinLine_lg2_szB and
         iMinLine_lg2_szB.  The value in the register is in 32-bit
         units, so need to subtract 2 from the values in the
         VexArchInfo.  We can assume that the values here are valid --
         disInstr_ARM64 checks them -- so there's no need to deal with
         out-of-range cases. */
      vassert(archinfo->arm64_dMinLine_lg2_szB >= 2
              && archinfo->arm64_dMinLine_lg2_szB <= 17
              && archinfo->arm64_iMinLine_lg2_szB >= 2
              && archinfo->arm64_iMinLine_lg2_szB <= 17);
      UInt val
         = 0x8440c000 | ((0xF & (archinfo->arm64_dMinLine_lg2_szB - 2)) << 16)
                      | ((0xF & (archinfo->arm64_iMinLine_lg2_szB - 2)) << 0);
      putIReg64orZR(tt, mkU64(val));
      DIP("mrs %s, ctr_el0\n", nameIReg64orZR(tt));
      return True;
   }
   /* ---- Cases for CNTVCT_EL0 ----
      This is a timestamp counter of some sort.  Support reads of it only
      by passing through to the host.
      D5 3B E0 010 Rt  MRS Xt, cntvct_el0
   */
   if ((INSN(31,0) & 0xFFFFFFE0) == 0xD53BE040) {
      UInt     tt   = INSN(4,0);
      IRTemp   val  = newTemp(Ity_I64);
      IRExpr** args = mkIRExprVec_0();
      IRDirty* d    = unsafeIRDirty_1_N (
                         val,
                         0/*regparms*/,
                         "arm64g_dirtyhelper_MRS_CNTVCT_EL0",
                         &arm64g_dirtyhelper_MRS_CNTVCT_EL0,
                         args
                      );
      /* execute the dirty call, dumping the result in val. */
      stmt( IRStmt_Dirty(d) );
      putIReg64orZR(tt, mkexpr(val));
      DIP("mrs %s, cntvct_el0\n", nameIReg64orZR(tt));
      return True;
   }
   /* ---- Cases for CNTFRQ_EL0 ----
      This is always RO at EL0, so it's safe to pass through to the host.
      D5 3B E0 000 Rt  MRS Xt, cntfrq_el0
   */
   if ((INSN(31,0) & 0xFFFFFFE0) == 0xD53BE000) {
      UInt     tt   = INSN(4,0);
      IRTemp   val  = newTemp(Ity_I64);
      IRExpr** args = mkIRExprVec_0();
      IRDirty* d    = unsafeIRDirty_1_N (
                         val,
                         0/*regparms*/,
                         "arm64g_dirtyhelper_MRS_CNTFRQ_EL0",
                         &arm64g_dirtyhelper_MRS_CNTFRQ_EL0,
                         args
                      );
      /* execute the dirty call, dumping the result in val. */
      stmt( IRStmt_Dirty(d) );
      putIReg64orZR(tt, mkexpr(val));
      DIP("mrs %s, cntfrq_el0\n", nameIReg64orZR(tt));
      return True;
   }
   /* ------------------ IC_IVAU ------------------ */
   /* D5 0B 75 001 Rt  ic ivau, rT
   */
   if ((INSN(31,0) & 0xFFFFFFE0) == 0xD50B7520) {
      /* We will always be provided with a valid iMinLine value. */
      vassert(archinfo->arm64_iMinLine_lg2_szB >= 2
              && archinfo->arm64_iMinLine_lg2_szB <= 17);
      /* Round the requested address, in rT, down to the start of the
         containing block. */
      UInt   tt      = INSN(4,0);
      ULong  lineszB = 1ULL << archinfo->arm64_iMinLine_lg2_szB;
      IRTemp addr    = newTemp(Ity_I64);
      assign( addr, binop( Iop_And64,
                           getIReg64orZR(tt),
                           mkU64(~(lineszB - 1))) );
      /* Set the invalidation range, request exit-and-invalidate, with
         continuation at the next instruction. */
      stmt(IRStmt_Put(OFFB_CMSTART, mkexpr(addr)));
      stmt(IRStmt_Put(OFFB_CMLEN, mkU64(lineszB)));
      /* be paranoid ... */
      stmt( IRStmt_MBE(Imbe_Fence) );
      putPC(mkU64( guest_PC_curr_instr + 4 ));
      dres->whatNext    = Dis_StopHere;
      dres->jk_StopHere = Ijk_InvalICache;
      DIP("ic ivau, %s\n", nameIReg64orZR(tt));
      return True;
   }

   /* ------------------ DC_CVAU ------------------ */
   /* D5 0B 7B 001 Rt  dc cvau, rT
   */
   if ((INSN(31,0) & 0xFFFFFFE0) == 0xD50B7B20) {
      /* Exactly the same scheme as for IC IVAU, except we observe the
         dMinLine size, and request an Ijk_FlushDCache instead of
         Ijk_InvalICache. */
      /* We will always be provided with a valid dMinLine value. */
      vassert(archinfo->arm64_dMinLine_lg2_szB >= 2
              && archinfo->arm64_dMinLine_lg2_szB <= 17);
      /* Round the requested address, in rT, down to the start of the
         containing block. */
      UInt   tt      = INSN(4,0);
      ULong  lineszB = 1ULL << archinfo->arm64_dMinLine_lg2_szB;
      IRTemp addr    = newTemp(Ity_I64);
      assign( addr, binop( Iop_And64,
                           getIReg64orZR(tt),
                           mkU64(~(lineszB - 1))) );
      /* Set the flush range, request exit-and-flush, with
         continuation at the next instruction. */
      stmt(IRStmt_Put(OFFB_CMSTART, mkexpr(addr)));
      stmt(IRStmt_Put(OFFB_CMLEN, mkU64(lineszB)));
      /* be paranoid ... */
      stmt( IRStmt_MBE(Imbe_Fence) );
      putPC(mkU64( guest_PC_curr_instr + 4 ));
      dres->whatNext    = Dis_StopHere;
      dres->jk_StopHere = Ijk_FlushDCache;
      DIP("dc cvau, %s\n", nameIReg64orZR(tt));
      return True;
   }
   /* ------------------ ISB, DMB, DSB ------------------ */
   /* 31          21           11  7 6  4
      11010 10100 0 00 011 0011 CRm 1 01 11111  DMB opt
      11010 10100 0 00 011 0011 CRm 1 00 11111  DSB opt
      11010 10100 0 00 011 0011 CRm 1 10 11111  ISB opt
   */
   if (INSN(31,22) == BITS10(1,1,0,1,0,1,0,1,0,0)
       && INSN(21,12) == BITS10(0,0,0,0,1,1,0,0,1,1)
       && INSN(7,7) == 1
       && INSN(6,5) <= BITS2(1,0) && INSN(4,0) == BITS5(1,1,1,1,1)) {
      UInt opc = INSN(6,5);
      UInt CRm = INSN(11,8);
      vassert(opc <= 2 && CRm <= 15);
      stmt(IRStmt_MBE(Imbe_Fence));
      const HChar* opNames[3]
         = { "dsb", "dmb", "isb" };
      const HChar* howNames[16]
         = { "#0", "oshld", "oshst", "osh", "#4", "nshld", "nshst", "nsh",
             "#8", "ishld", "ishst", "ish", "#12", "ld", "st", "sy" };
      DIP("%s %s\n", opNames[opc], howNames[CRm]);
      return True;
   }
   /* -------------------- NOP -------------------- */
   if (INSN(31,0) == 0xD503201F) {
      DIP("nop\n");
      return True;
   }

   /* -------------------- BRK -------------------- */
   /* 31        23  20    4
      1101 0100 001 imm16 00000  BRK #imm16
   */
   if (INSN(31,24) == BITS8(1,1,0,1,0,1,0,0)
       && INSN(23,21) == BITS3(0,0,1) && INSN(4,0) == BITS5(0,0,0,0,0)) {
      UInt imm16 = INSN(20,5);
      /* Request SIGTRAP and then restart of this insn. */
      putPC(mkU64(guest_PC_curr_instr + 0));
      dres->whatNext    = Dis_StopHere;
      dres->jk_StopHere = Ijk_SigTRAP;
      DIP("brk #%u\n", imm16);
      return True;
   }

   /* ------------------- YIELD ------------------- */
   /* 31        23        15        7
      1101 0101 0000 0011 0010 0000 0011 1111
   */
   if (INSN(31,0) == 0xD503203F) {
      /* Request yield followed by continuation at the next insn. */
      putPC(mkU64(guest_PC_curr_instr + 4));
      dres->whatNext    = Dis_StopHere;
      dres->jk_StopHere = Ijk_Yield;
      DIP("yield\n");
      return True;
   }

   /* -------------------- HINT ------------------- */
   /* 31        23        15   11   4 3
      1101 0101 0000 0011 0010 imm7 1 1111
      Catch otherwise unhandled HINT instructions - any
      like YIELD which are explicitly handled should go
      above this case.
   */
   if (INSN(31,24) == BITS8(1,1,0,1,0,1,0,1)
       && INSN(23,16) == BITS8(0,0,0,0,0,0,1,1)
       && INSN(15,12) == BITS4(0,0,1,0)
       && INSN(4,0) == BITS5(1,1,1,1,1)) {
      UInt imm7 = INSN(11,5);
      DIP("hint #%u\n", imm7);
      return True;
   }

   /* ------------------- CLREX ------------------ */
   /* 31        23        15   11 7
      1101 0101 0000 0011 0011 m  0101 1111  CLREX CRm
      CRm is apparently ignored.
   */
   if ((INSN(31,0) & 0xFFFFF0FF) == 0xD503305F) {
      UInt mm = INSN(11,8);
      /* AFAICS, this simply cancels a (all?) reservations made by a
         (any?) preceding LDREX(es).  Arrange to hand it through to
         the back end. */
      if (abiinfo->guest__use_fallback_LLSC) {
         stmt( IRStmt_Put( OFFB_LLSC_SIZE, mkU64(0) )); // "no transaction"
      } else {
         stmt( IRStmt_MBE(Imbe_CancelReservation) );
      }
      DIP("clrex #%u\n", mm);
      return True;
   }
   vex_printf("ARM64 front end: branch_etc\n");
   return False;
#  undef INSN
}


/*------------------------------------------------------------*/
/*--- SIMD and FP instructions:  helper functions          ---*/
/*------------------------------------------------------------*/
/* Some constructors for interleave/deinterleave expressions. */

static IRExpr* mk_CatEvenLanes64x2 ( IRTemp a10, IRTemp b10 ) {
   // returns a0 b0
   return binop(Iop_InterleaveLO64x2, mkexpr(a10), mkexpr(b10));
}

static IRExpr* mk_CatOddLanes64x2 ( IRTemp a10, IRTemp b10 ) {
   // returns a1 b1
   return binop(Iop_InterleaveHI64x2, mkexpr(a10), mkexpr(b10));
}

static IRExpr* mk_CatEvenLanes32x4 ( IRTemp a3210, IRTemp b3210 ) {
   // returns a2 a0 b2 b0
   return binop(Iop_CatEvenLanes32x4, mkexpr(a3210), mkexpr(b3210));
}

static IRExpr* mk_CatOddLanes32x4 ( IRTemp a3210, IRTemp b3210 ) {
   // returns a3 a1 b3 b1
   return binop(Iop_CatOddLanes32x4, mkexpr(a3210), mkexpr(b3210));
}

static IRExpr* mk_InterleaveLO32x4 ( IRTemp a3210, IRTemp b3210 ) {
   // returns a1 b1 a0 b0
   return binop(Iop_InterleaveLO32x4, mkexpr(a3210), mkexpr(b3210));
}

static IRExpr* mk_InterleaveHI32x4 ( IRTemp a3210, IRTemp b3210 ) {
   // returns a3 b3 a2 b2
   return binop(Iop_InterleaveHI32x4, mkexpr(a3210), mkexpr(b3210));
}

static IRExpr* mk_CatEvenLanes16x8 ( IRTemp a76543210, IRTemp b76543210 ) {
   // returns a6 a4 a2 a0 b6 b4 b2 b0
   return binop(Iop_CatEvenLanes16x8, mkexpr(a76543210), mkexpr(b76543210));
}

static IRExpr* mk_CatOddLanes16x8 ( IRTemp a76543210, IRTemp b76543210 ) {
   // returns a7 a5 a3 a1 b7 b5 b3 b1
   return binop(Iop_CatOddLanes16x8, mkexpr(a76543210), mkexpr(b76543210));
}

static IRExpr* mk_InterleaveLO16x8 ( IRTemp a76543210, IRTemp b76543210 ) {
   // returns a3 b3 a2 b2 a1 b1 a0 b0
   return binop(Iop_InterleaveLO16x8, mkexpr(a76543210), mkexpr(b76543210));
}

static IRExpr* mk_InterleaveHI16x8 ( IRTemp a76543210, IRTemp b76543210 ) {
   // returns a7 b7 a6 b6 a5 b5 a4 b4
   return binop(Iop_InterleaveHI16x8, mkexpr(a76543210), mkexpr(b76543210));
}

static IRExpr* mk_CatEvenLanes8x16 ( IRTemp aFEDCBA9876543210,
                                     IRTemp bFEDCBA9876543210 ) {
   // returns aE aC aA a8 a6 a4 a2 a0 bE bC bA b8 b6 b4 b2 b0
   return binop(Iop_CatEvenLanes8x16, mkexpr(aFEDCBA9876543210),
                                      mkexpr(bFEDCBA9876543210));
}

static IRExpr* mk_CatOddLanes8x16 ( IRTemp aFEDCBA9876543210,
                                    IRTemp bFEDCBA9876543210 ) {
   // returns aF aD aB a9 a7 a5 a3 a1 bF bD bB b9 b7 b5 b3 b1
   return binop(Iop_CatOddLanes8x16, mkexpr(aFEDCBA9876543210),
                                     mkexpr(bFEDCBA9876543210));
}

static IRExpr* mk_InterleaveLO8x16 ( IRTemp aFEDCBA9876543210,
                                     IRTemp bFEDCBA9876543210 ) {
   // returns a7 b7 a6 b6 a5 b5 a4 b4 a3 b3 a2 b2 a1 b1 a0 b0
   return binop(Iop_InterleaveLO8x16, mkexpr(aFEDCBA9876543210),
                                      mkexpr(bFEDCBA9876543210));
}

static IRExpr* mk_InterleaveHI8x16 ( IRTemp aFEDCBA9876543210,
                                     IRTemp bFEDCBA9876543210 ) {
   // returns aF bF aE bE aD bD aC bC aB bB aA bA a9 b9 a8 b8
   return binop(Iop_InterleaveHI8x16, mkexpr(aFEDCBA9876543210),
                                      mkexpr(bFEDCBA9876543210));
}
/* Generate N copies of |bit| in the bottom of a ULong. */
static ULong Replicate ( ULong bit, Int N )
{
   vassert(bit <= 1 && N >= 1 && N < 64);
   if (bit == 0) {
      return 0;
   } else {
      /* Careful.  This won't work for N == 64. */
      return (1ULL << N) - 1;
   }
}

static ULong Replicate32x2 ( ULong bits32 )
{
   vassert(0 == (bits32 & ~0xFFFFFFFFULL));
   return (bits32 << 32) | bits32;
}

static ULong Replicate16x4 ( ULong bits16 )
{
   vassert(0 == (bits16 & ~0xFFFFULL));
   return Replicate32x2((bits16 << 16) | bits16);
}

static ULong Replicate8x8 ( ULong bits8 )
{
   vassert(0 == (bits8 & ~0xFFULL));
   return Replicate16x4((bits8 << 8) | bits8);
}
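
/* Examples (illustrative): Replicate(1,3) == 0b111 == 7;
   Replicate16x4(0xAB12) == 0xAB12AB12AB12AB12ULL;
   Replicate8x8(0x5A) == 0x5A5A5A5A5A5A5A5AULL. */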
/* Expand the VFPExpandImm-style encoding in the bottom 8 bits of
   |imm8| to either a 32-bit value if N is 32 or a 64 bit value if N
   is 64.  In the former case, the upper 32 bits of the returned value
   are guaranteed to be zero. */
static ULong VFPExpandImm ( ULong imm8, Int N )
{
   vassert(imm8 <= 0xFF);
   vassert(N == 32 || N == 64);
   Int E = ((N == 32) ? 8 : 11) - 2; // The spec incorrectly omits the -2.
   Int F = N - E - 1;
   ULong imm8_6 = (imm8 >> 6) & 1;
   /* sign: 1 bit */
   /* exp:  E bits */
   /* frac: F bits */
   ULong sign = (imm8 >> 7) & 1;
   ULong exp  = ((imm8_6 ^ 1) << (E-1)) | Replicate(imm8_6, E-1);
   ULong frac = ((imm8 & 63) << (F-6)) | Replicate(0, F-6);
   vassert(sign < (1ULL << 1));
   vassert(exp  < (1ULL << E));
   vassert(frac < (1ULL << F));
   vassert(1 + E + F == N);
   ULong res = (sign << (E+F)) | (exp << F) | frac;
   return res;
}
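
/* Worked example (illustrative): imm8 = 0x70, N = 32 gives sign = 0 and
   imm8<6> = 1, hence exp = 0b011111 and frac = 0b110000 << 19; assembling
   these produces 0x3F800000, i.e. 1.0 -- the encoding that
   "fmov s0, #1.0" uses. */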
/* Expand an AdvSIMDExpandImm-style encoding into a 64-bit value.
   This might fail, as indicated by the returned Bool.  Page 2530 of
   the manual. */
static Bool AdvSIMDExpandImm ( /*OUT*/ULong* res,
                               UInt op, UInt cmode, UInt imm8 )
{
   vassert(op <= 1);
   vassert(cmode <= 15);
   vassert(imm8 <= 255);

   *res = 0; /* will overwrite iff returning True */

   ULong imm64    = 0;
   Bool  testimm8 = False;

   switch (cmode >> 1) {
      case 0:
         testimm8 = False; imm64 = Replicate32x2(imm8); break;
      case 1:
         testimm8 = True; imm64 = Replicate32x2(imm8 << 8); break;
      case 2:
         testimm8 = True; imm64 = Replicate32x2(imm8 << 16); break;
      case 3:
         testimm8 = True; imm64 = Replicate32x2(imm8 << 24); break;
      case 4:
         testimm8 = False; imm64 = Replicate16x4(imm8); break;
      case 5:
         testimm8 = True; imm64 = Replicate16x4(imm8 << 8); break;
      case 6:
         testimm8 = True;
         if ((cmode & 1) == 0)
            imm64 = Replicate32x2((imm8 << 8) | 0xFF);
         else
            imm64 = Replicate32x2((imm8 << 16) | 0xFFFF);
         break;
      case 7:
         testimm8 = False;
         if ((cmode & 1) == 0 && op == 0)
            imm64 = Replicate8x8(imm8);
         if ((cmode & 1) == 0 && op == 1) {
            imm64 = 0;   imm64 |= (imm8 & 0x80) ? 0xFF : 0x00;
            imm64 <<= 8; imm64 |= (imm8 & 0x40) ? 0xFF : 0x00;
            imm64 <<= 8; imm64 |= (imm8 & 0x20) ? 0xFF : 0x00;
            imm64 <<= 8; imm64 |= (imm8 & 0x10) ? 0xFF : 0x00;
            imm64 <<= 8; imm64 |= (imm8 & 0x08) ? 0xFF : 0x00;
            imm64 <<= 8; imm64 |= (imm8 & 0x04) ? 0xFF : 0x00;
            imm64 <<= 8; imm64 |= (imm8 & 0x02) ? 0xFF : 0x00;
            imm64 <<= 8; imm64 |= (imm8 & 0x01) ? 0xFF : 0x00;
         }
         if ((cmode & 1) == 1 && op == 0) {
            ULong imm8_7  = (imm8 >> 7) & 1;
            ULong imm8_6  = (imm8 >> 6) & 1;
            ULong imm8_50 = imm8 & 63;
            ULong imm32 = (imm8_7                 << (1 + 5 + 6 + 19))
                          | ((imm8_6 ^ 1)         << (5 + 6 + 19))
                          | (Replicate(imm8_6, 5) << (6 + 19))
                          | (imm8_50              << 19);
            imm64 = Replicate32x2(imm32);
         }
         if ((cmode & 1) == 1 && op == 1) {
            // imm64 = imm8<7>:NOT(imm8<6>)
            //                :Replicate(imm8<6>,8):imm8<5:0>:Zeros(48);
            ULong imm8_7  = (imm8 >> 7) & 1;
            ULong imm8_6  = (imm8 >> 6) & 1;
            ULong imm8_50 = imm8 & 63;
            imm64 = (imm8_7 << 63) | ((imm8_6 ^ 1) << 62)
                    | (Replicate(imm8_6, 8) << 54)
                    | (imm8_50 << 48);
         }
         break;
      default:
         vassert(0);
   }

   if (testimm8 && imm8 == 0)
      return False;

   *res = imm64;
   return True;
}
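
/* Worked example (illustrative): op == 1, cmode == 0b1110, imm8 == 0xA5
   takes the bit-to-byte expansion path above and yields
   imm64 == 0xFF00FF0000FF00FFULL (each bit of imm8 widened to a whole
   byte, most significant bit first). */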
/* Help a bit for decoding laneage for vector operations that can be
   of the form 4x32, 2x64 or 2x32-and-zero-upper-half, as encoded by Q
   and SZ bits, typically for vector floating point. */
static Bool getLaneInfo_Q_SZ ( /*OUT*/IRType* tyI,  /*OUT*/IRType* tyF,
                               /*OUT*/UInt* nLanes, /*OUT*/Bool* zeroUpper,
                               /*OUT*/const HChar** arrSpec,
                               Bool bitQ, Bool bitSZ )
{
   vassert(bitQ == True || bitQ == False);
   vassert(bitSZ == True || bitSZ == False);
   if (bitQ && bitSZ) { // 2x64
      if (tyI)       *tyI       = Ity_I64;
      if (tyF)       *tyF       = Ity_F64;
      if (nLanes)    *nLanes    = 2;
      if (zeroUpper) *zeroUpper = False;
      if (arrSpec)   *arrSpec   = "2d";
      return True;
   }
   if (bitQ && !bitSZ) { // 4x32
      if (tyI)       *tyI       = Ity_I32;
      if (tyF)       *tyF       = Ity_F32;
      if (nLanes)    *nLanes    = 4;
      if (zeroUpper) *zeroUpper = False;
      if (arrSpec)   *arrSpec   = "4s";
      return True;
   }
   if (!bitQ && !bitSZ) { // 2x32
      if (tyI)       *tyI       = Ity_I32;
      if (tyF)       *tyF       = Ity_F32;
      if (nLanes)    *nLanes    = 2;
      if (zeroUpper) *zeroUpper = True;
      if (arrSpec)   *arrSpec   = "2s";
      return True;
   }
   // Else impliedly 1x64, which isn't allowed.
   return False;
}
/* Helper for decoding laneage for shift-style vector operations
   that involve an immediate shift amount. */
static Bool getLaneInfo_IMMH_IMMB ( /*OUT*/UInt* shift, /*OUT*/UInt* szBlg2,
                                    UInt immh, UInt immb )
{
   vassert(immh < (1<<4));
   vassert(immb < (1<<3));
   UInt immhb = (immh << 3) | immb;
   if (immh & 8) {
      if (shift)  *shift  = 128 - immhb;
      if (szBlg2) *szBlg2 = 3;
      return True;
   }
   if (immh & 4) {
      if (shift)  *shift  = 64 - immhb;
      if (szBlg2) *szBlg2 = 2;
      return True;
   }
   if (immh & 2) {
      if (shift)  *shift  = 32 - immhb;
      if (szBlg2) *szBlg2 = 1;
      return True;
   }
   if (immh & 1) {
      if (shift)  *shift  = 16 - immhb;
      if (szBlg2) *szBlg2 = 0;
      return True;
   }
   return False;
}
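
/* Example (illustrative): immh = 0b0001, immb = 0b010 gives immhb = 10,
   so the lane size is 8 bits (szBlg2 == 0) and the shift amount is
   16 - 10 == 6. */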
7518 /* Generate IR to fold all lanes of the V128 value in 'src' as
7519 characterised by the operator 'op', and return the result in the
7520 bottom bits of a V128, with all other bits set to zero. */
7521 static IRTemp
math_FOLDV ( IRTemp src
, IROp op
)
7523 /* The basic idea is to use repeated applications of Iop_CatEven*
7524 and Iop_CatOdd* operators to 'src' so as to clone each lane into
7525 a complete vector. Then fold all those vectors with 'op' and
7526 zero out all but the least significant lane. */
7528 case Iop_Min8Sx16
: case Iop_Min8Ux16
:
7529 case Iop_Max8Sx16
: case Iop_Max8Ux16
: case Iop_Add8x16
: {
7530 /* NB: temp naming here is misleading -- the naming is for 8
7531 lanes of 16 bit, whereas what is being operated on is 16
7533 IRTemp x76543210
= src
;
7534 IRTemp x76547654
= newTempV128();
7535 IRTemp x32103210
= newTempV128();
7536 assign(x76547654
, mk_CatOddLanes64x2 (x76543210
, x76543210
));
7537 assign(x32103210
, mk_CatEvenLanes64x2(x76543210
, x76543210
));
7538 IRTemp x76767676
= newTempV128();
7539 IRTemp x54545454
= newTempV128();
7540 IRTemp x32323232
= newTempV128();
7541 IRTemp x10101010
= newTempV128();
7542 assign(x76767676
, mk_CatOddLanes32x4 (x76547654
, x76547654
));
7543 assign(x54545454
, mk_CatEvenLanes32x4(x76547654
, x76547654
));
7544 assign(x32323232
, mk_CatOddLanes32x4 (x32103210
, x32103210
));
7545 assign(x10101010
, mk_CatEvenLanes32x4(x32103210
, x32103210
));
7546 IRTemp x77777777
= newTempV128();
7547 IRTemp x66666666
= newTempV128();
7548 IRTemp x55555555
= newTempV128();
7549 IRTemp x44444444
= newTempV128();
7550 IRTemp x33333333
= newTempV128();
7551 IRTemp x22222222
= newTempV128();
7552 IRTemp x11111111
= newTempV128();
7553 IRTemp x00000000
= newTempV128();
7554 assign(x77777777
, mk_CatOddLanes16x8 (x76767676
, x76767676
));
7555 assign(x66666666
, mk_CatEvenLanes16x8(x76767676
, x76767676
));
7556 assign(x55555555
, mk_CatOddLanes16x8 (x54545454
, x54545454
));
7557 assign(x44444444
, mk_CatEvenLanes16x8(x54545454
, x54545454
));
7558 assign(x33333333
, mk_CatOddLanes16x8 (x32323232
, x32323232
));
7559 assign(x22222222
, mk_CatEvenLanes16x8(x32323232
, x32323232
));
7560 assign(x11111111
, mk_CatOddLanes16x8 (x10101010
, x10101010
));
7561 assign(x00000000
, mk_CatEvenLanes16x8(x10101010
, x10101010
));
7562 /* Naming not misleading after here. */
7563 IRTemp xAllF
= newTempV128();
7564 IRTemp xAllE
= newTempV128();
7565 IRTemp xAllD
= newTempV128();
7566 IRTemp xAllC
= newTempV128();
7567 IRTemp xAllB
= newTempV128();
7568 IRTemp xAllA
= newTempV128();
7569 IRTemp xAll9
= newTempV128();
7570 IRTemp xAll8
= newTempV128();
7571 IRTemp xAll7
= newTempV128();
7572 IRTemp xAll6
= newTempV128();
7573 IRTemp xAll5
= newTempV128();
7574 IRTemp xAll4
= newTempV128();
7575 IRTemp xAll3
= newTempV128();
7576 IRTemp xAll2
= newTempV128();
7577 IRTemp xAll1
= newTempV128();
7578 IRTemp xAll0
= newTempV128();
7579 assign(xAllF
, mk_CatOddLanes8x16 (x77777777
, x77777777
));
7580 assign(xAllE
, mk_CatEvenLanes8x16(x77777777
, x77777777
));
7581 assign(xAllD
, mk_CatOddLanes8x16 (x66666666
, x66666666
));
7582 assign(xAllC
, mk_CatEvenLanes8x16(x66666666
, x66666666
));
7583 assign(xAllB
, mk_CatOddLanes8x16 (x55555555
, x55555555
));
7584 assign(xAllA
, mk_CatEvenLanes8x16(x55555555
, x55555555
));
7585 assign(xAll9
, mk_CatOddLanes8x16 (x44444444
, x44444444
));
7586 assign(xAll8
, mk_CatEvenLanes8x16(x44444444
, x44444444
));
7587 assign(xAll7
, mk_CatOddLanes8x16 (x33333333
, x33333333
));
7588 assign(xAll6
, mk_CatEvenLanes8x16(x33333333
, x33333333
));
7589 assign(xAll5
, mk_CatOddLanes8x16 (x22222222
, x22222222
));
7590 assign(xAll4
, mk_CatEvenLanes8x16(x22222222
, x22222222
));
7591 assign(xAll3
, mk_CatOddLanes8x16 (x11111111
, x11111111
));
7592 assign(xAll2
, mk_CatEvenLanes8x16(x11111111
, x11111111
));
7593 assign(xAll1
, mk_CatOddLanes8x16 (x00000000
, x00000000
));
7594 assign(xAll0
, mk_CatEvenLanes8x16(x00000000
, x00000000
));
7595 IRTemp maxFE
= newTempV128();
7596 IRTemp maxDC
= newTempV128();
7597 IRTemp maxBA
= newTempV128();
7598 IRTemp max98
= newTempV128();
7599 IRTemp max76
= newTempV128();
7600 IRTemp max54
= newTempV128();
7601 IRTemp max32
= newTempV128();
7602 IRTemp max10
= newTempV128();
7603 assign(maxFE
, binop(op
, mkexpr(xAllF
), mkexpr(xAllE
)));
7604 assign(maxDC
, binop(op
, mkexpr(xAllD
), mkexpr(xAllC
)));
7605 assign(maxBA
, binop(op
, mkexpr(xAllB
), mkexpr(xAllA
)));
7606 assign(max98
, binop(op
, mkexpr(xAll9
), mkexpr(xAll8
)));
7607 assign(max76
, binop(op
, mkexpr(xAll7
), mkexpr(xAll6
)));
7608 assign(max54
, binop(op
, mkexpr(xAll5
), mkexpr(xAll4
)));
7609 assign(max32
, binop(op
, mkexpr(xAll3
), mkexpr(xAll2
)));
7610 assign(max10
, binop(op
, mkexpr(xAll1
), mkexpr(xAll0
)));
7611 IRTemp maxFEDC
= newTempV128();
7612 IRTemp maxBA98
= newTempV128();
7613 IRTemp max7654
= newTempV128();
7614 IRTemp max3210
= newTempV128();
7615 assign(maxFEDC
, binop(op
, mkexpr(maxFE
), mkexpr(maxDC
)));
7616 assign(maxBA98
, binop(op
, mkexpr(maxBA
), mkexpr(max98
)));
7617 assign(max7654
, binop(op
, mkexpr(max76
), mkexpr(max54
)));
7618 assign(max3210
, binop(op
, mkexpr(max32
), mkexpr(max10
)));
7619 IRTemp maxFEDCBA98
= newTempV128();
7620 IRTemp max76543210
= newTempV128();
7621 assign(maxFEDCBA98
, binop(op
, mkexpr(maxFEDC
), mkexpr(maxBA98
)));
7622 assign(max76543210
, binop(op
, mkexpr(max7654
), mkexpr(max3210
)));
7623 IRTemp maxAllLanes
= newTempV128();
7624 assign(maxAllLanes
, binop(op
, mkexpr(maxFEDCBA98
),
7625 mkexpr(max76543210
)));
7626 IRTemp res
= newTempV128();
7627 assign(res
, unop(Iop_ZeroHI120ofV128
, mkexpr(maxAllLanes
)));
7630 case Iop_Min16Sx8
: case Iop_Min16Ux8
:
7631 case Iop_Max16Sx8
: case Iop_Max16Ux8
: case Iop_Add16x8
: {
7632 IRTemp x76543210
= src
;
7633 IRTemp x76547654
= newTempV128();
7634 IRTemp x32103210
= newTempV128();
7635 assign(x76547654
, mk_CatOddLanes64x2 (x76543210
, x76543210));
         assign(x32103210, mk_CatEvenLanes64x2(x76543210, x76543210));
         IRTemp x76767676 = newTempV128();
         IRTemp x54545454 = newTempV128();
         IRTemp x32323232 = newTempV128();
         IRTemp x10101010 = newTempV128();
         assign(x76767676, mk_CatOddLanes32x4 (x76547654, x76547654));
         assign(x54545454, mk_CatEvenLanes32x4(x76547654, x76547654));
         assign(x32323232, mk_CatOddLanes32x4 (x32103210, x32103210));
         assign(x10101010, mk_CatEvenLanes32x4(x32103210, x32103210));
         IRTemp x77777777 = newTempV128();
         IRTemp x66666666 = newTempV128();
         IRTemp x55555555 = newTempV128();
         IRTemp x44444444 = newTempV128();
         IRTemp x33333333 = newTempV128();
         IRTemp x22222222 = newTempV128();
         IRTemp x11111111 = newTempV128();
         IRTemp x00000000 = newTempV128();
         assign(x77777777, mk_CatOddLanes16x8 (x76767676, x76767676));
         assign(x66666666, mk_CatEvenLanes16x8(x76767676, x76767676));
         assign(x55555555, mk_CatOddLanes16x8 (x54545454, x54545454));
         assign(x44444444, mk_CatEvenLanes16x8(x54545454, x54545454));
         assign(x33333333, mk_CatOddLanes16x8 (x32323232, x32323232));
         assign(x22222222, mk_CatEvenLanes16x8(x32323232, x32323232));
         assign(x11111111, mk_CatOddLanes16x8 (x10101010, x10101010));
         assign(x00000000, mk_CatEvenLanes16x8(x10101010, x10101010));
         IRTemp max76 = newTempV128();
         IRTemp max54 = newTempV128();
         IRTemp max32 = newTempV128();
         IRTemp max10 = newTempV128();
         assign(max76, binop(op, mkexpr(x77777777), mkexpr(x66666666)));
         assign(max54, binop(op, mkexpr(x55555555), mkexpr(x44444444)));
         assign(max32, binop(op, mkexpr(x33333333), mkexpr(x22222222)));
         assign(max10, binop(op, mkexpr(x11111111), mkexpr(x00000000)));
         IRTemp max7654 = newTempV128();
         IRTemp max3210 = newTempV128();
         assign(max7654, binop(op, mkexpr(max76), mkexpr(max54)));
         assign(max3210, binop(op, mkexpr(max32), mkexpr(max10)));
         IRTemp max76543210 = newTempV128();
         assign(max76543210, binop(op, mkexpr(max7654), mkexpr(max3210)));
         IRTemp res = newTempV128();
         assign(res, unop(Iop_ZeroHI112ofV128, mkexpr(max76543210)));
         return res;
      }
      case Iop_Max32Fx4: case Iop_Min32Fx4:
      case Iop_Min32Sx4: case Iop_Min32Ux4:
      case Iop_Max32Sx4: case Iop_Max32Ux4: case Iop_Add32x4: {
         IRTemp x3210 = src;
         IRTemp x3232 = newTempV128();
         IRTemp x1010 = newTempV128();
         assign(x3232, mk_CatOddLanes64x2 (x3210, x3210));
         assign(x1010, mk_CatEvenLanes64x2(x3210, x3210));
         IRTemp x3333 = newTempV128();
         IRTemp x2222 = newTempV128();
         IRTemp x1111 = newTempV128();
         IRTemp x0000 = newTempV128();
         assign(x3333, mk_CatOddLanes32x4 (x3232, x3232));
         assign(x2222, mk_CatEvenLanes32x4(x3232, x3232));
         assign(x1111, mk_CatOddLanes32x4 (x1010, x1010));
         assign(x0000, mk_CatEvenLanes32x4(x1010, x1010));
         IRTemp max32 = newTempV128();
         IRTemp max10 = newTempV128();
         assign(max32, binop(op, mkexpr(x3333), mkexpr(x2222)));
         assign(max10, binop(op, mkexpr(x1111), mkexpr(x0000)));
         IRTemp max3210 = newTempV128();
         assign(max3210, binop(op, mkexpr(max32), mkexpr(max10)));
         IRTemp res = newTempV128();
         assign(res, unop(Iop_ZeroHI96ofV128, mkexpr(max3210)));
         return res;
      }
      case Iop_Max64Fx2: case Iop_Min64Fx2:
      case Iop_Add64x2: {
         IRTemp x10 = src;
         IRTemp x00 = newTempV128();
         IRTemp x11 = newTempV128();
         assign(x11, binop(Iop_InterleaveHI64x2, mkexpr(x10), mkexpr(x10)));
         assign(x00, binop(Iop_InterleaveLO64x2, mkexpr(x10), mkexpr(x10)));
         IRTemp max10 = newTempV128();
         assign(max10, binop(op, mkexpr(x11), mkexpr(x00)));
         IRTemp res = newTempV128();
         assign(res, unop(Iop_ZeroHI64ofV128, mkexpr(max10)));
         return res;
      }
      default:
         return IRTemp_INVALID;
   }
}
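
/* Note on the folding strategy above (illustrative sketch only): each
   CatOdd/CatEven (or InterleaveHI/LO) stage halves the number of
   distinct lanes per temporary, so after log2(#lanes) stages each
   xNN.. temp holds a single source lane replicated across the whole
   vector.  For a 4-lane value [3 2 1 0], the first stage yields
   [3 2 3 2] and [1 0 1 0], the second yields [3 3 3 3], [2 2 2 2],
   [1 1 1 1] and [0 0 0 0], and three applications of |op| then fold
   these into the final across-lanes value, which is kept in the
   lowest lane only. */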
/* Generate IR for TBL and TBX.  This deals with the 128 bit case
   only. */
static
IRTemp math_TBL_TBX ( IRTemp tab[4], UInt len, IRTemp src,
                      IRTemp oor_values )
{
   vassert(len >= 0 && len <= 3);

   /* Generate some useful constants as concisely as possible. */
   IRTemp half15 = newTemp(Ity_I64);
   assign(half15, mkU64(0x0F0F0F0F0F0F0F0FULL));
   IRTemp half16 = newTemp(Ity_I64);
   assign(half16, mkU64(0x1010101010101010ULL));

   /* A zero vector */
   IRTemp allZero = newTempV128();
   assign(allZero, mkV128(0x0000));
   /* A vector containing 15 in each 8-bit lane */
   IRTemp all15 = newTempV128();
   assign(all15, binop(Iop_64HLtoV128, mkexpr(half15), mkexpr(half15)));
   /* A vector containing 16 in each 8-bit lane */
   IRTemp all16 = newTempV128();
   assign(all16, binop(Iop_64HLtoV128, mkexpr(half16), mkexpr(half16)));
   /* A vector containing 32 in each 8-bit lane */
   IRTemp all32 = newTempV128();
   assign(all32, binop(Iop_Add8x16, mkexpr(all16), mkexpr(all16)));
   /* A vector containing 48 in each 8-bit lane */
   IRTemp all48 = newTempV128();
   assign(all48, binop(Iop_Add8x16, mkexpr(all16), mkexpr(all32)));
   /* A vector containing 64 in each 8-bit lane */
   IRTemp all64 = newTempV128();
   assign(all64, binop(Iop_Add8x16, mkexpr(all32), mkexpr(all32)));

   /* Group the 16/32/48/64 vectors so as to be indexable. */
   IRTemp allXX[4] = { all16, all32, all48, all64 };

   /* Compute the result for each table vector, with zeroes in places
      where the index values are out of range, and OR them into the
      running result. */
   IRTemp running_result = newTempV128();
   assign(running_result, mkV128(0));

   UInt tabent;
   for (tabent = 0; tabent <= len; tabent++) {
      vassert(tabent >= 0 && tabent < 4);
      IRTemp bias = newTempV128();
      assign(bias,
             mkexpr(tabent == 0 ? allZero : allXX[tabent-1]));
      IRTemp biased_indices = newTempV128();
      assign(biased_indices,
             binop(Iop_Sub8x16, mkexpr(src), mkexpr(bias)));
      IRTemp valid_mask = newTempV128();
      assign(valid_mask,
             binop(Iop_CmpGT8Ux16, mkexpr(all16), mkexpr(biased_indices)));
      IRTemp safe_biased_indices = newTempV128();
      assign(safe_biased_indices,
             binop(Iop_AndV128, mkexpr(biased_indices), mkexpr(all15)));
      IRTemp results_or_junk = newTempV128();
      assign(results_or_junk,
             binop(Iop_Perm8x16, mkexpr(tab[tabent]),
                                 mkexpr(safe_biased_indices)));
      IRTemp results_or_zero = newTempV128();
      assign(results_or_zero,
             binop(Iop_AndV128, mkexpr(results_or_junk), mkexpr(valid_mask)));
      /* And OR that into the running result. */
      IRTemp tmp = newTempV128();
      assign(tmp, binop(Iop_OrV128, mkexpr(results_or_zero),
                        mkexpr(running_result)));
      running_result = tmp;
   }

   /* So now running_result holds the overall result where the indices
      are in range, and zero in out-of-range lanes.  Now we need to
      compute an overall validity mask and use this to copy in the
      lanes in the oor_values for out of range indices.  This is
      unnecessary for TBL but will get folded out by iropt, so we lean
      on that and generate the same code for TBL and TBX here. */
   IRTemp overall_valid_mask = newTempV128();
   assign(overall_valid_mask,
          binop(Iop_CmpGT8Ux16, mkexpr(allXX[len]), mkexpr(src)));
   IRTemp result = newTempV128();
   assign(result,
          binop(Iop_OrV128,
                mkexpr(running_result),
                binop(Iop_AndV128,
                      mkexpr(oor_values),
                      unop(Iop_NotV128, mkexpr(overall_valid_mask)))));
   return result;
}
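
/* Worked example for the loop above (illustrative only).  Suppose
   len == 1 (a two-register table) and one lane of |src| holds 0x13
   (19).  For tabent == 1 the bias is all16, so biased_indices has
   0x03 in that lane; valid_mask is all-ones there since 3 < 16, and
   Iop_Perm8x16 selects byte 3 of tab[1], which is element 19 of the
   combined 32-byte table, as required.  An index >= 32 fails the
   CmpGT8Ux16 test for every tabent and also for overall_valid_mask,
   so that lane takes the oor_values byte instead. */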
/* Let |argL| and |argR| be V128 values, and let |opI64x2toV128| be
   an op which takes two I64s and produces a V128.  That is, a widening
   operator.  Generate IR which applies |opI64x2toV128| to either the
   lower (if |is2| is False) or upper (if |is2| is True) halves of
   |argL| and |argR|, and return the value in a new IRTemp.
*/
static
IRTemp math_BINARY_WIDENING_V128 ( Bool is2, IROp opI64x2toV128,
                                   IRExpr* argL, IRExpr* argR )
{
   IRTemp res   = newTempV128();
   IROp   slice = is2 ? Iop_V128HIto64 : Iop_V128to64;
   assign(res, binop(opI64x2toV128, unop(slice, argL),
                                    unop(slice, argR)));
   return res;
}
/* Generate signed/unsigned absolute difference vector IR. */
static
IRTemp math_ABD ( Bool isU, UInt size, IRExpr* argLE, IRExpr* argRE )
{
   vassert(size <= 3);
   IRTemp argL = newTempV128();
   IRTemp argR = newTempV128();
   IRTemp msk  = newTempV128();
   IRTemp res  = newTempV128();
   assign(argL, argLE);
   assign(argR, argRE);
   assign(msk, binop(isU ? mkVecCMPGTU(size) : mkVecCMPGTS(size),
                     mkexpr(argL), mkexpr(argR)));
   assign(res,
          binop(Iop_OrV128,
                binop(Iop_AndV128,
                      binop(mkVecSUB(size), mkexpr(argL), mkexpr(argR)),
                      mkexpr(msk)),
                binop(Iop_AndV128,
                      binop(mkVecSUB(size), mkexpr(argR), mkexpr(argL)),
                      unop(Iop_NotV128, mkexpr(msk)))));
   return res;
}
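
/* The above computes |argL - argR| with a compare-and-mux identity:
   abd(a,b) = (a > b) ? a-b : b-a.  Both differences are computed with
   wrapping subtraction, so whichever one |msk| selects is the true
   absolute difference, even though the unselected one wrapped. */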
/* Generate IR that takes a V128 and sign- or zero-widens
   either the lower or upper set of lanes to twice-as-wide,
   resulting in a new V128 value. */
static
IRTemp math_WIDEN_LO_OR_HI_LANES ( Bool zWiden, Bool fromUpperHalf,
                                   UInt sizeNarrow, IRExpr* srcE )
{
   IRTemp src = newTempV128();
   IRTemp res = newTempV128();
   assign(src, srcE);
   switch (sizeNarrow) {
      case X10:
         assign(res,
                binop(zWiden ? Iop_ShrN64x2 : Iop_SarN64x2,
                      binop(fromUpperHalf ? Iop_InterleaveHI32x4
                                          : Iop_InterleaveLO32x4,
                            mkexpr(src), mkexpr(src)),
                      mkU8(32)));
         break;
      case X01:
         assign(res,
                binop(zWiden ? Iop_ShrN32x4 : Iop_SarN32x4,
                      binop(fromUpperHalf ? Iop_InterleaveHI16x8
                                          : Iop_InterleaveLO16x8,
                            mkexpr(src), mkexpr(src)),
                      mkU8(16)));
         break;
      case X00:
         assign(res,
                binop(zWiden ? Iop_ShrN16x8 : Iop_SarN16x8,
                      binop(fromUpperHalf ? Iop_InterleaveHI8x16
                                          : Iop_InterleaveLO8x16,
                            mkexpr(src), mkexpr(src)),
                      mkU8(8)));
         break;
      default:
         vassert(0);
   }
   return res;
}
/* Generate IR that takes a V128 and sign- or zero-widens
   either the even or odd lanes to twice-as-wide,
   resulting in a new V128 value. */
static
IRTemp math_WIDEN_EVEN_OR_ODD_LANES ( Bool zWiden, Bool fromOdd,
                                      UInt sizeNarrow, IRExpr* srcE )
{
   IRTemp src   = newTempV128();
   IRTemp res   = newTempV128();
   IROp   opSAR = mkVecSARN(sizeNarrow+1);
   IROp   opSHR = mkVecSHRN(sizeNarrow+1);
   IROp   opSHL = mkVecSHLN(sizeNarrow+1);
   IROp   opSxR = zWiden ? opSHR : opSAR;
   UInt   amt   = 0;
   switch (sizeNarrow) {
      case X10: amt = 32; break;
      case X01: amt = 16; break;
      case X00: amt = 8;  break;
      default: vassert(0);
   }
   assign(src, srcE);
   if (fromOdd) {
      assign(res, binop(opSxR, mkexpr(src), mkU8(amt)));
   } else {
      assign(res, binop(opSxR, binop(opSHL, mkexpr(src), mkU8(amt)),
                               mkU8(amt)));
   }
   return res;
}
/* Generate IR that takes two V128s and narrows (takes lower half)
   of each lane, producing a single V128 value. */
static
IRTemp math_NARROW_LANES ( IRTemp argHi, IRTemp argLo, UInt sizeNarrow )
{
   IRTemp res = newTempV128();
   assign(res, binop(mkVecCATEVENLANES(sizeNarrow),
                     mkexpr(argHi), mkexpr(argLo)));
   return res;
}
/* Return a temp which holds the vector dup of the lane of width
   (1 << size) obtained from src[laneNo]. */
static
IRTemp math_DUP_VEC_ELEM ( IRExpr* src, UInt size, UInt laneNo )
{
   vassert(size <= 3);
   /* Normalise |laneNo| so it is of the form
      x000 for D, xx00 for S, xxx0 for H, and xxxx for B.
      This puts the bits we want to inspect at constant offsets
      regardless of the value of |size|.
   */
   UInt ix = laneNo << size;
   vassert(ix <= 15);
   IROp ops[4] = { Iop_INVALID, Iop_INVALID, Iop_INVALID, Iop_INVALID };
   switch (size) {
      case 0: /* B */
         ops[0] = (ix & 1) ? Iop_CatOddLanes8x16 : Iop_CatEvenLanes8x16;
         /* fallthrough */
      case 1: /* H */
         ops[1] = (ix & 2) ? Iop_CatOddLanes16x8 : Iop_CatEvenLanes16x8;
         /* fallthrough */
      case 2: /* S */
         ops[2] = (ix & 4) ? Iop_CatOddLanes32x4 : Iop_CatEvenLanes32x4;
         /* fallthrough */
      case 3: /* D */
         ops[3] = (ix & 8) ? Iop_InterleaveHI64x2 : Iop_InterleaveLO64x2;
         break;
      default:
         vassert(0);
   }
   IRTemp res = newTempV128();
   assign(res, src);
   Int i;
   for (i = 3; i >= 0; i--) {
      if (ops[i] == Iop_INVALID)
         break;
      IRTemp tmp = newTempV128();
      assign(tmp, binop(ops[i], mkexpr(res), mkexpr(res)));
      res = tmp;
   }
   return res;
}
/* Let |srcV| be a V128 value, and let |imm5| be a lane-and-size
   selector encoded as shown below.  Return a new V128 holding the
   selected lane from |srcV| dup'd out to V128, and also return the
   lane number, log2 of the lane size in bytes, and width-character via
   *laneNo, *laneSzLg2 and *laneCh respectively.  It may be that imm5
   is an invalid selector, in which case return
   IRTemp_INVALID, 0, 0 and '?' respectively.

   imm5 = xxxx1   signifies .b[xxxx]
        = xxx10   .h[xxx]
        = xx100   .s[xx]
        = x1000   .d[x]
        otherwise invalid
*/
static
IRTemp handle_DUP_VEC_ELEM ( /*OUT*/UInt* laneNo,
                             /*OUT*/UInt* laneSzLg2, /*OUT*/HChar* laneCh,
                             IRExpr* srcV, UInt imm5 )
{
   *laneNo    = 0;
   *laneSzLg2 = 0;
   *laneCh    = '?';

   if (imm5 & 1) {
      *laneNo    = (imm5 >> 1) & 15;
      *laneSzLg2 = 0;
      *laneCh    = 'b';
   }
   else if (imm5 & 2) {
      *laneNo    = (imm5 >> 2) & 7;
      *laneSzLg2 = 1;
      *laneCh    = 'h';
   }
   else if (imm5 & 4) {
      *laneNo    = (imm5 >> 3) & 3;
      *laneSzLg2 = 2;
      *laneCh    = 's';
   }
   else if (imm5 & 8) {
      *laneNo    = (imm5 >> 4) & 1;
      *laneSzLg2 = 3;
      *laneCh    = 'd';
   }
   else {
      /* invalid */
      return IRTemp_INVALID;
   }

   return math_DUP_VEC_ELEM(srcV, *laneSzLg2, *laneNo);
}
/* Clone |imm| to every lane of a V128, with lane size log2 of |size|. */
static
IRTemp math_VEC_DUP_IMM ( UInt size, ULong imm )
{
   IRType ty  = Ity_INVALID;
   IRTemp rcS = IRTemp_INVALID;
   switch (size) {
      case X01:
         vassert(imm <= 0xFFFFULL);
         ty  = Ity_I16;
         rcS = newTemp(ty); assign(rcS, mkU16( (UShort)imm ));
         break;
      case X10:
         vassert(imm <= 0xFFFFFFFFULL);
         ty  = Ity_I32;
         rcS = newTemp(ty); assign(rcS, mkU32( (UInt)imm ));
         break;
      case X11:
         ty  = Ity_I64;
         rcS = newTemp(ty); assign(rcS, mkU64(imm)); break;
      default:
         vassert(0);
   }
   IRTemp rcV = math_DUP_TO_V128(rcS, ty);
   return rcV;
}
/* Let |new64| be a V128 in which only the lower 64 bits are interesting,
   and the upper can contain any value -- it is ignored.  If |is2| is False,
   generate IR to put |new64| in the lower half of vector reg |dd| and zero
   the upper half.  If |is2| is True, generate IR to put |new64| in the upper
   half of vector reg |dd| and leave the lower half unchanged.  This
   simulates the behaviour of the "foo/foo2" instructions in which the
   destination is half the width of sources, for example addhn/addhn2.
*/
static
void putLO64andZUorPutHI64 ( Bool is2, UInt dd, IRTemp new64 )
{
   if (is2) {
      /* Get the old contents of Vdd, zero the upper half, and replace
         it with |new64|. */
      IRTemp t_zero_oldLO = newTempV128();
      assign(t_zero_oldLO, unop(Iop_ZeroHI64ofV128, getQReg128(dd)));
      IRTemp t_newHI_zero = newTempV128();
      assign(t_newHI_zero, binop(Iop_InterleaveLO64x2, mkexpr(new64),
                                                       mkV128(0x0000)));
      IRTemp res = newTempV128();
      assign(res, binop(Iop_OrV128, mkexpr(t_zero_oldLO),
                                    mkexpr(t_newHI_zero)));
      putQReg128(dd, mkexpr(res));
   } else {
      /* This is simple. */
      putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(new64)));
   }
}
/* Compute vector SQABS at lane size |size| for |srcE|, returning
   the q result in |*qabs| and the normal result in |*nabs|. */
static
void math_SQABS ( /*OUT*/IRTemp* qabs, /*OUT*/IRTemp* nabs,
                  IRExpr* srcE, UInt size )
{
   IRTemp src, mask, maskn, nsub, qsub;
   src = mask = maskn = nsub = qsub = IRTemp_INVALID;
   newTempsV128_7(&src, &mask, &maskn, &nsub, &qsub, nabs, qabs);
   assign(src,   srcE);
   assign(mask,  binop(mkVecCMPGTS(size),  mkV128(0x0000), mkexpr(src)));
   assign(maskn, unop(Iop_NotV128, mkexpr(mask)));
   assign(nsub,  binop(mkVecSUB(size),   mkV128(0x0000), mkexpr(src)));
   assign(qsub,  binop(mkVecQSUBS(size), mkV128(0x0000), mkexpr(src)));
   assign(*nabs, binop(Iop_OrV128,
                       binop(Iop_AndV128, mkexpr(nsub), mkexpr(mask)),
                       binop(Iop_AndV128, mkexpr(src),  mkexpr(maskn))));
   assign(*qabs, binop(Iop_OrV128,
                       binop(Iop_AndV128, mkexpr(qsub), mkexpr(mask)),
                       binop(Iop_AndV128, mkexpr(src),  mkexpr(maskn))));
}
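
/* Both results above are formed with the usual mask-mux idiom:
   (negated & mask) | (src & ~mask), where |mask| marks the negative
   lanes.  The only difference between *nabs and *qabs is whether the
   negation uses wrapping or saturating subtract; comparing the two
   afterwards (see updateQCFLAGwithDifference*) reveals lanes that
   saturated, i.e. lanes holding the most negative representable
   value. */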
/* Compute vector SQNEG at lane size |size| for |srcE|, returning
   the q result in |*qneg| and the normal result in |*nneg|. */
static
void math_SQNEG ( /*OUT*/IRTemp* qneg, /*OUT*/IRTemp* nneg,
                  IRExpr* srcE, UInt size )
{
   IRTemp src = IRTemp_INVALID;
   newTempsV128_3(&src, nneg, qneg);
   assign(src,   srcE);
   assign(*nneg, binop(mkVecSUB(size),   mkV128(0x0000), mkexpr(src)));
   assign(*qneg, binop(mkVecQSUBS(size), mkV128(0x0000), mkexpr(src)));
}
/* Zero all except the least significant lane of |srcE|, where |size|
   indicates the lane size in the usual way. */
static IRTemp math_ZERO_ALL_EXCEPT_LOWEST_LANE ( UInt size, IRExpr* srcE )
{
   vassert(size < 4);
   IRTemp t = newTempV128();
   assign(t, unop(mkVecZEROHIxxOFV128(size), srcE));
   return t;
}
/* Generate IR to compute vector widening MULL from either the lower
   (is2==False) or upper (is2==True) halves of vecN and vecM.  The
   widening multiplies are unsigned when isU==True and signed when
   isU==False.  |size| is the narrow lane size indication.  Optionally,
   the product may be added to or subtracted from vecD, at the wide lane
   size.  This happens when |mas| is 'a' (add) or 's' (sub).  When |mas|
   is 'm' (only multiply) then the accumulate part does not happen, and
   |vecD| is expected to == IRTemp_INVALID.

   Only size==0 (h_b_b), size==1 (s_h_h) and size==2 (d_s_s) variants
   are allowed.  The result is returned in a new IRTemp, which is
   returned in *res. */
static
void math_MULL_ACC ( /*OUT*/IRTemp* res,
                     Bool is2, Bool isU, UInt size, HChar mas,
                     IRTemp vecN, IRTemp vecM, IRTemp vecD )
{
   vassert(res && *res == IRTemp_INVALID);
   vassert(size <= 2);
   vassert(mas == 'm' || mas == 'a' || mas == 's');
   if (mas == 'm') vassert(vecD == IRTemp_INVALID);
   IROp   mulOp = isU ? mkVecMULLU(size) : mkVecMULLS(size);
   IROp   accOp = (mas == 'a') ? mkVecADD(size+1)
                  : (mas == 's' ? mkVecSUB(size+1)
                                : Iop_INVALID);
   IRTemp mul   = math_BINARY_WIDENING_V128(is2, mulOp,
                                            mkexpr(vecN), mkexpr(vecM));
   *res = newTempV128();
   assign(*res, mas == 'm' ? mkexpr(mul)
                           : binop(accOp, mkexpr(vecD), mkexpr(mul)));
}
/* Same as math_MULL_ACC, except the multiply is signed widening,
   the multiplied value is then doubled, before being added to or
   subtracted from the accumulated value.  And everything is
   saturated.  In all cases, saturation residuals are returned
   via (sat1q, sat1n), and in the accumulate cases,
   via (sat2q, sat2n) too.  All results are returned in new temporaries.
   In the no-accumulate case, *sat2q and *sat2n are never instantiated,
   so the caller can tell this has happened. */
static
void math_SQDMULL_ACC ( /*OUT*/IRTemp* res,
                        /*OUT*/IRTemp* sat1q, /*OUT*/IRTemp* sat1n,
                        /*OUT*/IRTemp* sat2q, /*OUT*/IRTemp* sat2n,
                        Bool is2, UInt size, HChar mas,
                        IRTemp vecN, IRTemp vecM, IRTemp vecD )
{
   vassert(size <= 2);
   vassert(mas == 'm' || mas == 'a' || mas == 's');
   /* Compute
         sat1q = vecN.D[is2] *sq vecM.d[is2] *q 2
         sat1n = vecN.D[is2] *s  vecM.d[is2] *  2
      IOW take either the low or high halves of vecN and vecM, signed widen,
      multiply, double that, and signedly saturate.  Also compute the same
      but without saturation.
   */
   vassert(sat2q && *sat2q == IRTemp_INVALID);
   vassert(sat2n && *sat2n == IRTemp_INVALID);
   newTempsV128_3(sat1q, sat1n, res);
   IRTemp tq = math_BINARY_WIDENING_V128(is2, mkVecQDMULLS(size),
                                         mkexpr(vecN), mkexpr(vecM));
   IRTemp tn = math_BINARY_WIDENING_V128(is2, mkVecMULLS(size),
                                         mkexpr(vecN), mkexpr(vecM));
   assign(*sat1q, mkexpr(tq));
   assign(*sat1n, binop(mkVecADD(size+1), mkexpr(tn), mkexpr(tn)));

   /* If there is no accumulation, the final result is sat1q,
      and there's no assignment to sat2q or sat2n. */
   if (mas == 'm') {
      assign(*res, mkexpr(*sat1q));
      return;
   }

   /* Compute
         sat2q  = vecD +sq/-sq sat1q
         sat2n  = vecD +/-     sat1n
         result = sat2q
   */
   newTempsV128_2(sat2q, sat2n);
   assign(*sat2q, binop(mas == 'a' ? mkVecQADDS(size+1) : mkVecQSUBS(size+1),
                        mkexpr(vecD), mkexpr(*sat1q)));
   assign(*sat2n, binop(mas == 'a' ? mkVecADD(size+1) : mkVecSUB(size+1),
                        mkexpr(vecD), mkexpr(*sat1n)));
   assign(*res, mkexpr(*sat2q));
}
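
/* The paired q/n results exist purely so the caller can set QCFLAG:
   sat1q vs sat1n detects saturation in the doubling multiply, and
   sat2q vs sat2n detects saturation in the accumulate step.  Where
   the two members of a pair agree, no saturation happened. */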
/* Generate IR for widening signed vector multiplies.  The operands
   have their lane width signedly widened, and they are then multiplied
   at the wider width, returning results in two new IRTemps. */
static
void math_MULLS ( /*OUT*/IRTemp* resHI, /*OUT*/IRTemp* resLO,
                  UInt sizeNarrow, IRTemp argL, IRTemp argR )
{
   vassert(sizeNarrow <= 2);
   newTempsV128_2(resHI, resLO);
   IRTemp argLhi = newTemp(Ity_I64);
   IRTemp argLlo = newTemp(Ity_I64);
   IRTemp argRhi = newTemp(Ity_I64);
   IRTemp argRlo = newTemp(Ity_I64);
   assign(argLhi, unop(Iop_V128HIto64, mkexpr(argL)));
   assign(argLlo, unop(Iop_V128to64,   mkexpr(argL)));
   assign(argRhi, unop(Iop_V128HIto64, mkexpr(argR)));
   assign(argRlo, unop(Iop_V128to64,   mkexpr(argR)));
   IROp opMulls = mkVecMULLS(sizeNarrow);
   assign(*resHI, binop(opMulls, mkexpr(argLhi), mkexpr(argRhi)));
   assign(*resLO, binop(opMulls, mkexpr(argLlo), mkexpr(argRlo)));
}
/* Generate IR for SQDMULH and SQRDMULH: signedly wideningly multiply,
   double that, possibly add a rounding constant (R variants), and take
   the high half. */
static
void math_SQDMULH ( /*OUT*/IRTemp* res,
                    /*OUT*/IRTemp* sat1q, /*OUT*/IRTemp* sat1n,
                    Bool isR, UInt size, IRTemp vN, IRTemp vM )
{
   vassert(size == X01 || size == X10); /* s or h only */

   newTempsV128_3(res, sat1q, sat1n);

   IRTemp mullsHI = IRTemp_INVALID, mullsLO = IRTemp_INVALID;
   math_MULLS(&mullsHI, &mullsLO, size, vN, vM);

   IROp addWide = mkVecADD(size+1);

   if (isR) {
      assign(*sat1q, binop(mkVecQRDMULHIS(size), mkexpr(vN), mkexpr(vM)));

      Int    rcShift    = size == X01 ? 15 : 31;
      IRTemp roundConst = math_VEC_DUP_IMM(size+1, 1ULL << rcShift);
      assign(*sat1n,
             binop(mkVecCATODDLANES(size),
                   binop(addWide,
                         binop(addWide, mkexpr(mullsHI), mkexpr(mullsHI)),
                         mkexpr(roundConst)),
                   binop(addWide,
                         binop(addWide, mkexpr(mullsLO), mkexpr(mullsLO)),
                         mkexpr(roundConst))));
   } else {
      assign(*sat1q, binop(mkVecQDMULHIS(size), mkexpr(vN), mkexpr(vM)));

      assign(*sat1n,
             binop(mkVecCATODDLANES(size),
                   binop(addWide, mkexpr(mullsHI), mkexpr(mullsHI)),
                   binop(addWide, mkexpr(mullsLO), mkexpr(mullsLO))));
   }

   assign(*res, mkexpr(*sat1q));
}
/* Generate IR for SQSHL, UQSHL, SQSHLU by imm.  Put the result in
   a new temp in *res, and the Q difference pair in new temps in
   *qDiff1 and *qDiff2 respectively.  |nm| denotes which of the
   three operations it is. */
static
void math_QSHL_IMM ( /*OUT*/IRTemp* res,
                     /*OUT*/IRTemp* qDiff1, /*OUT*/IRTemp* qDiff2,
                     IRTemp src, UInt size, UInt shift, const HChar* nm )
{
   vassert(size <= 3);
   UInt laneBits = 8 << size;
   vassert(shift < laneBits);
   newTempsV128_3(res, qDiff1, qDiff2);
   IRTemp z128 = newTempV128();
   assign(z128, mkV128(0x0000));

   /* UQSHL */
   if (vex_streq(nm, "uqshl")) {
      IROp qop = mkVecQSHLNSATUU(size);
      assign(*res, binop(qop, mkexpr(src), mkU8(shift)));
      if (shift == 0) {
         /* No shift means no saturation. */
         assign(*qDiff1, mkexpr(z128));
         assign(*qDiff2, mkexpr(z128));
      } else {
         /* Saturation has occurred if any of the shifted-out bits are
            nonzero.  We get the shifted-out bits by right-shifting the
            original value. */
         UInt rshift = laneBits - shift;
         vassert(rshift >= 1 && rshift < laneBits);
         assign(*qDiff1, binop(mkVecSHRN(size), mkexpr(src), mkU8(rshift)));
         assign(*qDiff2, mkexpr(z128));
      }
      return;
   }

   /* SQSHL */
   if (vex_streq(nm, "sqshl")) {
      IROp qop = mkVecQSHLNSATSS(size);
      assign(*res, binop(qop, mkexpr(src), mkU8(shift)));
      if (shift == 0) {
         /* No shift means no saturation. */
         assign(*qDiff1, mkexpr(z128));
         assign(*qDiff2, mkexpr(z128));
      } else {
         /* Saturation has occurred if any of the shifted-out bits are
            different from the top bit of the original value. */
         UInt rshift = laneBits - 1 - shift;
         vassert(rshift >= 0 && rshift < laneBits-1);
         /* qDiff1 is the shifted out bits, and the top bit of the original
            value, preceded by zeroes. */
         assign(*qDiff1, binop(mkVecSHRN(size), mkexpr(src), mkU8(rshift)));
         /* qDiff2 is the top bit of the original value, cloned the
            correct number of times. */
         assign(*qDiff2, binop(mkVecSHRN(size),
                               binop(mkVecSARN(size), mkexpr(src),
                                                      mkU8(laneBits-1)),
                               mkU8(rshift)));
         /* This also succeeds in comparing the top bit of the original
            value to itself, which is a bit stupid, but not wrong. */
      }
      return;
   }

   /* SQSHLU */
   if (vex_streq(nm, "sqshlu")) {
      IROp qop = mkVecQSHLNSATSU(size);
      assign(*res, binop(qop, mkexpr(src), mkU8(shift)));
      if (shift == 0) {
         /* If there's no shift, saturation depends on the top bit
            of the source. */
         assign(*qDiff1, binop(mkVecSHRN(size), mkexpr(src),
                                                mkU8(laneBits-1)));
         assign(*qDiff2, mkexpr(z128));
      } else {
         /* Saturation has occurred if any of the shifted-out bits are
            nonzero.  We get the shifted-out bits by right-shifting the
            original value. */
         UInt rshift = laneBits - shift;
         vassert(rshift >= 1 && rshift < laneBits);
         assign(*qDiff1, binop(mkVecSHRN(size), mkexpr(src), mkU8(rshift)));
         assign(*qDiff2, mkexpr(z128));
      }
      return;
   }

   vassert(0);
}
/* Generate IR to do SRHADD and URHADD. */
static
IRTemp math_RHADD ( UInt size, Bool isU, IRTemp aa, IRTemp bb )
{
   /* Generate this:
      (A >> 1) + (B >> 1) + (((A & 1) + (B & 1) + 1) >> 1)
   */
   vassert(size <= 3);
   IROp opSHR = isU ? mkVecSHRN(size) : mkVecSARN(size);
   IROp opADD = mkVecADD(size);
   /* The only tricky bit is to generate the correct vector 1 constant. */
   const ULong ones64[4]
      = { 0x0101010101010101ULL, 0x0001000100010001ULL,
          0x0000000100000001ULL, 0x0000000000000001ULL };
   IRTemp imm64 = newTemp(Ity_I64);
   assign(imm64, mkU64(ones64[size]));
   IRTemp vecOne = newTempV128();
   assign(vecOne, binop(Iop_64HLtoV128, mkexpr(imm64), mkexpr(imm64)));
   IRTemp scaOne = newTemp(Ity_I8);
   assign(scaOne, mkU8(1));
   IRTemp res = newTempV128();
   assign(res,
          binop(opADD,
                binop(opSHR, mkexpr(aa), mkexpr(scaOne)),
                binop(opADD,
                      binop(opSHR, mkexpr(bb), mkexpr(scaOne)),
                      binop(opSHR,
                            binop(opADD,
                                  binop(opADD,
                                        binop(Iop_AndV128, mkexpr(aa),
                                                           mkexpr(vecOne)),
                                        binop(Iop_AndV128, mkexpr(bb),
                                                           mkexpr(vecOne))),
                                  mkexpr(vecOne)),
                            mkexpr(scaOne)))));
   return res;
}
/* QCFLAG tracks the SIMD sticky saturation status.  Update the status
   thusly: if, after application of |opZHI| to both |qres| and |nres|,
   they have the same value, leave QCFLAG unchanged.  Otherwise, set it
   (implicitly) to 1.  |opZHI| may only be one of the Iop_ZeroHIxxofV128
   operators, or Iop_INVALID, in which case |qres| and |nres| are used
   unmodified.  The presence of |opZHI| means this function can be used to
   generate QCFLAG update code for both scalar and vector SIMD operations.
*/
static
void updateQCFLAGwithDifferenceZHI ( IRTemp qres, IRTemp nres, IROp opZHI )
{
   IRTemp diff      = newTempV128();
   IRTemp oldQCFLAG = newTempV128();
   IRTemp newQCFLAG = newTempV128();
   if (opZHI == Iop_INVALID) {
      assign(diff, binop(Iop_XorV128, mkexpr(qres), mkexpr(nres)));
   } else {
      vassert(opZHI == Iop_ZeroHI64ofV128
              || opZHI == Iop_ZeroHI96ofV128 || opZHI == Iop_ZeroHI112ofV128);
      assign(diff, unop(opZHI, binop(Iop_XorV128,
                                     mkexpr(qres), mkexpr(nres))));
   }
   assign(oldQCFLAG, IRExpr_Get(OFFB_QCFLAG, Ity_V128));
   assign(newQCFLAG, binop(Iop_OrV128, mkexpr(oldQCFLAG), mkexpr(diff)));
   stmt(IRStmt_Put(OFFB_QCFLAG, mkexpr(newQCFLAG)));
}
/* A variant of updateQCFLAGwithDifferenceZHI in which |qres| and |nres|
   are used unmodified, hence suitable for QCFLAG updates for
   whole-vector operations. */
static
void updateQCFLAGwithDifference ( IRTemp qres, IRTemp nres )
{
   updateQCFLAGwithDifferenceZHI(qres, nres, Iop_INVALID);
}
/* Generate IR to rearrange two vector values in a way which is useful
   for doing S/D add-pair etc operations.  There are 3 cases:

   2d:  [m1 m0] [n1 n0] --> [m1 n1] [m0 n0]

   4s:  [m3 m2 m1 m0] [n3 n2 n1 n0] --> [m3 m1 n3 n1] [m2 m0 n2 n0]

   2s:  [m3 m2 m1 m0] [n3 n2 n1 n0] --> [0 0 m1 n1] [0 0 m0 n0]

   The cases are distinguished as follows:
   isD == True,  bitQ == 1  =>  2d
   isD == False, bitQ == 1  =>  4s
   isD == False, bitQ == 0  =>  2s
*/
static
void math_REARRANGE_FOR_FLOATING_PAIRWISE (
        /*OUT*/IRTemp* rearrL, /*OUT*/IRTemp* rearrR,
        IRTemp vecM, IRTemp vecN, Bool isD, UInt bitQ
     )
{
   vassert(rearrL && *rearrL == IRTemp_INVALID);
   vassert(rearrR && *rearrR == IRTemp_INVALID);
   *rearrL = newTempV128();
   *rearrR = newTempV128();
   if (isD) {
      // 2d case
      vassert(bitQ == 1);
      assign(*rearrL, binop(Iop_InterleaveHI64x2, mkexpr(vecM), mkexpr(vecN)));
      assign(*rearrR, binop(Iop_InterleaveLO64x2, mkexpr(vecM), mkexpr(vecN)));
   }
   else if (!isD && bitQ == 1) {
      // 4s case
      assign(*rearrL, binop(Iop_CatOddLanes32x4,  mkexpr(vecM), mkexpr(vecN)));
      assign(*rearrR, binop(Iop_CatEvenLanes32x4, mkexpr(vecM), mkexpr(vecN)));
   } else {
      // 2s case
      vassert(!isD && bitQ == 0);
      IRTemp m1n1m0n0 = newTempV128();
      IRTemp m0n0m1n1 = newTempV128();
      assign(m1n1m0n0, binop(Iop_InterleaveLO32x4,
                             mkexpr(vecM), mkexpr(vecN)));
      assign(m0n0m1n1, triop(Iop_SliceV128,
                             mkexpr(m1n1m0n0), mkexpr(m1n1m0n0), mkU8(8)));
      assign(*rearrL, unop(Iop_ZeroHI64ofV128, mkexpr(m1n1m0n0)));
      assign(*rearrR, unop(Iop_ZeroHI64ofV128, mkexpr(m0n0m1n1)));
   }
}
/* Returns 2.0 ^ (-n) for n in 1 .. 64 */
static Double two_to_the_minus ( Int n )
{
   if (n == 1) return 0.5;
   vassert(n >= 2 && n <= 64);
   Int half = n / 2;
   return two_to_the_minus(half) * two_to_the_minus(n - half);
}


/* Returns 2.0 ^ n for n in 1 .. 64 */
static Double two_to_the_plus ( Int n )
{
   if (n == 1) return 2.0;
   vassert(n >= 2 && n <= 64);
   Int half = n / 2;
   return two_to_the_plus(half) * two_to_the_plus(n - half);
}
/*------------------------------------------------------------*/
/*--- SIMD and FP instructions                             ---*/
/*------------------------------------------------------------*/

static
Bool dis_AdvSIMD_EXT(/*MB_OUT*/DisResult* dres, UInt insn)
{
   /* 31  29     23  21 20 15 14   10 9 4
      0 q 101110 op2 0  m  0  imm4 0  n d
      Decode fields: op2
   */
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   if (INSN(31,31) != 0
       || INSN(29,24) != BITS6(1,0,1,1,1,0)
       || INSN(21,21) != 0 || INSN(15,15) != 0 || INSN(10,10) != 0) {
      return False;
   }
   UInt bitQ = INSN(30,30);
   UInt op2  = INSN(23,22);
   UInt mm   = INSN(20,16);
   UInt imm4 = INSN(14,11);
   UInt nn   = INSN(9,5);
   UInt dd   = INSN(4,0);

   if (op2 == BITS2(0,0)) {
      /* -------- 00: EXT 16b_16b_16b, 8b_8b_8b -------- */
      IRTemp sHi = newTempV128();
      IRTemp sLo = newTempV128();
      IRTemp res = newTempV128();
      assign(sHi, getQReg128(mm));
      assign(sLo, getQReg128(nn));
      if (bitQ == 1) {
         if (imm4 == 0) {
            assign(res, mkexpr(sLo));
         } else {
            vassert(imm4 >= 1 && imm4 <= 15);
            assign(res, triop(Iop_SliceV128,
                              mkexpr(sHi), mkexpr(sLo), mkU8(imm4)));
         }
         putQReg128(dd, mkexpr(res));
         DIP("ext v%u.16b, v%u.16b, v%u.16b, #%u\n", dd, nn, mm, imm4);
      } else {
         if (imm4 >= 8) return False;
         if (imm4 == 0) {
            assign(res, mkexpr(sLo));
         } else {
            vassert(imm4 >= 1 && imm4 <= 7);
            IRTemp hi64lo64 = newTempV128();
            assign(hi64lo64, binop(Iop_InterleaveLO64x2,
                                   mkexpr(sHi), mkexpr(sLo)));
            assign(res, triop(Iop_SliceV128,
                              mkexpr(hi64lo64), mkexpr(hi64lo64),
                              mkU8(imm4)));
         }
         putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
         DIP("ext v%u.8b, v%u.8b, v%u.8b, #%u\n", dd, nn, mm, imm4);
      }
      return True;
   }

   return False;
#  undef INSN
}
static
Bool dis_AdvSIMD_TBL_TBX(/*MB_OUT*/DisResult* dres, UInt insn)
{
   /* 31  29     23  21 20 15 14  12 11 9 4
      0 q 001110 op2 0  m  0  len op 00 n d
      Decode fields: op2,len,op
   */
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   if (INSN(31,31) != 0
       || INSN(29,24) != BITS6(0,0,1,1,1,0)
       || INSN(21,21) != 0
       || INSN(15,15) != 0
       || INSN(11,10) != BITS2(0,0)) {
      return False;
   }
   UInt bitQ  = INSN(30,30);
   UInt op2   = INSN(23,22);
   UInt mm    = INSN(20,16);
   UInt len   = INSN(14,13);
   UInt bitOP = INSN(12,12);
   UInt nn    = INSN(9,5);
   UInt dd    = INSN(4,0);

   if (op2 == X00) {
      /* -------- 00,xx,0 TBL, xx register table -------- */
      /* -------- 00,xx,1 TBX, xx register table -------- */
      /* 31  28        20 15 14  12  9 4
         0q0 01110 000 m  0  len 000 n d  TBL Vd.Ta, {Vn .. V(n+len)%32}, Vm.Ta
         0q0 01110 000 m  0  len 100 n d  TBX Vd.Ta, {Vn .. V(n+len)%32}, Vm.Ta
         where Ta = 16b(q=1) or 8b(q=0)
      */
      Bool isTBX = bitOP == 1;
      /* The out-of-range values to use. */
      IRTemp oor_values = newTempV128();
      assign(oor_values, isTBX ? getQReg128(dd) : mkV128(0));
      /* src value */
      IRTemp src = newTempV128();
      assign(src, getQReg128(mm));
      /* The table values */
      IRTemp tab[4];
      UInt   i;
      for (i = 0; i <= len; i++) {
         vassert(i < 4);
         tab[i] = newTempV128();
         assign(tab[i], getQReg128((nn + i) % 32));
      }
      IRTemp res = math_TBL_TBX(tab, len, src, oor_values);
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* Ta = bitQ ==1 ? "16b" : "8b";
      const HChar* nm = isTBX ? "tbx" : "tbl";
      DIP("%s %s.%s, {v%u.16b .. v%u.16b}, %s.%s\n",
          nm, nameQReg128(dd), Ta, nn, (nn + len) % 32, nameQReg128(mm), Ta);
      return True;
   }

   return False;
#  undef INSN
}
static
Bool dis_AdvSIMD_ZIP_UZP_TRN(/*MB_OUT*/DisResult* dres, UInt insn)
{
   /* 31  29     23   21 20 15 14     11 9 4
      0 q 001110 size 0  m  0  opcode 10 n d
      Decode fields: opcode
   */
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   if (INSN(31,31) != 0
       || INSN(29,24) != BITS6(0,0,1,1,1,0)
       || INSN(21,21) != 0 || INSN(15,15) != 0 || INSN(11,10) != BITS2(1,0)) {
      return False;
   }
   UInt bitQ   = INSN(30,30);
   UInt size   = INSN(23,22);
   UInt mm     = INSN(20,16);
   UInt opcode = INSN(14,12);
   UInt nn     = INSN(9,5);
   UInt dd     = INSN(4,0);

   if (opcode == BITS3(0,0,1) || opcode == BITS3(1,0,1)) {
      /* -------- 001 UZP1 std7_std7_std7 -------- */
      /* -------- 101 UZP2 std7_std7_std7 -------- */
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      Bool isUZP1 = opcode == BITS3(0,0,1);
      IROp op     = isUZP1 ? mkVecCATEVENLANES(size)
                           : mkVecCATODDLANES(size);
      IRTemp preL = newTempV128();
      IRTemp preR = newTempV128();
      IRTemp res  = newTempV128();
      if (bitQ == 0) {
         assign(preL, binop(Iop_InterleaveLO64x2, getQReg128(mm),
                                                  getQReg128(nn)));
         assign(preR, mkexpr(preL));
      } else {
         assign(preL, getQReg128(mm));
         assign(preR, getQReg128(nn));
      }
      assign(res, binop(op, mkexpr(preL), mkexpr(preR)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* nm  = isUZP1 ? "uzp1" : "uzp2";
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   if (opcode == BITS3(0,1,0) || opcode == BITS3(1,1,0)) {
      /* -------- 010 TRN1 std7_std7_std7 -------- */
      /* -------- 110 TRN2 std7_std7_std7 -------- */
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      Bool isTRN1 = opcode == BITS3(0,1,0);
      IROp op1    = isTRN1 ? mkVecCATEVENLANES(size)
                           : mkVecCATODDLANES(size);
      IROp op2    = mkVecINTERLEAVEHI(size);
      IRTemp srcM = newTempV128();
      IRTemp srcN = newTempV128();
      IRTemp res  = newTempV128();
      assign(srcM, getQReg128(mm));
      assign(srcN, getQReg128(nn));
      assign(res, binop(op2, binop(op1, mkexpr(srcM), mkexpr(srcM)),
                             binop(op1, mkexpr(srcN), mkexpr(srcN))));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* nm  = isTRN1 ? "trn1" : "trn2";
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   if (opcode == BITS3(0,1,1) || opcode == BITS3(1,1,1)) {
      /* -------- 011 ZIP1 std7_std7_std7 -------- */
      /* -------- 111 ZIP2 std7_std7_std7 -------- */
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      Bool isZIP1 = opcode == BITS3(0,1,1);
      IROp op     = isZIP1 ? mkVecINTERLEAVELO(size)
                           : mkVecINTERLEAVEHI(size);
      IRTemp preL = newTempV128();
      IRTemp preR = newTempV128();
      IRTemp res  = newTempV128();
      if (bitQ == 0 && !isZIP1) {
         IRTemp z128 = newTempV128();
         assign(z128, mkV128(0x0000));
         // preL = Vm shifted left 32 bits
         // preR = Vn shifted left 32 bits
         assign(preL, triop(Iop_SliceV128,
                            getQReg128(mm), mkexpr(z128), mkU8(12)));
         assign(preR, triop(Iop_SliceV128,
                            getQReg128(nn), mkexpr(z128), mkU8(12)));
      } else {
         assign(preL, getQReg128(mm));
         assign(preR, getQReg128(nn));
      }
      assign(res, binop(op, mkexpr(preL), mkexpr(preR)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* nm  = isZIP1 ? "zip1" : "zip2";
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   return False;
#  undef INSN
}
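
/* For the Q == 0 variants above, the sources are first rearranged so
   that the full-width 128-bit lane ops produce the correct answer in
   the lower 64 bits, which math_MAYBE_ZERO_HI64 then keeps.  In the
   ZIP2 narrow case the pre-shift left by 32 bits (the SliceV128 by 12
   bytes against a zero vector) moves the upper 32-bit lanes of the
   narrow sources into the positions that the HI-interleave reads. */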
static
Bool dis_AdvSIMD_across_lanes(/*MB_OUT*/DisResult* dres, UInt insn)
{
   /* 31    28    23   21    16     11 9 4
      0 q u 01110 size 11000 opcode 10 n d
      Decode fields: u,size,opcode
   */
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   if (INSN(31,31) != 0
       || INSN(28,24) != BITS5(0,1,1,1,0)
       || INSN(21,17) != BITS5(1,1,0,0,0) || INSN(11,10) != BITS2(1,0)) {
      return False;
   }
   UInt bitQ   = INSN(30,30);
   UInt bitU   = INSN(29,29);
   UInt size   = INSN(23,22);
   UInt opcode = INSN(16,12);
   UInt nn     = INSN(9,5);
   UInt dd     = INSN(4,0);

   if (opcode == BITS5(0,0,0,1,1)) {
      /* -------- 0,xx,00011 SADDLV -------- */
      /* -------- 1,xx,00011 UADDLV -------- */
      /* size is the narrow size */
      if (size == X11 || (size == X10 && bitQ == 0)) return False;
      Bool   isU = bitU == 1;
      IRTemp src = newTempV128();
      assign(src, getQReg128(nn));
      /* The basic plan is to widen the lower half, and if Q = 1,
         the upper half too.  Add them together (if Q = 1), and in
         either case fold with add at twice the lane width.
      */
      IRExpr* widened
         = mkexpr(math_WIDEN_LO_OR_HI_LANES(
                     isU, False/*!fromUpperHalf*/, size, mkexpr(src)));
      if (bitQ == 1) {
         widened
            = binop(mkVecADD(size+1),
                    widened,
                    mkexpr(math_WIDEN_LO_OR_HI_LANES(
                              isU, True/*fromUpperHalf*/, size, mkexpr(src)))
              );
      }
      /* Now fold. */
      IRTemp tWi = newTempV128();
      assign(tWi, widened);
      IRTemp res = math_FOLDV(tWi, mkVecADD(size+1));
      putQReg128(dd, mkexpr(res));
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      const HChar  ch  = "bhsd"[size];
      DIP("%s %s.%c, %s.%s\n", isU ? "uaddlv" : "saddlv",
          nameQReg128(dd), ch, nameQReg128(nn), arr);
      return True;
   }

   UInt ix = 0;
   /**/ if (opcode == BITS5(0,1,0,1,0)) { ix = bitU == 0 ? 1 : 2; }
   else if (opcode == BITS5(1,1,0,1,0)) { ix = bitU == 0 ? 3 : 4; }
   else if (opcode == BITS5(1,1,0,1,1) && bitU == 0) { ix = 5; }

   if (ix > 0) {
      /* -------- 0,xx,01010: SMAXV -------- (1) */
      /* -------- 1,xx,01010: UMAXV -------- (2) */
      /* -------- 0,xx,11010: SMINV -------- (3) */
      /* -------- 1,xx,11010: UMINV -------- (4) */
      /* -------- 0,xx,11011: ADDV  -------- (5) */
      vassert(ix >= 1 && ix <= 5);
      if (size == X11) return False; // 1d,2d cases not allowed
      if (size == X10 && bitQ == 0) return False; // 2s case not allowed
      const IROp opMAXS[3]
         = { Iop_Max8Sx16, Iop_Max16Sx8, Iop_Max32Sx4 };
      const IROp opMAXU[3]
         = { Iop_Max8Ux16, Iop_Max16Ux8, Iop_Max32Ux4 };
      const IROp opMINS[3]
         = { Iop_Min8Sx16, Iop_Min16Sx8, Iop_Min32Sx4 };
      const IROp opMINU[3]
         = { Iop_Min8Ux16, Iop_Min16Ux8, Iop_Min32Ux4 };
      const IROp opADD[3]
         = { Iop_Add8x16, Iop_Add16x8, Iop_Add32x4 };

      IROp op = Iop_INVALID;
      const HChar* nm = NULL;
      switch (ix) {
         case 1: op = opMAXS[size]; nm = "smaxv"; break;
         case 2: op = opMAXU[size]; nm = "umaxv"; break;
         case 3: op = opMINS[size]; nm = "sminv"; break;
         case 4: op = opMINU[size]; nm = "uminv"; break;
         case 5: op = opADD[size];  nm = "addv";  break;
         default: vassert(0);
      }
      vassert(op != Iop_INVALID && nm != NULL);
      IRTemp tN1 = newTempV128();
      assign(tN1, getQReg128(nn));
      /* If Q == 0, we're just folding lanes in the lower half of
         the value.  In which case, copy the lower half of the
         source into the upper half, so we can then treat it the
         same as the full width case.  Except for the addition case,
         in which we have to zero out the upper half. */
      IRTemp tN2 = newTempV128();
      assign(tN2, bitQ == 0
                     ? (ix == 5 ? unop(Iop_ZeroHI64ofV128, mkexpr(tN1))
                                : mk_CatEvenLanes64x2(tN1,tN1))
                     : mkexpr(tN1));
      IRTemp res = math_FOLDV(tN2, op);
      if (res == IRTemp_INVALID)
         return False; /* means math_FOLDV
                          doesn't handle this case yet */
      putQReg128(dd, mkexpr(res));
      const IRType tys[3] = { Ity_I8, Ity_I16, Ity_I32 };
      IRType laneTy = tys[size];
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s, %s.%s\n", nm,
          nameQRegLO(dd, laneTy), nameQReg128(nn), arr);
      return True;
   }

   if ((size == X00 || size == X10)
       && (opcode == BITS5(0,1,1,0,0) || opcode == BITS5(0,1,1,1,1))) {
      /* -------- 0,00,01100: FMAXMNV s_4s -------- */
      /* -------- 0,10,01100: FMINMNV s_4s -------- */
      /* -------- 1,00,01111: FMAXV   s_4s -------- */
      /* -------- 1,10,01111: FMINV   s_4s -------- */
      /* FMAXNM, FMINNM: FIXME -- KLUDGED */
      if (bitQ == 0) return False; // Only 4s is allowed
      Bool   isMIN = (size & 2) == 2;
      Bool   isNM  = opcode == BITS5(0,1,1,0,0);
      IROp   opMXX = (isMIN ? mkVecMINF : mkVecMAXF)(2);
      IRTemp src   = newTempV128();
      assign(src, getQReg128(nn));
      IRTemp res = math_FOLDV(src, opMXX);
      putQReg128(dd, mkexpr(res));
      DIP("%s%sv s%u, %u.4s\n",
          isMIN ? "fmin" : "fmax", isNM ? "nm" : "", dd, nn);
      return True;
   }

   return False;
#  undef INSN
}
static
Bool dis_AdvSIMD_copy(/*MB_OUT*/DisResult* dres, UInt insn)
{
   /* 31     28       20   15 14   10 9 4
      0 q op 01110000 imm5 0  imm4 1  n d
      Decode fields: q,op,imm4
   */
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   if (INSN(31,31) != 0
       || INSN(28,21) != BITS8(0,1,1,1,0,0,0,0)
       || INSN(15,15) != 0 || INSN(10,10) != 1) {
      return False;
   }
   UInt bitQ  = INSN(30,30);
   UInt bitOP = INSN(29,29);
   UInt imm5  = INSN(20,16);
   UInt imm4  = INSN(14,11);
   UInt nn    = INSN(9,5);
   UInt dd    = INSN(4,0);

   /* -------- x,0,0000: DUP (element, vector) -------- */
   /* 31  28       20   15     9 4
      0q0 01110000 imm5 000001 n d  DUP Vd.T, Vn.Ts[index]
   */
   if (bitOP == 0 && imm4 == BITS4(0,0,0,0)) {
      UInt   laneNo    = 0;
      UInt   laneSzLg2 = 0;
      HChar  laneCh    = '?';
      IRTemp res = handle_DUP_VEC_ELEM(&laneNo, &laneSzLg2, &laneCh,
                                       getQReg128(nn), imm5);
      if (res == IRTemp_INVALID)
         return False;
      if (bitQ == 0 && laneSzLg2 == X11)
         return False; /* .1d case */
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* arT = nameArr_Q_SZ(bitQ, laneSzLg2);
      DIP("dup %s.%s, %s.%c[%u]\n",
           nameQReg128(dd), arT, nameQReg128(nn), laneCh, laneNo);
      return True;
   }

   /* -------- x,0,0001: DUP (general, vector) -------- */
   /* 31  28       20   15       9 4
      0q0 01110000 imm5 0 0001 1 n d  DUP Vd.T, Rn
      Q=0 writes 64, Q=1 writes 128
      imm5: xxxx1  8B(q=0)      or 16b(q=1),     R=W
            xxx10  4H(q=0)      or 8H(q=1),      R=W
            xx100  2S(q=0)      or 4S(q=1),      R=W
            x1000  Invalid(q=0) or 2D(q=1),      R=X
            x0000  Invalid(q=0) or Invalid(q=1)
      Require op=0, imm4=0001
   */
   if (bitOP == 0 && imm4 == BITS4(0,0,0,1)) {
      Bool   isQ = bitQ == 1;
      IRTemp w0  = newTemp(Ity_I64);
      const HChar* arT = "??";
      IRType laneTy = Ity_INVALID;
      if (imm5 & 1) {
         arT    = isQ ? "16b" : "8b";
         laneTy = Ity_I8;
         assign(w0, unop(Iop_8Uto64, unop(Iop_64to8, getIReg64orZR(nn))));
      }
      else if (imm5 & 2) {
         arT    = isQ ? "8h" : "4h";
         laneTy = Ity_I16;
         assign(w0, unop(Iop_16Uto64, unop(Iop_64to16, getIReg64orZR(nn))));
      }
      else if (imm5 & 4) {
         arT    = isQ ? "4s" : "2s";
         laneTy = Ity_I32;
         assign(w0, unop(Iop_32Uto64, unop(Iop_64to32, getIReg64orZR(nn))));
      }
      else if ((imm5 & 8) && isQ) {
         arT    = "2d";
         laneTy = Ity_I64;
         assign(w0, getIReg64orZR(nn));
      }
      else {
         /* invalid; leave laneTy unchanged. */
      }
      if (laneTy != Ity_INVALID) {
         IRTemp w1 = math_DUP_TO_64(w0, laneTy);
         putQReg128(dd, binop(Iop_64HLtoV128,
                              isQ ? mkexpr(w1) : mkU64(0), mkexpr(w1)));
         DIP("dup %s.%s, %s\n",
             nameQReg128(dd), arT, nameIRegOrZR(laneTy == Ity_I64, nn));
         return True;
      }
      /* invalid */
      return False;
   }

   /* -------- 1,0,0011: INS (general) -------- */
   /* 31  28       20   15     9 4
      010 01110000 imm5 000111 n d  INS Vd.Ts[ix], Rn
      where Ts,ix = case imm5 of xxxx1 -> B, xxxx
                                 xxx10 -> H, xxx
                                 xx100 -> S, xx
                                 x1000 -> D, x
   */
   if (bitQ == 1 && bitOP == 0 && imm4 == BITS4(0,0,1,1)) {
      HChar   ts     = '?';
      UInt    laneNo = 16;
      IRExpr* src    = NULL;
      if (imm5 & 1) {
         src    = unop(Iop_64to8, getIReg64orZR(nn));
         laneNo = (imm5 >> 1) & 15;
         ts     = 'b';
      }
      else if (imm5 & 2) {
         src    = unop(Iop_64to16, getIReg64orZR(nn));
         laneNo = (imm5 >> 2) & 7;
         ts     = 'h';
      }
      else if (imm5 & 4) {
         src    = unop(Iop_64to32, getIReg64orZR(nn));
         laneNo = (imm5 >> 3) & 3;
         ts     = 's';
      }
      else if (imm5 & 8) {
         src    = getIReg64orZR(nn);
         laneNo = (imm5 >> 4) & 1;
         ts     = 'd';
      }
      if (src) {
         vassert(laneNo < 16);
         putQRegLane(dd, laneNo, src);
         DIP("ins %s.%c[%u], %s\n",
             nameQReg128(dd), ts, laneNo, nameIReg64orZR(nn));
         return True;
      }
      /* invalid */
      return False;
   }

   /* -------- x,0,0101: SMOV -------- */
   /* -------- x,0,0111: UMOV -------- */
   /* 31  28        20   15     9 4
      0q0 01110 000 imm5 001111 n d  UMOV Xd/Wd, Vn.Ts[index]
      0q0 01110 000 imm5 001011 n d  SMOV Xd/Wd, Vn.Ts[index]
      dest is Xd when q==1, Wd when q==0
      UMOV:
         Ts,index,ops = case q:imm5 of
                          0:xxxx1 -> B, xxxx, 8Uto64
                          0:xxx10 -> H, xxx,  16Uto64
                          0:xx100 -> S, xx,   32Uto64
                          1:x1000 -> D, x,    copy64
                          other   -> invalid
      SMOV:
         Ts,index,ops = case q:imm5 of
                          0:xxxx1 -> B, xxxx, (32Uto64 . 8Sto32)
                          1:xxxx1 -> B, xxxx, 8Sto64
                          0:xxx10 -> H, xxx,  (32Uto64 . 16Sto32)
                          1:xxx10 -> H, xxx,  16Sto64
                          1:xx100 -> S, xx,   32Sto64
                          other   -> invalid
   */
   if (bitOP == 0 && (imm4 == BITS4(0,1,0,1) || imm4 == BITS4(0,1,1,1))) {
      Bool isU = (imm4 & 2) == 2;
      const HChar* arTs = "??";
      UInt    laneNo = 16; /* invalid */
      // Setting 'res' to non-NULL determines valid/invalid
      IRExpr* res = NULL;
      if (!bitQ && (imm5 & 1)) { // 0:xxxx1
         laneNo = (imm5 >> 1) & 15;
         IRExpr* lane = getQRegLane(nn, laneNo, Ity_I8);
         res = isU ? unop(Iop_8Uto64, lane)
                   : unop(Iop_32Uto64, unop(Iop_8Sto32, lane));
         arTs = "b";
      }
      else if (bitQ && (imm5 & 1)) { // 1:xxxx1
         laneNo = (imm5 >> 1) & 15;
         IRExpr* lane = getQRegLane(nn, laneNo, Ity_I8);
         res = isU ? NULL
                   : unop(Iop_8Sto64, lane);
         arTs = "b";
      }
      else if (!bitQ && (imm5 & 2)) { // 0:xxx10
         laneNo = (imm5 >> 2) & 7;
         IRExpr* lane = getQRegLane(nn, laneNo, Ity_I16);
         res = isU ? unop(Iop_16Uto64, lane)
                   : unop(Iop_32Uto64, unop(Iop_16Sto32, lane));
         arTs = "h";
      }
      else if (bitQ && (imm5 & 2)) { // 1:xxx10
         laneNo = (imm5 >> 2) & 7;
         IRExpr* lane = getQRegLane(nn, laneNo, Ity_I16);
         res = isU ? NULL
                   : unop(Iop_16Sto64, lane);
         arTs = "h";
      }
      else if (!bitQ && (imm5 & 4)) { // 0:xx100
         laneNo = (imm5 >> 3) & 3;
         IRExpr* lane = getQRegLane(nn, laneNo, Ity_I32);
         res = isU ? unop(Iop_32Uto64, lane)
                   : NULL;
         arTs = "s";
      }
      else if (bitQ && (imm5 & 4)) { // 1:xx100
         laneNo = (imm5 >> 3) & 3;
         IRExpr* lane = getQRegLane(nn, laneNo, Ity_I32);
         res = isU ? NULL
                   : unop(Iop_32Sto64, lane);
         arTs = "s";
      }
      else if (bitQ && (imm5 & 8)) { // 1:x1000
         laneNo = (imm5 >> 4) & 1;
         IRExpr* lane = getQRegLane(nn, laneNo, Ity_I64);
         res = isU ? lane
                   : NULL;
         arTs = "d";
      }
      if (res) {
         vassert(laneNo < 16);
         putIReg64orZR(dd, res);
         DIP("%cmov %s, %s.%s[%u]\n", isU ? 'u' : 's',
             nameIRegOrZR(bitQ == 1, dd),
             nameQReg128(nn), arTs, laneNo);
         return True;
      }
      /* invalid */
      return False;
   }

   /* -------- 1,1,xxxx: INS (element) -------- */
   /* 31  28       20     14   9 4
      011 01110000 imm5 0 imm4 n d  INS Vd.Ts[ix1], Vn.Ts[ix2]
      where Ts,ix1,ix2
               = case imm5 of xxxx1 -> B, xxxx, imm4[3:0]
                              xxx10 -> H, xxx,  imm4[3:1]
                              xx100 -> S, xx,   imm4[3:2]
                              x1000 -> D, x,    imm4[3:3]
   */
   if (bitQ == 1 && bitOP == 1) {
      HChar  ts  = '?';
      IRType ity = Ity_INVALID;
      UInt   ix1 = 16;
      UInt   ix2 = 16;
      if (imm5 & 1) {
         ts  = 'b';
         ity = Ity_I8;
         ix1 = (imm5 >> 1) & 15;
         ix2 = (imm4 >> 0) & 15;
      }
      else if (imm5 & 2) {
         ts  = 'h';
         ity = Ity_I16;
         ix1 = (imm5 >> 2) & 7;
         ix2 = (imm4 >> 1) & 7;
      }
      else if (imm5 & 4) {
         ts  = 's';
         ity = Ity_I32;
         ix1 = (imm5 >> 3) & 3;
         ix2 = (imm4 >> 2) & 3;
      }
      else if (imm5 & 8) {
         ts  = 'd';
         ity = Ity_I64;
         ix1 = (imm5 >> 4) & 1;
         ix2 = (imm4 >> 3) & 1;
      }
      if (ity != Ity_INVALID) {
         vassert(ix1 < 16);
         vassert(ix2 < 16);
         putQRegLane(dd, ix1, getQRegLane(nn, ix2, ity));
         DIP("ins %s.%c[%u], %s.%c[%u]\n",
             nameQReg128(dd), ts, ix1, nameQReg128(nn), ts, ix2);
         return True;
      }
      /* invalid */
      return False;
   }

   return False;
#  undef INSN
}
static
Bool dis_AdvSIMD_modified_immediate(/*MB_OUT*/DisResult* dres, UInt insn)
{
   /* 31    28          18  15    11 9     4
      0q op 01111 00000 abc cmode 01 defgh d
      Decode fields: q,op,cmode
      Bit 11 is really "o2", but it is always zero.
   */
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   if (INSN(31,31) != 0
       || INSN(28,19) != BITS10(0,1,1,1,1,0,0,0,0,0)
       || INSN(11,10) != BITS2(0,1)) {
      return False;
   }
   UInt bitQ     = INSN(30,30);
   UInt bitOP    = INSN(29,29);
   UInt cmode    = INSN(15,12);
   UInt abcdefgh = (INSN(18,16) << 5) | INSN(9,5);
   UInt dd       = INSN(4,0);

   ULong imm64lo  = 0;
   UInt  op_cmode = (bitOP << 4) | cmode;
   Bool  ok       = False;
   Bool  isORR    = False;
   Bool  isBIC    = False;
   Bool  isMOV    = False;
   Bool  isMVN    = False;
   Bool  isFMOV   = False;
   switch (op_cmode) {
      /* -------- x,0,0000 MOVI 32-bit shifted imm -------- */
      /* -------- x,0,0010 MOVI 32-bit shifted imm -------- */
      /* -------- x,0,0100 MOVI 32-bit shifted imm -------- */
      /* -------- x,0,0110 MOVI 32-bit shifted imm -------- */
      case BITS5(0,0,0,0,0): case BITS5(0,0,0,1,0):
      case BITS5(0,0,1,0,0): case BITS5(0,0,1,1,0): // 0:0xx0
         ok = True; isMOV = True; break;

      /* -------- x,0,0001 ORR (vector, immediate) 32-bit -------- */
      /* -------- x,0,0011 ORR (vector, immediate) 32-bit -------- */
      /* -------- x,0,0101 ORR (vector, immediate) 32-bit -------- */
      /* -------- x,0,0111 ORR (vector, immediate) 32-bit -------- */
      case BITS5(0,0,0,0,1): case BITS5(0,0,0,1,1):
      case BITS5(0,0,1,0,1): case BITS5(0,0,1,1,1): // 0:0xx1
         ok = True; isORR = True; break;

      /* -------- x,0,1000 MOVI 16-bit shifted imm -------- */
      /* -------- x,0,1010 MOVI 16-bit shifted imm -------- */
      case BITS5(0,1,0,0,0): case BITS5(0,1,0,1,0): // 0:10x0
         ok = True; isMOV = True; break;

      /* -------- x,0,1001 ORR (vector, immediate) 16-bit -------- */
      /* -------- x,0,1011 ORR (vector, immediate) 16-bit -------- */
      case BITS5(0,1,0,0,1): case BITS5(0,1,0,1,1): // 0:10x1
         ok = True; isORR = True; break;

      /* -------- x,0,1100 MOVI 32-bit shifting ones -------- */
      /* -------- x,0,1101 MOVI 32-bit shifting ones -------- */
      case BITS5(0,1,1,0,0): case BITS5(0,1,1,0,1): // 0:110x
         ok = True; isMOV = True; break;

      /* -------- x,0,1110 MOVI 8-bit -------- */
      case BITS5(0,1,1,1,0):
         ok = True; isMOV = True; break;

      /* -------- x,0,1111 FMOV (vector, immediate, F32) -------- */
      case BITS5(0,1,1,1,1): // 0:1111
         ok = True; isFMOV = True; break;

      /* -------- x,1,0000 MVNI 32-bit shifted imm -------- */
      /* -------- x,1,0010 MVNI 32-bit shifted imm -------- */
      /* -------- x,1,0100 MVNI 32-bit shifted imm -------- */
      /* -------- x,1,0110 MVNI 32-bit shifted imm -------- */
      case BITS5(1,0,0,0,0): case BITS5(1,0,0,1,0):
      case BITS5(1,0,1,0,0): case BITS5(1,0,1,1,0): // 1:0xx0
         ok = True; isMVN = True; break;

      /* -------- x,1,0001 BIC (vector, immediate) 32-bit -------- */
      /* -------- x,1,0011 BIC (vector, immediate) 32-bit -------- */
      /* -------- x,1,0101 BIC (vector, immediate) 32-bit -------- */
      /* -------- x,1,0111 BIC (vector, immediate) 32-bit -------- */
      case BITS5(1,0,0,0,1): case BITS5(1,0,0,1,1):
      case BITS5(1,0,1,0,1): case BITS5(1,0,1,1,1): // 1:0xx1
         ok = True; isBIC = True; break;

      /* -------- x,1,1000 MVNI 16-bit shifted imm -------- */
      /* -------- x,1,1010 MVNI 16-bit shifted imm -------- */
      case BITS5(1,1,0,0,0): case BITS5(1,1,0,1,0): // 1:10x0
         ok = True; isMVN = True; break;

      /* -------- x,1,1001 BIC (vector, immediate) 16-bit -------- */
      /* -------- x,1,1011 BIC (vector, immediate) 16-bit -------- */
      case BITS5(1,1,0,0,1): case BITS5(1,1,0,1,1): // 1:10x1
         ok = True; isBIC = True; break;

      /* -------- x,1,1100 MVNI 32-bit shifting ones -------- */
      /* -------- x,1,1101 MVNI 32-bit shifting ones -------- */
      case BITS5(1,1,1,0,0): case BITS5(1,1,1,0,1): // 1:110x
         ok = True; isMVN = True; break;

      /* -------- 0,1,1110 MOVI 64-bit scalar -------- */
      /* -------- 1,1,1110 MOVI 64-bit vector -------- */
      case BITS5(1,1,1,1,0):
         ok = True; isMOV = True; break;

      /* -------- 1,1,1111 FMOV (vector, immediate, F64) -------- */
      case BITS5(1,1,1,1,1): // 1:1111
         ok = bitQ == 1; isFMOV = True; break;

      default:
         break;
   }
   if (ok) {
      vassert(1 == (isMOV ? 1 : 0) + (isMVN ? 1 : 0)
                   + (isORR ? 1 : 0) + (isBIC ? 1 : 0) + (isFMOV ? 1 : 0));
      ok = AdvSIMDExpandImm(&imm64lo, bitOP, cmode, abcdefgh);
   }
   if (ok) {
      if (isORR || isBIC) {
         ULong inv
            = isORR ? 0ULL : ~0ULL;
         IRExpr* immV128
            = binop(Iop_64HLtoV128, mkU64(inv ^ imm64lo),
                                    mkU64(inv ^ imm64lo));
         IRExpr* res
            = binop(isORR ? Iop_OrV128 : Iop_AndV128, getQReg128(dd),
                    immV128);
         const HChar* nm = isORR ? "orr" : "bic";
         if (bitQ == 0) {
            putQReg128(dd, unop(Iop_ZeroHI64ofV128, res));
            DIP("%s %s.1d, %016llx\n", nm, nameQReg128(dd), imm64lo);
         } else {
            putQReg128(dd, res);
            DIP("%s %s.2d, #0x%016llx'%016llx\n", nm,
                nameQReg128(dd), imm64lo, imm64lo);
         }
      }
      else if (isMOV || isMVN || isFMOV) {
         if (isMVN) imm64lo = ~imm64lo;
         ULong   imm64hi = bitQ == 0 ? 0 : imm64lo;
         IRExpr* immV128 = binop(Iop_64HLtoV128, mkU64(imm64hi),
                                 mkU64(imm64lo));
         putQReg128(dd, immV128);
         DIP("mov %s, #0x%016llx'%016llx\n", nameQReg128(dd),
             imm64hi, imm64lo);
      }
      return True;
   }
   /* else fall through */

   return False;
#  undef INSN
}
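
/* The |inv| trick above folds ORR and BIC into one code path: inv is
   0 for ORR, so inv ^ imm64lo is the expanded immediate itself and
   the op is OrV128; inv is ~0 for BIC, so inv ^ imm64lo is the
   bitwise complement and the AndV128 computes Vd & ~imm, i.e.
   bit-clear. */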
static
Bool dis_AdvSIMD_scalar_copy(/*MB_OUT*/DisResult* dres, UInt insn)
{
   /* 31    28       20   15 14   10 9 4
      01 op 11110000 imm5 0  imm4 1  n d
      Decode fields: op,imm4
   */
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   if (INSN(31,30) != BITS2(0,1)
       || INSN(28,21) != BITS8(1,1,1,1,0,0,0,0)
       || INSN(15,15) != 0 || INSN(10,10) != 1) {
      return False;
   }
   UInt bitOP = INSN(29,29);
   UInt imm5  = INSN(20,16);
   UInt imm4  = INSN(14,11);
   UInt nn    = INSN(9,5);
   UInt dd    = INSN(4,0);

   if (bitOP == 0 && imm4 == BITS4(0,0,0,0)) {
      /* -------- 0,0000 DUP (element, scalar) -------- */
      IRTemp w0 = newTemp(Ity_I64);
      const HChar* arTs = "??";
      IRType laneTy = Ity_INVALID;
      UInt   laneNo = 16; /* invalid */
      if (imm5 & 1) {
         arTs   = "b";
         laneNo = (imm5 >> 1) & 15;
         laneTy = Ity_I8;
         assign(w0, unop(Iop_8Uto64, getQRegLane(nn, laneNo, laneTy)));
      }
      else if (imm5 & 2) {
         arTs   = "h";
         laneNo = (imm5 >> 2) & 7;
         laneTy = Ity_I16;
         assign(w0, unop(Iop_16Uto64, getQRegLane(nn, laneNo, laneTy)));
      }
      else if (imm5 & 4) {
         arTs   = "s";
         laneNo = (imm5 >> 3) & 3;
         laneTy = Ity_I32;
         assign(w0, unop(Iop_32Uto64, getQRegLane(nn, laneNo, laneTy)));
      }
      else if (imm5 & 8) {
         arTs   = "d";
         laneNo = (imm5 >> 4) & 1;
         laneTy = Ity_I64;
         assign(w0, getQRegLane(nn, laneNo, laneTy));
      }
      else {
         /* invalid; leave laneTy unchanged. */
      }
      if (laneTy != Ity_INVALID) {
         vassert(laneNo < 16);
         putQReg128(dd, binop(Iop_64HLtoV128, mkU64(0), mkexpr(w0)));
         DIP("dup %s, %s.%s[%u]\n",
             nameQRegLO(dd, laneTy), nameQReg128(nn), arTs, laneNo);
         return True;
      }
      /* else fall through */
   }

   return False;
#  undef INSN
}
static
Bool dis_AdvSIMD_scalar_pairwise(/*MB_OUT*/DisResult* dres, UInt insn)
{
   /* 31   28    23 21    16     11 9 4
      01 u 11110 sz 11000 opcode 10 n d
      Decode fields: u,sz,opcode
   */
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   if (INSN(31,30) != BITS2(0,1)
       || INSN(28,24) != BITS5(1,1,1,1,0)
       || INSN(21,17) != BITS5(1,1,0,0,0)
       || INSN(11,10) != BITS2(1,0)) {
      return False;
   }
   UInt bitU   = INSN(29,29);
   UInt sz     = INSN(23,22);
   UInt opcode = INSN(16,12);
   UInt nn     = INSN(9,5);
   UInt dd     = INSN(4,0);

   if (bitU == 0 && sz == X11 && opcode == BITS5(1,1,0,1,1)) {
      /* -------- 0,11,11011 ADDP d_2d -------- */
      IRTemp xy = newTempV128();
      IRTemp xx = newTempV128();
      assign(xy, getQReg128(nn));
      assign(xx, binop(Iop_InterleaveHI64x2, mkexpr(xy), mkexpr(xy)));
      putQReg128(dd, unop(Iop_ZeroHI64ofV128,
                          binop(Iop_Add64x2, mkexpr(xy), mkexpr(xx))));
      DIP("addp d%u, %s.2d\n", dd, nameQReg128(nn));
      return True;
   }

   if (bitU == 1 && sz <= X01 && opcode == BITS5(0,1,1,0,1)) {
      /* -------- 1,00,01101 ADDP s_2s -------- */
      /* -------- 1,01,01101 ADDP d_2d -------- */
      Bool   isD   = sz == X01;
      IROp   opZHI = mkVecZEROHIxxOFV128(isD ? 3 : 2);
      IROp   opADD = mkVecADDF(isD ? 3 : 2);
      IRTemp src   = newTempV128();
      IRTemp argL  = newTempV128();
      IRTemp argR  = newTempV128();
      assign(src, getQReg128(nn));
      assign(argL, unop(opZHI, mkexpr(src)));
      assign(argR, unop(opZHI, triop(Iop_SliceV128, mkexpr(src), mkexpr(src),
                                                    mkU8(isD ? 8 : 4))));
      putQReg128(dd, unop(opZHI,
                          triop(opADD, mkexpr(mk_get_IR_rounding_mode()),
                                       mkexpr(argL), mkexpr(argR))));
      DIP(isD ? "faddp d%u, v%u.2d\n" : "faddp s%u, v%u.2s\n", dd, nn);
      return True;
   }

   if (bitU == 1
       && (opcode == BITS5(0,1,1,0,0) || opcode == BITS5(0,1,1,1,1))) {
      /* -------- 1,0x,01100 FMAXNMP d_2d, s_2s -------- */
      /* -------- 1,1x,01100 FMINNMP d_2d, s_2s -------- */
      /* -------- 1,0x,01111 FMAXP   d_2d, s_2s -------- */
      /* -------- 1,1x,01111 FMINP   d_2d, s_2s -------- */
      /* FMAXNM, FMINNM: FIXME -- KLUDGED */
      Bool isD   = (sz & 1) == 1;
      Bool isMIN = (sz & 2) == 2;
      Bool isNM  = opcode == BITS5(0,1,1,0,0);
      IROp opZHI = mkVecZEROHIxxOFV128(isD ? 3 : 2);
      IROp opMXX = (isMIN ? mkVecMINF : mkVecMAXF)(isD ? 3 : 2);
      IRTemp src  = newTempV128();
      IRTemp argL = newTempV128();
      IRTemp argR = newTempV128();
      assign(src, getQReg128(nn));
      assign(argL, unop(opZHI, mkexpr(src)));
      assign(argR, unop(opZHI, triop(Iop_SliceV128, mkexpr(src), mkexpr(src),
                                                    mkU8(isD ? 8 : 4))));
      putQReg128(dd, unop(opZHI,
                          binop(opMXX, mkexpr(argL), mkexpr(argR))));
      HChar c = isD ? 'd' : 's';
      DIP("%s%sp %c%u, v%u.2%c\n",
          isMIN ? "fmin" : "fmax", isNM ? "nm" : "", c, dd, nn, c);
      return True;
   }

   return False;
#  undef INSN
}
9507 Bool
dis_AdvSIMD_scalar_shift_by_imm(/*MB_OUT*/DisResult
* dres
, UInt insn
)
9509 /* 31 28 22 18 15 10 9 4
9510 01 u 111110 immh immb opcode 1 n d
9511 Decode fields: u,immh,opcode
9513 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
9514 if (INSN(31,30) != BITS2(0,1)
9515 || INSN(28,23) != BITS6(1,1,1,1,1,0) || INSN(10,10) != 1) {
9518 UInt bitU
= INSN(29,29);
9519 UInt immh
= INSN(22,19);
9520 UInt immb
= INSN(18,16);
9521 UInt opcode
= INSN(15,11);
9522 UInt nn
= INSN(9,5);
9523 UInt dd
= INSN(4,0);
9524 UInt immhb
= (immh
<< 3) | immb
;
9527 && (opcode
== BITS5(0,0,0,0,0) || opcode
== BITS5(0,0,0,1,0))) {
9528 /* -------- 0,1xxx,00000 SSHR d_d_#imm -------- */
9529 /* -------- 1,1xxx,00000 USHR d_d_#imm -------- */
9530 /* -------- 0,1xxx,00010 SSRA d_d_#imm -------- */
9531 /* -------- 1,1xxx,00010 USRA d_d_#imm -------- */
9532 Bool isU
= bitU
== 1;
9533 Bool isAcc
= opcode
== BITS5(0,0,0,1,0);
9534 UInt sh
= 128 - immhb
;
9535 vassert(sh
>= 1 && sh
<= 64);
9536 IROp op
= isU
? Iop_ShrN64x2
: Iop_SarN64x2
;
9537 IRExpr
* src
= getQReg128(nn
);
9538 IRTemp shf
= newTempV128();
9539 IRTemp res
= newTempV128();
9540 if (sh
== 64 && isU
) {
9541 assign(shf
, mkV128(0x0000));
9548 assign(shf
, binop(op
, src
, mkU8(sh
- nudge
)));
9550 assign(res
, isAcc
? binop(Iop_Add64x2
, getQReg128(dd
), mkexpr(shf
))
9552 putQReg128(dd
, unop(Iop_ZeroHI64ofV128
, mkexpr(res
)));
9553 const HChar
* nm
= isAcc
? (isU
? "usra" : "ssra")
9554 : (isU
? "ushr" : "sshr");
9555 DIP("%s d%u, d%u, #%u\n", nm
, dd
, nn
, sh
);
9560 && (opcode
== BITS5(0,0,1,0,0) || opcode
== BITS5(0,0,1,1,0))) {
9561 /* -------- 0,1xxx,00100 SRSHR d_d_#imm -------- */
9562 /* -------- 1,1xxx,00100 URSHR d_d_#imm -------- */
9563 /* -------- 0,1xxx,00110 SRSRA d_d_#imm -------- */
9564 /* -------- 1,1xxx,00110 URSRA d_d_#imm -------- */
9565 Bool isU
= bitU
== 1;
9566 Bool isAcc
= opcode
== BITS5(0,0,1,1,0);
9567 UInt sh
= 128 - immhb
;
9568 vassert(sh
>= 1 && sh
<= 64);
9569 IROp op
= isU
? Iop_Rsh64Ux2
: Iop_Rsh64Sx2
;
9570 vassert(sh
>= 1 && sh
<= 64);
9571 IRExpr
* src
= getQReg128(nn
);
9572 IRTemp imm8
= newTemp(Ity_I8
);
9573 assign(imm8
, mkU8((UChar
)(-sh
)));
9574 IRExpr
* amt
= mkexpr(math_DUP_TO_V128(imm8
, Ity_I8
));
9575 IRTemp shf
= newTempV128();
9576 IRTemp res
= newTempV128();
9577 assign(shf
, binop(op
, src
, amt
));
9578 assign(res
, isAcc
? binop(Iop_Add64x2
, getQReg128(dd
), mkexpr(shf
))
9580 putQReg128(dd
, unop(Iop_ZeroHI64ofV128
, mkexpr(res
)));
9581 const HChar
* nm
= isAcc
? (isU
? "ursra" : "srsra")
9582 : (isU
? "urshr" : "srshr");
9583 DIP("%s d%u, d%u, #%u\n", nm
, dd
, nn
, sh
);
9587 if (bitU
== 1 && (immh
& 8) == 8 && opcode
== BITS5(0,1,0,0,0)) {
9588 /* -------- 1,1xxx,01000 SRI d_d_#imm -------- */
9589 UInt sh
= 128 - immhb
;
9590 vassert(sh
>= 1 && sh
<= 64);
9592 putQReg128(dd
, unop(Iop_ZeroHI64ofV128
, getQReg128(dd
)));
9594 /* sh is in range 1 .. 63 */
9595 ULong nmask
= (ULong
)(((Long
)0x8000000000000000ULL
) >> (sh
-1));
9596 IRExpr
* nmaskV
= binop(Iop_64HLtoV128
, mkU64(nmask
), mkU64(nmask
));
9597 IRTemp res
= newTempV128();
9598 assign(res
, binop(Iop_OrV128
,
9599 binop(Iop_AndV128
, getQReg128(dd
), nmaskV
),
9600 binop(Iop_ShrN64x2
, getQReg128(nn
), mkU8(sh
))));
9601 putQReg128(dd
, unop(Iop_ZeroHI64ofV128
, mkexpr(res
)));
9603 DIP("sri d%u, d%u, #%u\n", dd
, nn
, sh
);
9607 if (bitU
== 0 && (immh
& 8) == 8 && opcode
== BITS5(0,1,0,1,0)) {
9608 /* -------- 0,1xxx,01010 SHL d_d_#imm -------- */
9609 UInt sh
= immhb
- 64;
9610 vassert(sh
>= 0 && sh
< 64);
9612 unop(Iop_ZeroHI64ofV128
,
9613 sh
== 0 ? getQReg128(nn
)
9614 : binop(Iop_ShlN64x2
, getQReg128(nn
), mkU8(sh
))));
9615 DIP("shl d%u, d%u, #%u\n", dd
, nn
, sh
);
   if (bitU == 1 && (immh & 8) == 8 && opcode == BITS5(0,1,0,1,0)) {
      /* -------- 1,1xxx,01010 SLI d_d_#imm -------- */
      UInt sh = immhb - 64;
      vassert(sh >= 0 && sh < 64);
      if (sh == 0) {
         putQReg128(dd, unop(Iop_ZeroHI64ofV128, getQReg128(nn)));
      } else {
         /* sh is in range 1 .. 63 */
         ULong   nmask  = (1ULL << sh) - 1;
         IRExpr* nmaskV = binop(Iop_64HLtoV128, mkU64(nmask), mkU64(nmask));
         IRTemp  res    = newTempV128();
         assign(res, binop(Iop_OrV128,
                           binop(Iop_AndV128, getQReg128(dd), nmaskV),
                           binop(Iop_ShlN64x2, getQReg128(nn), mkU8(sh))));
         putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
      }
      DIP("sli d%u, d%u, #%u\n", dd, nn, sh);
      return True;
   }

   if (opcode == BITS5(0,1,1,1,0)
       || (bitU == 1 && opcode == BITS5(0,1,1,0,0))) {
      /* -------- 0,01110  SQSHL  #imm -------- */
      /* -------- 1,01110  UQSHL  #imm -------- */
      /* -------- 1,01100  SQSHLU #imm -------- */
      UInt size  = 0;
      UInt shift = 0;
      Bool ok    = getLaneInfo_IMMH_IMMB(&shift, &size, immh, immb);
      if (!ok) return False;
      vassert(size >= 0 && size <= 3);
      /* The shift encoding has opposite sign for the leftwards case.
         Adjust shift to compensate. */
      UInt lanebits = 8 << size;
      shift = lanebits - shift;
      vassert(shift >= 0 && shift < lanebits);
      const HChar* nm = NULL;
      /**/ if (bitU == 0 && opcode == BITS5(0,1,1,1,0)) nm = "sqshl";
      else if (bitU == 1 && opcode == BITS5(0,1,1,1,0)) nm = "uqshl";
      else if (bitU == 1 && opcode == BITS5(0,1,1,0,0)) nm = "sqshlu";
      else vassert(0);
      IRTemp qDiff1 = IRTemp_INVALID;
      IRTemp qDiff2 = IRTemp_INVALID;
      IRTemp res    = IRTemp_INVALID;
      IRTemp src    = math_ZERO_ALL_EXCEPT_LOWEST_LANE(size, getQReg128(nn));
      /* This relies on the fact that the zeroed out lanes generate zeroed
         result lanes and don't saturate, so there's no point in trimming
         the resulting res, qDiff1 or qDiff2 values. */
      math_QSHL_IMM(&res, &qDiff1, &qDiff2, src, size, shift, nm);
      putQReg128(dd, mkexpr(res));
      updateQCFLAGwithDifference(qDiff1, qDiff2);
      const HChar arr = "bhsd"[size];
      DIP("%s %c%u, %c%u, #%u\n", nm, arr, dd, arr, nn, shift);
      return True;
   }
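
   /* In the narrowing cases below, each QAND*NARROW op returns a
      (result, Q) pair in a single V128: the narrowed result in the
      low 64 bits and a saturation indicator in the high 64 bits.
      Spreading the high half across a whole vector and comparing
      against zero hence sets QCFLAG exactly when some lane
      saturated. */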
   if (opcode == BITS5(1,0,0,1,0) || opcode == BITS5(1,0,0,1,1)
       || (bitU == 1
           && (opcode == BITS5(1,0,0,0,0) || opcode == BITS5(1,0,0,0,1)))) {
      /* -------- 0,10010   SQSHRN #imm -------- */
      /* -------- 1,10010   UQSHRN #imm -------- */
      /* -------- 0,10011  SQRSHRN #imm -------- */
      /* -------- 1,10011  UQRSHRN #imm -------- */
      /* -------- 1,10000  SQSHRUN #imm -------- */
      /* -------- 1,10001 SQRSHRUN #imm -------- */
      UInt size  = 0;
      UInt shift = 0;
      Bool ok    = getLaneInfo_IMMH_IMMB(&shift, &size, immh, immb);
      if (!ok || size == X11) return False;
      vassert(size >= X00 && size <= X10);
      vassert(shift >= 1 && shift <= (8 << size));
      const HChar* nm = "??";
      IROp op = Iop_INVALID;
      /* Decide on the name and the operation. */
      /**/ if (bitU == 0 && opcode == BITS5(1,0,0,1,0)) {
         nm = "sqshrn";  op = mkVecQANDqsarNNARROWSS(size);
      }
      else if (bitU == 1 && opcode == BITS5(1,0,0,1,0)) {
         nm = "uqshrn";  op = mkVecQANDqshrNNARROWUU(size);
      }
      else if (bitU == 0 && opcode == BITS5(1,0,0,1,1)) {
         nm = "sqrshrn"; op = mkVecQANDqrsarNNARROWSS(size);
      }
      else if (bitU == 1 && opcode == BITS5(1,0,0,1,1)) {
         nm = "uqrshrn"; op = mkVecQANDqrshrNNARROWUU(size);
      }
      else if (bitU == 1 && opcode == BITS5(1,0,0,0,0)) {
         nm = "sqshrun"; op = mkVecQANDqsarNNARROWSU(size);
      }
      else if (bitU == 1 && opcode == BITS5(1,0,0,0,1)) {
         nm = "sqrshrun"; op = mkVecQANDqrsarNNARROWSU(size);
      }
      else vassert(0);
      /* Compute the result (Q, shifted value) pair. */
      IRTemp src128 = math_ZERO_ALL_EXCEPT_LOWEST_LANE(size+1, getQReg128(nn));
      IRTemp pair   = newTempV128();
      assign(pair, binop(op, mkexpr(src128), mkU8(shift)));
      /* Update the result reg */
      IRTemp res64in128 = newTempV128();
      assign(res64in128, unop(Iop_ZeroHI64ofV128, mkexpr(pair)));
      putQReg128(dd, mkexpr(res64in128));
      /* Update the Q flag. */
      IRTemp q64q64 = newTempV128();
      assign(q64q64, binop(Iop_InterleaveHI64x2, mkexpr(pair), mkexpr(pair)));
      IRTemp z128 = newTempV128();
      assign(z128, mkV128(0x0000));
      updateQCFLAGwithDifference(q64q64, z128);
      const HChar arrNarrow = "bhsd"[size];
      const HChar arrWide   = "bhsd"[size+1];
      DIP("%s %c%u, %c%u, #%u\n", nm, arrNarrow, dd, arrWide, nn, shift);
      return True;
   }
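
   /* The fixed-point converts below model SCVTF/UCVTF #fbits as an
      integer-to-FP convert followed by a multiply by 2^-fbits, and
      FCVTZS/FCVTZU #fbits as a multiply by 2^+fbits followed by a
      round-to-zero FP-to-integer convert. */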
   if (immh >= BITS4(0,1,0,0) && opcode == BITS5(1,1,1,0,0)) {
      /* -------- 0,!=00xx,11100 SCVTF d_d_imm, s_s_imm -------- */
      /* -------- 1,!=00xx,11100 UCVTF d_d_imm, s_s_imm -------- */
      UInt size  = 0;
      UInt fbits = 0;
      Bool ok    = getLaneInfo_IMMH_IMMB(&fbits, &size, immh, immb);
      /* The following holds because immh is never zero. */
      vassert(ok);
      /* The following holds because immh >= 0100. */
      vassert(size == X10 || size == X11);
      Bool isD = size == X11;
      Bool isU = bitU == 1;
      vassert(fbits >= 1 && fbits <= (isD ? 64 : 32));
      Double  scale  = two_to_the_minus(fbits);
      IRExpr* scaleE = isD ? IRExpr_Const(IRConst_F64(scale))
                           : IRExpr_Const(IRConst_F32( (Float)scale ));
      IROp   opMUL = isD ? Iop_MulF64 : Iop_MulF32;
      IROp   opCVT = isU ? (isD ? Iop_I64UtoF64 : Iop_I32UtoF32)
                         : (isD ? Iop_I64StoF64 : Iop_I32StoF32);
      IRType tyF = isD ? Ity_F64 : Ity_F32;
      IRType tyI = isD ? Ity_I64 : Ity_I32;
      IRTemp src = newTemp(tyI);
      IRTemp res = newTemp(tyF);
      IRTemp rm  = mk_get_IR_rounding_mode();
      assign(src, getQRegLane(nn, 0, tyI));
      assign(res, triop(opMUL, mkexpr(rm),
                        binop(opCVT, mkexpr(rm), mkexpr(src)), scaleE));
      putQRegLane(dd, 0, mkexpr(res));
      if (!isD) {
         putQRegLane(dd, 1, mkU32(0));
      }
      putQRegLane(dd, 1, mkU64(0));
      const HChar ch = isD ? 'd' : 's';
      DIP("%s %c%u, %c%u, #%u\n", isU ? "ucvtf" : "scvtf",
          ch, dd, ch, nn, fbits);
      return True;
   }

   if (immh >= BITS4(0,1,0,0) && opcode == BITS5(1,1,1,1,1)) {
      /* -------- 0,!=00xx,11111 FCVTZS d_d_imm, s_s_imm -------- */
      /* -------- 1,!=00xx,11111 FCVTZU d_d_imm, s_s_imm -------- */
      UInt size  = 0;
      UInt fbits = 0;
      Bool ok    = getLaneInfo_IMMH_IMMB(&fbits, &size, immh, immb);
      /* The following holds because immh is never zero. */
      vassert(ok);
      /* The following holds because immh >= 0100. */
      vassert(size == X10 || size == X11);
      Bool isD = size == X11;
      Bool isU = bitU == 1;
      vassert(fbits >= 1 && fbits <= (isD ? 64 : 32));
      Double  scale  = two_to_the_plus(fbits);
      IRExpr* scaleE = isD ? IRExpr_Const(IRConst_F64(scale))
                           : IRExpr_Const(IRConst_F32( (Float)scale ));
      IROp   opMUL = isD ? Iop_MulF64 : Iop_MulF32;
      IROp   opCVT = isU ? (isD ? Iop_F64toI64U : Iop_F32toI32U)
                         : (isD ? Iop_F64toI64S : Iop_F32toI32S);
      IRType tyF = isD ? Ity_F64 : Ity_F32;
      IRType tyI = isD ? Ity_I64 : Ity_I32;
      IRTemp src = newTemp(tyF);
      IRTemp res = newTemp(tyI);
      IRTemp rm  = newTemp(Ity_I32);
      assign(src, getQRegLane(nn, 0, tyF));
      assign(rm,  mkU32(Irrm_ZERO));
      assign(res, binop(opCVT, mkexpr(rm),
                        triop(opMUL, mkexpr(rm), mkexpr(src), scaleE)));
      putQRegLane(dd, 0, mkexpr(res));
      if (!isD) {
         putQRegLane(dd, 1, mkU32(0));
      }
      putQRegLane(dd, 1, mkU64(0));
      const HChar ch = isD ? 'd' : 's';
      DIP("%s %c%u, %c%u, #%u\n", isU ? "fcvtzu" : "fcvtzs",
          ch, dd, ch, nn, fbits);
      return True;
   }

   return False;
#  undef INSN
}


static
Bool dis_AdvSIMD_scalar_three_different(/*MB_OUT*/DisResult* dres, UInt insn)
{
   /* 31 29 28    23   21 20 15     11 9 4
      01 U  11110 size 1  m  opcode 00 n d
      Decode fields: u,opcode
   */
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   if (INSN(31,30) != BITS2(0,1)
       || INSN(28,24) != BITS5(1,1,1,1,0)
       || INSN(21,21) != 1
       || INSN(11,10) != BITS2(0,0)) {
      return False;
   }
   UInt bitU   = INSN(29,29);
   UInt size   = INSN(23,22);
   UInt mm     = INSN(20,16);
   UInt opcode = INSN(15,12);
   UInt nn     = INSN(9,5);
   UInt dd     = INSN(4,0);
   vassert(size < 4);
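
   /* ks encodes the accumulation variant: 0 = plain multiply
      (SQDMULL), 1 = accumulate (SQDMLAL), 2 = subtract (SQDMLSL).
      "mas"[ks] hands the same choice to math_SQDMULL_ACC as a
      one-character tag. */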
   if (bitU == 0
       && (opcode == BITS4(1,1,0,1)
           || opcode == BITS4(1,0,0,1) || opcode == BITS4(1,0,1,1))) {
      /* -------- 0,1101  SQDMULL -------- */ // 0 (ks)
      /* -------- 0,1001  SQDMLAL -------- */ // 1
      /* -------- 0,1011  SQDMLSL -------- */ // 2
      /* Widens, and size refers to the narrowed lanes. */
      UInt ks = 3;
      switch (opcode) {
         case BITS4(1,1,0,1): ks = 0; break;
         case BITS4(1,0,0,1): ks = 1; break;
         case BITS4(1,0,1,1): ks = 2; break;
         default: vassert(0);
      }
      vassert(ks >= 0 && ks <= 2);
      if (size == X00 || size == X11) return False;
      IRTemp vecN, vecM, vecD, res, sat1q, sat1n, sat2q, sat2n;
      vecN = vecM = vecD = res = sat1q = sat1n = sat2q = sat2n
         = IRTemp_INVALID;
      newTempsV128_3(&vecN, &vecM, &vecD);
      assign(vecN, getQReg128(nn));
      assign(vecM, getQReg128(mm));
      assign(vecD, getQReg128(dd));
      math_SQDMULL_ACC(&res, &sat1q, &sat1n, &sat2q, &sat2n,
                       False/*!is2*/, size, "mas"[ks],
                       vecN, vecM, ks == 0 ? IRTemp_INVALID : vecD);
      IROp opZHI = mkVecZEROHIxxOFV128(size+1);
      putQReg128(dd, unop(opZHI, mkexpr(res)));
      vassert(sat1q != IRTemp_INVALID && sat1n != IRTemp_INVALID);
      updateQCFLAGwithDifferenceZHI(sat1q, sat1n, opZHI);
      if (sat2q != IRTemp_INVALID || sat2n != IRTemp_INVALID) {
         updateQCFLAGwithDifferenceZHI(sat2q, sat2n, opZHI);
      }
      const HChar* nm        = ks == 0 ? "sqdmull"
                                       : (ks == 1 ? "sqdmlal" : "sqdmlsl");
      const HChar  arrNarrow = "bhsd"[size];
      const HChar  arrWide   = "bhsd"[size+1];
      DIP("%s %c%u, %c%u, %c%u\n",
          nm, arrWide, dd, arrNarrow, nn, arrNarrow, mm);
      return True;
   }

   return False;
#  undef INSN
}


static
Bool dis_AdvSIMD_scalar_three_same(/*MB_OUT*/DisResult* dres, UInt insn)
{
   /* 31 29 28    23   21 20 15     10 9 4
      01 U  11110 size 1  m  opcode 1  n d
      Decode fields: u,size,opcode
   */
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   if (INSN(31,30) != BITS2(0,1)
       || INSN(28,24) != BITS5(1,1,1,1,0)
       || INSN(21,21) != 1
       || INSN(10,10) != 1) {
      return False;
   }
   UInt bitU   = INSN(29,29);
   UInt size   = INSN(23,22);
   UInt mm     = INSN(20,16);
   UInt opcode = INSN(15,11);
   UInt nn     = INSN(9,5);
   UInt dd     = INSN(4,0);
   vassert(size < 4);
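
   /* For the saturating cases below, the strategy is to compute the
      result twice, once with the saturating op (qop) and once with
      its non-saturating equivalent (nop), and to set QCFLAG if the
      two results differ. */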
   if (opcode == BITS5(0,0,0,0,1) || opcode == BITS5(0,0,1,0,1)) {
      /* -------- 0,xx,00001 SQADD std4_std4_std4 -------- */
      /* -------- 1,xx,00001 UQADD std4_std4_std4 -------- */
      /* -------- 0,xx,00101 SQSUB std4_std4_std4 -------- */
      /* -------- 1,xx,00101 UQSUB std4_std4_std4 -------- */
      Bool isADD = opcode == BITS5(0,0,0,0,1);
      Bool isU   = bitU == 1;
      IROp qop   = Iop_INVALID;
      IROp nop   = Iop_INVALID;
      if (isADD) {
         qop = isU ? mkVecQADDU(size) : mkVecQADDS(size);
         nop = mkVecADD(size);
      } else {
         qop = isU ? mkVecQSUBU(size) : mkVecQSUBS(size);
         nop = mkVecSUB(size);
      }
      IRTemp argL = newTempV128();
      IRTemp argR = newTempV128();
      IRTemp qres = newTempV128();
      IRTemp nres = newTempV128();
      assign(argL, getQReg128(nn));
      assign(argR, getQReg128(mm));
      assign(qres, mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(
                             size, binop(qop, mkexpr(argL), mkexpr(argR)))));
      assign(nres, mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(
                             size, binop(nop, mkexpr(argL), mkexpr(argR)))));
      putQReg128(dd, mkexpr(qres));
      updateQCFLAGwithDifference(qres, nres);
      const HChar* nm  = isADD ? (isU ? "uqadd" : "sqadd")
                               : (isU ? "uqsub" : "sqsub");
      const HChar  arr = "bhsd"[size];
      DIP("%s %c%u, %c%u, %c%u\n", nm, arr, dd, arr, nn, arr, mm);
      return True;
   }

   if (size == X11 && opcode == BITS5(0,0,1,1,0)) {
      /* -------- 0,11,00110 CMGT d_d_d -------- */ // >s
      /* -------- 1,11,00110 CMHI d_d_d -------- */ // >u
      Bool isGT = bitU == 0;
      IRExpr* argL = getQReg128(nn);
      IRExpr* argR = getQReg128(mm);
      IRTemp  res  = newTempV128();
      assign(res,
             isGT ? binop(Iop_CmpGT64Sx2, argL, argR)
                  : binop(Iop_CmpGT64Ux2, argL, argR));
      putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
      DIP("%s %s, %s, %s\n", isGT ? "cmgt" : "cmhi",
          nameQRegLO(dd, Ity_I64),
          nameQRegLO(nn, Ity_I64), nameQRegLO(mm, Ity_I64));
      return True;
   }
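
   /* There are no 64x2 >= primops, so CMGE/CMHS are implemented as
      NOT(argR > argL), which is equivalent for integer compares. */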
   if (size == X11 && opcode == BITS5(0,0,1,1,1)) {
      /* -------- 0,11,00111 CMGE d_d_d -------- */ // >=s
      /* -------- 1,11,00111 CMHS d_d_d -------- */ // >=u
      Bool isGE = bitU == 0;
      IRExpr* argL = getQReg128(nn);
      IRExpr* argR = getQReg128(mm);
      IRTemp  res  = newTempV128();
      assign(res,
             isGE ? unop(Iop_NotV128, binop(Iop_CmpGT64Sx2, argR, argL))
                  : unop(Iop_NotV128, binop(Iop_CmpGT64Ux2, argR, argL)));
      putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
      DIP("%s %s, %s, %s\n", isGE ? "cmge" : "cmhs",
          nameQRegLO(dd, Ity_I64),
          nameQRegLO(nn, Ity_I64), nameQRegLO(mm, Ity_I64));
      return True;
   }

   if (size == X11 && (opcode == BITS5(0,1,0,0,0)
                       || opcode == BITS5(0,1,0,1,0))) {
      /* -------- 0,xx,01000 SSHL  d_d_d -------- */
      /* -------- 0,xx,01010 SRSHL d_d_d -------- */
      /* -------- 1,xx,01000 USHL  d_d_d -------- */
      /* -------- 1,xx,01010 URSHL d_d_d -------- */
      Bool isU = bitU == 1;
      Bool isR = opcode == BITS5(0,1,0,1,0);
      IROp op  = isR ? (isU ? mkVecRSHU(size) : mkVecRSHS(size))
                     : (isU ? mkVecSHU(size)  : mkVecSHS(size));
      IRTemp res = newTempV128();
      assign(res, binop(op, getQReg128(nn), getQReg128(mm)));
      putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
      const HChar* nm = isR ? (isU ? "urshl" : "srshl")
                            : (isU ? "ushl" : "sshl");
      DIP("%s %s, %s, %s\n", nm,
          nameQRegLO(dd, Ity_I64),
          nameQRegLO(nn, Ity_I64), nameQRegLO(mm, Ity_I64));
      return True;
   }

   if (opcode == BITS5(0,1,0,0,1) || opcode == BITS5(0,1,0,1,1)) {
      /* -------- 0,xx,01001 SQSHL  std4_std4_std4 -------- */
      /* -------- 0,xx,01011 SQRSHL std4_std4_std4 -------- */
      /* -------- 1,xx,01001 UQSHL  std4_std4_std4 -------- */
      /* -------- 1,xx,01011 UQRSHL std4_std4_std4 -------- */
      Bool isU = bitU == 1;
      Bool isR = opcode == BITS5(0,1,0,1,1);
      IROp op  = isR ? (isU ? mkVecQANDUQRSH(size) : mkVecQANDSQRSH(size))
                     : (isU ? mkVecQANDUQSH(size)  : mkVecQANDSQSH(size));
      /* This is a bit tricky.  Since we're only interested in the lowest
         lane of the result, we zero out all the rest in the operands, so
         as to ensure that other lanes don't pollute the returned Q value.
         This works because it means, for the lanes we don't care about, we
         are shifting zero by zero, which can never saturate. */
      IRTemp res256 = newTemp(Ity_V256);
      IRTemp resSH  = newTempV128();
      IRTemp resQ   = newTempV128();
      IRTemp zero   = newTempV128();
      assign(
         res256,
         binop(op,
               mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(size, getQReg128(nn))),
               mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(size, getQReg128(mm)))));
      assign(resSH, unop(Iop_V256toV128_0, mkexpr(res256)));
      assign(resQ,  unop(Iop_V256toV128_1, mkexpr(res256)));
      assign(zero,  mkV128(0x0000));
      putQReg128(dd, mkexpr(resSH));
      updateQCFLAGwithDifference(resQ, zero);
      const HChar* nm  = isR ? (isU ? "uqrshl" : "sqrshl")
                             : (isU ? "uqshl"  : "sqshl");
      const HChar  arr = "bhsd"[size];
      DIP("%s %c%u, %c%u, %c%u\n", nm, arr, dd, arr, nn, arr, mm);
      return True;
   }

   if (size == X11 && opcode == BITS5(1,0,0,0,0)) {
      /* -------- 0,11,10000 ADD d_d_d -------- */
      /* -------- 1,11,10000 SUB d_d_d -------- */
      Bool isSUB = bitU == 1;
      IRTemp res = newTemp(Ity_I64);
      assign(res, binop(isSUB ? Iop_Sub64 : Iop_Add64,
                        getQRegLane(nn, 0, Ity_I64),
                        getQRegLane(mm, 0, Ity_I64)));
      putQRegLane(dd, 0, mkexpr(res));
      putQRegLane(dd, 1, mkU64(0));
      DIP("%s %s, %s, %s\n", isSUB ? "sub" : "add",
          nameQRegLO(dd, Ity_I64),
          nameQRegLO(nn, Ity_I64), nameQRegLO(mm, Ity_I64));
      return True;
   }

   if (size == X11 && opcode == BITS5(1,0,0,0,1)) {
      /* -------- 0,11,10001 CMTST d_d_d -------- */ // &, != 0
      /* -------- 1,11,10001 CMEQ  d_d_d -------- */ // ==
      Bool isEQ = bitU == 1;
      IRExpr* argL = getQReg128(nn);
      IRExpr* argR = getQReg128(mm);
      IRTemp  res  = newTempV128();
      assign(res,
             isEQ ? binop(Iop_CmpEQ64x2, argL, argR)
                  : unop(Iop_NotV128, binop(Iop_CmpEQ64x2,
                                            binop(Iop_AndV128, argL, argR),
                                            mkV128(0x0000))));
      putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
      DIP("%s %s, %s, %s\n", isEQ ? "cmeq" : "cmtst",
          nameQRegLO(dd, Ity_I64),
          nameQRegLO(nn, Ity_I64), nameQRegLO(mm, Ity_I64));
      return True;
   }

   if (opcode == BITS5(1,0,1,1,0)) {
      /* -------- 0,xx,10110 SQDMULH  s and h variants only -------- */
      /* -------- 1,xx,10110 SQRDMULH s and h variants only -------- */
      if (size == X00 || size == X11) return False;
      Bool isR = bitU == 1;
      IRTemp res, sat1q, sat1n, vN, vM;
      res = sat1q = sat1n = vN = vM = IRTemp_INVALID;
      newTempsV128_2(&vN, &vM);
      assign(vN, getQReg128(nn));
      assign(vM, getQReg128(mm));
      math_SQDMULH(&res, &sat1q, &sat1n, isR, size, vN, vM);
      putQReg128(dd,
                 mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(size, mkexpr(res))));
      updateQCFLAGwithDifference(
         math_ZERO_ALL_EXCEPT_LOWEST_LANE(size, mkexpr(sat1q)),
         math_ZERO_ALL_EXCEPT_LOWEST_LANE(size, mkexpr(sat1n)));
      const HChar  arr = "bhsd"[size];
      const HChar* nm  = isR ? "sqrdmulh" : "sqdmulh";
      DIP("%s %c%u, %c%u, %c%u\n", nm, arr, dd, arr, nn, arr, mm);
      return True;
   }

   if (bitU == 1 && size >= X10 && opcode == BITS5(1,1,0,1,0)) {
      /* -------- 1,1x,11010 FABD d_d_d, s_s_s -------- */
      IRType ity = size == X11 ? Ity_F64 : Ity_F32;
      IRTemp res = newTemp(ity);
      assign(res, unop(mkABSF(ity),
                       triop(mkSUBF(ity),
                             mkexpr(mk_get_IR_rounding_mode()),
                             getQRegLO(nn,ity), getQRegLO(mm,ity))));
      putQReg128(dd, mkV128(0x0000));
      putQRegLO(dd, mkexpr(res));
      DIP("fabd %s, %s, %s\n",
          nameQRegLO(dd, ity), nameQRegLO(nn, ity), nameQRegLO(mm, ity));
      return True;
   }

   if (bitU == 0 && size <= X01 && opcode == BITS5(1,1,0,1,1)) {
      /* -------- 0,0x,11011 FMULX d_d_d, s_s_s -------- */
      // KLUDGE: FMULX is treated the same way as FMUL.  That can't be right.
      IRType ity = size == X01 ? Ity_F64 : Ity_F32;
      IRTemp res = newTemp(ity);
      assign(res, triop(mkMULF(ity),
                        mkexpr(mk_get_IR_rounding_mode()),
                        getQRegLO(nn,ity), getQRegLO(mm,ity)));
      putQReg128(dd, mkV128(0x0000));
      putQRegLO(dd, mkexpr(res));
      DIP("fmulx %s, %s, %s\n",
          nameQRegLO(dd, ity), nameQRegLO(nn, ity), nameQRegLO(mm, ity));
      return True;
   }
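
   /* Only the less-than/less-equal FP compare primops exist, so the
      greater-than style comparisons below are done with the operands
      swapped. */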
   if (size <= X01 && opcode == BITS5(1,1,1,0,0)) {
      /* -------- 0,0x,11100 FCMEQ d_d_d, s_s_s -------- */
      /* -------- 1,0x,11100 FCMGE d_d_d, s_s_s -------- */
      Bool   isD   = size == X01;
      IRType ity   = isD ? Ity_F64 : Ity_F32;
      Bool   isGE  = bitU == 1;
      IROp   opCMP = isGE ? (isD ? Iop_CmpLE64Fx2 : Iop_CmpLE32Fx4)
                          : (isD ? Iop_CmpEQ64Fx2 : Iop_CmpEQ32Fx4);
      IRTemp res   = newTempV128();
      assign(res, isGE ? binop(opCMP, getQReg128(mm), getQReg128(nn)) // swapd
                       : binop(opCMP, getQReg128(nn), getQReg128(mm)));
      putQReg128(dd, mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(isD ? X11 : X10,
                                                             mkexpr(res))));
      DIP("%s %s, %s, %s\n", isGE ? "fcmge" : "fcmeq",
          nameQRegLO(dd, ity), nameQRegLO(nn, ity), nameQRegLO(mm, ity));
      return True;
   }

   if (bitU == 1 && size >= X10 && opcode == BITS5(1,1,1,0,0)) {
      /* -------- 1,1x,11100 FCMGT d_d_d, s_s_s -------- */
      Bool   isD   = size == X11;
      IRType ity   = isD ? Ity_F64 : Ity_F32;
      IROp   opCMP = isD ? Iop_CmpLT64Fx2 : Iop_CmpLT32Fx4;
      IRTemp res   = newTempV128();
      assign(res, binop(opCMP, getQReg128(mm), getQReg128(nn))); // swapd
      putQReg128(dd, mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(isD ? X11 : X10,
                                                             mkexpr(res))));
      DIP("%s %s, %s, %s\n", "fcmgt",
          nameQRegLO(dd, ity), nameQRegLO(nn, ity), nameQRegLO(mm, ity));
      return True;
   }

   if (bitU == 1 && opcode == BITS5(1,1,1,0,1)) {
      /* -------- 1,0x,11101 FACGE d_d_d, s_s_s -------- */
      /* -------- 1,1x,11101 FACGT d_d_d, s_s_s -------- */
      Bool   isD   = (size & 1) == 1;
      IRType ity   = isD ? Ity_F64 : Ity_F32;
      Bool   isGT  = (size & 2) == 2;
      IROp   opCMP = isGT ? (isD ? Iop_CmpLT64Fx2 : Iop_CmpLT32Fx4)
                          : (isD ? Iop_CmpLE64Fx2 : Iop_CmpLE32Fx4);
      IROp   opABS = isD ? Iop_Abs64Fx2 : Iop_Abs32Fx4;
      IRTemp res   = newTempV128();
      assign(res, binop(opCMP, unop(opABS, getQReg128(mm)),
                               unop(opABS, getQReg128(nn)))); // swapd
      putQReg128(dd, mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(isD ? X11 : X10,
                                                             mkexpr(res))));
      DIP("%s %s, %s, %s\n", isGT ? "facgt" : "facge",
          nameQRegLO(dd, ity), nameQRegLO(nn, ity), nameQRegLO(mm, ity));
      return True;
   }

   if (bitU == 0 && opcode == BITS5(1,1,1,1,1)) {
      /* -------- 0,0x,11111: FRECPS  d_d_d, s_s_s -------- */
      /* -------- 0,1x,11111: FRSQRTS d_d_d, s_s_s -------- */
      Bool isSQRT = (size & 2) == 2;
      Bool isD    = (size & 1) == 1;
      IROp op     = isSQRT ? (isD ? Iop_RSqrtStep64Fx2 : Iop_RSqrtStep32Fx4)
                           : (isD ? Iop_RecipStep64Fx2 : Iop_RecipStep32Fx4);
      IRTemp res = newTempV128();
      assign(res, binop(op, getQReg128(nn), getQReg128(mm)));
      putQReg128(dd, mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(isD ? X11 : X10,
                                                             mkexpr(res))));
      HChar c = isD ? 'd' : 's';
      DIP("%s %c%u, %c%u, %c%u\n", isSQRT ? "frsqrts" : "frecps",
          c, dd, c, nn, c, mm);
      return True;
   }

   return False;
#  undef INSN
}


static
Bool dis_AdvSIMD_scalar_two_reg_misc(/*MB_OUT*/DisResult* dres, UInt insn)
{
   /* 31 29 28    23   21    16     11 9 4
      01 U  11110 size 10000 opcode 10 n d
      Decode fields: u,size,opcode
   */
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   if (INSN(31,30) != BITS2(0,1)
       || INSN(28,24) != BITS5(1,1,1,1,0)
       || INSN(21,17) != BITS5(1,0,0,0,0)
       || INSN(11,10) != BITS2(1,0)) {
      return False;
   }
   UInt bitU   = INSN(29,29);
   UInt size   = INSN(23,22);
   UInt opcode = INSN(16,12);
   UInt nn     = INSN(9,5);
   UInt dd     = INSN(4,0);
   vassert(size < 4);

   if (opcode == BITS5(0,0,0,1,1)) {
      /* -------- 0,xx,00011: SUQADD std4_std4 -------- */
      /* -------- 1,xx,00011: USQADD std4_std4 -------- */
      /* These are a bit tricky (to say the least).  See comments on
         the vector variants (in dis_AdvSIMD_two_reg_misc) below for
         details. */
      Bool isUSQADD = bitU == 1;
      IROp qop = isUSQADD ? mkVecQADDEXTSUSATUU(size)
                          : mkVecQADDEXTUSSATSS(size);
      IROp nop = mkVecADD(size);
      IRTemp argL = newTempV128();
      IRTemp argR = newTempV128();
      assign(argL, getQReg128(nn));
      assign(argR, getQReg128(dd));
      IRTemp qres = math_ZERO_ALL_EXCEPT_LOWEST_LANE(
                       size, binop(qop, mkexpr(argL), mkexpr(argR)));
      IRTemp nres = math_ZERO_ALL_EXCEPT_LOWEST_LANE(
                       size, binop(nop, mkexpr(argL), mkexpr(argR)));
      putQReg128(dd, mkexpr(qres));
      updateQCFLAGwithDifference(qres, nres);
      const HChar arr = "bhsd"[size];
      DIP("%s %c%u, %c%u\n", isUSQADD ? "usqadd" : "suqadd", arr, dd, arr, nn);
      return True;
   }

   if (opcode == BITS5(0,0,1,1,1)) {
      /* -------- 0,xx,00111 SQABS std4_std4 -------- */
      /* -------- 1,xx,00111 SQNEG std4_std4 -------- */
      Bool isNEG = bitU == 1;
      IRTemp qresFW = IRTemp_INVALID, nresFW = IRTemp_INVALID;
      (isNEG ? math_SQNEG : math_SQABS)( &qresFW, &nresFW,
                                         getQReg128(nn), size );
      IRTemp qres = math_ZERO_ALL_EXCEPT_LOWEST_LANE(size, mkexpr(qresFW));
      IRTemp nres = math_ZERO_ALL_EXCEPT_LOWEST_LANE(size, mkexpr(nresFW));
      putQReg128(dd, mkexpr(qres));
      updateQCFLAGwithDifference(qres, nres);
      const HChar arr = "bhsd"[size];
      DIP("%s %c%u, %c%u\n", isNEG ? "sqneg" : "sqabs", arr, dd, arr, nn);
      return True;
   }

   if (size == X11 && opcode == BITS5(0,1,0,0,0)) {
      /* -------- 0,11,01000: CMGT d_d_#0 -------- */ // >s 0
      /* -------- 1,11,01000: CMGE d_d_#0 -------- */ // >=s 0
      Bool isGT = bitU == 0;
      IRExpr* argL = getQReg128(nn);
      IRExpr* argR = mkV128(0x0000);
      IRTemp  res  = newTempV128();
      assign(res, isGT ? binop(Iop_CmpGT64Sx2, argL, argR)
                       : unop(Iop_NotV128, binop(Iop_CmpGT64Sx2, argR, argL)));
      putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
      DIP("cm%s d%u, d%u, #0\n", isGT ? "gt" : "ge", dd, nn);
      return True;
   }

   if (size == X11 && opcode == BITS5(0,1,0,0,1)) {
      /* -------- 0,11,01001: CMEQ d_d_#0 -------- */ // == 0
      /* -------- 1,11,01001: CMLE d_d_#0 -------- */ // <=s 0
      Bool isEQ = bitU == 0;
      IRExpr* argL = getQReg128(nn);
      IRExpr* argR = mkV128(0x0000);
      IRTemp  res  = newTempV128();
      assign(res, isEQ ? binop(Iop_CmpEQ64x2, argL, argR)
                       : unop(Iop_NotV128,
                              binop(Iop_CmpGT64Sx2, argL, argR)));
      putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
      DIP("cm%s d%u, d%u, #0\n", isEQ ? "eq" : "le", dd, nn);
      return True;
   }

   if (bitU == 0 && size == X11 && opcode == BITS5(0,1,0,1,0)) {
      /* -------- 0,11,01010: CMLT d_d_#0 -------- */ // <s 0
      putQReg128(dd, unop(Iop_ZeroHI64ofV128,
                          binop(Iop_CmpGT64Sx2, mkV128(0x0000),
                                getQReg128(nn))));
      DIP("cm%s d%u, d%u, #0\n", "lt", dd, nn);
      return True;
   }

   if (bitU == 0 && size == X11 && opcode == BITS5(0,1,0,1,1)) {
      /* -------- 0,11,01011 ABS d_d -------- */
      putQReg128(dd, unop(Iop_ZeroHI64ofV128,
                          unop(Iop_Abs64x2, getQReg128(nn))));
      DIP("abs d%u, d%u\n", dd, nn);
      return True;
   }

   if (bitU == 1 && size == X11 && opcode == BITS5(0,1,0,1,1)) {
      /* -------- 1,11,01011 NEG d_d -------- */
      putQReg128(dd, unop(Iop_ZeroHI64ofV128,
                          binop(Iop_Sub64x2, mkV128(0x0000), getQReg128(nn))));
      DIP("neg d%u, d%u\n", dd, nn);
      return True;
   }
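
   /* The five compare-with-zero variants are dispatched through a
      small index: ix = 1..5 selects FCMGT, FCMEQ, FCMLT, FCMGE and
      FCMLE respectively, and ix == 0 means the encoding is not one
      of them. */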
   UInt ix = 0; /*INVALID*/
   switch (opcode) {
      case BITS5(0,1,1,0,0): ix = (bitU == 1) ? 4 : 1; break;
      case BITS5(0,1,1,0,1): ix = (bitU == 1) ? 5 : 2; break;
      case BITS5(0,1,1,1,0): if (bitU == 0) ix = 3; break;
      default: break;
   }
   if (ix > 0) {
      /* -------- 0,1x,01100 FCMGT d_d_#0.0, s_s_#0.0 (ix 1) -------- */
      /* -------- 0,1x,01101 FCMEQ d_d_#0.0, s_s_#0.0 (ix 2) -------- */
      /* -------- 0,1x,01110 FCMLT d_d_#0.0, s_s_#0.0 (ix 3) -------- */
      /* -------- 1,1x,01100 FCMGE d_d_#0.0, s_s_#0.0 (ix 4) -------- */
      /* -------- 1,1x,01101 FCMLE d_d_#0.0, s_s_#0.0 (ix 5) -------- */
      Bool   isD     = size == X11;
      IRType ity     = isD ? Ity_F64 : Ity_F32;
      IROp   opCmpEQ = isD ? Iop_CmpEQ64Fx2 : Iop_CmpEQ32Fx4;
      IROp   opCmpLE = isD ? Iop_CmpLE64Fx2 : Iop_CmpLE32Fx4;
      IROp   opCmpLT = isD ? Iop_CmpLT64Fx2 : Iop_CmpLT32Fx4;
      IROp   opCmp   = Iop_INVALID;
      Bool   swap    = False;
      const HChar* nm = "??";
      switch (ix) {
         case 1: nm = "fcmgt"; opCmp = opCmpLT; swap = True; break;
         case 2: nm = "fcmeq"; opCmp = opCmpEQ; break;
         case 3: nm = "fcmlt"; opCmp = opCmpLT; break;
         case 4: nm = "fcmge"; opCmp = opCmpLE; swap = True; break;
         case 5: nm = "fcmle"; opCmp = opCmpLE; break;
         default: vassert(0);
      }
      IRExpr* zero = mkV128(0x0000);
      IRTemp  res  = newTempV128();
      assign(res, swap ? binop(opCmp, zero, getQReg128(nn))
                       : binop(opCmp, getQReg128(nn), zero));
      putQReg128(dd, mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(isD ? X11 : X10,
                                                             mkexpr(res))));
      DIP("%s %s, %s, #0.0\n", nm, nameQRegLO(dd, ity), nameQRegLO(nn, ity));
      return True;
   }

   if (opcode == BITS5(1,0,1,0,0)
       || (bitU == 1 && opcode == BITS5(1,0,0,1,0))) {
      /* -------- 0,xx,10100: SQXTN -------- */
      /* -------- 1,xx,10100: UQXTN -------- */
      /* -------- 1,xx,10010: SQXTUN -------- */
      if (size == X11) return False;
      vassert(size < 3);
      IROp opN = Iop_INVALID;
      Bool zWiden = True;
      const HChar* nm = "??";
      /**/ if (bitU == 0 && opcode == BITS5(1,0,1,0,0)) {
         opN = mkVecQNARROWUNSS(size); nm = "sqxtn"; zWiden = False;
      }
      else if (bitU == 1 && opcode == BITS5(1,0,1,0,0)) {
         opN = mkVecQNARROWUNUU(size); nm = "uqxtn";
      }
      else if (bitU == 1 && opcode == BITS5(1,0,0,1,0)) {
         opN = mkVecQNARROWUNSU(size); nm = "sqxtun";
      }
      else vassert(0);
      IRTemp src  = math_ZERO_ALL_EXCEPT_LOWEST_LANE(
                       size+1, getQReg128(nn));
      IRTemp resN = math_ZERO_ALL_EXCEPT_LOWEST_LANE(
                       size, unop(Iop_64UtoV128, unop(opN, mkexpr(src))));
      putQReg128(dd, mkexpr(resN));
      /* This widens zero lanes to zero, and compares it against zero, so all
         of the non-participating lanes make no contribution to the
         Q flag state. */
      IRTemp resW = math_WIDEN_LO_OR_HI_LANES(zWiden, False/*!fromUpperHalf*/,
                                              size, mkexpr(resN));
      updateQCFLAGwithDifference(src, resW);
      const HChar arrNarrow = "bhsd"[size];
      const HChar arrWide   = "bhsd"[size+1];
      DIP("%s %c%u, %c%u\n", nm, arrNarrow, dd, arrWide, nn);
      return True;
   }

   if (opcode == BITS5(1,0,1,1,0) && bitU == 1 && size == X01) {
      /* -------- 1,01,10110 FCVTXN s_d -------- */
      /* Using Irrm_NEAREST here isn't right.  The docs say "round to
         odd" but I don't know what that really means. */
      putQRegLO(dd,
                binop(Iop_F64toF32, mkU32(Irrm_NEAREST),
                      getQRegLO(nn, Ity_F64)));
      putQRegLane(dd, 1, mkU32(0));
      putQRegLane(dd, 1, mkU64(0));
      DIP("fcvtxn s%u, d%u\n", dd, nn);
      return True;
   }
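
   /* FCVT{N,M,A,P,Z}{S,U} differ only in the rounding mode applied
      to the FP-to-integer conversion; ix = 1..5 selects the mode
      below.  Note that the 'a' (ties-away) case is kludged to
      Irrm_NEAREST. */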
   ix = 0; /*INVALID*/
   switch (opcode) {
      case BITS5(1,1,0,1,0): ix = ((size & 2) == 2) ? 4 : 1; break;
      case BITS5(1,1,0,1,1): ix = ((size & 2) == 2) ? 5 : 2; break;
      case BITS5(1,1,1,0,0): if ((size & 2) == 0) ix = 3; break;
      default: break;
   }
   if (ix > 0) {
      /* -------- 0,0x,11010 FCVTNS d_d, s_s (ix 1) -------- */
      /* -------- 0,0x,11011 FCVTMS d_d, s_s (ix 2) -------- */
      /* -------- 0,0x,11100 FCVTAS d_d, s_s (ix 3) -------- */
      /* -------- 0,1x,11010 FCVTPS d_d, s_s (ix 4) -------- */
      /* -------- 0,1x,11011 FCVTZS d_d, s_s (ix 5) -------- */
      /* -------- 1,0x,11010 FCVTNU d_d, s_s (ix 1) -------- */
      /* -------- 1,0x,11011 FCVTMU d_d, s_s (ix 2) -------- */
      /* -------- 1,0x,11100 FCVTAU d_d, s_s (ix 3) -------- */
      /* -------- 1,1x,11010 FCVTPU d_d, s_s (ix 4) -------- */
      /* -------- 1,1x,11011 FCVTZU d_d, s_s (ix 5) -------- */
      Bool   isD = (size & 1) == 1;
      IRType tyF = isD ? Ity_F64 : Ity_F32;
      IRType tyI = isD ? Ity_I64 : Ity_I32;
      IRRoundingMode irrm = 8; /*impossible*/
      HChar ch = '?';
      switch (ix) {
         case 1: ch = 'n'; irrm = Irrm_NEAREST; break;
         case 2: ch = 'm'; irrm = Irrm_NegINF;  break;
         case 3: ch = 'a'; irrm = Irrm_NEAREST; break; /* kludge? */
         case 4: ch = 'p'; irrm = Irrm_PosINF;  break;
         case 5: ch = 'z'; irrm = Irrm_ZERO;    break;
         default: vassert(0);
      }
      IROp cvt = Iop_INVALID;
      if (bitU == 1) {
         cvt = isD ? Iop_F64toI64U : Iop_F32toI32U;
      } else {
         cvt = isD ? Iop_F64toI64S : Iop_F32toI32S;
      }
      IRTemp src = newTemp(tyF);
      IRTemp res = newTemp(tyI);
      assign(src, getQRegLane(nn, 0, tyF));
      assign(res, binop(cvt, mkU32(irrm), mkexpr(src)));
      putQRegLane(dd, 0, mkexpr(res)); /* bits 31-0 or 63-0 */
      if (!isD) {
         putQRegLane(dd, 1, mkU32(0)); /* bits 63-32 */
      }
      putQRegLane(dd, 1, mkU64(0)); /* bits 127-64 */
      HChar sOrD = isD ? 'd' : 's';
      DIP("fcvt%c%c %c%u, %c%u\n", ch, bitU == 1 ? 'u' : 's',
          sOrD, dd, sOrD, nn);
      return True;
   }

   if (size <= X01 && opcode == BITS5(1,1,1,0,1)) {
      /* -------- 0,0x,11101: SCVTF d_d, s_s -------- */
      /* -------- 1,0x,11101: UCVTF d_d, s_s -------- */
      Bool   isU = bitU == 1;
      Bool   isD = (size & 1) == 1;
      IRType tyI = isD ? Ity_I64 : Ity_I32;
      IROp   iop = isU ? (isD ? Iop_I64UtoF64 : Iop_I32UtoF32)
                       : (isD ? Iop_I64StoF64 : Iop_I32StoF32);
      IRTemp rm  = mk_get_IR_rounding_mode();
      putQRegLO(dd, binop(iop, mkexpr(rm), getQRegLO(nn, tyI)));
      if (!isD) {
         putQRegLane(dd, 1, mkU32(0)); /* bits 63-32 */
      }
      putQRegLane(dd, 1, mkU64(0)); /* bits 127-64 */
      HChar c = isD ? 'd' : 's';
      DIP("%ccvtf %c%u, %c%u\n", isU ? 'u' : 's', c, dd, c, nn);
      return True;
   }

   if (size >= X10 && opcode == BITS5(1,1,1,0,1)) {
      /* -------- 0,1x,11101: FRECPE  d_d, s_s -------- */
      /* -------- 1,1x,11101: FRSQRTE d_d, s_s -------- */
      Bool isSQRT = bitU == 1;
      Bool isD    = (size & 1) == 1;
      IROp op     = isSQRT ? (isD ? Iop_RSqrtEst64Fx2 : Iop_RSqrtEst32Fx4)
                           : (isD ? Iop_RecipEst64Fx2 : Iop_RecipEst32Fx4);
      IRTemp resV = newTempV128();
      assign(resV, unop(op, getQReg128(nn)));
      putQReg128(dd, mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(isD ? X11 : X10,
                                                             mkexpr(resV))));
      HChar c = isD ? 'd' : 's';
      DIP("%s %c%u, %c%u\n", isSQRT ? "frsqrte" : "frecpe", c, dd, c, nn);
      return True;
   }

   if (bitU == 0 && size >= X10 && opcode == BITS5(1,1,1,1,1)) {
      /* -------- 0,1x,11111: FRECPX d_d, s_s -------- */
      Bool   isD = (size & 1) == 1;
      IRType ty  = isD ? Ity_F64 : Ity_F32;
      IROp   op  = isD ? Iop_RecpExpF64 : Iop_RecpExpF32;
      IRTemp res = newTemp(ty);
      IRTemp rm  = mk_get_IR_rounding_mode();
      assign(res, binop(op, mkexpr(rm), getQRegLane(nn, 0, ty)));
      putQReg128(dd, mkV128(0x0000));
      putQRegLane(dd, 0, mkexpr(res));
      HChar c = isD ? 'd' : 's';
      DIP("%s %c%u, %c%u\n", "frecpx", c, dd, c, nn);
      return True;
   }

   return False;
#  undef INSN
}


static
Bool dis_AdvSIMD_scalar_x_indexed_element(/*MB_OUT*/DisResult* dres, UInt insn)
{
   /* 31   28    23   21 20 19 15     11  9 4
      01 U 11111 size L  M  m  opcode H 0 n d
      Decode fields are: u,size,opcode
      M is really part of the mm register number.  Individual
      cases need to inspect L and H though.
   */
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   if (INSN(31,30) != BITS2(0,1)
       || INSN(28,24) != BITS5(1,1,1,1,1) || INSN(10,10) != 0) {
      return False;
   }
   UInt bitU   = INSN(29,29);
   UInt size   = INSN(23,22);
   UInt bitL   = INSN(21,21);
   UInt bitM   = INSN(20,20);
   UInt mmLO4  = INSN(19,16);
   UInt opcode = INSN(15,12);
   UInt bitH   = INSN(11,11);
   UInt nn     = INSN(9,5);
   UInt dd     = INSN(4,0);
   vassert(bitH < 2 && bitM < 2 && bitL < 2);
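
   /* For the FP by-element cases, the lane index is decoded from H:L
      according to the lane size: S lanes use H:L, giving 0..3, while
      D lanes require L == 0 and use H alone. */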
   if (bitU == 0 && size >= X10
       && (opcode == BITS4(0,0,0,1) || opcode == BITS4(0,1,0,1))) {
      /* -------- 0,1x,0001 FMLA d_d_d[], s_s_s[] -------- */
      /* -------- 0,1x,0101 FMLS d_d_d[], s_s_s[] -------- */
      Bool isD   = (size & 1) == 1;
      Bool isSUB = opcode == BITS4(0,1,0,1);
      UInt index;
      if      (!isD)             index = (bitH << 1) | bitL;
      else if (isD && bitL == 0) index = bitH;
      else return False; // sz:L == x11 => unallocated encoding
      vassert(index < (isD ? 2 : 4));
      IRType ity   = isD ? Ity_F64 : Ity_F32;
      IRTemp elem  = newTemp(ity);
      UInt   mm    = (bitM << 4) | mmLO4;
      assign(elem, getQRegLane(mm, index, ity));
      IRTemp dupd  = math_DUP_TO_V128(elem, ity);
      IROp   opADD = isD ? Iop_Add64Fx2 : Iop_Add32Fx4;
      IROp   opSUB = isD ? Iop_Sub64Fx2 : Iop_Sub32Fx4;
      IROp   opMUL = isD ? Iop_Mul64Fx2 : Iop_Mul32Fx4;
      IRTemp rm    = mk_get_IR_rounding_mode();
      IRTemp t1    = newTempV128();
      IRTemp t2    = newTempV128();
      // FIXME: double rounding; use FMA primops instead
      assign(t1, triop(opMUL, mkexpr(rm), getQReg128(nn), mkexpr(dupd)));
      assign(t2, triop(isSUB ? opSUB : opADD,
                       mkexpr(rm), getQReg128(dd), mkexpr(t1)));
      putQReg128(dd,
                 mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(isD ? 3 : 2,
                                                         mkexpr(t2))));
      const HChar c = isD ? 'd' : 's';
      DIP("%s %c%u, %c%u, %s.%c[%u]\n", isSUB ? "fmls" : "fmla",
          c, dd, c, nn, nameQReg128(mm), c, index);
      return True;
   }

   if (size >= X10 && opcode == BITS4(1,0,0,1)) {
      /* -------- 0,1x,1001 FMUL  d_d_d[], s_s_s[] -------- */
      /* -------- 1,1x,1001 FMULX d_d_d[], s_s_s[] -------- */
      Bool isD    = (size & 1) == 1;
      Bool isMULX = bitU == 1;
      UInt index;
      if      (!isD)             index = (bitH << 1) | bitL;
      else if (isD && bitL == 0) index = bitH;
      else return False; // sz:L == x11 => unallocated encoding
      vassert(index < (isD ? 2 : 4));
      IRType ity   = isD ? Ity_F64 : Ity_F32;
      IRTemp elem  = newTemp(ity);
      UInt   mm    = (bitM << 4) | mmLO4;
      assign(elem, getQRegLane(mm, index, ity));
      IRTemp dupd  = math_DUP_TO_V128(elem, ity);
      IROp   opMUL = isD ? Iop_Mul64Fx2 : Iop_Mul32Fx4;
      IRTemp rm    = mk_get_IR_rounding_mode();
      IRTemp t1    = newTempV128();
      // KLUDGE: FMULX is treated the same way as FMUL.  That can't be right.
      assign(t1, triop(opMUL, mkexpr(rm), getQReg128(nn), mkexpr(dupd)));
      putQReg128(dd,
                 mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(isD ? 3 : 2,
                                                         mkexpr(t1))));
      const HChar c = isD ? 'd' : 's';
      DIP("%s %c%u, %c%u, %s.%c[%u]\n", isMULX ? "fmulx" : "fmul",
          c, dd, c, nn, nameQReg128(mm), c, index);
      return True;
   }

   if (bitU == 0
       && (opcode == BITS4(1,0,1,1)
           || opcode == BITS4(0,0,1,1) || opcode == BITS4(0,1,1,1))) {
      /* -------- 0,xx,1011 SQDMULL s/h variants only -------- */ // 0 (ks)
      /* -------- 0,xx,0011 SQDMLAL s/h variants only -------- */ // 1
      /* -------- 0,xx,0111 SQDMLSL s/h variants only -------- */ // 2
      /* Widens, and size refers to the narrowed lanes. */
      UInt ks = 3;
      switch (opcode) {
         case BITS4(1,0,1,1): ks = 0; break;
         case BITS4(0,0,1,1): ks = 1; break;
         case BITS4(0,1,1,1): ks = 2; break;
         default: vassert(0);
      }
      vassert(ks >= 0 && ks <= 2);
      UInt mm = 32; // invalid
      UInt ix = 16; // invalid
      switch (size) {
         case X00:
            return False; // h_b_b[] case is not allowed
         case X01:
            mm = mmLO4; ix = (bitH << 2) | (bitL << 1) | (bitM << 0); break;
         case X10:
            mm = (bitM << 4) | mmLO4; ix = (bitH << 1) | (bitL << 0); break;
         case X11:
            return False; // q_d_d[] case is not allowed
         default:
            vassert(0);
      }
      vassert(mm < 32 && ix < 16);
      IRTemp vecN, vecD, res, sat1q, sat1n, sat2q, sat2n;
      vecN = vecD = res = sat1q = sat1n = sat2q = sat2n = IRTemp_INVALID;
      newTempsV128_2(&vecN, &vecD);
      assign(vecN, getQReg128(nn));
      IRTemp vecM = math_DUP_VEC_ELEM(getQReg128(mm), size, ix);
      assign(vecD, getQReg128(dd));
      math_SQDMULL_ACC(&res, &sat1q, &sat1n, &sat2q, &sat2n,
                       False/*!is2*/, size, "mas"[ks],
                       vecN, vecM, ks == 0 ? IRTemp_INVALID : vecD);
      IROp opZHI = mkVecZEROHIxxOFV128(size+1);
      putQReg128(dd, unop(opZHI, mkexpr(res)));
      vassert(sat1q != IRTemp_INVALID && sat1n != IRTemp_INVALID);
      updateQCFLAGwithDifferenceZHI(sat1q, sat1n, opZHI);
      if (sat2q != IRTemp_INVALID || sat2n != IRTemp_INVALID) {
         updateQCFLAGwithDifferenceZHI(sat2q, sat2n, opZHI);
      }
      const HChar* nm        = ks == 0 ? "sqdmull"
                                       : (ks == 1 ? "sqdmlal" : "sqdmlsl");
      const HChar  arrNarrow = "bhsd"[size];
      const HChar  arrWide   = "bhsd"[size+1];
      DIP("%s %c%u, %c%u, v%u.%c[%u]\n",
          nm, arrWide, dd, arrNarrow, nn, mm, arrNarrow, ix);
      return True;
   }

   if (opcode == BITS4(1,1,0,0) || opcode == BITS4(1,1,0,1)) {
      /* -------- 0,xx,1100 SQDMULH  s and h variants only -------- */
      /* -------- 0,xx,1101 SQRDMULH s and h variants only -------- */
      UInt mm = 32; // invalid
      UInt ix = 16; // invalid
      switch (size) {
         case X00:
            return False; // b case is not allowed
         case X01:
            mm = mmLO4; ix = (bitH << 2) | (bitL << 1) | (bitM << 0); break;
         case X10:
            mm = (bitM << 4) | mmLO4; ix = (bitH << 1) | (bitL << 0); break;
         case X11:
            return False; // q case is not allowed
         default:
            vassert(0);
      }
      vassert(mm < 32 && ix < 16);
      Bool isR = opcode == BITS4(1,1,0,1);
      IRTemp res, sat1q, sat1n, vN, vM;
      res = sat1q = sat1n = vN = vM = IRTemp_INVALID;
      vN = newTempV128();
      assign(vN, getQReg128(nn));
      vM = math_DUP_VEC_ELEM(getQReg128(mm), size, ix);
      math_SQDMULH(&res, &sat1q, &sat1n, isR, size, vN, vM);
      IROp opZHI = mkVecZEROHIxxOFV128(size);
      putQReg128(dd, unop(opZHI, mkexpr(res)));
      updateQCFLAGwithDifferenceZHI(sat1q, sat1n, opZHI);
      const HChar* nm = isR ? "sqrdmulh" : "sqdmulh";
      HChar ch        = size == X01 ? 'h' : 's';
      DIP("%s %c%u, %c%u, v%d.%c[%u]\n", nm, ch, dd, ch, nn,
          (Int)mm, ch, ix);
      return True;
   }

   return False;
#  undef INSN
}


static
Bool dis_AdvSIMD_shift_by_immediate(/*MB_OUT*/DisResult* dres, UInt insn)
{
   /* 31    28     22   18   15     10 9 4
      0 q u 011110 immh immb opcode 1  n d
      Decode fields: u,opcode
   */
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   if (INSN(31,31) != 0
       || INSN(28,23) != BITS6(0,1,1,1,1,0) || INSN(10,10) != 1) {
      return False;
   }
   UInt bitQ   = INSN(30,30);
   UInt bitU   = INSN(29,29);
   UInt immh   = INSN(22,19);
   UInt immb   = INSN(18,16);
   UInt opcode = INSN(15,11);
   UInt nn     = INSN(9,5);
   UInt dd     = INSN(4,0);
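
   /* In the right-shift cases below, a shift amount equal to the
      lane width is encodable, but the IR shift ops cannot express
      it.  An unsigned shift by lanebits simply yields zero, and a
      signed shift by lanebits gives the same result as one by
      lanebits-1, so the amount is nudged down by one in that case. */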
   if (opcode == BITS5(0,0,0,0,0) || opcode == BITS5(0,0,0,1,0)) {
      /* -------- 0,00000 SSHR std7_std7_#imm -------- */
      /* -------- 1,00000 USHR std7_std7_#imm -------- */
      /* -------- 0,00010 SSRA std7_std7_#imm -------- */
      /* -------- 1,00010 USRA std7_std7_#imm -------- */
      /* laneTy, shift = case immh:immb of
                         0001:xxx -> B, SHR:8-xxx
                         001x:xxx -> H, SHR:16-xxxx
                         01xx:xxx -> S, SHR:32-xxxxx
                         1xxx:xxx -> D, SHR:64-xxxxxx
                         other    -> invalid
      */
      UInt size  = 0;
      UInt shift = 0;
      Bool isQ   = bitQ == 1;
      Bool isU   = bitU == 1;
      Bool isAcc = opcode == BITS5(0,0,0,1,0);
      Bool ok    = getLaneInfo_IMMH_IMMB(&shift, &size, immh, immb);
      if (!ok || (bitQ == 0 && size == X11)) return False;
      vassert(size >= 0 && size <= 3);
      UInt lanebits = 8 << size;
      vassert(shift >= 1 && shift <= lanebits);
      IROp    op  = isU ? mkVecSHRN(size) : mkVecSARN(size);
      IRExpr* src = getQReg128(nn);
      IRTemp  shf = newTempV128();
      IRTemp  res = newTempV128();
      if (shift == lanebits && isU) {
         assign(shf, mkV128(0x0000));
      } else {
         UInt nudge = 0;
         if (shift == lanebits) {
            vassert(!isU);
            nudge = 1;
         }
         assign(shf, binop(op, src, mkU8(shift - nudge)));
      }
      assign(res, isAcc ? binop(mkVecADD(size), getQReg128(dd), mkexpr(shf))
                        : mkexpr(shf));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      HChar laneCh = "bhsd"[size];
      UInt  nLanes = (isQ ? 128 : 64) / lanebits;
      const HChar* nm = isAcc ? (isU ? "usra" : "ssra")
                              : (isU ? "ushr" : "sshr");
      DIP("%s %s.%u%c, %s.%u%c, #%u\n", nm,
          nameQReg128(dd), nLanes, laneCh,
          nameQReg128(nn), nLanes, laneCh, shift);
      return True;
   }

   if (opcode == BITS5(0,0,1,0,0) || opcode == BITS5(0,0,1,1,0)) {
      /* -------- 0,00100 SRSHR std7_std7_#imm -------- */
      /* -------- 1,00100 URSHR std7_std7_#imm -------- */
      /* -------- 0,00110 SRSRA std7_std7_#imm -------- */
      /* -------- 1,00110 URSRA std7_std7_#imm -------- */
      /* laneTy, shift = case immh:immb of
                         0001:xxx -> B, SHR:8-xxx
                         001x:xxx -> H, SHR:16-xxxx
                         01xx:xxx -> S, SHR:32-xxxxx
                         1xxx:xxx -> D, SHR:64-xxxxxx
                         other    -> invalid
      */
      UInt size  = 0;
      UInt shift = 0;
      Bool isQ   = bitQ == 1;
      Bool isU   = bitU == 1;
      Bool isAcc = opcode == BITS5(0,0,1,1,0);
      Bool ok    = getLaneInfo_IMMH_IMMB(&shift, &size, immh, immb);
      if (!ok || (bitQ == 0 && size == X11)) return False;
      vassert(size >= 0 && size <= 3);
      UInt lanebits = 8 << size;
      vassert(shift >= 1 && shift <= lanebits);
      IROp    op   = isU ? mkVecRSHU(size) : mkVecRSHS(size);
      IRExpr* src  = getQReg128(nn);
      IRTemp  imm8 = newTemp(Ity_I8);
      assign(imm8, mkU8((UChar)(-shift)));
      IRExpr* amt  = mkexpr(math_DUP_TO_V128(imm8, Ity_I8));
      IRTemp  shf  = newTempV128();
      IRTemp  res  = newTempV128();
      assign(shf, binop(op, src, amt));
      assign(res, isAcc ? binop(mkVecADD(size), getQReg128(dd), mkexpr(shf))
                        : mkexpr(shf));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      HChar laneCh = "bhsd"[size];
      UInt  nLanes = (isQ ? 128 : 64) / lanebits;
      const HChar* nm = isAcc ? (isU ? "ursra" : "srsra")
                              : (isU ? "urshr" : "srshr");
      DIP("%s %s.%u%c, %s.%u%c, #%u\n", nm,
          nameQReg128(dd), nLanes, laneCh,
          nameQReg128(nn), nLanes, laneCh, shift);
      return True;
   }

   if (bitU == 1 && opcode == BITS5(0,1,0,0,0)) {
      /* -------- 1,01000 SRI std7_std7_#imm -------- */
      /* laneTy, shift = case immh:immb of
                         0001:xxx -> B, SHR:8-xxx
                         001x:xxx -> H, SHR:16-xxxx
                         01xx:xxx -> S, SHR:32-xxxxx
                         1xxx:xxx -> D, SHR:64-xxxxxx
                         other    -> invalid
      */
      UInt size  = 0;
      UInt shift = 0;
      Bool isQ   = bitQ == 1;
      Bool ok    = getLaneInfo_IMMH_IMMB(&shift, &size, immh, immb);
      if (!ok || (bitQ == 0 && size == X11)) return False;
      vassert(size >= 0 && size <= 3);
      UInt lanebits = 8 << size;
      vassert(shift >= 1 && shift <= lanebits);
      IRExpr* src = getQReg128(nn);
      IRTemp  res = newTempV128();
      if (shift == lanebits) {
         /* An insert of zero bits leaves dd unchanged. */
         assign(res, getQReg128(dd));
      } else {
         assign(res, binop(mkVecSHRN(size), src, mkU8(shift)));
         IRExpr* nmask = binop(mkVecSHLN(size),
                               mkV128(0xFFFF), mkU8(lanebits - shift));
         IRTemp  tmp   = newTempV128();
         assign(tmp, binop(Iop_OrV128,
                           mkexpr(res),
                           binop(Iop_AndV128, getQReg128(dd), nmask)));
         res = tmp;
      }
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      HChar laneCh = "bhsd"[size];
      UInt  nLanes = (isQ ? 128 : 64) / lanebits;
      DIP("%s %s.%u%c, %s.%u%c, #%u\n", "sri",
          nameQReg128(dd), nLanes, laneCh,
          nameQReg128(nn), nLanes, laneCh, shift);
      return True;
   }

   if (opcode == BITS5(0,1,0,1,0)) {
      /* -------- 0,01010 SHL std7_std7_#imm -------- */
      /* -------- 1,01010 SLI std7_std7_#imm -------- */
      /* laneTy, shift = case immh:immb of
                         0001:xxx -> B, xxx
                         001x:xxx -> H, xxxx
                         01xx:xxx -> S, xxxxx
                         1xxx:xxx -> D, xxxxxx
                         other    -> invalid
      */
      UInt size  = 0;
      UInt shift = 0;
      Bool isSLI = bitU == 1;
      Bool isQ   = bitQ == 1;
      Bool ok    = getLaneInfo_IMMH_IMMB(&shift, &size, immh, immb);
      if (!ok || (bitQ == 0 && size == X11)) return False;
      vassert(size >= 0 && size <= 3);
      /* The shift encoding has opposite sign for the leftwards case.
         Adjust shift to compensate. */
      UInt lanebits = 8 << size;
      shift = lanebits - shift;
      vassert(shift >= 0 && shift < lanebits);
      IROp    op  = mkVecSHLN(size);
      IRExpr* src = getQReg128(nn);
      IRTemp  res = newTempV128();
      if (shift == 0) {
         assign(res, src);
      } else {
         assign(res, binop(op, src, mkU8(shift)));
         if (isSLI) {
            IRExpr* nmask = binop(mkVecSHRN(size),
                                  mkV128(0xFFFF), mkU8(lanebits - shift));
            IRTemp  tmp   = newTempV128();
            assign(tmp, binop(Iop_OrV128,
                              mkexpr(res),
                              binop(Iop_AndV128, getQReg128(dd), nmask)));
            res = tmp;
         }
      }
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      HChar laneCh = "bhsd"[size];
      UInt  nLanes = (isQ ? 128 : 64) / lanebits;
      const HChar* nm = isSLI ? "sli" : "shl";
      DIP("%s %s.%u%c, %s.%u%c, #%u\n", nm,
          nameQReg128(dd), nLanes, laneCh,
          nameQReg128(nn), nLanes, laneCh, shift);
      return True;
   }

   if (opcode == BITS5(0,1,1,1,0)
       || (bitU == 1 && opcode == BITS5(0,1,1,0,0))) {
      /* -------- 0,01110  SQSHL  std7_std7_#imm -------- */
      /* -------- 1,01110  UQSHL  std7_std7_#imm -------- */
      /* -------- 1,01100  SQSHLU std7_std7_#imm -------- */
      UInt size  = 0;
      UInt shift = 0;
      Bool isQ   = bitQ == 1;
      Bool ok    = getLaneInfo_IMMH_IMMB(&shift, &size, immh, immb);
      if (!ok || (bitQ == 0 && size == X11)) return False;
      vassert(size >= 0 && size <= 3);
      /* The shift encoding has opposite sign for the leftwards case.
         Adjust shift to compensate. */
      UInt lanebits = 8 << size;
      shift = lanebits - shift;
      vassert(shift >= 0 && shift < lanebits);
      const HChar* nm = NULL;
      /**/ if (bitU == 0 && opcode == BITS5(0,1,1,1,0)) nm = "sqshl";
      else if (bitU == 1 && opcode == BITS5(0,1,1,1,0)) nm = "uqshl";
      else if (bitU == 1 && opcode == BITS5(0,1,1,0,0)) nm = "sqshlu";
      else vassert(0);
      IRTemp qDiff1 = IRTemp_INVALID;
      IRTemp qDiff2 = IRTemp_INVALID;
      IRTemp res    = IRTemp_INVALID;
      IRTemp src    = newTempV128();
      assign(src, getQReg128(nn));
      math_QSHL_IMM(&res, &qDiff1, &qDiff2, src, size, shift, nm);
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      updateQCFLAGwithDifferenceZHI(qDiff1, qDiff2,
                                    isQ ? Iop_INVALID : Iop_ZeroHI64ofV128);
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s, #%u\n", nm,
          nameQReg128(dd), arr, nameQReg128(nn), arr, shift);
      return True;
   }
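
   /* RSHRN rounds by adding 1 << (shift-1) in the wide lanes before
      shifting, i.e. round to nearest with ties going upwards; SHRN
      simply truncates. */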
   if (bitU == 0
       && (opcode == BITS5(1,0,0,0,0) || opcode == BITS5(1,0,0,0,1))) {
      /* -------- 0,10000  SHRN{,2} #imm -------- */
      /* -------- 0,10001 RSHRN{,2} #imm -------- */
      /* Narrows, and size is the narrow size. */
      UInt size  = 0;
      UInt shift = 0;
      Bool is2   = bitQ == 1;
      Bool isR   = opcode == BITS5(1,0,0,0,1);
      Bool ok    = getLaneInfo_IMMH_IMMB(&shift, &size, immh, immb);
      if (!ok || size == X11) return False;
      vassert(shift >= 1);
      IRTemp t1 = newTempV128();
      IRTemp t2 = newTempV128();
      IRTemp t3 = newTempV128();
      assign(t1, getQReg128(nn));
      assign(t2, isR ? binop(mkVecADD(size+1),
                             mkexpr(t1),
                             mkexpr(math_VEC_DUP_IMM(size+1, 1ULL<<(shift-1))))
                     : mkexpr(t1));
      assign(t3, binop(mkVecSHRN(size+1), mkexpr(t2), mkU8(shift)));
      IRTemp t4 = math_NARROW_LANES(t3, t3, size);
      putLO64andZUorPutHI64(is2, dd, t4);
      const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
      const HChar* arrWide   = nameArr_Q_SZ(1, size+1);
      DIP("%s %s.%s, %s.%s, #%u\n", isR ? "rshrn" : "shrn",
          nameQReg128(dd), arrNarrow, nameQReg128(nn), arrWide, shift);
      return True;
   }

   if (opcode == BITS5(1,0,0,1,0) || opcode == BITS5(1,0,0,1,1)
       || (bitU == 1
           && (opcode == BITS5(1,0,0,0,0) || opcode == BITS5(1,0,0,0,1)))) {
      /* -------- 0,10010   SQSHRN{,2} #imm -------- */
      /* -------- 1,10010   UQSHRN{,2} #imm -------- */
      /* -------- 0,10011  SQRSHRN{,2} #imm -------- */
      /* -------- 1,10011  UQRSHRN{,2} #imm -------- */
      /* -------- 1,10000  SQSHRUN{,2} #imm -------- */
      /* -------- 1,10001 SQRSHRUN{,2} #imm -------- */
      UInt size  = 0;
      UInt shift = 0;
      Bool is2   = bitQ == 1;
      Bool ok    = getLaneInfo_IMMH_IMMB(&shift, &size, immh, immb);
      if (!ok || size == X11) return False;
      vassert(shift >= 1 && shift <= (8 << size));
      const HChar* nm = "??";
      IROp op = Iop_INVALID;
      /* Decide on the name and the operation. */
      /**/ if (bitU == 0 && opcode == BITS5(1,0,0,1,0)) {
         nm = "sqshrn";  op = mkVecQANDqsarNNARROWSS(size);
      }
      else if (bitU == 1 && opcode == BITS5(1,0,0,1,0)) {
         nm = "uqshrn";  op = mkVecQANDqshrNNARROWUU(size);
      }
      else if (bitU == 0 && opcode == BITS5(1,0,0,1,1)) {
         nm = "sqrshrn"; op = mkVecQANDqrsarNNARROWSS(size);
      }
      else if (bitU == 1 && opcode == BITS5(1,0,0,1,1)) {
         nm = "uqrshrn"; op = mkVecQANDqrshrNNARROWUU(size);
      }
      else if (bitU == 1 && opcode == BITS5(1,0,0,0,0)) {
         nm = "sqshrun"; op = mkVecQANDqsarNNARROWSU(size);
      }
      else if (bitU == 1 && opcode == BITS5(1,0,0,0,1)) {
         nm = "sqrshrun"; op = mkVecQANDqrsarNNARROWSU(size);
      }
      else vassert(0);
      /* Compute the result (Q, shifted value) pair. */
      IRTemp src128 = newTempV128();
      assign(src128, getQReg128(nn));
      IRTemp pair = newTempV128();
      assign(pair, binop(op, mkexpr(src128), mkU8(shift)));
      /* Update the result reg */
      IRTemp res64in128 = newTempV128();
      assign(res64in128, unop(Iop_ZeroHI64ofV128, mkexpr(pair)));
      putLO64andZUorPutHI64(is2, dd, res64in128);
      /* Update the Q flag. */
      IRTemp q64q64 = newTempV128();
      assign(q64q64, binop(Iop_InterleaveHI64x2, mkexpr(pair), mkexpr(pair)));
      IRTemp z128 = newTempV128();
      assign(z128, mkV128(0x0000));
      updateQCFLAGwithDifference(q64q64, z128);
      const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
      const HChar* arrWide   = nameArr_Q_SZ(1, size+1);
      DIP("%s %s.%s, %s.%s, #%u\n", nm,
          nameQReg128(dd), arrNarrow, nameQReg128(nn), arrWide, shift);
      return True;
   }
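
   /* SSHLL/USHLL interleave the source with zero so that each narrow
      lane x lands in the top half of a wide lane (that is, x shifted
      left by the narrow lane width); a signed or unsigned right
      shift by lanebits - sh then produces the sign- or zero-extended
      value shifted left by sh. */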
   if (opcode == BITS5(1,0,1,0,0)) {
      /* -------- 0,10100 SSHLL{,2} #imm -------- */
      /* -------- 1,10100 USHLL{,2} #imm -------- */
      /* 31  28     22   18   15     9 4
         0q0 011110 immh immb 101001 n d  SSHLL Vd.Ta, Vn.Tb, #sh
         0q1 011110 immh immb 101001 n d  USHLL Vd.Ta, Vn.Tb, #sh
         where Ta, Tb, sh
           = case immh of 1xxx -> invalid
                          01xx -> 2d, 2s(q0)/4s(q1),  immh:immb - 32 (0..31)
                          001x -> 4s, 4h(q0)/8h(q1),  immh:immb - 16 (0..15)
                          0001 -> 8h, 8b(q0)/16b(q1), immh:immb - 8  (0..7)
                          0000 -> AdvSIMD modified immediate (???)
      */
      Bool    isQ   = bitQ == 1;
      Bool    isU   = bitU == 1;
      UInt    immhb = (immh << 3) | immb;
      IRTemp  src   = newTempV128();
      IRTemp  zero  = newTempV128();
      IRExpr* res   = NULL;
      UInt    sh    = 0;
      const HChar* ta = "??";
      const HChar* tb = "??";
      assign(src, getQReg128(nn));
      assign(zero, mkV128(0x0000));
      if (immh & 8) {
         /* invalid; don't assign to res */
      }
      else if (immh & 4) {
         sh = immhb - 32;
         vassert(sh < 32); /* so 32-sh is 1..32 */
         ta = "2d";
         tb = isQ ? "4s" : "2s";
         IRExpr* tmp = isQ ? mk_InterleaveHI32x4(src, zero)
                           : mk_InterleaveLO32x4(src, zero);
         res = binop(isU ? Iop_ShrN64x2 : Iop_SarN64x2, tmp, mkU8(32-sh));
      }
      else if (immh & 2) {
         sh = immhb - 16;
         vassert(sh < 16); /* so 16-sh is 1..16 */
         ta = "4s";
         tb = isQ ? "8h" : "4h";
         IRExpr* tmp = isQ ? mk_InterleaveHI16x8(src, zero)
                           : mk_InterleaveLO16x8(src, zero);
         res = binop(isU ? Iop_ShrN32x4 : Iop_SarN32x4, tmp, mkU8(16-sh));
      }
      else if (immh & 1) {
         sh = immhb - 8;
         vassert(sh < 8); /* so 8-sh is 1..8 */
         ta = "8h";
         tb = isQ ? "16b" : "8b";
         IRExpr* tmp = isQ ? mk_InterleaveHI8x16(src, zero)
                           : mk_InterleaveLO8x16(src, zero);
         res = binop(isU ? Iop_ShrN16x8 : Iop_SarN16x8, tmp, mkU8(8-sh));
      } else {
         vassert(immh == 0);
         /* invalid; don't assign to res */
      }
      if (res == NULL) return False;
      putQReg128(dd, res);
      DIP("%cshll%s %s.%s, %s.%s, #%u\n",
          isU ? 'u' : 's', isQ ? "2" : "",
          nameQReg128(dd), ta, nameQReg128(nn), tb, sh);
      return True;
   }

   if (opcode == BITS5(1,1,1,0,0)) {
      /* -------- 0,11100 SCVTF {2d_2d,4s_4s,2s_2s}_imm -------- */
      /* -------- 1,11100 UCVTF {2d_2d,4s_4s,2s_2s}_imm -------- */
      /* If immh is of the form 00xx, the insn is invalid. */
      if (immh < BITS4(0,1,0,0)) return False;
      UInt size  = 0;
      UInt fbits = 0;
      Bool ok    = getLaneInfo_IMMH_IMMB(&fbits, &size, immh, immb);
      /* The following holds because immh is never zero. */
      vassert(ok);
      /* The following holds because immh >= 0100. */
      vassert(size == X10 || size == X11);
      Bool isD = size == X11;
      Bool isU = bitU == 1;
      Bool isQ = bitQ == 1;
      if (isD && !isQ) return False; /* reject .1d case */
      vassert(fbits >= 1 && fbits <= (isD ? 64 : 32));
      Double  scale  = two_to_the_minus(fbits);
      IRExpr* scaleE = isD ? IRExpr_Const(IRConst_F64(scale))
                           : IRExpr_Const(IRConst_F32( (Float)scale ));
      IROp   opMUL = isD ? Iop_MulF64 : Iop_MulF32;
      IROp   opCVT = isU ? (isD ? Iop_I64UtoF64 : Iop_I32UtoF32)
                         : (isD ? Iop_I64StoF64 : Iop_I32StoF32);
      IRType tyF = isD ? Ity_F64 : Ity_F32;
      IRType tyI = isD ? Ity_I64 : Ity_I32;
      UInt nLanes = (isQ ? 2 : 1) * (isD ? 1 : 2);
      vassert(nLanes == 2 || nLanes == 4);
      for (UInt i = 0; i < nLanes; i++) {
         IRTemp src = newTemp(tyI);
         IRTemp res = newTemp(tyF);
         IRTemp rm  = mk_get_IR_rounding_mode();
         assign(src, getQRegLane(nn, i, tyI));
         assign(res, triop(opMUL, mkexpr(rm),
                           binop(opCVT, mkexpr(rm), mkexpr(src)),
                           scaleE));
         putQRegLane(dd, i, mkexpr(res));
      }
      if (!isQ) {
         putQRegLane(dd, 1, mkU64(0));
      }
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s, #%u\n", isU ? "ucvtf" : "scvtf",
          nameQReg128(dd), arr, nameQReg128(nn), arr, fbits);
      return True;
   }

   if (opcode == BITS5(1,1,1,1,1)) {
      /* -------- 0,11111 FCVTZS {2d_2d,4s_4s,2s_2s}_imm -------- */
      /* -------- 1,11111 FCVTZU {2d_2d,4s_4s,2s_2s}_imm -------- */
      /* If immh is of the form 00xx, the insn is invalid. */
      if (immh < BITS4(0,1,0,0)) return False;
      UInt size  = 0;
      UInt fbits = 0;
      Bool ok    = getLaneInfo_IMMH_IMMB(&fbits, &size, immh, immb);
      /* The following holds because immh is never zero. */
      vassert(ok);
      /* The following holds because immh >= 0100. */
      vassert(size == X10 || size == X11);
      Bool isD = size == X11;
      Bool isU = bitU == 1;
      Bool isQ = bitQ == 1;
      if (isD && !isQ) return False; /* reject .1d case */
      vassert(fbits >= 1 && fbits <= (isD ? 64 : 32));
      Double  scale  = two_to_the_plus(fbits);
      IRExpr* scaleE = isD ? IRExpr_Const(IRConst_F64(scale))
                           : IRExpr_Const(IRConst_F32( (Float)scale ));
      IROp   opMUL = isD ? Iop_MulF64 : Iop_MulF32;
      IROp   opCVT = isU ? (isD ? Iop_F64toI64U : Iop_F32toI32U)
                         : (isD ? Iop_F64toI64S : Iop_F32toI32S);
      IRType tyF = isD ? Ity_F64 : Ity_F32;
      IRType tyI = isD ? Ity_I64 : Ity_I32;
      UInt nLanes = (isQ ? 2 : 1) * (isD ? 1 : 2);
      vassert(nLanes == 2 || nLanes == 4);
      for (UInt i = 0; i < nLanes; i++) {
         IRTemp src = newTemp(tyF);
         IRTemp res = newTemp(tyI);
         IRTemp rm  = newTemp(Ity_I32);
         assign(src, getQRegLane(nn, i, tyF));
         assign(rm,  mkU32(Irrm_ZERO));
         assign(res, binop(opCVT, mkexpr(rm),
                           triop(opMUL, mkexpr(rm),
                                 mkexpr(src), scaleE)));
         putQRegLane(dd, i, mkexpr(res));
      }
      if (!isQ) {
         putQRegLane(dd, 1, mkU64(0));
      }
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s, #%u\n", isU ? "fcvtzu" : "fcvtzs",
          nameQReg128(dd), arr, nameQReg128(nn), arr, fbits);
      return True;
   }

   return False;
#  undef INSN
}


static
Bool dis_AdvSIMD_three_different(/*MB_OUT*/DisResult* dres, UInt insn)
{
   /* 31 30 29 28    23   21 20 15     11 9 4
      0  Q  U  01110 size 1  m  opcode 00 n d
      Decode fields: u,opcode
   */
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   if (INSN(31,31) != 0
       || INSN(28,24) != BITS5(0,1,1,1,0)
       || INSN(21,21) != 1
       || INSN(11,10) != BITS2(0,0)) {
      return False;
   }
   UInt bitQ   = INSN(30,30);
   UInt bitU   = INSN(29,29);
   UInt size   = INSN(23,22);
   UInt mm     = INSN(20,16);
   UInt opcode = INSN(15,12);
   UInt nn     = INSN(9,5);
   UInt dd     = INSN(4,0);
   vassert(size < 4);
   Bool is2    = bitQ == 1;
   if (opcode == BITS4(0,0,0,0) || opcode == BITS4(0,0,1,0)) {
      /* -------- 0,0000 SADDL{2} -------- */
      /* -------- 1,0000 UADDL{2} -------- */
      /* -------- 0,0010 SSUBL{2} -------- */
      /* -------- 1,0010 USUBL{2} -------- */
      /* Widens, and size refers to the narrow lanes. */
      if (size == X11) return False;
      vassert(size <= 2);
      Bool   isU   = bitU == 1;
      Bool   isADD = opcode == BITS4(0,0,0,0);
      IRTemp argL  = math_WIDEN_LO_OR_HI_LANES(isU, is2, size, getQReg128(nn));
      IRTemp argR  = math_WIDEN_LO_OR_HI_LANES(isU, is2, size, getQReg128(mm));
      IRTemp res   = newTempV128();
      assign(res, binop(isADD ? mkVecADD(size+1) : mkVecSUB(size+1),
                        mkexpr(argL), mkexpr(argR)));
      putQReg128(dd, mkexpr(res));
      const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
      const HChar* arrWide   = nameArr_Q_SZ(1,    size+1);
      const HChar* nm        = isADD ? (isU ? "uaddl" : "saddl")
                                     : (isU ? "usubl" : "ssubl");
      DIP("%s%s %s.%s, %s.%s, %s.%s\n", nm, is2 ? "2" : "",
          nameQReg128(dd), arrWide,
          nameQReg128(nn), arrNarrow, nameQReg128(mm), arrNarrow);
      return True;
   }
   if (opcode == BITS4(0,0,0,1) || opcode == BITS4(0,0,1,1)) {
      /* -------- 0,0001 SADDW{2} -------- */
      /* -------- 1,0001 UADDW{2} -------- */
      /* -------- 0,0011 SSUBW{2} -------- */
      /* -------- 1,0011 USUBW{2} -------- */
      /* Widens, and size refers to the narrow lanes. */
      if (size == X11) return False;
      vassert(size <= 2);
      Bool   isU   = bitU == 1;
      Bool   isADD = opcode == BITS4(0,0,0,1);
      IRTemp argR  = math_WIDEN_LO_OR_HI_LANES(isU, is2, size, getQReg128(mm));
      IRTemp res   = newTempV128();
      assign(res, binop(isADD ? mkVecADD(size+1) : mkVecSUB(size+1),
                        getQReg128(nn), mkexpr(argR)));
      putQReg128(dd, mkexpr(res));
      const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
      const HChar* arrWide   = nameArr_Q_SZ(1,    size+1);
      const HChar* nm        = isADD ? (isU ? "uaddw" : "saddw")
                                     : (isU ? "usubw" : "ssubw");
      DIP("%s%s %s.%s, %s.%s, %s.%s\n", nm, is2 ? "2" : "",
          nameQReg128(dd), arrWide,
          nameQReg128(nn), arrWide, nameQReg128(mm), arrNarrow);
      return True;
   }
   if (opcode == BITS4(0,1,0,0) || opcode == BITS4(0,1,1,0)) {
      /* -------- 0,0100  ADDHN{2} -------- */
      /* -------- 1,0100 RADDHN{2} -------- */
      /* -------- 0,0110  SUBHN{2} -------- */
      /* -------- 1,0110 RSUBHN{2} -------- */
      /* Narrows, and size refers to the narrowed lanes. */
      if (size == X11) return False;
      vassert(size <= 2);
      const UInt shift[3] = { 8, 16, 32 };
      Bool isADD = opcode == BITS4(0,1,0,0);
      Bool isR   = bitU == 1;
      /* Combined elements in wide lanes */
      IRTemp  wide  = newTempV128();
      IRExpr* wideE = binop(isADD ? mkVecADD(size+1) : mkVecSUB(size+1),
                            getQReg128(nn), getQReg128(mm));
      if (isR) {
         wideE = binop(mkVecADD(size+1),
                       wideE,
                       mkexpr(math_VEC_DUP_IMM(size+1,
                                               1ULL << (shift[size]-1))));
      }
      assign(wide, wideE);
      /* Top halves of elements, still in wide lanes */
      IRTemp shrd = newTempV128();
      assign(shrd, binop(mkVecSHRN(size+1), mkexpr(wide), mkU8(shift[size])));
      /* Elements now compacted into lower 64 bits */
      IRTemp new64 = newTempV128();
      assign(new64, binop(mkVecCATEVENLANES(size), mkexpr(shrd), mkexpr(shrd)));
      putLO64andZUorPutHI64(is2, dd, new64);
      const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
      const HChar* arrWide   = nameArr_Q_SZ(1,    size+1);
      const HChar* nm = isADD ? (isR ? "raddhn" : "addhn")
                              : (isR ? "rsubhn" : "subhn");
      DIP("%s%s %s.%s, %s.%s, %s.%s\n", nm, is2 ? "2" : "",
          nameQReg128(dd), arrNarrow,
          nameQReg128(nn), arrWide, nameQReg128(mm), arrWide);
      return True;
   }
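   /* Worked example for the R{ADD,SUB}HN case above (illustrative only):
      for size == X00 the narrow lanes are 8 bits wide, so shift[size]
      is 8 and the rounding constant is 1 << 7 == 0x80.  Adding 0x80 to
      each 16-bit intermediate before taking its top 8 bits implements
      round-half-up; the plain ADDHN/SUBHN forms simply truncate. */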
   if (opcode == BITS4(0,1,0,1) || opcode == BITS4(0,1,1,1)) {
      /* -------- 0,0101 SABAL{2} -------- */
      /* -------- 1,0101 UABAL{2} -------- */
      /* -------- 0,0111 SABDL{2} -------- */
      /* -------- 1,0111 UABDL{2} -------- */
      /* Widens, and size refers to the narrow lanes. */
      if (size == X11) return False;
      vassert(size <= 2);
      Bool   isU   = bitU == 1;
      Bool   isACC = opcode == BITS4(0,1,0,1);
      IRTemp argL  = math_WIDEN_LO_OR_HI_LANES(isU, is2, size, getQReg128(nn));
      IRTemp argR  = math_WIDEN_LO_OR_HI_LANES(isU, is2, size, getQReg128(mm));
      IRTemp abd   = math_ABD(isU, size+1, mkexpr(argL), mkexpr(argR));
      IRTemp res   = newTempV128();
      assign(res, isACC ? binop(mkVecADD(size+1), mkexpr(abd), getQReg128(dd))
                        : mkexpr(abd));
      putQReg128(dd, mkexpr(res));
      const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
      const HChar* arrWide   = nameArr_Q_SZ(1,    size+1);
      const HChar* nm        = isACC ? (isU ? "uabal" : "sabal")
                                     : (isU ? "uabdl" : "sabdl");
      DIP("%s%s %s.%s, %s.%s, %s.%s\n", nm, is2 ? "2" : "",
          nameQReg128(dd), arrWide,
          nameQReg128(nn), arrNarrow, nameQReg128(mm), arrNarrow);
      return True;
   }
   if (opcode == BITS4(1,1,0,0)
       || opcode == BITS4(1,0,0,0) || opcode == BITS4(1,0,1,0)) {
      /* -------- 0,1100  SMULL{2} -------- */ // 0 (ks)
      /* -------- 1,1100  UMULL{2} -------- */ // 0
      /* -------- 0,1000  SMLAL{2} -------- */ // 1
      /* -------- 1,1000  UMLAL{2} -------- */ // 1
      /* -------- 0,1010  SMLSL{2} -------- */ // 2
      /* -------- 1,1010  UMLSL{2} -------- */ // 2
      /* Widens, and size refers to the narrow lanes. */
      UInt ks = 3;
      switch (opcode) {
         case BITS4(1,1,0,0): ks = 0; break;
         case BITS4(1,0,0,0): ks = 1; break;
         case BITS4(1,0,1,0): ks = 2; break;
         default: vassert(0);
      }
      vassert(ks >= 0 && ks <= 2);
      if (size == X11) return False;
      vassert(size <= 2);
      Bool   isU  = bitU == 1;
      IRTemp vecN = newTempV128();
      IRTemp vecM = newTempV128();
      IRTemp vecD = newTempV128();
      assign(vecN, getQReg128(nn));
      assign(vecM, getQReg128(mm));
      assign(vecD, getQReg128(dd));
      IRTemp res = IRTemp_INVALID;
      math_MULL_ACC(&res, is2, isU, size, "mas"[ks],
                    vecN, vecM, ks == 0 ? IRTemp_INVALID : vecD);
      putQReg128(dd, mkexpr(res));
      const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
      const HChar* arrWide   = nameArr_Q_SZ(1,    size+1);
      const HChar* nm        = ks == 0 ? "mull" : (ks == 1 ? "mlal" : "mlsl");
      DIP("%c%s%s %s.%s, %s.%s, %s.%s\n", isU ? 'u' : 's', nm, is2 ? "2" : "",
          nameQReg128(dd), arrWide,
          nameQReg128(nn), arrNarrow, nameQReg128(mm), arrNarrow);
      return True;
   }
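   /* Note on the case above (illustrative): "mas"[ks] indexes into the
      string literal "mas", yielding 'm' (multiply only), 'a' (accumulate)
      or 's' (subtract-accumulate), which tells math_MULL_ACC which
      variant to generate. */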
   if (bitU == 0
       && (opcode == BITS4(1,1,0,1)
           || opcode == BITS4(1,0,0,1) || opcode == BITS4(1,0,1,1))) {
      /* -------- 0,1101  SQDMULL{2} -------- */ // 0 (ks)
      /* -------- 0,1001  SQDMLAL{2} -------- */ // 1
      /* -------- 0,1011  SQDMLSL{2} -------- */ // 2
      /* Widens, and size refers to the narrow lanes. */
      UInt ks = 3;
      switch (opcode) {
         case BITS4(1,1,0,1): ks = 0; break;
         case BITS4(1,0,0,1): ks = 1; break;
         case BITS4(1,0,1,1): ks = 2; break;
         default: vassert(0);
      }
      vassert(ks >= 0 && ks <= 2);
      if (size == X00 || size == X11) return False;
      vassert(size <= 2);
      IRTemp vecN, vecM, vecD, res, sat1q, sat1n, sat2q, sat2n;
      vecN = vecM = vecD = res = sat1q = sat1n = sat2q = sat2n
         = IRTemp_INVALID;
      newTempsV128_3(&vecN, &vecM, &vecD);
      assign(vecN, getQReg128(nn));
      assign(vecM, getQReg128(mm));
      assign(vecD, getQReg128(dd));
      math_SQDMULL_ACC(&res, &sat1q, &sat1n, &sat2q, &sat2n,
                       is2, size, "mas"[ks],
                       vecN, vecM, ks == 0 ? IRTemp_INVALID : vecD);
      putQReg128(dd, mkexpr(res));
      vassert(sat1q != IRTemp_INVALID && sat1n != IRTemp_INVALID);
      updateQCFLAGwithDifference(sat1q, sat1n);
      if (sat2q != IRTemp_INVALID || sat2n != IRTemp_INVALID) {
         updateQCFLAGwithDifference(sat2q, sat2n);
      }
      const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
      const HChar* arrWide   = nameArr_Q_SZ(1,    size+1);
      const HChar* nm        = ks == 0 ? "sqdmull"
                                       : (ks == 1 ? "sqdmlal" : "sqdmlsl");
      DIP("%s%s %s.%s, %s.%s, %s.%s\n", nm, is2 ? "2" : "",
          nameQReg128(dd), arrWide,
          nameQReg128(nn), arrNarrow, nameQReg128(mm), arrNarrow);
      return True;
   }
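   /* Note on the case above (illustrative): math_SQDMULL_ACC returns both
      saturated (sat1q, sat2q) and unsaturated (sat1n, sat2n)
      intermediates; updateQCFLAGwithDifference sets the sticky QC flag
      whenever the two differ, i.e. whenever saturation actually
      occurred. */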
   if (bitU == 0 && opcode == BITS4(1,1,1,0)) {
      /* -------- 0,1110  PMULL{2} -------- */
      /* Widens, and size refers to the narrow lanes. */
      if (size != X00 && size != X11) return False;
      IRTemp  res  = IRTemp_INVALID;
      IRExpr* srcN = getQReg128(nn);
      IRExpr* srcM = getQReg128(mm);
      const HChar* arrNarrow = NULL;
      const HChar* arrWide   = NULL;
      if (size == X00) {
         res = math_BINARY_WIDENING_V128(is2, Iop_PolynomialMull8x8,
                                         srcN, srcM);
         arrNarrow = nameArr_Q_SZ(bitQ, size);
         arrWide   = nameArr_Q_SZ(1,    size+1);
      } else {
         /* The same thing as the X00 case, except we have to call
            a helper to do it. */
         vassert(size == X11);
         res = newTemp(Ity_V128);
         IROp slice
            = is2 ? Iop_V128HIto64 : Iop_V128to64;
         IRExpr** args
            = mkIRExprVec_3( IRExpr_VECRET(),
                             unop(slice, srcN), unop(slice, srcM));
         IRDirty* di
            = unsafeIRDirty_1_N( res, 0/*regparms*/,
                                 "arm64g_dirtyhelper_PMULLQ",
                                 &arm64g_dirtyhelper_PMULLQ, args);
         stmt(IRStmt_Dirty(di));
         /* We can't use nameArr_Q_SZ for this because it can't deal with
            Q-sized (128 bit) results.  Hence do it by hand. */
         arrNarrow = bitQ == 0 ? "1d" : "2d";
         arrWide   = "1q";
      }
      putQReg128(dd, mkexpr(res));
      DIP("%s%s %s.%s, %s.%s, %s.%s\n", "pmull", is2 ? "2" : "",
          nameQReg128(dd), arrWide,
          nameQReg128(nn), arrNarrow, nameQReg128(mm), arrNarrow);
      return True;
   }

   return False;
#  undef INSN
}

static
Bool dis_AdvSIMD_three_same(/*MB_OUT*/DisResult* dres, UInt insn)
{
   /* 31 30 29 28    23   21 20 15     10 9 4
      0  Q  U  01110 size 1  m  opcode 1  n d
      Decode fields: u,size,opcode
   */
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   if (INSN(31,31) != 0
       || INSN(28,24) != BITS5(0,1,1,1,0)
       || INSN(21,21) != 1
       || INSN(10,10) != 1) {
      return False;
   }
   UInt bitQ   = INSN(30,30);
   UInt bitU   = INSN(29,29);
   UInt size   = INSN(23,22);
   UInt mm     = INSN(20,16);
   UInt opcode = INSN(15,11);
   UInt nn     = INSN(9,5);
   UInt dd     = INSN(4,0);
   vassert(size < 4);
   if (opcode == BITS5(0,0,0,0,0) || opcode == BITS5(0,0,1,0,0)) {
      /* -------- 0,xx,00000 SHADD std6_std6_std6 -------- */
      /* -------- 1,xx,00000 UHADD std6_std6_std6 -------- */
      /* -------- 0,xx,00100 SHSUB std6_std6_std6 -------- */
      /* -------- 1,xx,00100 UHSUB std6_std6_std6 -------- */
      if (size == X11) return False;
      Bool isADD = opcode == BITS5(0,0,0,0,0);
      Bool isU   = bitU == 1;
      /* Widen both args out, do the math, narrow to final result. */
      IRTemp argL   = newTempV128();
      IRTemp argLhi = IRTemp_INVALID;
      IRTemp argLlo = IRTemp_INVALID;
      IRTemp argR   = newTempV128();
      IRTemp argRhi = IRTemp_INVALID;
      IRTemp argRlo = IRTemp_INVALID;
      IRTemp resHi  = newTempV128();
      IRTemp resLo  = newTempV128();
      IRTemp res    = IRTemp_INVALID;
      assign(argL, getQReg128(nn));
      argLlo = math_WIDEN_LO_OR_HI_LANES(isU, False, size, mkexpr(argL));
      argLhi = math_WIDEN_LO_OR_HI_LANES(isU, True,  size, mkexpr(argL));
      assign(argR, getQReg128(mm));
      argRlo = math_WIDEN_LO_OR_HI_LANES(isU, False, size, mkexpr(argR));
      argRhi = math_WIDEN_LO_OR_HI_LANES(isU, True,  size, mkexpr(argR));
      IROp opADDSUB = isADD ? mkVecADD(size+1) : mkVecSUB(size+1);
      IROp opSxR    = isU ? mkVecSHRN(size+1) : mkVecSARN(size+1);
      assign(resHi, binop(opSxR,
                          binop(opADDSUB, mkexpr(argLhi), mkexpr(argRhi)),
                          mkU8(1)));
      assign(resLo, binop(opSxR,
                          binop(opADDSUB, mkexpr(argLlo), mkexpr(argRlo)),
                          mkU8(1)));
      res = math_NARROW_LANES ( resHi, resLo, size );
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* nm  = isADD ? (isU ? "uhadd" : "shadd")
                               : (isU ? "uhsub" : "shsub");
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }
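   /* Worked example for the xHADD/xHSUB case above (illustrative only):
      the halving add (a + b) >> 1 is computed in lanes of twice the
      original width so the carry cannot be lost.  For UHADD.8B,
      0xFF + 0xFF widens to 0x01FE, and shifting right by one gives
      0xFF, the exact average, where a same-width add would have
      wrapped. */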
   if (opcode == BITS5(0,0,0,1,0)) {
      /* -------- 0,xx,00010 SRHADD std7_std7_std7 -------- */
      /* -------- 1,xx,00010 URHADD std7_std7_std7 -------- */
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      Bool   isU  = bitU == 1;
      IRTemp argL = newTempV128();
      IRTemp argR = newTempV128();
      assign(argL, getQReg128(nn));
      assign(argR, getQReg128(mm));
      IRTemp res = math_RHADD(size, isU, argL, argR);
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s, %s.%s\n", isU ? "urhadd" : "srhadd",
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }
   if (opcode == BITS5(0,0,0,0,1) || opcode == BITS5(0,0,1,0,1)) {
      /* -------- 0,xx,00001 SQADD std7_std7_std7 -------- */
      /* -------- 1,xx,00001 UQADD std7_std7_std7 -------- */
      /* -------- 0,xx,00101 SQSUB std7_std7_std7 -------- */
      /* -------- 1,xx,00101 UQSUB std7_std7_std7 -------- */
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      Bool isADD = opcode == BITS5(0,0,0,0,1);
      Bool isU   = bitU == 1;
      IROp qop   = Iop_INVALID;
      IROp nop   = Iop_INVALID;
      if (isADD) {
         qop = isU ? mkVecQADDU(size) : mkVecQADDS(size);
         nop = mkVecADD(size);
      } else {
         qop = isU ? mkVecQSUBU(size) : mkVecQSUBS(size);
         nop = mkVecSUB(size);
      }
      IRTemp argL = newTempV128();
      IRTemp argR = newTempV128();
      IRTemp qres = newTempV128();
      IRTemp nres = newTempV128();
      assign(argL, getQReg128(nn));
      assign(argR, getQReg128(mm));
      assign(qres, math_MAYBE_ZERO_HI64_fromE(
                      bitQ, binop(qop, mkexpr(argL), mkexpr(argR))));
      assign(nres, math_MAYBE_ZERO_HI64_fromE(
                      bitQ, binop(nop, mkexpr(argL), mkexpr(argR))));
      putQReg128(dd, mkexpr(qres));
      updateQCFLAGwithDifference(qres, nres);
      const HChar* nm  = isADD ? (isU ? "uqadd" : "sqadd")
                               : (isU ? "uqsub" : "sqsub");
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }
   if (bitU == 0 && opcode == BITS5(0,0,0,1,1)) {
      /* -------- 0,00,00011 AND 16b_16b_16b, 8b_8b_8b -------- */
      /* -------- 0,01,00011 BIC 16b_16b_16b, 8b_8b_8b -------- */
      /* -------- 0,10,00011 ORR 16b_16b_16b, 8b_8b_8b -------- */
      /* -------- 0,11,00011 ORN 16b_16b_16b, 8b_8b_8b -------- */
      Bool   isORx  = (size & 2) == 2;
      Bool   invert = (size & 1) == 1;
      IRTemp res    = newTempV128();
      assign(res, binop(isORx ? Iop_OrV128 : Iop_AndV128,
                        getQReg128(nn),
                        invert ? unop(Iop_NotV128, getQReg128(mm))
                               : getQReg128(mm)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* names[4] = { "and", "bic", "orr", "orn" };
      const HChar* ar = bitQ == 1 ? "16b" : "8b";
      DIP("%s %s.%s, %s.%s, %s.%s\n", names[INSN(23,22)],
          nameQReg128(dd), ar, nameQReg128(nn), ar, nameQReg128(mm), ar);
      return True;
   }
   if (bitU == 1 && opcode == BITS5(0,0,0,1,1)) {
      /* -------- 1,00,00011 EOR 16b_16b_16b, 8b_8b_8b -------- */
      /* -------- 1,01,00011 BSL 16b_16b_16b, 8b_8b_8b -------- */
      /* -------- 1,10,00011 BIT 16b_16b_16b, 8b_8b_8b -------- */
      /* -------- 1,11,00011 BIF 16b_16b_16b, 8b_8b_8b -------- */
      IRTemp argD = newTempV128();
      IRTemp argN = newTempV128();
      IRTemp argM = newTempV128();
      assign(argD, getQReg128(dd));
      assign(argN, getQReg128(nn));
      assign(argM, getQReg128(mm));
      const IROp opXOR = Iop_XorV128;
      const IROp opAND = Iop_AndV128;
      const IROp opNOT = Iop_NotV128;
      IRTemp res = newTempV128();
      switch (size) {
         case BITS2(0,0): /* EOR */
            assign(res, binop(opXOR, mkexpr(argM), mkexpr(argN)));
            break;
         case BITS2(0,1): /* BSL */
            assign(res, binop(opXOR, mkexpr(argM),
                              binop(opAND,
                                    binop(opXOR, mkexpr(argM), mkexpr(argN)),
                                    mkexpr(argD))));
            break;
         case BITS2(1,0): /* BIT */
            assign(res, binop(opXOR, mkexpr(argD),
                              binop(opAND,
                                    binop(opXOR, mkexpr(argD), mkexpr(argN)),
                                    mkexpr(argM))));
            break;
         case BITS2(1,1): /* BIF */
            assign(res, binop(opXOR, mkexpr(argD),
                              binop(opAND,
                                    binop(opXOR, mkexpr(argD), mkexpr(argN)),
                                    unop(opNOT, mkexpr(argM)))));
            break;
         default:
            vassert(0);
      }
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* nms[4] = { "eor", "bsl", "bit", "bif" };
      const HChar* arr = bitQ == 1 ? "16b" : "8b";
      DIP("%s %s.%s, %s.%s, %s.%s\n", nms[size],
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }
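   /* Worked identity for the BSL/BIT/BIF cases above (illustrative only):
      M ^ ((M ^ N) & sel) selects, bit for bit, N where sel is 1 and M
      where sel is 0, i.e. it equals (N & sel) | (M & ~sel).  BSL uses D
      as the selector; BIT and BIF keep D and use M (respectively ~M) to
      choose which bits of N are copied in. */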
   if (opcode == BITS5(0,0,1,1,0)) {
      /* -------- 0,xx,00110 CMGT std7_std7_std7 -------- */ // >s
      /* -------- 1,xx,00110 CMHI std7_std7_std7 -------- */ // >u
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      Bool    isGT = bitU == 0;
      IRExpr* argL = getQReg128(nn);
      IRExpr* argR = getQReg128(mm);
      IRTemp  res  = newTempV128();
      assign(res,
             isGT ? binop(mkVecCMPGTS(size), argL, argR)
                  : binop(mkVecCMPGTU(size), argL, argR));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* nm  = isGT ? "cmgt" : "cmhi";
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }
   if (opcode == BITS5(0,0,1,1,1)) {
      /* -------- 0,xx,00111 CMGE std7_std7_std7 -------- */ // >=s
      /* -------- 1,xx,00111 CMHS std7_std7_std7 -------- */ // >=u
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      Bool    isGE = bitU == 0;
      IRExpr* argL = getQReg128(nn);
      IRExpr* argR = getQReg128(mm);
      IRTemp  res  = newTempV128();
      assign(res,
             isGE ? unop(Iop_NotV128, binop(mkVecCMPGTS(size), argR, argL))
                  : unop(Iop_NotV128, binop(mkVecCMPGTU(size), argR, argL)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* nm  = isGE ? "cmge" : "cmhs";
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }
   if (opcode == BITS5(0,1,0,0,0) || opcode == BITS5(0,1,0,1,0)) {
      /* -------- 0,xx,01000 SSHL  std7_std7_std7 -------- */
      /* -------- 0,xx,01010 SRSHL std7_std7_std7 -------- */
      /* -------- 1,xx,01000 USHL  std7_std7_std7 -------- */
      /* -------- 1,xx,01010 URSHL std7_std7_std7 -------- */
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      Bool isU = bitU == 1;
      Bool isR = opcode == BITS5(0,1,0,1,0);
      IROp op  = isR ? (isU ? mkVecRSHU(size) : mkVecRSHS(size))
                     : (isU ? mkVecSHU(size)  : mkVecSHS(size));
      IRTemp res = newTempV128();
      assign(res, binop(op, getQReg128(nn), getQReg128(mm)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* nm  = isR ? (isU ? "urshl" : "srshl")
                             : (isU ? "ushl" : "sshl");
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }
   if (opcode == BITS5(0,1,0,0,1) || opcode == BITS5(0,1,0,1,1)) {
      /* -------- 0,xx,01001 SQSHL  std7_std7_std7 -------- */
      /* -------- 0,xx,01011 SQRSHL std7_std7_std7 -------- */
      /* -------- 1,xx,01001 UQSHL  std7_std7_std7 -------- */
      /* -------- 1,xx,01011 UQRSHL std7_std7_std7 -------- */
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      Bool isU = bitU == 1;
      Bool isR = opcode == BITS5(0,1,0,1,1);
      IROp op  = isR ? (isU ? mkVecQANDUQRSH(size) : mkVecQANDSQRSH(size))
                     : (isU ? mkVecQANDUQSH(size)  : mkVecQANDSQSH(size));
      /* This is a bit tricky.  If we're only interested in the lowest 64 bits
         of the result (viz, bitQ == 0), then we must adjust the operands to
         ensure that the upper part of the result, that we don't care about,
         doesn't pollute the returned Q value.  To do this, zero out the upper
         operand halves beforehand.  This works because it means, for the
         lanes we don't care about, we are shifting zero by zero, which can
         never saturate. */
      IRTemp res256 = newTemp(Ity_V256);
      IRTemp resSH  = newTempV128();
      IRTemp resQ   = newTempV128();
      IRTemp zero   = newTempV128();
      assign(res256, binop(op,
               math_MAYBE_ZERO_HI64_fromE(bitQ, getQReg128(nn)),
               math_MAYBE_ZERO_HI64_fromE(bitQ, getQReg128(mm))));
      assign(resSH, unop(Iop_V256toV128_0, mkexpr(res256)));
      assign(resQ,  unop(Iop_V256toV128_1, mkexpr(res256)));
      assign(zero,  mkV128(0x0000));
      putQReg128(dd, mkexpr(resSH));
      updateQCFLAGwithDifference(resQ, zero);
      const HChar* nm  = isR ? (isU ? "uqrshl" : "sqrshl")
                             : (isU ? "uqshl" : "sqshl");
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }
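   /* Note on the case above (illustrative): the QANDxQxSH ops produce a
      V256 pair -- the lower V128 (Iop_V256toV128_0) is the shift result
      and the upper V128 (Iop_V256toV128_1) is a per-lane saturation
      indicator, so comparing that upper half against zero is what drives
      the QC flag update. */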
   if (opcode == BITS5(0,1,1,0,0) || opcode == BITS5(0,1,1,0,1)) {
      /* -------- 0,xx,01100 SMAX std7_std7_std7 -------- */
      /* -------- 1,xx,01100 UMAX std7_std7_std7 -------- */
      /* -------- 0,xx,01101 SMIN std7_std7_std7 -------- */
      /* -------- 1,xx,01101 UMIN std7_std7_std7 -------- */
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      Bool isU   = bitU == 1;
      Bool isMAX = (opcode & 1) == 0;
      IROp op    = isMAX ? (isU ? mkVecMAXU(size) : mkVecMAXS(size))
                         : (isU ? mkVecMINU(size) : mkVecMINS(size));
      IRTemp t = newTempV128();
      assign(t, binop(op, getQReg128(nn), getQReg128(mm)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t));
      const HChar* nm  = isMAX ? (isU ? "umax" : "smax")
                               : (isU ? "umin" : "smin");
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }
   if (opcode == BITS5(0,1,1,1,0) || opcode == BITS5(0,1,1,1,1)) {
      /* -------- 0,xx,01110 SABD std6_std6_std6 -------- */
      /* -------- 1,xx,01110 UABD std6_std6_std6 -------- */
      /* -------- 0,xx,01111 SABA std6_std6_std6 -------- */
      /* -------- 1,xx,01111 UABA std6_std6_std6 -------- */
      if (size == X11) return False; // 1d/2d cases not allowed
      Bool isU   = bitU == 1;
      Bool isACC = opcode == BITS5(0,1,1,1,1);
      vassert(size <= 2);
      IRTemp t1 = math_ABD(isU, size, getQReg128(nn), getQReg128(mm));
      IRTemp t2 = newTempV128();
      assign(t2, isACC ? binop(mkVecADD(size), mkexpr(t1), getQReg128(dd))
                       : mkexpr(t1));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t2));
      const HChar* nm  = isACC ? (isU ? "uaba" : "saba")
                               : (isU ? "uabd" : "sabd");
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }
   if (opcode == BITS5(1,0,0,0,0)) {
      /* -------- 0,xx,10000 ADD std7_std7_std7 -------- */
      /* -------- 1,xx,10000 SUB std7_std7_std7 -------- */
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      Bool isSUB = bitU == 1;
      IROp op    = isSUB ? mkVecSUB(size) : mkVecADD(size);
      IRTemp t = newTempV128();
      assign(t, binop(op, getQReg128(nn), getQReg128(mm)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t));
      const HChar* nm  = isSUB ? "sub" : "add";
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }
   if (opcode == BITS5(1,0,0,0,1)) {
      /* -------- 0,xx,10001 CMTST std7_std7_std7 -------- */ // &, != 0
      /* -------- 1,xx,10001 CMEQ  std7_std7_std7 -------- */ // ==
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      Bool    isEQ = bitU == 1;
      IRExpr* argL = getQReg128(nn);
      IRExpr* argR = getQReg128(mm);
      IRTemp  res  = newTempV128();
      assign(res,
             isEQ ? binop(mkVecCMPEQ(size), argL, argR)
                  : unop(Iop_NotV128, binop(mkVecCMPEQ(size),
                                            binop(Iop_AndV128, argL, argR),
                                            mkV128(0x0000))));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* nm  = isEQ ? "cmeq" : "cmtst";
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }
   if (opcode == BITS5(1,0,0,1,0)) {
      /* -------- 0,xx,10010 MLA std7_std7_std7 -------- */
      /* -------- 1,xx,10010 MLS std7_std7_std7 -------- */
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      Bool isMLS    = bitU == 1;
      IROp opMUL    = mkVecMUL(size);
      IROp opADDSUB = isMLS ? mkVecSUB(size) : mkVecADD(size);
      IRTemp res = newTempV128();
      if (opMUL != Iop_INVALID && opADDSUB != Iop_INVALID) {
         assign(res, binop(opADDSUB,
                           getQReg128(dd),
                           binop(opMUL, getQReg128(nn), getQReg128(mm))));
         putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
         const HChar* arr = nameArr_Q_SZ(bitQ, size);
         DIP("%s %s.%s, %s.%s, %s.%s\n", isMLS ? "mls" : "mla",
             nameQReg128(dd), arr, nameQReg128(nn), arr,
             nameQReg128(mm), arr);
         return True;
      }
      return False;
   }
   if (opcode == BITS5(1,0,0,1,1)) {
      /* -------- 0,xx,10011 MUL  std7_std7_std7 -------- */
      /* -------- 1,xx,10011 PMUL 16b_16b_16b, 8b_8b_8b -------- */
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      Bool isPMUL = bitU == 1;
      const IROp opsPMUL[4]
         = { Iop_PolynomialMul8x16, Iop_INVALID, Iop_INVALID, Iop_INVALID };
      IROp   opMUL = isPMUL ? opsPMUL[size] : mkVecMUL(size);
      IRTemp res   = newTempV128();
      if (opMUL != Iop_INVALID) {
         assign(res, binop(opMUL, getQReg128(nn), getQReg128(mm)));
         putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
         const HChar* arr = nameArr_Q_SZ(bitQ, size);
         DIP("%s %s.%s, %s.%s, %s.%s\n", isPMUL ? "pmul" : "mul",
             nameQReg128(dd), arr, nameQReg128(nn), arr,
             nameQReg128(mm), arr);
         return True;
      }
      return False;
   }
   if (opcode == BITS5(1,0,1,0,0) || opcode == BITS5(1,0,1,0,1)) {
      /* -------- 0,xx,10100 SMAXP std6_std6_std6 -------- */
      /* -------- 1,xx,10100 UMAXP std6_std6_std6 -------- */
      /* -------- 0,xx,10101 SMINP std6_std6_std6 -------- */
      /* -------- 1,xx,10101 UMINP std6_std6_std6 -------- */
      if (size == X11) return False;
      Bool isU   = bitU == 1;
      Bool isMAX = opcode == BITS5(1,0,1,0,0);
      IRTemp vN  = newTempV128();
      IRTemp vM  = newTempV128();
      IROp op    = isMAX ? (isU ? mkVecMAXU(size) : mkVecMAXS(size))
                         : (isU ? mkVecMINU(size) : mkVecMINS(size));
      assign(vN, getQReg128(nn));
      assign(vM, getQReg128(mm));
      IRTemp res128 = newTempV128();
      assign(res128,
             binop(op,
                   binop(mkVecCATEVENLANES(size), mkexpr(vM), mkexpr(vN)),
                   binop(mkVecCATODDLANES(size),  mkexpr(vM), mkexpr(vN))));
      /* In the half-width case, use CatEL32x4 to extract the half-width
         result from the full-width result. */
      IRExpr* res
         = bitQ == 0 ? unop(Iop_ZeroHI64ofV128,
                            binop(Iop_CatEvenLanes32x4, mkexpr(res128),
                                                        mkexpr(res128)))
                     : mkexpr(res128);
      putQReg128(dd, res);
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      const HChar* nm  = isMAX ? (isU ? "umaxp" : "smaxp")
                               : (isU ? "uminp" : "sminp");
      DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }
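   /* Worked example for the pairwise case above (illustrative only): for
      .4s operands, CatOddLanes(vM,vN) gives {m3,m1,n3,n1} and
      CatEvenLanes(vM,vN) gives {m2,m0,n2,n0}; applying max/min lane by
      lane to those two vectors yields exactly the pairwise result
      {max(m3,m2), max(m1,m0), max(n3,n2), max(n1,n0)}. */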
   if (opcode == BITS5(1,0,1,1,0)) {
      /* -------- 0,xx,10110 SQDMULH  s and h variants only -------- */
      /* -------- 1,xx,10110 SQRDMULH s and h variants only -------- */
      if (size == X00 || size == X11) return False;
      Bool isR = bitU == 1;
      IRTemp res, sat1q, sat1n, vN, vM;
      res = sat1q = sat1n = vN = vM = IRTemp_INVALID;
      newTempsV128_2(&vN, &vM);
      assign(vN, getQReg128(nn));
      assign(vM, getQReg128(mm));
      math_SQDMULH(&res, &sat1q, &sat1n, isR, size, vN, vM);
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      IROp opZHI = bitQ == 0 ? Iop_ZeroHI64ofV128 : Iop_INVALID;
      updateQCFLAGwithDifferenceZHI(sat1q, sat1n, opZHI);
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      const HChar* nm  = isR ? "sqrdmulh" : "sqdmulh";
      DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }
   if (bitU == 0 && opcode == BITS5(1,0,1,1,1)) {
      /* -------- 0,xx,10111 ADDP std7_std7_std7 -------- */
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      IRTemp vN = newTempV128();
      IRTemp vM = newTempV128();
      assign(vN, getQReg128(nn));
      assign(vM, getQReg128(mm));
      IRTemp res128 = newTempV128();
      assign(res128,
             binop(mkVecADD(size),
                   binop(mkVecCATEVENLANES(size), mkexpr(vM), mkexpr(vN)),
                   binop(mkVecCATODDLANES(size),  mkexpr(vM), mkexpr(vN))));
      /* In the half-width case, use CatEL32x4 to extract the half-width
         result from the full-width result. */
      IRExpr* res
         = bitQ == 0 ? unop(Iop_ZeroHI64ofV128,
                            binop(Iop_CatEvenLanes32x4, mkexpr(res128),
                                                        mkexpr(res128)))
                     : mkexpr(res128);
      putQReg128(dd, res);
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("addp %s.%s, %s.%s, %s.%s\n",
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }
   if (bitU == 0
       && (opcode == BITS5(1,1,0,0,0) || opcode == BITS5(1,1,1,1,0))) {
      /* -------- 0,0x,11000 FMAXNM 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      /* -------- 0,1x,11000 FMINNM 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      /* -------- 0,0x,11110 FMAX   2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      /* -------- 0,1x,11110 FMIN   2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      /* FMAXNM, FMINNM: FIXME -- KLUDGED */
      Bool isD = (size & 1) == 1;
      if (bitQ == 0 && isD) return False; // implied 1d case
      Bool isMIN = (size & 2) == 2;
      Bool isNM  = opcode == BITS5(1,1,0,0,0);
      IROp opMXX = (isMIN ? mkVecMINF : mkVecMAXF)(isD ? X11 : X10);
      IRTemp res = newTempV128();
      assign(res, binop(opMXX, getQReg128(nn), getQReg128(mm)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
      DIP("%s%s %s.%s, %s.%s, %s.%s\n",
          isMIN ? "fmin" : "fmax", isNM ? "nm" : "",
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }
   if (bitU == 0 && opcode == BITS5(1,1,0,0,1)) {
      /* -------- 0,0x,11001 FMLA 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      /* -------- 0,1x,11001 FMLS 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      Bool isD   = (size & 1) == 1;
      Bool isSUB = (size & 2) == 2;
      if (bitQ == 0 && isD) return False; // implied 1d case
      IROp opADD = isD ? Iop_Add64Fx2 : Iop_Add32Fx4;
      IROp opSUB = isD ? Iop_Sub64Fx2 : Iop_Sub32Fx4;
      IROp opMUL = isD ? Iop_Mul64Fx2 : Iop_Mul32Fx4;
      IRTemp rm = mk_get_IR_rounding_mode();
      IRTemp t1 = newTempV128();
      IRTemp t2 = newTempV128();
      // FIXME: double rounding; use FMA primops instead
      assign(t1, triop(opMUL,
                       mkexpr(rm), getQReg128(nn), getQReg128(mm)));
      assign(t2, triop(isSUB ? opSUB : opADD,
                       mkexpr(rm), getQReg128(dd), mkexpr(t1)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t2));
      const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
      DIP("%s %s.%s, %s.%s, %s.%s\n", isSUB ? "fmls" : "fmla",
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }
   if (bitU == 0 && opcode == BITS5(1,1,0,1,0)) {
      /* -------- 0,0x,11010 FADD 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      /* -------- 0,1x,11010 FSUB 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      Bool isD   = (size & 1) == 1;
      Bool isSUB = (size & 2) == 2;
      if (bitQ == 0 && isD) return False; // implied 1d case
      const IROp ops[4]
         = { Iop_Add32Fx4, Iop_Add64Fx2, Iop_Sub32Fx4, Iop_Sub64Fx2 };
      IROp   op = ops[size];
      IRTemp rm = mk_get_IR_rounding_mode();
      IRTemp t1 = newTempV128();
      IRTemp t2 = newTempV128();
      assign(t1, triop(op, mkexpr(rm), getQReg128(nn), getQReg128(mm)));
      assign(t2, math_MAYBE_ZERO_HI64(bitQ, t1));
      putQReg128(dd, mkexpr(t2));
      const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
      DIP("%s %s.%s, %s.%s, %s.%s\n", isSUB ? "fsub" : "fadd",
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }
   if (bitU == 1 && size >= X10 && opcode == BITS5(1,1,0,1,0)) {
      /* -------- 1,1x,11010 FABD 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      Bool isD = (size & 1) == 1;
      if (bitQ == 0 && isD) return False; // implied 1d case
      IROp opSUB = isD ? Iop_Sub64Fx2 : Iop_Sub32Fx4;
      IROp opABS = isD ? Iop_Abs64Fx2 : Iop_Abs32Fx4;
      IRTemp rm = mk_get_IR_rounding_mode();
      IRTemp t1 = newTempV128();
      IRTemp t2 = newTempV128();
      // FIXME: use Abd primop instead?
      assign(t1, triop(opSUB, mkexpr(rm), getQReg128(nn), getQReg128(mm)));
      assign(t2, unop(opABS, mkexpr(t1)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t2));
      const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
      DIP("fabd %s.%s, %s.%s, %s.%s\n",
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }
   if (size <= X01 && opcode == BITS5(1,1,0,1,1)) {
      /* -------- 0,0x,11011 FMULX 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      /* -------- 1,0x,11011 FMUL  2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      // KLUDGE: FMULX is treated the same way as FMUL.  That can't be right.
      Bool isD    = (size & 1) == 1;
      Bool isMULX = bitU == 0;
      if (bitQ == 0 && isD) return False; // implied 1d case
      IRTemp rm = mk_get_IR_rounding_mode();
      IRTemp t1 = newTempV128();
      assign(t1, triop(isD ? Iop_Mul64Fx2 : Iop_Mul32Fx4,
                       mkexpr(rm), getQReg128(nn), getQReg128(mm)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t1));
      const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
      DIP("%s %s.%s, %s.%s, %s.%s\n", isMULX ? "fmulx" : "fmul",
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }
   if (size <= X01 && opcode == BITS5(1,1,1,0,0)) {
      /* -------- 0,0x,11100 FCMEQ 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      /* -------- 1,0x,11100 FCMGE 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      Bool isD = (size & 1) == 1;
      if (bitQ == 0 && isD) return False; // implied 1d case
      Bool isGE  = bitU == 1;
      IROp opCMP = isGE ? (isD ? Iop_CmpLE64Fx2 : Iop_CmpLE32Fx4)
                        : (isD ? Iop_CmpEQ64Fx2 : Iop_CmpEQ32Fx4);
      IRTemp t1 = newTempV128();
      assign(t1, isGE ? binop(opCMP, getQReg128(mm), getQReg128(nn)) // swapd
                      : binop(opCMP, getQReg128(nn), getQReg128(mm)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t1));
      const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
      DIP("%s %s.%s, %s.%s, %s.%s\n", isGE ? "fcmge" : "fcmeq",
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }
   if (bitU == 1 && size >= X10 && opcode == BITS5(1,1,1,0,0)) {
      /* -------- 1,1x,11100 FCMGT 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      Bool isD = (size & 1) == 1;
      if (bitQ == 0 && isD) return False; // implied 1d case
      IROp opCMP = isD ? Iop_CmpLT64Fx2 : Iop_CmpLT32Fx4;
      IRTemp t1 = newTempV128();
      assign(t1, binop(opCMP, getQReg128(mm), getQReg128(nn))); // swapd
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t1));
      const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
      DIP("%s %s.%s, %s.%s, %s.%s\n", "fcmgt",
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }
   if (bitU == 1 && opcode == BITS5(1,1,1,0,1)) {
      /* -------- 1,0x,11101 FACGE 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      /* -------- 1,1x,11101 FACGT 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      Bool isD  = (size & 1) == 1;
      Bool isGT = (size & 2) == 2;
      if (bitQ == 0 && isD) return False; // implied 1d case
      IROp opCMP = isGT ? (isD ? Iop_CmpLT64Fx2 : Iop_CmpLT32Fx4)
                        : (isD ? Iop_CmpLE64Fx2 : Iop_CmpLE32Fx4);
      IROp opABS = isD ? Iop_Abs64Fx2 : Iop_Abs32Fx4;
      IRTemp t1 = newTempV128();
      assign(t1, binop(opCMP, unop(opABS, getQReg128(mm)),
                              unop(opABS, getQReg128(nn)))); // swapd
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t1));
      const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
      DIP("%s %s.%s, %s.%s, %s.%s\n", isGT ? "facgt" : "facge",
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }
   if (bitU == 1
       && (opcode == BITS5(1,1,0,0,0) || opcode == BITS5(1,1,1,1,0))) {
      /* -------- 1,0x,11000 FMAXNMP 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      /* -------- 1,1x,11000 FMINNMP 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      /* -------- 1,0x,11110 FMAXP   2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      /* -------- 1,1x,11110 FMINP   2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      /* FMAXNM, FMINNM: FIXME -- KLUDGED */
      Bool isD = (size & 1) == 1;
      if (bitQ == 0 && isD) return False; // implied 1d case
      Bool   isMIN = (size & 2) == 2;
      Bool   isNM  = opcode == BITS5(1,1,0,0,0);
      IROp   opMXX = (isMIN ? mkVecMINF : mkVecMAXF)(isD ? 3 : 2);
      IRTemp srcN  = newTempV128();
      IRTemp srcM  = newTempV128();
      IRTemp preL  = IRTemp_INVALID;
      IRTemp preR  = IRTemp_INVALID;
      assign(srcN, getQReg128(nn));
      assign(srcM, getQReg128(mm));
      math_REARRANGE_FOR_FLOATING_PAIRWISE(&preL, &preR,
                                           srcM, srcN, isD, bitQ);
      putQReg128(
         dd, math_MAYBE_ZERO_HI64_fromE(
                bitQ,
                binop(opMXX, mkexpr(preL), mkexpr(preR))));
      const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
      DIP("%s%sp %s.%s, %s.%s, %s.%s\n",
          isMIN ? "fmin" : "fmax", isNM ? "nm" : "",
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }
   if (bitU == 1 && size <= X01 && opcode == BITS5(1,1,0,1,0)) {
      /* -------- 1,0x,11010 FADDP 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      Bool isD = size == X01;
      if (bitQ == 0 && isD) return False; // implied 1d case
      IRTemp srcN = newTempV128();
      IRTemp srcM = newTempV128();
      IRTemp preL = IRTemp_INVALID;
      IRTemp preR = IRTemp_INVALID;
      assign(srcN, getQReg128(nn));
      assign(srcM, getQReg128(mm));
      math_REARRANGE_FOR_FLOATING_PAIRWISE(&preL, &preR,
                                           srcM, srcN, isD, bitQ);
      putQReg128(
         dd, math_MAYBE_ZERO_HI64_fromE(
                bitQ,
                triop(mkVecADDF(isD ? 3 : 2),
                      mkexpr(mk_get_IR_rounding_mode()),
                      mkexpr(preL), mkexpr(preR))));
      const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
      DIP("%s %s.%s, %s.%s, %s.%s\n", "faddp",
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }
   if (bitU == 1 && size <= X01 && opcode == BITS5(1,1,1,1,1)) {
      /* -------- 1,0x,11111 FDIV 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      Bool isD = (size & 1) == 1;
      if (bitQ == 0 && isD) return False; // implied 1d case
      vassert(size <= 1);
      const IROp ops[2] = { Iop_Div32Fx4, Iop_Div64Fx2 };
      IROp   op = ops[size];
      IRTemp rm = mk_get_IR_rounding_mode();
      IRTemp t1 = newTempV128();
      IRTemp t2 = newTempV128();
      assign(t1, triop(op, mkexpr(rm), getQReg128(nn), getQReg128(mm)));
      assign(t2, math_MAYBE_ZERO_HI64(bitQ, t1));
      putQReg128(dd, mkexpr(t2));
      const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
      DIP("%s %s.%s, %s.%s, %s.%s\n", "fdiv",
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }
   if (bitU == 0 && opcode == BITS5(1,1,1,1,1)) {
      /* -------- 0,0x,11111: FRECPS  2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      /* -------- 0,1x,11111: FRSQRTS 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      Bool isSQRT = (size & 2) == 2;
      Bool isD    = (size & 1) == 1;
      if (bitQ == 0 && isD) return False; // implied 1d case
      IROp op = isSQRT ? (isD ? Iop_RSqrtStep64Fx2 : Iop_RSqrtStep32Fx4)
                       : (isD ? Iop_RecipStep64Fx2 : Iop_RecipStep32Fx4);
      IRTemp res = newTempV128();
      assign(res, binop(op, getQReg128(nn), getQReg128(mm)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
      DIP("%s %s.%s, %s.%s, %s.%s\n", isSQRT ? "frsqrts" : "frecps",
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   return False;
#  undef INSN
}

static
Bool dis_AdvSIMD_two_reg_misc(/*MB_OUT*/DisResult* dres, UInt insn)
{
   /* 31 30 29 28    23   21    16     11 9 4
      0  Q  U  01110 size 10000 opcode 10 n d
      Decode fields: U,size,opcode
   */
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   if (INSN(31,31) != 0
       || INSN(28,24) != BITS5(0,1,1,1,0)
       || INSN(21,17) != BITS5(1,0,0,0,0)
       || INSN(11,10) != BITS2(1,0)) {
      return False;
   }
   UInt bitQ   = INSN(30,30);
   UInt bitU   = INSN(29,29);
   UInt size   = INSN(23,22);
   UInt opcode = INSN(16,12);
   UInt nn     = INSN(9,5);
   UInt dd     = INSN(4,0);
   vassert(size < 4);
   if (bitU == 0 && size <= X10 && opcode == BITS5(0,0,0,0,0)) {
      /* -------- 0,00,00000: REV64 16b_16b, 8b_8b -------- */
      /* -------- 0,01,00000: REV64 8h_8h, 4h_4h -------- */
      /* -------- 0,10,00000: REV64 4s_4s, 2s_2s -------- */
      const IROp iops[3] = { Iop_Reverse8sIn64_x2,
                             Iop_Reverse16sIn64_x2, Iop_Reverse32sIn64_x2 };
      vassert(size <= 2);
      IRTemp res = newTempV128();
      assign(res, unop(iops[size], getQReg128(nn)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s\n", "rev64",
          nameQReg128(dd), arr, nameQReg128(nn), arr);
      return True;
   }
   if (bitU == 1 && size <= X01 && opcode == BITS5(0,0,0,0,0)) {
      /* -------- 1,00,00000: REV32 16b_16b, 8b_8b -------- */
      /* -------- 1,01,00000: REV32 8h_8h, 4h_4h -------- */
      Bool isH = size == X01;
      IRTemp res = newTempV128();
      IROp iop = isH ? Iop_Reverse16sIn32_x4 : Iop_Reverse8sIn32_x4;
      assign(res, unop(iop, getQReg128(nn)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s\n", "rev32",
          nameQReg128(dd), arr, nameQReg128(nn), arr);
      return True;
   }
   if (bitU == 0 && size == X00 && opcode == BITS5(0,0,0,0,1)) {
      /* -------- 0,00,00001: REV16 16b_16b, 8b_8b -------- */
      IRTemp res = newTempV128();
      assign(res, unop(Iop_Reverse8sIn16_x8, getQReg128(nn)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s\n", "rev16",
          nameQReg128(dd), arr, nameQReg128(nn), arr);
      return True;
   }
   if (opcode == BITS5(0,0,0,1,0) || opcode == BITS5(0,0,1,1,0)) {
      /* -------- 0,xx,00010: SADDLP std6_std6 -------- */
      /* -------- 1,xx,00010: UADDLP std6_std6 -------- */
      /* -------- 0,xx,00110: SADALP std6_std6 -------- */
      /* -------- 1,xx,00110: UADALP std6_std6 -------- */
      /* Widens, and size refers to the narrow size. */
      if (size == X11) return False; // no 1d or 2d cases
      Bool   isU   = bitU == 1;
      Bool   isACC = opcode == BITS5(0,0,1,1,0);
      IRTemp src = newTempV128();
      IRTemp sum = newTempV128();
      IRTemp res = newTempV128();
      assign(src, getQReg128(nn));
      assign(sum,
             binop(mkVecADD(size+1),
                   mkexpr(math_WIDEN_EVEN_OR_ODD_LANES(
                             isU, True/*fromOdd*/, size, mkexpr(src))),
                   mkexpr(math_WIDEN_EVEN_OR_ODD_LANES(
                             isU, False/*!fromOdd*/, size, mkexpr(src)))));
      assign(res, isACC ? binop(mkVecADD(size+1), mkexpr(sum), getQReg128(dd))
                        : mkexpr(sum));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
      const HChar* arrWide   = nameArr_Q_SZ(bitQ, size+1);
      DIP("%s %s.%s, %s.%s\n", isACC ? (isU ? "uadalp" : "sadalp")
                                     : (isU ? "uaddlp" : "saddlp"),
          nameQReg128(dd), arrWide, nameQReg128(nn), arrNarrow);
      return True;
   }
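   /* Note on the case above (illustrative): xADDLP adds each pair of
      adjacent narrow lanes into one lane of double width.  Widening the
      odd-numbered and even-numbered lanes separately and then adding the
      two widened vectors is equivalent to that pairwise sum. */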
   if (opcode == BITS5(0,0,0,1,1)) {
      /* -------- 0,xx,00011: SUQADD std7_std7 -------- */
      /* -------- 1,xx,00011: USQADD std7_std7 -------- */
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      Bool isUSQADD = bitU == 1;
      /* This is switched (in the US vs SU sense) deliberately.
         SUQADD corresponds to the ExtUSsatSS variants and
         USQADD corresponds to the ExtSUsatUU variants.
         See libvex_ir for more details. */
      IROp qop = isUSQADD ? mkVecQADDEXTSUSATUU(size)
                          : mkVecQADDEXTUSSATSS(size);
      IROp nop = mkVecADD(size);
      IRTemp argL = newTempV128();
      IRTemp argR = newTempV128();
      IRTemp qres = newTempV128();
      IRTemp nres = newTempV128();
      /* Because the two arguments to the addition are implicitly
         extended differently (one signedly, the other unsignedly) it is
         important to present them to the primop in the correct order. */
      assign(argL, getQReg128(nn));
      assign(argR, getQReg128(dd));
      assign(qres, math_MAYBE_ZERO_HI64_fromE(
                      bitQ, binop(qop, mkexpr(argL), mkexpr(argR))));
      assign(nres, math_MAYBE_ZERO_HI64_fromE(
                      bitQ, binop(nop, mkexpr(argL), mkexpr(argR))));
      putQReg128(dd, mkexpr(qres));
      updateQCFLAGwithDifference(qres, nres);
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s\n", isUSQADD ? "usqadd" : "suqadd",
          nameQReg128(dd), arr, nameQReg128(nn), arr);
      return True;
   }
   if (opcode == BITS5(0,0,1,0,0)) {
      /* -------- 0,xx,00100: CLS std6_std6 -------- */
      /* -------- 1,xx,00100: CLZ std6_std6 -------- */
      if (size == X11) return False; // no 1d or 2d cases
      const IROp opsCLS[3] = { Iop_Cls8x16, Iop_Cls16x8, Iop_Cls32x4 };
      const IROp opsCLZ[3] = { Iop_Clz8x16, Iop_Clz16x8, Iop_Clz32x4 };
      Bool isCLZ = bitU == 1;
      IRTemp res = newTempV128();
      vassert(size <= 2);
      assign(res, unop(isCLZ ? opsCLZ[size] : opsCLS[size], getQReg128(nn)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s\n", isCLZ ? "clz" : "cls",
          nameQReg128(dd), arr, nameQReg128(nn), arr);
      return True;
   }
   if (size == X00 && opcode == BITS5(0,0,1,0,1)) {
      /* -------- 0,00,00101: CNT 16b_16b, 8b_8b -------- */
      /* -------- 1,00,00101: NOT 16b_16b, 8b_8b -------- */
      IRTemp res = newTempV128();
      assign(res, unop(bitU == 0 ? Iop_Cnt8x16 : Iop_NotV128,
                       getQReg128(nn)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* arr = nameArr_Q_SZ(bitQ, 0);
      DIP("%s %s.%s, %s.%s\n", bitU == 0 ? "cnt" : "not",
          nameQReg128(dd), arr, nameQReg128(nn), arr);
      return True;
   }
   if (bitU == 1 && size == X01 && opcode == BITS5(0,0,1,0,1)) {
      /* -------- 1,01,00101 RBIT 16b_16b, 8b_8b -------- */
      IRTemp res = newTempV128();
      assign(res, unop(Iop_Reverse1sIn8_x16, getQReg128(nn)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* arr = nameArr_Q_SZ(bitQ, 0);
      DIP("%s %s.%s, %s.%s\n", "rbit",
          nameQReg128(dd), arr, nameQReg128(nn), arr);
      return True;
   }
   if (opcode == BITS5(0,0,1,1,1)) {
      /* -------- 0,xx,00111 SQABS std7_std7 -------- */
      /* -------- 1,xx,00111 SQNEG std7_std7 -------- */
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      Bool isNEG = bitU == 1;
      IRTemp qresFW = IRTemp_INVALID, nresFW = IRTemp_INVALID;
      (isNEG ? math_SQNEG : math_SQABS)( &qresFW, &nresFW,
                                         getQReg128(nn), size );
      IRTemp qres = newTempV128(), nres = newTempV128();
      assign(qres, math_MAYBE_ZERO_HI64(bitQ, qresFW));
      assign(nres, math_MAYBE_ZERO_HI64(bitQ, nresFW));
      putQReg128(dd, mkexpr(qres));
      updateQCFLAGwithDifference(qres, nres);
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s\n", isNEG ? "sqneg" : "sqabs",
          nameQReg128(dd), arr, nameQReg128(nn), arr);
      return True;
   }
   if (opcode == BITS5(0,1,0,0,0)) {
      /* -------- 0,xx,01000: CMGT std7_std7_#0 -------- */ // >s 0
      /* -------- 1,xx,01000: CMGE std7_std7_#0 -------- */ // >=s 0
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      Bool    isGT  = bitU == 0;
      IRExpr* argL  = getQReg128(nn);
      IRExpr* argR  = mkV128(0x0000);
      IRTemp  res   = newTempV128();
      IROp    opGTS = mkVecCMPGTS(size);
      assign(res, isGT ? binop(opGTS, argL, argR)
                       : unop(Iop_NotV128, binop(opGTS, argR, argL)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("cm%s %s.%s, %s.%s, #0\n", isGT ? "gt" : "ge",
          nameQReg128(dd), arr, nameQReg128(nn), arr);
      return True;
   }
   if (opcode == BITS5(0,1,0,0,1)) {
      /* -------- 0,xx,01001: CMEQ std7_std7_#0 -------- */ // == 0
      /* -------- 1,xx,01001: CMLE std7_std7_#0 -------- */ // <=s 0
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      Bool    isEQ = bitU == 0;
      IRExpr* argL = getQReg128(nn);
      IRExpr* argR = mkV128(0x0000);
      IRTemp  res  = newTempV128();
      assign(res, isEQ ? binop(mkVecCMPEQ(size), argL, argR)
                       : unop(Iop_NotV128,
                              binop(mkVecCMPGTS(size), argL, argR)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("cm%s %s.%s, %s.%s, #0\n", isEQ ? "eq" : "le",
          nameQReg128(dd), arr, nameQReg128(nn), arr);
      return True;
   }
   if (bitU == 0 && opcode == BITS5(0,1,0,1,0)) {
      /* -------- 0,xx,01010: CMLT std7_std7_#0 -------- */ // <s 0
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      IRExpr* argL = getQReg128(nn);
      IRExpr* argR = mkV128(0x0000);
      IRTemp  res  = newTempV128();
      assign(res, binop(mkVecCMPGTS(size), argR, argL));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("cm%s %s.%s, %s.%s, #0\n", "lt",
          nameQReg128(dd), arr, nameQReg128(nn), arr);
      return True;
   }
   if (bitU == 0 && opcode == BITS5(0,1,0,1,1)) {
      /* -------- 0,xx,01011: ABS std7_std7 -------- */
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      IRTemp res = newTempV128();
      assign(res, unop(mkVecABS(size), getQReg128(nn)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("abs %s.%s, %s.%s\n", nameQReg128(dd), arr, nameQReg128(nn), arr);
      return True;
   }
   if (bitU == 1 && opcode == BITS5(0,1,0,1,1)) {
      /* -------- 1,xx,01011: NEG std7_std7 -------- */
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      IRTemp res = newTempV128();
      assign(res, binop(mkVecSUB(size), mkV128(0x0000), getQReg128(nn)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("neg %s.%s, %s.%s\n", nameQReg128(dd), arr, nameQReg128(nn), arr);
      return True;
   }
   UInt ix = 0; /*INVALID*/
   if (size >= X10) {
      switch (opcode) {
         case BITS5(0,1,1,0,0): ix = (bitU == 1) ? 4 : 1; break;
         case BITS5(0,1,1,0,1): ix = (bitU == 1) ? 5 : 2; break;
         case BITS5(0,1,1,1,0): if (bitU == 0) ix = 3; break;
         default: break;
      }
   }
   if (ix > 0) {
      /* -------- 0,1x,01100 FCMGT 2d_2d,4s_4s,2s_2s _#0.0 (ix 1) -------- */
      /* -------- 0,1x,01101 FCMEQ 2d_2d,4s_4s,2s_2s _#0.0 (ix 2) -------- */
      /* -------- 0,1x,01110 FCMLT 2d_2d,4s_4s,2s_2s _#0.0 (ix 3) -------- */
      /* -------- 1,1x,01100 FCMGE 2d_2d,4s_4s,2s_2s _#0.0 (ix 4) -------- */
      /* -------- 1,1x,01101 FCMLE 2d_2d,4s_4s,2s_2s _#0.0 (ix 5) -------- */
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      Bool isD     = size == X11;
      IROp opCmpEQ = isD ? Iop_CmpEQ64Fx2 : Iop_CmpEQ32Fx4;
      IROp opCmpLE = isD ? Iop_CmpLE64Fx2 : Iop_CmpLE32Fx4;
      IROp opCmpLT = isD ? Iop_CmpLT64Fx2 : Iop_CmpLT32Fx4;
      IROp opCmp   = Iop_INVALID;
      Bool swap    = False;
      const HChar* nm = "??";
      switch (ix) {
         case 1: nm = "fcmgt"; opCmp = opCmpLT; swap = True; break;
         case 2: nm = "fcmeq"; opCmp = opCmpEQ; break;
         case 3: nm = "fcmlt"; opCmp = opCmpLT; break;
         case 4: nm = "fcmge"; opCmp = opCmpLE; swap = True; break;
         case 5: nm = "fcmle"; opCmp = opCmpLE; break;
         default: vassert(0);
      }
      IRExpr* zero = mkV128(0x0000);
      IRTemp res = newTempV128();
      assign(res, swap ? binop(opCmp, zero, getQReg128(nn))
                       : binop(opCmp, getQReg128(nn), zero));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* arr = bitQ == 0 ? "2s" : (size == X11 ? "2d" : "4s");
      DIP("%s %s.%s, %s.%s, #0.0\n", nm,
          nameQReg128(dd), arr, nameQReg128(nn), arr);
      return True;
   }
   if (size >= X10 && opcode == BITS5(0,1,1,1,1)) {
      /* -------- 0,1x,01111: FABS 2d_2d, 4s_4s, 2s_2s -------- */
      /* -------- 1,1x,01111: FNEG 2d_2d, 4s_4s, 2s_2s -------- */
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      Bool isFNEG = bitU == 1;
      IROp op = isFNEG ? (size == X10 ? Iop_Neg32Fx4 : Iop_Neg64Fx2)
                       : (size == X10 ? Iop_Abs32Fx4 : Iop_Abs64Fx2);
      IRTemp res = newTempV128();
      assign(res, unop(op, getQReg128(nn)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* arr = bitQ == 0 ? "2s" : (size == X11 ? "2d" : "4s");
      DIP("%s %s.%s, %s.%s\n", isFNEG ? "fneg" : "fabs",
          nameQReg128(dd), arr, nameQReg128(nn), arr);
      return True;
   }
   if (bitU == 0 && opcode == BITS5(1,0,0,1,0)) {
      /* -------- 0,xx,10010: XTN{,2} -------- */
      if (size == X11) return False;
      vassert(size < 3);
      Bool   is2  = bitQ == 1;
      IROp   opN  = mkVecNARROWUN(size);
      IRTemp resN = newTempV128();
      assign(resN, unop(Iop_64UtoV128, unop(opN, getQReg128(nn))));
      putLO64andZUorPutHI64(is2, dd, resN);
      const HChar* nm        = "xtn";
      const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
      const HChar* arrWide   = nameArr_Q_SZ(1,    size+1);
      DIP("%s%s %s.%s, %s.%s\n", is2 ? "2" : "", nm,
          nameQReg128(dd), arrNarrow, nameQReg128(nn), arrWide);
      return True;
   }
   if (opcode == BITS5(1,0,1,0,0)
       || (bitU == 1 && opcode == BITS5(1,0,0,1,0))) {
      /* -------- 0,xx,10100: SQXTN{,2} -------- */
      /* -------- 1,xx,10100: UQXTN{,2} -------- */
      /* -------- 1,xx,10010: SQXTUN{,2} -------- */
      if (size == X11) return False;
      vassert(size < 3);
      Bool  is2    = bitQ == 1;
      IROp  opN    = Iop_INVALID;
      Bool  zWiden = True;
      const HChar* nm = "??";
      /**/ if (bitU == 0 && opcode == BITS5(1,0,1,0,0)) {
         opN = mkVecQNARROWUNSS(size); nm = "sqxtn"; zWiden = False;
      }
      else if (bitU == 1 && opcode == BITS5(1,0,1,0,0)) {
         opN = mkVecQNARROWUNUU(size); nm = "uqxtn";
      }
      else if (bitU == 1 && opcode == BITS5(1,0,0,1,0)) {
         opN = mkVecQNARROWUNSU(size); nm = "sqxtun";
      }
      else vassert(0);
      IRTemp src  = newTempV128();
      assign(src, getQReg128(nn));
      IRTemp resN = newTempV128();
      assign(resN, unop(Iop_64UtoV128, unop(opN, mkexpr(src))));
      putLO64andZUorPutHI64(is2, dd, resN);
      IRTemp resW = math_WIDEN_LO_OR_HI_LANES(zWiden, False/*!fromUpperHalf*/,
                                              size, mkexpr(resN));
      updateQCFLAGwithDifference(src, resW);
      const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
      const HChar* arrWide   = nameArr_Q_SZ(1,    size+1);
      DIP("%s%s %s.%s, %s.%s\n", is2 ? "2" : "", nm,
          nameQReg128(dd), arrNarrow, nameQReg128(nn), arrWide);
      return True;
   }
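   /* Note on the case above (illustrative): saturation is detected by
      re-widening the narrowed result and comparing it with the original
      source.  E.g. for SQXTN.8B, an Int16 lane holding 0x1234 narrows to
      the saturated Int8 0x7F, which re-widens to 0x007F != 0x1234, so
      the QC flag gets set. */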
   if (bitU == 1 && opcode == BITS5(1,0,0,1,1)) {
      /* -------- 1,xx,10011 SHLL{2} #lane-width -------- */
      /* Widens, and size is the narrow size. */
      if (size == X11) return False;
      Bool is2   = bitQ == 1;
      IROp opINT = is2 ? mkVecINTERLEAVEHI(size) : mkVecINTERLEAVELO(size);
      IROp opSHL = mkVecSHLN(size+1);
      IRTemp src = newTempV128();
      IRTemp res = newTempV128();
      assign(src, getQReg128(nn));
      assign(res, binop(opSHL, binop(opINT, mkexpr(src), mkexpr(src)),
                               mkU8(8 << size)));
      putQReg128(dd, mkexpr(res));
      const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
      const HChar* arrWide   = nameArr_Q_SZ(1, size+1);
      DIP("shll%s %s.%s, %s.%s, #%d\n", is2 ? "2" : "",
          nameQReg128(dd), arrWide, nameQReg128(nn), arrNarrow, 8 << size);
      return True;
   }
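   /* SHLL is synthesised rather than mapped to a single IROp:
      interleaving |src| with itself doubles each lane's width (each
      narrow lane value appears twice within one wide lane), and the
      subsequent left shift by the narrow lane width (8 << size bits)
      leaves exactly one copy, shifted into place, with zeroes below. */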
   if (bitU == 0 && size <= X01 && opcode == BITS5(1,0,1,1,0)) {
      /* -------- 0,0x,10110: FCVTN 4h/8h_4s, 2s/4s_2d -------- */
      UInt   nLanes = size == X00 ? 4 : 2;
      IRType srcTy  = size == X00 ? Ity_F32 : Ity_F64;
      IROp   opCvt  = size == X00 ? Iop_F32toF16 : Iop_F64toF32;
      IRTemp rm     = mk_get_IR_rounding_mode();
      IRTemp src[nLanes];
      for (UInt i = 0; i < nLanes; i++) {
         src[i] = newTemp(srcTy);
         assign(src[i], getQRegLane(nn, i, srcTy));
      }
      for (UInt i = 0; i < nLanes; i++) {
         putQRegLane(dd, nLanes * bitQ + i,
                     binop(opCvt, mkexpr(rm), mkexpr(src[i])));
      }
      if (bitQ == 0) {
         putQRegLane(dd, 1, mkU64(0));
      }
      const HChar* arrNarrow = nameArr_Q_SZ(bitQ, 1+size);
      const HChar* arrWide   = nameArr_Q_SZ(1,    1+size+1);
      DIP("fcvtn%s %s.%s, %s.%s\n", bitQ ? "2" : "",
          nameQReg128(dd), arrNarrow, nameQReg128(nn), arrWide);
      return True;
   }
   if (bitU == 1 && size == X01 && opcode == BITS5(1,0,1,1,0)) {
      /* -------- 1,01,10110: FCVTXN 2s/4s_2d -------- */
      /* Using Irrm_NEAREST here isn't right.  The docs say "round to
         odd" but I don't know what that really means. */
      IRType srcTy = Ity_F64;
      IROp   opCvt = Iop_F64toF32;
      IRTemp src[2];
      for (UInt i = 0; i < 2; i++) {
         src[i] = newTemp(srcTy);
         assign(src[i], getQRegLane(nn, i, srcTy));
      }
      for (UInt i = 0; i < 2; i++) {
         putQRegLane(dd, 2 * bitQ + i,
                     binop(opCvt, mkU32(Irrm_NEAREST), mkexpr(src[i])));
      }
      if (bitQ == 0) {
         putQRegLane(dd, 1, mkU64(0));
      }
      const HChar* arrNarrow = nameArr_Q_SZ(bitQ, 1+size);
      const HChar* arrWide   = nameArr_Q_SZ(1,    1+size+1);
      DIP("fcvtxn%s %s.%s, %s.%s\n", bitQ ? "2" : "",
          nameQReg128(dd), arrNarrow, nameQReg128(nn), arrWide);
      return True;
   }
   if (bitU == 0 && size <= X01 && opcode == BITS5(1,0,1,1,1)) {
      /* -------- 0,0x,10111: FCVTL 4s_4h/8h, 2d_2s/4s -------- */
      UInt   nLanes = size == X00 ? 4 : 2;
      IRType srcTy  = size == X00 ? Ity_F16 : Ity_F32;
      IROp   opCvt  = size == X00 ? Iop_F16toF32 : Iop_F32toF64;
      IRTemp src[nLanes];
      for (UInt i = 0; i < nLanes; i++) {
         src[i] = newTemp(srcTy);
         assign(src[i], getQRegLane(nn, nLanes * bitQ + i, srcTy));
      }
      for (UInt i = 0; i < nLanes; i++) {
         putQRegLane(dd, i, unop(opCvt, mkexpr(src[i])));
      }
      const HChar* arrNarrow = nameArr_Q_SZ(bitQ, 1+size);
      const HChar* arrWide   = nameArr_Q_SZ(1,    1+size+1);
      DIP("fcvtl%s %s.%s, %s.%s\n", bitQ ? "2" : "",
          nameQReg128(dd), arrWide, nameQReg128(nn), arrNarrow);
      return True;
   }
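   /* Unlike FCVTN above, FCVTL needs no rounding mode: the widening
      conversions F16->F32 and F32->F64 are exact, hence the plain
      unops. */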
   UInt ix = 0; /*INVALID*/
   if (opcode == BITS5(1,1,0,0,0) || opcode == BITS5(1,1,0,0,1)) {
      ix = 1 + ((((bitU & 1) << 2) | ((size & 2) << 0)) | ((opcode & 1) << 0));
      // = 1 + bitU[0]:size[1]:opcode[0]
      vassert(ix >= 1 && ix <= 8);
      if (ix == 7) ix = 0;
   }
   if (ix > 0) {
      /* -------- 0,0x,11000 FRINTN 2d_2d, 4s_4s, 2s_2s (1) -------- */
      /* -------- 0,0x,11001 FRINTM 2d_2d, 4s_4s, 2s_2s (2) -------- */
      /* -------- 0,1x,11000 FRINTP 2d_2d, 4s_4s, 2s_2s (3) -------- */
      /* -------- 0,1x,11001 FRINTZ 2d_2d, 4s_4s, 2s_2s (4) -------- */
      /* -------- 1,0x,11000 FRINTA 2d_2d, 4s_4s, 2s_2s (5) -------- */
      /* -------- 1,0x,11001 FRINTX 2d_2d, 4s_4s, 2s_2s (6) -------- */
      /* -------- 1,1x,11000 (apparently unassigned)    (7) -------- */
      /* -------- 1,1x,11001 FRINTI 2d_2d, 4s_4s, 2s_2s (8) -------- */
      /* rm plan:
         FRINTN: tieeven -- !! FIXME KLUDGED !!
         FRINTM: -inf
         FRINTP: +inf
         FRINTZ: zero
         FRINTA: tieaway -- !! FIXME KLUDGED !!
         FRINTX: per FPCR + "exact = TRUE"
         FRINTI: per FPCR
      */
      Bool isD = (size & 1) == 1;
      if (bitQ == 0 && isD) return False; // implied 1d case

      IRTemp irrmRM = mk_get_IR_rounding_mode();

      HChar ch = '?';
      IRTemp irrm = newTemp(Ity_I32);
      switch (ix) {
         case 1: ch = 'n'; assign(irrm, mkU32(Irrm_NEAREST)); break;
         case 2: ch = 'm'; assign(irrm, mkU32(Irrm_NegINF)); break;
         case 3: ch = 'p'; assign(irrm, mkU32(Irrm_PosINF)); break;
         case 4: ch = 'z'; assign(irrm, mkU32(Irrm_ZERO)); break;
         // The following is a kludge.  Should be: Irrm_NEAREST_TIE_AWAY_0
         case 5: ch = 'a'; assign(irrm, mkU32(Irrm_NEAREST)); break;
         // I am unsure about the following, due to the "integral exact"
         // description in the manual.  What does it mean? (frintx, that is)
         case 6: ch = 'x'; assign(irrm, mkexpr(irrmRM)); break;
         case 8: ch = 'i'; assign(irrm, mkexpr(irrmRM)); break;
         default: vassert(0);
      }

      IROp opRND = isD ? Iop_RoundF64toInt : Iop_RoundF32toInt;
      if (isD) {
         for (UInt i = 0; i < 2; i++) {
            putQRegLane(dd, i, binop(opRND, mkexpr(irrm),
                                     getQRegLane(nn, i, Ity_F64)));
         }
      } else {
         UInt n = bitQ==1 ? 4 : 2;
         for (UInt i = 0; i < n; i++) {
            putQRegLane(dd, i, binop(opRND, mkexpr(irrm),
                                     getQRegLane(nn, i, Ity_F32)));
         }
         if (bitQ == 0)
            putQRegLane(dd, 1, mkU64(0)); // zero out lanes 2 and 3
      }
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("frint%c %s.%s, %s.%s\n", ch,
          nameQReg128(dd), arr, nameQReg128(nn), arr);
      return True;
   }
   ix = 0; /*INVALID*/
   switch (opcode) {
      case BITS5(1,1,0,1,0): ix = ((size & 2) == 2) ? 4 : 1; break;
      case BITS5(1,1,0,1,1): ix = ((size & 2) == 2) ? 5 : 2; break;
      case BITS5(1,1,1,0,0): if ((size & 2) == 0) ix = 3; break;
      default: break;
   }
   if (ix > 0) {
      /* -------- 0,0x,11010 FCVTNS 2d_2d, 4s_4s, 2s_2s (ix 1) -------- */
      /* -------- 0,0x,11011 FCVTMS 2d_2d, 4s_4s, 2s_2s (ix 2) -------- */
      /* -------- 0,0x,11100 FCVTAS 2d_2d, 4s_4s, 2s_2s (ix 3) -------- */
      /* -------- 0,1x,11010 FCVTPS 2d_2d, 4s_4s, 2s_2s (ix 4) -------- */
      /* -------- 0,1x,11011 FCVTZS 2d_2d, 4s_4s, 2s_2s (ix 5) -------- */
      /* -------- 1,0x,11010 FCVTNU 2d_2d, 4s_4s, 2s_2s (ix 1) -------- */
      /* -------- 1,0x,11011 FCVTMU 2d_2d, 4s_4s, 2s_2s (ix 2) -------- */
      /* -------- 1,0x,11100 FCVTAU 2d_2d, 4s_4s, 2s_2s (ix 3) -------- */
      /* -------- 1,1x,11010 FCVTPU 2d_2d, 4s_4s, 2s_2s (ix 4) -------- */
      /* -------- 1,1x,11011 FCVTZU 2d_2d, 4s_4s, 2s_2s (ix 5) -------- */
      Bool isD = (size & 1) == 1;
      if (bitQ == 0 && isD) return False; // implied 1d case

      IRRoundingMode irrm = 8; /*impossible*/
      HChar ch = '?';
      switch (ix) {
         case 1: ch = 'n'; irrm = Irrm_NEAREST; break;
         case 2: ch = 'm'; irrm = Irrm_NegINF; break;
         case 3: ch = 'a'; irrm = Irrm_NEAREST; break; /* kludge? */
         case 4: ch = 'p'; irrm = Irrm_PosINF; break;
         case 5: ch = 'z'; irrm = Irrm_ZERO; break;
         default: vassert(0);
      }
      IROp cvt = Iop_INVALID;
      if (bitU == 1) {
         cvt = isD ? Iop_F64toI64U : Iop_F32toI32U;
      } else {
         cvt = isD ? Iop_F64toI64S : Iop_F32toI32S;
      }
      if (isD) {
         for (UInt i = 0; i < 2; i++) {
            putQRegLane(dd, i, binop(cvt, mkU32(irrm),
                                     getQRegLane(nn, i, Ity_F64)));
         }
      } else {
         UInt n = bitQ==1 ? 4 : 2;
         for (UInt i = 0; i < n; i++) {
            putQRegLane(dd, i, binop(cvt, mkU32(irrm),
                                     getQRegLane(nn, i, Ity_F32)));
         }
         if (bitQ == 0)
            putQRegLane(dd, 1, mkU64(0)); // zero out lanes 2 and 3
      }
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("fcvt%c%c %s.%s, %s.%s\n", ch, bitU == 1 ? 'u' : 's',
          nameQReg128(dd), arr, nameQReg128(nn), arr);
      return True;
   }
   if (size == X10 && opcode == BITS5(1,1,1,0,0)) {
      /* -------- 0,10,11100: URECPE  4s_4s, 2s_2s -------- */
      /* -------- 1,10,11100: URSQRTE 4s_4s, 2s_2s -------- */
      Bool isREC = bitU == 0;
      IROp op    = isREC ? Iop_RecipEst32Ux4 : Iop_RSqrtEst32Ux4;
      IRTemp res = newTempV128();
      assign(res, unop(op, getQReg128(nn)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* nm  = isREC ? "urecpe" : "ursqrte";
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s\n", nm,
          nameQReg128(dd), arr, nameQReg128(nn), arr);
      return True;
   }
   if (size <= X01 && opcode == BITS5(1,1,1,0,1)) {
      /* -------- 0,0x,11101: SCVTF -------- */
      /* -------- 1,0x,11101: UCVTF -------- */
      /* 31  28      22 21      15     9 4
         0q0 01110 0 sz 1 00001 110110 n d  SCVTF Vd, Vn
         0q1 01110 0 sz 1 00001 110110 n d  UCVTF Vd, Vn
         case sz:Q of 00 -> 2S, zero upper, 01 -> 4S, 10 -> illegal, 11 -> 2D
      */
      Bool isQ   = bitQ == 1;
      Bool isU   = bitU == 1;
      Bool isF64 = (size & 1) == 1;
      if (isQ || !isF64) {
         IRType tyF = Ity_INVALID, tyI = Ity_INVALID;
         UInt   nLanes = 0;
         Bool   zeroHI = False;
         const HChar* arrSpec = NULL;
         Bool ok = getLaneInfo_Q_SZ(&tyI, &tyF, &nLanes, &zeroHI, &arrSpec,
                                    isQ, isF64);
         IROp iop = isU ? (isF64 ? Iop_I64UtoF64 : Iop_I32UtoF32)
                        : (isF64 ? Iop_I64StoF64 : Iop_I32StoF32);
         IRTemp rm = mk_get_IR_rounding_mode();
         UInt i;
         vassert(ok); /* the 'if' above should ensure this */
         for (i = 0; i < nLanes; i++) {
            putQRegLane(dd, i,
                        binop(iop, mkexpr(rm), getQRegLane(nn, i, tyI)));
         }
         if (zeroHI) {
            putQRegLane(dd, 1, mkU64(0));
         }
         DIP("%ccvtf %s.%s, %s.%s\n", isU ? 'u' : 's',
             nameQReg128(dd), arrSpec, nameQReg128(nn), arrSpec);
         return True;
      }
      /* else fall through */
   }
   if (size >= X10 && opcode == BITS5(1,1,1,0,1)) {
      /* -------- 0,1x,11101: FRECPE  2d_2d, 4s_4s, 2s_2s -------- */
      /* -------- 1,1x,11101: FRSQRTE 2d_2d, 4s_4s, 2s_2s -------- */
      Bool isSQRT = bitU == 1;
      Bool isD    = (size & 1) == 1;
      IROp op     = isSQRT ? (isD ? Iop_RSqrtEst64Fx2 : Iop_RSqrtEst32Fx4)
                           : (isD ? Iop_RecipEst64Fx2 : Iop_RecipEst32Fx4);
      if (bitQ == 0 && isD) return False; // implied 1d case
      IRTemp resV = newTempV128();
      assign(resV, unop(op, getQReg128(nn)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, resV));
      const HChar* arr = bitQ == 0 ? "2s" : (size == X11 ? "2d" : "4s");
      DIP("%s %s.%s, %s.%s\n", isSQRT ? "frsqrte" : "frecpe",
          nameQReg128(dd), arr, nameQReg128(nn), arr);
      return True;
   }
   if (bitU == 1 && size >= X10 && opcode == BITS5(1,1,1,1,1)) {
      /* -------- 1,1x,11111: FSQRT 2d_2d, 4s_4s, 2s_2s -------- */
      Bool isD = (size & 1) == 1;
      IROp op  = isD ? Iop_Sqrt64Fx2 : Iop_Sqrt32Fx4;
      if (bitQ == 0 && isD) return False; // implied 1d case
      IRTemp resV = newTempV128();
      assign(resV, binop(op, mkexpr(mk_get_IR_rounding_mode()),
                         getQReg128(nn)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, resV));
      const HChar* arr = bitQ == 0 ? "2s" : (size == X11 ? "2d" : "4s");
      DIP("%s %s.%s, %s.%s\n", "fsqrt",
          nameQReg128(dd), arr, nameQReg128(nn), arr);
      return True;
   }

   return False;
#  undef INSN
}


static
Bool dis_AdvSIMD_vector_x_indexed_elem(/*MB_OUT*/DisResult* dres, UInt insn)
{
   /* 31    28    23   21 20 19 15     11   9 4
      0 Q U 01111 size L  M  m  opcode H 0 n d
      Decode fields are: u,size,opcode
      M is really part of the mm register number.  Individual
      cases need to inspect L and H though.
   */
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   if (INSN(31,31) != 0
       || INSN(28,24) != BITS5(0,1,1,1,1) || INSN(10,10) !=0) {
      return False;
   }
   UInt bitQ   = INSN(30,30);
   UInt bitU   = INSN(29,29);
   UInt size   = INSN(23,22);
   UInt bitL   = INSN(21,21);
   UInt bitM   = INSN(20,20);
   UInt mmLO4  = INSN(19,16);
   UInt opcode = INSN(15,12);
   UInt bitH   = INSN(11,11);
   UInt nn     = INSN(9,5);
   UInt dd     = INSN(4,0);

   vassert(bitH < 2 && bitM < 2 && bitL < 2);
   if (bitU == 0 && size >= X10
       && (opcode == BITS4(0,0,0,1) || opcode == BITS4(0,1,0,1))) {
      /* -------- 0,1x,0001 FMLA 2d_2d_d[], 4s_4s_s[], 2s_2s_s[] -------- */
      /* -------- 0,1x,0101 FMLS 2d_2d_d[], 4s_4s_s[], 2s_2s_s[] -------- */
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      Bool isD   = (size & 1) == 1;
      Bool isSUB = opcode == BITS4(0,1,0,1);
      UInt index;
      if      (!isD)             index = (bitH << 1) | bitL;
      else if (isD && bitL == 0) index = bitH;
      else return False; // sz:L == x11 => unallocated encoding
      vassert(index < (isD ? 2 : 4));
      IRType ity  = isD ? Ity_F64 : Ity_F32;
      IRTemp elem = newTemp(ity);
      UInt   mm   = (bitM << 4) | mmLO4;
      assign(elem, getQRegLane(mm, index, ity));
      IRTemp dupd  = math_DUP_TO_V128(elem, ity);
      IROp   opADD = isD ? Iop_Add64Fx2 : Iop_Add32Fx4;
      IROp   opSUB = isD ? Iop_Sub64Fx2 : Iop_Sub32Fx4;
      IROp   opMUL = isD ? Iop_Mul64Fx2 : Iop_Mul32Fx4;
      IRTemp rm    = mk_get_IR_rounding_mode();
      IRTemp t1    = newTempV128();
      IRTemp t2    = newTempV128();
      // FIXME: double rounding; use FMA primops instead
      assign(t1, triop(opMUL, mkexpr(rm), getQReg128(nn), mkexpr(dupd)));
      assign(t2, triop(isSUB ? opSUB : opADD,
                       mkexpr(rm), getQReg128(dd), mkexpr(t1)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t2));
      const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
      DIP("%s %s.%s, %s.%s, %s.%c[%u]\n", isSUB ? "fmls" : "fmla",
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm),
          isD ? 'd' : 's', index);
      return True;
   }
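   /* Element-index decoding above: for S-sized lanes the index is H:L
      (0..3); for D-sized lanes only H is meaningful (0..1) and sz:L ==
      x11 is an unallocated encoding.  So e.g. FMLA Vd.4S, Vn.4S,
      Vm.S[3] is encoded with H=1, L=1. */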
   if (size >= X10 && opcode == BITS4(1,0,0,1)) {
      /* -------- 0,1x,1001 FMUL  2d_2d_d[], 4s_4s_s[], 2s_2s_s[] -------- */
      /* -------- 1,1x,1001 FMULX 2d_2d_d[], 4s_4s_s[], 2s_2s_s[] -------- */
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      Bool isD    = (size & 1) == 1;
      Bool isMULX = bitU == 1;
      UInt index;
      if      (!isD)             index = (bitH << 1) | bitL;
      else if (isD && bitL == 0) index = bitH;
      else return False; // sz:L == x11 => unallocated encoding
      vassert(index < (isD ? 2 : 4));
      IRType ity  = isD ? Ity_F64 : Ity_F32;
      IRTemp elem = newTemp(ity);
      UInt   mm   = (bitM << 4) | mmLO4;
      assign(elem, getQRegLane(mm, index, ity));
      IRTemp dupd = math_DUP_TO_V128(elem, ity);
      // KLUDGE: FMULX is treated the same way as FMUL.  That can't be right.
      IRTemp res = newTempV128();
      assign(res, triop(isD ? Iop_Mul64Fx2 : Iop_Mul32Fx4,
                        mkexpr(mk_get_IR_rounding_mode()),
                        getQReg128(nn), mkexpr(dupd)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
      DIP("%s %s.%s, %s.%s, %s.%c[%u]\n",
          isMULX ? "fmulx" : "fmul", nameQReg128(dd), arr,
          nameQReg128(nn), arr, nameQReg128(mm), isD ? 'd' : 's', index);
      return True;
   }
   if ((bitU == 1 && (opcode == BITS4(0,0,0,0) || opcode == BITS4(0,1,0,0)))
       || (bitU == 0 && opcode == BITS4(1,0,0,0))) {
      /* -------- 1,xx,0000 MLA s/h variants only -------- */
      /* -------- 1,xx,0100 MLS s/h variants only -------- */
      /* -------- 0,xx,1000 MUL s/h variants only -------- */
      Bool isMLA = opcode == BITS4(0,0,0,0);
      Bool isMLS = opcode == BITS4(0,1,0,0);
      UInt mm    = 32; // invalid
      UInt ix    = 16; // invalid
      switch (size) {
         case X00:
            return False; // b case is not allowed
         case X01:
            mm = mmLO4; ix = (bitH << 2) | (bitL << 1) | (bitM << 0); break;
         case X10:
            mm = (bitM << 4) | mmLO4; ix = (bitH << 1) | (bitL << 0); break;
         case X11:
            return False; // d case is not allowed
         default:
            vassert(0);
      }
      vassert(mm < 32 && ix < 16);
      IROp   opMUL = mkVecMUL(size);
      IROp   opADD = mkVecADD(size);
      IROp   opSUB = mkVecSUB(size);
      HChar  ch    = size == X01 ? 'h' : 's';
      IRTemp vecM  = math_DUP_VEC_ELEM(getQReg128(mm), size, ix);
      IRTemp vecD  = newTempV128();
      IRTemp vecN  = newTempV128();
      IRTemp res   = newTempV128();
      assign(vecD, getQReg128(dd));
      assign(vecN, getQReg128(nn));
      IRExpr* prod = binop(opMUL, mkexpr(vecN), mkexpr(vecM));
      if (isMLA || isMLS) {
         assign(res, binop(isMLA ? opADD : opSUB, mkexpr(vecD), prod));
      } else {
         assign(res, prod);
      }
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s, %s.%c[%u]\n", isMLA ? "mla"
                                                : (isMLS ? "mls" : "mul"),
          nameQReg128(dd), arr,
          nameQReg128(nn), arr, nameQReg128(mm), ch, ix);
      return True;
   }
   if (opcode == BITS4(1,0,1,0)
       || opcode == BITS4(0,0,1,0) || opcode == BITS4(0,1,1,0)) {
      /* -------- 0,xx,1010 SMULL s/h variants only -------- */ // 0 (ks)
      /* -------- 1,xx,1010 UMULL s/h variants only -------- */ // 0
      /* -------- 0,xx,0010 SMLAL s/h variants only -------- */ // 1
      /* -------- 1,xx,0010 UMLAL s/h variants only -------- */ // 1
      /* -------- 0,xx,0110 SMLSL s/h variants only -------- */ // 2
      /* -------- 1,xx,0110 UMLSL s/h variants only -------- */ // 2
      /* Widens, and size refers to the narrowed lanes. */
      UInt ks = 3;
      switch (opcode) {
         case BITS4(1,0,1,0): ks = 0; break;
         case BITS4(0,0,1,0): ks = 1; break;
         case BITS4(0,1,1,0): ks = 2; break;
         default: vassert(0);
      }
      vassert(ks >= 0 && ks <= 2);
      Bool isU = bitU == 1;
      Bool is2 = bitQ == 1;
      UInt mm  = 32; // invalid
      UInt ix  = 16; // invalid
      switch (size) {
         case X00:
            return False; // h_b_b[] case is not allowed
         case X01:
            mm = mmLO4; ix = (bitH << 2) | (bitL << 1) | (bitM << 0); break;
         case X10:
            mm = (bitM << 4) | mmLO4; ix = (bitH << 1) | (bitL << 0); break;
         case X11:
            return False; // q_d_d[] case is not allowed
         default:
            vassert(0);
      }
      vassert(mm < 32 && ix < 16);
      IRTemp vecN = newTempV128();
      IRTemp vecM = math_DUP_VEC_ELEM(getQReg128(mm), size, ix);
      IRTemp vecD = newTempV128();
      assign(vecN, getQReg128(nn));
      assign(vecD, getQReg128(dd));
      IRTemp res = IRTemp_INVALID;
      math_MULL_ACC(&res, is2, isU, size, "mas"[ks],
                    vecN, vecM, ks == 0 ? IRTemp_INVALID : vecD);
      putQReg128(dd, mkexpr(res));
      const HChar* nm        = ks == 0 ? "mull" : (ks == 1 ? "mlal" : "mlsl");
      const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
      const HChar* arrWide   = nameArr_Q_SZ(1, size+1);
      HChar ch               = size == X01 ? 'h' : 's';
      DIP("%c%s%s %s.%s, %s.%s, %s.%c[%u]\n",
          isU ? 'u' : 's', nm, is2 ? "2" : "",
          nameQReg128(dd), arrWide,
          nameQReg128(nn), arrNarrow, nameQReg128(mm), ch, ix);
      return True;
   }
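   /* The "mas"[ks] argument is a small idiom: indexing the string
      literal yields 'm', 'a' or 's', telling math_MULL_ACC whether to
      generate a plain multiply, a multiply-accumulate or a
      multiply-subtract. */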
   if (bitU == 0
       && (opcode == BITS4(1,0,1,1)
           || opcode == BITS4(0,0,1,1) || opcode == BITS4(0,1,1,1))) {
      /* -------- 0,xx,1011 SQDMULL s/h variants only -------- */ // 0 (ks)
      /* -------- 0,xx,0011 SQDMLAL s/h variants only -------- */ // 1
      /* -------- 0,xx,0111 SQDMLSL s/h variants only -------- */ // 2
      /* Widens, and size refers to the narrowed lanes. */
      UInt ks = 3;
      switch (opcode) {
         case BITS4(1,0,1,1): ks = 0; break;
         case BITS4(0,0,1,1): ks = 1; break;
         case BITS4(0,1,1,1): ks = 2; break;
         default: vassert(0);
      }
      vassert(ks >= 0 && ks <= 2);
      Bool is2 = bitQ == 1;
      UInt mm  = 32; // invalid
      UInt ix  = 16; // invalid
      switch (size) {
         case X00:
            return False; // h_b_b[] case is not allowed
         case X01:
            mm = mmLO4; ix = (bitH << 2) | (bitL << 1) | (bitM << 0); break;
         case X10:
            mm = (bitM << 4) | mmLO4; ix = (bitH << 1) | (bitL << 0); break;
         case X11:
            return False; // q_d_d[] case is not allowed
         default:
            vassert(0);
      }
      vassert(mm < 32 && ix < 16);
      IRTemp vecN, vecD, res, sat1q, sat1n, sat2q, sat2n;
      vecN = vecD = res = sat1q = sat1n = sat2q = sat2n = IRTemp_INVALID;
      newTempsV128_2(&vecN, &vecD);
      assign(vecN, getQReg128(nn));
      IRTemp vecM = math_DUP_VEC_ELEM(getQReg128(mm), size, ix);
      assign(vecD, getQReg128(dd));
      math_SQDMULL_ACC(&res, &sat1q, &sat1n, &sat2q, &sat2n,
                       is2, size, "mas"[ks],
                       vecN, vecM, ks == 0 ? IRTemp_INVALID : vecD);
      putQReg128(dd, mkexpr(res));
      vassert(sat1q != IRTemp_INVALID && sat1n != IRTemp_INVALID);
      updateQCFLAGwithDifference(sat1q, sat1n);
      if (sat2q != IRTemp_INVALID || sat2n != IRTemp_INVALID) {
         updateQCFLAGwithDifference(sat2q, sat2n);
      }
      const HChar* nm        = ks == 0 ? "sqdmull"
                                       : (ks == 1 ? "sqdmlal" : "sqdmlsl");
      const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
      const HChar* arrWide   = nameArr_Q_SZ(1, size+1);
      HChar ch               = size == X01 ? 'h' : 's';
      DIP("%s%s %s.%s, %s.%s, %s.%c[%u]\n",
          nm, is2 ? "2" : "",
          nameQReg128(dd), arrWide,
          nameQReg128(nn), arrNarrow, nameQReg128(mm), ch, ix);
      return True;
   }
   if (opcode == BITS4(1,1,0,0) || opcode == BITS4(1,1,0,1)) {
      /* -------- 0,xx,1100 SQDMULH  s and h variants only -------- */
      /* -------- 0,xx,1101 SQRDMULH s and h variants only -------- */
      UInt mm = 32; // invalid
      UInt ix = 16; // invalid
      switch (size) {
         case X00:
            return False; // b case is not allowed
         case X01:
            mm = mmLO4; ix = (bitH << 2) | (bitL << 1) | (bitM << 0); break;
         case X10:
            mm = (bitM << 4) | mmLO4; ix = (bitH << 1) | (bitL << 0); break;
         case X11:
            return False; // q case is not allowed
         default:
            vassert(0);
      }
      vassert(mm < 32 && ix < 16);
      Bool isR = opcode == BITS4(1,1,0,1);
      IRTemp res, sat1q, sat1n, vN, vM;
      res = sat1q = sat1n = vN = vM = IRTemp_INVALID;
      vN = newTempV128();
      assign(vN, getQReg128(nn));
      vM = math_DUP_VEC_ELEM(getQReg128(mm), size, ix);
      math_SQDMULH(&res, &sat1q, &sat1n, isR, size, vN, vM);
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      IROp opZHI = bitQ == 0 ? Iop_ZeroHI64ofV128 : Iop_INVALID;
      updateQCFLAGwithDifferenceZHI(sat1q, sat1n, opZHI);
      const HChar* nm  = isR ? "sqrdmulh" : "sqdmulh";
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      HChar ch         = size == X01 ? 'h' : 's';
      DIP("%s %s.%s, %s.%s, %s.%c[%u]\n", nm,
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm),
          ch, ix);
      return True;
   }
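   /* Note: when Q == 0 only the low 64 bits are architecturally live,
      so the saturation check above masks off the upper half
      (Iop_ZeroHI64ofV128) before comparing, preventing junk in the
      inactive lanes from spuriously setting QCFLAG. */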
   return False;
#  undef INSN
}


static
Bool dis_AdvSIMD_crypto_aes(/*MB_OUT*/DisResult* dres, UInt insn)
{
   /* 31        23   21    16     11 9 4
      0100 1110 size 10100 opcode 10 n d
      Decode fields are: size,opcode
      Size is always 00 in ARMv8, it appears.
   */
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   if (INSN(31,24) != BITS8(0,1,0,0,1,1,1,0)
       || INSN(21,17) != BITS5(1,0,1,0,0) || INSN(11,10) != BITS2(1,0)) {
      return False;
   }
   UInt size   = INSN(23,22);
   UInt opcode = INSN(16,12);
   UInt nn     = INSN(9,5);
   UInt dd     = INSN(4,0);

   if (size == BITS2(0,0)
       && (opcode == BITS5(0,0,1,0,0) || opcode == BITS5(0,0,1,0,1))) {
      /* -------- 00,00100: AESE Vd.16b, Vn.16b -------- */
      /* -------- 00,00101: AESD Vd.16b, Vn.16b -------- */
      Bool   isD  = opcode == BITS5(0,0,1,0,1);
      IRTemp op1  = newTemp(Ity_V128);
      IRTemp op2  = newTemp(Ity_V128);
      IRTemp xord = newTemp(Ity_V128);
      IRTemp res  = newTemp(Ity_V128);
      void*        helper = isD ? &arm64g_dirtyhelper_AESD
                                : &arm64g_dirtyhelper_AESE;
      const HChar* hname  = isD ? "arm64g_dirtyhelper_AESD"
                                : "arm64g_dirtyhelper_AESE";
      assign(op1, getQReg128(dd));
      assign(op2, getQReg128(nn));
      assign(xord, binop(Iop_XorV128, mkexpr(op1), mkexpr(op2)));
      IRDirty* di
         = unsafeIRDirty_1_N( res, 0/*regparms*/, hname, helper,
                              mkIRExprVec_3(
                                 IRExpr_VECRET(),
                                 unop(Iop_V128HIto64, mkexpr(xord)),
                                 unop(Iop_V128to64, mkexpr(xord)) ) );
      stmt(IRStmt_Dirty(di));
      putQReg128(dd, mkexpr(res));
      DIP("aes%c %s.16b, %s.16b\n", isD ? 'd' : 'e',
          nameQReg128(dd), nameQReg128(nn));
      return True;
   }

   if (size == BITS2(0,0)
       && (opcode == BITS5(0,0,1,1,0) || opcode == BITS5(0,0,1,1,1))) {
      /* -------- 00,00110: AESMC  Vd.16b, Vn.16b -------- */
      /* -------- 00,00111: AESIMC Vd.16b, Vn.16b -------- */
      Bool   isI = opcode == BITS5(0,0,1,1,1);
      IRTemp src = newTemp(Ity_V128);
      IRTemp res = newTemp(Ity_V128);
      void*        helper = isI ? &arm64g_dirtyhelper_AESIMC
                                : &arm64g_dirtyhelper_AESMC;
      const HChar* hname  = isI ? "arm64g_dirtyhelper_AESIMC"
                                : "arm64g_dirtyhelper_AESMC";
      assign(src, getQReg128(nn));
      IRDirty* di
         = unsafeIRDirty_1_N( res, 0/*regparms*/, hname, helper,
                              mkIRExprVec_3(
                                 IRExpr_VECRET(),
                                 unop(Iop_V128HIto64, mkexpr(src)),
                                 unop(Iop_V128to64, mkexpr(src)) ) );
      stmt(IRStmt_Dirty(di));
      putQReg128(dd, mkexpr(res));
      DIP("aes%s %s.16b, %s.16b\n", isI ? "imc" : "mc",
          nameQReg128(dd), nameQReg128(nn));
      return True;
   }

   return False;
#  undef INSN
}
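/* The AES round primitives are routed through dirty helpers rather
   than being modelled directly in IR: each helper receives the 128-bit
   operand as two 64-bit halves and writes its result back through the
   V128* return slot indicated by IRExpr_VECRET(). */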
static
Bool dis_AdvSIMD_crypto_three_reg_sha(/*MB_OUT*/DisResult* dres, UInt insn)
{
   /* 31   28   23 21 20 15 14  11 9 4
      0101 1110 sz 0  m  0  opc 00 n d
      Decode fields are: sz,opc
   */
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   if (INSN(31,24) != BITS8(0,1,0,1,1,1,1,0) || INSN(21,21) != 0
       || INSN(15,15) != 0 || INSN(11,10) != BITS2(0,0)) {
      return False;
   }
   UInt sz  = INSN(23,22);
   UInt mm  = INSN(20,16);
   UInt opc = INSN(14,12);
   UInt nn  = INSN(9,5);
   UInt dd  = INSN(4,0);
   if (sz == BITS2(0,0) && opc <= BITS3(1,1,0)) {
      /* -------- 00,000 SHA1C     Qd,    Sn,    Vm.4S -------- */
      /* -------- 00,001 SHA1P     Qd,    Sn,    Vm.4S -------- */
      /* -------- 00,010 SHA1M     Qd,    Sn,    Vm.4S -------- */
      /* -------- 00,011 SHA1SU0   Vd.4S, Vn.4S, Vm.4S -------- */
      /* -------- 00,100 SHA256H   Qd,    Qn,    Vm.4S -------- */
      /* -------- 00,101 SHA256H2  Qd,    Qn,    Vm.4S -------- */
      /* -------- 00,110 SHA256SU1 Vd.4S, Vn.4S, Vm.4S -------- */
      vassert(opc < 7);
      const HChar* inames[7]
         = { "sha1c", "sha1p", "sha1m", "sha1su0",
             "sha256h", "sha256h2", "sha256su1" };
      void(*helpers[7])(V128*,ULong,ULong,ULong,ULong,ULong,ULong)
         = { &arm64g_dirtyhelper_SHA1C,   &arm64g_dirtyhelper_SHA1P,
             &arm64g_dirtyhelper_SHA1M,   &arm64g_dirtyhelper_SHA1SU0,
             &arm64g_dirtyhelper_SHA256H, &arm64g_dirtyhelper_SHA256H2,
             &arm64g_dirtyhelper_SHA256SU1 };
      const HChar* hnames[7]
         = { "arm64g_dirtyhelper_SHA1C",   "arm64g_dirtyhelper_SHA1P",
             "arm64g_dirtyhelper_SHA1M",   "arm64g_dirtyhelper_SHA1SU0",
             "arm64g_dirtyhelper_SHA256H", "arm64g_dirtyhelper_SHA256H2",
             "arm64g_dirtyhelper_SHA256SU1" };
      IRTemp vD      = newTemp(Ity_V128);
      IRTemp vN      = newTemp(Ity_V128);
      IRTemp vM      = newTemp(Ity_V128);
      IRTemp vDhi    = newTemp(Ity_I64);
      IRTemp vDlo    = newTemp(Ity_I64);
      IRTemp vNhiPre = newTemp(Ity_I64);
      IRTemp vNloPre = newTemp(Ity_I64);
      IRTemp vNhi    = newTemp(Ity_I64);
      IRTemp vNlo    = newTemp(Ity_I64);
      IRTemp vMhi    = newTemp(Ity_I64);
      IRTemp vMlo    = newTemp(Ity_I64);
      assign(vD,      getQReg128(dd));
      assign(vN,      getQReg128(nn));
      assign(vM,      getQReg128(mm));
      assign(vDhi,    unop(Iop_V128HIto64, mkexpr(vD)));
      assign(vDlo,    unop(Iop_V128to64, mkexpr(vD)));
      assign(vNhiPre, unop(Iop_V128HIto64, mkexpr(vN)));
      assign(vNloPre, unop(Iop_V128to64, mkexpr(vN)));
      assign(vMhi,    unop(Iop_V128HIto64, mkexpr(vM)));
      assign(vMlo,    unop(Iop_V128to64, mkexpr(vM)));
      /* Mask off any bits of the N register operand that aren't actually
         needed, so that Memcheck doesn't complain unnecessarily. */
      switch (opc) {
         case BITS3(0,0,0): case BITS3(0,0,1): case BITS3(0,1,0):
            assign(vNhi, mkU64(0));
            assign(vNlo, unop(Iop_32Uto64, unop(Iop_64to32, mkexpr(vNloPre))));
            break;
         case BITS3(0,1,1): case BITS3(1,0,0):
         case BITS3(1,0,1): case BITS3(1,1,0):
            assign(vNhi, mkexpr(vNhiPre));
            assign(vNlo, mkexpr(vNloPre));
            break;
         default:
            vassert(0);
      }
      IRTemp res = newTemp(Ity_V128);
      IRDirty* di
         = unsafeIRDirty_1_N( res, 0/*regparms*/, hnames[opc], helpers[opc],
                              mkIRExprVec_7(
                                 IRExpr_VECRET(),
                                 mkexpr(vDhi), mkexpr(vDlo), mkexpr(vNhi),
                                 mkexpr(vNlo), mkexpr(vMhi), mkexpr(vMlo)));
      stmt(IRStmt_Dirty(di));
      putQReg128(dd, mkexpr(res));
      switch (opc) {
         case BITS3(0,0,0): case BITS3(0,0,1): case BITS3(0,1,0):
            DIP("%s q%u, s%u, v%u.4s\n", inames[opc], dd, nn, mm);
            break;
         case BITS3(0,1,1): case BITS3(1,1,0):
            DIP("%s v%u.4s, v%u.4s, v%u.4s\n", inames[opc], dd, nn, mm);
            break;
         case BITS3(1,0,0): case BITS3(1,0,1):
            DIP("%s q%u, q%u, v%u.4s\n", inames[opc], dd, nn, mm);
            break;
         default:
            vassert(0);
      }
      return True;
   }

   return False;
#  undef INSN
}


static
Bool dis_AdvSIMD_crypto_two_reg_sha(/*MB_OUT*/DisResult* dres, UInt insn)
{
   /* 31   28   23 21    16  11 9 4
      0101 1110 sz 10100 opc 10 n d
      Decode fields are: sz,opc
   */
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   if (INSN(31,24) != BITS8(0,1,0,1,1,1,1,0)
       || INSN(21,17) != BITS5(1,0,1,0,0) || INSN(11,10) != BITS2(1,0)) {
      return False;
   }
   UInt sz  = INSN(23,22);
   UInt opc = INSN(16,12);
   UInt nn  = INSN(9,5);
   UInt dd  = INSN(4,0);
   if (sz == BITS2(0,0) && opc <= BITS5(0,0,0,1,0)) {
      /* -------- 00,00000 SHA1H     Sd,    Sn    -------- */
      /* -------- 00,00001 SHA1SU1   Vd.4S, Vn.4S -------- */
      /* -------- 00,00010 SHA256SU0 Vd.4S, Vn.4S -------- */
      vassert(opc < 3);
      const HChar* inames[3] = { "sha1h", "sha1su1", "sha256su0" };
      IRTemp vD   = newTemp(Ity_V128);
      IRTemp vN   = newTemp(Ity_V128);
      IRTemp vDhi = newTemp(Ity_I64);
      IRTemp vDlo = newTemp(Ity_I64);
      IRTemp vNhi = newTemp(Ity_I64);
      IRTemp vNlo = newTemp(Ity_I64);
      assign(vD,   getQReg128(dd));
      assign(vN,   getQReg128(nn));
      assign(vDhi, unop(Iop_V128HIto64, mkexpr(vD)));
      assign(vDlo, unop(Iop_V128to64, mkexpr(vD)));
      assign(vNhi, unop(Iop_V128HIto64, mkexpr(vN)));
      assign(vNlo, unop(Iop_V128to64, mkexpr(vN)));
      /* Mask off any bits of the N register operand that aren't actually
         needed, so that Memcheck doesn't complain unnecessarily.  Also
         construct the calls, given that the helper functions don't take
         the same number of arguments. */
      IRDirty* di  = NULL;
      IRTemp   res = newTemp(Ity_V128);
      switch (opc) {
         case BITS5(0,0,0,0,0): {
            IRExpr* vNloMasked = unop(Iop_32Uto64,
                                      unop(Iop_64to32, mkexpr(vNlo)));
            di = unsafeIRDirty_1_N( res, 0/*regparms*/,
                                    "arm64g_dirtyhelper_SHA1H",
                                    &arm64g_dirtyhelper_SHA1H,
                                    mkIRExprVec_3(
                                       IRExpr_VECRET(),
                                       mkU64(0), vNloMasked) );
            break;
         }
         case BITS5(0,0,0,0,1):
            di = unsafeIRDirty_1_N( res, 0/*regparms*/,
                                    "arm64g_dirtyhelper_SHA1SU1",
                                    &arm64g_dirtyhelper_SHA1SU1,
                                    mkIRExprVec_5(
                                       IRExpr_VECRET(),
                                       mkexpr(vDhi), mkexpr(vDlo),
                                       mkexpr(vNhi), mkexpr(vNlo)) );
            break;
         case BITS5(0,0,0,1,0):
            di = unsafeIRDirty_1_N( res, 0/*regparms*/,
                                    "arm64g_dirtyhelper_SHA256SU0",
                                    &arm64g_dirtyhelper_SHA256SU0,
                                    mkIRExprVec_5(
                                       IRExpr_VECRET(),
                                       mkexpr(vDhi), mkexpr(vDlo),
                                       mkexpr(vNhi), mkexpr(vNlo)) );
            break;
         default:
            vassert(0);
      }
      stmt(IRStmt_Dirty(di));
      putQReg128(dd, mkexpr(res));
      switch (opc) {
         case BITS5(0,0,0,0,0):
            DIP("%s s%u, s%u\n", inames[opc], dd, nn);
            break;
         case BITS5(0,0,0,0,1): case BITS5(0,0,0,1,0):
            DIP("%s v%u.4s, v%u.4s\n", inames[opc], dd, nn);
            break;
         default:
            vassert(0);
      }
      return True;
   }

   return False;
#  undef INSN
}


static
Bool dis_AdvSIMD_fp_compare(/*MB_OUT*/DisResult* dres, UInt insn)
{
   /* 31  28    23 21 20 15 13   9 4
      000 11110 ty 1  m  op 1000 n opcode2
      The first 3 bits are really "M 0 S", but M and S are always zero.
      Decode fields are: ty,op,opcode2
   */
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   if (INSN(31,24) != BITS8(0,0,0,1,1,1,1,0)
       || INSN(21,21) != 1 || INSN(13,10) != BITS4(1,0,0,0)) {
      return False;
   }
   UInt ty      = INSN(23,22);
   UInt mm      = INSN(20,16);
   UInt op      = INSN(15,14);
   UInt nn      = INSN(9,5);
   UInt opcode2 = INSN(4,0);

   if (ty <= X01 && op == X00
       && (opcode2 & BITS5(0,0,1,1,1)) == BITS5(0,0,0,0,0)) {
      /* -------- 0x,00,00000 FCMP  d_d,  s_s  -------- */
      /* -------- 0x,00,01000 FCMP  d_#0, s_#0 -------- */
      /* -------- 0x,00,10000 FCMPE d_d,  s_s  -------- */
      /* -------- 0x,00,11000 FCMPE d_#0, s_#0 -------- */
      /*
         000 11110 01 1     m 00 1000 n 10 000  FCMPE Dn, Dm
         000 11110 01 1 00000 00 1000 n 11 000  FCMPE Dn, #0.0
         000 11110 01 1     m 00 1000 n 00 000  FCMP  Dn, Dm
         000 11110 01 1 00000 00 1000 n 01 000  FCMP  Dn, #0.0

         000 11110 00 1     m 00 1000 n 10 000  FCMPE Sn, Sm
         000 11110 00 1 00000 00 1000 n 11 000  FCMPE Sn, #0.0
         000 11110 00 1     m 00 1000 n 00 000  FCMP  Sn, Sm
         000 11110 00 1 00000 00 1000 n 01 000  FCMP  Sn, #0.0

         FCMPE generates Invalid Operation exn if either arg is any kind
         of NaN.  FCMP generates Invalid Operation exn if either arg is a
         signalling NaN.  We ignore this detail here and produce the same
         IR for both.
      */
      Bool   isD     = (ty & 1) == 1;
      Bool   isCMPE  = (opcode2 & 16) == 16;
      Bool   cmpZero = (opcode2 & 8) == 8;
      IRType ity     = isD ? Ity_F64 : Ity_F32;
      Bool   valid   = True;
      if (cmpZero && mm != 0) valid = False;
      if (valid) {
         IRTemp argL  = newTemp(ity);
         IRTemp argR  = newTemp(ity);
         IRTemp irRes = newTemp(Ity_I32);
         assign(argL, getQRegLO(nn, ity));
         assign(argR,
                cmpZero
                   ? (IRExpr_Const(isD ? IRConst_F64i(0) : IRConst_F32i(0)))
                   : getQRegLO(mm, ity));
         assign(irRes, binop(isD ? Iop_CmpF64 : Iop_CmpF32,
                             mkexpr(argL), mkexpr(argR)));
         IRTemp nzcv = mk_convert_IRCmpF64Result_to_NZCV(irRes);
         IRTemp nzcv_28x0 = newTemp(Ity_I64);
         assign(nzcv_28x0, binop(Iop_Shl64, mkexpr(nzcv), mkU8(28)));
         setFlags_COPY(nzcv_28x0);
         DIP("fcmp%s %s, %s\n", isCMPE ? "e" : "", nameQRegLO(nn, ity),
             cmpZero ? "#0.0" : nameQRegLO(mm, ity));
         return True;
      }
      return False;
   }

   return False;
#  undef INSN
}
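/* Flag-setting idiom used above and in the conditional variants below:
   mk_convert_IRCmpF64Result_to_NZCV evidently leaves N,Z,C,V in bits
   3:0, and the shift left by 28 moves them to bits 31:28, which is
   where setFlags_COPY expects to find them. */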
static
Bool dis_AdvSIMD_fp_conditional_compare(/*MB_OUT*/DisResult* dres, UInt insn)
{
   /* 31  28    23 21 20 15   11 9 4  3
      000 11110 ty 1  m  cond 01 n op nzcv
      The first 3 bits are really "M 0 S", but M and S are always zero.
      Decode fields are: ty,op
   */
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   if (INSN(31,24) != BITS8(0,0,0,1,1,1,1,0)
       || INSN(21,21) != 1 || INSN(11,10) != BITS2(0,1)) {
      return False;
   }
   UInt ty   = INSN(23,22);
   UInt mm   = INSN(20,16);
   UInt cond = INSN(15,12);
   UInt nn   = INSN(9,5);
   UInt op   = INSN(4,4);
   UInt nzcv = INSN(3,0);
   vassert(ty < 4 && op <= 1);

   if (ty <= BITS2(0,1)) {
      /* -------- 00,0 FCCMP  s_s -------- */
      /* -------- 00,1 FCCMPE s_s -------- */
      /* -------- 01,0 FCCMP  d_d -------- */
      /* -------- 01,1 FCCMPE d_d -------- */

      /* FCCMPE generates Invalid Operation exn if either arg is any kind
         of NaN.  FCCMP generates Invalid Operation exn if either arg is a
         signalling NaN.  We ignore this detail here and produce the same
         IR for both.
      */
      Bool   isD    = (ty & 1) == 1;
      Bool   isCMPE = op == 1;
      IRType ity    = isD ? Ity_F64 : Ity_F32;
      IRTemp argL   = newTemp(ity);
      IRTemp argR   = newTemp(ity);
      IRTemp irRes  = newTemp(Ity_I32);
      assign(argL,  getQRegLO(nn, ity));
      assign(argR,  getQRegLO(mm, ity));
      assign(irRes, binop(isD ? Iop_CmpF64 : Iop_CmpF32,
                          mkexpr(argL), mkexpr(argR)));
      IRTemp condT = newTemp(Ity_I1);
      assign(condT, unop(Iop_64to1, mk_arm64g_calculate_condition(cond)));
      IRTemp nzcvT = mk_convert_IRCmpF64Result_to_NZCV(irRes);

      IRTemp nzcvT_28x0 = newTemp(Ity_I64);
      assign(nzcvT_28x0, binop(Iop_Shl64, mkexpr(nzcvT), mkU8(28)));

      IRExpr* nzcvF_28x0 = mkU64(((ULong)nzcv) << 28);

      IRTemp nzcv_28x0 = newTemp(Ity_I64);
      assign(nzcv_28x0, IRExpr_ITE(mkexpr(condT),
                                   mkexpr(nzcvT_28x0), nzcvF_28x0));
      setFlags_COPY(nzcv_28x0);
      DIP("fccmp%s %s, %s, #%u, %s\n", isCMPE ? "e" : "",
          nameQRegLO(nn, ity), nameQRegLO(mm, ity), nzcv, nameCC(cond));
      return True;
   }

   return False;
#  undef INSN
}


static
Bool dis_AdvSIMD_fp_conditional_select(/*MB_OUT*/DisResult* dres, UInt insn)
{
   /* 31        23 21 20 15   11 9 5
      000 11110 ty 1  m  cond 11 n d
      The first 3 bits are really "M 0 S", but M and S are always zero.
      Decode fields: ty
   */
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   if (INSN(31,24) != BITS8(0,0,0,1,1,1,1,0) || INSN(21,21) != 1
       || INSN(11,10) != BITS2(1,1)) {
      return False;
   }
   UInt ty   = INSN(23,22);
   UInt mm   = INSN(20,16);
   UInt cond = INSN(15,12);
   UInt nn   = INSN(9,5);
   UInt dd   = INSN(4,0);
   if (ty <= X01) {
      /* -------- 00: FCSEL s_s -------- */
      /* -------- 01: FCSEL d_d -------- */
      IRType ity  = ty == X01 ? Ity_F64 : Ity_F32;
      IRTemp srcT = newTemp(ity);
      IRTemp srcF = newTemp(ity);
      IRTemp res  = newTemp(ity);
      assign(srcT, getQRegLO(nn, ity));
      assign(srcF, getQRegLO(mm, ity));
      assign(res, IRExpr_ITE(
                     unop(Iop_64to1, mk_arm64g_calculate_condition(cond)),
                     mkexpr(srcT), mkexpr(srcF)));
      putQReg128(dd, mkV128(0x0000));
      putQRegLO(dd, mkexpr(res));
      DIP("fcsel %s, %s, %s, %s\n",
          nameQRegLO(dd, ity), nameQRegLO(nn, ity), nameQRegLO(mm, ity),
          nameCC(cond));
      return True;
   }

   return False;
#  undef INSN
}


static
Bool dis_AdvSIMD_fp_data_proc_1_source(/*MB_OUT*/DisResult* dres, UInt insn)
{
   /* 31  28    23 21 20     14    9 4
      000 11110 ty 1  opcode 10000 n d
      The first 3 bits are really "M 0 S", but M and S are always zero.
      Decode fields: ty,opcode
   */
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   if (INSN(31,24) != BITS8(0,0,0,1,1,1,1,0)
       || INSN(21,21) != 1 || INSN(14,10) != BITS5(1,0,0,0,0)) {
      return False;
   }
   UInt ty     = INSN(23,22);
   UInt opcode = INSN(20,15);
   UInt nn     = INSN(9,5);
   UInt dd     = INSN(4,0);

   if (ty <= X01 && opcode <= BITS6(0,0,0,0,1,1)) {
      /* -------- 0x,000000: FMOV  d_d, s_s -------- */
      /* -------- 0x,000001: FABS  d_d, s_s -------- */
      /* -------- 0x,000010: FNEG  d_d, s_s -------- */
      /* -------- 0x,000011: FSQRT d_d, s_s -------- */
      IRType ity = ty == X01 ? Ity_F64 : Ity_F32;
      IRTemp src = newTemp(ity);
      IRTemp res = newTemp(ity);
      const HChar* nm = "??";
      assign(src, getQRegLO(nn, ity));
      switch (opcode) {
         case BITS6(0,0,0,0,0,0):
            nm = "fmov"; assign(res, mkexpr(src)); break;
         case BITS6(0,0,0,0,0,1):
            nm = "fabs"; assign(res, unop(mkABSF(ity), mkexpr(src))); break;
         case BITS6(0,0,0,0,1,0):
            nm = "fneg"; assign(res, unop(mkNEGF(ity), mkexpr(src))); break;
         case BITS6(0,0,0,0,1,1):
            nm = "fsqrt";
            assign(res, binop(mkSQRTF(ity),
                              mkexpr(mk_get_IR_rounding_mode()),
                              mkexpr(src))); break;
         default:
            vassert(0);
      }
      putQReg128(dd, mkV128(0x0000));
      putQRegLO(dd, mkexpr(res));
      DIP("%s %s, %s\n", nm, nameQRegLO(dd, ity), nameQRegLO(nn, ity));
      return True;
   }

   if (   (ty == X11 && (opcode == BITS6(0,0,0,1,0,0)
                         || opcode == BITS6(0,0,0,1,0,1)))
       || (ty == X00 && (opcode == BITS6(0,0,0,1,1,1)
                         || opcode == BITS6(0,0,0,1,0,1)))
       || (ty == X01 && (opcode == BITS6(0,0,0,1,1,1)
                         || opcode == BITS6(0,0,0,1,0,0)))) {
      /* -------- 11,000100: FCVT s_h -------- */
      /* -------- 11,000101: FCVT d_h -------- */
      /* -------- 00,000111: FCVT h_s -------- */
      /* -------- 00,000101: FCVT d_s -------- */
      /* -------- 01,000111: FCVT h_d -------- */
      /* -------- 01,000100: FCVT s_d -------- */
      /* 31        23 21    16 14    9 4
         000 11110 11 10001 00 10000 n d   FCVT Sd, Hn
         --------- 11 ----- 01 ---------   FCVT Dd, Hn
         --------- 00 ----- 11 ---------   FCVT Hd, Sn
         --------- 00 ----- 01 ---------   FCVT Dd, Sn
         --------- 01 ----- 11 ---------   FCVT Hd, Dn
         --------- 01 ----- 00 ---------   FCVT Sd, Dn
         Rounding, when dst is smaller than src, is per the FPCR.
      */
      UInt b2322 = ty     & BITS2(1,1);
      UInt b1615 = opcode & BITS2(1,1);
      switch ((b2322 << 2) | b1615) {
         case BITS4(0,0,0,1):   // S -> D
         case BITS4(1,1,0,1): { // H -> D
            Bool   srcIsH = b2322 == BITS2(1,1);
            IRType srcTy  = srcIsH ? Ity_F16 : Ity_F32;
            IRTemp res    = newTemp(Ity_F64);
            assign(res, unop(srcIsH ? Iop_F16toF64 : Iop_F32toF64,
                             getQRegLO(nn, srcTy)));
            putQReg128(dd, mkV128(0x0000));
            putQRegLO(dd, mkexpr(res));
            DIP("fcvt %s, %s\n",
                nameQRegLO(dd, Ity_F64), nameQRegLO(nn, srcTy));
            return True;
         }
         case BITS4(0,1,0,0):   // D -> S
         case BITS4(0,1,1,1): { // D -> H
            Bool   dstIsH = b1615 == BITS2(1,1);
            IRType dstTy  = dstIsH ? Ity_F16 : Ity_F32;
            IRTemp res    = newTemp(dstTy);
            assign(res, binop(dstIsH ? Iop_F64toF16 : Iop_F64toF32,
                              mkexpr(mk_get_IR_rounding_mode()),
                              getQRegLO(nn, Ity_F64)));
            putQReg128(dd, mkV128(0x0000));
            putQRegLO(dd, mkexpr(res));
            DIP("fcvt %s, %s\n",
                nameQRegLO(dd, dstTy), nameQRegLO(nn, Ity_F64));
            return True;
         }
         case BITS4(0,0,1,1):   // S -> H
         case BITS4(1,1,0,0): { // H -> S
            Bool   toH   = b1615 == BITS2(1,1);
            IRType srcTy = toH ? Ity_F32 : Ity_F16;
            IRType dstTy = toH ? Ity_F16 : Ity_F32;
            IRTemp res   = newTemp(dstTy);
            if (toH) {
               assign(res, binop(Iop_F32toF16,
                                 mkexpr(mk_get_IR_rounding_mode()),
                                 getQRegLO(nn, srcTy)));
            } else {
               assign(res, unop(Iop_F16toF32,
                                getQRegLO(nn, srcTy)));
            }
            putQReg128(dd, mkV128(0x0000));
            putQRegLO(dd, mkexpr(res));
            DIP("fcvt %s, %s\n",
                nameQRegLO(dd, dstTy), nameQRegLO(nn, srcTy));
            return True;
         }
         default:
            break;
      }
      /* else unhandled */
      return False;
   }

   if (ty <= X01
       && opcode >= BITS6(0,0,1,0,0,0) && opcode <= BITS6(0,0,1,1,1,1)
       && opcode != BITS6(0,0,1,1,0,1)) {
      /* -------- 0x,001000 FRINTN d_d, s_s -------- */
      /* -------- 0x,001001 FRINTP d_d, s_s -------- */
      /* -------- 0x,001010 FRINTM d_d, s_s -------- */
      /* -------- 0x,001011 FRINTZ d_d, s_s -------- */
      /* -------- 0x,001100 FRINTA d_d, s_s -------- */
      /* -------- 0x,001110 FRINTX d_d, s_s -------- */
      /* -------- 0x,001111 FRINTI d_d, s_s -------- */
      /* 31        23 21   17  14    9 4
         000 11110 0x 1001 111 10000 n d  FRINTI Fd, Fm (round per FPCR)
         x==0 => S-registers, x==1 => D-registers
         rm (17:15) encodings:
            111 per FPCR (FRINTI)
            001 +inf     (FRINTP)
            010 -inf     (FRINTM)
            011 zero     (FRINTZ)
            000 tieeven  (FRINTN) -- !! FIXME KLUDGED !!
            100 tieaway  (FRINTA) -- !! FIXME KLUDGED !!
            110 per FPCR + "exact = TRUE" (FRINTX)
      */
      Bool   isD = (ty & 1) == 1;
      UInt   rm  = opcode & BITS6(0,0,0,1,1,1);
      IRType ity = isD ? Ity_F64 : Ity_F32;
      IRExpr* irrmE = NULL;
      HChar   ch    = '?';
      switch (rm) {
         case BITS3(0,1,1): ch = 'z'; irrmE = mkU32(Irrm_ZERO); break;
         case BITS3(0,1,0): ch = 'm'; irrmE = mkU32(Irrm_NegINF); break;
         case BITS3(0,0,1): ch = 'p'; irrmE = mkU32(Irrm_PosINF); break;
         // The following is a kludge.  Should be: Irrm_NEAREST_TIE_AWAY_0
         case BITS3(1,0,0): ch = 'a'; irrmE = mkU32(Irrm_NEAREST); break;
         // I am unsure about the following, due to the "integral exact"
         // description in the manual.  What does it mean? (frintx, that is)
         case BITS3(1,1,0):
            ch = 'x'; irrmE = mkexpr(mk_get_IR_rounding_mode()); break;
         case BITS3(1,1,1):
            ch = 'i'; irrmE = mkexpr(mk_get_IR_rounding_mode()); break;
         // The following is a kludge.  There's no Irrm_ value to represent
         // this ("to nearest, with ties to even")
         case BITS3(0,0,0): ch = 'n'; irrmE = mkU32(Irrm_NEAREST); break;
         default: break;
      }
      if (irrmE) {
         IRTemp src = newTemp(ity);
         IRTemp dst = newTemp(ity);
         assign(src, getQRegLO(nn, ity));
         assign(dst, binop(isD ? Iop_RoundF64toInt : Iop_RoundF32toInt,
                           irrmE, mkexpr(src)));
         putQReg128(dd, mkV128(0x0000));
         putQRegLO(dd, mkexpr(dst));
         DIP("frint%c %s, %s\n",
             ch, nameQRegLO(dd, ity), nameQRegLO(nn, ity));
         return True;
      }
      return False;
   }

   return False;
#  undef INSN
}


static
Bool dis_AdvSIMD_fp_data_proc_2_source(/*MB_OUT*/DisResult* dres, UInt insn)
{
   /* 31  28    23 21 20 15     11 9 4
      000 11110 ty 1  m  opcode 10 n d
      The first 3 bits are really "M 0 S", but M and S are always zero.
      Decode fields: ty, opcode
   */
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   if (INSN(31,24) != BITS8(0,0,0,1,1,1,1,0)
       || INSN(21,21) != 1 || INSN(11,10) != BITS2(1,0)) {
      return False;
   }
   UInt ty     = INSN(23,22);
   UInt mm     = INSN(20,16);
   UInt opcode = INSN(15,12);
   UInt nn     = INSN(9,5);
   UInt dd     = INSN(4,0);

   if (ty <= X01 && opcode <= BITS4(0,1,1,1)) {
      /* ------- 0x,0000: FMUL d_d, s_s ------- */
      /* ------- 0x,0001: FDIV d_d, s_s ------- */
      /* ------- 0x,0010: FADD d_d, s_s ------- */
      /* ------- 0x,0011: FSUB d_d, s_s ------- */
      /* ------- 0x,0100: FMAX d_d, s_s ------- */
      /* ------- 0x,0101: FMIN d_d, s_s ------- */
      /* ------- 0x,0110: FMAXNM d_d, s_s ------- (FIXME KLUDGED) */
      /* ------- 0x,0111: FMINNM d_d, s_s ------- (FIXME KLUDGED) */
      IRType ity = ty == X00 ? Ity_F32 : Ity_F64;
      IROp   iop = Iop_INVALID;
      const HChar* nm = "???";
      switch (opcode) {
         case BITS4(0,0,0,0): nm = "fmul";   iop = mkMULF(ity); break;
         case BITS4(0,0,0,1): nm = "fdiv";   iop = mkDIVF(ity); break;
         case BITS4(0,0,1,0): nm = "fadd";   iop = mkADDF(ity); break;
         case BITS4(0,0,1,1): nm = "fsub";   iop = mkSUBF(ity); break;
         case BITS4(0,1,0,0): nm = "fmax";   iop = mkVecMAXF(ty+2); break;
         case BITS4(0,1,0,1): nm = "fmin";   iop = mkVecMINF(ty+2); break;
         case BITS4(0,1,1,0): nm = "fmaxnm"; iop = mkVecMAXF(ty+2); break; //!!
         case BITS4(0,1,1,1): nm = "fminnm"; iop = mkVecMINF(ty+2); break; //!!
         default: vassert(0);
      }
      if (opcode <= BITS4(0,0,1,1)) {
         // This is really not good code.  TODO: avoid width-changing
         IRTemp res = newTemp(ity);
         assign(res, triop(iop, mkexpr(mk_get_IR_rounding_mode()),
                           getQRegLO(nn, ity), getQRegLO(mm, ity)));
         putQReg128(dd, mkV128(0));
         putQRegLO(dd, mkexpr(res));
      } else {
         putQReg128(dd, unop(mkVecZEROHIxxOFV128(ty+2),
                             binop(iop, getQReg128(nn), getQReg128(mm))));
      }
      DIP("%s %s, %s, %s\n",
          nm, nameQRegLO(dd, ity), nameQRegLO(nn, ity), nameQRegLO(mm, ity));
      return True;
   }

   if (ty <= X01 && opcode == BITS4(1,0,0,0)) {
      /* ------- 0x,1000: FNMUL d_d, s_s ------- */
      IRType ity  = ty == X00 ? Ity_F32 : Ity_F64;
      IROp   iop  = mkMULF(ity);
      IROp   iopn = mkNEGF(ity);
      const HChar* nm = "fnmul";
      IRExpr* resE = unop(iopn,
                          triop(iop, mkexpr(mk_get_IR_rounding_mode()),
                                getQRegLO(nn, ity), getQRegLO(mm, ity)));
      IRTemp res = newTemp(ity);
      assign(res, resE);
      putQReg128(dd, mkV128(0));
      putQRegLO(dd, mkexpr(res));
      DIP("%s %s, %s, %s\n",
          nm, nameQRegLO(dd, ity), nameQRegLO(nn, ity), nameQRegLO(mm, ity));
      return True;
   }

   return False;
#  undef INSN
}


static
Bool dis_AdvSIMD_fp_data_proc_3_source(/*MB_OUT*/DisResult* dres, UInt insn)
{
   /* 31  28    23 21 20 15 14 9 4
      000 11111 ty o1 m  o0 a  n d
      The first 3 bits are really "M 0 S", but M and S are always zero.
      Decode fields: ty,o1,o0
   */
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   if (INSN(31,24) != BITS8(0,0,0,1,1,1,1,1)) {
      return False;
   }
   UInt ty    = INSN(23,22);
   UInt bitO1 = INSN(21,21);
   UInt mm    = INSN(20,16);
   UInt bitO0 = INSN(15,15);
   UInt aa    = INSN(14,10);
   UInt nn    = INSN(9,5);
   UInt dd    = INSN(4,0);
   vassert(ty < 4);

   if (ty <= X01) {
      /* -------- 0x,0,0 FMADD  d_d_d_d, s_s_s_s -------- */
      /* -------- 0x,0,1 FMSUB  d_d_d_d, s_s_s_s -------- */
      /* -------- 0x,1,0 FNMADD d_d_d_d, s_s_s_s -------- */
      /* -------- 0x,1,1 FNMSUB d_d_d_d, s_s_s_s -------- */
      /* -------------------- F{N}M{ADD,SUB} -------------------- */
      /* 31          22 20 15 14 9 4   ix
         000 11111 0 sz 0  m  0  a n d 0   FMADD  Fd,Fn,Fm,Fa
         000 11111 0 sz 0  m  1  a n d 1   FMSUB  Fd,Fn,Fm,Fa
         000 11111 0 sz 1  m  0  a n d 2   FNMADD Fd,Fn,Fm,Fa
         000 11111 0 sz 1  m  1  a n d 3   FNMSUB Fd,Fn,Fm,Fa
         where Fx=Dx when sz=1, Fx=Sx when sz=0

                -----SPEC------    ----IMPL----
         fmadd       a +    n * m         a + n * m
         fmsub       a + (-n) * m         a - n * m
         fnmadd   (-a) + (-n) * m      -(a + n * m)
         fnmsub   (-a) +    n * m      -(a - n * m)
      */
      Bool    isD   = (ty & 1) == 1;
      UInt    ix    = (bitO1 << 1) | bitO0;
      IRType  ity   = isD ? Ity_F64 : Ity_F32;
      IROp    opADD = mkADDF(ity);
      IROp    opSUB = mkSUBF(ity);
      IROp    opMUL = mkMULF(ity);
      IROp    opNEG = mkNEGF(ity);
      IRTemp  res   = newTemp(ity);
      IRExpr* eA    = getQRegLO(aa, ity);
      IRExpr* eN    = getQRegLO(nn, ity);
      IRExpr* eM    = getQRegLO(mm, ity);
      IRExpr* rm    = mkexpr(mk_get_IR_rounding_mode());
      IRExpr* eNxM  = triop(opMUL, rm, eN, eM);
      switch (ix) {
         case 0: assign(res, triop(opADD, rm, eA, eNxM)); break;
         case 1: assign(res, triop(opSUB, rm, eA, eNxM)); break;
         case 2: assign(res, unop(opNEG, triop(opADD, rm, eA, eNxM))); break;
         case 3: assign(res, unop(opNEG, triop(opSUB, rm, eA, eNxM))); break;
         default: vassert(0);
      }
      putQReg128(dd, mkV128(0x0000));
      putQRegLO(dd, mkexpr(res));
      const HChar* names[4] = { "fmadd", "fmsub", "fnmadd", "fnmsub" };
      DIP("%s %s, %s, %s, %s\n",
          names[ix], nameQRegLO(dd, ity), nameQRegLO(nn, ity),
          nameQRegLO(mm, ity), nameQRegLO(aa, ity));
      return True;
   }

   return False;
#  undef INSN
}
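/* As with the vector FMLA/FMLS cases, this mul-then-add expansion of
   F{N}M{ADD,SUB} rounds twice, so the result can differ from a true
   fused multiply-add in the last mantissa bit; the IR multiply-add
   IROps would avoid that. */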
static
Bool dis_AdvSIMD_fp_immediate(/*MB_OUT*/DisResult* dres, UInt insn)
{
   /* 31  28    23 21 20   12  9    4
      000 11110 ty 1  imm8 100 imm5 d
      The first 3 bits are really "M 0 S", but M and S are always zero.
   */
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   if (INSN(31,24) != BITS8(0,0,0,1,1,1,1,0)
       || INSN(21,21) != 1 || INSN(12,10) != BITS3(1,0,0)) {
      return False;
   }
   UInt ty   = INSN(23,22);
   UInt imm8 = INSN(20,13);
   UInt imm5 = INSN(9,5);
   UInt dd   = INSN(4,0);

   /* ------- 00,00000: FMOV s_imm ------- */
   /* ------- 01,00000: FMOV d_imm ------- */
   if (ty <= X01 && imm5 == BITS5(0,0,0,0,0)) {
      Bool  isD = (ty & 1) == 1;
      ULong imm = VFPExpandImm(imm8, isD ? 64 : 32);
      if (!isD) {
         vassert(0 == (imm & 0xFFFFFFFF00000000ULL));
      }
      putQReg128(dd, mkV128(0));
      putQRegLO(dd, isD ? mkU64(imm) : mkU32(imm & 0xFFFFFFFFULL));
      DIP("fmov %s, #0x%llx\n",
          nameQRegLO(dd, isD ? Ity_F64 : Ity_F32), imm);
      return True;
   }

   return False;
#  undef INSN
}
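/* VFPExpandImm decompresses the 8-bit immediate into sign, a small
   biased exponent and a 4-bit fraction.  For example FMOV Dd, #1.0 is
   encoded with imm8 == 0x70, which expands to the bit pattern
   0x3FF0000000000000, and that is what the DIP above prints. */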
static
Bool dis_AdvSIMD_fp_to_from_fixedp_conv(/*MB_OUT*/DisResult* dres, UInt insn)
{
   /* 31 30 29 28    23   21 20    18     15    9 4
      sf  0  0 11110 type 0  rmode opcode scale n d
      The first 3 bits are really "sf 0 S", but S is always zero.
      Decode fields: sf,type,rmode,opcode
   */
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   if (INSN(30,29) != BITS2(0,0)
       || INSN(28,24) != BITS5(1,1,1,1,0)
       || INSN(21,21) != 0) {
      return False;
   }
   UInt bitSF = INSN(31,31);
   UInt ty    = INSN(23,22); // type
   UInt rm    = INSN(20,19); // rmode
   UInt op    = INSN(18,16); // opcode
   UInt sc    = INSN(15,10); // scale
   UInt nn    = INSN(9,5);
   UInt dd    = INSN(4,0);

   if (ty <= X01 && rm == X11
       && (op == BITS3(0,0,0) || op == BITS3(0,0,1))) {
      /* -------- (ix) sf ty rm opc -------- */
      /* -------- 0    0  00 11 000: FCVTZS w_s_#fbits -------- */
      /* -------- 1    0  01 11 000: FCVTZS w_d_#fbits -------- */
      /* -------- 2    1  00 11 000: FCVTZS x_s_#fbits -------- */
      /* -------- 3    1  01 11 000: FCVTZS x_d_#fbits -------- */

      /* -------- 4    0  00 11 001: FCVTZU w_s_#fbits -------- */
      /* -------- 5    0  01 11 001: FCVTZU w_d_#fbits -------- */
      /* -------- 6    1  00 11 001: FCVTZU x_s_#fbits -------- */
      /* -------- 7    1  01 11 001: FCVTZU x_d_#fbits -------- */
      Bool isI64 = bitSF == 1;
      Bool isF64 = (ty & 1) == 1;
      Bool isU   = (op & 1) == 1;
      UInt ix    = (isU ? 4 : 0) | (isI64 ? 2 : 0) | (isF64 ? 1 : 0);

      Int fbits = 64 - sc;
      vassert(fbits >= 1 && fbits <= (isI64 ? 64 : 32));

      Double  scale  = two_to_the_plus(fbits);
      IRExpr* scaleE = isF64 ? IRExpr_Const(IRConst_F64(scale))
                             : IRExpr_Const(IRConst_F32( (Float)scale ));
      IROp    opMUL  = isF64 ? Iop_MulF64 : Iop_MulF32;

      const IROp ops[8]
         = { Iop_F32toI32S, Iop_F64toI32S, Iop_F32toI64S, Iop_F64toI64S,
             Iop_F32toI32U, Iop_F64toI32U, Iop_F32toI64U, Iop_F64toI64U };
      IRTemp irrm = newTemp(Ity_I32);
      assign(irrm, mkU32(Irrm_ZERO));

      IRExpr* src = getQRegLO(nn, isF64 ? Ity_F64 : Ity_F32);
      IRExpr* res = binop(ops[ix], mkexpr(irrm),
                          triop(opMUL, mkexpr(irrm), src, scaleE));
      putIRegOrZR(isI64, dd, res);

      DIP("fcvtz%c %s, %s, #%d\n",
          isU ? 'u' : 's', nameIRegOrZR(isI64, dd),
          nameQRegLO(nn, isF64 ? Ity_F64 : Ity_F32), fbits);
      return True;
   }

   /* ------ sf,ty,rm,opc ------ */
   /* ------ x,0x,00,010  SCVTF s/d, w/x, #fbits  ------ */
   /* ------ x,0x,00,011  UCVTF s/d, w/x, #fbits  ------ */
   /* (ix) sf S 28    ty   rm op      15    9 4
      0    0  0 0 11110 00 0 00 010 scale n d  SCVTF Sd, Wn, #fbits
      1    0  0 0 11110 01 0 00 010 scale n d  SCVTF Dd, Wn, #fbits
      2    1  0 0 11110 00 0 00 010 scale n d  SCVTF Sd, Xn, #fbits
      3    1  0 0 11110 01 0 00 010 scale n d  SCVTF Dd, Xn, #fbits

      4    0  0 0 11110 00 0 00 011 scale n d  UCVTF Sd, Wn, #fbits
      5    0  0 0 11110 01 0 00 011 scale n d  UCVTF Dd, Wn, #fbits
      6    1  0 0 11110 00 0 00 011 scale n d  UCVTF Sd, Xn, #fbits
      7    1  0 0 11110 01 0 00 011 scale n d  UCVTF Dd, Xn, #fbits

      These are signed/unsigned conversion from integer registers to
      FP registers, all 4 32/64-bit combinations, rounded per FPCR,
      scaled per |scale|.
   */
   if (ty <= X01 && rm == X00
       && (op == BITS3(0,1,0) || op == BITS3(0,1,1))
       && (bitSF == 1 || ((sc >> 5) & 1) == 1)) {
      Bool isI64 = bitSF == 1;
      Bool isF64 = (ty & 1) == 1;
      Bool isU   = (op & 1) == 1;
      UInt ix    = (isU ? 4 : 0) | (isI64 ? 2 : 0) | (isF64 ? 1 : 0);

      Int fbits = 64 - sc;
      vassert(fbits >= 1 && fbits <= (isI64 ? 64 : 32));

      Double  scale  = two_to_the_minus(fbits);
      IRExpr* scaleE = isF64 ? IRExpr_Const(IRConst_F64(scale))
                             : IRExpr_Const(IRConst_F32( (Float)scale ));
      IROp    opMUL  = isF64 ? Iop_MulF64 : Iop_MulF32;

      const IROp ops[8]
         = { Iop_I32StoF32, Iop_I32StoF64, Iop_I64StoF32, Iop_I64StoF64,
             Iop_I32UtoF32, Iop_I32UtoF64, Iop_I64UtoF32, Iop_I64UtoF64 };
      IRExpr* src = getIRegOrZR(isI64, nn);
      IRExpr* res = (isF64 && !isI64)
                       ? unop(ops[ix], src)
                       : binop(ops[ix],
                               mkexpr(mk_get_IR_rounding_mode()), src);
      putQReg128(dd, mkV128(0));
      putQRegLO(dd, triop(opMUL, mkU32(Irrm_NEAREST), res, scaleE));

      DIP("%ccvtf %s, %s, #%d\n",
          isU ? 'u' : 's', nameQRegLO(dd, isF64 ? Ity_F64 : Ity_F32),
          nameIRegOrZR(isI64, nn), fbits);
      return True;
   }

   return False;
#  undef INSN
}
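/* Worked example of the fixed-point scaling: FCVTZS Wd, Sn, #4 has
   scale == 60, hence fbits == 4; the source is first multiplied by
   2^4 and then converted with round-to-zero, so Sn == 1.25 gives
   Wd == 20. */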
14116 Bool
dis_AdvSIMD_fp_to_from_int_conv(/*MB_OUT*/DisResult
* dres
, UInt insn
)
14118 /* 31 30 29 28 23 21 20 18 15 9 4
14119 sf 0 0 11110 type 1 rmode opcode 000000 n d
14120 The first 3 bits are really "sf 0 S", but S is always zero.
14121 Decode fields: sf,type,rmode,opcode
14123 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
14124 if (INSN(30,29) != BITS2(0,0)
14125 || INSN(28,24) != BITS5(1,1,1,1,0)
14126 || INSN(21,21) != 1
14127 || INSN(15,10) != BITS6(0,0,0,0,0,0)) {
14130 UInt bitSF
= INSN(31,31);
14131 UInt ty
= INSN(23,22); // type
14132 UInt rm
= INSN(20,19); // rmode
14133 UInt op
= INSN(18,16); // opcode
14134 UInt nn
= INSN(9,5);
14135 UInt dd
= INSN(4,0);
14138 /* -------- FCVT{N,P,M,Z,A}{S,U} (scalar, integer) -------- */
14139 /* 30 23 20 18 15 9 4
14140 sf 00 11110 0x 1 00 000 000000 n d FCVTNS Rd, Fn (round to
14141 sf 00 11110 0x 1 00 001 000000 n d FCVTNU Rd, Fn nearest)
14142 ---------------- 01 -------------- FCVTP-------- (round to +inf)
14143 ---------------- 10 -------------- FCVTM-------- (round to -inf)
14144 ---------------- 11 -------------- FCVTZ-------- (round to zero)
14145 ---------------- 00 100 ---------- FCVTAS------- (nearest, ties away)
14146 ---------------- 00 101 ---------- FCVTAU------- (nearest, ties away)
14148 Rd is Xd when sf==1, Wd when sf==0
14149 Fn is Dn when x==1, Sn when x==0
14150 20:19 carry the rounding mode, using the same encoding as FPCR
14153 && ( ((op
== BITS3(0,0,0) || op
== BITS3(0,0,1)) && True
)
14154 || ((op
== BITS3(1,0,0) || op
== BITS3(1,0,1)) && rm
== BITS2(0,0))
14157 Bool isI64
= bitSF
== 1;
14158 Bool isF64
= (ty
& 1) == 1;
14159 Bool isU
= (op
& 1) == 1;
14160 /* Decide on the IR rounding mode to use. */
14161 IRRoundingMode irrm
= 8; /*impossible*/
14163 if (op
== BITS3(0,0,0) || op
== BITS3(0,0,1)) {
14165 case BITS2(0,0): ch
= 'n'; irrm
= Irrm_NEAREST
; break;
14166 case BITS2(0,1): ch
= 'p'; irrm
= Irrm_PosINF
; break;
14167 case BITS2(1,0): ch
= 'm'; irrm
= Irrm_NegINF
; break;
14168 case BITS2(1,1): ch
= 'z'; irrm
= Irrm_ZERO
; break;
14169 default: vassert(0);
14172 vassert(op
== BITS3(1,0,0) || op
== BITS3(1,0,1));
14174 case BITS2(0,0): ch
= 'a'; irrm
= Irrm_NEAREST
; break;
14175 default: vassert(0);
14178 vassert(irrm
!= 8);
14179 /* Decide on the conversion primop, based on the source size,
14180 dest size and signedness (8 possibilities). Case coding:
14190 UInt ix
= (isF64
? 4 : 0) | (isI64
? 2 : 0) | (isU
? 1 : 0);
14193 = { Iop_F32toI32S
, Iop_F32toI32U
, Iop_F32toI64S
, Iop_F32toI64U
,
14194 Iop_F64toI32S
, Iop_F64toI32U
, Iop_F64toI64S
, Iop_F64toI64U
};
14195 IROp iop
= iops
[ix
];
      // A bit of ATCery: bounce all cases we haven't seen an example of.
      if (/* F32toI32S */
             (iop == Iop_F32toI32S && irrm == Irrm_ZERO)   /* FCVTZS Wd,Sn */
          || (iop == Iop_F32toI32S && irrm == Irrm_NegINF) /* FCVTMS Wd,Sn */
          || (iop == Iop_F32toI32S && irrm == Irrm_PosINF) /* FCVTPS Wd,Sn */
          || (iop == Iop_F32toI32S && irrm == Irrm_NEAREST)/* FCVT{A,N}S W,S */
          /* F32toI32U */
          || (iop == Iop_F32toI32U && irrm == Irrm_ZERO)   /* FCVTZU Wd,Sn */
          || (iop == Iop_F32toI32U && irrm == Irrm_NegINF) /* FCVTMU Wd,Sn */
          || (iop == Iop_F32toI32U && irrm == Irrm_PosINF) /* FCVTPU Wd,Sn */
          || (iop == Iop_F32toI32U && irrm == Irrm_NEAREST)/* FCVT{A,N}U W,S */
          /* F32toI64S */
          || (iop == Iop_F32toI64S && irrm == Irrm_ZERO)   /* FCVTZS Xd,Sn */
          || (iop == Iop_F32toI64S && irrm == Irrm_NegINF) /* FCVTMS Xd,Sn */
          || (iop == Iop_F32toI64S && irrm == Irrm_PosINF) /* FCVTPS Xd,Sn */
          || (iop == Iop_F32toI64S && irrm == Irrm_NEAREST)/* FCVT{A,N}S X,S */
          /* F32toI64U */
          || (iop == Iop_F32toI64U && irrm == Irrm_ZERO)   /* FCVTZU Xd,Sn */
          || (iop == Iop_F32toI64U && irrm == Irrm_NegINF) /* FCVTMU Xd,Sn */
          || (iop == Iop_F32toI64U && irrm == Irrm_PosINF) /* FCVTPU Xd,Sn */
          || (iop == Iop_F32toI64U && irrm == Irrm_NEAREST)/* FCVT{A,N}U X,S */
          /* F64toI32S */
          || (iop == Iop_F64toI32S && irrm == Irrm_ZERO)   /* FCVTZS Wd,Dn */
          || (iop == Iop_F64toI32S && irrm == Irrm_NegINF) /* FCVTMS Wd,Dn */
          || (iop == Iop_F64toI32S && irrm == Irrm_PosINF) /* FCVTPS Wd,Dn */
          || (iop == Iop_F64toI32S && irrm == Irrm_NEAREST)/* FCVT{A,N}S W,D */
          /* F64toI32U */
          || (iop == Iop_F64toI32U && irrm == Irrm_ZERO)   /* FCVTZU Wd,Dn */
          || (iop == Iop_F64toI32U && irrm == Irrm_NegINF) /* FCVTMU Wd,Dn */
          || (iop == Iop_F64toI32U && irrm == Irrm_PosINF) /* FCVTPU Wd,Dn */
          || (iop == Iop_F64toI32U && irrm == Irrm_NEAREST)/* FCVT{A,N}U W,D */
          /* F64toI64S */
          || (iop == Iop_F64toI64S && irrm == Irrm_ZERO)   /* FCVTZS Xd,Dn */
          || (iop == Iop_F64toI64S && irrm == Irrm_NegINF) /* FCVTMS Xd,Dn */
          || (iop == Iop_F64toI64S && irrm == Irrm_PosINF) /* FCVTPS Xd,Dn */
          || (iop == Iop_F64toI64S && irrm == Irrm_NEAREST)/* FCVT{A,N}S X,D */
          /* F64toI64U */
          || (iop == Iop_F64toI64U && irrm == Irrm_ZERO)   /* FCVTZU Xd,Dn */
          || (iop == Iop_F64toI64U && irrm == Irrm_NegINF) /* FCVTMU Xd,Dn */
          || (iop == Iop_F64toI64U && irrm == Irrm_PosINF) /* FCVTPU Xd,Dn */
          || (iop == Iop_F64toI64U && irrm == Irrm_NEAREST)/* FCVT{A,N}U X,D */
         ) {
         /* validated */
      } else {
         return False;
      }
      IRType srcTy = isF64 ? Ity_F64 : Ity_F32;
      IRType dstTy = isI64 ? Ity_I64 : Ity_I32;
      IRTemp src = newTemp(srcTy);
      IRTemp dst = newTemp(dstTy);
      assign(src, getQRegLO(nn, srcTy));
      assign(dst, binop(iop, mkU32(irrm), mkexpr(src)));
      putIRegOrZR(isI64, dd, mkexpr(dst));
      DIP("fcvt%c%c %s, %s\n", ch, isU ? 'u' : 's',
          nameIRegOrZR(isI64, dd), nameQRegLO(nn, srcTy));
      return True;
   }
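   /* Editor's note: in the block above, the rounding mode rides along
      as the first operand of the conversion binop, as an I32 constant
      (mkU32(irrm)); and putIRegOrZR makes a dd of 31 denote WZR/XZR,
      so the write is simply discarded in that case. */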
   /* -------------- {S,U}CVTF (scalar, integer) -------------- */
   /* (ix) sf S 28     ty rm op     15     9 4
      0    0  0 0 11110 00 1 00 010 000000 n d  SCVTF Sd, Wn
      1    0  0 0 11110 01 1 00 010 000000 n d  SCVTF Dd, Wn
      2    1  0 0 11110 00 1 00 010 000000 n d  SCVTF Sd, Xn
      3    1  0 0 11110 01 1 00 010 000000 n d  SCVTF Dd, Xn

      4    0  0 0 11110 00 1 00 011 000000 n d  UCVTF Sd, Wn
      5    0  0 0 11110 01 1 00 011 000000 n d  UCVTF Dd, Wn
      6    1  0 0 11110 00 1 00 011 000000 n d  UCVTF Sd, Xn
      7    1  0 0 11110 01 1 00 011 000000 n d  UCVTF Dd, Xn

      These are signed/unsigned conversion from integer registers to
      FP registers, all 4 32/64-bit combinations, rounded per FPCR.
   */
   if (ty <= X01 && rm == X00
       && (op == BITS3(0,1,0) || op == BITS3(0,1,1))) {
      Bool isI64 = bitSF == 1;
      Bool isF64 = (ty & 1) == 1;
      Bool isU   = (op & 1) == 1;
      UInt ix    = (isU ? 4 : 0) | (isI64 ? 2 : 0) | (isF64 ? 1 : 0);
      const IROp ops[8]
         = { Iop_I32StoF32, Iop_I32StoF64, Iop_I64StoF32, Iop_I64StoF64,
             Iop_I32UtoF32, Iop_I32UtoF64, Iop_I64UtoF32, Iop_I64UtoF64 };
      IRExpr* src = getIRegOrZR(isI64, nn);
      IRExpr* res = (isF64 && !isI64)
                       ? unop(ops[ix], src)
                       : binop(ops[ix],
                               mkexpr(mk_get_IR_rounding_mode()), src);
      putQReg128(dd, mkV128(0));
      putQRegLO(dd, res);
      DIP("%ccvtf %s, %s\n",
          isU ? 'u' : 's', nameQRegLO(dd, isF64 ? Ity_F64 : Ity_F32),
          nameIRegOrZR(isI64, nn));
      return True;
   }
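   /* Editor's note: the unop/binop split above reflects that only the
      I32->F64 conversions (isF64 && !isI64, viz. ix 1 and 5) are exact
      -- every 32-bit integer is representable in a double -- so no
      rounding mode is needed for them; all other combinations can
      round and hence take the FPCR-derived mode from
      mk_get_IR_rounding_mode(). */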
   /* -------- FMOV (general) -------- */
   /* case sf S    ty rm op     15     9 4
       (1) 0  0 0 11110 00 1 00 111 000000 n d  FMOV Sd,      Wn
       (2) 1  0 0 11110 01 1 00 111 000000 n d  FMOV Dd,      Xn
       (3) 1  0 0 11110 10 1 01 111 000000 n d  FMOV Vd.D[1], Xn

       (4) 0  0 0 11110 00 1 00 110 000000 n d  FMOV Wd, Sn
       (5) 1  0 0 11110 01 1 00 110 000000 n d  FMOV Xd, Dn
       (6) 1  0 0 11110 10 1 01 110 000000 n d  FMOV Xd, Vn.D[1]
   */
   {
      UInt ix = 0; // case
      if (bitSF == 0) {
         if (ty == BITS2(0,0) && rm == BITS2(0,0) && op == BITS3(1,1,1))
            ix = 1;
         else
         if (ty == BITS2(0,0) && rm == BITS2(0,0) && op == BITS3(1,1,0))
            ix = 4;
      } else {
         vassert(bitSF == 1);
         if (ty == BITS2(0,1) && rm == BITS2(0,0) && op == BITS3(1,1,1))
            ix = 2;
         else
         if (ty == BITS2(0,1) && rm == BITS2(0,0) && op == BITS3(1,1,0))
            ix = 5;
         else
         if (ty == BITS2(1,0) && rm == BITS2(0,1) && op == BITS3(1,1,1))
            ix = 3;
         else
         if (ty == BITS2(1,0) && rm == BITS2(0,1) && op == BITS3(1,1,0))
            ix = 6;
      }
      if (ix > 0) {
         switch (ix) {
            case 1:
               putQReg128(dd, mkV128(0));
               putQRegLO(dd, getIReg32orZR(nn));
               DIP("fmov s%u, w%u\n", dd, nn);
               break;
            case 2:
               putQReg128(dd, mkV128(0));
               putQRegLO(dd, getIReg64orZR(nn));
               DIP("fmov d%u, x%u\n", dd, nn);
               break;
            case 3:
               putQRegHI64(dd, getIReg64orZR(nn));
               DIP("fmov v%u.d[1], x%u\n", dd, nn);
               break;
            case 4:
               putIReg32orZR(dd, getQRegLO(nn, Ity_I32));
               DIP("fmov w%u, s%u\n", dd, nn);
               break;
            case 5:
               putIReg64orZR(dd, getQRegLO(nn, Ity_I64));
               DIP("fmov x%u, d%u\n", dd, nn);
               break;
            case 6:
               putIReg64orZR(dd, getQRegHI64(nn));
               DIP("fmov x%u, v%u.d[1]\n", dd, nn);
               break;
            default:
               vassert(0);
         }
         return True;
      }
      /* undecodable; fall through */
   }

   return False;
#  undef INSN
}
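/* Editor's note on the FMOV (general) cases above: these are raw bit
   copies between the integer and vector register files, with no value
   conversion.  Writes to the low lane (cases 1 and 2) first zero the
   whole Q register via putQReg128(dd, mkV128(0)), whereas case 3
   writes only the upper 64-bit lane and leaves the low lane intact. */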
static
Bool dis_ARM64_simd_and_fp(/*MB_OUT*/DisResult* dres, UInt insn)
{
   Bool ok;
   ok = dis_AdvSIMD_EXT(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_TBL_TBX(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_ZIP_UZP_TRN(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_across_lanes(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_copy(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_modified_immediate(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_scalar_copy(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_scalar_pairwise(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_scalar_shift_by_imm(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_scalar_three_different(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_scalar_three_same(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_scalar_two_reg_misc(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_scalar_x_indexed_element(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_shift_by_immediate(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_three_different(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_three_same(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_two_reg_misc(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_vector_x_indexed_elem(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_crypto_aes(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_crypto_three_reg_sha(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_crypto_two_reg_sha(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_fp_compare(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_fp_conditional_compare(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_fp_conditional_select(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_fp_data_proc_1_source(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_fp_data_proc_2_source(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_fp_data_proc_3_source(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_fp_immediate(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_fp_to_from_fixedp_conv(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_fp_to_from_int_conv(dres, insn);
   if (UNLIKELY(ok)) return True;
   return False;
}
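/* Editor's note: the chain above is a plain sequential probe.  Each
   dis_AdvSIMD_* decoder returns False, leaving *dres untouched, when
   the instruction is not in its group, so the ordering of the calls
   affects only decode speed, not correctness, and supporting a new
   group amounts to appending one more probe to the chain. */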
/*------------------------------------------------------------*/
/*--- Disassemble a single ARM64 instruction               ---*/
/*------------------------------------------------------------*/

/* Disassemble a single ARM64 instruction into IR.  The instruction
   is located at |guest_instr| and has guest IP |guest_PC_curr_instr|,
   which will have been set before the call here.  Returns True iff
   the instruction was decoded, in which case *dres will be set
   accordingly; returns False otherwise, in which case *dres should
   be ignored by the caller. */
static
Bool disInstr_ARM64_WRK (
        /*MB_OUT*/DisResult* dres,
        Bool         (*resteerOkFn) ( /*opaque*/void*, Addr ),
        Bool         resteerCisOk,
        void*        callback_opaque,
        const UChar* guest_instr,
        const VexArchInfo* archinfo,
        const VexAbiInfo*  abiinfo
     )
{
   // A macro to fish bits out of 'insn'.
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))

//ZZ    DisResult dres;
//ZZ    //Bool      allow_VFP = False;
//ZZ    //UInt      hwcaps = archinfo->hwcaps;
//ZZ    IRTemp    condT; /* :: Ity_I32 */
//ZZ    HChar     dis_buf[128];  // big enough to hold LDMIA etc text
//ZZ
//ZZ    /* What insn variants are we supporting today? */
//ZZ    //allow_VFP  = (0 != (hwcaps & VEX_HWCAPS_ARM_VFP));
   /* Set result defaults. */
   dres->whatNext    = Dis_Continue;
   dres->len         = 4;
   dres->continueAt  = 0;
   dres->jk_StopHere = Ijk_INVALID;
   dres->hint        = Dis_HintNone;

   /* At least this is simple on ARM64: insns are all 4 bytes long, and
      4-aligned.  So just fish the whole thing out of memory right now
      and have done. */
   UInt insn = getUIntLittleEndianly( guest_instr );

   if (0) vex_printf("insn: 0x%x\n", insn);

   DIP("\t(arm64) 0x%llx: ", (ULong)guest_PC_curr_instr);

   vassert(0 == (guest_PC_curr_instr & 3ULL));
   /* ----------------------------------------------------------- */

   /* Spot "Special" instructions (see comment at top of file). */
   {
      const UChar* code = guest_instr;
      /* Spot the 16-byte preamble:
            93CC0D8C   ror x12, x12, #3
            93CC358C   ror x12, x12, #13
            93CCCD8C   ror x12, x12, #51
            93CCF58C   ror x12, x12, #61
      */
      UInt word1 = 0x93CC0D8C;
      UInt word2 = 0x93CC358C;
      UInt word3 = 0x93CCCD8C;
      UInt word4 = 0x93CCF58C;
      if (getUIntLittleEndianly(code+ 0) == word1 &&
          getUIntLittleEndianly(code+ 4) == word2 &&
          getUIntLittleEndianly(code+ 8) == word3 &&
          getUIntLittleEndianly(code+12) == word4) {
         /* Got a "Special" instruction preamble.  Which one is it? */
         if (getUIntLittleEndianly(code+16) == 0xAA0A014A
                                               /* orr x10,x10,x10 */) {
            /* X3 = client_request ( X4 ) */
            DIP("x3 = client_request ( x4 )\n");
            putPC(mkU64( guest_PC_curr_instr + 20 ));
            dres->jk_StopHere = Ijk_ClientReq;
            dres->whatNext    = Dis_StopHere;
            dres->len         = 20;
            return True;
         }
         else
         if (getUIntLittleEndianly(code+16) == 0xAA0B016B
                                               /* orr x11,x11,x11 */) {
            /* X3 = guest_NRADDR */
            DIP("x3 = guest_NRADDR\n");
            dres->len = 20;
            putIReg64orZR(3, IRExpr_Get( OFFB_NRADDR, Ity_I64 ));
            return True;
         }
         else
         if (getUIntLittleEndianly(code+16) == 0xAA0C018C
                                               /* orr x12,x12,x12 */) {
            /* branch-and-link-to-noredir X8 */
            DIP("branch-and-link-to-noredir x8\n");
            putIReg64orZR(30, mkU64(guest_PC_curr_instr + 20));
            putPC(getIReg64orZR(8));
            dres->jk_StopHere = Ijk_NoRedir;
            dres->whatNext    = Dis_StopHere;
            dres->len         = 20;
            return True;
         }
         else
         if (getUIntLittleEndianly(code+16) == 0xAA090129
                                               /* orr x9,x9,x9 */) {
            /* IR injection */
            DIP("IR injection\n");
            vex_inject_ir(irsb, Iend_LE);
            // Invalidate the current insn. The reason is that the IRop we're
            // injecting here can change. In which case the translation has to
            // be redone. For ease of handling, we simply invalidate all the
            // time.
            stmt(IRStmt_Put(OFFB_CMSTART, mkU64(guest_PC_curr_instr)));
            stmt(IRStmt_Put(OFFB_CMLEN,   mkU64(20)));
            putPC(mkU64( guest_PC_curr_instr + 20 ));
            dres->len         = 20;
            dres->whatNext    = Dis_StopHere;
            dres->jk_StopHere = Ijk_InvalICache;
            return True;
         }
         /* We don't know what it is. */
         return False;
         /*NOTREACHED*/
      }
   }
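   /* Editor's note: each accepted special sequence above is 20 bytes
      in total -- the 16-byte preamble plus one 4-byte marker insn at
      code+16 -- which is why dres->len is set to 20 and the PC is
      advanced to guest_PC_curr_instr + 20 in every accepted case. */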
   /* ----------------------------------------------------------- */

   /* Main ARM64 instruction decoder starts here. */

   Bool ok = False;

   /* insn[28:25] determines the top-level grouping, so let's start
      off with that.

      For all of these dis_ARM64_ functions, we pass *dres with the
      normal default results "insn OK, 4 bytes long, keep decoding" so
      they don't need to change it.  However, decodes of control-flow
      insns may cause *dres to change.
   */
   switch (INSN(28,25)) {
      case BITS4(1,0,0,0): case BITS4(1,0,0,1):
         // Data processing - immediate
         ok = dis_ARM64_data_processing_immediate(dres, insn);
         break;
      case BITS4(1,0,1,0): case BITS4(1,0,1,1):
         // Branch, exception generation and system instructions
         ok = dis_ARM64_branch_etc(dres, insn, archinfo, abiinfo);
         break;
      case BITS4(0,1,0,0): case BITS4(0,1,1,0):
      case BITS4(1,1,0,0): case BITS4(1,1,1,0):
         // Loads and stores
         ok = dis_ARM64_load_store(dres, insn, abiinfo);
         break;
      case BITS4(0,1,0,1): case BITS4(1,1,0,1):
         // Data processing - register
         ok = dis_ARM64_data_processing_register(dres, insn);
         break;
      case BITS4(0,1,1,1): case BITS4(1,1,1,1):
         // Data processing - SIMD and floating point
         ok = dis_ARM64_simd_and_fp(dres, insn);
         break;
      case BITS4(0,0,0,0): case BITS4(0,0,0,1):
      case BITS4(0,0,1,0): case BITS4(0,0,1,1):
         // UNALLOCATED
         break;
      default:
         vassert(0); /* Can't happen */
   }
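   /* Editor's note: as a worked routing example, 0x1E380041 is
      FCVTZS w1, s2; its bits [28:25] are 1111, so the switch above
      routes it to dis_ARM64_simd_and_fp, whose probe chain in turn
      reaches dis_AdvSIMD_fp_to_from_int_conv. */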
   /* If the next-level down decoders failed, make sure |dres| didn't
      get changed. */
   if (!ok) {
      vassert(dres->whatNext    == Dis_Continue);
      vassert(dres->len         == 4);
      vassert(dres->continueAt  == 0);
      vassert(dres->jk_StopHere == Ijk_INVALID);
   }

   return ok;

#  undef INSN
}
/*------------------------------------------------------------*/
/*--- Top-level fn                                         ---*/
/*------------------------------------------------------------*/

/* Disassemble a single instruction into IR.  The instruction
   is located in host memory at &guest_code[delta]. */

DisResult disInstr_ARM64 ( IRSB*        irsb_IN,
                           Bool         (*resteerOkFn) ( void*, Addr ),
                           Bool         resteerCisOk,
                           void*        callback_opaque,
                           const UChar* guest_code_IN,
                           Long         delta_IN,
                           Addr         guest_IP,
                           VexArch      guest_arch,
                           const VexArchInfo* archinfo,
                           const VexAbiInfo*  abiinfo,
                           VexEndness   host_endness_IN,
                           Bool         sigill_diag_IN )
{
   DisResult dres;
   vex_bzero(&dres, sizeof(dres));
   /* Set globals (see top of this file) */
   vassert(guest_arch == VexArchARM64);

   irsb                = irsb_IN;
   host_endness        = host_endness_IN;
   guest_PC_curr_instr = (Addr64)guest_IP;

   /* Sanity checks */
   /* (x::UInt - 2) <= 15   ===   x >= 2 && x <= 17 (I hope) */
   vassert((archinfo->arm64_dMinLine_lg2_szB - 2) <= 15);
   vassert((archinfo->arm64_iMinLine_lg2_szB - 2) <= 15);

   /* Try to decode */
   Bool ok = disInstr_ARM64_WRK( &dres,
                                 resteerOkFn, resteerCisOk, callback_opaque,
                                 &guest_code_IN[delta_IN],
                                 archinfo, abiinfo );
   if (ok) {
      /* All decode successes end up here. */
      vassert(dres.len == 4 || dres.len == 20);
      switch (dres.whatNext) {
         case Dis_Continue:
            putPC( mkU64(dres.len + guest_PC_curr_instr) );
            break;
         case Dis_ResteerU:
         case Dis_ResteerC:
            putPC(mkU64(dres.continueAt));
            break;
         case Dis_StopHere:
            break;
         default:
            vassert(0);
      }
   } else {
      /* All decode failures end up here. */
      if (sigill_diag_IN) {
         Int   i, j;
         HChar buf[64];
         UInt  insn
            = getUIntLittleEndianly( &guest_code_IN[delta_IN] );
         vex_bzero(buf, sizeof(buf));
         for (i = j = 0; i < 32; i++) {
            if (i > 0) {
               if ((i & 7) == 0) buf[j++] = ' ';
               else if ((i & 3) == 0) buf[j++] = '\'';
            }
            buf[j++] = (insn & (1<<(31-i))) ? '1' : '0';
         }
         vex_printf("disInstr(arm64): unhandled instruction 0x%08x\n", insn);
         vex_printf("disInstr(arm64): %s\n", buf);
      }
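      /* Editor's note: the loop above renders the undecoded insn as 32
         bits, with a space between bytes and an apostrophe between
         nibbles; e.g. 0x1E380041 prints as
         "0001'1110 0011'1000 0000'0000 0100'0001". */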
      /* Tell the dispatcher that this insn cannot be decoded, and so
         has not been executed, and (is currently) the next to be
         executed.  PC should be up-to-date since it is made so at the
         start of each insn, but nevertheless be paranoid and update
         it again right now. */
      putPC( mkU64(guest_PC_curr_instr) );
      dres.len         = 0;
      dres.whatNext    = Dis_StopHere;
      dres.jk_StopHere = Ijk_NoDecode;
      dres.continueAt  = 0;
   }
   return dres;
}
/*--------------------------------------------------------------------*/
/*--- end                                   guest_arm64_toIR.c    ---*/
/*--------------------------------------------------------------------*/