1 /* -*- mode: C; c-basic-offset: 3; -*- */
3 /*--------------------------------------------------------------------*/
4 /*--- begin guest_arm64_toIR.c ---*/
5 /*--------------------------------------------------------------------*/
8 This file is part of Valgrind, a dynamic binary instrumentation
11 Copyright (C) 2013-2017 OpenWorks
14 This program is free software; you can redistribute it and/or
15 modify it under the terms of the GNU General Public License as
16 published by the Free Software Foundation; either version 2 of the
17 License, or (at your option) any later version.
19 This program is distributed in the hope that it will be useful, but
20 WITHOUT ANY WARRANTY; without even the implied warranty of
21 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
22 General Public License for more details.
24 You should have received a copy of the GNU General Public License
25 along with this program; if not, see <http://www.gnu.org/licenses/>.
27 The GNU General Public License is contained in the file COPYING.
30 /* KNOWN LIMITATIONS 2014-Nov-16
32 * Correctness: FMAXNM, FMINNM are implemented the same as FMAX/FMIN.
34 Also FP comparison "unordered" .. is implemented as normal FP
37 Both should be fixed. They behave incorrectly in the presence of
40 FMULX is treated the same as FMUL. That's also not correct.
42 * Floating multiply-add (etc) insns. Are split into a multiply and
43 an add, and so suffer double rounding and hence sometimes the
44 least significant mantissa bit is incorrect. Fix: use the IR
45 multiply-add IROps instead.
47 * FRINTA, FRINTN are kludged .. they just round to nearest. No special
48 handling for the "ties" case. FRINTX might be dubious too.
50 * Ditto FCVTXN. No idea what "round to odd" means. This implementation
51 just rounds to nearest.
54 /* "Special" instructions.
56 This instruction decoder can decode four special instructions
57 which mean nothing natively (are no-ops as far as regs/mem are
58 concerned) but have meaning for supporting Valgrind. A special
59 instruction is flagged by a 16-byte preamble:
61 93CC0D8C 93CC358C 93CCCD8C 93CCF58C
62 (ror x12, x12, #3; ror x12, x12, #13
63 ror x12, x12, #51; ror x12, x12, #61)
65 Following that, one of the following 3 are allowed
66 (standard interpretation in parentheses):
68 AA0A014A (orr x10,x10,x10) X3 = client_request ( X4 )
69 AA0B016B (orr x11,x11,x11) X3 = guest_NRADDR
70 AA0C018C (orr x12,x12,x12) branch-and-link-to-noredir X8
71 AA090129 (orr x9,x9,x9) IR injection
73 Any other bytes following the 16-byte preamble are illegal and
74 constitute a failure in instruction decoding. This all assumes
75 that the preamble will never occur except in specific code
76 fragments designed for Valgrind to catch.
79 /* Translates ARM64 code to IR. */
81 #include "libvex_basictypes.h"
82 #include "libvex_ir.h"
84 #include "libvex_guest_arm64.h"
86 #include "main_util.h"
87 #include "main_globals.h"
88 #include "guest_generic_bb_to_IR.h"
89 #include "guest_arm64_defs.h"
92 /*------------------------------------------------------------*/
94 /*------------------------------------------------------------*/
96 /* These are set at the start of the translation of a instruction, so
97 that we don't have to pass them around endlessly. CONST means does
98 not change during translation of the instruction.
101 /* CONST: what is the host's endianness? We need to know this in
102 order to do sub-register accesses to the SIMD/FP registers
104 static VexEndness host_endness
;
106 /* CONST: The guest address for the instruction currently being
108 static Addr64 guest_PC_curr_instr
;
110 /* MOD: The IRSB* into which we're generating code. */
114 /*------------------------------------------------------------*/
115 /*--- Debugging output ---*/
116 /*------------------------------------------------------------*/
118 #define DIP(format, args...) \
119 if (vex_traceflags & VEX_TRACE_FE) \
120 vex_printf(format, ## args)
122 #define DIS(buf, format, args...) \
123 if (vex_traceflags & VEX_TRACE_FE) \
124 vex_sprintf(buf, format, ## args)
127 /*------------------------------------------------------------*/
128 /*--- Helper bits and pieces for deconstructing the ---*/
129 /*--- arm insn stream. ---*/
130 /*------------------------------------------------------------*/
132 /* Do a little-endian load of a 32-bit word, regardless of the
133 endianness of the underlying host. */
134 static inline UInt
getUIntLittleEndianly ( const UChar
* p
)
144 /* Sign extend a N-bit value up to 64 bits, by copying
145 bit N-1 into all higher positions. */
146 static ULong
sx_to_64 ( ULong x
, UInt n
)
148 vassert(n
> 1 && n
< 64);
155 //ZZ /* Do a little-endian load of a 16-bit word, regardless of the
156 //ZZ endianness of the underlying host. */
157 //ZZ static inline UShort getUShortLittleEndianly ( UChar* p )
160 //ZZ w = (w << 8) | p[1];
161 //ZZ w = (w << 8) | p[0];
165 //ZZ static UInt ROR32 ( UInt x, UInt sh ) {
166 //ZZ vassert(sh >= 0 && sh < 32);
170 //ZZ return (x << (32-sh)) | (x >> sh);
173 //ZZ static Int popcount32 ( UInt x )
176 //ZZ for (i = 0; i < 32; i++) {
183 //ZZ static UInt setbit32 ( UInt x, Int ix, UInt b )
185 //ZZ UInt mask = 1 << ix;
187 //ZZ x |= ((b << ix) & mask);
191 #define BITS2(_b1,_b0) \
192 (((_b1) << 1) | (_b0))
194 #define BITS3(_b2,_b1,_b0) \
195 (((_b2) << 2) | ((_b1) << 1) | (_b0))
197 #define BITS4(_b3,_b2,_b1,_b0) \
198 (((_b3) << 3) | ((_b2) << 2) | ((_b1) << 1) | (_b0))
200 #define BITS8(_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0) \
201 ((BITS4((_b7),(_b6),(_b5),(_b4)) << 4) \
202 | BITS4((_b3),(_b2),(_b1),(_b0)))
204 #define BITS5(_b4,_b3,_b2,_b1,_b0) \
205 (BITS8(0,0,0,(_b4),(_b3),(_b2),(_b1),(_b0)))
206 #define BITS6(_b5,_b4,_b3,_b2,_b1,_b0) \
207 (BITS8(0,0,(_b5),(_b4),(_b3),(_b2),(_b1),(_b0)))
208 #define BITS7(_b6,_b5,_b4,_b3,_b2,_b1,_b0) \
209 (BITS8(0,(_b6),(_b5),(_b4),(_b3),(_b2),(_b1),(_b0)))
211 #define BITS9(_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0) \
213 | BITS8((_b7),(_b6),(_b5),(_b4),(_b3),(_b2),(_b1),(_b0)))
215 #define BITS10(_b9,_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0) \
216 (((_b9) << 9) | ((_b8) << 8) \
217 | BITS8((_b7),(_b6),(_b5),(_b4),(_b3),(_b2),(_b1),(_b0)))
219 #define BITS11(_b10,_b9,_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0) \
221 | BITS10(_b9,_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0))
223 #define BITS12(_b11, _b10,_b9,_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0) \
225 | BITS11(_b10,_b9,_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0))
227 #define X00 BITS2(0,0)
228 #define X01 BITS2(0,1)
229 #define X10 BITS2(1,0)
230 #define X11 BITS2(1,1)
232 // produces _uint[_bMax:_bMin]
233 #define SLICE_UInt(_uint,_bMax,_bMin) \
234 (( ((UInt)(_uint)) >> (_bMin)) \
235 & (UInt)((1ULL << ((_bMax) - (_bMin) + 1)) - 1ULL))
238 /*------------------------------------------------------------*/
239 /*--- Helper bits and pieces for creating IR fragments. ---*/
240 /*------------------------------------------------------------*/
242 static IRExpr
* mkV128 ( UShort w
)
244 return IRExpr_Const(IRConst_V128(w
));
247 static IRExpr
* mkU64 ( ULong i
)
249 return IRExpr_Const(IRConst_U64(i
));
252 static IRExpr
* mkU32 ( UInt i
)
254 return IRExpr_Const(IRConst_U32(i
));
257 static IRExpr
* mkU16 ( UInt i
)
260 return IRExpr_Const(IRConst_U16(i
));
263 static IRExpr
* mkU8 ( UInt i
)
266 return IRExpr_Const(IRConst_U8( (UChar
)i
));
269 static IRExpr
* mkexpr ( IRTemp tmp
)
271 return IRExpr_RdTmp(tmp
);
274 static IRExpr
* unop ( IROp op
, IRExpr
* a
)
276 return IRExpr_Unop(op
, a
);
279 static IRExpr
* binop ( IROp op
, IRExpr
* a1
, IRExpr
* a2
)
281 return IRExpr_Binop(op
, a1
, a2
);
284 static IRExpr
* triop ( IROp op
, IRExpr
* a1
, IRExpr
* a2
, IRExpr
* a3
)
286 return IRExpr_Triop(op
, a1
, a2
, a3
);
289 static IRExpr
* qop ( IROp op
, IRExpr
* a1
, IRExpr
* a2
,
290 IRExpr
* a3
, IRExpr
* a4
)
292 return IRExpr_Qop(op
, a1
, a2
, a3
, a4
);
295 static IRExpr
* loadLE ( IRType ty
, IRExpr
* addr
)
297 return IRExpr_Load(Iend_LE
, ty
, addr
);
300 /* Add a statement to the list held by "irbb". */
301 static void stmt ( IRStmt
* st
)
303 addStmtToIRSB( irsb
, st
);
306 static void assign ( IRTemp dst
, IRExpr
* e
)
308 stmt( IRStmt_WrTmp(dst
, e
) );
311 static void storeLE ( IRExpr
* addr
, IRExpr
* data
)
313 stmt( IRStmt_Store(Iend_LE
, addr
, data
) );
316 //ZZ static void storeGuardedLE ( IRExpr* addr, IRExpr* data, IRTemp guardT )
318 //ZZ if (guardT == IRTemp_INVALID) {
319 //ZZ /* unconditional */
320 //ZZ storeLE(addr, data);
322 //ZZ stmt( IRStmt_StoreG(Iend_LE, addr, data,
323 //ZZ binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0))) );
327 //ZZ static void loadGuardedLE ( IRTemp dst, IRLoadGOp cvt,
328 //ZZ IRExpr* addr, IRExpr* alt,
329 //ZZ IRTemp guardT /* :: Ity_I32, 0 or 1 */ )
331 //ZZ if (guardT == IRTemp_INVALID) {
332 //ZZ /* unconditional */
333 //ZZ IRExpr* loaded = NULL;
335 //ZZ case ILGop_Ident32:
336 //ZZ loaded = loadLE(Ity_I32, addr); break;
337 //ZZ case ILGop_8Uto32:
338 //ZZ loaded = unop(Iop_8Uto32, loadLE(Ity_I8, addr)); break;
339 //ZZ case ILGop_8Sto32:
340 //ZZ loaded = unop(Iop_8Sto32, loadLE(Ity_I8, addr)); break;
341 //ZZ case ILGop_16Uto32:
342 //ZZ loaded = unop(Iop_16Uto32, loadLE(Ity_I16, addr)); break;
343 //ZZ case ILGop_16Sto32:
344 //ZZ loaded = unop(Iop_16Sto32, loadLE(Ity_I16, addr)); break;
348 //ZZ vassert(loaded != NULL);
349 //ZZ assign(dst, loaded);
351 //ZZ /* Generate a guarded load into 'dst', but apply 'cvt' to the
352 //ZZ loaded data before putting the data in 'dst'. If the load
353 //ZZ does not take place, 'alt' is placed directly in 'dst'. */
354 //ZZ stmt( IRStmt_LoadG(Iend_LE, cvt, dst, addr, alt,
355 //ZZ binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0))) );
359 /* Generate a new temporary of the given type. */
360 static IRTemp
newTemp ( IRType ty
)
362 vassert(isPlausibleIRType(ty
));
363 return newIRTemp( irsb
->tyenv
, ty
);
366 /* This is used in many places, so the brevity is an advantage. */
367 static IRTemp
newTempV128(void)
369 return newTemp(Ity_V128
);
372 /* Initialise V128 temporaries en masse. */
374 void newTempsV128_2(IRTemp
* t1
, IRTemp
* t2
)
376 vassert(t1
&& *t1
== IRTemp_INVALID
);
377 vassert(t2
&& *t2
== IRTemp_INVALID
);
383 void newTempsV128_3(IRTemp
* t1
, IRTemp
* t2
, IRTemp
* t3
)
385 vassert(t1
&& *t1
== IRTemp_INVALID
);
386 vassert(t2
&& *t2
== IRTemp_INVALID
);
387 vassert(t3
&& *t3
== IRTemp_INVALID
);
394 void newTempsV128_4(IRTemp
* t1
, IRTemp
* t2
, IRTemp
* t3
, IRTemp
* t4
)
396 vassert(t1
&& *t1
== IRTemp_INVALID
);
397 vassert(t2
&& *t2
== IRTemp_INVALID
);
398 vassert(t3
&& *t3
== IRTemp_INVALID
);
399 vassert(t4
&& *t4
== IRTemp_INVALID
);
407 void newTempsV128_7(IRTemp
* t1
, IRTemp
* t2
, IRTemp
* t3
,
408 IRTemp
* t4
, IRTemp
* t5
, IRTemp
* t6
, IRTemp
* t7
)
410 vassert(t1
&& *t1
== IRTemp_INVALID
);
411 vassert(t2
&& *t2
== IRTemp_INVALID
);
412 vassert(t3
&& *t3
== IRTemp_INVALID
);
413 vassert(t4
&& *t4
== IRTemp_INVALID
);
414 vassert(t5
&& *t5
== IRTemp_INVALID
);
415 vassert(t6
&& *t6
== IRTemp_INVALID
);
416 vassert(t7
&& *t7
== IRTemp_INVALID
);
426 //ZZ /* Produces a value in 0 .. 3, which is encoded as per the type
427 //ZZ IRRoundingMode. */
428 //ZZ static IRExpr* /* :: Ity_I32 */ get_FAKE_roundingmode ( void )
430 //ZZ return mkU32(Irrm_NEAREST);
433 //ZZ /* Generate an expression for SRC rotated right by ROT. */
434 //ZZ static IRExpr* genROR32( IRTemp src, Int rot )
436 //ZZ vassert(rot >= 0 && rot < 32);
438 //ZZ return mkexpr(src);
441 //ZZ binop(Iop_Shl32, mkexpr(src), mkU8(32 - rot)),
442 //ZZ binop(Iop_Shr32, mkexpr(src), mkU8(rot)));
445 //ZZ static IRExpr* mkU128 ( ULong i )
447 //ZZ return binop(Iop_64HLtoV128, mkU64(i), mkU64(i));
450 //ZZ /* Generate a 4-aligned version of the given expression if
451 //ZZ the given condition is true. Else return it unchanged. */
452 //ZZ static IRExpr* align4if ( IRExpr* e, Bool b )
455 //ZZ return binop(Iop_And32, e, mkU32(~3));
460 /* Other IR construction helpers. */
461 static IROp
mkAND ( IRType ty
) {
463 case Ity_I32
: return Iop_And32
;
464 case Ity_I64
: return Iop_And64
;
465 default: vpanic("mkAND");
469 static IROp
mkOR ( IRType ty
) {
471 case Ity_I32
: return Iop_Or32
;
472 case Ity_I64
: return Iop_Or64
;
473 default: vpanic("mkOR");
477 static IROp
mkXOR ( IRType ty
) {
479 case Ity_I32
: return Iop_Xor32
;
480 case Ity_I64
: return Iop_Xor64
;
481 default: vpanic("mkXOR");
485 static IROp
mkSHL ( IRType ty
) {
487 case Ity_I32
: return Iop_Shl32
;
488 case Ity_I64
: return Iop_Shl64
;
489 default: vpanic("mkSHL");
493 static IROp
mkSHR ( IRType ty
) {
495 case Ity_I32
: return Iop_Shr32
;
496 case Ity_I64
: return Iop_Shr64
;
497 default: vpanic("mkSHR");
501 static IROp
mkSAR ( IRType ty
) {
503 case Ity_I32
: return Iop_Sar32
;
504 case Ity_I64
: return Iop_Sar64
;
505 default: vpanic("mkSAR");
509 static IROp
mkNOT ( IRType ty
) {
511 case Ity_I32
: return Iop_Not32
;
512 case Ity_I64
: return Iop_Not64
;
513 default: vpanic("mkNOT");
517 static IROp
mkADD ( IRType ty
) {
519 case Ity_I32
: return Iop_Add32
;
520 case Ity_I64
: return Iop_Add64
;
521 default: vpanic("mkADD");
525 static IROp
mkSUB ( IRType ty
) {
527 case Ity_I32
: return Iop_Sub32
;
528 case Ity_I64
: return Iop_Sub64
;
529 default: vpanic("mkSUB");
533 static IROp
mkADDF ( IRType ty
) {
535 case Ity_F16
: return Iop_AddF16
;
536 case Ity_F32
: return Iop_AddF32
;
537 case Ity_F64
: return Iop_AddF64
;
538 default: vpanic("mkADDF");
542 static IROp
mkFMADDF ( IRType ty
) {
544 case Ity_F32
: return Iop_MAddF32
;
545 case Ity_F64
: return Iop_MAddF64
;
546 default: vpanic("mkFMADDF");
550 static IROp
mkFMSUBF ( IRType ty
) {
552 case Ity_F32
: return Iop_MSubF32
;
553 case Ity_F64
: return Iop_MSubF64
;
554 default: vpanic("mkFMSUBF");
558 static IROp
mkSUBF ( IRType ty
) {
560 case Ity_F16
: return Iop_SubF16
;
561 case Ity_F32
: return Iop_SubF32
;
562 case Ity_F64
: return Iop_SubF64
;
563 default: vpanic("mkSUBF");
567 static IROp
mkMULF ( IRType ty
) {
569 case Ity_F32
: return Iop_MulF32
;
570 case Ity_F64
: return Iop_MulF64
;
571 default: vpanic("mkMULF");
575 static IROp
mkDIVF ( IRType ty
) {
577 case Ity_F32
: return Iop_DivF32
;
578 case Ity_F64
: return Iop_DivF64
;
579 default: vpanic("mkDIVF");
583 static IROp
mkNEGF ( IRType ty
) {
585 case Ity_F16
: return Iop_NegF16
;
586 case Ity_F32
: return Iop_NegF32
;
587 case Ity_F64
: return Iop_NegF64
;
588 default: vpanic("mkNEGF");
592 static IROp
mkABSF ( IRType ty
) {
594 case Ity_F16
: return Iop_AbsF16
;
595 case Ity_F32
: return Iop_AbsF32
;
596 case Ity_F64
: return Iop_AbsF64
;
597 default: vpanic("mkABSF");
601 static IROp
mkSQRTF ( IRType ty
) {
603 case Ity_F16
: return Iop_SqrtF16
;
604 case Ity_F32
: return Iop_SqrtF32
;
605 case Ity_F64
: return Iop_SqrtF64
;
606 default: vpanic("mkSQRTF");
610 static IROp
mkVecADD ( UInt size
) {
612 = { Iop_Add8x16
, Iop_Add16x8
, Iop_Add32x4
, Iop_Add64x2
};
617 static IROp
mkVecQADDU ( UInt size
) {
619 = { Iop_QAdd8Ux16
, Iop_QAdd16Ux8
, Iop_QAdd32Ux4
, Iop_QAdd64Ux2
};
624 static IROp
mkVecQADDS ( UInt size
) {
626 = { Iop_QAdd8Sx16
, Iop_QAdd16Sx8
, Iop_QAdd32Sx4
, Iop_QAdd64Sx2
};
631 static IROp
mkVecQADDEXTSUSATUU ( UInt size
) {
633 = { Iop_QAddExtSUsatUU8x16
, Iop_QAddExtSUsatUU16x8
,
634 Iop_QAddExtSUsatUU32x4
, Iop_QAddExtSUsatUU64x2
};
639 static IROp
mkVecQADDEXTUSSATSS ( UInt size
) {
641 = { Iop_QAddExtUSsatSS8x16
, Iop_QAddExtUSsatSS16x8
,
642 Iop_QAddExtUSsatSS32x4
, Iop_QAddExtUSsatSS64x2
};
647 static IROp
mkVecSUB ( UInt size
) {
649 = { Iop_Sub8x16
, Iop_Sub16x8
, Iop_Sub32x4
, Iop_Sub64x2
};
654 static IROp
mkVecQSUBU ( UInt size
) {
656 = { Iop_QSub8Ux16
, Iop_QSub16Ux8
, Iop_QSub32Ux4
, Iop_QSub64Ux2
};
661 static IROp
mkVecQSUBS ( UInt size
) {
663 = { Iop_QSub8Sx16
, Iop_QSub16Sx8
, Iop_QSub32Sx4
, Iop_QSub64Sx2
};
668 static IROp
mkVecSARN ( UInt size
) {
670 = { Iop_SarN8x16
, Iop_SarN16x8
, Iop_SarN32x4
, Iop_SarN64x2
};
675 static IROp
mkVecSHRN ( UInt size
) {
677 = { Iop_ShrN8x16
, Iop_ShrN16x8
, Iop_ShrN32x4
, Iop_ShrN64x2
};
682 static IROp
mkVecSHLN ( UInt size
) {
684 = { Iop_ShlN8x16
, Iop_ShlN16x8
, Iop_ShlN32x4
, Iop_ShlN64x2
};
689 static IROp
mkVecCATEVENLANES ( UInt size
) {
691 = { Iop_CatEvenLanes8x16
, Iop_CatEvenLanes16x8
,
692 Iop_CatEvenLanes32x4
, Iop_InterleaveLO64x2
};
697 static IROp
mkVecCATODDLANES ( UInt size
) {
699 = { Iop_CatOddLanes8x16
, Iop_CatOddLanes16x8
,
700 Iop_CatOddLanes32x4
, Iop_InterleaveHI64x2
};
705 static IROp
mkVecINTERLEAVELO ( UInt size
) {
707 = { Iop_InterleaveLO8x16
, Iop_InterleaveLO16x8
,
708 Iop_InterleaveLO32x4
, Iop_InterleaveLO64x2
};
713 static IROp
mkVecINTERLEAVEHI ( UInt size
) {
715 = { Iop_InterleaveHI8x16
, Iop_InterleaveHI16x8
,
716 Iop_InterleaveHI32x4
, Iop_InterleaveHI64x2
};
721 static IROp
mkVecMAXU ( UInt size
) {
723 = { Iop_Max8Ux16
, Iop_Max16Ux8
, Iop_Max32Ux4
, Iop_Max64Ux2
};
728 static IROp
mkVecMAXS ( UInt size
) {
730 = { Iop_Max8Sx16
, Iop_Max16Sx8
, Iop_Max32Sx4
, Iop_Max64Sx2
};
735 static IROp
mkVecMINU ( UInt size
) {
737 = { Iop_Min8Ux16
, Iop_Min16Ux8
, Iop_Min32Ux4
, Iop_Min64Ux2
};
742 static IROp
mkVecMINS ( UInt size
) {
744 = { Iop_Min8Sx16
, Iop_Min16Sx8
, Iop_Min32Sx4
, Iop_Min64Sx2
};
749 static IROp
mkVecMUL ( UInt size
) {
751 = { Iop_Mul8x16
, Iop_Mul16x8
, Iop_Mul32x4
, Iop_INVALID
};
756 static IROp
mkVecMULLU ( UInt sizeNarrow
) {
758 = { Iop_Mull8Ux8
, Iop_Mull16Ux4
, Iop_Mull32Ux2
, Iop_INVALID
};
759 vassert(sizeNarrow
< 3);
760 return ops
[sizeNarrow
];
763 static IROp
mkVecMULLS ( UInt sizeNarrow
) {
765 = { Iop_Mull8Sx8
, Iop_Mull16Sx4
, Iop_Mull32Sx2
, Iop_INVALID
};
766 vassert(sizeNarrow
< 3);
767 return ops
[sizeNarrow
];
770 static IROp
mkVecQDMULLS ( UInt sizeNarrow
) {
772 = { Iop_INVALID
, Iop_QDMull16Sx4
, Iop_QDMull32Sx2
, Iop_INVALID
};
773 vassert(sizeNarrow
< 3);
774 return ops
[sizeNarrow
];
777 static IROp
mkVecCMPEQ ( UInt size
) {
779 = { Iop_CmpEQ8x16
, Iop_CmpEQ16x8
, Iop_CmpEQ32x4
, Iop_CmpEQ64x2
};
784 static IROp
mkVecCMPGTU ( UInt size
) {
786 = { Iop_CmpGT8Ux16
, Iop_CmpGT16Ux8
, Iop_CmpGT32Ux4
, Iop_CmpGT64Ux2
};
791 static IROp
mkVecCMPGTS ( UInt size
) {
793 = { Iop_CmpGT8Sx16
, Iop_CmpGT16Sx8
, Iop_CmpGT32Sx4
, Iop_CmpGT64Sx2
};
798 static IROp
mkVecABS ( UInt size
) {
800 = { Iop_Abs8x16
, Iop_Abs16x8
, Iop_Abs32x4
, Iop_Abs64x2
};
805 static IROp
mkVecZEROHIxxOFV128 ( UInt size
) {
807 = { Iop_ZeroHI120ofV128
, Iop_ZeroHI112ofV128
,
808 Iop_ZeroHI96ofV128
, Iop_ZeroHI64ofV128
};
813 static IRExpr
* mkU ( IRType ty
, ULong imm
) {
815 case Ity_I32
: return mkU32((UInt
)(imm
& 0xFFFFFFFFULL
));
816 case Ity_I64
: return mkU64(imm
);
817 default: vpanic("mkU");
821 static IROp
mkVecQDMULHIS ( UInt size
) {
823 = { Iop_INVALID
, Iop_QDMulHi16Sx8
, Iop_QDMulHi32Sx4
, Iop_INVALID
};
828 static IROp
mkVecQRDMULHIS ( UInt size
) {
830 = { Iop_INVALID
, Iop_QRDMulHi16Sx8
, Iop_QRDMulHi32Sx4
, Iop_INVALID
};
835 static IROp
mkVecQANDUQSH ( UInt size
) {
837 = { Iop_QandUQsh8x16
, Iop_QandUQsh16x8
,
838 Iop_QandUQsh32x4
, Iop_QandUQsh64x2
};
843 static IROp
mkVecQANDSQSH ( UInt size
) {
845 = { Iop_QandSQsh8x16
, Iop_QandSQsh16x8
,
846 Iop_QandSQsh32x4
, Iop_QandSQsh64x2
};
851 static IROp
mkVecQANDUQRSH ( UInt size
) {
853 = { Iop_QandUQRsh8x16
, Iop_QandUQRsh16x8
,
854 Iop_QandUQRsh32x4
, Iop_QandUQRsh64x2
};
859 static IROp
mkVecQANDSQRSH ( UInt size
) {
861 = { Iop_QandSQRsh8x16
, Iop_QandSQRsh16x8
,
862 Iop_QandSQRsh32x4
, Iop_QandSQRsh64x2
};
867 static IROp
mkVecSHU ( UInt size
) {
869 = { Iop_Sh8Ux16
, Iop_Sh16Ux8
, Iop_Sh32Ux4
, Iop_Sh64Ux2
};
874 static IROp
mkVecSHS ( UInt size
) {
876 = { Iop_Sh8Sx16
, Iop_Sh16Sx8
, Iop_Sh32Sx4
, Iop_Sh64Sx2
};
881 static IROp
mkVecRSHU ( UInt size
) {
883 = { Iop_Rsh8Ux16
, Iop_Rsh16Ux8
, Iop_Rsh32Ux4
, Iop_Rsh64Ux2
};
888 static IROp
mkVecRSHS ( UInt size
) {
890 = { Iop_Rsh8Sx16
, Iop_Rsh16Sx8
, Iop_Rsh32Sx4
, Iop_Rsh64Sx2
};
895 static IROp
mkVecNARROWUN ( UInt sizeNarrow
) {
897 = { Iop_NarrowUn16to8x8
, Iop_NarrowUn32to16x4
,
898 Iop_NarrowUn64to32x2
, Iop_INVALID
};
899 vassert(sizeNarrow
< 4);
900 return ops
[sizeNarrow
];
903 static IROp
mkVecQNARROWUNSU ( UInt sizeNarrow
) {
905 = { Iop_QNarrowUn16Sto8Ux8
, Iop_QNarrowUn32Sto16Ux4
,
906 Iop_QNarrowUn64Sto32Ux2
, Iop_INVALID
};
907 vassert(sizeNarrow
< 4);
908 return ops
[sizeNarrow
];
911 static IROp
mkVecQNARROWUNSS ( UInt sizeNarrow
) {
913 = { Iop_QNarrowUn16Sto8Sx8
, Iop_QNarrowUn32Sto16Sx4
,
914 Iop_QNarrowUn64Sto32Sx2
, Iop_INVALID
};
915 vassert(sizeNarrow
< 4);
916 return ops
[sizeNarrow
];
919 static IROp
mkVecQNARROWUNUU ( UInt sizeNarrow
) {
921 = { Iop_QNarrowUn16Uto8Ux8
, Iop_QNarrowUn32Uto16Ux4
,
922 Iop_QNarrowUn64Uto32Ux2
, Iop_INVALID
};
923 vassert(sizeNarrow
< 4);
924 return ops
[sizeNarrow
];
927 static IROp
mkVecQANDqshrNNARROWUU ( UInt sizeNarrow
) {
929 = { Iop_QandQShrNnarrow16Uto8Ux8
, Iop_QandQShrNnarrow32Uto16Ux4
,
930 Iop_QandQShrNnarrow64Uto32Ux2
, Iop_INVALID
};
931 vassert(sizeNarrow
< 4);
932 return ops
[sizeNarrow
];
935 static IROp
mkVecQANDqsarNNARROWSS ( UInt sizeNarrow
) {
937 = { Iop_QandQSarNnarrow16Sto8Sx8
, Iop_QandQSarNnarrow32Sto16Sx4
,
938 Iop_QandQSarNnarrow64Sto32Sx2
, Iop_INVALID
};
939 vassert(sizeNarrow
< 4);
940 return ops
[sizeNarrow
];
943 static IROp
mkVecQANDqsarNNARROWSU ( UInt sizeNarrow
) {
945 = { Iop_QandQSarNnarrow16Sto8Ux8
, Iop_QandQSarNnarrow32Sto16Ux4
,
946 Iop_QandQSarNnarrow64Sto32Ux2
, Iop_INVALID
};
947 vassert(sizeNarrow
< 4);
948 return ops
[sizeNarrow
];
951 static IROp
mkVecQANDqrshrNNARROWUU ( UInt sizeNarrow
) {
953 = { Iop_QandQRShrNnarrow16Uto8Ux8
, Iop_QandQRShrNnarrow32Uto16Ux4
,
954 Iop_QandQRShrNnarrow64Uto32Ux2
, Iop_INVALID
};
955 vassert(sizeNarrow
< 4);
956 return ops
[sizeNarrow
];
959 static IROp
mkVecQANDqrsarNNARROWSS ( UInt sizeNarrow
) {
961 = { Iop_QandQRSarNnarrow16Sto8Sx8
, Iop_QandQRSarNnarrow32Sto16Sx4
,
962 Iop_QandQRSarNnarrow64Sto32Sx2
, Iop_INVALID
};
963 vassert(sizeNarrow
< 4);
964 return ops
[sizeNarrow
];
967 static IROp
mkVecQANDqrsarNNARROWSU ( UInt sizeNarrow
) {
969 = { Iop_QandQRSarNnarrow16Sto8Ux8
, Iop_QandQRSarNnarrow32Sto16Ux4
,
970 Iop_QandQRSarNnarrow64Sto32Ux2
, Iop_INVALID
};
971 vassert(sizeNarrow
< 4);
972 return ops
[sizeNarrow
];
975 static IROp
mkVecQSHLNSATUU ( UInt size
) {
977 = { Iop_QShlNsatUU8x16
, Iop_QShlNsatUU16x8
,
978 Iop_QShlNsatUU32x4
, Iop_QShlNsatUU64x2
};
983 static IROp
mkVecQSHLNSATSS ( UInt size
) {
985 = { Iop_QShlNsatSS8x16
, Iop_QShlNsatSS16x8
,
986 Iop_QShlNsatSS32x4
, Iop_QShlNsatSS64x2
};
991 static IROp
mkVecQSHLNSATSU ( UInt size
) {
993 = { Iop_QShlNsatSU8x16
, Iop_QShlNsatSU16x8
,
994 Iop_QShlNsatSU32x4
, Iop_QShlNsatSU64x2
};
999 static IROp
mkVecADDF ( UInt size
) {
1001 = { Iop_INVALID
, Iop_Add16Fx8
, Iop_Add32Fx4
, Iop_Add64Fx2
};
1006 static IROp
mkVecMAXF ( UInt size
) {
1008 = { Iop_INVALID
, Iop_INVALID
, Iop_Max32Fx4
, Iop_Max64Fx2
};
1013 static IROp
mkVecMINF ( UInt size
) {
1015 = { Iop_INVALID
, Iop_INVALID
, Iop_Min32Fx4
, Iop_Min64Fx2
};
1020 /* Generate IR to create 'arg rotated right by imm', for sane values
1021 of 'ty' and 'imm'. */
1022 static IRTemp
mathROR ( IRType ty
, IRTemp arg
, UInt imm
)
1025 if (ty
== Ity_I64
) {
1028 vassert(ty
== Ity_I32
);
1036 IRTemp res
= newTemp(ty
);
1037 assign(res
, binop(mkOR(ty
),
1038 binop(mkSHL(ty
), mkexpr(arg
), mkU8(w
- imm
)),
1039 binop(mkSHR(ty
), mkexpr(arg
), mkU8(imm
)) ));
1043 /* Generate IR to set the returned temp to either all-zeroes or
1044 all ones, as a copy of arg<imm>. */
1045 static IRTemp
mathREPLICATE ( IRType ty
, IRTemp arg
, UInt imm
)
1048 if (ty
== Ity_I64
) {
1051 vassert(ty
== Ity_I32
);
1056 IRTemp res
= newTemp(ty
);
1057 assign(res
, binop(mkSAR(ty
),
1058 binop(mkSHL(ty
), mkexpr(arg
), mkU8(w
- 1 - imm
)),
1063 /* S-widen 8/16/32/64 bit int expr to 64. */
1064 static IRExpr
* widenSto64 ( IRType srcTy
, IRExpr
* e
)
1067 case Ity_I64
: return e
;
1068 case Ity_I32
: return unop(Iop_32Sto64
, e
);
1069 case Ity_I16
: return unop(Iop_16Sto64
, e
);
1070 case Ity_I8
: return unop(Iop_8Sto64
, e
);
1071 default: vpanic("widenSto64(arm64)");
1075 /* U-widen 8/16/32/64 bit int expr to 64. */
1076 static IRExpr
* widenUto64 ( IRType srcTy
, IRExpr
* e
)
1079 case Ity_I64
: return e
;
1080 case Ity_I32
: return unop(Iop_32Uto64
, e
);
1081 case Ity_I16
: return unop(Iop_16Uto64
, e
);
1082 case Ity_I8
: return unop(Iop_8Uto64
, e
);
1083 default: vpanic("widenUto64(arm64)");
1087 /* Narrow 64 bit int expr to 8/16/32/64. Clearly only some
1088 of these combinations make sense. */
1089 static IRExpr
* narrowFrom64 ( IRType dstTy
, IRExpr
* e
)
1092 case Ity_I64
: return e
;
1093 case Ity_I32
: return unop(Iop_64to32
, e
);
1094 case Ity_I16
: return unop(Iop_64to16
, e
);
1095 case Ity_I8
: return unop(Iop_64to8
, e
);
1096 default: vpanic("narrowFrom64(arm64)");
1101 /*------------------------------------------------------------*/
1102 /*--- Helpers for accessing guest registers. ---*/
1103 /*------------------------------------------------------------*/
1105 #define OFFB_X0 offsetof(VexGuestARM64State,guest_X0)
1106 #define OFFB_X1 offsetof(VexGuestARM64State,guest_X1)
1107 #define OFFB_X2 offsetof(VexGuestARM64State,guest_X2)
1108 #define OFFB_X3 offsetof(VexGuestARM64State,guest_X3)
1109 #define OFFB_X4 offsetof(VexGuestARM64State,guest_X4)
1110 #define OFFB_X5 offsetof(VexGuestARM64State,guest_X5)
1111 #define OFFB_X6 offsetof(VexGuestARM64State,guest_X6)
1112 #define OFFB_X7 offsetof(VexGuestARM64State,guest_X7)
1113 #define OFFB_X8 offsetof(VexGuestARM64State,guest_X8)
1114 #define OFFB_X9 offsetof(VexGuestARM64State,guest_X9)
1115 #define OFFB_X10 offsetof(VexGuestARM64State,guest_X10)
1116 #define OFFB_X11 offsetof(VexGuestARM64State,guest_X11)
1117 #define OFFB_X12 offsetof(VexGuestARM64State,guest_X12)
1118 #define OFFB_X13 offsetof(VexGuestARM64State,guest_X13)
1119 #define OFFB_X14 offsetof(VexGuestARM64State,guest_X14)
1120 #define OFFB_X15 offsetof(VexGuestARM64State,guest_X15)
1121 #define OFFB_X16 offsetof(VexGuestARM64State,guest_X16)
1122 #define OFFB_X17 offsetof(VexGuestARM64State,guest_X17)
1123 #define OFFB_X18 offsetof(VexGuestARM64State,guest_X18)
1124 #define OFFB_X19 offsetof(VexGuestARM64State,guest_X19)
1125 #define OFFB_X20 offsetof(VexGuestARM64State,guest_X20)
1126 #define OFFB_X21 offsetof(VexGuestARM64State,guest_X21)
1127 #define OFFB_X22 offsetof(VexGuestARM64State,guest_X22)
1128 #define OFFB_X23 offsetof(VexGuestARM64State,guest_X23)
1129 #define OFFB_X24 offsetof(VexGuestARM64State,guest_X24)
1130 #define OFFB_X25 offsetof(VexGuestARM64State,guest_X25)
1131 #define OFFB_X26 offsetof(VexGuestARM64State,guest_X26)
1132 #define OFFB_X27 offsetof(VexGuestARM64State,guest_X27)
1133 #define OFFB_X28 offsetof(VexGuestARM64State,guest_X28)
1134 #define OFFB_X29 offsetof(VexGuestARM64State,guest_X29)
1135 #define OFFB_X30 offsetof(VexGuestARM64State,guest_X30)
1137 #define OFFB_XSP offsetof(VexGuestARM64State,guest_XSP)
1138 #define OFFB_PC offsetof(VexGuestARM64State,guest_PC)
1140 #define OFFB_CC_OP offsetof(VexGuestARM64State,guest_CC_OP)
1141 #define OFFB_CC_DEP1 offsetof(VexGuestARM64State,guest_CC_DEP1)
1142 #define OFFB_CC_DEP2 offsetof(VexGuestARM64State,guest_CC_DEP2)
1143 #define OFFB_CC_NDEP offsetof(VexGuestARM64State,guest_CC_NDEP)
1145 #define OFFB_TPIDR_EL0 offsetof(VexGuestARM64State,guest_TPIDR_EL0)
1146 #define OFFB_NRADDR offsetof(VexGuestARM64State,guest_NRADDR)
1148 #define OFFB_Q0 offsetof(VexGuestARM64State,guest_Q0)
1149 #define OFFB_Q1 offsetof(VexGuestARM64State,guest_Q1)
1150 #define OFFB_Q2 offsetof(VexGuestARM64State,guest_Q2)
1151 #define OFFB_Q3 offsetof(VexGuestARM64State,guest_Q3)
1152 #define OFFB_Q4 offsetof(VexGuestARM64State,guest_Q4)
1153 #define OFFB_Q5 offsetof(VexGuestARM64State,guest_Q5)
1154 #define OFFB_Q6 offsetof(VexGuestARM64State,guest_Q6)
1155 #define OFFB_Q7 offsetof(VexGuestARM64State,guest_Q7)
1156 #define OFFB_Q8 offsetof(VexGuestARM64State,guest_Q8)
1157 #define OFFB_Q9 offsetof(VexGuestARM64State,guest_Q9)
1158 #define OFFB_Q10 offsetof(VexGuestARM64State,guest_Q10)
1159 #define OFFB_Q11 offsetof(VexGuestARM64State,guest_Q11)
1160 #define OFFB_Q12 offsetof(VexGuestARM64State,guest_Q12)
1161 #define OFFB_Q13 offsetof(VexGuestARM64State,guest_Q13)
1162 #define OFFB_Q14 offsetof(VexGuestARM64State,guest_Q14)
1163 #define OFFB_Q15 offsetof(VexGuestARM64State,guest_Q15)
1164 #define OFFB_Q16 offsetof(VexGuestARM64State,guest_Q16)
1165 #define OFFB_Q17 offsetof(VexGuestARM64State,guest_Q17)
1166 #define OFFB_Q18 offsetof(VexGuestARM64State,guest_Q18)
1167 #define OFFB_Q19 offsetof(VexGuestARM64State,guest_Q19)
1168 #define OFFB_Q20 offsetof(VexGuestARM64State,guest_Q20)
1169 #define OFFB_Q21 offsetof(VexGuestARM64State,guest_Q21)
1170 #define OFFB_Q22 offsetof(VexGuestARM64State,guest_Q22)
1171 #define OFFB_Q23 offsetof(VexGuestARM64State,guest_Q23)
1172 #define OFFB_Q24 offsetof(VexGuestARM64State,guest_Q24)
1173 #define OFFB_Q25 offsetof(VexGuestARM64State,guest_Q25)
1174 #define OFFB_Q26 offsetof(VexGuestARM64State,guest_Q26)
1175 #define OFFB_Q27 offsetof(VexGuestARM64State,guest_Q27)
1176 #define OFFB_Q28 offsetof(VexGuestARM64State,guest_Q28)
1177 #define OFFB_Q29 offsetof(VexGuestARM64State,guest_Q29)
1178 #define OFFB_Q30 offsetof(VexGuestARM64State,guest_Q30)
1179 #define OFFB_Q31 offsetof(VexGuestARM64State,guest_Q31)
1181 #define OFFB_FPCR offsetof(VexGuestARM64State,guest_FPCR)
1182 #define OFFB_QCFLAG offsetof(VexGuestARM64State,guest_QCFLAG)
1184 #define OFFB_CMSTART offsetof(VexGuestARM64State,guest_CMSTART)
1185 #define OFFB_CMLEN offsetof(VexGuestARM64State,guest_CMLEN)
1187 #define OFFB_LLSC_SIZE offsetof(VexGuestARM64State,guest_LLSC_SIZE)
1188 #define OFFB_LLSC_ADDR offsetof(VexGuestARM64State,guest_LLSC_ADDR)
1189 #define OFFB_LLSC_DATA_LO64 offsetof(VexGuestARM64State,guest_LLSC_DATA_LO64)
1190 #define OFFB_LLSC_DATA_HI64 offsetof(VexGuestARM64State,guest_LLSC_DATA_HI64)
1193 /* ---------------- Integer registers ---------------- */
1195 static Int
offsetIReg64 ( UInt iregNo
)
1197 /* Do we care about endianness here? We do if sub-parts of integer
1198 registers are accessed. */
1200 case 0: return OFFB_X0
;
1201 case 1: return OFFB_X1
;
1202 case 2: return OFFB_X2
;
1203 case 3: return OFFB_X3
;
1204 case 4: return OFFB_X4
;
1205 case 5: return OFFB_X5
;
1206 case 6: return OFFB_X6
;
1207 case 7: return OFFB_X7
;
1208 case 8: return OFFB_X8
;
1209 case 9: return OFFB_X9
;
1210 case 10: return OFFB_X10
;
1211 case 11: return OFFB_X11
;
1212 case 12: return OFFB_X12
;
1213 case 13: return OFFB_X13
;
1214 case 14: return OFFB_X14
;
1215 case 15: return OFFB_X15
;
1216 case 16: return OFFB_X16
;
1217 case 17: return OFFB_X17
;
1218 case 18: return OFFB_X18
;
1219 case 19: return OFFB_X19
;
1220 case 20: return OFFB_X20
;
1221 case 21: return OFFB_X21
;
1222 case 22: return OFFB_X22
;
1223 case 23: return OFFB_X23
;
1224 case 24: return OFFB_X24
;
1225 case 25: return OFFB_X25
;
1226 case 26: return OFFB_X26
;
1227 case 27: return OFFB_X27
;
1228 case 28: return OFFB_X28
;
1229 case 29: return OFFB_X29
;
1230 case 30: return OFFB_X30
;
1232 default: vassert(0);
1236 static Int
offsetIReg64orSP ( UInt iregNo
)
1238 return iregNo
== 31 ? OFFB_XSP
: offsetIReg64(iregNo
);
1241 static const HChar
* nameIReg64orZR ( UInt iregNo
)
1243 vassert(iregNo
< 32);
1244 static const HChar
* names
[32]
1245 = { "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
1246 "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
1247 "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
1248 "x24", "x25", "x26", "x27", "x28", "x29", "x30", "xzr" };
1249 return names
[iregNo
];
1252 static const HChar
* nameIReg64orSP ( UInt iregNo
)
1257 vassert(iregNo
< 31);
1258 return nameIReg64orZR(iregNo
);
1261 static IRExpr
* getIReg64orSP ( UInt iregNo
)
1263 vassert(iregNo
< 32);
1264 return IRExpr_Get( offsetIReg64orSP(iregNo
), Ity_I64
);
1267 static IRExpr
* getIReg64orZR ( UInt iregNo
)
1272 vassert(iregNo
< 31);
1273 return IRExpr_Get( offsetIReg64orSP(iregNo
), Ity_I64
);
1276 static void putIReg64orSP ( UInt iregNo
, IRExpr
* e
)
1278 vassert(typeOfIRExpr(irsb
->tyenv
, e
) == Ity_I64
);
1279 stmt( IRStmt_Put(offsetIReg64orSP(iregNo
), e
) );
1282 static void putIReg64orZR ( UInt iregNo
, IRExpr
* e
)
1284 vassert(typeOfIRExpr(irsb
->tyenv
, e
) == Ity_I64
);
1288 vassert(iregNo
< 31);
1289 stmt( IRStmt_Put(offsetIReg64orSP(iregNo
), e
) );
1292 static const HChar
* nameIReg32orZR ( UInt iregNo
)
1294 vassert(iregNo
< 32);
1295 static const HChar
* names
[32]
1296 = { "w0", "w1", "w2", "w3", "w4", "w5", "w6", "w7",
1297 "w8", "w9", "w10", "w11", "w12", "w13", "w14", "w15",
1298 "w16", "w17", "w18", "w19", "w20", "w21", "w22", "w23",
1299 "w24", "w25", "w26", "w27", "w28", "w29", "w30", "wzr" };
1300 return names
[iregNo
];
1303 static const HChar
* nameIReg32orSP ( UInt iregNo
)
1308 vassert(iregNo
< 31);
1309 return nameIReg32orZR(iregNo
);
1312 static IRExpr
* getIReg32orSP ( UInt iregNo
)
1314 vassert(iregNo
< 32);
1315 return unop(Iop_64to32
,
1316 IRExpr_Get( offsetIReg64orSP(iregNo
), Ity_I64
));
1319 static IRExpr
* getIReg32orZR ( UInt iregNo
)
1324 vassert(iregNo
< 31);
1325 return unop(Iop_64to32
,
1326 IRExpr_Get( offsetIReg64orSP(iregNo
), Ity_I64
));
1329 static void putIReg32orSP ( UInt iregNo
, IRExpr
* e
)
1331 vassert(typeOfIRExpr(irsb
->tyenv
, e
) == Ity_I32
);
1332 stmt( IRStmt_Put(offsetIReg64orSP(iregNo
), unop(Iop_32Uto64
, e
)) );
1335 static void putIReg32orZR ( UInt iregNo
, IRExpr
* e
)
1337 vassert(typeOfIRExpr(irsb
->tyenv
, e
) == Ity_I32
);
1341 vassert(iregNo
< 31);
1342 stmt( IRStmt_Put(offsetIReg64orSP(iregNo
), unop(Iop_32Uto64
, e
)) );
1345 static const HChar
* nameIRegOrSP ( Bool is64
, UInt iregNo
)
1347 vassert(is64
== True
|| is64
== False
);
1348 return is64
? nameIReg64orSP(iregNo
) : nameIReg32orSP(iregNo
);
1351 static const HChar
* nameIRegOrZR ( Bool is64
, UInt iregNo
)
1353 vassert(is64
== True
|| is64
== False
);
1354 return is64
? nameIReg64orZR(iregNo
) : nameIReg32orZR(iregNo
);
1357 static IRExpr
* getIRegOrZR ( Bool is64
, UInt iregNo
)
1359 vassert(is64
== True
|| is64
== False
);
1360 return is64
? getIReg64orZR(iregNo
) : getIReg32orZR(iregNo
);
1363 static void putIRegOrZR ( Bool is64
, UInt iregNo
, IRExpr
* e
)
1365 vassert(is64
== True
|| is64
== False
);
1366 if (is64
) putIReg64orZR(iregNo
, e
); else putIReg32orZR(iregNo
, e
);
1369 static void putPC ( IRExpr
* e
)
1371 vassert(typeOfIRExpr(irsb
->tyenv
, e
) == Ity_I64
);
1372 stmt( IRStmt_Put(OFFB_PC
, e
) );
1376 /* ---------------- Vector (Q) registers ---------------- */
1378 static Int
offsetQReg128 ( UInt qregNo
)
1380 /* We don't care about endianness at this point. It only becomes
1381 relevant when dealing with sections of these registers.*/
1383 case 0: return OFFB_Q0
;
1384 case 1: return OFFB_Q1
;
1385 case 2: return OFFB_Q2
;
1386 case 3: return OFFB_Q3
;
1387 case 4: return OFFB_Q4
;
1388 case 5: return OFFB_Q5
;
1389 case 6: return OFFB_Q6
;
1390 case 7: return OFFB_Q7
;
1391 case 8: return OFFB_Q8
;
1392 case 9: return OFFB_Q9
;
1393 case 10: return OFFB_Q10
;
1394 case 11: return OFFB_Q11
;
1395 case 12: return OFFB_Q12
;
1396 case 13: return OFFB_Q13
;
1397 case 14: return OFFB_Q14
;
1398 case 15: return OFFB_Q15
;
1399 case 16: return OFFB_Q16
;
1400 case 17: return OFFB_Q17
;
1401 case 18: return OFFB_Q18
;
1402 case 19: return OFFB_Q19
;
1403 case 20: return OFFB_Q20
;
1404 case 21: return OFFB_Q21
;
1405 case 22: return OFFB_Q22
;
1406 case 23: return OFFB_Q23
;
1407 case 24: return OFFB_Q24
;
1408 case 25: return OFFB_Q25
;
1409 case 26: return OFFB_Q26
;
1410 case 27: return OFFB_Q27
;
1411 case 28: return OFFB_Q28
;
1412 case 29: return OFFB_Q29
;
1413 case 30: return OFFB_Q30
;
1414 case 31: return OFFB_Q31
;
1415 default: vassert(0);
1419 /* Write to a complete Qreg. */
1420 static void putQReg128 ( UInt qregNo
, IRExpr
* e
)
1422 vassert(qregNo
< 32);
1423 vassert(typeOfIRExpr(irsb
->tyenv
, e
) == Ity_V128
);
1424 stmt( IRStmt_Put(offsetQReg128(qregNo
), e
) );
1427 /* Read a complete Qreg. */
1428 static IRExpr
* getQReg128 ( UInt qregNo
)
1430 vassert(qregNo
< 32);
1431 return IRExpr_Get(offsetQReg128(qregNo
), Ity_V128
);
1434 /* Produce the IR type for some sub-part of a vector. For 32- and 64-
1435 bit sub-parts we can choose either integer or float types, and
1436 choose float on the basis that that is the common use case and so
1437 will give least interference with Put-to-Get forwarding later
1439 static IRType
preferredVectorSubTypeFromSize ( UInt szB
)
1442 case 1: return Ity_I8
;
1443 case 2: return Ity_I16
;
1444 case 4: return Ity_I32
; //Ity_F32;
1445 case 8: return Ity_F64
;
1446 case 16: return Ity_V128
;
1447 default: vassert(0);
1451 /* Find the offset of the laneNo'th lane of type laneTy in the given
1452 Qreg. Since the host is little-endian, the least significant lane
1453 has the lowest offset. */
1454 static Int
offsetQRegLane ( UInt qregNo
, IRType laneTy
, UInt laneNo
)
1456 vassert(host_endness
== VexEndnessLE
);
1457 Int base
= offsetQReg128(qregNo
);
1458 /* Since the host is little-endian, the least significant lane
1459 will be at the lowest address. */
1460 /* Restrict this to known types, so as to avoid silently accepting
1464 case Ity_I8
: laneSzB
= 1; break;
1465 case Ity_F16
: case Ity_I16
: laneSzB
= 2; break;
1466 case Ity_F32
: case Ity_I32
: laneSzB
= 4; break;
1467 case Ity_F64
: case Ity_I64
: laneSzB
= 8; break;
1468 case Ity_V128
: laneSzB
= 16; break;
1471 vassert(laneSzB
> 0);
1472 UInt minOff
= laneNo
* laneSzB
;
1473 UInt maxOff
= minOff
+ laneSzB
- 1;
1474 vassert(maxOff
< 16);
1475 return base
+ minOff
;
1478 /* Put to the least significant lane of a Qreg. */
1479 static void putQRegLO ( UInt qregNo
, IRExpr
* e
)
1481 IRType ty
= typeOfIRExpr(irsb
->tyenv
, e
);
1482 Int off
= offsetQRegLane(qregNo
, ty
, 0);
1484 case Ity_I8
: case Ity_I16
: case Ity_I32
: case Ity_I64
:
1485 case Ity_F16
: case Ity_F32
: case Ity_F64
: case Ity_V128
:
1488 vassert(0); // Other cases are probably invalid
1490 stmt(IRStmt_Put(off
, e
));
1493 /* Get from the least significant lane of a Qreg. */
1494 static IRExpr
* getQRegLO ( UInt qregNo
, IRType ty
)
1496 Int off
= offsetQRegLane(qregNo
, ty
, 0);
1499 case Ity_F16
: case Ity_I16
:
1500 case Ity_I32
: case Ity_I64
:
1501 case Ity_F32
: case Ity_F64
: case Ity_V128
:
1504 vassert(0); // Other cases are ATC
1506 return IRExpr_Get(off
, ty
);
1509 static const HChar
* nameQRegLO ( UInt qregNo
, IRType laneTy
)
1511 static const HChar
* namesQ
[32]
1512 = { "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7",
1513 "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15",
1514 "q16", "q17", "q18", "q19", "q20", "q21", "q22", "q23",
1515 "q24", "q25", "q26", "q27", "q28", "q29", "q30", "q31" };
1516 static const HChar
* namesD
[32]
1517 = { "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7",
1518 "d8", "d9", "d10", "d11", "d12", "d13", "d14", "d15",
1519 "d16", "d17", "d18", "d19", "d20", "d21", "d22", "d23",
1520 "d24", "d25", "d26", "d27", "d28", "d29", "d30", "d31" };
1521 static const HChar
* namesS
[32]
1522 = { "s0", "s1", "s2", "s3", "s4", "s5", "s6", "s7",
1523 "s8", "s9", "s10", "s11", "s12", "s13", "s14", "s15",
1524 "s16", "s17", "s18", "s19", "s20", "s21", "s22", "s23",
1525 "s24", "s25", "s26", "s27", "s28", "s29", "s30", "s31" };
1526 static const HChar
* namesH
[32]
1527 = { "h0", "h1", "h2", "h3", "h4", "h5", "h6", "h7",
1528 "h8", "h9", "h10", "h11", "h12", "h13", "h14", "h15",
1529 "h16", "h17", "h18", "h19", "h20", "h21", "h22", "h23",
1530 "h24", "h25", "h26", "h27", "h28", "h29", "h30", "h31" };
1531 static const HChar
* namesB
[32]
1532 = { "b0", "b1", "b2", "b3", "b4", "b5", "b6", "b7",
1533 "b8", "b9", "b10", "b11", "b12", "b13", "b14", "b15",
1534 "b16", "b17", "b18", "b19", "b20", "b21", "b22", "b23",
1535 "b24", "b25", "b26", "b27", "b28", "b29", "b30", "b31" };
1536 vassert(qregNo
< 32);
1537 switch (sizeofIRType(laneTy
)) {
1538 case 1: return namesB
[qregNo
];
1539 case 2: return namesH
[qregNo
];
1540 case 4: return namesS
[qregNo
];
1541 case 8: return namesD
[qregNo
];
1542 case 16: return namesQ
[qregNo
];
1543 default: vassert(0);
1548 static const HChar
* nameQReg128 ( UInt qregNo
)
1550 return nameQRegLO(qregNo
, Ity_V128
);
1553 /* Find the offset of the most significant half (8 bytes) of the given
1554 Qreg. This requires knowing the endianness of the host. */
1555 static Int
offsetQRegHI64 ( UInt qregNo
)
1557 return offsetQRegLane(qregNo
, Ity_I64
, 1);
1560 static IRExpr
* getQRegHI64 ( UInt qregNo
)
1562 return IRExpr_Get(offsetQRegHI64(qregNo
), Ity_I64
);
1565 static void putQRegHI64 ( UInt qregNo
, IRExpr
* e
)
1567 IRType ty
= typeOfIRExpr(irsb
->tyenv
, e
);
1568 Int off
= offsetQRegHI64(qregNo
);
1570 case Ity_I64
: case Ity_F64
:
1573 vassert(0); // Other cases are plain wrong
1575 stmt(IRStmt_Put(off
, e
));
1578 /* Put to a specified lane of a Qreg. */
1579 static void putQRegLane ( UInt qregNo
, UInt laneNo
, IRExpr
* e
)
1581 IRType laneTy
= typeOfIRExpr(irsb
->tyenv
, e
);
1582 Int off
= offsetQRegLane(qregNo
, laneTy
, laneNo
);
1584 case Ity_F64
: case Ity_I64
:
1585 case Ity_I32
: case Ity_F32
:
1586 case Ity_I16
: case Ity_F16
:
1590 vassert(0); // Other cases are ATC
1592 stmt(IRStmt_Put(off
, e
));
1595 /* Get from a specified lane of a Qreg. */
1596 static IRExpr
* getQRegLane ( UInt qregNo
, UInt laneNo
, IRType laneTy
)
1598 Int off
= offsetQRegLane(qregNo
, laneTy
, laneNo
);
1600 case Ity_I64
: case Ity_I32
: case Ity_I16
: case Ity_I8
:
1601 case Ity_F64
: case Ity_F32
: case Ity_F16
:
1604 vassert(0); // Other cases are ATC
1606 return IRExpr_Get(off
, laneTy
);
1610 //ZZ /* ---------------- Misc registers ---------------- */
1612 //ZZ static void putMiscReg32 ( UInt gsoffset,
1613 //ZZ IRExpr* e, /* :: Ity_I32 */
1614 //ZZ IRTemp guardT /* :: Ity_I32, 0 or 1 */)
1616 //ZZ switch (gsoffset) {
1617 //ZZ case OFFB_FPSCR: break;
1618 //ZZ case OFFB_QFLAG32: break;
1619 //ZZ case OFFB_GEFLAG0: break;
1620 //ZZ case OFFB_GEFLAG1: break;
1621 //ZZ case OFFB_GEFLAG2: break;
1622 //ZZ case OFFB_GEFLAG3: break;
1623 //ZZ default: vassert(0); /* awaiting more cases */
1625 //ZZ vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32);
1627 //ZZ if (guardT == IRTemp_INVALID) {
1628 //ZZ /* unconditional write */
1629 //ZZ stmt(IRStmt_Put(gsoffset, e));
1631 //ZZ stmt(IRStmt_Put(
1633 //ZZ IRExpr_ITE( binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0)),
1634 //ZZ e, IRExpr_Get(gsoffset, Ity_I32) )
1639 //ZZ static IRTemp get_ITSTATE ( void )
1641 //ZZ ASSERT_IS_THUMB;
1642 //ZZ IRTemp t = newTemp(Ity_I32);
1643 //ZZ assign(t, IRExpr_Get( OFFB_ITSTATE, Ity_I32));
1647 //ZZ static void put_ITSTATE ( IRTemp t )
1649 //ZZ ASSERT_IS_THUMB;
1650 //ZZ stmt( IRStmt_Put( OFFB_ITSTATE, mkexpr(t)) );
1653 //ZZ static IRTemp get_QFLAG32 ( void )
1655 //ZZ IRTemp t = newTemp(Ity_I32);
1656 //ZZ assign(t, IRExpr_Get( OFFB_QFLAG32, Ity_I32));
1660 //ZZ static void put_QFLAG32 ( IRTemp t, IRTemp condT )
1662 //ZZ putMiscReg32( OFFB_QFLAG32, mkexpr(t), condT );
1665 //ZZ /* Stickily set the 'Q' flag (APSR bit 27) of the APSR (Application Program
1666 //ZZ Status Register) to indicate that overflow or saturation occurred.
1667 //ZZ Nb: t must be zero to denote no saturation, and any nonzero
1668 //ZZ value to indicate saturation. */
1669 //ZZ static void or_into_QFLAG32 ( IRExpr* e, IRTemp condT )
1671 //ZZ IRTemp old = get_QFLAG32();
1672 //ZZ IRTemp nyu = newTemp(Ity_I32);
1673 //ZZ assign(nyu, binop(Iop_Or32, mkexpr(old), e) );
1674 //ZZ put_QFLAG32(nyu, condT);
1678 /* ---------------- FPCR stuff ---------------- */
1680 /* Generate IR to get hold of the rounding mode bits in FPCR, and
1681 convert them to IR format. Bind the final result to the
1683 static IRTemp
/* :: Ity_I32 */ mk_get_IR_rounding_mode ( void )
1685 /* The ARMvfp encoding for rounding mode bits is:
1690 We need to convert that to the IR encoding:
1691 00 to nearest (the default)
1695 Which can be done by swapping bits 0 and 1.
1696 The rmode bits are at 23:22 in FPSCR.
1698 IRTemp armEncd
= newTemp(Ity_I32
);
1699 IRTemp swapped
= newTemp(Ity_I32
);
1700 /* Fish FPCR[23:22] out, and slide to bottom. Doesn't matter that
1701 we don't zero out bits 24 and above, since the assignment to
1702 'swapped' will mask them out anyway. */
1704 binop(Iop_Shr32
, IRExpr_Get(OFFB_FPCR
, Ity_I32
), mkU8(22)));
1705 /* Now swap them. */
1709 binop(Iop_Shl32
, mkexpr(armEncd
), mkU8(1)),
1712 binop(Iop_Shr32
, mkexpr(armEncd
), mkU8(1)),
1719 /*------------------------------------------------------------*/
1720 /*--- Helpers for flag handling and conditional insns ---*/
1721 /*------------------------------------------------------------*/
1723 static const HChar
* nameARM64Condcode ( ARM64Condcode cond
)
1726 case ARM64CondEQ
: return "eq";
1727 case ARM64CondNE
: return "ne";
1728 case ARM64CondCS
: return "cs"; // or 'hs'
1729 case ARM64CondCC
: return "cc"; // or 'lo'
1730 case ARM64CondMI
: return "mi";
1731 case ARM64CondPL
: return "pl";
1732 case ARM64CondVS
: return "vs";
1733 case ARM64CondVC
: return "vc";
1734 case ARM64CondHI
: return "hi";
1735 case ARM64CondLS
: return "ls";
1736 case ARM64CondGE
: return "ge";
1737 case ARM64CondLT
: return "lt";
1738 case ARM64CondGT
: return "gt";
1739 case ARM64CondLE
: return "le";
1740 case ARM64CondAL
: return "al";
1741 case ARM64CondNV
: return "nv";
1742 default: vpanic("name_ARM64Condcode");
1746 /* and a handy shorthand for it */
1747 static const HChar
* nameCC ( ARM64Condcode cond
) {
1748 return nameARM64Condcode(cond
);
1752 /* Build IR to calculate some particular condition from stored
1753 CC_OP/CC_DEP1/CC_DEP2/CC_NDEP. Returns an expression of type
1754 Ity_I64, suitable for narrowing. Although the return type is
1755 Ity_I64, the returned value is either 0 or 1. 'cond' must be
1756 :: Ity_I64 and must denote the condition to compute in
1757 bits 7:4, and be zero everywhere else.
1759 static IRExpr
* mk_arm64g_calculate_condition_dyn ( IRExpr
* cond
)
1761 vassert(typeOfIRExpr(irsb
->tyenv
, cond
) == Ity_I64
);
1762 /* And 'cond' had better produce a value in which only bits 7:4 are
1763 nonzero. However, obviously we can't assert for that. */
1765 /* So what we're constructing for the first argument is
1766 "(cond << 4) | stored-operation".
1767 However, as per comments above, 'cond' must be supplied
1768 pre-shifted to this function.
1770 This pairing scheme requires that the ARM64_CC_OP_ values all fit
1771 in 4 bits. Hence we are passing a (COND, OP) pair in the lowest
1772 8 bits of the first argument. */
1775 binop(Iop_Or64
, IRExpr_Get(OFFB_CC_OP
, Ity_I64
), cond
),
1776 IRExpr_Get(OFFB_CC_DEP1
, Ity_I64
),
1777 IRExpr_Get(OFFB_CC_DEP2
, Ity_I64
),
1778 IRExpr_Get(OFFB_CC_NDEP
, Ity_I64
)
1784 "arm64g_calculate_condition", &arm64g_calculate_condition
,
1788 /* Exclude the requested condition, OP and NDEP from definedness
1789 checking. We're only interested in DEP1 and DEP2. */
1790 call
->Iex
.CCall
.cee
->mcx_mask
= (1<<0) | (1<<3);
1795 /* Build IR to calculate some particular condition from stored
1796 CC_OP/CC_DEP1/CC_DEP2/CC_NDEP. Returns an expression of type
1797 Ity_I64, suitable for narrowing. Although the return type is
1798 Ity_I64, the returned value is either 0 or 1.
1800 static IRExpr
* mk_arm64g_calculate_condition ( ARM64Condcode cond
)
1802 /* First arg is "(cond << 4) | condition". This requires that the
1803 ARM64_CC_OP_ values all fit in 4 bits. Hence we are passing a
1804 (COND, OP) pair in the lowest 8 bits of the first argument. */
1805 vassert(cond
>= 0 && cond
<= 15);
1806 return mk_arm64g_calculate_condition_dyn( mkU64(cond
<< 4) );
1810 /* Build IR to calculate just the carry flag from stored
1811 CC_OP/CC_DEP1/CC_DEP2/CC_NDEP. Returns an expression ::
1813 static IRExpr
* mk_arm64g_calculate_flag_c ( void )
1816 = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP
, Ity_I64
),
1817 IRExpr_Get(OFFB_CC_DEP1
, Ity_I64
),
1818 IRExpr_Get(OFFB_CC_DEP2
, Ity_I64
),
1819 IRExpr_Get(OFFB_CC_NDEP
, Ity_I64
) );
1824 "arm64g_calculate_flag_c", &arm64g_calculate_flag_c
,
1827 /* Exclude OP and NDEP from definedness checking. We're only
1828 interested in DEP1 and DEP2. */
1829 call
->Iex
.CCall
.cee
->mcx_mask
= (1<<0) | (1<<3);
1834 //ZZ /* Build IR to calculate just the overflow flag from stored
1835 //ZZ CC_OP/CC_DEP1/CC_DEP2/CC_NDEP. Returns an expression ::
1837 //ZZ static IRExpr* mk_armg_calculate_flag_v ( void )
1840 //ZZ = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP, Ity_I32),
1841 //ZZ IRExpr_Get(OFFB_CC_DEP1, Ity_I32),
1842 //ZZ IRExpr_Get(OFFB_CC_DEP2, Ity_I32),
1843 //ZZ IRExpr_Get(OFFB_CC_NDEP, Ity_I32) );
1845 //ZZ = mkIRExprCCall(
1848 //ZZ "armg_calculate_flag_v", &armg_calculate_flag_v,
1851 //ZZ /* Exclude OP and NDEP from definedness checking. We're only
1852 //ZZ interested in DEP1 and DEP2. */
1853 //ZZ call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
1858 /* Build IR to calculate N Z C V in bits 31:28 of the
1860 static IRExpr
* mk_arm64g_calculate_flags_nzcv ( void )
1863 = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP
, Ity_I64
),
1864 IRExpr_Get(OFFB_CC_DEP1
, Ity_I64
),
1865 IRExpr_Get(OFFB_CC_DEP2
, Ity_I64
),
1866 IRExpr_Get(OFFB_CC_NDEP
, Ity_I64
) );
1871 "arm64g_calculate_flags_nzcv", &arm64g_calculate_flags_nzcv
,
1874 /* Exclude OP and NDEP from definedness checking. We're only
1875 interested in DEP1 and DEP2. */
1876 call
->Iex
.CCall
.cee
->mcx_mask
= (1<<0) | (1<<3);
1881 /* Build IR to set the flags thunk, in the most general case. */
1883 void setFlags_D1_D2_ND ( UInt cc_op
,
1884 IRTemp t_dep1
, IRTemp t_dep2
, IRTemp t_ndep
)
1886 vassert(typeOfIRTemp(irsb
->tyenv
, t_dep1
== Ity_I64
));
1887 vassert(typeOfIRTemp(irsb
->tyenv
, t_dep2
== Ity_I64
));
1888 vassert(typeOfIRTemp(irsb
->tyenv
, t_ndep
== Ity_I64
));
1889 vassert(cc_op
>= ARM64G_CC_OP_COPY
&& cc_op
< ARM64G_CC_OP_NUMBER
);
1890 stmt( IRStmt_Put( OFFB_CC_OP
, mkU64(cc_op
) ));
1891 stmt( IRStmt_Put( OFFB_CC_DEP1
, mkexpr(t_dep1
) ));
1892 stmt( IRStmt_Put( OFFB_CC_DEP2
, mkexpr(t_dep2
) ));
1893 stmt( IRStmt_Put( OFFB_CC_NDEP
, mkexpr(t_ndep
) ));
1896 /* Build IR to set the flags thunk after ADD or SUB. */
1898 void setFlags_ADD_SUB ( Bool is64
, Bool isSUB
, IRTemp argL
, IRTemp argR
)
1900 IRTemp argL64
= IRTemp_INVALID
;
1901 IRTemp argR64
= IRTemp_INVALID
;
1902 IRTemp z64
= newTemp(Ity_I64
);
1907 argL64
= newTemp(Ity_I64
);
1908 argR64
= newTemp(Ity_I64
);
1909 assign(argL64
, unop(Iop_32Uto64
, mkexpr(argL
)));
1910 assign(argR64
, unop(Iop_32Uto64
, mkexpr(argR
)));
1912 assign(z64
, mkU64(0));
1913 UInt cc_op
= ARM64G_CC_OP_NUMBER
;
1914 /**/ if ( isSUB
&& is64
) { cc_op
= ARM64G_CC_OP_SUB64
; }
1915 else if ( isSUB
&& !is64
) { cc_op
= ARM64G_CC_OP_SUB32
; }
1916 else if (!isSUB
&& is64
) { cc_op
= ARM64G_CC_OP_ADD64
; }
1917 else if (!isSUB
&& !is64
) { cc_op
= ARM64G_CC_OP_ADD32
; }
1918 else { vassert(0); }
1919 setFlags_D1_D2_ND(cc_op
, argL64
, argR64
, z64
);
1922 /* Build IR to set the flags thunk after ADC or SBC. */
1924 void setFlags_ADC_SBC ( Bool is64
, Bool isSBC
,
1925 IRTemp argL
, IRTemp argR
, IRTemp oldC
)
1927 IRTemp argL64
= IRTemp_INVALID
;
1928 IRTemp argR64
= IRTemp_INVALID
;
1929 IRTemp oldC64
= IRTemp_INVALID
;
1935 argL64
= newTemp(Ity_I64
);
1936 argR64
= newTemp(Ity_I64
);
1937 oldC64
= newTemp(Ity_I64
);
1938 assign(argL64
, unop(Iop_32Uto64
, mkexpr(argL
)));
1939 assign(argR64
, unop(Iop_32Uto64
, mkexpr(argR
)));
1940 assign(oldC64
, unop(Iop_32Uto64
, mkexpr(oldC
)));
1942 UInt cc_op
= ARM64G_CC_OP_NUMBER
;
1943 /**/ if ( isSBC
&& is64
) { cc_op
= ARM64G_CC_OP_SBC64
; }
1944 else if ( isSBC
&& !is64
) { cc_op
= ARM64G_CC_OP_SBC32
; }
1945 else if (!isSBC
&& is64
) { cc_op
= ARM64G_CC_OP_ADC64
; }
1946 else if (!isSBC
&& !is64
) { cc_op
= ARM64G_CC_OP_ADC32
; }
1947 else { vassert(0); }
1948 setFlags_D1_D2_ND(cc_op
, argL64
, argR64
, oldC64
);
1951 /* Build IR to set the flags thunk after ADD or SUB, if the given
1952 condition evaluates to True at run time. If not, the flags are set
1953 to the specified NZCV value. */
1955 void setFlags_ADD_SUB_conditionally (
1956 Bool is64
, Bool isSUB
,
1957 IRTemp cond
, IRTemp argL
, IRTemp argR
, UInt nzcv
1960 /* Generate IR as follows:
1961 CC_OP = ITE(cond, OP_{ADD,SUB}{32,64}, OP_COPY)
1962 CC_DEP1 = ITE(cond, argL64, nzcv << 28)
1963 CC_DEP2 = ITE(cond, argR64, 0)
1967 IRTemp z64
= newTemp(Ity_I64
);
1968 assign(z64
, mkU64(0));
1970 /* Establish the operation and operands for the True case. */
1971 IRTemp t_dep1
= IRTemp_INVALID
;
1972 IRTemp t_dep2
= IRTemp_INVALID
;
1973 UInt t_op
= ARM64G_CC_OP_NUMBER
;
1974 /**/ if ( isSUB
&& is64
) { t_op
= ARM64G_CC_OP_SUB64
; }
1975 else if ( isSUB
&& !is64
) { t_op
= ARM64G_CC_OP_SUB32
; }
1976 else if (!isSUB
&& is64
) { t_op
= ARM64G_CC_OP_ADD64
; }
1977 else if (!isSUB
&& !is64
) { t_op
= ARM64G_CC_OP_ADD32
; }
1978 else { vassert(0); }
1984 t_dep1
= newTemp(Ity_I64
);
1985 t_dep2
= newTemp(Ity_I64
);
1986 assign(t_dep1
, unop(Iop_32Uto64
, mkexpr(argL
)));
1987 assign(t_dep2
, unop(Iop_32Uto64
, mkexpr(argR
)));
1990 /* Establish the operation and operands for the False case. */
1991 IRTemp f_dep1
= newTemp(Ity_I64
);
1992 IRTemp f_dep2
= z64
;
1993 UInt f_op
= ARM64G_CC_OP_COPY
;
1994 assign(f_dep1
, mkU64(nzcv
<< 28));
1996 /* Final thunk values */
1997 IRTemp dep1
= newTemp(Ity_I64
);
1998 IRTemp dep2
= newTemp(Ity_I64
);
1999 IRTemp op
= newTemp(Ity_I64
);
2001 assign(op
, IRExpr_ITE(mkexpr(cond
), mkU64(t_op
), mkU64(f_op
)));
2002 assign(dep1
, IRExpr_ITE(mkexpr(cond
), mkexpr(t_dep1
), mkexpr(f_dep1
)));
2003 assign(dep2
, IRExpr_ITE(mkexpr(cond
), mkexpr(t_dep2
), mkexpr(f_dep2
)));
2006 stmt( IRStmt_Put( OFFB_CC_OP
, mkexpr(op
) ));
2007 stmt( IRStmt_Put( OFFB_CC_DEP1
, mkexpr(dep1
) ));
2008 stmt( IRStmt_Put( OFFB_CC_DEP2
, mkexpr(dep2
) ));
2009 stmt( IRStmt_Put( OFFB_CC_NDEP
, mkexpr(z64
) ));
2012 /* Build IR to set the flags thunk after AND/OR/XOR or variants thereof. */
2014 void setFlags_LOGIC ( Bool is64
, IRTemp res
)
2016 IRTemp res64
= IRTemp_INVALID
;
2017 IRTemp z64
= newTemp(Ity_I64
);
2018 UInt cc_op
= ARM64G_CC_OP_NUMBER
;
2021 cc_op
= ARM64G_CC_OP_LOGIC64
;
2023 res64
= newTemp(Ity_I64
);
2024 assign(res64
, unop(Iop_32Uto64
, mkexpr(res
)));
2025 cc_op
= ARM64G_CC_OP_LOGIC32
;
2027 assign(z64
, mkU64(0));
2028 setFlags_D1_D2_ND(cc_op
, res64
, z64
, z64
);
2031 /* Build IR to set the flags thunk to a given NZCV value. NZCV is
2032 located in bits 31:28 of the supplied value. */
2034 void setFlags_COPY ( IRTemp nzcv_28x0
)
2036 IRTemp z64
= newTemp(Ity_I64
);
2037 assign(z64
, mkU64(0));
2038 setFlags_D1_D2_ND(ARM64G_CC_OP_COPY
, nzcv_28x0
, z64
, z64
);
2042 //ZZ /* Minor variant of the above that sets NDEP to zero (if it
2043 //ZZ sets it at all) */
2044 //ZZ static void setFlags_D1_D2 ( UInt cc_op, IRTemp t_dep1,
2046 //ZZ IRTemp guardT /* :: Ity_I32, 0 or 1 */ )
2048 //ZZ IRTemp z32 = newTemp(Ity_I32);
2049 //ZZ assign( z32, mkU32(0) );
2050 //ZZ setFlags_D1_D2_ND( cc_op, t_dep1, t_dep2, z32, guardT );
2054 //ZZ /* Minor variant of the above that sets DEP2 to zero (if it
2055 //ZZ sets it at all) */
2056 //ZZ static void setFlags_D1_ND ( UInt cc_op, IRTemp t_dep1,
2058 //ZZ IRTemp guardT /* :: Ity_I32, 0 or 1 */ )
2060 //ZZ IRTemp z32 = newTemp(Ity_I32);
2061 //ZZ assign( z32, mkU32(0) );
2062 //ZZ setFlags_D1_D2_ND( cc_op, t_dep1, z32, t_ndep, guardT );
2066 //ZZ /* Minor variant of the above that sets DEP2 and NDEP to zero (if it
2067 //ZZ sets them at all) */
2068 //ZZ static void setFlags_D1 ( UInt cc_op, IRTemp t_dep1,
2069 //ZZ IRTemp guardT /* :: Ity_I32, 0 or 1 */ )
2071 //ZZ IRTemp z32 = newTemp(Ity_I32);
2072 //ZZ assign( z32, mkU32(0) );
2073 //ZZ setFlags_D1_D2_ND( cc_op, t_dep1, z32, z32, guardT );
2077 /*------------------------------------------------------------*/
2078 /*--- Misc math helpers ---*/
2079 /*------------------------------------------------------------*/
2081 /* Generate IR for ((x & mask) >>u sh) | ((x << sh) & mask) */
2082 static IRTemp
math_SWAPHELPER ( IRTemp x
, ULong mask
, Int sh
)
2084 IRTemp maskT
= newTemp(Ity_I64
);
2085 IRTemp res
= newTemp(Ity_I64
);
2086 vassert(sh
>= 1 && sh
<= 63);
2087 assign(maskT
, mkU64(mask
));
2091 binop(Iop_And64
,mkexpr(x
),mkexpr(maskT
)),
2094 binop(Iop_Shl64
,mkexpr(x
),mkU8(sh
)),
2101 /* Generates byte swaps within 32-bit lanes. */
2102 static IRTemp
math_UINTSWAP64 ( IRTemp src
)
2105 res
= math_SWAPHELPER(src
, 0xFF00FF00FF00FF00ULL
, 8);
2106 res
= math_SWAPHELPER(res
, 0xFFFF0000FFFF0000ULL
, 16);
2110 /* Generates byte swaps within 16-bit lanes. */
2111 static IRTemp
math_USHORTSWAP64 ( IRTemp src
)
2114 res
= math_SWAPHELPER(src
, 0xFF00FF00FF00FF00ULL
, 8);
/* Generates a 64-bit byte swap. */
static IRTemp math_BYTESWAP64 ( IRTemp src )
{
   IRTemp res;
   res = math_SWAPHELPER(src, 0xFF00FF00FF00FF00ULL, 8);
   res = math_SWAPHELPER(res, 0xFFFF0000FFFF0000ULL, 16);
   res = math_SWAPHELPER(res, 0xFFFFFFFF00000000ULL, 32);
   return res;
}
/* Generates a 64-bit bit swap. */
static IRTemp math_BITSWAP64 ( IRTemp src )
{
   IRTemp res;
   res = math_SWAPHELPER(src, 0xAAAAAAAAAAAAAAAAULL, 1);
   res = math_SWAPHELPER(res, 0xCCCCCCCCCCCCCCCCULL, 2);
   res = math_SWAPHELPER(res, 0xF0F0F0F0F0F0F0F0ULL, 4);
   return math_BYTESWAP64(res);
}
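/* The swap helpers above are all chains of math_SWAPHELPER with
   different masks and shifts.  The following disabled sketch restates
   the helper on a plain ULong and checks a couple of the chains
   against expected values.  It is illustrative only and is not used
   by the decoder. */
#if 0
static ULong swapHelperScalar ( ULong x, ULong mask, Int sh )
{
   /* Scalar model of ((x & mask) >>u sh) | ((x << sh) & mask). */
   return ((x & mask) >> sh) | ((x << sh) & mask);
}

static void swapHelperScalar_examples ( void )
{
   ULong x = 0x0123456789ABCDEFULL;
   /* One step: byte swap within each 16-bit lane (the USHORTSWAP chain). */
   ULong r = swapHelperScalar(x, 0xFF00FF00FF00FF00ULL, 8);
   vassert(r == 0x23016745AB89EFCDULL);
   /* Two more steps give the full 64-bit byte swap (the BYTESWAP chain). */
   r = swapHelperScalar(r, 0xFFFF0000FFFF0000ULL, 16);
   r = swapHelperScalar(r, 0xFFFFFFFF00000000ULL, 32);
   vassert(r == 0xEFCDAB8967452301ULL);
   /* The BITSWAP chain reverses bits: bit 0 ends up at bit 63. */
   ULong b = swapHelperScalar(1, 0xAAAAAAAAAAAAAAAAULL, 1);
   b = swapHelperScalar(b, 0xCCCCCCCCCCCCCCCCULL, 2);
   b = swapHelperScalar(b, 0xF0F0F0F0F0F0F0F0ULL, 4);
   b = swapHelperScalar(b, 0xFF00FF00FF00FF00ULL, 8);
   b = swapHelperScalar(b, 0xFFFF0000FFFF0000ULL, 16);
   b = swapHelperScalar(b, 0xFFFFFFFF00000000ULL, 32);
   vassert(b == 0x8000000000000000ULL);
}
#endif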
/* Duplicates the bits at the bottom of the given word to fill the
   whole word.  src :: Ity_I64 is assumed to have zeroes everywhere
   except for the bottom bits. */
static IRTemp math_DUP_TO_64 ( IRTemp src, IRType srcTy )
{
   if (srcTy == Ity_I8) {
      IRTemp t16 = newTemp(Ity_I64);
      assign(t16, binop(Iop_Or64, mkexpr(src),
                                  binop(Iop_Shl64, mkexpr(src), mkU8(8))));
      IRTemp t32 = newTemp(Ity_I64);
      assign(t32, binop(Iop_Or64, mkexpr(t16),
                                  binop(Iop_Shl64, mkexpr(t16), mkU8(16))));
      IRTemp t64 = newTemp(Ity_I64);
      assign(t64, binop(Iop_Or64, mkexpr(t32),
                                  binop(Iop_Shl64, mkexpr(t32), mkU8(32))));
      return t64;
   }
   if (srcTy == Ity_I16) {
      IRTemp t32 = newTemp(Ity_I64);
      assign(t32, binop(Iop_Or64, mkexpr(src),
                                  binop(Iop_Shl64, mkexpr(src), mkU8(16))));
      IRTemp t64 = newTemp(Ity_I64);
      assign(t64, binop(Iop_Or64, mkexpr(t32),
                                  binop(Iop_Shl64, mkexpr(t32), mkU8(32))));
      return t64;
   }
   if (srcTy == Ity_I32) {
      IRTemp t64 = newTemp(Ity_I64);
      assign(t64, binop(Iop_Or64, mkexpr(src),
                                  binop(Iop_Shl64, mkexpr(src), mkU8(32))));
      return t64;
   }
   if (srcTy == Ity_I64) {
      return src;
   }
   vassert(0);
}
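/* math_DUP_TO_64 uses a shift-and-OR doubling trick: each step doubles
   the width of the replicated pattern.  A disabled scalar sketch of the
   8-bit case, for illustration only: */
#if 0
static ULong dupTo64Scalar8 ( ULong src /* zero except bits 7:0 */ )
{
   ULong t16 = src | (src << 8);    /* AB       -> ABAB             */
   ULong t32 = t16 | (t16 << 16);   /* ABAB     -> ABABABAB         */
   ULong t64 = t32 | (t32 << 32);   /* ABABABAB -> ABABABABABABABAB */
   return t64;
}
/* Example: dupTo64Scalar8(0xCD) == 0xCDCDCDCDCDCDCDCDULL. */
#endif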
/* Duplicates the src element exactly so as to fill a V128 value. */
static IRTemp math_DUP_TO_V128 ( IRTemp src, IRType srcTy )
{
   IRTemp res = newTempV128();
   if (srcTy == Ity_F64) {
      IRTemp i64 = newTemp(Ity_I64);
      assign(i64, unop(Iop_ReinterpF64asI64, mkexpr(src)));
      assign(res, binop(Iop_64HLtoV128, mkexpr(i64), mkexpr(i64)));
      return res;
   }
   if (srcTy == Ity_F32) {
      IRTemp i64a = newTemp(Ity_I64);
      assign(i64a, unop(Iop_32Uto64, unop(Iop_ReinterpF32asI32, mkexpr(src))));
      IRTemp i64b = newTemp(Ity_I64);
      assign(i64b, binop(Iop_Or64, binop(Iop_Shl64, mkexpr(i64a), mkU8(32)),
                                   mkexpr(i64a)));
      assign(res, binop(Iop_64HLtoV128, mkexpr(i64b), mkexpr(i64b)));
      return res;
   }
   if (srcTy == Ity_I64) {
      assign(res, binop(Iop_64HLtoV128, mkexpr(src), mkexpr(src)));
      return res;
   }
   if (srcTy == Ity_I32 || srcTy == Ity_I16 || srcTy == Ity_I8) {
      IRTemp t1 = newTemp(Ity_I64);
      assign(t1, widenUto64(srcTy, mkexpr(src)));
      IRTemp t2 = math_DUP_TO_64(t1, srcTy);
      assign(res, binop(Iop_64HLtoV128, mkexpr(t2), mkexpr(t2)));
      return res;
   }
   vassert(0);
}
/* |fullWidth| is a full V128 width result.  Depending on bitQ,
   zero out the upper half. */
static IRExpr* math_MAYBE_ZERO_HI64 ( UInt bitQ, IRTemp fullWidth )
{
   if (bitQ == 1) return mkexpr(fullWidth);
   if (bitQ == 0) return unop(Iop_ZeroHI64ofV128, mkexpr(fullWidth));
   vassert(0);
}

/* The same, but from an expression instead. */
static IRExpr* math_MAYBE_ZERO_HI64_fromE ( UInt bitQ, IRExpr* fullWidth )
{
   IRTemp fullWidthT = newTempV128();
   assign(fullWidthT, fullWidth);
   return math_MAYBE_ZERO_HI64(bitQ, fullWidthT);
}
/*------------------------------------------------------------*/
/*--- FP comparison helpers                                 ---*/
/*------------------------------------------------------------*/

/* irRes :: Ity_I32 holds a floating point comparison result encoded
   as an IRCmpF64Result.  Generate code to convert it to an
   ARM64-encoded (N,Z,C,V) group in the lowest 4 bits of an I64 value.
   Assign a new temp to hold that value, and return the temp. */
static
IRTemp mk_convert_IRCmpF64Result_to_NZCV ( IRTemp irRes32 )
{
   IRTemp ix    = newTemp(Ity_I64);
   IRTemp termL = newTemp(Ity_I64);
   IRTemp termR = newTemp(Ity_I64);
   IRTemp nzcv  = newTemp(Ity_I64);
   IRTemp irRes = newTemp(Ity_I64);

   /* This is where the fun starts.  We have to convert 'irRes' from
      an IR-convention return result (IRCmpF64Result) to an
      ARM-encoded (N,Z,C,V) group.  The final result is in the bottom
      4 bits of 'nzcv'. */
   /* Map compare result from IR to ARM(nzcv) */
   /*
      FP cmp result | IR   | ARM(nzcv)
      --------------------------------
      UN              0x45   0011
      LT              0x01   1000
      GT              0x00   0010
      EQ              0x40   0110
   */
   /* Now since you're probably wondering WTF ..

      ix fishes the useful bits out of the IR value, bits 6 and 0, and
      places them side by side, giving a number which is 0, 1, 2 or 3.

      termL is a sequence cooked up by GNU superopt.  It converts ix
      into an almost correct NZCV value (incredibly), except for the
      case of UN, where it produces 0100 instead of the required 0011.

      termR is therefore a correction term, also computed from ix.  It
      is 1 in the UN case and 0 for LT, GT and EQ.  Hence, to get the
      final correct value, we subtract termR from termL.

      Don't take my word for it.  There's a test program at the bottom
      of guest_arm_toIR.c, to try this out with.
   */
   assign(irRes, unop(Iop_32Uto64, mkexpr(irRes32)));

   assign(
      ix,
      binop(Iop_Or64,
            binop(Iop_And64,
                  binop(Iop_Shr64, mkexpr(irRes), mkU8(5)),
                  mkU64(3)),
            binop(Iop_And64, mkexpr(irRes), mkU64(1))));

   assign(
      termL,
      binop(Iop_Add64,
            binop(Iop_Shr64,
                  binop(Iop_Sub64,
                        binop(Iop_Shl64,
                              binop(Iop_Xor64, mkexpr(ix), mkU64(1)),
                              mkU8(62)),
                        mkU64(1)),
                  mkU8(61)),
            mkU64(1)));

   assign(
      termR,
      binop(Iop_And64,
            binop(Iop_And64,
                  mkexpr(ix),
                  binop(Iop_Shr64, mkexpr(ix), mkU8(1))),
            mkU64(1)));

   assign(nzcv, binop(Iop_Sub64, mkexpr(termL), mkexpr(termR)));
   return nzcv;
}
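/* The bit-twiddling above is hard to read, so here is a disabled
   scalar model of the same ix/termL/termR transformation, checked
   against the mapping table in the comment.  Illustrative only; the
   IRCmpF64Result constants (0x45/0x01/0x00/0x40) are the ones quoted
   in that table. */
#if 0
static ULong cmpF64ResultToNZCV_scalar ( ULong irRes )
{
   /* ix places irRes bit 6 and bit 0 side by side: 0=GT, 1=LT, 2=EQ, 3=UN. */
   ULong ix    = ((irRes >> 5) & 3) | (irRes & 1);
   ULong termL = ((((ix ^ 1) << 62) - 1) >> 61) + 1;
   ULong termR = ix & (ix >> 1) & 1;
   return termL - termR;
}

static void cmpF64ResultToNZCV_check ( void )
{
   vassert(cmpF64ResultToNZCV_scalar(0x00) == 0x2); /* GT -> 0010 */
   vassert(cmpF64ResultToNZCV_scalar(0x01) == 0x8); /* LT -> 1000 */
   vassert(cmpF64ResultToNZCV_scalar(0x40) == 0x6); /* EQ -> 0110 */
   vassert(cmpF64ResultToNZCV_scalar(0x45) == 0x3); /* UN -> 0011 */
}
#endif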
/*------------------------------------------------------------*/
/*--- Data processing (immediate)                           ---*/
/*------------------------------------------------------------*/

/* Helper functions for supporting "DecodeBitMasks" */

static ULong dbm_ROR ( Int width, ULong x, Int rot )
{
   vassert(width > 0 && width <= 64);
   vassert(rot >= 0 && rot < width);
   if (rot == 0) return x;
   ULong res = x >> rot;
   res |= (x << (width - rot));
   if (width < 64)
      res &= ((1ULL << width) - 1);
   return res;
}
static ULong dbm_RepTo64( Int esize, ULong x )
{
   switch (esize) {
      case 64:
         return x;
      case 32:
         x &= 0xFFFFFFFF; x |= (x << 32);
         return x;
      case 16:
         x &= 0xFFFF; x |= (x << 16); x |= (x << 32);
         return x;
      case 8:
         x &= 0xFF; x |= (x << 8); x |= (x << 16); x |= (x << 32);
         return x;
      case 4:
         x &= 0xF; x |= (x << 4); x |= (x << 8);
         x |= (x << 16); x |= (x << 32);
         return x;
      case 2:
         x &= 0x3; x |= (x << 2); x |= (x << 4); x |= (x << 8);
         x |= (x << 16); x |= (x << 32);
         return x;
      default:
         break;
   }
   vpanic("dbm_RepTo64");
   /*NOTREACHED*/
   return 0;
}
static Int dbm_highestSetBit ( ULong x )
{
   Int i;
   for (i = 63; i >= 0; i--) {
      if (x & (1ULL << i))
         return i;
   }
   vassert(x == 0);
   return -1;
}
static
Bool dbm_DecodeBitMasks ( /*OUT*/ULong* wmask, /*OUT*/ULong* tmask,
                          ULong immN, ULong imms, ULong immr, Bool immediate,
                          UInt M /*32 or 64*/)
{
   vassert(immN < (1ULL << 1));
   vassert(imms < (1ULL << 6));
   vassert(immr < (1ULL << 6));
   vassert(immediate == False || immediate == True);
   vassert(M == 32 || M == 64);

   Int len = dbm_highestSetBit( ((immN << 6) & 64) | ((~imms) & 63) );
   if (len < 1) { /* printf("fail1\n"); */ return False; }
   vassert(len <= 6);
   vassert(M >= (1 << len));

   vassert(len >= 1 && len <= 6);
   ULong levels = // (zeroes(6 - len) << (6-len)) | ones(len);
                  (1 << len) - 1;
   vassert(levels >= 1 && levels <= 63);

   if (immediate && ((imms & levels) == levels)) {
      /* printf("fail2 imms %llu levels %llu len %d\n", imms, levels, len); */
      return False;
   }

   ULong S = imms & levels;
   ULong R = immr & levels;
   Int   diff = S - R;

   Int esize = 1 << len;
   vassert(2 <= esize && esize <= 64);

   /* Be careful of these (1ULL << (S+1)) - 1 expressions, and the
      same below with d.  S can be 63 in which case we have an out of
      range and hence undefined shift. */
   vassert(S >= 0 && S <= 63);
   vassert(esize >= (S+1));
   ULong elem_s = // Zeroes(esize-(S+1)):Ones(S+1)
                  //(1ULL << (S+1)) - 1;
                  ((1ULL << S) - 1) + (1ULL << S);

   Int d = // diff<len-1:0>
           diff & ((1 << len)-1);
   vassert(esize >= (d+1));
   vassert(d >= 0 && d <= 63);

   ULong elem_d = // Zeroes(esize-(d+1)):Ones(d+1)
                  //(1ULL << (d+1)) - 1;
                  ((1ULL << d) - 1) + (1ULL << d);

   if (esize != 64) vassert(elem_s < (1ULL << esize));
   if (esize != 64) vassert(elem_d < (1ULL << esize));

   if (wmask) *wmask = dbm_RepTo64(esize, dbm_ROR(esize, elem_s, R));
   if (tmask) *tmask = dbm_RepTo64(esize, elem_d);

   return True;
}
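/* A worked example may help when reading the decoders below.  The
   64-bit logical immediate 0xFF is encoded as N=1, immr=0,
   imms=0b000111; the disabled sketch below shows what the helper
   produces for it.  The encoding values are an example chosen for
   illustration, not taken from any particular guest instruction. */
#if 0
static void dbm_DecodeBitMasks_example ( void )
{
   ULong wmask = 0, tmask = 0;
   Bool  ok = dbm_DecodeBitMasks(&wmask, &tmask,
                                 /*immN=*/1, /*imms=*/0x07, /*immr=*/0,
                                 /*immediate=*/True, /*M=*/64);
   /* len = 6, so esize = 64 and levels = 0b111111.  S = 7 and R = 0,
      hence elem_s = 0xFF (eight ones), rotated by 0 and replicated
      across a single 64-bit element. */
   vassert(ok);
   vassert(wmask == 0xFFULL);
   vassert(tmask == 0xFFULL);
}
#endif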
2432 Bool
dis_ARM64_data_processing_immediate(/*MB_OUT*/DisResult
* dres
,
2433 UInt insn
, Bool sigill_diag
)
2435 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
   /* insn[28:23]
      10000x PC-rel addressing
      10001x Add/subtract (immediate)
      100100 Logical (immediate)
      100101 Move Wide (immediate)
      100110 Bitfield
      100111 Extract
   */
2446 /* ------------------ ADD/SUB{,S} imm12 ------------------ */
2447 if (INSN(28,24) == BITS5(1,0,0,0,1)) {
2448 Bool is64
= INSN(31,31) == 1;
2449 Bool isSub
= INSN(30,30) == 1;
2450 Bool setCC
= INSN(29,29) == 1;
2451 UInt sh
= INSN(23,22);
2452 UInt uimm12
= INSN(21,10);
2453 UInt nn
= INSN(9,5);
2454 UInt dd
= INSN(4,0);
2455 const HChar
* nm
= isSub
? "sub" : "add";
2457 /* Invalid; fall through */
2460 uimm12
<<= (12 * sh
);
2462 IRTemp argL
= newTemp(Ity_I64
);
2463 IRTemp argR
= newTemp(Ity_I64
);
2464 IRTemp res
= newTemp(Ity_I64
);
2465 assign(argL
, getIReg64orSP(nn
));
2466 assign(argR
, mkU64(uimm12
));
2467 assign(res
, binop(isSub
? Iop_Sub64
: Iop_Add64
,
2468 mkexpr(argL
), mkexpr(argR
)));
2470 putIReg64orZR(dd
, mkexpr(res
));
2471 setFlags_ADD_SUB(True
/*is64*/, isSub
, argL
, argR
);
2472 DIP("%ss %s, %s, 0x%x\n",
2473 nm
, nameIReg64orZR(dd
), nameIReg64orSP(nn
), uimm12
);
2475 putIReg64orSP(dd
, mkexpr(res
));
2476 DIP("%s %s, %s, 0x%x\n",
2477 nm
, nameIReg64orSP(dd
), nameIReg64orSP(nn
), uimm12
);
2480 IRTemp argL
= newTemp(Ity_I32
);
2481 IRTemp argR
= newTemp(Ity_I32
);
2482 IRTemp res
= newTemp(Ity_I32
);
2483 assign(argL
, getIReg32orSP(nn
));
2484 assign(argR
, mkU32(uimm12
));
2485 assign(res
, binop(isSub
? Iop_Sub32
: Iop_Add32
,
2486 mkexpr(argL
), mkexpr(argR
)));
2488 putIReg32orZR(dd
, mkexpr(res
));
2489 setFlags_ADD_SUB(False
/*!is64*/, isSub
, argL
, argR
);
2490 DIP("%ss %s, %s, 0x%x\n",
2491 nm
, nameIReg32orZR(dd
), nameIReg32orSP(nn
), uimm12
);
2493 putIReg32orSP(dd
, mkexpr(res
));
2494 DIP("%s %s, %s, 0x%x\n",
2495 nm
, nameIReg32orSP(dd
), nameIReg32orSP(nn
), uimm12
);
2502 /* -------------------- ADR/ADRP -------------------- */
2503 if (INSN(28,24) == BITS5(1,0,0,0,0)) {
2504 UInt bP
= INSN(31,31);
2505 UInt immLo
= INSN(30,29);
2506 UInt immHi
= INSN(23,5);
2507 UInt rD
= INSN(4,0);
2508 ULong uimm
= (immHi
<< 2) | immLo
;
2509 ULong simm
= sx_to_64(uimm
, 21);
2512 val
= (guest_PC_curr_instr
& 0xFFFFFFFFFFFFF000ULL
) + (simm
<< 12);
2514 val
= guest_PC_curr_instr
+ simm
;
2516 putIReg64orZR(rD
, mkU64(val
));
2517 DIP("adr%s %s, 0x%llx\n", bP
? "p" : "", nameIReg64orZR(rD
), val
);
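      /* To make the ADRP computation above concrete: the page-aligned
         PC is combined with the sign-extended immediate shifted left
         by 12.  Disabled sketch with made-up example values (the
         addresses are hypothetical): */
#if 0
      {
         ULong pc   = 0x400123ULL;   /* hypothetical guest PC */
         ULong simm = 0x1ULL;        /* hypothetical page offset */
         ULong v    = (pc & 0xFFFFFFFFFFFFF000ULL) + (simm << 12);
         vassert(v == 0x401000ULL);
      }
#endif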
2521 /* -------------------- LOGIC(imm) -------------------- */
2522 if (INSN(28,23) == BITS6(1,0,0,1,0,0)) {
2523 /* 31 30 28 22 21 15 9 4
2524 sf op 100100 N immr imms Rn Rd
2525 op=00: AND Rd|SP, Rn, #imm
2526 op=01: ORR Rd|SP, Rn, #imm
2527 op=10: EOR Rd|SP, Rn, #imm
2528 op=11: ANDS Rd|ZR, Rn, #imm
2530 Bool is64
= INSN(31,31) == 1;
2531 UInt op
= INSN(30,29);
2532 UInt N
= INSN(22,22);
2533 UInt immR
= INSN(21,16);
2534 UInt immS
= INSN(15,10);
2535 UInt nn
= INSN(9,5);
2536 UInt dd
= INSN(4,0);
2539 if (N
== 1 && !is64
)
2540 goto after_logic_imm
; /* not allowed; fall through */
2541 ok
= dbm_DecodeBitMasks(&imm
, NULL
,
2542 N
, immS
, immR
, True
, is64
? 64 : 32);
2544 goto after_logic_imm
;
2546 const HChar
* names
[4] = { "and", "orr", "eor", "ands" };
2547 const IROp ops64
[4] = { Iop_And64
, Iop_Or64
, Iop_Xor64
, Iop_And64
};
2548 const IROp ops32
[4] = { Iop_And32
, Iop_Or32
, Iop_Xor32
, Iop_And32
};
2552 IRExpr
* argL
= getIReg64orZR(nn
);
2553 IRExpr
* argR
= mkU64(imm
);
2554 IRTemp res
= newTemp(Ity_I64
);
2555 assign(res
, binop(ops64
[op
], argL
, argR
));
2557 putIReg64orSP(dd
, mkexpr(res
));
2558 DIP("%s %s, %s, 0x%llx\n", names
[op
],
2559 nameIReg64orSP(dd
), nameIReg64orZR(nn
), imm
);
2561 putIReg64orZR(dd
, mkexpr(res
));
2562 setFlags_LOGIC(True
/*is64*/, res
);
2563 DIP("%s %s, %s, 0x%llx\n", names
[op
],
2564 nameIReg64orZR(dd
), nameIReg64orZR(nn
), imm
);
2567 IRExpr
* argL
= getIReg32orZR(nn
);
2568 IRExpr
* argR
= mkU32((UInt
)imm
);
2569 IRTemp res
= newTemp(Ity_I32
);
2570 assign(res
, binop(ops32
[op
], argL
, argR
));
2572 putIReg32orSP(dd
, mkexpr(res
));
2573 DIP("%s %s, %s, 0x%x\n", names
[op
],
2574 nameIReg32orSP(dd
), nameIReg32orZR(nn
), (UInt
)imm
);
2576 putIReg32orZR(dd
, mkexpr(res
));
2577 setFlags_LOGIC(False
/*!is64*/, res
);
2578 DIP("%s %s, %s, 0x%x\n", names
[op
],
2579 nameIReg32orZR(dd
), nameIReg32orZR(nn
), (UInt
)imm
);
2586 /* -------------------- MOV{Z,N,K} -------------------- */
2587 if (INSN(28,23) == BITS6(1,0,0,1,0,1)) {
2590 sf 10 100 101 hw imm16 Rd MOV(Z) Rd, (imm16 << (16*hw))
2591 sf 00 100 101 hw imm16 Rd MOV(N) Rd, ~(imm16 << (16*hw))
2592 sf 11 100 101 hw imm16 Rd MOV(K) Rd, (imm16 << (16*hw))
2594 Bool is64
= INSN(31,31) == 1;
2595 UInt subopc
= INSN(30,29);
2596 UInt hw
= INSN(22,21);
2597 UInt imm16
= INSN(20,5);
2598 UInt dd
= INSN(4,0);
2599 if (subopc
== BITS2(0,1) || (!is64
&& hw
>= 2)) {
2600 /* invalid; fall through */
2602 ULong imm64
= ((ULong
)imm16
) << (16 * hw
);
2604 vassert(imm64
< 0x100000000ULL
);
2606 case BITS2(1,0): // MOVZ
2607 putIRegOrZR(is64
, dd
, is64
? mkU64(imm64
) : mkU32((UInt
)imm64
));
2608 DIP("movz %s, 0x%llx\n", nameIRegOrZR(is64
, dd
), imm64
);
2610 case BITS2(0,0): // MOVN
2613 imm64
&= 0xFFFFFFFFULL
;
2614 putIRegOrZR(is64
, dd
, is64
? mkU64(imm64
) : mkU32((UInt
)imm64
));
2615 DIP("movn %s, 0x%llx\n", nameIRegOrZR(is64
, dd
), imm64
);
2617 case BITS2(1,1): // MOVK
2618 /* This is more complex. We are inserting a slice into
2619 the destination register, so we need to have the old
2622 IRTemp old
= newTemp(Ity_I64
);
2623 assign(old
, getIReg64orZR(dd
));
2624 ULong mask
= 0xFFFFULL
<< (16 * hw
);
2627 binop(Iop_And64
, mkexpr(old
), mkU64(~mask
)),
2629 putIReg64orZR(dd
, res
);
2630 DIP("movk %s, 0x%x, lsl %u\n",
2631 nameIReg64orZR(dd
), imm16
, 16*hw
);
2633 IRTemp old
= newTemp(Ity_I32
);
2634 assign(old
, getIReg32orZR(dd
));
2636 UInt mask
= ((UInt
)0xFFFF) << (16 * hw
);
2639 binop(Iop_And32
, mkexpr(old
), mkU32(~mask
)),
2640 mkU32((UInt
)imm64
));
2641 putIReg32orZR(dd
, res
);
2642 DIP("movk %s, 0x%x, lsl %u\n",
2643 nameIReg32orZR(dd
), imm16
, 16*hw
);
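            /* The MOVK case above keeps everything in the destination
               except the 16-bit slice being written.  Disabled scalar
               sketch of the update, with hypothetical register
               contents: */
#if 0
            {
               /* MOVK Xd, #0xBEEF, LSL #16 applied to an old value. */
               ULong oldv  = 0x1111222233334444ULL;   /* hypothetical Xd */
               UInt  k16   = 0xBEEF;
               UInt  hwSh  = 1;                       /* shift = 16 * hw */
               ULong kmask = 0xFFFFULL << (16 * hwSh);
               ULong newv  = (oldv & ~kmask) | (((ULong)k16) << (16 * hwSh));
               vassert(newv == 0x11112222BEEF4444ULL);
            }
#endif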
2653 /* -------------------- {U,S,}BFM -------------------- */
2654 /* 30 28 22 21 15 9 4
2656 sf 10 100110 N immr imms nn dd
2657 UBFM Wd, Wn, #immr, #imms when sf=0, N=0, immr[5]=0, imms[5]=0
2658 UBFM Xd, Xn, #immr, #imms when sf=1, N=1
2660 sf 00 100110 N immr imms nn dd
2661 SBFM Wd, Wn, #immr, #imms when sf=0, N=0, immr[5]=0, imms[5]=0
2662 SBFM Xd, Xn, #immr, #imms when sf=1, N=1
2664 sf 01 100110 N immr imms nn dd
2665 BFM Wd, Wn, #immr, #imms when sf=0, N=0, immr[5]=0, imms[5]=0
2666 BFM Xd, Xn, #immr, #imms when sf=1, N=1
2668 if (INSN(28,23) == BITS6(1,0,0,1,1,0)) {
2669 UInt sf
= INSN(31,31);
2670 UInt opc
= INSN(30,29);
2671 UInt N
= INSN(22,22);
2672 UInt immR
= INSN(21,16);
2673 UInt immS
= INSN(15,10);
2674 UInt nn
= INSN(9,5);
2675 UInt dd
= INSN(4,0);
2676 Bool inZero
= False
;
2677 Bool extend
= False
;
2678 const HChar
* nm
= "???";
2679 /* skip invalid combinations */
2682 inZero
= True
; extend
= True
; nm
= "sbfm"; break;
2684 inZero
= False
; extend
= False
; nm
= "bfm"; break;
2686 inZero
= True
; extend
= False
; nm
= "ubfm"; break;
2688 goto after_bfm
; /* invalid */
2692 if (sf
== 1 && N
!= 1) goto after_bfm
;
2693 if (sf
== 0 && (N
!= 0 || ((immR
>> 5) & 1) != 0
2694 || ((immS
>> 5) & 1) != 0)) goto after_bfm
;
2695 ULong wmask
= 0, tmask
= 0;
2696 Bool ok
= dbm_DecodeBitMasks(&wmask
, &tmask
,
2697 N
, immS
, immR
, False
, sf
== 1 ? 64 : 32);
2698 if (!ok
) goto after_bfm
; /* hmmm */
2700 Bool is64
= sf
== 1;
2701 IRType ty
= is64
? Ity_I64
: Ity_I32
;
2703 // Handle plain shifts explicitly. These are functionally identical to
2704 // the general case below, but iropt isn't clever enough to reduce those
2705 // sequences to plain shifts. So give it a hand.
2706 if (is64
&& immS
== 63 && immR
>= 1 && immR
<= 63) {
2707 if (opc
== BITS2(0,0)) {
2708 // 64-bit signed shift right
2709 putIReg64orZR(dd
, binop(Iop_Sar64
, getIReg64orZR(nn
), mkU8(immR
)));
2710 DIP("asr %s, %s, #%u\n",
2711 nameIRegOrZR(is64
, dd
), nameIRegOrZR(is64
, nn
), immR
);
2714 if (opc
== BITS2(1,0)) {
2715 // 64-bit unsigned shift right
2716 putIReg64orZR(dd
, binop(Iop_Shr64
, getIReg64orZR(nn
), mkU8(immR
)));
2717 DIP("lsr %s, %s, #%u\n",
2718 nameIRegOrZR(is64
, dd
), nameIRegOrZR(is64
, nn
), immR
);
2723 if (!is64
&& immS
== 31 && immR
>= 1 && immR
<= 31) {
2724 if (opc
== BITS2(0,0)) {
2725 // 32-bit signed shift right
2726 putIReg32orZR(dd
, binop(Iop_Sar32
, getIReg32orZR(nn
), mkU8(immR
)));
2727 DIP("asr %s, %s, #%u\n",
2728 nameIRegOrZR(is64
, dd
), nameIRegOrZR(is64
, nn
), immR
);
2731 if (opc
== BITS2(1,0)) {
2732 // 32-bit unsigned shift right
2733 putIReg32orZR(dd
, binop(Iop_Shr32
, getIReg32orZR(nn
), mkU8(immR
)));
2734 DIP("lsr %s, %s, #%u\n",
2735 nameIRegOrZR(is64
, dd
), nameIRegOrZR(is64
, nn
), immR
);
2740 if (is64
&& immS
>= 0 && immS
<= 62
2741 && immR
== immS
+ 1 && opc
== BITS2(1,0)) {
2742 // 64-bit shift left
2743 UInt shift
= 64 - immR
;
2744 vassert(shift
>= 1 && shift
<= 63);
2745 putIReg64orZR(dd
, binop(Iop_Shl64
, getIReg64orZR(nn
), mkU8(shift
)));
2746 DIP("lsl %s, %s, #%u\n",
2747 nameIRegOrZR(is64
, dd
), nameIRegOrZR(is64
, nn
), shift
);
2750 if (!is64
&& immS
>= 0 && immS
<= 30
2751 && immR
== immS
+ 1 && opc
== BITS2(1,0)) {
2752 // 32-bit shift left
2753 UInt shift
= 32 - immR
;
2754 vassert(shift
>= 1 && shift
<= 31);
2755 putIReg32orZR(dd
, binop(Iop_Shl32
, getIReg32orZR(nn
), mkU8(shift
)));
2756 DIP("lsl %s, %s, #%u\n",
2757 nameIRegOrZR(is64
, dd
), nameIRegOrZR(is64
, nn
), shift
);
2761 // Also special-case sxtw.
2762 if (opc
== BITS2(0,0) && immR
== 0) {
2764 // The destination size is 64 bits.
2766 putIReg64orZR(dd
, unop(Iop_32Sto64
, getIReg32orZR(nn
)));
2767 DIP("sxtw %s, %s\n", nameIReg64orZR(dd
), nameIReg32orZR(nn
));
2771 putIReg64orZR(dd
, unop(Iop_16Sto64
,
2772 unop(Iop_64to16
, getIReg64orZR(nn
))));
2773 DIP("sxth %s, %s\n", nameIReg64orZR(dd
), nameIReg32orZR(nn
));
2777 putIReg64orZR(dd
, unop(Iop_8Sto64
,
2778 unop(Iop_64to8
, getIReg64orZR(nn
))));
2779 DIP("sxtb %s, %s\n", nameIReg64orZR(dd
), nameIReg32orZR(nn
));
2783 // The destination size is 32 bits.
2785 putIReg32orZR(dd
, unop(Iop_16Sto32
,
2786 unop(Iop_64to16
, getIReg64orZR(nn
))));
2787 DIP("sxth %s, %s\n", nameIReg32orZR(dd
), nameIReg32orZR(nn
));
2791 putIReg32orZR(dd
, unop(Iop_8Sto32
,
2792 unop(Iop_64to8
, getIReg64orZR(nn
))));
2793 DIP("sxtb %s, %s\n", nameIReg32orZR(dd
), nameIReg32orZR(nn
));
2799 // None of the special cases apply. We have to use the (slow) general
2801 IRTemp dst
= newTemp(ty
);
2802 IRTemp src
= newTemp(ty
);
2803 IRTemp bot
= newTemp(ty
);
2804 IRTemp top
= newTemp(ty
);
2805 IRTemp res
= newTemp(ty
);
2806 assign(dst
, inZero
? mkU(ty
,0) : getIRegOrZR(is64
, dd
));
2807 assign(src
, getIRegOrZR(is64
, nn
));
2808 /* perform bitfield move on low bits */
2809 assign(bot
, binop(mkOR(ty
),
2810 binop(mkAND(ty
), mkexpr(dst
), mkU(ty
, ~wmask
)),
2811 binop(mkAND(ty
), mkexpr(mathROR(ty
, src
, immR
)),
2813 /* determine extension bits (sign, zero or dest register) */
2814 assign(top
, mkexpr(extend
? mathREPLICATE(ty
, src
, immS
) : dst
));
2815 /* combine extension bits and result bits */
2816 assign(res
, binop(mkOR(ty
),
2817 binop(mkAND(ty
), mkexpr(top
), mkU(ty
, ~tmask
)),
2818 binop(mkAND(ty
), mkexpr(bot
), mkU(ty
, tmask
))));
2819 putIRegOrZR(is64
, dd
, mkexpr(res
));
2820 DIP("%s %s, %s, immR=%u, immS=%u\n",
2821 nm
, nameIRegOrZR(is64
, dd
), nameIRegOrZR(is64
, nn
), immR
, immS
);
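      /* The general bitfield move above follows the architectural
         pseudocode: 'bot' merges the rotated source into the
         destination under wmask, and the result merges the extension
         bits with 'bot' under tmask.  Disabled scalar model of the
         same two steps, specialised to one UBFX-style case; the input
         value is hypothetical: */
#if 0
      {
         /* UBFM Xd, Xn, #8, #15 (== UBFX Xd, Xn, #8, #8).  For that
            encoding DecodeBitMasks gives wmask = 0xFF000000000000FF
            and tmask = 0xFF. */
         ULong srcv  = 0x0000000000ABCD12ULL;  /* hypothetical Xn */
         ULong dstv  = 0;                      /* inZero for UBFM */
         ULong wm    = 0xFF000000000000FFULL;
         ULong tm    = 0xFFULL;
         ULong rot   = (srcv >> 8) | (srcv << 56);     /* ROR(src, immR) */
         ULong botv  = (dstv & ~wm) | (rot & wm);
         ULong topv  = dstv;                   /* no sign extension here */
         ULong resv  = (topv & ~tm) | (botv & tm);
         vassert(resv == 0xCDULL);             /* bits 15:8 of srcv */
      }
#endif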
2826 /* ---------------------- EXTR ---------------------- */
2827 /* 30 28 22 20 15 9 4
2828 1 00 100111 10 m imm6 n d EXTR Xd, Xn, Xm, #imm6
2829 0 00 100111 00 m imm6 n d EXTR Wd, Wn, Wm, #imm6 when #imm6 < 32
2831 if (INSN(30,23) == BITS8(0,0,1,0,0,1,1,1) && INSN(21,21) == 0) {
2832 Bool is64
= INSN(31,31) == 1;
2833 UInt mm
= INSN(20,16);
2834 UInt imm6
= INSN(15,10);
2835 UInt nn
= INSN(9,5);
2836 UInt dd
= INSN(4,0);
2838 if (INSN(31,31) != INSN(22,22))
2840 if (!is64
&& imm6
>= 32)
2842 if (!valid
) goto after_extr
;
2843 IRType ty
= is64
? Ity_I64
: Ity_I32
;
2844 IRTemp srcHi
= newTemp(ty
);
2845 IRTemp srcLo
= newTemp(ty
);
2846 IRTemp res
= newTemp(ty
);
2847 assign(srcHi
, getIRegOrZR(is64
, nn
));
2848 assign(srcLo
, getIRegOrZR(is64
, mm
));
2850 assign(res
, mkexpr(srcLo
));
2852 UInt szBits
= 8 * sizeofIRType(ty
);
2853 vassert(imm6
> 0 && imm6
< szBits
);
2854 assign(res
, binop(mkOR(ty
),
2855 binop(mkSHL(ty
), mkexpr(srcHi
), mkU8(szBits
-imm6
)),
2856 binop(mkSHR(ty
), mkexpr(srcLo
), mkU8(imm6
))));
2858 putIRegOrZR(is64
, dd
, mkexpr(res
));
2859 DIP("extr %s, %s, %s, #%u\n",
2860 nameIRegOrZR(is64
,dd
),
2861 nameIRegOrZR(is64
,nn
), nameIRegOrZR(is64
,mm
), imm6
);
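      /* EXTR takes a 64- (or 32-) bit window out of the concatenation
         Xn:Xm, starting at bit #imm6 of the low register, i.e.
         (hi << (size-imm6)) | (lo >> imm6) when imm6 != 0.  Disabled
         sketch with hypothetical register values: */
#if 0
      {
         ULong hi  = 0x1122334455667788ULL;   /* hypothetical Xn */
         ULong lo  = 0x99AABBCCDDEEFF00ULL;   /* hypothetical Xm */
         UInt  amt = 8;
         ULong r   = (hi << (64 - amt)) | (lo >> amt);
         vassert(r == 0x8899AABBCCDDEEFFULL);
      }
#endif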
2867 vex_printf("ARM64 front end: data_processing_immediate\n");
/*------------------------------------------------------------*/
/*--- Data processing (register) instructions               ---*/
/*------------------------------------------------------------*/

static const HChar* nameSH ( UInt sh ) {
   switch (sh) {
      case 0: return "lsl";
      case 1: return "lsr";
      case 2: return "asr";
      case 3: return "ror";
      default: vassert(0);
   }
}

/* Generate IR to get a register value, possibly shifted by an
   immediate.  Returns either a 32- or 64-bit temporary holding the
   result.  After the shift, the value can optionally be NOT-ed
   too.

   sh_how coding: 00=SHL, 01=SHR, 10=SAR, 11=ROR.  sh_amt may only be
   in the range 0 to (is64 ? 64 : 32)-1.  For some instructions, ROR
   isn't allowed, but it's the job of the caller to check that.
*/
2897 static IRTemp
getShiftedIRegOrZR ( Bool is64
,
2898 UInt sh_how
, UInt sh_amt
, UInt regNo
,
2901 vassert(sh_how
< 4);
2902 vassert(sh_amt
< (is64
? 64 : 32));
2903 IRType ty
= is64
? Ity_I64
: Ity_I32
;
2904 IRTemp t0
= newTemp(ty
);
2905 assign(t0
, getIRegOrZR(is64
, regNo
));
2906 IRTemp t1
= newTemp(ty
);
2909 assign(t1
, binop(mkSHL(ty
), mkexpr(t0
), mkU8(sh_amt
)));
2912 assign(t1
, binop(mkSHR(ty
), mkexpr(t0
), mkU8(sh_amt
)));
2915 assign(t1
, binop(mkSAR(ty
), mkexpr(t0
), mkU8(sh_amt
)));
2918 assign(t1
, mkexpr(mathROR(ty
, t0
, sh_amt
)));
2924 IRTemp t2
= newTemp(ty
);
2925 assign(t2
, unop(mkNOT(ty
), mkexpr(t1
)));
2934 Bool
dis_ARM64_data_processing_register(/*MB_OUT*/DisResult
* dres
,
2935 UInt insn
, Bool sigill_diag
)
2937 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
2939 /* ------------------- ADD/SUB(reg) ------------------- */
2940 /* x==0 => 32 bit op x==1 => 64 bit op
2941 sh: 00=LSL, 01=LSR, 10=ASR, 11=ROR(NOT ALLOWED)
2943 31 30 29 28 23 21 20 15 9 4
2945 x 0 0 01011 sh 0 Rm imm6 Rn Rd ADD Rd,Rn, sh(Rm,imm6)
2946 x 0 1 01011 sh 0 Rm imm6 Rn Rd ADDS Rd,Rn, sh(Rm,imm6)
2947 x 1 0 01011 sh 0 Rm imm6 Rn Rd SUB Rd,Rn, sh(Rm,imm6)
2948 x 1 1 01011 sh 0 Rm imm6 Rn Rd SUBS Rd,Rn, sh(Rm,imm6)
2950 if (INSN(28,24) == BITS5(0,1,0,1,1) && INSN(21,21) == 0) {
2951 UInt bX
= INSN(31,31);
2952 UInt bOP
= INSN(30,30); /* 0: ADD, 1: SUB */
2953 UInt bS
= INSN(29, 29); /* set flags? */
2954 UInt sh
= INSN(23,22);
2955 UInt rM
= INSN(20,16);
2956 UInt imm6
= INSN(15,10);
2957 UInt rN
= INSN(9,5);
2958 UInt rD
= INSN(4,0);
2959 Bool isSUB
= bOP
== 1;
2960 Bool is64
= bX
== 1;
2961 IRType ty
= is64
? Ity_I64
: Ity_I32
;
2962 if ((!is64
&& imm6
> 31) || sh
== BITS2(1,1)) {
2963 /* invalid; fall through */
2965 IRTemp argL
= newTemp(ty
);
2966 assign(argL
, getIRegOrZR(is64
, rN
));
2967 IRTemp argR
= getShiftedIRegOrZR(is64
, sh
, imm6
, rM
, False
);
2968 IROp op
= isSUB
? mkSUB(ty
) : mkADD(ty
);
2969 IRTemp res
= newTemp(ty
);
2970 assign(res
, binop(op
, mkexpr(argL
), mkexpr(argR
)));
2971 if (rD
!= 31) putIRegOrZR(is64
, rD
, mkexpr(res
));
2973 setFlags_ADD_SUB(is64
, isSUB
, argL
, argR
);
2975 DIP("%s%s %s, %s, %s, %s #%u\n",
2976 bOP
? "sub" : "add", bS
? "s" : "",
2977 nameIRegOrZR(is64
, rD
), nameIRegOrZR(is64
, rN
),
2978 nameIRegOrZR(is64
, rM
), nameSH(sh
), imm6
);
2983 /* ------------------- ADC/SBC(reg) ------------------- */
2984 /* x==0 => 32 bit op x==1 => 64 bit op
2986 31 30 29 28 23 21 20 15 9 4
2988 x 0 0 11010 00 0 Rm 000000 Rn Rd ADC Rd,Rn,Rm
2989 x 0 1 11010 00 0 Rm 000000 Rn Rd ADCS Rd,Rn,Rm
2990 x 1 0 11010 00 0 Rm 000000 Rn Rd SBC Rd,Rn,Rm
2991 x 1 1 11010 00 0 Rm 000000 Rn Rd SBCS Rd,Rn,Rm
2994 if (INSN(28,21) == BITS8(1,1,0,1,0,0,0,0) && INSN(15,10) == 0 ) {
2995 UInt bX
= INSN(31,31);
2996 UInt bOP
= INSN(30,30); /* 0: ADC, 1: SBC */
2997 UInt bS
= INSN(29,29); /* set flags */
2998 UInt rM
= INSN(20,16);
2999 UInt rN
= INSN(9,5);
3000 UInt rD
= INSN(4,0);
3002 Bool isSUB
= bOP
== 1;
3003 Bool is64
= bX
== 1;
3004 IRType ty
= is64
? Ity_I64
: Ity_I32
;
3006 IRTemp oldC
= newTemp(ty
);
3008 is64
? mk_arm64g_calculate_flag_c()
3009 : unop(Iop_64to32
, mk_arm64g_calculate_flag_c()) );
3011 IRTemp argL
= newTemp(ty
);
3012 assign(argL
, getIRegOrZR(is64
, rN
));
3013 IRTemp argR
= newTemp(ty
);
3014 assign(argR
, getIRegOrZR(is64
, rM
));
3016 IROp op
= isSUB
? mkSUB(ty
) : mkADD(ty
);
3017 IRTemp res
= newTemp(ty
);
3019 IRExpr
* one
= is64
? mkU64(1) : mkU32(1);
3020 IROp xorOp
= is64
? Iop_Xor64
: Iop_Xor32
;
3023 binop(op
, mkexpr(argL
), mkexpr(argR
)),
3024 binop(xorOp
, mkexpr(oldC
), one
)));
3028 binop(op
, mkexpr(argL
), mkexpr(argR
)),
3032 if (rD
!= 31) putIRegOrZR(is64
, rD
, mkexpr(res
));
3035 setFlags_ADC_SBC(is64
, isSUB
, argL
, argR
, oldC
);
3038 DIP("%s%s %s, %s, %s\n",
3039 bOP
? "sbc" : "adc", bS
? "s" : "",
3040 nameIRegOrZR(is64
, rD
), nameIRegOrZR(is64
, rN
),
3041 nameIRegOrZR(is64
, rM
));
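      /* Architecturally, ADC computes (Rn + Rm) + C and SBC computes
         (Rn - Rm) - (C ^ 1), which is what the binop/xor structure
         above builds.  Disabled scalar check with small example
         values: */
#if 0
      {
         ULong rn = 10, rm = 3;
         ULong c  = 0;                        /* incoming carry flag */
         ULong adc = (rn + rm) + c;           /* ADC */
         ULong sbc = (rn - rm) - (c ^ 1);     /* SBC subtracts the borrow */
         vassert(adc == 13);
         vassert(sbc == 6);                   /* 10 - 3 - 1, since C == 0 */
      }
#endif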
3045 /* -------------------- LOGIC(reg) -------------------- */
3046 /* x==0 => 32 bit op x==1 => 64 bit op
3047 N==0 => inv? is no-op (no inversion)
3049 sh: 00=LSL, 01=LSR, 10=ASR, 11=ROR
3051 31 30 28 23 21 20 15 9 4
3053 x 00 01010 sh N Rm imm6 Rn Rd AND Rd,Rn, inv?(sh(Rm,imm6))
3054 x 01 01010 sh N Rm imm6 Rn Rd ORR Rd,Rn, inv?(sh(Rm,imm6))
3055 x 10 01010 sh N Rm imm6 Rn Rd EOR Rd,Rn, inv?(sh(Rm,imm6))
3056 x 11 01010 sh N Rm imm6 Rn Rd ANDS Rd,Rn, inv?(sh(Rm,imm6))
3057 With N=1, the names are: BIC ORN EON BICS
3059 if (INSN(28,24) == BITS5(0,1,0,1,0)) {
3060 UInt bX
= INSN(31,31);
3061 UInt sh
= INSN(23,22);
3062 UInt bN
= INSN(21,21);
3063 UInt rM
= INSN(20,16);
3064 UInt imm6
= INSN(15,10);
3065 UInt rN
= INSN(9,5);
3066 UInt rD
= INSN(4,0);
3067 Bool is64
= bX
== 1;
3068 IRType ty
= is64
? Ity_I64
: Ity_I32
;
3069 if (!is64
&& imm6
> 31) {
3070 /* invalid; fall though */
3072 IRTemp argL
= newTemp(ty
);
3073 assign(argL
, getIRegOrZR(is64
, rN
));
3074 IRTemp argR
= getShiftedIRegOrZR(is64
, sh
, imm6
, rM
, bN
== 1);
3075 IROp op
= Iop_INVALID
;
3076 switch (INSN(30,29)) {
3077 case BITS2(0,0): case BITS2(1,1): op
= mkAND(ty
); break;
3078 case BITS2(0,1): op
= mkOR(ty
); break;
3079 case BITS2(1,0): op
= mkXOR(ty
); break;
3080 default: vassert(0);
3082 IRTemp res
= newTemp(ty
);
3083 assign(res
, binop(op
, mkexpr(argL
), mkexpr(argR
)));
3084 if (INSN(30,29) == BITS2(1,1)) {
3085 setFlags_LOGIC(is64
, res
);
3087 putIRegOrZR(is64
, rD
, mkexpr(res
));
3089 static const HChar
* names_op
[8]
3090 = { "and", "orr", "eor", "ands", "bic", "orn", "eon", "bics" };
3091 vassert(((bN
<< 2) | INSN(30,29)) < 8);
3092 const HChar
* nm_op
= names_op
[(bN
<< 2) | INSN(30,29)];
3093 /* Special-case the printing of "MOV" */
3094 if (rN
== 31/*zr*/ && sh
== 0/*LSL*/ && imm6
== 0 && bN
== 0) {
3095 DIP("mov %s, %s\n", nameIRegOrZR(is64
, rD
),
3096 nameIRegOrZR(is64
, rM
));
3098 DIP("%s %s, %s, %s, %s #%u\n", nm_op
,
3099 nameIRegOrZR(is64
, rD
), nameIRegOrZR(is64
, rN
),
3100 nameIRegOrZR(is64
, rM
), nameSH(sh
), imm6
);
3106 /* -------------------- {U,S}MULH -------------------- */
3107 /* 31 23 22 20 15 9 4
3108 10011011 1 10 Rm 011111 Rn Rd UMULH Xd,Xn,Xm
3109 10011011 0 10 Rm 011111 Rn Rd SMULH Xd,Xn,Xm
3111 if (INSN(31,24) == BITS8(1,0,0,1,1,0,1,1)
3112 && INSN(22,21) == BITS2(1,0) && INSN(15,10) == BITS6(0,1,1,1,1,1)) {
3113 Bool isU
= INSN(23,23) == 1;
3114 UInt mm
= INSN(20,16);
3115 UInt nn
= INSN(9,5);
3116 UInt dd
= INSN(4,0);
3117 putIReg64orZR(dd
, unop(Iop_128HIto64
,
3118 binop(isU
? Iop_MullU64
: Iop_MullS64
,
3119 getIReg64orZR(nn
), getIReg64orZR(mm
))));
3120 DIP("%cmulh %s, %s, %s\n",
3122 nameIReg64orZR(dd
), nameIReg64orZR(nn
), nameIReg64orZR(mm
));
3126 /* -------------------- M{ADD,SUB} -------------------- */
3127 /* 31 30 20 15 14 9 4
3128 sf 00 11011 000 m 0 a n r MADD Rd,Rn,Rm,Ra d = a+m*n
3129 sf 00 11011 000 m 1 a n r MADD Rd,Rn,Rm,Ra d = a-m*n
3131 if (INSN(30,21) == BITS10(0,0,1,1,0,1,1,0,0,0)) {
3132 Bool is64
= INSN(31,31) == 1;
3133 UInt mm
= INSN(20,16);
3134 Bool isAdd
= INSN(15,15) == 0;
3135 UInt aa
= INSN(14,10);
3136 UInt nn
= INSN(9,5);
3137 UInt dd
= INSN(4,0);
3141 binop(isAdd
? Iop_Add64
: Iop_Sub64
,
3143 binop(Iop_Mul64
, getIReg64orZR(mm
), getIReg64orZR(nn
))));
3147 binop(isAdd
? Iop_Add32
: Iop_Sub32
,
3149 binop(Iop_Mul32
, getIReg32orZR(mm
), getIReg32orZR(nn
))));
3151 DIP("%s %s, %s, %s, %s\n",
3152 isAdd
? "madd" : "msub",
3153 nameIRegOrZR(is64
, dd
), nameIRegOrZR(is64
, nn
),
3154 nameIRegOrZR(is64
, mm
), nameIRegOrZR(is64
, aa
));
3158 /* ---------------- CS{EL,INC,INV,NEG} ---------------- */
3159 /* 31 30 28 20 15 11 9 4
3160 sf 00 1101 0100 mm cond 00 nn dd CSEL Rd,Rn,Rm
3161 sf 00 1101 0100 mm cond 01 nn dd CSINC Rd,Rn,Rm
3162 sf 10 1101 0100 mm cond 00 nn dd CSINV Rd,Rn,Rm
3163 sf 10 1101 0100 mm cond 01 nn dd CSNEG Rd,Rn,Rm
3164 In all cases, the operation is: Rd = if cond then Rn else OP(Rm)
3166 if (INSN(29,21) == BITS9(0, 1,1,0,1, 0,1,0,0) && INSN(11,11) == 0) {
3167 Bool is64
= INSN(31,31) == 1;
3168 UInt b30
= INSN(30,30);
3169 UInt mm
= INSN(20,16);
3170 UInt cond
= INSN(15,12);
3171 UInt b10
= INSN(10,10);
3172 UInt nn
= INSN(9,5);
3173 UInt dd
= INSN(4,0);
3174 UInt op
= (b30
<< 1) | b10
; /* 00=id 01=inc 10=inv 11=neg */
3175 IRType ty
= is64
? Ity_I64
: Ity_I32
;
3176 IRExpr
* argL
= getIRegOrZR(is64
, nn
);
3177 IRExpr
* argR
= getIRegOrZR(is64
, mm
);
3182 argR
= binop(mkADD(ty
), argR
, mkU(ty
,1));
3185 argR
= unop(mkNOT(ty
), argR
);
3188 argR
= binop(mkSUB(ty
), mkU(ty
,0), argR
);
3195 IRExpr_ITE(unop(Iop_64to1
, mk_arm64g_calculate_condition(cond
)),
3198 const HChar
* op_nm
[4] = { "csel", "csinc", "csinv", "csneg" };
3199 DIP("%s %s, %s, %s, %s\n", op_nm
[op
],
3200 nameIRegOrZR(is64
, dd
), nameIRegOrZR(is64
, nn
),
3201 nameIRegOrZR(is64
, mm
), nameCC(cond
));
3205 /* -------------- ADD/SUB(extended reg) -------------- */
3207 000 01011 00 1 m opt imm3 n d ADD Wd|SP, Wn|SP, Wm ext&lsld
3208 100 01011 00 1 m opt imm3 n d ADD Xd|SP, Xn|SP, Rm ext&lsld
3210 001 01011 00 1 m opt imm3 n d ADDS Wd, Wn|SP, Wm ext&lsld
3211 101 01011 00 1 m opt imm3 n d ADDS Xd, Xn|SP, Rm ext&lsld
3213 010 01011 00 1 m opt imm3 n d SUB Wd|SP, Wn|SP, Wm ext&lsld
3214 110 01011 00 1 m opt imm3 n d SUB Xd|SP, Xn|SP, Rm ext&lsld
3216 011 01011 00 1 m opt imm3 n d SUBS Wd, Wn|SP, Wm ext&lsld
3217 111 01011 00 1 m opt imm3 n d SUBS Xd, Xn|SP, Rm ext&lsld
3219 The 'm' operand is extended per opt, thusly:
3222 001 Xm & 0xFFFF UXTH
3223 010 Xm & (2^32)-1 UXTW
3226 100 Xm sx from bit 7 SXTB
3227 101 Xm sx from bit 15 SXTH
3228 110 Xm sx from bit 31 SXTW
3231 In the 64 bit case (bit31 == 1), UXTX and SXTX are the identity
3232 operation on Xm. In the 32 bit case, UXTW, UXTX, SXTW and SXTX
3233 are the identity operation on Wm.
3235 After extension, the value is shifted left by imm3 bits, which
3236 may only be in the range 0 .. 4 inclusive.
3238 if (INSN(28,21) == BITS8(0,1,0,1,1,0,0,1) && INSN(12,10) <= 4) {
3239 Bool is64
= INSN(31,31) == 1;
3240 Bool isSub
= INSN(30,30) == 1;
3241 Bool setCC
= INSN(29,29) == 1;
3242 UInt mm
= INSN(20,16);
3243 UInt opt
= INSN(15,13);
3244 UInt imm3
= INSN(12,10);
3245 UInt nn
= INSN(9,5);
3246 UInt dd
= INSN(4,0);
3247 const HChar
* nameExt
[8] = { "uxtb", "uxth", "uxtw", "uxtx",
3248 "sxtb", "sxth", "sxtw", "sxtx" };
3249 /* Do almost the same thing in the 32- and 64-bit cases. */
3250 IRTemp xN
= newTemp(Ity_I64
);
3251 IRTemp xM
= newTemp(Ity_I64
);
3252 assign(xN
, getIReg64orSP(nn
));
3253 assign(xM
, getIReg64orZR(mm
));
3254 IRExpr
* xMw
= mkexpr(xM
); /* "xM widened" */
3258 case BITS3(0,0,0): // UXTB
3259 xMw
= binop(Iop_And64
, xMw
, mkU64(0xFF)); break;
3260 case BITS3(0,0,1): // UXTH
3261 xMw
= binop(Iop_And64
, xMw
, mkU64(0xFFFF)); break;
3262 case BITS3(0,1,0): // UXTW -- noop for the 32bit case
3264 xMw
= unop(Iop_32Uto64
, unop(Iop_64to32
, xMw
));
3267 case BITS3(0,1,1): // UXTX -- always a noop
3269 case BITS3(1,0,0): // SXTB
3270 shSX
= 56; goto sxTo64
;
3271 case BITS3(1,0,1): // SXTH
3272 shSX
= 48; goto sxTo64
;
3273 case BITS3(1,1,0): // SXTW -- noop for the 32bit case
3275 shSX
= 32; goto sxTo64
;
3278 case BITS3(1,1,1): // SXTX -- always a noop
3281 vassert(shSX
>= 32);
3282 xMw
= binop(Iop_Sar64
, binop(Iop_Shl64
, xMw
, mkU8(shSX
)),
3290 IRTemp argR
= newTemp(Ity_I64
);
3291 assign(argR
, binop(Iop_Shl64
, xMw
, mkU8(imm3
)));
3292 IRTemp res
= newTemp(Ity_I64
);
3293 assign(res
, binop(isSub
? Iop_Sub64
: Iop_Add64
,
3294 mkexpr(argL
), mkexpr(argR
)));
3297 putIReg64orZR(dd
, mkexpr(res
));
3298 setFlags_ADD_SUB(True
/*is64*/, isSub
, argL
, argR
);
3300 putIReg64orSP(dd
, mkexpr(res
));
3304 IRTemp argL32
= newTemp(Ity_I32
);
3305 IRTemp argR32
= newTemp(Ity_I32
);
3306 putIReg32orZR(dd
, unop(Iop_64to32
, mkexpr(res
)));
3307 assign(argL32
, unop(Iop_64to32
, mkexpr(argL
)));
3308 assign(argR32
, unop(Iop_64to32
, mkexpr(argR
)));
3309 setFlags_ADD_SUB(False
/*!is64*/, isSub
, argL32
, argR32
);
3311 putIReg32orSP(dd
, unop(Iop_64to32
, mkexpr(res
)));
3314 DIP("%s%s %s, %s, %s %s lsl %u\n",
3315 isSub
? "sub" : "add", setCC
? "s" : "",
3316 setCC
? nameIRegOrZR(is64
, dd
) : nameIRegOrSP(is64
, dd
),
3317 nameIRegOrSP(is64
, nn
), nameIRegOrSP(is64
, mm
),
3318 nameExt
[opt
], imm3
);
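      /* For the extended-register forms above, the second operand is
         first extended per 'opt' and then shifted left by imm3
         (0..4).  Disabled scalar sketch of the SXTB case; the Xm
         value is hypothetical: */
#if 0
      {
         /* SXTB then LSL #2, i.e. opt == 100, imm3 == 2. */
         ULong xm   = 0x00000000000000F0ULL;            /* bit 7 is set */
         ULong ext  = ((xm & 0xFF) ^ 0x80ULL) - 0x80ULL; /* sign-extend b7 */
         ULong opnd = ext << 2;
         vassert(ext  == 0xFFFFFFFFFFFFFFF0ULL);
         vassert(opnd == 0xFFFFFFFFFFFFFFC0ULL);
      }
#endif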
3322 /* ---------------- CCMP/CCMN(imm) ---------------- */
3323 /* Bizarrely, these appear in the "data processing register"
      category, even though they are operations against an
      immediate. */
3326 /* 31 29 20 15 11 9 3
3327 sf 1 111010010 imm5 cond 10 Rn 0 nzcv CCMP Rn, #imm5, #nzcv, cond
3328 sf 0 111010010 imm5 cond 10 Rn 0 nzcv CCMN Rn, #imm5, #nzcv, cond
3331 (CCMP) flags = if cond then flags-after-sub(Rn,imm5) else nzcv
3332 (CCMN) flags = if cond then flags-after-add(Rn,imm5) else nzcv
3334 if (INSN(29,21) == BITS9(1,1,1,0,1,0,0,1,0)
3335 && INSN(11,10) == BITS2(1,0) && INSN(4,4) == 0) {
3336 Bool is64
= INSN(31,31) == 1;
3337 Bool isSUB
= INSN(30,30) == 1;
3338 UInt imm5
= INSN(20,16);
3339 UInt cond
= INSN(15,12);
3340 UInt nn
= INSN(9,5);
3341 UInt nzcv
= INSN(3,0);
3343 IRTemp condT
= newTemp(Ity_I1
);
3344 assign(condT
, unop(Iop_64to1
, mk_arm64g_calculate_condition(cond
)));
3346 IRType ty
= is64
? Ity_I64
: Ity_I32
;
3347 IRTemp argL
= newTemp(ty
);
3348 IRTemp argR
= newTemp(ty
);
3351 assign(argL
, getIReg64orZR(nn
));
3352 assign(argR
, mkU64(imm5
));
3354 assign(argL
, getIReg32orZR(nn
));
3355 assign(argR
, mkU32(imm5
));
3357 setFlags_ADD_SUB_conditionally(is64
, isSUB
, condT
, argL
, argR
, nzcv
);
3359 DIP("ccm%c %s, #%u, #%u, %s\n",
3360 isSUB
? 'p' : 'n', nameIRegOrZR(is64
, nn
),
3361 imm5
, nzcv
, nameCC(cond
));
3365 /* ---------------- CCMP/CCMN(reg) ---------------- */
3366 /* 31 29 20 15 11 9 3
3367 sf 1 111010010 Rm cond 00 Rn 0 nzcv CCMP Rn, Rm, #nzcv, cond
3368 sf 0 111010010 Rm cond 00 Rn 0 nzcv CCMN Rn, Rm, #nzcv, cond
3370 (CCMP) flags = if cond then flags-after-sub(Rn,Rm) else nzcv
3371 (CCMN) flags = if cond then flags-after-add(Rn,Rm) else nzcv
3373 if (INSN(29,21) == BITS9(1,1,1,0,1,0,0,1,0)
3374 && INSN(11,10) == BITS2(0,0) && INSN(4,4) == 0) {
3375 Bool is64
= INSN(31,31) == 1;
3376 Bool isSUB
= INSN(30,30) == 1;
3377 UInt mm
= INSN(20,16);
3378 UInt cond
= INSN(15,12);
3379 UInt nn
= INSN(9,5);
3380 UInt nzcv
= INSN(3,0);
3382 IRTemp condT
= newTemp(Ity_I1
);
3383 assign(condT
, unop(Iop_64to1
, mk_arm64g_calculate_condition(cond
)));
3385 IRType ty
= is64
? Ity_I64
: Ity_I32
;
3386 IRTemp argL
= newTemp(ty
);
3387 IRTemp argR
= newTemp(ty
);
3390 assign(argL
, getIReg64orZR(nn
));
3391 assign(argR
, getIReg64orZR(mm
));
3393 assign(argL
, getIReg32orZR(nn
));
3394 assign(argR
, getIReg32orZR(mm
));
3396 setFlags_ADD_SUB_conditionally(is64
, isSUB
, condT
, argL
, argR
, nzcv
);
3398 DIP("ccm%c %s, %s, #%u, %s\n",
3399 isSUB
? 'p' : 'n', nameIRegOrZR(is64
, nn
),
3400 nameIRegOrZR(is64
, mm
), nzcv
, nameCC(cond
));
3405 /* -------------- REV/REV16/REV32/RBIT -------------- */
3406 /* 31 30 28 20 15 11 9 4
3408 1 10 11010110 00000 0000 11 n d (1) REV Xd, Xn
3409 0 10 11010110 00000 0000 10 n d (2) REV Wd, Wn
3411 1 10 11010110 00000 0000 00 n d (3) RBIT Xd, Xn
3412 0 10 11010110 00000 0000 00 n d (4) RBIT Wd, Wn
3414 1 10 11010110 00000 0000 01 n d (5) REV16 Xd, Xn
3415 0 10 11010110 00000 0000 01 n d (6) REV16 Wd, Wn
3417 1 10 11010110 00000 0000 10 n d (7) REV32 Xd, Xn
3419 if (INSN(30,21) == BITS10(1,0,1,1,0,1,0,1,1,0)
3420 && INSN(20,12) == BITS9(0,0,0,0,0,0,0,0,0)) {
3421 UInt b31
= INSN(31,31);
3422 UInt opc
= INSN(11,10);
3425 /**/ if (b31
== 1 && opc
== BITS2(1,1)) ix
= 1;
3426 else if (b31
== 0 && opc
== BITS2(1,0)) ix
= 2;
3427 else if (b31
== 1 && opc
== BITS2(0,0)) ix
= 3;
3428 else if (b31
== 0 && opc
== BITS2(0,0)) ix
= 4;
3429 else if (b31
== 1 && opc
== BITS2(0,1)) ix
= 5;
3430 else if (b31
== 0 && opc
== BITS2(0,1)) ix
= 6;
3431 else if (b31
== 1 && opc
== BITS2(1,0)) ix
= 7;
3432 if (ix
>= 1 && ix
<= 7) {
3433 Bool is64
= ix
== 1 || ix
== 3 || ix
== 5 || ix
== 7;
3434 UInt nn
= INSN(9,5);
3435 UInt dd
= INSN(4,0);
3436 IRTemp src
= newTemp(Ity_I64
);
3437 IRTemp dst
= IRTemp_INVALID
;
3438 IRTemp (*math
)(IRTemp
) = NULL
;
3440 case 1: case 2: math
= math_BYTESWAP64
; break;
3441 case 3: case 4: math
= math_BITSWAP64
; break;
3442 case 5: case 6: math
= math_USHORTSWAP64
; break;
3443 case 7: math
= math_UINTSWAP64
; break;
3444 default: vassert(0);
3446 const HChar
* names
[7]
3447 = { "rev", "rev", "rbit", "rbit", "rev16", "rev16", "rev32" };
3448 const HChar
* nm
= names
[ix
-1];
3451 /* This has to be special cased, since the logic below doesn't
3452 handle it correctly. */
3453 assign(src
, getIReg64orZR(nn
));
3456 unop(Iop_32Uto64
, unop(Iop_64to32
, mkexpr(dst
))));
3458 assign(src
, getIReg64orZR(nn
));
3460 putIReg64orZR(dd
, mkexpr(dst
));
3462 assign(src
, binop(Iop_Shl64
, getIReg64orZR(nn
), mkU8(32)));
3464 putIReg32orZR(dd
, unop(Iop_64to32
, mkexpr(dst
)));
3466 DIP("%s %s, %s\n", nm
,
3467 nameIRegOrZR(is64
,dd
), nameIRegOrZR(is64
,nn
));
3470 /* else fall through */
3473 /* -------------------- CLZ/CLS -------------------- */
3474 /* 30 28 24 20 15 9 4
3475 sf 10 1101 0110 00000 00010 0 n d CLZ Rd, Rn
3476 sf 10 1101 0110 00000 00010 1 n d CLS Rd, Rn
3478 if (INSN(30,21) == BITS10(1,0,1,1,0,1,0,1,1,0)
3479 && INSN(20,11) == BITS10(0,0,0,0,0,0,0,0,1,0)) {
3480 Bool is64
= INSN(31,31) == 1;
3481 Bool isCLS
= INSN(10,10) == 1;
3482 UInt nn
= INSN(9,5);
3483 UInt dd
= INSN(4,0);
3484 IRTemp src
= newTemp(Ity_I64
);
3485 IRTemp srcZ
= newTemp(Ity_I64
);
3486 IRTemp dst
= newTemp(Ity_I64
);
3487 /* Get the argument, widened out to 64 bit */
3489 assign(src
, getIReg64orZR(nn
));
3491 assign(src
, binop(Iop_Shl64
,
3492 unop(Iop_32Uto64
, getIReg32orZR(nn
)), mkU8(32)));
3494 /* If this is CLS, mash the arg around accordingly */
3496 IRExpr
* one
= mkU8(1);
3499 binop(Iop_Shl64
, mkexpr(src
), one
),
3500 binop(Iop_Shl64
, binop(Iop_Shr64
, mkexpr(src
), one
), one
)));
3502 assign(srcZ
, mkexpr(src
));
3504 /* And compute CLZ. */
3506 assign(dst
, IRExpr_ITE(binop(Iop_CmpEQ64
, mkexpr(srcZ
), mkU64(0)),
3507 mkU64(isCLS
? 63 : 64),
3508 unop(Iop_Clz64
, mkexpr(srcZ
))));
3509 putIReg64orZR(dd
, mkexpr(dst
));
3511 assign(dst
, IRExpr_ITE(binop(Iop_CmpEQ64
, mkexpr(srcZ
), mkU64(0)),
3512 mkU64(isCLS
? 31 : 32),
3513 unop(Iop_Clz64
, mkexpr(srcZ
))));
3514 putIReg32orZR(dd
, unop(Iop_64to32
, mkexpr(dst
)));
3516 DIP("cl%c %s, %s\n", isCLS
? 's' : 'z',
3517 nameIRegOrZR(is64
, dd
), nameIRegOrZR(is64
, nn
));
3521 /* ------------------ LSLV/LSRV/ASRV/RORV ------------------ */
3522 /* 30 28 20 15 11 9 4
3523 sf 00 1101 0110 m 0010 00 n d LSLV Rd,Rn,Rm
3524 sf 00 1101 0110 m 0010 01 n d LSRV Rd,Rn,Rm
3525 sf 00 1101 0110 m 0010 10 n d ASRV Rd,Rn,Rm
3526 sf 00 1101 0110 m 0010 11 n d RORV Rd,Rn,Rm
3528 if (INSN(30,21) == BITS10(0,0,1,1,0,1,0,1,1,0)
3529 && INSN(15,12) == BITS4(0,0,1,0)) {
3530 Bool is64
= INSN(31,31) == 1;
3531 UInt mm
= INSN(20,16);
3532 UInt op
= INSN(11,10);
3533 UInt nn
= INSN(9,5);
3534 UInt dd
= INSN(4,0);
3535 IRType ty
= is64
? Ity_I64
: Ity_I32
;
3536 IRTemp srcL
= newTemp(ty
);
3537 IRTemp srcR
= newTemp(Ity_I64
);
3538 IRTemp res
= newTemp(ty
);
3539 IROp iop
= Iop_INVALID
;
3540 assign(srcL
, getIRegOrZR(is64
, nn
));
3541 assign(srcR
, binop(Iop_And64
, getIReg64orZR(mm
),
3542 mkU64(is64
? 63 : 31)));
3546 case BITS2(0,0): iop
= mkSHL(ty
); break;
3547 case BITS2(0,1): iop
= mkSHR(ty
); break;
3548 case BITS2(1,0): iop
= mkSAR(ty
); break;
3549 default: vassert(0);
3551 assign(res
, binop(iop
, mkexpr(srcL
),
3552 unop(Iop_64to8
, mkexpr(srcR
))));
3555 IROp opSHL
= mkSHL(ty
);
3556 IROp opSHR
= mkSHR(ty
);
3557 IROp opOR
= mkOR(ty
);
3558 IRExpr
* width
= mkU64(is64
? 64: 32);
3562 binop(Iop_CmpEQ64
, mkexpr(srcR
), mkU64(0)),
3567 unop(Iop_64to8
, binop(Iop_Sub64
, width
,
3570 mkexpr(srcL
), unop(Iop_64to8
, mkexpr(srcR
))))
3573 putIRegOrZR(is64
, dd
, mkexpr(res
));
3575 const HChar
* names
[4] = { "lslv", "lsrv", "asrv", "rorv" };
3576 DIP("%s %s, %s, %s\n",
3577 names
[op
], nameIRegOrZR(is64
,dd
),
3578 nameIRegOrZR(is64
,nn
), nameIRegOrZR(is64
,mm
));
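      /* The RORV path above must avoid shifting by the full register
         width when the masked shift amount is zero, hence the
         CmpEQ64 special case.  Disabled scalar rotate showing the
         same guard, with a hypothetical input: */
#if 0
      {
         ULong x   = 0x00000000DEADBEEFULL;   /* hypothetical Rn */
         ULong amt = 68 & 63;                 /* Rm masked to 0..63, here 4 */
         ULong ror = (amt == 0)
                        ? x
                        : (x >> amt) | (x << (64 - amt));
         vassert(ror == 0xF00000000DEADBEEULL);
      }
#endif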
3582 /* -------------------- SDIV/UDIV -------------------- */
3583 /* 30 28 20 15 10 9 4
3584 sf 00 1101 0110 m 00001 1 n d SDIV Rd,Rn,Rm
3585 sf 00 1101 0110 m 00001 0 n d UDIV Rd,Rn,Rm
3587 if (INSN(30,21) == BITS10(0,0,1,1,0,1,0,1,1,0)
3588 && INSN(15,11) == BITS5(0,0,0,0,1)) {
3589 Bool is64
= INSN(31,31) == 1;
3590 UInt mm
= INSN(20,16);
3591 Bool isS
= INSN(10,10) == 1;
3592 UInt nn
= INSN(9,5);
3593 UInt dd
= INSN(4,0);
3595 putIRegOrZR(is64
, dd
, binop(is64
? Iop_DivS64
: Iop_DivS32
,
3596 getIRegOrZR(is64
, nn
),
3597 getIRegOrZR(is64
, mm
)));
3599 putIRegOrZR(is64
, dd
, binop(is64
? Iop_DivU64
: Iop_DivU32
,
3600 getIRegOrZR(is64
, nn
),
3601 getIRegOrZR(is64
, mm
)));
3603 DIP("%cdiv %s, %s, %s\n", isS
? 's' : 'u',
3604 nameIRegOrZR(is64
, dd
),
3605 nameIRegOrZR(is64
, nn
), nameIRegOrZR(is64
, mm
));
3609 /* ------------------ {S,U}M{ADD,SUB}L ------------------ */
3610 /* 31 23 20 15 14 9 4
3611 1001 1011 101 m 0 a n d UMADDL Xd,Wn,Wm,Xa
3612 1001 1011 001 m 0 a n d SMADDL Xd,Wn,Wm,Xa
3613 1001 1011 101 m 1 a n d UMSUBL Xd,Wn,Wm,Xa
3614 1001 1011 001 m 1 a n d SMSUBL Xd,Wn,Wm,Xa
3616 Xd = Xa +/- (Wn *u/s Wm)
3618 if (INSN(31,24) == BITS8(1,0,0,1,1,0,1,1) && INSN(22,21) == BITS2(0,1)) {
3619 Bool isU
= INSN(23,23) == 1;
3620 UInt mm
= INSN(20,16);
3621 Bool isAdd
= INSN(15,15) == 0;
3622 UInt aa
= INSN(14,10);
3623 UInt nn
= INSN(9,5);
3624 UInt dd
= INSN(4,0);
3625 IRTemp wN
= newTemp(Ity_I32
);
3626 IRTemp wM
= newTemp(Ity_I32
);
3627 IRTemp xA
= newTemp(Ity_I64
);
3628 IRTemp muld
= newTemp(Ity_I64
);
3629 IRTemp res
= newTemp(Ity_I64
);
3630 assign(wN
, getIReg32orZR(nn
));
3631 assign(wM
, getIReg32orZR(mm
));
3632 assign(xA
, getIReg64orZR(aa
));
3633 assign(muld
, binop(isU
? Iop_MullU32
: Iop_MullS32
,
3634 mkexpr(wN
), mkexpr(wM
)));
3635 assign(res
, binop(isAdd
? Iop_Add64
: Iop_Sub64
,
3636 mkexpr(xA
), mkexpr(muld
)));
3637 putIReg64orZR(dd
, mkexpr(res
));
3638 DIP("%cm%sl %s, %s, %s, %s\n", isU
? 'u' : 's', isAdd
? "add" : "sub",
3639 nameIReg64orZR(dd
), nameIReg32orZR(nn
),
3640 nameIReg32orZR(mm
), nameIReg64orZR(aa
));
3644 /* -------------------- CRC32/CRC32C -------------------- */
3645 /* 31 30 20 15 11 9 4
3646 sf 00 1101 0110 m 0100 sz n d CRC32<sz> Wd, Wn, Wm|Xm
3647 sf 00 1101 0110 m 0101 sz n d CRC32C<sz> Wd, Wn, Wm|Xm
3649 if (INSN(30,21) == BITS10(0,0,1,1,0,1,0,1,1,0)
3650 && INSN(15,13) == BITS3(0,1,0)) {
3651 UInt bitSF
= INSN(31,31);
3652 UInt mm
= INSN(20,16);
3653 UInt bitC
= INSN(12,12);
3654 UInt sz
= INSN(11,10);
3655 UInt nn
= INSN(9,5);
3656 UInt dd
= INSN(4,0);
3657 vassert(sz
>= 0 && sz
<= 3);
3658 if ((bitSF
== 0 && sz
<= BITS2(1,0))
3659 || (bitSF
== 1 && sz
== BITS2(1,1))) {
3660 UInt ix
= (bitC
== 1 ? 4 : 0) | sz
;
3662 = { &arm64g_calc_crc32b
, &arm64g_calc_crc32h
,
3663 &arm64g_calc_crc32w
, &arm64g_calc_crc32x
,
3664 &arm64g_calc_crc32cb
, &arm64g_calc_crc32ch
,
3665 &arm64g_calc_crc32cw
, &arm64g_calc_crc32cx
};
3666 const HChar
* hNames
[8]
3667 = { "arm64g_calc_crc32b", "arm64g_calc_crc32h",
3668 "arm64g_calc_crc32w", "arm64g_calc_crc32x",
3669 "arm64g_calc_crc32cb", "arm64g_calc_crc32ch",
3670 "arm64g_calc_crc32cw", "arm64g_calc_crc32cx" };
3671 const HChar
* iNames
[8]
3672 = { "crc32b", "crc32h", "crc32w", "crc32x",
3673 "crc32cb", "crc32ch", "crc32cw", "crc32cx" };
3675 IRTemp srcN
= newTemp(Ity_I64
);
3676 assign(srcN
, unop(Iop_32Uto64
, unop(Iop_64to32
, getIReg64orZR(nn
))));
3678 IRTemp srcM
= newTemp(Ity_I64
);
3679 IRExpr
* at64
= getIReg64orZR(mm
);
3682 assign(srcM
, binop(Iop_And64
, at64
, mkU64(0xFF))); break;
3684 assign(srcM
, binop(Iop_And64
, at64
, mkU64(0xFFFF))); break;
3686 assign(srcM
, binop(Iop_And64
, at64
, mkU64(0xFFFFFFFF))); break;
3688 assign(srcM
, at64
); break;
3693 vassert(ix
>= 0 && ix
<= 7);
3699 mkIRExprCCall(Ity_I64
, 0/*regparm*/,
3700 hNames
[ix
], helpers
[ix
],
3701 mkIRExprVec_2(mkexpr(srcN
),
3704 DIP("%s %s, %s, %s\n", iNames
[ix
],
3706 nameIReg32orZR(nn
), nameIRegOrZR(bitSF
== 1, mm
));
3713 vex_printf("ARM64 front end: data_processing_register\n");
3720 /*------------------------------------------------------------*/
3721 /*--- Math helpers for vector interleave/deinterleave ---*/
3722 /*------------------------------------------------------------*/
#define SL(_hi128,_lo128,_nbytes) \
   ( (_nbytes) == 0 \
        ? (_lo128) \
        : triop(Iop_SliceV128,(_hi128),(_lo128),mkU8(_nbytes)) )
3730 #define ROR(_v128,_nbytes) \
3731 SL((_v128),(_v128),(_nbytes))
3732 #define ROL(_v128,_nbytes) \
3733 SL((_v128),(_v128),16-(_nbytes))
3734 #define SHR(_v128,_nbytes) \
3735 binop(Iop_ShrV128,(_v128),mkU8(8*(_nbytes)))
3736 #define SHL(_v128,_nbytes) \
3737 binop(Iop_ShlV128,(_v128),mkU8(8*(_nbytes)))
3738 #define ILO64x2(_argL,_argR) \
3739 binop(Iop_InterleaveLO64x2,(_argL),(_argR))
3740 #define IHI64x2(_argL,_argR) \
3741 binop(Iop_InterleaveHI64x2,(_argL),(_argR))
3742 #define ILO32x4(_argL,_argR) \
3743 binop(Iop_InterleaveLO32x4,(_argL),(_argR))
3744 #define IHI32x4(_argL,_argR) \
3745 binop(Iop_InterleaveHI32x4,(_argL),(_argR))
3746 #define ILO16x8(_argL,_argR) \
3747 binop(Iop_InterleaveLO16x8,(_argL),(_argR))
3748 #define IHI16x8(_argL,_argR) \
3749 binop(Iop_InterleaveHI16x8,(_argL),(_argR))
3750 #define ILO8x16(_argL,_argR) \
3751 binop(Iop_InterleaveLO8x16,(_argL),(_argR))
3752 #define IHI8x16(_argL,_argR) \
3753 binop(Iop_InterleaveHI8x16,(_argL),(_argR))
3754 #define CEV32x4(_argL,_argR) \
3755 binop(Iop_CatEvenLanes32x4,(_argL),(_argR))
3756 #define COD32x4(_argL,_argR) \
3757 binop(Iop_CatOddLanes32x4,(_argL),(_argR))
3758 #define COD16x8(_argL,_argR) \
3759 binop(Iop_CatOddLanes16x8,(_argL),(_argR))
3760 #define COD8x16(_argL,_argR) \
3761 binop(Iop_CatOddLanes8x16,(_argL),(_argR))
3762 #define CEV8x16(_argL,_argR) \
3763 binop(Iop_CatEvenLanes8x16,(_argL),(_argR))
3764 #define AND(_arg1,_arg2) \
3765 binop(Iop_AndV128,(_arg1),(_arg2))
3766 #define OR2(_arg1,_arg2) \
3767 binop(Iop_OrV128,(_arg1),(_arg2))
3768 #define OR3(_arg1,_arg2,_arg3) \
3769 binop(Iop_OrV128,(_arg1),binop(Iop_OrV128,(_arg2),(_arg3)))
3770 #define OR4(_arg1,_arg2,_arg3,_arg4) \
3772 binop(Iop_OrV128,(_arg1),(_arg2)), \
3773 binop(Iop_OrV128,(_arg3),(_arg4)))
/* Do interleaving for 1 128 bit vector, for ST1 insns. */
static
void math_INTERLEAVE1_128( /*OUTx1*/ IRTemp* i0,
                           UInt laneSzBlg2, IRTemp u0 )
{
   assign(*i0, mkexpr(u0));
}
3785 /* Do interleaving for 2 128 bit vectors, for ST2 insns. */
3787 void math_INTERLEAVE2_128( /*OUTx2*/ IRTemp
* i0
, IRTemp
* i1
,
3788 UInt laneSzBlg2
, IRTemp u0
, IRTemp u1
)
3790 /* This is pretty easy, since we have primitives directly to
3792 if (laneSzBlg2
== 3) {
3794 // u1 == B1 B0, u0 == A1 A0
3795 // i1 == B1 A1, i0 == B0 A0
3796 assign(*i0
, binop(Iop_InterleaveLO64x2
, mkexpr(u1
), mkexpr(u0
)));
3797 assign(*i1
, binop(Iop_InterleaveHI64x2
, mkexpr(u1
), mkexpr(u0
)));
3800 if (laneSzBlg2
== 2) {
3802 // u1 == B3 B2 B1 B0, u0 == A3 A2 A1 A0,
3803 // i1 == B3 A3 B2 A2, i0 == B1 A1 B0 A0
3804 assign(*i0
, binop(Iop_InterleaveLO32x4
, mkexpr(u1
), mkexpr(u0
)));
3805 assign(*i1
, binop(Iop_InterleaveHI32x4
, mkexpr(u1
), mkexpr(u0
)));
3808 if (laneSzBlg2
== 1) {
3810 // u1 == B{7..0}, u0 == A{7..0}
3811 // i0 == B3 A3 B2 A2 B1 A1 B0 A0
3812 // i1 == B7 A7 B6 A6 B5 A5 B4 A4
3813 assign(*i0
, binop(Iop_InterleaveLO16x8
, mkexpr(u1
), mkexpr(u0
)));
3814 assign(*i1
, binop(Iop_InterleaveHI16x8
, mkexpr(u1
), mkexpr(u0
)));
3817 if (laneSzBlg2
== 0) {
3819 // u1 == B{f..0}, u0 == A{f..0}
3820 // i0 == B7 A7 B6 A6 B5 A5 B4 A4 B3 A3 B2 A2 B1 A1 B0 A0
3821 // i1 == Bf Af Be Ae Bd Ad Bc Ac Bb Ab Ba Aa B9 A9 B8 A8
3822 assign(*i0
, binop(Iop_InterleaveLO8x16
, mkexpr(u1
), mkexpr(u0
)));
3823 assign(*i1
, binop(Iop_InterleaveHI8x16
, mkexpr(u1
), mkexpr(u0
)));
3831 /* Do interleaving for 3 128 bit vectors, for ST3 insns. */
3833 void math_INTERLEAVE3_128(
3834 /*OUTx3*/ IRTemp
* i0
, IRTemp
* i1
, IRTemp
* i2
,
3836 IRTemp u0
, IRTemp u1
, IRTemp u2
)
3838 if (laneSzBlg2
== 3) {
3840 // u2 == C1 C0, u1 == B1 B0, u0 == A1 A0
3841 // i2 == C1 B1, i1 == A1 C0, i0 == B0 A0,
3842 assign(*i2
, IHI64x2( EX(u2
), EX(u1
) ));
3843 assign(*i1
, ILO64x2( ROR(EX(u0
),8), EX(u2
) ));
3844 assign(*i0
, ILO64x2( EX(u1
), EX(u0
) ));
3848 if (laneSzBlg2
== 2) {
3850 // u2 == C3 C2 C1 C0, u1 == B3 B2 B1 B0, u0 == A3 A2 A1 A0
3851 // p2 == C3 C2 B3 B2, p1 == A3 A2 C1 C0, p0 == B1 B0 A1 A0
3852 // i2 == C3 B3 A2 C2, i1 == B2 A2 C1 B1, i0 == A1 C0 B0 A0
3853 IRTemp p0
= newTempV128();
3854 IRTemp p1
= newTempV128();
3855 IRTemp p2
= newTempV128();
3856 IRTemp c1100
= newTempV128();
3857 IRTemp c0011
= newTempV128();
3858 IRTemp c0110
= newTempV128();
3859 assign(c1100
, mkV128(0xFF00));
3860 assign(c0011
, mkV128(0x00FF));
3861 assign(c0110
, mkV128(0x0FF0));
3862 // First interleave them at 64x2 granularity,
3863 // generating partial ("p") values.
3864 math_INTERLEAVE3_128(&p0
, &p1
, &p2
, 3, u0
, u1
, u2
);
3865 // And more shuffling around for the final answer
3866 assign(*i2
, OR2( AND( IHI32x4(EX(p2
), ROL(EX(p2
),8)), EX(c1100
) ),
3867 AND( IHI32x4(ROR(EX(p1
),4), EX(p2
)), EX(c0011
) ) ));
3868 assign(*i1
, OR3( SHL(EX(p2
),12),
3869 AND(EX(p1
),EX(c0110
)),
3871 assign(*i0
, OR2( AND( ILO32x4(EX(p0
),ROL(EX(p1
),4)), EX(c1100
) ),
3872 AND( ILO32x4(ROR(EX(p0
),8),EX(p0
)), EX(c0011
) ) ));
3876 if (laneSzBlg2
== 1) {
3878 // u2 == C7 C6 C5 C4 C3 C2 C1 C0
3879 // u1 == B7 B6 B5 B4 B3 B2 B1 B0
3880 // u0 == A7 A6 A5 A4 A3 A2 A1 A0
3882 // p2 == C7 C6 B7 B6 A7 A6 C5 C4
3883 // p1 == B5 B4 A5 A4 C3 C2 B3 B2
3884 // p0 == A3 A2 C1 C0 B1 B0 A1 A0
3886 // i2 == C7 B7 A7 C6 B6 A6 C5 B5
3887 // i1 == A5 C4 B4 A4 C4 B3 A3 C2
3888 // i0 == B2 A2 C1 B1 A1 C0 B0 A0
3889 IRTemp p0
= newTempV128();
3890 IRTemp p1
= newTempV128();
3891 IRTemp p2
= newTempV128();
3892 IRTemp c1000
= newTempV128();
3893 IRTemp c0100
= newTempV128();
3894 IRTemp c0010
= newTempV128();
3895 IRTemp c0001
= newTempV128();
3896 assign(c1000
, mkV128(0xF000));
3897 assign(c0100
, mkV128(0x0F00));
3898 assign(c0010
, mkV128(0x00F0));
3899 assign(c0001
, mkV128(0x000F));
3900 // First interleave them at 32x4 granularity,
3901 // generating partial ("p") values.
3902 math_INTERLEAVE3_128(&p0
, &p1
, &p2
, 2, u0
, u1
, u2
);
3903 // And more shuffling around for the final answer
3905 OR4( AND( IHI16x8( EX(p2
), ROL(EX(p2
),4) ), EX(c1000
) ),
3906 AND( IHI16x8( ROL(EX(p2
),6), EX(p2
) ), EX(c0100
) ),
3907 AND( IHI16x8( ROL(EX(p2
),2), ROL(EX(p2
),6) ), EX(c0010
) ),
3908 AND( ILO16x8( ROR(EX(p2
),2), ROL(EX(p1
),2) ), EX(c0001
) )
3911 OR4( AND( IHI16x8( ROL(EX(p1
),4), ROR(EX(p2
),2) ), EX(c1000
) ),
3912 AND( IHI16x8( EX(p1
), ROL(EX(p1
),4) ), EX(c0100
) ),
3913 AND( IHI16x8( ROL(EX(p1
),4), ROL(EX(p1
),8) ), EX(c0010
) ),
3914 AND( IHI16x8( ROR(EX(p0
),6), ROL(EX(p1
),4) ), EX(c0001
) )
3917 OR4( AND( IHI16x8( ROR(EX(p1
),2), ROL(EX(p0
),2) ), EX(c1000
) ),
3918 AND( IHI16x8( ROL(EX(p0
),2), ROL(EX(p0
),6) ), EX(c0100
) ),
3919 AND( IHI16x8( ROL(EX(p0
),8), ROL(EX(p0
),2) ), EX(c0010
) ),
3920 AND( IHI16x8( ROL(EX(p0
),4), ROL(EX(p0
),8) ), EX(c0001
) )
3925 if (laneSzBlg2
== 0) {
3926 // 8x16. It doesn't seem worth the hassle of first doing a
3927 // 16x8 interleave, so just generate all 24 partial results
3929 // u2 == Cf .. C0, u1 == Bf .. B0, u0 == Af .. A0
3930 // i2 == Cf Bf Af Ce .. Bb Ab Ca
3931 // i1 == Ba Aa C9 B9 .. A6 C5 B5
3932 // i0 == A5 C4 B4 A4 .. C0 B0 A0
3934 IRTemp i2_FEDC
= newTempV128(); IRTemp i2_BA98
= newTempV128();
3935 IRTemp i2_7654
= newTempV128(); IRTemp i2_3210
= newTempV128();
3936 IRTemp i1_FEDC
= newTempV128(); IRTemp i1_BA98
= newTempV128();
3937 IRTemp i1_7654
= newTempV128(); IRTemp i1_3210
= newTempV128();
3938 IRTemp i0_FEDC
= newTempV128(); IRTemp i0_BA98
= newTempV128();
3939 IRTemp i0_7654
= newTempV128(); IRTemp i0_3210
= newTempV128();
3940 IRTemp i2_hi64
= newTempV128(); IRTemp i2_lo64
= newTempV128();
3941 IRTemp i1_hi64
= newTempV128(); IRTemp i1_lo64
= newTempV128();
3942 IRTemp i0_hi64
= newTempV128(); IRTemp i0_lo64
= newTempV128();
3944 // eg XXXX(qqq, CC, 0xF, BB, 0xA)) sets qqq to be a vector
3945 // of the form 14 bytes junk : CC[0xF] : BB[0xA]
3947 # define XXXX(_tempName,_srcVec1,_srcShift1,_srcVec2,_srcShift2) \
3948 IRTemp t_##_tempName = newTempV128(); \
3949 assign(t_##_tempName, \
3950 ILO8x16( ROR(EX(_srcVec1),(_srcShift1)), \
3951 ROR(EX(_srcVec2),(_srcShift2)) ) )
3953 // Let CC, BB, AA be (handy) aliases of u2, u1, u0 respectively
3954 IRTemp CC
= u2
; IRTemp BB
= u1
; IRTemp AA
= u0
;
3956 // The slicing and reassembly are done as interleavedly as possible,
3957 // so as to minimise the demand for registers in the back end, which
3958 // was observed to be a problem in testing.
3960 XXXX(CfBf
, CC
, 0xf, BB
, 0xf); // i2[15:14]
3961 XXXX(AfCe
, AA
, 0xf, CC
, 0xe);
3962 assign(i2_FEDC
, ILO16x8(EX(t_CfBf
), EX(t_AfCe
)));
3964 XXXX(BeAe
, BB
, 0xe, AA
, 0xe);
3965 XXXX(CdBd
, CC
, 0xd, BB
, 0xd);
3966 assign(i2_BA98
, ILO16x8(EX(t_BeAe
), EX(t_CdBd
)));
3967 assign(i2_hi64
, ILO32x4(EX(i2_FEDC
), EX(i2_BA98
)));
3969 XXXX(AdCc
, AA
, 0xd, CC
, 0xc);
3970 XXXX(BcAc
, BB
, 0xc, AA
, 0xc);
3971 assign(i2_7654
, ILO16x8(EX(t_AdCc
), EX(t_BcAc
)));
3973 XXXX(CbBb
, CC
, 0xb, BB
, 0xb);
3974 XXXX(AbCa
, AA
, 0xb, CC
, 0xa); // i2[1:0]
3975 assign(i2_3210
, ILO16x8(EX(t_CbBb
), EX(t_AbCa
)));
3976 assign(i2_lo64
, ILO32x4(EX(i2_7654
), EX(i2_3210
)));
3977 assign(*i2
, ILO64x2(EX(i2_hi64
), EX(i2_lo64
)));
3979 XXXX(BaAa
, BB
, 0xa, AA
, 0xa); // i1[15:14]
3980 XXXX(C9B9
, CC
, 0x9, BB
, 0x9);
3981 assign(i1_FEDC
, ILO16x8(EX(t_BaAa
), EX(t_C9B9
)));
3983 XXXX(A9C8
, AA
, 0x9, CC
, 0x8);
3984 XXXX(B8A8
, BB
, 0x8, AA
, 0x8);
3985 assign(i1_BA98
, ILO16x8(EX(t_A9C8
), EX(t_B8A8
)));
3986 assign(i1_hi64
, ILO32x4(EX(i1_FEDC
), EX(i1_BA98
)));
3988 XXXX(C7B7
, CC
, 0x7, BB
, 0x7);
3989 XXXX(A7C6
, AA
, 0x7, CC
, 0x6);
3990 assign(i1_7654
, ILO16x8(EX(t_C7B7
), EX(t_A7C6
)));
3992 XXXX(B6A6
, BB
, 0x6, AA
, 0x6);
3993 XXXX(C5B5
, CC
, 0x5, BB
, 0x5); // i1[1:0]
3994 assign(i1_3210
, ILO16x8(EX(t_B6A6
), EX(t_C5B5
)));
3995 assign(i1_lo64
, ILO32x4(EX(i1_7654
), EX(i1_3210
)));
3996 assign(*i1
, ILO64x2(EX(i1_hi64
), EX(i1_lo64
)));
3998 XXXX(A5C4
, AA
, 0x5, CC
, 0x4); // i0[15:14]
3999 XXXX(B4A4
, BB
, 0x4, AA
, 0x4);
4000 assign(i0_FEDC
, ILO16x8(EX(t_A5C4
), EX(t_B4A4
)));
4002 XXXX(C3B3
, CC
, 0x3, BB
, 0x3);
4003 XXXX(A3C2
, AA
, 0x3, CC
, 0x2);
4004 assign(i0_BA98
, ILO16x8(EX(t_C3B3
), EX(t_A3C2
)));
4005 assign(i0_hi64
, ILO32x4(EX(i0_FEDC
), EX(i0_BA98
)));
4007 XXXX(B2A2
, BB
, 0x2, AA
, 0x2);
4008 XXXX(C1B1
, CC
, 0x1, BB
, 0x1);
4009 assign(i0_7654
, ILO16x8(EX(t_B2A2
), EX(t_C1B1
)));
4011 XXXX(A1C0
, AA
, 0x1, CC
, 0x0);
4012 XXXX(B0A0
, BB
, 0x0, AA
, 0x0); // i0[1:0]
4013 assign(i0_3210
, ILO16x8(EX(t_A1C0
), EX(t_B0A0
)));
4014 assign(i0_lo64
, ILO32x4(EX(i0_7654
), EX(i0_3210
)));
4015 assign(*i0
, ILO64x2(EX(i0_hi64
), EX(i0_lo64
)));
/* Do interleaving for 4 128 bit vectors, for ST4 insns. */
static
void math_INTERLEAVE4_128(
        /*OUTx4*/ IRTemp* i0, IRTemp* i1, IRTemp* i2, IRTemp* i3,
        UInt laneSzBlg2,
        IRTemp u0, IRTemp u1, IRTemp u2, IRTemp u3 )
{
   if (laneSzBlg2 == 3) {
      // 64x2
      assign(*i0, ILO64x2(EX(u1), EX(u0)));
      assign(*i1, ILO64x2(EX(u3), EX(u2)));
      assign(*i2, IHI64x2(EX(u1), EX(u0)));
      assign(*i3, IHI64x2(EX(u3), EX(u2)));
      return;
   }
   if (laneSzBlg2 == 2) {
      // 32x4
      // First, interleave at the 64-bit lane size.
      IRTemp p0 = newTempV128();
      IRTemp p1 = newTempV128();
      IRTemp p2 = newTempV128();
      IRTemp p3 = newTempV128();
      math_INTERLEAVE4_128(&p0, &p1, &p2, &p3, 3, u0, u1, u2, u3);
      // And interleave (cat) at the 32 bit size.
      assign(*i0, CEV32x4(EX(p1), EX(p0)));
      assign(*i1, COD32x4(EX(p1), EX(p0)));
      assign(*i2, CEV32x4(EX(p3), EX(p2)));
      assign(*i3, COD32x4(EX(p3), EX(p2)));
      return;
   }
   if (laneSzBlg2 == 1) {
      // 16x8
      // First, interleave at the 32-bit lane size.
      IRTemp p0 = newTempV128();
      IRTemp p1 = newTempV128();
      IRTemp p2 = newTempV128();
      IRTemp p3 = newTempV128();
      math_INTERLEAVE4_128(&p0, &p1, &p2, &p3, 2, u0, u1, u2, u3);
      // And rearrange within each vector, to get the right 16 bit lanes.
      assign(*i0, COD16x8(EX(p0), SHL(EX(p0), 2)));
      assign(*i1, COD16x8(EX(p1), SHL(EX(p1), 2)));
      assign(*i2, COD16x8(EX(p2), SHL(EX(p2), 2)));
      assign(*i3, COD16x8(EX(p3), SHL(EX(p3), 2)));
      return;
   }
   if (laneSzBlg2 == 0) {
      // 8x16
      // First, interleave at the 16-bit lane size.
      IRTemp p0 = newTempV128();
      IRTemp p1 = newTempV128();
      IRTemp p2 = newTempV128();
      IRTemp p3 = newTempV128();
      math_INTERLEAVE4_128(&p0, &p1, &p2, &p3, 1, u0, u1, u2, u3);
      // And rearrange within each vector, to get the right 8 bit lanes.
      assign(*i0, IHI32x4(COD8x16(EX(p0),EX(p0)), CEV8x16(EX(p0),EX(p0))));
      assign(*i1, IHI32x4(COD8x16(EX(p1),EX(p1)), CEV8x16(EX(p1),EX(p1))));
      assign(*i2, IHI32x4(COD8x16(EX(p2),EX(p2)), CEV8x16(EX(p2),EX(p2))));
      assign(*i3, IHI32x4(COD8x16(EX(p3),EX(p3)), CEV8x16(EX(p3),EX(p3))));
      return;
   }
   /*NOTREACHED*/
   vassert(0);
}
/* Do deinterleaving for 1 128 bit vector, for LD1 insns. */
static
void math_DEINTERLEAVE1_128( /*OUTx1*/ IRTemp* u0,
                             UInt laneSzBlg2, IRTemp i0 )
{
   assign(*u0, mkexpr(i0));
}
/* Do deinterleaving for 2 128 bit vectors, for LD2 insns. */
static
void math_DEINTERLEAVE2_128( /*OUTx2*/ IRTemp* u0, IRTemp* u1,
                             UInt laneSzBlg2, IRTemp i0, IRTemp i1 )
{
   /* This is pretty easy, since we have primitives directly to
      hand. */
   if (laneSzBlg2 == 3) {
      // 64x2
      // i1 == B1 A1, i0 == B0 A0
      // u1 == B1 B0, u0 == A1 A0
      assign(*u0, binop(Iop_InterleaveLO64x2, mkexpr(i1), mkexpr(i0)));
      assign(*u1, binop(Iop_InterleaveHI64x2, mkexpr(i1), mkexpr(i0)));
      return;
   }
   if (laneSzBlg2 == 2) {
      // 32x4
      // i1 == B3 A3 B2 A2, i0 == B1 A1 B0 A0
      // u1 == B3 B2 B1 B0, u0 == A3 A2 A1 A0
      assign(*u0, binop(Iop_CatEvenLanes32x4, mkexpr(i1), mkexpr(i0)));
      assign(*u1, binop(Iop_CatOddLanes32x4,  mkexpr(i1), mkexpr(i0)));
      return;
   }
   if (laneSzBlg2 == 1) {
      // 16x8
      // i0 == B3 A3 B2 A2 B1 A1 B0 A0
      // i1 == B7 A7 B6 A6 B5 A5 B4 A4
      // u1 == B{7..0}, u0 == A{7..0}
      assign(*u0, binop(Iop_CatEvenLanes16x8, mkexpr(i1), mkexpr(i0)));
      assign(*u1, binop(Iop_CatOddLanes16x8,  mkexpr(i1), mkexpr(i0)));
      return;
   }
   if (laneSzBlg2 == 0) {
      // 8x16
      // i0 == B7 A7 B6 A6 B5 A5 B4 A4 B3 A3 B2 A2 B1 A1 B0 A0
      // i1 == Bf Af Be Ae Bd Ad Bc Ac Bb Ab Ba Aa B9 A9 B8 A8
      // u1 == B{f..0}, u0 == A{f..0}
      assign(*u0, binop(Iop_CatEvenLanes8x16, mkexpr(i1), mkexpr(i0)));
      assign(*u1, binop(Iop_CatOddLanes8x16,  mkexpr(i1), mkexpr(i0)));
      return;
   }
   /*NOTREACHED*/
   vassert(0);
}
/* Do deinterleaving for 3 128 bit vectors, for LD3 insns. */
static
void math_DEINTERLEAVE3_128(
        /*OUTx3*/ IRTemp* u0, IRTemp* u1, IRTemp* u2,
        UInt laneSzBlg2,
        IRTemp i0, IRTemp i1, IRTemp i2 )
{
   if (laneSzBlg2 == 3) {
      // 64x2
      // i2 == C1 B1, i1 == A1 C0, i0 == B0 A0,
      // u2 == C1 C0, u1 == B1 B0, u0 == A1 A0
      assign(*u2, ILO64x2( ROL(EX(i2),8), EX(i1)        ));
      assign(*u1, ILO64x2( EX(i2),        ROL(EX(i0),8) ));
      assign(*u0, ILO64x2( ROL(EX(i1),8), EX(i0)        ));
      return;
   }

   if (laneSzBlg2 == 2) {
      // 32x4
      // i2 == C3 B3 A3 C2, i1 == B2 A2 C1 B1, i0 == A1 C0 B0 A0
      // p2 == C3 C2 B3 B2, p1 == A3 A2 C1 C0, p0 == B1 B0 A1 A0
      // u2 == C3 C2 C1 C0, u1 == B3 B2 B1 B0, u0 == A3 A2 A1 A0
      IRTemp t_a1c0b0a0 = newTempV128();
      IRTemp t_a2c1b1a1 = newTempV128();
      IRTemp t_a3c2b2a2 = newTempV128();
      IRTemp t_a0c3b3a3 = newTempV128();
      IRTemp p0 = newTempV128();
      IRTemp p1 = newTempV128();
      IRTemp p2 = newTempV128();
      // Compute some intermediate values.
      assign(t_a1c0b0a0, EX(i0));
      assign(t_a2c1b1a1, SL(EX(i1),EX(i0),3*4));
      assign(t_a3c2b2a2, SL(EX(i2),EX(i1),2*4));
      assign(t_a0c3b3a3, SL(EX(i0),EX(i2),1*4));
      // First deinterleave into lane-pairs
      assign(p0, ILO32x4(EX(t_a2c1b1a1),EX(t_a1c0b0a0)));
      assign(p1, ILO64x2(ILO32x4(EX(t_a0c3b3a3), EX(t_a3c2b2a2)),
                         IHI32x4(EX(t_a2c1b1a1), EX(t_a1c0b0a0))));
      assign(p2, ILO32x4(ROR(EX(t_a0c3b3a3),1*4), ROR(EX(t_a3c2b2a2),1*4)));
      // Then deinterleave at 64x2 granularity.
      math_DEINTERLEAVE3_128(u0, u1, u2, 3, p0, p1, p2);
      return;
   }

   if (laneSzBlg2 == 1) {
      // 16x8
      // u2 == C7 C6 C5 C4 C3 C2 C1 C0
      // u1 == B7 B6 B5 B4 B3 B2 B1 B0
      // u0 == A7 A6 A5 A4 A3 A2 A1 A0
      //
      // i2 == C7 B7 A7 C6 B6 A6 C5 B5
      // i1 == A5 C4 B4 A4 C3 B3 A3 C2
      // i0 == B2 A2 C1 B1 A1 C0 B0 A0
      //
      // p2 == C7 C6 B7 B6 A7 A6 C5 C4
      // p1 == B5 B4 A5 A4 C3 C2 B3 B2
      // p0 == A3 A2 C1 C0 B1 B0 A1 A0

      IRTemp s0, s1, s2, s3, t0, t1, t2, t3, p0, p1, p2, c00111111;
      s0 = s1 = s2 = s3
         = t0 = t1 = t2 = t3 = p0 = p1 = p2 = c00111111 = IRTemp_INVALID;
      newTempsV128_4(&s0, &s1, &s2, &s3);
      newTempsV128_4(&t0, &t1, &t2, &t3);
      newTempsV128_4(&p0, &p1, &p2, &c00111111);

      // s0 == b2a2 c1b1a1 c0b0a0
      // s1 == b4a4 c3b3a3 c2b2a2
      // s2 == b6a6 c5b5a5 c4b4a4
      // s3 == b0a0 c7b7a7 c6b6a6
      assign(s0, EX(i0));
      assign(s1, SL(EX(i1),EX(i0),6*2));
      assign(s2, SL(EX(i2),EX(i1),4*2));
      assign(s3, SL(EX(i0),EX(i2),2*2));

      // t0 == 0 0 c1c0 b1b0 a1a0
      // t1 == 0 0 c3c2 b3b2 a3a2
      // t2 == 0 0 c5c4 b5b4 a5a4
      // t3 == 0 0 c7c6 b7b6 a7a6
      assign(c00111111, mkV128(0x0FFF));
      assign(t0, AND( ILO16x8( ROR(EX(s0),3*2), EX(s0)), EX(c00111111)));
      assign(t1, AND( ILO16x8( ROR(EX(s1),3*2), EX(s1)), EX(c00111111)));
      assign(t2, AND( ILO16x8( ROR(EX(s2),3*2), EX(s2)), EX(c00111111)));
      assign(t3, AND( ILO16x8( ROR(EX(s3),3*2), EX(s3)), EX(c00111111)));

      assign(p0, OR2(EX(t0),          SHL(EX(t1),6*2)));
      assign(p1, OR2(SHL(EX(t2),4*2), SHR(EX(t1),2*2)));
      assign(p2, OR2(SHL(EX(t3),2*2), SHR(EX(t2),4*2)));

      // Then deinterleave at 32x4 granularity.
      math_DEINTERLEAVE3_128(u0, u1, u2, 2, p0, p1, p2);
      return;
   }

   if (laneSzBlg2 == 0) {
      // 8x16.  This is the same scheme as for 16x8, with twice the
      // number of intermediate values.
      //
      // u2 == C{f..0}
      // u1 == B{f..0}
      // u0 == A{f..0}
      //
      // i2 == CBA{f} CBA{e} CBA{d} CBA{c} CBA{b} C{a}
      // i1 ==  BA{a} CBA{9} CBA{8} CBA{7} CBA{6} CB{5}
      // i0 ==   A{5} CBA{4} CBA{3} CBA{2} CBA{1} CBA{0}
      //
      // p2 == C{fe} B{fe} A{fe} C{dc} B{dc} A{dc} C{ba} B{ba}
      // p1 == A{ba} C{98} B{98} A{98} C{76} B{76} A{76} C{54}
      // p0 == B{54} A{54} C{32} B{32} A{32} C{10} B{10} A{10}
      //
      IRTemp s0, s1, s2, s3, s4, s5, s6, s7,
             t0, t1, t2, t3, t4, t5, t6, t7, p0, p1, p2, cMASK;
      s0 = s1 = s2 = s3 = s4 = s5 = s6 = s7
         = t0 = t1 = t2 = t3 = t4 = t5 = t6 = t7 = p0 = p1 = p2 = cMASK
         = IRTemp_INVALID;
      newTempsV128_4(&s0, &s1, &s2, &s3);
      newTempsV128_4(&s4, &s5, &s6, &s7);
      newTempsV128_4(&t0, &t1, &t2, &t3);
      newTempsV128_4(&t4, &t5, &t6, &t7);
      newTempsV128_4(&p0, &p1, &p2, &cMASK);

      // s0 == A{5} CBA{4} CBA{3} CBA{2} CBA{1} CBA{0}
      // s1 == A{7} CBA{6} CBA{5} CBA{4} CBA{3} CBA{2}
      // s2 == A{9} CBA{8} CBA{7} CBA{6} CBA{5} CBA{4}
      // s3 == A{b} CBA{a} CBA{9} CBA{8} CBA{7} CBA{6}
      // s4 == A{d} CBA{c} CBA{b} CBA{a} CBA{9} CBA{8}
      // s5 == A{f} CBA{e} CBA{d} CBA{c} CBA{b} CBA{a}
      // s6 == A{1} CBA{0} CBA{f} CBA{e} CBA{d} CBA{c}
      // s7 == A{3} CBA{2} CBA{1} CBA{0} CBA{f} CBA{e}
      assign(s0, SL(EX(i1),EX(i0), 0));
      assign(s1, SL(EX(i1),EX(i0), 6));
      assign(s2, SL(EX(i1),EX(i0),12));
      assign(s3, SL(EX(i2),EX(i1), 2));
      assign(s4, SL(EX(i2),EX(i1), 8));
      assign(s5, SL(EX(i2),EX(i1),14));
      assign(s6, SL(EX(i0),EX(i2), 4));
      assign(s7, SL(EX(i0),EX(i2),10));

      // t0 == 0--(ten)--0 C1 C0 B1 B0 A1 A0
      // t1 == 0--(ten)--0 C3 C2 B3 B2 A3 A2
      // t2 == 0--(ten)--0 C5 C4 B5 B4 A5 A4
      // t3 == 0--(ten)--0 C7 C6 B7 B6 A7 A6
      // t4 == 0--(ten)--0 C9 C8 B9 B8 A9 A8
      // t5 == 0--(ten)--0 Cb Ca Bb Ba Ab Aa
      // t6 == 0--(ten)--0 Cd Cc Bd Bc Ad Ac
      // t7 == 0--(ten)--0 Cf Ce Bf Be Af Ae
      assign(cMASK, mkV128(0x003F));
      assign(t0, AND( ILO8x16( ROR(EX(s0),3), EX(s0)), EX(cMASK)));
      assign(t1, AND( ILO8x16( ROR(EX(s1),3), EX(s1)), EX(cMASK)));
      assign(t2, AND( ILO8x16( ROR(EX(s2),3), EX(s2)), EX(cMASK)));
      assign(t3, AND( ILO8x16( ROR(EX(s3),3), EX(s3)), EX(cMASK)));
      assign(t4, AND( ILO8x16( ROR(EX(s4),3), EX(s4)), EX(cMASK)));
      assign(t5, AND( ILO8x16( ROR(EX(s5),3), EX(s5)), EX(cMASK)));
      assign(t6, AND( ILO8x16( ROR(EX(s6),3), EX(s6)), EX(cMASK)));
      assign(t7, AND( ILO8x16( ROR(EX(s7),3), EX(s7)), EX(cMASK)));

      assign(p0, OR3( SHL(EX(t2),12), SHL(EX(t1),6), EX(t0) ));
      assign(p1, OR4( SHL(EX(t5),14), SHL(EX(t4),8),
                      SHL(EX(t3),2), SHR(EX(t2),4) ));
      assign(p2, OR3( SHL(EX(t7),10), SHL(EX(t6),4), SHR(EX(t5),2) ));

      // Then deinterleave at 16x8 granularity.
      math_DEINTERLEAVE3_128(u0, u1, u2, 1, p0, p1, p2);
      return;
   }
   /*NOTREACHED*/
   vassert(0);
}
/* Do deinterleaving for 4 128 bit vectors, for LD4 insns. */
static
void math_DEINTERLEAVE4_128(
        /*OUTx4*/ IRTemp* u0, IRTemp* u1, IRTemp* u2, IRTemp* u3,
        UInt laneSzBlg2,
        IRTemp i0, IRTemp i1, IRTemp i2, IRTemp i3 )
{
   if (laneSzBlg2 == 3) {
      // 64x2
      assign(*u0, ILO64x2(EX(i2), EX(i0)));
      assign(*u1, IHI64x2(EX(i2), EX(i0)));
      assign(*u2, ILO64x2(EX(i3), EX(i1)));
      assign(*u3, IHI64x2(EX(i3), EX(i1)));
      return;
   }
   if (laneSzBlg2 == 2) {
      // 32x4
      IRTemp p0 = newTempV128();
      IRTemp p2 = newTempV128();
      IRTemp p1 = newTempV128();
      IRTemp p3 = newTempV128();
      assign(p0, ILO32x4(EX(i1), EX(i0)));
      assign(p1, IHI32x4(EX(i1), EX(i0)));
      assign(p2, ILO32x4(EX(i3), EX(i2)));
      assign(p3, IHI32x4(EX(i3), EX(i2)));
      // And now do what we did for the 64-bit case.
      math_DEINTERLEAVE4_128(u0, u1, u2, u3, 3, p0, p1, p2, p3);
      return;
   }
   if (laneSzBlg2 == 1) {
      // 16x8
      // Deinterleave into 32-bit chunks, then do as the 32-bit case.
      IRTemp p0 = newTempV128();
      IRTemp p1 = newTempV128();
      IRTemp p2 = newTempV128();
      IRTemp p3 = newTempV128();
      assign(p0, IHI16x8(EX(i0), SHL(EX(i0), 8)));
      assign(p1, IHI16x8(EX(i1), SHL(EX(i1), 8)));
      assign(p2, IHI16x8(EX(i2), SHL(EX(i2), 8)));
      assign(p3, IHI16x8(EX(i3), SHL(EX(i3), 8)));
      // From here on is like the 32 bit case.
      math_DEINTERLEAVE4_128(u0, u1, u2, u3, 2, p0, p1, p2, p3);
      return;
   }
   if (laneSzBlg2 == 0) {
      // 8x16
      // Deinterleave into 16-bit chunks, then do as the 16-bit case.
      IRTemp p0 = newTempV128();
      IRTemp p1 = newTempV128();
      IRTemp p2 = newTempV128();
      IRTemp p3 = newTempV128();
      assign(p0, IHI64x2( IHI8x16(EX(i0),ROL(EX(i0),4)),
                          ILO8x16(EX(i0),ROL(EX(i0),4)) ));
      assign(p1, IHI64x2( IHI8x16(EX(i1),ROL(EX(i1),4)),
                          ILO8x16(EX(i1),ROL(EX(i1),4)) ));
      assign(p2, IHI64x2( IHI8x16(EX(i2),ROL(EX(i2),4)),
                          ILO8x16(EX(i2),ROL(EX(i2),4)) ));
      assign(p3, IHI64x2( IHI8x16(EX(i3),ROL(EX(i3),4)),
                          ILO8x16(EX(i3),ROL(EX(i3),4)) ));
      // From here on is like the 16 bit case.
      math_DEINTERLEAVE4_128(u0, u1, u2, u3, 1, p0, p1, p2, p3);
      return;
   }
   /*NOTREACHED*/
   vassert(0);
}
/* Wrappers that use the full-width (de)interleavers to do half-width
   (de)interleaving.  The scheme is to clone each input lane in the
   lower half of each incoming value, do a full width (de)interleave
   at the next lane size up, and remove every other lane of the
   result.  The returned values may have any old junk in the upper
   64 bits -- the caller must ignore that. */

/* Helper function -- get doubling and narrowing operations. */
static
void math_get_doubler_and_halver ( /*OUT*/IROp* doubler,
                                   /*OUT*/IROp* halver,
                                   UInt laneSzBlg2 )
{
   switch (laneSzBlg2) {
      case 2:
         *doubler = Iop_InterleaveLO32x4; *halver = Iop_CatEvenLanes32x4;
         break;
      case 1:
         *doubler = Iop_InterleaveLO16x8; *halver = Iop_CatEvenLanes16x8;
         break;
      case 0:
         *doubler = Iop_InterleaveLO8x16; *halver = Iop_CatEvenLanes8x16;
         break;
      default:
         vassert(0);
   }
}
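/* Informal sketch of the scheme above (illustration only, using the
   2-register, 32-bit-lane case handled by math_INTERLEAVE2_64 below):
   with u0 = .. a1 a0 and u1 = .. b1 b0 (payload in the low 64 bits),
   the doubler (InterleaveLO32x4) makes du0 = a1 a1 a0 a0 and
   du1 = b1 b1 b0 b0; a full-width interleave at 64-bit lanes then
   gives di0 = b0 b0 a0 a0 and di1 = b1 b1 a1 a1; and the halver
   (CatEvenLanes32x4) produces i0 = .. b0 a0 and i1 = .. b1 a1, with
   junk in the upper 64 bits as advertised. */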
/* Do interleaving for 1 64 bit vector, for ST1 insns. */
static
void math_INTERLEAVE1_64( /*OUTx1*/ IRTemp* i0,
                          UInt laneSzBlg2, IRTemp u0 )
{
   assign(*i0, mkexpr(u0));
}
/* Do interleaving for 2 64 bit vectors, for ST2 insns. */
static
void math_INTERLEAVE2_64( /*OUTx2*/ IRTemp* i0, IRTemp* i1,
                          UInt laneSzBlg2, IRTemp u0, IRTemp u1 )
{
   if (laneSzBlg2 == 3) {
      // 1x64, degenerate case
      assign(*i0, EX(u0));
      assign(*i1, EX(u1));
      return;
   }

   vassert(laneSzBlg2 >= 0 && laneSzBlg2 <= 2);
   IROp doubler = Iop_INVALID, halver = Iop_INVALID;
   math_get_doubler_and_halver(&doubler, &halver, laneSzBlg2);

   IRTemp du0 = newTempV128();
   IRTemp du1 = newTempV128();
   assign(du0, binop(doubler, EX(u0), EX(u0)));
   assign(du1, binop(doubler, EX(u1), EX(u1)));
   IRTemp di0 = newTempV128();
   IRTemp di1 = newTempV128();
   math_INTERLEAVE2_128(&di0, &di1, laneSzBlg2 + 1, du0, du1);
   assign(*i0, binop(halver, EX(di0), EX(di0)));
   assign(*i1, binop(halver, EX(di1), EX(di1)));
}
/* Do interleaving for 3 64 bit vectors, for ST3 insns. */
static
void math_INTERLEAVE3_64(
        /*OUTx3*/ IRTemp* i0, IRTemp* i1, IRTemp* i2,
        UInt laneSzBlg2,
        IRTemp u0, IRTemp u1, IRTemp u2 )
{
   if (laneSzBlg2 == 3) {
      // 1x64, degenerate case
      assign(*i0, EX(u0));
      assign(*i1, EX(u1));
      assign(*i2, EX(u2));
      return;
   }

   vassert(laneSzBlg2 >= 0 && laneSzBlg2 <= 2);
   IROp doubler = Iop_INVALID, halver = Iop_INVALID;
   math_get_doubler_and_halver(&doubler, &halver, laneSzBlg2);

   IRTemp du0 = newTempV128();
   IRTemp du1 = newTempV128();
   IRTemp du2 = newTempV128();
   assign(du0, binop(doubler, EX(u0), EX(u0)));
   assign(du1, binop(doubler, EX(u1), EX(u1)));
   assign(du2, binop(doubler, EX(u2), EX(u2)));
   IRTemp di0 = newTempV128();
   IRTemp di1 = newTempV128();
   IRTemp di2 = newTempV128();
   math_INTERLEAVE3_128(&di0, &di1, &di2, laneSzBlg2 + 1, du0, du1, du2);
   assign(*i0, binop(halver, EX(di0), EX(di0)));
   assign(*i1, binop(halver, EX(di1), EX(di1)));
   assign(*i2, binop(halver, EX(di2), EX(di2)));
}
/* Do interleaving for 4 64 bit vectors, for ST4 insns. */
static
void math_INTERLEAVE4_64(
        /*OUTx4*/ IRTemp* i0, IRTemp* i1, IRTemp* i2, IRTemp* i3,
        UInt laneSzBlg2,
        IRTemp u0, IRTemp u1, IRTemp u2, IRTemp u3 )
{
   if (laneSzBlg2 == 3) {
      // 1x64, degenerate case
      assign(*i0, EX(u0));
      assign(*i1, EX(u1));
      assign(*i2, EX(u2));
      assign(*i3, EX(u3));
      return;
   }

   vassert(laneSzBlg2 >= 0 && laneSzBlg2 <= 2);
   IROp doubler = Iop_INVALID, halver = Iop_INVALID;
   math_get_doubler_and_halver(&doubler, &halver, laneSzBlg2);

   IRTemp du0 = newTempV128();
   IRTemp du1 = newTempV128();
   IRTemp du2 = newTempV128();
   IRTemp du3 = newTempV128();
   assign(du0, binop(doubler, EX(u0), EX(u0)));
   assign(du1, binop(doubler, EX(u1), EX(u1)));
   assign(du2, binop(doubler, EX(u2), EX(u2)));
   assign(du3, binop(doubler, EX(u3), EX(u3)));
   IRTemp di0 = newTempV128();
   IRTemp di1 = newTempV128();
   IRTemp di2 = newTempV128();
   IRTemp di3 = newTempV128();
   math_INTERLEAVE4_128(&di0, &di1, &di2, &di3,
                        laneSzBlg2 + 1, du0, du1, du2, du3);
   assign(*i0, binop(halver, EX(di0), EX(di0)));
   assign(*i1, binop(halver, EX(di1), EX(di1)));
   assign(*i2, binop(halver, EX(di2), EX(di2)));
   assign(*i3, binop(halver, EX(di3), EX(di3)));
}
/* Do deinterleaving for 1 64 bit vector, for LD1 insns. */
static
void math_DEINTERLEAVE1_64( /*OUTx1*/ IRTemp* u0,
                            UInt laneSzBlg2, IRTemp i0 )
{
   assign(*u0, mkexpr(i0));
}
/* Do deinterleaving for 2 64 bit vectors, for LD2 insns. */
static
void math_DEINTERLEAVE2_64( /*OUTx2*/ IRTemp* u0, IRTemp* u1,
                            UInt laneSzBlg2, IRTemp i0, IRTemp i1 )
{
   if (laneSzBlg2 == 3) {
      // 1x64, degenerate case
      assign(*u0, EX(i0));
      assign(*u1, EX(i1));
      return;
   }

   vassert(laneSzBlg2 >= 0 && laneSzBlg2 <= 2);
   IROp doubler = Iop_INVALID, halver = Iop_INVALID;
   math_get_doubler_and_halver(&doubler, &halver, laneSzBlg2);

   IRTemp di0 = newTempV128();
   IRTemp di1 = newTempV128();
   assign(di0, binop(doubler, EX(i0), EX(i0)));
   assign(di1, binop(doubler, EX(i1), EX(i1)));

   IRTemp du0 = newTempV128();
   IRTemp du1 = newTempV128();
   math_DEINTERLEAVE2_128(&du0, &du1, laneSzBlg2 + 1, di0, di1);
   assign(*u0, binop(halver, EX(du0), EX(du0)));
   assign(*u1, binop(halver, EX(du1), EX(du1)));
}
/* Do deinterleaving for 3 64 bit vectors, for LD3 insns. */
static
void math_DEINTERLEAVE3_64(
        /*OUTx3*/ IRTemp* u0, IRTemp* u1, IRTemp* u2,
        UInt laneSzBlg2,
        IRTemp i0, IRTemp i1, IRTemp i2 )
{
   if (laneSzBlg2 == 3) {
      // 1x64, degenerate case
      assign(*u0, EX(i0));
      assign(*u1, EX(i1));
      assign(*u2, EX(i2));
      return;
   }

   vassert(laneSzBlg2 >= 0 && laneSzBlg2 <= 2);
   IROp doubler = Iop_INVALID, halver = Iop_INVALID;
   math_get_doubler_and_halver(&doubler, &halver, laneSzBlg2);

   IRTemp di0 = newTempV128();
   IRTemp di1 = newTempV128();
   IRTemp di2 = newTempV128();
   assign(di0, binop(doubler, EX(i0), EX(i0)));
   assign(di1, binop(doubler, EX(i1), EX(i1)));
   assign(di2, binop(doubler, EX(i2), EX(i2)));
   IRTemp du0 = newTempV128();
   IRTemp du1 = newTempV128();
   IRTemp du2 = newTempV128();
   math_DEINTERLEAVE3_128(&du0, &du1, &du2, laneSzBlg2 + 1, di0, di1, di2);
   assign(*u0, binop(halver, EX(du0), EX(du0)));
   assign(*u1, binop(halver, EX(du1), EX(du1)));
   assign(*u2, binop(halver, EX(du2), EX(du2)));
}
/* Do deinterleaving for 4 64 bit vectors, for LD4 insns. */
static
void math_DEINTERLEAVE4_64(
        /*OUTx4*/ IRTemp* u0, IRTemp* u1, IRTemp* u2, IRTemp* u3,
        UInt laneSzBlg2,
        IRTemp i0, IRTemp i1, IRTemp i2, IRTemp i3 )
{
   if (laneSzBlg2 == 3) {
      // 1x64, degenerate case
      assign(*u0, EX(i0));
      assign(*u1, EX(i1));
      assign(*u2, EX(i2));
      assign(*u3, EX(i3));
      return;
   }

   vassert(laneSzBlg2 >= 0 && laneSzBlg2 <= 2);
   IROp doubler = Iop_INVALID, halver = Iop_INVALID;
   math_get_doubler_and_halver(&doubler, &halver, laneSzBlg2);

   IRTemp di0 = newTempV128();
   IRTemp di1 = newTempV128();
   IRTemp di2 = newTempV128();
   IRTemp di3 = newTempV128();
   assign(di0, binop(doubler, EX(i0), EX(i0)));
   assign(di1, binop(doubler, EX(i1), EX(i1)));
   assign(di2, binop(doubler, EX(i2), EX(i2)));
   assign(di3, binop(doubler, EX(i3), EX(i3)));
   IRTemp du0 = newTempV128();
   IRTemp du1 = newTempV128();
   IRTemp du2 = newTempV128();
   IRTemp du3 = newTempV128();
   math_DEINTERLEAVE4_128(&du0, &du1, &du2, &du3,
                          laneSzBlg2 + 1, di0, di1, di2, di3);
   assign(*u0, binop(halver, EX(du0), EX(du0)));
   assign(*u1, binop(halver, EX(du1), EX(du1)));
   assign(*u2, binop(halver, EX(du2), EX(du2)));
   assign(*u3, binop(halver, EX(du3), EX(du3)));
}
/*------------------------------------------------------------*/
/*--- Load and Store instructions                          ---*/
/*------------------------------------------------------------*/
/* Generate the EA for a "reg + reg" style amode.  This is done from
   parts of the insn, but for sanity checking's sake it takes the whole
   insn.  This appears to depend on insn[15:12], with opt=insn[15:13]
   and S=insn[12].

   The possible forms, along with their opt:S values, are:

      011:0   Xn|SP + Xm
      011:1   Xn|SP + Xm * transfer_szB
      111:1   Xn|SP + Xm * transfer_szB
      010:0   Xn|SP + 32Uto64(Wm)
      010:1   Xn|SP + 32Uto64(Wm) * transfer_szB
      110:0   Xn|SP + 32Sto64(Wm)
      110:1   Xn|SP + 32Sto64(Wm) * transfer_szB

   Rm is insn[20:16].  Rn is insn[9:5].  Rt is insn[4:0].  Log2 of
   the transfer size is insn[23,31,30].  For integer loads/stores,
   insn[23] is zero, hence szLg2 can be at most 3 in such cases.

   If the decoding fails, it returns IRTemp_INVALID.

   isInt is True iff this decoding is for transfers to/from integer
   registers.  If False it is for transfers to/from vector registers.
*/
static IRTemp gen_indexed_EA ( /*OUT*/HChar* buf, UInt insn, Bool isInt )
{
   UInt    optS  = SLICE_UInt(insn, 15, 12);
   UInt    mm    = SLICE_UInt(insn, 20, 16);
   UInt    nn    = SLICE_UInt(insn, 9, 5);
   UInt    szLg2 = (isInt ? 0 : (SLICE_UInt(insn, 23, 23) << 2))
                   | SLICE_UInt(insn, 31, 30); // Log2 of the size

   buf[0] = 0;

   /* Sanity checks, that this really is a load/store insn. */
   if (SLICE_UInt(insn, 11, 10) != BITS2(1,0))
      goto fail;

   if (isInt
       && SLICE_UInt(insn, 29, 21) != BITS9(1,1,1,0,0,0,0,1,1)/*LDR*/
       && SLICE_UInt(insn, 29, 21) != BITS9(1,1,1,0,0,0,0,0,1)/*STR*/
       && SLICE_UInt(insn, 29, 21) != BITS9(1,1,1,0,0,0,1,0,1)/*LDRSbhw Xt*/
       && SLICE_UInt(insn, 29, 21) != BITS9(1,1,1,0,0,0,1,1,1))/*LDRSbhw Wt*/
      goto fail;

   if (!isInt
       && SLICE_UInt(insn, 29, 24) != BITS6(1,1,1,1,0,0)) /*LDR/STR*/
      goto fail;

   /* Throw out non-verified but possibly valid cases. */
   switch (szLg2) {
      case BITS3(0,0,0): break; //  8 bit, valid for both int and vec
      case BITS3(0,0,1): break; // 16 bit, valid for both int and vec
      case BITS3(0,1,0): break; // 32 bit, valid for both int and vec
      case BITS3(0,1,1): break; // 64 bit, valid for both int and vec
      case BITS3(1,0,0): // can only ever be valid for the vector case
                         if (isInt) goto fail; else break;
      case BITS3(1,0,1): // these sizes are never valid
      case BITS3(1,1,0):
      case BITS3(1,1,1): goto fail;

      default: vassert(0);
   }

   IRExpr* rhs = NULL;
   switch (optS) {
      case BITS4(1,1,1,0): goto fail; //ATC
      case BITS4(0,1,1,0):
         rhs = getIReg64orZR(mm);
         vex_sprintf(buf, "[%s, %s]",
                     nameIReg64orZR(nn), nameIReg64orZR(mm));
         break;
      case BITS4(1,1,1,1): goto fail; //ATC
      case BITS4(0,1,1,1):
         rhs = binop(Iop_Shl64, getIReg64orZR(mm), mkU8(szLg2));
         vex_sprintf(buf, "[%s, %s lsl %u]",
                     nameIReg64orZR(nn), nameIReg64orZR(mm), szLg2);
         break;
      case BITS4(0,1,0,0):
         rhs = unop(Iop_32Uto64, getIReg32orZR(mm));
         vex_sprintf(buf, "[%s, %s uxtx]",
                     nameIReg64orZR(nn), nameIReg32orZR(mm));
         break;
      case BITS4(0,1,0,1):
         rhs = binop(Iop_Shl64,
                     unop(Iop_32Uto64, getIReg32orZR(mm)), mkU8(szLg2));
         vex_sprintf(buf, "[%s, %s uxtx, lsl %u]",
                     nameIReg64orZR(nn), nameIReg32orZR(mm), szLg2);
         break;
      case BITS4(1,1,0,0):
         rhs = unop(Iop_32Sto64, getIReg32orZR(mm));
         vex_sprintf(buf, "[%s, %s sxtx]",
                     nameIReg64orZR(nn), nameIReg32orZR(mm));
         break;
      case BITS4(1,1,0,1):
         rhs = binop(Iop_Shl64,
                     unop(Iop_32Sto64, getIReg32orZR(mm)), mkU8(szLg2));
         vex_sprintf(buf, "[%s, %s sxtx, lsl %u]",
                     nameIReg64orZR(nn), nameIReg32orZR(mm), szLg2);
         break;
      default:
         /* The rest appear to be genuinely invalid */
         goto fail;
   }

   IRTemp res = newTemp(Ity_I64);
   assign(res, binop(Iop_Add64, getIReg64orSP(nn), rhs));
   return res;

  fail:
   if (0 /*really, sigill_diag, but that causes too much plumbing*/) {
      vex_printf("gen_indexed_EA: unhandled case optS == 0x%x\n", optS);
   }
   return IRTemp_INVALID;
}
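/* For example (illustrative only): "ldr x1, [x2, x3, lsl #3]" has
   opt:S == 011:1 and szLg2 == 3, so the EA computed above is
   X2 + (X3 << 3), i.e. Xn|SP + Xm * transfer_szB. */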
/* Generate an 8/16/32/64 bit integer store to ADDR for the lowest
   bits of DATAE :: Ity_I64. */
static void gen_narrowing_store ( UInt szB, IRTemp addr, IRExpr* dataE )
{
   IRExpr* addrE = mkexpr(addr);
   switch (szB) {
      case 8:
         storeLE(addrE, dataE);
         break;
      case 4:
         storeLE(addrE, unop(Iop_64to32, dataE));
         break;
      case 2:
         storeLE(addrE, unop(Iop_64to16, dataE));
         break;
      case 1:
         storeLE(addrE, unop(Iop_64to8, dataE));
         break;
      default:
         vassert(0);
   }
}
/* Generate an 8/16/32/64 bit unsigned widening load from ADDR,
   placing the result in an Ity_I64 temporary. */
static IRTemp gen_zwidening_load ( UInt szB, IRTemp addr )
{
   IRTemp  res   = newTemp(Ity_I64);
   IRExpr* addrE = mkexpr(addr);
   switch (szB) {
      case 8:
         assign(res, loadLE(Ity_I64,addrE));
         break;
      case 4:
         assign(res, unop(Iop_32Uto64, loadLE(Ity_I32,addrE)));
         break;
      case 2:
         assign(res, unop(Iop_16Uto64, loadLE(Ity_I16,addrE)));
         break;
      case 1:
         assign(res, unop(Iop_8Uto64, loadLE(Ity_I8,addrE)));
         break;
      default:
         vassert(0);
   }
   return res;
}
/* Generate a SIGBUS followed by a restart of the current instruction if
   `effective_addr` is not `align`-aligned.  This is required behaviour for
   atomic instructions.  This assumes that guest_PC_curr_instr is set
   correctly!

   This is hardwired to generate SIGBUS because so far the only supported arm64
   target (arm64-linux) does that.  Should we need to later extend it to
   generate some other signal, use the same scheme as with
   gen_SIGNAL_if_not_XX_aligned in guest_amd64_toIR.c. */
static
void gen_SIGBUS_if_not_XX_aligned ( IRTemp effective_addr, ULong align )
{
   vassert(align == 16 || align == 8 || align == 4 || align == 2);
   stmt(
      IRStmt_Exit(
         binop(Iop_CmpNE64,
               binop(Iop_And64,mkexpr(effective_addr),mkU64(align-1)),
               mkU64(0)),
         Ijk_SigBUS,
         IRConst_U64(guest_PC_curr_instr),
         OFFB_PC
      )
   );
}
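/* E.g. with align == 8 the exit above tests (effective_addr & 7) != 0,
   so a misaligned address takes the SIGBUS exit before any access is
   attempted. */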
/* Generate a "standard 7" name, from bitQ and size.  But also
   allow ".1d" since that's occasionally useful. */
static
const HChar* nameArr_Q_SZ ( UInt bitQ, UInt size )
{
   vassert(bitQ <= 1 && size <= 3);
   static const HChar* nms[8]
      = { "8b", "4h", "2s", "1d", "16b", "8h", "4s", "2d" };
   UInt ix = (bitQ << 2) | size;
   vassert(ix < 8);
   return nms[ix];
}
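/* E.g. bitQ == 1, size == 2 selects index 6, giving "4s", while
   bitQ == 0, size == 3 selects index 3, the special-case "1d". */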
static
Bool dis_ARM64_load_store(/*MB_OUT*/DisResult* dres, UInt insn,
                          const VexAbiInfo* abiinfo, Bool sigill_diag)
{
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   /* ------------ LDR,STR (immediate, uimm12) ----------- */
   /* uimm12 is scaled by the transfer size

      31 29  26    21    9  4
      |  |   |     |     |  |
      11 111 00100 imm12 nn tt    STR  Xt, [Xn|SP, #imm12 * 8]
      11 111 00101 imm12 nn tt    LDR  Xt, [Xn|SP, #imm12 * 8]

      10 111 00100 imm12 nn tt    STR  Wt, [Xn|SP, #imm12 * 4]
      10 111 00101 imm12 nn tt    LDR  Wt, [Xn|SP, #imm12 * 4]

      01 111 00100 imm12 nn tt    STRH Wt, [Xn|SP, #imm12 * 2]
      01 111 00101 imm12 nn tt    LDRH Wt, [Xn|SP, #imm12 * 2]

      00 111 00100 imm12 nn tt    STRB Wt, [Xn|SP, #imm12 * 1]
      00 111 00101 imm12 nn tt    LDRB Wt, [Xn|SP, #imm12 * 1]
   */
   if (INSN(29,23) == BITS7(1,1,1,0,0,1,0)) {
      UInt   szLg2 = INSN(31,30);
      UInt   szB   = 1 << szLg2;
      Bool   isLD  = INSN(22,22) == 1;
      UInt   offs  = INSN(21,10) * szB;
      UInt   nn    = INSN(9,5);
      UInt   tt    = INSN(4,0);
      IRTemp ta    = newTemp(Ity_I64);
      assign(ta, binop(Iop_Add64, getIReg64orSP(nn), mkU64(offs)));
      if (nn == 31) { /* FIXME generate stack alignment check */ }
      vassert(szLg2 < 4);
      if (isLD) {
         putIReg64orZR(tt, mkexpr(gen_zwidening_load(szB, ta)));
      } else {
         gen_narrowing_store(szB, ta, getIReg64orZR(tt));
      }
      const HChar* ld_name[4] = { "ldrb", "ldrh", "ldr", "ldr" };
      const HChar* st_name[4] = { "strb", "strh", "str", "str" };
      DIP("%s %s, [%s, #%u]\n",
          (isLD ? ld_name : st_name)[szLg2], nameIRegOrZR(szB == 8, tt),
          nameIReg64orSP(nn), offs);
      return True;
   }
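   /* For example, "ldr x1, [x2, #16]" is encoded with szLg2 == 3 and
      imm12 == 2, giving offs == 16, whereas "ldrb w1, [x2, #16]" uses
      szLg2 == 0 and imm12 == 16 directly. */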
   /* ------------ LDUR,STUR (immediate, simm9) ----------- */
   /* 31 29  26      20   11 9  4
      |  |   |       |    |  |  |
      (at-Rn-then-Rn=EA)  |  |  |
      sz 111 00000 0 imm9 01 Rn Rt   STR Rt, [Xn|SP], #simm9
      sz 111 00001 0 imm9 01 Rn Rt   LDR Rt, [Xn|SP], #simm9

      (at-EA-then-Rn=EA)
      sz 111 00000 0 imm9 11 Rn Rt   STR Rt, [Xn|SP, #simm9]!
      sz 111 00001 0 imm9 11 Rn Rt   LDR Rt, [Xn|SP, #simm9]!

      (at-Rn)
      sz 111 00000 0 imm9 00 Rn Rt   STR Rt, [Xn|SP, #simm9]
      sz 111 00001 0 imm9 00 Rn Rt   LDR Rt, [Xn|SP, #simm9]

      simm9 is unscaled.

      The case 'wback && Rn == Rt && Rt != 31' is disallowed.  In the
      load case this is because it would create two competing values
      for Rt.  In the store case the reason is unclear, but the spec
      disallows it anyway.

      Stores are narrowing, loads are unsigned widening.  sz encodes
      the transfer size in the normal way: 00=1, 01=2, 10=4, 11=8.
   */
   if ((INSN(29,21) & BITS9(1,1,1, 1,1,1,1,0, 1))
                   == BITS9(1,1,1, 0,0,0,0,0, 0)) {
      UInt szLg2  = INSN(31,30);
      UInt szB    = 1 << szLg2;
      Bool isLoad = INSN(22,22) == 1;
      UInt imm9   = INSN(20,12);
      UInt nn     = INSN(9,5);
      UInt tt     = INSN(4,0);
      Bool wBack  = INSN(10,10) == 1;
      UInt how    = INSN(11,10);
      if (how == BITS2(1,0) || (wBack && nn == tt && tt != 31)) {
         /* undecodable; fall through */
      } else {
         if (nn == 31) { /* FIXME generate stack alignment check */ }

         // Compute the transfer address TA and the writeback address WA.
         IRTemp tRN = newTemp(Ity_I64);
         assign(tRN, getIReg64orSP(nn));
         IRTemp tEA = newTemp(Ity_I64);
         Long simm9 = (Long)sx_to_64(imm9, 9);
         assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm9)));

         IRTemp tTA = newTemp(Ity_I64);
         IRTemp tWA = newTemp(Ity_I64);
         switch (how) {
            case BITS2(0,1):
               assign(tTA, mkexpr(tRN)); assign(tWA, mkexpr(tEA)); break;
            case BITS2(1,1):
               assign(tTA, mkexpr(tEA)); assign(tWA, mkexpr(tEA)); break;
            case BITS2(0,0):
               assign(tTA, mkexpr(tEA)); /* tWA is unused */ break;
            default:
               vassert(0); /* NOTREACHED */
         }

         /* Normally rN would be updated after the transfer.  However, in
            the special cases typified by
               str x30, [sp,#-16]!
               str x30, [sp,#-32]!
            it is necessary to update SP before the transfer, (1)
            because Memcheck will otherwise complain about a write
            below the stack pointer, and (2) because the segfault
            stack extension mechanism will otherwise extend the stack
            only down to SP before the instruction, which might not be
            far enough, if the -16/-32 bit takes the actual access
            address to the next page.
         */
         Bool earlyWBack
           = wBack && simm9 < 0
             && (szB == 8 || szB == 4 || szB == 2 || szB == 1)
             && how == BITS2(1,1) && nn == 31 && !isLoad;

         if (wBack && earlyWBack)
            putIReg64orSP(nn, mkexpr(tEA));

         if (isLoad) {
            putIReg64orZR(tt, mkexpr(gen_zwidening_load(szB, tTA)));
         } else {
            gen_narrowing_store(szB, tTA, getIReg64orZR(tt));
         }

         if (wBack && !earlyWBack)
            putIReg64orSP(nn, mkexpr(tEA));

         const HChar* ld_name[4] = { "ldurb", "ldurh", "ldur", "ldur" };
         const HChar* st_name[4] = { "sturb", "sturh", "stur", "stur" };
         const HChar* fmt_str = NULL;
         switch (how) {
            case BITS2(0,1):
               fmt_str = "%s %s, [%s], #%lld (at-Rn-then-Rn=EA)\n";
               break;
            case BITS2(1,1):
               fmt_str = "%s %s, [%s, #%lld]! (at-EA-then-Rn=EA)\n";
               break;
            case BITS2(0,0):
               fmt_str = "%s %s, [%s, #%lld] (at-Rn)\n";
               break;
            default:
               vassert(0);
         }
         DIP(fmt_str, (isLoad ? ld_name : st_name)[szLg2],
                      nameIRegOrZR(szB == 8, tt),
                      nameIReg64orSP(nn), simm9);
         return True;
      }
   }
5038 /* -------- LDP,STP (immediate, simm7) (INT REGS) -------- */
5041 x==0 => 32 bit transfers, and zero extended loads
5042 x==1 => 64 bit transfers
5043 simm7 is scaled by the (single-register) transfer size
5046 x0 101 0001 L imm7 Rt2 Rn Rt1 mmP Rt1,Rt2, [Xn|SP], #imm
5049 x0 101 0011 L imm7 Rt2 Rn Rt1 mmP Rt1,Rt2, [Xn|SP, #imm]!
5052 x0 101 0010 L imm7 Rt2 Rn Rt1 mmP Rt1,Rt2, [Xn|SP, #imm]
5054 UInt insn_30_23
= INSN(30,23);
5055 if (insn_30_23
== BITS8(0,1,0,1,0,0,0,1)
5056 || insn_30_23
== BITS8(0,1,0,1,0,0,1,1)
5057 || insn_30_23
== BITS8(0,1,0,1,0,0,1,0)) {
5058 UInt bL
= INSN(22,22);
5059 UInt bX
= INSN(31,31);
5060 UInt bWBack
= INSN(23,23);
5061 UInt rT1
= INSN(4,0);
5062 UInt rN
= INSN(9,5);
5063 UInt rT2
= INSN(14,10);
5064 Long simm7
= (Long
)sx_to_64(INSN(21,15), 7);
5065 if ((bWBack
&& (rT1
== rN
|| rT2
== rN
) && rN
!= 31)
5066 || (bL
&& rT1
== rT2
)) {
5067 /* undecodable; fall through */
5069 if (rN
== 31) { /* FIXME generate stack alignment check */ }
5071 // Compute the transfer address TA and the writeback address WA.
5072 IRTemp tRN
= newTemp(Ity_I64
);
5073 assign(tRN
, getIReg64orSP(rN
));
5074 IRTemp tEA
= newTemp(Ity_I64
);
5075 simm7
= (bX
? 8 : 4) * simm7
;
5076 assign(tEA
, binop(Iop_Add64
, mkexpr(tRN
), mkU64(simm7
)));
5078 IRTemp tTA
= newTemp(Ity_I64
);
5079 IRTemp tWA
= newTemp(Ity_I64
);
5080 switch (INSN(24,23)) {
5082 assign(tTA
, mkexpr(tRN
)); assign(tWA
, mkexpr(tEA
)); break;
5084 assign(tTA
, mkexpr(tEA
)); assign(tWA
, mkexpr(tEA
)); break;
5086 assign(tTA
, mkexpr(tEA
)); /* tWA is unused */ break;
5088 vassert(0); /* NOTREACHED */
5091 /* Normally rN would be updated after the transfer. However, in
5092 the special case typifed by
5093 stp x29, x30, [sp,#-112]!
5094 it is necessary to update SP before the transfer, (1)
5095 because Memcheck will otherwise complain about a write
5096 below the stack pointer, and (2) because the segfault
5097 stack extension mechanism will otherwise extend the stack
5098 only down to SP before the instruction, which might not be
5099 far enough, if the -112 bit takes the actual access
5100 address to the next page.
5103 = bWBack
&& simm7
< 0
5104 && INSN(24,23) == BITS2(1,1) && rN
== 31 && bL
== 0;
5106 if (bWBack
&& earlyWBack
)
5107 putIReg64orSP(rN
, mkexpr(tEA
));
5109 /**/ if (bL
== 1 && bX
== 1) {
5111 putIReg64orZR(rT1
, loadLE(Ity_I64
,
5112 binop(Iop_Add64
,mkexpr(tTA
),mkU64(0))));
5113 putIReg64orZR(rT2
, loadLE(Ity_I64
,
5114 binop(Iop_Add64
,mkexpr(tTA
),mkU64(8))));
5115 } else if (bL
== 1 && bX
== 0) {
5117 putIReg32orZR(rT1
, loadLE(Ity_I32
,
5118 binop(Iop_Add64
,mkexpr(tTA
),mkU64(0))));
5119 putIReg32orZR(rT2
, loadLE(Ity_I32
,
5120 binop(Iop_Add64
,mkexpr(tTA
),mkU64(4))));
5121 } else if (bL
== 0 && bX
== 1) {
5123 storeLE(binop(Iop_Add64
,mkexpr(tTA
),mkU64(0)),
5124 getIReg64orZR(rT1
));
5125 storeLE(binop(Iop_Add64
,mkexpr(tTA
),mkU64(8)),
5126 getIReg64orZR(rT2
));
5128 vassert(bL
== 0 && bX
== 0);
5130 storeLE(binop(Iop_Add64
,mkexpr(tTA
),mkU64(0)),
5131 getIReg32orZR(rT1
));
5132 storeLE(binop(Iop_Add64
,mkexpr(tTA
),mkU64(4)),
5133 getIReg32orZR(rT2
));
5136 if (bWBack
&& !earlyWBack
)
5137 putIReg64orSP(rN
, mkexpr(tEA
));
5139 const HChar
* fmt_str
= NULL
;
5140 switch (INSN(24,23)) {
5142 fmt_str
= "%sp %s, %s, [%s], #%lld (at-Rn-then-Rn=EA)\n";
5145 fmt_str
= "%sp %s, %s, [%s, #%lld]! (at-EA-then-Rn=EA)\n";
5148 fmt_str
= "%sp %s, %s, [%s, #%lld] (at-Rn)\n";
5153 DIP(fmt_str
, bL
== 0 ? "st" : "ld",
5154 nameIRegOrZR(bX
== 1, rT1
),
5155 nameIRegOrZR(bX
== 1, rT2
),
5156 nameIReg64orSP(rN
), simm7
);
5161 /* -------- LDPSW (immediate, simm7) (INT REGS) -------- */
5162 /* Does 32 bit transfers which are sign extended to 64 bits.
5163 simm7 is scaled by the (single-register) transfer size
5166 01 101 0001 1 imm7 Rt2 Rn Rt1 LDPSW Rt1,Rt2, [Xn|SP], #imm
5169 01 101 0011 1 imm7 Rt2 Rn Rt1 LDPSW Rt1,Rt2, [Xn|SP, #imm]!
5172 01 101 0010 1 imm7 Rt2 Rn Rt1 LDPSW Rt1,Rt2, [Xn|SP, #imm]
5174 UInt insn_31_22
= INSN(31,22);
5175 if (insn_31_22
== BITS10(0,1,1,0,1,0,0,0,1,1)
5176 || insn_31_22
== BITS10(0,1,1,0,1,0,0,1,1,1)
5177 || insn_31_22
== BITS10(0,1,1,0,1,0,0,1,0,1)) {
5178 UInt bWBack
= INSN(23,23);
5179 UInt rT1
= INSN(4,0);
5180 UInt rN
= INSN(9,5);
5181 UInt rT2
= INSN(14,10);
5182 Long simm7
= (Long
)sx_to_64(INSN(21,15), 7);
5183 if ((bWBack
&& (rT1
== rN
|| rT2
== rN
) && rN
!= 31)
5185 /* undecodable; fall through */
5187 if (rN
== 31) { /* FIXME generate stack alignment check */ }
5189 // Compute the transfer address TA and the writeback address WA.
5190 IRTemp tRN
= newTemp(Ity_I64
);
5191 assign(tRN
, getIReg64orSP(rN
));
5192 IRTemp tEA
= newTemp(Ity_I64
);
5194 assign(tEA
, binop(Iop_Add64
, mkexpr(tRN
), mkU64(simm7
)));
5196 IRTemp tTA
= newTemp(Ity_I64
);
5197 IRTemp tWA
= newTemp(Ity_I64
);
5198 switch (INSN(24,23)) {
5200 assign(tTA
, mkexpr(tRN
)); assign(tWA
, mkexpr(tEA
)); break;
5202 assign(tTA
, mkexpr(tEA
)); assign(tWA
, mkexpr(tEA
)); break;
5204 assign(tTA
, mkexpr(tEA
)); /* tWA is unused */ break;
5206 vassert(0); /* NOTREACHED */
5209 // 32 bit load, sign extended to 64 bits
5210 putIReg64orZR(rT1
, unop(Iop_32Sto64
,
5211 loadLE(Ity_I32
, binop(Iop_Add64
,
5214 putIReg64orZR(rT2
, unop(Iop_32Sto64
,
5215 loadLE(Ity_I32
, binop(Iop_Add64
,
5219 putIReg64orSP(rN
, mkexpr(tEA
));
5221 const HChar
* fmt_str
= NULL
;
5222 switch (INSN(24,23)) {
5224 fmt_str
= "ldpsw %s, %s, [%s], #%lld (at-Rn-then-Rn=EA)\n";
5227 fmt_str
= "ldpsw %s, %s, [%s, #%lld]! (at-EA-then-Rn=EA)\n";
5230 fmt_str
= "ldpsw %s, %s, [%s, #%lld] (at-Rn)\n";
5235 DIP(fmt_str
, nameIReg64orZR(rT1
),
5236 nameIReg64orZR(rT2
),
5237 nameIReg64orSP(rN
), simm7
);
5242 /* ---------------- LDR (literal, int reg) ---------------- */
5244 00 011 000 imm19 Rt LDR Wt, [PC + sxTo64(imm19 << 2)]
5245 01 011 000 imm19 Rt LDR Xt, [PC + sxTo64(imm19 << 2)]
5246 10 011 000 imm19 Rt LDRSW Xt, [PC + sxTo64(imm19 << 2)]
5247 11 011 000 imm19 Rt prefetch [PC + sxTo64(imm19 << 2)]
5248 Just handles the first two cases for now.
5250 if (INSN(29,24) == BITS6(0,1,1,0,0,0) && INSN(31,31) == 0) {
5251 UInt imm19
= INSN(23,5);
5252 UInt rT
= INSN(4,0);
5253 UInt bX
= INSN(30,30);
5254 ULong ea
= guest_PC_curr_instr
+ sx_to_64(imm19
<< 2, 21);
5256 putIReg64orZR(rT
, loadLE(Ity_I64
, mkU64(ea
)));
5258 putIReg32orZR(rT
, loadLE(Ity_I32
, mkU64(ea
)));
5260 DIP("ldr %s, 0x%llx (literal)\n", nameIRegOrZR(bX
== 1, rT
), ea
);
5264 /* -------------- {LD,ST}R (integer register) --------------- */
5265 /* 31 29 20 15 12 11 9 4
5267 11 111000011 Rm option S 10 Rn Rt LDR Xt, [Xn|SP, R<m>{ext/sh}]
5268 10 111000011 Rm option S 10 Rn Rt LDR Wt, [Xn|SP, R<m>{ext/sh}]
5269 01 111000011 Rm option S 10 Rn Rt LDRH Wt, [Xn|SP, R<m>{ext/sh}]
5270 00 111000011 Rm option S 10 Rn Rt LDRB Wt, [Xn|SP, R<m>{ext/sh}]
5272 11 111000001 Rm option S 10 Rn Rt STR Xt, [Xn|SP, R<m>{ext/sh}]
5273 10 111000001 Rm option S 10 Rn Rt STR Wt, [Xn|SP, R<m>{ext/sh}]
5274 01 111000001 Rm option S 10 Rn Rt STRH Wt, [Xn|SP, R<m>{ext/sh}]
5275 00 111000001 Rm option S 10 Rn Rt STRB Wt, [Xn|SP, R<m>{ext/sh}]
5277 if (INSN(29,23) == BITS7(1,1,1,0,0,0,0)
5278 && INSN(21,21) == 1 && INSN(11,10) == BITS2(1,0)) {
5280 UInt szLg2
= INSN(31,30);
5281 Bool isLD
= INSN(22,22) == 1;
5282 UInt tt
= INSN(4,0);
5283 IRTemp ea
= gen_indexed_EA(dis_buf
, insn
, True
/*to/from int regs*/);
5284 if (ea
!= IRTemp_INVALID
) {
5286 case 3: /* 64 bit */
5288 putIReg64orZR(tt
, loadLE(Ity_I64
, mkexpr(ea
)));
5289 DIP("ldr %s, %s\n", nameIReg64orZR(tt
), dis_buf
);
5291 storeLE(mkexpr(ea
), getIReg64orZR(tt
));
5292 DIP("str %s, %s\n", nameIReg64orZR(tt
), dis_buf
);
5295 case 2: /* 32 bit */
5297 putIReg32orZR(tt
, loadLE(Ity_I32
, mkexpr(ea
)));
5298 DIP("ldr %s, %s\n", nameIReg32orZR(tt
), dis_buf
);
5300 storeLE(mkexpr(ea
), getIReg32orZR(tt
));
5301 DIP("str %s, %s\n", nameIReg32orZR(tt
), dis_buf
);
5304 case 1: /* 16 bit */
5306 putIReg64orZR(tt
, unop(Iop_16Uto64
,
5307 loadLE(Ity_I16
, mkexpr(ea
))));
5308 DIP("ldruh %s, %s\n", nameIReg32orZR(tt
), dis_buf
);
5310 storeLE(mkexpr(ea
), unop(Iop_64to16
, getIReg64orZR(tt
)));
5311 DIP("strh %s, %s\n", nameIReg32orZR(tt
), dis_buf
);
5316 putIReg64orZR(tt
, unop(Iop_8Uto64
,
5317 loadLE(Ity_I8
, mkexpr(ea
))));
5318 DIP("ldrub %s, %s\n", nameIReg32orZR(tt
), dis_buf
);
5320 storeLE(mkexpr(ea
), unop(Iop_64to8
, getIReg64orZR(tt
)));
5321 DIP("strb %s, %s\n", nameIReg32orZR(tt
), dis_buf
);
5331 /* -------------- LDRS{B,H,W} (uimm12) -------------- */
5332 /* 31 29 26 23 21 9 4
5333 10 111 001 10 imm12 n t LDRSW Xt, [Xn|SP, #pimm12 * 4]
5334 01 111 001 1x imm12 n t LDRSH Rt, [Xn|SP, #pimm12 * 2]
5335 00 111 001 1x imm12 n t LDRSB Rt, [Xn|SP, #pimm12 * 1]
5337 Rt is Wt when x==1, Xt when x==0
5339 if (INSN(29,23) == BITS7(1,1,1,0,0,1,1)) {
5340 /* Further checks on bits 31:30 and 22 */
5342 switch ((INSN(31,30) << 1) | INSN(22,22)) {
5344 case BITS3(0,1,0): case BITS3(0,1,1):
5345 case BITS3(0,0,0): case BITS3(0,0,1):
5350 UInt szLg2
= INSN(31,30);
5351 UInt bitX
= INSN(22,22);
5352 UInt imm12
= INSN(21,10);
5353 UInt nn
= INSN(9,5);
5354 UInt tt
= INSN(4,0);
5355 UInt szB
= 1 << szLg2
;
5356 IRExpr
* ea
= binop(Iop_Add64
,
5357 getIReg64orSP(nn
), mkU64(imm12
* szB
));
5361 putIReg64orZR(tt
, unop(Iop_32Sto64
, loadLE(Ity_I32
, ea
)));
5362 DIP("ldrsw %s, [%s, #%u]\n", nameIReg64orZR(tt
),
5363 nameIReg64orSP(nn
), imm12
* szB
);
5367 putIReg32orZR(tt
, unop(Iop_16Sto32
, loadLE(Ity_I16
, ea
)));
5369 putIReg64orZR(tt
, unop(Iop_16Sto64
, loadLE(Ity_I16
, ea
)));
5371 DIP("ldrsh %s, [%s, #%u]\n",
5372 nameIRegOrZR(bitX
== 0, tt
),
5373 nameIReg64orSP(nn
), imm12
* szB
);
5377 putIReg32orZR(tt
, unop(Iop_8Sto32
, loadLE(Ity_I8
, ea
)));
5379 putIReg64orZR(tt
, unop(Iop_8Sto64
, loadLE(Ity_I8
, ea
)));
5381 DIP("ldrsb %s, [%s, #%u]\n",
5382 nameIRegOrZR(bitX
== 0, tt
),
5383 nameIReg64orSP(nn
), imm12
* szB
);
5390 /* else fall through */
5393 /* -------------- LDRS{B,H,W} (simm9, upd) -------------- */
5394 /* (at-Rn-then-Rn=EA)
5395 31 29 23 21 20 11 9 4
5396 00 111 000 1x 0 imm9 01 n t LDRSB Rt, [Xn|SP], #simm9
5397 01 111 000 1x 0 imm9 01 n t LDRSH Rt, [Xn|SP], #simm9
5398 10 111 000 10 0 imm9 01 n t LDRSW Xt, [Xn|SP], #simm9
5401 00 111 000 1x 0 imm9 11 n t LDRSB Rt, [Xn|SP, #simm9]!
5402 01 111 000 1x 0 imm9 11 n t LDRSH Rt, [Xn|SP, #simm9]!
5403 10 111 000 10 0 imm9 11 n t LDRSW Xt, [Xn|SP, #simm9]!
5405 Rt is Wt when x==1, Xt when x==0
5406 transfer-at-Rn when [11]==0, at EA when [11]==1
5408 if (INSN(29,23) == BITS7(1,1,1,0,0,0,1)
5409 && INSN(21,21) == 0 && INSN(10,10) == 1) {
5410 /* Further checks on bits 31:30 and 22 */
5412 switch ((INSN(31,30) << 1) | INSN(22,22)) {
5413 case BITS3(1,0,0): // LDRSW Xt
5414 case BITS3(0,1,0): case BITS3(0,1,1): // LDRSH Xt, Wt
5415 case BITS3(0,0,0): case BITS3(0,0,1): // LDRSB Xt, Wt
5420 UInt szLg2
= INSN(31,30);
5421 UInt imm9
= INSN(20,12);
5422 Bool atRN
= INSN(11,11) == 0;
5423 UInt nn
= INSN(9,5);
5424 UInt tt
= INSN(4,0);
5425 IRTemp tRN
= newTemp(Ity_I64
);
5426 IRTemp tEA
= newTemp(Ity_I64
);
5427 IRTemp tTA
= IRTemp_INVALID
;
5428 ULong simm9
= sx_to_64(imm9
, 9);
5429 Bool is64
= INSN(22,22) == 0;
5430 assign(tRN
, getIReg64orSP(nn
));
5431 assign(tEA
, binop(Iop_Add64
, mkexpr(tRN
), mkU64(simm9
)));
5432 tTA
= atRN
? tRN
: tEA
;
5434 /* There are 5 cases:
5436 byte load, SX to 32, ZX to 64
5437 halfword load, SX to 64
5438 halfword load, SX to 32, ZX to 64
5440 The ifs below handle them in the listed order.
5445 putIReg64orZR(tt
, unop(Iop_8Sto64
,
5446 loadLE(Ity_I8
, mkexpr(tTA
))));
5448 putIReg32orZR(tt
, unop(Iop_8Sto32
,
5449 loadLE(Ity_I8
, mkexpr(tTA
))));
5452 else if (szLg2
== 1) {
5455 putIReg64orZR(tt
, unop(Iop_16Sto64
,
5456 loadLE(Ity_I16
, mkexpr(tTA
))));
5458 putIReg32orZR(tt
, unop(Iop_16Sto32
,
5459 loadLE(Ity_I16
, mkexpr(tTA
))));
5462 else if (szLg2
== 2 && is64
) {
5464 putIReg64orZR(tt
, unop(Iop_32Sto64
,
5465 loadLE(Ity_I32
, mkexpr(tTA
))));
5470 putIReg64orSP(nn
, mkexpr(tEA
));
5471 DIP(atRN
? "ldrs%c %s, [%s], #%llu\n" : "ldrs%c %s, [%s, #%llu]!",
5472 ch
, nameIRegOrZR(is64
, tt
), nameIReg64orSP(nn
), simm9
);
5475 /* else fall through */
5478 /* -------------- LDRS{B,H,W} (simm9, noUpd) -------------- */
5479 /* 31 29 23 21 20 11 9 4
5480 00 111 000 1x 0 imm9 00 n t LDURSB Rt, [Xn|SP, #simm9]
5481 01 111 000 1x 0 imm9 00 n t LDURSH Rt, [Xn|SP, #simm9]
5482 10 111 000 10 0 imm9 00 n t LDURSW Xt, [Xn|SP, #simm9]
5484 Rt is Wt when x==1, Xt when x==0
5486 if (INSN(29,23) == BITS7(1,1,1,0,0,0,1)
5487 && INSN(21,21) == 0 && INSN(11,10) == BITS2(0,0)) {
5488 /* Further checks on bits 31:30 and 22 */
5490 switch ((INSN(31,30) << 1) | INSN(22,22)) {
5491 case BITS3(1,0,0): // LDURSW Xt
5492 case BITS3(0,1,0): case BITS3(0,1,1): // LDURSH Xt, Wt
5493 case BITS3(0,0,0): case BITS3(0,0,1): // LDURSB Xt, Wt
5498 UInt szLg2
= INSN(31,30);
5499 UInt imm9
= INSN(20,12);
5500 UInt nn
= INSN(9,5);
5501 UInt tt
= INSN(4,0);
5502 IRTemp tRN
= newTemp(Ity_I64
);
5503 IRTemp tEA
= newTemp(Ity_I64
);
5504 ULong simm9
= sx_to_64(imm9
, 9);
5505 Bool is64
= INSN(22,22) == 0;
5506 assign(tRN
, getIReg64orSP(nn
));
5507 assign(tEA
, binop(Iop_Add64
, mkexpr(tRN
), mkU64(simm9
)));
5509 /* There are 5 cases:
5511 byte load, SX to 32, ZX to 64
5512 halfword load, SX to 64
5513 halfword load, SX to 32, ZX to 64
5515 The ifs below handle them in the listed order.
5520 putIReg64orZR(tt
, unop(Iop_8Sto64
,
5521 loadLE(Ity_I8
, mkexpr(tEA
))));
5523 putIReg32orZR(tt
, unop(Iop_8Sto32
,
5524 loadLE(Ity_I8
, mkexpr(tEA
))));
5527 else if (szLg2
== 1) {
5530 putIReg64orZR(tt
, unop(Iop_16Sto64
,
5531 loadLE(Ity_I16
, mkexpr(tEA
))));
5533 putIReg32orZR(tt
, unop(Iop_16Sto32
,
5534 loadLE(Ity_I16
, mkexpr(tEA
))));
5537 else if (szLg2
== 2 && is64
) {
5539 putIReg64orZR(tt
, unop(Iop_32Sto64
,
5540 loadLE(Ity_I32
, mkexpr(tEA
))));
5545 DIP("ldurs%c %s, [%s, #%lld]\n",
5546 ch
, nameIRegOrZR(is64
, tt
), nameIReg64orSP(nn
), (Long
)simm9
);
5549 /* else fall through */
5552 /* -------- LDP,STP (immediate, simm7) (FP&VEC) -------- */
5555 sz==00 => 32 bit (S) transfers
5556 sz==01 => 64 bit (D) transfers
5557 sz==10 => 128 bit (Q) transfers
5558 sz==11 isn't allowed
5559 simm7 is scaled by the (single-register) transfer size
5561 31 29 26 22 21 14 9 4
5563 sz 101 1000 L imm7 t2 n t1 mmNP SDQt1, SDQt2, [Xn|SP, #imm]
5564 (at-EA, with nontemporal hint)
5566 sz 101 1001 L imm7 t2 n t1 mmP SDQt1, SDQt2, [Xn|SP], #imm
5569 sz 101 1010 L imm7 t2 n t1 mmP SDQt1, SDQt2, [Xn|SP, #imm]
5572 sz 101 1011 L imm7 t2 n t1 mmP SDQt1, SDQt2, [Xn|SP, #imm]!
5575 if (INSN(29,25) == BITS5(1,0,1,1,0)) {
5576 UInt szSlg2
= INSN(31,30); // log2 of the xfer size in 32-bit units
5577 Bool isLD
= INSN(22,22) == 1;
5578 Bool wBack
= INSN(23,23) == 1;
5579 Long simm7
= (Long
)sx_to_64(INSN(21,15), 7);
5580 UInt tt2
= INSN(14,10);
5581 UInt nn
= INSN(9,5);
5582 UInt tt1
= INSN(4,0);
5583 if (szSlg2
== BITS2(1,1) || (isLD
&& tt1
== tt2
)) {
5584 /* undecodable; fall through */
5586 if (nn
== 31) { /* FIXME generate stack alignment check */ }
5588 // Compute the transfer address TA and the writeback address WA.
5589 UInt szB
= 4 << szSlg2
; /* szB is the per-register size */
5590 IRTemp tRN
= newTemp(Ity_I64
);
5591 assign(tRN
, getIReg64orSP(nn
));
5592 IRTemp tEA
= newTemp(Ity_I64
);
5593 simm7
= szB
* simm7
;
5594 assign(tEA
, binop(Iop_Add64
, mkexpr(tRN
), mkU64(simm7
)));
5596 IRTemp tTA
= newTemp(Ity_I64
);
5597 IRTemp tWA
= newTemp(Ity_I64
);
5598 switch (INSN(24,23)) {
5600 assign(tTA
, mkexpr(tRN
)); assign(tWA
, mkexpr(tEA
)); break;
5602 assign(tTA
, mkexpr(tEA
)); assign(tWA
, mkexpr(tEA
)); break;
5605 assign(tTA
, mkexpr(tEA
)); /* tWA is unused */ break;
5607 vassert(0); /* NOTREACHED */
5610 IRType ty
= Ity_INVALID
;
5612 case 4: ty
= Ity_F32
; break;
5613 case 8: ty
= Ity_F64
; break;
5614 case 16: ty
= Ity_V128
; break;
5615 default: vassert(0);
5618 /* Normally rN would be updated after the transfer. However, in
5619 the special cases typifed by
5620 stp q0, q1, [sp,#-512]!
5621 stp d0, d1, [sp,#-512]!
5622 stp s0, s1, [sp,#-512]!
5623 it is necessary to update SP before the transfer, (1)
5624 because Memcheck will otherwise complain about a write
5625 below the stack pointer, and (2) because the segfault
5626 stack extension mechanism will otherwise extend the stack
5627 only down to SP before the instruction, which might not be
5628 far enough, if the -512 bit takes the actual access
5629 address to the next page.
5632 = wBack
&& simm7
< 0
5633 && INSN(24,23) == BITS2(1,1) && nn
== 31 && !isLD
;
5635 if (wBack
&& earlyWBack
)
5636 putIReg64orSP(nn
, mkexpr(tEA
));
5640 putQReg128(tt1
, mkV128(0x0000));
5643 loadLE(ty
, binop(Iop_Add64
, mkexpr(tTA
), mkU64(0))));
5645 putQReg128(tt2
, mkV128(0x0000));
5648 loadLE(ty
, binop(Iop_Add64
, mkexpr(tTA
), mkU64(szB
))));
5650 storeLE(binop(Iop_Add64
, mkexpr(tTA
), mkU64(0)),
5651 getQRegLO(tt1
, ty
));
5652 storeLE(binop(Iop_Add64
, mkexpr(tTA
), mkU64(szB
)),
5653 getQRegLO(tt2
, ty
));
5656 if (wBack
&& !earlyWBack
)
5657 putIReg64orSP(nn
, mkexpr(tEA
));
5659 const HChar
* fmt_str
= NULL
;
5660 switch (INSN(24,23)) {
5662 fmt_str
= "%sp %s, %s, [%s], #%lld (at-Rn-then-Rn=EA)\n";
5665 fmt_str
= "%sp %s, %s, [%s, #%lld]! (at-EA-then-Rn=EA)\n";
5668 fmt_str
= "%sp %s, %s, [%s, #%lld] (at-Rn)\n";
5671 fmt_str
= "%snp %s, %s, [%s, #%lld] (at-Rn)\n";
5676 DIP(fmt_str
, isLD
? "ld" : "st",
5677 nameQRegLO(tt1
, ty
), nameQRegLO(tt2
, ty
),
5678 nameIReg64orSP(nn
), simm7
);
5683 /* -------------- {LD,ST}R (vector register) --------------- */
5684 /* 31 29 23 20 15 12 11 9 4
5686 00 111100 011 Rm option S 10 Rn Rt LDR Bt, [Xn|SP, R<m>{ext/sh}]
5687 01 111100 011 Rm option S 10 Rn Rt LDR Ht, [Xn|SP, R<m>{ext/sh}]
5688 10 111100 011 Rm option S 10 Rn Rt LDR St, [Xn|SP, R<m>{ext/sh}]
5689 11 111100 011 Rm option S 10 Rn Rt LDR Dt, [Xn|SP, R<m>{ext/sh}]
5690 00 111100 111 Rm option S 10 Rn Rt LDR Qt, [Xn|SP, R<m>{ext/sh}]
5692 00 111100 001 Rm option S 10 Rn Rt STR Bt, [Xn|SP, R<m>{ext/sh}]
5693 01 111100 001 Rm option S 10 Rn Rt STR Ht, [Xn|SP, R<m>{ext/sh}]
5694 10 111100 001 Rm option S 10 Rn Rt STR St, [Xn|SP, R<m>{ext/sh}]
5695 11 111100 001 Rm option S 10 Rn Rt STR Dt, [Xn|SP, R<m>{ext/sh}]
5696 00 111100 101 Rm option S 10 Rn Rt STR Qt, [Xn|SP, R<m>{ext/sh}]
5698 if (INSN(29,24) == BITS6(1,1,1,1,0,0)
5699 && INSN(21,21) == 1 && INSN(11,10) == BITS2(1,0)) {
5701 UInt szLg2
= (INSN(23,23) << 2) | INSN(31,30);
5702 Bool isLD
= INSN(22,22) == 1;
5703 UInt tt
= INSN(4,0);
5704 if (szLg2
> 4) goto after_LDR_STR_vector_register
;
5705 IRTemp ea
= gen_indexed_EA(dis_buf
, insn
, False
/*to/from vec regs*/);
5706 if (ea
== IRTemp_INVALID
) goto after_LDR_STR_vector_register
;
5710 putQReg128(tt
, mkV128(0x0000));
5711 putQRegLO(tt
, loadLE(Ity_I8
, mkexpr(ea
)));
5712 DIP("ldr %s, %s\n", nameQRegLO(tt
, Ity_I8
), dis_buf
);
5714 storeLE(mkexpr(ea
), getQRegLO(tt
, Ity_I8
));
5715 DIP("str %s, %s\n", nameQRegLO(tt
, Ity_I8
), dis_buf
);
5720 putQReg128(tt
, mkV128(0x0000));
5721 putQRegLO(tt
, loadLE(Ity_I16
, mkexpr(ea
)));
5722 DIP("ldr %s, %s\n", nameQRegLO(tt
, Ity_I16
), dis_buf
);
5724 storeLE(mkexpr(ea
), getQRegLO(tt
, Ity_I16
));
5725 DIP("str %s, %s\n", nameQRegLO(tt
, Ity_I16
), dis_buf
);
5728 case 2: /* 32 bit */
5730 putQReg128(tt
, mkV128(0x0000));
5731 putQRegLO(tt
, loadLE(Ity_I32
, mkexpr(ea
)));
5732 DIP("ldr %s, %s\n", nameQRegLO(tt
, Ity_I32
), dis_buf
);
5734 storeLE(mkexpr(ea
), getQRegLO(tt
, Ity_I32
));
5735 DIP("str %s, %s\n", nameQRegLO(tt
, Ity_I32
), dis_buf
);
5738 case 3: /* 64 bit */
5740 putQReg128(tt
, mkV128(0x0000));
5741 putQRegLO(tt
, loadLE(Ity_I64
, mkexpr(ea
)));
5742 DIP("ldr %s, %s\n", nameQRegLO(tt
, Ity_I64
), dis_buf
);
5744 storeLE(mkexpr(ea
), getQRegLO(tt
, Ity_I64
));
5745 DIP("str %s, %s\n", nameQRegLO(tt
, Ity_I64
), dis_buf
);
5750 putQReg128(tt
, loadLE(Ity_V128
, mkexpr(ea
)));
5751 DIP("ldr %s, %s\n", nameQReg128(tt
), dis_buf
);
5753 storeLE(mkexpr(ea
), getQReg128(tt
));
5754 DIP("str %s, %s\n", nameQReg128(tt
), dis_buf
);
5762 after_LDR_STR_vector_register
:
5764 /* ---------- LDRS{B,H,W} (integer register, SX) ---------- */
5765 /* 31 29 22 20 15 12 11 9 4
5767 10 1110001 01 Rm opt S 10 Rn Rt LDRSW Xt, [Xn|SP, R<m>{ext/sh}]
5769 01 1110001 01 Rm opt S 10 Rn Rt LDRSH Xt, [Xn|SP, R<m>{ext/sh}]
5770 01 1110001 11 Rm opt S 10 Rn Rt LDRSH Wt, [Xn|SP, R<m>{ext/sh}]
5772 00 1110001 01 Rm opt S 10 Rn Rt LDRSB Xt, [Xn|SP, R<m>{ext/sh}]
5773 00 1110001 11 Rm opt S 10 Rn Rt LDRSB Wt, [Xn|SP, R<m>{ext/sh}]
5775 if (INSN(29,23) == BITS7(1,1,1,0,0,0,1)
5776 && INSN(21,21) == 1 && INSN(11,10) == BITS2(1,0)) {
5778 UInt szLg2
= INSN(31,30);
5779 Bool sxTo64
= INSN(22,22) == 0; // else sx to 32 and zx to 64
5780 UInt tt
= INSN(4,0);
5781 if (szLg2
== 3) goto after_LDRS_integer_register
;
5782 IRTemp ea
= gen_indexed_EA(dis_buf
, insn
, True
/*to/from int regs*/);
5783 if (ea
== IRTemp_INVALID
) goto after_LDRS_integer_register
;
5784 /* Enumerate the 5 variants explicitly. */
5785 if (szLg2
== 2/*32 bit*/ && sxTo64
) {
5786 putIReg64orZR(tt
, unop(Iop_32Sto64
, loadLE(Ity_I32
, mkexpr(ea
))));
5787 DIP("ldrsw %s, %s\n", nameIReg64orZR(tt
), dis_buf
);
5791 if (szLg2
== 1/*16 bit*/) {
5793 putIReg64orZR(tt
, unop(Iop_16Sto64
, loadLE(Ity_I16
, mkexpr(ea
))));
5794 DIP("ldrsh %s, %s\n", nameIReg64orZR(tt
), dis_buf
);
5796 putIReg32orZR(tt
, unop(Iop_16Sto32
, loadLE(Ity_I16
, mkexpr(ea
))));
5797 DIP("ldrsh %s, %s\n", nameIReg32orZR(tt
), dis_buf
);
5802 if (szLg2
== 0/*8 bit*/) {
5804 putIReg64orZR(tt
, unop(Iop_8Sto64
, loadLE(Ity_I8
, mkexpr(ea
))));
5805 DIP("ldrsb %s, %s\n", nameIReg64orZR(tt
), dis_buf
);
5807 putIReg32orZR(tt
, unop(Iop_8Sto32
, loadLE(Ity_I8
, mkexpr(ea
))));
5808 DIP("ldrsb %s, %s\n", nameIReg32orZR(tt
), dis_buf
);
5812 /* else it's an invalid combination */
5814 after_LDRS_integer_register
:
5816 /* -------- LDR/STR (immediate, SIMD&FP, unsigned offset) -------- */
5817 /* This is the Unsigned offset variant only. The Post-Index and
5818 Pre-Index variants are below.
5821 00 111 101 01 imm12 n t LDR Bt, [Xn|SP + imm12 * 1]
5822 01 111 101 01 imm12 n t LDR Ht, [Xn|SP + imm12 * 2]
5823 10 111 101 01 imm12 n t LDR St, [Xn|SP + imm12 * 4]
5824 11 111 101 01 imm12 n t LDR Dt, [Xn|SP + imm12 * 8]
5825 00 111 101 11 imm12 n t LDR Qt, [Xn|SP + imm12 * 16]
5827 00 111 101 00 imm12 n t STR Bt, [Xn|SP + imm12 * 1]
5828 01 111 101 00 imm12 n t STR Ht, [Xn|SP + imm12 * 2]
5829 10 111 101 00 imm12 n t STR St, [Xn|SP + imm12 * 4]
5830 11 111 101 00 imm12 n t STR Dt, [Xn|SP + imm12 * 8]
5831 00 111 101 10 imm12 n t STR Qt, [Xn|SP + imm12 * 16]
5833 if (INSN(29,24) == BITS6(1,1,1,1,0,1)
5834 && ((INSN(23,23) << 2) | INSN(31,30)) <= 4) {
5835 UInt szLg2
= (INSN(23,23) << 2) | INSN(31,30);
5836 Bool isLD
= INSN(22,22) == 1;
5837 UInt pimm12
= INSN(21,10) << szLg2
;
5838 UInt nn
= INSN(9,5);
5839 UInt tt
= INSN(4,0);
5840 IRTemp tEA
= newTemp(Ity_I64
);
5841 IRType ty
= preferredVectorSubTypeFromSize(1 << szLg2
);
5842 assign(tEA
, binop(Iop_Add64
, getIReg64orSP(nn
), mkU64(pimm12
)));
5845 putQReg128(tt
, mkV128(0x0000));
5847 putQRegLO(tt
, loadLE(ty
, mkexpr(tEA
)));
5849 storeLE(mkexpr(tEA
), getQRegLO(tt
, ty
));
5851 DIP("%s %s, [%s, #%u]\n",
5852 isLD
? "ldr" : "str",
5853 nameQRegLO(tt
, ty
), nameIReg64orSP(nn
), pimm12
);
5857 /* -------- LDR/STR (immediate, SIMD&FP, pre/post index) -------- */
5858 /* These are the Post-Index and Pre-Index variants.
5862 00 111 100 01 0 imm9 01 n t LDR Bt, [Xn|SP], #simm
5863 01 111 100 01 0 imm9 01 n t LDR Ht, [Xn|SP], #simm
5864 10 111 100 01 0 imm9 01 n t LDR St, [Xn|SP], #simm
5865 11 111 100 01 0 imm9 01 n t LDR Dt, [Xn|SP], #simm
5866 00 111 100 11 0 imm9 01 n t LDR Qt, [Xn|SP], #simm
5869 00 111 100 01 0 imm9 11 n t LDR Bt, [Xn|SP, #simm]!
5870 01 111 100 01 0 imm9 11 n t LDR Ht, [Xn|SP, #simm]!
5871 10 111 100 01 0 imm9 11 n t LDR St, [Xn|SP, #simm]!
5872 11 111 100 01 0 imm9 11 n t LDR Dt, [Xn|SP, #simm]!
5873 00 111 100 11 0 imm9 11 n t LDR Qt, [Xn|SP, #simm]!
5875 Stores are the same except with bit 22 set to 0.
5877 if (INSN(29,24) == BITS6(1,1,1,1,0,0)
5878 && ((INSN(23,23) << 2) | INSN(31,30)) <= 4
5879 && INSN(21,21) == 0 && INSN(10,10) == 1) {
5880 UInt szLg2
= (INSN(23,23) << 2) | INSN(31,30);
5881 Bool isLD
= INSN(22,22) == 1;
5882 UInt imm9
= INSN(20,12);
5883 Bool atRN
= INSN(11,11) == 0;
5884 UInt nn
= INSN(9,5);
5885 UInt tt
= INSN(4,0);
5886 IRTemp tRN
= newTemp(Ity_I64
);
5887 IRTemp tEA
= newTemp(Ity_I64
);
5888 IRTemp tTA
= IRTemp_INVALID
;
5889 IRType ty
= preferredVectorSubTypeFromSize(1 << szLg2
);
5890 ULong simm9
= sx_to_64(imm9
, 9);
5891 assign(tRN
, getIReg64orSP(nn
));
5892 assign(tEA
, binop(Iop_Add64
, mkexpr(tRN
), mkU64(simm9
)));
5893 tTA
= atRN
? tRN
: tEA
;
5895 /* Do early writeback for the cases typified by
5897 str d10, [sp, #-128]!
5899 for the same reasons as described in a similar comment in the
5900 "LDP,STP (immediate, simm7) (FP&VEC)" case just above.
5903 = !atRN
&& !isLD
&& (ty
== Ity_F64
|| ty
== Ity_V128
)
5904 && nn
== 31 && ((Long
)simm9
) < 0;
5907 putIReg64orSP(nn
, mkexpr(tEA
));
5911 putQReg128(tt
, mkV128(0x0000));
5913 putQRegLO(tt
, loadLE(ty
, mkexpr(tTA
)));
5915 storeLE(mkexpr(tTA
), getQRegLO(tt
, ty
));
5919 putIReg64orSP(nn
, mkexpr(tEA
));
5921 DIP(atRN
? "%s %s, [%s], #%lld\n" : "%s %s, [%s, #%lld]!\n",
5922 isLD
? "ldr" : "str",
5923 nameQRegLO(tt
, ty
), nameIReg64orSP(nn
), (Long
)simm9
);
   /* -------- LDUR/STUR (unscaled offset, SIMD&FP) -------- */
   /* 31 29      23 20   11 9 4
      00 111 100 01 0 imm9 00 n t   LDR Bt, [Xn|SP, #simm]
      01 111 100 01 0 imm9 00 n t   LDR Ht, [Xn|SP, #simm]
      10 111 100 01 0 imm9 00 n t   LDR St, [Xn|SP, #simm]
      11 111 100 01 0 imm9 00 n t   LDR Dt, [Xn|SP, #simm]
      00 111 100 11 0 imm9 00 n t   LDR Qt, [Xn|SP, #simm]

      00 111 100 00 0 imm9 00 n t   STR Bt, [Xn|SP, #simm]
      01 111 100 00 0 imm9 00 n t   STR Ht, [Xn|SP, #simm]
      10 111 100 00 0 imm9 00 n t   STR St, [Xn|SP, #simm]
      11 111 100 00 0 imm9 00 n t   STR Dt, [Xn|SP, #simm]
      00 111 100 10 0 imm9 00 n t   STR Qt, [Xn|SP, #simm]
   */
   if (INSN(29,24) == BITS6(1,1,1,1,0,0)
       && ((INSN(23,23) << 2) | INSN(31,30)) <= 4
       && INSN(21,21) == 0 && INSN(11,10) == BITS2(0,0)) {
      UInt   szLg2 = (INSN(23,23) << 2) | INSN(31,30);
      Bool   isLD  = INSN(22,22) == 1;
      UInt   imm9  = INSN(20,12);
      UInt   nn    = INSN(9,5);
      UInt   tt    = INSN(4,0);
      ULong  simm9 = sx_to_64(imm9, 9);
      IRTemp tEA   = newTemp(Ity_I64);
      IRType ty    = preferredVectorSubTypeFromSize(1 << szLg2);
      assign(tEA, binop(Iop_Add64, getIReg64orSP(nn), mkU64(simm9)));
      if (isLD) {
         if (szLg2 < 4) {
            putQReg128(tt, mkV128(0x0000));
         }
         putQRegLO(tt, loadLE(ty, mkexpr(tEA)));
      } else {
         storeLE(mkexpr(tEA), getQRegLO(tt, ty));
      }
      DIP("%s %s, [%s, #%lld]\n",
          isLD ? "ldur" : "stur",
          nameQRegLO(tt, ty), nameIReg64orSP(nn), (Long)simm9);
   /* ---------------- LDR (literal, SIMD&FP) ---------------- */
   /* 00 011 100 imm19 t   LDR St, [PC + sxTo64(imm19 << 2)]
      01 011 100 imm19 t   LDR Dt, [PC + sxTo64(imm19 << 2)]
      10 011 100 imm19 t   LDR Qt, [PC + sxTo64(imm19 << 2)]
   */
   if (INSN(29,24) == BITS6(0,1,1,1,0,0) && INSN(31,30) < BITS2(1,1)) {
      UInt   szB   = 4 << INSN(31,30);
      UInt   imm19 = INSN(23,5);
      UInt   tt    = INSN(4,0);
      ULong  ea    = guest_PC_curr_instr + sx_to_64(imm19 << 2, 21);
      IRType ty    = preferredVectorSubTypeFromSize(szB);
      putQReg128(tt, mkV128(0x0000));
      putQRegLO(tt, loadLE(ty, mkU64(ea)));
      DIP("ldr %s, 0x%llx (literal)\n", nameQRegLO(tt, ty), ea);
5985 /* ------ LD1/ST1 (multiple 1-elem structs to/from 1 reg ------ */
5986 /* ------ LD2/ST2 (multiple 2-elem structs to/from 2 regs ------ */
5987 /* ------ LD3/ST3 (multiple 3-elem structs to/from 3 regs ------ */
5988 /* ------ LD4/ST4 (multiple 4-elem structs to/from 4 regs ------ */
5989 /* 31 29 26 22 21 20 15 11 9 4
5991 0q 001 1000 L 0 00000 0000 sz n t xx4 {Vt..t+3.T}, [Xn|SP]
5992 0q 001 1001 L 0 m 0000 sz n t xx4 {Vt..t+3.T}, [Xn|SP], step
5994 0q 001 1000 L 0 00000 0100 sz n t xx3 {Vt..t+2.T}, [Xn|SP]
5995 0q 001 1001 L 0 m 0100 sz n t xx3 {Vt..t+2.T}, [Xn|SP], step
5997 0q 001 1000 L 0 00000 1000 sz n t xx2 {Vt..t+1.T}, [Xn|SP]
5998 0q 001 1001 L 0 m 1000 sz n t xx2 {Vt..t+1.T}, [Xn|SP], step
6000 0q 001 1000 L 0 00000 0111 sz n t xx1 {Vt.T}, [Xn|SP]
6001 0q 001 1001 L 0 m 0111 sz n t xx1 {Vt.T}, [Xn|SP], step
6003 T = defined by Q and sz in the normal way
6004 step = if m == 11111 then transfer-size else Xm
6005 xx = case L of 1 -> LD ; 0 -> ST
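   /* Worked example (illustrative, not from the original source):
      "ld4 {v0.16b-v3.16b}, [x2], #64" has q == 1, L == 1, opcode 0000
      and sz == 00, so nRegs == 4 and T == 16b; in the post-index form
      with m == 11111 the base register advances by the transfer size,
      4 regs * 16 bytes == 64. */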
6007 if (INSN(31,31) == 0 && INSN(29,24) == BITS6(0,0,1,1,0,0)
6008 && INSN(21,21) == 0) {
6009 Bool bitQ
= INSN(30,30);
6010 Bool isPX
= INSN(23,23) == 1;
6011 Bool isLD
= INSN(22,22) == 1;
6012 UInt mm
= INSN(20,16);
6013 UInt opc
= INSN(15,12);
6014 UInt sz
= INSN(11,10);
6015 UInt nn
= INSN(9,5);
6016 UInt tt
= INSN(4,0);
6017 Bool isQ
= bitQ
== 1;
6018 Bool is1d
= sz
== BITS2(1,1) && !isQ
;
6021 case BITS4(0,0,0,0): nRegs
= 4; break;
6022 case BITS4(0,1,0,0): nRegs
= 3; break;
6023 case BITS4(1,0,0,0): nRegs
= 2; break;
6024 case BITS4(0,1,1,1): nRegs
= 1; break;
6028 /* The combination insn[23] == 0 && insn[20:16] != 0 is not allowed.
6029 If we see it, set nRegs to 0 so as to cause the next conditional
6031 if (!isPX
&& mm
!= 0)
6034 if (nRegs
== 1 /* .1d is allowed */
6035 || (nRegs
>= 2 && nRegs
<= 4 && !is1d
) /* .1d is not allowed */) {
6037 UInt xferSzB
= (isQ
? 16 : 8) * nRegs
;
6039 /* Generate the transfer address (TA) and if necessary the
6040 writeback address (WB) */
6041 IRTemp tTA
= newTemp(Ity_I64
);
6042 assign(tTA
, getIReg64orSP(nn
));
6043 if (nn
== 31) { /* FIXME generate stack alignment check */ }
6044 IRTemp tWB
= IRTemp_INVALID
;
6046 tWB
= newTemp(Ity_I64
);
6047 assign(tWB
, binop(Iop_Add64
,
6049 mm
== BITS5(1,1,1,1,1) ? mkU64(xferSzB
)
6050 : getIReg64orZR(mm
)));
6053 /* -- BEGIN generate the transfers -- */
6055 IRTemp u0
, u1
, u2
, u3
, i0
, i1
, i2
, i3
;
6056 u0
= u1
= u2
= u3
= i0
= i1
= i2
= i3
= IRTemp_INVALID
;
6058 case 4: u3
= newTempV128(); i3
= newTempV128(); /* fallthru */
6059 case 3: u2
= newTempV128(); i2
= newTempV128(); /* fallthru */
6060 case 2: u1
= newTempV128(); i1
= newTempV128(); /* fallthru */
6061 case 1: u0
= newTempV128(); i0
= newTempV128(); break;
6062 default: vassert(0);
6065 /* -- Multiple 128 or 64 bit stores -- */
6068 case 4: assign(u3
, getQReg128((tt
+3) % 32)); /* fallthru */
6069 case 3: assign(u2
, getQReg128((tt
+2) % 32)); /* fallthru */
6070 case 2: assign(u1
, getQReg128((tt
+1) % 32)); /* fallthru */
6071 case 1: assign(u0
, getQReg128((tt
+0) % 32)); break;
6072 default: vassert(0);
6075 case 4: (isQ
? math_INTERLEAVE4_128
: math_INTERLEAVE4_64
)
6076 (&i0
, &i1
, &i2
, &i3
, sz
, u0
, u1
, u2
, u3
);
6078 case 3: (isQ
? math_INTERLEAVE3_128
: math_INTERLEAVE3_64
)
6079 (&i0
, &i1
, &i2
, sz
, u0
, u1
, u2
);
6081 case 2: (isQ
? math_INTERLEAVE2_128
: math_INTERLEAVE2_64
)
6082 (&i0
, &i1
, sz
, u0
, u1
);
6084 case 1: (isQ
? math_INTERLEAVE1_128
: math_INTERLEAVE1_64
)
6087 default: vassert(0);
6089 # define MAYBE_NARROW_TO_64(_expr) \
6090 (isQ ? (_expr) : unop(Iop_V128to64,(_expr)))
6091 UInt step
= isQ
? 16 : 8;
6093 case 4: storeLE( binop(Iop_Add64
, mkexpr(tTA
), mkU64(3*step
)),
6094 MAYBE_NARROW_TO_64(mkexpr(i3
)) );
6096 case 3: storeLE( binop(Iop_Add64
, mkexpr(tTA
), mkU64(2*step
)),
6097 MAYBE_NARROW_TO_64(mkexpr(i2
)) );
6099 case 2: storeLE( binop(Iop_Add64
, mkexpr(tTA
), mkU64(1*step
)),
6100 MAYBE_NARROW_TO_64(mkexpr(i1
)) );
6102 case 1: storeLE( binop(Iop_Add64
, mkexpr(tTA
), mkU64(0*step
)),
6103 MAYBE_NARROW_TO_64(mkexpr(i0
)) );
6105 default: vassert(0);
6107 # undef MAYBE_NARROW_TO_64
6110 /* -- Multiple 128 or 64 bit loads -- */
6112 UInt step
= isQ
? 16 : 8;
6113 IRType loadTy
= isQ
? Ity_V128
: Ity_I64
;
6114 # define MAYBE_WIDEN_FROM_64(_expr) \
6115 (isQ ? (_expr) : unop(Iop_64UtoV128,(_expr)))
6118 assign(i3
, MAYBE_WIDEN_FROM_64(
6120 binop(Iop_Add64
, mkexpr(tTA
),
6121 mkU64(3 * step
)))));
6124 assign(i2
, MAYBE_WIDEN_FROM_64(
6126 binop(Iop_Add64
, mkexpr(tTA
),
6127 mkU64(2 * step
)))));
6130 assign(i1
, MAYBE_WIDEN_FROM_64(
6132 binop(Iop_Add64
, mkexpr(tTA
),
6133 mkU64(1 * step
)))));
6136 assign(i0
, MAYBE_WIDEN_FROM_64(
6138 binop(Iop_Add64
, mkexpr(tTA
),
6139 mkU64(0 * step
)))));
6144 # undef MAYBE_WIDEN_FROM_64
6146 case 4: (isQ
? math_DEINTERLEAVE4_128
: math_DEINTERLEAVE4_64
)
6147 (&u0
, &u1
, &u2
, &u3
, sz
, i0
,i1
,i2
,i3
);
6149 case 3: (isQ
? math_DEINTERLEAVE3_128
: math_DEINTERLEAVE3_64
)
6150 (&u0
, &u1
, &u2
, sz
, i0
, i1
, i2
);
6152 case 2: (isQ
? math_DEINTERLEAVE2_128
: math_DEINTERLEAVE2_64
)
6153 (&u0
, &u1
, sz
, i0
, i1
);
6155 case 1: (isQ
? math_DEINTERLEAVE1_128
: math_DEINTERLEAVE1_64
)
6158 default: vassert(0);
6161 case 4: putQReg128( (tt
+3) % 32,
6162 math_MAYBE_ZERO_HI64(bitQ
, u3
));
6164 case 3: putQReg128( (tt
+2) % 32,
6165 math_MAYBE_ZERO_HI64(bitQ
, u2
));
6167 case 2: putQReg128( (tt
+1) % 32,
6168 math_MAYBE_ZERO_HI64(bitQ
, u1
));
6170 case 1: putQReg128( (tt
+0) % 32,
6171 math_MAYBE_ZERO_HI64(bitQ
, u0
));
6173 default: vassert(0);
6177 /* -- END generate the transfers -- */
6179 /* Do the writeback, if necessary */
6181 putIReg64orSP(nn
, mkexpr(tWB
));
6185 pxStr
[0] = pxStr
[sizeof(pxStr
)-1] = 0;
6187 if (mm
== BITS5(1,1,1,1,1))
6188 vex_sprintf(pxStr
, ", #%u", xferSzB
);
6190 vex_sprintf(pxStr
, ", %s", nameIReg64orZR(mm
));
6192 const HChar
* arr
= nameArr_Q_SZ(bitQ
, sz
);
6193 DIP("%s%u {v%u.%s .. v%u.%s}, [%s]%s\n",
6194 isLD
? "ld" : "st", nRegs
,
6195 (tt
+0) % 32, arr
, (tt
+nRegs
-1) % 32, arr
, nameIReg64orSP(nn
),
6199 dres
->hint
= Dis_HintVerbose
;
6203 /* else fall through */
6206 /* ------ LD1/ST1 (multiple 1-elem structs to/from 2 regs ------ */
6207 /* ------ LD1/ST1 (multiple 1-elem structs to/from 3 regs ------ */
6208 /* ------ LD1/ST1 (multiple 1-elem structs to/from 4 regs ------ */
6209 /* 31 29 26 22 21 20 15 11 9 4
6211 0q 001 1000 L 0 00000 0010 sz n t xx1 {Vt..t+3.T}, [Xn|SP]
6212 0q 001 1001 L 0 m 0010 sz n t xx1 {Vt..t+3.T}, [Xn|SP], step
6214 0q 001 1000 L 0 00000 0110 sz n t xx1 {Vt..t+2.T}, [Xn|SP]
6215 0q 001 1001 L 0 m 0110 sz n t xx1 {Vt..t+2.T}, [Xn|SP], step
6217 0q 001 1000 L 0 00000 1010 sz n t xx1 {Vt..t+1.T}, [Xn|SP]
6218 0q 001 1001 L 0 m 1010 sz n t xx1 {Vt..t+1.T}, [Xn|SP], step
6220 T = defined by Q and sz in the normal way
6221 step = if m == 11111 then transfer-size else Xm
6222 xx = case L of 1 -> LD ; 0 -> ST
6224 if (INSN(31,31) == 0 && INSN(29,24) == BITS6(0,0,1,1,0,0)
6225 && INSN(21,21) == 0) {
6226 Bool bitQ
= INSN(30,30);
6227 Bool isPX
= INSN(23,23) == 1;
6228 Bool isLD
= INSN(22,22) == 1;
6229 UInt mm
= INSN(20,16);
6230 UInt opc
= INSN(15,12);
6231 UInt sz
= INSN(11,10);
6232 UInt nn
= INSN(9,5);
6233 UInt tt
= INSN(4,0);
6234 Bool isQ
= bitQ
== 1;
6237 case BITS4(0,0,1,0): nRegs
= 4; break;
6238 case BITS4(0,1,1,0): nRegs
= 3; break;
6239 case BITS4(1,0,1,0): nRegs
= 2; break;
6243 /* The combination insn[23] == 0 && insn[20:16] != 0 is not allowed.
6244 If we see it, set nRegs to 0 so as to cause the next conditional
6246 if (!isPX
&& mm
!= 0)
6249 if (nRegs
>= 2 && nRegs
<= 4) {
6251 UInt xferSzB
= (isQ
? 16 : 8) * nRegs
;
6253 /* Generate the transfer address (TA) and if necessary the
6254 writeback address (WB) */
6255 IRTemp tTA
= newTemp(Ity_I64
);
6256 assign(tTA
, getIReg64orSP(nn
));
6257 if (nn
== 31) { /* FIXME generate stack alignment check */ }
6258 IRTemp tWB
= IRTemp_INVALID
;
6260 tWB
= newTemp(Ity_I64
);
6261 assign(tWB
, binop(Iop_Add64
,
6263 mm
== BITS5(1,1,1,1,1) ? mkU64(xferSzB
)
6264 : getIReg64orZR(mm
)));
6267 /* -- BEGIN generate the transfers -- */
6269 IRTemp u0
, u1
, u2
, u3
;
6270 u0
= u1
= u2
= u3
= IRTemp_INVALID
;
6272 case 4: u3
= newTempV128(); /* fallthru */
6273 case 3: u2
= newTempV128(); /* fallthru */
6274 case 2: u1
= newTempV128();
6275 u0
= newTempV128(); break;
6276 default: vassert(0);
6279 /* -- Multiple 128 or 64 bit stores -- */
6282 case 4: assign(u3
, getQReg128((tt
+3) % 32)); /* fallthru */
6283 case 3: assign(u2
, getQReg128((tt
+2) % 32)); /* fallthru */
6284 case 2: assign(u1
, getQReg128((tt
+1) % 32));
6285 assign(u0
, getQReg128((tt
+0) % 32)); break;
6286 default: vassert(0);
6288 # define MAYBE_NARROW_TO_64(_expr) \
6289 (isQ ? (_expr) : unop(Iop_V128to64,(_expr)))
6290 UInt step
= isQ
? 16 : 8;
6292 case 4: storeLE( binop(Iop_Add64
, mkexpr(tTA
), mkU64(3*step
)),
6293 MAYBE_NARROW_TO_64(mkexpr(u3
)) );
6295 case 3: storeLE( binop(Iop_Add64
, mkexpr(tTA
), mkU64(2*step
)),
6296 MAYBE_NARROW_TO_64(mkexpr(u2
)) );
6298 case 2: storeLE( binop(Iop_Add64
, mkexpr(tTA
), mkU64(1*step
)),
6299 MAYBE_NARROW_TO_64(mkexpr(u1
)) );
6300 storeLE( binop(Iop_Add64
, mkexpr(tTA
), mkU64(0*step
)),
6301 MAYBE_NARROW_TO_64(mkexpr(u0
)) );
6303 default: vassert(0);
6305 # undef MAYBE_NARROW_TO_64
6308 /* -- Multiple 128 or 64 bit loads -- */
6310 UInt step
= isQ
? 16 : 8;
6311 IRType loadTy
= isQ
? Ity_V128
: Ity_I64
;
6312 # define MAYBE_WIDEN_FROM_64(_expr) \
6313 (isQ ? (_expr) : unop(Iop_64UtoV128,(_expr)))
6316 assign(u3
, MAYBE_WIDEN_FROM_64(
6318 binop(Iop_Add64
, mkexpr(tTA
),
6319 mkU64(3 * step
)))));
6322 assign(u2
, MAYBE_WIDEN_FROM_64(
6324 binop(Iop_Add64
, mkexpr(tTA
),
6325 mkU64(2 * step
)))));
6328 assign(u1
, MAYBE_WIDEN_FROM_64(
6330 binop(Iop_Add64
, mkexpr(tTA
),
6331 mkU64(1 * step
)))));
6332 assign(u0
, MAYBE_WIDEN_FROM_64(
6334 binop(Iop_Add64
, mkexpr(tTA
),
6335 mkU64(0 * step
)))));
6340 # undef MAYBE_WIDEN_FROM_64
6342 case 4: putQReg128( (tt
+3) % 32,
6343 math_MAYBE_ZERO_HI64(bitQ
, u3
));
6345 case 3: putQReg128( (tt
+2) % 32,
6346 math_MAYBE_ZERO_HI64(bitQ
, u2
));
6348 case 2: putQReg128( (tt
+1) % 32,
6349 math_MAYBE_ZERO_HI64(bitQ
, u1
));
6350 putQReg128( (tt
+0) % 32,
6351 math_MAYBE_ZERO_HI64(bitQ
, u0
));
6353 default: vassert(0);
6357 /* -- END generate the transfers -- */
6359 /* Do the writeback, if necessary */
6361 putIReg64orSP(nn
, mkexpr(tWB
));
6365 pxStr
[0] = pxStr
[sizeof(pxStr
)-1] = 0;
6367 if (mm
== BITS5(1,1,1,1,1))
6368 vex_sprintf(pxStr
, ", #%u", xferSzB
);
6370 vex_sprintf(pxStr
, ", %s", nameIReg64orZR(mm
));
6372 const HChar
* arr
= nameArr_Q_SZ(bitQ
, sz
);
6373 DIP("%s1 {v%u.%s .. v%u.%s}, [%s]%s\n",
6375 (tt
+0) % 32, arr
, (tt
+nRegs
-1) % 32, arr
, nameIReg64orSP(nn
),
6380 /* else fall through */
6383 /* ---------- LD1R (single structure, replicate) ---------- */
6384 /* ---------- LD2R (single structure, replicate) ---------- */
6385 /* ---------- LD3R (single structure, replicate) ---------- */
6386 /* ---------- LD4R (single structure, replicate) ---------- */
6387 /* 31 29 22 20 15 11 9 4
6388 0q 001 1010 10 00000 110 0 sz n t LD1R {Vt.T}, [Xn|SP]
6389 0q 001 1011 10 m 110 0 sz n t LD1R {Vt.T}, [Xn|SP], step
6391 0q 001 1010 11 00000 110 0 sz n t LD2R {Vt..t+1.T}, [Xn|SP]
6392 0q 001 1011 11 m 110 0 sz n t LD2R {Vt..t+1.T}, [Xn|SP], step
6394 0q 001 1010 10 00000 111 0 sz n t LD3R {Vt..t+2.T}, [Xn|SP]
6395 0q 001 1011 10 m 111 0 sz n t LD3R {Vt..t+2.T}, [Xn|SP], step
6397 0q 001 1010 11 00000 111 0 sz n t LD4R {Vt..t+3.T}, [Xn|SP]
6398 0q 001 1011 11 m 111 0 sz n t LD4R {Vt..t+3.T}, [Xn|SP], step
6400 step = if m == 11111 then transfer-size else Xm
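   /* Worked example (illustrative, not from the original source):
      "ld2r {v8.8h, v9.8h}, [x0], #4" has bit 21 set and bit 13 clear,
      so nRegs == 2, and sz == 01 gives a 2-byte lane; each loaded
      halfword is replicated across all 8 lanes of its register, and
      with m == 11111 the post-index step is the transfer size,
      2 regs * 2 bytes == 4. */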
6402 if (INSN(31,31) == 0 && INSN(29,24) == BITS6(0,0,1,1,0,1)
6403 && INSN(22,22) == 1 && INSN(15,14) == BITS2(1,1)
6404 && INSN(12,12) == 0) {
6405 UInt bitQ
= INSN(30,30);
6406 Bool isPX
= INSN(23,23) == 1;
6407 UInt nRegs
= ((INSN(13,13) << 1) | INSN(21,21)) + 1;
6408 UInt mm
= INSN(20,16);
6409 UInt sz
= INSN(11,10);
6410 UInt nn
= INSN(9,5);
6411 UInt tt
= INSN(4,0);
6413 /* The combination insn[23] == 0 && insn[20:16] != 0 is not allowed. */
6414 if (isPX
|| mm
== 0) {
6416 IRType ty
= integerIRTypeOfSize(1 << sz
);
6418 UInt laneSzB
= 1 << sz
;
6419 UInt xferSzB
= laneSzB
* nRegs
;
6421 /* Generate the transfer address (TA) and if necessary the
6422 writeback address (WB) */
6423 IRTemp tTA
= newTemp(Ity_I64
);
6424 assign(tTA
, getIReg64orSP(nn
));
6425 if (nn
== 31) { /* FIXME generate stack alignment check */ }
6426 IRTemp tWB
= IRTemp_INVALID
;
6428 tWB
= newTemp(Ity_I64
);
6429 assign(tWB
, binop(Iop_Add64
,
6431 mm
== BITS5(1,1,1,1,1) ? mkU64(xferSzB
)
6432 : getIReg64orZR(mm
)));
6435 /* Do the writeback, if necessary */
6437 putIReg64orSP(nn
, mkexpr(tWB
));
6440 IRTemp e0
, e1
, e2
, e3
, v0
, v1
, v2
, v3
;
6441 e0
= e1
= e2
= e3
= v0
= v1
= v2
= v3
= IRTemp_INVALID
;
6445 assign(e3
, loadLE(ty
, binop(Iop_Add64
, mkexpr(tTA
),
6446 mkU64(3 * laneSzB
))));
6447 v3
= math_DUP_TO_V128(e3
, ty
);
6448 putQReg128((tt
+3) % 32, math_MAYBE_ZERO_HI64(bitQ
, v3
));
6452 assign(e2
, loadLE(ty
, binop(Iop_Add64
, mkexpr(tTA
),
6453 mkU64(2 * laneSzB
))));
6454 v2
= math_DUP_TO_V128(e2
, ty
);
6455 putQReg128((tt
+2) % 32, math_MAYBE_ZERO_HI64(bitQ
, v2
));
6459 assign(e1
, loadLE(ty
, binop(Iop_Add64
, mkexpr(tTA
),
6460 mkU64(1 * laneSzB
))));
6461 v1
= math_DUP_TO_V128(e1
, ty
);
6462 putQReg128((tt
+1) % 32, math_MAYBE_ZERO_HI64(bitQ
, v1
));
6466 assign(e0
, loadLE(ty
, binop(Iop_Add64
, mkexpr(tTA
),
6467 mkU64(0 * laneSzB
))));
6468 v0
= math_DUP_TO_V128(e0
, ty
);
6469 putQReg128((tt
+0) % 32, math_MAYBE_ZERO_HI64(bitQ
, v0
));
6476 pxStr
[0] = pxStr
[sizeof(pxStr
)-1] = 0;
6478 if (mm
== BITS5(1,1,1,1,1))
6479 vex_sprintf(pxStr
, ", #%u", xferSzB
);
6481 vex_sprintf(pxStr
, ", %s", nameIReg64orZR(mm
));
6483 const HChar
* arr
= nameArr_Q_SZ(bitQ
, sz
);
6484 DIP("ld%ur {v%u.%s .. v%u.%s}, [%s]%s\n",
6486 (tt
+0) % 32, arr
, (tt
+nRegs
-1) % 32, arr
, nameIReg64orSP(nn
),
6491 /* else fall through */
6494 /* ------ LD1/ST1 (single structure, to/from one lane) ------ */
6495 /* ------ LD2/ST2 (single structure, to/from one lane) ------ */
6496 /* ------ LD3/ST3 (single structure, to/from one lane) ------ */
6497 /* ------ LD4/ST4 (single structure, to/from one lane) ------ */
6498 /* 31 29 22 21 20 15 11 9 4
6499 0q 001 1010 L 0 00000 xx0 S sz n t op1 {Vt.T}[ix], [Xn|SP]
6500 0q 001 1011 L 0 m xx0 S sz n t op1 {Vt.T}[ix], [Xn|SP], step
6502 0q 001 1010 L 1 00000 xx0 S sz n t op2 {Vt..t+1.T}[ix], [Xn|SP]
6503 0q 001 1011 L 1 m xx0 S sz n t op2 {Vt..t+1.T}[ix], [Xn|SP], step
6505 0q 001 1010 L 0 00000 xx1 S sz n t op3 {Vt..t+2.T}[ix], [Xn|SP]
6506 0q 001 1011 L 0 m xx1 S sz n t op3 {Vt..t+2.T}[ix], [Xn|SP], step
6508 0q 001 1010 L 1 00000 xx1 S sz n t op4 {Vt..t+3.T}[ix], [Xn|SP]
6509 0q 001 1011 L 1 m xx1 S sz n t op4 {Vt..t+3.T}[ix], [Xn|SP], step
6511 step = if m == 11111 then transfer-size else Xm
6512 op = case L of 1 -> LD ; 0 -> ST
6514 laneszB,ix = case xx:q:S:sz of 00:b:b:bb -> 1, bbbb
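   /* Worked example (illustrative, not from the original source): for
      a 32-bit lane the field pattern is xx == 10 and sz == 00 with the
      lane index taken from q:S, so "st1 {v4.s}[2], [x0]" has q == 1,
      S == 0, hence xx_q_S_sz == 0x28, laneSzB == 4 and ix == 2. */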
6519 if (INSN(31,31) == 0 && INSN(29,24) == BITS6(0,0,1,1,0,1)) {
6520 UInt bitQ
= INSN(30,30);
6521 Bool isPX
= INSN(23,23) == 1;
6522 Bool isLD
= INSN(22,22) == 1;
6523 UInt nRegs
= ((INSN(13,13) << 1) | INSN(21,21)) + 1;
6524 UInt mm
= INSN(20,16);
6525 UInt xx
= INSN(15,14);
6526 UInt bitS
= INSN(12,12);
6527 UInt sz
= INSN(11,10);
6528 UInt nn
= INSN(9,5);
6529 UInt tt
= INSN(4,0);
6533 /* The combination insn[23] == 0 && insn[20:16] != 0 is not allowed. */
6534 if (!isPX
&& mm
!= 0)
6537 UInt laneSzB
= 0; /* invalid */
6538 UInt ix
= 16; /* invalid */
6540 UInt xx_q_S_sz
= (xx
<< 4) | (bitQ
<< 3) | (bitS
<< 2) | sz
;
6541 switch (xx_q_S_sz
) {
6542 case 0x00: case 0x01: case 0x02: case 0x03:
6543 case 0x04: case 0x05: case 0x06: case 0x07:
6544 case 0x08: case 0x09: case 0x0A: case 0x0B:
6545 case 0x0C: case 0x0D: case 0x0E: case 0x0F:
6546 laneSzB
= 1; ix
= xx_q_S_sz
& 0xF;
6548 case 0x10: case 0x12: case 0x14: case 0x16:
6549 case 0x18: case 0x1A: case 0x1C: case 0x1E:
6550 laneSzB
= 2; ix
= (xx_q_S_sz
>> 1) & 7;
6552 case 0x20: case 0x24: case 0x28: case 0x2C:
6553 laneSzB
= 4; ix
= (xx_q_S_sz
>> 2) & 3;
6555 case 0x21: case 0x29:
6556 laneSzB
= 8; ix
= (xx_q_S_sz
>> 3) & 1;
6562 if (valid
&& laneSzB
!= 0) {
6564 IRType ty
= integerIRTypeOfSize(laneSzB
);
6565 UInt xferSzB
= laneSzB
* nRegs
;
6567 /* Generate the transfer address (TA) and if necessary the
6568 writeback address (WB) */
6569 IRTemp tTA
= newTemp(Ity_I64
);
6570 assign(tTA
, getIReg64orSP(nn
));
6571 if (nn
== 31) { /* FIXME generate stack alignment check */ }
6572 IRTemp tWB
= IRTemp_INVALID
;
6574 tWB
= newTemp(Ity_I64
);
6575 assign(tWB
, binop(Iop_Add64
,
6577 mm
== BITS5(1,1,1,1,1) ? mkU64(xferSzB
)
6578 : getIReg64orZR(mm
)));
6581 /* Do the writeback, if necessary */
6583 putIReg64orSP(nn
, mkexpr(tWB
));
6589 = binop(Iop_Add64
, mkexpr(tTA
), mkU64(3 * laneSzB
));
6591 putQRegLane((tt
+3) % 32, ix
, loadLE(ty
, addr
));
6593 storeLE(addr
, getQRegLane((tt
+3) % 32, ix
, ty
));
6599 = binop(Iop_Add64
, mkexpr(tTA
), mkU64(2 * laneSzB
));
6601 putQRegLane((tt
+2) % 32, ix
, loadLE(ty
, addr
));
6603 storeLE(addr
, getQRegLane((tt
+2) % 32, ix
, ty
));
6609 = binop(Iop_Add64
, mkexpr(tTA
), mkU64(1 * laneSzB
));
6611 putQRegLane((tt
+1) % 32, ix
, loadLE(ty
, addr
));
6613 storeLE(addr
, getQRegLane((tt
+1) % 32, ix
, ty
));
6619 = binop(Iop_Add64
, mkexpr(tTA
), mkU64(0 * laneSzB
));
6621 putQRegLane((tt
+0) % 32, ix
, loadLE(ty
, addr
));
6623 storeLE(addr
, getQRegLane((tt
+0) % 32, ix
, ty
));
6632 pxStr
[0] = pxStr
[sizeof(pxStr
)-1] = 0;
6634 if (mm
== BITS5(1,1,1,1,1))
6635 vex_sprintf(pxStr
, ", #%u", xferSzB
);
6637 vex_sprintf(pxStr
, ", %s", nameIReg64orZR(mm
));
6639 const HChar
* arr
= nameArr_Q_SZ(bitQ
, sz
);
6640 DIP("%s%u {v%u.%s .. v%u.%s}[%u], [%s]%s\n",
6641 isLD
? "ld" : "st", nRegs
,
6642 (tt
+0) % 32, arr
, (tt
+nRegs
-1) % 32, arr
,
6643 ix
, nameIReg64orSP(nn
), pxStr
);
6647 /* else fall through */
6650 /* ------------------ LD{,A}X{R,RH,RB} ------------------ */
6651 /* ------------------ ST{,L}X{R,RH,RB} ------------------ */
6652 /* 31 29 23 20 14 9 4
6653 sz 001000 010 11111 0 11111 n t LDX{R,RH,RB} Rt, [Xn|SP]
6654 sz 001000 010 11111 1 11111 n t LDAX{R,RH,RB} Rt, [Xn|SP]
6655 sz 001000 000 s 0 11111 n t STX{R,RH,RB} Ws, Rt, [Xn|SP]
6656 sz 001000 000 s 1 11111 n t STLX{R,RH,RB} Ws, Rt, [Xn|SP]
6658 /* For the "standard" implementation we pass through the LL and SC to
6659 the host. For the "fallback" implementation, for details see
6660 https://bugs.kde.org/show_bug.cgi?id=344524 and
6661 https://bugs.kde.org/show_bug.cgi?id=369459,
6665 gs.LLsize = load_size // 1, 2, 4 or 8
6667 gs.LLdata = zeroExtend(*addr)
6669 StoreCond(addr, data)
6670 tmp_LLsize = gs.LLsize
6671 gs.LLsize = 0 // "no transaction"
6672 if tmp_LLsize != store_size -> fail
6673 if addr != gs.LLaddr -> fail
6674 if zeroExtend(*addr) != gs.LLdata -> fail
6675 cas_ok = CAS(store_size, addr, gs.LLdata -> data)
6679 When thread scheduled
6680 gs.LLsize = 0 // "no transaction"
6681 (coregrind/m_scheduler/scheduler.c, run_thread_for_a_while()
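   /* Illustrative example (not part of the original comments): a
      typical guest retry loop such as

         retry: ldaxr w1, [x0]
                add   w1, w1, #1
                stlxr w2, w1, [x0]
                cbnz  w2, retry

      maps onto this fallback scheme as follows.  The LDAXR records the
      access size, address and loaded value in LLsize/LLaddr/LLdata.
      The STLXR first clears LLsize, then re-checks size, address and
      current memory contents against what was recorded, and only then
      attempts a real CAS.  Any mismatch -- including LLsize having been
      zeroed by an intervening thread switch -- makes the STLXR report
      failure in w2, so the guest loop simply retries. */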
6684 if (INSN(29,24) == BITS6(0,0,1,0,0,0)
6685 && (INSN(23,21) & BITS3(1,0,1)) == BITS3(0,0,0)
6686 && INSN(14,10) == BITS5(1,1,1,1,1)) {
6687 UInt szBlg2
= INSN(31,30);
6688 Bool isLD
= INSN(22,22) == 1;
6689 Bool isAcqOrRel
= INSN(15,15) == 1;
6690 UInt ss
= INSN(20,16);
6691 UInt nn
= INSN(9,5);
6692 UInt tt
= INSN(4,0);
6694 vassert(szBlg2
< 4);
6695 UInt szB
= 1 << szBlg2
; /* 1, 2, 4 or 8 */
6696 IRType ty
= integerIRTypeOfSize(szB
);
6697 const HChar
* suffix
[4] = { "rb", "rh", "r", "r" };
6699 IRTemp ea
= newTemp(Ity_I64
);
6700 assign(ea
, getIReg64orSP(nn
));
6701 gen_SIGBUS_if_not_XX_aligned(ea
, szB
);
6703 if (isLD
&& ss
== BITS5(1,1,1,1,1)) {
6704 IRTemp res
= newTemp(ty
);
6705 if (abiinfo
->guest__use_fallback_LLSC
) {
6706 // Do the load first so we don't update any guest state
6708 IRTemp loaded_data64
= newTemp(Ity_I64
);
6709 assign(loaded_data64
, widenUto64(ty
, loadLE(ty
, mkexpr(ea
))));
6710 stmt( IRStmt_Put( OFFB_LLSC_DATA_LO64
, mkexpr(loaded_data64
) ));
6711 stmt( IRStmt_Put( OFFB_LLSC_DATA_HI64
, mkU64(0) ));
6712 stmt( IRStmt_Put( OFFB_LLSC_ADDR
, mkexpr(ea
) ));
6713 stmt( IRStmt_Put( OFFB_LLSC_SIZE
, mkU64(szB
) ));
6714 putIReg64orZR(tt
, mkexpr(loaded_data64
));
6716 stmt(IRStmt_LLSC(Iend_LE
, res
, mkexpr(ea
), NULL
/*LL*/));
6717 putIReg64orZR(tt
, widenUto64(ty
, mkexpr(res
)));
6720 stmt(IRStmt_MBE(Imbe_Fence
));
6722 DIP("ld%sx%s %s, [%s] %s\n", isAcqOrRel
? "a" : "", suffix
[szBlg2
],
6723 nameIRegOrZR(szB
== 8, tt
), nameIReg64orSP(nn
),
6724 abiinfo
->guest__use_fallback_LLSC
6725 ? "(fallback implementation)" : "");
6730 stmt(IRStmt_MBE(Imbe_Fence
));
6732 IRExpr
* data
= narrowFrom64(ty
, getIReg64orZR(tt
));
6733 if (abiinfo
->guest__use_fallback_LLSC
) {
6734 // This is really ugly, since we don't have any way to do
6735 // proper if-then-else. First, set up as if the SC failed,
6736 // and jump forwards if it really has failed.
6738 // Continuation address
6739 IRConst
* nia
= IRConst_U64(guest_PC_curr_instr
+ 4);
6741 // "the SC failed". Any non-zero value means failure.
6742 putIReg64orZR(ss
, mkU64(1));
6744 IRTemp tmp_LLsize
= newTemp(Ity_I64
);
6745 assign(tmp_LLsize
, IRExpr_Get(OFFB_LLSC_SIZE
, Ity_I64
));
6746 stmt( IRStmt_Put( OFFB_LLSC_SIZE
, mkU64(0) // "no transaction"
6748 // Fail if no or wrong-size transaction
6749 vassert(szB
== 8 || szB
== 4 || szB
== 2 || szB
== 1);
6751 binop(Iop_CmpNE64
, mkexpr(tmp_LLsize
), mkU64(szB
)),
6752 Ijk_Boring
, nia
, OFFB_PC
6754 // Fail if the address doesn't match the LL address
6756 binop(Iop_CmpNE64
, mkexpr(ea
),
6757 IRExpr_Get(OFFB_LLSC_ADDR
, Ity_I64
)),
6758 Ijk_Boring
, nia
, OFFB_PC
6760 // Fail if the data doesn't match the LL data
6761 IRTemp llsc_data64
= newTemp(Ity_I64
);
6762 assign(llsc_data64
, IRExpr_Get(OFFB_LLSC_DATA_LO64
, Ity_I64
));
6764 binop(Iop_CmpNE64
, widenUto64(ty
, loadLE(ty
, mkexpr(ea
))),
6765 mkexpr(llsc_data64
)),
6766 Ijk_Boring
, nia
, OFFB_PC
6768 // Try to CAS the new value in.
6769 IRTemp old
= newTemp(ty
);
6770 IRTemp expd
= newTemp(ty
);
6771 assign(expd
, narrowFrom64(ty
, mkexpr(llsc_data64
)));
6772 stmt( IRStmt_CAS(mkIRCAS(/*oldHi*/IRTemp_INVALID
, old
,
6773 Iend_LE
, mkexpr(ea
),
6774 /*expdHi*/NULL
, mkexpr(expd
),
6775 /*dataHi*/NULL
, data
6777 // Fail if the CAS failed (viz, old != expd)
6780 widenUto64(ty
, mkexpr(old
)),
6781 widenUto64(ty
, mkexpr(expd
))),
6782 Ijk_Boring
, nia
, OFFB_PC
6784 // Otherwise we succeeded (!)
6785 putIReg64orZR(ss
, mkU64(0));
6787 IRTemp res
= newTemp(Ity_I1
);
6788 stmt(IRStmt_LLSC(Iend_LE
, res
, mkexpr(ea
), data
));
6789 /* IR semantics: res is 1 if store succeeds, 0 if it fails.
6790 Need to set rS to 1 on failure, 0 on success. */
6791 putIReg64orZR(ss
, binop(Iop_Xor64
, unop(Iop_1Uto64
, mkexpr(res
)),
6794 DIP("st%sx%s %s, %s, [%s] %s\n", isAcqOrRel
? "a" : "", suffix
[szBlg2
],
6795 nameIRegOrZR(False
, ss
),
6796 nameIRegOrZR(szB
== 8, tt
), nameIReg64orSP(nn
),
6797 abiinfo
->guest__use_fallback_LLSC
6798 ? "(fallback implementation)" : "");
6801 /* else fall through */
6804 /* -------------------- LD{,A}XP -------------------- */
6805 /* -------------------- ST{,L}XP -------------------- */
6806 /* 31 30 29 23 20 15 14 9 4
6807 1 sz 001000 011 11111 0 t2 n t1 LDXP Rt1, Rt2, [Xn|SP]
6808 1 sz 001000 011 11111 1 t2 n t1 LDAXP Rt1, Rt2, [Xn|SP]
6809 1 sz 001000 001 s 0 t2 n t1 STXP Ws, Rt1, Rt2, [Xn|SP]
6810 1 sz 001000 001 s 1 t2 n t1 STLXP Ws, Rt1, Rt2, [Xn|SP]
6812 /* See just above, "LD{,A}X{R,RH,RB} / ST{,L}X{R,RH,RB}", for detailed
6813 comments about this implementation. Note the 'sz' field here is only 1
6814 bit; above, it is 2 bits, and has a different encoding.
6816 if (INSN(31,31) == 1
6817 && INSN(29,24) == BITS6(0,0,1,0,0,0)
6818 && (INSN(23,21) & BITS3(1,0,1)) == BITS3(0,0,1)) {
6819 Bool elemIs64
= INSN(30,30) == 1;
6820 Bool isLD
= INSN(22,22) == 1;
6821 Bool isAcqOrRel
= INSN(15,15) == 1;
6822 UInt ss
= INSN(20,16);
6823 UInt tt2
= INSN(14,10);
6824 UInt nn
= INSN(9,5);
6825 UInt tt1
= INSN(4,0);
6827 UInt elemSzB
= elemIs64
? 8 : 4;
6828 UInt fullSzB
= 2 * elemSzB
;
6829 IRType elemTy
= integerIRTypeOfSize(elemSzB
);
6830 IRType fullTy
= integerIRTypeOfSize(fullSzB
);
6832 IRTemp ea
= newTemp(Ity_I64
);
6833 assign(ea
, getIReg64orSP(nn
));
6834 gen_SIGBUS_if_not_XX_aligned(ea
, fullSzB
);
6836 if (isLD
&& ss
== BITS5(1,1,1,1,1)) {
6837 if (abiinfo
->guest__use_fallback_LLSC
) {
6838 // Fallback implementation of LL.
6839 // Do the load first so we don't update any guest state if it
6840 // faults. Assumes little-endian guest.
6841 if (fullTy
== Ity_I64
) {
6842 vassert(elemSzB
== 4);
6843 IRTemp loaded_data64
= newTemp(Ity_I64
);
6844 assign(loaded_data64
, loadLE(fullTy
, mkexpr(ea
)));
6845 stmt( IRStmt_Put( OFFB_LLSC_DATA_LO64
, mkexpr(loaded_data64
) ));
6846 stmt( IRStmt_Put( OFFB_LLSC_DATA_HI64
, mkU64(0) ));
6847 stmt( IRStmt_Put( OFFB_LLSC_ADDR
, mkexpr(ea
) ));
6848 stmt( IRStmt_Put( OFFB_LLSC_SIZE
, mkU64(8) ));
6849 putIReg64orZR(tt1
, unop(Iop_32Uto64
,
6851 mkexpr(loaded_data64
))));
6852 putIReg64orZR(tt2
, unop(Iop_32Uto64
,
6854 mkexpr(loaded_data64
))));
6856 vassert(elemSzB
== 8 && fullTy
== Ity_I128
);
6857 IRTemp loaded_data128
= newTemp(Ity_I128
);
6858 // Hack: do the load as V128 rather than I128 so as to avoid
6859 // having to implement I128 loads in the arm64 back end.
6860 assign(loaded_data128
, unop(Iop_ReinterpV128asI128
,
6861 loadLE(Ity_V128
, mkexpr(ea
))));
6862 IRTemp loaded_data_lo64
= newTemp(Ity_I64
);
6863 IRTemp loaded_data_hi64
= newTemp(Ity_I64
);
6864 assign(loaded_data_lo64
, unop(Iop_128to64
,
6865 mkexpr(loaded_data128
)));
6866 assign(loaded_data_hi64
, unop(Iop_128HIto64
,
6867 mkexpr(loaded_data128
)));
6868 stmt( IRStmt_Put( OFFB_LLSC_DATA_LO64
,
6869 mkexpr(loaded_data_lo64
) ));
6870 stmt( IRStmt_Put( OFFB_LLSC_DATA_HI64
,
6871 mkexpr(loaded_data_hi64
) ));
6872 stmt( IRStmt_Put( OFFB_LLSC_ADDR
, mkexpr(ea
) ));
6873 stmt( IRStmt_Put( OFFB_LLSC_SIZE
, mkU64(16) ));
6874 putIReg64orZR(tt1
, mkexpr(loaded_data_lo64
));
6875 putIReg64orZR(tt2
, mkexpr(loaded_data_hi64
));
6878 // Non-fallback implementation of LL.
6879 IRTemp res
= newTemp(fullTy
); // I64 or I128
6880 stmt(IRStmt_LLSC(Iend_LE
, res
, mkexpr(ea
), NULL
/*LL*/));
6881 // Assuming a little-endian guest here. Rt1 goes at the lower
6882 // address, so it must live in the least significant half of `res`.
6883 IROp opGetLO
= fullTy
== Ity_I128
? Iop_128to64
: Iop_64to32
;
6884 IROp opGetHI
= fullTy
== Ity_I128
? Iop_128HIto64
: Iop_64HIto32
;
6885 putIReg64orZR(tt1
, widenUto64(elemTy
, unop(opGetLO
, mkexpr(res
))));
6886 putIReg64orZR(tt2
, widenUto64(elemTy
, unop(opGetHI
, mkexpr(res
))));
6889 stmt(IRStmt_MBE(Imbe_Fence
));
6891 DIP("ld%sxp %s, %s, [%s] %s\n",
6892 isAcqOrRel
? (isLD
? "a" : "l") : "",
6893 nameIRegOrZR(elemSzB
== 8, tt1
),
6894 nameIRegOrZR(elemSzB
== 8, tt2
),
6896 abiinfo
->guest__use_fallback_LLSC
6897 ? "(fallback implementation)" : "");
6902 stmt(IRStmt_MBE(Imbe_Fence
));
6904 if (abiinfo
->guest__use_fallback_LLSC
) {
6905 // Fallback implementation of SC.
6906 // This is really ugly, since we don't have any way to do
6907 // proper if-then-else. First, set up as if the SC failed,
6908 // and jump forwards if it really has failed.
6910 // Continuation address
6911 IRConst
* nia
= IRConst_U64(guest_PC_curr_instr
+ 4);
6913 // "the SC failed". Any non-zero value means failure.
6914 putIReg64orZR(ss
, mkU64(1));
6916 IRTemp tmp_LLsize
= newTemp(Ity_I64
);
6917 assign(tmp_LLsize
, IRExpr_Get(OFFB_LLSC_SIZE
, Ity_I64
));
6918 stmt( IRStmt_Put( OFFB_LLSC_SIZE
, mkU64(0) // "no transaction"
6920 // Fail if no or wrong-size transaction
6921 vassert((fullSzB
== 8 && fullTy
== Ity_I64
)
6922 || (fullSzB
== 16 && fullTy
== Ity_I128
));
6924 binop(Iop_CmpNE64
, mkexpr(tmp_LLsize
), mkU64(fullSzB
)),
6925 Ijk_Boring
, nia
, OFFB_PC
6927 // Fail if the address doesn't match the LL address
6929 binop(Iop_CmpNE64
, mkexpr(ea
),
6930 IRExpr_Get(OFFB_LLSC_ADDR
, Ity_I64
)),
6931 Ijk_Boring
, nia
, OFFB_PC
6933 // The data to be stored.
6934 IRTemp store_data
= newTemp(fullTy
);
6935 if (fullTy
== Ity_I64
) {
6938 narrowFrom64(Ity_I32
, getIReg64orZR(tt2
)),
6939 narrowFrom64(Ity_I32
, getIReg64orZR(tt1
))));
6942 binop(Iop_64HLto128
,
6943 getIReg64orZR(tt2
), getIReg64orZR(tt1
)));
6946 if (fullTy
== Ity_I64
) {
6947 // 64 bit (2x32 bit) path
6948 // Fail if the data in memory doesn't match the data stashed by
6950 IRTemp llsc_data_lo64
= newTemp(Ity_I64
);
6951 assign(llsc_data_lo64
,
6952 IRExpr_Get(OFFB_LLSC_DATA_LO64
, Ity_I64
));
6954 binop(Iop_CmpNE64
, loadLE(Ity_I64
, mkexpr(ea
)),
6955 mkexpr(llsc_data_lo64
)),
6956 Ijk_Boring
, nia
, OFFB_PC
6958 // Try to CAS the new value in.
6959 IRTemp old
= newTemp(Ity_I64
);
6960 IRTemp expd
= newTemp(Ity_I64
);
6961 assign(expd
, mkexpr(llsc_data_lo64
));
6962 stmt( IRStmt_CAS(mkIRCAS(/*oldHi*/IRTemp_INVALID
, old
,
6963 Iend_LE
, mkexpr(ea
),
6964 /*expdHi*/NULL
, mkexpr(expd
),
6965 /*dataHi*/NULL
, mkexpr(store_data
)
6967 // Fail if the CAS failed (viz, old != expd)
6969 binop(Iop_CmpNE64
, mkexpr(old
), mkexpr(expd
)),
6970 Ijk_Boring
, nia
, OFFB_PC
6973 // 128 bit (2x64 bit) path
6974 // Fail if the data in memory doesn't match the data stashed by
6976 IRTemp llsc_data_lo64
= newTemp(Ity_I64
);
6977 assign(llsc_data_lo64
,
6978 IRExpr_Get(OFFB_LLSC_DATA_LO64
, Ity_I64
));
6979 IRTemp llsc_data_hi64
= newTemp(Ity_I64
);
6980 assign(llsc_data_hi64
,
6981 IRExpr_Get(OFFB_LLSC_DATA_HI64
, Ity_I64
));
6982 IRTemp data_at_ea
= newTemp(Ity_I128
);
6984 unop(Iop_ReinterpV128asI128
,
6985 loadLE(Ity_V128
, mkexpr(ea
))));
6988 unop(Iop_128to64
, mkexpr(data_at_ea
)),
6989 mkexpr(llsc_data_lo64
)),
6990 Ijk_Boring
, nia
, OFFB_PC
6994 unop(Iop_128HIto64
, mkexpr(data_at_ea
)),
6995 mkexpr(llsc_data_hi64
)),
6996 Ijk_Boring
, nia
, OFFB_PC
6998 // Try to CAS the new value in.
6999 IRTemp old_lo64
= newTemp(Ity_I64
);
7000 IRTemp old_hi64
= newTemp(Ity_I64
);
7001 IRTemp expd_lo64
= newTemp(Ity_I64
);
7002 IRTemp expd_hi64
= newTemp(Ity_I64
);
7003 IRTemp store_data_lo64
= newTemp(Ity_I64
);
7004 IRTemp store_data_hi64
= newTemp(Ity_I64
);
7005 assign(expd_lo64
, mkexpr(llsc_data_lo64
));
7006 assign(expd_hi64
, mkexpr(llsc_data_hi64
));
7007 assign(store_data_lo64
, unop(Iop_128to64
, mkexpr(store_data
)));
7008 assign(store_data_hi64
, unop(Iop_128HIto64
, mkexpr(store_data
)));
7009 stmt( IRStmt_CAS(mkIRCAS(old_hi64
, old_lo64
,
7010 Iend_LE
, mkexpr(ea
),
7011 mkexpr(expd_hi64
), mkexpr(expd_lo64
),
7012 mkexpr(store_data_hi64
),
7013 mkexpr(store_data_lo64
)
7015 // Fail if the CAS failed (viz, old != expd)
7017 binop(Iop_CmpNE64
, mkexpr(old_lo64
), mkexpr(expd_lo64
)),
7018 Ijk_Boring
, nia
, OFFB_PC
7021 binop(Iop_CmpNE64
, mkexpr(old_hi64
), mkexpr(expd_hi64
)),
7022 Ijk_Boring
, nia
, OFFB_PC
7025 // Otherwise we succeeded (!)
7026 putIReg64orZR(ss
, mkU64(0));
7028 // Non-fallback implementation of SC.
7029 IRTemp res
= newTemp(Ity_I1
);
7030 IRExpr
* dataLO
= narrowFrom64(elemTy
, getIReg64orZR(tt1
));
7031 IRExpr
* dataHI
= narrowFrom64(elemTy
, getIReg64orZR(tt2
));
7032 IROp opMerge
= fullTy
== Ity_I128
? Iop_64HLto128
: Iop_32HLto64
;
7033 IRExpr
* data
= binop(opMerge
, dataHI
, dataLO
);
7034 // Assuming a little-endian guest here. Rt1 goes at the lower
7035 // address, so it must live in the least significant half of `data`.
7036 stmt(IRStmt_LLSC(Iend_LE
, res
, mkexpr(ea
), data
));
7037 /* IR semantics: res is 1 if store succeeds, 0 if it fails.
7038 Need to set rS to 1 on failure, 0 on success. */
7039 putIReg64orZR(ss
, binop(Iop_Xor64
, unop(Iop_1Uto64
, mkexpr(res
)),
7042 DIP("st%sxp %s, %s, %s, [%s] %s\n",
7043 isAcqOrRel
? (isLD
? "a" : "l") : "",
7044 nameIRegOrZR(False
, ss
),
7045 nameIRegOrZR(elemSzB
== 8, tt1
),
7046 nameIRegOrZR(elemSzB
== 8, tt2
),
7048 abiinfo
->guest__use_fallback_LLSC
7049 ? "(fallback implementation)" : "");
7052 /* else fall through */
   /* ------------------ LDA{R,RH,RB} ------------------ */
   /* ------------------ STL{R,RH,RB} ------------------ */
   /* 31 29     23        20    14 9 4
      sz 001000 110 11111 1 11111 n t   LDAR<sz> Rt, [Xn|SP]
      sz 001000 100 11111 1 11111 n t   STLR<sz> Rt, [Xn|SP]
   */
   if (INSN(29,23) == BITS7(0,0,1,0,0,0,1)
       && INSN(21,10) == BITS12(0,1,1,1,1,1,1,1,1,1,1,1)) {
      UInt szBlg2 = INSN(31,30);
      Bool isLD   = INSN(22,22) == 1;
      UInt nn     = INSN(9,5);
      UInt tt     = INSN(4,0);

      vassert(szBlg2 < 4);
      UInt   szB = 1 << szBlg2; /* 1, 2, 4 or 8 */
      IRType ty  = integerIRTypeOfSize(szB);
      const HChar* suffix[4] = { "rb", "rh", "r", "r" };

      IRTemp ea = newTemp(Ity_I64);
      assign(ea, getIReg64orSP(nn));
      gen_SIGBUS_if_not_XX_aligned(ea, szB);

      if (isLD) {
         IRTemp res = newTemp(ty);
         assign(res, loadLE(ty, mkexpr(ea)));
         putIReg64orZR(tt, widenUto64(ty, mkexpr(res)));
         stmt(IRStmt_MBE(Imbe_Fence));
         DIP("lda%s %s, [%s]\n", suffix[szBlg2],
             nameIRegOrZR(szB == 8, tt), nameIReg64orSP(nn));
      } else {
         stmt(IRStmt_MBE(Imbe_Fence));
         IRExpr* data = narrowFrom64(ty, getIReg64orZR(tt));
         storeLE(mkexpr(ea), data);
         DIP("stl%s %s, [%s]\n", suffix[szBlg2],
             nameIRegOrZR(szB == 8, tt), nameIReg64orSP(nn));
      }
7094 /* The PRFM cases that follow are possibly allow Rt values (the
7095 prefetch operation) which are not allowed by the documentation.
7096 This should be looked into. */
7097 /* ------------------ PRFM (immediate) ------------------ */
7099 11 111 00110 imm12 n t PRFM pfrop=Rt, [Xn|SP, #pimm]
7101 if (INSN(31,22) == BITS10(1,1,1,1,1,0,0,1,1,0)) {
7102 UInt imm12
= INSN(21,10);
7103 UInt nn
= INSN(9,5);
7104 UInt tt
= INSN(4,0);
7105 /* Generating any IR here is pointless, except for documentation
7106 purposes, as it will get optimised away later. */
7107 IRTemp ea
= newTemp(Ity_I64
);
7108 assign(ea
, binop(Iop_Add64
, getIReg64orSP(nn
), mkU64(imm12
* 8)));
7109 DIP("prfm prfop=%u, [%s, #%u]\n", tt
, nameIReg64orSP(nn
), imm12
* 8);
7113 /* ------------------ PRFM (register) ------------------ */
7114 /* 31 29 22 20 15 12 11 9 4
7115 11 1110001 01 Rm opt S 10 Rn Rt PRFM pfrop=Rt, [Xn|SP, R<m>{ext/sh}]
7117 if (INSN(31,21) == BITS11(1,1,1,1,1,0,0,0,1,0,1)
7118 && INSN(11,10) == BITS2(1,0)) {
7120 UInt tt
= INSN(4,0);
7121 IRTemp ea
= gen_indexed_EA(dis_buf
, insn
, True
/*to/from int regs*/);
7122 if (ea
!= IRTemp_INVALID
) {
7123 /* No actual code to generate. */
7124 DIP("prfm prfop=%u, %s\n", tt
, dis_buf
);
7129 /* ------------------ PRFM (unscaled offset) ------------------ */
7130 /* 31 29 22 20 11 9 4
7131 11 1110001 00 imm9 00 Rn Rt PRFM pfrop=Rt, [Xn|SP, #simm]
7133 if (INSN(31,21) == BITS11(1,1, 1,1,1,0,0,0,1, 0,0)
7134 && INSN(11,10) == BITS2(0,0)) {
7135 ULong imm9
= INSN(20,12);
7136 UInt nn
= INSN(9,5);
7137 UInt tt
= INSN(4,0);
7138 ULong offset
= sx_to_64(imm9
, 9);
7139 IRTemp ea
= newTemp(Ity_I64
);
7140 assign(ea
, binop(Iop_Add64
, getIReg64orSP(nn
), mkU64(offset
)));
7141 /* No actual code to generate. */
7142 DIP("prfum prfop=%u, [%s, #0x%llx]\n", tt
, nameIReg64orSP(nn
), offset
);
7146 /* ---------------- ARMv8.1-LSE: Atomic Memory Operations ---------------- */
7147 /* 31 29 23 22 21 20 15 11 9 4
7148 sz 111000 A R 1 s 0000 00 n t LDADD{,A}{,L}<sz> <Rs>, <Rt>, [<Xn|SP>]
7149 sz 111000 A R 1 s 0001 00 n t LDCLR{,A}{,L}<sz> <Rs>, <Rt>, [<Xn|SP>]
7150 sz 111000 A R 1 s 0010 00 n t LDEOR{,A}{,L}<sz> <Rs>, <Rt>, [<Xn|SP>]
7151 sz 111000 A R 1 s 0011 00 n t LDSET{,A}{,L}<sz> <Rs>, <Rt>, [<Xn|SP>]
7152 sz 111000 A R 1 s 0100 00 n t LDSMAX{,A}{,L}<sz> <Rs>, <Rt>, [<Xn|SP>]
7153 sz 111000 A R 1 s 0101 00 n t LDSMIN{,A}{,L}<sz> <Rs>, <Rt>, [<Xn|SP>]
7154 sz 111000 A R 1 s 0110 00 n t LDUMAX{,A}{,L}<sz> <Rs>, <Rt>, [<Xn|SP>]
7155 sz 111000 A R 1 s 0111 00 n t LDUMIN{,A}{,L}<sz> <Rs>, <Rt>, [<Xn|SP>]
7156 sz 111000 A R 1 s 1000 00 n t SWP{,A}{,L}<sz> <Rs>, <Rt>, [<Xn|SP>]
7158 if (INSN(29,24) == BITS6(1,1,1,0,0,0)
7160 && (INSN(15,12) <= BITS4(1,0,0,0))
7161 && INSN(11,10) == BITS2(0,0)) {
7162 UInt szBlg2
= INSN(31,30);
7163 Bool isAcq
= INSN(23,23) == 1;
7164 Bool isRel
= INSN(22,22) == 1;
7165 UInt ss
= INSN(20,16);
7166 UInt opc
= INSN(15,12);
7167 UInt nn
= INSN(9,5);
7168 UInt tt
= INSN(4,0);
7170 const HChar
* nm
= NULL
;
7171 const HChar
* suffix
[4] = { "b", "h", "", "" };
7173 vassert(szBlg2
< 4);
7174 UInt szB
= 1 << szBlg2
; /* 1, 2, 4 or 8 bytes*/
7175 IRType ty
= integerIRTypeOfSize(szB
);
7176 Bool is64
= szB
== 8;
7177 Bool isSigned
= (opc
== 4) || (opc
== 5) /*smax || smin*/;
7179 // IR used to emulate these atomic memory ops:
7182 // 3) widen operands and do arithmetic/logic op
7183 // 4) cas to see if target memory updated
7185 // 6) repeat from 1) if cas says target memory not updated
7186 // 7) update register
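   // Illustrative example (not part of the original comments): for
   // "ldaddal w5, w6, [x7]" this scheme loads the old value from [x7],
   // adds w5 to it, and CASes the sum back only if memory still holds
   // the old value.  If the CAS fails, the guest PC is left pointing at
   // this instruction so the whole sequence re-executes; on success the
   // old value lands in w6, with fences providing the acquire/release
   // semantics.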
7188 IRTemp ea
= newTemp(Ity_I64
);
7189 assign(ea
, getIReg64orSP(nn
));
7190 gen_SIGBUS_if_not_XX_aligned(ea
, szB
);
7192 // Insert barrier before loading for acquire and acquire-release variants:
7194 if (isAcq
&& (tt
!= 31))
7195 stmt(IRStmt_MBE(Imbe_Fence
));
7197 // Load LHS from memory, RHS from register.
7198 IRTemp orig
= newTemp(ty
);
7199 assign(orig
, loadLE(ty
, mkexpr(ea
)));
7200 IRExpr
*lhs
= mkexpr(orig
);
7201 IRExpr
*rhs
= narrowFrom64(ty
, getIReg64orZR(ss
));
7204 lhs
= isSigned
? widenSto64(ty
, lhs
) : widenUto64(ty
, lhs
);
7205 rhs
= isSigned
? widenSto64(ty
, rhs
) : widenUto64(ty
, rhs
);
7207 // Perform the operation.
7211 res
= binop(Iop_Add64
, lhs
, rhs
);
7215 res
= binop(Iop_And64
, lhs
, unop(mkNOT(Ity_I64
), rhs
));
7219 res
= binop(Iop_Xor64
, lhs
, rhs
);
7223 res
= binop(Iop_Or64
, lhs
, rhs
);
7227 res
= IRExpr_ITE(binop(Iop_CmpLT64S
, lhs
, rhs
), rhs
, lhs
);
7231 res
= IRExpr_ITE(binop(Iop_CmpLT64S
, lhs
, rhs
), lhs
, rhs
);
7235 res
= IRExpr_ITE(binop(Iop_CmpLT64U
, lhs
, rhs
), rhs
, lhs
);
7239 res
= IRExpr_ITE(binop(Iop_CmpLT64U
, lhs
, rhs
), lhs
, rhs
);
7250 // Store the result back if LHS remains unchanged in memory.
7251 IRTemp old
= newTemp(ty
);
7252 stmt( IRStmt_CAS(mkIRCAS(/*oldHi*/IRTemp_INVALID
, old
,
7253 Iend_LE
, mkexpr(ea
),
7254 /*expdHi*/NULL
, mkexpr(orig
),
7255 /*dataHi*/NULL
, narrowFrom64(ty
, res
))) );
7257 // Insert barrier after storing for release and acquire-release variants:
7260 stmt(IRStmt_MBE(Imbe_Fence
));
7262 // Retry if the CAS failed (i.e. when old != orig).
7263 IRConst
* nia
= IRConst_U64(guest_PC_curr_instr
);
7265 binop(Iop_CasCmpNE64
,
7266 widenUto64(ty
, mkexpr(old
)),
7267 widenUto64(ty
, mkexpr(orig
))),
7268 Ijk_Boring
, nia
, OFFB_PC
));
7269 // Otherwise we succeeded.
7270 putIReg64orZR(tt
, widenUto64(ty
, mkexpr(old
)));
7272 DIP("%s%s%s%s %s, %s, [%s]\n", nm
, isAcq
? "a" : "", isRel
? "l" : "",
7273 suffix
[szBlg2
], nameIRegOrZR(is64
, ss
), nameIRegOrZR(is64
, tt
),
7274 nameIReg64orSP(nn
));
7278 /* ------------------ ARMv8.1-LSE: Compare-and-Swap ------------------ */
7279 /* 31 29 22 21 20 15 14 9 4
7280 sz 0010001 A 1 s R 11111 n t CAS{,A}{,L}<sz> <Rs>, <Rt>, [<Xn|SP>]
7282 if (INSN(29,23) == BITS7(0,0,1,0,0,0,1)
7284 && INSN(14,10) == BITS5(1,1,1,1,1)) {
7285 UInt szBlg2
= INSN(31,30);
7286 Bool isAcq
= INSN(22,22) == 1;
7287 Bool isRel
= INSN(15,15) == 1;
7288 UInt ss
= INSN(20,16);
7289 UInt nn
= INSN(9,5);
7290 UInt tt
= INSN(4,0);
7292 const HChar
* suffix
[4] = { "b", "h", "", "" };
7294 UInt szB
= 1 << szBlg2
; /* 1, 2, 4 or 8 */
7295 IRType ty
= integerIRTypeOfSize(szB
);
7296 Bool is64
= szB
== 8;
7298 IRTemp ea
= newTemp(Ity_I64
);
7299 assign(ea
, getIReg64orSP(nn
));
7300 gen_SIGBUS_if_not_XX_aligned(ea
, szB
);
7302 IRExpr
*exp
= narrowFrom64(ty
, getIReg64orZR(ss
));
7303 IRExpr
*new = narrowFrom64(ty
, getIReg64orZR(tt
));
7306 stmt(IRStmt_MBE(Imbe_Fence
));
7308 // Store the result back if LHS remains unchanged in memory.
7309 IRTemp old
= newTemp(ty
);
7310 stmt( IRStmt_CAS(mkIRCAS(/*oldHi*/IRTemp_INVALID
, old
,
7311 Iend_LE
, mkexpr(ea
),
7312 /*expdHi*/NULL
, exp
,
7313 /*dataHi*/NULL
, new)) );
7316 stmt(IRStmt_MBE(Imbe_Fence
));
7318 putIReg64orZR(ss
, widenUto64(ty
, mkexpr(old
)));
7319 DIP("cas%s%s%s %s, %s, [%s]\n",
7320 isAcq
? "a" : "", isRel
? "l" : "", suffix
[szBlg2
],
7321 nameIRegOrZR(is64
, ss
), nameIRegOrZR(is64
, tt
), nameIReg64orSP(nn
));
7325 /* ---------------- ARMv8.1-LSE: Compare-and-Swap Pair --------------- */
7326 /* 31 30 29 22 21 20 15 14 9 4
7327 0 sz 0010000 A 1 s R 11111 n t CASP{,A}{,L} <Rs>, <Rt>, [<Xn|SP>]
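   /* Illustrative note (not from the original source): Rs and Rt each
      name an even/odd register pair, so "casp x4, x5, x6, x7, [x2]"
      compares {x4,x5} against memory and conditionally stores {x6,x7};
      an odd ss or tt is treated as undefined and falls through below. */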
7329 if (INSN(31,31) == 0
7330 && INSN(29,23) == BITS7(0,0,1,0,0,0,0)
7332 && INSN(14,10) == BITS5(1,1,1,1,1)) {
7333 UInt is64
= INSN(30,30);
7334 Bool isAcq
= INSN(22,22) == 1;
7335 Bool isRel
= INSN(15,15) == 1;
7336 UInt ss
= INSN(20,16);
7337 UInt nn
= INSN(9,5);
7338 UInt tt
= INSN(4,0);
7340 if ((ss
& 0x1) || (tt
& 0x1)) {
7341 /* undefined; fall through */
7343 IRTemp ea
= newTemp(Ity_I64
);
7344 assign(ea
, getIReg64orSP(nn
));
7345 gen_SIGBUS_if_not_XX_aligned(ea
, is64
? 16 : 8);
7347 IRExpr
*expLo
= getIRegOrZR(is64
, ss
);
7348 IRExpr
*expHi
= getIRegOrZR(is64
, ss
+ 1);
7349 IRExpr
*newLo
= getIRegOrZR(is64
, tt
);
7350 IRExpr
*newHi
= getIRegOrZR(is64
, tt
+ 1);
7351 IRTemp oldLo
= newTemp(is64
? Ity_I64
: Ity_I32
);
7352 IRTemp oldHi
= newTemp(is64
? Ity_I64
: Ity_I32
);
7355 stmt(IRStmt_MBE(Imbe_Fence
));
7357 stmt( IRStmt_CAS(mkIRCAS(oldHi
, oldLo
,
7358 Iend_LE
, mkexpr(ea
),
7363 stmt(IRStmt_MBE(Imbe_Fence
));
7365 putIRegOrZR(is64
, ss
, mkexpr(oldLo
));
7366 putIRegOrZR(is64
, ss
+1, mkexpr(oldHi
));
7367 DIP("casp%s%s %s, %s, %s, %s, [%s]\n",
7368 isAcq
? "a" : "", isRel
? "l" : "",
7369 nameIRegOrZR(is64
, ss
), nameIRegOrZR(is64
, ss
+1),
7370 nameIRegOrZR(is64
, tt
), nameIRegOrZR(is64
, tt
+1),
7371 nameIReg64orSP(nn
));
7377 vex_printf("ARM64 front end: load_store\n");
7385 /*------------------------------------------------------------*/
7386 /*--- Control flow and misc instructions ---*/
7387 /*------------------------------------------------------------*/
Bool dis_ARM64_branch_etc(/*MB_OUT*/DisResult* dres, UInt insn,
                          const VexArchInfo* archinfo,
                          const VexAbiInfo* abiinfo, Bool sigill_diag)
{
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   /* ---------------------- B cond ----------------------- */
   /* 0101010 0 imm19 0 cond */
   if (INSN(31,24) == BITS8(0,1,0,1,0,1,0,0) && INSN(4,4) == 0) {
      UInt  cond   = INSN(3,0);
      ULong uimm64 = INSN(23,5) << 2;
      Long  simm64 = (Long)sx_to_64(uimm64, 21);
      vassert(dres->whatNext    == Dis_Continue);
      vassert(dres->len         == 4);
      vassert(dres->jk_StopHere == Ijk_INVALID);
      stmt( IRStmt_Exit(
               unop(Iop_64to1, mk_arm64g_calculate_condition(cond)),
               Ijk_Boring,
               IRConst_U64(guest_PC_curr_instr + simm64),
               OFFB_PC) );
      putPC(mkU64(guest_PC_curr_instr + 4));
      dres->whatNext    = Dis_StopHere;
      dres->jk_StopHere = Ijk_Boring;
      DIP("b.%s 0x%llx\n", nameCC(cond), guest_PC_curr_instr + simm64);
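      /* Worked example (illustrative): imm19 is shifted left by 2 and
         sign extended from 21 bits, so an all-ones imm19 gives
         uimm64 == 0x1FFFFC and simm64 == -4, i.e. a conditional branch
         to the instruction 4 bytes back. */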
   /* -------------------- B{L} uncond -------------------- */
   if (INSN(30,26) == BITS5(0,0,1,0,1)) {
      /* 000101 imm26  B  (PC + sxTo64(imm26 << 2))
         100101 imm26  BL (PC + sxTo64(imm26 << 2))
      */
      UInt  bLink  = INSN(31,31);
      ULong uimm64 = INSN(25,0) << 2;
      Long  simm64 = (Long)sx_to_64(uimm64, 28);
      if (bLink) {
         putIReg64orSP(30, mkU64(guest_PC_curr_instr + 4));
      }
      putPC(mkU64(guest_PC_curr_instr + simm64));
      dres->whatNext    = Dis_StopHere;
      dres->jk_StopHere = Ijk_Call;
      DIP("b%s 0x%llx\n", bLink == 1 ? "l" : "",
          guest_PC_curr_instr + simm64);
   /* --------------------- B{L} reg --------------------- */
   /* 31      24 22 20    15     9  4
      1101011 00 10 11111 000000 nn 00000  RET  Rn
      1101011 00 01 11111 000000 nn 00000  CALL Rn
      1101011 00 00 11111 000000 nn 00000  JMP  Rn
   */
   if (INSN(31,23) == BITS9(1,1,0,1,0,1,1,0,0)
       && INSN(20,16) == BITS5(1,1,1,1,1)
       && INSN(15,10) == BITS6(0,0,0,0,0,0)
       && INSN(4,0) == BITS5(0,0,0,0,0)) {
      UInt branch_type = INSN(22,21);
      UInt nn          = INSN(9,5);
      if (branch_type == BITS2(1,0) /* RET */) {
         putPC(getIReg64orZR(nn));
         dres->whatNext    = Dis_StopHere;
         dres->jk_StopHere = Ijk_Ret;
         DIP("ret %s\n", nameIReg64orZR(nn));
         return True;
      }
      if (branch_type == BITS2(0,1) /* CALL */) {
         IRTemp dst = newTemp(Ity_I64);
         assign(dst, getIReg64orZR(nn));
         putIReg64orSP(30, mkU64(guest_PC_curr_instr + 4));
         putPC(mkexpr(dst));
         dres->whatNext    = Dis_StopHere;
         dres->jk_StopHere = Ijk_Call;
         DIP("blr %s\n", nameIReg64orZR(nn));
         return True;
      }
      if (branch_type == BITS2(0,0) /* JMP */) {
         putPC(getIReg64orZR(nn));
         dres->whatNext    = Dis_StopHere;
         dres->jk_StopHere = Ijk_Boring;
         DIP("jmp %s\n", nameIReg64orZR(nn));
         return True;
      }
   }
   /* -------------------- CB{N}Z -------------------- */
   /* sf 011 010 1 imm19 Rt   CBNZ Xt|Wt, (PC + sxTo64(imm19 << 2))
      sf 011 010 0 imm19 Rt   CBZ  Xt|Wt, (PC + sxTo64(imm19 << 2))
   */
   if (INSN(30,25) == BITS6(0,1,1,0,1,0)) {
      Bool    is64   = INSN(31,31) == 1;
      Bool    bIfZ   = INSN(24,24) == 0;
      ULong   uimm64 = INSN(23,5) << 2;
      UInt    rT     = INSN(4,0);
      Long    simm64 = (Long)sx_to_64(uimm64, 21);
      IRExpr* cond   = NULL;
      if (is64)
         cond = binop(bIfZ ? Iop_CmpEQ64 : Iop_CmpNE64,
                      getIReg64orZR(rT), mkU64(0));
      else
         cond = binop(bIfZ ? Iop_CmpEQ32 : Iop_CmpNE32,
                      getIReg32orZR(rT), mkU32(0));
      stmt( IRStmt_Exit(cond,
                        Ijk_Boring,
                        IRConst_U64(guest_PC_curr_instr + simm64),
                        OFFB_PC) );
      putPC(mkU64(guest_PC_curr_instr + 4));
      dres->whatNext    = Dis_StopHere;
      dres->jk_StopHere = Ijk_Boring;
      DIP("cb%sz %s, 0x%llx\n",
          bIfZ ? "" : "n", nameIRegOrZR(is64, rT),
          guest_PC_curr_instr + simm64);
   /* -------------------- TB{N}Z -------------------- */
   /* 31 30      24 23  18    5 4
      b5 011 011 1  b40 imm14 t  TBNZ Xt, #(b5:b40), (PC + sxTo64(imm14 << 2))
      b5 011 011 0  b40 imm14 t  TBZ  Xt, #(b5:b40), (PC + sxTo64(imm14 << 2))
   */
   if (INSN(30,25) == BITS6(0,1,1,0,1,1)) {
      UInt  b5     = INSN(31,31);
      Bool  bIfZ   = INSN(24,24) == 0;
      UInt  b40    = INSN(23,19);
      UInt  imm14  = INSN(18,5);
      UInt  tt     = INSN(4,0);
      UInt  bitNo  = (b5 << 5) | b40;
      ULong uimm64 = imm14 << 2;
      Long  simm64 = sx_to_64(uimm64, 16);
      IRExpr* cond
         = binop(bIfZ ? Iop_CmpEQ64 : Iop_CmpNE64,
                 binop(Iop_And64,
                       binop(Iop_Shr64, getIReg64orZR(tt), mkU8(bitNo)),
                       mkU64(1)),
                 mkU64(0));
      stmt( IRStmt_Exit(cond,
                        Ijk_Boring,
                        IRConst_U64(guest_PC_curr_instr + simm64),
                        OFFB_PC) );
      putPC(mkU64(guest_PC_curr_instr + 4));
      dres->whatNext    = Dis_StopHere;
      dres->jk_StopHere = Ijk_Boring;
      DIP("tb%sz %s, #%u, 0x%llx\n",
          bIfZ ? "" : "n", nameIReg64orZR(tt), bitNo,
          guest_PC_curr_instr + simm64);
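      /* Worked example (illustrative): the tested bit number is b5:b40,
         so "tbz x3, #37, <label>" encodes b5 == 1 and b40 == 5, giving
         bitNo == (1 << 5) | 5 == 37. */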
   /* -------------------- SVC -------------------- */
   /* 11010100 000 imm16 000 01
      Don't bother with anything except the imm16==0 case.
   */
   if (INSN(31,0) == 0xD4000001) {
      putPC(mkU64(guest_PC_curr_instr + 4));
      dres->whatNext    = Dis_StopHere;
      dres->jk_StopHere = Ijk_Sys_syscall;
7550 /* ------------------ M{SR,RS} ------------------ */
7551 /* ---- Cases for TPIDR_EL0 ----
7552 0xD51BD0 010 Rt MSR tpidr_el0, rT
7553 0xD53BD0 010 Rt MRS rT, tpidr_el0
7555 if ( (INSN(31,0) & 0xFFFFFFE0) == 0xD51BD040 /*MSR*/
7556 || (INSN(31,0) & 0xFFFFFFE0) == 0xD53BD040 /*MRS*/) {
7557 Bool toSys
= INSN(21,21) == 0;
7558 UInt tt
= INSN(4,0);
7560 stmt( IRStmt_Put( OFFB_TPIDR_EL0
, getIReg64orZR(tt
)) );
7561 DIP("msr tpidr_el0, %s\n", nameIReg64orZR(tt
));
7563 putIReg64orZR(tt
, IRExpr_Get( OFFB_TPIDR_EL0
, Ity_I64
));
7564 DIP("mrs %s, tpidr_el0\n", nameIReg64orZR(tt
));
7568 /* ---- Cases for FPCR ----
7569 0xD51B44 000 Rt MSR fpcr, rT
7570 0xD53B44 000 Rt MSR rT, fpcr
7572 if ( (INSN(31,0) & 0xFFFFFFE0) == 0xD51B4400 /*MSR*/
7573 || (INSN(31,0) & 0xFFFFFFE0) == 0xD53B4400 /*MRS*/) {
7574 Bool toSys
= INSN(21,21) == 0;
7575 UInt tt
= INSN(4,0);
7577 stmt( IRStmt_Put( OFFB_FPCR
, getIReg32orZR(tt
)) );
7578 DIP("msr fpcr, %s\n", nameIReg64orZR(tt
));
7580 putIReg32orZR(tt
, IRExpr_Get(OFFB_FPCR
, Ity_I32
));
7581 DIP("mrs %s, fpcr\n", nameIReg64orZR(tt
));
7585 /* ---- Cases for FPSR ----
7586 0xD51B44 001 Rt MSR fpsr, rT
7587 0xD53B44 001 Rt MSR rT, fpsr
7588 The only part of this we model is FPSR.QC. All other bits
7589 are ignored when writing to it and RAZ when reading from it.
7591 if ( (INSN(31,0) & 0xFFFFFFE0) == 0xD51B4420 /*MSR*/
7592 || (INSN(31,0) & 0xFFFFFFE0) == 0xD53B4420 /*MRS*/) {
7593 Bool toSys
= INSN(21,21) == 0;
7594 UInt tt
= INSN(4,0);
7596 /* Just deal with FPSR.QC. Make up a V128 value which is
7597 zero if Xt[27] is zero and any other value if Xt[27] is
7599 IRTemp qc64
= newTemp(Ity_I64
);
7600 assign(qc64
, binop(Iop_And64
,
7601 binop(Iop_Shr64
, getIReg64orZR(tt
), mkU8(27)),
7603 IRExpr
* qcV128
= binop(Iop_64HLtoV128
, mkexpr(qc64
), mkexpr(qc64
));
7604 stmt( IRStmt_Put( OFFB_QCFLAG
, qcV128
) );
7605 DIP("msr fpsr, %s\n", nameIReg64orZR(tt
));
7607 /* Generate a value which is all zeroes except for bit 27,
7608 which must be zero if QCFLAG is all zeroes and one otherwise. */
7609 IRTemp qcV128
= newTempV128();
7610 assign(qcV128
, IRExpr_Get( OFFB_QCFLAG
, Ity_V128
));
7611 IRTemp qc64
= newTemp(Ity_I64
);
7612 assign(qc64
, binop(Iop_Or64
, unop(Iop_V128HIto64
, mkexpr(qcV128
)),
7613 unop(Iop_V128to64
, mkexpr(qcV128
))));
7614 IRExpr
* res
= binop(Iop_Shl64
,
7616 binop(Iop_CmpNE64
, mkexpr(qc64
), mkU64(0))),
7618 putIReg64orZR(tt
, res
);
7619 DIP("mrs %s, fpsr\n", nameIReg64orZR(tt
));
7623 /* ---- Cases for NZCV ----
7624 D51B42 000 Rt MSR nzcv, rT
7625 D53B42 000 Rt MRS rT, nzcv
7626 The only parts of NZCV that actually exist are bits 31:28, which
7627 are the N Z C and V bits themselves. Hence the flags thunk provides
7628 all the state we need.
7630 if ( (INSN(31,0) & 0xFFFFFFE0) == 0xD51B4200 /*MSR*/
7631 || (INSN(31,0) & 0xFFFFFFE0) == 0xD53B4200 /*MRS*/) {
7632 Bool toSys
= INSN(21,21) == 0;
7633 UInt tt
= INSN(4,0);
7635 IRTemp t
= newTemp(Ity_I64
);
7636 assign(t
, binop(Iop_And64
, getIReg64orZR(tt
), mkU64(0xF0000000ULL
)));
7638 DIP("msr %s, nzcv\n", nameIReg32orZR(tt
));
7640 IRTemp res
= newTemp(Ity_I64
);
7641 assign(res
, mk_arm64g_calculate_flags_nzcv());
7642 putIReg32orZR(tt
, unop(Iop_64to32
, mkexpr(res
)));
7643 DIP("mrs %s, nzcv\n", nameIReg64orZR(tt
));
7647 /* ---- Cases for DCZID_EL0 ----
7648 Don't support arbitrary reads and writes to this register. Just
7649 return the value 16, which indicates that the DC ZVA instruction
7650 is not permitted, so we don't have to emulate it.
7651 D5 3B 00 111 Rt MRS rT, dczid_el0
7653 if ((INSN(31,0) & 0xFFFFFFE0) == 0xD53B00E0) {
7654 UInt tt
= INSN(4,0);
7655 putIReg64orZR(tt
, mkU64(1<<4));
7656 DIP("mrs %s, dczid_el0 (FAKED)\n", nameIReg64orZR(tt
));
7659 /* ---- Cases for CTR_EL0 ----
7660 We just handle reads, and make up a value from the D and I line
7661 sizes in the VexArchInfo we are given, and patch in the following
7662 fields that the Foundation model gives ("natively"):
7663 CWG = 0b0100, ERG = 0b0100, L1Ip = 0b11
7664 D5 3B 00 001 Rt MRS rT, dczid_el0
7666 if ((INSN(31,0) & 0xFFFFFFE0) == 0xD53B0020) {
7667 UInt tt
= INSN(4,0);
7668 /* Need to generate a value from dMinLine_lg2_szB and
7669 dMinLine_lg2_szB. The value in the register is in 32-bit
7670 units, so need to subtract 2 from the values in the
7671 VexArchInfo. We can assume that the values here are valid --
7672 disInstr_ARM64 checks them -- so there's no need to deal with
7673 out-of-range cases. */
7674 vassert(archinfo
->arm64_dMinLine_lg2_szB
>= 2
7675 && archinfo
->arm64_dMinLine_lg2_szB
<= 17
7676 && archinfo
->arm64_iMinLine_lg2_szB
>= 2
7677 && archinfo
->arm64_iMinLine_lg2_szB
<= 17);
7679 = 0x8440c000 | ((0xF & (archinfo
->arm64_dMinLine_lg2_szB
- 2)) << 16)
7680 | ((0xF & (archinfo
->arm64_iMinLine_lg2_szB
- 2)) << 0);
7681 putIReg64orZR(tt
, mkU64(val
));
7682 DIP("mrs %s, ctr_el0\n", nameIReg64orZR(tt
));
7685 /* ---- Cases for CNTVCT_EL0 ----
7686 This is a timestamp counter of some sort. Support reads of it only
7687 by passing through to the host.
7688 D5 3B E0 010 Rt MRS Xt, cntvct_el0
7690 if ((INSN(31,0) & 0xFFFFFFE0) == 0xD53BE040) {
7691 UInt tt
= INSN(4,0);
7692 IRTemp val
= newTemp(Ity_I64
);
7693 IRExpr
** args
= mkIRExprVec_0();
7694 IRDirty
* d
= unsafeIRDirty_1_N (
7697 "arm64g_dirtyhelper_MRS_CNTVCT_EL0",
7698 &arm64g_dirtyhelper_MRS_CNTVCT_EL0
,
7701 /* execute the dirty call, dumping the result in val. */
7702 stmt( IRStmt_Dirty(d
) );
7703 putIReg64orZR(tt
, mkexpr(val
));
7704 DIP("mrs %s, cntvct_el0\n", nameIReg64orZR(tt
));
7707 /* ---- Cases for CNTFRQ_EL0 ----
7708 This is always RO at EL0, so it's safe to pass through to the host.
7709 D5 3B E0 000 Rt MRS Xt, cntfrq_el0
7711 if ((INSN(31,0) & 0xFFFFFFE0) == 0xD53BE000) {
7712 UInt tt
= INSN(4,0);
7713 IRTemp val
= newTemp(Ity_I64
);
7714 IRExpr
** args
= mkIRExprVec_0();
7715 IRDirty
* d
= unsafeIRDirty_1_N (
7718 "arm64g_dirtyhelper_MRS_CNTFRQ_EL0",
7719 &arm64g_dirtyhelper_MRS_CNTFRQ_EL0
,
7722 /* execute the dirty call, dumping the result in val. */
7723 stmt( IRStmt_Dirty(d
) );
7724 putIReg64orZR(tt
, mkexpr(val
));
7725 DIP("mrs %s, cntfrq_el0\n", nameIReg64orZR(tt
));
7729 /* ------------------ IC_IVAU ------------------ */
7730 /* D5 0B 75 001 Rt ic ivau, rT
7732 if ((INSN(31,0) & 0xFFFFFFE0) == 0xD50B7520) {
7733 /* We will always be provided with a valid iMinLine value. */
7734 vassert(archinfo
->arm64_iMinLine_lg2_szB
>= 2
7735 && archinfo
->arm64_iMinLine_lg2_szB
<= 17);
7736 /* Round the requested address, in rT, down to the start of the
7737 containing block. */
7738 UInt tt
= INSN(4,0);
7739 ULong lineszB
= 1ULL << archinfo
->arm64_iMinLine_lg2_szB
;
7740 IRTemp addr
= newTemp(Ity_I64
);
7741 assign( addr
, binop( Iop_And64
,
7743 mkU64(~(lineszB
- 1))) );
7744 /* Set the invalidation range, request exit-and-invalidate, with
7745 continuation at the next instruction. */
7746 stmt(IRStmt_Put(OFFB_CMSTART
, mkexpr(addr
)));
7747 stmt(IRStmt_Put(OFFB_CMLEN
, mkU64(lineszB
)));
7748 /* be paranoid ... */
7749 stmt( IRStmt_MBE(Imbe_Fence
) );
7750 putPC(mkU64( guest_PC_curr_instr
+ 4 ));
7751 dres
->whatNext
= Dis_StopHere
;
7752 dres
->jk_StopHere
= Ijk_InvalICache
;
7753 DIP("ic ivau, %s\n", nameIReg64orZR(tt
));
7757 /* ------------------ DC_CVAU ------------------ */
7758 /* D5 0B 7B 001 Rt dc cvau, rT
7759 D5 0B 7E 001 Rt dc civac, rT
7761 if ( (INSN(31,0) & 0xFFFFFFE0) == 0xD50B7B20
7762 || (INSN(31,0) & 0xFFFFFFE0) == 0xD50B7E20) {
7763 /* Exactly the same scheme as for IC IVAU, except we observe the
7764 dMinLine size, and request an Ijk_FlushDCache instead of
7766 /* We will always be provided with a valid dMinLine value. */
7767 vassert(archinfo
->arm64_dMinLine_lg2_szB
>= 2
7768 && archinfo
->arm64_dMinLine_lg2_szB
<= 17);
7769 /* Round the requested address, in rT, down to the start of the
7770 containing block. */
7771 UInt tt
= INSN(4,0);
7772 ULong lineszB
= 1ULL << archinfo
->arm64_dMinLine_lg2_szB
;
7773 IRTemp addr
= newTemp(Ity_I64
);
7774 assign( addr
, binop( Iop_And64
,
7776 mkU64(~(lineszB
- 1))) );
7777 /* Set the flush range, request exit-and-flush, with
7778 continuation at the next instruction. */
7779 stmt(IRStmt_Put(OFFB_CMSTART
, mkexpr(addr
)));
7780 stmt(IRStmt_Put(OFFB_CMLEN
, mkU64(lineszB
)));
7781 /* be paranoid ... */
7782 stmt( IRStmt_MBE(Imbe_Fence
) );
7783 putPC(mkU64( guest_PC_curr_instr
+ 4 ));
7784 dres
->whatNext
= Dis_StopHere
;
7785 dres
->jk_StopHere
= Ijk_FlushDCache
;
7786 DIP("dc cvau, %s\n", nameIReg64orZR(tt
));
   /* ------------------ ISB, DMB, DSB ------------------ */
   /* 11010 10100 0 00 011 0011 CRm 1 01 11111  DMB opt
      11010 10100 0 00 011 0011 CRm 1 00 11111  DSB opt
      11010 10100 0 00 011 0011 CRm 1 10 11111  ISB opt
   */
   if (INSN(31,22) == BITS10(1,1,0,1,0,1,0,1,0,0)
       && INSN(21,12) == BITS10(0,0,0,0,1,1,0,0,1,1)
       && INSN(7,7) == 1
       && INSN(6,5) <= BITS2(1,0) && INSN(4,0) == BITS5(1,1,1,1,1)) {
      UInt opc = INSN(6,5);
      UInt CRm = INSN(11,8);
      vassert(opc <= 2 && CRm <= 15);
      stmt(IRStmt_MBE(Imbe_Fence));
      const HChar* opNames[3]
         = { "dsb", "dmb", "isb" };
      const HChar* howNames[16]
         = { "#0", "oshld", "oshst", "osh", "#4", "nshld", "nshst", "nsh",
             "#8", "ishld", "ishst", "ish", "#12", "ld", "st", "sy" };
      DIP("%s %s\n", opNames[opc], howNames[CRm]);
      return True;
   }
   /* -------------------- NOP -------------------- */
   if (INSN(31,0) == 0xD503201F) {
      DIP("nop\n");
      return True;
   }

   /* -------------------- BRK -------------------- */
   /* 1101 0100 001 imm16 00000  BRK #imm16
   */
   if (INSN(31,24) == BITS8(1,1,0,1,0,1,0,0)
       && INSN(23,21) == BITS3(0,0,1) && INSN(4,0) == BITS5(0,0,0,0,0)) {
      UInt imm16 = INSN(20,5);
      /* Request SIGTRAP and then restart of this insn. */
      putPC(mkU64(guest_PC_curr_instr + 0));
      dres->whatNext    = Dis_StopHere;
      dres->jk_StopHere = Ijk_SigTRAP;
      DIP("brk #%u\n", imm16);
      return True;
   }
   /* ------------------- YIELD ------------------- */
   /* 1101 0101 0000 0011 0010 0000 0011 1111
   */
   if (INSN(31,0) == 0xD503203F) {
      /* Request yield followed by continuation at the next insn. */
      putPC(mkU64(guest_PC_curr_instr + 4));
      dres->whatNext    = Dis_StopHere;
      dres->jk_StopHere = Ijk_Yield;
      DIP("yield\n");
      return True;
   }
   /* -------------------- HINT ------------------- */
   /* 1101 0101 0000 0011 0010 imm7 1 1111
      Catch otherwise unhandled HINT instructions - any
      like YIELD which are explicitly handled should go
      above this case.
   */
   if (INSN(31,24) == BITS8(1,1,0,1,0,1,0,1)
       && INSN(23,16) == BITS8(0,0,0,0,0,0,1,1)
       && INSN(15,12) == BITS4(0,0,1,0)
       && INSN(4,0) == BITS5(1,1,1,1,1)) {
      UInt imm7 = INSN(11,5);
      DIP("hint #%u\n", imm7);
      return True;
   }
   /* ------------------- CLREX ------------------ */
   /* 1101 0101 0000 0011 0011 m 0101 1111  CLREX CRm
      CRm is apparently ignored.
   */
   if ((INSN(31,0) & 0xFFFFF0FF) == 0xD503305F) {
      UInt mm = INSN(11,8);
      /* AFAICS, this simply cancels a (all?) reservations made by a
         (any?) preceding LDREX(es).  Arrange to hand it through to
         the back end. */
      if (abiinfo->guest__use_fallback_LLSC) {
         stmt( IRStmt_Put( OFFB_LLSC_SIZE, mkU64(0) )); // "no transaction"
      } else {
         stmt( IRStmt_MBE(Imbe_CancelReservation) );
      }
      DIP("clrex #%u\n", mm);
      return True;
   }

   vex_printf("ARM64 front end: branch_etc\n");
   return False;
#  undef INSN
}

/*------------------------------------------------------------*/
/*--- SIMD and FP instructions: helper functions            ---*/
/*------------------------------------------------------------*/

/* Some constructors for interleave/deinterleave expressions. */
static IRExpr* mk_CatEvenLanes64x2 ( IRTemp a10, IRTemp b10 ) {
   // returns a0 b0
   return binop(Iop_InterleaveLO64x2, mkexpr(a10), mkexpr(b10));
}

static IRExpr* mk_CatOddLanes64x2 ( IRTemp a10, IRTemp b10 ) {
   // returns a1 b1
   return binop(Iop_InterleaveHI64x2, mkexpr(a10), mkexpr(b10));
}

static IRExpr* mk_CatEvenLanes32x4 ( IRTemp a3210, IRTemp b3210 ) {
   // returns a2 a0 b2 b0
   return binop(Iop_CatEvenLanes32x4, mkexpr(a3210), mkexpr(b3210));
}

static IRExpr* mk_CatOddLanes32x4 ( IRTemp a3210, IRTemp b3210 ) {
   // returns a3 a1 b3 b1
   return binop(Iop_CatOddLanes32x4, mkexpr(a3210), mkexpr(b3210));
}

static IRExpr* mk_InterleaveLO32x4 ( IRTemp a3210, IRTemp b3210 ) {
   // returns a1 b1 a0 b0
   return binop(Iop_InterleaveLO32x4, mkexpr(a3210), mkexpr(b3210));
}

static IRExpr* mk_InterleaveHI32x4 ( IRTemp a3210, IRTemp b3210 ) {
   // returns a3 b3 a2 b2
   return binop(Iop_InterleaveHI32x4, mkexpr(a3210), mkexpr(b3210));
}

static IRExpr* mk_CatEvenLanes16x8 ( IRTemp a76543210, IRTemp b76543210 ) {
   // returns a6 a4 a2 a0 b6 b4 b2 b0
   return binop(Iop_CatEvenLanes16x8, mkexpr(a76543210), mkexpr(b76543210));
}

static IRExpr* mk_CatOddLanes16x8 ( IRTemp a76543210, IRTemp b76543210 ) {
   // returns a7 a5 a3 a1 b7 b5 b3 b1
   return binop(Iop_CatOddLanes16x8, mkexpr(a76543210), mkexpr(b76543210));
}

static IRExpr* mk_InterleaveLO16x8 ( IRTemp a76543210, IRTemp b76543210 ) {
   // returns a3 b3 a2 b2 a1 b1 a0 b0
   return binop(Iop_InterleaveLO16x8, mkexpr(a76543210), mkexpr(b76543210));
}

static IRExpr* mk_InterleaveHI16x8 ( IRTemp a76543210, IRTemp b76543210 ) {
   // returns a7 b7 a6 b6 a5 b5 a4 b4
   return binop(Iop_InterleaveHI16x8, mkexpr(a76543210), mkexpr(b76543210));
}

static IRExpr* mk_CatEvenLanes8x16 ( IRTemp aFEDCBA9876543210,
                                     IRTemp bFEDCBA9876543210 ) {
   // returns aE aC aA a8 a6 a4 a2 a0 bE bC bA b8 b6 b4 b2 b0
   return binop(Iop_CatEvenLanes8x16, mkexpr(aFEDCBA9876543210),
                                      mkexpr(bFEDCBA9876543210));
}

static IRExpr* mk_CatOddLanes8x16 ( IRTemp aFEDCBA9876543210,
                                    IRTemp bFEDCBA9876543210 ) {
   // returns aF aD aB a9 a7 a5 a3 a1 bF bD bB b9 b7 b5 b3 b1
   return binop(Iop_CatOddLanes8x16, mkexpr(aFEDCBA9876543210),
                                     mkexpr(bFEDCBA9876543210));
}

static IRExpr* mk_InterleaveLO8x16 ( IRTemp aFEDCBA9876543210,
                                     IRTemp bFEDCBA9876543210 ) {
   // returns a7 b7 a6 b6 a5 b5 a4 b4 a3 b3 a2 b2 a1 b1 a0 b0
   return binop(Iop_InterleaveLO8x16, mkexpr(aFEDCBA9876543210),
                                      mkexpr(bFEDCBA9876543210));
}

static IRExpr* mk_InterleaveHI8x16 ( IRTemp aFEDCBA9876543210,
                                     IRTemp bFEDCBA9876543210 ) {
   // returns aF bF aE bE aD bD aC bC aB bB aA bA a9 b9 a8 b8
   return binop(Iop_InterleaveHI8x16, mkexpr(aFEDCBA9876543210),
                                      mkexpr(bFEDCBA9876543210));
}
/* Generate N copies of |bit| in the bottom of a ULong. */
static ULong Replicate ( ULong bit, Int N )
{
   vassert(bit <= 1 && N >= 1 && N < 64);
   if (bit == 0) {
      return 0;
   } else {
      /* Careful.  This won't work for N == 64. */
      return (1ULL << N) - 1;
   }
}

static ULong Replicate32x2 ( ULong bits32 )
{
   vassert(0 == (bits32 & ~0xFFFFFFFFULL));
   return (bits32 << 32) | bits32;
}

static ULong Replicate16x4 ( ULong bits16 )
{
   vassert(0 == (bits16 & ~0xFFFFULL));
   return Replicate32x2((bits16 << 16) | bits16);
}

static ULong Replicate8x8 ( ULong bits8 )
{
   vassert(0 == (bits8 & ~0xFFULL));
   return Replicate16x4((bits8 << 8) | bits8);
}
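
/* Illustrative note (not part of the original source): these helpers just
   splat a small value across a 64-bit word.  For example
   Replicate(1, 5) == 0x1F, Replicate32x2(0xDEADBEEF) ==
   0xDEADBEEFDEADBEEFULL, and Replicate8x8(0xAB) == 0xABABABABABABABABULL. */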
/* Expand the VFPExpandImm-style encoding in the bottom 8 bits of
   |imm8| to either a 32-bit value if N is 32 or a 64 bit value if N
   is 64.  In the former case, the upper 32 bits of the returned value
   are guaranteed to be zero. */
static ULong VFPExpandImm ( ULong imm8, Int N )
{
   vassert(imm8 <= 0xFF);
   vassert(N == 32 || N == 64);
   Int E = ((N == 32) ? 8 : 11) - 2; // The spec incorrectly omits the -2.
   Int F = N - E - 1;
   ULong imm8_6 = (imm8 >> 6) & 1;
   /* sign: 1 bit */
   /* exp:  E bits */
   /* frac: F bits */
   ULong sign = (imm8 >> 7) & 1;
   ULong exp  = ((imm8_6 ^ 1) << (E-1)) | Replicate(imm8_6, E-1);
   ULong frac = ((imm8 & 63) << (F-6)) | Replicate(0, F-6);
   vassert(sign < (1ULL << 1));
   vassert(exp  < (1ULL << E));
   vassert(frac < (1ULL << F));
   vassert(1 + E + F == N);
   ULong res = (sign << (E+F)) | (exp << F) | frac;
   return res;
}
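
/* Illustrative note (not part of the original source): with this split of
   the fields (the low two exponent bits are counted as part of |frac|),
   VFPExpandImm(0x70, 64) gives sign=0, exp=0xFF and frac=0x30ULL<<48, which
   assembles to 0x3FF0000000000000, ie the IEEE754 double 1.0 -- the
   immediate produced by "fmov d0, #1.0".  Similarly
   VFPExpandImm(0x70, 32) == 0x3F800000 == 1.0f. */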
/* Expand an AdvSIMDExpandImm-style encoding into a 64-bit value.
   This might fail, as indicated by the returned Bool.  Page 2530 of
   the manual. */
static Bool AdvSIMDExpandImm ( /*OUT*/ULong* res,
                               UInt op, UInt cmode, UInt imm8 )
{
   vassert(cmode <= 15);
   vassert(imm8 <= 255);

   *res = 0; /* will overwrite iff returning True */

   ULong imm64    = 0;
   Bool  testimm8 = False;

   switch (cmode >> 1) {
      case 0:
         testimm8 = False; imm64 = Replicate32x2(imm8); break;
      case 1:
         testimm8 = True; imm64 = Replicate32x2(imm8 << 8); break;
      case 2:
         testimm8 = True; imm64 = Replicate32x2(imm8 << 16); break;
      case 3:
         testimm8 = True; imm64 = Replicate32x2(imm8 << 24); break;
      case 4:
         testimm8 = False; imm64 = Replicate16x4(imm8); break;
      case 5:
         testimm8 = True; imm64 = Replicate16x4(imm8 << 8); break;
      case 6:
         testimm8 = True;
         if ((cmode & 1) == 0)
            imm64 = Replicate32x2((imm8 << 8) | 0xFF);
         else
            imm64 = Replicate32x2((imm8 << 16) | 0xFFFF);
         break;
      case 7:
         testimm8 = False;
         if ((cmode & 1) == 0 && op == 0)
            imm64 = Replicate8x8(imm8);
         if ((cmode & 1) == 0 && op == 1) {
            imm64 = 0;   imm64 |= (imm8 & 0x80) ? 0xFF : 0x00;
            imm64 <<= 8; imm64 |= (imm8 & 0x40) ? 0xFF : 0x00;
            imm64 <<= 8; imm64 |= (imm8 & 0x20) ? 0xFF : 0x00;
            imm64 <<= 8; imm64 |= (imm8 & 0x10) ? 0xFF : 0x00;
            imm64 <<= 8; imm64 |= (imm8 & 0x08) ? 0xFF : 0x00;
            imm64 <<= 8; imm64 |= (imm8 & 0x04) ? 0xFF : 0x00;
            imm64 <<= 8; imm64 |= (imm8 & 0x02) ? 0xFF : 0x00;
            imm64 <<= 8; imm64 |= (imm8 & 0x01) ? 0xFF : 0x00;
         }
         if ((cmode & 1) == 1 && op == 0) {
            ULong imm8_7  = (imm8 >> 7) & 1;
            ULong imm8_6  = (imm8 >> 6) & 1;
            ULong imm8_50 = imm8 & 63;
            ULong imm32 = (imm8_7                 << (1 + 5 + 6 + 19))
                          | ((imm8_6 ^ 1)         << (5 + 6 + 19))
                          | (Replicate(imm8_6, 5) << (6 + 19))
                          | (imm8_50              << 19);
            imm64 = Replicate32x2(imm32);
         }
         if ((cmode & 1) == 1 && op == 1) {
            // imm64 = imm8<7>:NOT(imm8<6>)
            //                :Replicate(imm8<6>,8):imm8<5:0>:Zeros(48);
            ULong imm8_7  = (imm8 >> 7) & 1;
            ULong imm8_6  = (imm8 >> 6) & 1;
            ULong imm8_50 = imm8 & 63;
            imm64 = (imm8_7 << 63) | ((imm8_6 ^ 1) << 62)
                    | (Replicate(imm8_6, 8) << 54)
                    | (imm8_50 << 48);
         }
         break;
      default:
         vassert(0);
   }

   if (testimm8 && imm8 == 0)
      return False;

   *res = imm64;
   return True;
}
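
/* Illustrative note (not part of the original source): for example, with
   op=0, cmode=0b1110 (so cmode>>1 == 7, cmode&1 == 0) and imm8=0xAB, the
   expansion is Replicate8x8(0xAB) == 0xABABABABABABABABULL, ie the pattern
   used by "movi Vd.16b, #0xAB".  The |testimm8| cases cause an all-zero
   imm8 to be rejected, so the function returns False for them. */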
/* Help a bit for decoding laneage for vector operations that can be
   of the form 4x32, 2x64 or 2x32-and-zero-upper-half, as encoded by Q
   and SZ bits, typically for vector floating point. */
static Bool getLaneInfo_Q_SZ ( /*OUT*/IRType* tyI,  /*OUT*/IRType* tyF,
                               /*OUT*/UInt* nLanes, /*OUT*/Bool* zeroUpper,
                               /*OUT*/const HChar** arrSpec,
                               Bool bitQ, Bool bitSZ )
{
   vassert(bitQ == True || bitQ == False);
   vassert(bitSZ == True || bitSZ == False);
   if (bitQ && bitSZ) { // 2x64
      if (tyI)       *tyI       = Ity_I64;
      if (tyF)       *tyF       = Ity_F64;
      if (nLanes)    *nLanes    = 2;
      if (zeroUpper) *zeroUpper = False;
      if (arrSpec)   *arrSpec   = "2d";
      return True;
   }
   if (bitQ && !bitSZ) { // 4x32
      if (tyI)       *tyI       = Ity_I32;
      if (tyF)       *tyF       = Ity_F32;
      if (nLanes)    *nLanes    = 4;
      if (zeroUpper) *zeroUpper = False;
      if (arrSpec)   *arrSpec   = "4s";
      return True;
   }
   if (!bitQ && !bitSZ) { // 2x32
      if (tyI)       *tyI       = Ity_I32;
      if (tyF)       *tyF       = Ity_F32;
      if (nLanes)    *nLanes    = 2;
      if (zeroUpper) *zeroUpper = True;
      if (arrSpec)   *arrSpec   = "2s";
      return True;
   }
   // Else impliedly 1x64, which isn't allowed.
   return False;
}
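
/* Illustrative note (not part of the original source): eg bitQ=1, bitSZ=0
   describes a full-width 4x32 ("4s") operation, while bitQ=0, bitSZ=0 is
   the 2x32 ("2s") form in which the upper 64 bits of the destination must
   be zeroed -- hence zeroUpper is True only in that case. */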
/* Helper for decoding laneage for shift-style vector operations
   that involve an immediate shift amount. */
static Bool getLaneInfo_IMMH_IMMB ( /*OUT*/UInt* shift, /*OUT*/UInt* szBlg2,
                                    UInt immh, UInt immb )
{
   vassert(immh < (1<<4));
   vassert(immb < (1<<3));
   UInt immhb = (immh << 3) | immb;
   if (immh & 8) {
      if (shift)  *shift  = 128 - immhb;
      if (szBlg2) *szBlg2 = 3;
      return True;
   }
   if (immh & 4) {
      if (shift)  *shift  = 64 - immhb;
      if (szBlg2) *szBlg2 = 2;
      return True;
   }
   if (immh & 2) {
      if (shift)  *shift  = 32 - immhb;
      if (szBlg2) *szBlg2 = 1;
      return True;
   }
   if (immh & 1) {
      if (shift)  *shift  = 16 - immhb;
      if (szBlg2) *szBlg2 = 0;
      return True;
   }
   return False;
}
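
/* Illustrative note (not part of the original source): the highest set bit
   of immh selects the lane size, and the remaining immh:immb bits give the
   shift amount as (2*laneBits - immhb).  For example immh=0b0100,
   immb=0b010 gives immhb=34, hence szBlg2=2 (32-bit lanes) and
   shift = 64 - 34 = 30. */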
8179 /* Generate IR to fold all lanes of the V128 value in 'src' as
8180 characterised by the operator 'op', and return the result in the
8181 bottom bits of a V128, with all other bits set to zero. */
8182 static IRTemp
math_FOLDV ( IRTemp src
, IROp op
)
8184 /* The basic idea is to use repeated applications of Iop_CatEven*
8185 and Iop_CatOdd* operators to 'src' so as to clone each lane into
8186 a complete vector. Then fold all those vectors with 'op' and
8187 zero out all but the least significant lane. */
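   /* Illustrative note (not part of the original source): for a 16-lane
      fold, the CatOdd/CatEven cascade below first builds, for each lane i,
      a vector whose lanes are all copies of lane i (xAll0 .. xAllF); those
      16 cloned vectors are then combined pairwise with |op| in a four-level
      tree, so every lane of the final vector -- and in particular the
      bottom lane that survives the ZeroHI -- holds op applied to all 16
      original lanes. */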
8189 case Iop_Min8Sx16
: case Iop_Min8Ux16
:
8190 case Iop_Max8Sx16
: case Iop_Max8Ux16
: case Iop_Add8x16
: {
8191 /* NB: temp naming here is misleading -- the naming is for 8
8192 lanes of 16 bit, whereas what is being operated on is 16
8194 IRTemp x76543210
= src
;
8195 IRTemp x76547654
= newTempV128();
8196 IRTemp x32103210
= newTempV128();
8197 assign(x76547654
, mk_CatOddLanes64x2 (x76543210
, x76543210
));
8198 assign(x32103210
, mk_CatEvenLanes64x2(x76543210
, x76543210
));
8199 IRTemp x76767676
= newTempV128();
8200 IRTemp x54545454
= newTempV128();
8201 IRTemp x32323232
= newTempV128();
8202 IRTemp x10101010
= newTempV128();
8203 assign(x76767676
, mk_CatOddLanes32x4 (x76547654
, x76547654
));
8204 assign(x54545454
, mk_CatEvenLanes32x4(x76547654
, x76547654
));
8205 assign(x32323232
, mk_CatOddLanes32x4 (x32103210
, x32103210
));
8206 assign(x10101010
, mk_CatEvenLanes32x4(x32103210
, x32103210
));
8207 IRTemp x77777777
= newTempV128();
8208 IRTemp x66666666
= newTempV128();
8209 IRTemp x55555555
= newTempV128();
8210 IRTemp x44444444
= newTempV128();
8211 IRTemp x33333333
= newTempV128();
8212 IRTemp x22222222
= newTempV128();
8213 IRTemp x11111111
= newTempV128();
8214 IRTemp x00000000
= newTempV128();
8215 assign(x77777777
, mk_CatOddLanes16x8 (x76767676
, x76767676
));
8216 assign(x66666666
, mk_CatEvenLanes16x8(x76767676
, x76767676
));
8217 assign(x55555555
, mk_CatOddLanes16x8 (x54545454
, x54545454
));
8218 assign(x44444444
, mk_CatEvenLanes16x8(x54545454
, x54545454
));
8219 assign(x33333333
, mk_CatOddLanes16x8 (x32323232
, x32323232
));
8220 assign(x22222222
, mk_CatEvenLanes16x8(x32323232
, x32323232
));
8221 assign(x11111111
, mk_CatOddLanes16x8 (x10101010
, x10101010
));
8222 assign(x00000000
, mk_CatEvenLanes16x8(x10101010
, x10101010
));
8223 /* Naming not misleading after here. */
8224 IRTemp xAllF
= newTempV128();
8225 IRTemp xAllE
= newTempV128();
8226 IRTemp xAllD
= newTempV128();
8227 IRTemp xAllC
= newTempV128();
8228 IRTemp xAllB
= newTempV128();
8229 IRTemp xAllA
= newTempV128();
8230 IRTemp xAll9
= newTempV128();
8231 IRTemp xAll8
= newTempV128();
8232 IRTemp xAll7
= newTempV128();
8233 IRTemp xAll6
= newTempV128();
8234 IRTemp xAll5
= newTempV128();
8235 IRTemp xAll4
= newTempV128();
8236 IRTemp xAll3
= newTempV128();
8237 IRTemp xAll2
= newTempV128();
8238 IRTemp xAll1
= newTempV128();
8239 IRTemp xAll0
= newTempV128();
8240 assign(xAllF
, mk_CatOddLanes8x16 (x77777777
, x77777777
));
8241 assign(xAllE
, mk_CatEvenLanes8x16(x77777777
, x77777777
));
8242 assign(xAllD
, mk_CatOddLanes8x16 (x66666666
, x66666666
));
8243 assign(xAllC
, mk_CatEvenLanes8x16(x66666666
, x66666666
));
8244 assign(xAllB
, mk_CatOddLanes8x16 (x55555555
, x55555555
));
8245 assign(xAllA
, mk_CatEvenLanes8x16(x55555555
, x55555555
));
8246 assign(xAll9
, mk_CatOddLanes8x16 (x44444444
, x44444444
));
8247 assign(xAll8
, mk_CatEvenLanes8x16(x44444444
, x44444444
));
8248 assign(xAll7
, mk_CatOddLanes8x16 (x33333333
, x33333333
));
8249 assign(xAll6
, mk_CatEvenLanes8x16(x33333333
, x33333333
));
8250 assign(xAll5
, mk_CatOddLanes8x16 (x22222222
, x22222222
));
8251 assign(xAll4
, mk_CatEvenLanes8x16(x22222222
, x22222222
));
8252 assign(xAll3
, mk_CatOddLanes8x16 (x11111111
, x11111111
));
8253 assign(xAll2
, mk_CatEvenLanes8x16(x11111111
, x11111111
));
8254 assign(xAll1
, mk_CatOddLanes8x16 (x00000000
, x00000000
));
8255 assign(xAll0
, mk_CatEvenLanes8x16(x00000000
, x00000000
));
8256 IRTemp maxFE
= newTempV128();
8257 IRTemp maxDC
= newTempV128();
8258 IRTemp maxBA
= newTempV128();
8259 IRTemp max98
= newTempV128();
8260 IRTemp max76
= newTempV128();
8261 IRTemp max54
= newTempV128();
8262 IRTemp max32
= newTempV128();
8263 IRTemp max10
= newTempV128();
8264 assign(maxFE
, binop(op
, mkexpr(xAllF
), mkexpr(xAllE
)));
8265 assign(maxDC
, binop(op
, mkexpr(xAllD
), mkexpr(xAllC
)));
8266 assign(maxBA
, binop(op
, mkexpr(xAllB
), mkexpr(xAllA
)));
8267 assign(max98
, binop(op
, mkexpr(xAll9
), mkexpr(xAll8
)));
8268 assign(max76
, binop(op
, mkexpr(xAll7
), mkexpr(xAll6
)));
8269 assign(max54
, binop(op
, mkexpr(xAll5
), mkexpr(xAll4
)));
8270 assign(max32
, binop(op
, mkexpr(xAll3
), mkexpr(xAll2
)));
8271 assign(max10
, binop(op
, mkexpr(xAll1
), mkexpr(xAll0
)));
8272 IRTemp maxFEDC
= newTempV128();
8273 IRTemp maxBA98
= newTempV128();
8274 IRTemp max7654
= newTempV128();
8275 IRTemp max3210
= newTempV128();
8276 assign(maxFEDC
, binop(op
, mkexpr(maxFE
), mkexpr(maxDC
)));
8277 assign(maxBA98
, binop(op
, mkexpr(maxBA
), mkexpr(max98
)));
8278 assign(max7654
, binop(op
, mkexpr(max76
), mkexpr(max54
)));
8279 assign(max3210
, binop(op
, mkexpr(max32
), mkexpr(max10
)));
8280 IRTemp maxFEDCBA98
= newTempV128();
8281 IRTemp max76543210
= newTempV128();
8282 assign(maxFEDCBA98
, binop(op
, mkexpr(maxFEDC
), mkexpr(maxBA98
)));
8283 assign(max76543210
, binop(op
, mkexpr(max7654
), mkexpr(max3210
)));
8284 IRTemp maxAllLanes
= newTempV128();
8285 assign(maxAllLanes
, binop(op
, mkexpr(maxFEDCBA98
),
8286 mkexpr(max76543210
)));
8287 IRTemp res
= newTempV128();
8288 assign(res
, unop(Iop_ZeroHI120ofV128
, mkexpr(maxAllLanes
)));
8291 case Iop_Min16Sx8
: case Iop_Min16Ux8
:
8292 case Iop_Max16Sx8
: case Iop_Max16Ux8
: case Iop_Add16x8
: {
8293 IRTemp x76543210
= src
;
8294 IRTemp x76547654
= newTempV128();
8295 IRTemp x32103210
= newTempV128();
8296 assign(x76547654
, mk_CatOddLanes64x2 (x76543210
, x76543210
));
8297 assign(x32103210
, mk_CatEvenLanes64x2(x76543210
, x76543210
));
8298 IRTemp x76767676
= newTempV128();
8299 IRTemp x54545454
= newTempV128();
8300 IRTemp x32323232
= newTempV128();
8301 IRTemp x10101010
= newTempV128();
8302 assign(x76767676
, mk_CatOddLanes32x4 (x76547654
, x76547654
));
8303 assign(x54545454
, mk_CatEvenLanes32x4(x76547654
, x76547654
));
8304 assign(x32323232
, mk_CatOddLanes32x4 (x32103210
, x32103210
));
8305 assign(x10101010
, mk_CatEvenLanes32x4(x32103210
, x32103210
));
8306 IRTemp x77777777
= newTempV128();
8307 IRTemp x66666666
= newTempV128();
8308 IRTemp x55555555
= newTempV128();
8309 IRTemp x44444444
= newTempV128();
8310 IRTemp x33333333
= newTempV128();
8311 IRTemp x22222222
= newTempV128();
8312 IRTemp x11111111
= newTempV128();
8313 IRTemp x00000000
= newTempV128();
8314 assign(x77777777
, mk_CatOddLanes16x8 (x76767676
, x76767676
));
8315 assign(x66666666
, mk_CatEvenLanes16x8(x76767676
, x76767676
));
8316 assign(x55555555
, mk_CatOddLanes16x8 (x54545454
, x54545454
));
8317 assign(x44444444
, mk_CatEvenLanes16x8(x54545454
, x54545454
));
8318 assign(x33333333
, mk_CatOddLanes16x8 (x32323232
, x32323232
));
8319 assign(x22222222
, mk_CatEvenLanes16x8(x32323232
, x32323232
));
8320 assign(x11111111
, mk_CatOddLanes16x8 (x10101010
, x10101010
));
8321 assign(x00000000
, mk_CatEvenLanes16x8(x10101010
, x10101010
));
8322 IRTemp max76
= newTempV128();
8323 IRTemp max54
= newTempV128();
8324 IRTemp max32
= newTempV128();
8325 IRTemp max10
= newTempV128();
8326 assign(max76
, binop(op
, mkexpr(x77777777
), mkexpr(x66666666
)));
8327 assign(max54
, binop(op
, mkexpr(x55555555
), mkexpr(x44444444
)));
8328 assign(max32
, binop(op
, mkexpr(x33333333
), mkexpr(x22222222
)));
8329 assign(max10
, binop(op
, mkexpr(x11111111
), mkexpr(x00000000
)));
8330 IRTemp max7654
= newTempV128();
8331 IRTemp max3210
= newTempV128();
8332 assign(max7654
, binop(op
, mkexpr(max76
), mkexpr(max54
)));
8333 assign(max3210
, binop(op
, mkexpr(max32
), mkexpr(max10
)));
8334 IRTemp max76543210
= newTempV128();
8335 assign(max76543210
, binop(op
, mkexpr(max7654
), mkexpr(max3210
)));
8336 IRTemp res
= newTempV128();
8337 assign(res
, unop(Iop_ZeroHI112ofV128
, mkexpr(max76543210
)));
8340 case Iop_Max32Fx4
: case Iop_Min32Fx4
:
8341 case Iop_Min32Sx4
: case Iop_Min32Ux4
:
8342 case Iop_Max32Sx4
: case Iop_Max32Ux4
: case Iop_Add32x4
: {
8344 IRTemp x3232
= newTempV128();
8345 IRTemp x1010
= newTempV128();
8346 assign(x3232
, mk_CatOddLanes64x2 (x3210
, x3210
));
8347 assign(x1010
, mk_CatEvenLanes64x2(x3210
, x3210
));
8348 IRTemp x3333
= newTempV128();
8349 IRTemp x2222
= newTempV128();
8350 IRTemp x1111
= newTempV128();
8351 IRTemp x0000
= newTempV128();
8352 assign(x3333
, mk_CatOddLanes32x4 (x3232
, x3232
));
8353 assign(x2222
, mk_CatEvenLanes32x4(x3232
, x3232
));
8354 assign(x1111
, mk_CatOddLanes32x4 (x1010
, x1010
));
8355 assign(x0000
, mk_CatEvenLanes32x4(x1010
, x1010
));
8356 IRTemp max32
= newTempV128();
8357 IRTemp max10
= newTempV128();
8358 assign(max32
, binop(op
, mkexpr(x3333
), mkexpr(x2222
)));
8359 assign(max10
, binop(op
, mkexpr(x1111
), mkexpr(x0000
)));
8360 IRTemp max3210
= newTempV128();
8361 assign(max3210
, binop(op
, mkexpr(max32
), mkexpr(max10
)));
8362 IRTemp res
= newTempV128();
8363 assign(res
, unop(Iop_ZeroHI96ofV128
, mkexpr(max3210
)));
8368 IRTemp x00
= newTempV128();
8369 IRTemp x11
= newTempV128();
8370 assign(x11
, binop(Iop_InterleaveHI64x2
, mkexpr(x10
), mkexpr(x10
)));
8371 assign(x00
, binop(Iop_InterleaveLO64x2
, mkexpr(x10
), mkexpr(x10
)));
8372 IRTemp max10
= newTempV128();
8373 assign(max10
, binop(op
, mkexpr(x11
), mkexpr(x00
)));
8374 IRTemp res
= newTempV128();
8375 assign(res
, unop(Iop_ZeroHI64ofV128
, mkexpr(max10
)));
8384 /* Generate IR for TBL and TBX. This deals with the 128 bit case
8386 static IRTemp
math_TBL_TBX ( IRTemp tab
[4], UInt len
, IRTemp src
,
8389 vassert(len
>= 0 && len
<= 3);
8391 /* Generate some useful constants as concisely as possible. */
8392 IRTemp half15
= newTemp(Ity_I64
);
8393 assign(half15
, mkU64(0x0F0F0F0F0F0F0F0FULL
));
8394 IRTemp half16
= newTemp(Ity_I64
);
8395 assign(half16
, mkU64(0x1010101010101010ULL
));
8398 IRTemp allZero
= newTempV128();
8399 assign(allZero
, mkV128(0x0000));
8400 /* A vector containing 15 in each 8-bit lane */
8401 IRTemp all15
= newTempV128();
8402 assign(all15
, binop(Iop_64HLtoV128
, mkexpr(half15
), mkexpr(half15
)));
8403 /* A vector containing 16 in each 8-bit lane */
8404 IRTemp all16
= newTempV128();
8405 assign(all16
, binop(Iop_64HLtoV128
, mkexpr(half16
), mkexpr(half16
)));
8406 /* A vector containing 32 in each 8-bit lane */
8407 IRTemp all32
= newTempV128();
8408 assign(all32
, binop(Iop_Add8x16
, mkexpr(all16
), mkexpr(all16
)));
8409 /* A vector containing 48 in each 8-bit lane */
8410 IRTemp all48
= newTempV128();
8411 assign(all48
, binop(Iop_Add8x16
, mkexpr(all16
), mkexpr(all32
)));
8412 /* A vector containing 64 in each 8-bit lane */
8413 IRTemp all64
= newTempV128();
8414 assign(all64
, binop(Iop_Add8x16
, mkexpr(all32
), mkexpr(all32
)));
8416 /* Group the 16/32/48/64 vectors so as to be indexable. */
8417 IRTemp allXX
[4] = { all16
, all32
, all48
, all64
};
8419 /* Compute the result for each table vector, with zeroes in places
8420 where the index values are out of range, and OR them into the
8422 IRTemp running_result
= newTempV128();
8423 assign(running_result
, mkV128(0));
8426 for (tabent
= 0; tabent
<= len
; tabent
++) {
8427 vassert(tabent
>= 0 && tabent
< 4);
8428 IRTemp bias
= newTempV128();
8430 mkexpr(tabent
== 0 ? allZero
: allXX
[tabent
-1]));
8431 IRTemp biased_indices
= newTempV128();
8432 assign(biased_indices
,
8433 binop(Iop_Sub8x16
, mkexpr(src
), mkexpr(bias
)));
8434 IRTemp valid_mask
= newTempV128();
8436 binop(Iop_CmpGT8Ux16
, mkexpr(all16
), mkexpr(biased_indices
)));
8437 IRTemp safe_biased_indices
= newTempV128();
8438 assign(safe_biased_indices
,
8439 binop(Iop_AndV128
, mkexpr(biased_indices
), mkexpr(all15
)));
8440 IRTemp results_or_junk
= newTempV128();
8441 assign(results_or_junk
,
8442 binop(Iop_Perm8x16
, mkexpr(tab
[tabent
]),
8443 mkexpr(safe_biased_indices
)));
8444 IRTemp results_or_zero
= newTempV128();
8445 assign(results_or_zero
,
8446 binop(Iop_AndV128
, mkexpr(results_or_junk
), mkexpr(valid_mask
)));
8447 /* And OR that into the running result. */
8448 IRTemp tmp
= newTempV128();
8449 assign(tmp
, binop(Iop_OrV128
, mkexpr(results_or_zero
),
8450 mkexpr(running_result
)));
8451 running_result
= tmp
;
8454 /* So now running_result holds the overall result where the indices
8455 are in range, and zero in out-of-range lanes. Now we need to
8456 compute an overall validity mask and use this to copy in the
8457 lanes in the oor_values for out of range indices. This is
8458 unnecessary for TBL but will get folded out by iropt, so we lean
8459 on that and generate the same code for TBL and TBX here. */
8460 IRTemp overall_valid_mask
= newTempV128();
8461 assign(overall_valid_mask
,
8462 binop(Iop_CmpGT8Ux16
, mkexpr(allXX
[len
]), mkexpr(src
)));
8463 IRTemp result
= newTempV128();
8466 mkexpr(running_result
),
8469 unop(Iop_NotV128
, mkexpr(overall_valid_mask
)))));
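/* Illustrative note (not part of the original source): for "tbl" with two
   table registers (len=1), a source index of 17 is biased by 16 when the
   second table is processed, giving 1; that biased index passes the
   per-table range check (< 16), so the result byte is lane 1 of the second
   table register.  An index of 32 or more fails every per-table check and
   also the overall validity check, so that lane receives 0 for TBL or the
   old destination byte for TBX. */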
/* Let |argL| and |argR| be V128 values, and let |opI64x2toV128| be
   an op which takes two I64s and produces a V128.  That is, a widening
   operator.  Generate IR which applies |opI64x2toV128| to either the
   lower (if |is2| is False) or upper (if |is2| is True) halves of
   |argL| and |argR|, and return the value in a new IRTemp.
*/
static
IRTemp math_BINARY_WIDENING_V128 ( Bool is2, IROp opI64x2toV128,
                                   IRExpr* argL, IRExpr* argR )
{
   IRTemp res   = newTempV128();
   IROp   slice = is2 ? Iop_V128HIto64 : Iop_V128to64;
   assign(res, binop(opI64x2toV128, unop(slice, argL),
                                    unop(slice, argR)));
   return res;
}
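
/* Illustrative note (not part of the original source): this is the shape
   used by the widening "long" instruction pairs -- the base form consumes
   the lower 64-bit halves of its sources and the "2" form (eg smull2
   versus smull) consumes the upper halves, so callers only need to pass
   |is2|. */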
/* Generate signed/unsigned absolute difference vector IR. */
static
IRTemp math_ABD ( Bool isU, UInt size, IRExpr* argLE, IRExpr* argRE )
{
   vassert(size <= 3);
   IRTemp argL = newTempV128();
   IRTemp argR = newTempV128();
   IRTemp msk  = newTempV128();
   IRTemp res  = newTempV128();
   assign(argL, argLE);
   assign(argR, argRE);
   assign(msk, binop(isU ? mkVecCMPGTU(size) : mkVecCMPGTS(size),
                     mkexpr(argL), mkexpr(argR)));
   assign(res,
          binop(Iop_OrV128,
                binop(Iop_AndV128,
                      binop(mkVecSUB(size), mkexpr(argL), mkexpr(argR)),
                      mkexpr(msk)),
                binop(Iop_AndV128,
                      binop(mkVecSUB(size), mkexpr(argR), mkexpr(argL)),
                      unop(Iop_NotV128, mkexpr(msk)))));
   return res;
}
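
/* Illustrative note (not part of the original source): |msk| is all-ones in
   each lane where argL > argR, so the OrV128 of the two masked subtractions
   selects argL-argR in those lanes and argR-argL elsewhere.  Eg for
   unsigned byte lanes argL=3, argR=10 the mask lane is 0x00 and the result
   lane is 10-3 = 7. */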
8517 /* Generate IR that takes a V128 and sign- or zero-widens
8518 either the lower or upper set of lanes to twice-as-wide,
8519 resulting in a new V128 value. */
8521 IRTemp
math_WIDEN_LO_OR_HI_LANES ( Bool zWiden
, Bool fromUpperHalf
,
8522 UInt sizeNarrow
, IRExpr
* srcE
)
8524 IRTemp src
= newTempV128();
8525 IRTemp res
= newTempV128();
8527 switch (sizeNarrow
) {
8530 binop(zWiden
? Iop_ShrN64x2
: Iop_SarN64x2
,
8531 binop(fromUpperHalf
? Iop_InterleaveHI32x4
8532 : Iop_InterleaveLO32x4
,
8539 binop(zWiden
? Iop_ShrN32x4
: Iop_SarN32x4
,
8540 binop(fromUpperHalf
? Iop_InterleaveHI16x8
8541 : Iop_InterleaveLO16x8
,
8548 binop(zWiden
? Iop_ShrN16x8
: Iop_SarN16x8
,
8549 binop(fromUpperHalf
? Iop_InterleaveHI8x16
8550 : Iop_InterleaveLO8x16
,
8562 /* Generate IR that takes a V128 and sign- or zero-widens
8563 either the even or odd lanes to twice-as-wide,
8564 resulting in a new V128 value. */
8566 IRTemp
math_WIDEN_EVEN_OR_ODD_LANES ( Bool zWiden
, Bool fromOdd
,
8567 UInt sizeNarrow
, IRExpr
* srcE
)
8569 IRTemp src
= newTempV128();
8570 IRTemp res
= newTempV128();
8571 IROp opSAR
= mkVecSARN(sizeNarrow
+1);
8572 IROp opSHR
= mkVecSHRN(sizeNarrow
+1);
8573 IROp opSHL
= mkVecSHLN(sizeNarrow
+1);
8574 IROp opSxR
= zWiden
? opSHR
: opSAR
;
8576 switch (sizeNarrow
) {
8577 case X10
: amt
= 32; break;
8578 case X01
: amt
= 16; break;
8579 case X00
: amt
= 8; break;
8580 default: vassert(0);
8584 assign(res
, binop(opSxR
, mkexpr(src
), mkU8(amt
)));
8586 assign(res
, binop(opSxR
, binop(opSHL
, mkexpr(src
), mkU8(amt
)),
/* Generate IR that takes two V128s and narrows (takes lower half)
   of each lane, producing a single V128 value. */
static
IRTemp math_NARROW_LANES ( IRTemp argHi, IRTemp argLo, UInt sizeNarrow )
{
   IRTemp res = newTempV128();
   assign(res, binop(mkVecCATEVENLANES(sizeNarrow),
                     mkexpr(argHi), mkexpr(argLo)));
   return res;
}
8605 /* Return a temp which holds the vector dup of the lane of width
8606 (1 << size) obtained from src[laneNo]. */
8608 IRTemp
math_DUP_VEC_ELEM ( IRExpr
* src
, UInt size
, UInt laneNo
)
8611 /* Normalise |laneNo| so it is of the form
8612 x000 for D, xx00 for S, xxx0 for H, and xxxx for B.
8613 This puts the bits we want to inspect at constant offsets
8614 regardless of the value of |size|.
8616 UInt ix
= laneNo
<< size
;
8618 IROp ops
[4] = { Iop_INVALID
, Iop_INVALID
, Iop_INVALID
, Iop_INVALID
};
8621 ops
[0] = (ix
& 1) ? Iop_CatOddLanes8x16
: Iop_CatEvenLanes8x16
;
8624 ops
[1] = (ix
& 2) ? Iop_CatOddLanes16x8
: Iop_CatEvenLanes16x8
;
8627 ops
[2] = (ix
& 4) ? Iop_CatOddLanes32x4
: Iop_CatEvenLanes32x4
;
8630 ops
[3] = (ix
& 8) ? Iop_InterleaveHI64x2
: Iop_InterleaveLO64x2
;
8635 IRTemp res
= newTempV128();
8638 for (i
= 3; i
>= 0; i
--) {
8639 if (ops
[i
] == Iop_INVALID
)
8641 IRTemp tmp
= newTempV128();
8642 assign(tmp
, binop(ops
[i
], mkexpr(res
), mkexpr(res
)));
8649 /* Let |srcV| be a V128 value, and let |imm5| be a lane-and-size
8650 selector encoded as shown below. Return a new V128 holding the
8651 selected lane from |srcV| dup'd out to V128, and also return the
8652 lane number, log2 of the lane size in bytes, and width-character via
8653 *laneNo, *laneSzLg2 and *laneCh respectively. It may be that imm5
8654 is an invalid selector, in which case return
8655 IRTemp_INVALID, 0, 0 and '?' respectively.
8657 imm5 = xxxx1 signifies .b[xxxx]
8664 IRTemp
handle_DUP_VEC_ELEM ( /*OUT*/UInt
* laneNo
,
8665 /*OUT*/UInt
* laneSzLg2
, /*OUT*/HChar
* laneCh
,
8666 IRExpr
* srcV
, UInt imm5
)
8673 *laneNo
= (imm5
>> 1) & 15;
8677 else if (imm5
& 2) {
8678 *laneNo
= (imm5
>> 2) & 7;
8682 else if (imm5
& 4) {
8683 *laneNo
= (imm5
>> 3) & 3;
8687 else if (imm5
& 8) {
8688 *laneNo
= (imm5
>> 4) & 1;
8694 return IRTemp_INVALID
;
8697 return math_DUP_VEC_ELEM(srcV
, *laneSzLg2
, *laneNo
);
8701 /* Clone |imm| to every lane of a V128, with lane size log2 of |size|. */
8703 IRTemp
math_VEC_DUP_IMM ( UInt size
, ULong imm
)
8705 IRType ty
= Ity_INVALID
;
8706 IRTemp rcS
= IRTemp_INVALID
;
8709 vassert(imm
<= 0xFFFFULL
);
8711 rcS
= newTemp(ty
); assign(rcS
, mkU16( (UShort
)imm
));
8714 vassert(imm
<= 0xFFFFFFFFULL
);
8716 rcS
= newTemp(ty
); assign(rcS
, mkU32( (UInt
)imm
));
8720 rcS
= newTemp(ty
); assign(rcS
, mkU64(imm
)); break;
8724 IRTemp rcV
= math_DUP_TO_V128(rcS
, ty
);
/* Let |new64| be a V128 in which only the lower 64 bits are interesting,
   and the upper can contain any value -- it is ignored.  If |is2| is False,
   generate IR to put |new64| in the lower half of vector reg |dd| and zero
   the upper half.  If |is2| is True, generate IR to put |new64| in the upper
   half of vector reg |dd| and leave the lower half unchanged.  This
   simulates the behaviour of the "foo/foo2" instructions in which the
   destination is half the width of sources, for example addhn/addhn2.
*/
static
void putLO64andZUorPutHI64 ( Bool is2, UInt dd, IRTemp new64 )
{
   if (is2) {
      /* Get the old contents of Vdd, zero the upper half, and replace
         it with |new64|. */
      IRTemp t_zero_oldLO = newTempV128();
      assign(t_zero_oldLO, unop(Iop_ZeroHI64ofV128, getQReg128(dd)));
      IRTemp t_newHI_zero = newTempV128();
      assign(t_newHI_zero, binop(Iop_InterleaveLO64x2, mkexpr(new64),
                                                       mkV128(0x0000)));
      IRTemp res = newTempV128();
      assign(res, binop(Iop_OrV128, mkexpr(t_zero_oldLO),
                                    mkexpr(t_newHI_zero)));
      putQReg128(dd, mkexpr(res));
   } else {
      /* This is simple. */
      putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(new64)));
   }
}
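
/* Illustrative note (not part of the original source): for
   "addhn v0.8b, v1.8h, v2.8h" the narrowed result lands in the low half of
   v0 and the high half is zeroed (is2=False), whereas
   "addhn2 v0.16b, v1.8h, v2.8h" writes only the high half of v0 and leaves
   the low half intact (is2=True). */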
/* Compute vector SQABS at lane size |size| for |srcE|, returning
   the q result in |*qabs| and the normal result in |*nabs|. */
static
void math_SQABS ( /*OUT*/IRTemp* qabs, /*OUT*/IRTemp* nabs,
                  IRExpr* srcE, UInt size )
{
   IRTemp src, mask, maskn, nsub, qsub;
   src = mask = maskn = nsub = qsub = IRTemp_INVALID;
   newTempsV128_7(&src, &mask, &maskn, &nsub, &qsub, nabs, qabs);
   assign(src,   srcE);
   assign(mask,  binop(mkVecCMPGTS(size),  mkV128(0x0000), mkexpr(src)));
   assign(maskn, unop(Iop_NotV128, mkexpr(mask)));
   assign(nsub,  binop(mkVecSUB(size),   mkV128(0x0000), mkexpr(src)));
   assign(qsub,  binop(mkVecQSUBS(size), mkV128(0x0000), mkexpr(src)));
   assign(*nabs, binop(Iop_OrV128,
                       binop(Iop_AndV128, mkexpr(nsub), mkexpr(mask)),
                       binop(Iop_AndV128, mkexpr(src),  mkexpr(maskn))));
   assign(*qabs, binop(Iop_OrV128,
                       binop(Iop_AndV128, mkexpr(qsub), mkexpr(mask)),
                       binop(Iop_AndV128, mkexpr(src),  mkexpr(maskn))));
}
/* Compute vector SQNEG at lane size |size| for |srcE|, returning
   the q result in |*qneg| and the normal result in |*nneg|. */
static
void math_SQNEG ( /*OUT*/IRTemp* qneg, /*OUT*/IRTemp* nneg,
                  IRExpr* srcE, UInt size )
{
   IRTemp src = IRTemp_INVALID;
   newTempsV128_3(&src, nneg, qneg);
   assign(src, srcE);
   assign(*nneg, binop(mkVecSUB(size),   mkV128(0x0000), mkexpr(src)));
   assign(*qneg, binop(mkVecQSUBS(size), mkV128(0x0000), mkexpr(src)));
}
/* Zero all except the least significant lane of |srcE|, where |size|
   indicates the lane size in the usual way. */
static IRTemp math_ZERO_ALL_EXCEPT_LOWEST_LANE ( UInt size, IRExpr* srcE )
{
   vassert(size < 4);
   IRTemp t = newTempV128();
   assign(t, unop(mkVecZEROHIxxOFV128(size), srcE));
   return t;
}
8807 /* Generate IR to compute vector widening MULL from either the lower
8808 (is2==False) or upper (is2==True) halves of vecN and vecM. The
8809 widening multiplies are unsigned when isU==True and signed when
8810 isU==False. |size| is the narrow lane size indication. Optionally,
8811 the product may be added to or subtracted from vecD, at the wide lane
8812 size. This happens when |mas| is 'a' (add) or 's' (sub). When |mas|
8813 is 'm' (only multiply) then the accumulate part does not happen, and
8814 |vecD| is expected to == IRTemp_INVALID.
8816 Only size==0 (h_b_b), size==1 (s_h_h) and size==2 (d_s_s) variants
8817 are allowed. The result is returned in a new IRTemp, which is
8818 returned in *res. */
8820 void math_MULL_ACC ( /*OUT*/IRTemp
* res
,
8821 Bool is2
, Bool isU
, UInt size
, HChar mas
,
8822 IRTemp vecN
, IRTemp vecM
, IRTemp vecD
)
8824 vassert(res
&& *res
== IRTemp_INVALID
);
8826 vassert(mas
== 'm' || mas
== 'a' || mas
== 's');
8827 if (mas
== 'm') vassert(vecD
== IRTemp_INVALID
);
8828 IROp mulOp
= isU
? mkVecMULLU(size
) : mkVecMULLS(size
);
8829 IROp accOp
= (mas
== 'a') ? mkVecADD(size
+1)
8830 : (mas
== 's' ? mkVecSUB(size
+1)
8832 IRTemp mul
= math_BINARY_WIDENING_V128(is2
, mulOp
,
8833 mkexpr(vecN
), mkexpr(vecM
));
8834 *res
= newTempV128();
8835 assign(*res
, mas
== 'm' ? mkexpr(mul
)
8836 : binop(accOp
, mkexpr(vecD
), mkexpr(mul
)));
8840 /* Same as math_MULL_ACC, except the multiply is signed widening,
8841 the multiplied value is then doubled, before being added to or
8842 subtracted from the accumulated value. And everything is
8843 saturated. In all cases, saturation residuals are returned
8844 via (sat1q, sat1n), and in the accumulate cases,
8845 via (sat2q, sat2n) too. All results are returned in new temporaries.
8846 In the no-accumulate case, *sat2q and *sat2n are never instantiated,
8847 so the caller can tell this has happened. */
8849 void math_SQDMULL_ACC ( /*OUT*/IRTemp
* res
,
8850 /*OUT*/IRTemp
* sat1q
, /*OUT*/IRTemp
* sat1n
,
8851 /*OUT*/IRTemp
* sat2q
, /*OUT*/IRTemp
* sat2n
,
8852 Bool is2
, UInt size
, HChar mas
,
8853 IRTemp vecN
, IRTemp vecM
, IRTemp vecD
)
8856 vassert(mas
== 'm' || mas
== 'a' || mas
== 's');
8858 sat1q = vecN.D[is2] *sq vecM.d[is2] *q 2
8859 sat1n = vecN.D[is2] *s vecM.d[is2] * 2
8860 IOW take either the low or high halves of vecN and vecM, signed widen,
8861 multiply, double that, and signedly saturate. Also compute the same
8862 but without saturation.
8864 vassert(sat2q
&& *sat2q
== IRTemp_INVALID
);
8865 vassert(sat2n
&& *sat2n
== IRTemp_INVALID
);
8866 newTempsV128_3(sat1q
, sat1n
, res
);
8867 IRTemp tq
= math_BINARY_WIDENING_V128(is2
, mkVecQDMULLS(size
),
8868 mkexpr(vecN
), mkexpr(vecM
));
8869 IRTemp tn
= math_BINARY_WIDENING_V128(is2
, mkVecMULLS(size
),
8870 mkexpr(vecN
), mkexpr(vecM
));
8871 assign(*sat1q
, mkexpr(tq
));
8872 assign(*sat1n
, binop(mkVecADD(size
+1), mkexpr(tn
), mkexpr(tn
)));
8874 /* If there is no accumulation, the final result is sat1q,
8875 and there's no assignment to sat2q or sat2n. */
8877 assign(*res
, mkexpr(*sat1q
));
8882 sat2q = vecD +sq/-sq sat1q
8883 sat2n = vecD +/- sat1n
8886 newTempsV128_2(sat2q
, sat2n
);
8887 assign(*sat2q
, binop(mas
== 'a' ? mkVecQADDS(size
+1) : mkVecQSUBS(size
+1),
8888 mkexpr(vecD
), mkexpr(*sat1q
)));
8889 assign(*sat2n
, binop(mas
== 'a' ? mkVecADD(size
+1) : mkVecSUB(size
+1),
8890 mkexpr(vecD
), mkexpr(*sat1n
)));
8891 assign(*res
, mkexpr(*sat2q
));
8895 /* Generate IR for widening signed vector multiplies. The operands
8896 have their lane width signedly widened, and they are then multiplied
8897 at the wider width, returning results in two new IRTemps. */
8899 void math_MULLS ( /*OUT*/IRTemp
* resHI
, /*OUT*/IRTemp
* resLO
,
8900 UInt sizeNarrow
, IRTemp argL
, IRTemp argR
)
8902 vassert(sizeNarrow
<= 2);
8903 newTempsV128_2(resHI
, resLO
);
8904 IRTemp argLhi
= newTemp(Ity_I64
);
8905 IRTemp argLlo
= newTemp(Ity_I64
);
8906 IRTemp argRhi
= newTemp(Ity_I64
);
8907 IRTemp argRlo
= newTemp(Ity_I64
);
8908 assign(argLhi
, unop(Iop_V128HIto64
, mkexpr(argL
)));
8909 assign(argLlo
, unop(Iop_V128to64
, mkexpr(argL
)));
8910 assign(argRhi
, unop(Iop_V128HIto64
, mkexpr(argR
)));
8911 assign(argRlo
, unop(Iop_V128to64
, mkexpr(argR
)));
8912 IROp opMulls
= mkVecMULLS(sizeNarrow
);
8913 assign(*resHI
, binop(opMulls
, mkexpr(argLhi
), mkexpr(argRhi
)));
8914 assign(*resLO
, binop(opMulls
, mkexpr(argLlo
), mkexpr(argRlo
)));
8918 /* Generate IR for SQDMULH and SQRDMULH: signedly wideningly multiply,
8919 double that, possibly add a rounding constant (R variants), and take
8922 void math_SQDMULH ( /*OUT*/IRTemp
* res
,
8923 /*OUT*/IRTemp
* sat1q
, /*OUT*/IRTemp
* sat1n
,
8924 Bool isR
, UInt size
, IRTemp vN
, IRTemp vM
)
8926 vassert(size
== X01
|| size
== X10
); /* s or h only */
8928 newTempsV128_3(res
, sat1q
, sat1n
);
8930 IRTemp mullsHI
= IRTemp_INVALID
, mullsLO
= IRTemp_INVALID
;
8931 math_MULLS(&mullsHI
, &mullsLO
, size
, vN
, vM
);
8933 IRTemp addWide
= mkVecADD(size
+1);
8936 assign(*sat1q
, binop(mkVecQRDMULHIS(size
), mkexpr(vN
), mkexpr(vM
)));
8938 Int rcShift
= size
== X01
? 15 : 31;
8939 IRTemp roundConst
= math_VEC_DUP_IMM(size
+1, 1ULL << rcShift
);
8941 binop(mkVecCATODDLANES(size
),
8943 binop(addWide
, mkexpr(mullsHI
), mkexpr(mullsHI
)),
8944 mkexpr(roundConst
)),
8946 binop(addWide
, mkexpr(mullsLO
), mkexpr(mullsLO
)),
8947 mkexpr(roundConst
))));
8949 assign(*sat1q
, binop(mkVecQDMULHIS(size
), mkexpr(vN
), mkexpr(vM
)));
8952 binop(mkVecCATODDLANES(size
),
8953 binop(addWide
, mkexpr(mullsHI
), mkexpr(mullsHI
)),
8954 binop(addWide
, mkexpr(mullsLO
), mkexpr(mullsLO
))));
8957 assign(*res
, mkexpr(*sat1q
));
8960 /* Generate IR for SQRDMLAH and SQRDMLSH: signedly wideningly multiply,
8961 double, add a rounding constant, take the high half and accumulate. */
8963 void math_SQRDMLAH ( /*OUT*/IRTemp
* res
, /*OUT*/IRTemp
* res_nosat
, Bool isAdd
,
8964 UInt size
, IRTemp vD
, IRTemp vN
, IRTemp vM
)
8966 vassert(size
== X01
|| size
== X10
); /* s or h only */
8968 /* SQRDMLAH = SQADD(A, SQRDMULH(B, C)) */
8970 IRTemp mul
, mul_nosat
, dummy
;
8971 mul
= mul_nosat
= dummy
= IRTemp_INVALID
;
8972 math_SQDMULH(&mul
, &dummy
, &mul_nosat
, True
/*R*/, size
, vN
, vM
);
8974 IROp op
= isAdd
? mkVecADD(size
) : mkVecSUB(size
);
8975 IROp qop
= isAdd
? mkVecQADDS(size
) : mkVecQSUBS(size
);
8976 newTempsV128_2(res
, res_nosat
);
8977 assign(*res
, binop(qop
, mkexpr(vD
), mkexpr(mul
)));
8978 assign(*res_nosat
, binop(op
, mkexpr(vD
), mkexpr(mul_nosat
)));
8982 /* Generate IR for SQSHL, UQSHL, SQSHLU by imm. Put the result in
8983 a new temp in *res, and the Q difference pair in new temps in
8984 *qDiff1 and *qDiff2 respectively. |nm| denotes which of the
8985 three operations it is. */
8987 void math_QSHL_IMM ( /*OUT*/IRTemp
* res
,
8988 /*OUT*/IRTemp
* qDiff1
, /*OUT*/IRTemp
* qDiff2
,
8989 IRTemp src
, UInt size
, UInt shift
, const HChar
* nm
)
8992 UInt laneBits
= 8 << size
;
8993 vassert(shift
< laneBits
);
8994 newTempsV128_3(res
, qDiff1
, qDiff2
);
8995 IRTemp z128
= newTempV128();
8996 assign(z128
, mkV128(0x0000));
8999 if (vex_streq(nm
, "uqshl")) {
9000 IROp qop
= mkVecQSHLNSATUU(size
);
9001 assign(*res
, binop(qop
, mkexpr(src
), mkU8(shift
)));
9003 /* No shift means no saturation. */
9004 assign(*qDiff1
, mkexpr(z128
));
9005 assign(*qDiff2
, mkexpr(z128
));
9007 /* Saturation has occurred if any of the shifted-out bits are
9008 nonzero. We get the shifted-out bits by right-shifting the
9010 UInt rshift
= laneBits
- shift
;
9011 vassert(rshift
>= 1 && rshift
< laneBits
);
9012 assign(*qDiff1
, binop(mkVecSHRN(size
), mkexpr(src
), mkU8(rshift
)));
9013 assign(*qDiff2
, mkexpr(z128
));
9019 if (vex_streq(nm
, "sqshl")) {
9020 IROp qop
= mkVecQSHLNSATSS(size
);
9021 assign(*res
, binop(qop
, mkexpr(src
), mkU8(shift
)));
9023 /* No shift means no saturation. */
9024 assign(*qDiff1
, mkexpr(z128
));
9025 assign(*qDiff2
, mkexpr(z128
));
9027 /* Saturation has occurred if any of the shifted-out bits are
9028 different from the top bit of the original value. */
9029 UInt rshift
= laneBits
- 1 - shift
;
9030 vassert(rshift
>= 0 && rshift
< laneBits
-1);
9031 /* qDiff1 is the shifted out bits, and the top bit of the original
9032 value, preceded by zeroes. */
9033 assign(*qDiff1
, binop(mkVecSHRN(size
), mkexpr(src
), mkU8(rshift
)));
9034 /* qDiff2 is the top bit of the original value, cloned the
9035 correct number of times. */
9036 assign(*qDiff2
, binop(mkVecSHRN(size
),
9037 binop(mkVecSARN(size
), mkexpr(src
),
9040 /* This also succeeds in comparing the top bit of the original
9041 value to itself, which is a bit stupid, but not wrong. */
9047 if (vex_streq(nm
, "sqshlu")) {
9048 IROp qop
= mkVecQSHLNSATSU(size
);
9049 assign(*res
, binop(qop
, mkexpr(src
), mkU8(shift
)));
9051 /* If there's no shift, saturation depends on the top bit
9053 assign(*qDiff1
, binop(mkVecSHRN(size
), mkexpr(src
), mkU8(laneBits
-1)));
9054 assign(*qDiff2
, mkexpr(z128
));
9056 /* Saturation has occurred if any of the shifted-out bits are
9057 nonzero. We get the shifted-out bits by right-shifting the
9059 UInt rshift
= laneBits
- shift
;
9060 vassert(rshift
>= 1 && rshift
< laneBits
);
9061 assign(*qDiff1
, binop(mkVecSHRN(size
), mkexpr(src
), mkU8(rshift
)));
9062 assign(*qDiff2
, mkexpr(z128
));
9071 /* Generate IR to do SRHADD and URHADD. */
9073 IRTemp
math_RHADD ( UInt size
, Bool isU
, IRTemp aa
, IRTemp bb
)
9076 (A >> 1) + (B >> 1) + (((A & 1) + (B & 1) + 1) >> 1)
9079 IROp opSHR
= isU
? mkVecSHRN(size
) : mkVecSARN(size
);
9080 IROp opADD
= mkVecADD(size
);
9081 /* The only tricky bit is to generate the correct vector 1 constant. */
9082 const ULong ones64
[4]
9083 = { 0x0101010101010101ULL
, 0x0001000100010001ULL
,
9084 0x0000000100000001ULL
, 0x0000000000000001ULL
};
9085 IRTemp imm64
= newTemp(Ity_I64
);
9086 assign(imm64
, mkU64(ones64
[size
]));
9087 IRTemp vecOne
= newTempV128();
9088 assign(vecOne
, binop(Iop_64HLtoV128
, mkexpr(imm64
), mkexpr(imm64
)));
9089 IRTemp scaOne
= newTemp(Ity_I8
);
9090 assign(scaOne
, mkU8(1));
9091 IRTemp res
= newTempV128();
9094 binop(opSHR
, mkexpr(aa
), mkexpr(scaOne
)),
9096 binop(opSHR
, mkexpr(bb
), mkexpr(scaOne
)),
9100 binop(Iop_AndV128
, mkexpr(aa
),
9102 binop(Iop_AndV128
, mkexpr(bb
),
/* QCFLAG tracks the SIMD sticky saturation status.  Update the status
   thusly: if, after application of |opZHI| to both |qres| and |nres|,
   they have the same value, leave QCFLAG unchanged.  Otherwise, set it
   (implicitly) to 1.  |opZHI| may only be one of the Iop_ZeroHIxxofV128
   operators, or Iop_INVALID, in which case |qres| and |nres| are used
   unmodified.  The presence of |opZHI| means this function can be used to
   generate QCFLAG update code for both scalar and vector SIMD operations.
*/
static
void updateQCFLAGwithDifferenceZHI ( IRTemp qres, IRTemp nres, IROp opZHI )
{
   IRTemp diff      = newTempV128();
   IRTemp oldQCFLAG = newTempV128();
   IRTemp newQCFLAG = newTempV128();
   if (opZHI == Iop_INVALID) {
      assign(diff, binop(Iop_XorV128, mkexpr(qres), mkexpr(nres)));
   } else {
      vassert(opZHI == Iop_ZeroHI64ofV128
              || opZHI == Iop_ZeroHI96ofV128 || opZHI == Iop_ZeroHI112ofV128);
      assign(diff, unop(opZHI, binop(Iop_XorV128, mkexpr(qres), mkexpr(nres))));
   }
   assign(oldQCFLAG, IRExpr_Get(OFFB_QCFLAG, Ity_V128));
   assign(newQCFLAG, binop(Iop_OrV128, mkexpr(oldQCFLAG), mkexpr(diff)));
   stmt(IRStmt_Put(OFFB_QCFLAG, mkexpr(newQCFLAG)));
}


/* A variant of updateQCFLAGwithDifferenceZHI in which |qres| and |nres|
   are used unmodified, hence suitable for QCFLAG updates for whole-vector
   operations. */
static
void updateQCFLAGwithDifference ( IRTemp qres, IRTemp nres )
{
   updateQCFLAGwithDifferenceZHI(qres, nres, Iop_INVALID);
}
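
/* Illustrative note (not part of the original source): callers compute each
   saturating operation twice, once with the saturating IROp (giving |qres|)
   and once with the plain wrapping IROp (giving |nres|).  If the two differ
   in any lane, the XOR is nonzero and gets ORed into QCFLAG, which is how
   the sticky FPSR.QC bit is modelled. */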
9153 /* Generate IR to rearrange two vector values in a way which is useful
9154 for doing S/D/H add-pair etc operations. There are 5 cases:
9156 2d: [m1 m0] [n1 n0] --> [m1 n1] [m0 n0]
9158 4s: [m3 m2 m1 m0] [n3 n2 n1 n0] --> [m3 m1 n3 n1] [m2 m0 n2 n0]
9160 8h: [m7 m6 m5 m4 m3 m2 m1 m0] [n7 n6 n5 n4 n3 n2 n1 n0] -->
9161 [m7 m5 n7 n5 m3 m1 n3 n1] [m6 m4 n6 n4 m2 m0 n2 n0]
9163 2s: [m3 m2 m1 m0] [n3 n2 n1 n0] --> [0 0 m1 n1] [0 0 m0 n0]
9165 4h: [m7 m6 m5 m4 m3 m2 m1 m0] [n7 n6 n5 n4 n3 n2 n1 n0] -->
9166 [ 0 0 0 0 m3 m1 n3 n1] [ 0 0 0 0 m2 m0 n2 n0]
9169 void math_REARRANGE_FOR_FLOATING_PAIRWISE (
9170 /*OUT*/IRTemp
* rearrL
, /*OUT*/IRTemp
* rearrR
,
9171 IRTemp vecM
, IRTemp vecN
, ARM64VecESize sz
, UInt bitQ
9174 vassert(rearrL
&& *rearrL
== IRTemp_INVALID
);
9175 vassert(rearrR
&& *rearrR
== IRTemp_INVALID
);
9176 *rearrL
= newTempV128();
9177 *rearrR
= newTempV128();
9183 assign(*rearrL
, binop(Iop_InterleaveHI64x2
, mkexpr(vecM
), mkexpr(vecN
)));
9184 assign(*rearrR
, binop(Iop_InterleaveLO64x2
, mkexpr(vecM
), mkexpr(vecN
)));
9190 assign(*rearrL
, binop(Iop_CatOddLanes32x4
, mkexpr(vecM
), mkexpr(vecN
)));
9191 assign(*rearrR
, binop(Iop_CatEvenLanes32x4
, mkexpr(vecM
), mkexpr(vecN
)));
9194 IRTemp m1n1m0n0
= newTempV128();
9195 IRTemp m0n0m1n1
= newTempV128();
9196 assign(m1n1m0n0
, binop(Iop_InterleaveLO32x4
,
9197 mkexpr(vecM
), mkexpr(vecN
)));
9198 assign(m0n0m1n1
, triop(Iop_SliceV128
,
9199 mkexpr(m1n1m0n0
), mkexpr(m1n1m0n0
), mkU8(8)));
9200 assign(*rearrL
, unop(Iop_ZeroHI64ofV128
, mkexpr(m1n1m0n0
)));
9201 assign(*rearrR
, unop(Iop_ZeroHI64ofV128
, mkexpr(m0n0m1n1
)));
9208 assign(*rearrL
, binop(Iop_CatOddLanes16x8
, mkexpr(vecM
), mkexpr(vecN
)));
9209 assign(*rearrR
, binop(Iop_CatEvenLanes16x8
, mkexpr(vecM
), mkexpr(vecN
)));
9212 IRTemp m3m1n3n1
= newTempV128();
9213 IRTemp m2m0n2n0
= newTempV128();
9214 assign(m3m1n3n1
, binop(Iop_CatOddLanes16x8
, mkexpr(vecM
), mkexpr(vecN
)));
9215 assign(m2m0n2n0
, binop(Iop_CatEvenLanes16x8
, mkexpr(vecM
), mkexpr(vecN
)));
9216 assign(*rearrL
, unop(Iop_ZeroHI64ofV128
,
9217 binop(Iop_CatEvenLanes32x4
, mkexpr(m3m1n3n1
),
9218 mkexpr(m3m1n3n1
))));
9219 assign(*rearrR
, unop(Iop_ZeroHI64ofV128
,
9220 binop(Iop_CatEvenLanes32x4
, mkexpr(m2m0n2n0
),
9221 mkexpr(m2m0n2n0
))));
9225 default: vpanic("math_REARRANGE_FOR_FLOATING_PAIRWISE");
/* Returns 2.0 ^ (-n) for n in 1 .. 64 */
static Double two_to_the_minus ( Int n )
{
   if (n == 1) return 0.5;
   vassert(n >= 2 && n <= 64);
   Int half = n / 2;
   return two_to_the_minus(half) * two_to_the_minus(n - half);
}


/* Returns 2.0 ^ n for n in 1 .. 64 */
static Double two_to_the_plus ( Int n )
{
   if (n == 1) return 2.0;
   vassert(n >= 2 && n <= 64);
   Int half = n / 2;
   return two_to_the_plus(half) * two_to_the_plus(n - half);
}
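
/* Illustrative note (not part of the original source): the recursion just
   splits the exponent, eg two_to_the_plus(10) = two_to_the_plus(5) *
   two_to_the_plus(5) = 32.0 * 32.0 = 1024.0.  All results in the 1..64
   range are exact powers of two, hence exactly representable in a Double,
   so no rounding error is introduced. */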

/*------------------------------------------------------------*/
/*--- SIMD and FP instructions                              ---*/
/*------------------------------------------------------------*/
9255 Bool
dis_AdvSIMD_EXT(/*MB_OUT*/DisResult
* dres
, UInt insn
)
9257 /* 31 29 23 21 20 15 14 10 9 4
9258 0 q 101110 op2 0 m 0 imm4 0 n d
9261 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
9262 if (INSN(31,31) != 0
9263 || INSN(29,24) != BITS6(1,0,1,1,1,0)
9264 || INSN(21,21) != 0 || INSN(15,15) != 0 || INSN(10,10) != 0) {
9267 UInt bitQ
= INSN(30,30);
9268 UInt op2
= INSN(23,22);
9269 UInt mm
= INSN(20,16);
9270 UInt imm4
= INSN(14,11);
9271 UInt nn
= INSN(9,5);
9272 UInt dd
= INSN(4,0);
9274 if (op2
== BITS2(0,0)) {
9275 /* -------- 00: EXT 16b_16b_16b, 8b_8b_8b -------- */
9276 IRTemp sHi
= newTempV128();
9277 IRTemp sLo
= newTempV128();
9278 IRTemp res
= newTempV128();
9279 assign(sHi
, getQReg128(mm
));
9280 assign(sLo
, getQReg128(nn
));
9283 assign(res
, mkexpr(sLo
));
9285 vassert(imm4
>= 1 && imm4
<= 15);
9286 assign(res
, triop(Iop_SliceV128
,
9287 mkexpr(sHi
), mkexpr(sLo
), mkU8(imm4
)));
9289 putQReg128(dd
, mkexpr(res
));
9290 DIP("ext v%u.16b, v%u.16b, v%u.16b, #%u\n", dd
, nn
, mm
, imm4
);
9292 if (imm4
>= 8) return False
;
9294 assign(res
, mkexpr(sLo
));
9296 vassert(imm4
>= 1 && imm4
<= 7);
9297 IRTemp hi64lo64
= newTempV128();
9298 assign(hi64lo64
, binop(Iop_InterleaveLO64x2
,
9299 mkexpr(sHi
), mkexpr(sLo
)));
9300 assign(res
, triop(Iop_SliceV128
,
9301 mkexpr(hi64lo64
), mkexpr(hi64lo64
), mkU8(imm4
)));
9303 putQReg128(dd
, unop(Iop_ZeroHI64ofV128
, mkexpr(res
)));
9304 DIP("ext v%u.8b, v%u.8b, v%u.8b, #%u\n", dd
, nn
, mm
, imm4
);
9315 Bool
dis_AdvSIMD_TBL_TBX(/*MB_OUT*/DisResult
* dres
, UInt insn
)
9317 /* 31 29 23 21 20 15 14 12 11 9 4
9318 0 q 001110 op2 0 m 0 len op 00 n d
9319 Decode fields: op2,len,op
9321 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
9322 if (INSN(31,31) != 0
9323 || INSN(29,24) != BITS6(0,0,1,1,1,0)
9326 || INSN(11,10) != BITS2(0,0)) {
9329 UInt bitQ
= INSN(30,30);
9330 UInt op2
= INSN(23,22);
9331 UInt mm
= INSN(20,16);
9332 UInt len
= INSN(14,13);
9333 UInt bitOP
= INSN(12,12);
9334 UInt nn
= INSN(9,5);
9335 UInt dd
= INSN(4,0);
9338 /* -------- 00,xx,0 TBL, xx register table -------- */
9339 /* -------- 00,xx,1 TBX, xx register table -------- */
9340 /* 31 28 20 15 14 12 9 4
9341 0q0 01110 000 m 0 len 000 n d TBL Vd.Ta, {Vn .. V(n+len)%32}, Vm.Ta
9342 0q0 01110 000 m 0 len 100 n d TBX Vd.Ta, {Vn .. V(n+len)%32}, Vm.Ta
9343 where Ta = 16b(q=1) or 8b(q=0)
9345 Bool isTBX
= bitOP
== 1;
9346 /* The out-of-range values to use. */
9347 IRTemp oor_values
= newTempV128();
9348 assign(oor_values
, isTBX
? getQReg128(dd
) : mkV128(0));
9350 IRTemp src
= newTempV128();
9351 assign(src
, getQReg128(mm
));
9352 /* The table values */
9355 for (i
= 0; i
<= len
; i
++) {
9357 tab
[i
] = newTempV128();
9358 assign(tab
[i
], getQReg128((nn
+ i
) % 32));
9360 IRTemp res
= math_TBL_TBX(tab
, len
, src
, oor_values
);
9361 putQReg128(dd
, math_MAYBE_ZERO_HI64(bitQ
, res
));
9362 const HChar
* Ta
= bitQ
==1 ? "16b" : "8b";
9363 const HChar
* nm
= isTBX
? "tbx" : "tbl";
9364 DIP("%s %s.%s, {v%u.16b .. v%u.16b}, %s.%s\n",
9365 nm
, nameQReg128(dd
), Ta
, nn
, (nn
+ len
) % 32, nameQReg128(mm
), Ta
);
9369 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
9376 Bool
dis_AdvSIMD_ZIP_UZP_TRN(/*MB_OUT*/DisResult
* dres
, UInt insn
)
9378 /* 31 29 23 21 20 15 14 11 9 4
9379 0 q 001110 size 0 m 0 opcode 10 n d
9380 Decode fields: opcode
9382 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
9383 if (INSN(31,31) != 0
9384 || INSN(29,24) != BITS6(0,0,1,1,1,0)
9385 || INSN(21,21) != 0 || INSN(15,15) != 0 || INSN(11,10) != BITS2(1,0)) {
9388 UInt bitQ
= INSN(30,30);
9389 UInt size
= INSN(23,22);
9390 UInt mm
= INSN(20,16);
9391 UInt opcode
= INSN(14,12);
9392 UInt nn
= INSN(9,5);
9393 UInt dd
= INSN(4,0);
9395 if (opcode
== BITS3(0,0,1) || opcode
== BITS3(1,0,1)) {
9396 /* -------- 001 UZP1 std7_std7_std7 -------- */
9397 /* -------- 101 UZP2 std7_std7_std7 -------- */
9398 if (bitQ
== 0 && size
== X11
) return False
; // implied 1d case
9399 Bool isUZP1
= opcode
== BITS3(0,0,1);
9400 IROp op
= isUZP1
? mkVecCATEVENLANES(size
)
9401 : mkVecCATODDLANES(size
);
9402 IRTemp preL
= newTempV128();
9403 IRTemp preR
= newTempV128();
9404 IRTemp res
= newTempV128();
9406 assign(preL
, binop(Iop_InterleaveLO64x2
, getQReg128(mm
),
9408 assign(preR
, mkexpr(preL
));
9410 assign(preL
, getQReg128(mm
));
9411 assign(preR
, getQReg128(nn
));
9413 assign(res
, binop(op
, mkexpr(preL
), mkexpr(preR
)));
9414 putQReg128(dd
, math_MAYBE_ZERO_HI64(bitQ
, res
));
9415 const HChar
* nm
= isUZP1
? "uzp1" : "uzp2";
9416 const HChar
* arr
= nameArr_Q_SZ(bitQ
, size
);
9417 DIP("%s %s.%s, %s.%s, %s.%s\n", nm
,
9418 nameQReg128(dd
), arr
, nameQReg128(nn
), arr
, nameQReg128(mm
), arr
);
9422 if (opcode
== BITS3(0,1,0) || opcode
== BITS3(1,1,0)) {
9423 /* -------- 010 TRN1 std7_std7_std7 -------- */
9424 /* -------- 110 TRN2 std7_std7_std7 -------- */
9425 if (bitQ
== 0 && size
== X11
) return False
; // implied 1d case
9426 Bool isTRN1
= opcode
== BITS3(0,1,0);
9427 IROp op1
= isTRN1
? mkVecCATEVENLANES(size
)
9428 : mkVecCATODDLANES(size
);
9429 IROp op2
= mkVecINTERLEAVEHI(size
);
9430 IRTemp srcM
= newTempV128();
9431 IRTemp srcN
= newTempV128();
9432 IRTemp res
= newTempV128();
9433 assign(srcM
, getQReg128(mm
));
9434 assign(srcN
, getQReg128(nn
));
9435 assign(res
, binop(op2
, binop(op1
, mkexpr(srcM
), mkexpr(srcM
)),
9436 binop(op1
, mkexpr(srcN
), mkexpr(srcN
))));
9437 putQReg128(dd
, math_MAYBE_ZERO_HI64(bitQ
, res
));
9438 const HChar
* nm
= isTRN1
? "trn1" : "trn2";
9439 const HChar
* arr
= nameArr_Q_SZ(bitQ
, size
);
9440 DIP("%s %s.%s, %s.%s, %s.%s\n", nm
,
9441 nameQReg128(dd
), arr
, nameQReg128(nn
), arr
, nameQReg128(mm
), arr
);
9445 if (opcode
== BITS3(0,1,1) || opcode
== BITS3(1,1,1)) {
9446 /* -------- 011 ZIP1 std7_std7_std7 -------- */
9447 /* -------- 111 ZIP2 std7_std7_std7 -------- */
9448 if (bitQ
== 0 && size
== X11
) return False
; // implied 1d case
9449 Bool isZIP1
= opcode
== BITS3(0,1,1);
9450 IROp op
= isZIP1
? mkVecINTERLEAVELO(size
)
9451 : mkVecINTERLEAVEHI(size
);
9452 IRTemp preL
= newTempV128();
9453 IRTemp preR
= newTempV128();
9454 IRTemp res
= newTempV128();
9455 if (bitQ
== 0 && !isZIP1
) {
9456 IRTemp z128
= newTempV128();
9457 assign(z128
, mkV128(0x0000));
9458 // preL = Vm shifted left 32 bits
9459 // preR = Vn shifted left 32 bits
9460 assign(preL
, triop(Iop_SliceV128
,
9461 getQReg128(mm
), mkexpr(z128
), mkU8(12)));
9462 assign(preR
, triop(Iop_SliceV128
,
9463 getQReg128(nn
), mkexpr(z128
), mkU8(12)));
9466 assign(preL
, getQReg128(mm
));
9467 assign(preR
, getQReg128(nn
));
9469 assign(res
, binop(op
, mkexpr(preL
), mkexpr(preR
)));
9470 putQReg128(dd
, math_MAYBE_ZERO_HI64(bitQ
, res
));
9471 const HChar
* nm
= isZIP1
? "zip1" : "zip2";
9472 const HChar
* arr
= nameArr_Q_SZ(bitQ
, size
);
9473 DIP("%s %s.%s, %s.%s, %s.%s\n", nm
,
9474 nameQReg128(dd
), arr
, nameQReg128(nn
), arr
, nameQReg128(mm
), arr
);
9484 Bool
dis_AdvSIMD_across_lanes(/*MB_OUT*/DisResult
* dres
, UInt insn
)
9486 /* 31 28 23 21 16 11 9 4
9487 0 q u 01110 size 11000 opcode 10 n d
9488 Decode fields: u,size,opcode
9490 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
9491 if (INSN(31,31) != 0
9492 || INSN(28,24) != BITS5(0,1,1,1,0)
9493 || INSN(21,17) != BITS5(1,1,0,0,0) || INSN(11,10) != BITS2(1,0)) {
9496 UInt bitQ
= INSN(30,30);
9497 UInt bitU
= INSN(29,29);
9498 UInt size
= INSN(23,22);
9499 UInt opcode
= INSN(16,12);
9500 UInt nn
= INSN(9,5);
9501 UInt dd
= INSN(4,0);
9503 if (opcode
== BITS5(0,0,0,1,1)) {
9504 /* -------- 0,xx,00011 SADDLV -------- */
9505 /* -------- 1,xx,00011 UADDLV -------- */
9506 /* size is the narrow size */
9507 if (size
== X11
|| (size
== X10
&& bitQ
== 0)) return False
;
9508 Bool isU
= bitU
== 1;
9509 IRTemp src
= newTempV128();
9510 assign(src
, getQReg128(nn
));
9511 /* The basic plan is to widen the lower half, and if Q = 1,
9512 the upper half too. Add them together (if Q = 1), and in
9513 either case fold with add at twice the lane width.
9516 = mkexpr(math_WIDEN_LO_OR_HI_LANES(
9517 isU
, False
/*!fromUpperHalf*/, size
, mkexpr(src
)));
9520 = binop(mkVecADD(size
+1),
9522 mkexpr(math_WIDEN_LO_OR_HI_LANES(
9523 isU
, True
/*fromUpperHalf*/, size
, mkexpr(src
)))
9527 IRTemp tWi
= newTempV128();
9528 assign(tWi
, widened
);
9529 IRTemp res
= math_FOLDV(tWi
, mkVecADD(size
+1));
9530 putQReg128(dd
, mkexpr(res
));
9531 const HChar
* arr
= nameArr_Q_SZ(bitQ
, size
);
9532 const HChar ch
= "bhsd"[size
];
9533 DIP("%s %s.%c, %s.%s\n", isU
? "uaddlv" : "saddlv",
9534 nameQReg128(dd
), ch
, nameQReg128(nn
), arr
);
   UInt ix = 0; /*INVALID*/
   /**/ if (opcode == BITS5(0,1,0,1,0)) { ix = bitU == 0 ? 1 : 2; }
   else if (opcode == BITS5(1,1,0,1,0)) { ix = bitU == 0 ? 3 : 4; }
   else if (opcode == BITS5(1,1,0,1,1) && bitU == 0) { ix = 5; }
   if (ix != 0) {
      /* -------- 0,xx,01010: SMAXV -------- (1) */
      /* -------- 1,xx,01010: UMAXV -------- (2) */
      /* -------- 0,xx,11010: SMINV -------- (3) */
      /* -------- 1,xx,11010: UMINV -------- (4) */
      /* -------- 0,xx,11011: ADDV  -------- (5) */
      vassert(ix >= 1 && ix <= 5);
      if (size == X11) return False; // 1d,2d cases not allowed
      if (size == X10 && bitQ == 0) return False; // 2s case not allowed
      const IROp opMAXS[3]
         = { Iop_Max8Sx16, Iop_Max16Sx8, Iop_Max32Sx4 };
      const IROp opMAXU[3]
         = { Iop_Max8Ux16, Iop_Max16Ux8, Iop_Max32Ux4 };
      const IROp opMINS[3]
         = { Iop_Min8Sx16, Iop_Min16Sx8, Iop_Min32Sx4 };
      const IROp opMINU[3]
         = { Iop_Min8Ux16, Iop_Min16Ux8, Iop_Min32Ux4 };
      const IROp opADD[3]
         = { Iop_Add8x16, Iop_Add16x8, Iop_Add32x4 };
      IROp op = Iop_INVALID;
      const HChar* nm = NULL;
      switch (ix) {
         case 1: op = opMAXS[size]; nm = "smaxv"; break;
         case 2: op = opMAXU[size]; nm = "umaxv"; break;
         case 3: op = opMINS[size]; nm = "sminv"; break;
         case 4: op = opMINU[size]; nm = "uminv"; break;
         case 5: op = opADD[size];  nm = "addv";  break;
         default: vassert(0);
      }
      vassert(op != Iop_INVALID && nm != NULL);
      IRTemp tN1 = newTempV128();
      assign(tN1, getQReg128(nn));
      /* If Q == 0, we're just folding lanes in the lower half of
         the value.  In which case, copy the lower half of the
         source into the upper half, so we can then treat it the
         same as the full width case.  Except for the addition case,
         in which we have to zero out the upper half. */
      IRTemp tN2 = newTempV128();
      assign(tN2, bitQ == 0
                     ? (ix == 5 ? unop(Iop_ZeroHI64ofV128, mkexpr(tN1))
                                : mk_CatEvenLanes64x2(tN1,tN1))
                     : mkexpr(tN1));
      IRTemp res = math_FOLDV(tN2, op);
      if (res == IRTemp_INVALID)
         return False; /* means math_FOLDV
                          doesn't handle this case yet */
      putQReg128(dd, mkexpr(res));
      const IRType tys[3] = { Ity_I8, Ity_I16, Ity_I32 };
      IRType laneTy = tys[size];
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s, %s.%s\n", nm,
          nameQRegLO(dd, laneTy), nameQReg128(nn), arr);
      return True;
   }
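      /* Rationale for the Q=0 handling above: duplicating the lower
         64 bits into the upper half leaves max/min folds unchanged,
         since every lane value simply appears twice.  That would,
         however, double the result of ADDV, so for ix == 5 the upper
         half is zeroed instead -- zero being the identity for
         addition. */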
   if ((size == X00 || size == X10)
       && (opcode == BITS5(0,1,1,0,0) || opcode == BITS5(0,1,1,1,1))) {
      /* -------- 0,00,01100: FMAXNMV s_4s -------- */
      /* -------- 0,10,01100: FMINNMV s_4s -------- */
      /* -------- 1,00,01111: FMAXV   s_4s -------- */
      /* -------- 1,10,01111: FMINV   s_4s -------- */
      /* FMAXNM, FMINNM: FIXME -- KLUDGED */
      if (bitQ == 0) return False; // Only 4s is allowed
      Bool   isMIN = (size & 2) == 2;
      Bool   isNM  = opcode == BITS5(0,1,1,0,0);
      IROp   opMXX = (isMIN ? mkVecMINF : mkVecMAXF)(2);
      IRTemp src   = newTempV128();
      assign(src, getQReg128(nn));
      IRTemp res = math_FOLDV(src, opMXX);
      putQReg128(dd, mkexpr(res));
      DIP("%s%sv s%u, %u.4s\n",
          isMIN ? "fmin" : "fmax", isNM ? "nm" : "", dd, nn);
      return True;
   }
   return False;
#  undef INSN
}

Bool dis_AdvSIMD_copy(/*MB_OUT*/DisResult* dres, UInt insn)
{
   /* 31     28       20   15 14   10 9 4
      0 q op 01110000 imm5 0  imm4 1  n d
      Decode fields: q,op,imm4
   */
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   if (INSN(31,31) != 0
       || INSN(28,21) != BITS8(0,1,1,1,0,0,0,0)
       || INSN(15,15) != 0 || INSN(10,10) != 1) {
      return False;
   }
   UInt bitQ  = INSN(30,30);
   UInt bitOP = INSN(29,29);
   UInt imm5  = INSN(20,16);
   UInt imm4  = INSN(14,11);
   UInt nn    = INSN(9,5);
   UInt dd    = INSN(4,0);

   /* -------- x,0,0000: DUP (element, vector) -------- */
   /*
      0q0 01110000 imm5 000001 n d  DUP Vd.T, Vn.Ts[index]
   */
   if (bitOP == 0 && imm4 == BITS4(0,0,0,0)) {
      UInt  laneNo    = 16; /* invalid */
      UInt  laneSzLg2 = 5;  /* invalid */
      HChar laneCh    = '?';
      IRTemp res = handle_DUP_VEC_ELEM(&laneNo, &laneSzLg2, &laneCh,
                                       getQReg128(nn), imm5);
      if (res == IRTemp_INVALID)
         return False;
      if (bitQ == 0 && laneSzLg2 == X11)
         return False; /* .1d case */
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* arT = nameArr_Q_SZ(bitQ, laneSzLg2);
      DIP("dup %s.%s, %s.%c[%u]\n",
          nameQReg128(dd), arT, nameQReg128(nn), laneCh, laneNo);
      return True;
   }
   /* -------- x,0,0001: DUP (general, vector) -------- */
   /*
      0q0 01110000 imm5 0 0001 1 n d  DUP Vd.T, Rn
      Q=0 writes 64, Q=1 writes 128
      imm5: xxxx1  8B(q=0)      or 16b(q=1),     R=W
            xxx10  4H(q=0)      or 8H(q=1),      R=W
            xx100  2S(q=0)      or 4S(q=1),      R=W
            x1000  Invalid(q=0) or 2D(q=1),      R=X
            x0000  Invalid(q=0) or Invalid(q=1)
      Require op=0, imm4=0001
   */
   if (bitOP == 0 && imm4 == BITS4(0,0,0,1)) {
      Bool   isQ = bitQ == 1;
      IRTemp w0  = newTemp(Ity_I64);
      const HChar* arT = "??";
      IRType laneTy = Ity_INVALID;
      if (imm5 & 1) {
         arT    = isQ ? "16b" : "8b";
         laneTy = Ity_I8;
         assign(w0, unop(Iop_8Uto64, unop(Iop_64to8, getIReg64orZR(nn))));
      }
      else if (imm5 & 2) {
         arT    = isQ ? "8h" : "4h";
         laneTy = Ity_I16;
         assign(w0, unop(Iop_16Uto64, unop(Iop_64to16, getIReg64orZR(nn))));
      }
      else if (imm5 & 4) {
         arT    = isQ ? "4s" : "2s";
         laneTy = Ity_I32;
         assign(w0, unop(Iop_32Uto64, unop(Iop_64to32, getIReg64orZR(nn))));
      }
      else if ((imm5 & 8) && isQ) {
         arT    = "2d";
         laneTy = Ity_I64;
         assign(w0, getIReg64orZR(nn));
      }
      else {
         /* invalid; leave laneTy unchanged. */
      }
      if (laneTy != Ity_INVALID) {
         IRTemp w1 = math_DUP_TO_64(w0, laneTy);
         putQReg128(dd, binop(Iop_64HLtoV128,
                              isQ ? mkexpr(w1) : mkU64(0), mkexpr(w1)));
         DIP("dup %s.%s, %s\n",
             nameQReg128(dd), arT, nameIRegOrZR(laneTy == Ity_I64, nn));
         return True;
      }
   }
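      /* Worked example, assuming DUP v7.4h, w3 with w3 = 0x11112222:
         w0 becomes 0x0000000000002222, math_DUP_TO_64 replicates the
         16-bit lane to 0x2222222222222222, and since Q=0 the final
         128-bit write pairs that with a zero upper half, leaving the
         upper 64 bits of v7 zero and the lower 64 bits equal to
         0x2222222222222222. */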
   /* -------- 1,0,0011: INS (general) -------- */
   /*
      010 01110000 imm5 000111 n d  INS Vd.Ts[ix], Rn
      where Ts,ix = case imm5 of xxxx1 -> B, xxxx
                                 xxx10 -> H, xxx
                                 xx100 -> S, xx
                                 x1000 -> D, x
   */
   if (bitQ == 1 && bitOP == 0 && imm4 == BITS4(0,0,1,1)) {
      UInt    laneNo = 16;  /* invalid */
      HChar   ts     = '?'; /* invalid */
      IRExpr* src    = NULL;
      if (imm5 & 1) {
         src    = unop(Iop_64to8, getIReg64orZR(nn));
         laneNo = (imm5 >> 1) & 15;
         ts     = 'b';
      }
      else if (imm5 & 2) {
         src    = unop(Iop_64to16, getIReg64orZR(nn));
         laneNo = (imm5 >> 2) & 7;
         ts     = 'h';
      }
      else if (imm5 & 4) {
         src    = unop(Iop_64to32, getIReg64orZR(nn));
         laneNo = (imm5 >> 3) & 3;
         ts     = 's';
      }
      else if (imm5 & 8) {
         src    = getIReg64orZR(nn);
         laneNo = (imm5 >> 4) & 1;
         ts     = 'd';
      }
      vassert(laneNo < 16);
      putQRegLane(dd, laneNo, src);
      DIP("ins %s.%c[%u], %s\n",
          nameQReg128(dd), ts, laneNo, nameIReg64orZR(nn));
      return True;
9762 /* -------- x,0,0101: SMOV -------- */
9763 /* -------- x,0,0111: UMOV -------- */
9765 0q0 01110 000 imm5 001111 n d UMOV Xd/Wd, Vn.Ts[index]
9766 0q0 01110 000 imm5 001011 n d SMOV Xd/Wd, Vn.Ts[index]
9767 dest is Xd when q==1, Wd when q==0
9769 Ts,index,ops = case q:imm5 of
9770 0:xxxx1 -> B, xxxx, 8Uto64
9772 0:xxx10 -> H, xxx, 16Uto64
9774 0:xx100 -> S, xx, 32Uto64
9776 1:x1000 -> D, x, copy64
9779 Ts,index,ops = case q:imm5 of
9780 0:xxxx1 -> B, xxxx, (32Uto64 . 8Sto32)
9781 1:xxxx1 -> B, xxxx, 8Sto64
9782 0:xxx10 -> H, xxx, (32Uto64 . 16Sto32)
9783 1:xxx10 -> H, xxx, 16Sto64
9785 1:xx100 -> S, xx, 32Sto64
9789 if (bitOP
== 0 && (imm4
== BITS4(0,1,0,1) || imm4
== BITS4(0,1,1,1))) {
9790 Bool isU
= (imm4
& 2) == 2;
9791 const HChar
* arTs
= "??";
9792 UInt laneNo
= 16; /* invalid */
9793 // Setting 'res' to non-NULL determines valid/invalid
9795 if (!bitQ
&& (imm5
& 1)) { // 0:xxxx1
9796 laneNo
= (imm5
>> 1) & 15;
9797 IRExpr
* lane
= getQRegLane(nn
, laneNo
, Ity_I8
);
9798 res
= isU
? unop(Iop_8Uto64
, lane
)
9799 : unop(Iop_32Uto64
, unop(Iop_8Sto32
, lane
));
9802 else if (bitQ
&& (imm5
& 1)) { // 1:xxxx1
9803 laneNo
= (imm5
>> 1) & 15;
9804 IRExpr
* lane
= getQRegLane(nn
, laneNo
, Ity_I8
);
9806 : unop(Iop_8Sto64
, lane
);
9809 else if (!bitQ
&& (imm5
& 2)) { // 0:xxx10
9810 laneNo
= (imm5
>> 2) & 7;
9811 IRExpr
* lane
= getQRegLane(nn
, laneNo
, Ity_I16
);
9812 res
= isU
? unop(Iop_16Uto64
, lane
)
9813 : unop(Iop_32Uto64
, unop(Iop_16Sto32
, lane
));
9816 else if (bitQ
&& (imm5
& 2)) { // 1:xxx10
9817 laneNo
= (imm5
>> 2) & 7;
9818 IRExpr
* lane
= getQRegLane(nn
, laneNo
, Ity_I16
);
9820 : unop(Iop_16Sto64
, lane
);
9823 else if (!bitQ
&& (imm5
& 4)) { // 0:xx100
9824 laneNo
= (imm5
>> 3) & 3;
9825 IRExpr
* lane
= getQRegLane(nn
, laneNo
, Ity_I32
);
9826 res
= isU
? unop(Iop_32Uto64
, lane
)
9830 else if (bitQ
&& (imm5
& 4)) { // 1:xxx10
9831 laneNo
= (imm5
>> 3) & 3;
9832 IRExpr
* lane
= getQRegLane(nn
, laneNo
, Ity_I32
);
9834 : unop(Iop_32Sto64
, lane
);
9837 else if (bitQ
&& (imm5
& 8)) { // 1:x1000
9838 laneNo
= (imm5
>> 4) & 1;
9839 IRExpr
* lane
= getQRegLane(nn
, laneNo
, Ity_I64
);
9846 vassert(laneNo
< 16);
9847 putIReg64orZR(dd
, res
);
9848 DIP("%cmov %s, %s.%s[%u]\n", isU
? 'u' : 's',
9849 nameIRegOrZR(bitQ
== 1, dd
),
9850 nameQReg128(nn
), arTs
, laneNo
);
9857 /* -------- 1,1,xxxx: INS (element) -------- */
9859 011 01110000 imm5 0 imm4 n d INS Vd.Ts[ix1], Vn.Ts[ix2]
9861 = case imm5 of xxxx1 -> B, xxxx, imm4[3:0]
9862 xxx10 -> H, xxx, imm4[3:1]
9863 xx100 -> S, xx, imm4[3:2]
9864 x1000 -> D, x, imm4[3:3]
9866 if (bitQ
== 1 && bitOP
== 1) {
9868 IRType ity
= Ity_INVALID
;
9874 ix1
= (imm5
>> 1) & 15;
9875 ix2
= (imm4
>> 0) & 15;
9877 else if (imm5
& 2) {
9880 ix1
= (imm5
>> 2) & 7;
9881 ix2
= (imm4
>> 1) & 7;
9883 else if (imm5
& 4) {
9886 ix1
= (imm5
>> 3) & 3;
9887 ix2
= (imm4
>> 2) & 3;
9889 else if (imm5
& 8) {
9892 ix1
= (imm5
>> 4) & 1;
9893 ix2
= (imm4
>> 3) & 1;
9896 if (ity
!= Ity_INVALID
) {
9899 putQRegLane(dd
, ix1
, getQRegLane(nn
, ix2
, ity
));
9900 DIP("ins %s.%c[%u], %s.%c[%u]\n",
9901 nameQReg128(dd
), ts
, ix1
, nameQReg128(nn
), ts
, ix2
);
Bool dis_AdvSIMD_modified_immediate(/*MB_OUT*/DisResult* dres, UInt insn)
{
   /* 31    28          18  15    11 9     4
      0q op 01111 00000 abc cmode 01 defgh d
      Decode fields: q,op,cmode
      Bit 11 is really "o2", but it is always zero.
   */
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   if (INSN(31,31) != 0
       || INSN(28,19) != BITS10(0,1,1,1,1,0,0,0,0,0)
       || INSN(11,10) != BITS2(0,1)) {
      return False;
   }
   UInt bitQ     = INSN(30,30);
   UInt bitOP    = INSN(29,29);
   UInt cmode    = INSN(15,12);
   UInt abcdefgh = (INSN(18,16) << 5) | INSN(9,5);
   UInt dd       = INSN(4,0);

   ULong imm64lo  = 0;
   UInt  op_cmode = (bitOP << 4) | cmode;
   Bool  ok       = False;
   Bool  isORR    = False;
   Bool  isBIC    = False;
   Bool  isMOV    = False;
   Bool  isMVN    = False;
   Bool  isFMOV   = False;
   switch (op_cmode) {
9942 /* -------- x,0,0000 MOVI 32-bit shifted imm -------- */
9943 /* -------- x,0,0010 MOVI 32-bit shifted imm -------- */
9944 /* -------- x,0,0100 MOVI 32-bit shifted imm -------- */
9945 /* -------- x,0,0110 MOVI 32-bit shifted imm -------- */
9946 case BITS5(0,0,0,0,0): case BITS5(0,0,0,1,0):
9947 case BITS5(0,0,1,0,0): case BITS5(0,0,1,1,0): // 0:0xx0
9948 ok
= True
; isMOV
= True
; break;
9950 /* -------- x,0,0001 ORR (vector, immediate) 32-bit -------- */
9951 /* -------- x,0,0011 ORR (vector, immediate) 32-bit -------- */
9952 /* -------- x,0,0101 ORR (vector, immediate) 32-bit -------- */
9953 /* -------- x,0,0111 ORR (vector, immediate) 32-bit -------- */
9954 case BITS5(0,0,0,0,1): case BITS5(0,0,0,1,1):
9955 case BITS5(0,0,1,0,1): case BITS5(0,0,1,1,1): // 0:0xx1
9956 ok
= True
; isORR
= True
; break;
9958 /* -------- x,0,1000 MOVI 16-bit shifted imm -------- */
9959 /* -------- x,0,1010 MOVI 16-bit shifted imm -------- */
9960 case BITS5(0,1,0,0,0): case BITS5(0,1,0,1,0): // 0:10x0
9961 ok
= True
; isMOV
= True
; break;
9963 /* -------- x,0,1001 ORR (vector, immediate) 16-bit -------- */
9964 /* -------- x,0,1011 ORR (vector, immediate) 16-bit -------- */
9965 case BITS5(0,1,0,0,1): case BITS5(0,1,0,1,1): // 0:10x1
9966 ok
= True
; isORR
= True
; break;
9968 /* -------- x,0,1100 MOVI 32-bit shifting ones -------- */
9969 /* -------- x,0,1101 MOVI 32-bit shifting ones -------- */
9970 case BITS5(0,1,1,0,0): case BITS5(0,1,1,0,1): // 0:110x
9971 ok
= True
; isMOV
= True
; break;
9973 /* -------- x,0,1110 MOVI 8-bit -------- */
9974 case BITS5(0,1,1,1,0):
9975 ok
= True
; isMOV
= True
; break;
9977 /* -------- x,0,1111 FMOV (vector, immediate, F32) -------- */
9978 case BITS5(0,1,1,1,1): // 0:1111
9979 ok
= True
; isFMOV
= True
; break;
9981 /* -------- x,1,0000 MVNI 32-bit shifted imm -------- */
9982 /* -------- x,1,0010 MVNI 32-bit shifted imm -------- */
9983 /* -------- x,1,0100 MVNI 32-bit shifted imm -------- */
9984 /* -------- x,1,0110 MVNI 32-bit shifted imm -------- */
9985 case BITS5(1,0,0,0,0): case BITS5(1,0,0,1,0):
9986 case BITS5(1,0,1,0,0): case BITS5(1,0,1,1,0): // 1:0xx0
9987 ok
= True
; isMVN
= True
; break;
9989 /* -------- x,1,0001 BIC (vector, immediate) 32-bit -------- */
9990 /* -------- x,1,0011 BIC (vector, immediate) 32-bit -------- */
9991 /* -------- x,1,0101 BIC (vector, immediate) 32-bit -------- */
9992 /* -------- x,1,0111 BIC (vector, immediate) 32-bit -------- */
9993 case BITS5(1,0,0,0,1): case BITS5(1,0,0,1,1):
9994 case BITS5(1,0,1,0,1): case BITS5(1,0,1,1,1): // 1:0xx1
9995 ok
= True
; isBIC
= True
; break;
9997 /* -------- x,1,1000 MVNI 16-bit shifted imm -------- */
9998 /* -------- x,1,1010 MVNI 16-bit shifted imm -------- */
9999 case BITS5(1,1,0,0,0): case BITS5(1,1,0,1,0): // 1:10x0
10000 ok
= True
; isMVN
= True
; break;
10002 /* -------- x,1,1001 BIC (vector, immediate) 16-bit -------- */
10003 /* -------- x,1,1011 BIC (vector, immediate) 16-bit -------- */
10004 case BITS5(1,1,0,0,1): case BITS5(1,1,0,1,1): // 1:10x1
10005 ok
= True
; isBIC
= True
; break;
10007 /* -------- x,1,1100 MVNI 32-bit shifting ones -------- */
10008 /* -------- x,1,1101 MVNI 32-bit shifting ones -------- */
10009 case BITS5(1,1,1,0,0): case BITS5(1,1,1,0,1): // 1:110x
10010 ok
= True
; isMVN
= True
; break;
10012 /* -------- 0,1,1110 MOVI 64-bit scalar -------- */
10013 /* -------- 1,1,1110 MOVI 64-bit vector -------- */
10014 case BITS5(1,1,1,1,0):
10015 ok
= True
; isMOV
= True
; break;
10017 /* -------- 1,1,1111 FMOV (vector, immediate, F64) -------- */
10018 case BITS5(1,1,1,1,1): // 1:1111
10019 ok
= bitQ
== 1; isFMOV
= True
; break;
10025 vassert(1 == (isMOV
? 1 : 0) + (isMVN
? 1 : 0)
10026 + (isORR
? 1 : 0) + (isBIC
? 1 : 0) + (isFMOV
? 1 : 0));
10027 ok
= AdvSIMDExpandImm(&imm64lo
, bitOP
, cmode
, abcdefgh
);
10030 if (isORR
|| isBIC
) {
10032 = isORR
? 0ULL : ~0ULL;
10034 = binop(Iop_64HLtoV128
, mkU64(inv
^ imm64lo
), mkU64(inv
^ imm64lo
));
10036 = binop(isORR
? Iop_OrV128
: Iop_AndV128
, getQReg128(dd
), immV128
);
10037 const HChar
* nm
= isORR
? "orr" : "bic";
10039 putQReg128(dd
, unop(Iop_ZeroHI64ofV128
, res
));
10040 DIP("%s %s.1d, %016llx\n", nm
, nameQReg128(dd
), imm64lo
);
10042 putQReg128(dd
, res
);
10043 DIP("%s %s.2d, #0x%016llx'%016llx\n", nm
,
10044 nameQReg128(dd
), imm64lo
, imm64lo
);
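      /* The single XOR mask above folds both cases into one path: with
         inv = 0 the operation computed is  Vd | imm,  and with inv = ~0
         it is  Vd & ~imm,  which is exactly BIC.  For example, assuming
         imm64lo = 0x00000000000000FF, BIC ANDs the register with
         0xFFFFFFFFFFFFFF00 and so clears just the low byte of each
         64-bit half. */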
10047 else if (isMOV
|| isMVN
|| isFMOV
) {
10048 if (isMVN
) imm64lo
= ~imm64lo
;
10049 ULong imm64hi
= bitQ
== 0 ? 0 : imm64lo
;
10050 IRExpr
* immV128
= binop(Iop_64HLtoV128
, mkU64(imm64hi
),
10052 putQReg128(dd
, immV128
);
10053 DIP("mov %s, #0x%016llx'%016llx\n", nameQReg128(dd
), imm64hi
, imm64lo
);
10057 /* else fall through */

Bool dis_AdvSIMD_scalar_copy(/*MB_OUT*/DisResult* dres, UInt insn)
{
   /* 31    28       20   15 14   10 9 4
      01 op 11110000 imm5 0  imm4 1  n d
      Decode fields: op,imm4
   */
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   if (INSN(31,30) != BITS2(0,1)
       || INSN(28,21) != BITS8(1,1,1,1,0,0,0,0)
       || INSN(15,15) != 0 || INSN(10,10) != 1) {
      return False;
   }
   UInt bitOP = INSN(29,29);
   UInt imm5  = INSN(20,16);
   UInt imm4  = INSN(14,11);
   UInt nn    = INSN(9,5);
   UInt dd    = INSN(4,0);

   if (bitOP == 0 && imm4 == BITS4(0,0,0,0)) {
      /* -------- 0,0000 DUP (element, scalar) -------- */
      IRTemp w0     = newTemp(Ity_I64);
      const HChar* arTs = "??";
      IRType laneTy = Ity_INVALID;
      UInt   laneNo = 16; /* invalid */
      if (imm5 & 1) {
         laneNo = (imm5 >> 1) & 15;
         laneTy = Ity_I8;  arTs = "b";
         assign(w0, unop(Iop_8Uto64, getQRegLane(nn, laneNo, laneTy)));
      }
      else if (imm5 & 2) {
         laneNo = (imm5 >> 2) & 7;
         laneTy = Ity_I16;  arTs = "h";
         assign(w0, unop(Iop_16Uto64, getQRegLane(nn, laneNo, laneTy)));
      }
      else if (imm5 & 4) {
         laneNo = (imm5 >> 3) & 3;
         laneTy = Ity_I32;  arTs = "s";
         assign(w0, unop(Iop_32Uto64, getQRegLane(nn, laneNo, laneTy)));
      }
      else if (imm5 & 8) {
         laneNo = (imm5 >> 4) & 1;
         laneTy = Ity_I64;  arTs = "d";
         assign(w0, getQRegLane(nn, laneNo, laneTy));
      }
      else {
         /* invalid; leave laneTy unchanged. */
      }
      if (laneTy != Ity_INVALID) {
         vassert(laneNo < 16);
         putQReg128(dd, binop(Iop_64HLtoV128, mkU64(0), mkexpr(w0)));
         DIP("dup %s, %s.%s[%u]\n",
             nameQRegLO(dd, laneTy), nameQReg128(nn), arTs, laneNo);
         return True;
      }
      /* else fall through */
Bool dis_AdvSIMD_scalar_pairwise(/*MB_OUT*/DisResult* dres, UInt insn,
                                 const VexArchInfo* archinfo)
{
   /* 31   28    23 21    16     11 9 4
      01 u 11110 sz 11000 opcode 10 n d
      Decode fields: u,sz,opcode
   */
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   if (INSN(31,30) != BITS2(0,1)
       || INSN(28,24) != BITS5(1,1,1,1,0)
       || INSN(21,17) != BITS5(1,1,0,0,0)
       || INSN(11,10) != BITS2(1,0)) {
      return False;
   }
   UInt bitU   = INSN(29,29);
   UInt sz     = INSN(23,22);
   UInt opcode = INSN(16,12);
   UInt nn     = INSN(9,5);
   UInt dd     = INSN(4,0);
10153 if (bitU
== 0 && sz
== X11
&& opcode
== BITS5(1,1,0,1,1)) {
10154 /* -------- 0,11,11011 ADDP d_2d -------- */
10155 IRTemp xy
= newTempV128();
10156 IRTemp xx
= newTempV128();
10157 assign(xy
, getQReg128(nn
));
10158 assign(xx
, binop(Iop_InterleaveHI64x2
, mkexpr(xy
), mkexpr(xy
)));
10159 putQReg128(dd
, unop(Iop_ZeroHI64ofV128
,
10160 binop(Iop_Add64x2
, mkexpr(xy
), mkexpr(xx
))));
10161 DIP("addp d%u, %s.2d\n", dd
, nameQReg128(nn
));
10165 if (bitU
== 1 && sz
<= X01
&& opcode
== BITS5(0,1,1,0,1)) {
10166 /* -------- 1,00,01101 ADDP s_2s -------- */
10167 /* -------- 1,01,01101 ADDP d_2d -------- */
10168 Bool isD
= sz
== X01
;
10169 IROp opZHI
= mkVecZEROHIxxOFV128(isD
? 3 : 2);
10170 IROp opADD
= mkVecADDF(isD
? 3 : 2);
10171 IRTemp src
= newTempV128();
10172 IRTemp argL
= newTempV128();
10173 IRTemp argR
= newTempV128();
10174 assign(src
, getQReg128(nn
));
10175 assign(argL
, unop(opZHI
, mkexpr(src
)));
10176 assign(argR
, unop(opZHI
, triop(Iop_SliceV128
, mkexpr(src
), mkexpr(src
),
10177 mkU8(isD
? 8 : 4))));
10178 putQReg128(dd
, unop(opZHI
,
10179 triop(opADD
, mkexpr(mk_get_IR_rounding_mode()),
10180 mkexpr(argL
), mkexpr(argR
))));
10181 DIP(isD
? "faddp d%u, v%u.2d\n" : "faddp s%u, v%u.2s\n", dd
, nn
);
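      /* The Iop_SliceV128 of the source with itself above acts as a
         rotate: with an 8-byte (or 4-byte) slice amount, lane 1 of the
         pair lands in lane 0 of argR, while argL keeps lane 0.  Zeroing
         the upper lanes of both and adding therefore leaves
         lane0 + lane1 in the lowest lane, which is the pairwise sum
         FADDP wants. */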
10185 /* Half-precision floating point ADDP (v8.2). */
10186 if (bitU
== 0 && sz
<= X00
&& opcode
== BITS5(0,1,1,0,1)) {
10187 /* -------- 0,00,01101 ADDP h_2h -------- */
10188 if ((archinfo
->hwcaps
& VEX_HWCAPS_ARM64_FP16
) == 0)
10190 IROp opZHI
= mkVecZEROHIxxOFV128(1);
10191 IROp opADD
= mkVecADDF(1);
10192 IRTemp src
= newTempV128();
10193 IRTemp argL
= newTempV128();
10194 IRTemp argR
= newTempV128();
10195 assign(src
, getQReg128(nn
));
10196 assign(argL
, unop(opZHI
, mkexpr(src
)));
10197 assign(argR
, unop(opZHI
, triop(Iop_SliceV128
, mkexpr(src
), mkexpr(src
),
10199 putQReg128(dd
, unop(opZHI
,
10200 triop(opADD
, mkexpr(mk_get_IR_rounding_mode()),
10201 mkexpr(argL
), mkexpr(argR
))));
10202 DIP("faddp h%u, v%u.2h\n", dd
, nn
);
10207 && (opcode
== BITS5(0,1,1,0,0) || opcode
== BITS5(0,1,1,1,1))) {
10208 /* -------- 1,0x,01100 FMAXNMP d_2d, s_2s -------- */
10209 /* -------- 1,1x,01100 FMINNMP d_2d, s_2s -------- */
10210 /* -------- 1,0x,01111 FMAXP d_2d, s_2s -------- */
10211 /* -------- 1,1x,01111 FMINP d_2d, s_2s -------- */
10212 /* FMAXNM, FMINNM: FIXME -- KLUDGED */
10213 Bool isD
= (sz
& 1) == 1;
10214 Bool isMIN
= (sz
& 2) == 2;
10215 Bool isNM
= opcode
== BITS5(0,1,1,0,0);
10216 IROp opZHI
= mkVecZEROHIxxOFV128(isD
? 3 : 2);
10217 IROp opMXX
= (isMIN
? mkVecMINF
: mkVecMAXF
)(isD
? 3 : 2);
10218 IRTemp src
= newTempV128();
10219 IRTemp argL
= newTempV128();
10220 IRTemp argR
= newTempV128();
10221 assign(src
, getQReg128(nn
));
10222 assign(argL
, unop(opZHI
, mkexpr(src
)));
10223 assign(argR
, unop(opZHI
, triop(Iop_SliceV128
, mkexpr(src
), mkexpr(src
),
10224 mkU8(isD
? 8 : 4))));
10225 putQReg128(dd
, unop(opZHI
,
10226 binop(opMXX
, mkexpr(argL
), mkexpr(argR
))));
10227 HChar c
= isD
? 'd' : 's';
10228 DIP("%s%sp %c%u, v%u.2%c\n",
10229 isMIN
? "fmin" : "fmax", isNM
? "nm" : "", c
, dd
, nn
, c
);
Bool dis_AdvSIMD_scalar_shift_by_imm(/*MB_OUT*/DisResult* dres, UInt insn)
{
   /* 31   28     22   18   15     10 9 4
      01 u 111110 immh immb opcode 1  n d
      Decode fields: u,immh,opcode
   */
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   if (INSN(31,30) != BITS2(0,1)
       || INSN(28,23) != BITS6(1,1,1,1,1,0) || INSN(10,10) != 1) {
      return False;
   }
   UInt bitU   = INSN(29,29);
   UInt immh   = INSN(22,19);
   UInt immb   = INSN(18,16);
   UInt opcode = INSN(15,11);
   UInt nn     = INSN(9,5);
   UInt dd     = INSN(4,0);
   UInt immhb  = (immh << 3) | immb;
10258 if ((immh
& 8) == 8
10259 && (opcode
== BITS5(0,0,0,0,0) || opcode
== BITS5(0,0,0,1,0))) {
10260 /* -------- 0,1xxx,00000 SSHR d_d_#imm -------- */
10261 /* -------- 1,1xxx,00000 USHR d_d_#imm -------- */
10262 /* -------- 0,1xxx,00010 SSRA d_d_#imm -------- */
10263 /* -------- 1,1xxx,00010 USRA d_d_#imm -------- */
10264 Bool isU
= bitU
== 1;
10265 Bool isAcc
= opcode
== BITS5(0,0,0,1,0);
10266 UInt sh
= 128 - immhb
;
10267 vassert(sh
>= 1 && sh
<= 64);
10268 IROp op
= isU
? Iop_ShrN64x2
: Iop_SarN64x2
;
10269 IRExpr
* src
= getQReg128(nn
);
10270 IRTemp shf
= newTempV128();
10271 IRTemp res
= newTempV128();
10272 if (sh
== 64 && isU
) {
10273 assign(shf
, mkV128(0x0000));
10280 assign(shf
, binop(op
, src
, mkU8(sh
- nudge
)));
10282 assign(res
, isAcc
? binop(Iop_Add64x2
, getQReg128(dd
), mkexpr(shf
))
10284 putQReg128(dd
, unop(Iop_ZeroHI64ofV128
, mkexpr(res
)));
10285 const HChar
* nm
= isAcc
? (isU
? "usra" : "ssra")
10286 : (isU
? "ushr" : "sshr");
10287 DIP("%s d%u, d%u, #%u\n", nm
, dd
, nn
, sh
);
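      /* Note on the sh == 64 corner case above: a 64-bit lane cannot be
         shifted right by 64 directly, since the IR shift amount must be
         0..63.  USHR by 64 therefore just produces zero, and the
         'nudge' adjustment presumably handles the signed case by
         shifting by 63 instead, which gives the same result as an
         arithmetic shift by 64 would. */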
10291 if ((immh
& 8) == 8
10292 && (opcode
== BITS5(0,0,1,0,0) || opcode
== BITS5(0,0,1,1,0))) {
10293 /* -------- 0,1xxx,00100 SRSHR d_d_#imm -------- */
10294 /* -------- 1,1xxx,00100 URSHR d_d_#imm -------- */
10295 /* -------- 0,1xxx,00110 SRSRA d_d_#imm -------- */
10296 /* -------- 1,1xxx,00110 URSRA d_d_#imm -------- */
10297 Bool isU
= bitU
== 1;
10298 Bool isAcc
= opcode
== BITS5(0,0,1,1,0);
10299 UInt sh
= 128 - immhb
;
10300 vassert(sh
>= 1 && sh
<= 64);
10301 IROp op
= isU
? Iop_Rsh64Ux2
: Iop_Rsh64Sx2
;
10302 vassert(sh
>= 1 && sh
<= 64);
10303 IRExpr
* src
= getQReg128(nn
);
10304 IRTemp imm8
= newTemp(Ity_I8
);
10305 assign(imm8
, mkU8((UChar
)(-sh
)));
10306 IRExpr
* amt
= mkexpr(math_DUP_TO_V128(imm8
, Ity_I8
));
10307 IRTemp shf
= newTempV128();
10308 IRTemp res
= newTempV128();
10309 assign(shf
, binop(op
, src
, amt
));
10310 assign(res
, isAcc
? binop(Iop_Add64x2
, getQReg128(dd
), mkexpr(shf
))
10312 putQReg128(dd
, unop(Iop_ZeroHI64ofV128
, mkexpr(res
)));
10313 const HChar
* nm
= isAcc
? (isU
? "ursra" : "srsra")
10314 : (isU
? "urshr" : "srshr");
10315 DIP("%s d%u, d%u, #%u\n", nm
, dd
, nn
, sh
);
10319 if (bitU
== 1 && (immh
& 8) == 8 && opcode
== BITS5(0,1,0,0,0)) {
10320 /* -------- 1,1xxx,01000 SRI d_d_#imm -------- */
10321 UInt sh
= 128 - immhb
;
10322 vassert(sh
>= 1 && sh
<= 64);
10324 putQReg128(dd
, unop(Iop_ZeroHI64ofV128
, getQReg128(dd
)));
10326 /* sh is in range 1 .. 63 */
10327 ULong nmask
= (ULong
)(((Long
)0x8000000000000000ULL
) >> (sh
-1));
10328 IRExpr
* nmaskV
= binop(Iop_64HLtoV128
, mkU64(nmask
), mkU64(nmask
));
10329 IRTemp res
= newTempV128();
10330 assign(res
, binop(Iop_OrV128
,
10331 binop(Iop_AndV128
, getQReg128(dd
), nmaskV
),
10332 binop(Iop_ShrN64x2
, getQReg128(nn
), mkU8(sh
))));
10333 putQReg128(dd
, unop(Iop_ZeroHI64ofV128
, mkexpr(res
)));
10335 DIP("sri d%u, d%u, #%u\n", dd
, nn
, sh
);
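      /* Worked example of the nmask above, assuming sh = 8: arithmetic
         right shift of 0x8000000000000000 by 7 gives
         0xFF00000000000000, i.e. the top 8 bits set.  SRI then keeps
         those top 8 bits of Vd and fills the remaining 56 bits with
         Vn >> 8, which is exactly "shift right and insert". */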
10339 if (bitU
== 0 && (immh
& 8) == 8 && opcode
== BITS5(0,1,0,1,0)) {
10340 /* -------- 0,1xxx,01010 SHL d_d_#imm -------- */
10341 UInt sh
= immhb
- 64;
10342 vassert(sh
>= 0 && sh
< 64);
10344 unop(Iop_ZeroHI64ofV128
,
10345 sh
== 0 ? getQReg128(nn
)
10346 : binop(Iop_ShlN64x2
, getQReg128(nn
), mkU8(sh
))));
10347 DIP("shl d%u, d%u, #%u\n", dd
, nn
, sh
);
10351 if (bitU
== 1 && (immh
& 8) == 8 && opcode
== BITS5(0,1,0,1,0)) {
10352 /* -------- 1,1xxx,01010 SLI d_d_#imm -------- */
10353 UInt sh
= immhb
- 64;
10354 vassert(sh
>= 0 && sh
< 64);
10356 putQReg128(dd
, unop(Iop_ZeroHI64ofV128
, getQReg128(nn
)));
10358 /* sh is in range 1 .. 63 */
10359 ULong nmask
= (1ULL << sh
) - 1;
10360 IRExpr
* nmaskV
= binop(Iop_64HLtoV128
, mkU64(nmask
), mkU64(nmask
));
10361 IRTemp res
= newTempV128();
10362 assign(res
, binop(Iop_OrV128
,
10363 binop(Iop_AndV128
, getQReg128(dd
), nmaskV
),
10364 binop(Iop_ShlN64x2
, getQReg128(nn
), mkU8(sh
))));
10365 putQReg128(dd
, unop(Iop_ZeroHI64ofV128
, mkexpr(res
)));
10367 DIP("sli d%u, d%u, #%u\n", dd
, nn
, sh
);
10371 if (opcode
== BITS5(0,1,1,1,0)
10372 || (bitU
== 1 && opcode
== BITS5(0,1,1,0,0))) {
10373 /* -------- 0,01110 SQSHL #imm -------- */
10374 /* -------- 1,01110 UQSHL #imm -------- */
10375 /* -------- 1,01100 SQSHLU #imm -------- */
10378 Bool ok
= getLaneInfo_IMMH_IMMB(&shift
, &size
, immh
, immb
);
10379 if (!ok
) return False
;
10380 vassert(size
>= 0 && size
<= 3);
10381 /* The shift encoding has opposite sign for the leftwards case.
10382 Adjust shift to compensate. */
10383 UInt lanebits
= 8 << size
;
10384 shift
= lanebits
- shift
;
10385 vassert(shift
>= 0 && shift
< lanebits
);
10386 const HChar
* nm
= NULL
;
10387 /**/ if (bitU
== 0 && opcode
== BITS5(0,1,1,1,0)) nm
= "sqshl";
10388 else if (bitU
== 1 && opcode
== BITS5(0,1,1,1,0)) nm
= "uqshl";
10389 else if (bitU
== 1 && opcode
== BITS5(0,1,1,0,0)) nm
= "sqshlu";
10391 IRTemp qDiff1
= IRTemp_INVALID
;
10392 IRTemp qDiff2
= IRTemp_INVALID
;
10393 IRTemp res
= IRTemp_INVALID
;
10394 IRTemp src
= math_ZERO_ALL_EXCEPT_LOWEST_LANE(size
, getQReg128(nn
));
10395 /* This relies on the fact that the zeroed out lanes generate zeroed
10396 result lanes and don't saturate, so there's no point in trimming
10397 the resulting res, qDiff1 or qDiff2 values. */
10398 math_QSHL_IMM(&res
, &qDiff1
, &qDiff2
, src
, size
, shift
, nm
);
10399 putQReg128(dd
, mkexpr(res
));
10400 updateQCFLAGwithDifference(qDiff1
, qDiff2
);
10401 const HChar arr
= "bhsd"[size
];
10402 DIP("%s %c%u, %c%u, #%u\n", nm
, arr
, dd
, arr
, nn
, shift
);
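      /* Worked example of the shift re-encoding above, assuming
         SQSHL d0, d1, #3: immh:immb encodes 64+3 = 67, getLaneInfo
         returns the rightward-shift convention value 128-67 = 61, and
         lanebits - 61 = 3 then recovers the intended left shift
         amount. */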
10406 if (opcode
== BITS5(1,0,0,1,0) || opcode
== BITS5(1,0,0,1,1)
10408 && (opcode
== BITS5(1,0,0,0,0) || opcode
== BITS5(1,0,0,0,1)))) {
10409 /* -------- 0,10010 SQSHRN #imm -------- */
10410 /* -------- 1,10010 UQSHRN #imm -------- */
10411 /* -------- 0,10011 SQRSHRN #imm -------- */
10412 /* -------- 1,10011 UQRSHRN #imm -------- */
10413 /* -------- 1,10000 SQSHRUN #imm -------- */
10414 /* -------- 1,10001 SQRSHRUN #imm -------- */
10417 Bool ok
= getLaneInfo_IMMH_IMMB(&shift
, &size
, immh
, immb
);
10418 if (!ok
|| size
== X11
) return False
;
10419 vassert(size
>= X00
&& size
<= X10
);
10420 vassert(shift
>= 1 && shift
<= (8 << size
));
10421 const HChar
* nm
= "??";
10422 IROp op
= Iop_INVALID
;
10423 /* Decide on the name and the operation. */
10424 /**/ if (bitU
== 0 && opcode
== BITS5(1,0,0,1,0)) {
10425 nm
= "sqshrn"; op
= mkVecQANDqsarNNARROWSS(size
);
10427 else if (bitU
== 1 && opcode
== BITS5(1,0,0,1,0)) {
10428 nm
= "uqshrn"; op
= mkVecQANDqshrNNARROWUU(size
);
10430 else if (bitU
== 0 && opcode
== BITS5(1,0,0,1,1)) {
10431 nm
= "sqrshrn"; op
= mkVecQANDqrsarNNARROWSS(size
);
10433 else if (bitU
== 1 && opcode
== BITS5(1,0,0,1,1)) {
10434 nm
= "uqrshrn"; op
= mkVecQANDqrshrNNARROWUU(size
);
10436 else if (bitU
== 1 && opcode
== BITS5(1,0,0,0,0)) {
10437 nm
= "sqshrun"; op
= mkVecQANDqsarNNARROWSU(size
);
10439 else if (bitU
== 1 && opcode
== BITS5(1,0,0,0,1)) {
10440 nm
= "sqrshrun"; op
= mkVecQANDqrsarNNARROWSU(size
);
10443 /* Compute the result (Q, shifted value) pair. */
10444 IRTemp src128
= math_ZERO_ALL_EXCEPT_LOWEST_LANE(size
+1, getQReg128(nn
));
10445 IRTemp pair
= newTempV128();
10446 assign(pair
, binop(op
, mkexpr(src128
), mkU8(shift
)));
10447 /* Update the result reg */
10448 IRTemp res64in128
= newTempV128();
10449 assign(res64in128
, unop(Iop_ZeroHI64ofV128
, mkexpr(pair
)));
10450 putQReg128(dd
, mkexpr(res64in128
));
10451 /* Update the Q flag. */
10452 IRTemp q64q64
= newTempV128();
10453 assign(q64q64
, binop(Iop_InterleaveHI64x2
, mkexpr(pair
), mkexpr(pair
)));
10454 IRTemp z128
= newTempV128();
10455 assign(z128
, mkV128(0x0000));
10456 updateQCFLAGwithDifference(q64q64
, z128
);
10458 const HChar arrNarrow
= "bhsd"[size
];
10459 const HChar arrWide
= "bhsd"[size
+1];
10460 DIP("%s %c%u, %c%u, #%u\n", nm
, arrNarrow
, dd
, arrWide
, nn
, shift
);
10464 if (immh
>= BITS4(0,1,0,0) && opcode
== BITS5(1,1,1,0,0)) {
10465 /* -------- 0,!=00xx,11100 SCVTF d_d_imm, s_s_imm -------- */
10466 /* -------- 1,!=00xx,11100 UCVTF d_d_imm, s_s_imm -------- */
10469 Bool ok
= getLaneInfo_IMMH_IMMB(&fbits
, &size
, immh
, immb
);
10470 /* The following holds because immh is never zero. */
10472 /* The following holds because immh >= 0100. */
10473 vassert(size
== X10
|| size
== X11
);
10474 Bool isD
= size
== X11
;
10475 Bool isU
= bitU
== 1;
10476 vassert(fbits
>= 1 && fbits
<= (isD
? 64 : 32));
10477 Double scale
= two_to_the_minus(fbits
);
10478 IRExpr
* scaleE
= isD
? IRExpr_Const(IRConst_F64(scale
))
10479 : IRExpr_Const(IRConst_F32( (Float
)scale
));
10480 IROp opMUL
= isD
? Iop_MulF64
: Iop_MulF32
;
10481 IROp opCVT
= isU
? (isD
? Iop_I64UtoF64
: Iop_I32UtoF32
)
10482 : (isD
? Iop_I64StoF64
: Iop_I32StoF32
);
10483 IRType tyF
= isD
? Ity_F64
: Ity_F32
;
10484 IRType tyI
= isD
? Ity_I64
: Ity_I32
;
10485 IRTemp src
= newTemp(tyI
);
10486 IRTemp res
= newTemp(tyF
);
10487 IRTemp rm
= mk_get_IR_rounding_mode();
10488 assign(src
, getQRegLane(nn
, 0, tyI
));
10489 assign(res
, triop(opMUL
, mkexpr(rm
),
10490 binop(opCVT
, mkexpr(rm
), mkexpr(src
)), scaleE
));
10491 putQRegLane(dd
, 0, mkexpr(res
));
10493 putQRegLane(dd
, 1, mkU32(0));
10495 putQRegLane(dd
, 1, mkU64(0));
10496 const HChar ch
= isD
? 'd' : 's';
10497 DIP("%s %c%u, %c%u, #%u\n", isU
? "ucvtf" : "scvtf",
10498 ch
, dd
, ch
, nn
, fbits
);
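      /* Worked example, assuming scvtf s0, s1, #8 with the source lane
         holding the integer 640: the lane is first converted to 640.0
         and then multiplied by two_to_the_minus(8) = 1/256, giving 2.5,
         i.e. the value of a fixed-point number with 8 fraction bits. */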
10502 if (immh
>= BITS4(0,1,0,0) && opcode
== BITS5(1,1,1,1,1)) {
10503 /* -------- 0,!=00xx,11111 FCVTZS d_d_imm, s_s_imm -------- */
10504 /* -------- 1,!=00xx,11111 FCVTZU d_d_imm, s_s_imm -------- */
10507 Bool ok
= getLaneInfo_IMMH_IMMB(&fbits
, &size
, immh
, immb
);
10508 /* The following holds because immh is never zero. */
10510 /* The following holds because immh >= 0100. */
10511 vassert(size
== X10
|| size
== X11
);
10512 Bool isD
= size
== X11
;
10513 Bool isU
= bitU
== 1;
10514 vassert(fbits
>= 1 && fbits
<= (isD
? 64 : 32));
10515 Double scale
= two_to_the_plus(fbits
);
10516 IRExpr
* scaleE
= isD
? IRExpr_Const(IRConst_F64(scale
))
10517 : IRExpr_Const(IRConst_F32( (Float
)scale
));
10518 IROp opMUL
= isD
? Iop_MulF64
: Iop_MulF32
;
10519 IROp opCVT
= isU
? (isD
? Iop_F64toI64U
: Iop_F32toI32U
)
10520 : (isD
? Iop_F64toI64S
: Iop_F32toI32S
);
10521 IRType tyF
= isD
? Ity_F64
: Ity_F32
;
10522 IRType tyI
= isD
? Ity_I64
: Ity_I32
;
10523 IRTemp src
= newTemp(tyF
);
10524 IRTemp res
= newTemp(tyI
);
10525 IRTemp rm
= newTemp(Ity_I32
);
10526 assign(src
, getQRegLane(nn
, 0, tyF
));
10527 assign(rm
, mkU32(Irrm_ZERO
));
10528 assign(res
, binop(opCVT
, mkexpr(rm
),
10529 triop(opMUL
, mkexpr(rm
), mkexpr(src
), scaleE
)));
10530 putQRegLane(dd
, 0, mkexpr(res
));
10532 putQRegLane(dd
, 1, mkU32(0));
10534 putQRegLane(dd
, 1, mkU64(0));
10535 const HChar ch
= isD
? 'd' : 's';
10536 DIP("%s %c%u, %c%u, #%u\n", isU
? "fcvtzu" : "fcvtzs",
10537 ch
, dd
, ch
, nn
, fbits
);
   return False;
#  undef INSN
}

Bool dis_AdvSIMD_scalar_three_different(/*MB_OUT*/DisResult* dres, UInt insn)
{
   /* 31   29 28    23   21 20 15     11 9 4
      01   U  11110 size 1  m  opcode 00 n d
      Decode fields: u,opcode
   */
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   if (INSN(31,30) != BITS2(0,1)
       || INSN(28,24) != BITS5(1,1,1,1,0)
       || INSN(21,21) != 1
       || INSN(11,10) != BITS2(0,0)) {
      return False;
   }
   UInt bitU   = INSN(29,29);
   UInt size   = INSN(23,22);
   UInt mm     = INSN(20,16);
   UInt opcode = INSN(15,12);
   UInt nn     = INSN(9,5);
   UInt dd     = INSN(4,0);
10570 && (opcode
== BITS4(1,1,0,1)
10571 || opcode
== BITS4(1,0,0,1) || opcode
== BITS4(1,0,1,1))) {
10572 /* -------- 0,1101 SQDMULL -------- */ // 0 (ks)
10573 /* -------- 0,1001 SQDMLAL -------- */ // 1
10574 /* -------- 0,1011 SQDMLSL -------- */ // 2
10575 /* Widens, and size refers to the narrowed lanes. */
10578 case BITS4(1,1,0,1): ks
= 0; break;
10579 case BITS4(1,0,0,1): ks
= 1; break;
10580 case BITS4(1,0,1,1): ks
= 2; break;
10581 default: vassert(0);
10583 vassert(ks
>= 0 && ks
<= 2);
10584 if (size
== X00
|| size
== X11
) return False
;
10585 vassert(size
<= 2);
10586 IRTemp vecN
, vecM
, vecD
, res
, sat1q
, sat1n
, sat2q
, sat2n
;
10587 vecN
= vecM
= vecD
= res
= sat1q
= sat1n
= sat2q
= sat2n
= IRTemp_INVALID
;
10588 newTempsV128_3(&vecN
, &vecM
, &vecD
);
10589 assign(vecN
, getQReg128(nn
));
10590 assign(vecM
, getQReg128(mm
));
10591 assign(vecD
, getQReg128(dd
));
10592 math_SQDMULL_ACC(&res
, &sat1q
, &sat1n
, &sat2q
, &sat2n
,
10593 False
/*!is2*/, size
, "mas"[ks
],
10594 vecN
, vecM
, ks
== 0 ? IRTemp_INVALID
: vecD
);
10595 IROp opZHI
= mkVecZEROHIxxOFV128(size
+1);
10596 putQReg128(dd
, unop(opZHI
, mkexpr(res
)));
10597 vassert(sat1q
!= IRTemp_INVALID
&& sat1n
!= IRTemp_INVALID
);
10598 updateQCFLAGwithDifferenceZHI(sat1q
, sat1n
, opZHI
);
10599 if (sat2q
!= IRTemp_INVALID
|| sat2n
!= IRTemp_INVALID
) {
10600 updateQCFLAGwithDifferenceZHI(sat2q
, sat2n
, opZHI
);
10602 const HChar
* nm
= ks
== 0 ? "sqdmull"
10603 : (ks
== 1 ? "sqdmlal" : "sqdmlsl");
10604 const HChar arrNarrow
= "bhsd"[size
];
10605 const HChar arrWide
= "bhsd"[size
+1];
10606 DIP("%s %c%u, %c%u, %c%u\n",
10607 nm
, arrWide
, dd
, arrNarrow
, nn
, arrNarrow
, mm
);

Bool dis_AdvSIMD_scalar_three_same(/*MB_OUT*/DisResult* dres, UInt insn)
{
   /* 31   29 28    23   21 20 15     10 9 4
      01   U  11110 size 1  m  opcode 1  n d
      Decode fields: u,size,opcode
   */
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   if (INSN(31,30) != BITS2(0,1)
       || INSN(28,24) != BITS5(1,1,1,1,0)
       || INSN(21,21) != 1
       || INSN(10,10) != 1) {
      return False;
   }
   UInt bitU   = INSN(29,29);
   UInt size   = INSN(23,22);
   UInt mm     = INSN(20,16);
   UInt opcode = INSN(15,11);
   UInt nn     = INSN(9,5);
   UInt dd     = INSN(4,0);
10638 if (opcode
== BITS5(0,0,0,0,1) || opcode
== BITS5(0,0,1,0,1)) {
10639 /* -------- 0,xx,00001 SQADD std4_std4_std4 -------- */
10640 /* -------- 1,xx,00001 UQADD std4_std4_std4 -------- */
10641 /* -------- 0,xx,00101 SQSUB std4_std4_std4 -------- */
10642 /* -------- 1,xx,00101 UQSUB std4_std4_std4 -------- */
10643 Bool isADD
= opcode
== BITS5(0,0,0,0,1);
10644 Bool isU
= bitU
== 1;
10645 IROp qop
= Iop_INVALID
;
10646 IROp nop
= Iop_INVALID
;
10648 qop
= isU
? mkVecQADDU(size
) : mkVecQADDS(size
);
10649 nop
= mkVecADD(size
);
10651 qop
= isU
? mkVecQSUBU(size
) : mkVecQSUBS(size
);
10652 nop
= mkVecSUB(size
);
10654 IRTemp argL
= newTempV128();
10655 IRTemp argR
= newTempV128();
10656 IRTemp qres
= newTempV128();
10657 IRTemp nres
= newTempV128();
10658 assign(argL
, getQReg128(nn
));
10659 assign(argR
, getQReg128(mm
));
10660 assign(qres
, mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(
10661 size
, binop(qop
, mkexpr(argL
), mkexpr(argR
)))));
10662 assign(nres
, mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(
10663 size
, binop(nop
, mkexpr(argL
), mkexpr(argR
)))));
10664 putQReg128(dd
, mkexpr(qres
));
10665 updateQCFLAGwithDifference(qres
, nres
);
10666 const HChar
* nm
= isADD
? (isU
? "uqadd" : "sqadd")
10667 : (isU
? "uqsub" : "sqsub");
10668 const HChar arr
= "bhsd"[size
];
10669 DIP("%s %c%u, %c%u, %c%u\n", nm
, arr
, dd
, arr
, nn
, arr
, mm
);
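      /* The QC update scheme above is the one used throughout this
         file: compute both the saturating result (qres) and the plain
         wrapping result (nres), and set the sticky QC flag if they
         differ in any lane.  For example, assuming UQADD b0, b1, b2
         with both source bytes 0xFF: qres is 0xFF but nres wraps to
         0xFE, so the difference is nonzero and QC gets set. */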
10673 if (size
== X11
&& opcode
== BITS5(0,0,1,1,0)) {
10674 /* -------- 0,11,00110 CMGT d_d_d -------- */ // >s
10675 /* -------- 1,11,00110 CMHI d_d_d -------- */ // >u
10676 Bool isGT
= bitU
== 0;
10677 IRExpr
* argL
= getQReg128(nn
);
10678 IRExpr
* argR
= getQReg128(mm
);
10679 IRTemp res
= newTempV128();
10681 isGT
? binop(Iop_CmpGT64Sx2
, argL
, argR
)
10682 : binop(Iop_CmpGT64Ux2
, argL
, argR
));
10683 putQReg128(dd
, unop(Iop_ZeroHI64ofV128
, mkexpr(res
)));
10684 DIP("%s %s, %s, %s\n",isGT
? "cmgt" : "cmhi",
10685 nameQRegLO(dd
, Ity_I64
),
10686 nameQRegLO(nn
, Ity_I64
), nameQRegLO(mm
, Ity_I64
));
10690 if (size
== X11
&& opcode
== BITS5(0,0,1,1,1)) {
10691 /* -------- 0,11,00111 CMGE d_d_d -------- */ // >=s
10692 /* -------- 1,11,00111 CMHS d_d_d -------- */ // >=u
10693 Bool isGE
= bitU
== 0;
10694 IRExpr
* argL
= getQReg128(nn
);
10695 IRExpr
* argR
= getQReg128(mm
);
10696 IRTemp res
= newTempV128();
10698 isGE
? unop(Iop_NotV128
, binop(Iop_CmpGT64Sx2
, argR
, argL
))
10699 : unop(Iop_NotV128
, binop(Iop_CmpGT64Ux2
, argR
, argL
)));
10700 putQReg128(dd
, unop(Iop_ZeroHI64ofV128
, mkexpr(res
)));
10701 DIP("%s %s, %s, %s\n", isGE
? "cmge" : "cmhs",
10702 nameQRegLO(dd
, Ity_I64
),
10703 nameQRegLO(nn
, Ity_I64
), nameQRegLO(mm
, Ity_I64
));
10707 if (size
== X11
&& (opcode
== BITS5(0,1,0,0,0)
10708 || opcode
== BITS5(0,1,0,1,0))) {
10709 /* -------- 0,xx,01000 SSHL d_d_d -------- */
10710 /* -------- 0,xx,01010 SRSHL d_d_d -------- */
10711 /* -------- 1,xx,01000 USHL d_d_d -------- */
10712 /* -------- 1,xx,01010 URSHL d_d_d -------- */
10713 Bool isU
= bitU
== 1;
10714 Bool isR
= opcode
== BITS5(0,1,0,1,0);
10715 IROp op
= isR
? (isU
? mkVecRSHU(size
) : mkVecRSHS(size
))
10716 : (isU
? mkVecSHU(size
) : mkVecSHS(size
));
10717 IRTemp res
= newTempV128();
10718 assign(res
, binop(op
, getQReg128(nn
), getQReg128(mm
)));
10719 putQReg128(dd
, unop(Iop_ZeroHI64ofV128
, mkexpr(res
)));
10720 const HChar
* nm
= isR
? (isU
? "urshl" : "srshl")
10721 : (isU
? "ushl" : "sshl");
10722 DIP("%s %s, %s, %s\n", nm
,
10723 nameQRegLO(dd
, Ity_I64
),
10724 nameQRegLO(nn
, Ity_I64
), nameQRegLO(mm
, Ity_I64
));
10728 if (opcode
== BITS5(0,1,0,0,1) || opcode
== BITS5(0,1,0,1,1)) {
10729 /* -------- 0,xx,01001 SQSHL std4_std4_std4 -------- */
10730 /* -------- 0,xx,01011 SQRSHL std4_std4_std4 -------- */
10731 /* -------- 1,xx,01001 UQSHL std4_std4_std4 -------- */
10732 /* -------- 1,xx,01011 UQRSHL std4_std4_std4 -------- */
10733 Bool isU
= bitU
== 1;
10734 Bool isR
= opcode
== BITS5(0,1,0,1,1);
10735 IROp op
= isR
? (isU
? mkVecQANDUQRSH(size
) : mkVecQANDSQRSH(size
))
10736 : (isU
? mkVecQANDUQSH(size
) : mkVecQANDSQSH(size
));
10737 /* This is a bit tricky. Since we're only interested in the lowest
10738 lane of the result, we zero out all the rest in the operands, so
10739 as to ensure that other lanes don't pollute the returned Q value.
10740 This works because it means, for the lanes we don't care about, we
10741 are shifting zero by zero, which can never saturate. */
10742 IRTemp res256
= newTemp(Ity_V256
);
10743 IRTemp resSH
= newTempV128();
10744 IRTemp resQ
= newTempV128();
10745 IRTemp zero
= newTempV128();
10749 mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(size
, getQReg128(nn
))),
10750 mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(size
, getQReg128(mm
)))));
10751 assign(resSH
, unop(Iop_V256toV128_0
, mkexpr(res256
)));
10752 assign(resQ
, unop(Iop_V256toV128_1
, mkexpr(res256
)));
10753 assign(zero
, mkV128(0x0000));
10754 putQReg128(dd
, mkexpr(resSH
));
10755 updateQCFLAGwithDifference(resQ
, zero
);
10756 const HChar
* nm
= isR
? (isU
? "uqrshl" : "sqrshl")
10757 : (isU
? "uqshl" : "sqshl");
10758 const HChar arr
= "bhsd"[size
];
10759 DIP("%s %c%u, %c%u, %c%u\n", nm
, arr
, dd
, arr
, nn
, arr
, mm
);
10763 if (size
== X11
&& opcode
== BITS5(1,0,0,0,0)) {
10764 /* -------- 0,11,10000 ADD d_d_d -------- */
10765 /* -------- 1,11,10000 SUB d_d_d -------- */
10766 Bool isSUB
= bitU
== 1;
10767 IRTemp res
= newTemp(Ity_I64
);
10768 assign(res
, binop(isSUB
? Iop_Sub64
: Iop_Add64
,
10769 getQRegLane(nn
, 0, Ity_I64
),
10770 getQRegLane(mm
, 0, Ity_I64
)));
10771 putQRegLane(dd
, 0, mkexpr(res
));
10772 putQRegLane(dd
, 1, mkU64(0));
10773 DIP("%s %s, %s, %s\n", isSUB
? "sub" : "add",
10774 nameQRegLO(dd
, Ity_I64
),
10775 nameQRegLO(nn
, Ity_I64
), nameQRegLO(mm
, Ity_I64
));
10779 if (size
== X11
&& opcode
== BITS5(1,0,0,0,1)) {
10780 /* -------- 0,11,10001 CMTST d_d_d -------- */ // &, != 0
10781 /* -------- 1,11,10001 CMEQ d_d_d -------- */ // ==
10782 Bool isEQ
= bitU
== 1;
10783 IRExpr
* argL
= getQReg128(nn
);
10784 IRExpr
* argR
= getQReg128(mm
);
10785 IRTemp res
= newTempV128();
10787 isEQ
? binop(Iop_CmpEQ64x2
, argL
, argR
)
10788 : unop(Iop_NotV128
, binop(Iop_CmpEQ64x2
,
10789 binop(Iop_AndV128
, argL
, argR
),
10791 putQReg128(dd
, unop(Iop_ZeroHI64ofV128
, mkexpr(res
)));
10792 DIP("%s %s, %s, %s\n", isEQ
? "cmeq" : "cmtst",
10793 nameQRegLO(dd
, Ity_I64
),
10794 nameQRegLO(nn
, Ity_I64
), nameQRegLO(mm
, Ity_I64
));
10798 if (opcode
== BITS5(1,0,1,1,0)) {
10799 /* -------- 0,xx,10110 SQDMULH s and h variants only -------- */
10800 /* -------- 1,xx,10110 SQRDMULH s and h variants only -------- */
10801 if (size
== X00
|| size
== X11
) return False
;
10802 Bool isR
= bitU
== 1;
10803 IRTemp res
, sat1q
, sat1n
, vN
, vM
;
10804 res
= sat1q
= sat1n
= vN
= vM
= IRTemp_INVALID
;
10805 newTempsV128_2(&vN
, &vM
);
10806 assign(vN
, getQReg128(nn
));
10807 assign(vM
, getQReg128(mm
));
10808 math_SQDMULH(&res
, &sat1q
, &sat1n
, isR
, size
, vN
, vM
);
10810 mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(size
, mkexpr(res
))));
10811 updateQCFLAGwithDifference(
10812 math_ZERO_ALL_EXCEPT_LOWEST_LANE(size
, mkexpr(sat1q
)),
10813 math_ZERO_ALL_EXCEPT_LOWEST_LANE(size
, mkexpr(sat1n
)));
10814 const HChar arr
= "bhsd"[size
];
10815 const HChar
* nm
= isR
? "sqrdmulh" : "sqdmulh";
10816 DIP("%s %c%u, %c%u, %c%u\n", nm
, arr
, dd
, arr
, nn
, arr
, mm
);
10820 if (bitU
== 1 && size
>= X10
&& opcode
== BITS5(1,1,0,1,0)) {
10821 /* -------- 1,1x,11010 FABD d_d_d, s_s_s -------- */
10822 IRType ity
= size
== X11
? Ity_F64
: Ity_F32
;
10823 IRTemp res
= newTemp(ity
);
10824 assign(res
, unop(mkABSF(ity
),
10826 mkexpr(mk_get_IR_rounding_mode()),
10827 getQRegLO(nn
,ity
), getQRegLO(mm
,ity
))));
10828 putQReg128(dd
, mkV128(0x0000));
10829 putQRegLO(dd
, mkexpr(res
));
10830 DIP("fabd %s, %s, %s\n",
10831 nameQRegLO(dd
, ity
), nameQRegLO(nn
, ity
), nameQRegLO(mm
, ity
));
10835 if (bitU
== 0 && size
<= X01
&& opcode
== BITS5(1,1,0,1,1)) {
10836 /* -------- 0,0x,11011 FMULX d_d_d, s_s_s -------- */
10837 // KLUDGE: FMULX is treated the same way as FMUL. That can't be right.
10838 IRType ity
= size
== X01
? Ity_F64
: Ity_F32
;
10839 IRTemp res
= newTemp(ity
);
10840 assign(res
, triop(mkMULF(ity
),
10841 mkexpr(mk_get_IR_rounding_mode()),
10842 getQRegLO(nn
,ity
), getQRegLO(mm
,ity
)));
10843 putQReg128(dd
, mkV128(0x0000));
10844 putQRegLO(dd
, mkexpr(res
));
10845 DIP("fmulx %s, %s, %s\n",
10846 nameQRegLO(dd
, ity
), nameQRegLO(nn
, ity
), nameQRegLO(mm
, ity
));
10850 if (size
<= X01
&& opcode
== BITS5(1,1,1,0,0)) {
10851 /* -------- 0,0x,11100 FCMEQ d_d_d, s_s_s -------- */
10852 /* -------- 1,0x,11100 FCMGE d_d_d, s_s_s -------- */
10853 Bool isD
= size
== X01
;
10854 IRType ity
= isD
? Ity_F64
: Ity_F32
;
10855 Bool isGE
= bitU
== 1;
10856 IROp opCMP
= isGE
? (isD
? Iop_CmpLE64Fx2
: Iop_CmpLE32Fx4
)
10857 : (isD
? Iop_CmpEQ64Fx2
: Iop_CmpEQ32Fx4
);
10858 IRTemp res
= newTempV128();
10859 assign(res
, isGE
? binop(opCMP
, getQReg128(mm
), getQReg128(nn
)) // swapd
10860 : binop(opCMP
, getQReg128(nn
), getQReg128(mm
)));
10861 putQReg128(dd
, mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(isD
? X11
: X10
,
10863 DIP("%s %s, %s, %s\n", isGE
? "fcmge" : "fcmeq",
10864 nameQRegLO(dd
, ity
), nameQRegLO(nn
, ity
), nameQRegLO(mm
, ity
));
10868 if (bitU
== 1 && size
>= X10
&& opcode
== BITS5(1,1,1,0,0)) {
10869 /* -------- 1,1x,11100 FCMGT d_d_d, s_s_s -------- */
10870 Bool isD
= size
== X11
;
10871 IRType ity
= isD
? Ity_F64
: Ity_F32
;
10872 IROp opCMP
= isD
? Iop_CmpLT64Fx2
: Iop_CmpLT32Fx4
;
10873 IRTemp res
= newTempV128();
10874 assign(res
, binop(opCMP
, getQReg128(mm
), getQReg128(nn
))); // swapd
10875 putQReg128(dd
, mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(isD
? X11
: X10
,
10877 DIP("%s %s, %s, %s\n", "fcmgt",
10878 nameQRegLO(dd
, ity
), nameQRegLO(nn
, ity
), nameQRegLO(mm
, ity
));
10882 if (bitU
== 1 && opcode
== BITS5(1,1,1,0,1)) {
10883 /* -------- 1,0x,11101 FACGE d_d_d, s_s_s -------- */
10884 /* -------- 1,1x,11101 FACGT d_d_d, s_s_s -------- */
10885 Bool isD
= (size
& 1) == 1;
10886 IRType ity
= isD
? Ity_F64
: Ity_F32
;
10887 Bool isGT
= (size
& 2) == 2;
10888 IROp opCMP
= isGT
? (isD
? Iop_CmpLT64Fx2
: Iop_CmpLT32Fx4
)
10889 : (isD
? Iop_CmpLE64Fx2
: Iop_CmpLE32Fx4
);
10890 IROp opABS
= isD
? Iop_Abs64Fx2
: Iop_Abs32Fx4
;
10891 IRTemp res
= newTempV128();
10892 assign(res
, binop(opCMP
, unop(opABS
, getQReg128(mm
)),
10893 unop(opABS
, getQReg128(nn
)))); // swapd
10894 putQReg128(dd
, mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(isD
? X11
: X10
,
10896 DIP("%s %s, %s, %s\n", isGT
? "facgt" : "facge",
10897 nameQRegLO(dd
, ity
), nameQRegLO(nn
, ity
), nameQRegLO(mm
, ity
));
10901 if (bitU
== 0 && opcode
== BITS5(1,1,1,1,1)) {
10902 /* -------- 0,0x,11111: FRECPS d_d_d, s_s_s -------- */
10903 /* -------- 0,1x,11111: FRSQRTS d_d_d, s_s_s -------- */
10904 Bool isSQRT
= (size
& 2) == 2;
10905 Bool isD
= (size
& 1) == 1;
10906 IROp op
= isSQRT
? (isD
? Iop_RSqrtStep64Fx2
: Iop_RSqrtStep32Fx4
)
10907 : (isD
? Iop_RecipStep64Fx2
: Iop_RecipStep32Fx4
);
10908 IRTemp res
= newTempV128();
10909 assign(res
, binop(op
, getQReg128(nn
), getQReg128(mm
)));
10910 putQReg128(dd
, mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(isD
? X11
: X10
,
10912 HChar c
= isD
? 'd' : 's';
10913 DIP("%s %c%u, %c%u, %c%u\n", isSQRT
? "frsqrts" : "frecps",
10914 c
, dd
, c
, nn
, c
, mm
);

Bool dis_AdvSIMD_scalar_three_same_extra(/*MB_OUT*/DisResult* dres, UInt insn,
                                         const VexArchInfo* archinfo)
{
   /* 31   29 28    23   21 20 15     10 9 4
      01   U  11110 size 0  m  opcode 1  n d
      Decode fields: u,size,opcode
   */
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   if (INSN(31,30) != BITS2(0,1)
       || INSN(28,24) != BITS5(1,1,1,1,0)
       || INSN(21,21) != 0
       || INSN(10,10) != 1) {
      return False;
   }
   UInt bitU   = INSN(29,29);
   UInt size   = INSN(23,22);
   UInt mm     = INSN(20,16);
   UInt opcode = INSN(15,11);
   UInt nn     = INSN(9,5);
   UInt dd     = INSN(4,0);

   vassert(mm < 32 && nn < 32 && dd < 32);
10946 if (bitU
== 1 && (opcode
== BITS5(1,0,0,0,0) || opcode
== BITS5(1,0,0,0,1))) {
10947 /* -------- xx,10000 SQRDMLAH s and h variants only -------- */
10948 /* -------- xx,10001 SQRDMLSH s and h variants only -------- */
10949 if (size
== X00
|| size
== X11
) return False
;
10950 Bool isAdd
= opcode
== BITS5(1,0,0,0,0);
10952 IRTemp res
, res_nosat
, vD
, vN
, vM
;
10953 res
= res_nosat
= vD
= vN
= vM
= IRTemp_INVALID
;
10954 newTempsV128_3(&vD
, &vN
, &vM
);
10955 assign(vD
, getQReg128(dd
));
10956 assign(vN
, getQReg128(nn
));
10957 assign(vM
, getQReg128(mm
));
10959 math_SQRDMLAH(&res
, &res_nosat
, isAdd
, size
, vD
, vN
, vM
);
10961 mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(size
, mkexpr(res
))));
10962 updateQCFLAGwithDifference(
10963 math_ZERO_ALL_EXCEPT_LOWEST_LANE(size
, mkexpr(res
)),
10964 math_ZERO_ALL_EXCEPT_LOWEST_LANE(size
, mkexpr(res_nosat
)));
10966 const HChar arr
= "hs"[size
];
10967 const HChar
* nm
= isAdd
? "sqrdmlah" : "sqrdmlsh";
10968 DIP("%s %c%u, %c%u, %c%u\n", nm
, arr
, dd
, arr
, nn
, arr
, mm
);
10972 if (bitU
== 1 && size
== X11
&& opcode
== BITS5(0,0,0,1,0)) {
10973 /* -------- 1,11,00010 FABD h_h_h -------- */
10974 if ((archinfo
->hwcaps
& VEX_HWCAPS_ARM64_FP16
) == 0)
10976 IRTemp res
= newTemp(Ity_F16
);
10977 assign(res
, unop(mkABSF(Ity_F16
),
10978 triop(mkSUBF(Ity_F16
),
10979 mkexpr(mk_get_IR_rounding_mode()),
10980 getQRegLO(nn
,Ity_F16
), getQRegLO(mm
,Ity_F16
))));
10981 putQReg128(dd
, mkV128(0x0000));
10982 putQRegLO(dd
, mkexpr(res
));
10983 DIP("fabd %s, %s, %s\n",
10984 nameQRegLO(dd
, Ity_F16
), nameQRegLO(nn
, Ity_F16
), nameQRegLO(mm
, Ity_F16
));
10988 if (size
== X01
&& opcode
== BITS5(0,0,1,0,0)) {
10989 /* -------- 0,01,00100 FCMEQ h_h_h -------- */
10990 /* -------- 1,01,00100 FCMGE h_h_h -------- */
10991 if ((archinfo
->hwcaps
& VEX_HWCAPS_ARM64_FP16
) == 0)
10993 Bool isGE
= bitU
== 1;
10994 IROp opCMP
= isGE
? Iop_CmpLE16Fx8
: Iop_CmpEQ16Fx8
;
10995 IRTemp res
= newTempV128();
10996 /* Swap source and destination in order to use existing LE IR op for GE. */
10997 assign(res
, isGE
? binop(opCMP
, getQReg128(mm
), getQReg128(nn
))
10998 : binop(opCMP
, getQReg128(nn
), getQReg128(mm
)));
10999 putQReg128(dd
, mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(X01
, mkexpr(res
))));
11000 DIP("%s %s, %s, %s\n", isGE
? "fcmge" : "fcmeq",
11001 nameQRegLO(dd
, Ity_F16
), nameQRegLO(nn
, Ity_F16
), nameQRegLO(mm
, Ity_F16
));
11005 if (bitU
== 1 && size
== X11
&& opcode
== BITS5(0,0,1,0,0)) {
11006 /* -------- 1,11,00100 FCMGT h_h_h -------- */
11007 if ((archinfo
->hwcaps
& VEX_HWCAPS_ARM64_FP16
) == 0)
11009 IRTemp res
= newTempV128();
11010 /* Swap source and destination in order to use existing LT IR op for GT. */
11011 assign(res
, binop(Iop_CmpLT16Fx8
, getQReg128(mm
), getQReg128(nn
)));
11012 putQReg128(dd
, mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(X01
, mkexpr(res
))));
11013 DIP("%s %s, %s, %s\n", "fcmgt",
11014 nameQRegLO(dd
, Ity_F16
), nameQRegLO(nn
, Ity_F16
), nameQRegLO(mm
, Ity_F16
));
11018 if (bitU
== 1 && opcode
== BITS5(0,0,1,0,1)) {
11019 /* -------- 1,01,00101 FACGE h_h_h -------- */
11020 /* -------- 1,01,00101 FACGT h_h_h -------- */
11021 if ((archinfo
->hwcaps
& VEX_HWCAPS_ARM64_FP16
) == 0)
11023 IRType ity
= Ity_F16
;
11024 Bool isGT
= (size
& 2) == 2;
11025 IROp opCMP
= isGT
? Iop_CmpLT16Fx8
: Iop_CmpLE16Fx8
;
11026 IROp opABS
= Iop_Abs16Fx8
;
11027 IRTemp res
= newTempV128();
11028 assign(res
, binop(opCMP
, unop(opABS
, getQReg128(mm
)),
11029 unop(opABS
, getQReg128(nn
))));
11030 putQReg128(dd
, mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(X01
,
11032 DIP("%s %s, %s, %s\n", isGT
? "facgt" : "facge",
11033 nameQRegLO(dd
, ity
), nameQRegLO(nn
, ity
), nameQRegLO(mm
, ity
));

Bool dis_AdvSIMD_scalar_two_reg_misc(/*MB_OUT*/DisResult* dres, UInt insn)
{
   /* 31   29 28    23   21    16     11 9 4
      01   U  11110 size 10000 opcode 10 n d
      Decode fields: u,size,opcode
   */
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   if (INSN(31,30) != BITS2(0,1)
       || INSN(28,24) != BITS5(1,1,1,1,0)
       || INSN(21,17) != BITS5(1,0,0,0,0)
       || INSN(11,10) != BITS2(1,0)) {
      return False;
   }
   UInt bitU   = INSN(29,29);
   UInt size   = INSN(23,22);
   UInt opcode = INSN(16,12);
   UInt nn     = INSN(9,5);
   UInt dd     = INSN(4,0);
11063 if (opcode
== BITS5(0,0,0,1,1)) {
11064 /* -------- 0,xx,00011: SUQADD std4_std4 -------- */
11065 /* -------- 1,xx,00011: USQADD std4_std4 -------- */
11066 /* These are a bit tricky (to say the least). See comments on
11067 the vector variants (in dis_AdvSIMD_two_reg_misc) below for
11069 Bool isUSQADD
= bitU
== 1;
11070 IROp qop
= isUSQADD
? mkVecQADDEXTSUSATUU(size
)
11071 : mkVecQADDEXTUSSATSS(size
);
11072 IROp nop
= mkVecADD(size
);
11073 IRTemp argL
= newTempV128();
11074 IRTemp argR
= newTempV128();
11075 assign(argL
, getQReg128(nn
));
11076 assign(argR
, getQReg128(dd
));
11077 IRTemp qres
= math_ZERO_ALL_EXCEPT_LOWEST_LANE(
11078 size
, binop(qop
, mkexpr(argL
), mkexpr(argR
)));
11079 IRTemp nres
= math_ZERO_ALL_EXCEPT_LOWEST_LANE(
11080 size
, binop(nop
, mkexpr(argL
), mkexpr(argR
)));
11081 putQReg128(dd
, mkexpr(qres
));
11082 updateQCFLAGwithDifference(qres
, nres
);
11083 const HChar arr
= "bhsd"[size
];
11084 DIP("%s %c%u, %c%u\n", isUSQADD
? "usqadd" : "suqadd", arr
, dd
, arr
, nn
);
11088 if (opcode
== BITS5(0,0,1,1,1)) {
11089 /* -------- 0,xx,00111 SQABS std4_std4 -------- */
11090 /* -------- 1,xx,00111 SQNEG std4_std4 -------- */
11091 Bool isNEG
= bitU
== 1;
11092 IRTemp qresFW
= IRTemp_INVALID
, nresFW
= IRTemp_INVALID
;
11093 (isNEG
? math_SQNEG
: math_SQABS
)( &qresFW
, &nresFW
,
11094 getQReg128(nn
), size
);
11095 IRTemp qres
= math_ZERO_ALL_EXCEPT_LOWEST_LANE(size
, mkexpr(qresFW
));
11096 IRTemp nres
= math_ZERO_ALL_EXCEPT_LOWEST_LANE(size
, mkexpr(nresFW
));
11097 putQReg128(dd
, mkexpr(qres
));
11098 updateQCFLAGwithDifference(qres
, nres
);
11099 const HChar arr
= "bhsd"[size
];
11100 DIP("%s %c%u, %c%u\n", isNEG
? "sqneg" : "sqabs", arr
, dd
, arr
, nn
);
11104 if (size
== X11
&& opcode
== BITS5(0,1,0,0,0)) {
11105 /* -------- 0,11,01000: CMGT d_d_#0 -------- */ // >s 0
11106 /* -------- 1,11,01000: CMGE d_d_#0 -------- */ // >=s 0
11107 Bool isGT
= bitU
== 0;
11108 IRExpr
* argL
= getQReg128(nn
);
11109 IRExpr
* argR
= mkV128(0x0000);
11110 IRTemp res
= newTempV128();
11111 assign(res
, isGT
? binop(Iop_CmpGT64Sx2
, argL
, argR
)
11112 : unop(Iop_NotV128
, binop(Iop_CmpGT64Sx2
, argR
, argL
)));
11113 putQReg128(dd
, unop(Iop_ZeroHI64ofV128
, mkexpr(res
)));
11114 DIP("cm%s d%u, d%u, #0\n", isGT
? "gt" : "ge", dd
, nn
);
11118 if (size
== X11
&& opcode
== BITS5(0,1,0,0,1)) {
11119 /* -------- 0,11,01001: CMEQ d_d_#0 -------- */ // == 0
11120 /* -------- 1,11,01001: CMLE d_d_#0 -------- */ // <=s 0
11121 Bool isEQ
= bitU
== 0;
11122 IRExpr
* argL
= getQReg128(nn
);
11123 IRExpr
* argR
= mkV128(0x0000);
11124 IRTemp res
= newTempV128();
11125 assign(res
, isEQ
? binop(Iop_CmpEQ64x2
, argL
, argR
)
11126 : unop(Iop_NotV128
,
11127 binop(Iop_CmpGT64Sx2
, argL
, argR
)));
11128 putQReg128(dd
, unop(Iop_ZeroHI64ofV128
, mkexpr(res
)));
11129 DIP("cm%s d%u, d%u, #0\n", isEQ
? "eq" : "le", dd
, nn
);
11133 if (bitU
== 0 && size
== X11
&& opcode
== BITS5(0,1,0,1,0)) {
11134 /* -------- 0,11,01010: CMLT d_d_#0 -------- */ // <s 0
11135 putQReg128(dd
, unop(Iop_ZeroHI64ofV128
,
11136 binop(Iop_CmpGT64Sx2
, mkV128(0x0000),
11138 DIP("cm%s d%u, d%u, #0\n", "lt", dd
, nn
);
11142 if (bitU
== 0 && size
== X11
&& opcode
== BITS5(0,1,0,1,1)) {
11143 /* -------- 0,11,01011 ABS d_d -------- */
11144 putQReg128(dd
, unop(Iop_ZeroHI64ofV128
,
11145 unop(Iop_Abs64x2
, getQReg128(nn
))));
11146 DIP("abs d%u, d%u\n", dd
, nn
);
11150 if (bitU
== 1 && size
== X11
&& opcode
== BITS5(0,1,0,1,1)) {
11151 /* -------- 1,11,01011 NEG d_d -------- */
11152 putQReg128(dd
, unop(Iop_ZeroHI64ofV128
,
11153 binop(Iop_Sub64x2
, mkV128(0x0000), getQReg128(nn
))));
11154 DIP("neg d%u, d%u\n", dd
, nn
);
11158 UInt ix
= 0; /*INVALID*/
11161 case BITS5(0,1,1,0,0): ix
= (bitU
== 1) ? 4 : 1; break;
11162 case BITS5(0,1,1,0,1): ix
= (bitU
== 1) ? 5 : 2; break;
11163 case BITS5(0,1,1,1,0): if (bitU
== 0) ix
= 3; break;
11168 /* -------- 0,1x,01100 FCMGT d_d_#0.0, s_s_#0.0 (ix 1) -------- */
11169 /* -------- 0,1x,01101 FCMEQ d_d_#0.0, s_s_#0.0 (ix 2) -------- */
11170 /* -------- 0,1x,01110 FCMLT d_d_#0.0, s_s_#0.0 (ix 3) -------- */
11171 /* -------- 1,1x,01100 FCMGE d_d_#0.0, s_s_#0.0 (ix 4) -------- */
11172 /* -------- 1,1x,01101 FCMLE d_d_#0.0, s_s_#0.0 (ix 5) -------- */
11173 Bool isD
= size
== X11
;
11174 IRType ity
= isD
? Ity_F64
: Ity_F32
;
11175 IROp opCmpEQ
= isD
? Iop_CmpEQ64Fx2
: Iop_CmpEQ32Fx4
;
11176 IROp opCmpLE
= isD
? Iop_CmpLE64Fx2
: Iop_CmpLE32Fx4
;
11177 IROp opCmpLT
= isD
? Iop_CmpLT64Fx2
: Iop_CmpLT32Fx4
;
11178 IROp opCmp
= Iop_INVALID
;
11180 const HChar
* nm
= "??";
11182 case 1: nm
= "fcmgt"; opCmp
= opCmpLT
; swap
= True
; break;
11183 case 2: nm
= "fcmeq"; opCmp
= opCmpEQ
; break;
11184 case 3: nm
= "fcmlt"; opCmp
= opCmpLT
; break;
11185 case 4: nm
= "fcmge"; opCmp
= opCmpLE
; swap
= True
; break;
11186 case 5: nm
= "fcmle"; opCmp
= opCmpLE
; break;
11187 default: vassert(0);
11189 IRExpr
* zero
= mkV128(0x0000);
11190 IRTemp res
= newTempV128();
11191 assign(res
, swap
? binop(opCmp
, zero
, getQReg128(nn
))
11192 : binop(opCmp
, getQReg128(nn
), zero
));
11193 putQReg128(dd
, mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(isD
? X11
: X10
,
11196 DIP("%s %s, %s, #0.0\n", nm
, nameQRegLO(dd
, ity
), nameQRegLO(nn
, ity
));
11200 if (opcode
== BITS5(1,0,1,0,0)
11201 || (bitU
== 1 && opcode
== BITS5(1,0,0,1,0))) {
11202 /* -------- 0,xx,10100: SQXTN -------- */
11203 /* -------- 1,xx,10100: UQXTN -------- */
11204 /* -------- 1,xx,10010: SQXTUN -------- */
11205 if (size
== X11
) return False
;
11207 IROp opN
= Iop_INVALID
;
11208 Bool zWiden
= True
;
11209 const HChar
* nm
= "??";
11210 /**/ if (bitU
== 0 && opcode
== BITS5(1,0,1,0,0)) {
11211 opN
= mkVecQNARROWUNSS(size
); nm
= "sqxtn"; zWiden
= False
;
11213 else if (bitU
== 1 && opcode
== BITS5(1,0,1,0,0)) {
11214 opN
= mkVecQNARROWUNUU(size
); nm
= "uqxtn";
11216 else if (bitU
== 1 && opcode
== BITS5(1,0,0,1,0)) {
11217 opN
= mkVecQNARROWUNSU(size
); nm
= "sqxtun";
11220 IRTemp src
= math_ZERO_ALL_EXCEPT_LOWEST_LANE(
11221 size
+1, getQReg128(nn
));
11222 IRTemp resN
= math_ZERO_ALL_EXCEPT_LOWEST_LANE(
11223 size
, unop(Iop_64UtoV128
, unop(opN
, mkexpr(src
))));
11224 putQReg128(dd
, mkexpr(resN
));
11225 /* This widens zero lanes to zero, and compares it against zero, so all
11226 of the non-participating lanes make no contribution to the
11228 IRTemp resW
= math_WIDEN_LO_OR_HI_LANES(zWiden
, False
/*!fromUpperHalf*/,
11229 size
, mkexpr(resN
));
11230 updateQCFLAGwithDifference(src
, resW
);
11231 const HChar arrNarrow
= "bhsd"[size
];
11232 const HChar arrWide
= "bhsd"[size
+1];
11233 DIP("%s %c%u, %c%u\n", nm
, arrNarrow
, dd
, arrWide
, nn
);
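      /* The QC check above works by re-widening the narrowed result
         (with the matching signedness) and comparing it with the
         source lane: if narrowing saturated, the round trip cannot
         reproduce the original value.  For example, assuming
         SQXTN b0, h1 with the source halfword 0x0200: the result
         saturates to 0x7F, which widens back to 0x007F != 0x0200, so
         QC is set; a source of 0x0012 narrows to 0x12, widens back to
         0x0012, and leaves QC alone. */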
   if (opcode == BITS5(1,0,1,1,0) && bitU == 1 && size == X01) {
      /* -------- 1,01,10110 FCVTXN s_d -------- */
      /* Using Irrm_NEAREST here isn't right.  The docs say "round to
         odd" but I don't know what that really means. */
      putQRegLO(dd,
                binop(Iop_F64toF32, mkU32(Irrm_NEAREST),
                      getQRegLO(nn, Ity_F64)));
      putQRegLane(dd, 1, mkU32(0));
      putQRegLane(dd, 1, mkU64(0));
      DIP("fcvtxn s%u, d%u\n", dd, nn);
      return True;
   }
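   /* (Unverified note:) "round to odd" apparently means: truncate, then force
      the result's least significant mantissa bit to 1 whenever the conversion
      was inexact, so that a later further narrowing cannot suffer double
      rounding.  There is currently no IR rounding mode that expresses it. */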
   ix = 0; /*INVALID*/
   switch (opcode) {
      case BITS5(1,1,0,1,0): ix = ((size & 2) == 2) ? 4 : 1; break;
      case BITS5(1,1,0,1,1): ix = ((size & 2) == 2) ? 5 : 2; break;
      case BITS5(1,1,1,0,0): if ((size & 2) == 0) ix = 3; break;
      default: break;
   }
   if (ix > 0) {
      /* -------- 0,0x,11010 FCVTNS d_d, s_s (ix 1) -------- */
      /* -------- 0,0x,11011 FCVTMS d_d, s_s (ix 2) -------- */
      /* -------- 0,0x,11100 FCVTAS d_d, s_s (ix 3) -------- */
      /* -------- 0,1x,11010 FCVTPS d_d, s_s (ix 4) -------- */
      /* -------- 0,1x,11011 FCVTZS d_d, s_s (ix 5) -------- */
      /* -------- 1,0x,11010 FCVTNU d_d, s_s (ix 1) -------- */
      /* -------- 1,0x,11011 FCVTMU d_d, s_s (ix 2) -------- */
      /* -------- 1,0x,11100 FCVTAU d_d, s_s (ix 3) -------- */
      /* -------- 1,1x,11010 FCVTPU d_d, s_s (ix 4) -------- */
      /* -------- 1,1x,11011 FCVTZU d_d, s_s (ix 5) -------- */
      Bool   isD = (size & 1) == 1;
      IRType tyF = isD ? Ity_F64 : Ity_F32;
      IRType tyI = isD ? Ity_I64 : Ity_I32;
      IRRoundingMode irrm = 8; /*impossible*/
      HChar ch = '?';
      switch (ix) {
         case 1: ch = 'n'; irrm = Irrm_NEAREST; break;
         case 2: ch = 'm'; irrm = Irrm_NegINF;  break;
         case 3: ch = 'a'; irrm = Irrm_NEAREST; break; /* kludge? */
         case 4: ch = 'p'; irrm = Irrm_PosINF;  break;
         case 5: ch = 'z'; irrm = Irrm_ZERO;    break;
         default: vassert(0);
      }
      IROp cvt = Iop_INVALID;
      if (bitU == 1) {
         cvt = isD ? Iop_F64toI64U : Iop_F32toI32U;
      } else {
         cvt = isD ? Iop_F64toI64S : Iop_F32toI32S;
      }
      IRTemp src = newTemp(tyF);
      IRTemp res = newTemp(tyI);
      assign(src, getQRegLane(nn, 0, tyF));
      assign(res, binop(cvt, mkU32(irrm), mkexpr(src)));
      putQRegLane(dd, 0, mkexpr(res)); /* bits 31-0 or 63-0 */
      if (!isD) {
         putQRegLane(dd, 1, mkU32(0)); /* bits 63-32 */
      }
      putQRegLane(dd, 1, mkU64(0)); /* bits 127-64 */
      HChar sOrD = isD ? 'd' : 's';
      DIP("fcvt%c%c %c%u, %c%u\n", ch, bitU == 1 ? 'u' : 's',
          sOrD, dd, sOrD, nn);
      return True;
   }
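   /* The ix==3 (FCVTA*) mapping to Irrm_NEAREST is only an approximation:
      the architectural rounding mode is round-to-nearest with ties away from
      zero, whereas Irrm_NEAREST is ties-to-even, hence the "kludge?" above. */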
   if (size <= X01 && opcode == BITS5(1,1,1,0,1)) {
      /* -------- 0,0x,11101: SCVTF d_d, s_s -------- */
      /* -------- 1,0x,11101: UCVTF d_d, s_s -------- */
      Bool   isU = bitU == 1;
      Bool   isD = (size & 1) == 1;
      IRType tyI = isD ? Ity_I64 : Ity_I32;
      IROp   iop = isU ? (isD ? Iop_I64UtoF64 : Iop_I32UtoF32)
                       : (isD ? Iop_I64StoF64 : Iop_I32StoF32);
      IRTemp rm  = mk_get_IR_rounding_mode();
      putQRegLO(dd, binop(iop, mkexpr(rm), getQRegLO(nn, tyI)));
      if (!isD) {
         putQRegLane(dd, 1, mkU32(0)); /* bits 63-32 */
      }
      putQRegLane(dd, 1, mkU64(0)); /* bits 127-64 */
      HChar c = isD ? 'd' : 's';
      DIP("%ccvtf %c%u, %c%u\n", isU ? 'u' : 's', c, dd, c, nn);
      return True;
   }
   if (size >= X10 && opcode == BITS5(1,1,1,0,1)) {
      /* -------- 0,1x,11101: FRECPE  d_d, s_s -------- */
      /* -------- 1,1x,11101: FRSQRTE d_d, s_s -------- */
      Bool isSQRT = bitU == 1;
      Bool isD    = (size & 1) == 1;
      IROp op     = isSQRT ? (isD ? Iop_RSqrtEst64Fx2 : Iop_RSqrtEst32Fx4)
                           : (isD ? Iop_RecipEst64Fx2 : Iop_RecipEst32Fx4);
      IRTemp resV = newTempV128();
      assign(resV, unop(op, getQReg128(nn)));
      putQReg128(dd, mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(isD ? X11 : X10,
                                                             mkexpr(resV))));
      HChar c = isD ? 'd' : 's';
      DIP("%s %c%u, %c%u\n", isSQRT ? "frsqrte" : "frecpe", c, dd, c, nn);
      return True;
   }
   if (bitU == 0 && size >= X10 && opcode == BITS5(1,1,1,1,1)) {
      /* -------- 0,1x,11111: FRECPX d_d, s_s -------- */
      Bool   isD = (size & 1) == 1;
      IRType ty  = isD ? Ity_F64 : Ity_F32;
      IROp   op  = isD ? Iop_RecpExpF64 : Iop_RecpExpF32;
      IRTemp res = newTemp(ty);
      IRTemp rm  = mk_get_IR_rounding_mode();
      assign(res, binop(op, mkexpr(rm), getQRegLane(nn, 0, ty)));
      putQReg128(dd, mkV128(0x0000));
      putQRegLane(dd, 0, mkexpr(res));
      HChar c = isD ? 'd' : 's';
      DIP("%s %c%u, %c%u\n", "frecpx", c, dd, c, nn);
      return True;
   }

   return False;
#  undef INSN
}


static
Bool dis_AdvSIMD_scalar_two_reg_misc_fp16(/*MB_OUT*/DisResult* dres, UInt insn,
                                          const VexArchInfo* archinfo)
{
   /* This decode function only handles instructions with half-precision
      floating-point (fp16) operands. */
   if ((archinfo->hwcaps & VEX_HWCAPS_ARM64_FP16) == 0)
      return False;

   /* 31   29 28    23   21    16     11 9 4
      01 U 11110 size 11100 opcode 10 n d
      Decode fields: u,size,opcode
   */
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   if (INSN(31,30) != BITS2(0,1)
       || INSN(28,24) != BITS5(1,1,1,1,0)
       || INSN(21,17) != BITS5(1,1,1,0,0)
       || INSN(11,10) != BITS2(1,0)) {
      return False;
   }
   UInt bitU   = INSN(29,29);
   UInt size   = INSN(23,22);
   UInt opcode = INSN(16,12);
   UInt nn     = INSN(9,5);
   UInt dd     = INSN(4,0);
   vassert(size == 3);

   /* Decoding FCM<condition> based on opcode and bitU. ix is used to select
      the right <condition>. */
   UInt ix = 0; // Invalid <condition>
   switch (opcode) {
      case BITS5(0,1,1,0,1): ix = (bitU == 1) ? 4 : 1; break; // FCMLE=4,FCMEQ=1
      case BITS5(0,1,1,0,0): ix = (bitU == 1) ? 5 : 2; break; // FCMGE=5,FCMGT=2
      case BITS5(0,1,1,1,0): if (bitU == 0) ix = 3; break;    // FCMLT=3
      default: break;
   }
   if (ix > 0) {
      /* -------- 0,01101 FCMEQ h_h_#0.0 (ix 1) -------- */
      /* -------- 0,01100 FCMGT h_h_#0.0 (ix 2) -------- */
      /* -------- 0,01110 FCMLT h_h_#0.0 (ix 3) -------- */
      /* -------- 1,01101 FCMLE h_h_#0.0 (ix 4) -------- */
      /* -------- 1,01100 FCMGE h_h_#0.0 (ix 5) -------- */
      IRType ity   = Ity_F16;
      IROp   opCmp = Iop_INVALID;
      Bool   swap  = False;
      const HChar* nm = "??";
      switch (ix) {
         case 1: nm = "fcmeq"; opCmp = Iop_CmpEQ16Fx8; break;
         case 2: nm = "fcmgt"; opCmp = Iop_CmpLT16Fx8; swap = True; break;
         case 3: nm = "fcmlt"; opCmp = Iop_CmpLT16Fx8; break;
         case 4: nm = "fcmle"; opCmp = Iop_CmpLE16Fx8; break;
         case 5: nm = "fcmge"; opCmp = Iop_CmpLE16Fx8; swap = True; break;
         default: vassert(0);
      }
      IRExpr* zero = mkV128(0x0000);
      IRTemp  res  = newTempV128();
      assign(res, swap ? binop(opCmp, zero, getQReg128(nn))
                       : binop(opCmp, getQReg128(nn), zero));
      putQReg128(dd, mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(X01, mkexpr(res))));

      DIP("%s %s, %s, #0.0\n", nm, nameQRegLO(dd, ity), nameQRegLO(nn, ity));
      return True;
   }

   return False;
#  undef INSN
}


static
Bool dis_AdvSIMD_scalar_x_indexed_element(/*MB_OUT*/DisResult* dres, UInt insn)
{
   /* 31   28    23   21 20 19 15     11   9 4
      01 U 11111 size L  M  m  opcode H 0 n d
      Decode fields are: u,size,opcode
      M is really part of the mm register number.  Individual
      cases need to inspect L and H though.
   */
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   if (INSN(31,30) != BITS2(0,1)
       || INSN(28,24) != BITS5(1,1,1,1,1) || INSN(10,10) != 0) {
      return False;
   }
   UInt bitU   = INSN(29,29);
   UInt size   = INSN(23,22);
   UInt bitL   = INSN(21,21);
   UInt bitM   = INSN(20,20);
   UInt mmLO4  = INSN(19,16);
   UInt opcode = INSN(15,12);
   UInt bitH   = INSN(11,11);
   UInt nn     = INSN(9,5);
   UInt dd     = INSN(4,0);

   vassert(bitH < 2 && bitM < 2 && bitL < 2);

   if (bitU == 0 && size >= X10
       && (opcode == BITS4(0,0,0,1) || opcode == BITS4(0,1,0,1))) {
      /* -------- 0,1x,0001 FMLA d_d_d[], s_s_s[] -------- */
      /* -------- 0,1x,0101 FMLS d_d_d[], s_s_s[] -------- */
      Bool isD   = (size & 1) == 1;
      Bool isSUB = opcode == BITS4(0,1,0,1);
      UInt index;
      if      (!isD)             index = (bitH << 1) | bitL;
      else if (isD && bitL == 0) index = bitH;
      else return False; // sz:L == x11 => unallocated encoding
      vassert(index < (isD ? 2 : 4));
      IRType ity   = isD ? Ity_F64 : Ity_F32;
      IRTemp elem  = newTemp(ity);
      UInt   mm    = (bitM << 4) | mmLO4;
      assign(elem, getQRegLane(mm, index, ity));
      IRTemp dupd  = math_DUP_TO_V128(elem, ity);
      IROp   opADD = isD ? Iop_Add64Fx2 : Iop_Add32Fx4;
      IROp   opSUB = isD ? Iop_Sub64Fx2 : Iop_Sub32Fx4;
      IROp   opMUL = isD ? Iop_Mul64Fx2 : Iop_Mul32Fx4;
      IRTemp rm    = mk_get_IR_rounding_mode();
      IRTemp t1    = newTempV128();
      IRTemp t2    = newTempV128();
      // FIXME: double rounding; use FMA primops instead
      assign(t1, triop(opMUL, mkexpr(rm), getQReg128(nn), mkexpr(dupd)));
      assign(t2, triop(isSUB ? opSUB : opADD,
                       mkexpr(rm), getQReg128(dd), mkexpr(t1)));
      putQReg128(dd,
                 mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(isD ? 3 : 2,
                                                         mkexpr(t2))));
      const HChar c = isD ? 'd' : 's';
      DIP("%s %c%u, %c%u, %s.%c[%u]\n", isSUB ? "fmls" : "fmla",
          c, dd, c, nn, nameQReg128(mm), c, index);
      return True;
   }
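   /* Splitting the fused multiply-add into a separate multiply and add means
      the intermediate product is rounded once and the sum a second time, so
      the least significant mantissa bit of the result can differ from that of
      a true fused FMLA/FMLS. */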
   if (size >= X10 && opcode == BITS4(1,0,0,1)) {
      /* -------- 0,1x,1001 FMUL  d_d_d[], s_s_s[] -------- */
      /* -------- 1,1x,1001 FMULX d_d_d[], s_s_s[] -------- */
      Bool isD    = (size & 1) == 1;
      Bool isMULX = bitU == 1;
      UInt index;
      if      (!isD)             index = (bitH << 1) | bitL;
      else if (isD && bitL == 0) index = bitH;
      else return False; // sz:L == x11 => unallocated encoding
      vassert(index < (isD ? 2 : 4));
      IRType ity   = isD ? Ity_F64 : Ity_F32;
      IRTemp elem  = newTemp(ity);
      UInt   mm    = (bitM << 4) | mmLO4;
      assign(elem, getQRegLane(mm, index, ity));
      IRTemp dupd  = math_DUP_TO_V128(elem, ity);
      IROp   opMUL = isD ? Iop_Mul64Fx2 : Iop_Mul32Fx4;
      IRTemp rm    = mk_get_IR_rounding_mode();
      IRTemp t1    = newTempV128();
      // KLUDGE: FMULX is treated the same way as FMUL.  That can't be right.
      assign(t1, triop(opMUL, mkexpr(rm), getQReg128(nn), mkexpr(dupd)));
      putQReg128(dd,
                 mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(isD ? 3 : 2,
                                                         mkexpr(t1))));
      const HChar c = isD ? 'd' : 's';
      DIP("%s %c%u, %c%u, %s.%c[%u]\n", isMULX ? "fmulx" : "fmul",
          c, dd, c, nn, nameQReg128(mm), c, index);
      return True;
   }

   if (bitU == 0
       && (opcode == BITS4(1,0,1,1)
           || opcode == BITS4(0,0,1,1) || opcode == BITS4(0,1,1,1))) {
      /* -------- 0,xx,1011 SQDMULL s/h variants only -------- */ // 0 (ks)
      /* -------- 0,xx,0011 SQDMLAL s/h variants only -------- */ // 1
      /* -------- 0,xx,0111 SQDMLSL s/h variants only -------- */ // 2
      /* Widens, and size refers to the narrowed lanes. */
      UInt ks = 3;
      switch (opcode) {
         case BITS4(1,0,1,1): ks = 0; break;
         case BITS4(0,0,1,1): ks = 1; break;
         case BITS4(0,1,1,1): ks = 2; break;
         default: vassert(0);
      }
      vassert(ks >= 0 && ks <= 2);
      UInt mm = 32; // invalid
      UInt ix = 16; // invalid
      switch (size) {
         case X00:
            return False; // h_b_b[] case is not allowed
         case X01:
            mm = mmLO4; ix = (bitH << 2) | (bitL << 1) | (bitM << 0); break;
         case X10:
            mm = (bitM << 4) | mmLO4; ix = (bitH << 1) | (bitL << 0); break;
         case X11:
            return False; // q_d_d[] case is not allowed
         default:
            vassert(0);
      }
      vassert(mm < 32 && ix < 16);
      IRTemp vecN, vecD, res, sat1q, sat1n, sat2q, sat2n;
      vecN = vecD = res = sat1q = sat1n = sat2q = sat2n = IRTemp_INVALID;
      newTempsV128_2(&vecN, &vecD);
      assign(vecN, getQReg128(nn));
      IRTemp vecM = math_DUP_VEC_ELEM(getQReg128(mm), size, ix);
      assign(vecD, getQReg128(dd));
      math_SQDMULL_ACC(&res, &sat1q, &sat1n, &sat2q, &sat2n,
                       False/*!is2*/, size, "mas"[ks],
                       vecN, vecM, ks == 0 ? IRTemp_INVALID : vecD);
      IROp opZHI = mkVecZEROHIxxOFV128(size+1);
      putQReg128(dd, unop(opZHI, mkexpr(res)));
      vassert(sat1q != IRTemp_INVALID && sat1n != IRTemp_INVALID);
      updateQCFLAGwithDifferenceZHI(sat1q, sat1n, opZHI);
      if (sat2q != IRTemp_INVALID || sat2n != IRTemp_INVALID) {
         updateQCFLAGwithDifferenceZHI(sat2q, sat2n, opZHI);
      }
      const HChar* nm        = ks == 0 ? "sqdmull"
                                       : (ks == 1 ? "sqdmlal" : "sqdmlsl");
      const HChar  arrNarrow = "bhsd"[size];
      const HChar  arrWide   = "bhsd"[size+1];
      DIP("%s %c%u, %c%u, v%u.%c[%u]\n",
          nm, arrWide, dd, arrNarrow, nn, mm, arrNarrow, ix);
      return True;
   }
   if (bitU == 0 && (opcode == BITS4(1,1,0,0) || opcode == BITS4(1,1,0,1))) {
      /* -------- 0,xx,1100 SQDMULH  s and h variants only -------- */
      /* -------- 0,xx,1101 SQRDMULH s and h variants only -------- */
      UInt mm = 32; // invalid
      UInt ix = 16; // invalid
      switch (size) {
         case X00:
            return False; // b case is not allowed
         case X01:
            mm = mmLO4; ix = (bitH << 2) | (bitL << 1) | (bitM << 0); break;
         case X10:
            mm = (bitM << 4) | mmLO4; ix = (bitH << 1) | (bitL << 0); break;
         case X11:
            return False; // q case is not allowed
         default:
            vassert(0);
      }
      vassert(mm < 32 && ix < 16);
      Bool isR = opcode == BITS4(1,1,0,1);
      IRTemp res, sat1q, sat1n, vN, vM;
      res = sat1q = sat1n = vN = vM = IRTemp_INVALID;
      vN = newTempV128();
      assign(vN, getQReg128(nn));
      vM = math_DUP_VEC_ELEM(getQReg128(mm), size, ix);
      math_SQDMULH(&res, &sat1q, &sat1n, isR, size, vN, vM);
      IROp opZHI = mkVecZEROHIxxOFV128(size);
      putQReg128(dd, unop(opZHI, mkexpr(res)));
      updateQCFLAGwithDifferenceZHI(sat1q, sat1n, opZHI);
      const HChar* nm = isR ? "sqrdmulh" : "sqdmulh";
      HChar ch        = size == X01 ? 'h' : 's';
      DIP("%s %c%u, %c%u, v%d.%c[%u]\n", nm, ch, dd, ch, nn, (Int)mm, ch, ix);
      return True;
   }
   if (bitU == 1 && (opcode == BITS4(1,1,0,1) || opcode == BITS4(1,1,1,1))) {
      /* -------- 1,xx,1101 SQRDMLAH s and h variants only -------- */
      /* -------- 1,xx,1111 SQRDMLSH s and h variants only -------- */
      UInt mm = 32; // invalid
      UInt ix = 16; // invalid
      switch (size) {
         case X00:
            return False; // b case is not allowed
         case X01:
            mm = mmLO4; ix = (bitH << 2) | (bitL << 1) | (bitM << 0); break;
         case X10:
            mm = (bitM << 4) | mmLO4; ix = (bitH << 1) | (bitL << 0); break;
         case X11:
            return False; // d case is not allowed
         default:
            vassert(0);
      }
      vassert(mm < 32 && ix < 16);
      Bool isAdd = opcode == BITS4(1,1,0,1);

      IRTemp res, res_nosat, vD, vN, vM;
      res = res_nosat = vD = vN = vM = IRTemp_INVALID;
      newTempsV128_2(&vD, &vN);
      assign(vD, getQReg128(dd));
      assign(vN, getQReg128(nn));
      vM = math_DUP_VEC_ELEM(getQReg128(mm), size, ix);

      math_SQRDMLAH(&res, &res_nosat, isAdd, size, vD, vN, vM);
      IROp opZHI = mkVecZEROHIxxOFV128(size);
      putQReg128(dd, unop(opZHI, mkexpr(res)));
      updateQCFLAGwithDifferenceZHI(res, res_nosat, opZHI);

      const HChar* nm = isAdd ? "sqrdmlah" : "sqrdmlsh";
      HChar ch        = size == X01 ? 'h' : 's';
      DIP("%s %c%u, %c%u, v%d.%c[%u]\n", nm, ch, dd, ch, nn, (Int)mm, ch, ix);
      return True;
   }

   return False;
#  undef INSN
}


static
Bool dis_AdvSIMD_shift_by_immediate(/*MB_OUT*/DisResult* dres, UInt insn)
{
   /* 31    28     22   18   15     10 9 4
      0 q u 011110 immh immb opcode 1  n d
      Decode fields: u,opcode
   */
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   if (INSN(31,31) != 0
       || INSN(28,23) != BITS6(0,1,1,1,1,0) || INSN(10,10) != 1) {
      return False;
   }
   UInt bitQ   = INSN(30,30);
   UInt bitU   = INSN(29,29);
   UInt immh   = INSN(22,19);
   UInt immb   = INSN(18,16);
   UInt opcode = INSN(15,11);
   UInt nn     = INSN(9,5);
   UInt dd     = INSN(4,0);

   if (opcode == BITS5(0,0,0,0,0) || opcode == BITS5(0,0,0,1,0)) {
      /* -------- 0,00000 SSHR std7_std7_#imm -------- */
      /* -------- 1,00000 USHR std7_std7_#imm -------- */
      /* -------- 0,00010 SSRA std7_std7_#imm -------- */
      /* -------- 1,00010 USRA std7_std7_#imm -------- */
      /* laneTy, shift = case immh:immb of
                         0001:xxx -> B, SHR:8-xxx
                         001x:xxx -> H, SHR:16-xxxx
                         01xx:xxx -> S, SHR:32-xxxxx
                         1xxx:xxx -> D, SHR:64-xxxxxx
                         other    -> invalid
      */
      UInt size  = 0;
      UInt shift = 0;
      Bool isQ   = bitQ == 1;
      Bool isU   = bitU == 1;
      Bool isAcc = opcode == BITS5(0,0,0,1,0);
      Bool ok    = getLaneInfo_IMMH_IMMB(&shift, &size, immh, immb);
      if (!ok || (bitQ == 0 && size == X11)) return False;
      vassert(size >= 0 && size <= 3);
      UInt lanebits = 8 << size;
      vassert(shift >= 1 && shift <= lanebits);
      IROp    op  = isU ? mkVecSHRN(size) : mkVecSARN(size);
      IRExpr* src = getQReg128(nn);
      IRTemp  shf = newTempV128();
      IRTemp  res = newTempV128();
      if (shift == lanebits && isU) {
         assign(shf, mkV128(0x0000));
      } else {
         UInt nudge = 0;
         if (shift == lanebits) {
            /* Only possible for the signed case.  An arithmetic shift right
               by the full lane width gives the same result as a shift by one
               less, which the IR shift ops can express. */
            nudge = 1;
         }
         assign(shf, binop(op, src, mkU8(shift - nudge)));
      }
      assign(res, isAcc ? binop(mkVecADD(size), getQReg128(dd), mkexpr(shf))
                        : mkexpr(shf));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      HChar laneCh = "bhsd"[size];
      UInt  nLanes = (isQ ? 128 : 64) / lanebits;
      const HChar* nm = isAcc ? (isU ? "usra" : "ssra")
                              : (isU ? "ushr" : "sshr");
      DIP("%s %s.%u%c, %s.%u%c, #%u\n", nm,
          nameQReg128(dd), nLanes, laneCh,
          nameQReg128(nn), nLanes, laneCh, shift);
      return True;
   }
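   /* Worked example of the immh:immb decode above: immh:immb = 0001:011
      selects B lanes with shift = 8 - 3 = 5, i.e. "sshr Vd.8b, Vn.8b, #5"
      (or .16b when Q == 1). */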
   if (opcode == BITS5(0,0,1,0,0) || opcode == BITS5(0,0,1,1,0)) {
      /* -------- 0,00100 SRSHR std7_std7_#imm -------- */
      /* -------- 1,00100 URSHR std7_std7_#imm -------- */
      /* -------- 0,00110 SRSRA std7_std7_#imm -------- */
      /* -------- 1,00110 URSRA std7_std7_#imm -------- */
      /* laneTy, shift = case immh:immb of
                         0001:xxx -> B, SHR:8-xxx
                         001x:xxx -> H, SHR:16-xxxx
                         01xx:xxx -> S, SHR:32-xxxxx
                         1xxx:xxx -> D, SHR:64-xxxxxx
                         other    -> invalid
      */
      UInt size  = 0;
      UInt shift = 0;
      Bool isQ   = bitQ == 1;
      Bool isU   = bitU == 1;
      Bool isAcc = opcode == BITS5(0,0,1,1,0);
      Bool ok    = getLaneInfo_IMMH_IMMB(&shift, &size, immh, immb);
      if (!ok || (bitQ == 0 && size == X11)) return False;
      vassert(size >= 0 && size <= 3);
      UInt lanebits = 8 << size;
      vassert(shift >= 1 && shift <= lanebits);
      IROp    op   = isU ? mkVecRSHU(size) : mkVecRSHS(size);
      IRExpr* src  = getQReg128(nn);
      IRTemp  imm8 = newTemp(Ity_I8);
      assign(imm8, mkU8((UChar)(-shift)));
      IRExpr* amt  = mkexpr(math_DUP_TO_V128(imm8, Ity_I8));
      IRTemp  shf  = newTempV128();
      IRTemp  res  = newTempV128();
      assign(shf, binop(op, src, amt));
      assign(res, isAcc ? binop(mkVecADD(size), getQReg128(dd), mkexpr(shf))
                        : mkexpr(shf));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      HChar laneCh = "bhsd"[size];
      UInt  nLanes = (isQ ? 128 : 64) / lanebits;
      const HChar* nm = isAcc ? (isU ? "ursra" : "srsra")
                              : (isU ? "urshr" : "srshr");
      DIP("%s %s.%u%c, %s.%u%c, #%u\n", nm,
          nameQReg128(dd), nLanes, laneCh,
          nameQReg128(nn), nLanes, laneCh, shift);
      return True;
   }
   if (bitU == 1 && opcode == BITS5(0,1,0,0,0)) {
      /* -------- 1,01000 SRI std7_std7_#imm -------- */
      /* laneTy, shift = case immh:immb of
                         0001:xxx -> B, SHR:8-xxx
                         001x:xxx -> H, SHR:16-xxxx
                         01xx:xxx -> S, SHR:32-xxxxx
                         1xxx:xxx -> D, SHR:64-xxxxxx
                         other    -> invalid
      */
      UInt size  = 0;
      UInt shift = 0;
      Bool isQ   = bitQ == 1;
      Bool ok    = getLaneInfo_IMMH_IMMB(&shift, &size, immh, immb);
      if (!ok || (bitQ == 0 && size == X11)) return False;
      vassert(size >= 0 && size <= 3);
      UInt lanebits = 8 << size;
      vassert(shift >= 1 && shift <= lanebits);
      IRExpr* src = getQReg128(nn);
      IRTemp  res = newTempV128();
      if (shift == lanebits) {
         assign(res, getQReg128(dd));
      } else {
         assign(res, binop(mkVecSHRN(size), src, mkU8(shift)));
         IRExpr* nmask = binop(mkVecSHLN(size),
                               mkV128(0xFFFF), mkU8(lanebits - shift));
         IRTemp  tmp = newTempV128();
         assign(tmp, binop(Iop_OrV128,
                           mkexpr(res),
                           binop(Iop_AndV128, getQReg128(dd), nmask)));
         res = tmp;
      }
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      HChar laneCh = "bhsd"[size];
      UInt  nLanes = (isQ ? 128 : 64) / lanebits;
      DIP("%s %s.%u%c, %s.%u%c, #%u\n", "sri",
          nameQReg128(dd), nLanes, laneCh,
          nameQReg128(nn), nLanes, laneCh, shift);
      return True;
   }
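   /* For SRI, nmask (all-ones shifted left by lanebits-shift) selects the top
      'shift' bits of each destination lane, which SRI must preserve; the
      right-shifted source supplies the remaining low bits. */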
   if (opcode == BITS5(0,1,0,1,0)) {
      /* -------- 0,01010 SHL std7_std7_#imm -------- */
      /* -------- 1,01010 SLI std7_std7_#imm -------- */
      /* laneTy, shift = case immh:immb of
                         0001:xxx -> B, xxx
                         001x:xxx -> H, xxxx
                         01xx:xxx -> S, xxxxx
                         1xxx:xxx -> D, xxxxxx
                         other    -> invalid
      */
      UInt size  = 0;
      UInt shift = 0;
      Bool isSLI = bitU == 1;
      Bool isQ   = bitQ == 1;
      Bool ok    = getLaneInfo_IMMH_IMMB(&shift, &size, immh, immb);
      if (!ok || (bitQ == 0 && size == X11)) return False;
      vassert(size >= 0 && size <= 3);
      /* The shift encoding has opposite sign for the leftwards case.
         Adjust shift to compensate. */
      UInt lanebits = 8 << size;
      shift = lanebits - shift;
      vassert(shift >= 0 && shift < lanebits);
      IROp    op  = mkVecSHLN(size);
      IRExpr* src = getQReg128(nn);
      IRTemp  res = newTempV128();
      if (shift == 0) {
         assign(res, src);
      } else {
         assign(res, binop(op, src, mkU8(shift)));
         if (isSLI) {
            IRExpr* nmask = binop(mkVecSHRN(size),
                                  mkV128(0xFFFF), mkU8(lanebits - shift));
            IRTemp  tmp = newTempV128();
            assign(tmp, binop(Iop_OrV128,
                              mkexpr(res),
                              binop(Iop_AndV128, getQReg128(dd), nmask)));
            res = tmp;
         }
      }
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      HChar laneCh = "bhsd"[size];
      UInt  nLanes = (isQ ? 128 : 64) / lanebits;
      const HChar* nm = isSLI ? "sli" : "shl";
      DIP("%s %s.%u%c, %s.%u%c, #%u\n", nm,
          nameQReg128(dd), nLanes, laneCh,
          nameQReg128(nn), nLanes, laneCh, shift);
      return True;
   }
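   /* SLI is the mirror image of SRI: nmask (all-ones shifted right by
      lanebits-shift) selects the low 'shift' bits of each destination lane,
      which the left-shifted source must leave undisturbed. */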
   if (opcode == BITS5(0,1,1,1,0)
       || (bitU == 1 && opcode == BITS5(0,1,1,0,0))) {
      /* -------- 0,01110 SQSHL  std7_std7_#imm -------- */
      /* -------- 1,01110 UQSHL  std7_std7_#imm -------- */
      /* -------- 1,01100 SQSHLU std7_std7_#imm -------- */
      UInt size  = 0;
      UInt shift = 0;
      Bool isQ   = bitQ == 1;
      Bool ok    = getLaneInfo_IMMH_IMMB(&shift, &size, immh, immb);
      if (!ok || (bitQ == 0 && size == X11)) return False;
      vassert(size >= 0 && size <= 3);
      /* The shift encoding has opposite sign for the leftwards case.
         Adjust shift to compensate. */
      UInt lanebits = 8 << size;
      shift = lanebits - shift;
      vassert(shift >= 0 && shift < lanebits);
      const HChar* nm = NULL;
      /**/ if (bitU == 0 && opcode == BITS5(0,1,1,1,0)) nm = "sqshl";
      else if (bitU == 1 && opcode == BITS5(0,1,1,1,0)) nm = "uqshl";
      else if (bitU == 1 && opcode == BITS5(0,1,1,0,0)) nm = "sqshlu";
      else vassert(0);
      IRTemp qDiff1 = IRTemp_INVALID;
      IRTemp qDiff2 = IRTemp_INVALID;
      IRTemp res    = IRTemp_INVALID;
      IRTemp src    = newTempV128();
      assign(src, getQReg128(nn));
      math_QSHL_IMM(&res, &qDiff1, &qDiff2, src, size, shift, nm);
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      updateQCFLAGwithDifferenceZHI(qDiff1, qDiff2,
                                    isQ ? Iop_INVALID : Iop_ZeroHI64ofV128);
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s, #%u\n", nm,
          nameQReg128(dd), arr, nameQReg128(nn), arr, shift);
      return True;
   }

   if (bitU == 0
       && (opcode == BITS5(1,0,0,0,0) || opcode == BITS5(1,0,0,0,1))) {
      /* -------- 0,10000 SHRN{,2}  #imm -------- */
      /* -------- 0,10001 RSHRN{,2} #imm -------- */
      /* Narrows, and size is the narrow size. */
      UInt size  = 0;
      UInt shift = 0;
      Bool is2   = bitQ == 1;
      Bool isR   = opcode == BITS5(1,0,0,0,1);
      Bool ok    = getLaneInfo_IMMH_IMMB(&shift, &size, immh, immb);
      if (!ok || size == X11) return False;
      vassert(shift >= 1);
      IRTemp t1 = newTempV128();
      IRTemp t2 = newTempV128();
      IRTemp t3 = newTempV128();
      assign(t1, getQReg128(nn));
      assign(t2, isR ? binop(mkVecADD(size+1),
                             mkexpr(t1),
                             mkexpr(math_VEC_DUP_IMM(size+1, 1ULL<<(shift-1))))
                     : mkexpr(t1));
      assign(t3, binop(mkVecSHRN(size+1), mkexpr(t2), mkU8(shift)));
      IRTemp t4 = math_NARROW_LANES(t3, t3, size);
      putLO64andZUorPutHI64(is2, dd, t4);
      const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
      const HChar* arrWide   = nameArr_Q_SZ(1, size+1);
      DIP("%s %s.%s, %s.%s, #%u\n", isR ? "rshrn" : "shrn",
          nameQReg128(dd), arrNarrow, nameQReg128(nn), arrWide, shift);
      return True;
   }
   if (opcode == BITS5(1,0,0,1,0) || opcode == BITS5(1,0,0,1,1)
       || (bitU == 1
           && (opcode == BITS5(1,0,0,0,0) || opcode == BITS5(1,0,0,0,1)))) {
      /* -------- 0,10010 SQSHRN{,2}   #imm -------- */
      /* -------- 1,10010 UQSHRN{,2}   #imm -------- */
      /* -------- 0,10011 SQRSHRN{,2}  #imm -------- */
      /* -------- 1,10011 UQRSHRN{,2}  #imm -------- */
      /* -------- 1,10000 SQSHRUN{,2}  #imm -------- */
      /* -------- 1,10001 SQRSHRUN{,2} #imm -------- */
      UInt size  = 0;
      UInt shift = 0;
      Bool is2   = bitQ == 1;
      Bool ok    = getLaneInfo_IMMH_IMMB(&shift, &size, immh, immb);
      if (!ok || size == X11) return False;
      vassert(shift >= 1 && shift <= (8 << size));
      const HChar* nm = "??";
      IROp op = Iop_INVALID;
      /* Decide on the name and the operation. */
      /**/ if (bitU == 0 && opcode == BITS5(1,0,0,1,0)) {
         nm = "sqshrn"; op = mkVecQANDqsarNNARROWSS(size);
      }
      else if (bitU == 1 && opcode == BITS5(1,0,0,1,0)) {
         nm = "uqshrn"; op = mkVecQANDqshrNNARROWUU(size);
      }
      else if (bitU == 0 && opcode == BITS5(1,0,0,1,1)) {
         nm = "sqrshrn"; op = mkVecQANDqrsarNNARROWSS(size);
      }
      else if (bitU == 1 && opcode == BITS5(1,0,0,1,1)) {
         nm = "uqrshrn"; op = mkVecQANDqrshrNNARROWUU(size);
      }
      else if (bitU == 1 && opcode == BITS5(1,0,0,0,0)) {
         nm = "sqshrun"; op = mkVecQANDqsarNNARROWSU(size);
      }
      else if (bitU == 1 && opcode == BITS5(1,0,0,0,1)) {
         nm = "sqrshrun"; op = mkVecQANDqrsarNNARROWSU(size);
      }
      else vassert(0);
      /* Compute the result (Q, shifted value) pair. */
      IRTemp src128 = newTempV128();
      assign(src128, getQReg128(nn));
      IRTemp pair = newTempV128();
      assign(pair, binop(op, mkexpr(src128), mkU8(shift)));
      /* Update the result reg */
      IRTemp res64in128 = newTempV128();
      assign(res64in128, unop(Iop_ZeroHI64ofV128, mkexpr(pair)));
      putLO64andZUorPutHI64(is2, dd, res64in128);
      /* Update the Q flag. */
      IRTemp q64q64 = newTempV128();
      assign(q64q64, binop(Iop_InterleaveHI64x2, mkexpr(pair), mkexpr(pair)));
      IRTemp z128 = newTempV128();
      assign(z128, mkV128(0x0000));
      updateQCFLAGwithDifference(q64q64, z128);

      const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
      const HChar* arrWide   = nameArr_Q_SZ(1, size+1);
      DIP("%s %s.%s, %s.%s, #%u\n", nm,
          nameQReg128(dd), arrNarrow, nameQReg128(nn), arrWide, shift);
      return True;
   }
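   /* The mkVecQANDq...NARROW ops used here produce a pair packed into one
      V128: the narrowed lanes in the lower 64 bits and saturation information
      in the upper 64 bits, which is why the Q flag update above just checks
      whether the upper half is nonzero. */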
   if (opcode == BITS5(1,0,1,0,0)) {
      /* -------- 0,10100 SSHLL{,2} #imm -------- */
      /* -------- 1,10100 USHLL{,2} #imm -------- */
      /* 31  28     22   18   15     9 4
         0q0 011110 immh immb 101001 n d  SSHLL Vd.Ta, Vn.Tb, #sh
         0q1 011110 immh immb 101001 n d  USHLL Vd.Ta, Vn.Tb, #sh
         where Ta, Tb, sh
           = case immh of 1xxx -> invalid
                          01xx -> 2d, 2s(q0)/4s(q1),  immh:immb - 32 (0..31)
                          001x -> 4s, 4h(q0)/8h(q1),  immh:immb - 16 (0..15)
                          0001 -> 8h, 8b(q0)/16b(q1), immh:immb - 8  (0..7)
                          0000 -> AdvSIMD modified immediate (???)
      */
      Bool    isQ   = bitQ == 1;
      Bool    isU   = bitU == 1;
      UInt    immhb = (immh << 3) | immb;
      IRTemp  src   = newTempV128();
      IRTemp  zero  = newTempV128();
      IRExpr* res   = NULL;
      UInt    sh    = 0;
      const HChar* ta = "??";
      const HChar* tb = "??";
      assign(src, getQReg128(nn));
      assign(zero, mkV128(0x0000));
      if (immh & 8) {
         /* invalid; don't assign to res */
      }
      else if (immh & 4) {
         sh = immhb - 32;
         vassert(sh < 32); /* so 32-sh is 1..32 */
         ta = "2d";
         tb = isQ ? "4s" : "2s";
         IRExpr* tmp = isQ ? mk_InterleaveHI32x4(src, zero)
                           : mk_InterleaveLO32x4(src, zero);
         res = binop(isU ? Iop_ShrN64x2 : Iop_SarN64x2, tmp, mkU8(32-sh));
      }
      else if (immh & 2) {
         sh = immhb - 16;
         vassert(sh < 16); /* so 16-sh is 1..16 */
         ta = "4s";
         tb = isQ ? "8h" : "4h";
         IRExpr* tmp = isQ ? mk_InterleaveHI16x8(src, zero)
                           : mk_InterleaveLO16x8(src, zero);
         res = binop(isU ? Iop_ShrN32x4 : Iop_SarN32x4, tmp, mkU8(16-sh));
      }
      else if (immh & 1) {
         sh = immhb - 8;
         vassert(sh < 8); /* so 8-sh is 1..8 */
         ta = "8h";
         tb = isQ ? "16b" : "8b";
         IRExpr* tmp = isQ ? mk_InterleaveHI8x16(src, zero)
                           : mk_InterleaveLO8x16(src, zero);
         res = binop(isU ? Iop_ShrN16x8 : Iop_SarN16x8, tmp, mkU8(8-sh));
      } else {
         vassert(immh == 0);
         /* invalid; don't assign to res */
      }
      if (res == NULL) return False;
      putQReg128(dd, res);
      DIP("%cshll%s %s.%s, %s.%s, #%u\n",
          isU ? 'u' : 's', isQ ? "2" : "",
          nameQReg128(dd), ta, nameQReg128(nn), tb, sh);
      return True;
   }
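   /* The widening trick above: interleaving the source with zero puts each
      narrow lane into the top half of a double-width lane; a subsequent
      arithmetic (signed) or logical (unsigned) right shift by lanebits-sh
      then yields the sign/zero-extended value already shifted left by sh. */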
   if (opcode == BITS5(1,1,1,0,0)) {
      /* -------- 0,11100 SCVTF {2d_2d, 4s_4s, 2s_2s}_imm -------- */
      /* -------- 1,11100 UCVTF {2d_2d, 4s_4s, 2s_2s}_imm -------- */
      /* If immh is of the form 00xx, the insn is invalid. */
      if (immh < BITS4(0,1,0,0)) return False;
      UInt size  = 0;
      UInt fbits = 0;
      Bool ok    = getLaneInfo_IMMH_IMMB(&fbits, &size, immh, immb);
      /* The following holds because immh is never zero. */
      vassert(ok);
      /* The following holds because immh >= 0100. */
      vassert(size == X10 || size == X11);
      Bool isD = size == X11;
      Bool isU = bitU == 1;
      Bool isQ = bitQ == 1;
      if (isD && !isQ) return False; /* reject .1d case */
      vassert(fbits >= 1 && fbits <= (isD ? 64 : 32));
      Double  scale  = two_to_the_minus(fbits);
      IRExpr* scaleE = isD ? IRExpr_Const(IRConst_F64(scale))
                           : IRExpr_Const(IRConst_F32( (Float)scale ));
      IROp   opMUL = isD ? Iop_MulF64 : Iop_MulF32;
      IROp   opCVT = isU ? (isD ? Iop_I64UtoF64 : Iop_I32UtoF32)
                         : (isD ? Iop_I64StoF64 : Iop_I32StoF32);
      IRType tyF = isD ? Ity_F64 : Ity_F32;
      IRType tyI = isD ? Ity_I64 : Ity_I32;
      UInt nLanes = (isQ ? 2 : 1) * (isD ? 1 : 2);
      vassert(nLanes == 2 || nLanes == 4);
      for (UInt i = 0; i < nLanes; i++) {
         IRTemp src = newTemp(tyI);
         IRTemp res = newTemp(tyF);
         IRTemp rm  = mk_get_IR_rounding_mode();
         assign(src, getQRegLane(nn, i, tyI));
         assign(res, triop(opMUL, mkexpr(rm),
                           binop(opCVT, mkexpr(rm), mkexpr(src)),
                           scaleE));
         putQRegLane(dd, i, mkexpr(res));
      }
      if (!isQ) {
         putQRegLane(dd, 1, mkU64(0));
      }
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s, #%u\n", isU ? "ucvtf" : "scvtf",
          nameQReg128(dd), arr, nameQReg128(nn), arr, fbits);
      return True;
   }
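   /* That is, each lane is converted to FP and then multiplied by 2^-fbits,
      which is how a fixed-point value with 'fbits' fraction bits is scaled:
      e.g. with fbits = 8, the integer 300 becomes 300/256 = 1.171875. */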
   if (opcode == BITS5(1,1,1,1,1)) {
      /* -------- 0,11111 FCVTZS {2d_2d, 4s_4s, 2s_2s}_imm -------- */
      /* -------- 1,11111 FCVTZU {2d_2d, 4s_4s, 2s_2s}_imm -------- */
      /* If immh is of the form 00xx, the insn is invalid. */
      if (immh < BITS4(0,1,0,0)) return False;
      UInt size  = 0;
      UInt fbits = 0;
      Bool ok    = getLaneInfo_IMMH_IMMB(&fbits, &size, immh, immb);
      /* The following holds because immh is never zero. */
      vassert(ok);
      /* The following holds because immh >= 0100. */
      vassert(size == X10 || size == X11);
      Bool isD = size == X11;
      Bool isU = bitU == 1;
      Bool isQ = bitQ == 1;
      if (isD && !isQ) return False; /* reject .1d case */
      vassert(fbits >= 1 && fbits <= (isD ? 64 : 32));
      Double  scale  = two_to_the_plus(fbits);
      IRExpr* scaleE = isD ? IRExpr_Const(IRConst_F64(scale))
                           : IRExpr_Const(IRConst_F32( (Float)scale ));
      IROp   opMUL = isD ? Iop_MulF64 : Iop_MulF32;
      IROp   opCVT = isU ? (isD ? Iop_F64toI64U : Iop_F32toI32U)
                         : (isD ? Iop_F64toI64S : Iop_F32toI32S);
      IRType tyF = isD ? Ity_F64 : Ity_F32;
      IRType tyI = isD ? Ity_I64 : Ity_I32;
      UInt nLanes = (isQ ? 2 : 1) * (isD ? 1 : 2);
      vassert(nLanes == 2 || nLanes == 4);
      for (UInt i = 0; i < nLanes; i++) {
         IRTemp src = newTemp(tyF);
         IRTemp res = newTemp(tyI);
         IRTemp rm  = newTemp(Ity_I32);
         assign(src, getQRegLane(nn, i, tyF));
         assign(rm,  mkU32(Irrm_ZERO));
         assign(res, binop(opCVT, mkexpr(rm),
                           triop(opMUL, mkexpr(rm),
                                 mkexpr(src), scaleE)));
         putQRegLane(dd, i, mkexpr(res));
      }
      if (!isQ) {
         putQRegLane(dd, 1, mkU64(0));
      }
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s, #%u\n", isU ? "fcvtzu" : "fcvtzs",
          nameQReg128(dd), arr, nameQReg128(nn), arr, fbits);
      return True;
   }

   return False;
#  undef INSN
}


static
Bool dis_AdvSIMD_three_different(/*MB_OUT*/DisResult* dres, UInt insn)
{
   /* 31 30 29 28    23   21 20 15     11 9 4
      0  Q  U  01110 size 1  m  opcode 00 n d
      Decode fields: u,opcode
   */
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   if (INSN(31,31) != 0
       || INSN(28,24) != BITS5(0,1,1,1,0)
       || INSN(21,21) != 1
       || INSN(11,10) != BITS2(0,0)) {
      return False;
   }
   UInt bitQ   = INSN(30,30);
   UInt bitU   = INSN(29,29);
   UInt size   = INSN(23,22);
   UInt mm     = INSN(20,16);
   UInt opcode = INSN(15,12);
   UInt nn     = INSN(9,5);
   UInt dd     = INSN(4,0);

   Bool is2 = bitQ == 1;

   if (opcode == BITS4(0,0,0,0) || opcode == BITS4(0,0,1,0)) {
      /* -------- 0,0000 SADDL{2} -------- */
      /* -------- 1,0000 UADDL{2} -------- */
      /* -------- 0,0010 SSUBL{2} -------- */
      /* -------- 1,0010 USUBL{2} -------- */
      /* Widens, and size refers to the narrow lanes. */
      if (size == X11) return False;
      vassert(size <= 2);
      Bool   isU   = bitU == 1;
      Bool   isADD = opcode == BITS4(0,0,0,0);
      IRTemp argL  = math_WIDEN_LO_OR_HI_LANES(isU, is2, size, getQReg128(nn));
      IRTemp argR  = math_WIDEN_LO_OR_HI_LANES(isU, is2, size, getQReg128(mm));
      IRTemp res   = newTempV128();
      assign(res, binop(isADD ? mkVecADD(size+1) : mkVecSUB(size+1),
                        mkexpr(argL), mkexpr(argR)));
      putQReg128(dd, mkexpr(res));
      const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
      const HChar* arrWide   = nameArr_Q_SZ(1, size+1);
      const HChar* nm = isADD ? (isU ? "uaddl" : "saddl")
                              : (isU ? "usubl" : "ssubl");
      DIP("%s%s %s.%s, %s.%s, %s.%s\n", nm, is2 ? "2" : "",
          nameQReg128(dd), arrWide,
          nameQReg128(nn), arrNarrow, nameQReg128(mm), arrNarrow);
      return True;
   }
   if (opcode == BITS4(0,0,0,1) || opcode == BITS4(0,0,1,1)) {
      /* -------- 0,0001 SADDW{2} -------- */
      /* -------- 1,0001 UADDW{2} -------- */
      /* -------- 0,0011 SSUBW{2} -------- */
      /* -------- 1,0011 USUBW{2} -------- */
      /* Widens, and size refers to the narrow lanes. */
      if (size == X11) return False;
      vassert(size <= 2);
      Bool   isU   = bitU == 1;
      Bool   isADD = opcode == BITS4(0,0,0,1);
      IRTemp argR  = math_WIDEN_LO_OR_HI_LANES(isU, is2, size, getQReg128(mm));
      IRTemp res   = newTempV128();
      assign(res, binop(isADD ? mkVecADD(size+1) : mkVecSUB(size+1),
                        getQReg128(nn), mkexpr(argR)));
      putQReg128(dd, mkexpr(res));
      const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
      const HChar* arrWide   = nameArr_Q_SZ(1, size+1);
      const HChar* nm = isADD ? (isU ? "uaddw" : "saddw")
                              : (isU ? "usubw" : "ssubw");
      DIP("%s%s %s.%s, %s.%s, %s.%s\n", nm, is2 ? "2" : "",
          nameQReg128(dd), arrWide,
          nameQReg128(nn), arrWide, nameQReg128(mm), arrNarrow);
      return True;
   }
   if (opcode == BITS4(0,1,0,0) || opcode == BITS4(0,1,1,0)) {
      /* -------- 0,0100  ADDHN{2} -------- */
      /* -------- 1,0100 RADDHN{2} -------- */
      /* -------- 0,0110  SUBHN{2} -------- */
      /* -------- 1,0110 RSUBHN{2} -------- */
      /* Narrows, and size refers to the narrowed lanes. */
      if (size == X11) return False;
      vassert(size <= 2);
      const UInt shift[3] = { 8, 16, 32 };
      Bool isADD = opcode == BITS4(0,1,0,0);
      Bool isR   = bitU == 1;
      /* Combined elements in wide lanes */
      IRTemp  wide  = newTempV128();
      IRExpr* wideE = binop(isADD ? mkVecADD(size+1) : mkVecSUB(size+1),
                            getQReg128(nn), getQReg128(mm));
      if (isR) {
         wideE = binop(mkVecADD(size+1),
                       wideE,
                       mkexpr(math_VEC_DUP_IMM(size+1,
                                               1ULL << (shift[size]-1))));
      }
      assign(wide, wideE);
      /* Top halves of elements, still in wide lanes */
      IRTemp shrd = newTempV128();
      assign(shrd, binop(mkVecSHRN(size+1), mkexpr(wide), mkU8(shift[size])));
      /* Elements now compacted into lower 64 bits */
      IRTemp new64 = newTempV128();
      assign(new64, binop(mkVecCATEVENLANES(size), mkexpr(shrd), mkexpr(shrd)));
      putLO64andZUorPutHI64(is2, dd, new64);
      const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
      const HChar* arrWide   = nameArr_Q_SZ(1, size+1);
      const HChar* nm = isADD ? (isR ? "raddhn" : "addhn")
                              : (isR ? "rsubhn" : "subhn");
      DIP("%s%s %s.%s, %s.%s, %s.%s\n", nm, is2 ? "2" : "",
          nameQReg128(dd), arrNarrow,
          nameQReg128(nn), arrWide, nameQReg128(mm), arrWide);
      return True;
   }
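   /* For the rounding (R) variants, adding 1 << (shift[size]-1), i.e. half
      the weight of the discarded low half, before taking the top half rounds
      the narrowed result to nearest rather than truncating it. */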
   if (opcode == BITS4(0,1,0,1) || opcode == BITS4(0,1,1,1)) {
      /* -------- 0,0101 SABAL{2} -------- */
      /* -------- 1,0101 UABAL{2} -------- */
      /* -------- 0,0111 SABDL{2} -------- */
      /* -------- 1,0111 UABDL{2} -------- */
      /* Widens, and size refers to the narrow lanes. */
      if (size == X11) return False;
      vassert(size <= 2);
      Bool   isU   = bitU == 1;
      Bool   isACC = opcode == BITS4(0,1,0,1);
      IRTemp argL  = math_WIDEN_LO_OR_HI_LANES(isU, is2, size, getQReg128(nn));
      IRTemp argR  = math_WIDEN_LO_OR_HI_LANES(isU, is2, size, getQReg128(mm));
      IRTemp abd   = math_ABD(isU, size+1, mkexpr(argL), mkexpr(argR));
      IRTemp res   = newTempV128();
      assign(res, isACC ? binop(mkVecADD(size+1), mkexpr(abd), getQReg128(dd))
                        : mkexpr(abd));
      putQReg128(dd, mkexpr(res));
      const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
      const HChar* arrWide   = nameArr_Q_SZ(1, size+1);
      const HChar* nm = isACC ? (isU ? "uabal" : "sabal")
                              : (isU ? "uabdl" : "sabdl");
      DIP("%s%s %s.%s, %s.%s, %s.%s\n", nm, is2 ? "2" : "",
          nameQReg128(dd), arrWide,
          nameQReg128(nn), arrNarrow, nameQReg128(mm), arrNarrow);
      return True;
   }
   if (opcode == BITS4(1,1,0,0)
       || opcode == BITS4(1,0,0,0) || opcode == BITS4(1,0,1,0)) {
      /* -------- 0,1100  SMULL{2} -------- */ // 0 (ks)
      /* -------- 1,1100  UMULL{2} -------- */ // 0
      /* -------- 0,1000  SMLAL{2} -------- */ // 1
      /* -------- 1,1000  UMLAL{2} -------- */ // 1
      /* -------- 0,1010  SMLSL{2} -------- */ // 2
      /* -------- 1,1010  UMLSL{2} -------- */ // 2
      /* Widens, and size refers to the narrow lanes. */
      UInt ks = 3;
      switch (opcode) {
         case BITS4(1,1,0,0): ks = 0; break;
         case BITS4(1,0,0,0): ks = 1; break;
         case BITS4(1,0,1,0): ks = 2; break;
         default: vassert(0);
      }
      vassert(ks >= 0 && ks <= 2);
      if (size == X11) return False;
      vassert(size <= 2);
      Bool   isU  = bitU == 1;
      IRTemp vecN = newTempV128();
      IRTemp vecM = newTempV128();
      IRTemp vecD = newTempV128();
      assign(vecN, getQReg128(nn));
      assign(vecM, getQReg128(mm));
      assign(vecD, getQReg128(dd));
      IRTemp res = IRTemp_INVALID;
      math_MULL_ACC(&res, is2, isU, size, "mas"[ks],
                    vecN, vecM, ks == 0 ? IRTemp_INVALID : vecD);
      putQReg128(dd, mkexpr(res));
      const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
      const HChar* arrWide   = nameArr_Q_SZ(1, size+1);
      const HChar* nm = ks == 0 ? "mull" : (ks == 1 ? "mlal" : "mlsl");
      DIP("%c%s%s %s.%s, %s.%s, %s.%s\n", isU ? 'u' : 's', nm, is2 ? "2" : "",
          nameQReg128(dd), arrWide,
          nameQReg128(nn), arrNarrow, nameQReg128(mm), arrNarrow);
      return True;
   }

   if (bitU == 0
       && (opcode == BITS4(1,1,0,1)
           || opcode == BITS4(1,0,0,1) || opcode == BITS4(1,0,1,1))) {
      /* -------- 0,1101  SQDMULL{2} -------- */ // 0 (ks)
      /* -------- 0,1001  SQDMLAL{2} -------- */ // 1
      /* -------- 0,1011  SQDMLSL{2} -------- */ // 2
      /* Widens, and size refers to the narrow lanes. */
      UInt ks = 3;
      switch (opcode) {
         case BITS4(1,1,0,1): ks = 0; break;
         case BITS4(1,0,0,1): ks = 1; break;
         case BITS4(1,0,1,1): ks = 2; break;
         default: vassert(0);
      }
      vassert(ks >= 0 && ks <= 2);
      if (size == X00 || size == X11) return False;
      vassert(size <= 2);
      IRTemp vecN, vecM, vecD, res, sat1q, sat1n, sat2q, sat2n;
      vecN = vecM = vecD = res = sat1q = sat1n = sat2q = sat2n = IRTemp_INVALID;
      newTempsV128_3(&vecN, &vecM, &vecD);
      assign(vecN, getQReg128(nn));
      assign(vecM, getQReg128(mm));
      assign(vecD, getQReg128(dd));
      math_SQDMULL_ACC(&res, &sat1q, &sat1n, &sat2q, &sat2n,
                       is2, size, "mas"[ks],
                       vecN, vecM, ks == 0 ? IRTemp_INVALID : vecD);
      putQReg128(dd, mkexpr(res));
      vassert(sat1q != IRTemp_INVALID && sat1n != IRTemp_INVALID);
      updateQCFLAGwithDifference(sat1q, sat1n);
      if (sat2q != IRTemp_INVALID || sat2n != IRTemp_INVALID) {
         updateQCFLAGwithDifference(sat2q, sat2n);
      }
      const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
      const HChar* arrWide   = nameArr_Q_SZ(1, size+1);
      const HChar* nm = ks == 0 ? "sqdmull"
                                : (ks == 1 ? "sqdmlal" : "sqdmlsl");
      DIP("%s%s %s.%s, %s.%s, %s.%s\n", nm, is2 ? "2" : "",
          nameQReg128(dd), arrWide,
          nameQReg128(nn), arrNarrow, nameQReg128(mm), arrNarrow);
      return True;
   }
   if (bitU == 0 && opcode == BITS4(1,1,1,0)) {
      /* -------- 0,1110  PMULL{2} -------- */
      /* Widens, and size refers to the narrow lanes. */
      if (size != X00 && size != X11) return False;
      IRTemp  res  = IRTemp_INVALID;
      IRExpr* srcN = getQReg128(nn);
      IRExpr* srcM = getQReg128(mm);
      const HChar* arrNarrow = NULL;
      const HChar* arrWide   = NULL;
      if (size == X00) {
         res = math_BINARY_WIDENING_V128(is2, Iop_PolynomialMull8x8,
                                         srcN, srcM);
         arrNarrow = nameArr_Q_SZ(bitQ, size);
         arrWide   = nameArr_Q_SZ(1, size+1);
      } else {
         /* The same thing as the X00 case, except we have to call
            a helper to do it. */
         vassert(size == X11);
         res = newTemp(Ity_V128);
         IROp slice
            = is2 ? Iop_V128HIto64 : Iop_V128to64;
         IRExpr** args
            = mkIRExprVec_3( IRExpr_VECRET(),
                             unop(slice, srcN), unop(slice, srcM));
         IRDirty* di
            = unsafeIRDirty_1_N( res, 0/*regparms*/,
                                 "arm64g_dirtyhelper_PMULLQ",
                                 &arm64g_dirtyhelper_PMULLQ, args);
         stmt(IRStmt_Dirty(di));
         /* We can't use nameArr_Q_SZ for this because it can't deal with
            Q-sized (128 bit) results.  Hence do it by hand. */
         arrNarrow = bitQ == 0 ? "1d" : "2d";
         arrWide   = "1q";
      }
      putQReg128(dd, mkexpr(res));
      DIP("%s%s %s.%s, %s.%s, %s.%s\n", "pmull", is2 ? "2" : "",
          nameQReg128(dd), arrWide,
          nameQReg128(nn), arrNarrow, nameQReg128(mm), arrNarrow);
      return True;
   }

   return False;
#  undef INSN
}


static
Bool dis_AdvSIMD_three_same(/*MB_OUT*/DisResult* dres, UInt insn)
{
   /* 31 30 29 28    23   21 20 15     10 9 4
      0  Q  U  01110 size 1  m  opcode 1  n d
      Decode fields: u,size,opcode
   */
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   if (INSN(31,31) != 0
       || INSN(28,24) != BITS5(0,1,1,1,0)
       || INSN(21,21) != 1
       || INSN(10,10) != 1) {
      return False;
   }
   UInt bitQ   = INSN(30,30);
   UInt bitU   = INSN(29,29);
   UInt size   = INSN(23,22);
   UInt mm     = INSN(20,16);
   UInt opcode = INSN(15,11);
   UInt nn     = INSN(9,5);
   UInt dd     = INSN(4,0);

   if (opcode == BITS5(0,0,0,0,0) || opcode == BITS5(0,0,1,0,0)) {
      /* -------- 0,xx,00000 SHADD std6_std6_std6 -------- */
      /* -------- 1,xx,00000 UHADD std6_std6_std6 -------- */
      /* -------- 0,xx,00100 SHSUB std6_std6_std6 -------- */
      /* -------- 1,xx,00100 UHSUB std6_std6_std6 -------- */
      if (size == X11) return False;
      Bool isADD = opcode == BITS5(0,0,0,0,0);
      Bool isU   = bitU == 1;
      /* Widen both args out, do the math, narrow to final result. */
      IRTemp argL   = newTempV128();
      IRTemp argLhi = IRTemp_INVALID;
      IRTemp argLlo = IRTemp_INVALID;
      IRTemp argR   = newTempV128();
      IRTemp argRhi = IRTemp_INVALID;
      IRTemp argRlo = IRTemp_INVALID;
      IRTemp resHi  = newTempV128();
      IRTemp resLo  = newTempV128();
      IRTemp res    = IRTemp_INVALID;
      assign(argL, getQReg128(nn));
      argLlo = math_WIDEN_LO_OR_HI_LANES(isU, False, size, mkexpr(argL));
      argLhi = math_WIDEN_LO_OR_HI_LANES(isU, True,  size, mkexpr(argL));
      assign(argR, getQReg128(mm));
      argRlo = math_WIDEN_LO_OR_HI_LANES(isU, False, size, mkexpr(argR));
      argRhi = math_WIDEN_LO_OR_HI_LANES(isU, True,  size, mkexpr(argR));
      IROp opADDSUB = isADD ? mkVecADD(size+1) : mkVecSUB(size+1);
      IROp opSxR    = isU ? mkVecSHRN(size+1) : mkVecSARN(size+1);
      assign(resHi, binop(opSxR,
                          binop(opADDSUB, mkexpr(argLhi), mkexpr(argRhi)),
                          mkU8(1)));
      assign(resLo, binop(opSxR,
                          binop(opADDSUB, mkexpr(argLlo), mkexpr(argRlo)),
                          mkU8(1)));
      res = math_NARROW_LANES ( resHi, resLo, size );
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* nm  = isADD ? (isU ? "uhadd" : "shadd")
                               : (isU ? "uhsub" : "shsub");
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }
   if (opcode == BITS5(0,0,0,1,0)) {
      /* -------- 0,xx,00010 SRHADD std7_std7_std7 -------- */
      /* -------- 1,xx,00010 URHADD std7_std7_std7 -------- */
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      Bool   isU  = bitU == 1;
      IRTemp argL = newTempV128();
      IRTemp argR = newTempV128();
      assign(argL, getQReg128(nn));
      assign(argR, getQReg128(mm));
      IRTemp res = math_RHADD(size, isU, argL, argR);
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s, %s.%s\n", isU ? "urhadd" : "srhadd",
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }
   if (opcode == BITS5(0,0,0,0,1) || opcode == BITS5(0,0,1,0,1)) {
      /* -------- 0,xx,00001 SQADD std7_std7_std7 -------- */
      /* -------- 1,xx,00001 UQADD std7_std7_std7 -------- */
      /* -------- 0,xx,00101 SQSUB std7_std7_std7 -------- */
      /* -------- 1,xx,00101 UQSUB std7_std7_std7 -------- */
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      Bool isADD = opcode == BITS5(0,0,0,0,1);
      Bool isU   = bitU == 1;
      IROp qop   = Iop_INVALID;
      IROp nop   = Iop_INVALID;
      if (isADD) {
         qop = isU ? mkVecQADDU(size) : mkVecQADDS(size);
         nop = mkVecADD(size);
      } else {
         qop = isU ? mkVecQSUBU(size) : mkVecQSUBS(size);
         nop = mkVecSUB(size);
      }
      IRTemp argL = newTempV128();
      IRTemp argR = newTempV128();
      IRTemp qres = newTempV128();
      IRTemp nres = newTempV128();
      assign(argL, getQReg128(nn));
      assign(argR, getQReg128(mm));
      assign(qres, math_MAYBE_ZERO_HI64_fromE(
                      bitQ, binop(qop, mkexpr(argL), mkexpr(argR))));
      assign(nres, math_MAYBE_ZERO_HI64_fromE(
                      bitQ, binop(nop, mkexpr(argL), mkexpr(argR))));
      putQReg128(dd, mkexpr(qres));
      updateQCFLAGwithDifference(qres, nres);
      const HChar* nm  = isADD ? (isU ? "uqadd" : "sqadd")
                               : (isU ? "uqsub" : "sqsub");
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }
   if (bitU == 0 && opcode == BITS5(0,0,0,1,1)) {
      /* -------- 0,00,00011 AND 16b_16b_16b, 8b_8b_8b -------- */
      /* -------- 0,01,00011 BIC 16b_16b_16b, 8b_8b_8b -------- */
      /* -------- 0,10,00011 ORR 16b_16b_16b, 8b_8b_8b -------- */
      /* -------- 0,11,00011 ORN 16b_16b_16b, 8b_8b_8b -------- */
      Bool   isORx  = (size & 2) == 2;
      Bool   invert = (size & 1) == 1;
      IRTemp res    = newTempV128();
      assign(res, binop(isORx ? Iop_OrV128 : Iop_AndV128,
                        getQReg128(nn),
                        invert ? unop(Iop_NotV128, getQReg128(mm))
                               : getQReg128(mm)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* names[4] = { "and", "bic", "orr", "orn" };
      const HChar* ar = bitQ == 1 ? "16b" : "8b";
      DIP("%s %s.%s, %s.%s, %s.%s\n", names[INSN(23,22)],
          nameQReg128(dd), ar, nameQReg128(nn), ar, nameQReg128(mm), ar);
      return True;
   }
== 1 && opcode
== BITS5(0,0,0,1,1)) {
12553 /* -------- 1,00,00011 EOR 16b_16b_16b, 8b_8b_8b -------- */
12554 /* -------- 1,01,00011 BSL 16b_16b_16b, 8b_8b_8b -------- */
12555 /* -------- 1,10,00011 BIT 16b_16b_16b, 8b_8b_8b -------- */
12556 /* -------- 1,10,00011 BIF 16b_16b_16b, 8b_8b_8b -------- */
12557 IRTemp argD
= newTempV128();
12558 IRTemp argN
= newTempV128();
12559 IRTemp argM
= newTempV128();
12560 assign(argD
, getQReg128(dd
));
12561 assign(argN
, getQReg128(nn
));
12562 assign(argM
, getQReg128(mm
));
12563 const IROp opXOR
= Iop_XorV128
;
12564 const IROp opAND
= Iop_AndV128
;
12565 const IROp opNOT
= Iop_NotV128
;
12566 IRTemp res
= newTempV128();
12568 case BITS2(0,0): /* EOR */
12569 assign(res
, binop(opXOR
, mkexpr(argM
), mkexpr(argN
)));
12571 case BITS2(0,1): /* BSL */
12572 assign(res
, binop(opXOR
, mkexpr(argM
),
12574 binop(opXOR
, mkexpr(argM
), mkexpr(argN
)),
12577 case BITS2(1,0): /* BIT */
12578 assign(res
, binop(opXOR
, mkexpr(argD
),
12580 binop(opXOR
, mkexpr(argD
), mkexpr(argN
)),
12583 case BITS2(1,1): /* BIF */
12584 assign(res
, binop(opXOR
, mkexpr(argD
),
12586 binop(opXOR
, mkexpr(argD
), mkexpr(argN
)),
12587 unop(opNOT
, mkexpr(argM
)))));
12592 putQReg128(dd
, math_MAYBE_ZERO_HI64(bitQ
, res
));
12593 const HChar
* nms
[4] = { "eor", "bsl", "bit", "bif" };
12594 const HChar
* arr
= bitQ
== 1 ? "16b" : "8b";
12595 DIP("%s %s.%s, %s.%s, %s.%s\n", nms
[size
],
12596 nameQReg128(dd
), arr
, nameQReg128(nn
), arr
, nameQReg128(mm
), arr
);
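   /* The three bitwise-select forms above all use the identity
      x ^ ((x ^ y) & m)  ==  (y & m) | (x & ~m): BSL selects between M and N
      under control of D, BIT inserts N into D under M, and BIF inserts N
      into D under the complement of M. */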
   if (opcode == BITS5(0,0,1,1,0)) {
      /* -------- 0,xx,00110 CMGT std7_std7_std7 -------- */ // >s
      /* -------- 1,xx,00110 CMHI std7_std7_std7 -------- */ // >u
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      Bool isGT = bitU == 0;
      IRExpr* argL = getQReg128(nn);
      IRExpr* argR = getQReg128(mm);
      IRTemp  res  = newTempV128();
      assign(res,
             isGT ? binop(mkVecCMPGTS(size), argL, argR)
                  : binop(mkVecCMPGTU(size), argL, argR));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* nm  = isGT ? "cmgt" : "cmhi";
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }
   if (opcode == BITS5(0,0,1,1,1)) {
      /* -------- 0,xx,00111 CMGE std7_std7_std7 -------- */ // >=s
      /* -------- 1,xx,00111 CMHS std7_std7_std7 -------- */ // >=u
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      Bool isGE = bitU == 0;
      IRExpr* argL = getQReg128(nn);
      IRExpr* argR = getQReg128(mm);
      IRTemp  res  = newTempV128();
      assign(res,
             isGE ? unop(Iop_NotV128, binop(mkVecCMPGTS(size), argR, argL))
                  : unop(Iop_NotV128, binop(mkVecCMPGTU(size), argR, argL)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* nm  = isGE ? "cmge" : "cmhs";
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }
   if (opcode == BITS5(0,1,0,0,0) || opcode == BITS5(0,1,0,1,0)) {
      /* -------- 0,xx,01000 SSHL  std7_std7_std7 -------- */
      /* -------- 0,xx,01010 SRSHL std7_std7_std7 -------- */
      /* -------- 1,xx,01000 USHL  std7_std7_std7 -------- */
      /* -------- 1,xx,01010 URSHL std7_std7_std7 -------- */
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      Bool isU = bitU == 1;
      Bool isR = opcode == BITS5(0,1,0,1,0);
      IROp op  = isR ? (isU ? mkVecRSHU(size) : mkVecRSHS(size))
                     : (isU ? mkVecSHU(size) : mkVecSHS(size));
      IRTemp res = newTempV128();
      assign(res, binop(op, getQReg128(nn), getQReg128(mm)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* nm  = isR ? (isU ? "urshl" : "srshl")
                             : (isU ? "ushl" : "sshl");
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }
   if (opcode == BITS5(0,1,0,0,1) || opcode == BITS5(0,1,0,1,1)) {
      /* -------- 0,xx,01001 SQSHL  std7_std7_std7 -------- */
      /* -------- 0,xx,01011 SQRSHL std7_std7_std7 -------- */
      /* -------- 1,xx,01001 UQSHL  std7_std7_std7 -------- */
      /* -------- 1,xx,01011 UQRSHL std7_std7_std7 -------- */
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      Bool isU = bitU == 1;
      Bool isR = opcode == BITS5(0,1,0,1,1);
      IROp op  = isR ? (isU ? mkVecQANDUQRSH(size) : mkVecQANDSQRSH(size))
                     : (isU ? mkVecQANDUQSH(size) : mkVecQANDSQSH(size));
      /* This is a bit tricky.  If we're only interested in the lowest 64 bits
         of the result (viz, bitQ == 0), then we must adjust the operands to
         ensure that the upper part of the result, that we don't care about,
         doesn't pollute the returned Q value.  To do this, zero out the upper
         operand halves beforehand.  This works because it means, for the
         lanes we don't care about, we are shifting zero by zero, which can
         never saturate. */
      IRTemp res256 = newTemp(Ity_V256);
      IRTemp resSH  = newTempV128();
      IRTemp resQ   = newTempV128();
      IRTemp zero   = newTempV128();
      assign(res256, binop(op,
                           math_MAYBE_ZERO_HI64_fromE(bitQ, getQReg128(nn)),
                           math_MAYBE_ZERO_HI64_fromE(bitQ, getQReg128(mm))));
      assign(resSH, unop(Iop_V256toV128_0, mkexpr(res256)));
      assign(resQ,  unop(Iop_V256toV128_1, mkexpr(res256)));
      assign(zero,  mkV128(0x0000));
      putQReg128(dd, mkexpr(resSH));
      updateQCFLAGwithDifference(resQ, zero);
      const HChar* nm  = isR ? (isU ? "uqrshl" : "sqrshl")
                             : (isU ? "uqshl" : "sqshl");
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }
   if (opcode == BITS5(0,1,1,0,0) || opcode == BITS5(0,1,1,0,1)) {
      /* -------- 0,xx,01100 SMAX std7_std7_std7 -------- */
      /* -------- 1,xx,01100 UMAX std7_std7_std7 -------- */
      /* -------- 0,xx,01101 SMIN std7_std7_std7 -------- */
      /* -------- 1,xx,01101 UMIN std7_std7_std7 -------- */
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      Bool isU   = bitU == 1;
      Bool isMAX = (opcode & 1) == 0;
      IROp op    = isMAX ? (isU ? mkVecMAXU(size) : mkVecMAXS(size))
                         : (isU ? mkVecMINU(size) : mkVecMINS(size));
      IRTemp t = newTempV128();
      assign(t, binop(op, getQReg128(nn), getQReg128(mm)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t));
      const HChar* nm  = isMAX ? (isU ? "umax" : "smax")
                               : (isU ? "umin" : "smin");
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }
   if (opcode == BITS5(0,1,1,1,0) || opcode == BITS5(0,1,1,1,1)) {
      /* -------- 0,xx,01110 SABD std6_std6_std6 -------- */
      /* -------- 1,xx,01110 UABD std6_std6_std6 -------- */
      /* -------- 0,xx,01111 SABA std6_std6_std6 -------- */
      /* -------- 1,xx,01111 UABA std6_std6_std6 -------- */
      if (size == X11) return False; // 1d/2d cases not allowed
      Bool isU   = bitU == 1;
      Bool isACC = opcode == BITS5(0,1,1,1,1);
      vassert(size <= 2);
      IRTemp t1 = math_ABD(isU, size, getQReg128(nn), getQReg128(mm));
      IRTemp t2 = newTempV128();
      assign(t2, isACC ? binop(mkVecADD(size), mkexpr(t1), getQReg128(dd))
                       : mkexpr(t1));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t2));
      const HChar* nm  = isACC ? (isU ? "uaba" : "saba")
                               : (isU ? "uabd" : "sabd");
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }
   if (opcode == BITS5(1,0,0,0,0)) {
      /* -------- 0,xx,10000 ADD std7_std7_std7 -------- */
      /* -------- 1,xx,10000 SUB std7_std7_std7 -------- */
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      Bool isSUB = bitU == 1;
      IROp op    = isSUB ? mkVecSUB(size) : mkVecADD(size);
      IRTemp t   = newTempV128();
      assign(t, binop(op, getQReg128(nn), getQReg128(mm)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t));
      const HChar* nm  = isSUB ? "sub" : "add";
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }
   if (opcode == BITS5(1,0,0,0,1)) {
      /* -------- 0,xx,10001 CMTST std7_std7_std7 -------- */ // &, != 0
      /* -------- 1,xx,10001 CMEQ  std7_std7_std7 -------- */ // ==
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      Bool isEQ = bitU == 1;
      IRExpr* argL = getQReg128(nn);
      IRExpr* argR = getQReg128(mm);
      IRTemp  res  = newTempV128();
      assign(res,
             isEQ ? binop(mkVecCMPEQ(size), argL, argR)
                  : unop(Iop_NotV128, binop(mkVecCMPEQ(size),
                                            binop(Iop_AndV128, argL, argR),
                                            mkV128(0x0000))));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* nm  = isEQ ? "cmeq" : "cmtst";
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }
   if (opcode == BITS5(1,0,0,1,0)) {
      /* -------- 0,xx,10010 MLA std7_std7_std7 -------- */
      /* -------- 1,xx,10010 MLS std7_std7_std7 -------- */
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      Bool isMLS    = bitU == 1;
      IROp opMUL    = mkVecMUL(size);
      IROp opADDSUB = isMLS ? mkVecSUB(size) : mkVecADD(size);
      IRTemp res = newTempV128();
      if (opMUL != Iop_INVALID && opADDSUB != Iop_INVALID) {
         assign(res, binop(opADDSUB,
                           getQReg128(dd),
                           binop(opMUL, getQReg128(nn), getQReg128(mm))));
         putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
         const HChar* arr = nameArr_Q_SZ(bitQ, size);
         DIP("%s %s.%s, %s.%s, %s.%s\n", isMLS ? "mls" : "mla",
             nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
         return True;
      }
      return False;
   }
12797 if (opcode
== BITS5(1,0,0,1,1)) {
12798 /* -------- 0,xx,10011 MUL std7_std7_std7 -------- */
12799 /* -------- 1,xx,10011 PMUL 16b_16b_16b, 8b_8b_8b -------- */
12800 if (bitQ
== 0 && size
== X11
) return False
; // implied 1d case
12801 Bool isPMUL
= bitU
== 1;
12802 const IROp opsPMUL
[4]
12803 = { Iop_PolynomialMul8x16
, Iop_INVALID
, Iop_INVALID
, Iop_INVALID
};
12804 IROp opMUL
= isPMUL
? opsPMUL
[size
] : mkVecMUL(size
);
12805 IRTemp res
= newTempV128();
12806 if (opMUL
!= Iop_INVALID
) {
12807 assign(res
, binop(opMUL
, getQReg128(nn
), getQReg128(mm
)));
12808 putQReg128(dd
, math_MAYBE_ZERO_HI64(bitQ
, res
));
12809 const HChar
* arr
= nameArr_Q_SZ(bitQ
, size
);
12810 DIP("%s %s.%s, %s.%s, %s.%s\n", isPMUL
? "pmul" : "mul",
12811 nameQReg128(dd
), arr
, nameQReg128(nn
), arr
, nameQReg128(mm
), arr
);
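
   /* Note on the integer pairwise cases (SMAXP/UMAXP/SMINP/UMINP, and ADDP
      further below): there is no dedicated pairwise primop.  Instead the
      even lanes and the odd lanes of the M:N concatenation are gathered
      into two full-width vectors (CATEVENLANES/CATODDLANES) and the plain
      lanewise op is applied to those; corresponding lanes of the gathered
      vectors are exactly the adjacent pairs of the original elements. */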
   if (opcode == BITS5(1,0,1,0,0) || opcode == BITS5(1,0,1,0,1)) {
      /* -------- 0,xx,10100 SMAXP std6_std6_std6 -------- */
      /* -------- 1,xx,10100 UMAXP std6_std6_std6 -------- */
      /* -------- 0,xx,10101 SMINP std6_std6_std6 -------- */
      /* -------- 1,xx,10101 UMINP std6_std6_std6 -------- */
      if (size == X11) return False;
      Bool isU   = bitU == 1;
      Bool isMAX = opcode == BITS5(1,0,1,0,0);
      IRTemp vN  = newTempV128();
      IRTemp vM  = newTempV128();
      IROp op = isMAX ? (isU ? mkVecMAXU(size) : mkVecMAXS(size))
                      : (isU ? mkVecMINU(size) : mkVecMINS(size));
      assign(vN, getQReg128(nn));
      assign(vM, getQReg128(mm));
      IRTemp res128 = newTempV128();
      assign(res128,
             binop(op,
                   binop(mkVecCATEVENLANES(size), mkexpr(vM), mkexpr(vN)),
                   binop(mkVecCATODDLANES(size),  mkexpr(vM), mkexpr(vN))));
      /* In the half-width case, use CatEL32x4 to extract the half-width
         result from the full-width result. */
      IRExpr* res
         = bitQ == 0 ? unop(Iop_ZeroHI64ofV128,
                            binop(Iop_CatEvenLanes32x4, mkexpr(res128),
                                                        mkexpr(res128)))
                     : mkexpr(res128);
      putQReg128(dd, res);
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      const HChar* nm  = isMAX ? (isU ? "umaxp" : "smaxp")
                               : (isU ? "uminp" : "sminp");
      DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }
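
   /* Note for the saturating cases that follow: the math_* helpers return
      both a saturated result and the result computed without saturation.
      updateQCFLAGwithDifferenceZHI then sets the QC (cumulative saturation)
      flag if the two differ in the lanes actually written back. */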
   if (opcode == BITS5(1,0,1,1,0)) {
      /* -------- 0,xx,10110 SQDMULH s and h variants only -------- */
      /* -------- 1,xx,10110 SQRDMULH s and h variants only -------- */
      if (size == X00 || size == X11) return False;
      Bool isR = bitU == 1;
      IRTemp res, sat1q, sat1n, vN, vM;
      res = sat1q = sat1n = vN = vM = IRTemp_INVALID;
      newTempsV128_2(&vN, &vM);
      assign(vN, getQReg128(nn));
      assign(vM, getQReg128(mm));
      math_SQDMULH(&res, &sat1q, &sat1n, isR, size, vN, vM);
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      IROp opZHI = bitQ == 0 ? Iop_ZeroHI64ofV128 : Iop_INVALID;
      updateQCFLAGwithDifferenceZHI(sat1q, sat1n, opZHI);
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      const HChar* nm  = isR ? "sqrdmulh" : "sqdmulh";
      DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }
   if (bitU == 0 && opcode == BITS5(1,0,1,1,1)) {
      /* -------- 0,xx,10111 ADDP std7_std7_std7 -------- */
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      IRTemp vN = newTempV128();
      IRTemp vM = newTempV128();
      assign(vN, getQReg128(nn));
      assign(vM, getQReg128(mm));
      IRTemp res128 = newTempV128();
      assign(res128,
             binop(mkVecADD(size),
                   binop(mkVecCATEVENLANES(size), mkexpr(vM), mkexpr(vN)),
                   binop(mkVecCATODDLANES(size),  mkexpr(vM), mkexpr(vN))));
      /* In the half-width case, use CatEL32x4 to extract the half-width
         result from the full-width result. */
      IRExpr* res
         = bitQ == 0 ? unop(Iop_ZeroHI64ofV128,
                            binop(Iop_CatEvenLanes32x4, mkexpr(res128),
                                                        mkexpr(res128)))
                     : mkexpr(res128);
      putQReg128(dd, res);
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("addp %s.%s, %s.%s, %s.%s\n",
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   if (bitU == 0
       && (opcode == BITS5(1,1,0,0,0) || opcode == BITS5(1,1,1,1,0))) {
      /* -------- 0,0x,11000 FMAXNM 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      /* -------- 0,1x,11000 FMINNM 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      /* -------- 0,0x,11110 FMAX 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      /* -------- 0,1x,11110 FMIN 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      /* FMAXNM, FMINNM: FIXME -- KLUDGED */
      Bool isD = (size & 1) == 1;
      if (bitQ == 0 && isD) return False; // implied 1d case
      Bool   isMIN = (size & 2) == 2;
      Bool   isNM  = opcode == BITS5(1,1,0,0,0);
      IROp   opMXX = (isMIN ? mkVecMINF : mkVecMAXF)(isD ? X11 : X10);
      IRTemp res   = newTempV128();
      assign(res, binop(opMXX, getQReg128(nn), getQReg128(mm)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
      DIP("%s%s %s.%s, %s.%s, %s.%s\n",
          isMIN ? "fmin" : "fmax", isNM ? "nm" : "",
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }
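
   /* Note for the FMLA/FMLS case below: the fused multiply-add is split
      into a separate multiply and add/sub (see the FIXME), so the product
      is rounded before the accumulate and the result can differ from a
      true fused operation in the least significant mantissa bit.  The
      rounding mode comes from FPCR via mk_get_IR_rounding_mode(). */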
   if (bitU == 0 && opcode == BITS5(1,1,0,0,1)) {
      /* -------- 0,0x,11001 FMLA 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      /* -------- 0,1x,11001 FMLS 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      Bool isD   = (size & 1) == 1;
      Bool isSUB = (size & 2) == 2;
      if (bitQ == 0 && isD) return False; // implied 1d case
      IROp opADD = isD ? Iop_Add64Fx2 : Iop_Add32Fx4;
      IROp opSUB = isD ? Iop_Sub64Fx2 : Iop_Sub32Fx4;
      IROp opMUL = isD ? Iop_Mul64Fx2 : Iop_Mul32Fx4;
      IRTemp rm = mk_get_IR_rounding_mode();
      IRTemp t1 = newTempV128();
      IRTemp t2 = newTempV128();
      // FIXME: double rounding; use FMA primops instead
      assign(t1, triop(opMUL,
                       mkexpr(rm), getQReg128(nn), getQReg128(mm)));
      assign(t2, triop(isSUB ? opSUB : opADD,
                       mkexpr(rm), getQReg128(dd), mkexpr(t1)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t2));
      const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
      DIP("%s %s.%s, %s.%s, %s.%s\n", isSUB ? "fmls" : "fmla",
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   if (bitU == 0 && opcode == BITS5(1,1,0,1,0)) {
      /* -------- 0,0x,11010 FADD 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      /* -------- 0,1x,11010 FSUB 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      Bool isD   = (size & 1) == 1;
      Bool isSUB = (size & 2) == 2;
      if (bitQ == 0 && isD) return False; // implied 1d case
      const IROp ops[4]
         = { Iop_Add32Fx4, Iop_Add64Fx2, Iop_Sub32Fx4, Iop_Sub64Fx2 };
      IROp   op = ops[size];
      IRTemp rm = mk_get_IR_rounding_mode();
      IRTemp t1 = newTempV128();
      IRTemp t2 = newTempV128();
      assign(t1, triop(op, mkexpr(rm), getQReg128(nn), getQReg128(mm)));
      assign(t2, math_MAYBE_ZERO_HI64(bitQ, t1));
      putQReg128(dd, mkexpr(t2));
      const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
      DIP("%s %s.%s, %s.%s, %s.%s\n", isSUB ? "fsub" : "fadd",
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }
   if (bitU == 1 && size >= X10 && opcode == BITS5(1,1,0,1,0)) {
      /* -------- 1,1x,11010 FABD 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      Bool isD = (size & 1) == 1;
      if (bitQ == 0 && isD) return False; // implied 1d case
      IROp opSUB = isD ? Iop_Sub64Fx2 : Iop_Sub32Fx4;
      IROp opABS = isD ? Iop_Abs64Fx2 : Iop_Abs32Fx4;
      IRTemp rm = mk_get_IR_rounding_mode();
      IRTemp t1 = newTempV128();
      IRTemp t2 = newTempV128();
      // FIXME: use Abd primop instead?
      assign(t1, triop(opSUB, mkexpr(rm), getQReg128(nn), getQReg128(mm)));
      assign(t2, unop(opABS, mkexpr(t1)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t2));
      const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
      DIP("fabd %s.%s, %s.%s, %s.%s\n",
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   if (size <= X01 && opcode == BITS5(1,1,0,1,1)) {
      /* -------- 0,0x,11011 FMULX 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      /* -------- 1,0x,11011 FMUL 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      // KLUDGE: FMULX is treated the same way as FMUL. That can't be right.
      Bool isD    = (size & 1) == 1;
      Bool isMULX = bitU == 0;
      if (bitQ == 0 && isD) return False; // implied 1d case
      IRTemp rm = mk_get_IR_rounding_mode();
      IRTemp t1 = newTempV128();
      assign(t1, triop(isD ? Iop_Mul64Fx2 : Iop_Mul32Fx4,
                       mkexpr(rm), getQReg128(nn), getQReg128(mm)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t1));
      const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
      DIP("%s %s.%s, %s.%s, %s.%s\n", isMULX ? "fmulx" : "fmul",
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }
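
   /* Note for the FP compare cases below: only EQ/LE/LT vector compare
      primops are available, so GE and GT are implemented by swapping the
      operands and using LE/LT instead (marked "swapd"). */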
   if (size <= X01 && opcode == BITS5(1,1,1,0,0)) {
      /* -------- 0,0x,11100 FCMEQ 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      /* -------- 1,0x,11100 FCMGE 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      Bool isD = (size & 1) == 1;
      if (bitQ == 0 && isD) return False; // implied 1d case
      Bool   isGE  = bitU == 1;
      IROp   opCMP = isGE ? (isD ? Iop_CmpLE64Fx2 : Iop_CmpLE32Fx4)
                          : (isD ? Iop_CmpEQ64Fx2 : Iop_CmpEQ32Fx4);
      IRTemp t1    = newTempV128();
      assign(t1, isGE ? binop(opCMP, getQReg128(mm), getQReg128(nn)) // swapd
                      : binop(opCMP, getQReg128(nn), getQReg128(mm)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t1));
      const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
      DIP("%s %s.%s, %s.%s, %s.%s\n", isGE ? "fcmge" : "fcmeq",
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   if (bitU == 1 && size >= X10 && opcode == BITS5(1,1,1,0,0)) {
      /* -------- 1,1x,11100 FCMGT 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      Bool isD = (size & 1) == 1;
      if (bitQ == 0 && isD) return False; // implied 1d case
      IROp   opCMP = isD ? Iop_CmpLT64Fx2 : Iop_CmpLT32Fx4;
      IRTemp t1    = newTempV128();
      assign(t1, binop(opCMP, getQReg128(mm), getQReg128(nn))); // swapd
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t1));
      const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
      DIP("%s %s.%s, %s.%s, %s.%s\n", "fcmgt",
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   if (bitU == 1 && opcode == BITS5(1,1,1,0,1)) {
      /* -------- 1,0x,11101 FACGE 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      /* -------- 1,1x,11101 FACGT 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      Bool isD  = (size & 1) == 1;
      Bool isGT = (size & 2) == 2;
      if (bitQ == 0 && isD) return False; // implied 1d case
      IROp   opCMP = isGT ? (isD ? Iop_CmpLT64Fx2 : Iop_CmpLT32Fx4)
                          : (isD ? Iop_CmpLE64Fx2 : Iop_CmpLE32Fx4);
      IROp   opABS = isD ? Iop_Abs64Fx2 : Iop_Abs32Fx4;
      IRTemp t1    = newTempV128();
      assign(t1, binop(opCMP, unop(opABS, getQReg128(mm)),
                              unop(opABS, getQReg128(nn)))); // swapd
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t1));
      const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
      DIP("%s %s.%s, %s.%s, %s.%s\n", isGT ? "facgt" : "facge",
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }
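
   /* Note for the FP pairwise cases below: math_REARRANGE_FOR_FLOATING_PAIRWISE
      is assumed to split the M:N concatenation into two vectors (preL, preR)
      whose corresponding lanes hold the two members of each adjacent pair, so
      that a plain lanewise op on preL/preR yields the pairwise result. */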
   if (bitU == 1
       && (opcode == BITS5(1,1,0,0,0) || opcode == BITS5(1,1,1,1,0))) {
      /* -------- 1,0x,11000 FMAXNMP 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      /* -------- 1,1x,11000 FMINNMP 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      /* -------- 1,0x,11110 FMAXP 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      /* -------- 1,1x,11110 FMINP 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      /* FMAXNM, FMINNM: FIXME -- KLUDGED */
      Bool isD = (size & 1) == 1;
      if (bitQ == 0 && isD) return False; // implied 1d case
      Bool   isMIN = (size & 2) == 2;
      Bool   isNM  = opcode == BITS5(1,1,0,0,0);
      IROp   opMXX = (isMIN ? mkVecMINF : mkVecMAXF)(isD ? 3 : 2);
      IRTemp srcN  = newTempV128();
      IRTemp srcM  = newTempV128();
      IRTemp preL  = IRTemp_INVALID;
      IRTemp preR  = IRTemp_INVALID;
      assign(srcN, getQReg128(nn));
      assign(srcM, getQReg128(mm));
      math_REARRANGE_FOR_FLOATING_PAIRWISE(&preL, &preR, srcM, srcN,
                                           isD ? ARM64VSizeD : ARM64VSizeS, bitQ);
      putQReg128(
         dd, math_MAYBE_ZERO_HI64_fromE(
                bitQ,
                binop(opMXX, mkexpr(preL), mkexpr(preR))));
      const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
      DIP("%s%sp %s.%s, %s.%s, %s.%s\n",
          isMIN ? "fmin" : "fmax", isNM ? "nm" : "",
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }
   if (bitU == 1 && size <= X01 && opcode == BITS5(1,1,0,1,0)) {
      /* -------- 1,0x,11010 FADDP 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      Bool isD = size == X01;
      if (bitQ == 0 && isD) return False; // implied 1d case
      IRTemp srcN = newTempV128();
      IRTemp srcM = newTempV128();
      IRTemp preL = IRTemp_INVALID;
      IRTemp preR = IRTemp_INVALID;
      assign(srcN, getQReg128(nn));
      assign(srcM, getQReg128(mm));
      math_REARRANGE_FOR_FLOATING_PAIRWISE(&preL, &preR, srcM, srcN,
                                           isD ? ARM64VSizeD : ARM64VSizeS, bitQ);
      putQReg128(
         dd, math_MAYBE_ZERO_HI64_fromE(
                bitQ,
                triop(mkVecADDF(isD ? 3 : 2),
                      mkexpr(mk_get_IR_rounding_mode()),
                      mkexpr(preL), mkexpr(preR))));
      const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
      DIP("%s %s.%s, %s.%s, %s.%s\n", "faddp",
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   if (bitU == 1 && size <= X01 && opcode == BITS5(1,1,1,1,1)) {
      /* -------- 1,0x,11111 FDIV 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      Bool isD = (size & 1) == 1;
      if (bitQ == 0 && isD) return False; // implied 1d case
      vassert(size <= 1);
      const IROp ops[2] = { Iop_Div32Fx4, Iop_Div64Fx2 };
      IROp   op = ops[size];
      IRTemp rm = mk_get_IR_rounding_mode();
      IRTemp t1 = newTempV128();
      IRTemp t2 = newTempV128();
      assign(t1, triop(op, mkexpr(rm), getQReg128(nn), getQReg128(mm)));
      assign(t2, math_MAYBE_ZERO_HI64(bitQ, t1));
      putQReg128(dd, mkexpr(t2));
      const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
      DIP("%s %s.%s, %s.%s, %s.%s\n", "fdiv",
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   if (bitU == 0 && opcode == BITS5(1,1,1,1,1)) {
      /* -------- 0,0x,11111: FRECPS  2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      /* -------- 0,1x,11111: FRSQRTS 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      Bool isSQRT = (size & 2) == 2;
      Bool isD    = (size & 1) == 1;
      if (bitQ == 0 && isD) return False; // implied 1d case
      IROp op = isSQRT ? (isD ? Iop_RSqrtStep64Fx2 : Iop_RSqrtStep32Fx4)
                       : (isD ? Iop_RecipStep64Fx2 : Iop_RecipStep32Fx4);
      IRTemp res = newTempV128();
      assign(res, binop(op, getQReg128(nn), getQReg128(mm)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
      DIP("%s %s.%s, %s.%s, %s.%s\n", isSQRT ? "frsqrts" : "frecps",
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   return False;
#  undef INSN
}

static
Bool dis_AdvSIMD_three_same_extra(/*MB_OUT*/DisResult* dres, UInt insn)
{
   /* 31 30 29 28    23   21 20 15 14     10 9 4
      0  Q  U  01110 size 0  m  1  opcode 1  n d
      Decode fields: u,size,opcode
   */
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   if (INSN(31,31) != 0
       || INSN(28,24) != BITS5(0,1,1,1,0)
       || INSN(21,21) != 0
       || INSN(15,15) != 1
       || INSN(10,10) != 1) {
      return False;
   }
   UInt bitQ   = INSN(30,30);
   UInt bitU   = INSN(29,29);
   UInt size   = INSN(23,22);
   UInt mm     = INSN(20,16);
   UInt opcode = INSN(14,11);
   UInt nn     = INSN(9,5);
   UInt dd     = INSN(4,0);

   vassert(mm < 32 && nn < 32 && dd < 32);

   if (bitU == 1 && (opcode == BITS4(0,0,0,0) || opcode == BITS4(0,0,0,1))) {
      /* -------- 0,xx,10110 SQRDMLAH s and h variants only -------- */
      /* -------- 1,xx,10110 SQRDMLSH s and h variants only -------- */
      if (size == X00 || size == X11) return False;
      Bool isAdd = opcode == BITS4(0,0,0,0);

      IRTemp res, res_nosat, vD, vN, vM;
      res = res_nosat = vD = vN = vM = IRTemp_INVALID;
      newTempsV128_3(&vD, &vN, &vM);
      assign(vD, getQReg128(dd));
      assign(vN, getQReg128(nn));
      assign(vM, getQReg128(mm));

      math_SQRDMLAH(&res, &res_nosat, isAdd, size, vD, vN, vM);
      IROp opZHI = bitQ == 0 ? Iop_ZeroHI64ofV128 : Iop_INVALID;
      updateQCFLAGwithDifferenceZHI(res, res_nosat, opZHI);
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));

      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      const HChar* nm  = isAdd ? "sqrdmlah" : "sqrdmlsh";
      DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   return False;
#  undef INSN
}

static
Bool dis_AdvSIMD_three_same_fp16(/*MB_OUT*/DisResult* dres, UInt insn,
                                 const VexArchInfo* archinfo)
{
   /* This decode function only handles instructions with half-precision
      floating-point (fp16) operands.
   */
   if ((archinfo->hwcaps & VEX_HWCAPS_ARM64_FP16) == 0)
      return False;

   /* 31 30 29 28    23   21 20 15     10 9 4
      0  Q  U  01110 size 0  m  opcode 1  n d
      Decode fields: u,size,opcode
   */
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   if (INSN(31,31) != 0
       || INSN(28,24) != BITS5(0,1,1,1,0)
       || INSN(21,21) != 0
       || INSN(10,10) != 1) {
      return False;
   }
   UInt bitQ   = INSN(30,30);
   UInt bitU   = INSN(29,29);
   UInt size   = INSN(23,22);
   UInt mm     = INSN(20,16);
   UInt opcode = INSN(15,11);
   UInt nn     = INSN(9,5);
   UInt dd     = INSN(4,0);

   vassert(mm < 32 && nn < 32 && dd < 32);

   if (bitU == 1 && size == X01 && opcode == BITS5(0,0,0,1,0)) {
      /* -------- 1,01,00010 FADDP 4h_4h_4h, 8h_8h_8h -------- */
      IROp opADD = mkVecADDF(1); //bitQ == 0 ? 0 : 1);
      IRTemp srcN = newTempV128();
      IRTemp srcM = newTempV128();
      IRTemp preL = IRTemp_INVALID;
      IRTemp preR = IRTemp_INVALID;
      assign(srcN, getQReg128(nn));
      assign(srcM, getQReg128(mm));
      math_REARRANGE_FOR_FLOATING_PAIRWISE(&preL, &preR, srcM, srcN,
                                           ARM64VSizeH, bitQ);
      putQReg128(
         dd, math_MAYBE_ZERO_HI64_fromE(
                bitQ,
                triop(opADD, mkexpr(mk_get_IR_rounding_mode()),
                      mkexpr(preL), mkexpr(preR))));
      const HChar* arr = bitQ == 0 ? "4h" : "8h";
      DIP("%s %s.%s, %s.%s, %s.%s\n", "faddp",
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }
   if (bitU == 1 && size == X11 && opcode == BITS5(0,0,0,1,0)) {
      /* -------- 1,11,00010 FABD 4h_4h_4h, 8h_8h_8h -------- */
      IRTemp rm = mk_get_IR_rounding_mode();
      IRTemp t1 = newTempV128();
      IRTemp t2 = newTempV128();
      assign(t1, triop(Iop_Sub16Fx8, mkexpr(rm), getQReg128(nn), getQReg128(mm)));
      assign(t2, unop(Iop_Abs16Fx8, mkexpr(t1)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t2));
      const HChar* arr = bitQ == 0 ? "4h" : "8h";
      DIP("%s %s.%s, %s.%s, %s.%s\n", "fabd",
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   if (size == X01 && opcode == BITS5(0,0,1,0,0)) {
      /* -------- 0,01,00100 FCMEQ 4h_4h_4h, 8h_8h_8h -------- */
      /* -------- 1,01,00100 FCMGE 4h_4h_4h, 8h_8h_8h -------- */
      Bool isGE = bitU == 1;
      IRTemp t1 = newTempV128();
      /* Swap source and destination in order to use existing LE IR op for GE. */
      assign(t1, isGE ? binop(Iop_CmpLE16Fx8, getQReg128(mm), getQReg128(nn))
                      : binop(Iop_CmpEQ16Fx8, getQReg128(nn), getQReg128(mm)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t1));
      const HChar* arr = bitQ == 0 ? "4h" : "8h";
      DIP("%s %s.%s, %s.%s, %s.%s\n", isGE ? "fcmge" : "fcmeq",
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   if (size == X11 && opcode == BITS5(0,0,1,0,0)) {
      /* -------- 1,11,00100 FCMGT 4h_4h_4h, 8h_8h_8h -------- */
      IRTemp t1 = newTempV128();
      /* Swap source and destination in order to use existing LT IR op for GT. */
      assign(t1, binop(Iop_CmpLT16Fx8, getQReg128(mm), getQReg128(nn)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t1));
      const HChar* arr = bitQ == 0 ? "4h" : "8h";
      DIP("%s %s.%s, %s.%s, %s.%s\n", "fcmgt",
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   if (bitU == 1 && opcode == BITS5(0,0,1,0,1)) {
      /* -------- 1,01,00101 FACGE 4h_4h_4h 8h_8h_8h -------- */
      /* -------- 1,11,00101 FACGT 4h_4h_4h 8h_8h_8h -------- */
      Bool isGT  = (size & 3) == 3;
      IROp opCMP = isGT ? Iop_CmpLT16Fx8 : Iop_CmpLE16Fx8;
      IROp opABS = Iop_Abs16Fx8;
      IRTemp t1  = newTempV128();
      assign(t1, binop(opCMP, unop(opABS, getQReg128(mm)),
                              unop(opABS, getQReg128(nn))));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t1));
      const HChar* arr = bitQ == 0 ? "4h" : "8h";
      DIP("%s %s.%s, %s.%s, %s.%s\n", isGT ? "facgt" : "facge",
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   if (bitU == 0 && size == X01 && opcode == BITS5(0,0,0,1,0)) {
      /* -------- 0,01,00010 FADD 4h_4h_4h, 8h_8h_8h -------- */
      IRTemp rm = mk_get_IR_rounding_mode();
      IRTemp t1 = newTempV128();
      IRTemp t2 = newTempV128();
      assign(t1, triop(Iop_Add16Fx8, mkexpr(rm), getQReg128(nn), getQReg128(mm)));
      assign(t2, math_MAYBE_ZERO_HI64(bitQ, t1));
      putQReg128(dd, mkexpr(t2));
      const HChar* arr = bitQ == 0 ? "4h" : "8h";
      DIP("%s %s.%s, %s.%s, %s.%s\n", "fadd",
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   return False;
#  undef INSN
}

13334 Bool
dis_AdvSIMD_two_reg_misc(/*MB_OUT*/DisResult
* dres
, UInt insn
)
13336 /* 31 30 29 28 23 21 16 11 9 4
13337 0 Q U 01110 size 10000 opcode 10 n d
13338 Decode fields: U,size,opcode
13340 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
13341 if (INSN(31,31) != 0
13342 || INSN(28,24) != BITS5(0,1,1,1,0)
13343 || INSN(21,17) != BITS5(1,0,0,0,0)
13344 || INSN(11,10) != BITS2(1,0)) {
13347 UInt bitQ
= INSN(30,30);
13348 UInt bitU
= INSN(29,29);
13349 UInt size
= INSN(23,22);
13350 UInt opcode
= INSN(16,12);
13351 UInt nn
= INSN(9,5);
13352 UInt dd
= INSN(4,0);
13355 if (bitU
== 0 && size
<= X10
&& opcode
== BITS5(0,0,0,0,0)) {
13356 /* -------- 0,00,00000: REV64 16b_16b, 8b_8b -------- */
13357 /* -------- 0,01,00000: REV64 8h_8h, 4h_4h -------- */
13358 /* -------- 0,10,00000: REV64 4s_4s, 2s_2s -------- */
13359 const IROp iops
[3] = { Iop_Reverse8sIn64_x2
,
13360 Iop_Reverse16sIn64_x2
, Iop_Reverse32sIn64_x2
};
13361 vassert(size
<= 2);
13362 IRTemp res
= newTempV128();
13363 assign(res
, unop(iops
[size
], getQReg128(nn
)));
13364 putQReg128(dd
, math_MAYBE_ZERO_HI64(bitQ
, res
));
13365 const HChar
* arr
= nameArr_Q_SZ(bitQ
, size
);
13366 DIP("%s %s.%s, %s.%s\n", "rev64",
13367 nameQReg128(dd
), arr
, nameQReg128(nn
), arr
);
13371 if (bitU
== 1 && size
<= X01
&& opcode
== BITS5(0,0,0,0,0)) {
13372 /* -------- 1,00,00000: REV32 16b_16b, 8b_8b -------- */
13373 /* -------- 1,01,00000: REV32 8h_8h, 4h_4h -------- */
13374 Bool isH
= size
== X01
;
13375 IRTemp res
= newTempV128();
13376 IROp iop
= isH
? Iop_Reverse16sIn32_x4
: Iop_Reverse8sIn32_x4
;
13377 assign(res
, unop(iop
, getQReg128(nn
)));
13378 putQReg128(dd
, math_MAYBE_ZERO_HI64(bitQ
, res
));
13379 const HChar
* arr
= nameArr_Q_SZ(bitQ
, size
);
13380 DIP("%s %s.%s, %s.%s\n", "rev32",
13381 nameQReg128(dd
), arr
, nameQReg128(nn
), arr
);
13385 if (bitU
== 0 && size
== X00
&& opcode
== BITS5(0,0,0,0,1)) {
13386 /* -------- 0,00,00001: REV16 16b_16b, 8b_8b -------- */
13387 IRTemp res
= newTempV128();
13388 assign(res
, unop(Iop_Reverse8sIn16_x8
, getQReg128(nn
)));
13389 putQReg128(dd
, math_MAYBE_ZERO_HI64(bitQ
, res
));
13390 const HChar
* arr
= nameArr_Q_SZ(bitQ
, size
);
13391 DIP("%s %s.%s, %s.%s\n", "rev16",
13392 nameQReg128(dd
), arr
, nameQReg128(nn
), arr
);
13396 if (opcode
== BITS5(0,0,0,1,0) || opcode
== BITS5(0,0,1,1,0)) {
13397 /* -------- 0,xx,00010: SADDLP std6_std6 -------- */
13398 /* -------- 1,xx,00010: UADDLP std6_std6 -------- */
13399 /* -------- 0,xx,00110: SADALP std6_std6 -------- */
13400 /* -------- 1,xx,00110: UADALP std6_std6 -------- */
13401 /* Widens, and size refers to the narrow size. */
13402 if (size
== X11
) return False
; // no 1d or 2d cases
13403 Bool isU
= bitU
== 1;
13404 Bool isACC
= opcode
== BITS5(0,0,1,1,0);
13405 IRTemp src
= newTempV128();
13406 IRTemp sum
= newTempV128();
13407 IRTemp res
= newTempV128();
13408 assign(src
, getQReg128(nn
));
13410 binop(mkVecADD(size
+1),
13411 mkexpr(math_WIDEN_EVEN_OR_ODD_LANES(
13412 isU
, True
/*fromOdd*/, size
, mkexpr(src
))),
13413 mkexpr(math_WIDEN_EVEN_OR_ODD_LANES(
13414 isU
, False
/*!fromOdd*/, size
, mkexpr(src
)))));
13415 assign(res
, isACC
? binop(mkVecADD(size
+1), mkexpr(sum
), getQReg128(dd
))
13417 putQReg128(dd
, math_MAYBE_ZERO_HI64(bitQ
, res
));
13418 const HChar
* arrNarrow
= nameArr_Q_SZ(bitQ
, size
);
13419 const HChar
* arrWide
= nameArr_Q_SZ(bitQ
, size
+1);
13420 DIP("%s %s.%s, %s.%s\n", isACC
? (isU
? "uadalp" : "sadalp")
13421 : (isU
? "uaddlp" : "saddlp"),
13422 nameQReg128(dd
), arrWide
, nameQReg128(nn
), arrNarrow
);
13426 if (opcode
== BITS5(0,0,0,1,1)) {
13427 /* -------- 0,xx,00011: SUQADD std7_std7 -------- */
13428 /* -------- 1,xx,00011: USQADD std7_std7 -------- */
13429 if (bitQ
== 0 && size
== X11
) return False
; // implied 1d case
13430 Bool isUSQADD
= bitU
== 1;
13431 /* This is switched (in the US vs SU sense) deliberately.
13432 SUQADD corresponds to the ExtUSsatSS variants and
13433 USQADD corresponds to the ExtSUsatUU variants.
13434 See libvex_ir for more details. */
13435 IROp qop
= isUSQADD
? mkVecQADDEXTSUSATUU(size
)
13436 : mkVecQADDEXTUSSATSS(size
);
13437 IROp nop
= mkVecADD(size
);
13438 IRTemp argL
= newTempV128();
13439 IRTemp argR
= newTempV128();
13440 IRTemp qres
= newTempV128();
13441 IRTemp nres
= newTempV128();
13442 /* Because the two arguments to the addition are implicitly
13443 extended differently (one signedly, the other unsignedly) it is
13444 important to present them to the primop in the correct order. */
13445 assign(argL
, getQReg128(nn
));
13446 assign(argR
, getQReg128(dd
));
13447 assign(qres
, math_MAYBE_ZERO_HI64_fromE(
13448 bitQ
, binop(qop
, mkexpr(argL
), mkexpr(argR
))));
13449 assign(nres
, math_MAYBE_ZERO_HI64_fromE(
13450 bitQ
, binop(nop
, mkexpr(argL
), mkexpr(argR
))));
13451 putQReg128(dd
, mkexpr(qres
));
13452 updateQCFLAGwithDifference(qres
, nres
);
13453 const HChar
* arr
= nameArr_Q_SZ(bitQ
, size
);
13454 DIP("%s %s.%s, %s.%s\n", isUSQADD
? "usqadd" : "suqadd",
13455 nameQReg128(dd
), arr
, nameQReg128(nn
), arr
);
13459 if (opcode
== BITS5(0,0,1,0,0)) {
13460 /* -------- 0,xx,00100: CLS std6_std6 -------- */
13461 /* -------- 1,xx,00100: CLZ std6_std6 -------- */
13462 if (size
== X11
) return False
; // no 1d or 2d cases
13463 const IROp opsCLS
[3] = { Iop_Cls8x16
, Iop_Cls16x8
, Iop_Cls32x4
};
13464 const IROp opsCLZ
[3] = { Iop_Clz8x16
, Iop_Clz16x8
, Iop_Clz32x4
};
13465 Bool isCLZ
= bitU
== 1;
13466 IRTemp res
= newTempV128();
13467 vassert(size
<= 2);
13468 assign(res
, unop(isCLZ
? opsCLZ
[size
] : opsCLS
[size
], getQReg128(nn
)));
13469 putQReg128(dd
, math_MAYBE_ZERO_HI64(bitQ
, res
));
13470 const HChar
* arr
= nameArr_Q_SZ(bitQ
, size
);
13471 DIP("%s %s.%s, %s.%s\n", isCLZ
? "clz" : "cls",
13472 nameQReg128(dd
), arr
, nameQReg128(nn
), arr
);
13476 if (size
== X00
&& opcode
== BITS5(0,0,1,0,1)) {
13477 /* -------- 0,00,00101: CNT 16b_16b, 8b_8b -------- */
13478 /* -------- 1,00,00101: NOT 16b_16b, 8b_8b -------- */
13479 IRTemp res
= newTempV128();
13480 assign(res
, unop(bitU
== 0 ? Iop_Cnt8x16
: Iop_NotV128
, getQReg128(nn
)));
13481 putQReg128(dd
, math_MAYBE_ZERO_HI64(bitQ
, res
));
13482 const HChar
* arr
= nameArr_Q_SZ(bitQ
, 0);
13483 DIP("%s %s.%s, %s.%s\n", bitU
== 0 ? "cnt" : "not",
13484 nameQReg128(dd
), arr
, nameQReg128(nn
), arr
);
13488 if (bitU
== 1 && size
== X01
&& opcode
== BITS5(0,0,1,0,1)) {
13489 /* -------- 1,01,00101 RBIT 16b_16b, 8b_8b -------- */
13490 IRTemp res
= newTempV128();
13491 assign(res
, unop(Iop_Reverse1sIn8_x16
, getQReg128(nn
)));
13492 putQReg128(dd
, math_MAYBE_ZERO_HI64(bitQ
, res
));
13493 const HChar
* arr
= nameArr_Q_SZ(bitQ
, 0);
13494 DIP("%s %s.%s, %s.%s\n", "rbit",
13495 nameQReg128(dd
), arr
, nameQReg128(nn
), arr
);
13499 if (opcode
== BITS5(0,0,1,1,1)) {
13500 /* -------- 0,xx,00111 SQABS std7_std7 -------- */
13501 /* -------- 1,xx,00111 SQNEG std7_std7 -------- */
13502 if (bitQ
== 0 && size
== X11
) return False
; // implied 1d case
13503 Bool isNEG
= bitU
== 1;
13504 IRTemp qresFW
= IRTemp_INVALID
, nresFW
= IRTemp_INVALID
;
13505 (isNEG
? math_SQNEG
: math_SQABS
)( &qresFW
, &nresFW
,
13506 getQReg128(nn
), size
);
13507 IRTemp qres
= newTempV128(), nres
= newTempV128();
13508 assign(qres
, math_MAYBE_ZERO_HI64(bitQ
, qresFW
));
13509 assign(nres
, math_MAYBE_ZERO_HI64(bitQ
, nresFW
));
13510 putQReg128(dd
, mkexpr(qres
));
13511 updateQCFLAGwithDifference(qres
, nres
);
13512 const HChar
* arr
= nameArr_Q_SZ(bitQ
, size
);
13513 DIP("%s %s.%s, %s.%s\n", isNEG
? "sqneg" : "sqabs",
13514 nameQReg128(dd
), arr
, nameQReg128(nn
), arr
);
13518 if (opcode
== BITS5(0,1,0,0,0)) {
13519 /* -------- 0,xx,01000: CMGT std7_std7_#0 -------- */ // >s 0
13520 /* -------- 1,xx,01000: CMGE std7_std7_#0 -------- */ // >=s 0
13521 if (bitQ
== 0 && size
== X11
) return False
; // implied 1d case
13522 Bool isGT
= bitU
== 0;
13523 IRExpr
* argL
= getQReg128(nn
);
13524 IRExpr
* argR
= mkV128(0x0000);
13525 IRTemp res
= newTempV128();
13526 IROp opGTS
= mkVecCMPGTS(size
);
13527 assign(res
, isGT
? binop(opGTS
, argL
, argR
)
13528 : unop(Iop_NotV128
, binop(opGTS
, argR
, argL
)));
13529 putQReg128(dd
, math_MAYBE_ZERO_HI64(bitQ
, res
));
13530 const HChar
* arr
= nameArr_Q_SZ(bitQ
, size
);
13531 DIP("cm%s %s.%s, %s.%s, #0\n", isGT
? "gt" : "ge",
13532 nameQReg128(dd
), arr
, nameQReg128(nn
), arr
);
13536 if (opcode
== BITS5(0,1,0,0,1)) {
13537 /* -------- 0,xx,01001: CMEQ std7_std7_#0 -------- */ // == 0
13538 /* -------- 1,xx,01001: CMLE std7_std7_#0 -------- */ // <=s 0
13539 if (bitQ
== 0 && size
== X11
) return False
; // implied 1d case
13540 Bool isEQ
= bitU
== 0;
13541 IRExpr
* argL
= getQReg128(nn
);
13542 IRExpr
* argR
= mkV128(0x0000);
13543 IRTemp res
= newTempV128();
13544 assign(res
, isEQ
? binop(mkVecCMPEQ(size
), argL
, argR
)
13545 : unop(Iop_NotV128
,
13546 binop(mkVecCMPGTS(size
), argL
, argR
)));
13547 putQReg128(dd
, math_MAYBE_ZERO_HI64(bitQ
, res
));
13548 const HChar
* arr
= nameArr_Q_SZ(bitQ
, size
);
13549 DIP("cm%s %s.%s, %s.%s, #0\n", isEQ
? "eq" : "le",
13550 nameQReg128(dd
), arr
, nameQReg128(nn
), arr
);
13554 if (bitU
== 0 && opcode
== BITS5(0,1,0,1,0)) {
13555 /* -------- 0,xx,01010: CMLT std7_std7_#0 -------- */ // <s 0
13556 if (bitQ
== 0 && size
== X11
) return False
; // implied 1d case
13557 IRExpr
* argL
= getQReg128(nn
);
13558 IRExpr
* argR
= mkV128(0x0000);
13559 IRTemp res
= newTempV128();
13560 assign(res
, binop(mkVecCMPGTS(size
), argR
, argL
));
13561 putQReg128(dd
, math_MAYBE_ZERO_HI64(bitQ
, res
));
13562 const HChar
* arr
= nameArr_Q_SZ(bitQ
, size
);
13563 DIP("cm%s %s.%s, %s.%s, #0\n", "lt",
13564 nameQReg128(dd
), arr
, nameQReg128(nn
), arr
);
13568 if (bitU
== 0 && opcode
== BITS5(0,1,0,1,1)) {
13569 /* -------- 0,xx,01011: ABS std7_std7 -------- */
13570 if (bitQ
== 0 && size
== X11
) return False
; // implied 1d case
13571 IRTemp res
= newTempV128();
13572 assign(res
, unop(mkVecABS(size
), getQReg128(nn
)));
13573 putQReg128(dd
, math_MAYBE_ZERO_HI64(bitQ
, res
));
13574 const HChar
* arr
= nameArr_Q_SZ(bitQ
, size
);
13575 DIP("abs %s.%s, %s.%s\n", nameQReg128(dd
), arr
, nameQReg128(nn
), arr
);
13579 if (bitU
== 1 && opcode
== BITS5(0,1,0,1,1)) {
13580 /* -------- 1,xx,01011: NEG std7_std7 -------- */
13581 if (bitQ
== 0 && size
== X11
) return False
; // implied 1d case
13582 IRTemp res
= newTempV128();
13583 assign(res
, binop(mkVecSUB(size
), mkV128(0x0000), getQReg128(nn
)));
13584 putQReg128(dd
, math_MAYBE_ZERO_HI64(bitQ
, res
));
13585 const HChar
* arr
= nameArr_Q_SZ(bitQ
, size
);
13586 DIP("neg %s.%s, %s.%s\n", nameQReg128(dd
), arr
, nameQReg128(nn
), arr
);
13590 UInt ix
= 0; /*INVALID*/
13593 case BITS5(0,1,1,0,0): ix
= (bitU
== 1) ? 4 : 1; break;
13594 case BITS5(0,1,1,0,1): ix
= (bitU
== 1) ? 5 : 2; break;
13595 case BITS5(0,1,1,1,0): if (bitU
== 0) ix
= 3; break;
13600 /* -------- 0,1x,01100 FCMGT 2d_2d,4s_4s,2s_2s _#0.0 (ix 1) -------- */
13601 /* -------- 0,1x,01101 FCMEQ 2d_2d,4s_4s,2s_2s _#0.0 (ix 2) -------- */
13602 /* -------- 0,1x,01110 FCMLT 2d_2d,4s_4s,2s_2s _#0.0 (ix 3) -------- */
13603 /* -------- 1,1x,01100 FCMGE 2d_2d,4s_4s,2s_2s _#0.0 (ix 4) -------- */
13604 /* -------- 1,1x,01101 FCMLE 2d_2d,4s_4s,2s_2s _#0.0 (ix 5) -------- */
13605 if (bitQ
== 0 && size
== X11
) return False
; // implied 1d case
13606 Bool isD
= size
== X11
;
13607 IROp opCmpEQ
= isD
? Iop_CmpEQ64Fx2
: Iop_CmpEQ32Fx4
;
13608 IROp opCmpLE
= isD
? Iop_CmpLE64Fx2
: Iop_CmpLE32Fx4
;
13609 IROp opCmpLT
= isD
? Iop_CmpLT64Fx2
: Iop_CmpLT32Fx4
;
13610 IROp opCmp
= Iop_INVALID
;
13612 const HChar
* nm
= "??";
13614 case 1: nm
= "fcmgt"; opCmp
= opCmpLT
; swap
= True
; break;
13615 case 2: nm
= "fcmeq"; opCmp
= opCmpEQ
; break;
13616 case 3: nm
= "fcmlt"; opCmp
= opCmpLT
; break;
13617 case 4: nm
= "fcmge"; opCmp
= opCmpLE
; swap
= True
; break;
13618 case 5: nm
= "fcmle"; opCmp
= opCmpLE
; break;
13619 default: vassert(0);
13621 IRExpr
* zero
= mkV128(0x0000);
13622 IRTemp res
= newTempV128();
13623 assign(res
, swap
? binop(opCmp
, zero
, getQReg128(nn
))
13624 : binop(opCmp
, getQReg128(nn
), zero
));
13625 putQReg128(dd
, math_MAYBE_ZERO_HI64(bitQ
, res
));
13626 const HChar
* arr
= bitQ
== 0 ? "2s" : (size
== X11
? "2d" : "4s");
13627 DIP("%s %s.%s, %s.%s, #0.0\n", nm
,
13628 nameQReg128(dd
), arr
, nameQReg128(nn
), arr
);
13632 if (size
>= X10
&& opcode
== BITS5(0,1,1,1,1)) {
13633 /* -------- 0,1x,01111: FABS 2d_2d, 4s_4s, 2s_2s -------- */
13634 /* -------- 1,1x,01111: FNEG 2d_2d, 4s_4s, 2s_2s -------- */
13635 if (bitQ
== 0 && size
== X11
) return False
; // implied 1d case
13636 Bool isFNEG
= bitU
== 1;
13637 IROp op
= isFNEG
? (size
== X10
? Iop_Neg32Fx4
: Iop_Neg64Fx2
)
13638 : (size
== X10
? Iop_Abs32Fx4
: Iop_Abs64Fx2
);
13639 IRTemp res
= newTempV128();
13640 assign(res
, unop(op
, getQReg128(nn
)));
13641 putQReg128(dd
, math_MAYBE_ZERO_HI64(bitQ
, res
));
13642 const HChar
* arr
= bitQ
== 0 ? "2s" : (size
== X11
? "2d" : "4s");
13643 DIP("%s %s.%s, %s.%s\n", isFNEG
? "fneg" : "fabs",
13644 nameQReg128(dd
), arr
, nameQReg128(nn
), arr
);
13648 if (bitU
== 0 && opcode
== BITS5(1,0,0,1,0)) {
13649 /* -------- 0,xx,10010: XTN{,2} -------- */
13650 if (size
== X11
) return False
;
13652 Bool is2
= bitQ
== 1;
13653 IROp opN
= mkVecNARROWUN(size
);
13654 IRTemp resN
= newTempV128();
13655 assign(resN
, unop(Iop_64UtoV128
, unop(opN
, getQReg128(nn
))));
13656 putLO64andZUorPutHI64(is2
, dd
, resN
);
13657 const HChar
* nm
= "xtn";
13658 const HChar
* arrNarrow
= nameArr_Q_SZ(bitQ
, size
);
13659 const HChar
* arrWide
= nameArr_Q_SZ(1, size
+1);
13660 DIP("%s%s %s.%s, %s.%s\n", is2
? "2" : "", nm
,
13661 nameQReg128(dd
), arrNarrow
, nameQReg128(nn
), arrWide
);
13665 if (opcode
== BITS5(1,0,1,0,0)
13666 || (bitU
== 1 && opcode
== BITS5(1,0,0,1,0))) {
13667 /* -------- 0,xx,10100: SQXTN{,2} -------- */
13668 /* -------- 1,xx,10100: UQXTN{,2} -------- */
13669 /* -------- 1,xx,10010: SQXTUN{,2} -------- */
13670 if (size
== X11
) return False
;
13672 Bool is2
= bitQ
== 1;
13673 IROp opN
= Iop_INVALID
;
13674 Bool zWiden
= True
;
13675 const HChar
* nm
= "??";
13676 /**/ if (bitU
== 0 && opcode
== BITS5(1,0,1,0,0)) {
13677 opN
= mkVecQNARROWUNSS(size
); nm
= "sqxtn"; zWiden
= False
;
13679 else if (bitU
== 1 && opcode
== BITS5(1,0,1,0,0)) {
13680 opN
= mkVecQNARROWUNUU(size
); nm
= "uqxtn";
13682 else if (bitU
== 1 && opcode
== BITS5(1,0,0,1,0)) {
13683 opN
= mkVecQNARROWUNSU(size
); nm
= "sqxtun";
13686 IRTemp src
= newTempV128();
13687 assign(src
, getQReg128(nn
));
13688 IRTemp resN
= newTempV128();
13689 assign(resN
, unop(Iop_64UtoV128
, unop(opN
, mkexpr(src
))));
13690 putLO64andZUorPutHI64(is2
, dd
, resN
);
13691 IRTemp resW
= math_WIDEN_LO_OR_HI_LANES(zWiden
, False
/*!fromUpperHalf*/,
13692 size
, mkexpr(resN
));
13693 updateQCFLAGwithDifference(src
, resW
);
13694 const HChar
* arrNarrow
= nameArr_Q_SZ(bitQ
, size
);
13695 const HChar
* arrWide
= nameArr_Q_SZ(1, size
+1);
13696 DIP("%s%s %s.%s, %s.%s\n", is2
? "2" : "", nm
,
13697 nameQReg128(dd
), arrNarrow
, nameQReg128(nn
), arrWide
);
13701 if (bitU
== 1 && opcode
== BITS5(1,0,0,1,1)) {
13702 /* -------- 1,xx,10011 SHLL{2} #lane-width -------- */
13703 /* Widens, and size is the narrow size. */
13704 if (size
== X11
) return False
;
13705 Bool is2
= bitQ
== 1;
13706 IROp opINT
= is2
? mkVecINTERLEAVEHI(size
) : mkVecINTERLEAVELO(size
);
13707 IROp opSHL
= mkVecSHLN(size
+1);
13708 IRTemp src
= newTempV128();
13709 IRTemp res
= newTempV128();
13710 assign(src
, getQReg128(nn
));
13711 assign(res
, binop(opSHL
, binop(opINT
, mkexpr(src
), mkexpr(src
)),
13713 putQReg128(dd
, mkexpr(res
));
13714 const HChar
* arrNarrow
= nameArr_Q_SZ(bitQ
, size
);
13715 const HChar
* arrWide
= nameArr_Q_SZ(1, size
+1);
13716 DIP("shll%s %s.%s, %s.%s, #%d\n", is2
? "2" : "",
13717 nameQReg128(dd
), arrWide
, nameQReg128(nn
), arrNarrow
, 8 << size
);
13721 if (bitU
== 0 && size
<= X01
&& opcode
== BITS5(1,0,1,1,0)) {
13722 /* -------- 0,0x,10110: FCVTN 4h/8h_4s, 2s/4s_2d -------- */
13723 UInt nLanes
= size
== X00
? 4 : 2;
13724 IRType srcTy
= size
== X00
? Ity_F32
: Ity_F64
;
13725 IROp opCvt
= size
== X00
? Iop_F32toF16
: Iop_F64toF32
;
13726 IRTemp rm
= mk_get_IR_rounding_mode();
13727 IRTemp src
[nLanes
];
13728 for (UInt i
= 0; i
< nLanes
; i
++) {
13729 src
[i
] = newTemp(srcTy
);
13730 assign(src
[i
], getQRegLane(nn
, i
, srcTy
));
13732 for (UInt i
= 0; i
< nLanes
; i
++) {
13733 putQRegLane(dd
, nLanes
* bitQ
+ i
,
13734 binop(opCvt
, mkexpr(rm
), mkexpr(src
[i
])));
13737 putQRegLane(dd
, 1, mkU64(0));
13739 const HChar
* arrNarrow
= nameArr_Q_SZ(bitQ
, 1+size
);
13740 const HChar
* arrWide
= nameArr_Q_SZ(1, 1+size
+1);
13741 DIP("fcvtn%s %s.%s, %s.%s\n", bitQ
? "2" : "",
13742 nameQReg128(dd
), arrNarrow
, nameQReg128(nn
), arrWide
);
13746 if (bitU
== 1 && size
== X01
&& opcode
== BITS5(1,0,1,1,0)) {
13747 /* -------- 1,01,10110: FCVTXN 2s/4s_2d -------- */
13748 /* Using Irrm_NEAREST here isn't right. The docs say "round to
13749 odd" but I don't know what that really means. */
13750 IRType srcTy
= Ity_F64
;
13751 IROp opCvt
= Iop_F64toF32
;
13753 for (UInt i
= 0; i
< 2; i
++) {
13754 src
[i
] = newTemp(srcTy
);
13755 assign(src
[i
], getQRegLane(nn
, i
, srcTy
));
13757 for (UInt i
= 0; i
< 2; i
++) {
13758 putQRegLane(dd
, 2 * bitQ
+ i
,
13759 binop(opCvt
, mkU32(Irrm_NEAREST
), mkexpr(src
[i
])));
13762 putQRegLane(dd
, 1, mkU64(0));
13764 const HChar
* arrNarrow
= nameArr_Q_SZ(bitQ
, 1+size
);
13765 const HChar
* arrWide
= nameArr_Q_SZ(1, 1+size
+1);
13766 DIP("fcvtxn%s %s.%s, %s.%s\n", bitQ
? "2" : "",
13767 nameQReg128(dd
), arrNarrow
, nameQReg128(nn
), arrWide
);
13771 if (bitU
== 0 && size
<= X01
&& opcode
== BITS5(1,0,1,1,1)) {
13772 /* -------- 0,0x,10111: FCVTL 4s_4h/8h, 2d_2s/4s -------- */
13773 UInt nLanes
= size
== X00
? 4 : 2;
13774 IRType srcTy
= size
== X00
? Ity_F16
: Ity_F32
;
13775 IROp opCvt
= size
== X00
? Iop_F16toF32
: Iop_F32toF64
;
13776 IRTemp src
[nLanes
];
13777 for (UInt i
= 0; i
< nLanes
; i
++) {
13778 src
[i
] = newTemp(srcTy
);
13779 assign(src
[i
], getQRegLane(nn
, nLanes
* bitQ
+ i
, srcTy
));
13781 for (UInt i
= 0; i
< nLanes
; i
++) {
13782 putQRegLane(dd
, i
, unop(opCvt
, mkexpr(src
[i
])));
13784 const HChar
* arrNarrow
= nameArr_Q_SZ(bitQ
, 1+size
);
13785 const HChar
* arrWide
= nameArr_Q_SZ(1, 1+size
+1);
13786 DIP("fcvtl%s %s.%s, %s.%s\n", bitQ
? "2" : "",
13787 nameQReg128(dd
), arrWide
, nameQReg128(nn
), arrNarrow
);
13792 if (opcode
== BITS5(1,1,0,0,0) || opcode
== BITS5(1,1,0,0,1)) {
13793 ix
= 1 + ((((bitU
& 1) << 2) | ((size
& 2) << 0)) | ((opcode
& 1) << 0));
13794 // = 1 + bitU[0]:size[1]:opcode[0]
13795 vassert(ix
>= 1 && ix
<= 8);
13796 if (ix
== 7) ix
= 0;
13799 /* -------- 0,0x,11000 FRINTN 2d_2d, 4s_4s, 2s_2s (1) -------- */
13800 /* -------- 0,0x,11001 FRINTM 2d_2d, 4s_4s, 2s_2s (2) -------- */
13801 /* -------- 0,1x,11000 FRINTP 2d_2d, 4s_4s, 2s_2s (3) -------- */
13802 /* -------- 0,1x,11001 FRINTZ 2d_2d, 4s_4s, 2s_2s (4) -------- */
13803 /* -------- 1,0x,11000 FRINTA 2d_2d, 4s_4s, 2s_2s (5) -------- */
13804 /* -------- 1,0x,11001 FRINTX 2d_2d, 4s_4s, 2s_2s (6) -------- */
13805 /* -------- 1,1x,11000 (apparently unassigned) (7) -------- */
13806 /* -------- 1,1x,11001 FRINTI 2d_2d, 4s_4s, 2s_2s (8) -------- */
13808 FRINTN: tieeven -- !! FIXME KLUDGED !!
13812 FRINTA: tieaway -- !! FIXME KLUDGED !!
13813 FRINTX: per FPCR + "exact = TRUE"
13816 Bool isD
= (size
& 1) == 1;
13817 if (bitQ
== 0 && isD
) return False
; // implied 1d case
13819 IRTemp irrmRM
= mk_get_IR_rounding_mode();
13822 IRTemp irrm
= newTemp(Ity_I32
);
13824 case 1: ch
= 'n'; assign(irrm
, mkU32(Irrm_NEAREST
)); break;
13825 case 2: ch
= 'm'; assign(irrm
, mkU32(Irrm_NegINF
)); break;
13826 case 3: ch
= 'p'; assign(irrm
, mkU32(Irrm_PosINF
)); break;
13827 case 4: ch
= 'z'; assign(irrm
, mkU32(Irrm_ZERO
)); break;
13828 // The following is a kludge. Should be: Irrm_NEAREST_TIE_AWAY_0
13829 case 5: ch
= 'a'; assign(irrm
, mkU32(Irrm_NEAREST
)); break;
13830 // I am unsure about the following, due to the "integral exact"
13831 // description in the manual. What does it mean? (frintx, that is)
13832 case 6: ch
= 'x'; assign(irrm
, mkexpr(irrmRM
)); break;
13833 case 8: ch
= 'i'; assign(irrm
, mkexpr(irrmRM
)); break;
13834 default: vassert(0);
13837 IROp opRND
= isD
? Iop_RoundF64toInt
: Iop_RoundF32toInt
;
13839 for (UInt i
= 0; i
< 2; i
++) {
13840 putQRegLane(dd
, i
, binop(opRND
, mkexpr(irrm
),
13841 getQRegLane(nn
, i
, Ity_F64
)));
13844 UInt n
= bitQ
==1 ? 4 : 2;
13845 for (UInt i
= 0; i
< n
; i
++) {
13846 putQRegLane(dd
, i
, binop(opRND
, mkexpr(irrm
),
13847 getQRegLane(nn
, i
, Ity_F32
)));
13850 putQRegLane(dd
, 1, mkU64(0)); // zero out lanes 2 and 3
13852 const HChar
* arr
= nameArr_Q_SZ(bitQ
, size
);
13853 DIP("frint%c %s.%s, %s.%s\n", ch
,
13854 nameQReg128(dd
), arr
, nameQReg128(nn
), arr
);
13858 ix
= 0; /*INVALID*/
13860 case BITS5(1,1,0,1,0): ix
= ((size
& 2) == 2) ? 4 : 1; break;
13861 case BITS5(1,1,0,1,1): ix
= ((size
& 2) == 2) ? 5 : 2; break;
13862 case BITS5(1,1,1,0,0): if ((size
& 2) == 0) ix
= 3; break;
13866 /* -------- 0,0x,11010 FCVTNS 2d_2d, 4s_4s, 2s_2s (ix 1) -------- */
13867 /* -------- 0,0x,11011 FCVTMS 2d_2d, 4s_4s, 2s_2s (ix 2) -------- */
13868 /* -------- 0,0x,11100 FCVTAS 2d_2d, 4s_4s, 2s_2s (ix 3) -------- */
13869 /* -------- 0,1x,11010 FCVTPS 2d_2d, 4s_4s, 2s_2s (ix 4) -------- */
13870 /* -------- 0,1x,11011 FCVTZS 2d_2d, 4s_4s, 2s_2s (ix 5) -------- */
13871 /* -------- 1,0x,11010 FCVTNS 2d_2d, 4s_4s, 2s_2s (ix 1) -------- */
13872 /* -------- 1,0x,11011 FCVTMS 2d_2d, 4s_4s, 2s_2s (ix 2) -------- */
13873 /* -------- 1,0x,11100 FCVTAS 2d_2d, 4s_4s, 2s_2s (ix 3) -------- */
13874 /* -------- 1,1x,11010 FCVTPS 2d_2d, 4s_4s, 2s_2s (ix 4) -------- */
13875 /* -------- 1,1x,11011 FCVTZS 2d_2d, 4s_4s, 2s_2s (ix 5) -------- */
13876 Bool isD
= (size
& 1) == 1;
13877 if (bitQ
== 0 && isD
) return False
; // implied 1d case
13879 IRRoundingMode irrm
= 8; /*impossible*/
13882 case 1: ch
= 'n'; irrm
= Irrm_NEAREST
; break;
13883 case 2: ch
= 'm'; irrm
= Irrm_NegINF
; break;
13884 case 3: ch
= 'a'; irrm
= Irrm_NEAREST
; break; /* kludge? */
13885 case 4: ch
= 'p'; irrm
= Irrm_PosINF
; break;
13886 case 5: ch
= 'z'; irrm
= Irrm_ZERO
; break;
13887 default: vassert(0);
13889 IROp cvt
= Iop_INVALID
;
13891 cvt
= isD
? Iop_F64toI64U
: Iop_F32toI32U
;
13893 cvt
= isD
? Iop_F64toI64S
: Iop_F32toI32S
;
13896 for (UInt i
= 0; i
< 2; i
++) {
13897 putQRegLane(dd
, i
, binop(cvt
, mkU32(irrm
),
13898 getQRegLane(nn
, i
, Ity_F64
)));
13901 UInt n
= bitQ
==1 ? 4 : 2;
13902 for (UInt i
= 0; i
< n
; i
++) {
13903 putQRegLane(dd
, i
, binop(cvt
, mkU32(irrm
),
13904 getQRegLane(nn
, i
, Ity_F32
)));
13907 putQRegLane(dd
, 1, mkU64(0)); // zero out lanes 2 and 3
13909 const HChar
* arr
= nameArr_Q_SZ(bitQ
, size
);
13910 DIP("fcvt%c%c %s.%s, %s.%s\n", ch
, bitU
== 1 ? 'u' : 's',
13911 nameQReg128(dd
), arr
, nameQReg128(nn
), arr
);
13915 if (size
== X10
&& opcode
== BITS5(1,1,1,0,0)) {
13916 /* -------- 0,10,11100: URECPE 4s_4s, 2s_2s -------- */
13917 /* -------- 1,10,11100: URSQRTE 4s_4s, 2s_2s -------- */
13918 Bool isREC
= bitU
== 0;
13919 IROp op
= isREC
? Iop_RecipEst32Ux4
: Iop_RSqrtEst32Ux4
;
13920 IRTemp res
= newTempV128();
13921 assign(res
, unop(op
, getQReg128(nn
)));
13922 putQReg128(dd
, math_MAYBE_ZERO_HI64(bitQ
, res
));
13923 const HChar
* nm
= isREC
? "urecpe" : "ursqrte";
13924 const HChar
* arr
= nameArr_Q_SZ(bitQ
, size
);
13925 DIP("%s %s.%s, %s.%s\n", nm
,
13926 nameQReg128(dd
), arr
, nameQReg128(nn
), arr
);
13930 if (size
<= X01
&& opcode
== BITS5(1,1,1,0,1)) {
13931 /* -------- 0,0x,11101: SCVTF -------- */
13932 /* -------- 1,0x,11101: UCVTF -------- */
13933 /* 31 28 22 21 15 9 4
13934 0q0 01110 0 sz 1 00001 110110 n d SCVTF Vd, Vn
13935 0q1 01110 0 sz 1 00001 110110 n d UCVTF Vd, Vn
13937 case sz:Q of 00 -> 2S, zero upper, 01 -> 4S, 10 -> illegal, 11 -> 2D
13939 Bool isQ
= bitQ
== 1;
13940 Bool isU
= bitU
== 1;
13941 Bool isF64
= (size
& 1) == 1;
13942 if (isQ
|| !isF64
) {
13943 IRType tyF
= Ity_INVALID
, tyI
= Ity_INVALID
;
13945 Bool zeroHI
= False
;
13946 const HChar
* arrSpec
= NULL
;
13947 Bool ok
= getLaneInfo_Q_SZ(&tyI
, &tyF
, &nLanes
, &zeroHI
, &arrSpec
,
13949 IROp iop
= isU
? (isF64
? Iop_I64UtoF64
: Iop_I32UtoF32
)
13950 : (isF64
? Iop_I64StoF64
: Iop_I32StoF32
);
13951 IRTemp rm
= mk_get_IR_rounding_mode();
13953 vassert(ok
); /* the 'if' above should ensure this */
13954 for (i
= 0; i
< nLanes
; i
++) {
13956 binop(iop
, mkexpr(rm
), getQRegLane(nn
, i
, tyI
)));
13959 putQRegLane(dd
, 1, mkU64(0));
13961 DIP("%ccvtf %s.%s, %s.%s\n", isU
? 'u' : 's',
13962 nameQReg128(dd
), arrSpec
, nameQReg128(nn
), arrSpec
);
13965 /* else fall through */
13968 if (size
>= X10
&& opcode
== BITS5(1,1,1,0,1)) {
13969 /* -------- 0,1x,11101: FRECPE 2d_2d, 4s_4s, 2s_2s -------- */
13970 /* -------- 1,1x,11101: FRSQRTE 2d_2d, 4s_4s, 2s_2s -------- */
13971 Bool isSQRT
= bitU
== 1;
13972 Bool isD
= (size
& 1) == 1;
13973 IROp op
= isSQRT
? (isD
? Iop_RSqrtEst64Fx2
: Iop_RSqrtEst32Fx4
)
13974 : (isD
? Iop_RecipEst64Fx2
: Iop_RecipEst32Fx4
);
13975 if (bitQ
== 0 && isD
) return False
; // implied 1d case
13976 IRTemp resV
= newTempV128();
13977 assign(resV
, unop(op
, getQReg128(nn
)));
13978 putQReg128(dd
, math_MAYBE_ZERO_HI64(bitQ
, resV
));
13979 const HChar
* arr
= bitQ
== 0 ? "2s" : (size
== X11
? "2d" : "4s");
13980 DIP("%s %s.%s, %s.%s\n", isSQRT
? "frsqrte" : "frecpe",
13981 nameQReg128(dd
), arr
, nameQReg128(nn
), arr
);
13985 if (bitU
== 1 && size
>= X10
&& opcode
== BITS5(1,1,1,1,1)) {
13986 /* -------- 1,1x,11111: FSQRT 2d_2d, 4s_4s, 2s_2s -------- */
13987 Bool isD
= (size
& 1) == 1;
13988 IROp op
= isD
? Iop_Sqrt64Fx2
: Iop_Sqrt32Fx4
;
13989 if (bitQ
== 0 && isD
) return False
; // implied 1d case
13990 IRTemp resV
= newTempV128();
13991 assign(resV
, binop(op
, mkexpr(mk_get_IR_rounding_mode()),
13993 putQReg128(dd
, math_MAYBE_ZERO_HI64(bitQ
, resV
));
13994 const HChar
* arr
= bitQ
== 0 ? "2s" : (size
== X11
? "2d" : "4s");
13995 DIP("%s %s.%s, %s.%s\n", "fsqrt",
13996 nameQReg128(dd
), arr
, nameQReg128(nn
), arr
);
14006 Bool
dis_AdvSIMD_two_reg_misc_fp16(/*MB_OUT*/DisResult
* dres
, UInt insn
,
14007 const VexArchInfo
* archinfo
)
14009 /* This decode function only handles instructions with half-precision
14010 floating-point (fp16) operands.
14012 if ((archinfo
->hwcaps
& VEX_HWCAPS_ARM64_FP16
) == 0)
14015 /* 31 30 29 28 23 21 16 11 9 4
14016 0 Q U 01110 size 11100 opcode 10 n d
14017 Decode fields: U,size,opcode
14019 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
14020 if (INSN(31,31) != 0
14021 || INSN(28,24) != BITS5(0,1,1,1,0)
14022 || INSN(21,17) != BITS5(1,1,1,0,0)
14023 || INSN(11,10) != BITS2(1,0)) {
14026 UInt bitQ
= INSN(30,30);
14027 UInt bitU
= INSN(29,29);
14028 UInt size
= INSN(23,22);
14029 UInt opcode
= INSN(16,12);
14030 UInt nn
= INSN(9,5);
14031 UInt dd
= INSN(4,0);
14034 if (size
== X11
&& opcode
== BITS5(0,1,1,1,1)) {
14035 /* -------- Q,0,11,01111: FABS 4h_4h, 8h_8h -------- */
14036 /* -------- Q,1,11,01111: FNEG 4h_4h, 8h_8h -------- */
14037 Bool isFNEG
= bitU
== 1;
14038 IROp op
= isFNEG
? Iop_Neg16Fx8
: Iop_Abs16Fx8
;
14039 IRTemp res
= newTempV128();
14040 assign(res
, unop(op
, getQReg128(nn
)));
14041 putQReg128(dd
, math_MAYBE_ZERO_HI64(bitQ
, res
));
14042 const HChar
* arr
= bitQ
== 0 ? "4h" : "8h";
14043 DIP("%s %s.%s, %s.%s\n", isFNEG
? "fneg" : "fabs",
14044 nameQReg128(dd
), arr
, nameQReg128(nn
), arr
);
14048 if (bitU
== 1 && size
== X11
&& opcode
== BITS5(1,1,1,1,1)) {
14049 /* -------- 1,11,11111: FSQRT 4h_4h, 8h_8h -------- */
14050 IRTemp resV
= newTempV128();
14051 assign(resV
, binop(Iop_Sqrt16Fx8
, mkexpr(mk_get_IR_rounding_mode()),
14053 putQReg128(dd
, math_MAYBE_ZERO_HI64(bitQ
, resV
));
14054 const HChar
* arr
= bitQ
== 0 ? "4h" : "8h";
14055 DIP("%s %s.%s, %s.%s\n", "fsqrt",
14056 nameQReg128(dd
), arr
, nameQReg128(nn
), arr
);
14060 /* Decoding FCM<condtion> based on opcode and bitU. ix used to select
14063 UInt ix
= 0; // Invalid <condition>
14065 case BITS5(0,1,1,0,1): ix
= (bitU
== 1) ? 4 : 1; break; // FCMLE=4,FCMEQ=1
14066 case BITS5(0,1,1,0,0): ix
= (bitU
== 1) ? 5 : 2; break; // FCMGE=5,FCMGT=2
14067 case BITS5(0,1,1,1,0): if (bitU
== 0) ix
= 3; break; // FCMLT=3
14071 /* -------- 0,01101 FCMEQ 4h_4h,8h_8h _#0.0 (ix 1) -------- */
14072 /* -------- 0,01100 FCMGT 4h_4h,8h_8h _#0.0 (ix 2) -------- */
14073 /* -------- 0,01110 FCMLT 4h_4h,8h_8h _#0.0 (ix 3) -------- */
14074 /* -------- 1,01101 FCMLE 4h_4h,8h_8h _#0.0 (ix 4) -------- */
14075 /* -------- 1,01100 FCMGE 4h_4h,8h_8h _#0.0 (ix 5) -------- */
14076 IROp opCmp
= Iop_INVALID
;
14078 const HChar
* nm
= "??";
14080 case 1: nm
= "fcmeq"; opCmp
= Iop_CmpEQ16Fx8
; break;
14081 case 2: nm
= "fcmgt"; opCmp
= Iop_CmpLT16Fx8
; swap
= True
; break;
14082 case 3: nm
= "fcmlt"; opCmp
= Iop_CmpLT16Fx8
; break;
14083 case 4: nm
= "fcmle"; opCmp
= Iop_CmpLE16Fx8
; break;
14084 case 5: nm
= "fcmge"; opCmp
= Iop_CmpLE16Fx8
; swap
= True
; break;
14085 default: vassert(0);
14087 IRExpr
* zero
= mkV128(0x0000);
14088 IRTemp res
= newTempV128();
14089 assign(res
, swap
? binop(opCmp
, zero
, getQReg128(nn
))
14090 : binop(opCmp
, getQReg128(nn
), zero
));
14091 putQReg128(dd
, math_MAYBE_ZERO_HI64(bitQ
, res
));
14092 const HChar
* arr
= bitQ
== 0 ? "4h" : "8h";
14093 DIP("%s %s.%s, %s.%s, #0.0\n", nm
,
14094 nameQReg128(dd
), arr
, nameQReg128(nn
), arr
);
14103 Bool
dis_AdvSIMD_vector_x_indexed_elem(/*MB_OUT*/DisResult
* dres
, UInt insn
)
14105 /* 31 28 23 21 20 19 15 11 9 4
14106 0 Q U 01111 size L M m opcode H 0 n d
14107 Decode fields are: u,size,opcode
14108 M is really part of the mm register number. Individual
14109 cases need to inspect L and H though.
14111 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
14112 if (INSN(31,31) != 0
14113 || INSN(28,24) != BITS5(0,1,1,1,1) || INSN(10,10) !=0) {
14116 UInt bitQ
= INSN(30,30);
14117 UInt bitU
= INSN(29,29);
14118 UInt size
= INSN(23,22);
14119 UInt bitL
= INSN(21,21);
14120 UInt bitM
= INSN(20,20);
14121 UInt mmLO4
= INSN(19,16);
14122 UInt opcode
= INSN(15,12);
14123 UInt bitH
= INSN(11,11);
14124 UInt nn
= INSN(9,5);
14125 UInt dd
= INSN(4,0);
14127 vassert(bitH
< 2 && bitM
< 2 && bitL
< 2);
14129 if (bitU
== 0 && size
>= X10
14130 && (opcode
== BITS4(0,0,0,1) || opcode
== BITS4(0,1,0,1))) {
14131 /* -------- 0,1x,0001 FMLA 2d_2d_d[], 4s_4s_s[], 2s_2s_s[] -------- */
14132 /* -------- 0,1x,0101 FMLS 2d_2d_d[], 4s_4s_s[], 2s_2s_s[] -------- */
14133 if (bitQ
== 0 && size
== X11
) return False
; // implied 1d case
14134 Bool isD
= (size
& 1) == 1;
14135 Bool isSUB
= opcode
== BITS4(0,1,0,1);
14137 if (!isD
) index
= (bitH
<< 1) | bitL
;
14138 else if (isD
&& bitL
== 0) index
= bitH
;
14139 else return False
; // sz:L == x11 => unallocated encoding
14140 vassert(index
< (isD
? 2 : 4));
14141 IRType ity
= isD
? Ity_F64
: Ity_F32
;
14142 IRTemp elem
= newTemp(ity
);
14143 UInt mm
= (bitM
<< 4) | mmLO4
;
14144 assign(elem
, getQRegLane(mm
, index
, ity
));
14145 IRTemp dupd
= math_DUP_TO_V128(elem
, ity
);
14146 IROp opADD
= isD
? Iop_Add64Fx2
: Iop_Add32Fx4
;
14147 IROp opSUB
= isD
? Iop_Sub64Fx2
: Iop_Sub32Fx4
;
14148 IROp opMUL
= isD
? Iop_Mul64Fx2
: Iop_Mul32Fx4
;
14149 IRTemp rm
= mk_get_IR_rounding_mode();
14150 IRTemp t1
= newTempV128();
14151 IRTemp t2
= newTempV128();
14152 // FIXME: double rounding; use FMA primops instead
14153 assign(t1
, triop(opMUL
, mkexpr(rm
), getQReg128(nn
), mkexpr(dupd
)));
14154 assign(t2
, triop(isSUB
? opSUB
: opADD
,
14155 mkexpr(rm
), getQReg128(dd
), mkexpr(t1
)));
14156 putQReg128(dd
, math_MAYBE_ZERO_HI64(bitQ
, t2
));
14157 const HChar
* arr
= bitQ
== 0 ? "2s" : (isD
? "2d" : "4s");
14158 DIP("%s %s.%s, %s.%s, %s.%c[%u]\n", isSUB
? "fmls" : "fmla",
14159 nameQReg128(dd
), arr
, nameQReg128(nn
), arr
, nameQReg128(mm
),
14160 isD
? 'd' : 's', index
);
14164 if (size
>= X10
&& opcode
== BITS4(1,0,0,1)) {
14165 /* -------- 0,1x,1001 FMUL 2d_2d_d[], 4s_4s_s[], 2s_2s_s[] -------- */
14166 /* -------- 1,1x,1001 FMULX 2d_2d_d[], 4s_4s_s[], 2s_2s_s[] -------- */
14167 if (bitQ
== 0 && size
== X11
) return False
; // implied 1d case
14168 Bool isD
= (size
& 1) == 1;
14169 Bool isMULX
= bitU
== 1;
14171 if (!isD
) index
= (bitH
<< 1) | bitL
;
14172 else if (isD
&& bitL
== 0) index
= bitH
;
14173 else return False
; // sz:L == x11 => unallocated encoding
14174 vassert(index
< (isD
? 2 : 4));
14175 IRType ity
= isD
? Ity_F64
: Ity_F32
;
14176 IRTemp elem
= newTemp(ity
);
14177 UInt mm
= (bitM
<< 4) | mmLO4
;
14178 assign(elem
, getQRegLane(mm
, index
, ity
));
14179 IRTemp dupd
= math_DUP_TO_V128(elem
, ity
);
14180 // KLUDGE: FMULX is treated the same way as FMUL. That can't be right.
14181 IRTemp res
= newTempV128();
14182 assign(res
, triop(isD
? Iop_Mul64Fx2
: Iop_Mul32Fx4
,
14183 mkexpr(mk_get_IR_rounding_mode()),
14184 getQReg128(nn
), mkexpr(dupd
)));
14185 putQReg128(dd
, math_MAYBE_ZERO_HI64(bitQ
, res
));
14186 const HChar
* arr
= bitQ
== 0 ? "2s" : (isD
? "2d" : "4s");
14187 DIP("%s %s.%s, %s.%s, %s.%c[%u]\n",
14188 isMULX
? "fmulx" : "fmul", nameQReg128(dd
), arr
,
14189 nameQReg128(nn
), arr
, nameQReg128(mm
), isD
? 'd' : 's', index
);
   if ((bitU == 1 && (opcode == BITS4(0,0,0,0) || opcode == BITS4(0,1,0,0)))
       || (bitU == 0 && opcode == BITS4(1,0,0,0))) {
      /* -------- 1,xx,0000 MLA s/h variants only -------- */
      /* -------- 1,xx,0100 MLS s/h variants only -------- */
      /* -------- 0,xx,1000 MUL s/h variants only -------- */
      Bool isMLA = opcode == BITS4(0,0,0,0);
      Bool isMLS = opcode == BITS4(0,1,0,0);
      UInt mm    = 32; // invalid
      UInt ix    = 16; // invalid
      switch (size) {
         case X00:
            return False; // b case is not allowed
         case X01:
            mm = mmLO4; ix = (bitH << 2) | (bitL << 1) | (bitM << 0); break;
         case X10:
            mm = (bitM << 4) | mmLO4; ix = (bitH << 1) | (bitL << 0); break;
         case X11:
            return False; // d case is not allowed
         default:
            vassert(0);
      }
      vassert(mm < 32 && ix < 16);
      IROp   opMUL = mkVecMUL(size);
      IROp   opADD = mkVecADD(size);
      IROp   opSUB = mkVecSUB(size);
      HChar  ch    = size == X01 ? 'h' : 's';
      IRTemp vecM  = math_DUP_VEC_ELEM(getQReg128(mm), size, ix);
      IRTemp vecD  = newTempV128();
      IRTemp vecN  = newTempV128();
      IRTemp res   = newTempV128();
      assign(vecD, getQReg128(dd));
      assign(vecN, getQReg128(nn));
      IRExpr* prod = binop(opMUL, mkexpr(vecN), mkexpr(vecM));
      if (isMLA || isMLS) {
         assign(res, binop(isMLA ? opADD : opSUB, mkexpr(vecD), prod));
      } else {
         assign(res, prod);
      }
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s, %s.%c[%u]\n", isMLA ? "mla"
                                                : (isMLS ? "mls" : "mul"),
          nameQReg128(dd), arr,
          nameQReg128(nn), arr, nameQReg128(dd), ch, ix);
      return True;
   }
   if (opcode == BITS4(1,0,1,0)
       || opcode == BITS4(0,0,1,0) || opcode == BITS4(0,1,1,0)) {
      /* -------- 0,xx,1010 SMULL s/h variants only -------- */ // 0 (ks)
      /* -------- 1,xx,1010 UMULL s/h variants only -------- */ // 0
      /* -------- 0,xx,0010 SMLAL s/h variants only -------- */ // 1
      /* -------- 1,xx,0010 UMLAL s/h variants only -------- */ // 1
      /* -------- 0,xx,0110 SMLSL s/h variants only -------- */ // 2
      /* -------- 1,xx,0110 UMLSL s/h variants only -------- */ // 2
      /* Widens, and size refers to the narrowed lanes. */
      UInt ks = 3;
      switch (opcode) {
         case BITS4(1,0,1,0): ks = 0; break;
         case BITS4(0,0,1,0): ks = 1; break;
         case BITS4(0,1,1,0): ks = 2; break;
         default: vassert(0);
      }
      vassert(ks >= 0 && ks <= 2);
      Bool isU = bitU == 1;
      Bool is2 = bitQ == 1;
      UInt mm  = 32; // invalid
      UInt ix  = 16; // invalid
      switch (size) {
         case X00:
            return False; // h_b_b[] case is not allowed
         case X01:
            mm = mmLO4; ix = (bitH << 2) | (bitL << 1) | (bitM << 0); break;
         case X10:
            mm = (bitM << 4) | mmLO4; ix = (bitH << 1) | (bitL << 0); break;
         case X11:
            return False; // q_d_d[] case is not allowed
         default:
            vassert(0);
      }
      vassert(mm < 32 && ix < 16);
      IRTemp vecN = newTempV128();
      IRTemp vecM = math_DUP_VEC_ELEM(getQReg128(mm), size, ix);
      IRTemp vecD = newTempV128();
      assign(vecN, getQReg128(nn));
      assign(vecD, getQReg128(dd));
      IRTemp res = IRTemp_INVALID;
      math_MULL_ACC(&res, is2, isU, size, "mas"[ks],
                    vecN, vecM, ks == 0 ? IRTemp_INVALID : vecD);
      putQReg128(dd, mkexpr(res));
      const HChar* nm        = ks == 0 ? "mull" : (ks == 1 ? "mlal" : "mlsl");
      const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
      const HChar* arrWide   = nameArr_Q_SZ(1,    size+1);
      HChar ch               = size == X01 ? 'h' : 's';
      DIP("%c%s%s %s.%s, %s.%s, %s.%c[%u]\n",
          isU ? 'u' : 's', nm, is2 ? "2" : "",
          nameQReg128(dd), arrWide,
          nameQReg128(nn), arrNarrow, nameQReg128(dd), ch, ix);
      return True;
   }
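   /* Illustrative note, not from the original decode logic: the "mas"[ks]
      argument above is just a compact way of passing the character 'm'
      (plain widening multiply, ks == 0), 'a' (accumulate, ks == 1) or 's'
      (subtract, ks == 2), which math_MULL_ACC appears to use to select the
      accumulation behaviour; for ks == 0 no accumulator register is needed,
      hence IRTemp_INVALID is passed in that case. */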
   if (bitU == 0
       && (opcode == BITS4(1,0,1,1)
           || opcode == BITS4(0,0,1,1) || opcode == BITS4(0,1,1,1))) {
      /* -------- 0,xx,1011 SQDMULL s/h variants only -------- */ // 0 (ks)
      /* -------- 0,xx,0011 SQDMLAL s/h variants only -------- */ // 1
      /* -------- 0,xx,0111 SQDMLSL s/h variants only -------- */ // 2
      /* Widens, and size refers to the narrowed lanes. */
      UInt ks = 3;
      switch (opcode) {
         case BITS4(1,0,1,1): ks = 0; break;
         case BITS4(0,0,1,1): ks = 1; break;
         case BITS4(0,1,1,1): ks = 2; break;
         default: vassert(0);
      }
      vassert(ks >= 0 && ks <= 2);
      Bool is2 = bitQ == 1;
      UInt mm  = 32; // invalid
      UInt ix  = 16; // invalid
      switch (size) {
         case X00:
            return False; // h_b_b[] case is not allowed
         case X01:
            mm = mmLO4; ix = (bitH << 2) | (bitL << 1) | (bitM << 0); break;
         case X10:
            mm = (bitM << 4) | mmLO4; ix = (bitH << 1) | (bitL << 0); break;
         case X11:
            return False; // q_d_d[] case is not allowed
         default:
            vassert(0);
      }
      vassert(mm < 32 && ix < 16);
      IRTemp vecN, vecD, res, sat1q, sat1n, sat2q, sat2n;
      vecN = vecD = res = sat1q = sat1n = sat2q = sat2n = IRTemp_INVALID;
      newTempsV128_2(&vecN, &vecD);
      assign(vecN, getQReg128(nn));
      IRTemp vecM = math_DUP_VEC_ELEM(getQReg128(mm), size, ix);
      assign(vecD, getQReg128(dd));
      math_SQDMULL_ACC(&res, &sat1q, &sat1n, &sat2q, &sat2n,
                       is2, size, "mas"[ks],
                       vecN, vecM, ks == 0 ? IRTemp_INVALID : vecD);
      putQReg128(dd, mkexpr(res));
      vassert(sat1q != IRTemp_INVALID && sat1n != IRTemp_INVALID);
      updateQCFLAGwithDifference(sat1q, sat1n);
      if (sat2q != IRTemp_INVALID || sat2n != IRTemp_INVALID) {
         updateQCFLAGwithDifference(sat2q, sat2n);
      }
      const HChar* nm        = ks == 0 ? "sqdmull"
                                       : (ks == 1 ? "sqdmlal" : "sqdmlsl");
      const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
      const HChar* arrWide   = nameArr_Q_SZ(1,    size+1);
      HChar ch               = size == X01 ? 'h' : 's';
      DIP("%s%s %s.%s, %s.%s, %s.%c[%u]\n",
          nm, is2 ? "2" : "",
          nameQReg128(dd), arrWide,
          nameQReg128(nn), arrNarrow, nameQReg128(dd), ch, ix);
      return True;
   }
   if (bitU == 0 && (opcode == BITS4(1,1,0,0) || opcode == BITS4(1,1,0,1))) {
      /* -------- 0,xx,1100 SQDMULH  s and h variants only -------- */
      /* -------- 0,xx,1101 SQRDMULH s and h variants only -------- */
      UInt mm  = 32; // invalid
      UInt ix  = 16; // invalid
      switch (size) {
         case X00:
            return False; // b case is not allowed
         case X01:
            mm = mmLO4; ix = (bitH << 2) | (bitL << 1) | (bitM << 0); break;
         case X10:
            mm = (bitM << 4) | mmLO4; ix = (bitH << 1) | (bitL << 0); break;
         case X11:
            return False; // q case is not allowed
         default:
            vassert(0);
      }
      vassert(mm < 32 && ix < 16);
      Bool isR = opcode == BITS4(1,1,0,1);
      IRTemp res, sat1q, sat1n, vN, vM;
      res = sat1q = sat1n = vN = vM = IRTemp_INVALID;
      vN = newTempV128();
      assign(vN, getQReg128(nn));
      vM = math_DUP_VEC_ELEM(getQReg128(mm), size, ix);
      math_SQDMULH(&res, &sat1q, &sat1n, isR, size, vN, vM);
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      IROp opZHI = bitQ == 0 ? Iop_ZeroHI64ofV128 : Iop_INVALID;
      updateQCFLAGwithDifferenceZHI(sat1q, sat1n, opZHI);
      const HChar* nm  = isR ? "sqrdmulh" : "sqdmulh";
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      HChar ch         = size == X01 ? 'h' : 's';
      DIP("%s %s.%s, %s.%s, %s.%c[%u]\n", nm,
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(dd), ch, ix);
      return True;
   }
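   /* Illustrative note, not from the original decode logic:
      updateQCFLAGwithDifferenceZHI sets the sticky QC flag whenever the
      saturated (sat1q) and unsaturated (sat1n) results differ.  Passing
      Iop_ZeroHI64ofV128 when Q == 0 restricts that comparison to the low
      64 bits, so lanes the 64-bit form of the instruction does not write
      cannot spuriously set QC. */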
   if (bitU == 1 && (opcode == BITS4(1,1,0,1) || opcode == BITS4(1,1,1,1))) {
      /* -------- 1,xx,1101 SQRDMLAH s and h variants only -------- */
      /* -------- 1,xx,1111 SQRDMLSH s and h variants only -------- */
      UInt mm  = 32; // invalid
      UInt ix  = 16; // invalid
      switch (size) {
         case X00:
            return False; // b case is not allowed
         case X01:
            mm = mmLO4; ix = (bitH << 2) | (bitL << 1) | (bitM << 0); break;
         case X10:
            mm = (bitM << 4) | mmLO4; ix = (bitH << 1) | (bitL << 0); break;
         case X11:
            return False; // d case is not allowed
         default:
            vassert(0);
      }
      vassert(mm < 32 && ix < 16);

      IRTemp res, res_nosat, vD, vN, vM;
      res = res_nosat = vD = vN = vM = IRTemp_INVALID;
      newTempsV128_2(&vD, &vN);
      assign(vD, getQReg128(dd));
      assign(vN, getQReg128(nn));

      vM = math_DUP_VEC_ELEM(getQReg128(mm), size, ix);
      Bool isAdd = opcode == BITS4(1,1,0,1);
      math_SQRDMLAH(&res, &res_nosat, isAdd, size, vD, vN, vM);
      IROp opZHI = bitQ == 0 ? Iop_ZeroHI64ofV128 : Iop_INVALID;
      updateQCFLAGwithDifferenceZHI(res, res_nosat, opZHI);
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));

      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      const HChar* nm  = isAdd ? "sqrdmlah" : "sqrdmlsh";
      HChar ch         = size == X01 ? 'h' : 's';
      DIP("%s %s.%s, %s.%s, %s.%c[%u]\n", nm,
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), ch, ix);
      return True;
   }

   return False;
#  undef INSN
}
static
Bool dis_AdvSIMD_crypto_aes(/*MB_OUT*/DisResult* dres, UInt insn)
{
   /* 31        23   21    16     11 9 4
      0100 1110 size 10100 opcode 10 n d
      Decode fields are: size,opcode
      Size is always 00 in ARMv8, it appears.
   */
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   if (INSN(31,24) != BITS8(0,1,0,0,1,1,1,0)
       || INSN(21,17) != BITS5(1,0,1,0,0) || INSN(11,10) != BITS2(1,0)) {
      return False;
   }
   UInt size   = INSN(23,22);
   UInt opcode = INSN(16,12);
   UInt nn     = INSN(9,5);
   UInt dd     = INSN(4,0);

   if (size == BITS2(0,0)
       && (opcode == BITS5(0,0,1,0,0) || opcode == BITS5(0,0,1,0,1))) {
      /* -------- 00,00100: AESE Vd.16b, Vn.16b -------- */
      /* -------- 00,00101: AESD Vd.16b, Vn.16b -------- */
      Bool   isD  = opcode == BITS5(0,0,1,0,1);
      IRTemp op1  = newTemp(Ity_V128);
      IRTemp op2  = newTemp(Ity_V128);
      IRTemp xord = newTemp(Ity_V128);
      IRTemp res  = newTemp(Ity_V128);
      void*        helper = isD ? &arm64g_dirtyhelper_AESD
                                : &arm64g_dirtyhelper_AESE;
      const HChar* hname  = isD ? "arm64g_dirtyhelper_AESD"
                                : "arm64g_dirtyhelper_AESE";
      assign(op1, getQReg128(dd));
      assign(op2, getQReg128(nn));
      assign(xord, binop(Iop_XorV128, mkexpr(op1), mkexpr(op2)));
      IRDirty* di
         = unsafeIRDirty_1_N( res, 0/*regparms*/, hname, helper,
                              mkIRExprVec_3(
                                 IRExpr_VECRET(),
                                 unop(Iop_V128HIto64, mkexpr(xord)),
                                 unop(Iop_V128to64, mkexpr(xord)) ) );
      stmt(IRStmt_Dirty(di));
      putQReg128(dd, mkexpr(res));
      DIP("aes%c %s.16b, %s.16b\n", isD ? 'd' : 'e',
          nameQReg128(dd), nameQReg128(nn));
      return True;
   }

   if (size == BITS2(0,0)
       && (opcode == BITS5(0,0,1,1,0) || opcode == BITS5(0,0,1,1,1))) {
      /* -------- 00,00110: AESMC  Vd.16b, Vn.16b -------- */
      /* -------- 00,00111: AESIMC Vd.16b, Vn.16b -------- */
      Bool   isI = opcode == BITS5(0,0,1,1,1);
      IRTemp src = newTemp(Ity_V128);
      IRTemp res = newTemp(Ity_V128);
      void*        helper = isI ? &arm64g_dirtyhelper_AESIMC
                                : &arm64g_dirtyhelper_AESMC;
      const HChar* hname  = isI ? "arm64g_dirtyhelper_AESIMC"
                                : "arm64g_dirtyhelper_AESMC";
      assign(src, getQReg128(nn));
      IRDirty* di
         = unsafeIRDirty_1_N( res, 0/*regparms*/, hname, helper,
                              mkIRExprVec_3(
                                 IRExpr_VECRET(),
                                 unop(Iop_V128HIto64, mkexpr(src)),
                                 unop(Iop_V128to64, mkexpr(src)) ) );
      stmt(IRStmt_Dirty(di));
      putQReg128(dd, mkexpr(res));
      DIP("aes%s %s.16b, %s.16b\n", isI ? "imc" : "mc",
          nameQReg128(dd), nameQReg128(nn));
      return True;
   }

   return False;
#  undef INSN
}
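/* Illustrative note, not from the original: for AESE/AESD the architectural
   operation is AddRoundKey (an XOR of the two registers) followed by
   SubBytes/ShiftRows (or their inverses).  The XOR is done inline in IR
   above, and only the combined xord value is handed to the dirty helper,
   which performs the byte substitution and row shifting. */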
static
Bool dis_AdvSIMD_crypto_three_reg_sha(/*MB_OUT*/DisResult* dres, UInt insn)
{
   /* 31   28   23 21 20 15 14  11 9 4
      0101 1110 sz 0  m  0  opc 00 n d
      Decode fields are: sz,opc
   */
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   if (INSN(31,24) != BITS8(0,1,0,1,1,1,1,0) || INSN(21,21) != 0
       || INSN(15,15) != 0 || INSN(11,10) != BITS2(0,0)) {
      return False;
   }
   UInt sz  = INSN(23,22);
   UInt mm  = INSN(20,16);
   UInt opc = INSN(14,12);
   UInt nn  = INSN(9,5);
   UInt dd  = INSN(4,0);
   if (sz == BITS2(0,0) && opc <= BITS3(1,1,0)) {
      /* -------- 00,000 SHA1C     Qd,    Sn,    Vm.4S -------- */
      /* -------- 00,001 SHA1P     Qd,    Sn,    Vm.4S -------- */
      /* -------- 00,010 SHA1M     Qd,    Sn,    Vm.4S -------- */
      /* -------- 00,011 SHA1SU0   Vd.4S, Vn.4S, Vm.4S -------- */
      /* -------- 00,100 SHA256H   Qd,    Qn,    Vm.4S -------- */
      /* -------- 00,101 SHA256H2  Qd,    Qn,    Vm.4S -------- */
      /* -------- 00,110 SHA256SU1 Vd.4S, Vn.4S, Vm.4S -------- */
      const HChar* inames[7]
         = { "sha1c", "sha1p", "sha1m", "sha1su0",
             "sha256h", "sha256h2", "sha256su1" };
      void(*helpers[7])(V128*,ULong,ULong,ULong,ULong,ULong,ULong)
         = { &arm64g_dirtyhelper_SHA1C,    &arm64g_dirtyhelper_SHA1P,
             &arm64g_dirtyhelper_SHA1M,    &arm64g_dirtyhelper_SHA1SU0,
             &arm64g_dirtyhelper_SHA256H,  &arm64g_dirtyhelper_SHA256H2,
             &arm64g_dirtyhelper_SHA256SU1 };
      const HChar* hnames[7]
         = { "arm64g_dirtyhelper_SHA1C",    "arm64g_dirtyhelper_SHA1P",
             "arm64g_dirtyhelper_SHA1M",    "arm64g_dirtyhelper_SHA1SU0",
             "arm64g_dirtyhelper_SHA256H",  "arm64g_dirtyhelper_SHA256H2",
             "arm64g_dirtyhelper_SHA256SU1" };
      IRTemp vD      = newTemp(Ity_V128);
      IRTemp vN      = newTemp(Ity_V128);
      IRTemp vM      = newTemp(Ity_V128);
      IRTemp vDhi    = newTemp(Ity_I64);
      IRTemp vDlo    = newTemp(Ity_I64);
      IRTemp vNhiPre = newTemp(Ity_I64);
      IRTemp vNloPre = newTemp(Ity_I64);
      IRTemp vNhi    = newTemp(Ity_I64);
      IRTemp vNlo    = newTemp(Ity_I64);
      IRTemp vMhi    = newTemp(Ity_I64);
      IRTemp vMlo    = newTemp(Ity_I64);
      assign(vD,      getQReg128(dd));
      assign(vN,      getQReg128(nn));
      assign(vM,      getQReg128(mm));
      assign(vDhi,    unop(Iop_V128HIto64, mkexpr(vD)));
      assign(vDlo,    unop(Iop_V128to64,   mkexpr(vD)));
      assign(vNhiPre, unop(Iop_V128HIto64, mkexpr(vN)));
      assign(vNloPre, unop(Iop_V128to64,   mkexpr(vN)));
      assign(vMhi,    unop(Iop_V128HIto64, mkexpr(vM)));
      assign(vMlo,    unop(Iop_V128to64,   mkexpr(vM)));
      /* Mask off any bits of the N register operand that aren't actually
         needed, so that Memcheck doesn't complain unnecessarily. */
      switch (opc) {
         case BITS3(0,0,0): case BITS3(0,0,1): case BITS3(0,1,0):
            assign(vNhi, mkU64(0));
            assign(vNlo, unop(Iop_32Uto64, unop(Iop_64to32, mkexpr(vNloPre))));
            break;
         case BITS3(0,1,1): case BITS3(1,0,0):
         case BITS3(1,0,1): case BITS3(1,1,0):
            assign(vNhi, mkexpr(vNhiPre));
            assign(vNlo, mkexpr(vNloPre));
            break;
         default:
            vassert(0);
      }
      IRTemp res = newTemp(Ity_V128);
      IRDirty* di
         = unsafeIRDirty_1_N( res, 0/*regparms*/, hnames[opc], helpers[opc],
                              mkIRExprVec_7(
                                 IRExpr_VECRET(),
                                 mkexpr(vDhi), mkexpr(vDlo), mkexpr(vNhi),
                                 mkexpr(vNlo), mkexpr(vMhi), mkexpr(vMlo)));
      stmt(IRStmt_Dirty(di));
      putQReg128(dd, mkexpr(res));
      switch (opc) {
         case BITS3(0,0,0): case BITS3(0,0,1): case BITS3(0,1,0):
            DIP("%s q%u, s%u, v%u.4s\n", inames[opc], dd, nn, mm);
            break;
         case BITS3(0,1,1): case BITS3(1,1,0):
            DIP("%s v%u.4s, v%u.4s, v%u.4s\n", inames[opc], dd, nn, mm);
            break;
         case BITS3(1,0,0): case BITS3(1,0,1):
            DIP("%s q%u, q%u, v%u.4s\n", inames[opc], dd, nn, mm);
            break;
         default:
            vassert(0);
      }
      return True;
   }

   return False;
#  undef INSN
}
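/* Illustrative note, not from the original: the masking above follows the
   operand shapes listed in the comments.  SHA1C/SHA1P/SHA1M take their
   second operand as a 32-bit Sn value, so only the low 32 bits of the N
   register are meaningful and the rest are zeroed to keep Memcheck quiet;
   the remaining four instructions consume the full Vn.4S value. */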
static
Bool dis_AdvSIMD_crypto_two_reg_sha(/*MB_OUT*/DisResult* dres, UInt insn)
{
   /* 31   28   23 21    16  11 9 4
      0101 1110 sz 10100 opc 10 n d
      Decode fields are: sz,opc
   */
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   if (INSN(31,24) != BITS8(0,1,0,1,1,1,1,0)
       || INSN(21,17) != BITS5(1,0,1,0,0) || INSN(11,10) != BITS2(1,0)) {
      return False;
   }
   UInt sz  = INSN(23,22);
   UInt opc = INSN(16,12);
   UInt nn  = INSN(9,5);
   UInt dd  = INSN(4,0);
   if (sz == BITS2(0,0) && opc <= BITS5(0,0,0,1,0)) {
      /* -------- 00,00000 SHA1H     Sd,    Sn    -------- */
      /* -------- 00,00001 SHA1SU1   Vd.4S, Vn.4S -------- */
      /* -------- 00,00010 SHA256SU0 Vd.4S, Vn.4S -------- */
      const HChar* inames[3] = { "sha1h", "sha1su1", "sha256su0" };
      IRTemp vD   = newTemp(Ity_V128);
      IRTemp vN   = newTemp(Ity_V128);
      IRTemp vDhi = newTemp(Ity_I64);
      IRTemp vDlo = newTemp(Ity_I64);
      IRTemp vNhi = newTemp(Ity_I64);
      IRTemp vNlo = newTemp(Ity_I64);
      assign(vD,   getQReg128(dd));
      assign(vN,   getQReg128(nn));
      assign(vDhi, unop(Iop_V128HIto64, mkexpr(vD)));
      assign(vDlo, unop(Iop_V128to64,   mkexpr(vD)));
      assign(vNhi, unop(Iop_V128HIto64, mkexpr(vN)));
      assign(vNlo, unop(Iop_V128to64,   mkexpr(vN)));
      /* Mask off any bits of the N register operand that aren't actually
         needed, so that Memcheck doesn't complain unnecessarily.  Also
         construct the calls, given that the helper functions don't take
         the same number of arguments. */
      IRDirty* di  = NULL;
      IRTemp   res = newTemp(Ity_V128);
      switch (opc) {
         case BITS5(0,0,0,0,0): {
            IRExpr* vNloMasked = unop(Iop_32Uto64,
                                      unop(Iop_64to32, mkexpr(vNlo)));
            di = unsafeIRDirty_1_N( res, 0/*regparms*/,
                                    "arm64g_dirtyhelper_SHA1H",
                                    &arm64g_dirtyhelper_SHA1H,
                                    mkIRExprVec_3(
                                       IRExpr_VECRET(),
                                       mkU64(0), vNloMasked) );
            break;
         }
         case BITS5(0,0,0,0,1):
            di = unsafeIRDirty_1_N( res, 0/*regparms*/,
                                    "arm64g_dirtyhelper_SHA1SU1",
                                    &arm64g_dirtyhelper_SHA1SU1,
                                    mkIRExprVec_5(
                                       IRExpr_VECRET(),
                                       mkexpr(vDhi), mkexpr(vDlo),
                                       mkexpr(vNhi), mkexpr(vNlo)) );
            break;
         case BITS5(0,0,0,1,0):
            di = unsafeIRDirty_1_N( res, 0/*regparms*/,
                                    "arm64g_dirtyhelper_SHA256SU0",
                                    &arm64g_dirtyhelper_SHA256SU0,
                                    mkIRExprVec_5(
                                       IRExpr_VECRET(),
                                       mkexpr(vDhi), mkexpr(vDlo),
                                       mkexpr(vNhi), mkexpr(vNlo)) );
            break;
         default:
            vassert(0);
      }
      stmt(IRStmt_Dirty(di));
      putQReg128(dd, mkexpr(res));
      switch (opc) {
         case BITS5(0,0,0,0,0):
            DIP("%s s%u, s%u\n", inames[opc], dd, nn);
            break;
         case BITS5(0,0,0,0,1): case BITS5(0,0,0,1,0):
            DIP("%s v%u.4s, v%u.4s\n", inames[opc], dd, nn);
            break;
         default:
            vassert(0);
      }
      return True;
   }

   return False;
#  undef INSN
}
static
Bool dis_AdvSIMD_fp_compare(/*MB_OUT*/DisResult* dres, UInt insn)
{
   /* 31  28    23 21 20 15 13   9 4
      000 11110 ty 1  m  op 1000 n opcode2
      The first 3 bits are really "M 0 S", but M and S are always zero.
      Decode fields are: ty,op,opcode2
   */
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   if (INSN(31,24) != BITS8(0,0,0,1,1,1,1,0)
       || INSN(21,21) != 1 || INSN(13,10) != BITS4(1,0,0,0)) {
      return False;
   }
   UInt ty      = INSN(23,22);
   UInt mm      = INSN(20,16);
   UInt op      = INSN(15,14);
   UInt nn      = INSN(9,5);
   UInt opcode2 = INSN(4,0);

   if (ty <= X01 && op == X00
       && (opcode2 & BITS5(0,0,1,1,1)) == BITS5(0,0,0,0,0)) {
      /* -------- 0x,00,00000 FCMP  d_d,  s_s  -------- */
      /* -------- 0x,00,01000 FCMP  d_#0, s_#0 -------- */
      /* -------- 0x,00,10000 FCMPE d_d,  s_s  -------- */
      /* -------- 0x,00,11000 FCMPE d_#0, s_#0 -------- */
      /* 31        23   20    15      9 4
         000 11110 01 1 m     00 1000 n 10 000  FCMPE Dn, Dm
         000 11110 01 1 00000 00 1000 n 11 000  FCMPE Dn, #0.0
         000 11110 01 1 m     00 1000 n 00 000  FCMP  Dn, Dm
         000 11110 01 1 00000 00 1000 n 01 000  FCMP  Dn, #0.0

         000 11110 00 1 m     00 1000 n 10 000  FCMPE Sn, Sm
         000 11110 00 1 00000 00 1000 n 11 000  FCMPE Sn, #0.0
         000 11110 00 1 m     00 1000 n 00 000  FCMP  Sn, Sm
         000 11110 00 1 00000 00 1000 n 01 000  FCMP  Sn, #0.0

         FCMPE generates Invalid Operation exn if either arg is any kind
         of NaN.  FCMP generates Invalid Operation exn if either arg is a
         signalling NaN.  We ignore this detail here and produce the same
         IR for both.
      */
      Bool   isD     = (ty & 1) == 1;
      Bool   isCMPE  = (opcode2 & 16) == 16;
      Bool   cmpZero = (opcode2 & 8) == 8;
      IRType ity     = isD ? Ity_F64 : Ity_F32;
      Bool   valid   = True;
      if (cmpZero && mm != 0) valid = False;
      if (valid) {
         IRTemp argL  = newTemp(ity);
         IRTemp argR  = newTemp(ity);
         IRTemp irRes = newTemp(Ity_I32);
         assign(argL, getQRegLO(nn, ity));
         assign(argR,
                cmpZero
                   ? (IRExpr_Const(isD ? IRConst_F64i(0) : IRConst_F32i(0)))
                   : getQRegLO(mm, ity));
         assign(irRes, binop(isD ? Iop_CmpF64 : Iop_CmpF32,
                             mkexpr(argL), mkexpr(argR)));
         IRTemp nzcv      = mk_convert_IRCmpF64Result_to_NZCV(irRes);
         IRTemp nzcv_28x0 = newTemp(Ity_I64);
         assign(nzcv_28x0, binop(Iop_Shl64, mkexpr(nzcv), mkU8(28)));
         setFlags_COPY(nzcv_28x0);
         DIP("fcmp%s %s, %s\n", isCMPE ? "e" : "", nameQRegLO(nn, ity),
             cmpZero ? "#0.0" : nameQRegLO(mm, ity));
         return True;
      }
   }

   return False;
#  undef INSN
}
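/* Illustrative note, not from the original: the NZCV value built above
   lands in bits 31:28 of the flags word, hence the shift by 28.  For
   example, an FCMP of two equal, ordered values yields N=0 Z=1 C=1 V=0,
   i.e. 0x6 << 28 = 0x60000000, while an unordered compare (either operand
   a NaN) yields N=0 Z=0 C=1 V=1 = 0x30000000. */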
static
Bool dis_AdvSIMD_fp_conditional_compare(/*MB_OUT*/DisResult* dres, UInt insn,
                                         const VexArchInfo* archinfo,
                                         Bool sigill_diag)
{
   /* 31  28    23 21 20 15   11 9 4  3
      000 11110 ty 1  m  cond 01 n op nzcv
      The first 3 bits are really "M 0 S", but M and S are always zero.
      Decode fields are: ty,op
   */
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   if (INSN(31,24) != BITS8(0,0,0,1,1,1,1,0)
       || INSN(21,21) != 1 || INSN(11,10) != BITS2(0,1)) {
      return False;
   }
   UInt ty   = INSN(23,22);
   UInt mm   = INSN(20,16);
   UInt cond = INSN(15,12);
   UInt nn   = INSN(9,5);
   UInt op   = INSN(4,4);
   UInt nzcv = INSN(3,0);
   vassert(ty < 4 && op <= 1);

   /* -------- 00,0 FCCMP  s_s -------- */
   /* -------- 00,1 FCCMPE s_s -------- */
   /* -------- 01,0 FCCMP  d_d -------- */
   /* -------- 01,1 FCCMPE d_d -------- */
   /* -------- 11,0 FCCMP  h_h -------- */
   /* -------- 11,1 FCCMPE h_h -------- */

   /* FCCMPE generates Invalid Operation exn if either arg is any kind
      of NaN.  FCCMP generates Invalid Operation exn if either arg is a
      signalling NaN.  We ignore this detail here and produce the same
      IR for both. */
   Bool   isCMPE = op == 1;
   IRType ity    = Ity_INVALID;
   IROp   irop   = Iop_INVALID;
   if (ty == 0) {
      ity  = Ity_F32;
      irop = Iop_CmpF32;
   }
   else if (ty == 1) {
      ity  = Ity_F64;
      irop = Iop_CmpF64;
   }
   else if (ty == 3) {
      if ((archinfo->hwcaps & VEX_HWCAPS_ARM64_FP16) == 0)
         return False;
      ity  = Ity_F16;
      irop = Iop_CmpF16;
   }
   else {
      /* ty = 2 is an illegal encoding */
      if (sigill_diag)
         vex_printf("ARM64 front end: dis_AdvSIMD_fp_conditional_compare\n");
      return False;
   }
   IRTemp argL  = newTemp(ity);
   IRTemp argR  = newTemp(ity);
   IRTemp irRes = newTemp(Ity_I32);
   assign(argL,  getQRegLO(nn, ity));
   assign(argR,  getQRegLO(mm, ity));
   assign(irRes, binop(irop, mkexpr(argL), mkexpr(argR)));
   IRTemp condT = newTemp(Ity_I1);
   assign(condT, unop(Iop_64to1, mk_arm64g_calculate_condition(cond)));
   IRTemp nzcvT = mk_convert_IRCmpF64Result_to_NZCV(irRes);

   IRTemp nzcvT_28x0 = newTemp(Ity_I64);
   assign(nzcvT_28x0, binop(Iop_Shl64, mkexpr(nzcvT), mkU8(28)));

   IRExpr* nzcvF_28x0 = mkU64(((ULong)nzcv) << 28);

   IRTemp nzcv_28x0 = newTemp(Ity_I64);
   assign(nzcv_28x0, IRExpr_ITE(mkexpr(condT),
                                mkexpr(nzcvT_28x0), nzcvF_28x0));
   setFlags_COPY(nzcv_28x0);
   DIP("fccmp%s %s, %s, #%u, %s\n", isCMPE ? "e" : "",
       nameQRegLO(nn, ity), nameQRegLO(mm, ity), nzcv, nameCC(cond));
   return True;
#  undef INSN
}
static
Bool dis_AdvSIMD_fp_conditional_select(/*MB_OUT*/DisResult* dres, UInt insn)
{
   /* 31        23 21 20 15   11 9 5
      000 11110 ty 1  m  cond 11 n d
      The first 3 bits are really "M 0 S", but M and S are always zero.
      Decode fields: ty
   */
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   if (INSN(31,24) != BITS8(0,0,0,1,1,1,1,0) || INSN(21,21) != 1
       || INSN(11,10) != BITS2(1,1)) {
      return False;
   }
   UInt ty   = INSN(23,22);
   UInt mm   = INSN(20,16);
   UInt cond = INSN(15,12);
   UInt nn   = INSN(9,5);
   UInt dd   = INSN(4,0);
   if (ty <= X01) {
      /* -------- 00: FCSEL s_s -------- */
      /* -------- 01: FCSEL d_d -------- */
      IRType ity  = ty == X01 ? Ity_F64 : Ity_F32;
      IRTemp srcT = newTemp(ity);
      IRTemp srcF = newTemp(ity);
      IRTemp res  = newTemp(ity);
      assign(srcT, getQRegLO(nn, ity));
      assign(srcF, getQRegLO(mm, ity));
      assign(res, IRExpr_ITE(
                     unop(Iop_64to1, mk_arm64g_calculate_condition(cond)),
                     mkexpr(srcT), mkexpr(srcF)));
      putQReg128(dd, mkV128(0x0000));
      putQRegLO(dd, mkexpr(res));
      DIP("fcsel %s, %s, %s, %s\n",
          nameQRegLO(dd, ity), nameQRegLO(nn, ity), nameQRegLO(mm, ity),
          nameCC(cond));
      return True;
   }

   return False;
#  undef INSN
}
static
Bool dis_AdvSIMD_fp_data_proc_1_source(/*MB_OUT*/DisResult* dres, UInt insn)
{
   /* 31  28    23 21 20     14    9 4
      000 11110 ty 1  opcode 10000 n d
      The first 3 bits are really "M 0 S", but M and S are always zero.
      Decode fields: ty,opcode
   */
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   if (INSN(31,24) != BITS8(0,0,0,1,1,1,1,0)
       || INSN(21,21) != 1 || INSN(14,10) != BITS5(1,0,0,0,0)) {
      return False;
   }
   UInt ty     = INSN(23,22);
   UInt opcode = INSN(20,15);
   UInt nn     = INSN(9,5);
   UInt dd     = INSN(4,0);

   if (opcode <= BITS6(0,0,0,0,1,1)) {
      /* -------- 0x,000000: FMOV  d_d, s_s -------- */
      /* -------- 0x,000001: FABS  d_d, s_s, h_h --- */
      /* -------- 0x,000010: FNEG  d_d, s_s, h_h --- */
      /* -------- 0x,000011: FSQRT d_d, s_s, h_h --- */
      IRType ity = Ity_INVALID;
      if      (ty == X01) ity = Ity_F64;
      else if (ty == X00) ity = Ity_F32;
      else if (ty == X11) ity = Ity_F16;
      else return False;
      IRTemp src = newTemp(ity);
      IRTemp res = newTemp(ity);
      const HChar* nm = "??";
      assign(src, getQRegLO(nn, ity));
      switch (opcode) {
         case BITS6(0,0,0,0,0,0):
            nm = "fmov"; assign(res, mkexpr(src)); break;
         case BITS6(0,0,0,0,0,1):
            nm = "fabs"; assign(res, unop(mkABSF(ity), mkexpr(src))); break;
         case BITS6(0,0,0,0,1,0):
            nm = "fneg"; assign(res, unop(mkNEGF(ity), mkexpr(src))); break;
         case BITS6(0,0,0,0,1,1):
            nm = "fsqrt";
            assign(res, binop(mkSQRTF(ity),
                              mkexpr(mk_get_IR_rounding_mode()),
                              mkexpr(src))); break;
         default:
            vassert(0);
      }
      putQReg128(dd, mkV128(0x0000));
      putQRegLO(dd, mkexpr(res));
      DIP("%s %s, %s\n", nm, nameQRegLO(dd, ity), nameQRegLO(nn, ity));
      return True;
   }

   if (   (ty == X11 && (opcode == BITS6(0,0,0,1,0,0)
                         || opcode == BITS6(0,0,0,1,0,1)))
       || (ty == X00 && (opcode == BITS6(0,0,0,1,1,1)
                         || opcode == BITS6(0,0,0,1,0,1)))
       || (ty == X01 && (opcode == BITS6(0,0,0,1,1,1)
                         || opcode == BITS6(0,0,0,1,0,0)))) {
      /* -------- 11,000100: FCVT s_h -------- */
      /* -------- 11,000101: FCVT d_h -------- */
      /* -------- 00,000111: FCVT h_s -------- */
      /* -------- 00,000101: FCVT d_s -------- */
      /* -------- 01,000111: FCVT h_d -------- */
      /* -------- 01,000100: FCVT s_d -------- */
      /* 31        23 21    16 14    9 4
         000 11110 11 10001 00 10000 n d   FCVT Sd, Hn
         --------- 11 ----- 01 ---------   FCVT Dd, Hn
         --------- 00 ----- 11 ---------   FCVT Hd, Sn
         --------- 00 ----- 01 ---------   FCVT Dd, Sn
         --------- 01 ----- 11 ---------   FCVT Hd, Dn
         --------- 01 ----- 00 ---------   FCVT Sd, Dn
         Rounding, when dst is smaller than src, is per the FPCR.
      */
      UInt b2322 = ty;
      UInt b1615 = opcode & BITS2(1,1);
      switch ((b2322 << 2) | b1615) {
         case BITS4(0,0,0,1):   // S -> D
         case BITS4(1,1,0,1): { // H -> D
            Bool   srcIsH = b2322 == BITS2(1,1);
            IRType srcTy  = srcIsH ? Ity_F16 : Ity_F32;
            IRTemp res    = newTemp(Ity_F64);
            assign(res, unop(srcIsH ? Iop_F16toF64 : Iop_F32toF64,
                             getQRegLO(nn, srcTy)));
            putQReg128(dd, mkV128(0x0000));
            putQRegLO(dd, mkexpr(res));
            DIP("fcvt %s, %s\n",
                nameQRegLO(dd, Ity_F64), nameQRegLO(nn, srcTy));
            return True;
         }
         case BITS4(0,1,0,0):   // D -> S
         case BITS4(0,1,1,1): { // D -> H
            Bool   dstIsH = b1615 == BITS2(1,1);
            IRType dstTy  = dstIsH ? Ity_F16 : Ity_F32;
            IRTemp res    = newTemp(dstTy);
            assign(res, binop(dstIsH ? Iop_F64toF16 : Iop_F64toF32,
                              mkexpr(mk_get_IR_rounding_mode()),
                              getQRegLO(nn, Ity_F64)));
            putQReg128(dd, mkV128(0x0000));
            putQRegLO(dd, mkexpr(res));
            DIP("fcvt %s, %s\n",
                nameQRegLO(dd, dstTy), nameQRegLO(nn, Ity_F64));
            return True;
         }
         case BITS4(0,0,1,1):   // S -> H
         case BITS4(1,1,0,0): { // H -> S
            Bool   toH   = b1615 == BITS2(1,1);
            IRType srcTy = toH ? Ity_F32 : Ity_F16;
            IRType dstTy = toH ? Ity_F16 : Ity_F32;
            IRTemp res   = newTemp(dstTy);
            if (toH) {
               assign(res, binop(Iop_F32toF16,
                                 mkexpr(mk_get_IR_rounding_mode()),
                                 getQRegLO(nn, srcTy)));
            } else {
               assign(res, unop(Iop_F16toF32,
                                getQRegLO(nn, srcTy)));
            }
            putQReg128(dd, mkV128(0x0000));
            putQRegLO(dd, mkexpr(res));
            DIP("fcvt %s, %s\n",
                nameQRegLO(dd, dstTy), nameQRegLO(nn, srcTy));
            return True;
         }
         default:
            break;
      }
      /* else unhandled */
      return False;
   }

   if (ty <= X01
       && opcode >= BITS6(0,0,1,0,0,0) && opcode <= BITS6(0,0,1,1,1,1)
       && opcode != BITS6(0,0,1,1,0,1)) {
      /* -------- 0x,001000 FRINTN d_d, s_s -------- */
      /* -------- 0x,001001 FRINTP d_d, s_s -------- */
      /* -------- 0x,001010 FRINTM d_d, s_s -------- */
      /* -------- 0x,001011 FRINTZ d_d, s_s -------- */
      /* -------- 0x,001100 FRINTA d_d, s_s -------- */
      /* -------- 0x,001110 FRINTX d_d, s_s -------- */
      /* -------- 0x,001111 FRINTI d_d, s_s -------- */
      /* 31        23 21   17  14    9 4
         000 11110 0x 1001 111 10000 n d  FRINTI Fd, Fm (round per FPCR)
         x==0 => S-registers, x==1 => D-registers
         rm (17:15) encodings:
            111 per FPCR (FRINTI)
            001 +inf     (FRINTP)
            010 -inf     (FRINTM)
            011 zero     (FRINTZ)
            000 tieeven  (FRINTN) -- !! FIXME KLUDGED !!
            100 tieaway  (FRINTA) -- !! FIXME KLUDGED !!
            110 per FPCR + "exact = TRUE" (FRINTX)
            101 unallocated
      */
      Bool    isD   = (ty & 1) == 1;
      UInt    rm    = opcode & BITS6(0,0,0,1,1,1);
      IRType  ity   = isD ? Ity_F64 : Ity_F32;
      IRExpr* irrmE = NULL;
      HChar   ch    = '?';
      switch (rm) {
         case BITS3(0,1,1): ch = 'z'; irrmE = mkU32(Irrm_ZERO); break;
         case BITS3(0,1,0): ch = 'm'; irrmE = mkU32(Irrm_NegINF); break;
         case BITS3(0,0,1): ch = 'p'; irrmE = mkU32(Irrm_PosINF); break;
         // The following is a kludge.  Should be: Irrm_NEAREST_TIE_AWAY_0
         case BITS3(1,0,0): ch = 'a'; irrmE = mkU32(Irrm_NEAREST); break;
         // I am unsure about the following, due to the "integral exact"
         // description in the manual.  What does it mean? (frintx, that is)
         case BITS3(1,1,0):
            ch = 'x'; irrmE = mkexpr(mk_get_IR_rounding_mode()); break;
         case BITS3(1,1,1):
            ch = 'i'; irrmE = mkexpr(mk_get_IR_rounding_mode()); break;
         // The following is a kludge.  There's no Irrm_ value to represent
         // this ("to nearest, with ties to even")
         case BITS3(0,0,0): ch = 'n'; irrmE = mkU32(Irrm_NEAREST); break;
         default: break;
      }
      if (irrmE) {
         IRTemp src = newTemp(ity);
         IRTemp dst = newTemp(ity);
         assign(src, getQRegLO(nn, ity));
         assign(dst, binop(isD ? Iop_RoundF64toInt : Iop_RoundF32toInt,
                           irrmE, mkexpr(src)));
         putQReg128(dd, mkV128(0x0000));
         putQRegLO(dd, mkexpr(dst));
         DIP("frint%c %s, %s\n",
             ch, nameQRegLO(dd, ity), nameQRegLO(nn, ity));
         return True;
      }
      return False;
   }

   return False;
#  undef INSN
}
static
Bool dis_AdvSIMD_fp_data_proc_2_source(/*MB_OUT*/DisResult* dres, UInt insn,
                                        const VexArchInfo* archinfo)
{
   /* 31  28    23 21 20 15     11 9 4
      000 11110 ty 1  m  opcode 10 n d
      The first 3 bits are really "M 0 S", but M and S are always zero.
      Decode fields: ty, opcode
   */
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   if (INSN(31,24) != BITS8(0,0,0,1,1,1,1,0)
       || INSN(21,21) != 1 || INSN(11,10) != BITS2(1,0)) {
      return False;
   }
   UInt ty     = INSN(23,22);
   UInt mm     = INSN(20,16);
   UInt opcode = INSN(15,12);
   UInt nn     = INSN(9,5);
   UInt dd     = INSN(4,0);

   if (ty <= X01 && opcode <= BITS4(0,1,1,1)) {
      /* ------- 0x,0000: FMUL d_d, s_s ------- */
      /* ------- 0x,0001: FDIV d_d, s_s ------- */
      /* ------- 0x,0010: FADD d_d, s_s ------- */
      /* ------- 0x,0011: FSUB d_d, s_s ------- */
      /* ------- 0x,0100: FMAX d_d, s_s ------- */
      /* ------- 0x,0101: FMIN d_d, s_s ------- */
      /* ------- 0x,0110: FMAXNM d_d, s_s ------- (FIXME KLUDGED) */
      /* ------- 0x,0111: FMINNM d_d, s_s ------- (FIXME KLUDGED) */
      IRType ity = ty == X00 ? Ity_F32 : Ity_F64;
      IROp   iop = Iop_INVALID;
      const HChar* nm = "???";
      switch (opcode) {
         case BITS4(0,0,0,0): nm = "fmul";   iop = mkMULF(ity); break;
         case BITS4(0,0,0,1): nm = "fdiv";   iop = mkDIVF(ity); break;
         case BITS4(0,0,1,0): nm = "fadd";   iop = mkADDF(ity); break;
         case BITS4(0,0,1,1): nm = "fsub";   iop = mkSUBF(ity); break;
         case BITS4(0,1,0,0): nm = "fmax";   iop = mkVecMAXF(ty+2); break;
         case BITS4(0,1,0,1): nm = "fmin";   iop = mkVecMINF(ty+2); break;
         case BITS4(0,1,1,0): nm = "fmaxnm"; iop = mkVecMAXF(ty+2); break; //!!
         case BITS4(0,1,1,1): nm = "fminnm"; iop = mkVecMINF(ty+2); break; //!!
         default: vassert(0);
      }
      if (opcode <= BITS4(0,0,1,1)) {
         // This is really not good code.  TODO: avoid width-changing
         IRTemp res = newTemp(ity);
         assign(res, triop(iop, mkexpr(mk_get_IR_rounding_mode()),
                                getQRegLO(nn, ity), getQRegLO(mm, ity)));
         putQReg128(dd, mkV128(0));
         putQRegLO(dd, mkexpr(res));
      } else {
         putQReg128(dd, unop(mkVecZEROHIxxOFV128(ty+2),
                             binop(iop, getQReg128(nn), getQReg128(mm))));
      }
      DIP("%s %s, %s, %s\n",
          nm, nameQRegLO(dd, ity), nameQRegLO(nn, ity), nameQRegLO(mm, ity));
      return True;
   }

   if (ty <= X01 && opcode == BITS4(1,0,0,0)) {
      /* ------- 0x,1000: FNMUL d_d, s_s ------- */
      IRType ity  = ty == X00 ? Ity_F32 : Ity_F64;
      IROp   iop  = mkMULF(ity);
      IROp   iopn = mkNEGF(ity);
      const HChar* nm = "fnmul";
      IRExpr* resE = unop(iopn,
                          triop(iop, mkexpr(mk_get_IR_rounding_mode()),
                                getQRegLO(nn, ity), getQRegLO(mm, ity)));
      IRTemp  res  = newTemp(ity);
      assign(res, resE);
      putQReg128(dd, mkV128(0));
      putQRegLO(dd, mkexpr(res));
      DIP("%s %s, %s, %s\n",
          nm, nameQRegLO(dd, ity), nameQRegLO(nn, ity), nameQRegLO(mm, ity));
      return True;
   }

   if (ty == X11 && opcode <= BITS4(0,0,1,0)) {
      /* ------- 11,0010: FADD h_h ------- */
      if ((archinfo->hwcaps & VEX_HWCAPS_ARM64_FP16) == 0)
         return False;
      IRTemp res = newTemp(Ity_F16);
      assign(res, triop(mkADDF(Ity_F16), mkexpr(mk_get_IR_rounding_mode()),
                        getQRegLO(nn, Ity_F16), getQRegLO(mm, Ity_F16)));
      putQReg128(dd, mkV128(0));
      putQRegLO(dd, mkexpr(res));
      DIP("fadd %s, %s, %s\n",
          nameQRegLO(dd, Ity_F16), nameQRegLO(nn, Ity_F16),
          nameQRegLO(mm, Ity_F16));
      return True;
   }

   return False;
#  undef INSN
}
static
Bool dis_AdvSIMD_fp_data_proc_3_source(/*MB_OUT*/DisResult* dres, UInt insn)
{
   /* 31  28    23 21 20 15 14 9 4
      000 11111 ty o1 m  o0 a  n d
      The first 3 bits are really "M 0 S", but M and S are always zero.
      Decode fields: ty,o1,o0
   */
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   if (INSN(31,24) != BITS8(0,0,0,1,1,1,1,1)) {
      return False;
   }
   UInt ty    = INSN(23,22);
   UInt bitO1 = INSN(21,21);
   UInt mm    = INSN(20,16);
   UInt bitO0 = INSN(15,15);
   UInt aa    = INSN(14,10);
   UInt nn    = INSN(9,5);
   UInt dd    = INSN(4,0);

   if (ty <= X01) {
      /* -------- 0x,0,0 FMADD  d_d_d_d, s_s_s_s -------- */
      /* -------- 0x,0,1 FMSUB  d_d_d_d, s_s_s_s -------- */
      /* -------- 0x,1,0 FNMADD d_d_d_d, s_s_s_s -------- */
      /* -------- 0x,1,1 FNMSUB d_d_d_d, s_s_s_s -------- */
      /* -------------------- F{N}M{ADD,SUB} -------------------- */
      /* 31          22   20 15 14 9 4   ix
         000 11111 0 sz 0 m  0  a  n d   0   FMADD  Fd,Fn,Fm,Fa
         000 11111 0 sz 0 m  1  a  n d   1   FMSUB  Fd,Fn,Fm,Fa
         000 11111 0 sz 1 m  0  a  n d   2   FNMADD Fd,Fn,Fm,Fa
         000 11111 0 sz 1 m  1  a  n d   3   FNMSUB Fd,Fn,Fm,Fa
         where Fx=Dx when sz=1, Fx=Sx when sz=0

                  -----SPEC------    ----IMPL----
         fmadd       a +    n * m    fmadd (a, n, m)
         fmsub       a + (-n) * m    fmsub (a, n, m)
         fnmadd   (-a) + (-n) * m    fmadd (-a, -n, m)
         fnmsub   (-a) +    n * m    fmadd (-a, n, m)

         Note Iop_MAdd/SubF32/64 take arguments in the order: rm, N, M, A
      */
      Bool    isD     = (ty & 1) == 1;
      UInt    ix      = (bitO1 << 1) | bitO0;
      IRType  ity     = isD ? Ity_F64 : Ity_F32;
      IROp    opFMADD = mkFMADDF(ity);
      IROp    opFMSUB = mkFMSUBF(ity);
      IROp    opNEG   = mkNEGF(ity);
      IRTemp  res     = newTemp(ity);
      IRExpr* eA      = getQRegLO(aa, ity);
      IRExpr* eN      = getQRegLO(nn, ity);
      IRExpr* eM      = getQRegLO(mm, ity);
      IRExpr* rm      = mkexpr(mk_get_IR_rounding_mode());
      switch (ix) {
         case 0: /* FMADD */
            assign(res, qop(opFMADD, rm, eN, eM, eA));
            break;
         case 1: /* FMSUB */
            assign(res, qop(opFMSUB, rm, eN, eM, eA));
            break;
         case 2: /* FNMADD */
            assign(res, qop(opFMADD, rm, unop(opNEG, eN), eM,
                            unop(opNEG, eA)));
            break;
         case 3: /* FNMSUB */
            assign(res, qop(opFMADD, rm, eN, eM, unop(opNEG, eA)));
            break;
         default:
            vassert(0);
      }
      putQReg128(dd, mkV128(0x0000));
      putQRegLO(dd, mkexpr(res));
      const HChar* names[4] = { "fmadd", "fmsub", "fnmadd", "fnmsub" };
      DIP("%s %s, %s, %s, %s\n",
          names[ix], nameQRegLO(dd, ity), nameQRegLO(nn, ity),
          nameQRegLO(mm, ity), nameQRegLO(aa, ity));
      return True;
   }

   return False;
#  undef INSN
}
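/* Illustrative note, not from the original: a quick sanity check of the
   SPEC/IMPL table above, with a = 1, n = 2, m = 3:
      fnmadd: (-1) + (-2)*3 = -7, and fmadd(-a,-n,m) = (-1) + (-2)*3 = -7
      fnmsub: (-1) +   2 *3 =  5, and fmadd(-a, n,m) = (-1) +   2 *3 =  5
   so negating N and/or A, as done in cases 2 and 3, reproduces the
   architecturally specified results. */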
static
Bool dis_AdvSIMD_fp_immediate(/*MB_OUT*/DisResult* dres, UInt insn)
{
   /* 31  28    23 21 20   12  9    4
      000 11110 ty 1  imm8 100 imm5 d
      The first 3 bits are really "M 0 S", but M and S are always zero.
      Decode fields: ty,imm5
   */
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   if (INSN(31,24) != BITS8(0,0,0,1,1,1,1,0)
       || INSN(21,21) != 1 || INSN(12,10) != BITS3(1,0,0)) {
      return False;
   }
   UInt ty   = INSN(23,22);
   UInt imm8 = INSN(20,13);
   UInt imm5 = INSN(9,5);
   UInt dd   = INSN(4,0);

   /* ------- 00,00000: FMOV s_imm ------- */
   /* ------- 01,00000: FMOV d_imm ------- */
   if (ty <= X01 && imm5 == BITS5(0,0,0,0,0)) {
      Bool  isD = (ty & 1) == 1;
      ULong imm = VFPExpandImm(imm8, isD ? 64 : 32);
      if (!isD) {
         vassert(0 == (imm & 0xFFFFFFFF00000000ULL));
      }
      putQReg128(dd, mkV128(0));
      putQRegLO(dd, isD ? mkU64(imm) : mkU32(imm & 0xFFFFFFFFULL));
      DIP("fmov %s, #0x%llx\n",
          nameQRegLO(dd, isD ? Ity_F64 : Ity_F32), imm);
      return True;
   }

   return False;
#  undef INSN
}
static
Bool dis_AdvSIMD_fp_to_from_fixedp_conv(/*MB_OUT*/DisResult* dres, UInt insn)
{
   /* 31 30 29 28    23   21 20    18     15    9 4
      sf  0  0 11110 type  0 rmode opcode scale n d
      The first 3 bits are really "sf 0 S", but S is always zero.
      Decode fields: sf,type,rmode,opcode
   */
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   if (INSN(30,29) != BITS2(0,0)
       || INSN(28,24) != BITS5(1,1,1,1,0)
       || INSN(21,21) != 0) {
      return False;
   }
   UInt bitSF = INSN(31,31);
   UInt ty    = INSN(23,22); // type
   UInt rm    = INSN(20,19); // rmode
   UInt op    = INSN(18,16); // opcode
   UInt sc    = INSN(15,10); // scale
   UInt nn    = INSN(9,5);
   UInt dd    = INSN(4,0);

   if (ty <= X01 && rm == X11
       && (op == BITS3(0,0,0) || op == BITS3(0,0,1))) {
      /* -------- (ix) sf ty rm opc -------- */
      /* -------- 0    0  00 11 000: FCVTZS w_s_#fbits -------- */
      /* -------- 1    0  01 11 000: FCVTZS w_d_#fbits -------- */
      /* -------- 2    1  00 11 000: FCVTZS x_s_#fbits -------- */
      /* -------- 3    1  01 11 000: FCVTZS x_d_#fbits -------- */

      /* -------- 4    0  00 11 001: FCVTZU w_s_#fbits -------- */
      /* -------- 5    0  01 11 001: FCVTZU w_d_#fbits -------- */
      /* -------- 6    1  00 11 001: FCVTZU x_s_#fbits -------- */
      /* -------- 7    1  01 11 001: FCVTZU x_d_#fbits -------- */
      Bool isI64 = bitSF == 1;
      Bool isF64 = (ty & 1) == 1;
      Bool isU   = (op & 1) == 1;
      UInt ix    = (isU ? 4 : 0) | (isI64 ? 2 : 0) | (isF64 ? 1 : 0);

      Int fbits = 64 - sc;
      vassert(fbits >= 1 && fbits <= (isI64 ? 64 : 32));

      Double  scale  = two_to_the_plus(fbits);
      IRExpr* scaleE = isF64 ? IRExpr_Const(IRConst_F64(scale))
                             : IRExpr_Const(IRConst_F32( (Float)scale ));
      IROp    opMUL  = isF64 ? Iop_MulF64 : Iop_MulF32;

      const IROp ops[8]
         = { Iop_F32toI32S, Iop_F64toI32S, Iop_F32toI64S, Iop_F64toI64S,
             Iop_F32toI32U, Iop_F64toI32U, Iop_F32toI64U, Iop_F64toI64U };
      IRTemp irrm = newTemp(Ity_I32);
      assign(irrm, mkU32(Irrm_ZERO));

      IRExpr* src = getQRegLO(nn, isF64 ? Ity_F64 : Ity_F32);
      IRExpr* res = binop(ops[ix], mkexpr(irrm),
                          triop(opMUL, mkexpr(irrm), src, scaleE));
      putIRegOrZR(isI64, dd, res);

      DIP("fcvtz%c %s, %s, #%d\n",
          isU ? 'u' : 's', nameIRegOrZR(isI64, dd),
          nameQRegLO(nn, isF64 ? Ity_F64 : Ity_F32), fbits);
      return True;
   }

   /* ------ sf,ty,rm,opc ------ */
   /* ------ x,0x,00,010  SCVTF s/d, w/x, #fbits  ------ */
   /* ------ x,0x,00,011  UCVTF s/d, w/x, #fbits  ------ */
   /* (ix) sf  S 28    ty   rm  opc   15    9 4
      0    0 0 0 11110 00 0 00  010   scale n d  SCVTF Sd, Wn, #fbits
      1    0 0 0 11110 01 0 00  010   scale n d  SCVTF Dd, Wn, #fbits
      2    1 0 0 11110 00 0 00  010   scale n d  SCVTF Sd, Xn, #fbits
      3    1 0 0 11110 01 0 00  010   scale n d  SCVTF Dd, Xn, #fbits

      4    0 0 0 11110 00 0 00  011   scale n d  UCVTF Sd, Wn, #fbits
      5    0 0 0 11110 01 0 00  011   scale n d  UCVTF Dd, Wn, #fbits
      6    1 0 0 11110 00 0 00  011   scale n d  UCVTF Sd, Xn, #fbits
      7    1 0 0 11110 01 0 00  011   scale n d  UCVTF Dd, Xn, #fbits

      These are signed/unsigned conversion from integer registers to
      FP registers, all 4 32/64-bit combinations, rounded per FPCR,
      scaled per |scale|.
   */
   if (ty <= X01 && rm == X00
       && (op == BITS3(0,1,0) || op == BITS3(0,1,1))
       && (bitSF == 1 || ((sc >> 5) & 1) == 1)) {
      Bool isI64 = bitSF == 1;
      Bool isF64 = (ty & 1) == 1;
      Bool isU   = (op & 1) == 1;
      UInt ix    = (isU ? 4 : 0) | (isI64 ? 2 : 0) | (isF64 ? 1 : 0);

      Int fbits = 64 - sc;
      vassert(fbits >= 1 && fbits <= (isI64 ? 64 : 32));

      Double  scale  = two_to_the_minus(fbits);
      IRExpr* scaleE = isF64 ? IRExpr_Const(IRConst_F64(scale))
                             : IRExpr_Const(IRConst_F32( (Float)scale ));
      IROp    opMUL  = isF64 ? Iop_MulF64 : Iop_MulF32;

      const IROp ops[8]
         = { Iop_I32StoF32, Iop_I32StoF64, Iop_I64StoF32, Iop_I64StoF64,
             Iop_I32UtoF32, Iop_I32UtoF64, Iop_I64UtoF32, Iop_I64UtoF64 };
      IRExpr* src = getIRegOrZR(isI64, nn);
      IRExpr* res = (isF64 && !isI64)
                       ? unop(ops[ix], src)
                       : binop(ops[ix],
                               mkexpr(mk_get_IR_rounding_mode()), src);
      putQReg128(dd, mkV128(0));
      putQRegLO(dd, triop(opMUL, mkU32(Irrm_NEAREST), res, scaleE));

      DIP("%ccvtf %s, %s, #%d\n",
          isU ? 'u' : 's', nameQRegLO(dd, isF64 ? Ity_F64 : Ity_F32),
          nameIRegOrZR(isI64, nn), fbits);
      return True;
   }

   return False;
#  undef INSN
}
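/* Illustrative note, not from the original: the #fbits scaling works by
   pre- or post-multiplying with a power of two.  For instance
   FCVTZS Wd, Sn, #8 applied to Sn = 1.5 computes 1.5 * 2^8 = 384.0 and
   then truncates towards zero, giving Wd = 384; SCVTF Sd, Wn, #8 applied
   to Wn = 384 converts to 384.0 and multiplies by 2^-8, giving back 1.5. */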
static
Bool dis_AdvSIMD_fp_to_from_int_conv(/*MB_OUT*/DisResult* dres, UInt insn)
{
   /* 31 30 29 28    23   21 20    18     15     9 4
      sf  0  0 11110 type  1 rmode opcode 000000 n d
      The first 3 bits are really "sf 0 S", but S is always zero.
      Decode fields: sf,type,rmode,opcode
   */
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   if (INSN(30,29) != BITS2(0,0)
       || INSN(28,24) != BITS5(1,1,1,1,0)
       || INSN(21,21) != 1
       || INSN(15,10) != BITS6(0,0,0,0,0,0)) {
      return False;
   }
   UInt bitSF = INSN(31,31);
   UInt ty    = INSN(23,22); // type
   UInt rm    = INSN(20,19); // rmode
   UInt op    = INSN(18,16); // opcode
   UInt nn    = INSN(9,5);
   UInt dd    = INSN(4,0);

   /* -------- FCVT{N,P,M,Z,A}{S,U} (scalar, integer) -------- */
   /*    30       23   20 18  15     9 4
      sf 00 11110 0x 1 00 000 000000 n d  FCVTNS Rd, Fn (round to
      sf 00 11110 0x 1 00 001 000000 n d  FCVTNU Rd, Fn  nearest)
      ---------------- 01 --------------  FCVTP-------- (round to +inf)
      ---------------- 10 --------------  FCVTM-------- (round to -inf)
      ---------------- 11 --------------  FCVTZ-------- (round to zero)
      ---------------- 00 100 ----------  FCVTAS------- (nearest, ties away)
      ---------------- 00 101 ----------  FCVTAU------- (nearest, ties away)

      Rd is Xd when sf==1, Wd when sf==0
      Fn is Dn when x==1, Sn when x==0
      20:19 carry the rounding mode, using the same encoding as FPCR
   */
   if (ty <= X01
       && (    ((op == BITS3(0,0,0) || op == BITS3(0,0,1)) && True)
            || ((op == BITS3(1,0,0) || op == BITS3(1,0,1)) && rm == BITS2(0,0))
          )
      ) {
      Bool isI64 = bitSF == 1;
      Bool isF64 = (ty & 1) == 1;
      Bool isU   = (op & 1) == 1;
      /* Decide on the IR rounding mode to use. */
      IRRoundingMode irrm = 8; /*impossible*/
      HChar ch = '?';
      if (op == BITS3(0,0,0) || op == BITS3(0,0,1)) {
         switch (rm) {
            case BITS2(0,0): ch = 'n'; irrm = Irrm_NEAREST; break;
            case BITS2(0,1): ch = 'p'; irrm = Irrm_PosINF; break;
            case BITS2(1,0): ch = 'm'; irrm = Irrm_NegINF; break;
            case BITS2(1,1): ch = 'z'; irrm = Irrm_ZERO; break;
            default: vassert(0);
         }
      } else {
         vassert(op == BITS3(1,0,0) || op == BITS3(1,0,1));
         switch (rm) {
            case BITS2(0,0): ch = 'a'; irrm = Irrm_NEAREST; break;
            default: vassert(0);
         }
      }
      vassert(irrm != 8);
      /* Decide on the conversion primop, based on the source size,
         dest size and signedness (8 possibilities). */
      UInt ix = (isF64 ? 4 : 0) | (isI64 ? 2 : 0) | (isU ? 1 : 0);
      const IROp iops[8]
         = { Iop_F32toI32S, Iop_F32toI32U, Iop_F32toI64S, Iop_F32toI64U,
             Iop_F64toI32S, Iop_F64toI32U, Iop_F64toI64S, Iop_F64toI64U };
      IROp iop = iops[ix];
      // A bit of ATCery: bounce all cases we haven't seen an example of.
      if (/* F32toI32S */
             (iop == Iop_F32toI32S && irrm == Irrm_ZERO)   /* FCVTZS Wd,Sn */
          || (iop == Iop_F32toI32S && irrm == Irrm_NegINF) /* FCVTMS Wd,Sn */
          || (iop == Iop_F32toI32S && irrm == Irrm_PosINF) /* FCVTPS Wd,Sn */
          || (iop == Iop_F32toI32S && irrm == Irrm_NEAREST)/* FCVT{A,N}S W,S */

          || (iop == Iop_F32toI32U && irrm == Irrm_ZERO)   /* FCVTZU Wd,Sn */
          || (iop == Iop_F32toI32U && irrm == Irrm_NegINF) /* FCVTMU Wd,Sn */
          || (iop == Iop_F32toI32U && irrm == Irrm_PosINF) /* FCVTPU Wd,Sn */
          || (iop == Iop_F32toI32U && irrm == Irrm_NEAREST)/* FCVT{A,N}U W,S */

          || (iop == Iop_F32toI64S && irrm == Irrm_ZERO)   /* FCVTZS Xd,Sn */
          || (iop == Iop_F32toI64S && irrm == Irrm_NegINF) /* FCVTMS Xd,Sn */
          || (iop == Iop_F32toI64S && irrm == Irrm_PosINF) /* FCVTPS Xd,Sn */
          || (iop == Iop_F32toI64S && irrm == Irrm_NEAREST)/* FCVT{A,N}S X,S */

          || (iop == Iop_F32toI64U && irrm == Irrm_ZERO)   /* FCVTZU Xd,Sn */
          || (iop == Iop_F32toI64U && irrm == Irrm_NegINF) /* FCVTMU Xd,Sn */
          || (iop == Iop_F32toI64U && irrm == Irrm_PosINF) /* FCVTPU Xd,Sn */
          || (iop == Iop_F32toI64U && irrm == Irrm_NEAREST)/* FCVT{A,N}U X,S */

          || (iop == Iop_F64toI32S && irrm == Irrm_ZERO)   /* FCVTZS Wd,Dn */
          || (iop == Iop_F64toI32S && irrm == Irrm_NegINF) /* FCVTMS Wd,Dn */
          || (iop == Iop_F64toI32S && irrm == Irrm_PosINF) /* FCVTPS Wd,Dn */
          || (iop == Iop_F64toI32S && irrm == Irrm_NEAREST)/* FCVT{A,N}S W,D */

          || (iop == Iop_F64toI32U && irrm == Irrm_ZERO)   /* FCVTZU Wd,Dn */
          || (iop == Iop_F64toI32U && irrm == Irrm_NegINF) /* FCVTMU Wd,Dn */
          || (iop == Iop_F64toI32U && irrm == Irrm_PosINF) /* FCVTPU Wd,Dn */
          || (iop == Iop_F64toI32U && irrm == Irrm_NEAREST)/* FCVT{A,N}U W,D */

          || (iop == Iop_F64toI64S && irrm == Irrm_ZERO)   /* FCVTZS Xd,Dn */
          || (iop == Iop_F64toI64S && irrm == Irrm_NegINF) /* FCVTMS Xd,Dn */
          || (iop == Iop_F64toI64S && irrm == Irrm_PosINF) /* FCVTPS Xd,Dn */
          || (iop == Iop_F64toI64S && irrm == Irrm_NEAREST)/* FCVT{A,N}S X,D */

          || (iop == Iop_F64toI64U && irrm == Irrm_ZERO)   /* FCVTZU Xd,Dn */
          || (iop == Iop_F64toI64U && irrm == Irrm_NegINF) /* FCVTMU Xd,Dn */
          || (iop == Iop_F64toI64U && irrm == Irrm_PosINF) /* FCVTPU Xd,Dn */
          || (iop == Iop_F64toI64U && irrm == Irrm_NEAREST)/* FCVT{A,N}U X,D */
         ) {
         /* validated */
      } else {
         return False;
      }
      IRType srcTy = isF64 ? Ity_F64 : Ity_F32;
      IRType dstTy = isI64 ? Ity_I64 : Ity_I32;
      IRTemp src   = newTemp(srcTy);
      IRTemp dst   = newTemp(dstTy);
      assign(src, getQRegLO(nn, srcTy));
      assign(dst, binop(iop, mkU32(irrm), mkexpr(src)));
      putIRegOrZR(isI64, dd, mkexpr(dst));
      DIP("fcvt%c%c %s, %s\n", ch, isU ? 'u' : 's',
          nameIRegOrZR(isI64, dd), nameQRegLO(nn, srcTy));
      return True;
   }

   /* -------------- {S,U}CVTF (scalar, integer) -------------- */
   /* (ix) sf  S 28    ty   rm op  15     9 4
      0    0 0 0 11110 00 1 00 010 000000 n d  SCVTF Sd, Wn
      1    0 0 0 11110 01 1 00 010 000000 n d  SCVTF Dd, Wn
      2    1 0 0 11110 00 1 00 010 000000 n d  SCVTF Sd, Xn
      3    1 0 0 11110 01 1 00 010 000000 n d  SCVTF Dd, Xn

      4    0 0 0 11110 00 1 00 011 000000 n d  UCVTF Sd, Wn
      5    0 0 0 11110 01 1 00 011 000000 n d  UCVTF Dd, Wn
      6    1 0 0 11110 00 1 00 011 000000 n d  UCVTF Sd, Xn
      7    1 0 0 11110 01 1 00 011 000000 n d  UCVTF Dd, Xn

      These are signed/unsigned conversion from integer registers to
      FP registers, all 4 32/64-bit combinations, rounded per FPCR.
   */
   if (ty <= X01 && rm == X00 && (op == BITS3(0,1,0) || op == BITS3(0,1,1))) {
      Bool isI64 = bitSF == 1;
      Bool isF64 = (ty & 1) == 1;
      Bool isU   = (op & 1) == 1;
      UInt ix    = (isU ? 4 : 0) | (isI64 ? 2 : 0) | (isF64 ? 1 : 0);
      const IROp ops[8]
         = { Iop_I32StoF32, Iop_I32StoF64, Iop_I64StoF32, Iop_I64StoF64,
             Iop_I32UtoF32, Iop_I32UtoF64, Iop_I64UtoF32, Iop_I64UtoF64 };
      IRExpr* src = getIRegOrZR(isI64, nn);
      IRExpr* res = (isF64 && !isI64)
                       ? unop(ops[ix], src)
                       : binop(ops[ix],
                               mkexpr(mk_get_IR_rounding_mode()), src);
      putQReg128(dd, mkV128(0));
      putQRegLO(dd, res);
      DIP("%ccvtf %s, %s\n",
          isU ? 'u' : 's', nameQRegLO(dd, isF64 ? Ity_F64 : Ity_F32),
          nameIRegOrZR(isI64, nn));
      return True;
   }

   /* -------- FMOV (general) -------- */
   /* case sf  S       ty   rm op  15     9 4
       (1) 0 0 0 11110 00 1 00 111 000000 n d  FMOV Sd,      Wn
       (2) 1 0 0 11110 01 1 00 111 000000 n d  FMOV Dd,      Xn
       (3) 1 0 0 11110 10 1 01 111 000000 n d  FMOV Vd.D[1], Xn

       (4) 0 0 0 11110 00 1 00 110 000000 n d  FMOV Wd, Sn
       (5) 1 0 0 11110 01 1 00 110 000000 n d  FMOV Xd, Dn
       (6) 1 0 0 11110 10 1 01 110 000000 n d  FMOV Xd, Vn.D[1]
   */
   {
      UInt ix = 0; // case
      if (bitSF == 0) {
         if (ty == BITS2(0,0) && rm == BITS2(0,0) && op == BITS3(1,1,1))
            ix = 1;
         else
         if (ty == BITS2(0,0) && rm == BITS2(0,0) && op == BITS3(1,1,0))
            ix = 4;
      } else {
         vassert(bitSF == 1);
         if (ty == BITS2(0,1) && rm == BITS2(0,0) && op == BITS3(1,1,1))
            ix = 2;
         else
         if (ty == BITS2(0,1) && rm == BITS2(0,0) && op == BITS3(1,1,0))
            ix = 5;
         else
         if (ty == BITS2(1,0) && rm == BITS2(0,1) && op == BITS3(1,1,1))
            ix = 3;
         else
         if (ty == BITS2(1,0) && rm == BITS2(0,1) && op == BITS3(1,1,0))
            ix = 6;
      }
      if (ix > 0) {
         switch (ix) {
            case 1:
               putQReg128(dd, mkV128(0));
               putQRegLO(dd, getIReg32orZR(nn));
               DIP("fmov s%u, w%u\n", dd, nn);
               break;
            case 2:
               putQReg128(dd, mkV128(0));
               putQRegLO(dd, getIReg64orZR(nn));
               DIP("fmov d%u, x%u\n", dd, nn);
               break;
            case 3:
               putQRegHI64(dd, getIReg64orZR(nn));
               DIP("fmov v%u.d[1], x%u\n", dd, nn);
               break;
            case 4:
               putIReg32orZR(dd, getQRegLO(nn, Ity_I32));
               DIP("fmov w%u, s%u\n", dd, nn);
               break;
            case 5:
               putIReg64orZR(dd, getQRegLO(nn, Ity_I64));
               DIP("fmov x%u, d%u\n", dd, nn);
               break;
            case 6:
               putIReg64orZR(dd, getQRegHI64(nn));
               DIP("fmov x%u, v%u.d[1]\n", dd, nn);
               break;
            default:
               vassert(0);
         }
         return True;
      }
      /* undecodable; fall through */
   }

   return False;
#  undef INSN
}
static
Bool dis_ARM64_simd_and_fp(/*MB_OUT*/DisResult* dres, UInt insn,
                           const VexArchInfo* archinfo, Bool sigill_diag)
{
   Bool ok;
   ok = dis_AdvSIMD_EXT(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_TBL_TBX(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_ZIP_UZP_TRN(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_across_lanes(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_copy(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_modified_immediate(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_scalar_copy(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_scalar_pairwise(dres, insn, archinfo);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_scalar_shift_by_imm(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_scalar_three_different(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_scalar_three_same(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_scalar_three_same_extra(dres, insn, archinfo);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_scalar_two_reg_misc(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_scalar_two_reg_misc_fp16(dres, insn, archinfo);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_scalar_x_indexed_element(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_shift_by_immediate(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_three_different(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_three_same(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_three_same_extra(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_three_same_fp16(dres, insn, archinfo);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_two_reg_misc(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_two_reg_misc_fp16(dres, insn, archinfo);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_vector_x_indexed_elem(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_crypto_aes(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_crypto_three_reg_sha(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_crypto_two_reg_sha(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_fp_compare(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_fp_conditional_compare(dres, insn, archinfo, sigill_diag);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_fp_conditional_select(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_fp_data_proc_1_source(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_fp_data_proc_2_source(dres, insn, archinfo);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_fp_data_proc_3_source(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_fp_immediate(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_fp_to_from_fixedp_conv(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_fp_to_from_int_conv(dres, insn);
   if (UNLIKELY(ok)) return True;
   return False;
}
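/* Illustrative note, not from the original: each dis_AdvSIMD_* helper above
   begins by checking the fixed bits of its instruction class and returns
   False without emitting any IR when they do not match.  The expectation is
   therefore that at most one helper accepts a given insn, so the order of
   the calls is a matter of decode speed rather than correctness. */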
15774 /*------------------------------------------------------------*/
15775 /*--- Disassemble a single ARM64 instruction ---*/
15776 /*------------------------------------------------------------*/
15778 /* Disassemble a single ARM64 instruction into IR. The instruction
15779 has is located at |guest_instr| and has guest IP of
15780 |guest_PC_curr_instr|, which will have been set before the call
15781 here. Returns True iff the instruction was decoded, in which case
15782 *dres will be set accordingly, or False, in which case *dres should
15783 be ignored by the caller. */
15786 Bool
disInstr_ARM64_WRK (
15787 /*MB_OUT*/DisResult
* dres
,
15788 const UChar
* guest_instr
,
15789 const VexArchInfo
* archinfo
,
15790 const VexAbiInfo
* abiinfo
,
15794 // A macro to fish bits out of 'insn'.
15795 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
15797 //ZZ DisResult dres;
15799 //ZZ //Bool allow_VFP = False;
15800 //ZZ //UInt hwcaps = archinfo->hwcaps;
15801 //ZZ IRTemp condT; /* :: Ity_I32 */
15803 //ZZ HChar dis_buf[128]; // big enough to hold LDMIA etc text
15805 //ZZ /* What insn variants are we supporting today? */
15806 //ZZ //allow_VFP = (0 != (hwcaps & VEX_HWCAPS_ARM_VFP));
15809 /* Set result defaults. */
15810 dres
->whatNext
= Dis_Continue
;
15812 dres
->jk_StopHere
= Ijk_INVALID
;
15813 dres
->hint
= Dis_HintNone
;
15815 /* At least this is simple on ARM64: insns are all 4 bytes long, and
15816 4-aligned. So just fish the whole thing out of memory right now
15818 UInt insn
= getUIntLittleEndianly( guest_instr
);
15820 if (0) vex_printf("insn: 0x%x\n", insn
);
15822 DIP("\t(arm64) 0x%llx: ", (ULong
)guest_PC_curr_instr
);
15824 vassert(0 == (guest_PC_curr_instr
& 3ULL));
15826 /* ----------------------------------------------------------- */
15828 /* Spot "Special" instructions (see comment at top of file). */
15830 const UChar
* code
= guest_instr
;
15831 /* Spot the 16-byte preamble:
15832 93CC0D8C ror x12, x12, #3
15833 93CC358C ror x12, x12, #13
15834 93CCCD8C ror x12, x12, #51
15835 93CCF58C ror x12, x12, #61
15837 UInt word1
= 0x93CC0D8C;
15838 UInt word2
= 0x93CC358C;
15839 UInt word3
= 0x93CCCD8C;
15840 UInt word4
= 0x93CCF58C;
15841 if (getUIntLittleEndianly(code
+ 0) == word1
&&
15842 getUIntLittleEndianly(code
+ 4) == word2
&&
15843 getUIntLittleEndianly(code
+ 8) == word3
&&
15844 getUIntLittleEndianly(code
+12) == word4
) {
15845 /* Got a "Special" instruction preamble. Which one is it? */
15846 if (getUIntLittleEndianly(code
+16) == 0xAA0A014A
15847 /* orr x10,x10,x10 */) {
15848 /* X3 = client_request ( X4 ) */
15849 DIP("x3 = client_request ( x4 )\n");
15850 putPC(mkU64( guest_PC_curr_instr
+ 20 ));
15851 dres
->jk_StopHere
= Ijk_ClientReq
;
15852 dres
->whatNext
= Dis_StopHere
;
15856 if (getUIntLittleEndianly(code
+16) == 0xAA0B016B
15857 /* orr x11,x11,x11 */) {
15858 /* X3 = guest_NRADDR */
15859 DIP("x3 = guest_NRADDR\n");
15861 putIReg64orZR(3, IRExpr_Get( OFFB_NRADDR
, Ity_I64
));
15865 if (getUIntLittleEndianly(code
+16) == 0xAA0C018C
15866 /* orr x12,x12,x12 */) {
15867 /* branch-and-link-to-noredir X8 */
15868 DIP("branch-and-link-to-noredir x8\n");
15869 putIReg64orZR(30, mkU64(guest_PC_curr_instr
+ 20));
15870 putPC(getIReg64orZR(8));
15871 dres
->jk_StopHere
= Ijk_NoRedir
;
15872 dres
->whatNext
= Dis_StopHere
;
15876 if (getUIntLittleEndianly(code
+16) == 0xAA090129
15877 /* orr x9,x9,x9 */) {
15879 DIP("IR injection\n");
15880 vex_inject_ir(irsb
, Iend_LE
);
15881 // Invalidate the current insn. The reason is that the IRop we're
15882 // injecting here can change. In which case the translation has to
15883 // be redone. For ease of handling, we simply invalidate all the
15885 stmt(IRStmt_Put(OFFB_CMSTART
, mkU64(guest_PC_curr_instr
)));
15886 stmt(IRStmt_Put(OFFB_CMLEN
, mkU64(20)));
15887 putPC(mkU64( guest_PC_curr_instr
+ 20 ));
15888 dres
->whatNext
= Dis_StopHere
;
15889 dres
->jk_StopHere
= Ijk_InvalICache
;
15892 /* We don't know what it is. */
15898 /* ----------------------------------------------------------- */
15900 /* Main ARM64 instruction decoder starts here. */
15904 /* insn[28:25] determines the top-level grouping, so let's start
15907 For all of these dis_ARM64_ functions, we pass *dres with the
15908 normal default results "insn OK, 4 bytes long, keep decoding" so
15909 they don't need to change it. However, decodes of control-flow
15910 insns may cause *dres to change.
   switch (INSN(28,25)) {
      case BITS4(1,0,0,0): case BITS4(1,0,0,1):
         // Data processing - immediate
         ok = dis_ARM64_data_processing_immediate(dres, insn, sigill_diag);
         break;
      case BITS4(1,0,1,0): case BITS4(1,0,1,1):
         // Branch, exception generation and system instructions
         ok = dis_ARM64_branch_etc(dres, insn, archinfo, abiinfo, sigill_diag);
         break;
      case BITS4(0,1,0,0): case BITS4(0,1,1,0):
      case BITS4(1,1,0,0): case BITS4(1,1,1,0):
         // Loads and stores
         ok = dis_ARM64_load_store(dres, insn, abiinfo, sigill_diag);
         break;
      case BITS4(0,1,0,1): case BITS4(1,1,0,1):
         // Data processing - register
         ok = dis_ARM64_data_processing_register(dres, insn, sigill_diag);
         break;
      case BITS4(0,1,1,1): case BITS4(1,1,1,1):
         // Data processing - SIMD and floating point
         ok = dis_ARM64_simd_and_fp(dres, insn, archinfo, sigill_diag);
         break;
      case BITS4(0,0,0,0): case BITS4(0,0,0,1):
      case BITS4(0,0,1,0): case BITS4(0,0,1,1):
         // Unallocated encoding group; leave |ok| as False
         break;
      default:
         vassert(0); /* Can't happen */
   }
   /* If the next-level down decoders failed, make sure |dres| didn't
      get changed. */
   if (!ok) {
      vassert(dres->whatNext    == Dis_Continue);
      vassert(dres->len         == 4);
      vassert(dres->jk_StopHere == Ijk_INVALID);
   }

   return ok;
}
/*------------------------------------------------------------*/
/*--- Top-level fn                                         ---*/
/*------------------------------------------------------------*/

/* Disassemble a single instruction into IR.  The instruction
   is located in host memory at &guest_code[delta]. */

DisResult disInstr_ARM64 ( IRSB*        irsb_IN,
                           const UChar* guest_code_IN,
                           Long         delta_IN,
                           Addr         guest_IP,
                           VexArch      guest_arch,
                           const VexArchInfo* archinfo,
                           const VexAbiInfo*  abiinfo,
                           VexEndness   host_endness_IN,
                           Bool         sigill_diag_IN )
{
   DisResult dres;
   vex_bzero(&dres, sizeof(dres));

   /* Set globals (see top of this file) */
   vassert(guest_arch == VexArchARM64);

   irsb                = irsb_IN;
   host_endness        = host_endness_IN;
   guest_PC_curr_instr = (Addr64)guest_IP;
   /* Sanity checks */
   /* (x::UInt - 2) <= 15   ===   x >= 2 && x <= 17 (I hope) */
   vassert((archinfo->arm64_dMinLine_lg2_szB - 2) <= 15);
   vassert((archinfo->arm64_iMinLine_lg2_szB - 2) <= 15);
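   /* Because both fields are unsigned, x - 2 wraps to a large value when
      x < 2, so each single comparison enforces 2 <= x <= 17, i.e. cache
      line sizes from 4 bytes up to 128KB. */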
   /* Try to decode */
   Bool ok = disInstr_ARM64_WRK( &dres,
                                 &guest_code_IN[delta_IN],
                                 archinfo, abiinfo, sigill_diag_IN );
   if (ok) {
      /* All decode successes end up here. */
      vassert(dres.len == 4 || dres.len == 20);
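      /* len is 4 for a normal insn, or 20 for one of the "Special"
         preamble sequences handled in disInstr_ARM64_WRK. */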
      switch (dres.whatNext) {
         case Dis_Continue:
            putPC( mkU64(dres.len + guest_PC_curr_instr) );
            break;
         case Dis_StopHere:
            break;
         default:
            vassert(0);
      }
   }
   else {
      /* All decode failures end up here. */
      if (sigill_diag_IN) {
         Int   i, j;
         HChar buf[64];
         UInt  insn = getUIntLittleEndianly( &guest_code_IN[delta_IN] );
         vex_bzero(buf, sizeof(buf));
         for (i = j = 0; i < 32; i++) {
            if (i > 0) {
               if ((i & 7) == 0) buf[j++] = ' ';
               else if ((i & 3) == 0) buf[j++] = '\'';
            }
            buf[j++] = (insn & (1<<(31-i))) ? '1' : '0';
         }
         vex_printf("disInstr(arm64): unhandled instruction 0x%08x\n", insn);
         vex_printf("disInstr(arm64): %s\n", buf);
      }

      /* Tell the dispatcher that this insn cannot be decoded, and so
         has not been executed, and (is currently) the next to be
         executed.  PC should be up-to-date since it is made so at the
         start of each insn, but nevertheless be paranoid and update
         it again right now. */
      putPC( mkU64(guest_PC_curr_instr) );
      dres.len         = 0;
      dres.whatNext    = Dis_StopHere;
      dres.jk_StopHere = Ijk_NoDecode;
   }

   return dres;
}
/*--------------------------------------------------------------------*/
/*--- end                                       guest_arm64_toIR.c ---*/
/*--------------------------------------------------------------------*/