VEX/priv/guest_arm64_toIR.c
1 /* -*- mode: C; c-basic-offset: 3; -*- */
3 /*--------------------------------------------------------------------*/
4 /*--- begin guest_arm64_toIR.c ---*/
5 /*--------------------------------------------------------------------*/
7 /*
8 This file is part of Valgrind, a dynamic binary instrumentation
9 framework.
11 Copyright (C) 2013-2017 OpenWorks
12 info@open-works.net
14 This program is free software; you can redistribute it and/or
15 modify it under the terms of the GNU General Public License as
16 published by the Free Software Foundation; either version 2 of the
17 License, or (at your option) any later version.
19 This program is distributed in the hope that it will be useful, but
20 WITHOUT ANY WARRANTY; without even the implied warranty of
21 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
22 General Public License for more details.
24 You should have received a copy of the GNU General Public License
25 along with this program; if not, write to the Free Software
26 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
27 02110-1301, USA.
29 The GNU General Public License is contained in the file COPYING.
32 /* KNOWN LIMITATIONS 2014-Nov-16
34 * Correctness: FMAXNM, FMINNM are implemented the same as FMAX/FMIN.
36 Also FP comparison "unordered" .. is implemented as normal FP
37 comparison.
39 Both should be fixed. They behave incorrectly in the presence of
40 NaNs.
42 FMULX is treated the same as FMUL. That's also not correct.
44 * Floating multiply-add (etc) insns. Are split into a multiply and
45 an add, and so suffer double rounding and hence sometimes the
46 least significant mantissa bit is incorrect. Fix: use the IR
47 multiply-add IROps instead.
49 * FRINTA, FRINTN are kludged .. they just round to nearest. No special
50 handling for the "ties" case. FRINTX might be dubious too.
52    * Ditto FCVTXN. "Round to odd" (round towards zero, then set the result's
53      LSB if inexact) is not implemented; this implementation just rounds to nearest.
56 /* "Special" instructions.
58 This instruction decoder can decode four special instructions
59 which mean nothing natively (are no-ops as far as regs/mem are
60 concerned) but have meaning for supporting Valgrind. A special
61 instruction is flagged by a 16-byte preamble:
63 93CC0D8C 93CC358C 93CCCD8C 93CCF58C
64 (ror x12, x12, #3; ror x12, x12, #13
65 ror x12, x12, #51; ror x12, x12, #61)
67    Following that, one of the following 4 is allowed
68 (standard interpretation in parentheses):
70 AA0A014A (orr x10,x10,x10) X3 = client_request ( X4 )
71 AA0B016B (orr x11,x11,x11) X3 = guest_NRADDR
72 AA0C018C (orr x12,x12,x12) branch-and-link-to-noredir X8
73 AA090129 (orr x9,x9,x9) IR injection
75 Any other bytes following the 16-byte preamble are illegal and
76 constitute a failure in instruction decoding. This all assumes
77 that the preamble will never occur except in specific code
78 fragments designed for Valgrind to catch.
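/* Illustrative sketch (an assumption, not part of the original file): a
   front end might recognise the 16-byte preamble by comparing four
   little-endian words against the ROR-x12 encodings listed above, using
   a helper such as getUIntLittleEndianly (defined below):

      static Bool isSpecialPreamble ( const UChar* code )
      {
         return getUIntLittleEndianly(code +  0) == 0x93CC0D8CU
                && getUIntLittleEndianly(code +  4) == 0x93CC358CU
                && getUIntLittleEndianly(code +  8) == 0x93CCCD8CU
                && getUIntLittleEndianly(code + 12) == 0x93CCF58CU;
      }

   Only when this matches does the decoder inspect the word that follows
   and select one of the four behaviours listed above. */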
81 /* Translates ARM64 code to IR. */
83 #include "libvex_basictypes.h"
84 #include "libvex_ir.h"
85 #include "libvex.h"
86 #include "libvex_guest_arm64.h"
88 #include "main_util.h"
89 #include "main_globals.h"
90 #include "guest_generic_bb_to_IR.h"
91 #include "guest_arm64_defs.h"
94 /*------------------------------------------------------------*/
95 /*--- Globals ---*/
96 /*------------------------------------------------------------*/
98 /* These are set at the start of the translation of an instruction, so
99 that we don't have to pass them around endlessly. CONST means does
100 not change during translation of the instruction.
103 /* CONST: what is the host's endianness? We need to know this in
104 order to do sub-register accesses to the SIMD/FP registers
105 correctly. */
106 static VexEndness host_endness;
108 /* CONST: The guest address for the instruction currently being
109 translated. */
110 static Addr64 guest_PC_curr_instr;
112 /* MOD: The IRSB* into which we're generating code. */
113 static IRSB* irsb;
116 /*------------------------------------------------------------*/
117 /*--- Debugging output ---*/
118 /*------------------------------------------------------------*/
120 #define DIP(format, args...) \
121 if (vex_traceflags & VEX_TRACE_FE) \
122 vex_printf(format, ## args)
124 #define DIS(buf, format, args...) \
125 if (vex_traceflags & VEX_TRACE_FE) \
126 vex_sprintf(buf, format, ## args)
129 /*------------------------------------------------------------*/
130 /*--- Helper bits and pieces for deconstructing the ---*/
131 /*--- arm insn stream. ---*/
132 /*------------------------------------------------------------*/
134 /* Do a little-endian load of a 32-bit word, regardless of the
135 endianness of the underlying host. */
136 static inline UInt getUIntLittleEndianly ( const UChar* p )
138 UInt w = 0;
139 w = (w << 8) | p[3];
140 w = (w << 8) | p[2];
141 w = (w << 8) | p[1];
142 w = (w << 8) | p[0];
143 return w;
146 /* Sign extend a N-bit value up to 64 bits, by copying
147 bit N-1 into all higher positions. */
148 static ULong sx_to_64 ( ULong x, UInt n )
150 vassert(n > 1 && n < 64);
151 x <<= (64-n);
152 Long r = (Long)x;
153 r >>= (64-n);
154 return (ULong)r;
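/* Worked example (illustrative): sx_to_64(0x1FF, 9) has bit 8 set, so the
   result is 0xFFFFFFFFFFFFFFFF; sx_to_64(0x0FF, 9) has bit 8 clear, so the
   result stays 0x00000000000000FF. */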
157 //ZZ /* Do a little-endian load of a 16-bit word, regardless of the
158 //ZZ endianness of the underlying host. */
159 //ZZ static inline UShort getUShortLittleEndianly ( UChar* p )
160 //ZZ {
161 //ZZ UShort w = 0;
162 //ZZ w = (w << 8) | p[1];
163 //ZZ w = (w << 8) | p[0];
164 //ZZ return w;
165 //ZZ }
166 //ZZ
167 //ZZ static UInt ROR32 ( UInt x, UInt sh ) {
168 //ZZ vassert(sh >= 0 && sh < 32);
169 //ZZ if (sh == 0)
170 //ZZ return x;
171 //ZZ else
172 //ZZ return (x << (32-sh)) | (x >> sh);
173 //ZZ }
174 //ZZ
175 //ZZ static Int popcount32 ( UInt x )
176 //ZZ {
177 //ZZ Int res = 0, i;
178 //ZZ for (i = 0; i < 32; i++) {
179 //ZZ res += (x & 1);
180 //ZZ x >>= 1;
181 //ZZ }
182 //ZZ return res;
183 //ZZ }
184 //ZZ
185 //ZZ static UInt setbit32 ( UInt x, Int ix, UInt b )
186 //ZZ {
187 //ZZ UInt mask = 1 << ix;
188 //ZZ x &= ~mask;
189 //ZZ x |= ((b << ix) & mask);
190 //ZZ return x;
191 //ZZ }
193 #define BITS2(_b1,_b0) \
194 (((_b1) << 1) | (_b0))
196 #define BITS3(_b2,_b1,_b0) \
197 (((_b2) << 2) | ((_b1) << 1) | (_b0))
199 #define BITS4(_b3,_b2,_b1,_b0) \
200 (((_b3) << 3) | ((_b2) << 2) | ((_b1) << 1) | (_b0))
202 #define BITS8(_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0) \
203 ((BITS4((_b7),(_b6),(_b5),(_b4)) << 4) \
204 | BITS4((_b3),(_b2),(_b1),(_b0)))
206 #define BITS5(_b4,_b3,_b2,_b1,_b0) \
207 (BITS8(0,0,0,(_b4),(_b3),(_b2),(_b1),(_b0)))
208 #define BITS6(_b5,_b4,_b3,_b2,_b1,_b0) \
209 (BITS8(0,0,(_b5),(_b4),(_b3),(_b2),(_b1),(_b0)))
210 #define BITS7(_b6,_b5,_b4,_b3,_b2,_b1,_b0) \
211 (BITS8(0,(_b6),(_b5),(_b4),(_b3),(_b2),(_b1),(_b0)))
213 #define BITS9(_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0) \
214 (((_b8) << 8) \
215 | BITS8((_b7),(_b6),(_b5),(_b4),(_b3),(_b2),(_b1),(_b0)))
217 #define BITS10(_b9,_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0) \
218 (((_b9) << 9) | ((_b8) << 8) \
219 | BITS8((_b7),(_b6),(_b5),(_b4),(_b3),(_b2),(_b1),(_b0)))
221 #define BITS11(_b10,_b9,_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0) \
222 (((_b10) << 10) \
223 | BITS10(_b9,_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0))
225 #define BITS12(_b11, _b10,_b9,_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0) \
226 (((_b11) << 11) \
227 | BITS11(_b10,_b9,_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0))
229 #define X00 BITS2(0,0)
230 #define X01 BITS2(0,1)
231 #define X10 BITS2(1,0)
232 #define X11 BITS2(1,1)
234 // produces _uint[_bMax:_bMin]
235 #define SLICE_UInt(_uint,_bMax,_bMin) \
236 (( ((UInt)(_uint)) >> (_bMin)) \
237 & (UInt)((1ULL << ((_bMax) - (_bMin) + 1)) - 1ULL))
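// Worked example (illustrative): SLICE_UInt(0xABCD1234, 15, 12) shifts
// right by 12 and masks with 0xF, giving 0x1.  Both bounds are inclusive,
// so the extracted field is (_bMax - _bMin + 1) bits wide.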
240 /*------------------------------------------------------------*/
241 /*--- Helper bits and pieces for creating IR fragments. ---*/
242 /*------------------------------------------------------------*/
244 static IRExpr* mkV128 ( UShort w )
246 return IRExpr_Const(IRConst_V128(w));
249 static IRExpr* mkU64 ( ULong i )
251 return IRExpr_Const(IRConst_U64(i));
254 static IRExpr* mkU32 ( UInt i )
256 return IRExpr_Const(IRConst_U32(i));
259 static IRExpr* mkU16 ( UInt i )
261 vassert(i < 65536);
262 return IRExpr_Const(IRConst_U16(i));
265 static IRExpr* mkU8 ( UInt i )
267 vassert(i < 256);
268 return IRExpr_Const(IRConst_U8( (UChar)i ));
271 static IRExpr* mkexpr ( IRTemp tmp )
273 return IRExpr_RdTmp(tmp);
276 static IRExpr* unop ( IROp op, IRExpr* a )
278 return IRExpr_Unop(op, a);
281 static IRExpr* binop ( IROp op, IRExpr* a1, IRExpr* a2 )
283 return IRExpr_Binop(op, a1, a2);
286 static IRExpr* triop ( IROp op, IRExpr* a1, IRExpr* a2, IRExpr* a3 )
288 return IRExpr_Triop(op, a1, a2, a3);
291 static IRExpr* loadLE ( IRType ty, IRExpr* addr )
293 return IRExpr_Load(Iend_LE, ty, addr);
296 /* Add a statement to the list held by "irbb". */
297 static void stmt ( IRStmt* st )
299 addStmtToIRSB( irsb, st );
302 static void assign ( IRTemp dst, IRExpr* e )
304 stmt( IRStmt_WrTmp(dst, e) );
307 static void storeLE ( IRExpr* addr, IRExpr* data )
309 stmt( IRStmt_Store(Iend_LE, addr, data) );
312 //ZZ static void storeGuardedLE ( IRExpr* addr, IRExpr* data, IRTemp guardT )
313 //ZZ {
314 //ZZ if (guardT == IRTemp_INVALID) {
315 //ZZ /* unconditional */
316 //ZZ storeLE(addr, data);
317 //ZZ } else {
318 //ZZ stmt( IRStmt_StoreG(Iend_LE, addr, data,
319 //ZZ binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0))) );
320 //ZZ }
321 //ZZ }
322 //ZZ
323 //ZZ static void loadGuardedLE ( IRTemp dst, IRLoadGOp cvt,
324 //ZZ IRExpr* addr, IRExpr* alt,
325 //ZZ IRTemp guardT /* :: Ity_I32, 0 or 1 */ )
326 //ZZ {
327 //ZZ if (guardT == IRTemp_INVALID) {
328 //ZZ /* unconditional */
329 //ZZ IRExpr* loaded = NULL;
330 //ZZ switch (cvt) {
331 //ZZ case ILGop_Ident32:
332 //ZZ loaded = loadLE(Ity_I32, addr); break;
333 //ZZ case ILGop_8Uto32:
334 //ZZ loaded = unop(Iop_8Uto32, loadLE(Ity_I8, addr)); break;
335 //ZZ case ILGop_8Sto32:
336 //ZZ loaded = unop(Iop_8Sto32, loadLE(Ity_I8, addr)); break;
337 //ZZ case ILGop_16Uto32:
338 //ZZ loaded = unop(Iop_16Uto32, loadLE(Ity_I16, addr)); break;
339 //ZZ case ILGop_16Sto32:
340 //ZZ loaded = unop(Iop_16Sto32, loadLE(Ity_I16, addr)); break;
341 //ZZ default:
342 //ZZ vassert(0);
343 //ZZ }
344 //ZZ vassert(loaded != NULL);
345 //ZZ assign(dst, loaded);
346 //ZZ } else {
347 //ZZ /* Generate a guarded load into 'dst', but apply 'cvt' to the
348 //ZZ loaded data before putting the data in 'dst'. If the load
349 //ZZ does not take place, 'alt' is placed directly in 'dst'. */
350 //ZZ stmt( IRStmt_LoadG(Iend_LE, cvt, dst, addr, alt,
351 //ZZ binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0))) );
352 //ZZ }
353 //ZZ }
355 /* Generate a new temporary of the given type. */
356 static IRTemp newTemp ( IRType ty )
358 vassert(isPlausibleIRType(ty));
359 return newIRTemp( irsb->tyenv, ty );
362 /* This is used in many places, so the brevity is an advantage. */
363 static IRTemp newTempV128(void)
365 return newTemp(Ity_V128);
368 /* Initialise V128 temporaries en masse. */
369 static
370 void newTempsV128_2(IRTemp* t1, IRTemp* t2)
372 vassert(t1 && *t1 == IRTemp_INVALID);
373 vassert(t2 && *t2 == IRTemp_INVALID);
374 *t1 = newTempV128();
375 *t2 = newTempV128();
378 static
379 void newTempsV128_3(IRTemp* t1, IRTemp* t2, IRTemp* t3)
381 vassert(t1 && *t1 == IRTemp_INVALID);
382 vassert(t2 && *t2 == IRTemp_INVALID);
383 vassert(t3 && *t3 == IRTemp_INVALID);
384 *t1 = newTempV128();
385 *t2 = newTempV128();
386 *t3 = newTempV128();
389 static
390 void newTempsV128_4(IRTemp* t1, IRTemp* t2, IRTemp* t3, IRTemp* t4)
392 vassert(t1 && *t1 == IRTemp_INVALID);
393 vassert(t2 && *t2 == IRTemp_INVALID);
394 vassert(t3 && *t3 == IRTemp_INVALID);
395 vassert(t4 && *t4 == IRTemp_INVALID);
396 *t1 = newTempV128();
397 *t2 = newTempV128();
398 *t3 = newTempV128();
399 *t4 = newTempV128();
402 static
403 void newTempsV128_7(IRTemp* t1, IRTemp* t2, IRTemp* t3,
404 IRTemp* t4, IRTemp* t5, IRTemp* t6, IRTemp* t7)
406 vassert(t1 && *t1 == IRTemp_INVALID);
407 vassert(t2 && *t2 == IRTemp_INVALID);
408 vassert(t3 && *t3 == IRTemp_INVALID);
409 vassert(t4 && *t4 == IRTemp_INVALID);
410 vassert(t5 && *t5 == IRTemp_INVALID);
411 vassert(t6 && *t6 == IRTemp_INVALID);
412 vassert(t7 && *t7 == IRTemp_INVALID);
413 *t1 = newTempV128();
414 *t2 = newTempV128();
415 *t3 = newTempV128();
416 *t4 = newTempV128();
417 *t5 = newTempV128();
418 *t6 = newTempV128();
419 *t7 = newTempV128();
422 //ZZ /* Produces a value in 0 .. 3, which is encoded as per the type
423 //ZZ IRRoundingMode. */
424 //ZZ static IRExpr* /* :: Ity_I32 */ get_FAKE_roundingmode ( void )
425 //ZZ {
426 //ZZ return mkU32(Irrm_NEAREST);
427 //ZZ }
428 //ZZ
429 //ZZ /* Generate an expression for SRC rotated right by ROT. */
430 //ZZ static IRExpr* genROR32( IRTemp src, Int rot )
431 //ZZ {
432 //ZZ vassert(rot >= 0 && rot < 32);
433 //ZZ if (rot == 0)
434 //ZZ return mkexpr(src);
435 //ZZ return
436 //ZZ binop(Iop_Or32,
437 //ZZ binop(Iop_Shl32, mkexpr(src), mkU8(32 - rot)),
438 //ZZ binop(Iop_Shr32, mkexpr(src), mkU8(rot)));
439 //ZZ }
440 //ZZ
441 //ZZ static IRExpr* mkU128 ( ULong i )
442 //ZZ {
443 //ZZ return binop(Iop_64HLtoV128, mkU64(i), mkU64(i));
444 //ZZ }
445 //ZZ
446 //ZZ /* Generate a 4-aligned version of the given expression if
447 //ZZ the given condition is true. Else return it unchanged. */
448 //ZZ static IRExpr* align4if ( IRExpr* e, Bool b )
449 //ZZ {
450 //ZZ if (b)
451 //ZZ return binop(Iop_And32, e, mkU32(~3));
452 //ZZ else
453 //ZZ return e;
454 //ZZ }
456 /* Other IR construction helpers. */
457 static IROp mkAND ( IRType ty ) {
458 switch (ty) {
459 case Ity_I32: return Iop_And32;
460 case Ity_I64: return Iop_And64;
461 default: vpanic("mkAND");
465 static IROp mkOR ( IRType ty ) {
466 switch (ty) {
467 case Ity_I32: return Iop_Or32;
468 case Ity_I64: return Iop_Or64;
469 default: vpanic("mkOR");
473 static IROp mkXOR ( IRType ty ) {
474 switch (ty) {
475 case Ity_I32: return Iop_Xor32;
476 case Ity_I64: return Iop_Xor64;
477 default: vpanic("mkXOR");
481 static IROp mkSHL ( IRType ty ) {
482 switch (ty) {
483 case Ity_I32: return Iop_Shl32;
484 case Ity_I64: return Iop_Shl64;
485 default: vpanic("mkSHL");
489 static IROp mkSHR ( IRType ty ) {
490 switch (ty) {
491 case Ity_I32: return Iop_Shr32;
492 case Ity_I64: return Iop_Shr64;
493 default: vpanic("mkSHR");
497 static IROp mkSAR ( IRType ty ) {
498 switch (ty) {
499 case Ity_I32: return Iop_Sar32;
500 case Ity_I64: return Iop_Sar64;
501 default: vpanic("mkSAR");
505 static IROp mkNOT ( IRType ty ) {
506 switch (ty) {
507 case Ity_I32: return Iop_Not32;
508 case Ity_I64: return Iop_Not64;
509 default: vpanic("mkNOT");
513 static IROp mkADD ( IRType ty ) {
514 switch (ty) {
515 case Ity_I32: return Iop_Add32;
516 case Ity_I64: return Iop_Add64;
517 default: vpanic("mkADD");
521 static IROp mkSUB ( IRType ty ) {
522 switch (ty) {
523 case Ity_I32: return Iop_Sub32;
524 case Ity_I64: return Iop_Sub64;
525 default: vpanic("mkSUB");
529 static IROp mkADDF ( IRType ty ) {
530 switch (ty) {
531 case Ity_F32: return Iop_AddF32;
532 case Ity_F64: return Iop_AddF64;
533 default: vpanic("mkADDF");
537 static IROp mkSUBF ( IRType ty ) {
538 switch (ty) {
539 case Ity_F32: return Iop_SubF32;
540 case Ity_F64: return Iop_SubF64;
541 default: vpanic("mkSUBF");
545 static IROp mkMULF ( IRType ty ) {
546 switch (ty) {
547 case Ity_F32: return Iop_MulF32;
548 case Ity_F64: return Iop_MulF64;
549 default: vpanic("mkMULF");
553 static IROp mkDIVF ( IRType ty ) {
554 switch (ty) {
555 case Ity_F32: return Iop_DivF32;
556 case Ity_F64: return Iop_DivF64;
557       default: vpanic("mkDIVF");
561 static IROp mkNEGF ( IRType ty ) {
562 switch (ty) {
563 case Ity_F32: return Iop_NegF32;
564 case Ity_F64: return Iop_NegF64;
565 default: vpanic("mkNEGF");
569 static IROp mkABSF ( IRType ty ) {
570 switch (ty) {
571 case Ity_F32: return Iop_AbsF32;
572 case Ity_F64: return Iop_AbsF64;
573       default: vpanic("mkABSF");
577 static IROp mkSQRTF ( IRType ty ) {
578 switch (ty) {
579 case Ity_F32: return Iop_SqrtF32;
580 case Ity_F64: return Iop_SqrtF64;
581       default: vpanic("mkSQRTF");
585 static IROp mkVecADD ( UInt size ) {
586 const IROp ops[4]
587 = { Iop_Add8x16, Iop_Add16x8, Iop_Add32x4, Iop_Add64x2 };
588 vassert(size < 4);
589 return ops[size];
592 static IROp mkVecQADDU ( UInt size ) {
593 const IROp ops[4]
594 = { Iop_QAdd8Ux16, Iop_QAdd16Ux8, Iop_QAdd32Ux4, Iop_QAdd64Ux2 };
595 vassert(size < 4);
596 return ops[size];
599 static IROp mkVecQADDS ( UInt size ) {
600 const IROp ops[4]
601 = { Iop_QAdd8Sx16, Iop_QAdd16Sx8, Iop_QAdd32Sx4, Iop_QAdd64Sx2 };
602 vassert(size < 4);
603 return ops[size];
606 static IROp mkVecQADDEXTSUSATUU ( UInt size ) {
607 const IROp ops[4]
608 = { Iop_QAddExtSUsatUU8x16, Iop_QAddExtSUsatUU16x8,
609 Iop_QAddExtSUsatUU32x4, Iop_QAddExtSUsatUU64x2 };
610 vassert(size < 4);
611 return ops[size];
614 static IROp mkVecQADDEXTUSSATSS ( UInt size ) {
615 const IROp ops[4]
616 = { Iop_QAddExtUSsatSS8x16, Iop_QAddExtUSsatSS16x8,
617 Iop_QAddExtUSsatSS32x4, Iop_QAddExtUSsatSS64x2 };
618 vassert(size < 4);
619 return ops[size];
622 static IROp mkVecSUB ( UInt size ) {
623 const IROp ops[4]
624 = { Iop_Sub8x16, Iop_Sub16x8, Iop_Sub32x4, Iop_Sub64x2 };
625 vassert(size < 4);
626 return ops[size];
629 static IROp mkVecQSUBU ( UInt size ) {
630 const IROp ops[4]
631 = { Iop_QSub8Ux16, Iop_QSub16Ux8, Iop_QSub32Ux4, Iop_QSub64Ux2 };
632 vassert(size < 4);
633 return ops[size];
636 static IROp mkVecQSUBS ( UInt size ) {
637 const IROp ops[4]
638 = { Iop_QSub8Sx16, Iop_QSub16Sx8, Iop_QSub32Sx4, Iop_QSub64Sx2 };
639 vassert(size < 4);
640 return ops[size];
643 static IROp mkVecSARN ( UInt size ) {
644 const IROp ops[4]
645 = { Iop_SarN8x16, Iop_SarN16x8, Iop_SarN32x4, Iop_SarN64x2 };
646 vassert(size < 4);
647 return ops[size];
650 static IROp mkVecSHRN ( UInt size ) {
651 const IROp ops[4]
652 = { Iop_ShrN8x16, Iop_ShrN16x8, Iop_ShrN32x4, Iop_ShrN64x2 };
653 vassert(size < 4);
654 return ops[size];
657 static IROp mkVecSHLN ( UInt size ) {
658 const IROp ops[4]
659 = { Iop_ShlN8x16, Iop_ShlN16x8, Iop_ShlN32x4, Iop_ShlN64x2 };
660 vassert(size < 4);
661 return ops[size];
664 static IROp mkVecCATEVENLANES ( UInt size ) {
665 const IROp ops[4]
666 = { Iop_CatEvenLanes8x16, Iop_CatEvenLanes16x8,
667 Iop_CatEvenLanes32x4, Iop_InterleaveLO64x2 };
668 vassert(size < 4);
669 return ops[size];
672 static IROp mkVecCATODDLANES ( UInt size ) {
673 const IROp ops[4]
674 = { Iop_CatOddLanes8x16, Iop_CatOddLanes16x8,
675 Iop_CatOddLanes32x4, Iop_InterleaveHI64x2 };
676 vassert(size < 4);
677 return ops[size];
680 static IROp mkVecINTERLEAVELO ( UInt size ) {
681 const IROp ops[4]
682 = { Iop_InterleaveLO8x16, Iop_InterleaveLO16x8,
683 Iop_InterleaveLO32x4, Iop_InterleaveLO64x2 };
684 vassert(size < 4);
685 return ops[size];
688 static IROp mkVecINTERLEAVEHI ( UInt size ) {
689 const IROp ops[4]
690 = { Iop_InterleaveHI8x16, Iop_InterleaveHI16x8,
691 Iop_InterleaveHI32x4, Iop_InterleaveHI64x2 };
692 vassert(size < 4);
693 return ops[size];
696 static IROp mkVecMAXU ( UInt size ) {
697 const IROp ops[4]
698 = { Iop_Max8Ux16, Iop_Max16Ux8, Iop_Max32Ux4, Iop_Max64Ux2 };
699 vassert(size < 4);
700 return ops[size];
703 static IROp mkVecMAXS ( UInt size ) {
704 const IROp ops[4]
705 = { Iop_Max8Sx16, Iop_Max16Sx8, Iop_Max32Sx4, Iop_Max64Sx2 };
706 vassert(size < 4);
707 return ops[size];
710 static IROp mkVecMINU ( UInt size ) {
711 const IROp ops[4]
712 = { Iop_Min8Ux16, Iop_Min16Ux8, Iop_Min32Ux4, Iop_Min64Ux2 };
713 vassert(size < 4);
714 return ops[size];
717 static IROp mkVecMINS ( UInt size ) {
718 const IROp ops[4]
719 = { Iop_Min8Sx16, Iop_Min16Sx8, Iop_Min32Sx4, Iop_Min64Sx2 };
720 vassert(size < 4);
721 return ops[size];
724 static IROp mkVecMUL ( UInt size ) {
725 const IROp ops[4]
726 = { Iop_Mul8x16, Iop_Mul16x8, Iop_Mul32x4, Iop_INVALID };
727 vassert(size < 3);
728 return ops[size];
731 static IROp mkVecMULLU ( UInt sizeNarrow ) {
732 const IROp ops[4]
733 = { Iop_Mull8Ux8, Iop_Mull16Ux4, Iop_Mull32Ux2, Iop_INVALID };
734 vassert(sizeNarrow < 3);
735 return ops[sizeNarrow];
738 static IROp mkVecMULLS ( UInt sizeNarrow ) {
739 const IROp ops[4]
740 = { Iop_Mull8Sx8, Iop_Mull16Sx4, Iop_Mull32Sx2, Iop_INVALID };
741 vassert(sizeNarrow < 3);
742 return ops[sizeNarrow];
745 static IROp mkVecQDMULLS ( UInt sizeNarrow ) {
746 const IROp ops[4]
747 = { Iop_INVALID, Iop_QDMull16Sx4, Iop_QDMull32Sx2, Iop_INVALID };
748 vassert(sizeNarrow < 3);
749 return ops[sizeNarrow];
752 static IROp mkVecCMPEQ ( UInt size ) {
753 const IROp ops[4]
754 = { Iop_CmpEQ8x16, Iop_CmpEQ16x8, Iop_CmpEQ32x4, Iop_CmpEQ64x2 };
755 vassert(size < 4);
756 return ops[size];
759 static IROp mkVecCMPGTU ( UInt size ) {
760 const IROp ops[4]
761 = { Iop_CmpGT8Ux16, Iop_CmpGT16Ux8, Iop_CmpGT32Ux4, Iop_CmpGT64Ux2 };
762 vassert(size < 4);
763 return ops[size];
766 static IROp mkVecCMPGTS ( UInt size ) {
767 const IROp ops[4]
768 = { Iop_CmpGT8Sx16, Iop_CmpGT16Sx8, Iop_CmpGT32Sx4, Iop_CmpGT64Sx2 };
769 vassert(size < 4);
770 return ops[size];
773 static IROp mkVecABS ( UInt size ) {
774 const IROp ops[4]
775 = { Iop_Abs8x16, Iop_Abs16x8, Iop_Abs32x4, Iop_Abs64x2 };
776 vassert(size < 4);
777 return ops[size];
780 static IROp mkVecZEROHIxxOFV128 ( UInt size ) {
781 const IROp ops[4]
782 = { Iop_ZeroHI120ofV128, Iop_ZeroHI112ofV128,
783 Iop_ZeroHI96ofV128, Iop_ZeroHI64ofV128 };
784 vassert(size < 4);
785 return ops[size];
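/* Worked example (illustrative): mkVecZEROHIxxOFV128(0) yields
   Iop_ZeroHI120ofV128, which keeps only the low 8 bits (one byte lane) of
   the V128 value and zeroes the remaining 120 bits; size 3 keeps the low
   64 bits.  Typically used to keep a scalar result in the bottom lane of
   a Q register. */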
788 static IRExpr* mkU ( IRType ty, ULong imm ) {
789 switch (ty) {
790 case Ity_I32: return mkU32((UInt)(imm & 0xFFFFFFFFULL));
791 case Ity_I64: return mkU64(imm);
792 default: vpanic("mkU");
796 static IROp mkVecQDMULHIS ( UInt size ) {
797 const IROp ops[4]
798 = { Iop_INVALID, Iop_QDMulHi16Sx8, Iop_QDMulHi32Sx4, Iop_INVALID };
799 vassert(size < 4);
800 return ops[size];
803 static IROp mkVecQRDMULHIS ( UInt size ) {
804 const IROp ops[4]
805 = { Iop_INVALID, Iop_QRDMulHi16Sx8, Iop_QRDMulHi32Sx4, Iop_INVALID };
806 vassert(size < 4);
807 return ops[size];
810 static IROp mkVecQANDUQSH ( UInt size ) {
811 const IROp ops[4]
812 = { Iop_QandUQsh8x16, Iop_QandUQsh16x8,
813 Iop_QandUQsh32x4, Iop_QandUQsh64x2 };
814 vassert(size < 4);
815 return ops[size];
818 static IROp mkVecQANDSQSH ( UInt size ) {
819 const IROp ops[4]
820 = { Iop_QandSQsh8x16, Iop_QandSQsh16x8,
821 Iop_QandSQsh32x4, Iop_QandSQsh64x2 };
822 vassert(size < 4);
823 return ops[size];
826 static IROp mkVecQANDUQRSH ( UInt size ) {
827 const IROp ops[4]
828 = { Iop_QandUQRsh8x16, Iop_QandUQRsh16x8,
829 Iop_QandUQRsh32x4, Iop_QandUQRsh64x2 };
830 vassert(size < 4);
831 return ops[size];
834 static IROp mkVecQANDSQRSH ( UInt size ) {
835 const IROp ops[4]
836 = { Iop_QandSQRsh8x16, Iop_QandSQRsh16x8,
837 Iop_QandSQRsh32x4, Iop_QandSQRsh64x2 };
838 vassert(size < 4);
839 return ops[size];
842 static IROp mkVecSHU ( UInt size ) {
843 const IROp ops[4]
844 = { Iop_Sh8Ux16, Iop_Sh16Ux8, Iop_Sh32Ux4, Iop_Sh64Ux2 };
845 vassert(size < 4);
846 return ops[size];
849 static IROp mkVecSHS ( UInt size ) {
850 const IROp ops[4]
851 = { Iop_Sh8Sx16, Iop_Sh16Sx8, Iop_Sh32Sx4, Iop_Sh64Sx2 };
852 vassert(size < 4);
853 return ops[size];
856 static IROp mkVecRSHU ( UInt size ) {
857 const IROp ops[4]
858 = { Iop_Rsh8Ux16, Iop_Rsh16Ux8, Iop_Rsh32Ux4, Iop_Rsh64Ux2 };
859 vassert(size < 4);
860 return ops[size];
863 static IROp mkVecRSHS ( UInt size ) {
864 const IROp ops[4]
865 = { Iop_Rsh8Sx16, Iop_Rsh16Sx8, Iop_Rsh32Sx4, Iop_Rsh64Sx2 };
866 vassert(size < 4);
867 return ops[size];
870 static IROp mkVecNARROWUN ( UInt sizeNarrow ) {
871 const IROp ops[4]
872 = { Iop_NarrowUn16to8x8, Iop_NarrowUn32to16x4,
873 Iop_NarrowUn64to32x2, Iop_INVALID };
874 vassert(sizeNarrow < 4);
875 return ops[sizeNarrow];
878 static IROp mkVecQNARROWUNSU ( UInt sizeNarrow ) {
879 const IROp ops[4]
880 = { Iop_QNarrowUn16Sto8Ux8, Iop_QNarrowUn32Sto16Ux4,
881 Iop_QNarrowUn64Sto32Ux2, Iop_INVALID };
882 vassert(sizeNarrow < 4);
883 return ops[sizeNarrow];
886 static IROp mkVecQNARROWUNSS ( UInt sizeNarrow ) {
887 const IROp ops[4]
888 = { Iop_QNarrowUn16Sto8Sx8, Iop_QNarrowUn32Sto16Sx4,
889 Iop_QNarrowUn64Sto32Sx2, Iop_INVALID };
890 vassert(sizeNarrow < 4);
891 return ops[sizeNarrow];
894 static IROp mkVecQNARROWUNUU ( UInt sizeNarrow ) {
895 const IROp ops[4]
896 = { Iop_QNarrowUn16Uto8Ux8, Iop_QNarrowUn32Uto16Ux4,
897 Iop_QNarrowUn64Uto32Ux2, Iop_INVALID };
898 vassert(sizeNarrow < 4);
899 return ops[sizeNarrow];
902 static IROp mkVecQANDqshrNNARROWUU ( UInt sizeNarrow ) {
903 const IROp ops[4]
904 = { Iop_QandQShrNnarrow16Uto8Ux8, Iop_QandQShrNnarrow32Uto16Ux4,
905 Iop_QandQShrNnarrow64Uto32Ux2, Iop_INVALID };
906 vassert(sizeNarrow < 4);
907 return ops[sizeNarrow];
910 static IROp mkVecQANDqsarNNARROWSS ( UInt sizeNarrow ) {
911 const IROp ops[4]
912 = { Iop_QandQSarNnarrow16Sto8Sx8, Iop_QandQSarNnarrow32Sto16Sx4,
913 Iop_QandQSarNnarrow64Sto32Sx2, Iop_INVALID };
914 vassert(sizeNarrow < 4);
915 return ops[sizeNarrow];
918 static IROp mkVecQANDqsarNNARROWSU ( UInt sizeNarrow ) {
919 const IROp ops[4]
920 = { Iop_QandQSarNnarrow16Sto8Ux8, Iop_QandQSarNnarrow32Sto16Ux4,
921 Iop_QandQSarNnarrow64Sto32Ux2, Iop_INVALID };
922 vassert(sizeNarrow < 4);
923 return ops[sizeNarrow];
926 static IROp mkVecQANDqrshrNNARROWUU ( UInt sizeNarrow ) {
927 const IROp ops[4]
928 = { Iop_QandQRShrNnarrow16Uto8Ux8, Iop_QandQRShrNnarrow32Uto16Ux4,
929 Iop_QandQRShrNnarrow64Uto32Ux2, Iop_INVALID };
930 vassert(sizeNarrow < 4);
931 return ops[sizeNarrow];
934 static IROp mkVecQANDqrsarNNARROWSS ( UInt sizeNarrow ) {
935 const IROp ops[4]
936 = { Iop_QandQRSarNnarrow16Sto8Sx8, Iop_QandQRSarNnarrow32Sto16Sx4,
937 Iop_QandQRSarNnarrow64Sto32Sx2, Iop_INVALID };
938 vassert(sizeNarrow < 4);
939 return ops[sizeNarrow];
942 static IROp mkVecQANDqrsarNNARROWSU ( UInt sizeNarrow ) {
943 const IROp ops[4]
944 = { Iop_QandQRSarNnarrow16Sto8Ux8, Iop_QandQRSarNnarrow32Sto16Ux4,
945 Iop_QandQRSarNnarrow64Sto32Ux2, Iop_INVALID };
946 vassert(sizeNarrow < 4);
947 return ops[sizeNarrow];
950 static IROp mkVecQSHLNSATUU ( UInt size ) {
951 const IROp ops[4]
952 = { Iop_QShlNsatUU8x16, Iop_QShlNsatUU16x8,
953 Iop_QShlNsatUU32x4, Iop_QShlNsatUU64x2 };
954 vassert(size < 4);
955 return ops[size];
958 static IROp mkVecQSHLNSATSS ( UInt size ) {
959 const IROp ops[4]
960 = { Iop_QShlNsatSS8x16, Iop_QShlNsatSS16x8,
961 Iop_QShlNsatSS32x4, Iop_QShlNsatSS64x2 };
962 vassert(size < 4);
963 return ops[size];
966 static IROp mkVecQSHLNSATSU ( UInt size ) {
967 const IROp ops[4]
968 = { Iop_QShlNsatSU8x16, Iop_QShlNsatSU16x8,
969 Iop_QShlNsatSU32x4, Iop_QShlNsatSU64x2 };
970 vassert(size < 4);
971 return ops[size];
974 static IROp mkVecADDF ( UInt size ) {
975 const IROp ops[4]
976 = { Iop_INVALID, Iop_INVALID, Iop_Add32Fx4, Iop_Add64Fx2 };
977 vassert(size < 4);
978 return ops[size];
981 static IROp mkVecMAXF ( UInt size ) {
982 const IROp ops[4]
983 = { Iop_INVALID, Iop_INVALID, Iop_Max32Fx4, Iop_Max64Fx2 };
984 vassert(size < 4);
985 return ops[size];
988 static IROp mkVecMINF ( UInt size ) {
989 const IROp ops[4]
990 = { Iop_INVALID, Iop_INVALID, Iop_Min32Fx4, Iop_Min64Fx2 };
991 vassert(size < 4);
992 return ops[size];
995 /* Generate IR to create 'arg rotated right by imm', for sane values
996 of 'ty' and 'imm'. */
997 static IRTemp mathROR ( IRType ty, IRTemp arg, UInt imm )
999 UInt w = 0;
1000 if (ty == Ity_I64) {
1001 w = 64;
1002 } else {
1003 vassert(ty == Ity_I32);
1004 w = 32;
1006 vassert(w != 0);
1007 vassert(imm < w);
1008 if (imm == 0) {
1009 return arg;
1011 IRTemp res = newTemp(ty);
1012 assign(res, binop(mkOR(ty),
1013 binop(mkSHL(ty), mkexpr(arg), mkU8(w - imm)),
1014 binop(mkSHR(ty), mkexpr(arg), mkU8(imm)) ));
1015 return res;
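/* Worked example (illustrative): for ty == Ity_I32 and imm == 8 the
   generated IR is (arg << 24) | (arg >> 8), i.e. a 32-bit rotate right by
   8; imm == 0 simply returns the argument unchanged. */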
1018 /* Generate IR to set the returned temp to either all-zeroes or
1019 all ones, as a copy of arg<imm>. */
1020 static IRTemp mathREPLICATE ( IRType ty, IRTemp arg, UInt imm )
1022 UInt w = 0;
1023 if (ty == Ity_I64) {
1024 w = 64;
1025 } else {
1026 vassert(ty == Ity_I32);
1027 w = 32;
1029 vassert(w != 0);
1030 vassert(imm < w);
1031 IRTemp res = newTemp(ty);
1032 assign(res, binop(mkSAR(ty),
1033 binop(mkSHL(ty), mkexpr(arg), mkU8(w - 1 - imm)),
1034 mkU8(w - 1)));
1035 return res;
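/* Worked example (illustrative): for ty == Ity_I32 and imm == 5 the
   generated IR is (arg << 26) >>s 31, which parks bit 5 in the sign
   position and then arithmetically shifts it back across the whole word,
   yielding all-zeroes or all-ones. */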
1038 /* U-widen 8/16/32/64 bit int expr to 64. */
1039 static IRExpr* widenUto64 ( IRType srcTy, IRExpr* e )
1041 switch (srcTy) {
1042 case Ity_I64: return e;
1043 case Ity_I32: return unop(Iop_32Uto64, e);
1044 case Ity_I16: return unop(Iop_16Uto64, e);
1045 case Ity_I8: return unop(Iop_8Uto64, e);
1046 default: vpanic("widenUto64(arm64)");
1050 /* Narrow 64 bit int expr to 8/16/32/64. Clearly only some
1051 of these combinations make sense. */
1052 static IRExpr* narrowFrom64 ( IRType dstTy, IRExpr* e )
1054 switch (dstTy) {
1055 case Ity_I64: return e;
1056 case Ity_I32: return unop(Iop_64to32, e);
1057 case Ity_I16: return unop(Iop_64to16, e);
1058 case Ity_I8: return unop(Iop_64to8, e);
1059 default: vpanic("narrowFrom64(arm64)");
1064 /*------------------------------------------------------------*/
1065 /*--- Helpers for accessing guest registers. ---*/
1066 /*------------------------------------------------------------*/
1068 #define OFFB_X0 offsetof(VexGuestARM64State,guest_X0)
1069 #define OFFB_X1 offsetof(VexGuestARM64State,guest_X1)
1070 #define OFFB_X2 offsetof(VexGuestARM64State,guest_X2)
1071 #define OFFB_X3 offsetof(VexGuestARM64State,guest_X3)
1072 #define OFFB_X4 offsetof(VexGuestARM64State,guest_X4)
1073 #define OFFB_X5 offsetof(VexGuestARM64State,guest_X5)
1074 #define OFFB_X6 offsetof(VexGuestARM64State,guest_X6)
1075 #define OFFB_X7 offsetof(VexGuestARM64State,guest_X7)
1076 #define OFFB_X8 offsetof(VexGuestARM64State,guest_X8)
1077 #define OFFB_X9 offsetof(VexGuestARM64State,guest_X9)
1078 #define OFFB_X10 offsetof(VexGuestARM64State,guest_X10)
1079 #define OFFB_X11 offsetof(VexGuestARM64State,guest_X11)
1080 #define OFFB_X12 offsetof(VexGuestARM64State,guest_X12)
1081 #define OFFB_X13 offsetof(VexGuestARM64State,guest_X13)
1082 #define OFFB_X14 offsetof(VexGuestARM64State,guest_X14)
1083 #define OFFB_X15 offsetof(VexGuestARM64State,guest_X15)
1084 #define OFFB_X16 offsetof(VexGuestARM64State,guest_X16)
1085 #define OFFB_X17 offsetof(VexGuestARM64State,guest_X17)
1086 #define OFFB_X18 offsetof(VexGuestARM64State,guest_X18)
1087 #define OFFB_X19 offsetof(VexGuestARM64State,guest_X19)
1088 #define OFFB_X20 offsetof(VexGuestARM64State,guest_X20)
1089 #define OFFB_X21 offsetof(VexGuestARM64State,guest_X21)
1090 #define OFFB_X22 offsetof(VexGuestARM64State,guest_X22)
1091 #define OFFB_X23 offsetof(VexGuestARM64State,guest_X23)
1092 #define OFFB_X24 offsetof(VexGuestARM64State,guest_X24)
1093 #define OFFB_X25 offsetof(VexGuestARM64State,guest_X25)
1094 #define OFFB_X26 offsetof(VexGuestARM64State,guest_X26)
1095 #define OFFB_X27 offsetof(VexGuestARM64State,guest_X27)
1096 #define OFFB_X28 offsetof(VexGuestARM64State,guest_X28)
1097 #define OFFB_X29 offsetof(VexGuestARM64State,guest_X29)
1098 #define OFFB_X30 offsetof(VexGuestARM64State,guest_X30)
1100 #define OFFB_XSP offsetof(VexGuestARM64State,guest_XSP)
1101 #define OFFB_PC offsetof(VexGuestARM64State,guest_PC)
1103 #define OFFB_CC_OP offsetof(VexGuestARM64State,guest_CC_OP)
1104 #define OFFB_CC_DEP1 offsetof(VexGuestARM64State,guest_CC_DEP1)
1105 #define OFFB_CC_DEP2 offsetof(VexGuestARM64State,guest_CC_DEP2)
1106 #define OFFB_CC_NDEP offsetof(VexGuestARM64State,guest_CC_NDEP)
1108 #define OFFB_TPIDR_EL0 offsetof(VexGuestARM64State,guest_TPIDR_EL0)
1109 #define OFFB_NRADDR offsetof(VexGuestARM64State,guest_NRADDR)
1111 #define OFFB_Q0 offsetof(VexGuestARM64State,guest_Q0)
1112 #define OFFB_Q1 offsetof(VexGuestARM64State,guest_Q1)
1113 #define OFFB_Q2 offsetof(VexGuestARM64State,guest_Q2)
1114 #define OFFB_Q3 offsetof(VexGuestARM64State,guest_Q3)
1115 #define OFFB_Q4 offsetof(VexGuestARM64State,guest_Q4)
1116 #define OFFB_Q5 offsetof(VexGuestARM64State,guest_Q5)
1117 #define OFFB_Q6 offsetof(VexGuestARM64State,guest_Q6)
1118 #define OFFB_Q7 offsetof(VexGuestARM64State,guest_Q7)
1119 #define OFFB_Q8 offsetof(VexGuestARM64State,guest_Q8)
1120 #define OFFB_Q9 offsetof(VexGuestARM64State,guest_Q9)
1121 #define OFFB_Q10 offsetof(VexGuestARM64State,guest_Q10)
1122 #define OFFB_Q11 offsetof(VexGuestARM64State,guest_Q11)
1123 #define OFFB_Q12 offsetof(VexGuestARM64State,guest_Q12)
1124 #define OFFB_Q13 offsetof(VexGuestARM64State,guest_Q13)
1125 #define OFFB_Q14 offsetof(VexGuestARM64State,guest_Q14)
1126 #define OFFB_Q15 offsetof(VexGuestARM64State,guest_Q15)
1127 #define OFFB_Q16 offsetof(VexGuestARM64State,guest_Q16)
1128 #define OFFB_Q17 offsetof(VexGuestARM64State,guest_Q17)
1129 #define OFFB_Q18 offsetof(VexGuestARM64State,guest_Q18)
1130 #define OFFB_Q19 offsetof(VexGuestARM64State,guest_Q19)
1131 #define OFFB_Q20 offsetof(VexGuestARM64State,guest_Q20)
1132 #define OFFB_Q21 offsetof(VexGuestARM64State,guest_Q21)
1133 #define OFFB_Q22 offsetof(VexGuestARM64State,guest_Q22)
1134 #define OFFB_Q23 offsetof(VexGuestARM64State,guest_Q23)
1135 #define OFFB_Q24 offsetof(VexGuestARM64State,guest_Q24)
1136 #define OFFB_Q25 offsetof(VexGuestARM64State,guest_Q25)
1137 #define OFFB_Q26 offsetof(VexGuestARM64State,guest_Q26)
1138 #define OFFB_Q27 offsetof(VexGuestARM64State,guest_Q27)
1139 #define OFFB_Q28 offsetof(VexGuestARM64State,guest_Q28)
1140 #define OFFB_Q29 offsetof(VexGuestARM64State,guest_Q29)
1141 #define OFFB_Q30 offsetof(VexGuestARM64State,guest_Q30)
1142 #define OFFB_Q31 offsetof(VexGuestARM64State,guest_Q31)
1144 #define OFFB_FPCR offsetof(VexGuestARM64State,guest_FPCR)
1145 #define OFFB_QCFLAG offsetof(VexGuestARM64State,guest_QCFLAG)
1147 #define OFFB_CMSTART offsetof(VexGuestARM64State,guest_CMSTART)
1148 #define OFFB_CMLEN offsetof(VexGuestARM64State,guest_CMLEN)
1150 #define OFFB_LLSC_SIZE offsetof(VexGuestARM64State,guest_LLSC_SIZE)
1151 #define OFFB_LLSC_ADDR offsetof(VexGuestARM64State,guest_LLSC_ADDR)
1152 #define OFFB_LLSC_DATA offsetof(VexGuestARM64State,guest_LLSC_DATA)
1155 /* ---------------- Integer registers ---------------- */
1157 static Int offsetIReg64 ( UInt iregNo )
1159 /* Do we care about endianness here? We do if sub-parts of integer
1160 registers are accessed. */
1161 switch (iregNo) {
1162 case 0: return OFFB_X0;
1163 case 1: return OFFB_X1;
1164 case 2: return OFFB_X2;
1165 case 3: return OFFB_X3;
1166 case 4: return OFFB_X4;
1167 case 5: return OFFB_X5;
1168 case 6: return OFFB_X6;
1169 case 7: return OFFB_X7;
1170 case 8: return OFFB_X8;
1171 case 9: return OFFB_X9;
1172 case 10: return OFFB_X10;
1173 case 11: return OFFB_X11;
1174 case 12: return OFFB_X12;
1175 case 13: return OFFB_X13;
1176 case 14: return OFFB_X14;
1177 case 15: return OFFB_X15;
1178 case 16: return OFFB_X16;
1179 case 17: return OFFB_X17;
1180 case 18: return OFFB_X18;
1181 case 19: return OFFB_X19;
1182 case 20: return OFFB_X20;
1183 case 21: return OFFB_X21;
1184 case 22: return OFFB_X22;
1185 case 23: return OFFB_X23;
1186 case 24: return OFFB_X24;
1187 case 25: return OFFB_X25;
1188 case 26: return OFFB_X26;
1189 case 27: return OFFB_X27;
1190 case 28: return OFFB_X28;
1191 case 29: return OFFB_X29;
1192 case 30: return OFFB_X30;
1193 /* but not 31 */
1194 default: vassert(0);
1198 static Int offsetIReg64orSP ( UInt iregNo )
1200 return iregNo == 31 ? OFFB_XSP : offsetIReg64(iregNo);
1203 static const HChar* nameIReg64orZR ( UInt iregNo )
1205 vassert(iregNo < 32);
1206 static const HChar* names[32]
1207 = { "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
1208 "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
1209 "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
1210 "x24", "x25", "x26", "x27", "x28", "x29", "x30", "xzr" };
1211 return names[iregNo];
1214 static const HChar* nameIReg64orSP ( UInt iregNo )
1216 if (iregNo == 31) {
1217 return "sp";
1219 vassert(iregNo < 31);
1220 return nameIReg64orZR(iregNo);
1223 static IRExpr* getIReg64orSP ( UInt iregNo )
1225 vassert(iregNo < 32);
1226 return IRExpr_Get( offsetIReg64orSP(iregNo), Ity_I64 );
1229 static IRExpr* getIReg64orZR ( UInt iregNo )
1231 if (iregNo == 31) {
1232 return mkU64(0);
1234 vassert(iregNo < 31);
1235 return IRExpr_Get( offsetIReg64orSP(iregNo), Ity_I64 );
1238 static void putIReg64orSP ( UInt iregNo, IRExpr* e )
1240 vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I64);
1241 stmt( IRStmt_Put(offsetIReg64orSP(iregNo), e) );
1244 static void putIReg64orZR ( UInt iregNo, IRExpr* e )
1246 vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I64);
1247 if (iregNo == 31) {
1248 return;
1250 vassert(iregNo < 31);
1251 stmt( IRStmt_Put(offsetIReg64orSP(iregNo), e) );
1254 static const HChar* nameIReg32orZR ( UInt iregNo )
1256 vassert(iregNo < 32);
1257 static const HChar* names[32]
1258 = { "w0", "w1", "w2", "w3", "w4", "w5", "w6", "w7",
1259 "w8", "w9", "w10", "w11", "w12", "w13", "w14", "w15",
1260 "w16", "w17", "w18", "w19", "w20", "w21", "w22", "w23",
1261 "w24", "w25", "w26", "w27", "w28", "w29", "w30", "wzr" };
1262 return names[iregNo];
1265 static const HChar* nameIReg32orSP ( UInt iregNo )
1267 if (iregNo == 31) {
1268 return "wsp";
1270 vassert(iregNo < 31);
1271 return nameIReg32orZR(iregNo);
1274 static IRExpr* getIReg32orSP ( UInt iregNo )
1276 vassert(iregNo < 32);
1277 return unop(Iop_64to32,
1278 IRExpr_Get( offsetIReg64orSP(iregNo), Ity_I64 ));
1281 static IRExpr* getIReg32orZR ( UInt iregNo )
1283 if (iregNo == 31) {
1284 return mkU32(0);
1286 vassert(iregNo < 31);
1287 return unop(Iop_64to32,
1288 IRExpr_Get( offsetIReg64orSP(iregNo), Ity_I64 ));
1291 static void putIReg32orSP ( UInt iregNo, IRExpr* e )
1293 vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32);
1294 stmt( IRStmt_Put(offsetIReg64orSP(iregNo), unop(Iop_32Uto64, e)) );
1297 static void putIReg32orZR ( UInt iregNo, IRExpr* e )
1299 vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32);
1300 if (iregNo == 31) {
1301 return;
1303 vassert(iregNo < 31);
1304 stmt( IRStmt_Put(offsetIReg64orSP(iregNo), unop(Iop_32Uto64, e)) );
1307 static const HChar* nameIRegOrSP ( Bool is64, UInt iregNo )
1309 vassert(is64 == True || is64 == False);
1310 return is64 ? nameIReg64orSP(iregNo) : nameIReg32orSP(iregNo);
1313 static const HChar* nameIRegOrZR ( Bool is64, UInt iregNo )
1315 vassert(is64 == True || is64 == False);
1316 return is64 ? nameIReg64orZR(iregNo) : nameIReg32orZR(iregNo);
1319 static IRExpr* getIRegOrZR ( Bool is64, UInt iregNo )
1321 vassert(is64 == True || is64 == False);
1322 return is64 ? getIReg64orZR(iregNo) : getIReg32orZR(iregNo);
1325 static void putIRegOrZR ( Bool is64, UInt iregNo, IRExpr* e )
1327 vassert(is64 == True || is64 == False);
1328 if (is64) putIReg64orZR(iregNo, e); else putIReg32orZR(iregNo, e);
1331 static void putPC ( IRExpr* e )
1333 vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I64);
1334 stmt( IRStmt_Put(OFFB_PC, e) );
1338 /* ---------------- Vector (Q) registers ---------------- */
1340 static Int offsetQReg128 ( UInt qregNo )
1342 /* We don't care about endianness at this point. It only becomes
1343 relevant when dealing with sections of these registers.*/
1344 switch (qregNo) {
1345 case 0: return OFFB_Q0;
1346 case 1: return OFFB_Q1;
1347 case 2: return OFFB_Q2;
1348 case 3: return OFFB_Q3;
1349 case 4: return OFFB_Q4;
1350 case 5: return OFFB_Q5;
1351 case 6: return OFFB_Q6;
1352 case 7: return OFFB_Q7;
1353 case 8: return OFFB_Q8;
1354 case 9: return OFFB_Q9;
1355 case 10: return OFFB_Q10;
1356 case 11: return OFFB_Q11;
1357 case 12: return OFFB_Q12;
1358 case 13: return OFFB_Q13;
1359 case 14: return OFFB_Q14;
1360 case 15: return OFFB_Q15;
1361 case 16: return OFFB_Q16;
1362 case 17: return OFFB_Q17;
1363 case 18: return OFFB_Q18;
1364 case 19: return OFFB_Q19;
1365 case 20: return OFFB_Q20;
1366 case 21: return OFFB_Q21;
1367 case 22: return OFFB_Q22;
1368 case 23: return OFFB_Q23;
1369 case 24: return OFFB_Q24;
1370 case 25: return OFFB_Q25;
1371 case 26: return OFFB_Q26;
1372 case 27: return OFFB_Q27;
1373 case 28: return OFFB_Q28;
1374 case 29: return OFFB_Q29;
1375 case 30: return OFFB_Q30;
1376 case 31: return OFFB_Q31;
1377 default: vassert(0);
1381 /* Write to a complete Qreg. */
1382 static void putQReg128 ( UInt qregNo, IRExpr* e )
1384 vassert(qregNo < 32);
1385 vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_V128);
1386 stmt( IRStmt_Put(offsetQReg128(qregNo), e) );
1389 /* Read a complete Qreg. */
1390 static IRExpr* getQReg128 ( UInt qregNo )
1392 vassert(qregNo < 32);
1393 return IRExpr_Get(offsetQReg128(qregNo), Ity_V128);
1396 /* Produce the IR type for some sub-part of a vector. For 32- and 64-
1397 bit sub-parts we can choose either integer or float types, and
1398 choose float on the basis that that is the common use case and so
1399 will give least interference with Put-to-Get forwarding later
1400 on. */
1401 static IRType preferredVectorSubTypeFromSize ( UInt szB )
1403 switch (szB) {
1404 case 1: return Ity_I8;
1405 case 2: return Ity_I16;
1406 case 4: return Ity_I32; //Ity_F32;
1407 case 8: return Ity_F64;
1408 case 16: return Ity_V128;
1409 default: vassert(0);
1413 /* Find the offset of the laneNo'th lane of type laneTy in the given
1414 Qreg. Since the host is little-endian, the least significant lane
1415 has the lowest offset. */
1416 static Int offsetQRegLane ( UInt qregNo, IRType laneTy, UInt laneNo )
1418 vassert(host_endness == VexEndnessLE);
1419 Int base = offsetQReg128(qregNo);
1420 /* Since the host is little-endian, the least significant lane
1421 will be at the lowest address. */
1422 /* Restrict this to known types, so as to avoid silently accepting
1423 stupid types. */
1424 UInt laneSzB = 0;
1425 switch (laneTy) {
1426 case Ity_I8: laneSzB = 1; break;
1427 case Ity_F16: case Ity_I16: laneSzB = 2; break;
1428 case Ity_F32: case Ity_I32: laneSzB = 4; break;
1429 case Ity_F64: case Ity_I64: laneSzB = 8; break;
1430 case Ity_V128: laneSzB = 16; break;
1431 default: break;
1433 vassert(laneSzB > 0);
1434 UInt minOff = laneNo * laneSzB;
1435 UInt maxOff = minOff + laneSzB - 1;
1436 vassert(maxOff < 16);
1437 return base + minOff;
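/* Worked example (illustrative): on this little-endian layout, lane 2 of
   type Ity_I32 in Q7 lives at offsetQReg128(7) + 2*4 bytes; the maxOff
   check above guarantees the lane lies entirely within the 16-byte
   register. */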
1440 /* Put to the least significant lane of a Qreg. */
1441 static void putQRegLO ( UInt qregNo, IRExpr* e )
1443 IRType ty = typeOfIRExpr(irsb->tyenv, e);
1444 Int off = offsetQRegLane(qregNo, ty, 0);
1445 switch (ty) {
1446 case Ity_I8: case Ity_I16: case Ity_I32: case Ity_I64:
1447 case Ity_F16: case Ity_F32: case Ity_F64: case Ity_V128:
1448 break;
1449 default:
1450 vassert(0); // Other cases are probably invalid
1452 stmt(IRStmt_Put(off, e));
1455 /* Get from the least significant lane of a Qreg. */
1456 static IRExpr* getQRegLO ( UInt qregNo, IRType ty )
1458 Int off = offsetQRegLane(qregNo, ty, 0);
1459 switch (ty) {
1460 case Ity_I8:
1461 case Ity_F16: case Ity_I16:
1462 case Ity_I32: case Ity_I64:
1463 case Ity_F32: case Ity_F64: case Ity_V128:
1464 break;
1465 default:
1466 vassert(0); // Other cases are ATC
1468 return IRExpr_Get(off, ty);
1471 static const HChar* nameQRegLO ( UInt qregNo, IRType laneTy )
1473 static const HChar* namesQ[32]
1474 = { "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7",
1475 "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15",
1476 "q16", "q17", "q18", "q19", "q20", "q21", "q22", "q23",
1477 "q24", "q25", "q26", "q27", "q28", "q29", "q30", "q31" };
1478 static const HChar* namesD[32]
1479 = { "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7",
1480 "d8", "d9", "d10", "d11", "d12", "d13", "d14", "d15",
1481 "d16", "d17", "d18", "d19", "d20", "d21", "d22", "d23",
1482 "d24", "d25", "d26", "d27", "d28", "d29", "d30", "d31" };
1483 static const HChar* namesS[32]
1484 = { "s0", "s1", "s2", "s3", "s4", "s5", "s6", "s7",
1485 "s8", "s9", "s10", "s11", "s12", "s13", "s14", "s15",
1486 "s16", "s17", "s18", "s19", "s20", "s21", "s22", "s23",
1487 "s24", "s25", "s26", "s27", "s28", "s29", "s30", "s31" };
1488 static const HChar* namesH[32]
1489 = { "h0", "h1", "h2", "h3", "h4", "h5", "h6", "h7",
1490 "h8", "h9", "h10", "h11", "h12", "h13", "h14", "h15",
1491 "h16", "h17", "h18", "h19", "h20", "h21", "h22", "h23",
1492 "h24", "h25", "h26", "h27", "h28", "h29", "h30", "h31" };
1493 static const HChar* namesB[32]
1494 = { "b0", "b1", "b2", "b3", "b4", "b5", "b6", "b7",
1495 "b8", "b9", "b10", "b11", "b12", "b13", "b14", "b15",
1496 "b16", "b17", "b18", "b19", "b20", "b21", "b22", "b23",
1497 "b24", "b25", "b26", "b27", "b28", "b29", "b30", "b31" };
1498 vassert(qregNo < 32);
1499 switch (sizeofIRType(laneTy)) {
1500 case 1: return namesB[qregNo];
1501 case 2: return namesH[qregNo];
1502 case 4: return namesS[qregNo];
1503 case 8: return namesD[qregNo];
1504 case 16: return namesQ[qregNo];
1505 default: vassert(0);
1507 /*NOTREACHED*/
1510 static const HChar* nameQReg128 ( UInt qregNo )
1512 return nameQRegLO(qregNo, Ity_V128);
1515 /* Find the offset of the most significant half (8 bytes) of the given
1516 Qreg. This requires knowing the endianness of the host. */
1517 static Int offsetQRegHI64 ( UInt qregNo )
1519 return offsetQRegLane(qregNo, Ity_I64, 1);
1522 static IRExpr* getQRegHI64 ( UInt qregNo )
1524 return IRExpr_Get(offsetQRegHI64(qregNo), Ity_I64);
1527 static void putQRegHI64 ( UInt qregNo, IRExpr* e )
1529 IRType ty = typeOfIRExpr(irsb->tyenv, e);
1530 Int off = offsetQRegHI64(qregNo);
1531 switch (ty) {
1532 case Ity_I64: case Ity_F64:
1533 break;
1534 default:
1535 vassert(0); // Other cases are plain wrong
1537 stmt(IRStmt_Put(off, e));
1540 /* Put to a specified lane of a Qreg. */
1541 static void putQRegLane ( UInt qregNo, UInt laneNo, IRExpr* e )
1543 IRType laneTy = typeOfIRExpr(irsb->tyenv, e);
1544 Int off = offsetQRegLane(qregNo, laneTy, laneNo);
1545 switch (laneTy) {
1546 case Ity_F64: case Ity_I64:
1547 case Ity_I32: case Ity_F32:
1548 case Ity_I16: case Ity_F16:
1549 case Ity_I8:
1550 break;
1551 default:
1552 vassert(0); // Other cases are ATC
1554 stmt(IRStmt_Put(off, e));
1557 /* Get from a specified lane of a Qreg. */
1558 static IRExpr* getQRegLane ( UInt qregNo, UInt laneNo, IRType laneTy )
1560 Int off = offsetQRegLane(qregNo, laneTy, laneNo);
1561 switch (laneTy) {
1562 case Ity_I64: case Ity_I32: case Ity_I16: case Ity_I8:
1563 case Ity_F64: case Ity_F32: case Ity_F16:
1564 break;
1565 default:
1566 vassert(0); // Other cases are ATC
1568 return IRExpr_Get(off, laneTy);
1572 //ZZ /* ---------------- Misc registers ---------------- */
1573 //ZZ
1574 //ZZ static void putMiscReg32 ( UInt gsoffset,
1575 //ZZ IRExpr* e, /* :: Ity_I32 */
1576 //ZZ IRTemp guardT /* :: Ity_I32, 0 or 1 */)
1577 //ZZ {
1578 //ZZ switch (gsoffset) {
1579 //ZZ case OFFB_FPSCR: break;
1580 //ZZ case OFFB_QFLAG32: break;
1581 //ZZ case OFFB_GEFLAG0: break;
1582 //ZZ case OFFB_GEFLAG1: break;
1583 //ZZ case OFFB_GEFLAG2: break;
1584 //ZZ case OFFB_GEFLAG3: break;
1585 //ZZ default: vassert(0); /* awaiting more cases */
1586 //ZZ }
1587 //ZZ vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32);
1588 //ZZ
1589 //ZZ if (guardT == IRTemp_INVALID) {
1590 //ZZ /* unconditional write */
1591 //ZZ stmt(IRStmt_Put(gsoffset, e));
1592 //ZZ } else {
1593 //ZZ stmt(IRStmt_Put(
1594 //ZZ gsoffset,
1595 //ZZ IRExpr_ITE( binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0)),
1596 //ZZ e, IRExpr_Get(gsoffset, Ity_I32) )
1597 //ZZ ));
1598 //ZZ }
1599 //ZZ }
1600 //ZZ
1601 //ZZ static IRTemp get_ITSTATE ( void )
1602 //ZZ {
1603 //ZZ ASSERT_IS_THUMB;
1604 //ZZ IRTemp t = newTemp(Ity_I32);
1605 //ZZ assign(t, IRExpr_Get( OFFB_ITSTATE, Ity_I32));
1606 //ZZ return t;
1607 //ZZ }
1608 //ZZ
1609 //ZZ static void put_ITSTATE ( IRTemp t )
1610 //ZZ {
1611 //ZZ ASSERT_IS_THUMB;
1612 //ZZ stmt( IRStmt_Put( OFFB_ITSTATE, mkexpr(t)) );
1613 //ZZ }
1614 //ZZ
1615 //ZZ static IRTemp get_QFLAG32 ( void )
1616 //ZZ {
1617 //ZZ IRTemp t = newTemp(Ity_I32);
1618 //ZZ assign(t, IRExpr_Get( OFFB_QFLAG32, Ity_I32));
1619 //ZZ return t;
1620 //ZZ }
1621 //ZZ
1622 //ZZ static void put_QFLAG32 ( IRTemp t, IRTemp condT )
1623 //ZZ {
1624 //ZZ putMiscReg32( OFFB_QFLAG32, mkexpr(t), condT );
1625 //ZZ }
1626 //ZZ
1627 //ZZ /* Stickily set the 'Q' flag (APSR bit 27) of the APSR (Application Program
1628 //ZZ Status Register) to indicate that overflow or saturation occurred.
1629 //ZZ Nb: t must be zero to denote no saturation, and any nonzero
1630 //ZZ value to indicate saturation. */
1631 //ZZ static void or_into_QFLAG32 ( IRExpr* e, IRTemp condT )
1632 //ZZ {
1633 //ZZ IRTemp old = get_QFLAG32();
1634 //ZZ IRTemp nyu = newTemp(Ity_I32);
1635 //ZZ assign(nyu, binop(Iop_Or32, mkexpr(old), e) );
1636 //ZZ put_QFLAG32(nyu, condT);
1637 //ZZ }
1640 /* ---------------- FPCR stuff ---------------- */
1642 /* Generate IR to get hold of the rounding mode bits in FPCR, and
1643 convert them to IR format. Bind the final result to the
1644 returned temp. */
1645 static IRTemp /* :: Ity_I32 */ mk_get_IR_rounding_mode ( void )
1647 /* The ARMvfp encoding for rounding mode bits is:
1648 00 to nearest
1649 01 to +infinity
1650 10 to -infinity
1651 11 to zero
1652 We need to convert that to the IR encoding:
1653 00 to nearest (the default)
1654 10 to +infinity
1655 01 to -infinity
1656 11 to zero
1657 Which can be done by swapping bits 0 and 1.
1658       The rmode bits are at 23:22 in FPCR.
1660 IRTemp armEncd = newTemp(Ity_I32);
1661 IRTemp swapped = newTemp(Ity_I32);
1662 /* Fish FPCR[23:22] out, and slide to bottom. Doesn't matter that
1663 we don't zero out bits 24 and above, since the assignment to
1664 'swapped' will mask them out anyway. */
1665 assign(armEncd,
1666 binop(Iop_Shr32, IRExpr_Get(OFFB_FPCR, Ity_I32), mkU8(22)));
1667 /* Now swap them. */
1668 assign(swapped,
1669 binop(Iop_Or32,
1670 binop(Iop_And32,
1671 binop(Iop_Shl32, mkexpr(armEncd), mkU8(1)),
1672 mkU32(2)),
1673 binop(Iop_And32,
1674 binop(Iop_Shr32, mkexpr(armEncd), mkU8(1)),
1675 mkU32(1))
1677 return swapped;
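/* Worked example (illustrative): if FPCR[23:22] == 01 ("towards
   +infinity"), 'armEncd' holds 1 in its low two bits and the swap
   produces 2, the IR encoding for rounding towards +infinity; the 00
   (nearest) and 11 (zero) encodings are unchanged by the swap. */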
1681 /*------------------------------------------------------------*/
1682 /*--- Helpers for flag handling and conditional insns ---*/
1683 /*------------------------------------------------------------*/
1685 static const HChar* nameARM64Condcode ( ARM64Condcode cond )
1687 switch (cond) {
1688 case ARM64CondEQ: return "eq";
1689 case ARM64CondNE: return "ne";
1690 case ARM64CondCS: return "cs"; // or 'hs'
1691 case ARM64CondCC: return "cc"; // or 'lo'
1692 case ARM64CondMI: return "mi";
1693 case ARM64CondPL: return "pl";
1694 case ARM64CondVS: return "vs";
1695 case ARM64CondVC: return "vc";
1696 case ARM64CondHI: return "hi";
1697 case ARM64CondLS: return "ls";
1698 case ARM64CondGE: return "ge";
1699 case ARM64CondLT: return "lt";
1700 case ARM64CondGT: return "gt";
1701 case ARM64CondLE: return "le";
1702 case ARM64CondAL: return "al";
1703 case ARM64CondNV: return "nv";
1704 default: vpanic("name_ARM64Condcode");
1708 /* and a handy shorthand for it */
1709 static const HChar* nameCC ( ARM64Condcode cond ) {
1710 return nameARM64Condcode(cond);
1714 /* Build IR to calculate some particular condition from stored
1715 CC_OP/CC_DEP1/CC_DEP2/CC_NDEP. Returns an expression of type
1716 Ity_I64, suitable for narrowing. Although the return type is
1717 Ity_I64, the returned value is either 0 or 1. 'cond' must be
1718 :: Ity_I64 and must denote the condition to compute in
1719 bits 7:4, and be zero everywhere else.
1721 static IRExpr* mk_arm64g_calculate_condition_dyn ( IRExpr* cond )
1723 vassert(typeOfIRExpr(irsb->tyenv, cond) == Ity_I64);
1724 /* And 'cond' had better produce a value in which only bits 7:4 are
1725 nonzero. However, obviously we can't assert for that. */
1727 /* So what we're constructing for the first argument is
1728 "(cond << 4) | stored-operation".
1729 However, as per comments above, 'cond' must be supplied
1730 pre-shifted to this function.
1732 This pairing scheme requires that the ARM64_CC_OP_ values all fit
1733 in 4 bits. Hence we are passing a (COND, OP) pair in the lowest
1734 8 bits of the first argument. */
1735 IRExpr** args
1736 = mkIRExprVec_4(
1737 binop(Iop_Or64, IRExpr_Get(OFFB_CC_OP, Ity_I64), cond),
1738 IRExpr_Get(OFFB_CC_DEP1, Ity_I64),
1739 IRExpr_Get(OFFB_CC_DEP2, Ity_I64),
1740 IRExpr_Get(OFFB_CC_NDEP, Ity_I64)
1742 IRExpr* call
1743 = mkIRExprCCall(
1744 Ity_I64,
1745 0/*regparm*/,
1746 "arm64g_calculate_condition", &arm64g_calculate_condition,
1747 args
1750 /* Exclude the requested condition, OP and NDEP from definedness
1751 checking. We're only interested in DEP1 and DEP2. */
1752 call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
1753 return call;
1757 /* Build IR to calculate some particular condition from stored
1758 CC_OP/CC_DEP1/CC_DEP2/CC_NDEP. Returns an expression of type
1759 Ity_I64, suitable for narrowing. Although the return type is
1760 Ity_I64, the returned value is either 0 or 1.
1762 static IRExpr* mk_arm64g_calculate_condition ( ARM64Condcode cond )
1764    /* First arg is "(cond << 4) | stored-operation". This requires that the
1765 ARM64_CC_OP_ values all fit in 4 bits. Hence we are passing a
1766 (COND, OP) pair in the lowest 8 bits of the first argument. */
1767 vassert(cond >= 0 && cond <= 15);
1768 return mk_arm64g_calculate_condition_dyn( mkU64(cond << 4) );
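/* Illustrative note (an assumption about the encoding): for a condition
   code of value 1 (NE) the clean helper receives (1 << 4) | CC_OP in the
   low byte of its first argument, from which it can recover both the
   condition and the stored thunk operation. */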
1772 /* Build IR to calculate just the carry flag from stored
1773 CC_OP/CC_DEP1/CC_DEP2/CC_NDEP. Returns an expression ::
1774 Ity_I64. */
1775 static IRExpr* mk_arm64g_calculate_flag_c ( void )
1777 IRExpr** args
1778 = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP, Ity_I64),
1779 IRExpr_Get(OFFB_CC_DEP1, Ity_I64),
1780 IRExpr_Get(OFFB_CC_DEP2, Ity_I64),
1781 IRExpr_Get(OFFB_CC_NDEP, Ity_I64) );
1782 IRExpr* call
1783 = mkIRExprCCall(
1784 Ity_I64,
1785 0/*regparm*/,
1786 "arm64g_calculate_flag_c", &arm64g_calculate_flag_c,
1787 args
1789 /* Exclude OP and NDEP from definedness checking. We're only
1790 interested in DEP1 and DEP2. */
1791 call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
1792 return call;
1796 //ZZ /* Build IR to calculate just the overflow flag from stored
1797 //ZZ CC_OP/CC_DEP1/CC_DEP2/CC_NDEP. Returns an expression ::
1798 //ZZ Ity_I32. */
1799 //ZZ static IRExpr* mk_armg_calculate_flag_v ( void )
1800 //ZZ {
1801 //ZZ IRExpr** args
1802 //ZZ = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP, Ity_I32),
1803 //ZZ IRExpr_Get(OFFB_CC_DEP1, Ity_I32),
1804 //ZZ IRExpr_Get(OFFB_CC_DEP2, Ity_I32),
1805 //ZZ IRExpr_Get(OFFB_CC_NDEP, Ity_I32) );
1806 //ZZ IRExpr* call
1807 //ZZ = mkIRExprCCall(
1808 //ZZ Ity_I32,
1809 //ZZ 0/*regparm*/,
1810 //ZZ "armg_calculate_flag_v", &armg_calculate_flag_v,
1811 //ZZ args
1812 //ZZ );
1813 //ZZ /* Exclude OP and NDEP from definedness checking. We're only
1814 //ZZ interested in DEP1 and DEP2. */
1815 //ZZ call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
1816 //ZZ return call;
1817 //ZZ }
1820 /* Build IR to calculate N Z C V in bits 31:28 of the
1821 returned word. */
1822 static IRExpr* mk_arm64g_calculate_flags_nzcv ( void )
1824 IRExpr** args
1825 = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP, Ity_I64),
1826 IRExpr_Get(OFFB_CC_DEP1, Ity_I64),
1827 IRExpr_Get(OFFB_CC_DEP2, Ity_I64),
1828 IRExpr_Get(OFFB_CC_NDEP, Ity_I64) );
1829 IRExpr* call
1830 = mkIRExprCCall(
1831 Ity_I64,
1832 0/*regparm*/,
1833 "arm64g_calculate_flags_nzcv", &arm64g_calculate_flags_nzcv,
1834 args
1836 /* Exclude OP and NDEP from definedness checking. We're only
1837 interested in DEP1 and DEP2. */
1838 call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
1839 return call;
1843 /* Build IR to set the flags thunk, in the most general case. */
1844 static
1845 void setFlags_D1_D2_ND ( UInt cc_op,
1846 IRTemp t_dep1, IRTemp t_dep2, IRTemp t_ndep )
1848 vassert(typeOfIRTemp(irsb->tyenv, t_dep1 == Ity_I64));
1849 vassert(typeOfIRTemp(irsb->tyenv, t_dep2 == Ity_I64));
1850 vassert(typeOfIRTemp(irsb->tyenv, t_ndep == Ity_I64));
1851 vassert(cc_op >= ARM64G_CC_OP_COPY && cc_op < ARM64G_CC_OP_NUMBER);
1852 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(cc_op) ));
1853 stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(t_dep1) ));
1854 stmt( IRStmt_Put( OFFB_CC_DEP2, mkexpr(t_dep2) ));
1855 stmt( IRStmt_Put( OFFB_CC_NDEP, mkexpr(t_ndep) ));
1858 /* Build IR to set the flags thunk after ADD or SUB. */
1859 static
1860 void setFlags_ADD_SUB ( Bool is64, Bool isSUB, IRTemp argL, IRTemp argR )
1862 IRTemp argL64 = IRTemp_INVALID;
1863 IRTemp argR64 = IRTemp_INVALID;
1864 IRTemp z64 = newTemp(Ity_I64);
1865 if (is64) {
1866 argL64 = argL;
1867 argR64 = argR;
1868 } else {
1869 argL64 = newTemp(Ity_I64);
1870 argR64 = newTemp(Ity_I64);
1871 assign(argL64, unop(Iop_32Uto64, mkexpr(argL)));
1872 assign(argR64, unop(Iop_32Uto64, mkexpr(argR)));
1874 assign(z64, mkU64(0));
1875 UInt cc_op = ARM64G_CC_OP_NUMBER;
1876 /**/ if ( isSUB && is64) { cc_op = ARM64G_CC_OP_SUB64; }
1877 else if ( isSUB && !is64) { cc_op = ARM64G_CC_OP_SUB32; }
1878 else if (!isSUB && is64) { cc_op = ARM64G_CC_OP_ADD64; }
1879 else if (!isSUB && !is64) { cc_op = ARM64G_CC_OP_ADD32; }
1880 else { vassert(0); }
1881 setFlags_D1_D2_ND(cc_op, argL64, argR64, z64);
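/* Illustrative note: for a 32-bit SUB the operands are zero-extended to
   64 bits and stashed in CC_DEP1/CC_DEP2 with CC_OP set to
   ARM64G_CC_OP_SUB32; the NZCV bits themselves are only computed later,
   on demand, by helpers such as arm64g_calculate_flags_nzcv. */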
1884 /* Build IR to set the flags thunk after ADC or SBC. */
1885 static
1886 void setFlags_ADC_SBC ( Bool is64, Bool isSBC,
1887 IRTemp argL, IRTemp argR, IRTemp oldC )
1889 IRTemp argL64 = IRTemp_INVALID;
1890 IRTemp argR64 = IRTemp_INVALID;
1891 IRTemp oldC64 = IRTemp_INVALID;
1892 if (is64) {
1893 argL64 = argL;
1894 argR64 = argR;
1895 oldC64 = oldC;
1896 } else {
1897 argL64 = newTemp(Ity_I64);
1898 argR64 = newTemp(Ity_I64);
1899 oldC64 = newTemp(Ity_I64);
1900 assign(argL64, unop(Iop_32Uto64, mkexpr(argL)));
1901 assign(argR64, unop(Iop_32Uto64, mkexpr(argR)));
1902 assign(oldC64, unop(Iop_32Uto64, mkexpr(oldC)));
1904 UInt cc_op = ARM64G_CC_OP_NUMBER;
1905 /**/ if ( isSBC && is64) { cc_op = ARM64G_CC_OP_SBC64; }
1906 else if ( isSBC && !is64) { cc_op = ARM64G_CC_OP_SBC32; }
1907 else if (!isSBC && is64) { cc_op = ARM64G_CC_OP_ADC64; }
1908 else if (!isSBC && !is64) { cc_op = ARM64G_CC_OP_ADC32; }
1909 else { vassert(0); }
1910 setFlags_D1_D2_ND(cc_op, argL64, argR64, oldC64);
1913 /* Build IR to set the flags thunk after ADD or SUB, if the given
1914 condition evaluates to True at run time. If not, the flags are set
1915 to the specified NZCV value. */
1916 static
1917 void setFlags_ADD_SUB_conditionally (
1918 Bool is64, Bool isSUB,
1919 IRTemp cond, IRTemp argL, IRTemp argR, UInt nzcv
1922 /* Generate IR as follows:
1923 CC_OP = ITE(cond, OP_{ADD,SUB}{32,64}, OP_COPY)
1924 CC_DEP1 = ITE(cond, argL64, nzcv << 28)
1925 CC_DEP2 = ITE(cond, argR64, 0)
1926 CC_NDEP = 0
1929 IRTemp z64 = newTemp(Ity_I64);
1930 assign(z64, mkU64(0));
1932 /* Establish the operation and operands for the True case. */
1933 IRTemp t_dep1 = IRTemp_INVALID;
1934 IRTemp t_dep2 = IRTemp_INVALID;
1935 UInt t_op = ARM64G_CC_OP_NUMBER;
1936 /**/ if ( isSUB && is64) { t_op = ARM64G_CC_OP_SUB64; }
1937 else if ( isSUB && !is64) { t_op = ARM64G_CC_OP_SUB32; }
1938 else if (!isSUB && is64) { t_op = ARM64G_CC_OP_ADD64; }
1939 else if (!isSUB && !is64) { t_op = ARM64G_CC_OP_ADD32; }
1940 else { vassert(0); }
1941 /* */
1942 if (is64) {
1943 t_dep1 = argL;
1944 t_dep2 = argR;
1945 } else {
1946 t_dep1 = newTemp(Ity_I64);
1947 t_dep2 = newTemp(Ity_I64);
1948 assign(t_dep1, unop(Iop_32Uto64, mkexpr(argL)));
1949 assign(t_dep2, unop(Iop_32Uto64, mkexpr(argR)));
1952 /* Establish the operation and operands for the False case. */
1953 IRTemp f_dep1 = newTemp(Ity_I64);
1954 IRTemp f_dep2 = z64;
1955 UInt f_op = ARM64G_CC_OP_COPY;
1956 assign(f_dep1, mkU64(nzcv << 28));
1958 /* Final thunk values */
1959 IRTemp dep1 = newTemp(Ity_I64);
1960 IRTemp dep2 = newTemp(Ity_I64);
1961 IRTemp op = newTemp(Ity_I64);
1963 assign(op, IRExpr_ITE(mkexpr(cond), mkU64(t_op), mkU64(f_op)));
1964 assign(dep1, IRExpr_ITE(mkexpr(cond), mkexpr(t_dep1), mkexpr(f_dep1)));
1965 assign(dep2, IRExpr_ITE(mkexpr(cond), mkexpr(t_dep2), mkexpr(f_dep2)));
1967 /* finally .. */
1968 stmt( IRStmt_Put( OFFB_CC_OP, mkexpr(op) ));
1969 stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(dep1) ));
1970 stmt( IRStmt_Put( OFFB_CC_DEP2, mkexpr(dep2) ));
1971 stmt( IRStmt_Put( OFFB_CC_NDEP, mkexpr(z64) ));
1974 /* Build IR to set the flags thunk after AND/OR/XOR or variants thereof. */
1975 static
1976 void setFlags_LOGIC ( Bool is64, IRTemp res )
1978 IRTemp res64 = IRTemp_INVALID;
1979 IRTemp z64 = newTemp(Ity_I64);
1980 UInt cc_op = ARM64G_CC_OP_NUMBER;
1981 if (is64) {
1982 res64 = res;
1983 cc_op = ARM64G_CC_OP_LOGIC64;
1984 } else {
1985 res64 = newTemp(Ity_I64);
1986 assign(res64, unop(Iop_32Uto64, mkexpr(res)));
1987 cc_op = ARM64G_CC_OP_LOGIC32;
1989 assign(z64, mkU64(0));
1990 setFlags_D1_D2_ND(cc_op, res64, z64, z64);
1993 /* Build IR to set the flags thunk to a given NZCV value. NZCV is
1994 located in bits 31:28 of the supplied value. */
1995 static
1996 void setFlags_COPY ( IRTemp nzcv_28x0 )
1998 IRTemp z64 = newTemp(Ity_I64);
1999 assign(z64, mkU64(0));
2000 setFlags_D1_D2_ND(ARM64G_CC_OP_COPY, nzcv_28x0, z64, z64);
2004 //ZZ /* Minor variant of the above that sets NDEP to zero (if it
2005 //ZZ sets it at all) */
2006 //ZZ static void setFlags_D1_D2 ( UInt cc_op, IRTemp t_dep1,
2007 //ZZ IRTemp t_dep2,
2008 //ZZ IRTemp guardT /* :: Ity_I32, 0 or 1 */ )
2009 //ZZ {
2010 //ZZ IRTemp z32 = newTemp(Ity_I32);
2011 //ZZ assign( z32, mkU32(0) );
2012 //ZZ setFlags_D1_D2_ND( cc_op, t_dep1, t_dep2, z32, guardT );
2013 //ZZ }
2014 //ZZ
2015 //ZZ
2016 //ZZ /* Minor variant of the above that sets DEP2 to zero (if it
2017 //ZZ sets it at all) */
2018 //ZZ static void setFlags_D1_ND ( UInt cc_op, IRTemp t_dep1,
2019 //ZZ IRTemp t_ndep,
2020 //ZZ IRTemp guardT /* :: Ity_I32, 0 or 1 */ )
2021 //ZZ {
2022 //ZZ IRTemp z32 = newTemp(Ity_I32);
2023 //ZZ assign( z32, mkU32(0) );
2024 //ZZ setFlags_D1_D2_ND( cc_op, t_dep1, z32, t_ndep, guardT );
2025 //ZZ }
2026 //ZZ
2027 //ZZ
2028 //ZZ /* Minor variant of the above that sets DEP2 and NDEP to zero (if it
2029 //ZZ sets them at all) */
2030 //ZZ static void setFlags_D1 ( UInt cc_op, IRTemp t_dep1,
2031 //ZZ IRTemp guardT /* :: Ity_I32, 0 or 1 */ )
2032 //ZZ {
2033 //ZZ IRTemp z32 = newTemp(Ity_I32);
2034 //ZZ assign( z32, mkU32(0) );
2035 //ZZ setFlags_D1_D2_ND( cc_op, t_dep1, z32, z32, guardT );
2036 //ZZ }
2039 /*------------------------------------------------------------*/
2040 /*--- Misc math helpers ---*/
2041 /*------------------------------------------------------------*/
2043 /* Generate IR for ((x & mask) >>u sh) | ((x << sh) & mask) */
2044 static IRTemp math_SWAPHELPER ( IRTemp x, ULong mask, Int sh )
2046 IRTemp maskT = newTemp(Ity_I64);
2047 IRTemp res = newTemp(Ity_I64);
2048 vassert(sh >= 1 && sh <= 63);
2049 assign(maskT, mkU64(mask));
2050 assign( res,
2051 binop(Iop_Or64,
2052 binop(Iop_Shr64,
2053 binop(Iop_And64,mkexpr(x),mkexpr(maskT)),
2054 mkU8(sh)),
2055 binop(Iop_And64,
2056 binop(Iop_Shl64,mkexpr(x),mkU8(sh)),
2057 mkexpr(maskT))
2060 return res;
2063 /* Generates byte swaps within 32-bit lanes. */
2064 static IRTemp math_UINTSWAP64 ( IRTemp src )
2066 IRTemp res;
2067 res = math_SWAPHELPER(src, 0xFF00FF00FF00FF00ULL, 8);
2068 res = math_SWAPHELPER(res, 0xFFFF0000FFFF0000ULL, 16);
2069 return res;
2072 /* Generates byte swaps within 16-bit lanes. */
2073 static IRTemp math_USHORTSWAP64 ( IRTemp src )
2075 IRTemp res;
2076 res = math_SWAPHELPER(src, 0xFF00FF00FF00FF00ULL, 8);
2077 return res;
2080 /* Generates a 64-bit byte swap. */
2081 static IRTemp math_BYTESWAP64 ( IRTemp src )
2083 IRTemp res;
2084 res = math_SWAPHELPER(src, 0xFF00FF00FF00FF00ULL, 8);
2085 res = math_SWAPHELPER(res, 0xFFFF0000FFFF0000ULL, 16);
2086 res = math_SWAPHELPER(res, 0xFFFFFFFF00000000ULL, 32);
2087 return res;
2090 /* Generates a 64-bit bit swap. */
2091 static IRTemp math_BITSWAP64 ( IRTemp src )
2093 IRTemp res;
2094 res = math_SWAPHELPER(src, 0xAAAAAAAAAAAAAAAAULL, 1);
2095 res = math_SWAPHELPER(res, 0xCCCCCCCCCCCCCCCCULL, 2);
2096 res = math_SWAPHELPER(res, 0xF0F0F0F0F0F0F0F0ULL, 4);
2097 return math_BYTESWAP64(res);
2100 /* Duplicates the bits at the bottom of the given word to fill the
2101 whole word. src :: Ity_I64 is assumed to have zeroes everywhere
2102 except for the bottom bits. */
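   /* For example, with srcTy == Ity_I8 and src holding 0x00000000000000AB,
      the result is 0xABABABABABABABAB; with Ity_I16 and 0x1234 it is
      0x1234123412341234. */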
2103 static IRTemp math_DUP_TO_64 ( IRTemp src, IRType srcTy )
2105 if (srcTy == Ity_I8) {
2106 IRTemp t16 = newTemp(Ity_I64);
2107 assign(t16, binop(Iop_Or64, mkexpr(src),
2108 binop(Iop_Shl64, mkexpr(src), mkU8(8))));
2109 IRTemp t32 = newTemp(Ity_I64);
2110 assign(t32, binop(Iop_Or64, mkexpr(t16),
2111 binop(Iop_Shl64, mkexpr(t16), mkU8(16))));
2112 IRTemp t64 = newTemp(Ity_I64);
2113 assign(t64, binop(Iop_Or64, mkexpr(t32),
2114 binop(Iop_Shl64, mkexpr(t32), mkU8(32))));
2115 return t64;
2117 if (srcTy == Ity_I16) {
2118 IRTemp t32 = newTemp(Ity_I64);
2119 assign(t32, binop(Iop_Or64, mkexpr(src),
2120 binop(Iop_Shl64, mkexpr(src), mkU8(16))));
2121 IRTemp t64 = newTemp(Ity_I64);
2122 assign(t64, binop(Iop_Or64, mkexpr(t32),
2123 binop(Iop_Shl64, mkexpr(t32), mkU8(32))));
2124 return t64;
2126 if (srcTy == Ity_I32) {
2127 IRTemp t64 = newTemp(Ity_I64);
2128 assign(t64, binop(Iop_Or64, mkexpr(src),
2129 binop(Iop_Shl64, mkexpr(src), mkU8(32))));
2130 return t64;
2132 if (srcTy == Ity_I64) {
2133 return src;
2135 vassert(0);
2139 /* Duplicates the src element exactly so as to fill a V128 value. */
2140 static IRTemp math_DUP_TO_V128 ( IRTemp src, IRType srcTy )
2142 IRTemp res = newTempV128();
2143 if (srcTy == Ity_F64) {
2144 IRTemp i64 = newTemp(Ity_I64);
2145 assign(i64, unop(Iop_ReinterpF64asI64, mkexpr(src)));
2146 assign(res, binop(Iop_64HLtoV128, mkexpr(i64), mkexpr(i64)));
2147 return res;
2149 if (srcTy == Ity_F32) {
2150 IRTemp i64a = newTemp(Ity_I64);
2151 assign(i64a, unop(Iop_32Uto64, unop(Iop_ReinterpF32asI32, mkexpr(src))));
2152 IRTemp i64b = newTemp(Ity_I64);
2153 assign(i64b, binop(Iop_Or64, binop(Iop_Shl64, mkexpr(i64a), mkU8(32)),
2154 mkexpr(i64a)));
2155 assign(res, binop(Iop_64HLtoV128, mkexpr(i64b), mkexpr(i64b)));
2156 return res;
2158 if (srcTy == Ity_I64) {
2159 assign(res, binop(Iop_64HLtoV128, mkexpr(src), mkexpr(src)));
2160 return res;
2162 if (srcTy == Ity_I32 || srcTy == Ity_I16 || srcTy == Ity_I8) {
2163 IRTemp t1 = newTemp(Ity_I64);
2164 assign(t1, widenUto64(srcTy, mkexpr(src)));
2165 IRTemp t2 = math_DUP_TO_64(t1, srcTy);
2166 assign(res, binop(Iop_64HLtoV128, mkexpr(t2), mkexpr(t2)));
2167 return res;
2169 vassert(0);
2173 /* |fullWidth| is a full V128 width result. Depending on bitQ,
2174 zero out the upper half. */
2175 static IRExpr* math_MAYBE_ZERO_HI64 ( UInt bitQ, IRTemp fullWidth )
2177 if (bitQ == 1) return mkexpr(fullWidth);
2178 if (bitQ == 0) return unop(Iop_ZeroHI64ofV128, mkexpr(fullWidth));
2179 vassert(0);
2182 /* The same, but from an expression instead. */
2183 static IRExpr* math_MAYBE_ZERO_HI64_fromE ( UInt bitQ, IRExpr* fullWidth )
2185 IRTemp fullWidthT = newTempV128();
2186 assign(fullWidthT, fullWidth);
2187 return math_MAYBE_ZERO_HI64(bitQ, fullWidthT);
2191 /*------------------------------------------------------------*/
2192 /*--- FP comparison helpers ---*/
2193 /*------------------------------------------------------------*/
2195 /* irRes :: Ity_I32 holds a floating point comparison result encoded
2196 as an IRCmpF64Result. Generate code to convert it to an
2197 ARM64-encoded (N,Z,C,V) group in the lowest 4 bits of an I64 value.
2198 Assign a new temp to hold that value, and return the temp. */
2199 static
2200 IRTemp mk_convert_IRCmpF64Result_to_NZCV ( IRTemp irRes32 )
2202 IRTemp ix = newTemp(Ity_I64);
2203 IRTemp termL = newTemp(Ity_I64);
2204 IRTemp termR = newTemp(Ity_I64);
2205 IRTemp nzcv = newTemp(Ity_I64);
2206 IRTemp irRes = newTemp(Ity_I64);
2208 /* This is where the fun starts. We have to convert 'irRes' from
2209 an IR-convention return result (IRCmpF64Result) to an
2210 ARM-encoded (N,Z,C,V) group. The final result is in the bottom
2211 4 bits of 'nzcv'. */
2212 /* Map compare result from IR to ARM(nzcv) */
2214 FP cmp result | IR | ARM(nzcv)
2215 --------------------------------
2216 UN 0x45 0011
2217 LT 0x01 1000
2218 GT 0x00 0010
2219 EQ 0x40 0110
2221 /* Now since you're probably wondering WTF ..
2223 ix fishes the useful bits out of the IR value, bits 6 and 0, and
2224 places them side by side, giving a number which is 0, 1, 2 or 3.
2226 termL is a sequence cooked up by GNU superopt. It converts ix
2227 into an almost correct NZCV value (incredibly), except
2228 for the case of UN, where it produces 0100 instead of the
2229 required 0011.
2231 termR is therefore a correction term, also computed from ix. It
2232 is 1 in the UN case and 0 for LT, GT and EQ. Hence, to get
2233 the final correct value, we subtract termR from termL.
2235 Don't take my word for it. There's a test program at the bottom
2236 of guest_arm_toIR.c, to try this out with.
2237 */
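   /* Worked example for the EQ case: irRes is 0x40, so ix fishes out
      bits 6 and 0 to give ix = 2.  Then termL = ((((2 ^ 1) << 62) - 1)
      >>u 61) + 1 = 6 = 0110, and termR = (2 & (2 >> 1)) & 1 = 0, so
      nzcv = 6 - 0 = 0110 (Z and C set), as required.  For the UN case
      ix = 3, termL = 4 = 0100, termR = 1, giving nzcv = 0011. */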
2238 assign(irRes, unop(Iop_32Uto64, mkexpr(irRes32)));
2240 assign(
2242 binop(Iop_Or64,
2243 binop(Iop_And64,
2244 binop(Iop_Shr64, mkexpr(irRes), mkU8(5)),
2245 mkU64(3)),
2246 binop(Iop_And64, mkexpr(irRes), mkU64(1))));
2248 assign(
2249 termL,
2250 binop(Iop_Add64,
2251 binop(Iop_Shr64,
2252 binop(Iop_Sub64,
2253 binop(Iop_Shl64,
2254 binop(Iop_Xor64, mkexpr(ix), mkU64(1)),
2255 mkU8(62)),
2256 mkU64(1)),
2257 mkU8(61)),
2258 mkU64(1)));
2260 assign(
2261 termR,
2262 binop(Iop_And64,
2263 binop(Iop_And64,
2264 mkexpr(ix),
2265 binop(Iop_Shr64, mkexpr(ix), mkU8(1))),
2266 mkU64(1)));
2268 assign(nzcv, binop(Iop_Sub64, mkexpr(termL), mkexpr(termR)));
2269 return nzcv;
2273 /*------------------------------------------------------------*/
2274 /*--- Data processing (immediate) ---*/
2275 /*------------------------------------------------------------*/
2277 /* Helper functions for supporting "DecodeBitMasks" */
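   /* dbm_ROR below rotates right within the low |width| bits only:
      for instance dbm_ROR(8, 0x03, 1) == 0x81. */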
2279 static ULong dbm_ROR ( Int width, ULong x, Int rot )
2281 vassert(width > 0 && width <= 64);
2282 vassert(rot >= 0 && rot < width);
2283 if (rot == 0) return x;
2284 ULong res = x >> rot;
2285 res |= (x << (width - rot));
2286 if (width < 64)
2287 res &= ((1ULL << width) - 1);
2288 return res;
2291 static ULong dbm_RepTo64( Int esize, ULong x )
2293 switch (esize) {
2294 case 64:
2295 return x;
2296 case 32:
2297 x &= 0xFFFFFFFF; x |= (x << 32);
2298 return x;
2299 case 16:
2300 x &= 0xFFFF; x |= (x << 16); x |= (x << 32);
2301 return x;
2302 case 8:
2303 x &= 0xFF; x |= (x << 8); x |= (x << 16); x |= (x << 32);
2304 return x;
2305 case 4:
2306 x &= 0xF; x |= (x << 4); x |= (x << 8);
2307 x |= (x << 16); x |= (x << 32);
2308 return x;
2309 case 2:
2310 x &= 0x3; x |= (x << 2); x |= (x << 4); x |= (x << 8);
2311 x |= (x << 16); x |= (x << 32);
2312 return x;
2313 default:
2314 break;
2316 vpanic("dbm_RepTo64");
2317 /*NOTREACHED*/
2318 return 0;
2321 static Int dbm_highestSetBit ( ULong x )
2323 Int i;
2324 for (i = 63; i >= 0; i--) {
2325 if (x & (1ULL << i))
2326 return i;
2328 vassert(x == 0);
2329 return -1;
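   /* Example decode, following dbm_DecodeBitMasks below: a 64-bit
      logical immediate with immN=0, immr=0, imms=0b111100 gives
      len = 1, so esize = 2 and S = R = 0; the 2-bit element 0b01 is
      rotated by 0 and replicated, yielding
      wmask = 0x5555555555555555. */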
2332 static
2333 Bool dbm_DecodeBitMasks ( /*OUT*/ULong* wmask, /*OUT*/ULong* tmask,
2334 ULong immN, ULong imms, ULong immr, Bool immediate,
2335 UInt M /*32 or 64*/)
2337 vassert(immN < (1ULL << 1));
2338 vassert(imms < (1ULL << 6));
2339 vassert(immr < (1ULL << 6));
2340 vassert(immediate == False || immediate == True);
2341 vassert(M == 32 || M == 64);
2343 Int len = dbm_highestSetBit( ((immN << 6) & 64) | ((~imms) & 63) );
2344 if (len < 1) { /* printf("fail1\n"); */ return False; }
2345 vassert(len <= 6);
2346 vassert(M >= (1 << len));
2348 vassert(len >= 1 && len <= 6);
2349 ULong levels = // (zeroes(6 - len) << (6-len)) | ones(len);
2350 (1 << len) - 1;
2351 vassert(levels >= 1 && levels <= 63);
2353 if (immediate && ((imms & levels) == levels)) {
2354 /* printf("fail2 imms %llu levels %llu len %d\n", imms, levels, len); */
2355 return False;
2358 ULong S = imms & levels;
2359 ULong R = immr & levels;
2360 Int diff = S - R;
2361 diff &= 63;
2362 Int esize = 1 << len;
2363 vassert(2 <= esize && esize <= 64);
2365 /* Be careful of these (1ULL << (S+1)) - 1 expressions, and the
2366 same below with d. S can be 63 in which case we have an out of
2367 range and hence undefined shift. */
2368 vassert(S >= 0 && S <= 63);
2369 vassert(esize >= (S+1));
2370 ULong elem_s = // Zeroes(esize-(S+1)):Ones(S+1)
2371 //(1ULL << (S+1)) - 1;
2372 ((1ULL << S) - 1) + (1ULL << S);
2374 Int d = // diff<len-1:0>
2375 diff & ((1 << len)-1);
2376 vassert(esize >= (d+1));
2377 vassert(d >= 0 && d <= 63);
2379 ULong elem_d = // Zeroes(esize-(d+1)):Ones(d+1)
2380 //(1ULL << (d+1)) - 1;
2381 ((1ULL << d) - 1) + (1ULL << d);
2383 if (esize != 64) vassert(elem_s < (1ULL << esize));
2384 if (esize != 64) vassert(elem_d < (1ULL << esize));
2386 if (wmask) *wmask = dbm_RepTo64(esize, dbm_ROR(esize, elem_s, R));
2387 if (tmask) *tmask = dbm_RepTo64(esize, elem_d);
2389 return True;
2393 static
2394 Bool dis_ARM64_data_processing_immediate(/*MB_OUT*/DisResult* dres,
2395 UInt insn)
2397 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
2399 /* insn[28:23]
2400 10000x PC-rel addressing
2401 10001x Add/subtract (immediate)
2402 100100 Logical (immediate)
2403 100101 Move Wide (immediate)
2404 100110 Bitfield
2405 100111 Extract
2408 /* ------------------ ADD/SUB{,S} imm12 ------------------ */
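   /* The optional shift moves the 12-bit immediate up by 12 bits:
      for example ADD X0, X1, #0x123, LSL #12 adds 0x123000 to X1. */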
2409 if (INSN(28,24) == BITS5(1,0,0,0,1)) {
2410 Bool is64 = INSN(31,31) == 1;
2411 Bool isSub = INSN(30,30) == 1;
2412 Bool setCC = INSN(29,29) == 1;
2413 UInt sh = INSN(23,22);
2414 UInt uimm12 = INSN(21,10);
2415 UInt nn = INSN(9,5);
2416 UInt dd = INSN(4,0);
2417 const HChar* nm = isSub ? "sub" : "add";
2418 if (sh >= 2) {
2419 /* Invalid; fall through */
2420 } else {
2421 vassert(sh <= 1);
2422 uimm12 <<= (12 * sh);
2423 if (is64) {
2424 IRTemp argL = newTemp(Ity_I64);
2425 IRTemp argR = newTemp(Ity_I64);
2426 IRTemp res = newTemp(Ity_I64);
2427 assign(argL, getIReg64orSP(nn));
2428 assign(argR, mkU64(uimm12));
2429 assign(res, binop(isSub ? Iop_Sub64 : Iop_Add64,
2430 mkexpr(argL), mkexpr(argR)));
2431 if (setCC) {
2432 putIReg64orZR(dd, mkexpr(res));
2433 setFlags_ADD_SUB(True/*is64*/, isSub, argL, argR);
2434 DIP("%ss %s, %s, 0x%x\n",
2435 nm, nameIReg64orZR(dd), nameIReg64orSP(nn), uimm12);
2436 } else {
2437 putIReg64orSP(dd, mkexpr(res));
2438 DIP("%s %s, %s, 0x%x\n",
2439 nm, nameIReg64orSP(dd), nameIReg64orSP(nn), uimm12);
2441 } else {
2442 IRTemp argL = newTemp(Ity_I32);
2443 IRTemp argR = newTemp(Ity_I32);
2444 IRTemp res = newTemp(Ity_I32);
2445 assign(argL, getIReg32orSP(nn));
2446 assign(argR, mkU32(uimm12));
2447 assign(res, binop(isSub ? Iop_Sub32 : Iop_Add32,
2448 mkexpr(argL), mkexpr(argR)));
2449 if (setCC) {
2450 putIReg32orZR(dd, mkexpr(res));
2451 setFlags_ADD_SUB(False/*!is64*/, isSub, argL, argR);
2452 DIP("%ss %s, %s, 0x%x\n",
2453 nm, nameIReg32orZR(dd), nameIReg32orSP(nn), uimm12);
2454 } else {
2455 putIReg32orSP(dd, mkexpr(res));
2456 DIP("%s %s, %s, 0x%x\n",
2457 nm, nameIReg32orSP(dd), nameIReg32orSP(nn), uimm12);
2460 return True;
2464 /* -------------------- ADR/ADRP -------------------- */
2465 if (INSN(28,24) == BITS5(1,0,0,0,0)) {
2466 UInt bP = INSN(31,31);
2467 UInt immLo = INSN(30,29);
2468 UInt immHi = INSN(23,5);
2469 UInt rD = INSN(4,0);
2470 ULong uimm = (immHi << 2) | immLo;
2471 ULong simm = sx_to_64(uimm, 21);
2472 ULong val;
2473 if (bP) {
2474 val = (guest_PC_curr_instr & 0xFFFFFFFFFFFFF000ULL) + (simm << 12);
2475 } else {
2476 val = guest_PC_curr_instr + simm;
2478 putIReg64orZR(rD, mkU64(val));
2479 DIP("adr%s %s, 0x%llx\n", bP ? "p" : "", nameIReg64orZR(rD), val);
2480 return True;
2483 /* -------------------- LOGIC(imm) -------------------- */
2484 if (INSN(28,23) == BITS6(1,0,0,1,0,0)) {
2485 /* 31 30 28 22 21 15 9 4
2486 sf op 100100 N immr imms Rn Rd
2487 op=00: AND Rd|SP, Rn, #imm
2488 op=01: ORR Rd|SP, Rn, #imm
2489 op=10: EOR Rd|SP, Rn, #imm
2490 op=11: ANDS Rd|ZR, Rn, #imm
2492 Bool is64 = INSN(31,31) == 1;
2493 UInt op = INSN(30,29);
2494 UInt N = INSN(22,22);
2495 UInt immR = INSN(21,16);
2496 UInt immS = INSN(15,10);
2497 UInt nn = INSN(9,5);
2498 UInt dd = INSN(4,0);
2499 ULong imm = 0;
2500 Bool ok;
2501 if (N == 1 && !is64)
2502 goto after_logic_imm; /* not allowed; fall through */
2503 ok = dbm_DecodeBitMasks(&imm, NULL,
2504 N, immS, immR, True, is64 ? 64 : 32);
2505 if (!ok)
2506 goto after_logic_imm;
2508 const HChar* names[4] = { "and", "orr", "eor", "ands" };
2509 const IROp ops64[4] = { Iop_And64, Iop_Or64, Iop_Xor64, Iop_And64 };
2510 const IROp ops32[4] = { Iop_And32, Iop_Or32, Iop_Xor32, Iop_And32 };
2512 vassert(op < 4);
2513 if (is64) {
2514 IRExpr* argL = getIReg64orZR(nn);
2515 IRExpr* argR = mkU64(imm);
2516 IRTemp res = newTemp(Ity_I64);
2517 assign(res, binop(ops64[op], argL, argR));
2518 if (op < 3) {
2519 putIReg64orSP(dd, mkexpr(res));
2520 DIP("%s %s, %s, 0x%llx\n", names[op],
2521 nameIReg64orSP(dd), nameIReg64orZR(nn), imm);
2522 } else {
2523 putIReg64orZR(dd, mkexpr(res));
2524 setFlags_LOGIC(True/*is64*/, res);
2525 DIP("%s %s, %s, 0x%llx\n", names[op],
2526 nameIReg64orZR(dd), nameIReg64orZR(nn), imm);
2528 } else {
2529 IRExpr* argL = getIReg32orZR(nn);
2530 IRExpr* argR = mkU32((UInt)imm);
2531 IRTemp res = newTemp(Ity_I32);
2532 assign(res, binop(ops32[op], argL, argR));
2533 if (op < 3) {
2534 putIReg32orSP(dd, mkexpr(res));
2535 DIP("%s %s, %s, 0x%x\n", names[op],
2536 nameIReg32orSP(dd), nameIReg32orZR(nn), (UInt)imm);
2537 } else {
2538 putIReg32orZR(dd, mkexpr(res));
2539 setFlags_LOGIC(False/*!is64*/, res);
2540 DIP("%s %s, %s, 0x%x\n", names[op],
2541 nameIReg32orZR(dd), nameIReg32orZR(nn), (UInt)imm);
2544 return True;
2546 after_logic_imm:
2548 /* -------------------- MOV{Z,N,K} -------------------- */
2549 if (INSN(28,23) == BITS6(1,0,0,1,0,1)) {
2550 /* 31 30 28 22 20 4
2551 | | | | | |
2552 sf 10 100 101 hw imm16 Rd MOV(Z) Rd, (imm16 << (16*hw))
2553 sf 00 100 101 hw imm16 Rd MOV(N) Rd, ~(imm16 << (16*hw))
2554 sf 11 100 101 hw imm16 Rd MOV(K) Rd, (imm16 << (16*hw))
2556 Bool is64 = INSN(31,31) == 1;
2557 UInt subopc = INSN(30,29);
2558 UInt hw = INSN(22,21);
2559 UInt imm16 = INSN(20,5);
2560 UInt dd = INSN(4,0);
2561 if (subopc == BITS2(0,1) || (!is64 && hw >= 2)) {
2562 /* invalid; fall through */
2563 } else {
2564 ULong imm64 = ((ULong)imm16) << (16 * hw);
2565 if (!is64)
2566 vassert(imm64 < 0x100000000ULL);
2567 switch (subopc) {
2568 case BITS2(1,0): // MOVZ
2569 putIRegOrZR(is64, dd, is64 ? mkU64(imm64) : mkU32((UInt)imm64));
2570 DIP("movz %s, 0x%llx\n", nameIRegOrZR(is64, dd), imm64);
2571 break;
2572 case BITS2(0,0): // MOVN
2573 imm64 = ~imm64;
2574 if (!is64)
2575 imm64 &= 0xFFFFFFFFULL;
2576 putIRegOrZR(is64, dd, is64 ? mkU64(imm64) : mkU32((UInt)imm64));
2577 DIP("movn %s, 0x%llx\n", nameIRegOrZR(is64, dd), imm64);
2578 break;
2579 case BITS2(1,1): // MOVK
2580 /* This is more complex. We are inserting a slice into
2581 the destination register, so we need to have the old
2582 value of it. */
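   /* For example, MOVK X0, #0x1234, LSL #16 leaves bits 63:32 and
      15:0 of X0 unchanged and sets bits 31:16 to 0x1234. */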
2583 if (is64) {
2584 IRTemp old = newTemp(Ity_I64);
2585 assign(old, getIReg64orZR(dd));
2586 ULong mask = 0xFFFFULL << (16 * hw);
2587 IRExpr* res
2588 = binop(Iop_Or64,
2589 binop(Iop_And64, mkexpr(old), mkU64(~mask)),
2590 mkU64(imm64));
2591 putIReg64orZR(dd, res);
2592 DIP("movk %s, 0x%x, lsl %u\n",
2593 nameIReg64orZR(dd), imm16, 16*hw);
2594 } else {
2595 IRTemp old = newTemp(Ity_I32);
2596 assign(old, getIReg32orZR(dd));
2597 vassert(hw <= 1);
2598 UInt mask = ((UInt)0xFFFF) << (16 * hw);
2599 IRExpr* res
2600 = binop(Iop_Or32,
2601 binop(Iop_And32, mkexpr(old), mkU32(~mask)),
2602 mkU32((UInt)imm64));
2603 putIReg32orZR(dd, res);
2604 DIP("movk %s, 0x%x, lsl %u\n",
2605 nameIReg32orZR(dd), imm16, 16*hw);
2607 break;
2608 default:
2609 vassert(0);
2611 return True;
2615 /* -------------------- {U,S,}BFM -------------------- */
2616 /* 30 28 22 21 15 9 4
2618 sf 10 100110 N immr imms nn dd
2619 UBFM Wd, Wn, #immr, #imms when sf=0, N=0, immr[5]=0, imms[5]=0
2620 UBFM Xd, Xn, #immr, #imms when sf=1, N=1
2622 sf 00 100110 N immr imms nn dd
2623 SBFM Wd, Wn, #immr, #imms when sf=0, N=0, immr[5]=0, imms[5]=0
2624 SBFM Xd, Xn, #immr, #imms when sf=1, N=1
2626 sf 01 100110 N immr imms nn dd
2627 BFM Wd, Wn, #immr, #imms when sf=0, N=0, immr[5]=0, imms[5]=0
2628 BFM Xd, Xn, #immr, #imms when sf=1, N=1
2629 */
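   /* For example, UBFM X0, X1, #8, #15 (the alias UBFX X0, X1, #8, #8)
      copies bits 15:8 of X1 into bits 7:0 of X0 and zeroes the rest. */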
2630 if (INSN(28,23) == BITS6(1,0,0,1,1,0)) {
2631 UInt sf = INSN(31,31);
2632 UInt opc = INSN(30,29);
2633 UInt N = INSN(22,22);
2634 UInt immR = INSN(21,16);
2635 UInt immS = INSN(15,10);
2636 UInt nn = INSN(9,5);
2637 UInt dd = INSN(4,0);
2638 Bool inZero = False;
2639 Bool extend = False;
2640 const HChar* nm = "???";
2641 /* skip invalid combinations */
2642 switch (opc) {
2643 case BITS2(0,0):
2644 inZero = True; extend = True; nm = "sbfm"; break;
2645 case BITS2(0,1):
2646 inZero = False; extend = False; nm = "bfm"; break;
2647 case BITS2(1,0):
2648 inZero = True; extend = False; nm = "ubfm"; break;
2649 case BITS2(1,1):
2650 goto after_bfm; /* invalid */
2651 default:
2652 vassert(0);
2654 if (sf == 1 && N != 1) goto after_bfm;
2655 if (sf == 0 && (N != 0 || ((immR >> 5) & 1) != 0
2656 || ((immS >> 5) & 1) != 0)) goto after_bfm;
2657 ULong wmask = 0, tmask = 0;
2658 Bool ok = dbm_DecodeBitMasks(&wmask, &tmask,
2659 N, immS, immR, False, sf == 1 ? 64 : 32);
2660 if (!ok) goto after_bfm; /* hmmm */
2662 Bool is64 = sf == 1;
2663 IRType ty = is64 ? Ity_I64 : Ity_I32;
2665 IRTemp dst = newTemp(ty);
2666 IRTemp src = newTemp(ty);
2667 IRTemp bot = newTemp(ty);
2668 IRTemp top = newTemp(ty);
2669 IRTemp res = newTemp(ty);
2670 assign(dst, inZero ? mkU(ty,0) : getIRegOrZR(is64, dd));
2671 assign(src, getIRegOrZR(is64, nn));
2672 /* perform bitfield move on low bits */
2673 assign(bot, binop(mkOR(ty),
2674 binop(mkAND(ty), mkexpr(dst), mkU(ty, ~wmask)),
2675 binop(mkAND(ty), mkexpr(mathROR(ty, src, immR)),
2676 mkU(ty, wmask))));
2677 /* determine extension bits (sign, zero or dest register) */
2678 assign(top, mkexpr(extend ? mathREPLICATE(ty, src, immS) : dst));
2679 /* combine extension bits and result bits */
2680 assign(res, binop(mkOR(ty),
2681 binop(mkAND(ty), mkexpr(top), mkU(ty, ~tmask)),
2682 binop(mkAND(ty), mkexpr(bot), mkU(ty, tmask))));
2683 putIRegOrZR(is64, dd, mkexpr(res));
2684 DIP("%s %s, %s, immR=%u, immS=%u\n",
2685 nm, nameIRegOrZR(is64, dd), nameIRegOrZR(is64, nn), immR, immS);
2686 return True;
2688 after_bfm:
2690 /* ---------------------- EXTR ---------------------- */
2691 /* 30 28 22 20 15 9 4
2692 1 00 100111 10 m imm6 n d EXTR Xd, Xn, Xm, #imm6
2693 0 00 100111 00 m imm6 n d EXTR Wd, Wn, Wm, #imm6 when #imm6 < 32
2694 */
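   /* For example, EXTR X0, X1, X2, #8 computes
      X0 = (X1 << 56) | (X2 >>u 8), i.e. the low 64 bits of the
      128-bit value X1:X2 shifted right by 8.  EXTR Xd, Xn, Xn, #imm6
      is the ROR (immediate) alias. */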
2695 if (INSN(30,23) == BITS8(0,0,1,0,0,1,1,1) && INSN(21,21) == 0) {
2696 Bool is64 = INSN(31,31) == 1;
2697 UInt mm = INSN(20,16);
2698 UInt imm6 = INSN(15,10);
2699 UInt nn = INSN(9,5);
2700 UInt dd = INSN(4,0);
2701 Bool valid = True;
2702 if (INSN(31,31) != INSN(22,22))
2703 valid = False;
2704 if (!is64 && imm6 >= 32)
2705 valid = False;
2706 if (!valid) goto after_extr;
2707 IRType ty = is64 ? Ity_I64 : Ity_I32;
2708 IRTemp srcHi = newTemp(ty);
2709 IRTemp srcLo = newTemp(ty);
2710 IRTemp res = newTemp(ty);
2711 assign(srcHi, getIRegOrZR(is64, nn));
2712 assign(srcLo, getIRegOrZR(is64, mm));
2713 if (imm6 == 0) {
2714 assign(res, mkexpr(srcLo));
2715 } else {
2716 UInt szBits = 8 * sizeofIRType(ty);
2717 vassert(imm6 > 0 && imm6 < szBits);
2718 assign(res, binop(mkOR(ty),
2719 binop(mkSHL(ty), mkexpr(srcHi), mkU8(szBits-imm6)),
2720 binop(mkSHR(ty), mkexpr(srcLo), mkU8(imm6))));
2722 putIRegOrZR(is64, dd, mkexpr(res));
2723 DIP("extr %s, %s, %s, #%u\n",
2724 nameIRegOrZR(is64,dd),
2725 nameIRegOrZR(is64,nn), nameIRegOrZR(is64,mm), imm6);
2726 return True;
2728 after_extr:
2730 vex_printf("ARM64 front end: data_processing_immediate\n");
2731 return False;
2732 # undef INSN
2736 /*------------------------------------------------------------*/
2737 /*--- Data processing (register) instructions ---*/
2738 /*------------------------------------------------------------*/
2740 static const HChar* nameSH ( UInt sh ) {
2741 switch (sh) {
2742 case 0: return "lsl";
2743 case 1: return "lsr";
2744 case 2: return "asr";
2745 case 3: return "ror";
2746 default: vassert(0);
2750 /* Generate IR to get a register value, possibly shifted by an
2751 immediate. Returns either a 32- or 64-bit temporary holding the
2752 result. After the shift, the value can optionally be NOT-ed
2753 too.
2755 sh_how coding: 00=SHL, 01=SHR, 10=SAR, 11=ROR. sh_amt may only be
2756 in the range 0 to (is64 ? 64 : 32)-1. For some instructions, ROR
2757 isn't allowed, but it's the job of the caller to check that.
2759 static IRTemp getShiftedIRegOrZR ( Bool is64,
2760 UInt sh_how, UInt sh_amt, UInt regNo,
2761 Bool invert )
2763 vassert(sh_how < 4);
2764 vassert(sh_amt < (is64 ? 64 : 32));
2765 IRType ty = is64 ? Ity_I64 : Ity_I32;
2766 IRTemp t0 = newTemp(ty);
2767 assign(t0, getIRegOrZR(is64, regNo));
2768 IRTemp t1 = newTemp(ty);
2769 switch (sh_how) {
2770 case BITS2(0,0):
2771 assign(t1, binop(mkSHL(ty), mkexpr(t0), mkU8(sh_amt)));
2772 break;
2773 case BITS2(0,1):
2774 assign(t1, binop(mkSHR(ty), mkexpr(t0), mkU8(sh_amt)));
2775 break;
2776 case BITS2(1,0):
2777 assign(t1, binop(mkSAR(ty), mkexpr(t0), mkU8(sh_amt)));
2778 break;
2779 case BITS2(1,1):
2780 assign(t1, mkexpr(mathROR(ty, t0, sh_amt)));
2781 break;
2782 default:
2783 vassert(0);
2785 if (invert) {
2786 IRTemp t2 = newTemp(ty);
2787 assign(t2, unop(mkNOT(ty), mkexpr(t1)));
2788 return t2;
2789 } else {
2790 return t1;
2795 static
2796 Bool dis_ARM64_data_processing_register(/*MB_OUT*/DisResult* dres,
2797 UInt insn)
2799 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
2801 /* ------------------- ADD/SUB(reg) ------------------- */
2802 /* x==0 => 32 bit op x==1 => 64 bit op
2803 sh: 00=LSL, 01=LSR, 10=ASR, 11=ROR(NOT ALLOWED)
2805 31 30 29 28 23 21 20 15 9 4
2806 | | | | | | | | | |
2807 x 0 0 01011 sh 0 Rm imm6 Rn Rd ADD Rd,Rn, sh(Rm,imm6)
2808 x 0 1 01011 sh 0 Rm imm6 Rn Rd ADDS Rd,Rn, sh(Rm,imm6)
2809 x 1 0 01011 sh 0 Rm imm6 Rn Rd SUB Rd,Rn, sh(Rm,imm6)
2810 x 1 1 01011 sh 0 Rm imm6 Rn Rd SUBS Rd,Rn, sh(Rm,imm6)
2812 if (INSN(28,24) == BITS5(0,1,0,1,1) && INSN(21,21) == 0) {
2813 UInt bX = INSN(31,31);
2814 UInt bOP = INSN(30,30); /* 0: ADD, 1: SUB */
2815 UInt bS = INSN(29, 29); /* set flags? */
2816 UInt sh = INSN(23,22);
2817 UInt rM = INSN(20,16);
2818 UInt imm6 = INSN(15,10);
2819 UInt rN = INSN(9,5);
2820 UInt rD = INSN(4,0);
2821 Bool isSUB = bOP == 1;
2822 Bool is64 = bX == 1;
2823 IRType ty = is64 ? Ity_I64 : Ity_I32;
2824 if ((!is64 && imm6 > 31) || sh == BITS2(1,1)) {
2825 /* invalid; fall through */
2826 } else {
2827 IRTemp argL = newTemp(ty);
2828 assign(argL, getIRegOrZR(is64, rN));
2829 IRTemp argR = getShiftedIRegOrZR(is64, sh, imm6, rM, False);
2830 IROp op = isSUB ? mkSUB(ty) : mkADD(ty);
2831 IRTemp res = newTemp(ty);
2832 assign(res, binop(op, mkexpr(argL), mkexpr(argR)));
2833 if (rD != 31) putIRegOrZR(is64, rD, mkexpr(res));
2834 if (bS) {
2835 setFlags_ADD_SUB(is64, isSUB, argL, argR);
2837 DIP("%s%s %s, %s, %s, %s #%u\n",
2838 bOP ? "sub" : "add", bS ? "s" : "",
2839 nameIRegOrZR(is64, rD), nameIRegOrZR(is64, rN),
2840 nameIRegOrZR(is64, rM), nameSH(sh), imm6);
2841 return True;
2845 /* ------------------- ADC/SBC(reg) ------------------- */
2846 /* x==0 => 32 bit op x==1 => 64 bit op
2848 31 30 29 28 23 21 20 15 9 4
2849 | | | | | | | | | |
2850 x 0 0 11010 00 0 Rm 000000 Rn Rd ADC Rd,Rn,Rm
2851 x 0 1 11010 00 0 Rm 000000 Rn Rd ADCS Rd,Rn,Rm
2852 x 1 0 11010 00 0 Rm 000000 Rn Rd SBC Rd,Rn,Rm
2853 x 1 1 11010 00 0 Rm 000000 Rn Rd SBCS Rd,Rn,Rm
2856 if (INSN(28,21) == BITS8(1,1,0,1,0,0,0,0) && INSN(15,10) == 0 ) {
2857 UInt bX = INSN(31,31);
2858 UInt bOP = INSN(30,30); /* 0: ADC, 1: SBC */
2859 UInt bS = INSN(29,29); /* set flags */
2860 UInt rM = INSN(20,16);
2861 UInt rN = INSN(9,5);
2862 UInt rD = INSN(4,0);
2864 Bool isSUB = bOP == 1;
2865 Bool is64 = bX == 1;
2866 IRType ty = is64 ? Ity_I64 : Ity_I32;
2868 IRTemp oldC = newTemp(ty);
2869 assign(oldC,
2870 is64 ? mk_arm64g_calculate_flag_c()
2871 : unop(Iop_64to32, mk_arm64g_calculate_flag_c()) );
2873 IRTemp argL = newTemp(ty);
2874 assign(argL, getIRegOrZR(is64, rN));
2875 IRTemp argR = newTemp(ty);
2876 assign(argR, getIRegOrZR(is64, rM));
2878 IROp op = isSUB ? mkSUB(ty) : mkADD(ty);
2879 IRTemp res = newTemp(ty);
2880 if (isSUB) {
2881 IRExpr* one = is64 ? mkU64(1) : mkU32(1);
2882 IROp xorOp = is64 ? Iop_Xor64 : Iop_Xor32;
2883 assign(res,
2884 binop(op,
2885 binop(op, mkexpr(argL), mkexpr(argR)),
2886 binop(xorOp, mkexpr(oldC), one)));
2887 } else {
2888 assign(res,
2889 binop(op,
2890 binop(op, mkexpr(argL), mkexpr(argR)),
2891 mkexpr(oldC)));
2894 if (rD != 31) putIRegOrZR(is64, rD, mkexpr(res));
2896 if (bS) {
2897 setFlags_ADC_SBC(is64, isSUB, argL, argR, oldC);
2900 DIP("%s%s %s, %s, %s\n",
2901 bOP ? "sbc" : "adc", bS ? "s" : "",
2902 nameIRegOrZR(is64, rD), nameIRegOrZR(is64, rN),
2903 nameIRegOrZR(is64, rM));
2904 return True;
2907 /* -------------------- LOGIC(reg) -------------------- */
2908 /* x==0 => 32 bit op x==1 => 64 bit op
2909 N==0 => inv? is no-op (no inversion)
2910 N==1 => inv? is NOT
2911 sh: 00=LSL, 01=LSR, 10=ASR, 11=ROR
2913 31 30 28 23 21 20 15 9 4
2914 | | | | | | | | |
2915 x 00 01010 sh N Rm imm6 Rn Rd AND Rd,Rn, inv?(sh(Rm,imm6))
2916 x 01 01010 sh N Rm imm6 Rn Rd ORR Rd,Rn, inv?(sh(Rm,imm6))
2917 x 10 01010 sh N Rm imm6 Rn Rd EOR Rd,Rn, inv?(sh(Rm,imm6))
2918 x 11 01010 sh N Rm imm6 Rn Rd ANDS Rd,Rn, inv?(sh(Rm,imm6))
2919 With N=1, the names are: BIC ORN EON BICS
2921 if (INSN(28,24) == BITS5(0,1,0,1,0)) {
2922 UInt bX = INSN(31,31);
2923 UInt sh = INSN(23,22);
2924 UInt bN = INSN(21,21);
2925 UInt rM = INSN(20,16);
2926 UInt imm6 = INSN(15,10);
2927 UInt rN = INSN(9,5);
2928 UInt rD = INSN(4,0);
2929 Bool is64 = bX == 1;
2930 IRType ty = is64 ? Ity_I64 : Ity_I32;
2931 if (!is64 && imm6 > 31) {
2932 /* invalid; fall through */
2933 } else {
2934 IRTemp argL = newTemp(ty);
2935 assign(argL, getIRegOrZR(is64, rN));
2936 IRTemp argR = getShiftedIRegOrZR(is64, sh, imm6, rM, bN == 1);
2937 IROp op = Iop_INVALID;
2938 switch (INSN(30,29)) {
2939 case BITS2(0,0): case BITS2(1,1): op = mkAND(ty); break;
2940 case BITS2(0,1): op = mkOR(ty); break;
2941 case BITS2(1,0): op = mkXOR(ty); break;
2942 default: vassert(0);
2944 IRTemp res = newTemp(ty);
2945 assign(res, binop(op, mkexpr(argL), mkexpr(argR)));
2946 if (INSN(30,29) == BITS2(1,1)) {
2947 setFlags_LOGIC(is64, res);
2949 putIRegOrZR(is64, rD, mkexpr(res));
2951 static const HChar* names_op[8]
2952 = { "and", "orr", "eor", "ands", "bic", "orn", "eon", "bics" };
2953 vassert(((bN << 2) | INSN(30,29)) < 8);
2954 const HChar* nm_op = names_op[(bN << 2) | INSN(30,29)];
2955 /* Special-case the printing of "MOV" */
2956 if (rN == 31/*zr*/ && sh == 0/*LSL*/ && imm6 == 0 && bN == 0) {
2957 DIP("mov %s, %s\n", nameIRegOrZR(is64, rD),
2958 nameIRegOrZR(is64, rM));
2959 } else {
2960 DIP("%s %s, %s, %s, %s #%u\n", nm_op,
2961 nameIRegOrZR(is64, rD), nameIRegOrZR(is64, rN),
2962 nameIRegOrZR(is64, rM), nameSH(sh), imm6);
2964 return True;
2968 /* -------------------- {U,S}MULH -------------------- */
2969 /* 31 23 22 20 15 9 4
2970 10011011 1 10 Rm 011111 Rn Rd UMULH Xd,Xn,Xm
2971 10011011 0 10 Rm 011111 Rn Rd SMULH Xd,Xn,Xm
2973 if (INSN(31,24) == BITS8(1,0,0,1,1,0,1,1)
2974 && INSN(22,21) == BITS2(1,0) && INSN(15,10) == BITS6(0,1,1,1,1,1)) {
2975 Bool isU = INSN(23,23) == 1;
2976 UInt mm = INSN(20,16);
2977 UInt nn = INSN(9,5);
2978 UInt dd = INSN(4,0);
2979 putIReg64orZR(dd, unop(Iop_128HIto64,
2980 binop(isU ? Iop_MullU64 : Iop_MullS64,
2981 getIReg64orZR(nn), getIReg64orZR(mm))));
2982 DIP("%cmulh %s, %s, %s\n",
2983 isU ? 'u' : 's',
2984 nameIReg64orZR(dd), nameIReg64orZR(nn), nameIReg64orZR(mm));
2985 return True;
2988 /* -------------------- M{ADD,SUB} -------------------- */
2989 /* 31 30 20 15 14 9 4
2990 sf 00 11011 000 m 0 a n r MADD Rd,Rn,Rm,Ra d = a+m*n
2991 sf 00 11011 000 m 1 a n r MSUB Rd,Rn,Rm,Ra d = a-m*n
2993 if (INSN(30,21) == BITS10(0,0,1,1,0,1,1,0,0,0)) {
2994 Bool is64 = INSN(31,31) == 1;
2995 UInt mm = INSN(20,16);
2996 Bool isAdd = INSN(15,15) == 0;
2997 UInt aa = INSN(14,10);
2998 UInt nn = INSN(9,5);
2999 UInt dd = INSN(4,0);
3000 if (is64) {
3001 putIReg64orZR(
3003 binop(isAdd ? Iop_Add64 : Iop_Sub64,
3004 getIReg64orZR(aa),
3005 binop(Iop_Mul64, getIReg64orZR(mm), getIReg64orZR(nn))));
3006 } else {
3007 putIReg32orZR(
3009 binop(isAdd ? Iop_Add32 : Iop_Sub32,
3010 getIReg32orZR(aa),
3011 binop(Iop_Mul32, getIReg32orZR(mm), getIReg32orZR(nn))));
3013 DIP("%s %s, %s, %s, %s\n",
3014 isAdd ? "madd" : "msub",
3015 nameIRegOrZR(is64, dd), nameIRegOrZR(is64, nn),
3016 nameIRegOrZR(is64, mm), nameIRegOrZR(is64, aa));
3017 return True;
3020 /* ---------------- CS{EL,INC,INV,NEG} ---------------- */
3021 /* 31 30 28 20 15 11 9 4
3022 sf 00 1101 0100 mm cond 00 nn dd CSEL Rd,Rn,Rm
3023 sf 00 1101 0100 mm cond 01 nn dd CSINC Rd,Rn,Rm
3024 sf 10 1101 0100 mm cond 00 nn dd CSINV Rd,Rn,Rm
3025 sf 10 1101 0100 mm cond 01 nn dd CSNEG Rd,Rn,Rm
3026 In all cases, the operation is: Rd = if cond then Rn else OP(Rm)
3028 if (INSN(29,21) == BITS9(0, 1,1,0,1, 0,1,0,0) && INSN(11,11) == 0) {
3029 Bool is64 = INSN(31,31) == 1;
3030 UInt b30 = INSN(30,30);
3031 UInt mm = INSN(20,16);
3032 UInt cond = INSN(15,12);
3033 UInt b10 = INSN(10,10);
3034 UInt nn = INSN(9,5);
3035 UInt dd = INSN(4,0);
3036 UInt op = (b30 << 1) | b10; /* 00=id 01=inc 10=inv 11=neg */
3037 IRType ty = is64 ? Ity_I64 : Ity_I32;
3038 IRExpr* argL = getIRegOrZR(is64, nn);
3039 IRExpr* argR = getIRegOrZR(is64, mm);
3040 switch (op) {
3041 case BITS2(0,0):
3042 break;
3043 case BITS2(0,1):
3044 argR = binop(mkADD(ty), argR, mkU(ty,1));
3045 break;
3046 case BITS2(1,0):
3047 argR = unop(mkNOT(ty), argR);
3048 break;
3049 case BITS2(1,1):
3050 argR = binop(mkSUB(ty), mkU(ty,0), argR);
3051 break;
3052 default:
3053 vassert(0);
3055 putIRegOrZR(
3056 is64, dd,
3057 IRExpr_ITE(unop(Iop_64to1, mk_arm64g_calculate_condition(cond)),
3058 argL, argR)
3060 const HChar* op_nm[4] = { "csel", "csinc", "csinv", "csneg" };
3061 DIP("%s %s, %s, %s, %s\n", op_nm[op],
3062 nameIRegOrZR(is64, dd), nameIRegOrZR(is64, nn),
3063 nameIRegOrZR(is64, mm), nameCC(cond));
3064 return True;
3067 /* -------------- ADD/SUB(extended reg) -------------- */
3068 /* 28 20 15 12 9 4
3069 000 01011 00 1 m opt imm3 n d ADD Wd|SP, Wn|SP, Wm ext&lsld
3070 100 01011 00 1 m opt imm3 n d ADD Xd|SP, Xn|SP, Rm ext&lsld
3072 001 01011 00 1 m opt imm3 n d ADDS Wd, Wn|SP, Wm ext&lsld
3073 101 01011 00 1 m opt imm3 n d ADDS Xd, Xn|SP, Rm ext&lsld
3075 010 01011 00 1 m opt imm3 n d SUB Wd|SP, Wn|SP, Wm ext&lsld
3076 110 01011 00 1 m opt imm3 n d SUB Xd|SP, Xn|SP, Rm ext&lsld
3078 011 01011 00 1 m opt imm3 n d SUBS Wd, Wn|SP, Wm ext&lsld
3079 111 01011 00 1 m opt imm3 n d SUBS Xd, Xn|SP, Rm ext&lsld
3081 The 'm' operand is extended per opt, thusly:
3083 000 Xm & 0xFF UXTB
3084 001 Xm & 0xFFFF UXTH
3085 010 Xm & (2^32)-1 UXTW
3086 011 Xm UXTX
3088 100 Xm sx from bit 7 SXTB
3089 101 Xm sx from bit 15 SXTH
3090 110 Xm sx from bit 31 SXTW
3091 111 Xm SXTX
3093 In the 64 bit case (bit31 == 1), UXTX and SXTX are the identity
3094 operation on Xm. In the 32 bit case, UXTW, UXTX, SXTW and SXTX
3095 are the identity operation on Wm.
3097 After extension, the value is shifted left by imm3 bits, which
3098 may only be in the range 0 .. 4 inclusive.
3099 */
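   /* For example, ADD X1, X2, W3, UXTB #2 computes
      X1 = X2 + ((X3 & 0xFF) << 2), and ADDS W1, W2, W3, SXTH #1
      sign-extends W3 from bit 15 before the left shift by 1. */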
3100 if (INSN(28,21) == BITS8(0,1,0,1,1,0,0,1) && INSN(12,10) <= 4) {
3101 Bool is64 = INSN(31,31) == 1;
3102 Bool isSub = INSN(30,30) == 1;
3103 Bool setCC = INSN(29,29) == 1;
3104 UInt mm = INSN(20,16);
3105 UInt opt = INSN(15,13);
3106 UInt imm3 = INSN(12,10);
3107 UInt nn = INSN(9,5);
3108 UInt dd = INSN(4,0);
3109 const HChar* nameExt[8] = { "uxtb", "uxth", "uxtw", "uxtx",
3110 "sxtb", "sxth", "sxtw", "sxtx" };
3111 /* Do almost the same thing in the 32- and 64-bit cases. */
3112 IRTemp xN = newTemp(Ity_I64);
3113 IRTemp xM = newTemp(Ity_I64);
3114 assign(xN, getIReg64orSP(nn));
3115 assign(xM, getIReg64orZR(mm));
3116 IRExpr* xMw = mkexpr(xM); /* "xM widened" */
3117 Int shSX = 0;
3118 /* widen Xm .. */
3119 switch (opt) {
3120 case BITS3(0,0,0): // UXTB
3121 xMw = binop(Iop_And64, xMw, mkU64(0xFF)); break;
3122 case BITS3(0,0,1): // UXTH
3123 xMw = binop(Iop_And64, xMw, mkU64(0xFFFF)); break;
3124 case BITS3(0,1,0): // UXTW -- noop for the 32bit case
3125 if (is64) {
3126 xMw = unop(Iop_32Uto64, unop(Iop_64to32, xMw));
3128 break;
3129 case BITS3(0,1,1): // UXTX -- always a noop
3130 break;
3131 case BITS3(1,0,0): // SXTB
3132 shSX = 56; goto sxTo64;
3133 case BITS3(1,0,1): // SXTH
3134 shSX = 48; goto sxTo64;
3135 case BITS3(1,1,0): // SXTW -- noop for the 32bit case
3136 if (is64) {
3137 shSX = 32; goto sxTo64;
3139 break;
3140 case BITS3(1,1,1): // SXTX -- always a noop
3141 break;
3142 sxTo64:
3143 vassert(shSX >= 32);
3144 xMw = binop(Iop_Sar64, binop(Iop_Shl64, xMw, mkU8(shSX)),
3145 mkU8(shSX));
3146 break;
3147 default:
3148 vassert(0);
3150 /* and now shift */
3151 IRTemp argL = xN;
3152 IRTemp argR = newTemp(Ity_I64);
3153 assign(argR, binop(Iop_Shl64, xMw, mkU8(imm3)));
3154 IRTemp res = newTemp(Ity_I64);
3155 assign(res, binop(isSub ? Iop_Sub64 : Iop_Add64,
3156 mkexpr(argL), mkexpr(argR)));
3157 if (is64) {
3158 if (setCC) {
3159 putIReg64orZR(dd, mkexpr(res));
3160 setFlags_ADD_SUB(True/*is64*/, isSub, argL, argR);
3161 } else {
3162 putIReg64orSP(dd, mkexpr(res));
3164 } else {
3165 if (setCC) {
3166 IRTemp argL32 = newTemp(Ity_I32);
3167 IRTemp argR32 = newTemp(Ity_I32);
3168 putIReg32orZR(dd, unop(Iop_64to32, mkexpr(res)));
3169 assign(argL32, unop(Iop_64to32, mkexpr(argL)));
3170 assign(argR32, unop(Iop_64to32, mkexpr(argR)));
3171 setFlags_ADD_SUB(False/*!is64*/, isSub, argL32, argR32);
3172 } else {
3173 putIReg32orSP(dd, unop(Iop_64to32, mkexpr(res)));
3176 DIP("%s%s %s, %s, %s %s lsl %u\n",
3177 isSub ? "sub" : "add", setCC ? "s" : "",
3178 setCC ? nameIRegOrZR(is64, dd) : nameIRegOrSP(is64, dd),
3179 nameIRegOrSP(is64, nn), nameIRegOrSP(is64, mm),
3180 nameExt[opt], imm3);
3181 return True;
3184 /* ---------------- CCMP/CCMN(imm) ---------------- */
3185 /* Bizarrely, these appear in the "data processing register"
3186 category, even though they are operations against an
3187 immediate. */
3188 /* 31 29 20 15 11 9 3
3189 sf 1 111010010 imm5 cond 10 Rn 0 nzcv CCMP Rn, #imm5, #nzcv, cond
3190 sf 0 111010010 imm5 cond 10 Rn 0 nzcv CCMN Rn, #imm5, #nzcv, cond
3192 Operation is:
3193 (CCMP) flags = if cond then flags-after-sub(Rn,imm5) else nzcv
3194 (CCMN) flags = if cond then flags-after-add(Rn,imm5) else nzcv
3195 */
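   /* For example, CCMP X1, #4, #8, EQ sets the flags from X1 - 4 when
      the EQ condition currently holds, and otherwise sets NZCV to
      0b1000 (just N). */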
3196 if (INSN(29,21) == BITS9(1,1,1,0,1,0,0,1,0)
3197 && INSN(11,10) == BITS2(1,0) && INSN(4,4) == 0) {
3198 Bool is64 = INSN(31,31) == 1;
3199 Bool isSUB = INSN(30,30) == 1;
3200 UInt imm5 = INSN(20,16);
3201 UInt cond = INSN(15,12);
3202 UInt nn = INSN(9,5);
3203 UInt nzcv = INSN(3,0);
3205 IRTemp condT = newTemp(Ity_I1);
3206 assign(condT, unop(Iop_64to1, mk_arm64g_calculate_condition(cond)));
3208 IRType ty = is64 ? Ity_I64 : Ity_I32;
3209 IRTemp argL = newTemp(ty);
3210 IRTemp argR = newTemp(ty);
3212 if (is64) {
3213 assign(argL, getIReg64orZR(nn));
3214 assign(argR, mkU64(imm5));
3215 } else {
3216 assign(argL, getIReg32orZR(nn));
3217 assign(argR, mkU32(imm5));
3219 setFlags_ADD_SUB_conditionally(is64, isSUB, condT, argL, argR, nzcv);
3221 DIP("ccm%c %s, #%u, #%u, %s\n",
3222 isSUB ? 'p' : 'n', nameIRegOrZR(is64, nn),
3223 imm5, nzcv, nameCC(cond));
3224 return True;
3227 /* ---------------- CCMP/CCMN(reg) ---------------- */
3228 /* 31 29 20 15 11 9 3
3229 sf 1 111010010 Rm cond 00 Rn 0 nzcv CCMP Rn, Rm, #nzcv, cond
3230 sf 0 111010010 Rm cond 00 Rn 0 nzcv CCMN Rn, Rm, #nzcv, cond
3231 Operation is:
3232 (CCMP) flags = if cond then flags-after-sub(Rn,Rm) else nzcv
3233 (CCMN) flags = if cond then flags-after-add(Rn,Rm) else nzcv
3235 if (INSN(29,21) == BITS9(1,1,1,0,1,0,0,1,0)
3236 && INSN(11,10) == BITS2(0,0) && INSN(4,4) == 0) {
3237 Bool is64 = INSN(31,31) == 1;
3238 Bool isSUB = INSN(30,30) == 1;
3239 UInt mm = INSN(20,16);
3240 UInt cond = INSN(15,12);
3241 UInt nn = INSN(9,5);
3242 UInt nzcv = INSN(3,0);
3244 IRTemp condT = newTemp(Ity_I1);
3245 assign(condT, unop(Iop_64to1, mk_arm64g_calculate_condition(cond)));
3247 IRType ty = is64 ? Ity_I64 : Ity_I32;
3248 IRTemp argL = newTemp(ty);
3249 IRTemp argR = newTemp(ty);
3251 if (is64) {
3252 assign(argL, getIReg64orZR(nn));
3253 assign(argR, getIReg64orZR(mm));
3254 } else {
3255 assign(argL, getIReg32orZR(nn));
3256 assign(argR, getIReg32orZR(mm));
3258 setFlags_ADD_SUB_conditionally(is64, isSUB, condT, argL, argR, nzcv);
3260 DIP("ccm%c %s, %s, #%u, %s\n",
3261 isSUB ? 'p' : 'n', nameIRegOrZR(is64, nn),
3262 nameIRegOrZR(is64, mm), nzcv, nameCC(cond));
3263 return True;
3267 /* -------------- REV/REV16/REV32/RBIT -------------- */
3268 /* 31 30 28 20 15 11 9 4
3270 1 10 11010110 00000 0000 11 n d (1) REV Xd, Xn
3271 0 10 11010110 00000 0000 10 n d (2) REV Wd, Wn
3273 1 10 11010110 00000 0000 00 n d (3) RBIT Xd, Xn
3274 0 10 11010110 00000 0000 00 n d (4) RBIT Wd, Wn
3276 1 10 11010110 00000 0000 01 n d (5) REV16 Xd, Xn
3277 0 10 11010110 00000 0000 01 n d (6) REV16 Wd, Wn
3279 1 10 11010110 00000 0000 10 n d (7) REV32 Xd, Xn
3281 if (INSN(30,21) == BITS10(1,0,1,1,0,1,0,1,1,0)
3282 && INSN(20,12) == BITS9(0,0,0,0,0,0,0,0,0)) {
3283 UInt b31 = INSN(31,31);
3284 UInt opc = INSN(11,10);
3286 UInt ix = 0;
3287 /**/ if (b31 == 1 && opc == BITS2(1,1)) ix = 1;
3288 else if (b31 == 0 && opc == BITS2(1,0)) ix = 2;
3289 else if (b31 == 1 && opc == BITS2(0,0)) ix = 3;
3290 else if (b31 == 0 && opc == BITS2(0,0)) ix = 4;
3291 else if (b31 == 1 && opc == BITS2(0,1)) ix = 5;
3292 else if (b31 == 0 && opc == BITS2(0,1)) ix = 6;
3293 else if (b31 == 1 && opc == BITS2(1,0)) ix = 7;
3294 if (ix >= 1 && ix <= 7) {
3295 Bool is64 = ix == 1 || ix == 3 || ix == 5 || ix == 7;
3296 UInt nn = INSN(9,5);
3297 UInt dd = INSN(4,0);
3298 IRTemp src = newTemp(Ity_I64);
3299 IRTemp dst = IRTemp_INVALID;
3300 IRTemp (*math)(IRTemp) = NULL;
3301 switch (ix) {
3302 case 1: case 2: math = math_BYTESWAP64; break;
3303 case 3: case 4: math = math_BITSWAP64; break;
3304 case 5: case 6: math = math_USHORTSWAP64; break;
3305 case 7: math = math_UINTSWAP64; break;
3306 default: vassert(0);
3308 const HChar* names[7]
3309 = { "rev", "rev", "rbit", "rbit", "rev16", "rev16", "rev32" };
3310 const HChar* nm = names[ix-1];
3311 vassert(math);
3312 if (ix == 6) {
3313 /* This has to be special cased, since the logic below doesn't
3314 handle it correctly. */
3315 assign(src, getIReg64orZR(nn));
3316 dst = math(src);
3317 putIReg64orZR(dd,
3318 unop(Iop_32Uto64, unop(Iop_64to32, mkexpr(dst))));
3319 } else if (is64) {
3320 assign(src, getIReg64orZR(nn));
3321 dst = math(src);
3322 putIReg64orZR(dd, mkexpr(dst));
3323 } else {
3324 assign(src, binop(Iop_Shl64, getIReg64orZR(nn), mkU8(32)));
3325 dst = math(src);
3326 putIReg32orZR(dd, unop(Iop_64to32, mkexpr(dst)));
3328 DIP("%s %s, %s\n", nm,
3329 nameIRegOrZR(is64,dd), nameIRegOrZR(is64,nn));
3330 return True;
3332 /* else fall through */
3335 /* -------------------- CLZ/CLS -------------------- */
3336 /* 30 28 24 20 15 9 4
3337 sf 10 1101 0110 00000 00010 0 n d CLZ Rd, Rn
3338 sf 10 1101 0110 00000 00010 1 n d CLS Rd, Rn
3340 if (INSN(30,21) == BITS10(1,0,1,1,0,1,0,1,1,0)
3341 && INSN(20,11) == BITS10(0,0,0,0,0,0,0,0,1,0)) {
3342 Bool is64 = INSN(31,31) == 1;
3343 Bool isCLS = INSN(10,10) == 1;
3344 UInt nn = INSN(9,5);
3345 UInt dd = INSN(4,0);
3346 IRTemp src = newTemp(Ity_I64);
3347 IRTemp srcZ = newTemp(Ity_I64);
3348 IRTemp dst = newTemp(Ity_I64);
3349 /* Get the argument, widened out to 64 bit */
3350 if (is64) {
3351 assign(src, getIReg64orZR(nn));
3352 } else {
3353 assign(src, binop(Iop_Shl64,
3354 unop(Iop_32Uto64, getIReg32orZR(nn)), mkU8(32)));
3356 /* If this is CLS, mash the arg around accordingly */
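   /* The transformed word has bit k (k >= 1) set iff bits k and k-1 of
      the source differ, so counting its leading zeroes counts the
      leading bits that match the sign bit; the all-zeroes case is
      clamped to 63 (or 31) below. */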
3357 if (isCLS) {
3358 IRExpr* one = mkU8(1);
3359 assign(srcZ,
3360 binop(Iop_Xor64,
3361 binop(Iop_Shl64, mkexpr(src), one),
3362 binop(Iop_Shl64, binop(Iop_Shr64, mkexpr(src), one), one)));
3363 } else {
3364 assign(srcZ, mkexpr(src));
3366 /* And compute CLZ. */
3367 if (is64) {
3368 assign(dst, IRExpr_ITE(binop(Iop_CmpEQ64, mkexpr(srcZ), mkU64(0)),
3369 mkU64(isCLS ? 63 : 64),
3370 unop(Iop_Clz64, mkexpr(srcZ))));
3371 putIReg64orZR(dd, mkexpr(dst));
3372 } else {
3373 assign(dst, IRExpr_ITE(binop(Iop_CmpEQ64, mkexpr(srcZ), mkU64(0)),
3374 mkU64(isCLS ? 31 : 32),
3375 unop(Iop_Clz64, mkexpr(srcZ))));
3376 putIReg32orZR(dd, unop(Iop_64to32, mkexpr(dst)));
3378 DIP("cl%c %s, %s\n", isCLS ? 's' : 'z',
3379 nameIRegOrZR(is64, dd), nameIRegOrZR(is64, nn));
3380 return True;
3383 /* ------------------ LSLV/LSRV/ASRV/RORV ------------------ */
3384 /* 30 28 20 15 11 9 4
3385 sf 00 1101 0110 m 0010 00 n d LSLV Rd,Rn,Rm
3386 sf 00 1101 0110 m 0010 01 n d LSRV Rd,Rn,Rm
3387 sf 00 1101 0110 m 0010 10 n d ASRV Rd,Rn,Rm
3388 sf 00 1101 0110 m 0010 11 n d RORV Rd,Rn,Rm
3390 if (INSN(30,21) == BITS10(0,0,1,1,0,1,0,1,1,0)
3391 && INSN(15,12) == BITS4(0,0,1,0)) {
3392 Bool is64 = INSN(31,31) == 1;
3393 UInt mm = INSN(20,16);
3394 UInt op = INSN(11,10);
3395 UInt nn = INSN(9,5);
3396 UInt dd = INSN(4,0);
3397 IRType ty = is64 ? Ity_I64 : Ity_I32;
3398 IRTemp srcL = newTemp(ty);
3399 IRTemp srcR = newTemp(Ity_I64);
3400 IRTemp res = newTemp(ty);
3401 IROp iop = Iop_INVALID;
3402 assign(srcL, getIRegOrZR(is64, nn));
3403 assign(srcR, binop(Iop_And64, getIReg64orZR(mm),
3404 mkU64(is64 ? 63 : 31)));
3405 if (op < 3) {
3406 // LSLV, LSRV, ASRV
3407 switch (op) {
3408 case BITS2(0,0): iop = mkSHL(ty); break;
3409 case BITS2(0,1): iop = mkSHR(ty); break;
3410 case BITS2(1,0): iop = mkSAR(ty); break;
3411 default: vassert(0);
3413 assign(res, binop(iop, mkexpr(srcL),
3414 unop(Iop_64to8, mkexpr(srcR))));
3415 } else {
3416 // RORV
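   // The zero-amount case is handled separately so that the left
   // shift below is never by the full lane width.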
3417 IROp opSHL = mkSHL(ty);
3418 IROp opSHR = mkSHR(ty);
3419 IROp opOR = mkOR(ty);
3420 IRExpr* width = mkU64(is64 ? 64: 32);
3421 assign(
3422 res,
3423 IRExpr_ITE(
3424 binop(Iop_CmpEQ64, mkexpr(srcR), mkU64(0)),
3425 mkexpr(srcL),
3426 binop(opOR,
3427 binop(opSHL,
3428 mkexpr(srcL),
3429 unop(Iop_64to8, binop(Iop_Sub64, width,
3430 mkexpr(srcR)))),
3431 binop(opSHR,
3432 mkexpr(srcL), unop(Iop_64to8, mkexpr(srcR))))
3435 putIRegOrZR(is64, dd, mkexpr(res));
3436 vassert(op < 4);
3437 const HChar* names[4] = { "lslv", "lsrv", "asrv", "rorv" };
3438 DIP("%s %s, %s, %s\n",
3439 names[op], nameIRegOrZR(is64,dd),
3440 nameIRegOrZR(is64,nn), nameIRegOrZR(is64,mm));
3441 return True;
3444 /* -------------------- SDIV/UDIV -------------------- */
3445 /* 30 28 20 15 10 9 4
3446 sf 00 1101 0110 m 00001 1 n d SDIV Rd,Rn,Rm
3447 sf 00 1101 0110 m 00001 0 n d UDIV Rd,Rn,Rm
3449 if (INSN(30,21) == BITS10(0,0,1,1,0,1,0,1,1,0)
3450 && INSN(15,11) == BITS5(0,0,0,0,1)) {
3451 Bool is64 = INSN(31,31) == 1;
3452 UInt mm = INSN(20,16);
3453 Bool isS = INSN(10,10) == 1;
3454 UInt nn = INSN(9,5);
3455 UInt dd = INSN(4,0);
3456 if (isS) {
3457 putIRegOrZR(is64, dd, binop(is64 ? Iop_DivS64 : Iop_DivS32,
3458 getIRegOrZR(is64, nn),
3459 getIRegOrZR(is64, mm)));
3460 } else {
3461 putIRegOrZR(is64, dd, binop(is64 ? Iop_DivU64 : Iop_DivU32,
3462 getIRegOrZR(is64, nn),
3463 getIRegOrZR(is64, mm)));
3465 DIP("%cdiv %s, %s, %s\n", isS ? 's' : 'u',
3466 nameIRegOrZR(is64, dd),
3467 nameIRegOrZR(is64, nn), nameIRegOrZR(is64, mm));
3468 return True;
3471 /* ------------------ {S,U}M{ADD,SUB}L ------------------ */
3472 /* 31 23 20 15 14 9 4
3473 1001 1011 101 m 0 a n d UMADDL Xd,Wn,Wm,Xa
3474 1001 1011 001 m 0 a n d SMADDL Xd,Wn,Wm,Xa
3475 1001 1011 101 m 1 a n d UMSUBL Xd,Wn,Wm,Xa
3476 1001 1011 001 m 1 a n d SMSUBL Xd,Wn,Wm,Xa
3477 with operation
3478 Xd = Xa +/- (Wn *u/s Wm)
3480 if (INSN(31,24) == BITS8(1,0,0,1,1,0,1,1) && INSN(22,21) == BITS2(0,1)) {
3481 Bool isU = INSN(23,23) == 1;
3482 UInt mm = INSN(20,16);
3483 Bool isAdd = INSN(15,15) == 0;
3484 UInt aa = INSN(14,10);
3485 UInt nn = INSN(9,5);
3486 UInt dd = INSN(4,0);
3487 IRTemp wN = newTemp(Ity_I32);
3488 IRTemp wM = newTemp(Ity_I32);
3489 IRTemp xA = newTemp(Ity_I64);
3490 IRTemp muld = newTemp(Ity_I64);
3491 IRTemp res = newTemp(Ity_I64);
3492 assign(wN, getIReg32orZR(nn));
3493 assign(wM, getIReg32orZR(mm));
3494 assign(xA, getIReg64orZR(aa));
3495 assign(muld, binop(isU ? Iop_MullU32 : Iop_MullS32,
3496 mkexpr(wN), mkexpr(wM)));
3497 assign(res, binop(isAdd ? Iop_Add64 : Iop_Sub64,
3498 mkexpr(xA), mkexpr(muld)));
3499 putIReg64orZR(dd, mkexpr(res));
3500 DIP("%cm%sl %s, %s, %s, %s\n", isU ? 'u' : 's', isAdd ? "add" : "sub",
3501 nameIReg64orZR(dd), nameIReg32orZR(nn),
3502 nameIReg32orZR(mm), nameIReg64orZR(aa));
3503 return True;
3506 /* -------------------- CRC32/CRC32C -------------------- */
3507 /* 31 30 20 15 11 9 4
3508 sf 00 1101 0110 m 0100 sz n d CRC32<sz> Wd, Wn, Wm|Xm
3509 sf 00 1101 0110 m 0101 sz n d CRC32C<sz> Wd, Wn, Wm|Xm
3511 if (INSN(30,21) == BITS10(0,0,1,1,0,1,0,1,1,0)
3512 && INSN(15,13) == BITS3(0,1,0)) {
3513 UInt bitSF = INSN(31,31);
3514 UInt mm = INSN(20,16);
3515 UInt bitC = INSN(12,12);
3516 UInt sz = INSN(11,10);
3517 UInt nn = INSN(9,5);
3518 UInt dd = INSN(4,0);
3519 vassert(sz >= 0 && sz <= 3);
3520 if ((bitSF == 0 && sz <= BITS2(1,0))
3521 || (bitSF == 1 && sz == BITS2(1,1))) {
3522 UInt ix = (bitC == 1 ? 4 : 0) | sz;
3523 void* helpers[8]
3524 = { &arm64g_calc_crc32b, &arm64g_calc_crc32h,
3525 &arm64g_calc_crc32w, &arm64g_calc_crc32x,
3526 &arm64g_calc_crc32cb, &arm64g_calc_crc32ch,
3527 &arm64g_calc_crc32cw, &arm64g_calc_crc32cx };
3528 const HChar* hNames[8]
3529 = { "arm64g_calc_crc32b", "arm64g_calc_crc32h",
3530 "arm64g_calc_crc32w", "arm64g_calc_crc32x",
3531 "arm64g_calc_crc32cb", "arm64g_calc_crc32ch",
3532 "arm64g_calc_crc32cw", "arm64g_calc_crc32cx" };
3533 const HChar* iNames[8]
3534 = { "crc32b", "crc32h", "crc32w", "crc32x",
3535 "crc32cb", "crc32ch", "crc32cw", "crc32cx" };
3537 IRTemp srcN = newTemp(Ity_I64);
3538 assign(srcN, unop(Iop_32Uto64, unop(Iop_64to32, getIReg64orZR(nn))));
3540 IRTemp srcM = newTemp(Ity_I64);
3541 IRExpr* at64 = getIReg64orZR(mm);
3542 switch (sz) {
3543 case BITS2(0,0):
3544 assign(srcM, binop(Iop_And64, at64, mkU64(0xFF))); break;
3545 case BITS2(0,1):
3546 assign(srcM, binop(Iop_And64, at64, mkU64(0xFFFF))); break;
3547 case BITS2(1,0):
3548 assign(srcM, binop(Iop_And64, at64, mkU64(0xFFFFFFFF))); break;
3549 case BITS2(1,1):
3550 assign(srcM, at64); break;
3551 default:
3552 vassert(0);
3555 vassert(ix >= 0 && ix <= 7);
3557 putIReg64orZR(
3559 unop(Iop_32Uto64,
3560 unop(Iop_64to32,
3561 mkIRExprCCall(Ity_I64, 0/*regparm*/,
3562 hNames[ix], helpers[ix],
3563 mkIRExprVec_2(mkexpr(srcN),
3564 mkexpr(srcM))))));
3566 DIP("%s %s, %s, %s\n", iNames[ix],
3567 nameIReg32orZR(dd),
3568 nameIReg32orZR(nn), nameIRegOrZR(bitSF == 1, mm));
3569 return True;
3571 /* fall through */
3574 vex_printf("ARM64 front end: data_processing_register\n");
3575 return False;
3576 # undef INSN
3580 /*------------------------------------------------------------*/
3581 /*--- Math helpers for vector interleave/deinterleave ---*/
3582 /*------------------------------------------------------------*/
3584 #define EX(_tmp) \
3585 mkexpr(_tmp)
3586 #define SL(_hi128,_lo128,_nbytes) \
3587 ( (_nbytes) == 0 \
3588 ? (_lo128) \
3589 : triop(Iop_SliceV128,(_hi128),(_lo128),mkU8(_nbytes)) )
3590 #define ROR(_v128,_nbytes) \
3591 SL((_v128),(_v128),(_nbytes))
3592 #define ROL(_v128,_nbytes) \
3593 SL((_v128),(_v128),16-(_nbytes))
3594 #define SHR(_v128,_nbytes) \
3595 binop(Iop_ShrV128,(_v128),mkU8(8*(_nbytes)))
3596 #define SHL(_v128,_nbytes) \
3597 binop(Iop_ShlV128,(_v128),mkU8(8*(_nbytes)))
3598 #define ILO64x2(_argL,_argR) \
3599 binop(Iop_InterleaveLO64x2,(_argL),(_argR))
3600 #define IHI64x2(_argL,_argR) \
3601 binop(Iop_InterleaveHI64x2,(_argL),(_argR))
3602 #define ILO32x4(_argL,_argR) \
3603 binop(Iop_InterleaveLO32x4,(_argL),(_argR))
3604 #define IHI32x4(_argL,_argR) \
3605 binop(Iop_InterleaveHI32x4,(_argL),(_argR))
3606 #define ILO16x8(_argL,_argR) \
3607 binop(Iop_InterleaveLO16x8,(_argL),(_argR))
3608 #define IHI16x8(_argL,_argR) \
3609 binop(Iop_InterleaveHI16x8,(_argL),(_argR))
3610 #define ILO8x16(_argL,_argR) \
3611 binop(Iop_InterleaveLO8x16,(_argL),(_argR))
3612 #define IHI8x16(_argL,_argR) \
3613 binop(Iop_InterleaveHI8x16,(_argL),(_argR))
3614 #define CEV32x4(_argL,_argR) \
3615 binop(Iop_CatEvenLanes32x4,(_argL),(_argR))
3616 #define COD32x4(_argL,_argR) \
3617 binop(Iop_CatOddLanes32x4,(_argL),(_argR))
3618 #define COD16x8(_argL,_argR) \
3619 binop(Iop_CatOddLanes16x8,(_argL),(_argR))
3620 #define COD8x16(_argL,_argR) \
3621 binop(Iop_CatOddLanes8x16,(_argL),(_argR))
3622 #define CEV8x16(_argL,_argR) \
3623 binop(Iop_CatEvenLanes8x16,(_argL),(_argR))
3624 #define AND(_arg1,_arg2) \
3625 binop(Iop_AndV128,(_arg1),(_arg2))
3626 #define OR2(_arg1,_arg2) \
3627 binop(Iop_OrV128,(_arg1),(_arg2))
3628 #define OR3(_arg1,_arg2,_arg3) \
3629 binop(Iop_OrV128,(_arg1),binop(Iop_OrV128,(_arg2),(_arg3)))
3630 #define OR4(_arg1,_arg2,_arg3,_arg4) \
3631 binop(Iop_OrV128, \
3632 binop(Iop_OrV128,(_arg1),(_arg2)), \
3633 binop(Iop_OrV128,(_arg3),(_arg4)))
3636 /* Do interleaving for 1 128 bit vector, for ST1 insns. */
3637 static
3638 void math_INTERLEAVE1_128( /*OUTx1*/ IRTemp* i0,
3639 UInt laneSzBlg2, IRTemp u0 )
3641 assign(*i0, mkexpr(u0));
3645 /* Do interleaving for 2 128 bit vectors, for ST2 insns. */
3646 static
3647 void math_INTERLEAVE2_128( /*OUTx2*/ IRTemp* i0, IRTemp* i1,
3648 UInt laneSzBlg2, IRTemp u0, IRTemp u1 )
3650 /* This is pretty easy, since we have primitives directly to
3651 hand. */
3652 if (laneSzBlg2 == 3) {
3653 // 64x2
3654 // u1 == B1 B0, u0 == A1 A0
3655 // i1 == B1 A1, i0 == B0 A0
3656 assign(*i0, binop(Iop_InterleaveLO64x2, mkexpr(u1), mkexpr(u0)));
3657 assign(*i1, binop(Iop_InterleaveHI64x2, mkexpr(u1), mkexpr(u0)));
3658 return;
3660 if (laneSzBlg2 == 2) {
3661 // 32x4
3662 // u1 == B3 B2 B1 B0, u0 == A3 A2 A1 A0,
3663 // i1 == B3 A3 B2 A2, i0 == B1 A1 B0 A0
3664 assign(*i0, binop(Iop_InterleaveLO32x4, mkexpr(u1), mkexpr(u0)));
3665 assign(*i1, binop(Iop_InterleaveHI32x4, mkexpr(u1), mkexpr(u0)));
3666 return;
3668 if (laneSzBlg2 == 1) {
3669 // 16x8
3670 // u1 == B{7..0}, u0 == A{7..0}
3671 // i0 == B3 A3 B2 A2 B1 A1 B0 A0
3672 // i1 == B7 A7 B6 A6 B5 A5 B4 A4
3673 assign(*i0, binop(Iop_InterleaveLO16x8, mkexpr(u1), mkexpr(u0)));
3674 assign(*i1, binop(Iop_InterleaveHI16x8, mkexpr(u1), mkexpr(u0)));
3675 return;
3677 if (laneSzBlg2 == 0) {
3678 // 8x16
3679 // u1 == B{f..0}, u0 == A{f..0}
3680 // i0 == B7 A7 B6 A6 B5 A5 B4 A4 B3 A3 B2 A2 B1 A1 B0 A0
3681 // i1 == Bf Af Be Ae Bd Ad Bc Ac Bb Ab Ba Aa B9 A9 B8 A8
3682 assign(*i0, binop(Iop_InterleaveLO8x16, mkexpr(u1), mkexpr(u0)));
3683 assign(*i1, binop(Iop_InterleaveHI8x16, mkexpr(u1), mkexpr(u0)));
3684 return;
3686 /*NOTREACHED*/
3687 vassert(0);
3691 /* Do interleaving for 3 128 bit vectors, for ST3 insns. */
3692 static
3693 void math_INTERLEAVE3_128(
3694 /*OUTx3*/ IRTemp* i0, IRTemp* i1, IRTemp* i2,
3695 UInt laneSzBlg2,
3696 IRTemp u0, IRTemp u1, IRTemp u2 )
3698 if (laneSzBlg2 == 3) {
3699 // 64x2
3700 // u2 == C1 C0, u1 == B1 B0, u0 == A1 A0
3701 // i2 == C1 B1, i1 == A1 C0, i0 == B0 A0,
3702 assign(*i2, IHI64x2( EX(u2), EX(u1) ));
3703 assign(*i1, ILO64x2( ROR(EX(u0),8), EX(u2) ));
3704 assign(*i0, ILO64x2( EX(u1), EX(u0) ));
3705 return;
3708 if (laneSzBlg2 == 2) {
3709 // 32x4
3710 // u2 == C3 C2 C1 C0, u1 == B3 B2 B1 B0, u0 == A3 A2 A1 A0
3711 // p2 == C3 C2 B3 B2, p1 == A3 A2 C1 C0, p0 == B1 B0 A1 A0
3712 // i2 == C3 B3 A3 C2, i1 == B2 A2 C1 B1, i0 == A1 C0 B0 A0
3713 IRTemp p0 = newTempV128();
3714 IRTemp p1 = newTempV128();
3715 IRTemp p2 = newTempV128();
3716 IRTemp c1100 = newTempV128();
3717 IRTemp c0011 = newTempV128();
3718 IRTemp c0110 = newTempV128();
3719 assign(c1100, mkV128(0xFF00));
3720 assign(c0011, mkV128(0x00FF));
3721 assign(c0110, mkV128(0x0FF0));
3722 // First interleave them at 64x2 granularity,
3723 // generating partial ("p") values.
3724 math_INTERLEAVE3_128(&p0, &p1, &p2, 3, u0, u1, u2);
3725 // And more shuffling around for the final answer
3726 assign(*i2, OR2( AND( IHI32x4(EX(p2), ROL(EX(p2),8)), EX(c1100) ),
3727 AND( IHI32x4(ROR(EX(p1),4), EX(p2)), EX(c0011) ) ));
3728 assign(*i1, OR3( SHL(EX(p2),12),
3729 AND(EX(p1),EX(c0110)),
3730 SHR(EX(p0),12) ));
3731 assign(*i0, OR2( AND( ILO32x4(EX(p0),ROL(EX(p1),4)), EX(c1100) ),
3732 AND( ILO32x4(ROR(EX(p0),8),EX(p0)), EX(c0011) ) ));
3733 return;
3736 if (laneSzBlg2 == 1) {
3737 // 16x8
3738 // u2 == C7 C6 C5 C4 C3 C2 C1 C0
3739 // u1 == B7 B6 B5 B4 B3 B2 B1 B0
3740 // u0 == A7 A6 A5 A4 A3 A2 A1 A0
3742 // p2 == C7 C6 B7 B6 A7 A6 C5 C4
3743 // p1 == B5 B4 A5 A4 C3 C2 B3 B2
3744 // p0 == A3 A2 C1 C0 B1 B0 A1 A0
3746 // i2 == C7 B7 A7 C6 B6 A6 C5 B5
3747 // i1 == A5 C4 B4 A4 C3 B3 A3 C2
3748 // i0 == B2 A2 C1 B1 A1 C0 B0 A0
3749 IRTemp p0 = newTempV128();
3750 IRTemp p1 = newTempV128();
3751 IRTemp p2 = newTempV128();
3752 IRTemp c1000 = newTempV128();
3753 IRTemp c0100 = newTempV128();
3754 IRTemp c0010 = newTempV128();
3755 IRTemp c0001 = newTempV128();
3756 assign(c1000, mkV128(0xF000));
3757 assign(c0100, mkV128(0x0F00));
3758 assign(c0010, mkV128(0x00F0));
3759 assign(c0001, mkV128(0x000F));
3760 // First interleave them at 32x4 granularity,
3761 // generating partial ("p") values.
3762 math_INTERLEAVE3_128(&p0, &p1, &p2, 2, u0, u1, u2);
3763 // And more shuffling around for the final answer
3764 assign(*i2,
3765 OR4( AND( IHI16x8( EX(p2), ROL(EX(p2),4) ), EX(c1000) ),
3766 AND( IHI16x8( ROL(EX(p2),6), EX(p2) ), EX(c0100) ),
3767 AND( IHI16x8( ROL(EX(p2),2), ROL(EX(p2),6) ), EX(c0010) ),
3768 AND( ILO16x8( ROR(EX(p2),2), ROL(EX(p1),2) ), EX(c0001) )
3770 assign(*i1,
3771 OR4( AND( IHI16x8( ROL(EX(p1),4), ROR(EX(p2),2) ), EX(c1000) ),
3772 AND( IHI16x8( EX(p1), ROL(EX(p1),4) ), EX(c0100) ),
3773 AND( IHI16x8( ROL(EX(p1),4), ROL(EX(p1),8) ), EX(c0010) ),
3774 AND( IHI16x8( ROR(EX(p0),6), ROL(EX(p1),4) ), EX(c0001) )
3776 assign(*i0,
3777 OR4( AND( IHI16x8( ROR(EX(p1),2), ROL(EX(p0),2) ), EX(c1000) ),
3778 AND( IHI16x8( ROL(EX(p0),2), ROL(EX(p0),6) ), EX(c0100) ),
3779 AND( IHI16x8( ROL(EX(p0),8), ROL(EX(p0),2) ), EX(c0010) ),
3780 AND( IHI16x8( ROL(EX(p0),4), ROL(EX(p0),8) ), EX(c0001) )
3782 return;
3785 if (laneSzBlg2 == 0) {
3786 // 8x16. It doesn't seem worth the hassle of first doing a
3787 // 16x8 interleave, so just generate all 24 partial results
3788 // directly :-(
3789 // u2 == Cf .. C0, u1 == Bf .. B0, u0 == Af .. A0
3790 // i2 == Cf Bf Af Ce .. Bb Ab Ca
3791 // i1 == Ba Aa C9 B9 .. A6 C5 B5
3792 // i0 == A5 C4 B4 A4 .. C0 B0 A0
3794 IRTemp i2_FEDC = newTempV128(); IRTemp i2_BA98 = newTempV128();
3795 IRTemp i2_7654 = newTempV128(); IRTemp i2_3210 = newTempV128();
3796 IRTemp i1_FEDC = newTempV128(); IRTemp i1_BA98 = newTempV128();
3797 IRTemp i1_7654 = newTempV128(); IRTemp i1_3210 = newTempV128();
3798 IRTemp i0_FEDC = newTempV128(); IRTemp i0_BA98 = newTempV128();
3799 IRTemp i0_7654 = newTempV128(); IRTemp i0_3210 = newTempV128();
3800 IRTemp i2_hi64 = newTempV128(); IRTemp i2_lo64 = newTempV128();
3801 IRTemp i1_hi64 = newTempV128(); IRTemp i1_lo64 = newTempV128();
3802 IRTemp i0_hi64 = newTempV128(); IRTemp i0_lo64 = newTempV128();
3804 // eg XXXX(qqq, CC, 0xF, BB, 0xA) sets qqq to be a vector
3805 // of the form 14 bytes junk : CC[0xF] : BB[0xA]
3807 # define XXXX(_tempName,_srcVec1,_srcShift1,_srcVec2,_srcShift2) \
3808 IRTemp t_##_tempName = newTempV128(); \
3809 assign(t_##_tempName, \
3810 ILO8x16( ROR(EX(_srcVec1),(_srcShift1)), \
3811 ROR(EX(_srcVec2),(_srcShift2)) ) )
3813 // Let CC, BB, AA be (handy) aliases of u2, u1, u0 respectively
3814 IRTemp CC = u2; IRTemp BB = u1; IRTemp AA = u0;
3816 // The slicing and reassembly are done as interleavedly as possible,
3817 // so as to minimise the demand for registers in the back end, which
3818 // was observed to be a problem in testing.
3820 XXXX(CfBf, CC, 0xf, BB, 0xf); // i2[15:14]
3821 XXXX(AfCe, AA, 0xf, CC, 0xe);
3822 assign(i2_FEDC, ILO16x8(EX(t_CfBf), EX(t_AfCe)));
3824 XXXX(BeAe, BB, 0xe, AA, 0xe);
3825 XXXX(CdBd, CC, 0xd, BB, 0xd);
3826 assign(i2_BA98, ILO16x8(EX(t_BeAe), EX(t_CdBd)));
3827 assign(i2_hi64, ILO32x4(EX(i2_FEDC), EX(i2_BA98)));
3829 XXXX(AdCc, AA, 0xd, CC, 0xc);
3830 XXXX(BcAc, BB, 0xc, AA, 0xc);
3831 assign(i2_7654, ILO16x8(EX(t_AdCc), EX(t_BcAc)));
3833 XXXX(CbBb, CC, 0xb, BB, 0xb);
3834 XXXX(AbCa, AA, 0xb, CC, 0xa); // i2[1:0]
3835 assign(i2_3210, ILO16x8(EX(t_CbBb), EX(t_AbCa)));
3836 assign(i2_lo64, ILO32x4(EX(i2_7654), EX(i2_3210)));
3837 assign(*i2, ILO64x2(EX(i2_hi64), EX(i2_lo64)));
3839 XXXX(BaAa, BB, 0xa, AA, 0xa); // i1[15:14]
3840 XXXX(C9B9, CC, 0x9, BB, 0x9);
3841 assign(i1_FEDC, ILO16x8(EX(t_BaAa), EX(t_C9B9)));
3843 XXXX(A9C8, AA, 0x9, CC, 0x8);
3844 XXXX(B8A8, BB, 0x8, AA, 0x8);
3845 assign(i1_BA98, ILO16x8(EX(t_A9C8), EX(t_B8A8)));
3846 assign(i1_hi64, ILO32x4(EX(i1_FEDC), EX(i1_BA98)));
3848 XXXX(C7B7, CC, 0x7, BB, 0x7);
3849 XXXX(A7C6, AA, 0x7, CC, 0x6);
3850 assign(i1_7654, ILO16x8(EX(t_C7B7), EX(t_A7C6)));
3852 XXXX(B6A6, BB, 0x6, AA, 0x6);
3853 XXXX(C5B5, CC, 0x5, BB, 0x5); // i1[1:0]
3854 assign(i1_3210, ILO16x8(EX(t_B6A6), EX(t_C5B5)));
3855 assign(i1_lo64, ILO32x4(EX(i1_7654), EX(i1_3210)));
3856 assign(*i1, ILO64x2(EX(i1_hi64), EX(i1_lo64)));
3858 XXXX(A5C4, AA, 0x5, CC, 0x4); // i0[15:14]
3859 XXXX(B4A4, BB, 0x4, AA, 0x4);
3860 assign(i0_FEDC, ILO16x8(EX(t_A5C4), EX(t_B4A4)));
3862 XXXX(C3B3, CC, 0x3, BB, 0x3);
3863 XXXX(A3C2, AA, 0x3, CC, 0x2);
3864 assign(i0_BA98, ILO16x8(EX(t_C3B3), EX(t_A3C2)));
3865 assign(i0_hi64, ILO32x4(EX(i0_FEDC), EX(i0_BA98)));
3867 XXXX(B2A2, BB, 0x2, AA, 0x2);
3868 XXXX(C1B1, CC, 0x1, BB, 0x1);
3869 assign(i0_7654, ILO16x8(EX(t_B2A2), EX(t_C1B1)));
3871 XXXX(A1C0, AA, 0x1, CC, 0x0);
3872 XXXX(B0A0, BB, 0x0, AA, 0x0); // i0[1:0]
3873 assign(i0_3210, ILO16x8(EX(t_A1C0), EX(t_B0A0)));
3874 assign(i0_lo64, ILO32x4(EX(i0_7654), EX(i0_3210)));
3875 assign(*i0, ILO64x2(EX(i0_hi64), EX(i0_lo64)));
3877 # undef XXXX
3878 return;
3881 /*NOTREACHED*/
3882 vassert(0);
3886 /* Do interleaving for 4 128 bit vectors, for ST4 insns. */
3887 static
3888 void math_INTERLEAVE4_128(
3889 /*OUTx4*/ IRTemp* i0, IRTemp* i1, IRTemp* i2, IRTemp* i3,
3890 UInt laneSzBlg2,
3891 IRTemp u0, IRTemp u1, IRTemp u2, IRTemp u3 )
3893 if (laneSzBlg2 == 3) {
3894 // 64x2
3895 assign(*i0, ILO64x2(EX(u1), EX(u0)));
3896 assign(*i1, ILO64x2(EX(u3), EX(u2)));
3897 assign(*i2, IHI64x2(EX(u1), EX(u0)));
3898 assign(*i3, IHI64x2(EX(u3), EX(u2)));
3899 return;
3901 if (laneSzBlg2 == 2) {
3902 // 32x4
3903 // First, interleave at the 64-bit lane size.
3904 IRTemp p0 = newTempV128();
3905 IRTemp p1 = newTempV128();
3906 IRTemp p2 = newTempV128();
3907 IRTemp p3 = newTempV128();
3908 math_INTERLEAVE4_128(&p0, &p1, &p2, &p3, 3, u0, u1, u2, u3);
3909 // And interleave (cat) at the 32 bit size.
3910 assign(*i0, CEV32x4(EX(p1), EX(p0)));
3911 assign(*i1, COD32x4(EX(p1), EX(p0)));
3912 assign(*i2, CEV32x4(EX(p3), EX(p2)));
3913 assign(*i3, COD32x4(EX(p3), EX(p2)));
3914 return;
3916 if (laneSzBlg2 == 1) {
3917 // 16x8
3918 // First, interleave at the 32-bit lane size.
3919 IRTemp p0 = newTempV128();
3920 IRTemp p1 = newTempV128();
3921 IRTemp p2 = newTempV128();
3922 IRTemp p3 = newTempV128();
3923 math_INTERLEAVE4_128(&p0, &p1, &p2, &p3, 2, u0, u1, u2, u3);
3924 // And rearrange within each vector, to get the right 16 bit lanes.
3925 assign(*i0, COD16x8(EX(p0), SHL(EX(p0), 2)));
3926 assign(*i1, COD16x8(EX(p1), SHL(EX(p1), 2)));
3927 assign(*i2, COD16x8(EX(p2), SHL(EX(p2), 2)));
3928 assign(*i3, COD16x8(EX(p3), SHL(EX(p3), 2)));
3929 return;
3931 if (laneSzBlg2 == 0) {
3932 // 8x16
3933 // First, interleave at the 16-bit lane size.
3934 IRTemp p0 = newTempV128();
3935 IRTemp p1 = newTempV128();
3936 IRTemp p2 = newTempV128();
3937 IRTemp p3 = newTempV128();
3938 math_INTERLEAVE4_128(&p0, &p1, &p2, &p3, 1, u0, u1, u2, u3);
3939 // And rearrange within each vector, to get the right 8 bit lanes.
3940 assign(*i0, IHI32x4(COD8x16(EX(p0),EX(p0)), CEV8x16(EX(p0),EX(p0))));
3941 assign(*i1, IHI32x4(COD8x16(EX(p1),EX(p1)), CEV8x16(EX(p1),EX(p1))));
3942 assign(*i2, IHI32x4(COD8x16(EX(p2),EX(p2)), CEV8x16(EX(p2),EX(p2))));
3943 assign(*i3, IHI32x4(COD8x16(EX(p3),EX(p3)), CEV8x16(EX(p3),EX(p3))));
3944 return;
3946 /*NOTREACHED*/
3947 vassert(0);
3951 /* Do deinterleaving for 1 128 bit vector, for LD1 insns. */
3952 static
3953 void math_DEINTERLEAVE1_128( /*OUTx1*/ IRTemp* u0,
3954 UInt laneSzBlg2, IRTemp i0 )
3956 assign(*u0, mkexpr(i0));
3960 /* Do deinterleaving for 2 128 bit vectors, for LD2 insns. */
3961 static
3962 void math_DEINTERLEAVE2_128( /*OUTx2*/ IRTemp* u0, IRTemp* u1,
3963 UInt laneSzBlg2, IRTemp i0, IRTemp i1 )
3965 /* This is pretty easy, since we have primitives directly to
3966 hand. */
3967 if (laneSzBlg2 == 3) {
3968 // 64x2
3969 // i1 == B1 A1, i0 == B0 A0
3970 // u1 == B1 B0, u0 == A1 A0
3971 assign(*u0, binop(Iop_InterleaveLO64x2, mkexpr(i1), mkexpr(i0)));
3972 assign(*u1, binop(Iop_InterleaveHI64x2, mkexpr(i1), mkexpr(i0)));
3973 return;
3975 if (laneSzBlg2 == 2) {
3976 // 32x4
3977 // i1 == B3 A3 B2 A2, i0 == B1 A1 B0 A0
3978 // u1 == B3 B2 B1 B0, u0 == A3 A2 A1 A0,
3979 assign(*u0, binop(Iop_CatEvenLanes32x4, mkexpr(i1), mkexpr(i0)));
3980 assign(*u1, binop(Iop_CatOddLanes32x4, mkexpr(i1), mkexpr(i0)));
3981 return;
3983 if (laneSzBlg2 == 1) {
3984 // 16x8
3985 // i0 == B3 A3 B2 A2 B1 A1 B0 A0
3986 // i1 == B7 A7 B6 A6 B5 A5 B4 A4
3987 // u1 == B{7..0}, u0 == A{7..0}
3988 assign(*u0, binop(Iop_CatEvenLanes16x8, mkexpr(i1), mkexpr(i0)));
3989 assign(*u1, binop(Iop_CatOddLanes16x8, mkexpr(i1), mkexpr(i0)));
3990 return;
3992 if (laneSzBlg2 == 0) {
3993 // 8x16
3994 // i0 == B7 A7 B6 A6 B5 A5 B4 A4 B3 A3 B2 A2 B1 A1 B0 A0
3995 // i1 == Bf Af Be Ae Bd Ad Bc Ac Bb Ab Ba Aa B9 A9 B8 A8
3996 // u1 == B{f..0}, u0 == A{f..0}
3997 assign(*u0, binop(Iop_CatEvenLanes8x16, mkexpr(i1), mkexpr(i0)));
3998 assign(*u1, binop(Iop_CatOddLanes8x16, mkexpr(i1), mkexpr(i0)));
3999 return;
4001 /*NOTREACHED*/
4002 vassert(0);
4006 /* Do deinterleaving for 3 128 bit vectors, for LD3 insns. */
4007 static
4008 void math_DEINTERLEAVE3_128(
4009 /*OUTx3*/ IRTemp* u0, IRTemp* u1, IRTemp* u2,
4010 UInt laneSzBlg2,
4011 IRTemp i0, IRTemp i1, IRTemp i2 )
4013 if (laneSzBlg2 == 3) {
4014 // 64x2
4015 // i2 == C1 B1, i1 == A1 C0, i0 == B0 A0,
4016 // u2 == C1 C0, u1 == B1 B0, u0 == A1 A0
4017 assign(*u2, ILO64x2( ROL(EX(i2),8), EX(i1) ));
4018 assign(*u1, ILO64x2( EX(i2), ROL(EX(i0),8) ));
4019 assign(*u0, ILO64x2( ROL(EX(i1),8), EX(i0) ));
4020 return;
4023 if (laneSzBlg2 == 2) {
4024 // 32x4
4025 // i2 == C3 B3 A3 C2, i1 == B2 A2 C1 B1, i0 == A1 C0 B0 A0
4026 // p2 == C3 C2 B3 B2, p1 == A3 A2 C1 C0, p0 == B1 B0 A1 A0
4027 // u2 == C3 C2 C1 C0, u1 == B3 B2 B1 B0, u0 == A3 A2 A1 A0
4028 IRTemp t_a1c0b0a0 = newTempV128();
4029 IRTemp t_a2c1b1a1 = newTempV128();
4030 IRTemp t_a3c2b2a2 = newTempV128();
4031 IRTemp t_a0c3b3a3 = newTempV128();
4032 IRTemp p0 = newTempV128();
4033 IRTemp p1 = newTempV128();
4034 IRTemp p2 = newTempV128();
4035 // Compute some intermediate values.
4036 assign(t_a1c0b0a0, EX(i0));
4037 assign(t_a2c1b1a1, SL(EX(i1),EX(i0),3*4));
4038 assign(t_a3c2b2a2, SL(EX(i2),EX(i1),2*4));
4039 assign(t_a0c3b3a3, SL(EX(i0),EX(i2),1*4));
4040 // First deinterleave into lane-pairs
4041 assign(p0, ILO32x4(EX(t_a2c1b1a1),EX(t_a1c0b0a0)));
4042 assign(p1, ILO64x2(ILO32x4(EX(t_a0c3b3a3), EX(t_a3c2b2a2)),
4043 IHI32x4(EX(t_a2c1b1a1), EX(t_a1c0b0a0))));
4044 assign(p2, ILO32x4(ROR(EX(t_a0c3b3a3),1*4), ROR(EX(t_a3c2b2a2),1*4)));
4045 // Then deinterleave at 64x2 granularity.
4046 math_DEINTERLEAVE3_128(u0, u1, u2, 3, p0, p1, p2);
4047 return;
4050 if (laneSzBlg2 == 1) {
4051 // 16x8
4052 // u2 == C7 C6 C5 C4 C3 C2 C1 C0
4053 // u1 == B7 B6 B5 B4 B3 B2 B1 B0
4054 // u0 == A7 A6 A5 A4 A3 A2 A1 A0
4056 // i2 == C7 B7 A7 C6 B6 A6 C5 B5
4057 // i1 == A5 C4 B4 A4 C3 B3 A3 C2
4058 // i0 == B2 A2 C1 B1 A1 C0 B0 A0
4060 // p2 == C7 C6 B7 B6 A7 A6 C5 C4
4061 // p1 == B5 B4 A5 A4 C3 C2 B3 B2
4062 // p0 == A3 A2 C1 C0 B1 B0 A1 A0
4064 IRTemp s0, s1, s2, s3, t0, t1, t2, t3, p0, p1, p2, c00111111;
4065 s0 = s1 = s2 = s3
4066 = t0 = t1 = t2 = t3 = p0 = p1 = p2 = c00111111 = IRTemp_INVALID;
4067 newTempsV128_4(&s0, &s1, &s2, &s3);
4068 newTempsV128_4(&t0, &t1, &t2, &t3);
4069 newTempsV128_4(&p0, &p1, &p2, &c00111111);
4071 // s0 == b2a2 c1b1a1 c0b0a0
4072 // s1 == b4a4 c3b3a3 c2b2a2
4073 // s2 == b6a6 c5b5a5 c4b4a4
4074 // s3 == b0a0 c7b7a7 c6b6a6
4075 assign(s0, EX(i0));
4076 assign(s1, SL(EX(i1),EX(i0),6*2));
4077 assign(s2, SL(EX(i2),EX(i1),4*2));
4078 assign(s3, SL(EX(i0),EX(i2),2*2));
4080 // t0 == 0 0 c1c0 b1b0 a1a0
4081 // t1 == 0 0 c3c2 b3b2 a3a2
4082 // t2 == 0 0 c5c4 b5b4 a5a4
4083 // t3 == 0 0 c7c6 b7b6 a7a6
4084 assign(c00111111, mkV128(0x0FFF));
4085 assign(t0, AND( ILO16x8( ROR(EX(s0),3*2), EX(s0)), EX(c00111111)));
4086 assign(t1, AND( ILO16x8( ROR(EX(s1),3*2), EX(s1)), EX(c00111111)));
4087 assign(t2, AND( ILO16x8( ROR(EX(s2),3*2), EX(s2)), EX(c00111111)));
4088 assign(t3, AND( ILO16x8( ROR(EX(s3),3*2), EX(s3)), EX(c00111111)));
4090 assign(p0, OR2(EX(t0), SHL(EX(t1),6*2)));
4091 assign(p1, OR2(SHL(EX(t2),4*2), SHR(EX(t1),2*2)));
4092 assign(p2, OR2(SHL(EX(t3),2*2), SHR(EX(t2),4*2)));
4094 // Then deinterleave at 32x4 granularity.
4095 math_DEINTERLEAVE3_128(u0, u1, u2, 2, p0, p1, p2);
4096 return;
4099 if (laneSzBlg2 == 0) {
4100 // 8x16. This is the same scheme as for 16x8, with twice the
4101 // number of intermediate values.
4103 // u2 == C{f..0}
4104 // u1 == B{f..0}
4105 // u0 == A{f..0}
4107 // i2 == CBA{f} CBA{e} CBA{d} CBA{c} CBA{b} C{a}
4108 // i1 == BA{a} CBA{9} CBA{8} CBA{7} CBA{6} CB{5}
4109 // i0 == A{5} CBA{4} CBA{3} CBA{2} CBA{1} CBA{0}
4111 // p2 == C{fe} B{fe} A{fe} C{dc} B{dc} A{dc} C{ba} B{ba}
4112 // p1 == A{ba} C{98} B{98} A{98} C{76} B{76} A{76} C{54}
4113 // p0 == B{54} A{54} C{32} B{32} A{32} C{10} B{10} A{10}
4115 IRTemp s0, s1, s2, s3, s4, s5, s6, s7,
4116 t0, t1, t2, t3, t4, t5, t6, t7, p0, p1, p2, cMASK;
4117 s0 = s1 = s2 = s3 = s4 = s5 = s6 = s7
4118 = t0 = t1 = t2 = t3 = t4 = t5 = t6 = t7 = p0 = p1 = p2 = cMASK
4119 = IRTemp_INVALID;
4120 newTempsV128_4(&s0, &s1, &s2, &s3);
4121 newTempsV128_4(&s4, &s5, &s6, &s7);
4122 newTempsV128_4(&t0, &t1, &t2, &t3);
4123 newTempsV128_4(&t4, &t5, &t6, &t7);
4124 newTempsV128_4(&p0, &p1, &p2, &cMASK);
4126 // s0 == A{5} CBA{4} CBA{3} CBA{2} CBA{1} CBA{0}
4127 // s1 == A{7} CBA{6} CBA{5} CBA{4} CBA{3} CBA{2}
4128 // s2 == A{9} CBA{8} CBA{7} CBA{6} CBA{5} CBA{4}
4129 // s3 == A{b} CBA{a} CBA{9} CBA{8} CBA{7} CBA{6}
4130 // s4 == A{d} CBA{c} CBA{b} CBA{a} CBA{9} CBA{8}
4131 // s5 == A{f} CBA{e} CBA{d} CBA{c} CBA{b} CBA{a}
4132 // s6 == A{1} CBA{0} CBA{f} CBA{e} CBA{d} CBA{c}
4133 // s7 == A{3} CBA{2} CBA{1} CBA{0} CBA{f} CBA{e}
4134 assign(s0, SL(EX(i1),EX(i0), 0));
4135 assign(s1, SL(EX(i1),EX(i0), 6));
4136 assign(s2, SL(EX(i1),EX(i0),12));
4137 assign(s3, SL(EX(i2),EX(i1), 2));
4138 assign(s4, SL(EX(i2),EX(i1), 8));
4139 assign(s5, SL(EX(i2),EX(i1),14));
4140 assign(s6, SL(EX(i0),EX(i2), 4));
4141 assign(s7, SL(EX(i0),EX(i2),10));
4143 // t0 == 0--(ten)--0 C1 C0 B1 B0 A1 A0
4144 // t1 == 0--(ten)--0 C3 C2 B3 B2 A3 A2
4145 // t2 == 0--(ten)--0 C5 C4 B5 B4 A5 A4
4146 // t3 == 0--(ten)--0 C7 C6 B7 B6 A7 A6
4147 // t4 == 0--(ten)--0 C9 C8 B9 B8 A9 A8
4148 // t5 == 0--(ten)--0 Cb Ca Bb Ba Ab Aa
4149 // t6 == 0--(ten)--0 Cd Cc Bd Bc Ad Ac
4150 // t7 == 0--(ten)--0 Cf Ce Bf Be Af Ae
4151 assign(cMASK, mkV128(0x003F));
4152 assign(t0, AND( ILO8x16( ROR(EX(s0),3), EX(s0)), EX(cMASK)));
4153 assign(t1, AND( ILO8x16( ROR(EX(s1),3), EX(s1)), EX(cMASK)));
4154 assign(t2, AND( ILO8x16( ROR(EX(s2),3), EX(s2)), EX(cMASK)));
4155 assign(t3, AND( ILO8x16( ROR(EX(s3),3), EX(s3)), EX(cMASK)));
4156 assign(t4, AND( ILO8x16( ROR(EX(s4),3), EX(s4)), EX(cMASK)));
4157 assign(t5, AND( ILO8x16( ROR(EX(s5),3), EX(s5)), EX(cMASK)));
4158 assign(t6, AND( ILO8x16( ROR(EX(s6),3), EX(s6)), EX(cMASK)));
4159 assign(t7, AND( ILO8x16( ROR(EX(s7),3), EX(s7)), EX(cMASK)));
4161 assign(p0, OR3( SHL(EX(t2),12), SHL(EX(t1),6), EX(t0) ));
4162 assign(p1, OR4( SHL(EX(t5),14), SHL(EX(t4),8),
4163 SHL(EX(t3),2), SHR(EX(t2),4) ));
4164 assign(p2, OR3( SHL(EX(t7),10), SHL(EX(t6),4), SHR(EX(t5),2) ));
4166 // Then deinterleave at 16x8 granularity.
4167 math_DEINTERLEAVE3_128(u0, u1, u2, 1, p0, p1, p2);
4168 return;
4171 /*NOTREACHED*/
4172 vassert(0);
4176 /* Do deinterleaving for 4 128 bit vectors, for LD4 insns. */
4177 static
4178 void math_DEINTERLEAVE4_128(
4179 /*OUTx4*/ IRTemp* u0, IRTemp* u1, IRTemp* u2, IRTemp* u3,
4180 UInt laneSzBlg2,
4181 IRTemp i0, IRTemp i1, IRTemp i2, IRTemp i3 )
4183 if (laneSzBlg2 == 3) {
4184 // 64x2
4185 assign(*u0, ILO64x2(EX(i2), EX(i0)));
4186 assign(*u1, IHI64x2(EX(i2), EX(i0)));
4187 assign(*u2, ILO64x2(EX(i3), EX(i1)));
4188 assign(*u3, IHI64x2(EX(i3), EX(i1)));
4189 return;
4191 if (laneSzBlg2 == 2) {
4192 // 32x4
4193 IRTemp p0 = newTempV128();
4194 IRTemp p2 = newTempV128();
4195 IRTemp p1 = newTempV128();
4196 IRTemp p3 = newTempV128();
4197 assign(p0, ILO32x4(EX(i1), EX(i0)));
4198 assign(p1, IHI32x4(EX(i1), EX(i0)));
4199 assign(p2, ILO32x4(EX(i3), EX(i2)));
4200 assign(p3, IHI32x4(EX(i3), EX(i2)));
4201 // And now do what we did for the 64-bit case.
4202 math_DEINTERLEAVE4_128(u0, u1, u2, u3, 3, p0, p1, p2, p3);
4203 return;
4205 if (laneSzBlg2 == 1) {
4206 // 16x8
4207 // Deinterleave into 32-bit chunks, then do as the 32-bit case.
4208 IRTemp p0 = newTempV128();
4209 IRTemp p1 = newTempV128();
4210 IRTemp p2 = newTempV128();
4211 IRTemp p3 = newTempV128();
4212 assign(p0, IHI16x8(EX(i0), SHL(EX(i0), 8)));
4213 assign(p1, IHI16x8(EX(i1), SHL(EX(i1), 8)));
4214 assign(p2, IHI16x8(EX(i2), SHL(EX(i2), 8)));
4215 assign(p3, IHI16x8(EX(i3), SHL(EX(i3), 8)));
4216 // From here on is like the 32 bit case.
4217 math_DEINTERLEAVE4_128(u0, u1, u2, u3, 2, p0, p1, p2, p3);
4218 return;
4220 if (laneSzBlg2 == 0) {
4221 // 8x16
4222 // Deinterleave into 16-bit chunks, then do as the 16-bit case.
4223 IRTemp p0 = newTempV128();
4224 IRTemp p1 = newTempV128();
4225 IRTemp p2 = newTempV128();
4226 IRTemp p3 = newTempV128();
4227 assign(p0, IHI64x2( IHI8x16(EX(i0),ROL(EX(i0),4)),
4228 ILO8x16(EX(i0),ROL(EX(i0),4)) ));
4229 assign(p1, IHI64x2( IHI8x16(EX(i1),ROL(EX(i1),4)),
4230 ILO8x16(EX(i1),ROL(EX(i1),4)) ));
4231 assign(p2, IHI64x2( IHI8x16(EX(i2),ROL(EX(i2),4)),
4232 ILO8x16(EX(i2),ROL(EX(i2),4)) ));
4233 assign(p3, IHI64x2( IHI8x16(EX(i3),ROL(EX(i3),4)),
4234 ILO8x16(EX(i3),ROL(EX(i3),4)) ));
4235 // From here on is like the 16 bit case.
4236 math_DEINTERLEAVE4_128(u0, u1, u2, u3, 1, p0, p1, p2, p3);
4237 return;
4239 /*NOTREACHED*/
4240 vassert(0);
4244 /* Wrappers that use the full-width (de)interleavers to do half-width
4245 (de)interleaving. The scheme is to clone each input lane in the
4246 lower half of each incoming value, do a full width (de)interleave
4247 at the next lane size up, and remove every other lane of the
4248 result. The returned values may have any old junk in the upper
4249 64 bits -- the caller must ignore that. */
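/* A worked example of that scheme, for the 2-register case with
   32-bit lanes in 64-bit registers (laneSzBlg2 == 2; lane names are
   illustrative only):
     u0 = [ junk : A1 A0 ]      u1 = [ junk : B1 B0 ]
   The "doubler" (InterleaveLO32x4 of a value with itself) clones
   each low-half lane:
     du0 = [ A1 A1 A0 A0 ]      du1 = [ B1 B1 B0 B0 ]
   A full-width interleave at the next lane size up (64x2) gives
     di0 = [ B0 B0 A0 A0 ]      di1 = [ B1 B1 A1 A1 ]
   and the "halver" (CatEvenLanes32x4 of a value with itself) drops
   the duplicates, leaving the wanted 64-bit results in the low
   halves:
     i0 = [ .. : B0 A0 ]        i1 = [ .. : B1 A1 ]   */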
4251 /* Helper function -- get doubling and narrowing operations. */
4252 static
4253 void math_get_doubler_and_halver ( /*OUT*/IROp* doubler,
4254 /*OUT*/IROp* halver,
4255 UInt laneSzBlg2 )
4257 switch (laneSzBlg2) {
4258 case 2:
4259 *doubler = Iop_InterleaveLO32x4; *halver = Iop_CatEvenLanes32x4;
4260 break;
4261 case 1:
4262 *doubler = Iop_InterleaveLO16x8; *halver = Iop_CatEvenLanes16x8;
4263 break;
4264 case 0:
4265 *doubler = Iop_InterleaveLO8x16; *halver = Iop_CatEvenLanes8x16;
4266 break;
4267 default:
4268 vassert(0);
4272 /* Do interleaving for 1 64 bit vector, for ST1 insns. */
4273 static
4274 void math_INTERLEAVE1_64( /*OUTx1*/ IRTemp* i0,
4275 UInt laneSzBlg2, IRTemp u0 )
4277 assign(*i0, mkexpr(u0));
4281 /* Do interleaving for 2 64 bit vectors, for ST2 insns. */
4282 static
4283 void math_INTERLEAVE2_64( /*OUTx2*/ IRTemp* i0, IRTemp* i1,
4284 UInt laneSzBlg2, IRTemp u0, IRTemp u1 )
4286 if (laneSzBlg2 == 3) {
4287 // 1x64, degenerate case
4288 assign(*i0, EX(u0));
4289 assign(*i1, EX(u1));
4290 return;
4293 vassert(laneSzBlg2 >= 0 && laneSzBlg2 <= 2);
4294 IROp doubler = Iop_INVALID, halver = Iop_INVALID;
4295 math_get_doubler_and_halver(&doubler, &halver, laneSzBlg2);
4297 IRTemp du0 = newTempV128();
4298 IRTemp du1 = newTempV128();
4299 assign(du0, binop(doubler, EX(u0), EX(u0)));
4300 assign(du1, binop(doubler, EX(u1), EX(u1)));
4301 IRTemp di0 = newTempV128();
4302 IRTemp di1 = newTempV128();
4303 math_INTERLEAVE2_128(&di0, &di1, laneSzBlg2 + 1, du0, du1);
4304 assign(*i0, binop(halver, EX(di0), EX(di0)));
4305 assign(*i1, binop(halver, EX(di1), EX(di1)));
4309 /* Do interleaving for 3 64 bit vectors, for ST3 insns. */
4310 static
4311 void math_INTERLEAVE3_64(
4312 /*OUTx3*/ IRTemp* i0, IRTemp* i1, IRTemp* i2,
4313 UInt laneSzBlg2,
4314 IRTemp u0, IRTemp u1, IRTemp u2 )
4316 if (laneSzBlg2 == 3) {
4317 // 1x64, degenerate case
4318 assign(*i0, EX(u0));
4319 assign(*i1, EX(u1));
4320 assign(*i2, EX(u2));
4321 return;
4324 vassert(laneSzBlg2 >= 0 && laneSzBlg2 <= 2);
4325 IROp doubler = Iop_INVALID, halver = Iop_INVALID;
4326 math_get_doubler_and_halver(&doubler, &halver, laneSzBlg2);
4328 IRTemp du0 = newTempV128();
4329 IRTemp du1 = newTempV128();
4330 IRTemp du2 = newTempV128();
4331 assign(du0, binop(doubler, EX(u0), EX(u0)));
4332 assign(du1, binop(doubler, EX(u1), EX(u1)));
4333 assign(du2, binop(doubler, EX(u2), EX(u2)));
4334 IRTemp di0 = newTempV128();
4335 IRTemp di1 = newTempV128();
4336 IRTemp di2 = newTempV128();
4337 math_INTERLEAVE3_128(&di0, &di1, &di2, laneSzBlg2 + 1, du0, du1, du2);
4338 assign(*i0, binop(halver, EX(di0), EX(di0)));
4339 assign(*i1, binop(halver, EX(di1), EX(di1)));
4340 assign(*i2, binop(halver, EX(di2), EX(di2)));
4344 /* Do interleaving for 4 64 bit vectors, for ST4 insns. */
4345 static
4346 void math_INTERLEAVE4_64(
4347 /*OUTx4*/ IRTemp* i0, IRTemp* i1, IRTemp* i2, IRTemp* i3,
4348 UInt laneSzBlg2,
4349 IRTemp u0, IRTemp u1, IRTemp u2, IRTemp u3 )
4351 if (laneSzBlg2 == 3) {
4352 // 1x64, degenerate case
4353 assign(*i0, EX(u0));
4354 assign(*i1, EX(u1));
4355 assign(*i2, EX(u2));
4356 assign(*i3, EX(u3));
4357 return;
4360 vassert(laneSzBlg2 >= 0 && laneSzBlg2 <= 2);
4361 IROp doubler = Iop_INVALID, halver = Iop_INVALID;
4362 math_get_doubler_and_halver(&doubler, &halver, laneSzBlg2);
4364 IRTemp du0 = newTempV128();
4365 IRTemp du1 = newTempV128();
4366 IRTemp du2 = newTempV128();
4367 IRTemp du3 = newTempV128();
4368 assign(du0, binop(doubler, EX(u0), EX(u0)));
4369 assign(du1, binop(doubler, EX(u1), EX(u1)));
4370 assign(du2, binop(doubler, EX(u2), EX(u2)));
4371 assign(du3, binop(doubler, EX(u3), EX(u3)));
4372 IRTemp di0 = newTempV128();
4373 IRTemp di1 = newTempV128();
4374 IRTemp di2 = newTempV128();
4375 IRTemp di3 = newTempV128();
4376 math_INTERLEAVE4_128(&di0, &di1, &di2, &di3,
4377 laneSzBlg2 + 1, du0, du1, du2, du3);
4378 assign(*i0, binop(halver, EX(di0), EX(di0)));
4379 assign(*i1, binop(halver, EX(di1), EX(di1)));
4380 assign(*i2, binop(halver, EX(di2), EX(di2)));
4381 assign(*i3, binop(halver, EX(di3), EX(di3)));
4385 /* Do deinterleaving for 1 64 bit vector, for LD1 insns. */
4386 static
4387 void math_DEINTERLEAVE1_64( /*OUTx1*/ IRTemp* u0,
4388 UInt laneSzBlg2, IRTemp i0 )
4390 assign(*u0, mkexpr(i0));
4394 /* Do deinterleaving for 2 64 bit vectors, for LD2 insns. */
4395 static
4396 void math_DEINTERLEAVE2_64( /*OUTx2*/ IRTemp* u0, IRTemp* u1,
4397 UInt laneSzBlg2, IRTemp i0, IRTemp i1 )
4399 if (laneSzBlg2 == 3) {
4400 // 1x64, degenerate case
4401 assign(*u0, EX(i0));
4402 assign(*u1, EX(i1));
4403 return;
4406 vassert(laneSzBlg2 >= 0 && laneSzBlg2 <= 2);
4407 IROp doubler = Iop_INVALID, halver = Iop_INVALID;
4408 math_get_doubler_and_halver(&doubler, &halver, laneSzBlg2);
4410 IRTemp di0 = newTempV128();
4411 IRTemp di1 = newTempV128();
4412 assign(di0, binop(doubler, EX(i0), EX(i0)));
4413 assign(di1, binop(doubler, EX(i1), EX(i1)));
4415 IRTemp du0 = newTempV128();
4416 IRTemp du1 = newTempV128();
4417 math_DEINTERLEAVE2_128(&du0, &du1, laneSzBlg2 + 1, di0, di1);
4418 assign(*u0, binop(halver, EX(du0), EX(du0)));
4419 assign(*u1, binop(halver, EX(du1), EX(du1)));
4423 /* Do deinterleaving for 3 64 bit vectors, for LD3 insns. */
4424 static
4425 void math_DEINTERLEAVE3_64(
4426 /*OUTx3*/ IRTemp* u0, IRTemp* u1, IRTemp* u2,
4427 UInt laneSzBlg2,
4428 IRTemp i0, IRTemp i1, IRTemp i2 )
4430 if (laneSzBlg2 == 3) {
4431 // 1x64, degenerate case
4432 assign(*u0, EX(i0));
4433 assign(*u1, EX(i1));
4434 assign(*u2, EX(i2));
4435 return;
4438 vassert(laneSzBlg2 >= 0 && laneSzBlg2 <= 2);
4439 IROp doubler = Iop_INVALID, halver = Iop_INVALID;
4440 math_get_doubler_and_halver(&doubler, &halver, laneSzBlg2);
4442 IRTemp di0 = newTempV128();
4443 IRTemp di1 = newTempV128();
4444 IRTemp di2 = newTempV128();
4445 assign(di0, binop(doubler, EX(i0), EX(i0)));
4446 assign(di1, binop(doubler, EX(i1), EX(i1)));
4447 assign(di2, binop(doubler, EX(i2), EX(i2)));
4448 IRTemp du0 = newTempV128();
4449 IRTemp du1 = newTempV128();
4450 IRTemp du2 = newTempV128();
4451 math_DEINTERLEAVE3_128(&du0, &du1, &du2, laneSzBlg2 + 1, di0, di1, di2);
4452 assign(*u0, binop(halver, EX(du0), EX(du0)));
4453 assign(*u1, binop(halver, EX(du1), EX(du1)));
4454 assign(*u2, binop(halver, EX(du2), EX(du2)));
4458 /* Do deinterleaving for 4 64 bit vectors, for LD4 insns. */
4459 static
4460 void math_DEINTERLEAVE4_64(
4461 /*OUTx4*/ IRTemp* u0, IRTemp* u1, IRTemp* u2, IRTemp* u3,
4462 UInt laneSzBlg2,
4463 IRTemp i0, IRTemp i1, IRTemp i2, IRTemp i3 )
4465 if (laneSzBlg2 == 3) {
4466 // 1x64, degenerate case
4467 assign(*u0, EX(i0));
4468 assign(*u1, EX(i1));
4469 assign(*u2, EX(i2));
4470 assign(*u3, EX(i3));
4471 return;
4474 vassert(laneSzBlg2 >= 0 && laneSzBlg2 <= 2);
4475 IROp doubler = Iop_INVALID, halver = Iop_INVALID;
4476 math_get_doubler_and_halver(&doubler, &halver, laneSzBlg2);
4478 IRTemp di0 = newTempV128();
4479 IRTemp di1 = newTempV128();
4480 IRTemp di2 = newTempV128();
4481 IRTemp di3 = newTempV128();
4482 assign(di0, binop(doubler, EX(i0), EX(i0)));
4483 assign(di1, binop(doubler, EX(i1), EX(i1)));
4484 assign(di2, binop(doubler, EX(i2), EX(i2)));
4485 assign(di3, binop(doubler, EX(i3), EX(i3)));
4486 IRTemp du0 = newTempV128();
4487 IRTemp du1 = newTempV128();
4488 IRTemp du2 = newTempV128();
4489 IRTemp du3 = newTempV128();
4490 math_DEINTERLEAVE4_128(&du0, &du1, &du2, &du3,
4491 laneSzBlg2 + 1, di0, di1, di2, di3);
4492 assign(*u0, binop(halver, EX(du0), EX(du0)));
4493 assign(*u1, binop(halver, EX(du1), EX(du1)));
4494 assign(*u2, binop(halver, EX(du2), EX(du2)));
4495 assign(*u3, binop(halver, EX(du3), EX(du3)));
4499 #undef EX
4500 #undef SL
4501 #undef ROR
4502 #undef ROL
4503 #undef SHR
4504 #undef SHL
4505 #undef ILO64x2
4506 #undef IHI64x2
4507 #undef ILO32x4
4508 #undef IHI32x4
4509 #undef ILO16x8
4510 #undef IHI16x8
4511 #undef ILO8x16
4512 #undef IHI8x16
4513 #undef CEV32x4
4514 #undef COD32x4
4515 #undef COD16x8
4516 #undef COD8x16
4517 #undef CEV8x16
4518 #undef AND
4519 #undef OR2
4520 #undef OR3
4521 #undef OR4
4524 /*------------------------------------------------------------*/
4525 /*--- Load and Store instructions ---*/
4526 /*------------------------------------------------------------*/
4528 /* Generate the EA for a "reg + reg" style amode. This is done from
4529 parts of the insn, but for sanity checking sake it takes the whole
4530 insn. This appears to depend on insn[15:12], with opt=insn[15:13]
4531 and S=insn[12]:
4533 The possible forms, along with their opt:S values, are:
4534 011:0 Xn|SP + Xm
4535 111:0 Xn|SP + Xm
4536 011:1 Xn|SP + Xm * transfer_szB
4537 111:1 Xn|SP + Xm * transfer_szB
4538 010:0 Xn|SP + 32Uto64(Wm)
4539 010:1 Xn|SP + 32Uto64(Wm) * transfer_szB
4540 110:0 Xn|SP + 32Sto64(Wm)
4541 110:1 Xn|SP + 32Sto64(Wm) * transfer_szB
4543 Rm is insn[20:16]. Rn is insn[9:5]. Rt is insn[4:0]. Log2 of
4544 the transfer size is insn[23,31,30]. For integer loads/stores,
4545 insn[23] is zero, hence szLg2 can be at most 3 in such cases.
4547 If the decoding fails, it returns IRTemp_INVALID.
4549 isInt is True iff this decoding is for transfers to/from integer
4550 registers. If False it is for transfers to/from vector registers.
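   For example, the opt:S == 011:1 form with an 8-byte transfer
   (szLg2 == 3) yields EA = Xn|SP + (Xm << 3), corresponding to
   assembly such as "ldr x0, [x1, x2, lsl #3]"; the 011:0 form is
   simply EA = Xn|SP + Xm.  (Register numbers illustrative only.)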
4552 static IRTemp gen_indexed_EA ( /*OUT*/HChar* buf, UInt insn, Bool isInt )
4554 UInt optS = SLICE_UInt(insn, 15, 12);
4555 UInt mm = SLICE_UInt(insn, 20, 16);
4556 UInt nn = SLICE_UInt(insn, 9, 5);
4557 UInt szLg2 = (isInt ? 0 : (SLICE_UInt(insn, 23, 23) << 2))
4558 | SLICE_UInt(insn, 31, 30); // Log2 of the size
4560 buf[0] = 0;
4562 /* Sanity checks, that this really is a load/store insn. */
4563 if (SLICE_UInt(insn, 11, 10) != BITS2(1,0))
4564 goto fail;
4566 if (isInt
4567 && SLICE_UInt(insn, 29, 21) != BITS9(1,1,1,0,0,0,0,1,1)/*LDR*/
4568 && SLICE_UInt(insn, 29, 21) != BITS9(1,1,1,0,0,0,0,0,1)/*STR*/
4569 && SLICE_UInt(insn, 29, 21) != BITS9(1,1,1,0,0,0,1,0,1)/*LDRSbhw Xt*/
4570 && SLICE_UInt(insn, 29, 21) != BITS9(1,1,1,0,0,0,1,1,1))/*LDRSbhw Wt*/
4571 goto fail;
4573 if (!isInt
4574 && SLICE_UInt(insn, 29, 24) != BITS6(1,1,1,1,0,0)) /*LDR/STR*/
4575 goto fail;
4577 /* Throw out non-verified but possibly valid cases. */
4578 switch (szLg2) {
4579 case BITS3(0,0,0): break; // 8 bit, valid for both int and vec
4580 case BITS3(0,0,1): break; // 16 bit, valid for both int and vec
4581 case BITS3(0,1,0): break; // 32 bit, valid for both int and vec
4582 case BITS3(0,1,1): break; // 64 bit, valid for both int and vec
4583 case BITS3(1,0,0): // can only ever be valid for the vector case
4584 if (isInt) goto fail; else break;
4585 case BITS3(1,0,1): // these sizes are never valid
4586 case BITS3(1,1,0):
4587 case BITS3(1,1,1): goto fail;
4589 default: vassert(0);
4592 IRExpr* rhs = NULL;
4593 switch (optS) {
4594 case BITS4(1,1,1,0): goto fail; //ATC
4595 case BITS4(0,1,1,0):
4596 rhs = getIReg64orZR(mm);
4597 vex_sprintf(buf, "[%s, %s]",
4598 nameIReg64orZR(nn), nameIReg64orZR(mm));
4599 break;
4600 case BITS4(1,1,1,1): goto fail; //ATC
4601 case BITS4(0,1,1,1):
4602 rhs = binop(Iop_Shl64, getIReg64orZR(mm), mkU8(szLg2));
4603 vex_sprintf(buf, "[%s, %s lsl %u]",
4604 nameIReg64orZR(nn), nameIReg64orZR(mm), szLg2);
4605 break;
4606 case BITS4(0,1,0,0):
4607 rhs = unop(Iop_32Uto64, getIReg32orZR(mm));
4608 vex_sprintf(buf, "[%s, %s uxtx]",
4609 nameIReg64orZR(nn), nameIReg32orZR(mm));
4610 break;
4611 case BITS4(0,1,0,1):
4612 rhs = binop(Iop_Shl64,
4613 unop(Iop_32Uto64, getIReg32orZR(mm)), mkU8(szLg2));
4614 vex_sprintf(buf, "[%s, %s uxtx, lsl %u]",
4615 nameIReg64orZR(nn), nameIReg32orZR(mm), szLg2);
4616 break;
4617 case BITS4(1,1,0,0):
4618 rhs = unop(Iop_32Sto64, getIReg32orZR(mm));
4619 vex_sprintf(buf, "[%s, %s sxtx]",
4620 nameIReg64orZR(nn), nameIReg32orZR(mm));
4621 break;
4622 case BITS4(1,1,0,1):
4623 rhs = binop(Iop_Shl64,
4624 unop(Iop_32Sto64, getIReg32orZR(mm)), mkU8(szLg2));
4625 vex_sprintf(buf, "[%s, %s sxtx, lsl %u]",
4626 nameIReg64orZR(nn), nameIReg32orZR(mm), szLg2);
4627 break;
4628 default:
4629 /* The rest appear to be genuinely invalid */
4630 goto fail;
4633 vassert(rhs);
4634 IRTemp res = newTemp(Ity_I64);
4635 assign(res, binop(Iop_Add64, getIReg64orSP(nn), rhs));
4636 return res;
4638 fail:
4639 vex_printf("gen_indexed_EA: unhandled case optS == 0x%x\n", optS);
4640 return IRTemp_INVALID;
4644 /* Generate an 8/16/32/64 bit integer store to ADDR for the lowest
4645 bits of DATAE :: Ity_I64. */
4646 static void gen_narrowing_store ( UInt szB, IRTemp addr, IRExpr* dataE )
4648 IRExpr* addrE = mkexpr(addr);
4649 switch (szB) {
4650 case 8:
4651 storeLE(addrE, dataE);
4652 break;
4653 case 4:
4654 storeLE(addrE, unop(Iop_64to32, dataE));
4655 break;
4656 case 2:
4657 storeLE(addrE, unop(Iop_64to16, dataE));
4658 break;
4659 case 1:
4660 storeLE(addrE, unop(Iop_64to8, dataE));
4661 break;
4662 default:
4663 vassert(0);
4668 /* Generate an 8/16/32/64 bit unsigned widening load from ADDR,
4669 placing the result in an Ity_I64 temporary. */
4670 static IRTemp gen_zwidening_load ( UInt szB, IRTemp addr )
4672 IRTemp res = newTemp(Ity_I64);
4673 IRExpr* addrE = mkexpr(addr);
4674 switch (szB) {
4675 case 8:
4676 assign(res, loadLE(Ity_I64,addrE));
4677 break;
4678 case 4:
4679 assign(res, unop(Iop_32Uto64, loadLE(Ity_I32,addrE)));
4680 break;
4681 case 2:
4682 assign(res, unop(Iop_16Uto64, loadLE(Ity_I16,addrE)));
4683 break;
4684 case 1:
4685 assign(res, unop(Iop_8Uto64, loadLE(Ity_I8,addrE)));
4686 break;
4687 default:
4688 vassert(0);
4690 return res;
4694 /* Generate a "standard 7" name, from bitQ and size. But also
4695 allow ".1d" since that's occasionally useful. */
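/* Examples: bitQ=0,size=1 -> "4h";  bitQ=1,size=2 -> "4s";
   bitQ=0,size=3 -> "1d" (the extra case mentioned above). */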
4696 static
4697 const HChar* nameArr_Q_SZ ( UInt bitQ, UInt size )
4699 vassert(bitQ <= 1 && size <= 3);
4700 const HChar* nms[8]
4701 = { "8b", "4h", "2s", "1d", "16b", "8h", "4s", "2d" };
4702 UInt ix = (bitQ << 2) | size;
4703 vassert(ix < 8);
4704 return nms[ix];
4708 static
4709 Bool dis_ARM64_load_store(/*MB_OUT*/DisResult* dres, UInt insn,
4710 const VexAbiInfo* abiinfo
4713 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
4715 /* ------------ LDR,STR (immediate, uimm12) ----------- */
4716 /* uimm12 is scaled by the transfer size
4718 31 29 26 21 9 4
4719 | | | | | |
4720 11 111 00100 imm12 nn tt STR Xt, [Xn|SP, #imm12 * 8]
4721 11 111 00101 imm12 nn tt LDR Xt, [Xn|SP, #imm12 * 8]
4723 10 111 00100 imm12 nn tt STR Wt, [Xn|SP, #imm12 * 4]
4724 10 111 00101 imm12 nn tt LDR Wt, [Xn|SP, #imm12 * 4]
4726 01 111 00100 imm12 nn tt STRH Wt, [Xn|SP, #imm12 * 2]
4727 01 111 00101 imm12 nn tt LDRH Wt, [Xn|SP, #imm12 * 2]
4729 00 111 00100 imm12 nn tt STRB Wt, [Xn|SP, #imm12 * 1]
4730 00 111 00101 imm12 nn tt LDRB Wt, [Xn|SP, #imm12 * 1]
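   For example, "ldr x0, [sp, #16]" is the 64-bit form, whose imm12
   is scaled by 8, so it encodes imm12 == 2; the decoder below
   recovers the byte offset as imm12 * (1 << szLg2).  (Registers
   illustrative only.)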
4732 if (INSN(29,23) == BITS7(1,1,1,0,0,1,0)) {
4733 UInt szLg2 = INSN(31,30);
4734 UInt szB = 1 << szLg2;
4735 Bool isLD = INSN(22,22) == 1;
4736 UInt offs = INSN(21,10) * szB;
4737 UInt nn = INSN(9,5);
4738 UInt tt = INSN(4,0);
4739 IRTemp ta = newTemp(Ity_I64);
4740 assign(ta, binop(Iop_Add64, getIReg64orSP(nn), mkU64(offs)));
4741 if (nn == 31) { /* FIXME generate stack alignment check */ }
4742 vassert(szLg2 < 4);
4743 if (isLD) {
4744 putIReg64orZR(tt, mkexpr(gen_zwidening_load(szB, ta)));
4745 } else {
4746 gen_narrowing_store(szB, ta, getIReg64orZR(tt));
4748 const HChar* ld_name[4] = { "ldrb", "ldrh", "ldr", "ldr" };
4749 const HChar* st_name[4] = { "strb", "strh", "str", "str" };
4750 DIP("%s %s, [%s, #%u]\n",
4751 (isLD ? ld_name : st_name)[szLg2], nameIRegOrZR(szB == 8, tt),
4752 nameIReg64orSP(nn), offs);
4753 return True;
4756 /* ------------ LDUR,STUR (immediate, simm9) ----------- */
4758 31 29 26 20 11 9 4
4759 | | | | | | |
4760 (at-Rn-then-Rn=EA) | | |
4761 sz 111 00000 0 imm9 01 Rn Rt STR Rt, [Xn|SP], #simm9
4762 sz 111 00001 0 imm9 01 Rn Rt LDR Rt, [Xn|SP], #simm9
4764 (at-EA-then-Rn=EA)
4765 sz 111 00000 0 imm9 11 Rn Rt STR Rt, [Xn|SP, #simm9]!
4766 sz 111 00001 0 imm9 11 Rn Rt LDR Rt, [Xn|SP, #simm9]!
4768 (at-EA)
4769 sz 111 00000 0 imm9 00 Rn Rt STR Rt, [Xn|SP, #simm9]
4770 sz 111 00001 0 imm9 00 Rn Rt LDR Rt, [Xn|SP, #simm9]
4772 simm9 is unscaled.
4774 The case 'wback && Rn == Rt && Rt != 31' is disallowed. In the
4775 load case this is because it would create two competing values for
4776 Rt. In the store case the reason is unclear, but the spec
4777 disallows it anyway.
4779 Stores are narrowing, loads are unsigned widening. sz encodes
4780 the transfer size in the normal way: 00=1, 01=2, 10=4, 11=8.
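   To make the three forms concrete: bits [11:10] == 01 is the
   post-index form (transfer at Rn, then Rn := Rn + simm9),
   [11:10] == 11 is the pre-index form (transfer at Rn + simm9, with
   writeback), and [11:10] == 00 is the plain no-writeback LDUR/STUR.
   Since simm9 is unscaled, e.g. "ldur x0, [x1, #-3]" loads the 8
   bytes at X1 - 3.  (Registers illustrative only.)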
4782 if ((INSN(29,21) & BITS9(1,1,1, 1,1,1,1,0, 1))
4783 == BITS9(1,1,1, 0,0,0,0,0, 0)) {
4784 UInt szLg2 = INSN(31,30);
4785 UInt szB = 1 << szLg2;
4786 Bool isLoad = INSN(22,22) == 1;
4787 UInt imm9 = INSN(20,12);
4788 UInt nn = INSN(9,5);
4789 UInt tt = INSN(4,0);
4790 Bool wBack = INSN(10,10) == 1;
4791 UInt how = INSN(11,10);
4792 if (how == BITS2(1,0) || (wBack && nn == tt && tt != 31)) {
4793 /* undecodable; fall through */
4794 } else {
4795 if (nn == 31) { /* FIXME generate stack alignment check */ }
4797 // Compute the transfer address TA and the writeback address WA.
4798 IRTemp tRN = newTemp(Ity_I64);
4799 assign(tRN, getIReg64orSP(nn));
4800 IRTemp tEA = newTemp(Ity_I64);
4801 Long simm9 = (Long)sx_to_64(imm9, 9);
4802 assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm9)));
4804 IRTemp tTA = newTemp(Ity_I64);
4805 IRTemp tWA = newTemp(Ity_I64);
4806 switch (how) {
4807 case BITS2(0,1):
4808 assign(tTA, mkexpr(tRN)); assign(tWA, mkexpr(tEA)); break;
4809 case BITS2(1,1):
4810 assign(tTA, mkexpr(tEA)); assign(tWA, mkexpr(tEA)); break;
4811 case BITS2(0,0):
4812 assign(tTA, mkexpr(tEA)); /* tWA is unused */ break;
4813 default:
4814 vassert(0); /* NOTREACHED */
4817 /* Normally rN would be updated after the transfer. However, in
4818 the special cases typified by
4819 str x30, [sp,#-16]!
4820 str w1, [sp,#-32]!
4821 it is necessary to update SP before the transfer, (1)
4822 because Memcheck will otherwise complain about a write
4823 below the stack pointer, and (2) because the segfault
4824 stack extension mechanism will otherwise extend the stack
4825 only down to SP before the instruction, which might not be
4826 far enough, if the -16/-32 offset takes the actual access
4827 address to the next page.
4829 Bool earlyWBack
4830 = wBack && simm9 < 0 && (szB == 8 || szB == 4)
4831 && how == BITS2(1,1) && nn == 31 && !isLoad;
4833 if (wBack && earlyWBack)
4834 putIReg64orSP(nn, mkexpr(tEA));
4836 if (isLoad) {
4837 putIReg64orZR(tt, mkexpr(gen_zwidening_load(szB, tTA)));
4838 } else {
4839 gen_narrowing_store(szB, tTA, getIReg64orZR(tt));
4842 if (wBack && !earlyWBack)
4843 putIReg64orSP(nn, mkexpr(tEA));
4845 const HChar* ld_name[4] = { "ldurb", "ldurh", "ldur", "ldur" };
4846 const HChar* st_name[4] = { "sturb", "sturh", "stur", "stur" };
4847 const HChar* fmt_str = NULL;
4848 switch (how) {
4849 case BITS2(0,1):
4850 fmt_str = "%s %s, [%s], #%lld (at-Rn-then-Rn=EA)\n";
4851 break;
4852 case BITS2(1,1):
4853 fmt_str = "%s %s, [%s, #%lld]! (at-EA-then-Rn=EA)\n";
4854 break;
4855 case BITS2(0,0):
4856 fmt_str = "%s %s, [%s, #%lld] (at-Rn)\n";
4857 break;
4858 default:
4859 vassert(0);
4861 DIP(fmt_str, (isLoad ? ld_name : st_name)[szLg2],
4862 nameIRegOrZR(szB == 8, tt),
4863 nameIReg64orSP(nn), simm9);
4864 return True;
4868 /* -------- LDP,STP (immediate, simm7) (INT REGS) -------- */
4869 /* L==1 => mm==LD
4870 L==0 => mm==ST
4871 x==0 => 32 bit transfers, and zero extended loads
4872 x==1 => 64 bit transfers
4873 simm7 is scaled by the (single-register) transfer size
4875 (at-Rn-then-Rn=EA)
4876 x0 101 0001 L imm7 Rt2 Rn Rt1 mmP Rt1,Rt2, [Xn|SP], #imm
4878 (at-EA-then-Rn=EA)
4879 x0 101 0011 L imm7 Rt2 Rn Rt1 mmP Rt1,Rt2, [Xn|SP, #imm]!
4881 (at-EA)
4882 x0 101 0010 L imm7 Rt2 Rn Rt1 mmP Rt1,Rt2, [Xn|SP, #imm]
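   For example, with x==1 the immediate is scaled by 8, so
   "stp x29, x30, [sp, #-16]!" encodes imm7 == -2; the decoder below
   recovers the byte offset as (bX ? 8 : 4) * simm7.  (Registers
   illustrative only.)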
4884 UInt insn_30_23 = INSN(30,23);
4885 if (insn_30_23 == BITS8(0,1,0,1,0,0,0,1)
4886 || insn_30_23 == BITS8(0,1,0,1,0,0,1,1)
4887 || insn_30_23 == BITS8(0,1,0,1,0,0,1,0)) {
4888 UInt bL = INSN(22,22);
4889 UInt bX = INSN(31,31);
4890 UInt bWBack = INSN(23,23);
4891 UInt rT1 = INSN(4,0);
4892 UInt rN = INSN(9,5);
4893 UInt rT2 = INSN(14,10);
4894 Long simm7 = (Long)sx_to_64(INSN(21,15), 7);
4895 if ((bWBack && (rT1 == rN || rT2 == rN) && rN != 31)
4896 || (bL && rT1 == rT2)) {
4897 /* undecodable; fall through */
4898 } else {
4899 if (rN == 31) { /* FIXME generate stack alignment check */ }
4901 // Compute the transfer address TA and the writeback address WA.
4902 IRTemp tRN = newTemp(Ity_I64);
4903 assign(tRN, getIReg64orSP(rN));
4904 IRTemp tEA = newTemp(Ity_I64);
4905 simm7 = (bX ? 8 : 4) * simm7;
4906 assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm7)));
4908 IRTemp tTA = newTemp(Ity_I64);
4909 IRTemp tWA = newTemp(Ity_I64);
4910 switch (INSN(24,23)) {
4911 case BITS2(0,1):
4912 assign(tTA, mkexpr(tRN)); assign(tWA, mkexpr(tEA)); break;
4913 case BITS2(1,1):
4914 assign(tTA, mkexpr(tEA)); assign(tWA, mkexpr(tEA)); break;
4915 case BITS2(1,0):
4916 assign(tTA, mkexpr(tEA)); /* tWA is unused */ break;
4917 default:
4918 vassert(0); /* NOTREACHED */
4921 /* Normally rN would be updated after the transfer. However, in
4922 the special case typified by
4923 stp x29, x30, [sp,#-112]!
4924 it is necessary to update SP before the transfer, (1)
4925 because Memcheck will otherwise complain about a write
4926 below the stack pointer, and (2) because the segfault
4927 stack extension mechanism will otherwise extend the stack
4928 only down to SP before the instruction, which might not be
4929 far enough, if the -112 offset takes the actual access
4930 address to the next page.
4932 Bool earlyWBack
4933 = bWBack && simm7 < 0
4934 && INSN(24,23) == BITS2(1,1) && rN == 31 && bL == 0;
4936 if (bWBack && earlyWBack)
4937 putIReg64orSP(rN, mkexpr(tEA));
4939 /**/ if (bL == 1 && bX == 1) {
4940 // 64 bit load
4941 putIReg64orZR(rT1, loadLE(Ity_I64,
4942 binop(Iop_Add64,mkexpr(tTA),mkU64(0))));
4943 putIReg64orZR(rT2, loadLE(Ity_I64,
4944 binop(Iop_Add64,mkexpr(tTA),mkU64(8))));
4945 } else if (bL == 1 && bX == 0) {
4946 // 32 bit load
4947 putIReg32orZR(rT1, loadLE(Ity_I32,
4948 binop(Iop_Add64,mkexpr(tTA),mkU64(0))));
4949 putIReg32orZR(rT2, loadLE(Ity_I32,
4950 binop(Iop_Add64,mkexpr(tTA),mkU64(4))));
4951 } else if (bL == 0 && bX == 1) {
4952 // 64 bit store
4953 storeLE(binop(Iop_Add64,mkexpr(tTA),mkU64(0)),
4954 getIReg64orZR(rT1));
4955 storeLE(binop(Iop_Add64,mkexpr(tTA),mkU64(8)),
4956 getIReg64orZR(rT2));
4957 } else {
4958 vassert(bL == 0 && bX == 0);
4959 // 32 bit store
4960 storeLE(binop(Iop_Add64,mkexpr(tTA),mkU64(0)),
4961 getIReg32orZR(rT1));
4962 storeLE(binop(Iop_Add64,mkexpr(tTA),mkU64(4)),
4963 getIReg32orZR(rT2));
4966 if (bWBack && !earlyWBack)
4967 putIReg64orSP(rN, mkexpr(tEA));
4969 const HChar* fmt_str = NULL;
4970 switch (INSN(24,23)) {
4971 case BITS2(0,1):
4972 fmt_str = "%sp %s, %s, [%s], #%lld (at-Rn-then-Rn=EA)\n";
4973 break;
4974 case BITS2(1,1):
4975 fmt_str = "%sp %s, %s, [%s, #%lld]! (at-EA-then-Rn=EA)\n";
4976 break;
4977 case BITS2(1,0):
4978 fmt_str = "%sp %s, %s, [%s, #%lld] (at-Rn)\n";
4979 break;
4980 default:
4981 vassert(0);
4983 DIP(fmt_str, bL == 0 ? "st" : "ld",
4984 nameIRegOrZR(bX == 1, rT1),
4985 nameIRegOrZR(bX == 1, rT2),
4986 nameIReg64orSP(rN), simm7);
4987 return True;
4991 /* -------- LDPSW (immediate, simm7) (INT REGS) -------- */
4992 /* Does 32 bit transfers which are sign extended to 64 bits.
4993 simm7 is scaled by the (single-register) transfer size
4995 (at-Rn-then-Rn=EA)
4996 01 101 0001 1 imm7 Rt2 Rn Rt1 LDPSW Rt1,Rt2, [Xn|SP], #imm
4998 (at-EA-then-Rn=EA)
4999 01 101 0011 1 imm7 Rt2 Rn Rt1 LDPSW Rt1,Rt2, [Xn|SP, #imm]!
5001 (at-EA)
5002 01 101 0010 1 imm7 Rt2 Rn Rt1 LDPSW Rt1,Rt2, [Xn|SP, #imm]
5004 UInt insn_31_22 = INSN(31,22);
5005 if (insn_31_22 == BITS10(0,1,1,0,1,0,0,0,1,1)
5006 || insn_31_22 == BITS10(0,1,1,0,1,0,0,1,1,1)
5007 || insn_31_22 == BITS10(0,1,1,0,1,0,0,1,0,1)) {
5008 UInt bWBack = INSN(23,23);
5009 UInt rT1 = INSN(4,0);
5010 UInt rN = INSN(9,5);
5011 UInt rT2 = INSN(14,10);
5012 Long simm7 = (Long)sx_to_64(INSN(21,15), 7);
5013 if ((bWBack && (rT1 == rN || rT2 == rN) && rN != 31)
5014 || (rT1 == rT2)) {
5015 /* undecodable; fall through */
5016 } else {
5017 if (rN == 31) { /* FIXME generate stack alignment check */ }
5019 // Compute the transfer address TA and the writeback address WA.
5020 IRTemp tRN = newTemp(Ity_I64);
5021 assign(tRN, getIReg64orSP(rN));
5022 IRTemp tEA = newTemp(Ity_I64);
5023 simm7 = 4 * simm7;
5024 assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm7)));
5026 IRTemp tTA = newTemp(Ity_I64);
5027 IRTemp tWA = newTemp(Ity_I64);
5028 switch (INSN(24,23)) {
5029 case BITS2(0,1):
5030 assign(tTA, mkexpr(tRN)); assign(tWA, mkexpr(tEA)); break;
5031 case BITS2(1,1):
5032 assign(tTA, mkexpr(tEA)); assign(tWA, mkexpr(tEA)); break;
5033 case BITS2(1,0):
5034 assign(tTA, mkexpr(tEA)); /* tWA is unused */ break;
5035 default:
5036 vassert(0); /* NOTREACHED */
5039 // 32 bit load, sign extended to 64 bits
5040 putIReg64orZR(rT1, unop(Iop_32Sto64,
5041 loadLE(Ity_I32, binop(Iop_Add64,
5042 mkexpr(tTA),
5043 mkU64(0)))));
5044 putIReg64orZR(rT2, unop(Iop_32Sto64,
5045 loadLE(Ity_I32, binop(Iop_Add64,
5046 mkexpr(tTA),
5047 mkU64(4)))));
5048 if (bWBack)
5049 putIReg64orSP(rN, mkexpr(tEA));
5051 const HChar* fmt_str = NULL;
5052 switch (INSN(24,23)) {
5053 case BITS2(0,1):
5054 fmt_str = "ldpsw %s, %s, [%s], #%lld (at-Rn-then-Rn=EA)\n";
5055 break;
5056 case BITS2(1,1):
5057 fmt_str = "ldpsw %s, %s, [%s, #%lld]! (at-EA-then-Rn=EA)\n";
5058 break;
5059 case BITS2(1,0):
5060 fmt_str = "ldpsw %s, %s, [%s, #%lld] (at-Rn)\n";
5061 break;
5062 default:
5063 vassert(0);
5065 DIP(fmt_str, nameIReg64orZR(rT1),
5066 nameIReg64orZR(rT2),
5067 nameIReg64orSP(rN), simm7);
5068 return True;
5072 /* ---------------- LDR (literal, int reg) ---------------- */
5073 /* 31 29 23 4
5074 00 011 000 imm19 Rt LDR Wt, [PC + sxTo64(imm19 << 2)]
5075 01 011 000 imm19 Rt LDR Xt, [PC + sxTo64(imm19 << 2)]
5076 10 011 000 imm19 Rt LDRSW Xt, [PC + sxTo64(imm19 << 2)]
5077 11 011 000 imm19 Rt prefetch [PC + sxTo64(imm19 << 2)]
5078 Just handles the first two cases for now.
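   The effective address is PC-relative: ea = PC of this insn
   + sxTo64(imm19 << 2), giving a reach of roughly +/- 1MB in 4-byte
   steps; e.g. imm19 == 1 loads from PC + 4.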
5080 if (INSN(29,24) == BITS6(0,1,1,0,0,0) && INSN(31,31) == 0) {
5081 UInt imm19 = INSN(23,5);
5082 UInt rT = INSN(4,0);
5083 UInt bX = INSN(30,30);
5084 ULong ea = guest_PC_curr_instr + sx_to_64(imm19 << 2, 21);
5085 if (bX) {
5086 putIReg64orZR(rT, loadLE(Ity_I64, mkU64(ea)));
5087 } else {
5088 putIReg32orZR(rT, loadLE(Ity_I32, mkU64(ea)));
5090 DIP("ldr %s, 0x%llx (literal)\n", nameIRegOrZR(bX == 1, rT), ea);
5091 return True;
5094 /* -------------- {LD,ST}R (integer register) --------------- */
5095 /* 31 29 20 15 12 11 9 4
5096 | | | | | | | |
5097 11 111000011 Rm option S 10 Rn Rt LDR Xt, [Xn|SP, R<m>{ext/sh}]
5098 10 111000011 Rm option S 10 Rn Rt LDR Wt, [Xn|SP, R<m>{ext/sh}]
5099 01 111000011 Rm option S 10 Rn Rt LDRH Wt, [Xn|SP, R<m>{ext/sh}]
5100 00 111000011 Rm option S 10 Rn Rt LDRB Wt, [Xn|SP, R<m>{ext/sh}]
5102 11 111000001 Rm option S 10 Rn Rt STR Xt, [Xn|SP, R<m>{ext/sh}]
5103 10 111000001 Rm option S 10 Rn Rt STR Wt, [Xn|SP, R<m>{ext/sh}]
5104 01 111000001 Rm option S 10 Rn Rt STRH Wt, [Xn|SP, R<m>{ext/sh}]
5105 00 111000001 Rm option S 10 Rn Rt STRB Wt, [Xn|SP, R<m>{ext/sh}]
5107 if (INSN(29,23) == BITS7(1,1,1,0,0,0,0)
5108 && INSN(21,21) == 1 && INSN(11,10) == BITS2(1,0)) {
5109 HChar dis_buf[64];
5110 UInt szLg2 = INSN(31,30);
5111 Bool isLD = INSN(22,22) == 1;
5112 UInt tt = INSN(4,0);
5113 IRTemp ea = gen_indexed_EA(dis_buf, insn, True/*to/from int regs*/);
5114 if (ea != IRTemp_INVALID) {
5115 switch (szLg2) {
5116 case 3: /* 64 bit */
5117 if (isLD) {
5118 putIReg64orZR(tt, loadLE(Ity_I64, mkexpr(ea)));
5119 DIP("ldr %s, %s\n", nameIReg64orZR(tt), dis_buf);
5120 } else {
5121 storeLE(mkexpr(ea), getIReg64orZR(tt));
5122 DIP("str %s, %s\n", nameIReg64orZR(tt), dis_buf);
5124 break;
5125 case 2: /* 32 bit */
5126 if (isLD) {
5127 putIReg32orZR(tt, loadLE(Ity_I32, mkexpr(ea)));
5128 DIP("ldr %s, %s\n", nameIReg32orZR(tt), dis_buf);
5129 } else {
5130 storeLE(mkexpr(ea), getIReg32orZR(tt));
5131 DIP("str %s, %s\n", nameIReg32orZR(tt), dis_buf);
5133 break;
5134 case 1: /* 16 bit */
5135 if (isLD) {
5136 putIReg64orZR(tt, unop(Iop_16Uto64,
5137 loadLE(Ity_I16, mkexpr(ea))));
5138 DIP("ldruh %s, %s\n", nameIReg32orZR(tt), dis_buf);
5139 } else {
5140 storeLE(mkexpr(ea), unop(Iop_64to16, getIReg64orZR(tt)));
5141 DIP("strh %s, %s\n", nameIReg32orZR(tt), dis_buf);
5143 break;
5144 case 0: /* 8 bit */
5145 if (isLD) {
5146 putIReg64orZR(tt, unop(Iop_8Uto64,
5147 loadLE(Ity_I8, mkexpr(ea))));
5148 DIP("ldrub %s, %s\n", nameIReg32orZR(tt), dis_buf);
5149 } else {
5150 storeLE(mkexpr(ea), unop(Iop_64to8, getIReg64orZR(tt)));
5151 DIP("strb %s, %s\n", nameIReg32orZR(tt), dis_buf);
5153 break;
5154 default:
5155 vassert(0);
5157 return True;
5161 /* -------------- LDRS{B,H,W} (uimm12) -------------- */
5162 /* 31 29 26 23 21 9 4
5163 10 111 001 10 imm12 n t LDRSW Xt, [Xn|SP, #pimm12 * 4]
5164 01 111 001 1x imm12 n t LDRSH Rt, [Xn|SP, #pimm12 * 2]
5165 00 111 001 1x imm12 n t LDRSB Rt, [Xn|SP, #pimm12 * 1]
5166 where
5167 Rt is Wt when x==1, Xt when x==0
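   For example, "ldrsw x0, [x1, #8]" is the 32-bit sign-extending
   form, whose pimm12 is scaled by 4, so it encodes imm12 == 2; the
   decoder below forms the address as Xn|SP + imm12 * (1 << szLg2).
   (Registers illustrative only.)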
5169 if (INSN(29,23) == BITS7(1,1,1,0,0,1,1)) {
5170 /* Further checks on bits 31:30 and 22 */
5171 Bool valid = False;
5172 switch ((INSN(31,30) << 1) | INSN(22,22)) {
5173 case BITS3(1,0,0):
5174 case BITS3(0,1,0): case BITS3(0,1,1):
5175 case BITS3(0,0,0): case BITS3(0,0,1):
5176 valid = True;
5177 break;
5179 if (valid) {
5180 UInt szLg2 = INSN(31,30);
5181 UInt bitX = INSN(22,22);
5182 UInt imm12 = INSN(21,10);
5183 UInt nn = INSN(9,5);
5184 UInt tt = INSN(4,0);
5185 UInt szB = 1 << szLg2;
5186 IRExpr* ea = binop(Iop_Add64,
5187 getIReg64orSP(nn), mkU64(imm12 * szB));
5188 switch (szB) {
5189 case 4:
5190 vassert(bitX == 0);
5191 putIReg64orZR(tt, unop(Iop_32Sto64, loadLE(Ity_I32, ea)));
5192 DIP("ldrsw %s, [%s, #%u]\n", nameIReg64orZR(tt),
5193 nameIReg64orSP(nn), imm12 * szB);
5194 break;
5195 case 2:
5196 if (bitX == 1) {
5197 putIReg32orZR(tt, unop(Iop_16Sto32, loadLE(Ity_I16, ea)));
5198 } else {
5199 putIReg64orZR(tt, unop(Iop_16Sto64, loadLE(Ity_I16, ea)));
5201 DIP("ldrsh %s, [%s, #%u]\n",
5202 nameIRegOrZR(bitX == 0, tt),
5203 nameIReg64orSP(nn), imm12 * szB);
5204 break;
5205 case 1:
5206 if (bitX == 1) {
5207 putIReg32orZR(tt, unop(Iop_8Sto32, loadLE(Ity_I8, ea)));
5208 } else {
5209 putIReg64orZR(tt, unop(Iop_8Sto64, loadLE(Ity_I8, ea)));
5211 DIP("ldrsb %s, [%s, #%u]\n",
5212 nameIRegOrZR(bitX == 0, tt),
5213 nameIReg64orSP(nn), imm12 * szB);
5214 break;
5215 default:
5216 vassert(0);
5218 return True;
5220 /* else fall through */
5223 /* -------------- LDRS{B,H,W} (simm9, upd) -------------- */
5224 /* (at-Rn-then-Rn=EA)
5225 31 29 23 21 20 11 9 4
5226 00 111 000 1x 0 imm9 01 n t LDRSB Rt, [Xn|SP], #simm9
5227 01 111 000 1x 0 imm9 01 n t LDRSH Rt, [Xn|SP], #simm9
5228 10 111 000 10 0 imm9 01 n t LDRSW Xt, [Xn|SP], #simm9
5230 (at-EA-then-Rn=EA)
5231 00 111 000 1x 0 imm9 11 n t LDRSB Rt, [Xn|SP, #simm9]!
5232 01 111 000 1x 0 imm9 11 n t LDRSH Rt, [Xn|SP, #simm9]!
5233 10 111 000 10 0 imm9 11 n t LDRSW Xt, [Xn|SP, #simm9]!
5234 where
5235 Rt is Wt when x==1, Xt when x==0
5236 transfer-at-Rn when [11]==0, at EA when [11]==1
5238 if (INSN(29,23) == BITS7(1,1,1,0,0,0,1)
5239 && INSN(21,21) == 0 && INSN(10,10) == 1) {
5240 /* Further checks on bits 31:30 and 22 */
5241 Bool valid = False;
5242 switch ((INSN(31,30) << 1) | INSN(22,22)) {
5243 case BITS3(1,0,0): // LDRSW Xt
5244 case BITS3(0,1,0): case BITS3(0,1,1): // LDRSH Xt, Wt
5245 case BITS3(0,0,0): case BITS3(0,0,1): // LDRSB Xt, Wt
5246 valid = True;
5247 break;
5249 if (valid) {
5250 UInt szLg2 = INSN(31,30);
5251 UInt imm9 = INSN(20,12);
5252 Bool atRN = INSN(11,11) == 0;
5253 UInt nn = INSN(9,5);
5254 UInt tt = INSN(4,0);
5255 IRTemp tRN = newTemp(Ity_I64);
5256 IRTemp tEA = newTemp(Ity_I64);
5257 IRTemp tTA = IRTemp_INVALID;
5258 ULong simm9 = sx_to_64(imm9, 9);
5259 Bool is64 = INSN(22,22) == 0;
5260 assign(tRN, getIReg64orSP(nn));
5261 assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm9)));
5262 tTA = atRN ? tRN : tEA;
5263 HChar ch = '?';
5264 /* There are 5 cases:
5265 byte load, SX to 64
5266 byte load, SX to 32, ZX to 64
5267 halfword load, SX to 64
5268 halfword load, SX to 32, ZX to 64
5269 word load, SX to 64
5270 The ifs below handle them in the listed order.
5272 if (szLg2 == 0) {
5273 ch = 'b';
5274 if (is64) {
5275 putIReg64orZR(tt, unop(Iop_8Sto64,
5276 loadLE(Ity_I8, mkexpr(tTA))));
5277 } else {
5278 putIReg32orZR(tt, unop(Iop_8Sto32,
5279 loadLE(Ity_I8, mkexpr(tTA))));
5282 else if (szLg2 == 1) {
5283 ch = 'h';
5284 if (is64) {
5285 putIReg64orZR(tt, unop(Iop_16Sto64,
5286 loadLE(Ity_I16, mkexpr(tTA))));
5287 } else {
5288 putIReg32orZR(tt, unop(Iop_16Sto32,
5289 loadLE(Ity_I16, mkexpr(tTA))));
5292 else if (szLg2 == 2 && is64) {
5293 ch = 'w';
5294 putIReg64orZR(tt, unop(Iop_32Sto64,
5295 loadLE(Ity_I32, mkexpr(tTA))));
5297 else {
5298 vassert(0);
5300 putIReg64orSP(nn, mkexpr(tEA));
5301 DIP(atRN ? "ldrs%c %s, [%s], #%llu\n" : "ldrs%c %s, [%s, #%llu]!",
5302 ch, nameIRegOrZR(is64, tt), nameIReg64orSP(nn), simm9);
5303 return True;
5305 /* else fall through */
5308 /* -------------- LDRS{B,H,W} (simm9, noUpd) -------------- */
5309 /* 31 29 23 21 20 11 9 4
5310 00 111 000 1x 0 imm9 00 n t LDURSB Rt, [Xn|SP, #simm9]
5311 01 111 000 1x 0 imm9 00 n t LDURSH Rt, [Xn|SP, #simm9]
5312 10 111 000 10 0 imm9 00 n t LDURSW Xt, [Xn|SP, #simm9]
5313 where
5314 Rt is Wt when x==1, Xt when x==0
5316 if (INSN(29,23) == BITS7(1,1,1,0,0,0,1)
5317 && INSN(21,21) == 0 && INSN(11,10) == BITS2(0,0)) {
5318 /* Further checks on bits 31:30 and 22 */
5319 Bool valid = False;
5320 switch ((INSN(31,30) << 1) | INSN(22,22)) {
5321 case BITS3(1,0,0): // LDURSW Xt
5322 case BITS3(0,1,0): case BITS3(0,1,1): // LDURSH Xt, Wt
5323 case BITS3(0,0,0): case BITS3(0,0,1): // LDURSB Xt, Wt
5324 valid = True;
5325 break;
5327 if (valid) {
5328 UInt szLg2 = INSN(31,30);
5329 UInt imm9 = INSN(20,12);
5330 UInt nn = INSN(9,5);
5331 UInt tt = INSN(4,0);
5332 IRTemp tRN = newTemp(Ity_I64);
5333 IRTemp tEA = newTemp(Ity_I64);
5334 ULong simm9 = sx_to_64(imm9, 9);
5335 Bool is64 = INSN(22,22) == 0;
5336 assign(tRN, getIReg64orSP(nn));
5337 assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm9)));
5338 HChar ch = '?';
5339 /* There are 5 cases:
5340 byte load, SX to 64
5341 byte load, SX to 32, ZX to 64
5342 halfword load, SX to 64
5343 halfword load, SX to 32, ZX to 64
5344 word load, SX to 64
5345 The ifs below handle them in the listed order.
5347 if (szLg2 == 0) {
5348 ch = 'b';
5349 if (is64) {
5350 putIReg64orZR(tt, unop(Iop_8Sto64,
5351 loadLE(Ity_I8, mkexpr(tEA))));
5352 } else {
5353 putIReg32orZR(tt, unop(Iop_8Sto32,
5354 loadLE(Ity_I8, mkexpr(tEA))));
5357 else if (szLg2 == 1) {
5358 ch = 'h';
5359 if (is64) {
5360 putIReg64orZR(tt, unop(Iop_16Sto64,
5361 loadLE(Ity_I16, mkexpr(tEA))));
5362 } else {
5363 putIReg32orZR(tt, unop(Iop_16Sto32,
5364 loadLE(Ity_I16, mkexpr(tEA))));
5367 else if (szLg2 == 2 && is64) {
5368 ch = 'w';
5369 putIReg64orZR(tt, unop(Iop_32Sto64,
5370 loadLE(Ity_I32, mkexpr(tEA))));
5372 else {
5373 vassert(0);
5375 DIP("ldurs%c %s, [%s, #%lld]",
5376 ch, nameIRegOrZR(is64, tt), nameIReg64orSP(nn), (Long)simm9);
5377 return True;
5379 /* else fall through */
5382 /* -------- LDP,STP (immediate, simm7) (FP&VEC) -------- */
5383 /* L==1 => mm==LD
5384 L==0 => mm==ST
5385 sz==00 => 32 bit (S) transfers
5386 sz==01 => 64 bit (D) transfers
5387 sz==10 => 128 bit (Q) transfers
5388 sz==11 isn't allowed
5389 simm7 is scaled by the (single-register) transfer size
5391 31 29 26 22 21 14 9 4
5393 sz 101 1000 L imm7 t2 n t1 mmNP SDQt1, SDQt2, [Xn|SP, #imm]
5394 (at-EA, with nontemporal hint)
5396 sz 101 1001 L imm7 t2 n t1 mmP SDQt1, SDQt2, [Xn|SP], #imm
5397 (at-Rn-then-Rn=EA)
5399 sz 101 1010 L imm7 t2 n t1 mmP SDQt1, SDQt2, [Xn|SP, #imm]
5400 (at-EA)
5402 sz 101 1011 L imm7 t2 n t1 mmP SDQt1, SDQt2, [Xn|SP, #imm]!
5403 (at-EA-then-Rn=EA)
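   For example, sz==10 selects Q registers, so the immediate is
   scaled by 16 and "stp q0, q1, [sp, #-32]!" encodes imm7 == -2;
   the decoder below uses szB = 4 << sz as the per-register size.
   (Registers illustrative only.)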
5405 if (INSN(29,25) == BITS5(1,0,1,1,0)) {
5406 UInt szSlg2 = INSN(31,30); // log2 of the xfer size in 32-bit units
5407 Bool isLD = INSN(22,22) == 1;
5408 Bool wBack = INSN(23,23) == 1;
5409 Long simm7 = (Long)sx_to_64(INSN(21,15), 7);
5410 UInt tt2 = INSN(14,10);
5411 UInt nn = INSN(9,5);
5412 UInt tt1 = INSN(4,0);
5413 if (szSlg2 == BITS2(1,1) || (isLD && tt1 == tt2)) {
5414 /* undecodable; fall through */
5415 } else {
5416 if (nn == 31) { /* FIXME generate stack alignment check */ }
5418 // Compute the transfer address TA and the writeback address WA.
5419 UInt szB = 4 << szSlg2; /* szB is the per-register size */
5420 IRTemp tRN = newTemp(Ity_I64);
5421 assign(tRN, getIReg64orSP(nn));
5422 IRTemp tEA = newTemp(Ity_I64);
5423 simm7 = szB * simm7;
5424 assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm7)));
5426 IRTemp tTA = newTemp(Ity_I64);
5427 IRTemp tWA = newTemp(Ity_I64);
5428 switch (INSN(24,23)) {
5429 case BITS2(0,1):
5430 assign(tTA, mkexpr(tRN)); assign(tWA, mkexpr(tEA)); break;
5431 case BITS2(1,1):
5432 assign(tTA, mkexpr(tEA)); assign(tWA, mkexpr(tEA)); break;
5433 case BITS2(1,0):
5434 case BITS2(0,0):
5435 assign(tTA, mkexpr(tEA)); /* tWA is unused */ break;
5436 default:
5437 vassert(0); /* NOTREACHED */
5440 IRType ty = Ity_INVALID;
5441 switch (szB) {
5442 case 4: ty = Ity_F32; break;
5443 case 8: ty = Ity_F64; break;
5444 case 16: ty = Ity_V128; break;
5445 default: vassert(0);
5448 /* Normally rN would be updated after the transfer. However, in
5449 the special cases typified by
5450 stp q0, q1, [sp,#-512]!
5451 stp d0, d1, [sp,#-512]!
5452 stp s0, s1, [sp,#-512]!
5453 it is necessary to update SP before the transfer, (1)
5454 because Memcheck will otherwise complain about a write
5455 below the stack pointer, and (2) because the segfault
5456 stack extension mechanism will otherwise extend the stack
5457 only down to SP before the instruction, which might not be
5458 far enough, if the -512 offset takes the actual access
5459 address to the next page.
5461 Bool earlyWBack
5462 = wBack && simm7 < 0
5463 && INSN(24,23) == BITS2(1,1) && nn == 31 && !isLD;
5465 if (wBack && earlyWBack)
5466 putIReg64orSP(nn, mkexpr(tEA));
5468 if (isLD) {
5469 if (szB < 16) {
5470 putQReg128(tt1, mkV128(0x0000));
5472 putQRegLO(tt1,
5473 loadLE(ty, binop(Iop_Add64, mkexpr(tTA), mkU64(0))));
5474 if (szB < 16) {
5475 putQReg128(tt2, mkV128(0x0000));
5477 putQRegLO(tt2,
5478 loadLE(ty, binop(Iop_Add64, mkexpr(tTA), mkU64(szB))));
5479 } else {
5480 storeLE(binop(Iop_Add64, mkexpr(tTA), mkU64(0)),
5481 getQRegLO(tt1, ty));
5482 storeLE(binop(Iop_Add64, mkexpr(tTA), mkU64(szB)),
5483 getQRegLO(tt2, ty));
5486 if (wBack && !earlyWBack)
5487 putIReg64orSP(nn, mkexpr(tEA));
5489 const HChar* fmt_str = NULL;
5490 switch (INSN(24,23)) {
5491 case BITS2(0,1):
5492 fmt_str = "%sp %s, %s, [%s], #%lld (at-Rn-then-Rn=EA)\n";
5493 break;
5494 case BITS2(1,1):
5495 fmt_str = "%sp %s, %s, [%s, #%lld]! (at-EA-then-Rn=EA)\n";
5496 break;
5497 case BITS2(1,0):
5498 fmt_str = "%sp %s, %s, [%s, #%lld] (at-Rn)\n";
5499 break;
5500 case BITS2(0,0):
5501 fmt_str = "%snp %s, %s, [%s, #%lld] (at-Rn)\n";
5502 break;
5503 default:
5504 vassert(0);
5506 DIP(fmt_str, isLD ? "ld" : "st",
5507 nameQRegLO(tt1, ty), nameQRegLO(tt2, ty),
5508 nameIReg64orSP(nn), simm7);
5509 return True;
5513 /* -------------- {LD,ST}R (vector register) --------------- */
5514 /* 31 29 23 20 15 12 11 9 4
5515 | | | | | | | | |
5516 00 111100 011 Rm option S 10 Rn Rt LDR Bt, [Xn|SP, R<m>{ext/sh}]
5517 01 111100 011 Rm option S 10 Rn Rt LDR Ht, [Xn|SP, R<m>{ext/sh}]
5518 10 111100 011 Rm option S 10 Rn Rt LDR St, [Xn|SP, R<m>{ext/sh}]
5519 11 111100 011 Rm option S 10 Rn Rt LDR Dt, [Xn|SP, R<m>{ext/sh}]
5520 00 111100 111 Rm option S 10 Rn Rt LDR Qt, [Xn|SP, R<m>{ext/sh}]
5522 00 111100 001 Rm option S 10 Rn Rt STR Bt, [Xn|SP, R<m>{ext/sh}]
5523 01 111100 001 Rm option S 10 Rn Rt STR Ht, [Xn|SP, R<m>{ext/sh}]
5524 10 111100 001 Rm option S 10 Rn Rt STR St, [Xn|SP, R<m>{ext/sh}]
5525 11 111100 001 Rm option S 10 Rn Rt STR Dt, [Xn|SP, R<m>{ext/sh}]
5526 00 111100 101 Rm option S 10 Rn Rt STR Qt, [Xn|SP, R<m>{ext/sh}]
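      For example (illustrative only; the operands are made up):
      "ldr q3, [x5, x2, lsl #4]" has size bits 31:30 = 00 with bit 23
      set, giving szLg2 = 4 (a Q transfer); option=011 (LSL) with S=1
      scales the index register by the 16-byte transfer size.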
5528 if (INSN(29,24) == BITS6(1,1,1,1,0,0)
5529 && INSN(21,21) == 1 && INSN(11,10) == BITS2(1,0)) {
5530 HChar dis_buf[64];
5531 UInt szLg2 = (INSN(23,23) << 2) | INSN(31,30);
5532 Bool isLD = INSN(22,22) == 1;
5533 UInt tt = INSN(4,0);
5534 if (szLg2 > 4) goto after_LDR_STR_vector_register;
5535 IRTemp ea = gen_indexed_EA(dis_buf, insn, False/*to/from vec regs*/);
5536 if (ea == IRTemp_INVALID) goto after_LDR_STR_vector_register;
5537 switch (szLg2) {
5538 case 0: /* 8 bit */
5539 if (isLD) {
5540 putQReg128(tt, mkV128(0x0000));
5541 putQRegLO(tt, loadLE(Ity_I8, mkexpr(ea)));
5542 DIP("ldr %s, %s\n", nameQRegLO(tt, Ity_I8), dis_buf);
5543 } else {
5544 storeLE(mkexpr(ea), getQRegLO(tt, Ity_I8));
5545 DIP("str %s, %s\n", nameQRegLO(tt, Ity_I8), dis_buf);
5547 break;
5548 case 1:
5549 if (isLD) {
5550 putQReg128(tt, mkV128(0x0000));
5551 putQRegLO(tt, loadLE(Ity_I16, mkexpr(ea)));
5552 DIP("ldr %s, %s\n", nameQRegLO(tt, Ity_I16), dis_buf);
5553 } else {
5554 storeLE(mkexpr(ea), getQRegLO(tt, Ity_I16));
5555 DIP("str %s, %s\n", nameQRegLO(tt, Ity_I16), dis_buf);
5557 break;
5558 case 2: /* 32 bit */
5559 if (isLD) {
5560 putQReg128(tt, mkV128(0x0000));
5561 putQRegLO(tt, loadLE(Ity_I32, mkexpr(ea)));
5562 DIP("ldr %s, %s\n", nameQRegLO(tt, Ity_I32), dis_buf);
5563 } else {
5564 storeLE(mkexpr(ea), getQRegLO(tt, Ity_I32));
5565 DIP("str %s, %s\n", nameQRegLO(tt, Ity_I32), dis_buf);
5567 break;
5568 case 3: /* 64 bit */
5569 if (isLD) {
5570 putQReg128(tt, mkV128(0x0000));
5571 putQRegLO(tt, loadLE(Ity_I64, mkexpr(ea)));
5572 DIP("ldr %s, %s\n", nameQRegLO(tt, Ity_I64), dis_buf);
5573 } else {
5574 storeLE(mkexpr(ea), getQRegLO(tt, Ity_I64));
5575 DIP("str %s, %s\n", nameQRegLO(tt, Ity_I64), dis_buf);
5577 break;
5578 case 4:
5579 if (isLD) {
5580 putQReg128(tt, loadLE(Ity_V128, mkexpr(ea)));
5581 DIP("ldr %s, %s\n", nameQReg128(tt), dis_buf);
5582 } else {
5583 storeLE(mkexpr(ea), getQReg128(tt));
5584 DIP("str %s, %s\n", nameQReg128(tt), dis_buf);
5586 break;
5587 default:
5588 vassert(0);
5590 return True;
5592 after_LDR_STR_vector_register:
5594 /* ---------- LDRS{B,H,W} (integer register, SX) ---------- */
5595 /* 31 29 22 20 15 12 11 9 4
5596 | | | | | | | | |
5597 10 1110001 01 Rm opt S 10 Rn Rt LDRSW Xt, [Xn|SP, R<m>{ext/sh}]
5599 01 1110001 01 Rm opt S 10 Rn Rt LDRSH Xt, [Xn|SP, R<m>{ext/sh}]
5600 01 1110001 11 Rm opt S 10 Rn Rt LDRSH Wt, [Xn|SP, R<m>{ext/sh}]
5602 00 1110001 01 Rm opt S 10 Rn Rt LDRSB Xt, [Xn|SP, R<m>{ext/sh}]
5603 00 1110001 11 Rm opt S 10 Rn Rt LDRSB Wt, [Xn|SP, R<m>{ext/sh}]
5605 if (INSN(29,23) == BITS7(1,1,1,0,0,0,1)
5606 && INSN(21,21) == 1 && INSN(11,10) == BITS2(1,0)) {
5607 HChar dis_buf[64];
5608 UInt szLg2 = INSN(31,30);
5609 Bool sxTo64 = INSN(22,22) == 0; // else sx to 32 and zx to 64
5610 UInt tt = INSN(4,0);
5611 if (szLg2 == 3) goto after_LDRS_integer_register;
5612 IRTemp ea = gen_indexed_EA(dis_buf, insn, True/*to/from int regs*/);
5613 if (ea == IRTemp_INVALID) goto after_LDRS_integer_register;
5614 /* Enumerate the 5 variants explicitly. */
5615 if (szLg2 == 2/*32 bit*/ && sxTo64) {
5616 putIReg64orZR(tt, unop(Iop_32Sto64, loadLE(Ity_I32, mkexpr(ea))));
5617 DIP("ldrsw %s, %s\n", nameIReg64orZR(tt), dis_buf);
5618 return True;
5620 else
5621 if (szLg2 == 1/*16 bit*/) {
5622 if (sxTo64) {
5623 putIReg64orZR(tt, unop(Iop_16Sto64, loadLE(Ity_I16, mkexpr(ea))));
5624 DIP("ldrsh %s, %s\n", nameIReg64orZR(tt), dis_buf);
5625 } else {
5626 putIReg32orZR(tt, unop(Iop_16Sto32, loadLE(Ity_I16, mkexpr(ea))));
5627 DIP("ldrsh %s, %s\n", nameIReg32orZR(tt), dis_buf);
5629 return True;
5631 else
5632 if (szLg2 == 0/*8 bit*/) {
5633 if (sxTo64) {
5634 putIReg64orZR(tt, unop(Iop_8Sto64, loadLE(Ity_I8, mkexpr(ea))));
5635 DIP("ldrsb %s, %s\n", nameIReg64orZR(tt), dis_buf);
5636 } else {
5637 putIReg32orZR(tt, unop(Iop_8Sto32, loadLE(Ity_I8, mkexpr(ea))));
5638 DIP("ldrsb %s, %s\n", nameIReg32orZR(tt), dis_buf);
5640 return True;
5642 /* else it's an invalid combination */
5644 after_LDRS_integer_register:
5646 /* -------- LDR/STR (immediate, SIMD&FP, unsigned offset) -------- */
5647 /* This is the Unsigned offset variant only. The Post-Index and
5648 Pre-Index variants are below.
5650 31 29 23 21 9 4
5651 00 111 101 01 imm12 n t LDR Bt, [Xn|SP + imm12 * 1]
5652 01 111 101 01 imm12 n t LDR Ht, [Xn|SP + imm12 * 2]
5653 10 111 101 01 imm12 n t LDR St, [Xn|SP + imm12 * 4]
5654 11 111 101 01 imm12 n t LDR Dt, [Xn|SP + imm12 * 8]
5655 00 111 101 11 imm12 n t LDR Qt, [Xn|SP + imm12 * 16]
5657 00 111 101 00 imm12 n t STR Bt, [Xn|SP + imm12 * 1]
5658 01 111 101 00 imm12 n t STR Ht, [Xn|SP + imm12 * 2]
5659 10 111 101 00 imm12 n t STR St, [Xn|SP + imm12 * 4]
5660 11 111 101 00 imm12 n t STR Dt, [Xn|SP + imm12 * 8]
5661 00 111 101 10 imm12 n t STR Qt, [Xn|SP + imm12 * 16]
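      For example (illustrative only; the operands are made up):
      "ldr d0, [x1, #16]" has sz=11 and imm12 = 2, the immediate being
      scaled by the 8-byte transfer size.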
5663 if (INSN(29,24) == BITS6(1,1,1,1,0,1)
5664 && ((INSN(23,23) << 2) | INSN(31,30)) <= 4) {
5665 UInt szLg2 = (INSN(23,23) << 2) | INSN(31,30);
5666 Bool isLD = INSN(22,22) == 1;
5667 UInt pimm12 = INSN(21,10) << szLg2;
5668 UInt nn = INSN(9,5);
5669 UInt tt = INSN(4,0);
5670 IRTemp tEA = newTemp(Ity_I64);
5671 IRType ty = preferredVectorSubTypeFromSize(1 << szLg2);
5672 assign(tEA, binop(Iop_Add64, getIReg64orSP(nn), mkU64(pimm12)));
5673 if (isLD) {
5674 if (szLg2 < 4) {
5675 putQReg128(tt, mkV128(0x0000));
5677 putQRegLO(tt, loadLE(ty, mkexpr(tEA)));
5678 } else {
5679 storeLE(mkexpr(tEA), getQRegLO(tt, ty));
5681 DIP("%s %s, [%s, #%u]\n",
5682 isLD ? "ldr" : "str",
5683 nameQRegLO(tt, ty), nameIReg64orSP(nn), pimm12);
5684 return True;
5687 /* -------- LDR/STR (immediate, SIMD&FP, pre/post index) -------- */
5688 /* These are the Post-Index and Pre-Index variants.
5690 31 29 23 20 11 9 4
5691 (at-Rn-then-Rn=EA)
5692 00 111 100 01 0 imm9 01 n t LDR Bt, [Xn|SP], #simm
5693 01 111 100 01 0 imm9 01 n t LDR Ht, [Xn|SP], #simm
5694 10 111 100 01 0 imm9 01 n t LDR St, [Xn|SP], #simm
5695 11 111 100 01 0 imm9 01 n t LDR Dt, [Xn|SP], #simm
5696 00 111 100 11 0 imm9 01 n t LDR Qt, [Xn|SP], #simm
5698 (at-EA-then-Rn=EA)
5699 00 111 100 01 0 imm9 11 n t LDR Bt, [Xn|SP, #simm]!
5700 01 111 100 01 0 imm9 11 n t LDR Ht, [Xn|SP, #simm]!
5701 10 111 100 01 0 imm9 11 n t LDR St, [Xn|SP, #simm]!
5702 11 111 100 01 0 imm9 11 n t LDR Dt, [Xn|SP, #simm]!
5703 00 111 100 11 0 imm9 11 n t LDR Qt, [Xn|SP, #simm]!
5705 Stores are the same except with bit 22 set to 0.
5707 if (INSN(29,24) == BITS6(1,1,1,1,0,0)
5708 && ((INSN(23,23) << 2) | INSN(31,30)) <= 4
5709 && INSN(21,21) == 0 && INSN(10,10) == 1) {
5710 UInt szLg2 = (INSN(23,23) << 2) | INSN(31,30);
5711 Bool isLD = INSN(22,22) == 1;
5712 UInt imm9 = INSN(20,12);
5713 Bool atRN = INSN(11,11) == 0;
5714 UInt nn = INSN(9,5);
5715 UInt tt = INSN(4,0);
5716 IRTemp tRN = newTemp(Ity_I64);
5717 IRTemp tEA = newTemp(Ity_I64);
5718 IRTemp tTA = IRTemp_INVALID;
5719 IRType ty = preferredVectorSubTypeFromSize(1 << szLg2);
5720 ULong simm9 = sx_to_64(imm9, 9);
5721 assign(tRN, getIReg64orSP(nn));
5722 assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm9)));
5723 tTA = atRN ? tRN : tEA;
5725 /* Do early writeback for the cases typified by
5726 str d8, [sp, #-32]!
5727 str d10, [sp, #-128]!
5728 str q1, [sp, #-32]!
5729 for the same reasons as described in a similar comment in the
5730 "LDP,STP (immediate, simm7) (FP&VEC)" case just above.
5732 Bool earlyWBack
5733 = !atRN && !isLD && (ty == Ity_F64 || ty == Ity_V128)
5734 && nn == 31 && ((Long)simm9) < 0;
5736 if (earlyWBack)
5737 putIReg64orSP(nn, mkexpr(tEA));
5739 if (isLD) {
5740 if (szLg2 < 4) {
5741 putQReg128(tt, mkV128(0x0000));
5743 putQRegLO(tt, loadLE(ty, mkexpr(tTA)));
5744 } else {
5745 storeLE(mkexpr(tTA), getQRegLO(tt, ty));
5748 if (!earlyWBack)
5749 putIReg64orSP(nn, mkexpr(tEA));
5751 DIP(atRN ? "%s %s, [%s], #%lld\n" : "%s %s, [%s, #%lld]!\n",
5752 isLD ? "ldr" : "str",
5753 nameQRegLO(tt, ty), nameIReg64orSP(nn), (Long)simm9);
5754 return True;
5757 /* -------- LDUR/STUR (unscaled offset, SIMD&FP) -------- */
5758 /* 31 29 23 20 11 9 4
5759 00 111 100 01 0 imm9 00 n t LDR Bt, [Xn|SP, #simm]
5760 01 111 100 01 0 imm9 00 n t LDR Ht, [Xn|SP, #simm]
5761 10 111 100 01 0 imm9 00 n t LDR St, [Xn|SP, #simm]
5762 11 111 100 01 0 imm9 00 n t LDR Dt, [Xn|SP, #simm]
5763 00 111 100 11 0 imm9 00 n t LDR Qt, [Xn|SP, #simm]
5765 00 111 100 00 0 imm9 00 n t STR Bt, [Xn|SP, #simm]
5766 01 111 100 00 0 imm9 00 n t STR Ht, [Xn|SP, #simm]
5767 10 111 100 00 0 imm9 00 n t STR St, [Xn|SP, #simm]
5768 11 111 100 00 0 imm9 00 n t STR Dt, [Xn|SP, #simm]
5769 00 111 100 10 0 imm9 00 n t STR Qt, [Xn|SP, #simm]
5771 if (INSN(29,24) == BITS6(1,1,1,1,0,0)
5772 && ((INSN(23,23) << 2) | INSN(31,30)) <= 4
5773 && INSN(21,21) == 0 && INSN(11,10) == BITS2(0,0)) {
5774 UInt szLg2 = (INSN(23,23) << 2) | INSN(31,30);
5775 Bool isLD = INSN(22,22) == 1;
5776 UInt imm9 = INSN(20,12);
5777 UInt nn = INSN(9,5);
5778 UInt tt = INSN(4,0);
5779 ULong simm9 = sx_to_64(imm9, 9);
5780 IRTemp tEA = newTemp(Ity_I64);
5781 IRType ty = preferredVectorSubTypeFromSize(1 << szLg2);
5782 assign(tEA, binop(Iop_Add64, getIReg64orSP(nn), mkU64(simm9)));
5783 if (isLD) {
5784 if (szLg2 < 4) {
5785 putQReg128(tt, mkV128(0x0000));
5787 putQRegLO(tt, loadLE(ty, mkexpr(tEA)));
5788 } else {
5789 storeLE(mkexpr(tEA), getQRegLO(tt, ty));
5791 DIP("%s %s, [%s, #%lld]\n",
5792 isLD ? "ldur" : "stur",
5793 nameQRegLO(tt, ty), nameIReg64orSP(nn), (Long)simm9);
5794 return True;
5797 /* ---------------- LDR (literal, SIMD&FP) ---------------- */
5798 /* 31 29 23 4
5799 00 011 100 imm19 t LDR St, [PC + sxTo64(imm19 << 2)]
5800 01 011 100 imm19 t LDR Dt, [PC + sxTo64(imm19 << 2)]
5801 10 011 100 imm19 t LDR Qt, [PC + sxTo64(imm19 << 2)]
5803 if (INSN(29,24) == BITS6(0,1,1,1,0,0) && INSN(31,30) < BITS2(1,1)) {
5804 UInt szB = 4 << INSN(31,30);
5805 UInt imm19 = INSN(23,5);
5806 UInt tt = INSN(4,0);
5807 ULong ea = guest_PC_curr_instr + sx_to_64(imm19 << 2, 21);
5808 IRType ty = preferredVectorSubTypeFromSize(szB);
5809 putQReg128(tt, mkV128(0x0000));
5810 putQRegLO(tt, loadLE(ty, mkU64(ea)));
5811 DIP("ldr %s, 0x%llx (literal)\n", nameQRegLO(tt, ty), ea);
5812 return True;
5815 /* ------ LD1/ST1 (multiple 1-elem structs to/from 1 reg ------ */
5816 /* ------ LD2/ST2 (multiple 2-elem structs to/from 2 regs ------ */
5817 /* ------ LD3/ST3 (multiple 3-elem structs to/from 3 regs ------ */
5818 /* ------ LD4/ST4 (multiple 4-elem structs to/from 4 regs ------ */
5819 /* 31 29 26 22 21 20 15 11 9 4
5821 0q 001 1000 L 0 00000 0000 sz n t xx4 {Vt..t+3.T}, [Xn|SP]
5822 0q 001 1001 L 0 m 0000 sz n t xx4 {Vt..t+3.T}, [Xn|SP], step
5824 0q 001 1000 L 0 00000 0100 sz n t xx3 {Vt..t+2.T}, [Xn|SP]
5825 0q 001 1001 L 0 m 0100 sz n t xx3 {Vt..t+2.T}, [Xn|SP], step
5827 0q 001 1000 L 0 00000 1000 sz n t xx2 {Vt..t+1.T}, [Xn|SP]
5828 0q 001 1001 L 0 m 1000 sz n t xx2 {Vt..t+1.T}, [Xn|SP], step
5830 0q 001 1000 L 0 00000 0111 sz n t xx1 {Vt.T}, [Xn|SP]
5831 0q 001 1001 L 0 m 0111 sz n t xx1 {Vt.T}, [Xn|SP], step
5833 T = defined by Q and sz in the normal way
5834 step = if m == 11111 then transfer-size else Xm
5835 xx = case L of 1 -> LD ; 0 -> ST
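      For example (illustrative only; the operands are made up):
      "ld2 {v4.8h, v5.8h}, [x0], #32" has q=1, L=1, opc=1000 (two
      registers) and sz=01; with m=11111 the post-index step is the
      transfer size, 2 * 16 = 32 bytes.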
5837 if (INSN(31,31) == 0 && INSN(29,24) == BITS6(0,0,1,1,0,0)
5838 && INSN(21,21) == 0) {
5839 Bool bitQ = INSN(30,30);
5840 Bool isPX = INSN(23,23) == 1;
5841 Bool isLD = INSN(22,22) == 1;
5842 UInt mm = INSN(20,16);
5843 UInt opc = INSN(15,12);
5844 UInt sz = INSN(11,10);
5845 UInt nn = INSN(9,5);
5846 UInt tt = INSN(4,0);
5847 Bool isQ = bitQ == 1;
5848 Bool is1d = sz == BITS2(1,1) && !isQ;
5849 UInt nRegs = 0;
5850 switch (opc) {
5851 case BITS4(0,0,0,0): nRegs = 4; break;
5852 case BITS4(0,1,0,0): nRegs = 3; break;
5853 case BITS4(1,0,0,0): nRegs = 2; break;
5854 case BITS4(0,1,1,1): nRegs = 1; break;
5855 default: break;
5858 /* The combination insn[23] == 0 && insn[20:16] != 0 is not allowed.
5859 If we see it, set nRegs to 0 so as to cause the next conditional
5860 to fail. */
5861 if (!isPX && mm != 0)
5862 nRegs = 0;
5864 if (nRegs == 1 /* .1d is allowed */
5865 || (nRegs >= 2 && nRegs <= 4 && !is1d) /* .1d is not allowed */) {
5867 UInt xferSzB = (isQ ? 16 : 8) * nRegs;
5869 /* Generate the transfer address (TA) and if necessary the
5870 writeback address (WB) */
5871 IRTemp tTA = newTemp(Ity_I64);
5872 assign(tTA, getIReg64orSP(nn));
5873 if (nn == 31) { /* FIXME generate stack alignment check */ }
5874 IRTemp tWB = IRTemp_INVALID;
5875 if (isPX) {
5876 tWB = newTemp(Ity_I64);
5877 assign(tWB, binop(Iop_Add64,
5878 mkexpr(tTA),
5879 mm == BITS5(1,1,1,1,1) ? mkU64(xferSzB)
5880 : getIReg64orZR(mm)));
5883 /* -- BEGIN generate the transfers -- */
5885 IRTemp u0, u1, u2, u3, i0, i1, i2, i3;
5886 u0 = u1 = u2 = u3 = i0 = i1 = i2 = i3 = IRTemp_INVALID;
5887 switch (nRegs) {
5888 case 4: u3 = newTempV128(); i3 = newTempV128(); /* fallthru */
5889 case 3: u2 = newTempV128(); i2 = newTempV128(); /* fallthru */
5890 case 2: u1 = newTempV128(); i1 = newTempV128(); /* fallthru */
5891 case 1: u0 = newTempV128(); i0 = newTempV128(); break;
5892 default: vassert(0);
5895 /* -- Multiple 128 or 64 bit stores -- */
5896 if (!isLD) {
5897 switch (nRegs) {
5898 case 4: assign(u3, getQReg128((tt+3) % 32)); /* fallthru */
5899 case 3: assign(u2, getQReg128((tt+2) % 32)); /* fallthru */
5900 case 2: assign(u1, getQReg128((tt+1) % 32)); /* fallthru */
5901 case 1: assign(u0, getQReg128((tt+0) % 32)); break;
5902 default: vassert(0);
5904 switch (nRegs) {
5905 case 4: (isQ ? math_INTERLEAVE4_128 : math_INTERLEAVE4_64)
5906 (&i0, &i1, &i2, &i3, sz, u0, u1, u2, u3);
5907 break;
5908 case 3: (isQ ? math_INTERLEAVE3_128 : math_INTERLEAVE3_64)
5909 (&i0, &i1, &i2, sz, u0, u1, u2);
5910 break;
5911 case 2: (isQ ? math_INTERLEAVE2_128 : math_INTERLEAVE2_64)
5912 (&i0, &i1, sz, u0, u1);
5913 break;
5914 case 1: (isQ ? math_INTERLEAVE1_128 : math_INTERLEAVE1_64)
5915 (&i0, sz, u0);
5916 break;
5917 default: vassert(0);
5919 # define MAYBE_NARROW_TO_64(_expr) \
5920 (isQ ? (_expr) : unop(Iop_V128to64,(_expr)))
5921 UInt step = isQ ? 16 : 8;
5922 switch (nRegs) {
5923 case 4: storeLE( binop(Iop_Add64, mkexpr(tTA), mkU64(3*step)),
5924 MAYBE_NARROW_TO_64(mkexpr(i3)) );
5925 /* fallthru */
5926 case 3: storeLE( binop(Iop_Add64, mkexpr(tTA), mkU64(2*step)),
5927 MAYBE_NARROW_TO_64(mkexpr(i2)) );
5928 /* fallthru */
5929 case 2: storeLE( binop(Iop_Add64, mkexpr(tTA), mkU64(1*step)),
5930 MAYBE_NARROW_TO_64(mkexpr(i1)) );
5931 /* fallthru */
5932 case 1: storeLE( binop(Iop_Add64, mkexpr(tTA), mkU64(0*step)),
5933 MAYBE_NARROW_TO_64(mkexpr(i0)) );
5934 break;
5935 default: vassert(0);
5937 # undef MAYBE_NARROW_TO_64
5940 /* -- Multiple 128 or 64 bit loads -- */
5941 else /* isLD */ {
5942 UInt step = isQ ? 16 : 8;
5943 IRType loadTy = isQ ? Ity_V128 : Ity_I64;
5944 # define MAYBE_WIDEN_FROM_64(_expr) \
5945 (isQ ? (_expr) : unop(Iop_64UtoV128,(_expr)))
5946 switch (nRegs) {
5947 case 4:
5948 assign(i3, MAYBE_WIDEN_FROM_64(
5949 loadLE(loadTy,
5950 binop(Iop_Add64, mkexpr(tTA),
5951 mkU64(3 * step)))));
5952 /* fallthru */
5953 case 3:
5954 assign(i2, MAYBE_WIDEN_FROM_64(
5955 loadLE(loadTy,
5956 binop(Iop_Add64, mkexpr(tTA),
5957 mkU64(2 * step)))));
5958 /* fallthru */
5959 case 2:
5960 assign(i1, MAYBE_WIDEN_FROM_64(
5961 loadLE(loadTy,
5962 binop(Iop_Add64, mkexpr(tTA),
5963 mkU64(1 * step)))));
5964 /* fallthru */
5965 case 1:
5966 assign(i0, MAYBE_WIDEN_FROM_64(
5967 loadLE(loadTy,
5968 binop(Iop_Add64, mkexpr(tTA),
5969 mkU64(0 * step)))));
5970 break;
5971 default:
5972 vassert(0);
5974 # undef MAYBE_WIDEN_FROM_64
5975 switch (nRegs) {
5976 case 4: (isQ ? math_DEINTERLEAVE4_128 : math_DEINTERLEAVE4_64)
5977 (&u0, &u1, &u2, &u3, sz, i0,i1,i2,i3);
5978 break;
5979 case 3: (isQ ? math_DEINTERLEAVE3_128 : math_DEINTERLEAVE3_64)
5980 (&u0, &u1, &u2, sz, i0, i1, i2);
5981 break;
5982 case 2: (isQ ? math_DEINTERLEAVE2_128 : math_DEINTERLEAVE2_64)
5983 (&u0, &u1, sz, i0, i1);
5984 break;
5985 case 1: (isQ ? math_DEINTERLEAVE1_128 : math_DEINTERLEAVE1_64)
5986 (&u0, sz, i0);
5987 break;
5988 default: vassert(0);
5990 switch (nRegs) {
5991 case 4: putQReg128( (tt+3) % 32,
5992 math_MAYBE_ZERO_HI64(bitQ, u3));
5993 /* fallthru */
5994 case 3: putQReg128( (tt+2) % 32,
5995 math_MAYBE_ZERO_HI64(bitQ, u2));
5996 /* fallthru */
5997 case 2: putQReg128( (tt+1) % 32,
5998 math_MAYBE_ZERO_HI64(bitQ, u1));
5999 /* fallthru */
6000 case 1: putQReg128( (tt+0) % 32,
6001 math_MAYBE_ZERO_HI64(bitQ, u0));
6002 break;
6003 default: vassert(0);
6007 /* -- END generate the transfers -- */
6009 /* Do the writeback, if necessary */
6010 if (isPX) {
6011 putIReg64orSP(nn, mkexpr(tWB));
6014 HChar pxStr[20];
6015 pxStr[0] = pxStr[sizeof(pxStr)-1] = 0;
6016 if (isPX) {
6017 if (mm == BITS5(1,1,1,1,1))
6018 vex_sprintf(pxStr, ", #%u", xferSzB);
6019 else
6020 vex_sprintf(pxStr, ", %s", nameIReg64orZR(mm));
6022 const HChar* arr = nameArr_Q_SZ(bitQ, sz);
6023 DIP("%s%u {v%u.%s .. v%u.%s}, [%s]%s\n",
6024 isLD ? "ld" : "st", nRegs,
6025 (tt+0) % 32, arr, (tt+nRegs-1) % 32, arr, nameIReg64orSP(nn),
6026 pxStr);
6028 return True;
6030 /* else fall through */
6033 /* ------ LD1/ST1 (multiple 1-elem structs to/from 2 regs ------ */
6034 /* ------ LD1/ST1 (multiple 1-elem structs to/from 3 regs ------ */
6035 /* ------ LD1/ST1 (multiple 1-elem structs to/from 4 regs ------ */
6036 /* 31 29 26 22 21 20 15 11 9 4
6038 0q 001 1000 L 0 00000 0010 sz n t xx1 {Vt..t+3.T}, [Xn|SP]
6039 0q 001 1001 L 0 m 0010 sz n t xx1 {Vt..t+3.T}, [Xn|SP], step
6041 0q 001 1000 L 0 00000 0110 sz n t xx1 {Vt..t+2.T}, [Xn|SP]
6042 0q 001 1001 L 0 m 0110 sz n t xx1 {Vt..t+2.T}, [Xn|SP], step
6044 0q 001 1000 L 0 00000 1010 sz n t xx1 {Vt..t+1.T}, [Xn|SP]
6045 0q 001 1001 L 0 m 1010 sz n t xx1 {Vt..t+1.T}, [Xn|SP], step
6047 T = defined by Q and sz in the normal way
6048 step = if m == 11111 then transfer-size else Xm
6049 xx = case L of 1 -> LD ; 0 -> ST
6051 if (INSN(31,31) == 0 && INSN(29,24) == BITS6(0,0,1,1,0,0)
6052 && INSN(21,21) == 0) {
6053 Bool bitQ = INSN(30,30);
6054 Bool isPX = INSN(23,23) == 1;
6055 Bool isLD = INSN(22,22) == 1;
6056 UInt mm = INSN(20,16);
6057 UInt opc = INSN(15,12);
6058 UInt sz = INSN(11,10);
6059 UInt nn = INSN(9,5);
6060 UInt tt = INSN(4,0);
6061 Bool isQ = bitQ == 1;
6062 UInt nRegs = 0;
6063 switch (opc) {
6064 case BITS4(0,0,1,0): nRegs = 4; break;
6065 case BITS4(0,1,1,0): nRegs = 3; break;
6066 case BITS4(1,0,1,0): nRegs = 2; break;
6067 default: break;
6070 /* The combination insn[23] == 0 && insn[20:16] != 0 is not allowed.
6071 If we see it, set nRegs to 0 so as to cause the next conditional
6072 to fail. */
6073 if (!isPX && mm != 0)
6074 nRegs = 0;
6076 if (nRegs >= 2 && nRegs <= 4) {
6078 UInt xferSzB = (isQ ? 16 : 8) * nRegs;
6080 /* Generate the transfer address (TA) and if necessary the
6081 writeback address (WB) */
6082 IRTemp tTA = newTemp(Ity_I64);
6083 assign(tTA, getIReg64orSP(nn));
6084 if (nn == 31) { /* FIXME generate stack alignment check */ }
6085 IRTemp tWB = IRTemp_INVALID;
6086 if (isPX) {
6087 tWB = newTemp(Ity_I64);
6088 assign(tWB, binop(Iop_Add64,
6089 mkexpr(tTA),
6090 mm == BITS5(1,1,1,1,1) ? mkU64(xferSzB)
6091 : getIReg64orZR(mm)));
6094 /* -- BEGIN generate the transfers -- */
6096 IRTemp u0, u1, u2, u3;
6097 u0 = u1 = u2 = u3 = IRTemp_INVALID;
6098 switch (nRegs) {
6099 case 4: u3 = newTempV128(); /* fallthru */
6100 case 3: u2 = newTempV128(); /* fallthru */
6101 case 2: u1 = newTempV128();
6102 u0 = newTempV128(); break;
6103 default: vassert(0);
6106 /* -- Multiple 128 or 64 bit stores -- */
6107 if (!isLD) {
6108 switch (nRegs) {
6109 case 4: assign(u3, getQReg128((tt+3) % 32)); /* fallthru */
6110 case 3: assign(u2, getQReg128((tt+2) % 32)); /* fallthru */
6111 case 2: assign(u1, getQReg128((tt+1) % 32));
6112 assign(u0, getQReg128((tt+0) % 32)); break;
6113 default: vassert(0);
6115 # define MAYBE_NARROW_TO_64(_expr) \
6116 (isQ ? (_expr) : unop(Iop_V128to64,(_expr)))
6117 UInt step = isQ ? 16 : 8;
6118 switch (nRegs) {
6119 case 4: storeLE( binop(Iop_Add64, mkexpr(tTA), mkU64(3*step)),
6120 MAYBE_NARROW_TO_64(mkexpr(u3)) );
6121 /* fallthru */
6122 case 3: storeLE( binop(Iop_Add64, mkexpr(tTA), mkU64(2*step)),
6123 MAYBE_NARROW_TO_64(mkexpr(u2)) );
6124 /* fallthru */
6125 case 2: storeLE( binop(Iop_Add64, mkexpr(tTA), mkU64(1*step)),
6126 MAYBE_NARROW_TO_64(mkexpr(u1)) );
6127 storeLE( binop(Iop_Add64, mkexpr(tTA), mkU64(0*step)),
6128 MAYBE_NARROW_TO_64(mkexpr(u0)) );
6129 break;
6130 default: vassert(0);
6132 # undef MAYBE_NARROW_TO_64
6135 /* -- Multiple 128 or 64 bit loads -- */
6136 else /* isLD */ {
6137 UInt step = isQ ? 16 : 8;
6138 IRType loadTy = isQ ? Ity_V128 : Ity_I64;
6139 # define MAYBE_WIDEN_FROM_64(_expr) \
6140 (isQ ? (_expr) : unop(Iop_64UtoV128,(_expr)))
6141 switch (nRegs) {
6142 case 4:
6143 assign(u3, MAYBE_WIDEN_FROM_64(
6144 loadLE(loadTy,
6145 binop(Iop_Add64, mkexpr(tTA),
6146 mkU64(3 * step)))));
6147 /* fallthru */
6148 case 3:
6149 assign(u2, MAYBE_WIDEN_FROM_64(
6150 loadLE(loadTy,
6151 binop(Iop_Add64, mkexpr(tTA),
6152 mkU64(2 * step)))));
6153 /* fallthru */
6154 case 2:
6155 assign(u1, MAYBE_WIDEN_FROM_64(
6156 loadLE(loadTy,
6157 binop(Iop_Add64, mkexpr(tTA),
6158 mkU64(1 * step)))));
6159 assign(u0, MAYBE_WIDEN_FROM_64(
6160 loadLE(loadTy,
6161 binop(Iop_Add64, mkexpr(tTA),
6162 mkU64(0 * step)))));
6163 break;
6164 default:
6165 vassert(0);
6167 # undef MAYBE_WIDEN_FROM_64
6168 switch (nRegs) {
6169 case 4: putQReg128( (tt+3) % 32,
6170 math_MAYBE_ZERO_HI64(bitQ, u3));
6171 /* fallthru */
6172 case 3: putQReg128( (tt+2) % 32,
6173 math_MAYBE_ZERO_HI64(bitQ, u2));
6174 /* fallthru */
6175 case 2: putQReg128( (tt+1) % 32,
6176 math_MAYBE_ZERO_HI64(bitQ, u1));
6177 putQReg128( (tt+0) % 32,
6178 math_MAYBE_ZERO_HI64(bitQ, u0));
6179 break;
6180 default: vassert(0);
6184 /* -- END generate the transfers -- */
6186 /* Do the writeback, if necessary */
6187 if (isPX) {
6188 putIReg64orSP(nn, mkexpr(tWB));
6191 HChar pxStr[20];
6192 pxStr[0] = pxStr[sizeof(pxStr)-1] = 0;
6193 if (isPX) {
6194 if (mm == BITS5(1,1,1,1,1))
6195 vex_sprintf(pxStr, ", #%u", xferSzB);
6196 else
6197 vex_sprintf(pxStr, ", %s", nameIReg64orZR(mm));
6199 const HChar* arr = nameArr_Q_SZ(bitQ, sz);
6200 DIP("%s1 {v%u.%s .. v%u.%s}, [%s]%s\n",
6201 isLD ? "ld" : "st",
6202 (tt+0) % 32, arr, (tt+nRegs-1) % 32, arr, nameIReg64orSP(nn),
6203 pxStr);
6205 return True;
6207 /* else fall through */
6210 /* ---------- LD1R (single structure, replicate) ---------- */
6211 /* ---------- LD2R (single structure, replicate) ---------- */
6212 /* ---------- LD3R (single structure, replicate) ---------- */
6213 /* ---------- LD4R (single structure, replicate) ---------- */
6214 /* 31 29 22 20 15 11 9 4
6215 0q 001 1010 10 00000 110 0 sz n t LD1R {Vt.T}, [Xn|SP]
6216 0q 001 1011 10 m 110 0 sz n t LD1R {Vt.T}, [Xn|SP], step
6218 0q 001 1010 11 00000 110 0 sz n t LD2R {Vt..t+1.T}, [Xn|SP]
6219 0q 001 1011 11 m 110 0 sz n t LD2R {Vt..t+1.T}, [Xn|SP], step
6221 0q 001 1010 10 00000 111 0 sz n t LD3R {Vt..t+2.T}, [Xn|SP]
6222 0q 001 1011 10 m 111 0 sz n t LD3R {Vt..t+2.T}, [Xn|SP], step
6224 0q 001 1010 11 00000 111 0 sz n t LD4R {Vt..t+3.T}, [Xn|SP]
6225 0q 001 1011 11 m 111 0 sz n t LD4R {Vt..t+3.T}, [Xn|SP], step
6227 step = if m == 11111 then transfer-size else Xm
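      For example (illustrative only; the operands are made up):
      "ld4r {v0.8b - v3.8b}, [x1], #4" replicates one loaded byte into
      all 8 lanes of each of v0..v3; with m=11111 the post-index step
      is the transfer size, 4 registers * 1 byte = 4.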
6229 if (INSN(31,31) == 0 && INSN(29,24) == BITS6(0,0,1,1,0,1)
6230 && INSN(22,22) == 1 && INSN(15,14) == BITS2(1,1)
6231 && INSN(12,12) == 0) {
6232 UInt bitQ = INSN(30,30);
6233 Bool isPX = INSN(23,23) == 1;
6234 UInt nRegs = ((INSN(13,13) << 1) | INSN(21,21)) + 1;
6235 UInt mm = INSN(20,16);
6236 UInt sz = INSN(11,10);
6237 UInt nn = INSN(9,5);
6238 UInt tt = INSN(4,0);
6240 /* The combination insn[23] == 0 && insn[20:16] != 0 is not allowed. */
6241 if (isPX || mm == 0) {
6243 IRType ty = integerIRTypeOfSize(1 << sz);
6245 UInt laneSzB = 1 << sz;
6246 UInt xferSzB = laneSzB * nRegs;
6248 /* Generate the transfer address (TA) and if necessary the
6249 writeback address (WB) */
6250 IRTemp tTA = newTemp(Ity_I64);
6251 assign(tTA, getIReg64orSP(nn));
6252 if (nn == 31) { /* FIXME generate stack alignment check */ }
6253 IRTemp tWB = IRTemp_INVALID;
6254 if (isPX) {
6255 tWB = newTemp(Ity_I64);
6256 assign(tWB, binop(Iop_Add64,
6257 mkexpr(tTA),
6258 mm == BITS5(1,1,1,1,1) ? mkU64(xferSzB)
6259 : getIReg64orZR(mm)));
6262 /* Do the writeback, if necessary */
6263 if (isPX) {
6264 putIReg64orSP(nn, mkexpr(tWB));
6267 IRTemp e0, e1, e2, e3, v0, v1, v2, v3;
6268 e0 = e1 = e2 = e3 = v0 = v1 = v2 = v3 = IRTemp_INVALID;
6269 switch (nRegs) {
6270 case 4:
6271 e3 = newTemp(ty);
6272 assign(e3, loadLE(ty, binop(Iop_Add64, mkexpr(tTA),
6273 mkU64(3 * laneSzB))));
6274 v3 = math_DUP_TO_V128(e3, ty);
6275 putQReg128((tt+3) % 32, math_MAYBE_ZERO_HI64(bitQ, v3));
6276 /* fallthrough */
6277 case 3:
6278 e2 = newTemp(ty);
6279 assign(e2, loadLE(ty, binop(Iop_Add64, mkexpr(tTA),
6280 mkU64(2 * laneSzB))));
6281 v2 = math_DUP_TO_V128(e2, ty);
6282 putQReg128((tt+2) % 32, math_MAYBE_ZERO_HI64(bitQ, v2));
6283 /* fallthrough */
6284 case 2:
6285 e1 = newTemp(ty);
6286 assign(e1, loadLE(ty, binop(Iop_Add64, mkexpr(tTA),
6287 mkU64(1 * laneSzB))));
6288 v1 = math_DUP_TO_V128(e1, ty);
6289 putQReg128((tt+1) % 32, math_MAYBE_ZERO_HI64(bitQ, v1));
6290 /* fallthrough */
6291 case 1:
6292 e0 = newTemp(ty);
6293 assign(e0, loadLE(ty, binop(Iop_Add64, mkexpr(tTA),
6294 mkU64(0 * laneSzB))));
6295 v0 = math_DUP_TO_V128(e0, ty);
6296 putQReg128((tt+0) % 32, math_MAYBE_ZERO_HI64(bitQ, v0));
6297 break;
6298 default:
6299 vassert(0);
6302 HChar pxStr[20];
6303 pxStr[0] = pxStr[sizeof(pxStr)-1] = 0;
6304 if (isPX) {
6305 if (mm == BITS5(1,1,1,1,1))
6306 vex_sprintf(pxStr, ", #%u", xferSzB);
6307 else
6308 vex_sprintf(pxStr, ", %s", nameIReg64orZR(mm));
6310 const HChar* arr = nameArr_Q_SZ(bitQ, sz);
6311 DIP("ld%ur {v%u.%s .. v%u.%s}, [%s]%s\n",
6312 nRegs,
6313 (tt+0) % 32, arr, (tt+nRegs-1) % 32, arr, nameIReg64orSP(nn),
6314 pxStr);
6316 return True;
6318 /* else fall through */
6321 /* ------ LD1/ST1 (single structure, to/from one lane) ------ */
6322 /* ------ LD2/ST2 (single structure, to/from one lane) ------ */
6323 /* ------ LD3/ST3 (single structure, to/from one lane) ------ */
6324 /* ------ LD4/ST4 (single structure, to/from one lane) ------ */
6325 /* 31 29 22 21 20 15 11 9 4
6326 0q 001 1010 L 0 00000 xx0 S sz n t op1 {Vt.T}[ix], [Xn|SP]
6327 0q 001 1011 L 0 m xx0 S sz n t op1 {Vt.T}[ix], [Xn|SP], step
6329 0q 001 1010 L 1 00000 xx0 S sz n t op2 {Vt..t+1.T}[ix], [Xn|SP]
6330 0q 001 1011 L 1 m xx0 S sz n t op2 {Vt..t+1.T}[ix], [Xn|SP], step
6332 0q 001 1010 L 0 00000 xx1 S sz n t op3 {Vt..t+2.T}[ix], [Xn|SP]
6333 0q 001 1011 L 0 m xx1 S sz n t op3 {Vt..t+2.T}[ix], [Xn|SP], step
6335 0q 001 1010 L 1 00000 xx1 S sz n t op4 {Vt..t+3.T}[ix], [Xn|SP]
6336 0q 001 1011 L 1 m xx1 S sz n t op4 {Vt..t+3.T}[ix], [Xn|SP], step
6338 step = if m == 11111 then transfer-size else Xm
6339 op = case L of 1 -> LD ; 0 -> ST
6341 laneszB,ix = case xx:q:S:sz of 00:b:b:bb -> 1, bbbb
6342 01:b:b:b0 -> 2, bbb
6343 10:b:b:00 -> 4, bb
6344 10:b:0:01 -> 8, b
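      For example (illustrative only; the operands are made up):
      "st3 {v0.s, v1.s, v2.s}[1], [x2]" has xx=10 and sz=00 (4-byte
      lanes), and the lane index 1 comes from q:S = 0:1.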
6346 if (INSN(31,31) == 0 && INSN(29,24) == BITS6(0,0,1,1,0,1)) {
6347 UInt bitQ = INSN(30,30);
6348 Bool isPX = INSN(23,23) == 1;
6349 Bool isLD = INSN(22,22) == 1;
6350 UInt nRegs = ((INSN(13,13) << 1) | INSN(21,21)) + 1;
6351 UInt mm = INSN(20,16);
6352 UInt xx = INSN(15,14);
6353 UInt bitS = INSN(12,12);
6354 UInt sz = INSN(11,10);
6355 UInt nn = INSN(9,5);
6356 UInt tt = INSN(4,0);
6358 Bool valid = True;
6360 /* The combination insn[23] == 0 && insn[20:16] != 0 is not allowed. */
6361 if (!isPX && mm != 0)
6362 valid = False;
6364 UInt laneSzB = 0; /* invalid */
6365 UInt ix = 16; /* invalid */
6367 UInt xx_q_S_sz = (xx << 4) | (bitQ << 3) | (bitS << 2) | sz;
6368 switch (xx_q_S_sz) {
6369 case 0x00: case 0x01: case 0x02: case 0x03:
6370 case 0x04: case 0x05: case 0x06: case 0x07:
6371 case 0x08: case 0x09: case 0x0A: case 0x0B:
6372 case 0x0C: case 0x0D: case 0x0E: case 0x0F:
6373 laneSzB = 1; ix = xx_q_S_sz & 0xF;
6374 break;
6375 case 0x10: case 0x12: case 0x14: case 0x16:
6376 case 0x18: case 0x1A: case 0x1C: case 0x1E:
6377 laneSzB = 2; ix = (xx_q_S_sz >> 1) & 7;
6378 break;
6379 case 0x20: case 0x24: case 0x28: case 0x2C:
6380 laneSzB = 4; ix = (xx_q_S_sz >> 2) & 3;
6381 break;
6382 case 0x21: case 0x29:
6383 laneSzB = 8; ix = (xx_q_S_sz >> 3) & 1;
6384 break;
6385 default:
6386 break;
6389 if (valid && laneSzB != 0) {
6391 IRType ty = integerIRTypeOfSize(laneSzB);
6392 UInt xferSzB = laneSzB * nRegs;
6394 /* Generate the transfer address (TA) and if necessary the
6395 writeback address (WB) */
6396 IRTemp tTA = newTemp(Ity_I64);
6397 assign(tTA, getIReg64orSP(nn));
6398 if (nn == 31) { /* FIXME generate stack alignment check */ }
6399 IRTemp tWB = IRTemp_INVALID;
6400 if (isPX) {
6401 tWB = newTemp(Ity_I64);
6402 assign(tWB, binop(Iop_Add64,
6403 mkexpr(tTA),
6404 mm == BITS5(1,1,1,1,1) ? mkU64(xferSzB)
6405 : getIReg64orZR(mm)));
6408 /* Do the writeback, if necessary */
6409 if (isPX) {
6410 putIReg64orSP(nn, mkexpr(tWB));
6413 switch (nRegs) {
6414 case 4: {
6415 IRExpr* addr
6416 = binop(Iop_Add64, mkexpr(tTA), mkU64(3 * laneSzB));
6417 if (isLD) {
6418 putQRegLane((tt+3) % 32, ix, loadLE(ty, addr));
6419 } else {
6420 storeLE(addr, getQRegLane((tt+3) % 32, ix, ty));
6422 /* fallthrough */
6424 case 3: {
6425 IRExpr* addr
6426 = binop(Iop_Add64, mkexpr(tTA), mkU64(2 * laneSzB));
6427 if (isLD) {
6428 putQRegLane((tt+2) % 32, ix, loadLE(ty, addr));
6429 } else {
6430 storeLE(addr, getQRegLane((tt+2) % 32, ix, ty));
6432 /* fallthrough */
6434 case 2: {
6435 IRExpr* addr
6436 = binop(Iop_Add64, mkexpr(tTA), mkU64(1 * laneSzB));
6437 if (isLD) {
6438 putQRegLane((tt+1) % 32, ix, loadLE(ty, addr));
6439 } else {
6440 storeLE(addr, getQRegLane((tt+1) % 32, ix, ty));
6442 /* fallthrough */
6444 case 1: {
6445 IRExpr* addr
6446 = binop(Iop_Add64, mkexpr(tTA), mkU64(0 * laneSzB));
6447 if (isLD) {
6448 putQRegLane((tt+0) % 32, ix, loadLE(ty, addr));
6449 } else {
6450 storeLE(addr, getQRegLane((tt+0) % 32, ix, ty));
6452 break;
6454 default:
6455 vassert(0);
6458 HChar pxStr[20];
6459 pxStr[0] = pxStr[sizeof(pxStr)-1] = 0;
6460 if (isPX) {
6461 if (mm == BITS5(1,1,1,1,1))
6462 vex_sprintf(pxStr, ", #%u", xferSzB);
6463 else
6464 vex_sprintf(pxStr, ", %s", nameIReg64orZR(mm));
6466 const HChar* arr = nameArr_Q_SZ(bitQ, sz);
6467 DIP("%s%u {v%u.%s .. v%u.%s}[%u], [%s]%s\n",
6468 isLD ? "ld" : "st", nRegs,
6469 (tt+0) % 32, arr, (tt+nRegs-1) % 32, arr,
6470 ix, nameIReg64orSP(nn), pxStr);
6472 return True;
6474 /* else fall through */
6477 /* ------------------ LD{,A}X{R,RH,RB} ------------------ */
6478 /* ------------------ ST{,L}X{R,RH,RB} ------------------ */
6479 /* 31 29 23 20 14 9 4
6480 sz 001000 010 11111 0 11111 n t LDX{R,RH,RB} Rt, [Xn|SP]
6481 sz 001000 010 11111 1 11111 n t LDAX{R,RH,RB} Rt, [Xn|SP]
6482 sz 001000 000 s 0 11111 n t STX{R,RH,RB} Ws, Rt, [Xn|SP]
6483 sz 001000 000 s 1 11111 n t STLX{R,RH,RB} Ws, Rt, [Xn|SP]
6485 /* For the "standard" implementation we pass through the LL and SC to
6486 the host. For the "fallback" implementation, for details see
6487 https://bugs.kde.org/show_bug.cgi?id=344524 and
6488 https://bugs.kde.org/show_bug.cgi?id=369459,
6489 but in short:
6491 LoadLinked(addr)
6492 gs.LLsize = load_size // 1, 2, 4 or 8
6493 gs.LLaddr = addr
6494 gs.LLdata = zeroExtend(*addr)
6496 StoreCond(addr, data)
6497 tmp_LLsize = gs.LLsize
6498 gs.LLsize = 0 // "no transaction"
6499 if tmp_LLsize != store_size -> fail
6500 if addr != gs.LLaddr -> fail
6501 if zeroExtend(*addr) != gs.LLdata -> fail
6502 cas_ok = CAS(store_size, addr, gs.LLdata -> data)
6503 if !cas_ok -> fail
6504 succeed
6506 When thread scheduled
6507 gs.LLsize = 0 // "no transaction"
6508 (coregrind/m_scheduler/scheduler.c, run_thread_for_a_while()
6509 has to do this bit)
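      To make that concrete (illustrative only; the operands are made
      up): for the pair
         ldxr w1, [x0]  ...  stxr w2, w1, [x0]
      the LDXR records LLsize=4, LLaddr=X0 and LLdata=zeroExtend(mem32);
      the STXR then re-checks all three, attempts the CAS, and finally
      writes 0 to W2 on success and 1 on failure.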
6511 if (INSN(29,23) == BITS7(0,0,1,0,0,0,0)
6512 && (INSN(23,21) & BITS3(1,0,1)) == BITS3(0,0,0)
6513 && INSN(14,10) == BITS5(1,1,1,1,1)) {
6514 UInt szBlg2 = INSN(31,30);
6515 Bool isLD = INSN(22,22) == 1;
6516 Bool isAcqOrRel = INSN(15,15) == 1;
6517 UInt ss = INSN(20,16);
6518 UInt nn = INSN(9,5);
6519 UInt tt = INSN(4,0);
6521 vassert(szBlg2 < 4);
6522 UInt szB = 1 << szBlg2; /* 1, 2, 4 or 8 */
6523 IRType ty = integerIRTypeOfSize(szB);
6524 const HChar* suffix[4] = { "rb", "rh", "r", "r" };
6526 IRTemp ea = newTemp(Ity_I64);
6527 assign(ea, getIReg64orSP(nn));
6528 /* FIXME generate check that ea is szB-aligned */
6530 if (isLD && ss == BITS5(1,1,1,1,1)) {
6531 IRTemp res = newTemp(ty);
6532 if (abiinfo->guest__use_fallback_LLSC) {
6533 // Do the load first so we don't update any guest state
6534 // if it faults.
6535 IRTemp loaded_data64 = newTemp(Ity_I64);
6536 assign(loaded_data64, widenUto64(ty, loadLE(ty, mkexpr(ea))));
6537 stmt( IRStmt_Put( OFFB_LLSC_DATA, mkexpr(loaded_data64) ));
6538 stmt( IRStmt_Put( OFFB_LLSC_ADDR, mkexpr(ea) ));
6539 stmt( IRStmt_Put( OFFB_LLSC_SIZE, mkU64(szB) ));
6540 putIReg64orZR(tt, mkexpr(loaded_data64));
6541 } else {
6542 stmt(IRStmt_LLSC(Iend_LE, res, mkexpr(ea), NULL/*LL*/));
6543 putIReg64orZR(tt, widenUto64(ty, mkexpr(res)));
6545 if (isAcqOrRel) {
6546 stmt(IRStmt_MBE(Imbe_Fence));
6548 DIP("ld%sx%s %s, [%s] %s\n", isAcqOrRel ? "a" : "", suffix[szBlg2],
6549 nameIRegOrZR(szB == 8, tt), nameIReg64orSP(nn),
6550 abiinfo->guest__use_fallback_LLSC
6551 ? "(fallback implementation)" : "");
6552 return True;
6554 if (!isLD) {
6555 if (isAcqOrRel) {
6556 stmt(IRStmt_MBE(Imbe_Fence));
6558 IRExpr* data = narrowFrom64(ty, getIReg64orZR(tt));
6559 if (abiinfo->guest__use_fallback_LLSC) {
6560 // This is really ugly, since we don't have any way to do
6561 // proper if-then-else. First, set up as if the SC failed,
6562 // and jump forwards if it really has failed.
6564 // Continuation address
6565 IRConst* nia = IRConst_U64(guest_PC_curr_instr + 4);
6567 // "the SC failed". Any non-zero value means failure.
6568 putIReg64orZR(ss, mkU64(1));
6570 IRTemp tmp_LLsize = newTemp(Ity_I64);
6571 assign(tmp_LLsize, IRExpr_Get(OFFB_LLSC_SIZE, Ity_I64));
6572 stmt( IRStmt_Put( OFFB_LLSC_SIZE, mkU64(0) // "no transaction"
6574 // Fail if no or wrong-size transaction
6575 vassert(szB == 8 || szB == 4 || szB == 2 || szB == 1);
6576 stmt( IRStmt_Exit(
6577 binop(Iop_CmpNE64, mkexpr(tmp_LLsize), mkU64(szB)),
6578 Ijk_Boring, nia, OFFB_PC
6580 // Fail if the address doesn't match the LL address
6581 stmt( IRStmt_Exit(
6582 binop(Iop_CmpNE64, mkexpr(ea),
6583 IRExpr_Get(OFFB_LLSC_ADDR, Ity_I64)),
6584 Ijk_Boring, nia, OFFB_PC
6586 // Fail if the data doesn't match the LL data
6587 IRTemp llsc_data64 = newTemp(Ity_I64);
6588 assign(llsc_data64, IRExpr_Get(OFFB_LLSC_DATA, Ity_I64));
6589 stmt( IRStmt_Exit(
6590 binop(Iop_CmpNE64, widenUto64(ty, loadLE(ty, mkexpr(ea))),
6591 mkexpr(llsc_data64)),
6592 Ijk_Boring, nia, OFFB_PC
6594 // Try to CAS the new value in.
6595 IRTemp old = newTemp(ty);
6596 IRTemp expd = newTemp(ty);
6597 assign(expd, narrowFrom64(ty, mkexpr(llsc_data64)));
6598 stmt( IRStmt_CAS(mkIRCAS(/*oldHi*/IRTemp_INVALID, old,
6599 Iend_LE, mkexpr(ea),
6600 /*expdHi*/NULL, mkexpr(expd),
6601 /*dataHi*/NULL, data
6602 )));
6603 // Fail if the CAS failed (viz, old != expd)
6604 stmt( IRStmt_Exit(
6605 binop(Iop_CmpNE64,
6606 widenUto64(ty, mkexpr(old)),
6607 widenUto64(ty, mkexpr(expd))),
6608 Ijk_Boring, nia, OFFB_PC
6610 // Otherwise we succeeded (!)
6611 putIReg64orZR(ss, mkU64(0));
6612 } else {
6613 IRTemp res = newTemp(Ity_I1);
6614 stmt(IRStmt_LLSC(Iend_LE, res, mkexpr(ea), data));
6615 /* IR semantics: res is 1 if store succeeds, 0 if it fails.
6616 Need to set rS to 1 on failure, 0 on success. */
6617 putIReg64orZR(ss, binop(Iop_Xor64, unop(Iop_1Uto64, mkexpr(res)),
6618 mkU64(1)));
6620 DIP("st%sx%s %s, %s, [%s] %s\n", isAcqOrRel ? "a" : "", suffix[szBlg2],
6621 nameIRegOrZR(False, ss),
6622 nameIRegOrZR(szB == 8, tt), nameIReg64orSP(nn),
6623 abiinfo->guest__use_fallback_LLSC
6624 ? "(fallback implementation)" : "");
6625 return True;
6627 /* else fall through */
6630 /* ------------------ LDA{R,RH,RB} ------------------ */
6631 /* ------------------ STL{R,RH,RB} ------------------ */
6632 /* 31 29 23 20 14 9 4
6633 sz 001000 110 11111 1 11111 n t LDAR<sz> Rt, [Xn|SP]
6634 sz 001000 100 11111 1 11111 n t STLR<sz> Rt, [Xn|SP]
6636 if (INSN(29,23) == BITS7(0,0,1,0,0,0,1)
6637 && INSN(21,10) == BITS12(0,1,1,1,1,1,1,1,1,1,1,1)) {
6638 UInt szBlg2 = INSN(31,30);
6639 Bool isLD = INSN(22,22) == 1;
6640 UInt nn = INSN(9,5);
6641 UInt tt = INSN(4,0);
6643 vassert(szBlg2 < 4);
6644 UInt szB = 1 << szBlg2; /* 1, 2, 4 or 8 */
6645 IRType ty = integerIRTypeOfSize(szB);
6646 const HChar* suffix[4] = { "rb", "rh", "r", "r" };
6648 IRTemp ea = newTemp(Ity_I64);
6649 assign(ea, getIReg64orSP(nn));
6650 /* FIXME generate check that ea is szB-aligned */
6652 if (isLD) {
6653 IRTemp res = newTemp(ty);
6654 assign(res, loadLE(ty, mkexpr(ea)));
6655 putIReg64orZR(tt, widenUto64(ty, mkexpr(res)));
6656 stmt(IRStmt_MBE(Imbe_Fence));
6657 DIP("lda%s %s, [%s]\n", suffix[szBlg2],
6658 nameIRegOrZR(szB == 8, tt), nameIReg64orSP(nn));
6659 } else {
6660 stmt(IRStmt_MBE(Imbe_Fence));
6661 IRExpr* data = narrowFrom64(ty, getIReg64orZR(tt));
6662 storeLE(mkexpr(ea), data);
6663 DIP("stl%s %s, [%s]\n", suffix[szBlg2],
6664 nameIRegOrZR(szB == 8, tt), nameIReg64orSP(nn));
6666 return True;
6669 /* The PRFM cases that follow may allow Rt values (the
6670 prefetch operation) which are not allowed by the documentation.
6671 This should be looked into. */
6672 /* ------------------ PRFM (immediate) ------------------ */
6673 /* 31 21 9 4
6674 11 111 00110 imm12 n t PRFM prfop=Rt, [Xn|SP, #pimm]
6676 if (INSN(31,22) == BITS10(1,1,1,1,1,0,0,1,1,0)) {
6677 UInt imm12 = INSN(21,10);
6678 UInt nn = INSN(9,5);
6679 UInt tt = INSN(4,0);
6680 /* Generating any IR here is pointless, except for documentation
6681 purposes, as it will get optimised away later. */
6682 IRTemp ea = newTemp(Ity_I64);
6683 assign(ea, binop(Iop_Add64, getIReg64orSP(nn), mkU64(imm12 * 8)));
6684 DIP("prfm prfop=%u, [%s, #%u]\n", tt, nameIReg64orSP(nn), imm12 * 8);
6685 return True;
6688 /* ------------------ PRFM (register) ------------------ */
6689 /* 31 29 22 20 15 12 11 9 4
6690 11 1110001 01 Rm opt S 10 Rn Rt PRFM prfop=Rt, [Xn|SP, R<m>{ext/sh}]
6692 if (INSN(31,21) == BITS11(1,1,1,1,1,0,0,0,1,0,1)
6693 && INSN(11,10) == BITS2(1,0)) {
6694 HChar dis_buf[64];
6695 UInt tt = INSN(4,0);
6696 IRTemp ea = gen_indexed_EA(dis_buf, insn, True/*to/from int regs*/);
6697 if (ea != IRTemp_INVALID) {
6698 /* No actual code to generate. */
6699 DIP("prfm prfop=%u, %s\n", tt, dis_buf);
6700 return True;
6704 /* ------------------ PRFM (unscaled offset) ------------------ */
6705 /* 31 29 22 20 11 9 4
6706 11 1110001 00 imm9 00 Rn Rt PRFM prfop=Rt, [Xn|SP, #simm]
6708 if (INSN(31,21) == BITS11(1,1, 1,1,1,0,0,0,1, 0,0)
6709 && INSN(11,10) == BITS2(0,0)) {
6710 ULong imm9 = INSN(20,12);
6711 UInt nn = INSN(9,5);
6712 UInt tt = INSN(4,0);
6713 ULong offset = sx_to_64(imm9, 9);
6714 IRTemp ea = newTemp(Ity_I64);
6715 assign(ea, binop(Iop_Add64, getIReg64orSP(nn), mkU64(offset)));
6716 /* No actual code to generate. */
6717 DIP("prfum prfop=%u, [%s, #0x%llx]\n", tt, nameIReg64orSP(nn), offset);
6718 return True;
6721 vex_printf("ARM64 front end: load_store\n");
6722 return False;
6723 # undef INSN
6727 /*------------------------------------------------------------*/
6728 /*--- Control flow and misc instructions ---*/
6729 /*------------------------------------------------------------*/
6731 static
6732 Bool dis_ARM64_branch_etc(/*MB_OUT*/DisResult* dres, UInt insn,
6733 const VexArchInfo* archinfo,
6734 const VexAbiInfo* abiinfo)
6736 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
6738 /* ---------------------- B cond ----------------------- */
6739 /* 31 24 4 3
6740 0101010 0 imm19 0 cond */
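   /* For example (illustrative only; the offset is made up): "b.ne .+8"
      has cond=0001 and imm19=2, giving a byte offset of 8 from this
      instruction. */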
6741 if (INSN(31,24) == BITS8(0,1,0,1,0,1,0,0) && INSN(4,4) == 0) {
6742 UInt cond = INSN(3,0);
6743 ULong uimm64 = INSN(23,5) << 2;
6744 Long simm64 = (Long)sx_to_64(uimm64, 21);
6745 vassert(dres->whatNext == Dis_Continue);
6746 vassert(dres->len == 4);
6747 vassert(dres->continueAt == 0);
6748 vassert(dres->jk_StopHere == Ijk_INVALID);
6749 stmt( IRStmt_Exit(unop(Iop_64to1, mk_arm64g_calculate_condition(cond)),
6750 Ijk_Boring,
6751 IRConst_U64(guest_PC_curr_instr + simm64),
6752 OFFB_PC) );
6753 putPC(mkU64(guest_PC_curr_instr + 4));
6754 dres->whatNext = Dis_StopHere;
6755 dres->jk_StopHere = Ijk_Boring;
6756 DIP("b.%s 0x%llx\n", nameCC(cond), guest_PC_curr_instr + simm64);
6757 return True;
6760 /* -------------------- B{L} uncond -------------------- */
6761 if (INSN(30,26) == BITS5(0,0,1,0,1)) {
6762 /* 000101 imm26 B (PC + sxTo64(imm26 << 2))
6763 100101 imm26 BL (PC + sxTo64(imm26 << 2))
6765 UInt bLink = INSN(31,31);
6766 ULong uimm64 = INSN(25,0) << 2;
6767 Long simm64 = (Long)sx_to_64(uimm64, 28);
6768 if (bLink) {
6769 putIReg64orSP(30, mkU64(guest_PC_curr_instr + 4));
6771 putPC(mkU64(guest_PC_curr_instr + simm64));
6772 dres->whatNext = Dis_StopHere;
6773 dres->jk_StopHere = Ijk_Call;
6774 DIP("b%s 0x%llx\n", bLink == 1 ? "l" : "",
6775 guest_PC_curr_instr + simm64);
6776 return True;
6779 /* --------------------- B{L} reg --------------------- */
6780 /* 31 24 22 20 15 9 4
6781 1101011 00 10 11111 000000 nn 00000 RET Rn
6782 1101011 00 01 11111 000000 nn 00000 CALL Rn
6783 1101011 00 00 11111 000000 nn 00000 JMP Rn
6785 if (INSN(31,23) == BITS9(1,1,0,1,0,1,1,0,0)
6786 && INSN(20,16) == BITS5(1,1,1,1,1)
6787 && INSN(15,10) == BITS6(0,0,0,0,0,0)
6788 && INSN(4,0) == BITS5(0,0,0,0,0)) {
6789 UInt branch_type = INSN(22,21);
6790 UInt nn = INSN(9,5);
6791 if (branch_type == BITS2(1,0) /* RET */) {
6792 putPC(getIReg64orZR(nn));
6793 dres->whatNext = Dis_StopHere;
6794 dres->jk_StopHere = Ijk_Ret;
6795 DIP("ret %s\n", nameIReg64orZR(nn));
6796 return True;
6798 if (branch_type == BITS2(0,1) /* CALL */) {
6799 IRTemp dst = newTemp(Ity_I64);
6800 assign(dst, getIReg64orZR(nn));
6801 putIReg64orSP(30, mkU64(guest_PC_curr_instr + 4));
6802 putPC(mkexpr(dst));
6803 dres->whatNext = Dis_StopHere;
6804 dres->jk_StopHere = Ijk_Call;
6805 DIP("blr %s\n", nameIReg64orZR(nn));
6806 return True;
6808 if (branch_type == BITS2(0,0) /* JMP */) {
6809 putPC(getIReg64orZR(nn));
6810 dres->whatNext = Dis_StopHere;
6811 dres->jk_StopHere = Ijk_Boring;
6812 DIP("jmp %s\n", nameIReg64orZR(nn));
6813 return True;
6817 /* -------------------- CB{N}Z -------------------- */
6818 /* sf 011 010 1 imm19 Rt CBNZ Xt|Wt, (PC + sxTo64(imm19 << 2))
6819 sf 011 010 0 imm19 Rt CBZ Xt|Wt, (PC + sxTo64(imm19 << 2))
6821 if (INSN(30,25) == BITS6(0,1,1,0,1,0)) {
6822 Bool is64 = INSN(31,31) == 1;
6823 Bool bIfZ = INSN(24,24) == 0;
6824 ULong uimm64 = INSN(23,5) << 2;
6825 UInt rT = INSN(4,0);
6826 Long simm64 = (Long)sx_to_64(uimm64, 21);
6827 IRExpr* cond = NULL;
6828 if (is64) {
6829 cond = binop(bIfZ ? Iop_CmpEQ64 : Iop_CmpNE64,
6830 getIReg64orZR(rT), mkU64(0));
6831 } else {
6832 cond = binop(bIfZ ? Iop_CmpEQ32 : Iop_CmpNE32,
6833 getIReg32orZR(rT), mkU32(0));
6835 stmt( IRStmt_Exit(cond,
6836 Ijk_Boring,
6837 IRConst_U64(guest_PC_curr_instr + simm64),
6838 OFFB_PC) );
6839 putPC(mkU64(guest_PC_curr_instr + 4));
6840 dres->whatNext = Dis_StopHere;
6841 dres->jk_StopHere = Ijk_Boring;
6842 DIP("cb%sz %s, 0x%llx\n",
6843 bIfZ ? "" : "n", nameIRegOrZR(is64, rT),
6844 guest_PC_curr_instr + simm64);
6845 return True;
6848 /* -------------------- TB{N}Z -------------------- */
6849 /* 31 30 24 23 18 5 4
6850 b5 011 011 1 b40 imm14 t TBNZ Xt, #(b5:b40), (PC + sxTo64(imm14 << 2))
6851 b5 011 011 0 b40 imm14 t TBZ Xt, #(b5:b40), (PC + sxTo64(imm14 << 2))
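      For example (illustrative only; the operands are made up):
      "tbnz x3, #33, <label>" has b5=1 and b40=00001, so the tested
      bit number is (1 << 5) | 1 = 33.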
6853 if (INSN(30,25) == BITS6(0,1,1,0,1,1)) {
6854 UInt b5 = INSN(31,31);
6855 Bool bIfZ = INSN(24,24) == 0;
6856 UInt b40 = INSN(23,19);
6857 UInt imm14 = INSN(18,5);
6858 UInt tt = INSN(4,0);
6859 UInt bitNo = (b5 << 5) | b40;
6860 ULong uimm64 = imm14 << 2;
6861 Long simm64 = sx_to_64(uimm64, 16);
6862 IRExpr* cond
6863 = binop(bIfZ ? Iop_CmpEQ64 : Iop_CmpNE64,
6864 binop(Iop_And64,
6865 binop(Iop_Shr64, getIReg64orZR(tt), mkU8(bitNo)),
6866 mkU64(1)),
6867 mkU64(0));
6868 stmt( IRStmt_Exit(cond,
6869 Ijk_Boring,
6870 IRConst_U64(guest_PC_curr_instr + simm64),
6871 OFFB_PC) );
6872 putPC(mkU64(guest_PC_curr_instr + 4));
6873 dres->whatNext = Dis_StopHere;
6874 dres->jk_StopHere = Ijk_Boring;
6875 DIP("tb%sz %s, #%u, 0x%llx\n",
6876 bIfZ ? "" : "n", nameIReg64orZR(tt), bitNo,
6877 guest_PC_curr_instr + simm64);
6878 return True;
6881 /* -------------------- SVC -------------------- */
6882 /* 11010100 000 imm16 000 01
6883 Don't bother with anything except the imm16==0 case.
6885 if (INSN(31,0) == 0xD4000001) {
6886 putPC(mkU64(guest_PC_curr_instr + 4));
6887 dres->whatNext = Dis_StopHere;
6888 dres->jk_StopHere = Ijk_Sys_syscall;
6889 DIP("svc #0\n");
6890 return True;
6893 /* ------------------ M{SR,RS} ------------------ */
6894 /* ---- Cases for TPIDR_EL0 ----
6895 0xD51BD0 010 Rt MSR tpidr_el0, rT
6896 0xD53BD0 010 Rt MRS rT, tpidr_el0
6898 if ( (INSN(31,0) & 0xFFFFFFE0) == 0xD51BD040 /*MSR*/
6899 || (INSN(31,0) & 0xFFFFFFE0) == 0xD53BD040 /*MRS*/) {
6900 Bool toSys = INSN(21,21) == 0;
6901 UInt tt = INSN(4,0);
6902 if (toSys) {
6903 stmt( IRStmt_Put( OFFB_TPIDR_EL0, getIReg64orZR(tt)) );
6904 DIP("msr tpidr_el0, %s\n", nameIReg64orZR(tt));
6905 } else {
6906 putIReg64orZR(tt, IRExpr_Get( OFFB_TPIDR_EL0, Ity_I64 ));
6907 DIP("mrs %s, tpidr_el0\n", nameIReg64orZR(tt));
6909 return True;
6911 /* ---- Cases for FPCR ----
6912 0xD51B44 000 Rt MSR fpcr, rT
6913 0xD53B44 000 Rt MRS rT, fpcr
6915 if ( (INSN(31,0) & 0xFFFFFFE0) == 0xD51B4400 /*MSR*/
6916 || (INSN(31,0) & 0xFFFFFFE0) == 0xD53B4400 /*MRS*/) {
6917 Bool toSys = INSN(21,21) == 0;
6918 UInt tt = INSN(4,0);
6919 if (toSys) {
6920 stmt( IRStmt_Put( OFFB_FPCR, getIReg32orZR(tt)) );
6921 DIP("msr fpcr, %s\n", nameIReg64orZR(tt));
6922 } else {
6923 putIReg32orZR(tt, IRExpr_Get(OFFB_FPCR, Ity_I32));
6924 DIP("mrs %s, fpcr\n", nameIReg64orZR(tt));
6926 return True;
6928 /* ---- Cases for FPSR ----
6929 0xD51B44 001 Rt MSR fpsr, rT
6930 0xD53B44 001 Rt MRS rT, fpsr
6931 The only part of this we model is FPSR.QC. All other bits
6932 are ignored when writing to it and RAZ when reading from it.
6934 if ( (INSN(31,0) & 0xFFFFFFE0) == 0xD51B4420 /*MSR*/
6935 || (INSN(31,0) & 0xFFFFFFE0) == 0xD53B4420 /*MRS*/) {
6936 Bool toSys = INSN(21,21) == 0;
6937 UInt tt = INSN(4,0);
6938 if (toSys) {
6939 /* Just deal with FPSR.QC. Make up a V128 value which is
6940 zero if Xt[27] is zero and any other value if Xt[27] is
6941 nonzero. */
6942 IRTemp qc64 = newTemp(Ity_I64);
6943 assign(qc64, binop(Iop_And64,
6944 binop(Iop_Shr64, getIReg64orZR(tt), mkU8(27)),
6945 mkU64(1)));
6946 IRExpr* qcV128 = binop(Iop_64HLtoV128, mkexpr(qc64), mkexpr(qc64));
6947 stmt( IRStmt_Put( OFFB_QCFLAG, qcV128 ) );
6948 DIP("msr fpsr, %s\n", nameIReg64orZR(tt));
6949 } else {
6950 /* Generate a value which is all zeroes except for bit 27,
6951 which must be zero if QCFLAG is all zeroes and one otherwise. */
6952 IRTemp qcV128 = newTempV128();
6953 assign(qcV128, IRExpr_Get( OFFB_QCFLAG, Ity_V128 ));
6954 IRTemp qc64 = newTemp(Ity_I64);
6955 assign(qc64, binop(Iop_Or64, unop(Iop_V128HIto64, mkexpr(qcV128)),
6956 unop(Iop_V128to64, mkexpr(qcV128))));
6957 IRExpr* res = binop(Iop_Shl64,
6958 unop(Iop_1Uto64,
6959 binop(Iop_CmpNE64, mkexpr(qc64), mkU64(0))),
6960 mkU8(27));
6961 putIReg64orZR(tt, res);
6962 DIP("mrs %s, fpsr\n", nameIReg64orZR(tt));
6964 return True;
6966 /* ---- Cases for NZCV ----
6967 D51B42 000 Rt MSR nzcv, rT
6968 D53B42 000 Rt MRS rT, nzcv
6969 The only parts of NZCV that actually exist are bits 31:28, which
6970 are the N Z C and V bits themselves. Hence the flags thunk provides
6971 all the state we need.
6973 if ( (INSN(31,0) & 0xFFFFFFE0) == 0xD51B4200 /*MSR*/
6974 || (INSN(31,0) & 0xFFFFFFE0) == 0xD53B4200 /*MRS*/) {
6975 Bool toSys = INSN(21,21) == 0;
6976 UInt tt = INSN(4,0);
6977 if (toSys) {
6978 IRTemp t = newTemp(Ity_I64);
6979 assign(t, binop(Iop_And64, getIReg64orZR(tt), mkU64(0xF0000000ULL)));
6980 setFlags_COPY(t);
6981 DIP("msr %s, nzcv\n", nameIReg32orZR(tt));
6982 } else {
6983 IRTemp res = newTemp(Ity_I64);
6984 assign(res, mk_arm64g_calculate_flags_nzcv());
6985 putIReg32orZR(tt, unop(Iop_64to32, mkexpr(res)));
6986 DIP("mrs %s, nzcv\n", nameIReg64orZR(tt));
6988 return True;
6990 /* ---- Cases for DCZID_EL0 ----
6991 Don't support arbitrary reads and writes to this register. Just
6992 return the value 16, which indicates that the DC ZVA instruction
6993 is not permitted, so we don't have to emulate it.
6994 D5 3B 00 111 Rt MRS rT, dczid_el0
6996 if ((INSN(31,0) & 0xFFFFFFE0) == 0xD53B00E0) {
6997 UInt tt = INSN(4,0);
6998 putIReg64orZR(tt, mkU64(1<<4));
6999 DIP("mrs %s, dczid_el0 (FAKED)\n", nameIReg64orZR(tt));
7000 return True;
7002 /* ---- Cases for CTR_EL0 ----
7003 We just handle reads, and make up a value from the D and I line
7004 sizes in the VexArchInfo we are given, and patch in the following
7005 fields that the Foundation model gives ("natively"):
7006 CWG = 0b0100, ERG = 0b0100, L1Ip = 0b11
7007 D5 3B 00 001 Rt MRS rT, ctr_el0
7009 if ((INSN(31,0) & 0xFFFFFFE0) == 0xD53B0020) {
7010 UInt tt = INSN(4,0);
7011 /* Need to generate a value from dMinLine_lg2_szB and
7012 iMinLine_lg2_szB. The value in the register is in 32-bit
7013 units, so need to subtract 2 from the values in the
7014 VexArchInfo. We can assume that the values here are valid --
7015 disInstr_ARM64 checks them -- so there's no need to deal with
7016 out-of-range cases. */
7017 vassert(archinfo->arm64_dMinLine_lg2_szB >= 2
7018 && archinfo->arm64_dMinLine_lg2_szB <= 17
7019 && archinfo->arm64_iMinLine_lg2_szB >= 2
7020 && archinfo->arm64_iMinLine_lg2_szB <= 17);
7021 UInt val
7022 = 0x8440c000 | ((0xF & (archinfo->arm64_dMinLine_lg2_szB - 2)) << 16)
7023 | ((0xF & (archinfo->arm64_iMinLine_lg2_szB - 2)) << 0);
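      /* Worked example (illustrative only): with 64-byte D and I lines
         both _lg2_szB fields are 6, so
         val = 0x8440c000 | (4 << 16) | 4 = 0x8444c004. */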
7024 putIReg64orZR(tt, mkU64(val));
7025 DIP("mrs %s, ctr_el0\n", nameIReg64orZR(tt));
7026 return True;
7028 /* ---- Cases for CNTVCT_EL0 ----
7029 This is a timestamp counter of some sort. Support reads of it only
7030 by passing through to the host.
7031 D5 3B E0 010 Rt MRS Xt, cntvct_el0
7033 if ((INSN(31,0) & 0xFFFFFFE0) == 0xD53BE040) {
7034 UInt tt = INSN(4,0);
7035 IRTemp val = newTemp(Ity_I64);
7036 IRExpr** args = mkIRExprVec_0();
7037 IRDirty* d = unsafeIRDirty_1_N (
7038 val,
7039 0/*regparms*/,
7040 "arm64g_dirtyhelper_MRS_CNTVCT_EL0",
7041 &arm64g_dirtyhelper_MRS_CNTVCT_EL0,
7042 args
7044 /* execute the dirty call, dumping the result in val. */
7045 stmt( IRStmt_Dirty(d) );
7046 putIReg64orZR(tt, mkexpr(val));
7047 DIP("mrs %s, cntvct_el0\n", nameIReg64orZR(tt));
7048 return True;
7050 /* ---- Cases for CNTFRQ_EL0 ----
7051 This is always RO at EL0, so it's safe to pass through to the host.
7052 D5 3B E0 000 Rt MRS Xt, cntfrq_el0
7054 if ((INSN(31,0) & 0xFFFFFFE0) == 0xD53BE000) {
7055 UInt tt = INSN(4,0);
7056 IRTemp val = newTemp(Ity_I64);
7057 IRExpr** args = mkIRExprVec_0();
7058 IRDirty* d = unsafeIRDirty_1_N (
7059 val,
7060 0/*regparms*/,
7061 "arm64g_dirtyhelper_MRS_CNTFRQ_EL0",
7062 &arm64g_dirtyhelper_MRS_CNTFRQ_EL0,
7063 args
7065 /* execute the dirty call, dumping the result in val. */
7066 stmt( IRStmt_Dirty(d) );
7067 putIReg64orZR(tt, mkexpr(val));
7068 DIP("mrs %s, cntfrq_el0\n", nameIReg64orZR(tt));
7069 return True;
7072 /* ------------------ IC_IVAU ------------------ */
7073 /* D5 0B 75 001 Rt ic ivau, rT
7075 if ((INSN(31,0) & 0xFFFFFFE0) == 0xD50B7520) {
7076 /* We will always be provided with a valid iMinLine value. */
7077 vassert(archinfo->arm64_iMinLine_lg2_szB >= 2
7078 && archinfo->arm64_iMinLine_lg2_szB <= 17);
7079 /* Round the requested address, in rT, down to the start of the
7080 containing block. */
7081 UInt tt = INSN(4,0);
7082 ULong lineszB = 1ULL << archinfo->arm64_iMinLine_lg2_szB;
7083 IRTemp addr = newTemp(Ity_I64);
7084 assign( addr, binop( Iop_And64,
7085 getIReg64orZR(tt),
7086 mkU64(~(lineszB - 1))) );
7087 /* Set the invalidation range, request exit-and-invalidate, with
7088 continuation at the next instruction. */
7089 stmt(IRStmt_Put(OFFB_CMSTART, mkexpr(addr)));
7090 stmt(IRStmt_Put(OFFB_CMLEN, mkU64(lineszB)));
7091 /* be paranoid ... */
7092 stmt( IRStmt_MBE(Imbe_Fence) );
7093 putPC(mkU64( guest_PC_curr_instr + 4 ));
7094 dres->whatNext = Dis_StopHere;
7095 dres->jk_StopHere = Ijk_InvalICache;
7096 DIP("ic ivau, %s\n", nameIReg64orZR(tt));
7097 return True;
7100 /* ------------------ DC_CVAU ------------------ */
7101 /* D5 0B 7B 001 Rt dc cvau, rT
7103 if ((INSN(31,0) & 0xFFFFFFE0) == 0xD50B7B20) {
7104 /* Exactly the same scheme as for IC IVAU, except we observe the
7105 dMinLine size, and request an Ijk_FlushDCache instead of
7106 Ijk_InvalICache. */
7107 /* We will always be provided with a valid dMinLine value. */
7108 vassert(archinfo->arm64_dMinLine_lg2_szB >= 2
7109 && archinfo->arm64_dMinLine_lg2_szB <= 17);
7110 /* Round the requested address, in rT, down to the start of the
7111 containing block. */
7112 UInt tt = INSN(4,0);
7113 ULong lineszB = 1ULL << archinfo->arm64_dMinLine_lg2_szB;
7114 IRTemp addr = newTemp(Ity_I64);
7115 assign( addr, binop( Iop_And64,
7116 getIReg64orZR(tt),
7117 mkU64(~(lineszB - 1))) );
7118 /* Set the flush range, request exit-and-flush, with
7119 continuation at the next instruction. */
7120 stmt(IRStmt_Put(OFFB_CMSTART, mkexpr(addr)));
7121 stmt(IRStmt_Put(OFFB_CMLEN, mkU64(lineszB)));
7122 /* be paranoid ... */
7123 stmt( IRStmt_MBE(Imbe_Fence) );
7124 putPC(mkU64( guest_PC_curr_instr + 4 ));
7125 dres->whatNext = Dis_StopHere;
7126 dres->jk_StopHere = Ijk_FlushDCache;
7127 DIP("dc cvau, %s\n", nameIReg64orZR(tt));
7128 return True;
7131 /* ------------------ ISB, DMB, DSB ------------------ */
7132 /* 31 21 11 7 6 4
7133 11010 10100 0 00 011 0011 CRm 1 01 11111 DMB opt
7134 11010 10100 0 00 011 0011 CRm 1 00 11111 DSB opt
7135 11010 10100 0 00 011 0011 CRm 1 10 11111 ISB opt
7137 if (INSN(31,22) == BITS10(1,1,0,1,0,1,0,1,0,0)
7138 && INSN(21,12) == BITS10(0,0,0,0,1,1,0,0,1,1)
7139 && INSN(7,7) == 1
7140 && INSN(6,5) <= BITS2(1,0) && INSN(4,0) == BITS5(1,1,1,1,1)) {
7141 UInt opc = INSN(6,5);
7142 UInt CRm = INSN(11,8);
7143 vassert(opc <= 2 && CRm <= 15);
7144 stmt(IRStmt_MBE(Imbe_Fence));
7145 const HChar* opNames[3]
7146 = { "dsb", "dmb", "isb" };
7147 const HChar* howNames[16]
7148 = { "#0", "oshld", "oshst", "osh", "#4", "nshld", "nshst", "nsh",
7149 "#8", "ishld", "ishst", "ish", "#12", "ld", "st", "sy" };
7150 DIP("%s %s\n", opNames[opc], howNames[CRm]);
7151 return True;
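/* For instance, "dmb ish" (0xD5033BBF) reaches this case with opc == 1
   and CRm == 11, and so disassembles as "dmb ish".  All three barrier
   kinds, whatever the CRm option, are conservatively modelled by the
   single Imbe_Fence emitted above. */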
7154 /* -------------------- NOP -------------------- */
7155 if (INSN(31,0) == 0xD503201F) {
7156 DIP("nop\n");
7157 return True;
7160 /* -------------------- BRK -------------------- */
7161 /* 31 23 20 4
7162 1101 0100 001 imm16 00000 BRK #imm16
7164 if (INSN(31,24) == BITS8(1,1,0,1,0,1,0,0)
7165 && INSN(23,21) == BITS3(0,0,1) && INSN(4,0) == BITS5(0,0,0,0,0)) {
7166 UInt imm16 = INSN(20,5);
7167 /* Request SIGTRAP and then restart of this insn. */
7168 putPC(mkU64(guest_PC_curr_instr + 0));
7169 dres->whatNext = Dis_StopHere;
7170 dres->jk_StopHere = Ijk_SigTRAP;
7171 DIP("brk #%u\n", imm16);
7172 return True;
7175 /* ------------------- YIELD ------------------- */
7176 /* 31 23 15 7
7177 1101 0101 0000 0011 0010 0000 0011 1111
7179 if (INSN(31,0) == 0xD503203F) {
7180 /* Request yield followed by continuation at the next insn. */
7181 putPC(mkU64(guest_PC_curr_instr + 4));
7182 dres->whatNext = Dis_StopHere;
7183 dres->jk_StopHere = Ijk_Yield;
7184 DIP("yield\n");
7185 return True;
7188 /* -------------------- HINT ------------------- */
7189 /* 31 23 15 11 4 3
7190 1101 0101 0000 0011 0010 imm7 1 1111
7191 Catch otherwise unhandled HINT instructions - any
7192 like YIELD which are explicitly handled should go
7193 above this case.
7195 if (INSN(31,24) == BITS8(1,1,0,1,0,1,0,1)
7196 && INSN(23,16) == BITS8(0,0,0,0,0,0,1,1)
7197 && INSN(15,12) == BITS4(0,0,1,0)
7198 && INSN(4,0) == BITS5(1,1,1,1,1)) {
7199 UInt imm7 = INSN(11,5);
7200 DIP("hint #%u\n", imm7);
7201 return True;
7204 /* ------------------- CLREX ------------------ */
7205 /* 31 23 15 11 7
7206 1101 0101 0000 0011 0011 m 0101 1111 CLREX CRm
7207 CRm is apparently ignored.
7209 if ((INSN(31,0) & 0xFFFFF0FF) == 0xD503305F) {
7210 UInt mm = INSN(11,8);
7211 /* This clears the exclusive monitor, cancelling any reservation made
7212 by a preceding load-exclusive (LDXR and friends). Arrange to hand it
7213 through to the back end. */
7214 if (abiinfo->guest__use_fallback_LLSC) {
7215 stmt( IRStmt_Put( OFFB_LLSC_SIZE, mkU64(0) )); // "no transaction"
7216 } else {
7217 stmt( IRStmt_MBE(Imbe_CancelReservation) );
7219 DIP("clrex #%u\n", mm);
7220 return True;
7223 vex_printf("ARM64 front end: branch_etc\n");
7224 return False;
7225 # undef INSN
7229 /*------------------------------------------------------------*/
7230 /*--- SIMD and FP instructions: helper functions ---*/
7231 /*------------------------------------------------------------*/
7233 /* Some constructors for interleave/deinterleave expressions. */
7235 static IRExpr* mk_CatEvenLanes64x2 ( IRTemp a10, IRTemp b10 ) {
7236 // returns a0 b0
7237 return binop(Iop_InterleaveLO64x2, mkexpr(a10), mkexpr(b10));
7240 static IRExpr* mk_CatOddLanes64x2 ( IRTemp a10, IRTemp b10 ) {
7241 // returns a1 b1
7242 return binop(Iop_InterleaveHI64x2, mkexpr(a10), mkexpr(b10));
7245 static IRExpr* mk_CatEvenLanes32x4 ( IRTemp a3210, IRTemp b3210 ) {
7246 // returns a2 a0 b2 b0
7247 return binop(Iop_CatEvenLanes32x4, mkexpr(a3210), mkexpr(b3210));
7250 static IRExpr* mk_CatOddLanes32x4 ( IRTemp a3210, IRTemp b3210 ) {
7251 // returns a3 a1 b3 b1
7252 return binop(Iop_CatOddLanes32x4, mkexpr(a3210), mkexpr(b3210));
7255 static IRExpr* mk_InterleaveLO32x4 ( IRTemp a3210, IRTemp b3210 ) {
7256 // returns a1 b1 a0 b0
7257 return binop(Iop_InterleaveLO32x4, mkexpr(a3210), mkexpr(b3210));
7260 static IRExpr* mk_InterleaveHI32x4 ( IRTemp a3210, IRTemp b3210 ) {
7261 // returns a3 b3 a2 b2
7262 return binop(Iop_InterleaveHI32x4, mkexpr(a3210), mkexpr(b3210));
7265 static IRExpr* mk_CatEvenLanes16x8 ( IRTemp a76543210, IRTemp b76543210 ) {
7266 // returns a6 a4 a2 a0 b6 b4 b2 b0
7267 return binop(Iop_CatEvenLanes16x8, mkexpr(a76543210), mkexpr(b76543210));
7270 static IRExpr* mk_CatOddLanes16x8 ( IRTemp a76543210, IRTemp b76543210 ) {
7271 // returns a7 a5 a3 a1 b7 b5 b3 b1
7272 return binop(Iop_CatOddLanes16x8, mkexpr(a76543210), mkexpr(b76543210));
7275 static IRExpr* mk_InterleaveLO16x8 ( IRTemp a76543210, IRTemp b76543210 ) {
7276 // returns a3 b3 a2 b2 a1 b1 a0 b0
7277 return binop(Iop_InterleaveLO16x8, mkexpr(a76543210), mkexpr(b76543210));
7280 static IRExpr* mk_InterleaveHI16x8 ( IRTemp a76543210, IRTemp b76543210 ) {
7281 // returns a7 b7 a6 b6 a5 b5 a4 b4
7282 return binop(Iop_InterleaveHI16x8, mkexpr(a76543210), mkexpr(b76543210));
7285 static IRExpr* mk_CatEvenLanes8x16 ( IRTemp aFEDCBA9876543210,
7286 IRTemp bFEDCBA9876543210 ) {
7287 // returns aE aC aA a8 a6 a4 a2 a0 bE bC bA b8 b6 b4 b2 b0
7288 return binop(Iop_CatEvenLanes8x16, mkexpr(aFEDCBA9876543210),
7289 mkexpr(bFEDCBA9876543210));
7292 static IRExpr* mk_CatOddLanes8x16 ( IRTemp aFEDCBA9876543210,
7293 IRTemp bFEDCBA9876543210 ) {
7294 // returns aF aD aB a9 a7 a5 a3 a1 bF bD bB b9 b7 b5 b3 b1
7295 return binop(Iop_CatOddLanes8x16, mkexpr(aFEDCBA9876543210),
7296 mkexpr(bFEDCBA9876543210));
7299 static IRExpr* mk_InterleaveLO8x16 ( IRTemp aFEDCBA9876543210,
7300 IRTemp bFEDCBA9876543210 ) {
7301 // returns a7 b7 a6 b6 a5 b5 a4 b4 a3 b3 a2 b2 a1 b1 a0 b0
7302 return binop(Iop_InterleaveLO8x16, mkexpr(aFEDCBA9876543210),
7303 mkexpr(bFEDCBA9876543210));
7306 static IRExpr* mk_InterleaveHI8x16 ( IRTemp aFEDCBA9876543210,
7307 IRTemp bFEDCBA9876543210 ) {
7308 // returns aF bF aE bE aD bD aC bC aB bB aA bA a9 b9 a8 b8
7309 return binop(Iop_InterleaveHI8x16, mkexpr(aFEDCBA9876543210),
7310 mkexpr(bFEDCBA9876543210));
7313 /* Generate N copies of |bit| in the bottom of a ULong. */
7314 static ULong Replicate ( ULong bit, Int N )
7316 vassert(bit <= 1 && N >= 1 && N < 64);
7317 if (bit == 0) {
7318 return 0;
7319 } else {
7320 /* Careful. This won't work for N == 64. */
7321 return (1ULL << N) - 1;
7325 static ULong Replicate32x2 ( ULong bits32 )
7327 vassert(0 == (bits32 & ~0xFFFFFFFFULL));
7328 return (bits32 << 32) | bits32;
7331 static ULong Replicate16x4 ( ULong bits16 )
7333 vassert(0 == (bits16 & ~0xFFFFULL));
7334 return Replicate32x2((bits16 << 16) | bits16);
7337 static ULong Replicate8x8 ( ULong bits8 )
7339 vassert(0 == (bits8 & ~0xFFULL));
7340 return Replicate16x4((bits8 << 8) | bits8);
7343 /* Expand the VFPExpandImm-style encoding in the bottom 8 bits of
7344 |imm8| to either a 32-bit value if N is 32 or a 64-bit value if N
7345 is 64. In the former case, the upper 32 bits of the returned value
7346 are guaranteed to be zero. */
7347 static ULong VFPExpandImm ( ULong imm8, Int N )
7349 vassert(imm8 <= 0xFF);
7350 vassert(N == 32 || N == 64);
7351 Int E = ((N == 32) ? 8 : 11) - 2; // The spec incorrectly omits the -2.
7352 Int F = N - E - 1;
7353 ULong imm8_6 = (imm8 >> 6) & 1;
7354 /* sign: 1 bit */
7355 /* exp: E bits */
7356 /* frac: F bits */
7357 ULong sign = (imm8 >> 7) & 1;
7358 ULong exp = ((imm8_6 ^ 1) << (E-1)) | Replicate(imm8_6, E-1);
7359 ULong frac = ((imm8 & 63) << (F-6)) | Replicate(0, F-6);
7360 vassert(sign < (1ULL << 1));
7361 vassert(exp < (1ULL << E));
7362 vassert(frac < (1ULL << F));
7363 vassert(1 + E + F == N);
7364 ULong res = (sign << (E+F)) | (exp << F) | frac;
7365 return res;
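/* Worked example: imm8 == 0x70 with N == 64.  Then sign == 0,
   imm8<6> == 1, so exp == 0:11111111 and frac == 110000 followed by
   48 zero bits, giving 0x3FF0000000000000, i.e. the double 1.0
   (the imm8 an assembler produces for "fmov d0, #1.0"). */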
7368 /* Expand an AdvSIMDExpandImm-style encoding into a 64-bit value.
7369 This might fail, as indicated by the returned Bool. Page 2530 of
7370 the manual. */
7371 static Bool AdvSIMDExpandImm ( /*OUT*/ULong* res,
7372 UInt op, UInt cmode, UInt imm8 )
7374 vassert(op <= 1);
7375 vassert(cmode <= 15);
7376 vassert(imm8 <= 255);
7378 *res = 0; /* will overwrite iff returning True */
7380 ULong imm64 = 0;
7381 Bool testimm8 = False;
7383 switch (cmode >> 1) {
7384 case 0:
7385 testimm8 = False; imm64 = Replicate32x2(imm8); break;
7386 case 1:
7387 testimm8 = True; imm64 = Replicate32x2(imm8 << 8); break;
7388 case 2:
7389 testimm8 = True; imm64 = Replicate32x2(imm8 << 16); break;
7390 case 3:
7391 testimm8 = True; imm64 = Replicate32x2(imm8 << 24); break;
7392 case 4:
7393 testimm8 = False; imm64 = Replicate16x4(imm8); break;
7394 case 5:
7395 testimm8 = True; imm64 = Replicate16x4(imm8 << 8); break;
7396 case 6:
7397 testimm8 = True;
7398 if ((cmode & 1) == 0)
7399 imm64 = Replicate32x2((imm8 << 8) | 0xFF);
7400 else
7401 imm64 = Replicate32x2((imm8 << 16) | 0xFFFF);
7402 break;
7403 case 7:
7404 testimm8 = False;
7405 if ((cmode & 1) == 0 && op == 0)
7406 imm64 = Replicate8x8(imm8);
7407 if ((cmode & 1) == 0 && op == 1) {
7408 imm64 = 0; imm64 |= (imm8 & 0x80) ? 0xFF : 0x00;
7409 imm64 <<= 8; imm64 |= (imm8 & 0x40) ? 0xFF : 0x00;
7410 imm64 <<= 8; imm64 |= (imm8 & 0x20) ? 0xFF : 0x00;
7411 imm64 <<= 8; imm64 |= (imm8 & 0x10) ? 0xFF : 0x00;
7412 imm64 <<= 8; imm64 |= (imm8 & 0x08) ? 0xFF : 0x00;
7413 imm64 <<= 8; imm64 |= (imm8 & 0x04) ? 0xFF : 0x00;
7414 imm64 <<= 8; imm64 |= (imm8 & 0x02) ? 0xFF : 0x00;
7415 imm64 <<= 8; imm64 |= (imm8 & 0x01) ? 0xFF : 0x00;
7417 if ((cmode & 1) == 1 && op == 0) {
7418 ULong imm8_7 = (imm8 >> 7) & 1;
7419 ULong imm8_6 = (imm8 >> 6) & 1;
7420 ULong imm8_50 = imm8 & 63;
7421 ULong imm32 = (imm8_7 << (1 + 5 + 6 + 19))
7422 | ((imm8_6 ^ 1) << (5 + 6 + 19))
7423 | (Replicate(imm8_6, 5) << (6 + 19))
7424 | (imm8_50 << 19);
7425 imm64 = Replicate32x2(imm32);
7427 if ((cmode & 1) == 1 && op == 1) {
7428 // imm64 = imm8<7>:NOT(imm8<6>)
7429 // :Replicate(imm8<6>,8):imm8<5:0>:Zeros(48);
7430 ULong imm8_7 = (imm8 >> 7) & 1;
7431 ULong imm8_6 = (imm8 >> 6) & 1;
7432 ULong imm8_50 = imm8 & 63;
7433 imm64 = (imm8_7 << 63) | ((imm8_6 ^ 1) << 62)
7434 | (Replicate(imm8_6, 8) << 54)
7435 | (imm8_50 << 48);
7437 break;
7438 default:
7439 vassert(0);
7442 if (testimm8 && imm8 == 0)
7443 return False;
7445 *res = imm64;
7446 return True;
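/* Worked example: op == 1, cmode == 0xE (cmode >> 1 == 7, cmode & 1
   == 0) with imm8 == 0xA5 expands each imm8 bit to a whole byte,
   giving imm64 == 0xFF00FF0000FF00FF.  Note also that the groups with
   testimm8 set (the shifted-by-8/16/24 forms) reject imm8 == 0 and
   make the function return False. */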
7449 /* Helper for decoding laneage for vector operations that can be
7450 of the form 4x32, 2x64 or 2x32-and-zero-upper-half, as encoded by Q
7451 and SZ bits, typically for vector floating point. */
7452 static Bool getLaneInfo_Q_SZ ( /*OUT*/IRType* tyI, /*OUT*/IRType* tyF,
7453 /*OUT*/UInt* nLanes, /*OUT*/Bool* zeroUpper,
7454 /*OUT*/const HChar** arrSpec,
7455 Bool bitQ, Bool bitSZ )
7457 vassert(bitQ == True || bitQ == False);
7458 vassert(bitSZ == True || bitSZ == False);
7459 if (bitQ && bitSZ) { // 2x64
7460 if (tyI) *tyI = Ity_I64;
7461 if (tyF) *tyF = Ity_F64;
7462 if (nLanes) *nLanes = 2;
7463 if (zeroUpper) *zeroUpper = False;
7464 if (arrSpec) *arrSpec = "2d";
7465 return True;
7467 if (bitQ && !bitSZ) { // 4x32
7468 if (tyI) *tyI = Ity_I32;
7469 if (tyF) *tyF = Ity_F32;
7470 if (nLanes) *nLanes = 4;
7471 if (zeroUpper) *zeroUpper = False;
7472 if (arrSpec) *arrSpec = "4s";
7473 return True;
7475 if (!bitQ && !bitSZ) { // 2x32
7476 if (tyI) *tyI = Ity_I32;
7477 if (tyF) *tyF = Ity_F32;
7478 if (nLanes) *nLanes = 2;
7479 if (zeroUpper) *zeroUpper = True;
7480 if (arrSpec) *arrSpec = "2s";
7481 return True;
7483 // Else impliedly 1x64, which isn't allowed.
7484 return False;
7487 /* Helper for decoding laneage for shift-style vector operations
7488 that involve an immediate shift amount. */
7489 static Bool getLaneInfo_IMMH_IMMB ( /*OUT*/UInt* shift, /*OUT*/UInt* szBlg2,
7490 UInt immh, UInt immb )
7492 vassert(immh < (1<<4));
7493 vassert(immb < (1<<3));
7494 UInt immhb = (immh << 3) | immb;
7495 if (immh & 8) {
7496 if (shift) *shift = 128 - immhb;
7497 if (szBlg2) *szBlg2 = 3;
7498 return True;
7500 if (immh & 4) {
7501 if (shift) *shift = 64 - immhb;
7502 if (szBlg2) *szBlg2 = 2;
7503 return True;
7505 if (immh & 2) {
7506 if (shift) *shift = 32 - immhb;
7507 if (szBlg2) *szBlg2 = 1;
7508 return True;
7510 if (immh & 1) {
7511 if (shift) *shift = 16 - immhb;
7512 if (szBlg2) *szBlg2 = 0;
7513 return True;
7515 return False;
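/* Example: immh == 0b0010 with immb == 0b101 gives immhb == 21.  The
   leading 1 of immh is in the "2" position, so this is a 16-bit-lane
   operation: szBlg2 == 1 and shift == 32 - 21 == 11. */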
7518 /* Generate IR to fold all lanes of the V128 value in 'src' as
7519 characterised by the operator 'op', and return the result in the
7520 bottom bits of a V128, with all other bits set to zero. */
7521 static IRTemp math_FOLDV ( IRTemp src, IROp op )
7523 /* The basic idea is to use repeated applications of Iop_CatEven*
7524 and Iop_CatOdd* operators to 'src' so as to clone each lane into
7525 a complete vector. Then fold all those vectors with 'op' and
7526 zero out all but the least significant lane. */
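/* For instance, folding with Iop_Add8x16: after the cloning tree below,
   xAll0 .. xAllF each hold one of the 16 original byte lanes replicated
   into every lane, the pairwise ops then leave the sum of all 16 lanes
   in every lane, and Iop_ZeroHI120ofV128 keeps only the bottom byte.
   A source vector holding bytes 0 .. 15 therefore folds to 120 (0x78)
   in lane 0. */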
7527 switch (op) {
7528 case Iop_Min8Sx16: case Iop_Min8Ux16:
7529 case Iop_Max8Sx16: case Iop_Max8Ux16: case Iop_Add8x16: {
7530 /* NB: temp naming here is misleading -- the naming is for 8
7531 lanes of 16 bit, whereas what is being operated on is 16
7532 lanes of 8 bits. */
7533 IRTemp x76543210 = src;
7534 IRTemp x76547654 = newTempV128();
7535 IRTemp x32103210 = newTempV128();
7536 assign(x76547654, mk_CatOddLanes64x2 (x76543210, x76543210));
7537 assign(x32103210, mk_CatEvenLanes64x2(x76543210, x76543210));
7538 IRTemp x76767676 = newTempV128();
7539 IRTemp x54545454 = newTempV128();
7540 IRTemp x32323232 = newTempV128();
7541 IRTemp x10101010 = newTempV128();
7542 assign(x76767676, mk_CatOddLanes32x4 (x76547654, x76547654));
7543 assign(x54545454, mk_CatEvenLanes32x4(x76547654, x76547654));
7544 assign(x32323232, mk_CatOddLanes32x4 (x32103210, x32103210));
7545 assign(x10101010, mk_CatEvenLanes32x4(x32103210, x32103210));
7546 IRTemp x77777777 = newTempV128();
7547 IRTemp x66666666 = newTempV128();
7548 IRTemp x55555555 = newTempV128();
7549 IRTemp x44444444 = newTempV128();
7550 IRTemp x33333333 = newTempV128();
7551 IRTemp x22222222 = newTempV128();
7552 IRTemp x11111111 = newTempV128();
7553 IRTemp x00000000 = newTempV128();
7554 assign(x77777777, mk_CatOddLanes16x8 (x76767676, x76767676));
7555 assign(x66666666, mk_CatEvenLanes16x8(x76767676, x76767676));
7556 assign(x55555555, mk_CatOddLanes16x8 (x54545454, x54545454));
7557 assign(x44444444, mk_CatEvenLanes16x8(x54545454, x54545454));
7558 assign(x33333333, mk_CatOddLanes16x8 (x32323232, x32323232));
7559 assign(x22222222, mk_CatEvenLanes16x8(x32323232, x32323232));
7560 assign(x11111111, mk_CatOddLanes16x8 (x10101010, x10101010));
7561 assign(x00000000, mk_CatEvenLanes16x8(x10101010, x10101010));
7562 /* Naming not misleading after here. */
7563 IRTemp xAllF = newTempV128();
7564 IRTemp xAllE = newTempV128();
7565 IRTemp xAllD = newTempV128();
7566 IRTemp xAllC = newTempV128();
7567 IRTemp xAllB = newTempV128();
7568 IRTemp xAllA = newTempV128();
7569 IRTemp xAll9 = newTempV128();
7570 IRTemp xAll8 = newTempV128();
7571 IRTemp xAll7 = newTempV128();
7572 IRTemp xAll6 = newTempV128();
7573 IRTemp xAll5 = newTempV128();
7574 IRTemp xAll4 = newTempV128();
7575 IRTemp xAll3 = newTempV128();
7576 IRTemp xAll2 = newTempV128();
7577 IRTemp xAll1 = newTempV128();
7578 IRTemp xAll0 = newTempV128();
7579 assign(xAllF, mk_CatOddLanes8x16 (x77777777, x77777777));
7580 assign(xAllE, mk_CatEvenLanes8x16(x77777777, x77777777));
7581 assign(xAllD, mk_CatOddLanes8x16 (x66666666, x66666666));
7582 assign(xAllC, mk_CatEvenLanes8x16(x66666666, x66666666));
7583 assign(xAllB, mk_CatOddLanes8x16 (x55555555, x55555555));
7584 assign(xAllA, mk_CatEvenLanes8x16(x55555555, x55555555));
7585 assign(xAll9, mk_CatOddLanes8x16 (x44444444, x44444444));
7586 assign(xAll8, mk_CatEvenLanes8x16(x44444444, x44444444));
7587 assign(xAll7, mk_CatOddLanes8x16 (x33333333, x33333333));
7588 assign(xAll6, mk_CatEvenLanes8x16(x33333333, x33333333));
7589 assign(xAll5, mk_CatOddLanes8x16 (x22222222, x22222222));
7590 assign(xAll4, mk_CatEvenLanes8x16(x22222222, x22222222));
7591 assign(xAll3, mk_CatOddLanes8x16 (x11111111, x11111111));
7592 assign(xAll2, mk_CatEvenLanes8x16(x11111111, x11111111));
7593 assign(xAll1, mk_CatOddLanes8x16 (x00000000, x00000000));
7594 assign(xAll0, mk_CatEvenLanes8x16(x00000000, x00000000));
7595 IRTemp maxFE = newTempV128();
7596 IRTemp maxDC = newTempV128();
7597 IRTemp maxBA = newTempV128();
7598 IRTemp max98 = newTempV128();
7599 IRTemp max76 = newTempV128();
7600 IRTemp max54 = newTempV128();
7601 IRTemp max32 = newTempV128();
7602 IRTemp max10 = newTempV128();
7603 assign(maxFE, binop(op, mkexpr(xAllF), mkexpr(xAllE)));
7604 assign(maxDC, binop(op, mkexpr(xAllD), mkexpr(xAllC)));
7605 assign(maxBA, binop(op, mkexpr(xAllB), mkexpr(xAllA)));
7606 assign(max98, binop(op, mkexpr(xAll9), mkexpr(xAll8)));
7607 assign(max76, binop(op, mkexpr(xAll7), mkexpr(xAll6)));
7608 assign(max54, binop(op, mkexpr(xAll5), mkexpr(xAll4)));
7609 assign(max32, binop(op, mkexpr(xAll3), mkexpr(xAll2)));
7610 assign(max10, binop(op, mkexpr(xAll1), mkexpr(xAll0)));
7611 IRTemp maxFEDC = newTempV128();
7612 IRTemp maxBA98 = newTempV128();
7613 IRTemp max7654 = newTempV128();
7614 IRTemp max3210 = newTempV128();
7615 assign(maxFEDC, binop(op, mkexpr(maxFE), mkexpr(maxDC)));
7616 assign(maxBA98, binop(op, mkexpr(maxBA), mkexpr(max98)));
7617 assign(max7654, binop(op, mkexpr(max76), mkexpr(max54)));
7618 assign(max3210, binop(op, mkexpr(max32), mkexpr(max10)));
7619 IRTemp maxFEDCBA98 = newTempV128();
7620 IRTemp max76543210 = newTempV128();
7621 assign(maxFEDCBA98, binop(op, mkexpr(maxFEDC), mkexpr(maxBA98)));
7622 assign(max76543210, binop(op, mkexpr(max7654), mkexpr(max3210)));
7623 IRTemp maxAllLanes = newTempV128();
7624 assign(maxAllLanes, binop(op, mkexpr(maxFEDCBA98),
7625 mkexpr(max76543210)));
7626 IRTemp res = newTempV128();
7627 assign(res, unop(Iop_ZeroHI120ofV128, mkexpr(maxAllLanes)));
7628 return res;
7630 case Iop_Min16Sx8: case Iop_Min16Ux8:
7631 case Iop_Max16Sx8: case Iop_Max16Ux8: case Iop_Add16x8: {
7632 IRTemp x76543210 = src;
7633 IRTemp x76547654 = newTempV128();
7634 IRTemp x32103210 = newTempV128();
7635 assign(x76547654, mk_CatOddLanes64x2 (x76543210, x76543210));
7636 assign(x32103210, mk_CatEvenLanes64x2(x76543210, x76543210));
7637 IRTemp x76767676 = newTempV128();
7638 IRTemp x54545454 = newTempV128();
7639 IRTemp x32323232 = newTempV128();
7640 IRTemp x10101010 = newTempV128();
7641 assign(x76767676, mk_CatOddLanes32x4 (x76547654, x76547654));
7642 assign(x54545454, mk_CatEvenLanes32x4(x76547654, x76547654));
7643 assign(x32323232, mk_CatOddLanes32x4 (x32103210, x32103210));
7644 assign(x10101010, mk_CatEvenLanes32x4(x32103210, x32103210));
7645 IRTemp x77777777 = newTempV128();
7646 IRTemp x66666666 = newTempV128();
7647 IRTemp x55555555 = newTempV128();
7648 IRTemp x44444444 = newTempV128();
7649 IRTemp x33333333 = newTempV128();
7650 IRTemp x22222222 = newTempV128();
7651 IRTemp x11111111 = newTempV128();
7652 IRTemp x00000000 = newTempV128();
7653 assign(x77777777, mk_CatOddLanes16x8 (x76767676, x76767676));
7654 assign(x66666666, mk_CatEvenLanes16x8(x76767676, x76767676));
7655 assign(x55555555, mk_CatOddLanes16x8 (x54545454, x54545454));
7656 assign(x44444444, mk_CatEvenLanes16x8(x54545454, x54545454));
7657 assign(x33333333, mk_CatOddLanes16x8 (x32323232, x32323232));
7658 assign(x22222222, mk_CatEvenLanes16x8(x32323232, x32323232));
7659 assign(x11111111, mk_CatOddLanes16x8 (x10101010, x10101010));
7660 assign(x00000000, mk_CatEvenLanes16x8(x10101010, x10101010));
7661 IRTemp max76 = newTempV128();
7662 IRTemp max54 = newTempV128();
7663 IRTemp max32 = newTempV128();
7664 IRTemp max10 = newTempV128();
7665 assign(max76, binop(op, mkexpr(x77777777), mkexpr(x66666666)));
7666 assign(max54, binop(op, mkexpr(x55555555), mkexpr(x44444444)));
7667 assign(max32, binop(op, mkexpr(x33333333), mkexpr(x22222222)));
7668 assign(max10, binop(op, mkexpr(x11111111), mkexpr(x00000000)));
7669 IRTemp max7654 = newTempV128();
7670 IRTemp max3210 = newTempV128();
7671 assign(max7654, binop(op, mkexpr(max76), mkexpr(max54)));
7672 assign(max3210, binop(op, mkexpr(max32), mkexpr(max10)));
7673 IRTemp max76543210 = newTempV128();
7674 assign(max76543210, binop(op, mkexpr(max7654), mkexpr(max3210)));
7675 IRTemp res = newTempV128();
7676 assign(res, unop(Iop_ZeroHI112ofV128, mkexpr(max76543210)));
7677 return res;
7679 case Iop_Max32Fx4: case Iop_Min32Fx4:
7680 case Iop_Min32Sx4: case Iop_Min32Ux4:
7681 case Iop_Max32Sx4: case Iop_Max32Ux4: case Iop_Add32x4: {
7682 IRTemp x3210 = src;
7683 IRTemp x3232 = newTempV128();
7684 IRTemp x1010 = newTempV128();
7685 assign(x3232, mk_CatOddLanes64x2 (x3210, x3210));
7686 assign(x1010, mk_CatEvenLanes64x2(x3210, x3210));
7687 IRTemp x3333 = newTempV128();
7688 IRTemp x2222 = newTempV128();
7689 IRTemp x1111 = newTempV128();
7690 IRTemp x0000 = newTempV128();
7691 assign(x3333, mk_CatOddLanes32x4 (x3232, x3232));
7692 assign(x2222, mk_CatEvenLanes32x4(x3232, x3232));
7693 assign(x1111, mk_CatOddLanes32x4 (x1010, x1010));
7694 assign(x0000, mk_CatEvenLanes32x4(x1010, x1010));
7695 IRTemp max32 = newTempV128();
7696 IRTemp max10 = newTempV128();
7697 assign(max32, binop(op, mkexpr(x3333), mkexpr(x2222)));
7698 assign(max10, binop(op, mkexpr(x1111), mkexpr(x0000)));
7699 IRTemp max3210 = newTempV128();
7700 assign(max3210, binop(op, mkexpr(max32), mkexpr(max10)));
7701 IRTemp res = newTempV128();
7702 assign(res, unop(Iop_ZeroHI96ofV128, mkexpr(max3210)));
7703 return res;
7705 case Iop_Add64x2: {
7706 IRTemp x10 = src;
7707 IRTemp x00 = newTempV128();
7708 IRTemp x11 = newTempV128();
7709 assign(x11, binop(Iop_InterleaveHI64x2, mkexpr(x10), mkexpr(x10)));
7710 assign(x00, binop(Iop_InterleaveLO64x2, mkexpr(x10), mkexpr(x10)));
7711 IRTemp max10 = newTempV128();
7712 assign(max10, binop(op, mkexpr(x11), mkexpr(x00)));
7713 IRTemp res = newTempV128();
7714 assign(res, unop(Iop_ZeroHI64ofV128, mkexpr(max10)));
7715 return res;
7717 default:
7718 vassert(0);
7723 /* Generate IR for TBL and TBX. This deals with the 128 bit case
7724 only. */
7725 static IRTemp math_TBL_TBX ( IRTemp tab[4], UInt len, IRTemp src,
7726 IRTemp oor_values )
7728 vassert(len >= 0 && len <= 3);
7730 /* Generate some useful constants as concisely as possible. */
7731 IRTemp half15 = newTemp(Ity_I64);
7732 assign(half15, mkU64(0x0F0F0F0F0F0F0F0FULL));
7733 IRTemp half16 = newTemp(Ity_I64);
7734 assign(half16, mkU64(0x1010101010101010ULL));
7736 /* A zero vector */
7737 IRTemp allZero = newTempV128();
7738 assign(allZero, mkV128(0x0000));
7739 /* A vector containing 15 in each 8-bit lane */
7740 IRTemp all15 = newTempV128();
7741 assign(all15, binop(Iop_64HLtoV128, mkexpr(half15), mkexpr(half15)));
7742 /* A vector containing 16 in each 8-bit lane */
7743 IRTemp all16 = newTempV128();
7744 assign(all16, binop(Iop_64HLtoV128, mkexpr(half16), mkexpr(half16)));
7745 /* A vector containing 32 in each 8-bit lane */
7746 IRTemp all32 = newTempV128();
7747 assign(all32, binop(Iop_Add8x16, mkexpr(all16), mkexpr(all16)));
7748 /* A vector containing 48 in each 8-bit lane */
7749 IRTemp all48 = newTempV128();
7750 assign(all48, binop(Iop_Add8x16, mkexpr(all16), mkexpr(all32)));
7751 /* A vector containing 64 in each 8-bit lane */
7752 IRTemp all64 = newTempV128();
7753 assign(all64, binop(Iop_Add8x16, mkexpr(all32), mkexpr(all32)));
7755 /* Group the 16/32/48/64 vectors so as to be indexable. */
7756 IRTemp allXX[4] = { all16, all32, all48, all64 };
7758 /* Compute the result for each table vector, with zeroes in places
7759 where the index values are out of range, and OR them into the
7760 running vector. */
7761 IRTemp running_result = newTempV128();
7762 assign(running_result, mkV128(0));
7764 UInt tabent;
7765 for (tabent = 0; tabent <= len; tabent++) {
7766 vassert(tabent >= 0 && tabent < 4);
7767 IRTemp bias = newTempV128();
7768 assign(bias,
7769 mkexpr(tabent == 0 ? allZero : allXX[tabent-1]));
7770 IRTemp biased_indices = newTempV128();
7771 assign(biased_indices,
7772 binop(Iop_Sub8x16, mkexpr(src), mkexpr(bias)));
7773 IRTemp valid_mask = newTempV128();
7774 assign(valid_mask,
7775 binop(Iop_CmpGT8Ux16, mkexpr(all16), mkexpr(biased_indices)));
7776 IRTemp safe_biased_indices = newTempV128();
7777 assign(safe_biased_indices,
7778 binop(Iop_AndV128, mkexpr(biased_indices), mkexpr(all15)));
7779 IRTemp results_or_junk = newTempV128();
7780 assign(results_or_junk,
7781 binop(Iop_Perm8x16, mkexpr(tab[tabent]),
7782 mkexpr(safe_biased_indices)));
7783 IRTemp results_or_zero = newTempV128();
7784 assign(results_or_zero,
7785 binop(Iop_AndV128, mkexpr(results_or_junk), mkexpr(valid_mask)));
7786 /* And OR that into the running result. */
7787 IRTemp tmp = newTempV128();
7788 assign(tmp, binop(Iop_OrV128, mkexpr(results_or_zero),
7789 mkexpr(running_result)));
7790 running_result = tmp;
7793 /* So now running_result holds the overall result where the indices
7794 are in range, and zero in out-of-range lanes. Now we need to
7795 compute an overall validity mask and use this to copy in the
7796 lanes in the oor_values for out of range indices. This is
7797 unnecessary for TBL but will get folded out by iropt, so we lean
7798 on that and generate the same code for TBL and TBX here. */
7799 IRTemp overall_valid_mask = newTempV128();
7800 assign(overall_valid_mask,
7801 binop(Iop_CmpGT8Ux16, mkexpr(allXX[len]), mkexpr(src)));
7802 IRTemp result = newTempV128();
7803 assign(result,
7804 binop(Iop_OrV128,
7805 mkexpr(running_result),
7806 binop(Iop_AndV128,
7807 mkexpr(oor_values),
7808 unop(Iop_NotV128, mkexpr(overall_valid_mask)))));
7809 return result;
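/* Illustration of the intended semantics, with len == 0 (one table
   register): a src byte of 0x03 selects byte 3 of tab[0], whereas any
   src byte >= 16 fails the CmpGT8Ux16 test and instead picks up the
   corresponding byte of oor_values -- zero for TBL, the old destination
   byte for TBX (the caller chooses oor_values accordingly). */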
7813 /* Let |argL| and |argR| be V128 values, and let |opI64x2toV128| be
7814 an op which takes two I64s and produces a V128. That is, a widening
7815 operator. Generate IR which applies |opI64x2toV128| to either the
7816 lower (if |is2| is False) or upper (if |is2| is True) halves of
7817 |argL| and |argR|, and return the value in a new IRTemp.
7819 static
7820 IRTemp math_BINARY_WIDENING_V128 ( Bool is2, IROp opI64x2toV128,
7821 IRExpr* argL, IRExpr* argR )
7823 IRTemp res = newTempV128();
7824 IROp slice = is2 ? Iop_V128HIto64 : Iop_V128to64;
7825 assign(res, binop(opI64x2toV128, unop(slice, argL),
7826 unop(slice, argR)));
7827 return res;
7831 /* Generate signed/unsigned absolute difference vector IR. */
7832 static
7833 IRTemp math_ABD ( Bool isU, UInt size, IRExpr* argLE, IRExpr* argRE )
7835 vassert(size <= 3);
7836 IRTemp argL = newTempV128();
7837 IRTemp argR = newTempV128();
7838 IRTemp msk = newTempV128();
7839 IRTemp res = newTempV128();
7840 assign(argL, argLE);
7841 assign(argR, argRE);
7842 assign(msk, binop(isU ? mkVecCMPGTU(size) : mkVecCMPGTS(size),
7843 mkexpr(argL), mkexpr(argR)));
7844 assign(res,
7845 binop(Iop_OrV128,
7846 binop(Iop_AndV128,
7847 binop(mkVecSUB(size), mkexpr(argL), mkexpr(argR)),
7848 mkexpr(msk)),
7849 binop(Iop_AndV128,
7850 binop(mkVecSUB(size), mkexpr(argR), mkexpr(argL)),
7851 unop(Iop_NotV128, mkexpr(msk)))));
7852 return res;
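/* Example at size == 0 (byte lanes) with isU == True: for lane values
   argL == 3 and argR == 10, the mask lane is 0 (3 > 10 is false), so
   the result lane is (10 - 3) & 0xFF == 7, the absolute difference as
   required. */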
7856 /* Generate IR that takes a V128 and sign- or zero-widens
7857 either the lower or upper set of lanes to twice-as-wide,
7858 resulting in a new V128 value. */
7859 static
7860 IRTemp math_WIDEN_LO_OR_HI_LANES ( Bool zWiden, Bool fromUpperHalf,
7861 UInt sizeNarrow, IRExpr* srcE )
7863 IRTemp src = newTempV128();
7864 IRTemp res = newTempV128();
7865 assign(src, srcE);
7866 switch (sizeNarrow) {
7867 case X10:
7868 assign(res,
7869 binop(zWiden ? Iop_ShrN64x2 : Iop_SarN64x2,
7870 binop(fromUpperHalf ? Iop_InterleaveHI32x4
7871 : Iop_InterleaveLO32x4,
7872 mkexpr(src),
7873 mkexpr(src)),
7874 mkU8(32)));
7875 break;
7876 case X01:
7877 assign(res,
7878 binop(zWiden ? Iop_ShrN32x4 : Iop_SarN32x4,
7879 binop(fromUpperHalf ? Iop_InterleaveHI16x8
7880 : Iop_InterleaveLO16x8,
7881 mkexpr(src),
7882 mkexpr(src)),
7883 mkU8(16)));
7884 break;
7885 case X00:
7886 assign(res,
7887 binop(zWiden ? Iop_ShrN16x8 : Iop_SarN16x8,
7888 binop(fromUpperHalf ? Iop_InterleaveHI8x16
7889 : Iop_InterleaveLO8x16,
7890 mkexpr(src),
7891 mkexpr(src)),
7892 mkU8(8)));
7893 break;
7894 default:
7895 vassert(0);
7897 return res;
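/* Example for sizeNarrow == X01 (16 -> 32): interleaving the source
   with itself leaves each selected 16-bit lane duplicated in both
   halves of a 32-bit lane; the shift right by 16 then either
   zero-extends (ShrN) or sign-extends (SarN) the copy sitting in the
   upper half.  So a 16-bit lane holding 0x8001 widens to 0x00008001
   when zero-widening, or 0xFFFF8001 when sign-widening. */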
7901 /* Generate IR that takes a V128 and sign- or zero-widens
7902 either the even or odd lanes to twice-as-wide,
7903 resulting in a new V128 value. */
7904 static
7905 IRTemp math_WIDEN_EVEN_OR_ODD_LANES ( Bool zWiden, Bool fromOdd,
7906 UInt sizeNarrow, IRExpr* srcE )
7908 IRTemp src = newTempV128();
7909 IRTemp res = newTempV128();
7910 IROp opSAR = mkVecSARN(sizeNarrow+1);
7911 IROp opSHR = mkVecSHRN(sizeNarrow+1);
7912 IROp opSHL = mkVecSHLN(sizeNarrow+1);
7913 IROp opSxR = zWiden ? opSHR : opSAR;
7914 UInt amt = 0;
7915 switch (sizeNarrow) {
7916 case X10: amt = 32; break;
7917 case X01: amt = 16; break;
7918 case X00: amt = 8; break;
7919 default: vassert(0);
7921 assign(src, srcE);
7922 if (fromOdd) {
7923 assign(res, binop(opSxR, mkexpr(src), mkU8(amt)));
7924 } else {
7925 assign(res, binop(opSxR, binop(opSHL, mkexpr(src), mkU8(amt)),
7926 mkU8(amt)));
7928 return res;
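/* Example for sizeNarrow == X00 (8 -> 16), so the shifts operate on
   16-bit lanes with amt == 8.  For a 16-bit lane holding 0xABCD (odd
   byte 0xAB, even byte 0xCD): fromOdd gives 0x00AB (zero-widen) or
   0xFFAB (sign-widen); the even case first shifts left by 8 and then
   right by 8, giving 0x00CD or 0xFFCD respectively. */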
7932 /* Generate IR that takes two V128s and narrows each lane (by taking
7933 the lower half of each lane), producing a single V128 value. */
7934 static
7935 IRTemp math_NARROW_LANES ( IRTemp argHi, IRTemp argLo, UInt sizeNarrow )
7937 IRTemp res = newTempV128();
7938 assign(res, binop(mkVecCATEVENLANES(sizeNarrow),
7939 mkexpr(argHi), mkexpr(argLo)));
7940 return res;
7944 /* Return a temp which holds the vector dup of the lane of width
7945 (1 << size) bytes obtained from src[laneNo]. */
7946 static
7947 IRTemp math_DUP_VEC_ELEM ( IRExpr* src, UInt size, UInt laneNo )
7949 vassert(size <= 3);
7950 /* Normalise |laneNo| so it is of the form
7951 x000 for D, xx00 for S, xxx0 for H, and xxxx for B.
7952 This puts the bits we want to inspect at constant offsets
7953 regardless of the value of |size|.
7955 UInt ix = laneNo << size;
7956 vassert(ix <= 15);
7957 IROp ops[4] = { Iop_INVALID, Iop_INVALID, Iop_INVALID, Iop_INVALID };
7958 switch (size) {
7959 case 0: /* B */
7960 ops[0] = (ix & 1) ? Iop_CatOddLanes8x16 : Iop_CatEvenLanes8x16;
7961 /* fallthrough */
7962 case 1: /* H */
7963 ops[1] = (ix & 2) ? Iop_CatOddLanes16x8 : Iop_CatEvenLanes16x8;
7964 /* fallthrough */
7965 case 2: /* S */
7966 ops[2] = (ix & 4) ? Iop_CatOddLanes32x4 : Iop_CatEvenLanes32x4;
7967 /* fallthrough */
7968 case 3: /* D */
7969 ops[3] = (ix & 8) ? Iop_InterleaveHI64x2 : Iop_InterleaveLO64x2;
7970 break;
7971 default:
7972 vassert(0);
7974 IRTemp res = newTempV128();
7975 assign(res, src);
7976 Int i;
7977 for (i = 3; i >= 0; i--) {
7978 if (ops[i] == Iop_INVALID)
7979 break;
7980 IRTemp tmp = newTempV128();
7981 assign(tmp, binop(ops[i], mkexpr(res), mkexpr(res)));
7982 res = tmp;
7984 return res;
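/* Simple example: size == 3 (D lanes) with laneNo == 1 gives ix == 8,
   so only ops[3] is set (Iop_InterleaveHI64x2), and a single
   application of it copies the upper 64-bit lane of the source into
   both halves of the result. */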
7988 /* Let |srcV| be a V128 value, and let |imm5| be a lane-and-size
7989 selector encoded as shown below. Return a new V128 holding the
7990 selected lane from |srcV| dup'd out to V128, and also return the
7991 lane number, log2 of the lane size in bytes, and width-character via
7992 *laneNo, *laneSzLg2 and *laneCh respectively. It may be that imm5
7993 is an invalid selector, in which case return
7994 IRTemp_INVALID, 0, 0 and '?' respectively.
7996 imm5 = xxxx1 signifies .b[xxxx]
7997 = xxx10 .h[xxx]
7998 = xx100 .s[xx]
7999 = x1000 .d[x]
8000 otherwise invalid
8002 static
8003 IRTemp handle_DUP_VEC_ELEM ( /*OUT*/UInt* laneNo,
8004 /*OUT*/UInt* laneSzLg2, /*OUT*/HChar* laneCh,
8005 IRExpr* srcV, UInt imm5 )
8007 *laneNo = 0;
8008 *laneSzLg2 = 0;
8009 *laneCh = '?';
8011 if (imm5 & 1) {
8012 *laneNo = (imm5 >> 1) & 15;
8013 *laneSzLg2 = 0;
8014 *laneCh = 'b';
8016 else if (imm5 & 2) {
8017 *laneNo = (imm5 >> 2) & 7;
8018 *laneSzLg2 = 1;
8019 *laneCh = 'h';
8021 else if (imm5 & 4) {
8022 *laneNo = (imm5 >> 3) & 3;
8023 *laneSzLg2 = 2;
8024 *laneCh = 's';
8026 else if (imm5 & 8) {
8027 *laneNo = (imm5 >> 4) & 1;
8028 *laneSzLg2 = 3;
8029 *laneCh = 'd';
8031 else {
8032 /* invalid */
8033 return IRTemp_INVALID;
8036 return math_DUP_VEC_ELEM(srcV, *laneSzLg2, *laneNo);
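/* Example: imm5 == 0b01010 has its lowest set bit in the "2" position,
   so it selects .h[2]: *laneSzLg2 == 1, *laneNo == (imm5 >> 2) & 7 == 2
   and *laneCh == 'h'. */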
8040 /* Clone |imm| to every lane of a V128, with lane size log2 of |size|. */
8041 static
8042 IRTemp math_VEC_DUP_IMM ( UInt size, ULong imm )
8044 IRType ty = Ity_INVALID;
8045 IRTemp rcS = IRTemp_INVALID;
8046 switch (size) {
8047 case X01:
8048 vassert(imm <= 0xFFFFULL);
8049 ty = Ity_I16;
8050 rcS = newTemp(ty); assign(rcS, mkU16( (UShort)imm ));
8051 break;
8052 case X10:
8053 vassert(imm <= 0xFFFFFFFFULL);
8054 ty = Ity_I32;
8055 rcS = newTemp(ty); assign(rcS, mkU32( (UInt)imm ));
8056 break;
8057 case X11:
8058 ty = Ity_I64;
8059 rcS = newTemp(ty); assign(rcS, mkU64(imm)); break;
8060 default:
8061 vassert(0);
8063 IRTemp rcV = math_DUP_TO_V128(rcS, ty);
8064 return rcV;
8068 /* Let |new64| be a V128 in which only the lower 64 bits are interesting,
8069 and the upper can contain any value -- it is ignored. If |is2| is False,
8070 generate IR to put |new64| in the lower half of vector reg |dd| and zero
8071 the upper half. If |is2| is True, generate IR to put |new64| in the upper
8072 half of vector reg |dd| and leave the lower half unchanged. This
8073 simulates the behaviour of the "foo/foo2" instructions in which the
8074 destination is half the width of sources, for example addhn/addhn2.
8076 static
8077 void putLO64andZUorPutHI64 ( Bool is2, UInt dd, IRTemp new64 )
8079 if (is2) {
8080 /* Get the old contents of Vdd, zero its upper half, and OR in
8081 |new64| positioned in that upper half. */
8082 IRTemp t_zero_oldLO = newTempV128();
8083 assign(t_zero_oldLO, unop(Iop_ZeroHI64ofV128, getQReg128(dd)));
8084 IRTemp t_newHI_zero = newTempV128();
8085 assign(t_newHI_zero, binop(Iop_InterleaveLO64x2, mkexpr(new64),
8086 mkV128(0x0000)));
8087 IRTemp res = newTempV128();
8088 assign(res, binop(Iop_OrV128, mkexpr(t_zero_oldLO),
8089 mkexpr(t_newHI_zero)));
8090 putQReg128(dd, mkexpr(res));
8091 } else {
8092 /* This is simple. */
8093 putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(new64)));
8098 /* Compute vector SQABS at lane size |size| for |srcE|, returning
8099 the q result in |*qabs| and the normal result in |*nabs|. */
8100 static
8101 void math_SQABS ( /*OUT*/IRTemp* qabs, /*OUT*/IRTemp* nabs,
8102 IRExpr* srcE, UInt size )
8104 IRTemp src, mask, maskn, nsub, qsub;
8105 src = mask = maskn = nsub = qsub = IRTemp_INVALID;
8106 newTempsV128_7(&src, &mask, &maskn, &nsub, &qsub, nabs, qabs);
8107 assign(src, srcE);
8108 assign(mask, binop(mkVecCMPGTS(size), mkV128(0x0000), mkexpr(src)));
8109 assign(maskn, unop(Iop_NotV128, mkexpr(mask)));
8110 assign(nsub, binop(mkVecSUB(size), mkV128(0x0000), mkexpr(src)));
8111 assign(qsub, binop(mkVecQSUBS(size), mkV128(0x0000), mkexpr(src)));
8112 assign(*nabs, binop(Iop_OrV128,
8113 binop(Iop_AndV128, mkexpr(nsub), mkexpr(mask)),
8114 binop(Iop_AndV128, mkexpr(src), mkexpr(maskn))));
8115 assign(*qabs, binop(Iop_OrV128,
8116 binop(Iop_AndV128, mkexpr(qsub), mkexpr(mask)),
8117 binop(Iop_AndV128, mkexpr(src), mkexpr(maskn))));
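/* The q and normal results differ only when an input lane holds the
   most negative value.  E.g. for byte lanes, src == 0x80 (-128): nsub
   wraps back to 0x80 while qsub saturates to 0x7F, so *nabs == 0x80
   and *qabs == 0x7F; callers detect saturation by comparing the two. */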
8121 /* Compute vector SQNEG at lane size |size| for |srcE|, returning
8122 the q result in |*qneg| and the normal result in |*nneg|. */
8123 static
8124 void math_SQNEG ( /*OUT*/IRTemp* qneg, /*OUT*/IRTemp* nneg,
8125 IRExpr* srcE, UInt size )
8127 IRTemp src = IRTemp_INVALID;
8128 newTempsV128_3(&src, nneg, qneg);
8129 assign(src, srcE);
8130 assign(*nneg, binop(mkVecSUB(size), mkV128(0x0000), mkexpr(src)));
8131 assign(*qneg, binop(mkVecQSUBS(size), mkV128(0x0000), mkexpr(src)));
8135 /* Zero all except the least significant lane of |srcE|, where |size|
8136 indicates the lane size in the usual way. */
8137 static IRTemp math_ZERO_ALL_EXCEPT_LOWEST_LANE ( UInt size, IRExpr* srcE )
8139 vassert(size < 4);
8140 IRTemp t = newTempV128();
8141 assign(t, unop(mkVecZEROHIxxOFV128(size), srcE));
8142 return t;
8146 /* Generate IR to compute vector widening MULL from either the lower
8147 (is2==False) or upper (is2==True) halves of vecN and vecM. The
8148 widening multiplies are unsigned when isU==True and signed when
8149 isU==False. |size| is the narrow lane size indication. Optionally,
8150 the product may be added to or subtracted from vecD, at the wide lane
8151 size. This happens when |mas| is 'a' (add) or 's' (sub). When |mas|
8152 is 'm' (only multiply) then the accumulate part does not happen, and
8153 |vecD| is expected to == IRTemp_INVALID.
8155 Only size==0 (h_b_b), size==1 (s_h_h) and size==2 (d_s_s) variants
8156 are allowed. The result is placed in a new IRTemp and handed back
8157 via *res. */
8158 static
8159 void math_MULL_ACC ( /*OUT*/IRTemp* res,
8160 Bool is2, Bool isU, UInt size, HChar mas,
8161 IRTemp vecN, IRTemp vecM, IRTemp vecD )
8163 vassert(res && *res == IRTemp_INVALID);
8164 vassert(size <= 2);
8165 vassert(mas == 'm' || mas == 'a' || mas == 's');
8166 if (mas == 'm') vassert(vecD == IRTemp_INVALID);
8167 IROp mulOp = isU ? mkVecMULLU(size) : mkVecMULLS(size);
8168 IROp accOp = (mas == 'a') ? mkVecADD(size+1)
8169 : (mas == 's' ? mkVecSUB(size+1)
8170 : Iop_INVALID);
8171 IRTemp mul = math_BINARY_WIDENING_V128(is2, mulOp,
8172 mkexpr(vecN), mkexpr(vecM));
8173 *res = newTempV128();
8174 assign(*res, mas == 'm' ? mkexpr(mul)
8175 : binop(accOp, mkexpr(vecD), mkexpr(mul)));
8179 /* Same as math_MULL_ACC, except the multiply is signed widening,
8180 the multiplied value is then doubled, before being added to or
8181 subtracted from the accumulated value. And everything is
8182 saturated. In all cases, saturation residuals are returned
8183 via (sat1q, sat1n), and in the accumulate cases,
8184 via (sat2q, sat2n) too. All results are returned in new temporaries.
8185 In the no-accumulate case, *sat2q and *sat2n are never instantiated,
8186 so the caller can tell this has happened. */
8187 static
8188 void math_SQDMULL_ACC ( /*OUT*/IRTemp* res,
8189 /*OUT*/IRTemp* sat1q, /*OUT*/IRTemp* sat1n,
8190 /*OUT*/IRTemp* sat2q, /*OUT*/IRTemp* sat2n,
8191 Bool is2, UInt size, HChar mas,
8192 IRTemp vecN, IRTemp vecM, IRTemp vecD )
8194 vassert(size <= 2);
8195 vassert(mas == 'm' || mas == 'a' || mas == 's');
8196 /* Compute
8197 sat1q = vecN.D[is2] *sq vecM.d[is2] *q 2
8198 sat1n = vecN.D[is2] *s vecM.d[is2] * 2
8199 IOW take either the low or high halves of vecN and vecM, signed widen,
8200 multiply, double that, and signedly saturate. Also compute the same
8201 but without saturation.
8203 vassert(sat2q && *sat2q == IRTemp_INVALID);
8204 vassert(sat2n && *sat2n == IRTemp_INVALID);
8205 newTempsV128_3(sat1q, sat1n, res);
8206 IRTemp tq = math_BINARY_WIDENING_V128(is2, mkVecQDMULLS(size),
8207 mkexpr(vecN), mkexpr(vecM));
8208 IRTemp tn = math_BINARY_WIDENING_V128(is2, mkVecMULLS(size),
8209 mkexpr(vecN), mkexpr(vecM));
8210 assign(*sat1q, mkexpr(tq));
8211 assign(*sat1n, binop(mkVecADD(size+1), mkexpr(tn), mkexpr(tn)));
8213 /* If there is no accumulation, the final result is sat1q,
8214 and there's no assignment to sat2q or sat2n. */
8215 if (mas == 'm') {
8216 assign(*res, mkexpr(*sat1q));
8217 return;
8220 /* Compute
8221 sat2q = vecD +sq/-sq sat1q
8222 sat2n = vecD +/- sat1n
8223 result = sat2q
8225 newTempsV128_2(sat2q, sat2n);
8226 assign(*sat2q, binop(mas == 'a' ? mkVecQADDS(size+1) : mkVecQSUBS(size+1),
8227 mkexpr(vecD), mkexpr(*sat1q)));
8228 assign(*sat2n, binop(mas == 'a' ? mkVecADD(size+1) : mkVecSUB(size+1),
8229 mkexpr(vecD), mkexpr(*sat1n)));
8230 assign(*res, mkexpr(*sat2q));
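/* Saturation example for the multiply part: at size == X01 (h -> s),
   with vecN lane == vecM lane == 0x8000 (-32768), the doubled product
   is 2^31, which does not fit in a signed 32-bit lane.  So *sat1q
   saturates to 0x7FFFFFFF while *sat1n wraps to 0x80000000, and that
   difference is what signals QC. */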
8234 /* Generate IR for widening signed vector multiplies. The operands
8235 have their lane width signedly widened, and they are then multiplied
8236 at the wider width, returning results in two new IRTemps. */
8237 static
8238 void math_MULLS ( /*OUT*/IRTemp* resHI, /*OUT*/IRTemp* resLO,
8239 UInt sizeNarrow, IRTemp argL, IRTemp argR )
8241 vassert(sizeNarrow <= 2);
8242 newTempsV128_2(resHI, resLO);
8243 IRTemp argLhi = newTemp(Ity_I64);
8244 IRTemp argLlo = newTemp(Ity_I64);
8245 IRTemp argRhi = newTemp(Ity_I64);
8246 IRTemp argRlo = newTemp(Ity_I64);
8247 assign(argLhi, unop(Iop_V128HIto64, mkexpr(argL)));
8248 assign(argLlo, unop(Iop_V128to64, mkexpr(argL)));
8249 assign(argRhi, unop(Iop_V128HIto64, mkexpr(argR)));
8250 assign(argRlo, unop(Iop_V128to64, mkexpr(argR)));
8251 IROp opMulls = mkVecMULLS(sizeNarrow);
8252 assign(*resHI, binop(opMulls, mkexpr(argLhi), mkexpr(argRhi)));
8253 assign(*resLO, binop(opMulls, mkexpr(argLlo), mkexpr(argRlo)));
8257 /* Generate IR for SQDMULH and SQRDMULH: signedly wideningly multiply,
8258 double that, possibly add a rounding constant (R variants), and take
8259 the high half. */
8260 static
8261 void math_SQDMULH ( /*OUT*/IRTemp* res,
8262 /*OUT*/IRTemp* sat1q, /*OUT*/IRTemp* sat1n,
8263 Bool isR, UInt size, IRTemp vN, IRTemp vM )
8265 vassert(size == X01 || size == X10); /* s or h only */
8267 newTempsV128_3(res, sat1q, sat1n);
8269 IRTemp mullsHI = IRTemp_INVALID, mullsLO = IRTemp_INVALID;
8270 math_MULLS(&mullsHI, &mullsLO, size, vN, vM);
8272 IRTemp addWide = mkVecADD(size+1);
8274 if (isR) {
8275 assign(*sat1q, binop(mkVecQRDMULHIS(size), mkexpr(vN), mkexpr(vM)));
8277 Int rcShift = size == X01 ? 15 : 31;
8278 IRTemp roundConst = math_VEC_DUP_IMM(size+1, 1ULL << rcShift);
8279 assign(*sat1n,
8280 binop(mkVecCATODDLANES(size),
8281 binop(addWide,
8282 binop(addWide, mkexpr(mullsHI), mkexpr(mullsHI)),
8283 mkexpr(roundConst)),
8284 binop(addWide,
8285 binop(addWide, mkexpr(mullsLO), mkexpr(mullsLO)),
8286 mkexpr(roundConst))));
8287 } else {
8288 assign(*sat1q, binop(mkVecQDMULHIS(size), mkexpr(vN), mkexpr(vM)));
8290 assign(*sat1n,
8291 binop(mkVecCATODDLANES(size),
8292 binop(addWide, mkexpr(mullsHI), mkexpr(mullsHI)),
8293 binop(addWide, mkexpr(mullsLO), mkexpr(mullsLO))));
8296 assign(*res, mkexpr(*sat1q));
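/* The classic saturating case: at size == X01 with vN lane == vM lane
   == 0x8000, the doubled widened product is 2^31, so the q result lane
   (high half of the saturated 0x7FFFFFFF) is 0x7FFF, whereas the
   non-saturating computation yields a high half of 0x8000; the
   difference is what makes the callers set QCFLAG. */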
8300 /* Generate IR for SQSHL, UQSHL, SQSHLU by imm. Put the result in
8301 a new temp in *res, and the Q difference pair in new temps in
8302 *qDiff1 and *qDiff2 respectively. |nm| denotes which of the
8303 three operations it is. */
8304 static
8305 void math_QSHL_IMM ( /*OUT*/IRTemp* res,
8306 /*OUT*/IRTemp* qDiff1, /*OUT*/IRTemp* qDiff2,
8307 IRTemp src, UInt size, UInt shift, const HChar* nm )
8309 vassert(size <= 3);
8310 UInt laneBits = 8 << size;
8311 vassert(shift < laneBits);
8312 newTempsV128_3(res, qDiff1, qDiff2);
8313 IRTemp z128 = newTempV128();
8314 assign(z128, mkV128(0x0000));
8316 /* UQSHL */
8317 if (vex_streq(nm, "uqshl")) {
8318 IROp qop = mkVecQSHLNSATUU(size);
8319 assign(*res, binop(qop, mkexpr(src), mkU8(shift)));
8320 if (shift == 0) {
8321 /* No shift means no saturation. */
8322 assign(*qDiff1, mkexpr(z128));
8323 assign(*qDiff2, mkexpr(z128));
8324 } else {
8325 /* Saturation has occurred if any of the shifted-out bits are
8326 nonzero. We get the shifted-out bits by right-shifting the
8327 original value. */
8328 UInt rshift = laneBits - shift;
8329 vassert(rshift >= 1 && rshift < laneBits);
8330 assign(*qDiff1, binop(mkVecSHRN(size), mkexpr(src), mkU8(rshift)));
8331 assign(*qDiff2, mkexpr(z128));
8333 return;
8336 /* SQSHL */
8337 if (vex_streq(nm, "sqshl")) {
8338 IROp qop = mkVecQSHLNSATSS(size);
8339 assign(*res, binop(qop, mkexpr(src), mkU8(shift)));
8340 if (shift == 0) {
8341 /* No shift means no saturation. */
8342 assign(*qDiff1, mkexpr(z128));
8343 assign(*qDiff2, mkexpr(z128));
8344 } else {
8345 /* Saturation has occurred if any of the shifted-out bits are
8346 different from the top bit of the original value. */
8347 UInt rshift = laneBits - 1 - shift;
8348 vassert(rshift >= 0 && rshift < laneBits-1);
8349 /* qDiff1 is the shifted out bits, and the top bit of the original
8350 value, preceded by zeroes. */
8351 assign(*qDiff1, binop(mkVecSHRN(size), mkexpr(src), mkU8(rshift)));
8352 /* qDiff2 is the top bit of the original value, cloned the
8353 correct number of times. */
8354 assign(*qDiff2, binop(mkVecSHRN(size),
8355 binop(mkVecSARN(size), mkexpr(src),
8356 mkU8(laneBits-1)),
8357 mkU8(rshift)));
8358 /* This also succeeds in comparing the top bit of the original
8359 value to itself, which is a bit stupid, but not wrong. */
8361 return;
8364 /* SQSHLU */
8365 if (vex_streq(nm, "sqshlu")) {
8366 IROp qop = mkVecQSHLNSATSU(size);
8367 assign(*res, binop(qop, mkexpr(src), mkU8(shift)));
8368 if (shift == 0) {
8369 /* If there's no shift, saturation depends on the top bit
8370 of the source. */
8371 assign(*qDiff1, binop(mkVecSHRN(size), mkexpr(src), mkU8(laneBits-1)));
8372 assign(*qDiff2, mkexpr(z128));
8373 } else {
8374 /* Saturation has occurred if any of the shifted-out bits are
8375 nonzero. We get the shifted-out bits by right-shifting the
8376 original value. */
8377 UInt rshift = laneBits - shift;
8378 vassert(rshift >= 1 && rshift < laneBits);
8379 assign(*qDiff1, binop(mkVecSHRN(size), mkexpr(src), mkU8(rshift)));
8380 assign(*qDiff2, mkexpr(z128));
8382 return;
8385 vassert(0);
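/* Example of the sqshl saturation test: size == 0 (byte lanes),
   shift == 3, src lane == 0x15.  Then rshift == 4, so *qDiff1 == 0x01
   (the shifted-out bits plus the original top bit) and *qDiff2 == 0x00
   (the top bit replicated); they differ, correctly flagging that
   0x15 << 3 overflows the signed byte range and saturates to 0x7F. */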
8389 /* Generate IR to do SRHADD and URHADD. */
8390 static
8391 IRTemp math_RHADD ( UInt size, Bool isU, IRTemp aa, IRTemp bb )
8393 /* Generate this:
8394 (A >> 1) + (B >> 1) + (((A & 1) + (B & 1) + 1) >> 1)
8396 vassert(size <= 3);
8397 IROp opSHR = isU ? mkVecSHRN(size) : mkVecSARN(size);
8398 IROp opADD = mkVecADD(size);
8399 /* The only tricky bit is to generate the correct vector 1 constant. */
8400 const ULong ones64[4]
8401 = { 0x0101010101010101ULL, 0x0001000100010001ULL,
8402 0x0000000100000001ULL, 0x0000000000000001ULL };
8403 IRTemp imm64 = newTemp(Ity_I64);
8404 assign(imm64, mkU64(ones64[size]));
8405 IRTemp vecOne = newTempV128();
8406 assign(vecOne, binop(Iop_64HLtoV128, mkexpr(imm64), mkexpr(imm64)));
8407 IRTemp scaOne = newTemp(Ity_I8);
8408 assign(scaOne, mkU8(1));
8409 IRTemp res = newTempV128();
8410 assign(res,
8411 binop(opADD,
8412 binop(opSHR, mkexpr(aa), mkexpr(scaOne)),
8413 binop(opADD,
8414 binop(opSHR, mkexpr(bb), mkexpr(scaOne)),
8415 binop(opSHR,
8416 binop(opADD,
8417 binop(opADD,
8418 binop(Iop_AndV128, mkexpr(aa),
8419 mkexpr(vecOne)),
8420 binop(Iop_AndV128, mkexpr(bb),
8421 mkexpr(vecOne))
8423 mkexpr(vecOne)
8425 mkexpr(scaOne)
8430 return res;
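/* Quick sanity check of the identity, for unsigned bytes A == 5 and
   B == 6: (5 >> 1) + (6 >> 1) + ((1 + 0 + 1) >> 1) == 2 + 3 + 1 == 6,
   which matches the rounded halving add (5 + 6 + 1) >> 1 == 6 without
   ever needing a 9-bit intermediate. */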
8434 /* QCFLAG tracks the SIMD sticky saturation status. Update the status
8435 thusly: if, after application of |opZHI| to both |qres| and |nres|,
8436 they have the same value, leave QCFLAG unchanged. Otherwise, set it
8437 (implicitly) to 1. |opZHI| may only be one of the Iop_ZeroHIxxofV128
8438 operators, or Iop_INVALID, in which case |qres| and |nres| are used
8440 unmodified. The presence of |opZHI| means this function can be used to
8440 generate QCFLAG update code for both scalar and vector SIMD operations.
8442 static
8443 void updateQCFLAGwithDifferenceZHI ( IRTemp qres, IRTemp nres, IROp opZHI )
8445 IRTemp diff = newTempV128();
8446 IRTemp oldQCFLAG = newTempV128();
8447 IRTemp newQCFLAG = newTempV128();
8448 if (opZHI == Iop_INVALID) {
8449 assign(diff, binop(Iop_XorV128, mkexpr(qres), mkexpr(nres)));
8450 } else {
8451 vassert(opZHI == Iop_ZeroHI64ofV128
8452 || opZHI == Iop_ZeroHI96ofV128 || opZHI == Iop_ZeroHI112ofV128);
8453 assign(diff, unop(opZHI, binop(Iop_XorV128, mkexpr(qres), mkexpr(nres))));
8455 assign(oldQCFLAG, IRExpr_Get(OFFB_QCFLAG, Ity_V128));
8456 assign(newQCFLAG, binop(Iop_OrV128, mkexpr(oldQCFLAG), mkexpr(diff)));
8457 stmt(IRStmt_Put(OFFB_QCFLAG, mkexpr(newQCFLAG)));
8461 /* A variant of updateQCFLAGwithDifferenceZHI in which |qres| and |nres|
8462 are used unmodified, hence suitable for QCFLAG updates for whole-vector
8463 operations. */
8464 static
8465 void updateQCFLAGwithDifference ( IRTemp qres, IRTemp nres )
8467 updateQCFLAGwithDifferenceZHI(qres, nres, Iop_INVALID);
8471 /* Generate IR to rearrange two vector values in a way which is useful
8472 for doing S/D add-pair etc operations. There are 3 cases:
8474 2d: [m1 m0] [n1 n0] --> [m1 n1] [m0 n0]
8476 4s: [m3 m2 m1 m0] [n3 n2 n1 n0] --> [m3 m1 n3 n1] [m2 m0 n2 n0]
8478 2s: [m3 m2 m1 m0] [n3 n2 n1 n0] --> [0 0 m1 n1] [0 0 m0 n0]
8480 The cases are distinguished as follows:
8481 isD == True, bitQ == 1 => 2d
8482 isD == False, bitQ == 1 => 4s
8483 isD == False, bitQ == 0 => 2s
8485 static
8486 void math_REARRANGE_FOR_FLOATING_PAIRWISE (
8487 /*OUT*/IRTemp* rearrL, /*OUT*/IRTemp* rearrR,
8488 IRTemp vecM, IRTemp vecN, Bool isD, UInt bitQ
8491 vassert(rearrL && *rearrL == IRTemp_INVALID);
8492 vassert(rearrR && *rearrR == IRTemp_INVALID);
8493 *rearrL = newTempV128();
8494 *rearrR = newTempV128();
8495 if (isD) {
8496 // 2d case
8497 vassert(bitQ == 1);
8498 assign(*rearrL, binop(Iop_InterleaveHI64x2, mkexpr(vecM), mkexpr(vecN)));
8499 assign(*rearrR, binop(Iop_InterleaveLO64x2, mkexpr(vecM), mkexpr(vecN)));
8501 else if (!isD && bitQ == 1) {
8502 // 4s case
8503 assign(*rearrL, binop(Iop_CatOddLanes32x4, mkexpr(vecM), mkexpr(vecN)));
8504 assign(*rearrR, binop(Iop_CatEvenLanes32x4, mkexpr(vecM), mkexpr(vecN)));
8505 } else {
8506 // 2s case
8507 vassert(!isD && bitQ == 0);
8508 IRTemp m1n1m0n0 = newTempV128();
8509 IRTemp m0n0m1n1 = newTempV128();
8510 assign(m1n1m0n0, binop(Iop_InterleaveLO32x4,
8511 mkexpr(vecM), mkexpr(vecN)));
8512 assign(m0n0m1n1, triop(Iop_SliceV128,
8513 mkexpr(m1n1m0n0), mkexpr(m1n1m0n0), mkU8(8)));
8514 assign(*rearrL, unop(Iop_ZeroHI64ofV128, mkexpr(m1n1m0n0)));
8515 assign(*rearrR, unop(Iop_ZeroHI64ofV128, mkexpr(m0n0m1n1)));
8520 /* Returns 2.0 ^ (-n) for n in 1 .. 64 */
8521 static Double two_to_the_minus ( Int n )
8523 if (n == 1) return 0.5;
8524 vassert(n >= 2 && n <= 64);
8525 Int half = n / 2;
8526 return two_to_the_minus(half) * two_to_the_minus(n - half);
8530 /* Returns 2.0 ^ n for n in 1 .. 64 */
8531 static Double two_to_the_plus ( Int n )
8533 if (n == 1) return 2.0;
8534 vassert(n >= 2 && n <= 64);
8535 Int half = n / 2;
8536 return two_to_the_plus(half) * two_to_the_plus(n - half);
8540 /*------------------------------------------------------------*/
8541 /*--- SIMD and FP instructions ---*/
8542 /*------------------------------------------------------------*/
8544 static
8545 Bool dis_AdvSIMD_EXT(/*MB_OUT*/DisResult* dres, UInt insn)
8547 /* 31 29 23 21 20 15 14 10 9 4
8548 0 q 101110 op2 0 m 0 imm4 0 n d
8549 Decode fields: op2
8551 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
8552 if (INSN(31,31) != 0
8553 || INSN(29,24) != BITS6(1,0,1,1,1,0)
8554 || INSN(21,21) != 0 || INSN(15,15) != 0 || INSN(10,10) != 0) {
8555 return False;
8557 UInt bitQ = INSN(30,30);
8558 UInt op2 = INSN(23,22);
8559 UInt mm = INSN(20,16);
8560 UInt imm4 = INSN(14,11);
8561 UInt nn = INSN(9,5);
8562 UInt dd = INSN(4,0);
8564 if (op2 == BITS2(0,0)) {
8565 /* -------- 00: EXT 16b_16b_16b, 8b_8b_8b -------- */
8566 IRTemp sHi = newTempV128();
8567 IRTemp sLo = newTempV128();
8568 IRTemp res = newTempV128();
8569 assign(sHi, getQReg128(mm));
8570 assign(sLo, getQReg128(nn));
8571 if (bitQ == 1) {
8572 if (imm4 == 0) {
8573 assign(res, mkexpr(sLo));
8574 } else {
8575 vassert(imm4 >= 1 && imm4 <= 15);
8576 assign(res, triop(Iop_SliceV128,
8577 mkexpr(sHi), mkexpr(sLo), mkU8(imm4)));
8579 putQReg128(dd, mkexpr(res));
8580 DIP("ext v%u.16b, v%u.16b, v%u.16b, #%u\n", dd, nn, mm, imm4);
8581 } else {
8582 if (imm4 >= 8) return False;
8583 if (imm4 == 0) {
8584 assign(res, mkexpr(sLo));
8585 } else {
8586 vassert(imm4 >= 1 && imm4 <= 7);
8587 IRTemp hi64lo64 = newTempV128();
8588 assign(hi64lo64, binop(Iop_InterleaveLO64x2,
8589 mkexpr(sHi), mkexpr(sLo)));
8590 assign(res, triop(Iop_SliceV128,
8591 mkexpr(hi64lo64), mkexpr(hi64lo64), mkU8(imm4)));
8593 putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
8594 DIP("ext v%u.8b, v%u.8b, v%u.8b, #%u\n", dd, nn, mm, imm4);
8596 return True;
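/* Example of the Q == 1 case: "ext v0.16b, v1.16b, v2.16b, #3"
   concatenates v2:v1 and takes bytes 3 .. 18 of that pair, so result
   bytes 0 .. 12 come from v1 bytes 3 .. 15 and result bytes 13 .. 15
   come from v2 bytes 0 .. 2. */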
8599 return False;
8600 # undef INSN
8604 static
8605 Bool dis_AdvSIMD_TBL_TBX(/*MB_OUT*/DisResult* dres, UInt insn)
8607 /* 31 29 23 21 20 15 14 12 11 9 4
8608 0 q 001110 op2 0 m 0 len op 00 n d
8609 Decode fields: op2,len,op
8611 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
8612 if (INSN(31,31) != 0
8613 || INSN(29,24) != BITS6(0,0,1,1,1,0)
8614 || INSN(21,21) != 0
8615 || INSN(15,15) != 0
8616 || INSN(11,10) != BITS2(0,0)) {
8617 return False;
8619 UInt bitQ = INSN(30,30);
8620 UInt op2 = INSN(23,22);
8621 UInt mm = INSN(20,16);
8622 UInt len = INSN(14,13);
8623 UInt bitOP = INSN(12,12);
8624 UInt nn = INSN(9,5);
8625 UInt dd = INSN(4,0);
8627 if (op2 == X00) {
8628 /* -------- 00,xx,0 TBL, xx register table -------- */
8629 /* -------- 00,xx,1 TBX, xx register table -------- */
8630 /* 31 28 20 15 14 12 9 4
8631 0q0 01110 000 m 0 len 000 n d TBL Vd.Ta, {Vn .. V(n+len)%32}, Vm.Ta
8632 0q0 01110 000 m 0 len 100 n d TBX Vd.Ta, {Vn .. V(n+len)%32}, Vm.Ta
8633 where Ta = 16b(q=1) or 8b(q=0)
8635 Bool isTBX = bitOP == 1;
8636 /* The out-of-range values to use. */
8637 IRTemp oor_values = newTempV128();
8638 assign(oor_values, isTBX ? getQReg128(dd) : mkV128(0));
8639 /* src value */
8640 IRTemp src = newTempV128();
8641 assign(src, getQReg128(mm));
8642 /* The table values */
8643 IRTemp tab[4];
8644 UInt i;
8645 for (i = 0; i <= len; i++) {
8646 vassert(i < 4);
8647 tab[i] = newTempV128();
8648 assign(tab[i], getQReg128((nn + i) % 32));
8650 IRTemp res = math_TBL_TBX(tab, len, src, oor_values);
8651 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
8652 const HChar* Ta = bitQ == 1 ? "16b" : "8b";
8653 const HChar* nm = isTBX ? "tbx" : "tbl";
8654 DIP("%s %s.%s, {v%u.16b .. v%u.16b}, %s.%s\n",
8655 nm, nameQReg128(dd), Ta, nn, (nn + len) % 32, nameQReg128(mm), Ta);
8656 return True;
8659 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
8660 return False;
8661 # undef INSN
8665 static
8666 Bool dis_AdvSIMD_ZIP_UZP_TRN(/*MB_OUT*/DisResult* dres, UInt insn)
8668 /* 31 29 23 21 20 15 14 11 9 4
8669 0 q 001110 size 0 m 0 opcode 10 n d
8670 Decode fields: opcode
8672 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
8673 if (INSN(31,31) != 0
8674 || INSN(29,24) != BITS6(0,0,1,1,1,0)
8675 || INSN(21,21) != 0 || INSN(15,15) != 0 || INSN(11,10) != BITS2(1,0)) {
8676 return False;
8678 UInt bitQ = INSN(30,30);
8679 UInt size = INSN(23,22);
8680 UInt mm = INSN(20,16);
8681 UInt opcode = INSN(14,12);
8682 UInt nn = INSN(9,5);
8683 UInt dd = INSN(4,0);
8685 if (opcode == BITS3(0,0,1) || opcode == BITS3(1,0,1)) {
8686 /* -------- 001 UZP1 std7_std7_std7 -------- */
8687 /* -------- 101 UZP2 std7_std7_std7 -------- */
8688 if (bitQ == 0 && size == X11) return False; // implied 1d case
8689 Bool isUZP1 = opcode == BITS3(0,0,1);
8690 IROp op = isUZP1 ? mkVecCATEVENLANES(size)
8691 : mkVecCATODDLANES(size);
8692 IRTemp preL = newTempV128();
8693 IRTemp preR = newTempV128();
8694 IRTemp res = newTempV128();
8695 if (bitQ == 0) {
8696 assign(preL, binop(Iop_InterleaveLO64x2, getQReg128(mm),
8697 getQReg128(nn)));
8698 assign(preR, mkexpr(preL));
8699 } else {
8700 assign(preL, getQReg128(mm));
8701 assign(preR, getQReg128(nn));
8703 assign(res, binop(op, mkexpr(preL), mkexpr(preR)));
8704 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
8705 const HChar* nm = isUZP1 ? "uzp1" : "uzp2";
8706 const HChar* arr = nameArr_Q_SZ(bitQ, size);
8707 DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
8708 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
8709 return True;
8712 if (opcode == BITS3(0,1,0) || opcode == BITS3(1,1,0)) {
8713 /* -------- 010 TRN1 std7_std7_std7 -------- */
8714 /* -------- 110 TRN2 std7_std7_std7 -------- */
8715 if (bitQ == 0 && size == X11) return False; // implied 1d case
8716 Bool isTRN1 = opcode == BITS3(0,1,0);
8717 IROp op1 = isTRN1 ? mkVecCATEVENLANES(size)
8718 : mkVecCATODDLANES(size);
8719 IROp op2 = mkVecINTERLEAVEHI(size);
8720 IRTemp srcM = newTempV128();
8721 IRTemp srcN = newTempV128();
8722 IRTemp res = newTempV128();
8723 assign(srcM, getQReg128(mm));
8724 assign(srcN, getQReg128(nn));
8725 assign(res, binop(op2, binop(op1, mkexpr(srcM), mkexpr(srcM)),
8726 binop(op1, mkexpr(srcN), mkexpr(srcN))));
8727 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
8728 const HChar* nm = isTRN1 ? "trn1" : "trn2";
8729 const HChar* arr = nameArr_Q_SZ(bitQ, size);
8730 DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
8731 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
8732 return True;
8735 if (opcode == BITS3(0,1,1) || opcode == BITS3(1,1,1)) {
8736 /* -------- 011 ZIP1 std7_std7_std7 -------- */
8737 /* -------- 111 ZIP2 std7_std7_std7 -------- */
8738 if (bitQ == 0 && size == X11) return False; // implied 1d case
8739 Bool isZIP1 = opcode == BITS3(0,1,1);
8740 IROp op = isZIP1 ? mkVecINTERLEAVELO(size)
8741 : mkVecINTERLEAVEHI(size);
8742 IRTemp preL = newTempV128();
8743 IRTemp preR = newTempV128();
8744 IRTemp res = newTempV128();
8745 if (bitQ == 0 && !isZIP1) {
8746 IRTemp z128 = newTempV128();
8747 assign(z128, mkV128(0x0000));
8748 // preL = Vm shifted left 32 bits
8749 // preR = Vn shifted left 32 bits
8750 assign(preL, triop(Iop_SliceV128,
8751 getQReg128(mm), mkexpr(z128), mkU8(12)));
8752 assign(preR, triop(Iop_SliceV128,
8753 getQReg128(nn), mkexpr(z128), mkU8(12)));
8755 } else {
8756 assign(preL, getQReg128(mm));
8757 assign(preR, getQReg128(nn));
8759 assign(res, binop(op, mkexpr(preL), mkexpr(preR)));
8760 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
8761 const HChar* nm = isZIP1 ? "zip1" : "zip2";
8762 const HChar* arr = nameArr_Q_SZ(bitQ, size);
8763 DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
8764 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
8765 return True;
8768 return False;
8769 # undef INSN
8773 static
8774 Bool dis_AdvSIMD_across_lanes(/*MB_OUT*/DisResult* dres, UInt insn)
8776 /* 31 28 23 21 16 11 9 4
8777 0 q u 01110 size 11000 opcode 10 n d
8778 Decode fields: u,size,opcode
8780 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
8781 if (INSN(31,31) != 0
8782 || INSN(28,24) != BITS5(0,1,1,1,0)
8783 || INSN(21,17) != BITS5(1,1,0,0,0) || INSN(11,10) != BITS2(1,0)) {
8784 return False;
8786 UInt bitQ = INSN(30,30);
8787 UInt bitU = INSN(29,29);
8788 UInt size = INSN(23,22);
8789 UInt opcode = INSN(16,12);
8790 UInt nn = INSN(9,5);
8791 UInt dd = INSN(4,0);
8793 if (opcode == BITS5(0,0,0,1,1)) {
8794 /* -------- 0,xx,00011 SADDLV -------- */
8795 /* -------- 1,xx,00011 UADDLV -------- */
8796 /* size is the narrow size */
8797 if (size == X11 || (size == X10 && bitQ == 0)) return False;
8798 Bool isU = bitU == 1;
8799 IRTemp src = newTempV128();
8800 assign(src, getQReg128(nn));
8801 /* The basic plan is to widen the lower half, and if Q = 1,
8802 the upper half too. Add them together (if Q = 1), and in
8803 either case fold with add at twice the lane width.
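/* For example, "uaddlv h0, v1.8b" (Q=0, size=00) widens the low 64 bits
   of V1 to eight 16-bit lanes and folds them with Add16x8; with
   v1.8b = {1,2,3,4,5,6,7,8} the lowest lane of the result is 36. */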
8805 IRExpr* widened
8806 = mkexpr(math_WIDEN_LO_OR_HI_LANES(
8807 isU, False/*!fromUpperHalf*/, size, mkexpr(src)));
8808 if (bitQ == 1) {
8809 widened
8810 = binop(mkVecADD(size+1),
8811 widened,
8812 mkexpr(math_WIDEN_LO_OR_HI_LANES(
8813 isU, True/*fromUpperHalf*/, size, mkexpr(src)))
8816 /* Now fold. */
8817 IRTemp tWi = newTempV128();
8818 assign(tWi, widened);
8819 IRTemp res = math_FOLDV(tWi, mkVecADD(size+1));
8820 putQReg128(dd, mkexpr(res));
8821 const HChar* arr = nameArr_Q_SZ(bitQ, size);
8822 const HChar ch = "bhsd"[size+1];
8823 DIP("%s %s.%c, %s.%s\n", isU ? "uaddlv" : "saddlv",
8824 nameQReg128(dd), ch, nameQReg128(nn), arr);
8825 return True;
8828 UInt ix = 0;
8829 /**/ if (opcode == BITS5(0,1,0,1,0)) { ix = bitU == 0 ? 1 : 2; }
8830 else if (opcode == BITS5(1,1,0,1,0)) { ix = bitU == 0 ? 3 : 4; }
8831 else if (opcode == BITS5(1,1,0,1,1) && bitU == 0) { ix = 5; }
8832 /**/
8833 if (ix != 0) {
8834 /* -------- 0,xx,01010: SMAXV -------- (1) */
8835 /* -------- 1,xx,01010: UMAXV -------- (2) */
8836 /* -------- 0,xx,11010: SMINV -------- (3) */
8837 /* -------- 1,xx,11010: UMINV -------- (4) */
8838 /* -------- 0,xx,11011: ADDV -------- (5) */
8839 vassert(ix >= 1 && ix <= 5);
8840 if (size == X11) return False; // 1d,2d cases not allowed
8841 if (size == X10 && bitQ == 0) return False; // 2s case not allowed
8842 const IROp opMAXS[3]
8843 = { Iop_Max8Sx16, Iop_Max16Sx8, Iop_Max32Sx4 };
8844 const IROp opMAXU[3]
8845 = { Iop_Max8Ux16, Iop_Max16Ux8, Iop_Max32Ux4 };
8846 const IROp opMINS[3]
8847 = { Iop_Min8Sx16, Iop_Min16Sx8, Iop_Min32Sx4 };
8848 const IROp opMINU[3]
8849 = { Iop_Min8Ux16, Iop_Min16Ux8, Iop_Min32Ux4 };
8850 const IROp opADD[3]
8851 = { Iop_Add8x16, Iop_Add16x8, Iop_Add32x4 };
8852 vassert(size < 3);
8853 IROp op = Iop_INVALID;
8854 const HChar* nm = NULL;
8855 switch (ix) {
8856 case 1: op = opMAXS[size]; nm = "smaxv"; break;
8857 case 2: op = opMAXU[size]; nm = "umaxv"; break;
8858 case 3: op = opMINS[size]; nm = "sminv"; break;
8859 case 4: op = opMINU[size]; nm = "uminv"; break;
8860 case 5: op = opADD[size]; nm = "addv"; break;
8861 default: vassert(0);
8863 vassert(op != Iop_INVALID && nm != NULL);
8864 IRTemp tN1 = newTempV128();
8865 assign(tN1, getQReg128(nn));
8866 /* If Q == 0, we're just folding lanes in the lower half of
8867 the value. In which case, copy the lower half of the
8868 source into the upper half, so we can then treat it the
8869 same as the full width case. Except for the addition case,
8870 in which we have to zero out the upper half. */
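/* For example, "smaxv b0, v1.8b" (Q=0) duplicates the low 64 bits into
   the high 64 bits before folding with Max8Sx16, which leaves the
   maximum unchanged; "addv b0, v1.8b" instead zeroes the high half,
   since duplicating the lanes would double the sum. */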
8871 IRTemp tN2 = newTempV128();
8872 assign(tN2, bitQ == 0
8873 ? (ix == 5 ? unop(Iop_ZeroHI64ofV128, mkexpr(tN1))
8874 : mk_CatEvenLanes64x2(tN1,tN1))
8875 : mkexpr(tN1));
8876 IRTemp res = math_FOLDV(tN2, op);
8877 if (res == IRTemp_INVALID)
8878 return False; /* means math_FOLDV
8879 doesn't handle this case yet */
8880 putQReg128(dd, mkexpr(res));
8881 const IRType tys[3] = { Ity_I8, Ity_I16, Ity_I32 };
8882 IRType laneTy = tys[size];
8883 const HChar* arr = nameArr_Q_SZ(bitQ, size);
8884 DIP("%s %s, %s.%s\n", nm,
8885 nameQRegLO(dd, laneTy), nameQReg128(nn), arr);
8886 return True;
8889 if ((size == X00 || size == X10)
8890 && (opcode == BITS5(0,1,1,0,0) || opcode == BITS5(0,1,1,1,1))) {
8891 /* -------- 0,00,01100: FMAXNMV s_4s -------- */
8892 /* -------- 0,10,01100: FMINNMV s_4s -------- */
8893 /* -------- 1,00,01111: FMAXV s_4s -------- */
8894 /* -------- 1,10,01111: FMINV s_4s -------- */
8895 /* FMAXNM, FMINNM: FIXME -- KLUDGED */
8896 if (bitQ == 0) return False; // Only 4s is allowed
8897 Bool isMIN = (size & 2) == 2;
8898 Bool isNM = opcode == BITS5(0,1,1,0,0);
8899 IROp opMXX = (isMIN ? mkVecMINF : mkVecMAXF)(2);
8900 IRTemp src = newTempV128();
8901 assign(src, getQReg128(nn));
8902 IRTemp res = math_FOLDV(src, opMXX);
8903 putQReg128(dd, mkexpr(res));
8904 DIP("%s%sv s%u, %u.4s\n",
8905 isMIN ? "fmin" : "fmax", isNM ? "nm" : "", dd, nn);
8906 return True;
8909 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
8910 return False;
8911 # undef INSN
8915 static
8916 Bool dis_AdvSIMD_copy(/*MB_OUT*/DisResult* dres, UInt insn)
8918 /* 31 28 20 15 14 10 9 4
8919 0 q op 01110000 imm5 0 imm4 1 n d
8920 Decode fields: q,op,imm4
8922 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
8923 if (INSN(31,31) != 0
8924 || INSN(28,21) != BITS8(0,1,1,1,0,0,0,0)
8925 || INSN(15,15) != 0 || INSN(10,10) != 1) {
8926 return False;
8928 UInt bitQ = INSN(30,30);
8929 UInt bitOP = INSN(29,29);
8930 UInt imm5 = INSN(20,16);
8931 UInt imm4 = INSN(14,11);
8932 UInt nn = INSN(9,5);
8933 UInt dd = INSN(4,0);
8935 /* -------- x,0,0000: DUP (element, vector) -------- */
8936 /* 31 28 20 15 9 4
8937 0q0 01110000 imm5 000001 n d DUP Vd.T, Vn.Ts[index]
8939 if (bitOP == 0 && imm4 == BITS4(0,0,0,0)) {
8940 UInt laneNo = 0;
8941 UInt laneSzLg2 = 0;
8942 HChar laneCh = '?';
8943 IRTemp res = handle_DUP_VEC_ELEM(&laneNo, &laneSzLg2, &laneCh,
8944 getQReg128(nn), imm5);
8945 if (res == IRTemp_INVALID)
8946 return False;
8947 if (bitQ == 0 && laneSzLg2 == X11)
8948 return False; /* .1d case */
8949 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
8950 const HChar* arT = nameArr_Q_SZ(bitQ, laneSzLg2);
8951 DIP("dup %s.%s, %s.%c[%u]\n",
8952 nameQReg128(dd), arT, nameQReg128(nn), laneCh, laneNo);
8953 return True;
8956 /* -------- x,0,0001: DUP (general, vector) -------- */
8957 /* 31 28 20 15 9 4
8958 0q0 01110000 imm5 0 0001 1 n d DUP Vd.T, Rn
8959 Q=0 writes 64, Q=1 writes 128
8960 imm5: xxxx1 8B(q=0) or 16b(q=1), R=W
8961 xxx10 4H(q=0) or 8H(q=1), R=W
8962 xx100 2S(q=0) or 4S(q=1), R=W
8963 x1000 Invalid(q=0) or 2D(q=1), R=X
8964 x0000 Invalid(q=0) or Invalid(q=1)
8965 Require op=0, imm4=0001
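/* For example, imm5 = 00010 with Q=1 gives "dup v2.8h, w5": the low 16
   bits of W5 are replicated into all eight 16-bit lanes of V2.  With
   Q=0 the same imm5 gives "dup v2.4h, w5" and the top 64 bits of V2
   are zeroed. */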
8967 if (bitOP == 0 && imm4 == BITS4(0,0,0,1)) {
8968 Bool isQ = bitQ == 1;
8969 IRTemp w0 = newTemp(Ity_I64);
8970 const HChar* arT = "??";
8971 IRType laneTy = Ity_INVALID;
8972 if (imm5 & 1) {
8973 arT = isQ ? "16b" : "8b";
8974 laneTy = Ity_I8;
8975 assign(w0, unop(Iop_8Uto64, unop(Iop_64to8, getIReg64orZR(nn))));
8977 else if (imm5 & 2) {
8978 arT = isQ ? "8h" : "4h";
8979 laneTy = Ity_I16;
8980 assign(w0, unop(Iop_16Uto64, unop(Iop_64to16, getIReg64orZR(nn))));
8982 else if (imm5 & 4) {
8983 arT = isQ ? "4s" : "2s";
8984 laneTy = Ity_I32;
8985 assign(w0, unop(Iop_32Uto64, unop(Iop_64to32, getIReg64orZR(nn))));
8987 else if ((imm5 & 8) && isQ) {
8988 arT = "2d";
8989 laneTy = Ity_I64;
8990 assign(w0, getIReg64orZR(nn));
8992 else {
8993 /* invalid; leave laneTy unchanged. */
8995 /* */
8996 if (laneTy != Ity_INVALID) {
8997 IRTemp w1 = math_DUP_TO_64(w0, laneTy);
8998 putQReg128(dd, binop(Iop_64HLtoV128,
8999 isQ ? mkexpr(w1) : mkU64(0), mkexpr(w1)));
9000 DIP("dup %s.%s, %s\n",
9001 nameQReg128(dd), arT, nameIRegOrZR(laneTy == Ity_I64, nn));
9002 return True;
9004 /* invalid */
9005 return False;
9008 /* -------- 1,0,0011: INS (general) -------- */
9009 /* 31 28 20 15 9 4
9010 010 01110000 imm5 000111 n d INS Vd.Ts[ix], Rn
9011 where Ts,ix = case imm5 of xxxx1 -> B, xxxx
9012 xxx10 -> H, xxx
9013 xx100 -> S, xx
9014 x1000 -> D, x
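/* For example, imm5 = 11000 selects the D lane with index imm5<4> = 1,
   giving "ins v0.d[1], x7": X7 is written to bits 127:64 of V0 and the
   low half of V0 is left unchanged. */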
9016 if (bitQ == 1 && bitOP == 0 && imm4 == BITS4(0,0,1,1)) {
9017 HChar ts = '?';
9018 UInt laneNo = 16;
9019 IRExpr* src = NULL;
9020 if (imm5 & 1) {
9021 src = unop(Iop_64to8, getIReg64orZR(nn));
9022 laneNo = (imm5 >> 1) & 15;
9023 ts = 'b';
9025 else if (imm5 & 2) {
9026 src = unop(Iop_64to16, getIReg64orZR(nn));
9027 laneNo = (imm5 >> 2) & 7;
9028 ts = 'h';
9030 else if (imm5 & 4) {
9031 src = unop(Iop_64to32, getIReg64orZR(nn));
9032 laneNo = (imm5 >> 3) & 3;
9033 ts = 's';
9035 else if (imm5 & 8) {
9036 src = getIReg64orZR(nn);
9037 laneNo = (imm5 >> 4) & 1;
9038 ts = 'd';
9040 /* */
9041 if (src) {
9042 vassert(laneNo < 16);
9043 putQRegLane(dd, laneNo, src);
9044 DIP("ins %s.%c[%u], %s\n",
9045 nameQReg128(dd), ts, laneNo, nameIReg64orZR(nn));
9046 return True;
9048 /* invalid */
9049 return False;
9052 /* -------- x,0,0101: SMOV -------- */
9053 /* -------- x,0,0111: UMOV -------- */
9054 /* 31 28 20 15 9 4
9055 0q0 01110 000 imm5 001111 n d UMOV Xd/Wd, Vn.Ts[index]
9056 0q0 01110 000 imm5 001011 n d SMOV Xd/Wd, Vn.Ts[index]
9057 dest is Xd when q==1, Wd when q==0
9058 UMOV:
9059 Ts,index,ops = case q:imm5 of
9060 0:xxxx1 -> B, xxxx, 8Uto64
9061 1:xxxx1 -> invalid
9062 0:xxx10 -> H, xxx, 16Uto64
9063 1:xxx10 -> invalid
9064 0:xx100 -> S, xx, 32Uto64
9065 1:xx100 -> invalid
9066 1:x1000 -> D, x, copy64
9067 other -> invalid
9068 SMOV:
9069 Ts,index,ops = case q:imm5 of
9070 0:xxxx1 -> B, xxxx, (32Uto64 . 8Sto32)
9071 1:xxxx1 -> B, xxxx, 8Sto64
9072 0:xxx10 -> H, xxx, (32Uto64 . 16Sto32)
9073 1:xxx10 -> H, xxx, 16Sto64
9074 0:xx100 -> invalid
9075 1:xx100 -> S, xx, 32Sto64
9076 1:x1000 -> invalid
9077 other -> invalid
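/* For example, q=0, imm5 = 00110 selects the H lane with index 1:
   "umov w0, v1.h[1]" zero-extends the 16-bit lane into W0, while
   "smov w0, v1.h[1]" sign-extends it to 32 bits; the upper 32 bits of
   X0 are zero in both cases. */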
9079 if (bitOP == 0 && (imm4 == BITS4(0,1,0,1) || imm4 == BITS4(0,1,1,1))) {
9080 Bool isU = (imm4 & 2) == 2;
9081 const HChar* arTs = "??";
9082 UInt laneNo = 16; /* invalid */
9083 // Setting 'res' to non-NULL determines valid/invalid
9084 IRExpr* res = NULL;
9085 if (!bitQ && (imm5 & 1)) { // 0:xxxx1
9086 laneNo = (imm5 >> 1) & 15;
9087 IRExpr* lane = getQRegLane(nn, laneNo, Ity_I8);
9088 res = isU ? unop(Iop_8Uto64, lane)
9089 : unop(Iop_32Uto64, unop(Iop_8Sto32, lane));
9090 arTs = "b";
9092 else if (bitQ && (imm5 & 1)) { // 1:xxxx1
9093 laneNo = (imm5 >> 1) & 15;
9094 IRExpr* lane = getQRegLane(nn, laneNo, Ity_I8);
9095 res = isU ? NULL
9096 : unop(Iop_8Sto64, lane);
9097 arTs = "b";
9099 else if (!bitQ && (imm5 & 2)) { // 0:xxx10
9100 laneNo = (imm5 >> 2) & 7;
9101 IRExpr* lane = getQRegLane(nn, laneNo, Ity_I16);
9102 res = isU ? unop(Iop_16Uto64, lane)
9103 : unop(Iop_32Uto64, unop(Iop_16Sto32, lane));
9104 arTs = "h";
9106 else if (bitQ && (imm5 & 2)) { // 1:xxx10
9107 laneNo = (imm5 >> 2) & 7;
9108 IRExpr* lane = getQRegLane(nn, laneNo, Ity_I16);
9109 res = isU ? NULL
9110 : unop(Iop_16Sto64, lane);
9111 arTs = "h";
9113 else if (!bitQ && (imm5 & 4)) { // 0:xx100
9114 laneNo = (imm5 >> 3) & 3;
9115 IRExpr* lane = getQRegLane(nn, laneNo, Ity_I32);
9116 res = isU ? unop(Iop_32Uto64, lane)
9117 : NULL;
9118 arTs = "s";
9120 else if (bitQ && (imm5 & 4)) { // 1:xx100
9121 laneNo = (imm5 >> 3) & 3;
9122 IRExpr* lane = getQRegLane(nn, laneNo, Ity_I32);
9123 res = isU ? NULL
9124 : unop(Iop_32Sto64, lane);
9125 arTs = "s";
9127 else if (bitQ && (imm5 & 8)) { // 1:x1000
9128 laneNo = (imm5 >> 4) & 1;
9129 IRExpr* lane = getQRegLane(nn, laneNo, Ity_I64);
9130 res = isU ? lane
9131 : NULL;
9132 arTs = "d";
9134 /* */
9135 if (res) {
9136 vassert(laneNo < 16);
9137 putIReg64orZR(dd, res);
9138 DIP("%cmov %s, %s.%s[%u]\n", isU ? 'u' : 's',
9139 nameIRegOrZR(bitQ == 1, dd),
9140 nameQReg128(nn), arTs, laneNo);
9141 return True;
9143 /* invalid */
9144 return False;
9147 /* -------- 1,1,xxxx: INS (element) -------- */
9148 /* 31 28 20 14 9 4
9149 011 01110000 imm5 0 imm4 n d INS Vd.Ts[ix1], Vn.Ts[ix2]
9150 where Ts,ix1,ix2
9151 = case imm5 of xxxx1 -> B, xxxx, imm4[3:0]
9152 xxx10 -> H, xxx, imm4[3:1]
9153 xx100 -> S, xx, imm4[3:2]
9154 x1000 -> D, x, imm4[3:3]
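/* For example, imm5 = 00100 and imm4 = 1100 select S lanes with
   ix1 = 0 and ix2 = 3, giving "ins v0.s[0], v1.s[3]". */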
9156 if (bitQ == 1 && bitOP == 1) {
9157 HChar ts = '?';
9158 IRType ity = Ity_INVALID;
9159 UInt ix1 = 16;
9160 UInt ix2 = 16;
9161 if (imm5 & 1) {
9162 ts = 'b';
9163 ity = Ity_I8;
9164 ix1 = (imm5 >> 1) & 15;
9165 ix2 = (imm4 >> 0) & 15;
9167 else if (imm5 & 2) {
9168 ts = 'h';
9169 ity = Ity_I16;
9170 ix1 = (imm5 >> 2) & 7;
9171 ix2 = (imm4 >> 1) & 7;
9173 else if (imm5 & 4) {
9174 ts = 's';
9175 ity = Ity_I32;
9176 ix1 = (imm5 >> 3) & 3;
9177 ix2 = (imm4 >> 2) & 3;
9179 else if (imm5 & 8) {
9180 ts = 'd';
9181 ity = Ity_I64;
9182 ix1 = (imm5 >> 4) & 1;
9183 ix2 = (imm4 >> 3) & 1;
9185 /* */
9186 if (ity != Ity_INVALID) {
9187 vassert(ix1 < 16);
9188 vassert(ix2 < 16);
9189 putQRegLane(dd, ix1, getQRegLane(nn, ix2, ity));
9190 DIP("ins %s.%c[%u], %s.%c[%u]\n",
9191 nameQReg128(dd), ts, ix1, nameQReg128(nn), ts, ix2);
9192 return True;
9194 /* invalid */
9195 return False;
9198 return False;
9199 # undef INSN
9203 static
9204 Bool dis_AdvSIMD_modified_immediate(/*MB_OUT*/DisResult* dres, UInt insn)
9206 /* 31 28 18 15 11 9 4
9207 0q op 01111 00000 abc cmode 01 defgh d
9208 Decode fields: q,op,cmode
9209 Bit 11 is really "o2", but it is always zero.
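/* For example (per the standard AdvSIMDExpandImm rules), op=0,
   cmode=0000, abcdefgh=0xAB expands to imm64lo = 0x000000AB000000AB,
   i.e. "movi v0.4s, #0xab" when Q=1, or "movi v0.2s, #0xab" when Q=0,
   in which case the high 64 bits of the register are zeroed. */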
9211 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
9212 if (INSN(31,31) != 0
9213 || INSN(28,19) != BITS10(0,1,1,1,1,0,0,0,0,0)
9214 || INSN(11,10) != BITS2(0,1)) {
9215 return False;
9217 UInt bitQ = INSN(30,30);
9218 UInt bitOP = INSN(29,29);
9219 UInt cmode = INSN(15,12);
9220 UInt abcdefgh = (INSN(18,16) << 5) | INSN(9,5);
9221 UInt dd = INSN(4,0);
9223 ULong imm64lo = 0;
9224 UInt op_cmode = (bitOP << 4) | cmode;
9225 Bool ok = False;
9226 Bool isORR = False;
9227 Bool isBIC = False;
9228 Bool isMOV = False;
9229 Bool isMVN = False;
9230 Bool isFMOV = False;
9231 switch (op_cmode) {
9232 /* -------- x,0,0000 MOVI 32-bit shifted imm -------- */
9233 /* -------- x,0,0010 MOVI 32-bit shifted imm -------- */
9234 /* -------- x,0,0100 MOVI 32-bit shifted imm -------- */
9235 /* -------- x,0,0110 MOVI 32-bit shifted imm -------- */
9236 case BITS5(0,0,0,0,0): case BITS5(0,0,0,1,0):
9237 case BITS5(0,0,1,0,0): case BITS5(0,0,1,1,0): // 0:0xx0
9238 ok = True; isMOV = True; break;
9240 /* -------- x,0,0001 ORR (vector, immediate) 32-bit -------- */
9241 /* -------- x,0,0011 ORR (vector, immediate) 32-bit -------- */
9242 /* -------- x,0,0101 ORR (vector, immediate) 32-bit -------- */
9243 /* -------- x,0,0111 ORR (vector, immediate) 32-bit -------- */
9244 case BITS5(0,0,0,0,1): case BITS5(0,0,0,1,1):
9245 case BITS5(0,0,1,0,1): case BITS5(0,0,1,1,1): // 0:0xx1
9246 ok = True; isORR = True; break;
9248 /* -------- x,0,1000 MOVI 16-bit shifted imm -------- */
9249 /* -------- x,0,1010 MOVI 16-bit shifted imm -------- */
9250 case BITS5(0,1,0,0,0): case BITS5(0,1,0,1,0): // 0:10x0
9251 ok = True; isMOV = True; break;
9253 /* -------- x,0,1001 ORR (vector, immediate) 16-bit -------- */
9254 /* -------- x,0,1011 ORR (vector, immediate) 16-bit -------- */
9255 case BITS5(0,1,0,0,1): case BITS5(0,1,0,1,1): // 0:10x1
9256 ok = True; isORR = True; break;
9258 /* -------- x,0,1100 MOVI 32-bit shifting ones -------- */
9259 /* -------- x,0,1101 MOVI 32-bit shifting ones -------- */
9260 case BITS5(0,1,1,0,0): case BITS5(0,1,1,0,1): // 0:110x
9261 ok = True; isMOV = True; break;
9263 /* -------- x,0,1110 MOVI 8-bit -------- */
9264 case BITS5(0,1,1,1,0):
9265 ok = True; isMOV = True; break;
9267 /* -------- x,0,1111 FMOV (vector, immediate, F32) -------- */
9268 case BITS5(0,1,1,1,1): // 0:1111
9269 ok = True; isFMOV = True; break;
9271 /* -------- x,1,0000 MVNI 32-bit shifted imm -------- */
9272 /* -------- x,1,0010 MVNI 32-bit shifted imm -------- */
9273 /* -------- x,1,0100 MVNI 32-bit shifted imm -------- */
9274 /* -------- x,1,0110 MVNI 32-bit shifted imm -------- */
9275 case BITS5(1,0,0,0,0): case BITS5(1,0,0,1,0):
9276 case BITS5(1,0,1,0,0): case BITS5(1,0,1,1,0): // 1:0xx0
9277 ok = True; isMVN = True; break;
9279 /* -------- x,1,0001 BIC (vector, immediate) 32-bit -------- */
9280 /* -------- x,1,0011 BIC (vector, immediate) 32-bit -------- */
9281 /* -------- x,1,0101 BIC (vector, immediate) 32-bit -------- */
9282 /* -------- x,1,0111 BIC (vector, immediate) 32-bit -------- */
9283 case BITS5(1,0,0,0,1): case BITS5(1,0,0,1,1):
9284 case BITS5(1,0,1,0,1): case BITS5(1,0,1,1,1): // 1:0xx1
9285 ok = True; isBIC = True; break;
9287 /* -------- x,1,1000 MVNI 16-bit shifted imm -------- */
9288 /* -------- x,1,1010 MVNI 16-bit shifted imm -------- */
9289 case BITS5(1,1,0,0,0): case BITS5(1,1,0,1,0): // 1:10x0
9290 ok = True; isMVN = True; break;
9292 /* -------- x,1,1001 BIC (vector, immediate) 16-bit -------- */
9293 /* -------- x,1,1011 BIC (vector, immediate) 16-bit -------- */
9294 case BITS5(1,1,0,0,1): case BITS5(1,1,0,1,1): // 1:10x1
9295 ok = True; isBIC = True; break;
9297 /* -------- x,1,1100 MVNI 32-bit shifting ones -------- */
9298 /* -------- x,1,1101 MVNI 32-bit shifting ones -------- */
9299 case BITS5(1,1,1,0,0): case BITS5(1,1,1,0,1): // 1:110x
9300 ok = True; isMVN = True; break;
9302 /* -------- 0,1,1110 MOVI 64-bit scalar -------- */
9303 /* -------- 1,1,1110 MOVI 64-bit vector -------- */
9304 case BITS5(1,1,1,1,0):
9305 ok = True; isMOV = True; break;
9307 /* -------- 1,1,1111 FMOV (vector, immediate, F64) -------- */
9308 case BITS5(1,1,1,1,1): // 1:1111
9309 ok = bitQ == 1; isFMOV = True; break;
9311 default:
9312 break;
9314 if (ok) {
9315 vassert(1 == (isMOV ? 1 : 0) + (isMVN ? 1 : 0)
9316 + (isORR ? 1 : 0) + (isBIC ? 1 : 0) + (isFMOV ? 1 : 0));
9317 ok = AdvSIMDExpandImm(&imm64lo, bitOP, cmode, abcdefgh);
9319 if (ok) {
9320 if (isORR || isBIC) {
9321 ULong inv
9322 = isORR ? 0ULL : ~0ULL;
9323 IRExpr* immV128
9324 = binop(Iop_64HLtoV128, mkU64(inv ^ imm64lo), mkU64(inv ^ imm64lo));
9325 IRExpr* res
9326 = binop(isORR ? Iop_OrV128 : Iop_AndV128, getQReg128(dd), immV128);
9327 const HChar* nm = isORR ? "orr" : "bic";
9328 if (bitQ == 0) {
9329 putQReg128(dd, unop(Iop_ZeroHI64ofV128, res));
9330 DIP("%s %s.1d, %016llx\n", nm, nameQReg128(dd), imm64lo);
9331 } else {
9332 putQReg128(dd, res);
9333 DIP("%s %s.2d, #0x%016llx'%016llx\n", nm,
9334 nameQReg128(dd), imm64lo, imm64lo);
9337 else if (isMOV || isMVN || isFMOV) {
9338 if (isMVN) imm64lo = ~imm64lo;
9339 ULong imm64hi = bitQ == 0 ? 0 : imm64lo;
9340 IRExpr* immV128 = binop(Iop_64HLtoV128, mkU64(imm64hi),
9341 mkU64(imm64lo));
9342 putQReg128(dd, immV128);
9343 DIP("mov %s, #0x%016llx'%016llx\n", nameQReg128(dd), imm64hi, imm64lo);
9345 return True;
9347 /* else fall through */
9349 return False;
9350 # undef INSN
9354 static
9355 Bool dis_AdvSIMD_scalar_copy(/*MB_OUT*/DisResult* dres, UInt insn)
9357 /* 31 28 20 15 14 10 9 4
9358 01 op 11110000 imm5 0 imm4 1 n d
9359 Decode fields: op,imm4
9361 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
9362 if (INSN(31,30) != BITS2(0,1)
9363 || INSN(28,21) != BITS8(1,1,1,1,0,0,0,0)
9364 || INSN(15,15) != 0 || INSN(10,10) != 1) {
9365 return False;
9367 UInt bitOP = INSN(29,29);
9368 UInt imm5 = INSN(20,16);
9369 UInt imm4 = INSN(14,11);
9370 UInt nn = INSN(9,5);
9371 UInt dd = INSN(4,0);
9373 if (bitOP == 0 && imm4 == BITS4(0,0,0,0)) {
9374 /* -------- 0,0000 DUP (element, scalar) -------- */
9375 IRTemp w0 = newTemp(Ity_I64);
9376 const HChar* arTs = "??";
9377 IRType laneTy = Ity_INVALID;
9378 UInt laneNo = 16; /* invalid */
9379 if (imm5 & 1) {
9380 arTs = "b";
9381 laneNo = (imm5 >> 1) & 15;
9382 laneTy = Ity_I8;
9383 assign(w0, unop(Iop_8Uto64, getQRegLane(nn, laneNo, laneTy)));
9385 else if (imm5 & 2) {
9386 arTs = "h";
9387 laneNo = (imm5 >> 2) & 7;
9388 laneTy = Ity_I16;
9389 assign(w0, unop(Iop_16Uto64, getQRegLane(nn, laneNo, laneTy)));
9391 else if (imm5 & 4) {
9392 arTs = "s";
9393 laneNo = (imm5 >> 3) & 3;
9394 laneTy = Ity_I32;
9395 assign(w0, unop(Iop_32Uto64, getQRegLane(nn, laneNo, laneTy)));
9397 else if (imm5 & 8) {
9398 arTs = "d";
9399 laneNo = (imm5 >> 4) & 1;
9400 laneTy = Ity_I64;
9401 assign(w0, getQRegLane(nn, laneNo, laneTy));
9403 else {
9404 /* invalid; leave laneTy unchanged. */
9406 /* */
9407 if (laneTy != Ity_INVALID) {
9408 vassert(laneNo < 16);
9409 putQReg128(dd, binop(Iop_64HLtoV128, mkU64(0), mkexpr(w0)));
9410 DIP("dup %s, %s.%s[%u]\n",
9411 nameQRegLO(dd, laneTy), nameQReg128(nn), arTs, laneNo);
9412 return True;
9414 /* else fall through */
9417 return False;
9418 # undef INSN
9422 static
9423 Bool dis_AdvSIMD_scalar_pairwise(/*MB_OUT*/DisResult* dres, UInt insn)
9425 /* 31 28 23 21 16 11 9 4
9426 01 u 11110 sz 11000 opcode 10 n d
9427 Decode fields: u,sz,opcode
9429 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
9430 if (INSN(31,30) != BITS2(0,1)
9431 || INSN(28,24) != BITS5(1,1,1,1,0)
9432 || INSN(21,17) != BITS5(1,1,0,0,0)
9433 || INSN(11,10) != BITS2(1,0)) {
9434 return False;
9436 UInt bitU = INSN(29,29);
9437 UInt sz = INSN(23,22);
9438 UInt opcode = INSN(16,12);
9439 UInt nn = INSN(9,5);
9440 UInt dd = INSN(4,0);
9442 if (bitU == 0 && sz == X11 && opcode == BITS5(1,1,0,1,1)) {
9443 /* -------- 0,11,11011 ADDP d_2d -------- */
9444 IRTemp xy = newTempV128();
9445 IRTemp xx = newTempV128();
9446 assign(xy, getQReg128(nn));
9447 assign(xx, binop(Iop_InterleaveHI64x2, mkexpr(xy), mkexpr(xy)));
9448 putQReg128(dd, unop(Iop_ZeroHI64ofV128,
9449 binop(Iop_Add64x2, mkexpr(xy), mkexpr(xx))));
9450 DIP("addp d%u, %s.2d\n", dd, nameQReg128(nn));
9451 return True;
9454 if (bitU == 1 && sz <= X01 && opcode == BITS5(0,1,1,0,1)) {
9455 /* -------- 1,00,01101 ADDP s_2s -------- */
9456 /* -------- 1,01,01101 ADDP d_2d -------- */
9457 Bool isD = sz == X01;
9458 IROp opZHI = mkVecZEROHIxxOFV128(isD ? 3 : 2);
9459 IROp opADD = mkVecADDF(isD ? 3 : 2);
9460 IRTemp src = newTempV128();
9461 IRTemp argL = newTempV128();
9462 IRTemp argR = newTempV128();
9463 assign(src, getQReg128(nn));
9464 assign(argL, unop(opZHI, mkexpr(src)));
9465 assign(argR, unop(opZHI, triop(Iop_SliceV128, mkexpr(src), mkexpr(src),
9466 mkU8(isD ? 8 : 4))));
9467 putQReg128(dd, unop(opZHI,
9468 triop(opADD, mkexpr(mk_get_IR_rounding_mode()),
9469 mkexpr(argL), mkexpr(argR))));
9470 DIP(isD ? "faddp d%u, v%u.2d\n" : "faddp s%u, v%u.2s\n", dd, nn);
9471 return True;
9474 if (bitU == 1
9475 && (opcode == BITS5(0,1,1,0,0) || opcode == BITS5(0,1,1,1,1))) {
9476 /* -------- 1,0x,01100 FMAXNMP d_2d, s_2s -------- */
9477 /* -------- 1,1x,01100 FMINNMP d_2d, s_2s -------- */
9478 /* -------- 1,0x,01111 FMAXP d_2d, s_2s -------- */
9479 /* -------- 1,1x,01111 FMINP d_2d, s_2s -------- */
9480 /* FMAXNM, FMINNM: FIXME -- KLUDGED */
9481 Bool isD = (sz & 1) == 1;
9482 Bool isMIN = (sz & 2) == 2;
9483 Bool isNM = opcode == BITS5(0,1,1,0,0);
9484 IROp opZHI = mkVecZEROHIxxOFV128(isD ? 3 : 2);
9485 IROp opMXX = (isMIN ? mkVecMINF : mkVecMAXF)(isD ? 3 : 2);
9486 IRTemp src = newTempV128();
9487 IRTemp argL = newTempV128();
9488 IRTemp argR = newTempV128();
9489 assign(src, getQReg128(nn));
9490 assign(argL, unop(opZHI, mkexpr(src)));
9491 assign(argR, unop(opZHI, triop(Iop_SliceV128, mkexpr(src), mkexpr(src),
9492 mkU8(isD ? 8 : 4))));
9493 putQReg128(dd, unop(opZHI,
9494 binop(opMXX, mkexpr(argL), mkexpr(argR))));
9495 HChar c = isD ? 'd' : 's';
9496 DIP("%s%sp %c%u, v%u.2%c\n",
9497 isMIN ? "fmin" : "fmax", isNM ? "nm" : "", c, dd, nn, c);
9498 return True;
9501 return False;
9502 # undef INSN
9506 static
9507 Bool dis_AdvSIMD_scalar_shift_by_imm(/*MB_OUT*/DisResult* dres, UInt insn)
9509 /* 31 28 22 18 15 10 9 4
9510 01 u 111110 immh immb opcode 1 n d
9511 Decode fields: u,immh,opcode
9513 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
9514 if (INSN(31,30) != BITS2(0,1)
9515 || INSN(28,23) != BITS6(1,1,1,1,1,0) || INSN(10,10) != 1) {
9516 return False;
9518 UInt bitU = INSN(29,29);
9519 UInt immh = INSN(22,19);
9520 UInt immb = INSN(18,16);
9521 UInt opcode = INSN(15,11);
9522 UInt nn = INSN(9,5);
9523 UInt dd = INSN(4,0);
9524 UInt immhb = (immh << 3) | immb;
9526 if ((immh & 8) == 8
9527 && (opcode == BITS5(0,0,0,0,0) || opcode == BITS5(0,0,0,1,0))) {
9528 /* -------- 0,1xxx,00000 SSHR d_d_#imm -------- */
9529 /* -------- 1,1xxx,00000 USHR d_d_#imm -------- */
9530 /* -------- 0,1xxx,00010 SSRA d_d_#imm -------- */
9531 /* -------- 1,1xxx,00010 USRA d_d_#imm -------- */
9532 Bool isU = bitU == 1;
9533 Bool isAcc = opcode == BITS5(0,0,0,1,0);
9534 UInt sh = 128 - immhb;
9535 vassert(sh >= 1 && sh <= 64);
9536 IROp op = isU ? Iop_ShrN64x2 : Iop_SarN64x2;
9537 IRExpr* src = getQReg128(nn);
9538 IRTemp shf = newTempV128();
9539 IRTemp res = newTempV128();
9540 if (sh == 64 && isU) {
9541 assign(shf, mkV128(0x0000));
9542 } else {
9543 UInt nudge = 0;
9544 if (sh == 64) {
9545 vassert(!isU);
9546 nudge = 1;
9548 assign(shf, binop(op, src, mkU8(sh - nudge)));
9550 assign(res, isAcc ? binop(Iop_Add64x2, getQReg128(dd), mkexpr(shf))
9551 : mkexpr(shf));
9552 putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
9553 const HChar* nm = isAcc ? (isU ? "usra" : "ssra")
9554 : (isU ? "ushr" : "sshr");
9555 DIP("%s d%u, d%u, #%u\n", nm, dd, nn, sh);
9556 return True;
9559 if ((immh & 8) == 8
9560 && (opcode == BITS5(0,0,1,0,0) || opcode == BITS5(0,0,1,1,0))) {
9561 /* -------- 0,1xxx,00100 SRSHR d_d_#imm -------- */
9562 /* -------- 1,1xxx,00100 URSHR d_d_#imm -------- */
9563 /* -------- 0,1xxx,00110 SRSRA d_d_#imm -------- */
9564 /* -------- 1,1xxx,00110 URSRA d_d_#imm -------- */
9565 Bool isU = bitU == 1;
9566 Bool isAcc = opcode == BITS5(0,0,1,1,0);
9567 UInt sh = 128 - immhb;
9568 vassert(sh >= 1 && sh <= 64);
9569 IROp op = isU ? Iop_Rsh64Ux2 : Iop_Rsh64Sx2;
9570 vassert(sh >= 1 && sh <= 64);
9571 IRExpr* src = getQReg128(nn);
9572 IRTemp imm8 = newTemp(Ity_I8);
9573 assign(imm8, mkU8((UChar)(-sh)));
9574 IRExpr* amt = mkexpr(math_DUP_TO_V128(imm8, Ity_I8));
9575 IRTemp shf = newTempV128();
9576 IRTemp res = newTempV128();
9577 assign(shf, binop(op, src, amt));
9578 assign(res, isAcc ? binop(Iop_Add64x2, getQReg128(dd), mkexpr(shf))
9579 : mkexpr(shf));
9580 putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
9581 const HChar* nm = isAcc ? (isU ? "ursra" : "srsra")
9582 : (isU ? "urshr" : "srshr");
9583 DIP("%s d%u, d%u, #%u\n", nm, dd, nn, sh);
9584 return True;
9587 if (bitU == 1 && (immh & 8) == 8 && opcode == BITS5(0,1,0,0,0)) {
9588 /* -------- 1,1xxx,01000 SRI d_d_#imm -------- */
9589 UInt sh = 128 - immhb;
9590 vassert(sh >= 1 && sh <= 64);
9591 if (sh == 64) {
9592 putQReg128(dd, unop(Iop_ZeroHI64ofV128, getQReg128(dd)));
9593 } else {
9594 /* sh is in range 1 .. 63 */
9595 ULong nmask = (ULong)(((Long)0x8000000000000000ULL) >> (sh-1));
9596 IRExpr* nmaskV = binop(Iop_64HLtoV128, mkU64(nmask), mkU64(nmask));
9597 IRTemp res = newTempV128();
9598 assign(res, binop(Iop_OrV128,
9599 binop(Iop_AndV128, getQReg128(dd), nmaskV),
9600 binop(Iop_ShrN64x2, getQReg128(nn), mkU8(sh))));
9601 putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
9603 DIP("sri d%u, d%u, #%u\n", dd, nn, sh);
9604 return True;
9607 if (bitU == 0 && (immh & 8) == 8 && opcode == BITS5(0,1,0,1,0)) {
9608 /* -------- 0,1xxx,01010 SHL d_d_#imm -------- */
9609 UInt sh = immhb - 64;
9610 vassert(sh >= 0 && sh < 64);
9611 putQReg128(dd,
9612 unop(Iop_ZeroHI64ofV128,
9613 sh == 0 ? getQReg128(nn)
9614 : binop(Iop_ShlN64x2, getQReg128(nn), mkU8(sh))));
9615 DIP("shl d%u, d%u, #%u\n", dd, nn, sh);
9616 return True;
9619 if (bitU == 1 && (immh & 8) == 8 && opcode == BITS5(0,1,0,1,0)) {
9620 /* -------- 1,1xxx,01010 SLI d_d_#imm -------- */
9621 UInt sh = immhb - 64;
9622 vassert(sh >= 0 && sh < 64);
9623 if (sh == 0) {
9624 putQReg128(dd, unop(Iop_ZeroHI64ofV128, getQReg128(nn)));
9625 } else {
9626 /* sh is in range 1 .. 63 */
9627 ULong nmask = (1ULL << sh) - 1;
9628 IRExpr* nmaskV = binop(Iop_64HLtoV128, mkU64(nmask), mkU64(nmask));
9629 IRTemp res = newTempV128();
9630 assign(res, binop(Iop_OrV128,
9631 binop(Iop_AndV128, getQReg128(dd), nmaskV),
9632 binop(Iop_ShlN64x2, getQReg128(nn), mkU8(sh))));
9633 putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
9635 DIP("sli d%u, d%u, #%u\n", dd, nn, sh);
9636 return True;
9639 if (opcode == BITS5(0,1,1,1,0)
9640 || (bitU == 1 && opcode == BITS5(0,1,1,0,0))) {
9641 /* -------- 0,01110 SQSHL #imm -------- */
9642 /* -------- 1,01110 UQSHL #imm -------- */
9643 /* -------- 1,01100 SQSHLU #imm -------- */
9644 UInt size = 0;
9645 UInt shift = 0;
9646 Bool ok = getLaneInfo_IMMH_IMMB(&shift, &size, immh, immb);
9647 if (!ok) return False;
9648 vassert(size >= 0 && size <= 3);
9649 /* The shift encoding has opposite sign for the leftwards case.
9650 Adjust shift to compensate. */
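/* For example, with 16-bit lanes (lanebits = 16) an encoded value of
   shift = 11 from getLaneInfo_IMMH_IMMB denotes a left shift of
   16 - 11 = 5, i.e. "sqshl h0, h1, #5". */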
9651 UInt lanebits = 8 << size;
9652 shift = lanebits - shift;
9653 vassert(shift >= 0 && shift < lanebits);
9654 const HChar* nm = NULL;
9655 /**/ if (bitU == 0 && opcode == BITS5(0,1,1,1,0)) nm = "sqshl";
9656 else if (bitU == 1 && opcode == BITS5(0,1,1,1,0)) nm = "uqshl";
9657 else if (bitU == 1 && opcode == BITS5(0,1,1,0,0)) nm = "sqshlu";
9658 else vassert(0);
9659 IRTemp qDiff1 = IRTemp_INVALID;
9660 IRTemp qDiff2 = IRTemp_INVALID;
9661 IRTemp res = IRTemp_INVALID;
9662 IRTemp src = math_ZERO_ALL_EXCEPT_LOWEST_LANE(size, getQReg128(nn));
9663 /* This relies on the fact that the zeroed out lanes generate zeroed
9664 result lanes and don't saturate, so there's no point in trimming
9665 the resulting res, qDiff1 or qDiff2 values. */
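/* For example, "sqshl b0, b1, #3" with B1 = 0x50 (+80) would produce
   640, which exceeds the signed 8-bit range, so lane 0 saturates to
   0x7f and the qDiff1/qDiff2 comparison sets QC. */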
9666 math_QSHL_IMM(&res, &qDiff1, &qDiff2, src, size, shift, nm);
9667 putQReg128(dd, mkexpr(res));
9668 updateQCFLAGwithDifference(qDiff1, qDiff2);
9669 const HChar arr = "bhsd"[size];
9670 DIP("%s %c%u, %c%u, #%u\n", nm, arr, dd, arr, nn, shift);
9671 return True;
9674 if (opcode == BITS5(1,0,0,1,0) || opcode == BITS5(1,0,0,1,1)
9675 || (bitU == 1
9676 && (opcode == BITS5(1,0,0,0,0) || opcode == BITS5(1,0,0,0,1)))) {
9677 /* -------- 0,10010 SQSHRN #imm -------- */
9678 /* -------- 1,10010 UQSHRN #imm -------- */
9679 /* -------- 0,10011 SQRSHRN #imm -------- */
9680 /* -------- 1,10011 UQRSHRN #imm -------- */
9681 /* -------- 1,10000 SQSHRUN #imm -------- */
9682 /* -------- 1,10001 SQRSHRUN #imm -------- */
9683 UInt size = 0;
9684 UInt shift = 0;
9685 Bool ok = getLaneInfo_IMMH_IMMB(&shift, &size, immh, immb);
9686 if (!ok || size == X11) return False;
9687 vassert(size >= X00 && size <= X10);
9688 vassert(shift >= 1 && shift <= (8 << size));
9689 const HChar* nm = "??";
9690 IROp op = Iop_INVALID;
9691 /* Decide on the name and the operation. */
9692 /**/ if (bitU == 0 && opcode == BITS5(1,0,0,1,0)) {
9693 nm = "sqshrn"; op = mkVecQANDqsarNNARROWSS(size);
9695 else if (bitU == 1 && opcode == BITS5(1,0,0,1,0)) {
9696 nm = "uqshrn"; op = mkVecQANDqshrNNARROWUU(size);
9698 else if (bitU == 0 && opcode == BITS5(1,0,0,1,1)) {
9699 nm = "sqrshrn"; op = mkVecQANDqrsarNNARROWSS(size);
9701 else if (bitU == 1 && opcode == BITS5(1,0,0,1,1)) {
9702 nm = "uqrshrn"; op = mkVecQANDqrshrNNARROWUU(size);
9704 else if (bitU == 1 && opcode == BITS5(1,0,0,0,0)) {
9705 nm = "sqshrun"; op = mkVecQANDqsarNNARROWSU(size);
9707 else if (bitU == 1 && opcode == BITS5(1,0,0,0,1)) {
9708 nm = "sqrshrun"; op = mkVecQANDqrsarNNARROWSU(size);
9710 else vassert(0);
9711 /* Compute the result (Q, shifted value) pair. */
9712 IRTemp src128 = math_ZERO_ALL_EXCEPT_LOWEST_LANE(size+1, getQReg128(nn));
9713 IRTemp pair = newTempV128();
9714 assign(pair, binop(op, mkexpr(src128), mkU8(shift)));
9715 /* Update the result reg */
9716 IRTemp res64in128 = newTempV128();
9717 assign(res64in128, unop(Iop_ZeroHI64ofV128, mkexpr(pair)));
9718 putQReg128(dd, mkexpr(res64in128));
9719 /* Update the Q flag. */
9720 IRTemp q64q64 = newTempV128();
9721 assign(q64q64, binop(Iop_InterleaveHI64x2, mkexpr(pair), mkexpr(pair)));
9722 IRTemp z128 = newTempV128();
9723 assign(z128, mkV128(0x0000));
9724 updateQCFLAGwithDifference(q64q64, z128);
9725 /* */
9726 const HChar arrNarrow = "bhsd"[size];
9727 const HChar arrWide = "bhsd"[size+1];
9728 DIP("%s %c%u, %c%u, #%u\n", nm, arrNarrow, dd, arrWide, nn, shift);
9729 return True;
9732 if (immh >= BITS4(0,1,0,0) && opcode == BITS5(1,1,1,0,0)) {
9733 /* -------- 0,!=00xx,11100 SCVTF d_d_imm, s_s_imm -------- */
9734 /* -------- 1,!=00xx,11100 UCVTF d_d_imm, s_s_imm -------- */
9735 UInt size = 0;
9736 UInt fbits = 0;
9737 Bool ok = getLaneInfo_IMMH_IMMB(&fbits, &size, immh, immb);
9738 /* The following holds because immh is never zero. */
9739 vassert(ok);
9740 /* The following holds because immh >= 0100. */
9741 vassert(size == X10 || size == X11);
9742 Bool isD = size == X11;
9743 Bool isU = bitU == 1;
9744 vassert(fbits >= 1 && fbits <= (isD ? 64 : 32));
9745 Double scale = two_to_the_minus(fbits);
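/* For example, "ucvtf s0, s1, #8" (fbits = 8) treats the source as
   fixed-point with 8 fraction bits: S1 = 0x00000280 (640) converts to
   640 * 2^-8 = 2.5. */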
9746 IRExpr* scaleE = isD ? IRExpr_Const(IRConst_F64(scale))
9747 : IRExpr_Const(IRConst_F32( (Float)scale ));
9748 IROp opMUL = isD ? Iop_MulF64 : Iop_MulF32;
9749 IROp opCVT = isU ? (isD ? Iop_I64UtoF64 : Iop_I32UtoF32)
9750 : (isD ? Iop_I64StoF64 : Iop_I32StoF32);
9751 IRType tyF = isD ? Ity_F64 : Ity_F32;
9752 IRType tyI = isD ? Ity_I64 : Ity_I32;
9753 IRTemp src = newTemp(tyI);
9754 IRTemp res = newTemp(tyF);
9755 IRTemp rm = mk_get_IR_rounding_mode();
9756 assign(src, getQRegLane(nn, 0, tyI));
9757 assign(res, triop(opMUL, mkexpr(rm),
9758 binop(opCVT, mkexpr(rm), mkexpr(src)), scaleE));
9759 putQRegLane(dd, 0, mkexpr(res));
9760 if (!isD) {
9761 putQRegLane(dd, 1, mkU32(0));
9762 }
9763 putQRegLane(dd, 1, mkU64(0));
9764 const HChar ch = isD ? 'd' : 's';
9765 DIP("%s %c%u, %c%u, #%u\n", isU ? "ucvtf" : "scvtf",
9766 ch, dd, ch, nn, fbits);
9767 return True;
9770 if (immh >= BITS4(0,1,0,0) && opcode == BITS5(1,1,1,1,1)) {
9771 /* -------- 0,!=00xx,11111 FCVTZS d_d_imm, s_s_imm -------- */
9772 /* -------- 1,!=00xx,11111 FCVTZU d_d_imm, s_s_imm -------- */
9773 UInt size = 0;
9774 UInt fbits = 0;
9775 Bool ok = getLaneInfo_IMMH_IMMB(&fbits, &size, immh, immb);
9776 /* The following holds because immh is never zero. */
9777 vassert(ok);
9778 /* The following holds because immh >= 0100. */
9779 vassert(size == X10 || size == X11);
9780 Bool isD = size == X11;
9781 Bool isU = bitU == 1;
9782 vassert(fbits >= 1 && fbits <= (isD ? 64 : 32));
9783 Double scale = two_to_the_plus(fbits);
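/* For example, "fcvtzs s0, s1, #8" multiplies by 2^8 and truncates
   towards zero: S1 = 2.5 becomes 640 (0x00000280). */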
9784 IRExpr* scaleE = isD ? IRExpr_Const(IRConst_F64(scale))
9785 : IRExpr_Const(IRConst_F32( (Float)scale ));
9786 IROp opMUL = isD ? Iop_MulF64 : Iop_MulF32;
9787 IROp opCVT = isU ? (isD ? Iop_F64toI64U : Iop_F32toI32U)
9788 : (isD ? Iop_F64toI64S : Iop_F32toI32S);
9789 IRType tyF = isD ? Ity_F64 : Ity_F32;
9790 IRType tyI = isD ? Ity_I64 : Ity_I32;
9791 IRTemp src = newTemp(tyF);
9792 IRTemp res = newTemp(tyI);
9793 IRTemp rm = newTemp(Ity_I32);
9794 assign(src, getQRegLane(nn, 0, tyF));
9795 assign(rm, mkU32(Irrm_ZERO));
9796 assign(res, binop(opCVT, mkexpr(rm),
9797 triop(opMUL, mkexpr(rm), mkexpr(src), scaleE)));
9798 putQRegLane(dd, 0, mkexpr(res));
9799 if (!isD) {
9800 putQRegLane(dd, 1, mkU32(0));
9801 }
9802 putQRegLane(dd, 1, mkU64(0));
9803 const HChar ch = isD ? 'd' : 's';
9804 DIP("%s %c%u, %c%u, #%u\n", isU ? "fcvtzu" : "fcvtzs",
9805 ch, dd, ch, nn, fbits);
9806 return True;
9809 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
9810 return False;
9811 # undef INSN
9815 static
9816 Bool dis_AdvSIMD_scalar_three_different(/*MB_OUT*/DisResult* dres, UInt insn)
9818 /* 31 29 28 23 21 20 15 11 9 4
9819 01 U 11110 size 1 m opcode 00 n d
9820 Decode fields: u,opcode
9822 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
9823 if (INSN(31,30) != BITS2(0,1)
9824 || INSN(28,24) != BITS5(1,1,1,1,0)
9825 || INSN(21,21) != 1
9826 || INSN(11,10) != BITS2(0,0)) {
9827 return False;
9829 UInt bitU = INSN(29,29);
9830 UInt size = INSN(23,22);
9831 UInt mm = INSN(20,16);
9832 UInt opcode = INSN(15,12);
9833 UInt nn = INSN(9,5);
9834 UInt dd = INSN(4,0);
9835 vassert(size < 4);
9837 if (bitU == 0
9838 && (opcode == BITS4(1,1,0,1)
9839 || opcode == BITS4(1,0,0,1) || opcode == BITS4(1,0,1,1))) {
9840 /* -------- 0,1101 SQDMULL -------- */ // 0 (ks)
9841 /* -------- 0,1001 SQDMLAL -------- */ // 1
9842 /* -------- 0,1011 SQDMLSL -------- */ // 2
9843 /* Widens, and size refers to the narrowed lanes. */
9844 UInt ks = 3;
9845 switch (opcode) {
9846 case BITS4(1,1,0,1): ks = 0; break;
9847 case BITS4(1,0,0,1): ks = 1; break;
9848 case BITS4(1,0,1,1): ks = 2; break;
9849 default: vassert(0);
9851 vassert(ks >= 0 && ks <= 2);
9852 if (size == X00 || size == X11) return False;
9853 vassert(size <= 2);
9854 IRTemp vecN, vecM, vecD, res, sat1q, sat1n, sat2q, sat2n;
9855 vecN = vecM = vecD = res = sat1q = sat1n = sat2q = sat2n = IRTemp_INVALID;
9856 newTempsV128_3(&vecN, &vecM, &vecD);
9857 assign(vecN, getQReg128(nn));
9858 assign(vecM, getQReg128(mm));
9859 assign(vecD, getQReg128(dd));
9860 math_SQDMULL_ACC(&res, &sat1q, &sat1n, &sat2q, &sat2n,
9861 False/*!is2*/, size, "mas"[ks],
9862 vecN, vecM, ks == 0 ? IRTemp_INVALID : vecD);
9863 IROp opZHI = mkVecZEROHIxxOFV128(size+1);
9864 putQReg128(dd, unop(opZHI, mkexpr(res)));
9865 vassert(sat1q != IRTemp_INVALID && sat1n != IRTemp_INVALID);
9866 updateQCFLAGwithDifferenceZHI(sat1q, sat1n, opZHI);
9867 if (sat2q != IRTemp_INVALID || sat2n != IRTemp_INVALID) {
9868 updateQCFLAGwithDifferenceZHI(sat2q, sat2n, opZHI);
9870 const HChar* nm = ks == 0 ? "sqdmull"
9871 : (ks == 1 ? "sqdmlal" : "sqdmlsl");
9872 const HChar arrNarrow = "bhsd"[size];
9873 const HChar arrWide = "bhsd"[size+1];
9874 DIP("%s %c%u, %c%u, %c%u\n",
9875 nm, arrWide, dd, arrNarrow, nn, arrNarrow, mm);
9876 return True;
9879 return False;
9880 # undef INSN
9884 static
9885 Bool dis_AdvSIMD_scalar_three_same(/*MB_OUT*/DisResult* dres, UInt insn)
9887 /* 31 29 28 23 21 20 15 10 9 4
9888 01 U 11110 size 1 m opcode 1 n d
9889 Decode fields: u,size,opcode
9891 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
9892 if (INSN(31,30) != BITS2(0,1)
9893 || INSN(28,24) != BITS5(1,1,1,1,0)
9894 || INSN(21,21) != 1
9895 || INSN(10,10) != 1) {
9896 return False;
9898 UInt bitU = INSN(29,29);
9899 UInt size = INSN(23,22);
9900 UInt mm = INSN(20,16);
9901 UInt opcode = INSN(15,11);
9902 UInt nn = INSN(9,5);
9903 UInt dd = INSN(4,0);
9904 vassert(size < 4);
9906 if (opcode == BITS5(0,0,0,0,1) || opcode == BITS5(0,0,1,0,1)) {
9907 /* -------- 0,xx,00001 SQADD std4_std4_std4 -------- */
9908 /* -------- 1,xx,00001 UQADD std4_std4_std4 -------- */
9909 /* -------- 0,xx,00101 SQSUB std4_std4_std4 -------- */
9910 /* -------- 1,xx,00101 UQSUB std4_std4_std4 -------- */
9911 Bool isADD = opcode == BITS5(0,0,0,0,1);
9912 Bool isU = bitU == 1;
9913 IROp qop = Iop_INVALID;
9914 IROp nop = Iop_INVALID;
9915 if (isADD) {
9916 qop = isU ? mkVecQADDU(size) : mkVecQADDS(size);
9917 nop = mkVecADD(size);
9918 } else {
9919 qop = isU ? mkVecQSUBU(size) : mkVecQSUBS(size);
9920 nop = mkVecSUB(size);
9922 IRTemp argL = newTempV128();
9923 IRTemp argR = newTempV128();
9924 IRTemp qres = newTempV128();
9925 IRTemp nres = newTempV128();
9926 assign(argL, getQReg128(nn));
9927 assign(argR, getQReg128(mm));
9928 assign(qres, mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(
9929 size, binop(qop, mkexpr(argL), mkexpr(argR)))));
9930 assign(nres, mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(
9931 size, binop(nop, mkexpr(argL), mkexpr(argR)))));
9932 putQReg128(dd, mkexpr(qres));
9933 updateQCFLAGwithDifference(qres, nres);
9934 const HChar* nm = isADD ? (isU ? "uqadd" : "sqadd")
9935 : (isU ? "uqsub" : "sqsub");
9936 const HChar arr = "bhsd"[size];
9937 DIP("%s %c%u, %c%u, %c%u\n", nm, arr, dd, arr, nn, arr, mm);
9938 return True;
9941 if (size == X11 && opcode == BITS5(0,0,1,1,0)) {
9942 /* -------- 0,11,00110 CMGT d_d_d -------- */ // >s
9943 /* -------- 1,11,00110 CMHI d_d_d -------- */ // >u
9944 Bool isGT = bitU == 0;
9945 IRExpr* argL = getQReg128(nn);
9946 IRExpr* argR = getQReg128(mm);
9947 IRTemp res = newTempV128();
9948 assign(res,
9949 isGT ? binop(Iop_CmpGT64Sx2, argL, argR)
9950 : binop(Iop_CmpGT64Ux2, argL, argR));
9951 putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
9952 DIP("%s %s, %s, %s\n",isGT ? "cmgt" : "cmhi",
9953 nameQRegLO(dd, Ity_I64),
9954 nameQRegLO(nn, Ity_I64), nameQRegLO(mm, Ity_I64));
9955 return True;
9958 if (size == X11 && opcode == BITS5(0,0,1,1,1)) {
9959 /* -------- 0,11,00111 CMGE d_d_d -------- */ // >=s
9960 /* -------- 1,11,00111 CMHS d_d_d -------- */ // >=u
9961 Bool isGE = bitU == 0;
9962 IRExpr* argL = getQReg128(nn);
9963 IRExpr* argR = getQReg128(mm);
9964 IRTemp res = newTempV128();
9965 assign(res,
9966 isGE ? unop(Iop_NotV128, binop(Iop_CmpGT64Sx2, argR, argL))
9967 : unop(Iop_NotV128, binop(Iop_CmpGT64Ux2, argR, argL)));
9968 putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
9969 DIP("%s %s, %s, %s\n", isGE ? "cmge" : "cmhs",
9970 nameQRegLO(dd, Ity_I64),
9971 nameQRegLO(nn, Ity_I64), nameQRegLO(mm, Ity_I64));
9972 return True;
9975 if (size == X11 && (opcode == BITS5(0,1,0,0,0)
9976 || opcode == BITS5(0,1,0,1,0))) {
9977 /* -------- 0,xx,01000 SSHL d_d_d -------- */
9978 /* -------- 0,xx,01010 SRSHL d_d_d -------- */
9979 /* -------- 1,xx,01000 USHL d_d_d -------- */
9980 /* -------- 1,xx,01010 URSHL d_d_d -------- */
9981 Bool isU = bitU == 1;
9982 Bool isR = opcode == BITS5(0,1,0,1,0);
9983 IROp op = isR ? (isU ? mkVecRSHU(size) : mkVecRSHS(size))
9984 : (isU ? mkVecSHU(size) : mkVecSHS(size));
9985 IRTemp res = newTempV128();
9986 assign(res, binop(op, getQReg128(nn), getQReg128(mm)));
9987 putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
9988 const HChar* nm = isR ? (isU ? "urshl" : "srshl")
9989 : (isU ? "ushl" : "sshl");
9990 DIP("%s %s, %s, %s\n", nm,
9991 nameQRegLO(dd, Ity_I64),
9992 nameQRegLO(nn, Ity_I64), nameQRegLO(mm, Ity_I64));
9993 return True;
9996 if (opcode == BITS5(0,1,0,0,1) || opcode == BITS5(0,1,0,1,1)) {
9997 /* -------- 0,xx,01001 SQSHL std4_std4_std4 -------- */
9998 /* -------- 0,xx,01011 SQRSHL std4_std4_std4 -------- */
9999 /* -------- 1,xx,01001 UQSHL std4_std4_std4 -------- */
10000 /* -------- 1,xx,01011 UQRSHL std4_std4_std4 -------- */
10001 Bool isU = bitU == 1;
10002 Bool isR = opcode == BITS5(0,1,0,1,1);
10003 IROp op = isR ? (isU ? mkVecQANDUQRSH(size) : mkVecQANDSQRSH(size))
10004 : (isU ? mkVecQANDUQSH(size) : mkVecQANDSQSH(size));
10005 /* This is a bit tricky. Since we're only interested in the lowest
10006 lane of the result, we zero out all the rest in the operands, so
10007 as to ensure that other lanes don't pollute the returned Q value.
10008 This works because it means, for the lanes we don't care about, we
10009 are shifting zero by zero, which can never saturate. */
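/* For example, in "uqshl b0, b1, b2" only lane 0 carries real data;
   lanes 1..15 compute 0 << 0, which can never saturate, so only lane 0
   can make the Q half of the result (resQ below) non-zero. */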
10010 IRTemp res256 = newTemp(Ity_V256);
10011 IRTemp resSH = newTempV128();
10012 IRTemp resQ = newTempV128();
10013 IRTemp zero = newTempV128();
10014 assign(
10015 res256,
10016 binop(op,
10017 mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(size, getQReg128(nn))),
10018 mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(size, getQReg128(mm)))));
10019 assign(resSH, unop(Iop_V256toV128_0, mkexpr(res256)));
10020 assign(resQ, unop(Iop_V256toV128_1, mkexpr(res256)));
10021 assign(zero, mkV128(0x0000));
10022 putQReg128(dd, mkexpr(resSH));
10023 updateQCFLAGwithDifference(resQ, zero);
10024 const HChar* nm = isR ? (isU ? "uqrshl" : "sqrshl")
10025 : (isU ? "uqshl" : "sqshl");
10026 const HChar arr = "bhsd"[size];
10027 DIP("%s %c%u, %c%u, %c%u\n", nm, arr, dd, arr, nn, arr, mm);
10028 return True;
10031 if (size == X11 && opcode == BITS5(1,0,0,0,0)) {
10032 /* -------- 0,11,10000 ADD d_d_d -------- */
10033 /* -------- 1,11,10000 SUB d_d_d -------- */
10034 Bool isSUB = bitU == 1;
10035 IRTemp res = newTemp(Ity_I64);
10036 assign(res, binop(isSUB ? Iop_Sub64 : Iop_Add64,
10037 getQRegLane(nn, 0, Ity_I64),
10038 getQRegLane(mm, 0, Ity_I64)));
10039 putQRegLane(dd, 0, mkexpr(res));
10040 putQRegLane(dd, 1, mkU64(0));
10041 DIP("%s %s, %s, %s\n", isSUB ? "sub" : "add",
10042 nameQRegLO(dd, Ity_I64),
10043 nameQRegLO(nn, Ity_I64), nameQRegLO(mm, Ity_I64));
10044 return True;
10047 if (size == X11 && opcode == BITS5(1,0,0,0,1)) {
10048 /* -------- 0,11,10001 CMTST d_d_d -------- */ // &, != 0
10049 /* -------- 1,11,10001 CMEQ d_d_d -------- */ // ==
10050 Bool isEQ = bitU == 1;
10051 IRExpr* argL = getQReg128(nn);
10052 IRExpr* argR = getQReg128(mm);
10053 IRTemp res = newTempV128();
10054 assign(res,
10055 isEQ ? binop(Iop_CmpEQ64x2, argL, argR)
10056 : unop(Iop_NotV128, binop(Iop_CmpEQ64x2,
10057 binop(Iop_AndV128, argL, argR),
10058 mkV128(0x0000))));
10059 putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
10060 DIP("%s %s, %s, %s\n", isEQ ? "cmeq" : "cmtst",
10061 nameQRegLO(dd, Ity_I64),
10062 nameQRegLO(nn, Ity_I64), nameQRegLO(mm, Ity_I64));
10063 return True;
10066 if (opcode == BITS5(1,0,1,1,0)) {
10067 /* -------- 0,xx,10110 SQDMULH s and h variants only -------- */
10068 /* -------- 1,xx,10110 SQRDMULH s and h variants only -------- */
10069 if (size == X00 || size == X11) return False;
10070 Bool isR = bitU == 1;
10071 IRTemp res, sat1q, sat1n, vN, vM;
10072 res = sat1q = sat1n = vN = vM = IRTemp_INVALID;
10073 newTempsV128_2(&vN, &vM);
10074 assign(vN, getQReg128(nn));
10075 assign(vM, getQReg128(mm));
10076 math_SQDMULH(&res, &sat1q, &sat1n, isR, size, vN, vM);
10077 putQReg128(dd,
10078 mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(size, mkexpr(res))));
10079 updateQCFLAGwithDifference(
10080 math_ZERO_ALL_EXCEPT_LOWEST_LANE(size, mkexpr(sat1q)),
10081 math_ZERO_ALL_EXCEPT_LOWEST_LANE(size, mkexpr(sat1n)));
10082 const HChar arr = "bhsd"[size];
10083 const HChar* nm = isR ? "sqrdmulh" : "sqdmulh";
10084 DIP("%s %c%u, %c%u, %c%u\n", nm, arr, dd, arr, nn, arr, mm);
10085 return True;
10088 if (bitU == 1 && size >= X10 && opcode == BITS5(1,1,0,1,0)) {
10089 /* -------- 1,1x,11010 FABD d_d_d, s_s_s -------- */
10090 IRType ity = size == X11 ? Ity_F64 : Ity_F32;
10091 IRTemp res = newTemp(ity);
10092 assign(res, unop(mkABSF(ity),
10093 triop(mkSUBF(ity),
10094 mkexpr(mk_get_IR_rounding_mode()),
10095 getQRegLO(nn,ity), getQRegLO(mm,ity))));
10096 putQReg128(dd, mkV128(0x0000));
10097 putQRegLO(dd, mkexpr(res));
10098 DIP("fabd %s, %s, %s\n",
10099 nameQRegLO(dd, ity), nameQRegLO(nn, ity), nameQRegLO(mm, ity));
10100 return True;
10103 if (bitU == 0 && size <= X01 && opcode == BITS5(1,1,0,1,1)) {
10104 /* -------- 0,0x,11011 FMULX d_d_d, s_s_s -------- */
10105 // KLUDGE: FMULX is treated the same way as FMUL. That can't be right.
10106 IRType ity = size == X01 ? Ity_F64 : Ity_F32;
10107 IRTemp res = newTemp(ity);
10108 assign(res, triop(mkMULF(ity),
10109 mkexpr(mk_get_IR_rounding_mode()),
10110 getQRegLO(nn,ity), getQRegLO(mm,ity)));
10111 putQReg128(dd, mkV128(0x0000));
10112 putQRegLO(dd, mkexpr(res));
10113 DIP("fmulx %s, %s, %s\n",
10114 nameQRegLO(dd, ity), nameQRegLO(nn, ity), nameQRegLO(mm, ity));
10115 return True;
10118 if (size <= X01 && opcode == BITS5(1,1,1,0,0)) {
10119 /* -------- 0,0x,11100 FCMEQ d_d_d, s_s_s -------- */
10120 /* -------- 1,0x,11100 FCMGE d_d_d, s_s_s -------- */
10121 Bool isD = size == X01;
10122 IRType ity = isD ? Ity_F64 : Ity_F32;
10123 Bool isGE = bitU == 1;
10124 IROp opCMP = isGE ? (isD ? Iop_CmpLE64Fx2 : Iop_CmpLE32Fx4)
10125 : (isD ? Iop_CmpEQ64Fx2 : Iop_CmpEQ32Fx4);
10126 IRTemp res = newTempV128();
10127 assign(res, isGE ? binop(opCMP, getQReg128(mm), getQReg128(nn)) // swapd
10128 : binop(opCMP, getQReg128(nn), getQReg128(mm)));
10129 putQReg128(dd, mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(isD ? X11 : X10,
10130 mkexpr(res))));
10131 DIP("%s %s, %s, %s\n", isGE ? "fcmge" : "fcmeq",
10132 nameQRegLO(dd, ity), nameQRegLO(nn, ity), nameQRegLO(mm, ity));
10133 return True;
10136 if (bitU == 1 && size >= X10 && opcode == BITS5(1,1,1,0,0)) {
10137 /* -------- 1,1x,11100 FCMGT d_d_d, s_s_s -------- */
10138 Bool isD = size == X11;
10139 IRType ity = isD ? Ity_F64 : Ity_F32;
10140 IROp opCMP = isD ? Iop_CmpLT64Fx2 : Iop_CmpLT32Fx4;
10141 IRTemp res = newTempV128();
10142 assign(res, binop(opCMP, getQReg128(mm), getQReg128(nn))); // swapd
10143 putQReg128(dd, mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(isD ? X11 : X10,
10144 mkexpr(res))));
10145 DIP("%s %s, %s, %s\n", "fcmgt",
10146 nameQRegLO(dd, ity), nameQRegLO(nn, ity), nameQRegLO(mm, ity));
10147 return True;
10150 if (bitU == 1 && opcode == BITS5(1,1,1,0,1)) {
10151 /* -------- 1,0x,11101 FACGE d_d_d, s_s_s -------- */
10152 /* -------- 1,1x,11101 FACGT d_d_d, s_s_s -------- */
10153 Bool isD = (size & 1) == 1;
10154 IRType ity = isD ? Ity_F64 : Ity_F32;
10155 Bool isGT = (size & 2) == 2;
10156 IROp opCMP = isGT ? (isD ? Iop_CmpLT64Fx2 : Iop_CmpLT32Fx4)
10157 : (isD ? Iop_CmpLE64Fx2 : Iop_CmpLE32Fx4);
10158 IROp opABS = isD ? Iop_Abs64Fx2 : Iop_Abs32Fx4;
10159 IRTemp res = newTempV128();
10160 assign(res, binop(opCMP, unop(opABS, getQReg128(mm)),
10161 unop(opABS, getQReg128(nn)))); // swapd
10162 putQReg128(dd, mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(isD ? X11 : X10,
10163 mkexpr(res))));
10164 DIP("%s %s, %s, %s\n", isGT ? "facgt" : "facge",
10165 nameQRegLO(dd, ity), nameQRegLO(nn, ity), nameQRegLO(mm, ity));
10166 return True;
10169 if (bitU == 0 && opcode == BITS5(1,1,1,1,1)) {
10170 /* -------- 0,0x,11111: FRECPS d_d_d, s_s_s -------- */
10171 /* -------- 0,1x,11111: FRSQRTS d_d_d, s_s_s -------- */
10172 Bool isSQRT = (size & 2) == 2;
10173 Bool isD = (size & 1) == 1;
10174 IROp op = isSQRT ? (isD ? Iop_RSqrtStep64Fx2 : Iop_RSqrtStep32Fx4)
10175 : (isD ? Iop_RecipStep64Fx2 : Iop_RecipStep32Fx4);
10176 IRTemp res = newTempV128();
10177 assign(res, binop(op, getQReg128(nn), getQReg128(mm)));
10178 putQReg128(dd, mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(isD ? X11 : X10,
10179 mkexpr(res))));
10180 HChar c = isD ? 'd' : 's';
10181 DIP("%s %c%u, %c%u, %c%u\n", isSQRT ? "frsqrts" : "frecps",
10182 c, dd, c, nn, c, mm);
10183 return True;
10186 return False;
10187 # undef INSN
10191 static
10192 Bool dis_AdvSIMD_scalar_two_reg_misc(/*MB_OUT*/DisResult* dres, UInt insn)
10194 /* 31 29 28 23 21 16 11 9 4
10195 01 U 11110 size 10000 opcode 10 n d
10196 Decode fields: u,size,opcode
10198 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
10199 if (INSN(31,30) != BITS2(0,1)
10200 || INSN(28,24) != BITS5(1,1,1,1,0)
10201 || INSN(21,17) != BITS5(1,0,0,0,0)
10202 || INSN(11,10) != BITS2(1,0)) {
10203 return False;
10205 UInt bitU = INSN(29,29);
10206 UInt size = INSN(23,22);
10207 UInt opcode = INSN(16,12);
10208 UInt nn = INSN(9,5);
10209 UInt dd = INSN(4,0);
10210 vassert(size < 4);
10212 if (opcode == BITS5(0,0,0,1,1)) {
10213 /* -------- 0,xx,00011: SUQADD std4_std4 -------- */
10214 /* -------- 1,xx,00011: USQADD std4_std4 -------- */
10215 /* These are a bit tricky (to say the least). See comments on
10216 the vector variants (in dis_AdvSIMD_two_reg_misc) below for
10217 details. */
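/* For example, "usqadd b0, b1" adds the signed value in B1 to the
   unsigned accumulator in B0 with unsigned saturation: B0 = 0xF0 (240)
   plus B1 = 0x20 (+32) gives 272, which saturates to 0xFF and sets QC. */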
10218 Bool isUSQADD = bitU == 1;
10219 IROp qop = isUSQADD ? mkVecQADDEXTSUSATUU(size)
10220 : mkVecQADDEXTUSSATSS(size);
10221 IROp nop = mkVecADD(size);
10222 IRTemp argL = newTempV128();
10223 IRTemp argR = newTempV128();
10224 assign(argL, getQReg128(nn));
10225 assign(argR, getQReg128(dd));
10226 IRTemp qres = math_ZERO_ALL_EXCEPT_LOWEST_LANE(
10227 size, binop(qop, mkexpr(argL), mkexpr(argR)));
10228 IRTemp nres = math_ZERO_ALL_EXCEPT_LOWEST_LANE(
10229 size, binop(nop, mkexpr(argL), mkexpr(argR)));
10230 putQReg128(dd, mkexpr(qres));
10231 updateQCFLAGwithDifference(qres, nres);
10232 const HChar arr = "bhsd"[size];
10233 DIP("%s %c%u, %c%u\n", isUSQADD ? "usqadd" : "suqadd", arr, dd, arr, nn);
10234 return True;
10237 if (opcode == BITS5(0,0,1,1,1)) {
10238 /* -------- 0,xx,00111 SQABS std4_std4 -------- */
10239 /* -------- 1,xx,00111 SQNEG std4_std4 -------- */
10240 Bool isNEG = bitU == 1;
10241 IRTemp qresFW = IRTemp_INVALID, nresFW = IRTemp_INVALID;
10242 (isNEG ? math_SQNEG : math_SQABS)( &qresFW, &nresFW,
10243 getQReg128(nn), size );
10244 IRTemp qres = math_ZERO_ALL_EXCEPT_LOWEST_LANE(size, mkexpr(qresFW));
10245 IRTemp nres = math_ZERO_ALL_EXCEPT_LOWEST_LANE(size, mkexpr(nresFW));
10246 putQReg128(dd, mkexpr(qres));
10247 updateQCFLAGwithDifference(qres, nres);
10248 const HChar arr = "bhsd"[size];
10249 DIP("%s %c%u, %c%u\n", isNEG ? "sqneg" : "sqabs", arr, dd, arr, nn);
10250 return True;
10253 if (size == X11 && opcode == BITS5(0,1,0,0,0)) {
10254 /* -------- 0,11,01000: CMGT d_d_#0 -------- */ // >s 0
10255 /* -------- 1,11,01000: CMGE d_d_#0 -------- */ // >=s 0
10256 Bool isGT = bitU == 0;
10257 IRExpr* argL = getQReg128(nn);
10258 IRExpr* argR = mkV128(0x0000);
10259 IRTemp res = newTempV128();
10260 assign(res, isGT ? binop(Iop_CmpGT64Sx2, argL, argR)
10261 : unop(Iop_NotV128, binop(Iop_CmpGT64Sx2, argR, argL)));
10262 putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
10263 DIP("cm%s d%u, d%u, #0\n", isGT ? "gt" : "ge", dd, nn);
10264 return True;
10267 if (size == X11 && opcode == BITS5(0,1,0,0,1)) {
10268 /* -------- 0,11,01001: CMEQ d_d_#0 -------- */ // == 0
10269 /* -------- 1,11,01001: CMLE d_d_#0 -------- */ // <=s 0
10270 Bool isEQ = bitU == 0;
10271 IRExpr* argL = getQReg128(nn);
10272 IRExpr* argR = mkV128(0x0000);
10273 IRTemp res = newTempV128();
10274 assign(res, isEQ ? binop(Iop_CmpEQ64x2, argL, argR)
10275 : unop(Iop_NotV128,
10276 binop(Iop_CmpGT64Sx2, argL, argR)));
10277 putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
10278 DIP("cm%s d%u, d%u, #0\n", isEQ ? "eq" : "le", dd, nn);
10279 return True;
10282 if (bitU == 0 && size == X11 && opcode == BITS5(0,1,0,1,0)) {
10283 /* -------- 0,11,01010: CMLT d_d_#0 -------- */ // <s 0
10284 putQReg128(dd, unop(Iop_ZeroHI64ofV128,
10285 binop(Iop_CmpGT64Sx2, mkV128(0x0000),
10286 getQReg128(nn))));
10287 DIP("cm%s d%u, d%u, #0\n", "lt", dd, nn);
10288 return True;
10291 if (bitU == 0 && size == X11 && opcode == BITS5(0,1,0,1,1)) {
10292 /* -------- 0,11,01011 ABS d_d -------- */
10293 putQReg128(dd, unop(Iop_ZeroHI64ofV128,
10294 unop(Iop_Abs64x2, getQReg128(nn))));
10295 DIP("abs d%u, d%u\n", dd, nn);
10296 return True;
10299 if (bitU == 1 && size == X11 && opcode == BITS5(0,1,0,1,1)) {
10300 /* -------- 1,11,01011 NEG d_d -------- */
10301 putQReg128(dd, unop(Iop_ZeroHI64ofV128,
10302 binop(Iop_Sub64x2, mkV128(0x0000), getQReg128(nn))));
10303 DIP("neg d%u, d%u\n", dd, nn);
10304 return True;
10307 UInt ix = 0; /*INVALID*/
10308 if (size >= X10) {
10309 switch (opcode) {
10310 case BITS5(0,1,1,0,0): ix = (bitU == 1) ? 4 : 1; break;
10311 case BITS5(0,1,1,0,1): ix = (bitU == 1) ? 5 : 2; break;
10312 case BITS5(0,1,1,1,0): if (bitU == 0) ix = 3; break;
10313 default: break;
10316 if (ix > 0) {
10317 /* -------- 0,1x,01100 FCMGT d_d_#0.0, s_s_#0.0 (ix 1) -------- */
10318 /* -------- 0,1x,01101 FCMEQ d_d_#0.0, s_s_#0.0 (ix 2) -------- */
10319 /* -------- 0,1x,01110 FCMLT d_d_#0.0, s_s_#0.0 (ix 3) -------- */
10320 /* -------- 1,1x,01100 FCMGE d_d_#0.0, s_s_#0.0 (ix 4) -------- */
10321 /* -------- 1,1x,01101 FCMLE d_d_#0.0, s_s_#0.0 (ix 5) -------- */
10322 Bool isD = size == X11;
10323 IRType ity = isD ? Ity_F64 : Ity_F32;
10324 IROp opCmpEQ = isD ? Iop_CmpEQ64Fx2 : Iop_CmpEQ32Fx4;
10325 IROp opCmpLE = isD ? Iop_CmpLE64Fx2 : Iop_CmpLE32Fx4;
10326 IROp opCmpLT = isD ? Iop_CmpLT64Fx2 : Iop_CmpLT32Fx4;
10327 IROp opCmp = Iop_INVALID;
10328 Bool swap = False;
10329 const HChar* nm = "??";
10330 switch (ix) {
10331 case 1: nm = "fcmgt"; opCmp = opCmpLT; swap = True; break;
10332 case 2: nm = "fcmeq"; opCmp = opCmpEQ; break;
10333 case 3: nm = "fcmlt"; opCmp = opCmpLT; break;
10334 case 4: nm = "fcmge"; opCmp = opCmpLE; swap = True; break;
10335 case 5: nm = "fcmle"; opCmp = opCmpLE; break;
10336 default: vassert(0);
10338 IRExpr* zero = mkV128(0x0000);
10339 IRTemp res = newTempV128();
10340 assign(res, swap ? binop(opCmp, zero, getQReg128(nn))
10341 : binop(opCmp, getQReg128(nn), zero));
10342 putQReg128(dd, mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(isD ? X11 : X10,
10343 mkexpr(res))));
10345 DIP("%s %s, %s, #0.0\n", nm, nameQRegLO(dd, ity), nameQRegLO(nn, ity));
10346 return True;
10349 if (opcode == BITS5(1,0,1,0,0)
10350 || (bitU == 1 && opcode == BITS5(1,0,0,1,0))) {
10351 /* -------- 0,xx,10100: SQXTN -------- */
10352 /* -------- 1,xx,10100: UQXTN -------- */
10353 /* -------- 1,xx,10010: SQXTUN -------- */
10354 if (size == X11) return False;
10355 vassert(size < 3);
10356 IROp opN = Iop_INVALID;
10357 Bool zWiden = True;
10358 const HChar* nm = "??";
10359 /**/ if (bitU == 0 && opcode == BITS5(1,0,1,0,0)) {
10360 opN = mkVecQNARROWUNSS(size); nm = "sqxtn"; zWiden = False;
10362 else if (bitU == 1 && opcode == BITS5(1,0,1,0,0)) {
10363 opN = mkVecQNARROWUNUU(size); nm = "uqxtn";
10365 else if (bitU == 1 && opcode == BITS5(1,0,0,1,0)) {
10366 opN = mkVecQNARROWUNSU(size); nm = "sqxtun";
10368 else vassert(0);
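/* Saturation is detected by re-widening the narrowed result (zero- or
   sign-extending so as to match the narrowing op) and comparing it with
   the original value: any difference means the value did not survive
   the round trip, so the QC flag must be set. */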
10369 IRTemp src = math_ZERO_ALL_EXCEPT_LOWEST_LANE(
10370 size+1, getQReg128(nn));
10371 IRTemp resN = math_ZERO_ALL_EXCEPT_LOWEST_LANE(
10372 size, unop(Iop_64UtoV128, unop(opN, mkexpr(src))));
10373 putQReg128(dd, mkexpr(resN));
10374 /* This widens zero lanes to zero, and compares it against zero, so all
10375 of the non-participating lanes make no contribution to the
10376 Q flag state. */
10377 IRTemp resW = math_WIDEN_LO_OR_HI_LANES(zWiden, False/*!fromUpperHalf*/,
10378 size, mkexpr(resN));
10379 updateQCFLAGwithDifference(src, resW);
10380 const HChar arrNarrow = "bhsd"[size];
10381 const HChar arrWide = "bhsd"[size+1];
10382 DIP("%s %c%u, %c%u\n", nm, arrNarrow, dd, arrWide, nn);
10383 return True;
10386 if (opcode == BITS5(1,0,1,1,0) && bitU == 1 && size == X01) {
10387 /* -------- 1,01,10110 FCVTXN s_d -------- */
10388 /* Using Irrm_NEAREST here isn't right. The docs say "round to
10389 odd" but I don't know what that really means. */
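/* (For reference: "round to odd", also known as von Neumann rounding,
   forces the least significant result bit to 1 whenever the conversion
   is inexact, so that a later second rounding of the F32 value cannot
   suffer double rounding.  There appears to be no IR rounding mode for
   it, hence the Irrm_NEAREST approximation here.) */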
10390 putQRegLO(dd,
10391 binop(Iop_F64toF32, mkU32(Irrm_NEAREST),
10392 getQRegLO(nn, Ity_F64)));
10393 putQRegLane(dd, 1, mkU32(0));
10394 putQRegLane(dd, 1, mkU64(0));
10395 DIP("fcvtxn s%u, d%u\n", dd, nn);
10396 return True;
10399 ix = 0; /*INVALID*/
10400 switch (opcode) {
10401 case BITS5(1,1,0,1,0): ix = ((size & 2) == 2) ? 4 : 1; break;
10402 case BITS5(1,1,0,1,1): ix = ((size & 2) == 2) ? 5 : 2; break;
10403 case BITS5(1,1,1,0,0): if ((size & 2) == 0) ix = 3; break;
10404 default: break;
10406 if (ix > 0) {
10407 /* -------- 0,0x,11010 FCVTNS d_d, s_s (ix 1) -------- */
10408 /* -------- 0,0x,11011 FCVTMS d_d, s_s (ix 2) -------- */
10409 /* -------- 0,0x,11100 FCVTAS d_d, s_s (ix 3) -------- */
10410 /* -------- 0,1x,11010 FCVTPS d_d, s_s (ix 4) -------- */
10411 /* -------- 0,1x,11011 FCVTZS d_d, s_s (ix 5) -------- */
10412 /* -------- 1,0x,11010 FCVTNU d_d, s_s (ix 1) -------- */
10413 /* -------- 1,0x,11011 FCVTMU d_d, s_s (ix 2) -------- */
10414 /* -------- 1,0x,11100 FCVTAU d_d, s_s (ix 3) -------- */
10415 /* -------- 1,1x,11010 FCVTPU d_d, s_s (ix 4) -------- */
10416 /* -------- 1,1x,11011 FCVTZU d_d, s_s (ix 5) -------- */
10417 Bool isD = (size & 1) == 1;
10418 IRType tyF = isD ? Ity_F64 : Ity_F32;
10419 IRType tyI = isD ? Ity_I64 : Ity_I32;
10420 IRRoundingMode irrm = 8; /*impossible*/
10421 HChar ch = '?';
10422 switch (ix) {
10423 case 1: ch = 'n'; irrm = Irrm_NEAREST; break;
10424 case 2: ch = 'm'; irrm = Irrm_NegINF; break;
10425 case 3: ch = 'a'; irrm = Irrm_NEAREST; break; /* kludge? */
10426 case 4: ch = 'p'; irrm = Irrm_PosINF; break;
10427 case 5: ch = 'z'; irrm = Irrm_ZERO; break;
10428 default: vassert(0);
10430 IROp cvt = Iop_INVALID;
10431 if (bitU == 1) {
10432 cvt = isD ? Iop_F64toI64U : Iop_F32toI32U;
10433 } else {
10434 cvt = isD ? Iop_F64toI64S : Iop_F32toI32S;
10436 IRTemp src = newTemp(tyF);
10437 IRTemp res = newTemp(tyI);
10438 assign(src, getQRegLane(nn, 0, tyF));
10439 assign(res, binop(cvt, mkU32(irrm), mkexpr(src)));
10440 putQRegLane(dd, 0, mkexpr(res)); /* bits 31-0 or 63-0 */
10441 if (!isD) {
10442 putQRegLane(dd, 1, mkU32(0)); /* bits 63-32 */
10444 putQRegLane(dd, 1, mkU64(0)); /* bits 127-64 */
10445 HChar sOrD = isD ? 'd' : 's';
10446 DIP("fcvt%c%c %c%u, %c%u\n", ch, bitU == 1 ? 'u' : 's',
10447 sOrD, dd, sOrD, nn);
10448 return True;
10451 if (size <= X01 && opcode == BITS5(1,1,1,0,1)) {
10452 /* -------- 0,0x,11101: SCVTF d_d, s_s -------- */
10453 /* -------- 1,0x,11101: UCVTF d_d, s_s -------- */
10454 Bool isU = bitU == 1;
10455 Bool isD = (size & 1) == 1;
10456 IRType tyI = isD ? Ity_I64 : Ity_I32;
10457 IROp iop = isU ? (isD ? Iop_I64UtoF64 : Iop_I32UtoF32)
10458 : (isD ? Iop_I64StoF64 : Iop_I32StoF32);
10459 IRTemp rm = mk_get_IR_rounding_mode();
10460 putQRegLO(dd, binop(iop, mkexpr(rm), getQRegLO(nn, tyI)));
10461 if (!isD) {
10462 putQRegLane(dd, 1, mkU32(0)); /* bits 63-32 */
10464 putQRegLane(dd, 1, mkU64(0)); /* bits 127-64 */
10465 HChar c = isD ? 'd' : 's';
10466 DIP("%ccvtf %c%u, %c%u\n", isU ? 'u' : 's', c, dd, c, nn);
10467 return True;
10470 if (size >= X10 && opcode == BITS5(1,1,1,0,1)) {
10471 /* -------- 0,1x,11101: FRECPE d_d, s_s -------- */
10472 /* -------- 1,1x,11101: FRSQRTE d_d, s_s -------- */
10473 Bool isSQRT = bitU == 1;
10474 Bool isD = (size & 1) == 1;
10475 IROp op = isSQRT ? (isD ? Iop_RSqrtEst64Fx2 : Iop_RSqrtEst32Fx4)
10476 : (isD ? Iop_RecipEst64Fx2 : Iop_RecipEst32Fx4);
10477 IRTemp resV = newTempV128();
10478 assign(resV, unop(op, getQReg128(nn)));
10479 putQReg128(dd, mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(isD ? X11 : X10,
10480 mkexpr(resV))));
10481 HChar c = isD ? 'd' : 's';
10482 DIP("%s %c%u, %c%u\n", isSQRT ? "frsqrte" : "frecpe", c, dd, c, nn);
10483 return True;
10486 if (bitU == 0 && size >= X10 && opcode == BITS5(1,1,1,1,1)) {
10487 /* -------- 0,1x,11111: FRECPX d_d, s_s -------- */
10488 Bool isD = (size & 1) == 1;
10489 IRType ty = isD ? Ity_F64 : Ity_F32;
10490 IROp op = isD ? Iop_RecpExpF64 : Iop_RecpExpF32;
10491 IRTemp res = newTemp(ty);
10492 IRTemp rm = mk_get_IR_rounding_mode();
10493 assign(res, binop(op, mkexpr(rm), getQRegLane(nn, 0, ty)));
10494 putQReg128(dd, mkV128(0x0000));
10495 putQRegLane(dd, 0, mkexpr(res));
10496 HChar c = isD ? 'd' : 's';
10497 DIP("%s %c%u, %c%u\n", "frecpx", c, dd, c, nn);
10498 return True;
10501 return False;
10502 # undef INSN
10506 static
10507 Bool dis_AdvSIMD_scalar_x_indexed_element(/*MB_OUT*/DisResult* dres, UInt insn)
10509 /* 31 28 23 21 20 19 15 11 9 4
10510 01 U 11111 size L M m opcode H 0 n d
10511 Decode fields are: u,size,opcode
10512 M is really part of the mm register number. Individual
10513 cases need to inspect L and H though.
10515 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
10516 if (INSN(31,30) != BITS2(0,1)
10517 || INSN(28,24) != BITS5(1,1,1,1,1) || INSN(10,10) != 0) {
10518 return False;
10520 UInt bitU = INSN(29,29);
10521 UInt size = INSN(23,22);
10522 UInt bitL = INSN(21,21);
10523 UInt bitM = INSN(20,20);
10524 UInt mmLO4 = INSN(19,16);
10525 UInt opcode = INSN(15,12);
10526 UInt bitH = INSN(11,11);
10527 UInt nn = INSN(9,5);
10528 UInt dd = INSN(4,0);
10529 vassert(size < 4);
10530 vassert(bitH < 2 && bitM < 2 && bitL < 2);
10532 if (bitU == 0 && size >= X10
10533 && (opcode == BITS4(0,0,0,1) || opcode == BITS4(0,1,0,1))) {
10534 /* -------- 0,1x,0001 FMLA d_d_d[], s_s_s[] -------- */
10535 /* -------- 0,1x,0101 FMLS d_d_d[], s_s_s[] -------- */
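/* The element index comes from H:L in the S-sized case and from H alone
   in the D-sized case (L must then be 0, otherwise the encoding is
   unallocated); M supplies bit 4 of the mm register number. */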
10536 Bool isD = (size & 1) == 1;
10537 Bool isSUB = opcode == BITS4(0,1,0,1);
10538 UInt index;
10539 if (!isD) index = (bitH << 1) | bitL;
10540 else if (isD && bitL == 0) index = bitH;
10541 else return False; // sz:L == x11 => unallocated encoding
10542 vassert(index < (isD ? 2 : 4));
10543 IRType ity = isD ? Ity_F64 : Ity_F32;
10544 IRTemp elem = newTemp(ity);
10545 UInt mm = (bitM << 4) | mmLO4;
10546 assign(elem, getQRegLane(mm, index, ity));
10547 IRTemp dupd = math_DUP_TO_V128(elem, ity);
10548 IROp opADD = isD ? Iop_Add64Fx2 : Iop_Add32Fx4;
10549 IROp opSUB = isD ? Iop_Sub64Fx2 : Iop_Sub32Fx4;
10550 IROp opMUL = isD ? Iop_Mul64Fx2 : Iop_Mul32Fx4;
10551 IRTemp rm = mk_get_IR_rounding_mode();
10552 IRTemp t1 = newTempV128();
10553 IRTemp t2 = newTempV128();
10554 // FIXME: double rounding; use FMA primops instead
10555 assign(t1, triop(opMUL, mkexpr(rm), getQReg128(nn), mkexpr(dupd)));
10556 assign(t2, triop(isSUB ? opSUB : opADD,
10557 mkexpr(rm), getQReg128(dd), mkexpr(t1)));
10558 putQReg128(dd,
10559 mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(isD ? 3 : 2,
10560 mkexpr(t2))));
10561 const HChar c = isD ? 'd' : 's';
10562 DIP("%s %c%u, %c%u, %s.%c[%u]\n", isSUB ? "fmls" : "fmla",
10563 c, dd, c, nn, nameQReg128(mm), c, index);
10564 return True;
10567 if (size >= X10 && opcode == BITS4(1,0,0,1)) {
10568 /* -------- 0,1x,1001 FMUL d_d_d[], s_s_s[] -------- */
10569 /* -------- 1,1x,1001 FMULX d_d_d[], s_s_s[] -------- */
10570 Bool isD = (size & 1) == 1;
10571 Bool isMULX = bitU == 1;
10572 UInt index;
10573 if (!isD) index = (bitH << 1) | bitL;
10574 else if (isD && bitL == 0) index = bitH;
10575 else return False; // sz:L == x11 => unallocated encoding
10576 vassert(index < (isD ? 2 : 4));
10577 IRType ity = isD ? Ity_F64 : Ity_F32;
10578 IRTemp elem = newTemp(ity);
10579 UInt mm = (bitM << 4) | mmLO4;
10580 assign(elem, getQRegLane(mm, index, ity));
10581 IRTemp dupd = math_DUP_TO_V128(elem, ity);
10582 IROp opMUL = isD ? Iop_Mul64Fx2 : Iop_Mul32Fx4;
10583 IRTemp rm = mk_get_IR_rounding_mode();
10584 IRTemp t1 = newTempV128();
10585 // KLUDGE: FMULX is treated the same way as FMUL. That can't be right.
10586 assign(t1, triop(opMUL, mkexpr(rm), getQReg128(nn), mkexpr(dupd)));
10587 putQReg128(dd,
10588 mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(isD ? 3 : 2,
10589 mkexpr(t1))));
10590 const HChar c = isD ? 'd' : 's';
10591 DIP("%s %c%u, %c%u, %s.%c[%u]\n", isMULX ? "fmulx" : "fmul",
10592 c, dd, c, nn, nameQReg128(mm), c, index);
10593 return True;
10596 if (bitU == 0
10597 && (opcode == BITS4(1,0,1,1)
10598 || opcode == BITS4(0,0,1,1) || opcode == BITS4(0,1,1,1))) {
10599 /* -------- 0,xx,1011 SQDMULL s/h variants only -------- */ // 0 (ks)
10600 /* -------- 0,xx,0011 SQDMLAL s/h variants only -------- */ // 1
10601 /* -------- 0,xx,0111 SQDMLSL s/h variants only -------- */ // 2
10602 /* Widens, and size refers to the narrowed lanes. */
10603 UInt ks = 3;
10604 switch (opcode) {
10605 case BITS4(1,0,1,1): ks = 0; break;
10606 case BITS4(0,0,1,1): ks = 1; break;
10607 case BITS4(0,1,1,1): ks = 2; break;
10608 default: vassert(0);
10610 vassert(ks >= 0 && ks <= 2);
10611 UInt mm = 32; // invalid
10612 UInt ix = 16; // invalid
10613 switch (size) {
10614 case X00:
10615 return False; // h_b_b[] case is not allowed
10616 case X01:
10617 mm = mmLO4; ix = (bitH << 2) | (bitL << 1) | (bitM << 0); break;
10618 case X10:
10619 mm = (bitM << 4) | mmLO4; ix = (bitH << 1) | (bitL << 0); break;
10620 case X11:
10621 return False; // q_d_d[] case is not allowed
10622 default:
10623 vassert(0);
10625 vassert(mm < 32 && ix < 16);
10626 IRTemp vecN, vecD, res, sat1q, sat1n, sat2q, sat2n;
10627 vecN = vecD = res = sat1q = sat1n = sat2q = sat2n = IRTemp_INVALID;
10628 newTempsV128_2(&vecN, &vecD);
10629 assign(vecN, getQReg128(nn));
10630 IRTemp vecM = math_DUP_VEC_ELEM(getQReg128(mm), size, ix);
10631 assign(vecD, getQReg128(dd));
10632 math_SQDMULL_ACC(&res, &sat1q, &sat1n, &sat2q, &sat2n,
10633 False/*!is2*/, size, "mas"[ks],
10634 vecN, vecM, ks == 0 ? IRTemp_INVALID : vecD);
10635 IROp opZHI = mkVecZEROHIxxOFV128(size+1);
10636 putQReg128(dd, unop(opZHI, mkexpr(res)));
10637 vassert(sat1q != IRTemp_INVALID && sat1n != IRTemp_INVALID);
10638 updateQCFLAGwithDifferenceZHI(sat1q, sat1n, opZHI);
10639 if (sat2q != IRTemp_INVALID || sat2n != IRTemp_INVALID) {
10640 updateQCFLAGwithDifferenceZHI(sat2q, sat2n, opZHI);
10642 const HChar* nm = ks == 0 ? "sqdmull"
10643 : (ks == 1 ? "sqdmlal" : "sqdmlsl");
10644 const HChar arrNarrow = "bhsd"[size];
10645 const HChar arrWide = "bhsd"[size+1];
10646 DIP("%s %c%u, %c%u, v%u.%c[%u]\n",
10647 nm, arrWide, dd, arrNarrow, nn, mm, arrNarrow, ix);
10648 return True;
10651 if (opcode == BITS4(1,1,0,0) || opcode == BITS4(1,1,0,1)) {
10652 /* -------- 0,xx,1100 SQDMULH s and h variants only -------- */
10653 /* -------- 0,xx,1101 SQRDMULH s and h variants only -------- */
10654 UInt mm = 32; // invalid
10655 UInt ix = 16; // invalid
10656 switch (size) {
10657 case X00:
10658 return False; // b case is not allowed
10659 case X01:
10660 mm = mmLO4; ix = (bitH << 2) | (bitL << 1) | (bitM << 0); break;
10661 case X10:
10662 mm = (bitM << 4) | mmLO4; ix = (bitH << 1) | (bitL << 0); break;
10663 case X11:
10664 return False; // q case is not allowed
10665 default:
10666 vassert(0);
10668 vassert(mm < 32 && ix < 16);
10669 Bool isR = opcode == BITS4(1,1,0,1);
10670 IRTemp res, sat1q, sat1n, vN, vM;
10671 res = sat1q = sat1n = vN = vM = IRTemp_INVALID;
10672 vN = newTempV128();
10673 assign(vN, getQReg128(nn));
10674 vM = math_DUP_VEC_ELEM(getQReg128(mm), size, ix);
10675 math_SQDMULH(&res, &sat1q, &sat1n, isR, size, vN, vM);
10676 IROp opZHI = mkVecZEROHIxxOFV128(size);
10677 putQReg128(dd, unop(opZHI, mkexpr(res)));
10678 updateQCFLAGwithDifferenceZHI(sat1q, sat1n, opZHI);
10679 const HChar* nm = isR ? "sqrdmulh" : "sqdmulh";
10680 HChar ch = size == X01 ? 'h' : 's';
10681 DIP("%s %c%u, %c%u, v%d.%c[%u]\n", nm, ch, dd, ch, nn, (Int)mm, ch, ix);
10682 return True;
10685 return False;
10686 # undef INSN
10690 static
10691 Bool dis_AdvSIMD_shift_by_immediate(/*MB_OUT*/DisResult* dres, UInt insn)
10693 /* 31 28 22 18 15 10 9 4
10694 0 q u 011110 immh immb opcode 1 n d
10695 Decode fields: u,opcode
10697 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
10698 if (INSN(31,31) != 0
10699 || INSN(28,23) != BITS6(0,1,1,1,1,0) || INSN(10,10) != 1) {
10700 return False;
10702 UInt bitQ = INSN(30,30);
10703 UInt bitU = INSN(29,29);
10704 UInt immh = INSN(22,19);
10705 UInt immb = INSN(18,16);
10706 UInt opcode = INSN(15,11);
10707 UInt nn = INSN(9,5);
10708 UInt dd = INSN(4,0);
10710 if (opcode == BITS5(0,0,0,0,0) || opcode == BITS5(0,0,0,1,0)) {
10711 /* -------- 0,00000 SSHR std7_std7_#imm -------- */
10712 /* -------- 1,00000 USHR std7_std7_#imm -------- */
10713 /* -------- 0,00010 SSRA std7_std7_#imm -------- */
10714 /* -------- 1,00010 USRA std7_std7_#imm -------- */
10715 /* laneTy, shift = case immh:immb of
10716 0001:xxx -> B, SHR:8-xxx
10717 001x:xxx -> H, SHR:16-xxxx
10718 01xx:xxx -> S, SHR:32-xxxxx
10719 1xxx:xxx -> D, SHR:64-xxxxxx
10720 other -> invalid
10722 UInt size = 0;
10723 UInt shift = 0;
10724 Bool isQ = bitQ == 1;
10725 Bool isU = bitU == 1;
10726 Bool isAcc = opcode == BITS5(0,0,0,1,0);
10727 Bool ok = getLaneInfo_IMMH_IMMB(&shift, &size, immh, immb);
10728 if (!ok || (bitQ == 0 && size == X11)) return False;
10729 vassert(size >= 0 && size <= 3);
10730 UInt lanebits = 8 << size;
10731 vassert(shift >= 1 && shift <= lanebits);
10732 IROp op = isU ? mkVecSHRN(size) : mkVecSARN(size);
10733 IRExpr* src = getQReg128(nn);
10734 IRTemp shf = newTempV128();
10735 IRTemp res = newTempV128();
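/* The encoding allows a shift equal to the full lane width, which the
   IR shift ops cannot express directly.  For the unsigned case such a
   shift simply produces zero; for the signed case it gives the same
   result as shifting by (lanebits - 1), hence the 'nudge' below. */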
10736 if (shift == lanebits && isU) {
10737 assign(shf, mkV128(0x0000));
10738 } else {
10739 UInt nudge = 0;
10740 if (shift == lanebits) {
10741 vassert(!isU);
10742 nudge = 1;
10744 assign(shf, binop(op, src, mkU8(shift - nudge)));
10746 assign(res, isAcc ? binop(mkVecADD(size), getQReg128(dd), mkexpr(shf))
10747 : mkexpr(shf));
10748 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
10749 HChar laneCh = "bhsd"[size];
10750 UInt nLanes = (isQ ? 128 : 64) / lanebits;
10751 const HChar* nm = isAcc ? (isU ? "usra" : "ssra")
10752 : (isU ? "ushr" : "sshr");
10753 DIP("%s %s.%u%c, %s.%u%c, #%u\n", nm,
10754 nameQReg128(dd), nLanes, laneCh,
10755 nameQReg128(nn), nLanes, laneCh, shift);
10756 return True;
10759 if (opcode == BITS5(0,0,1,0,0) || opcode == BITS5(0,0,1,1,0)) {
10760 /* -------- 0,00100 SRSHR std7_std7_#imm -------- */
10761 /* -------- 1,00100 URSHR std7_std7_#imm -------- */
10762 /* -------- 0,00110 SRSRA std7_std7_#imm -------- */
10763 /* -------- 1,00110 URSRA std7_std7_#imm -------- */
10764 /* laneTy, shift = case immh:immb of
10765 0001:xxx -> B, SHR:8-xxx
10766 001x:xxx -> H, SHR:16-xxxx
10767 01xx:xxx -> S, SHR:32-xxxxx
10768 1xxx:xxx -> D, SHR:64-xxxxxx
10769 other -> invalid
10771 UInt size = 0;
10772 UInt shift = 0;
10773 Bool isQ = bitQ == 1;
10774 Bool isU = bitU == 1;
10775 Bool isAcc = opcode == BITS5(0,0,1,1,0);
10776 Bool ok = getLaneInfo_IMMH_IMMB(&shift, &size, immh, immb);
10777 if (!ok || (bitQ == 0 && size == X11)) return False;
10778 vassert(size >= 0 && size <= 3);
10779 UInt lanebits = 8 << size;
10780 vassert(shift >= 1 && shift <= lanebits);
10781 IROp op = isU ? mkVecRSHU(size) : mkVecRSHS(size);
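/* The IR rounding-shift ops take a per-lane *signed* shift amount, with
   negative values meaning "shift right", so the byte value -shift is
   duplicated across the whole vector to form the shift argument. */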
10782 IRExpr* src = getQReg128(nn);
10783 IRTemp imm8 = newTemp(Ity_I8);
10784 assign(imm8, mkU8((UChar)(-shift)));
10785 IRExpr* amt = mkexpr(math_DUP_TO_V128(imm8, Ity_I8));
10786 IRTemp shf = newTempV128();
10787 IRTemp res = newTempV128();
10788 assign(shf, binop(op, src, amt));
10789 assign(res, isAcc ? binop(mkVecADD(size), getQReg128(dd), mkexpr(shf))
10790 : mkexpr(shf));
10791 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
10792 HChar laneCh = "bhsd"[size];
10793 UInt nLanes = (isQ ? 128 : 64) / lanebits;
10794 const HChar* nm = isAcc ? (isU ? "ursra" : "srsra")
10795 : (isU ? "urshr" : "srshr");
10796 DIP("%s %s.%u%c, %s.%u%c, #%u\n", nm,
10797 nameQReg128(dd), nLanes, laneCh,
10798 nameQReg128(nn), nLanes, laneCh, shift);
10799 return True;
10802 if (bitU == 1 && opcode == BITS5(0,1,0,0,0)) {
10803 /* -------- 1,01000 SRI std7_std7_#imm -------- */
10804 /* laneTy, shift = case immh:immb of
10805 0001:xxx -> B, SHR:8-xxx
10806 001x:xxx -> H, SHR:16-xxxx
10807 01xx:xxx -> S, SHR:32-xxxxx
10808 1xxx:xxx -> D, SHR:64-xxxxxx
10809 other -> invalid
10811 UInt size = 0;
10812 UInt shift = 0;
10813 Bool isQ = bitQ == 1;
10814 Bool ok = getLaneInfo_IMMH_IMMB(&shift, &size, immh, immb);
10815 if (!ok || (bitQ == 0 && size == X11)) return False;
10816 vassert(size >= 0 && size <= 3);
10817 UInt lanebits = 8 << size;
10818 vassert(shift >= 1 && shift <= lanebits);
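/* SRI shifts each Vn lane right by 'shift' and inserts it into Vd,
   leaving the top 'shift' bits of every Vd lane untouched.  nmask
   (all-ones shifted left by lanebits - shift) selects exactly those
   preserved bits, which are OR-ed back into the shifted value.  A shift
   by the full lane width leaves Vd unchanged. */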
10819 IRExpr* src = getQReg128(nn);
10820 IRTemp res = newTempV128();
10821 if (shift == lanebits) {
10822 assign(res, getQReg128(dd));
10823 } else {
10824 assign(res, binop(mkVecSHRN(size), src, mkU8(shift)));
10825 IRExpr* nmask = binop(mkVecSHLN(size),
10826 mkV128(0xFFFF), mkU8(lanebits - shift));
10827 IRTemp tmp = newTempV128();
10828 assign(tmp, binop(Iop_OrV128,
10829 mkexpr(res),
10830 binop(Iop_AndV128, getQReg128(dd), nmask)));
10831 res = tmp;
10833 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
10834 HChar laneCh = "bhsd"[size];
10835 UInt nLanes = (isQ ? 128 : 64) / lanebits;
10836 DIP("%s %s.%u%c, %s.%u%c, #%u\n", "sri",
10837 nameQReg128(dd), nLanes, laneCh,
10838 nameQReg128(nn), nLanes, laneCh, shift);
10839 return True;
10842 if (opcode == BITS5(0,1,0,1,0)) {
10843 /* -------- 0,01010 SHL std7_std7_#imm -------- */
10844 /* -------- 1,01010 SLI std7_std7_#imm -------- */
10845 /* laneTy, shift = case immh:immb of
10846 0001:xxx -> B, xxx
10847 001x:xxx -> H, xxxx
10848 01xx:xxx -> S, xxxxx
10849 1xxx:xxx -> D, xxxxxx
10850 other -> invalid
10852 UInt size = 0;
10853 UInt shift = 0;
10854 Bool isSLI = bitU == 1;
10855 Bool isQ = bitQ == 1;
10856 Bool ok = getLaneInfo_IMMH_IMMB(&shift, &size, immh, immb);
10857 if (!ok || (bitQ == 0 && size == X11)) return False;
10858 vassert(size >= 0 && size <= 3);
10859 /* The shift encoding has opposite sign for the leftwards case.
10860 Adjust shift to compensate. */
10861 UInt lanebits = 8 << size;
10862 shift = lanebits - shift;
10863 vassert(shift >= 0 && shift < lanebits);
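/* For SLI the bottom 'shift' bits of each Vd lane are preserved: nmask
   (all-ones shifted right by lanebits - shift) selects them and they
   are OR-ed back into the left-shifted Vn value.  For SHL the shifted
   value is used as-is. */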
10864 IROp op = mkVecSHLN(size);
10865 IRExpr* src = getQReg128(nn);
10866 IRTemp res = newTempV128();
10867 if (shift == 0) {
10868 assign(res, src);
10869 } else {
10870 assign(res, binop(op, src, mkU8(shift)));
10871 if (isSLI) {
10872 IRExpr* nmask = binop(mkVecSHRN(size),
10873 mkV128(0xFFFF), mkU8(lanebits - shift));
10874 IRTemp tmp = newTempV128();
10875 assign(tmp, binop(Iop_OrV128,
10876 mkexpr(res),
10877 binop(Iop_AndV128, getQReg128(dd), nmask)));
10878 res = tmp;
10881 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
10882 HChar laneCh = "bhsd"[size];
10883 UInt nLanes = (isQ ? 128 : 64) / lanebits;
10884 const HChar* nm = isSLI ? "sli" : "shl";
10885 DIP("%s %s.%u%c, %s.%u%c, #%u\n", nm,
10886 nameQReg128(dd), nLanes, laneCh,
10887 nameQReg128(nn), nLanes, laneCh, shift);
10888 return True;
10891 if (opcode == BITS5(0,1,1,1,0)
10892 || (bitU == 1 && opcode == BITS5(0,1,1,0,0))) {
10893 /* -------- 0,01110 SQSHL std7_std7_#imm -------- */
10894 /* -------- 1,01110 UQSHL std7_std7_#imm -------- */
10895 /* -------- 1,01100 SQSHLU std7_std7_#imm -------- */
10896 UInt size = 0;
10897 UInt shift = 0;
10898 Bool isQ = bitQ == 1;
10899 Bool ok = getLaneInfo_IMMH_IMMB(&shift, &size, immh, immb);
10900 if (!ok || (bitQ == 0 && size == X11)) return False;
10901 vassert(size >= 0 && size <= 3);
10902 /* The shift encoding has opposite sign for the leftwards case.
10903 Adjust shift to compensate. */
10904 UInt lanebits = 8 << size;
10905 shift = lanebits - shift;
10906 vassert(shift >= 0 && shift < lanebits);
10907 const HChar* nm = NULL;
10908 /**/ if (bitU == 0 && opcode == BITS5(0,1,1,1,0)) nm = "sqshl";
10909 else if (bitU == 1 && opcode == BITS5(0,1,1,1,0)) nm = "uqshl";
10910 else if (bitU == 1 && opcode == BITS5(0,1,1,0,0)) nm = "sqshlu";
10911 else vassert(0);
10912 IRTemp qDiff1 = IRTemp_INVALID;
10913 IRTemp qDiff2 = IRTemp_INVALID;
10914 IRTemp res = IRTemp_INVALID;
10915 IRTemp src = newTempV128();
10916 assign(src, getQReg128(nn));
10917 math_QSHL_IMM(&res, &qDiff1, &qDiff2, src, size, shift, nm);
10918 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
10919 updateQCFLAGwithDifferenceZHI(qDiff1, qDiff2,
10920 isQ ? Iop_INVALID : Iop_ZeroHI64ofV128);
10921 const HChar* arr = nameArr_Q_SZ(bitQ, size);
10922 DIP("%s %s.%s, %s.%s, #%u\n", nm,
10923 nameQReg128(dd), arr, nameQReg128(nn), arr, shift);
10924 return True;
10927 if (bitU == 0
10928 && (opcode == BITS5(1,0,0,0,0) || opcode == BITS5(1,0,0,0,1))) {
10929 /* -------- 0,10000 SHRN{,2} #imm -------- */
10930 /* -------- 0,10001 RSHRN{,2} #imm -------- */
10931 /* Narrows, and size is the narrow size. */
10932 UInt size = 0;
10933 UInt shift = 0;
10934 Bool is2 = bitQ == 1;
10935 Bool isR = opcode == BITS5(1,0,0,0,1);
10936 Bool ok = getLaneInfo_IMMH_IMMB(&shift, &size, immh, immb);
10937 if (!ok || size == X11) return False;
10938 vassert(shift >= 1);
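/* The rounding variant adds 1 << (shift-1) to every wide lane before
   shifting, so the shift rounds to nearest rather than truncating.  The
   shifted wide lanes are then narrowed and written to the lower (SHRN)
   or upper (SHRN2) half of Vd. */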
10939 IRTemp t1 = newTempV128();
10940 IRTemp t2 = newTempV128();
10941 IRTemp t3 = newTempV128();
10942 assign(t1, getQReg128(nn));
10943 assign(t2, isR ? binop(mkVecADD(size+1),
10944 mkexpr(t1),
10945 mkexpr(math_VEC_DUP_IMM(size+1, 1ULL<<(shift-1))))
10946 : mkexpr(t1));
10947 assign(t3, binop(mkVecSHRN(size+1), mkexpr(t2), mkU8(shift)));
10948 IRTemp t4 = math_NARROW_LANES(t3, t3, size);
10949 putLO64andZUorPutHI64(is2, dd, t4);
10950 const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
10951 const HChar* arrWide = nameArr_Q_SZ(1, size+1);
10952 DIP("%s %s.%s, %s.%s, #%u\n", isR ? "rshrn" : "shrn",
10953 nameQReg128(dd), arrNarrow, nameQReg128(nn), arrWide, shift);
10954 return True;
10957 if (opcode == BITS5(1,0,0,1,0) || opcode == BITS5(1,0,0,1,1)
10958 || (bitU == 1
10959 && (opcode == BITS5(1,0,0,0,0) || opcode == BITS5(1,0,0,0,1)))) {
10960 /* -------- 0,10010 SQSHRN{,2} #imm -------- */
10961 /* -------- 1,10010 UQSHRN{,2} #imm -------- */
10962 /* -------- 0,10011 SQRSHRN{,2} #imm -------- */
10963 /* -------- 1,10011 UQRSHRN{,2} #imm -------- */
10964 /* -------- 1,10000 SQSHRUN{,2} #imm -------- */
10965 /* -------- 1,10001 SQRSHRUN{,2} #imm -------- */
10966 UInt size = 0;
10967 UInt shift = 0;
10968 Bool is2 = bitQ == 1;
10969 Bool ok = getLaneInfo_IMMH_IMMB(&shift, &size, immh, immb);
10970 if (!ok || size == X11) return False;
10971 vassert(shift >= 1 && shift <= (8 << size));
10972 const HChar* nm = "??";
10973 IROp op = Iop_INVALID;
10974 /* Decide on the name and the operation. */
10975 /**/ if (bitU == 0 && opcode == BITS5(1,0,0,1,0)) {
10976 nm = "sqshrn"; op = mkVecQANDqsarNNARROWSS(size);
10978 else if (bitU == 1 && opcode == BITS5(1,0,0,1,0)) {
10979 nm = "uqshrn"; op = mkVecQANDqshrNNARROWUU(size);
10981 else if (bitU == 0 && opcode == BITS5(1,0,0,1,1)) {
10982 nm = "sqrshrn"; op = mkVecQANDqrsarNNARROWSS(size);
10984 else if (bitU == 1 && opcode == BITS5(1,0,0,1,1)) {
10985 nm = "uqrshrn"; op = mkVecQANDqrshrNNARROWUU(size);
10987 else if (bitU == 1 && opcode == BITS5(1,0,0,0,0)) {
10988 nm = "sqshrun"; op = mkVecQANDqsarNNARROWSU(size);
10990 else if (bitU == 1 && opcode == BITS5(1,0,0,0,1)) {
10991 nm = "sqrshrun"; op = mkVecQANDqrsarNNARROWSU(size);
10993 else vassert(0);
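/* Each of these IROps produces a 128-bit "pair": the saturatingly
   narrowed result in the lower 64 bits and per-lane saturation
   indications in the upper 64 bits.  The Q flag update below simply
   checks whether that upper half is nonzero. */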
10994 /* Compute the result (Q, shifted value) pair. */
10995 IRTemp src128 = newTempV128();
10996 assign(src128, getQReg128(nn));
10997 IRTemp pair = newTempV128();
10998 assign(pair, binop(op, mkexpr(src128), mkU8(shift)));
10999 /* Update the result reg */
11000 IRTemp res64in128 = newTempV128();
11001 assign(res64in128, unop(Iop_ZeroHI64ofV128, mkexpr(pair)));
11002 putLO64andZUorPutHI64(is2, dd, res64in128);
11003 /* Update the Q flag. */
11004 IRTemp q64q64 = newTempV128();
11005 assign(q64q64, binop(Iop_InterleaveHI64x2, mkexpr(pair), mkexpr(pair)));
11006 IRTemp z128 = newTempV128();
11007 assign(z128, mkV128(0x0000));
11008 updateQCFLAGwithDifference(q64q64, z128);
11009 /* */
11010 const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
11011 const HChar* arrWide = nameArr_Q_SZ(1, size+1);
11012 DIP("%s %s.%s, %s.%s, #%u\n", nm,
11013 nameQReg128(dd), arrNarrow, nameQReg128(nn), arrWide, shift);
11014 return True;
11017 if (opcode == BITS5(1,0,1,0,0)) {
11018 /* -------- 0,10100 SSHLL{,2} #imm -------- */
11019 /* -------- 1,10100 USHLL{,2} #imm -------- */
11020 /* 31 28 22 18 15 9 4
11021 0q0 011110 immh immb 101001 n d SSHLL Vd.Ta, Vn.Tb, #sh
11022 0q1 011110 immh immb 101001 n d USHLL Vd.Ta, Vn.Tb, #sh
11023 where Ta,Tb,sh
11024 = case immh of 1xxx -> invalid
11025 01xx -> 2d, 2s(q0)/4s(q1), immh:immb - 32 (0..31)
11026 001x -> 4s, 4h(q0)/8h(q1), immh:immb - 16 (0..15)
11027 0001 -> 8h, 8b(q0)/16b(q1), immh:immb - 8 (0..7)
11028 0000 -> AdvSIMD modified immediate (???)
11030 Bool isQ = bitQ == 1;
11031 Bool isU = bitU == 1;
11032 UInt immhb = (immh << 3) | immb;
11033 IRTemp src = newTempV128();
11034 IRTemp zero = newTempV128();
11035 IRExpr* res = NULL;
11036 UInt sh = 0;
11037 const HChar* ta = "??";
11038 const HChar* tb = "??";
11039 assign(src, getQReg128(nn));
11040 assign(zero, mkV128(0x0000));
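/* Widening works by interleaving the source with zero (HI or LO half
   according to the {,2} form), which parks each narrow lane in the top
   half of a wide lane.  Shifting each wide lane right by (lanebits - sh),
   arithmetically for the signed case and logically for the unsigned
   case, then yields the sign- or zero-extended value already shifted
   left by sh. */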
11041 if (immh & 8) {
11042 /* invalid; don't assign to res */
11044 else if (immh & 4) {
11045 sh = immhb - 32;
11046 vassert(sh < 32); /* so 32-sh is 1..32 */
11047 ta = "2d";
11048 tb = isQ ? "4s" : "2s";
11049 IRExpr* tmp = isQ ? mk_InterleaveHI32x4(src, zero)
11050 : mk_InterleaveLO32x4(src, zero);
11051 res = binop(isU ? Iop_ShrN64x2 : Iop_SarN64x2, tmp, mkU8(32-sh));
11053 else if (immh & 2) {
11054 sh = immhb - 16;
11055 vassert(sh < 16); /* so 16-sh is 1..16 */
11056 ta = "4s";
11057 tb = isQ ? "8h" : "4h";
11058 IRExpr* tmp = isQ ? mk_InterleaveHI16x8(src, zero)
11059 : mk_InterleaveLO16x8(src, zero);
11060 res = binop(isU ? Iop_ShrN32x4 : Iop_SarN32x4, tmp, mkU8(16-sh));
11062 else if (immh & 1) {
11063 sh = immhb - 8;
11064 vassert(sh < 8); /* so 8-sh is 1..8 */
11065 ta = "8h";
11066 tb = isQ ? "16b" : "8b";
11067 IRExpr* tmp = isQ ? mk_InterleaveHI8x16(src, zero)
11068 : mk_InterleaveLO8x16(src, zero);
11069 res = binop(isU ? Iop_ShrN16x8 : Iop_SarN16x8, tmp, mkU8(8-sh));
11070 } else {
11071 vassert(immh == 0);
11072 /* invalid; don't assign to res */
11074 /* */
11075 if (res) {
11076 putQReg128(dd, res);
11077 DIP("%cshll%s %s.%s, %s.%s, #%u\n",
11078 isU ? 'u' : 's', isQ ? "2" : "",
11079 nameQReg128(dd), ta, nameQReg128(nn), tb, sh);
11080 return True;
11082 return False;
11085 if (opcode == BITS5(1,1,1,0,0)) {
11086 /* -------- 0,11100 SCVTF {2d_2d,4s_4s,2s_2s}_imm -------- */
11087 /* -------- 1,11100 UCVTF {2d_2d,4s_4s,2s_2s}_imm -------- */
11088 /* If immh is of the form 00xx, the insn is invalid. */
11089 if (immh < BITS4(0,1,0,0)) return False;
11090 UInt size = 0;
11091 UInt fbits = 0;
11092 Bool ok = getLaneInfo_IMMH_IMMB(&fbits, &size, immh, immb);
11093 /* The following holds because immh is never zero. */
11094 vassert(ok);
11095 /* The following holds because immh >= 0100. */
11096 vassert(size == X10 || size == X11);
11097 Bool isD = size == X11;
11098 Bool isU = bitU == 1;
11099 Bool isQ = bitQ == 1;
11100 if (isD && !isQ) return False; /* reject .1d case */
11101 vassert(fbits >= 1 && fbits <= (isD ? 64 : 32));
11102 Double scale = two_to_the_minus(fbits);
11103 IRExpr* scaleE = isD ? IRExpr_Const(IRConst_F64(scale))
11104 : IRExpr_Const(IRConst_F32( (Float)scale ));
11105 IROp opMUL = isD ? Iop_MulF64 : Iop_MulF32;
11106 IROp opCVT = isU ? (isD ? Iop_I64UtoF64 : Iop_I32UtoF32)
11107 : (isD ? Iop_I64StoF64 : Iop_I32StoF32);
11108 IRType tyF = isD ? Ity_F64 : Ity_F32;
11109 IRType tyI = isD ? Ity_I64 : Ity_I32;
11110 UInt nLanes = (isQ ? 2 : 1) * (isD ? 1 : 2);
11111 vassert(nLanes == 2 || nLanes == 4);
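/* Each lane is converted as an ordinary integer and the result is then
   multiplied by 2^-fbits, both under the rounding mode currently held
   in FPCR, giving the fixed-point interpretation of the input. */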
11112 for (UInt i = 0; i < nLanes; i++) {
11113 IRTemp src = newTemp(tyI);
11114 IRTemp res = newTemp(tyF);
11115 IRTemp rm = mk_get_IR_rounding_mode();
11116 assign(src, getQRegLane(nn, i, tyI));
11117 assign(res, triop(opMUL, mkexpr(rm),
11118 binop(opCVT, mkexpr(rm), mkexpr(src)),
11119 scaleE));
11120 putQRegLane(dd, i, mkexpr(res));
11122 if (!isQ) {
11123 putQRegLane(dd, 1, mkU64(0));
11125 const HChar* arr = nameArr_Q_SZ(bitQ, size);
11126 DIP("%s %s.%s, %s.%s, #%u\n", isU ? "ucvtf" : "scvtf",
11127 nameQReg128(dd), arr, nameQReg128(nn), arr, fbits);
11128 return True;
11131 if (opcode == BITS5(1,1,1,1,1)) {
11132 /* -------- 0,11111 FCVTZS {2d_2d,4s_4s,2s_2s}_imm -------- */
11133 /* -------- 1,11111 FCVTZU {2d_2d,4s_4s,2s_2s}_imm -------- */
11134 /* If immh is of the form 00xx, the insn is invalid. */
11135 if (immh < BITS4(0,1,0,0)) return False;
11136 UInt size = 0;
11137 UInt fbits = 0;
11138 Bool ok = getLaneInfo_IMMH_IMMB(&fbits, &size, immh, immb);
11139 /* The following holds because immh is never zero. */
11140 vassert(ok);
11141 /* The following holds because immh >= 0100. */
11142 vassert(size == X10 || size == X11);
11143 Bool isD = size == X11;
11144 Bool isU = bitU == 1;
11145 Bool isQ = bitQ == 1;
11146 if (isD && !isQ) return False; /* reject .1d case */
11147 vassert(fbits >= 1 && fbits <= (isD ? 64 : 32));
11148 Double scale = two_to_the_plus(fbits);
11149 IRExpr* scaleE = isD ? IRExpr_Const(IRConst_F64(scale))
11150 : IRExpr_Const(IRConst_F32( (Float)scale ));
11151 IROp opMUL = isD ? Iop_MulF64 : Iop_MulF32;
11152 IROp opCVT = isU ? (isD ? Iop_F64toI64U : Iop_F32toI32U)
11153 : (isD ? Iop_F64toI64S : Iop_F32toI32S);
11154 IRType tyF = isD ? Ity_F64 : Ity_F32;
11155 IRType tyI = isD ? Ity_I64 : Ity_I32;
11156 UInt nLanes = (isQ ? 2 : 1) * (isD ? 1 : 2);
11157 vassert(nLanes == 2 || nLanes == 4);
11158 for (UInt i = 0; i < nLanes; i++) {
11159 IRTemp src = newTemp(tyF);
11160 IRTemp res = newTemp(tyI);
11161 IRTemp rm = newTemp(Ity_I32);
11162 assign(src, getQRegLane(nn, i, tyF));
11163 assign(rm, mkU32(Irrm_ZERO));
11164 assign(res, binop(opCVT, mkexpr(rm),
11165 triop(opMUL, mkexpr(rm),
11166 mkexpr(src), scaleE)));
11167 putQRegLane(dd, i, mkexpr(res));
11169 if (!isQ) {
11170 putQRegLane(dd, 1, mkU64(0));
11172 const HChar* arr = nameArr_Q_SZ(bitQ, size);
11173 DIP("%s %s.%s, %s.%s, #%u\n", isU ? "fcvtzu" : "fcvtzs",
11174 nameQReg128(dd), arr, nameQReg128(nn), arr, fbits);
11175 return True;
11178 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
11179 return False;
11180 # undef INSN
11184 static
11185 Bool dis_AdvSIMD_three_different(/*MB_OUT*/DisResult* dres, UInt insn)
11187 /* 31 30 29 28 23 21 20 15 11 9 4
11188 0 Q U 01110 size 1 m opcode 00 n d
11189 Decode fields: u,opcode
11191 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
11192 if (INSN(31,31) != 0
11193 || INSN(28,24) != BITS5(0,1,1,1,0)
11194 || INSN(21,21) != 1
11195 || INSN(11,10) != BITS2(0,0)) {
11196 return False;
11198 UInt bitQ = INSN(30,30);
11199 UInt bitU = INSN(29,29);
11200 UInt size = INSN(23,22);
11201 UInt mm = INSN(20,16);
11202 UInt opcode = INSN(15,12);
11203 UInt nn = INSN(9,5);
11204 UInt dd = INSN(4,0);
11205 vassert(size < 4);
11206 Bool is2 = bitQ == 1;
11208 if (opcode == BITS4(0,0,0,0) || opcode == BITS4(0,0,1,0)) {
11209 /* -------- 0,0000 SADDL{2} -------- */
11210 /* -------- 1,0000 UADDL{2} -------- */
11211 /* -------- 0,0010 SSUBL{2} -------- */
11212 /* -------- 1,0010 USUBL{2} -------- */
11213 /* Widens, and size refers to the narrow lanes. */
11214 if (size == X11) return False;
11215 vassert(size <= 2);
11216 Bool isU = bitU == 1;
11217 Bool isADD = opcode == BITS4(0,0,0,0);
11218 IRTemp argL = math_WIDEN_LO_OR_HI_LANES(isU, is2, size, getQReg128(nn));
11219 IRTemp argR = math_WIDEN_LO_OR_HI_LANES(isU, is2, size, getQReg128(mm));
11220 IRTemp res = newTempV128();
11221 assign(res, binop(isADD ? mkVecADD(size+1) : mkVecSUB(size+1),
11222 mkexpr(argL), mkexpr(argR)));
11223 putQReg128(dd, mkexpr(res));
11224 const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
11225 const HChar* arrWide = nameArr_Q_SZ(1, size+1);
11226 const HChar* nm = isADD ? (isU ? "uaddl" : "saddl")
11227 : (isU ? "usubl" : "ssubl");
11228 DIP("%s%s %s.%s, %s.%s, %s.%s\n", nm, is2 ? "2" : "",
11229 nameQReg128(dd), arrWide,
11230 nameQReg128(nn), arrNarrow, nameQReg128(mm), arrNarrow);
11231 return True;
11234 if (opcode == BITS4(0,0,0,1) || opcode == BITS4(0,0,1,1)) {
11235 /* -------- 0,0001 SADDW{2} -------- */
11236 /* -------- 1,0001 UADDW{2} -------- */
11237 /* -------- 0,0011 SSUBW{2} -------- */
11238 /* -------- 1,0011 USUBW{2} -------- */
11239 /* Widens, and size refers to the narrow lanes. */
11240 if (size == X11) return False;
11241 vassert(size <= 2);
11242 Bool isU = bitU == 1;
11243 Bool isADD = opcode == BITS4(0,0,0,1);
11244 IRTemp argR = math_WIDEN_LO_OR_HI_LANES(isU, is2, size, getQReg128(mm));
11245 IRTemp res = newTempV128();
11246 assign(res, binop(isADD ? mkVecADD(size+1) : mkVecSUB(size+1),
11247 getQReg128(nn), mkexpr(argR)));
11248 putQReg128(dd, mkexpr(res));
11249 const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
11250 const HChar* arrWide = nameArr_Q_SZ(1, size+1);
11251 const HChar* nm = isADD ? (isU ? "uaddw" : "saddw")
11252 : (isU ? "usubw" : "ssubw");
11253 DIP("%s%s %s.%s, %s.%s, %s.%s\n", nm, is2 ? "2" : "",
11254 nameQReg128(dd), arrWide,
11255 nameQReg128(nn), arrWide, nameQReg128(mm), arrNarrow);
11256 return True;
11259 if (opcode == BITS4(0,1,0,0) || opcode == BITS4(0,1,1,0)) {
11260 /* -------- 0,0100 ADDHN{2} -------- */
11261 /* -------- 1,0100 RADDHN{2} -------- */
11262 /* -------- 0,0110 SUBHN{2} -------- */
11263 /* -------- 1,0110 RSUBHN{2} -------- */
11264 /* Narrows, and size refers to the narrowed lanes. */
11265 if (size == X11) return False;
11266 vassert(size <= 2);
11267 const UInt shift[3] = { 8, 16, 32 };
11268 Bool isADD = opcode == BITS4(0,1,0,0);
11269 Bool isR = bitU == 1;
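/* The sum/difference is formed at the wide lane size; the rounding
   variants first add 1 << (narrow_lanebits - 1).  Shifting right by the
   narrow lane width moves each result's high half into the even narrow
   lanes, and CatEvenLanes gathers those into 64 bits, which go to the
   lower or upper half of Vd depending on the {,2} form. */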
11270 /* Combined elements in wide lanes */
11271 IRTemp wide = newTempV128();
11272 IRExpr* wideE = binop(isADD ? mkVecADD(size+1) : mkVecSUB(size+1),
11273 getQReg128(nn), getQReg128(mm));
11274 if (isR) {
11275 wideE = binop(mkVecADD(size+1),
11276 wideE,
11277 mkexpr(math_VEC_DUP_IMM(size+1,
11278 1ULL << (shift[size]-1))));
11280 assign(wide, wideE);
11281 /* Top halves of elements, still in wide lanes */
11282 IRTemp shrd = newTempV128();
11283 assign(shrd, binop(mkVecSHRN(size+1), mkexpr(wide), mkU8(shift[size])));
11284 /* Elements now compacted into lower 64 bits */
11285 IRTemp new64 = newTempV128();
11286 assign(new64, binop(mkVecCATEVENLANES(size), mkexpr(shrd), mkexpr(shrd)));
11287 putLO64andZUorPutHI64(is2, dd, new64);
11288 const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
11289 const HChar* arrWide = nameArr_Q_SZ(1, size+1);
11290 const HChar* nm = isADD ? (isR ? "raddhn" : "addhn")
11291 : (isR ? "rsubhn" : "subhn");
11292 DIP("%s%s %s.%s, %s.%s, %s.%s\n", nm, is2 ? "2" : "",
11293 nameQReg128(dd), arrNarrow,
11294 nameQReg128(nn), arrWide, nameQReg128(mm), arrWide);
11295 return True;
11298 if (opcode == BITS4(0,1,0,1) || opcode == BITS4(0,1,1,1)) {
11299 /* -------- 0,0101 SABAL{2} -------- */
11300 /* -------- 1,0101 UABAL{2} -------- */
11301 /* -------- 0,0111 SABDL{2} -------- */
11302 /* -------- 1,0111 UABDL{2} -------- */
11303 /* Widens, and size refers to the narrow lanes. */
11304 if (size == X11) return False;
11305 vassert(size <= 2);
11306 Bool isU = bitU == 1;
11307 Bool isACC = opcode == BITS4(0,1,0,1);
11308 IRTemp argL = math_WIDEN_LO_OR_HI_LANES(isU, is2, size, getQReg128(nn));
11309 IRTemp argR = math_WIDEN_LO_OR_HI_LANES(isU, is2, size, getQReg128(mm));
11310 IRTemp abd = math_ABD(isU, size+1, mkexpr(argL), mkexpr(argR));
11311 IRTemp res = newTempV128();
11312 assign(res, isACC ? binop(mkVecADD(size+1), mkexpr(abd), getQReg128(dd))
11313 : mkexpr(abd));
11314 putQReg128(dd, mkexpr(res));
11315 const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
11316 const HChar* arrWide = nameArr_Q_SZ(1, size+1);
11317 const HChar* nm = isACC ? (isU ? "uabal" : "sabal")
11318 : (isU ? "uabdl" : "sabdl");
11319 DIP("%s%s %s.%s, %s.%s, %s.%s\n", nm, is2 ? "2" : "",
11320 nameQReg128(dd), arrWide,
11321 nameQReg128(nn), arrNarrow, nameQReg128(mm), arrNarrow);
11322 return True;
11325 if (opcode == BITS4(1,1,0,0)
11326 || opcode == BITS4(1,0,0,0) || opcode == BITS4(1,0,1,0)) {
11327 /* -------- 0,1100 SMULL{2} -------- */ // 0 (ks)
11328 /* -------- 1,1100 UMULL{2} -------- */ // 0
11329 /* -------- 0,1000 SMLAL{2} -------- */ // 1
11330 /* -------- 1,1000 UMLAL{2} -------- */ // 1
11331 /* -------- 0,1010 SMLSL{2} -------- */ // 2
11332 /* -------- 1,1010 UMLSL{2} -------- */ // 2
11333 /* Widens, and size refers to the narrow lanes. */
11334 UInt ks = 3;
11335 switch (opcode) {
11336 case BITS4(1,1,0,0): ks = 0; break;
11337 case BITS4(1,0,0,0): ks = 1; break;
11338 case BITS4(1,0,1,0): ks = 2; break;
11339 default: vassert(0);
11341 vassert(ks >= 0 && ks <= 2);
11342 if (size == X11) return False;
11343 vassert(size <= 2);
11344 Bool isU = bitU == 1;
11345 IRTemp vecN = newTempV128();
11346 IRTemp vecM = newTempV128();
11347 IRTemp vecD = newTempV128();
11348 assign(vecN, getQReg128(nn));
11349 assign(vecM, getQReg128(mm));
11350 assign(vecD, getQReg128(dd));
11351 IRTemp res = IRTemp_INVALID;
11352 math_MULL_ACC(&res, is2, isU, size, "mas"[ks],
11353 vecN, vecM, ks == 0 ? IRTemp_INVALID : vecD);
11354 putQReg128(dd, mkexpr(res));
11355 const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
11356 const HChar* arrWide = nameArr_Q_SZ(1, size+1);
11357 const HChar* nm = ks == 0 ? "mull" : (ks == 1 ? "mlal" : "mlsl");
11358 DIP("%c%s%s %s.%s, %s.%s, %s.%s\n", isU ? 'u' : 's', nm, is2 ? "2" : "",
11359 nameQReg128(dd), arrWide,
11360 nameQReg128(nn), arrNarrow, nameQReg128(mm), arrNarrow);
11361 return True;
11364 if (bitU == 0
11365 && (opcode == BITS4(1,1,0,1)
11366 || opcode == BITS4(1,0,0,1) || opcode == BITS4(1,0,1,1))) {
11367 /* -------- 0,1101 SQDMULL{2} -------- */ // 0 (ks)
11368 /* -------- 0,1001 SQDMLAL{2} -------- */ // 1
11369 /* -------- 0,1011 SQDMLSL{2} -------- */ // 2
11370 /* Widens, and size refers to the narrow lanes. */
11371 UInt ks = 3;
11372 switch (opcode) {
11373 case BITS4(1,1,0,1): ks = 0; break;
11374 case BITS4(1,0,0,1): ks = 1; break;
11375 case BITS4(1,0,1,1): ks = 2; break;
11376 default: vassert(0);
11378 vassert(ks >= 0 && ks <= 2);
11379 if (size == X00 || size == X11) return False;
11380 vassert(size <= 2);
11381 IRTemp vecN, vecM, vecD, res, sat1q, sat1n, sat2q, sat2n;
11382 vecN = vecM = vecD = res = sat1q = sat1n = sat2q = sat2n = IRTemp_INVALID;
11383 newTempsV128_3(&vecN, &vecM, &vecD);
11384 assign(vecN, getQReg128(nn));
11385 assign(vecM, getQReg128(mm));
11386 assign(vecD, getQReg128(dd));
11387 math_SQDMULL_ACC(&res, &sat1q, &sat1n, &sat2q, &sat2n,
11388 is2, size, "mas"[ks],
11389 vecN, vecM, ks == 0 ? IRTemp_INVALID : vecD);
11390 putQReg128(dd, mkexpr(res));
11391 vassert(sat1q != IRTemp_INVALID && sat1n != IRTemp_INVALID);
11392 updateQCFLAGwithDifference(sat1q, sat1n);
11393 if (sat2q != IRTemp_INVALID || sat2n != IRTemp_INVALID) {
11394 updateQCFLAGwithDifference(sat2q, sat2n);
11396 const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
11397 const HChar* arrWide = nameArr_Q_SZ(1, size+1);
11398 const HChar* nm = ks == 0 ? "sqdmull"
11399 : (ks == 1 ? "sqdmlal" : "sqdmlsl");
11400 DIP("%s%s %s.%s, %s.%s, %s.%s\n", nm, is2 ? "2" : "",
11401 nameQReg128(dd), arrWide,
11402 nameQReg128(nn), arrNarrow, nameQReg128(mm), arrNarrow);
11403 return True;
11406 if (bitU == 0 && opcode == BITS4(1,1,1,0)) {
11407 /* -------- 0,1110 PMULL{2} -------- */
11408 /* Widens, and size refers to the narrow lanes. */
11409 if (size != X00 && size != X11) return False;
11410 IRTemp res = IRTemp_INVALID;
11411 IRExpr* srcN = getQReg128(nn);
11412 IRExpr* srcM = getQReg128(mm);
11413 const HChar* arrNarrow = NULL;
11414 const HChar* arrWide = NULL;
11415 if (size == X00) {
11416 res = math_BINARY_WIDENING_V128(is2, Iop_PolynomialMull8x8,
11417 srcN, srcM);
11418 arrNarrow = nameArr_Q_SZ(bitQ, size);
11419 arrWide = nameArr_Q_SZ(1, size+1);
11420 } else {
11421 /* The same thing as the X00 case, except we have to call
11422 a helper to do it. */
11423 vassert(size == X11);
11424 res = newTemp(Ity_V128);
11425 IROp slice
11426 = is2 ? Iop_V128HIto64 : Iop_V128to64;
11427 IRExpr** args
11428 = mkIRExprVec_3( IRExpr_VECRET(),
11429 unop(slice, srcN), unop(slice, srcM));
11430 IRDirty* di
11431 = unsafeIRDirty_1_N( res, 0/*regparms*/,
11432 "arm64g_dirtyhelper_PMULLQ",
11433 &arm64g_dirtyhelper_PMULLQ, args);
11434 stmt(IRStmt_Dirty(di));
11435 /* We can't use nameArr_Q_SZ for this because it can't deal with
11436 Q-sized (128 bit) results. Hence do it by hand. */
11437 arrNarrow = bitQ == 0 ? "1d" : "2d";
11438 arrWide = "1q";
11440 putQReg128(dd, mkexpr(res));
11441 DIP("%s%s %s.%s, %s.%s, %s.%s\n", "pmull", is2 ? "2" : "",
11442 nameQReg128(dd), arrWide,
11443 nameQReg128(nn), arrNarrow, nameQReg128(mm), arrNarrow);
11444 return True;
11447 return False;
11448 # undef INSN
11452 static
11453 Bool dis_AdvSIMD_three_same(/*MB_OUT*/DisResult* dres, UInt insn)
11455 /* 31 30 29 28 23 21 20 15 10 9 4
11456 0 Q U 01110 size 1 m opcode 1 n d
11457 Decode fields: u,size,opcode
11459 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
11460 if (INSN(31,31) != 0
11461 || INSN(28,24) != BITS5(0,1,1,1,0)
11462 || INSN(21,21) != 1
11463 || INSN(10,10) != 1) {
11464 return False;
11466 UInt bitQ = INSN(30,30);
11467 UInt bitU = INSN(29,29);
11468 UInt size = INSN(23,22);
11469 UInt mm = INSN(20,16);
11470 UInt opcode = INSN(15,11);
11471 UInt nn = INSN(9,5);
11472 UInt dd = INSN(4,0);
11473 vassert(size < 4);
11475 if (opcode == BITS5(0,0,0,0,0) || opcode == BITS5(0,0,1,0,0)) {
11476 /* -------- 0,xx,00000 SHADD std6_std6_std6 -------- */
11477 /* -------- 1,xx,00000 UHADD std6_std6_std6 -------- */
11478 /* -------- 0,xx,00100 SHSUB std6_std6_std6 -------- */
11479 /* -------- 1,xx,00100 UHSUB std6_std6_std6 -------- */
11480 if (size == X11) return False;
11481 Bool isADD = opcode == BITS5(0,0,0,0,0);
11482 Bool isU = bitU == 1;
11483 /* Widen both args out, do the math, narrow to final result. */
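/* Performing the add/sub at twice the lane width means the carry/borrow
   cannot be lost; the shift right by one (arithmetic for the signed
   case, logical for the unsigned case) then halves the wide result
   before it is narrowed back to the original lane size. */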
11484 IRTemp argL = newTempV128();
11485 IRTemp argLhi = IRTemp_INVALID;
11486 IRTemp argLlo = IRTemp_INVALID;
11487 IRTemp argR = newTempV128();
11488 IRTemp argRhi = IRTemp_INVALID;
11489 IRTemp argRlo = IRTemp_INVALID;
11490 IRTemp resHi = newTempV128();
11491 IRTemp resLo = newTempV128();
11492 IRTemp res = IRTemp_INVALID;
11493 assign(argL, getQReg128(nn));
11494 argLlo = math_WIDEN_LO_OR_HI_LANES(isU, False, size, mkexpr(argL));
11495 argLhi = math_WIDEN_LO_OR_HI_LANES(isU, True, size, mkexpr(argL));
11496 assign(argR, getQReg128(mm));
11497 argRlo = math_WIDEN_LO_OR_HI_LANES(isU, False, size, mkexpr(argR));
11498 argRhi = math_WIDEN_LO_OR_HI_LANES(isU, True, size, mkexpr(argR));
11499 IROp opADDSUB = isADD ? mkVecADD(size+1) : mkVecSUB(size+1);
11500 IROp opSxR = isU ? mkVecSHRN(size+1) : mkVecSARN(size+1);
11501 assign(resHi, binop(opSxR,
11502 binop(opADDSUB, mkexpr(argLhi), mkexpr(argRhi)),
11503 mkU8(1)));
11504 assign(resLo, binop(opSxR,
11505 binop(opADDSUB, mkexpr(argLlo), mkexpr(argRlo)),
11506 mkU8(1)));
11507 res = math_NARROW_LANES ( resHi, resLo, size );
11508 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
11509 const HChar* nm = isADD ? (isU ? "uhadd" : "shadd")
11510 : (isU ? "uhsub" : "shsub");
11511 const HChar* arr = nameArr_Q_SZ(bitQ, size);
11512 DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
11513 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
11514 return True;
11517 if (opcode == BITS5(0,0,0,1,0)) {
11518 /* -------- 0,xx,00010 SRHADD std7_std7_std7 -------- */
11519 /* -------- 1,xx,00010 URHADD std7_std7_std7 -------- */
11520 if (bitQ == 0 && size == X11) return False; // implied 1d case
11521 Bool isU = bitU == 1;
11522 IRTemp argL = newTempV128();
11523 IRTemp argR = newTempV128();
11524 assign(argL, getQReg128(nn));
11525 assign(argR, getQReg128(mm));
11526 IRTemp res = math_RHADD(size, isU, argL, argR);
11527 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
11528 const HChar* arr = nameArr_Q_SZ(bitQ, size);
11529 DIP("%s %s.%s, %s.%s, %s.%s\n", isU ? "urhadd" : "srhadd",
11530 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
11531 return True;
11534 if (opcode == BITS5(0,0,0,0,1) || opcode == BITS5(0,0,1,0,1)) {
11535 /* -------- 0,xx,00001 SQADD std7_std7_std7 -------- */
11536 /* -------- 1,xx,00001 UQADD std7_std7_std7 -------- */
11537 /* -------- 0,xx,00101 SQSUB std7_std7_std7 -------- */
11538 /* -------- 1,xx,00101 UQSUB std7_std7_std7 -------- */
11539 if (bitQ == 0 && size == X11) return False; // implied 1d case
11540 Bool isADD = opcode == BITS5(0,0,0,0,1);
11541 Bool isU = bitU == 1;
11542 IROp qop = Iop_INVALID;
11543 IROp nop = Iop_INVALID;
11544 if (isADD) {
11545 qop = isU ? mkVecQADDU(size) : mkVecQADDS(size);
11546 nop = mkVecADD(size);
11547 } else {
11548 qop = isU ? mkVecQSUBU(size) : mkVecQSUBS(size);
11549 nop = mkVecSUB(size);
11551 IRTemp argL = newTempV128();
11552 IRTemp argR = newTempV128();
11553 IRTemp qres = newTempV128();
11554 IRTemp nres = newTempV128();
11555 assign(argL, getQReg128(nn));
11556 assign(argR, getQReg128(mm));
11557 assign(qres, math_MAYBE_ZERO_HI64_fromE(
11558 bitQ, binop(qop, mkexpr(argL), mkexpr(argR))));
11559 assign(nres, math_MAYBE_ZERO_HI64_fromE(
11560 bitQ, binop(nop, mkexpr(argL), mkexpr(argR))));
11561 putQReg128(dd, mkexpr(qres));
11562 updateQCFLAGwithDifference(qres, nres);
11563 const HChar* nm = isADD ? (isU ? "uqadd" : "sqadd")
11564 : (isU ? "uqsub" : "sqsub");
11565 const HChar* arr = nameArr_Q_SZ(bitQ, size);
11566 DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
11567 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
11568 return True;
11571 if (bitU == 0 && opcode == BITS5(0,0,0,1,1)) {
11572 /* -------- 0,00,00011 AND 16b_16b_16b, 8b_8b_8b -------- */
11573 /* -------- 0,01,00011 BIC 16b_16b_16b, 8b_8b_8b -------- */
11574 /* -------- 0,10,00011 ORR 16b_16b_16b, 8b_8b_8b -------- */
11575 /* -------- 0,11,00011 ORN 16b_16b_16b, 8b_8b_8b -------- */
11576 Bool isORx = (size & 2) == 2;
11577 Bool invert = (size & 1) == 1;
11578 IRTemp res = newTempV128();
11579 assign(res, binop(isORx ? Iop_OrV128 : Iop_AndV128,
11580 getQReg128(nn),
11581 invert ? unop(Iop_NotV128, getQReg128(mm))
11582 : getQReg128(mm)));
11583 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
11584 const HChar* names[4] = { "and", "bic", "orr", "orn" };
11585 const HChar* ar = bitQ == 1 ? "16b" : "8b";
11586 DIP("%s %s.%s, %s.%s, %s.%s\n", names[INSN(23,22)],
11587 nameQReg128(dd), ar, nameQReg128(nn), ar, nameQReg128(mm), ar);
11588 return True;
11591 if (bitU == 1 && opcode == BITS5(0,0,0,1,1)) {
11592 /* -------- 1,00,00011 EOR 16b_16b_16b, 8b_8b_8b -------- */
11593 /* -------- 1,01,00011 BSL 16b_16b_16b, 8b_8b_8b -------- */
11594 /* -------- 1,10,00011 BIT 16b_16b_16b, 8b_8b_8b -------- */
11595 /* -------- 1,11,00011 BIF 16b_16b_16b, 8b_8b_8b -------- */
11596 IRTemp argD = newTempV128();
11597 IRTemp argN = newTempV128();
11598 IRTemp argM = newTempV128();
11599 assign(argD, getQReg128(dd));
11600 assign(argN, getQReg128(nn));
11601 assign(argM, getQReg128(mm));
11602 const IROp opXOR = Iop_XorV128;
11603 const IROp opAND = Iop_AndV128;
11604 const IROp opNOT = Iop_NotV128;
11605 IRTemp res = newTempV128();
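/* All three selects below use the identity
      sel(mask, a, b) = ((a ^ b) & mask) ^ b
   which takes bits of 'a' where the mask bit is 1 and bits of 'b' where
   it is 0.  BSL selects between N and M under D, BIT inserts N into D
   where M is 1, and BIF inserts N into D where M is 0. */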
11606 switch (size) {
11607 case BITS2(0,0): /* EOR */
11608 assign(res, binop(opXOR, mkexpr(argM), mkexpr(argN)));
11609 break;
11610 case BITS2(0,1): /* BSL */
11611 assign(res, binop(opXOR, mkexpr(argM),
11612 binop(opAND,
11613 binop(opXOR, mkexpr(argM), mkexpr(argN)),
11614 mkexpr(argD))));
11615 break;
11616 case BITS2(1,0): /* BIT */
11617 assign(res, binop(opXOR, mkexpr(argD),
11618 binop(opAND,
11619 binop(opXOR, mkexpr(argD), mkexpr(argN)),
11620 mkexpr(argM))));
11621 break;
11622 case BITS2(1,1): /* BIF */
11623 assign(res, binop(opXOR, mkexpr(argD),
11624 binop(opAND,
11625 binop(opXOR, mkexpr(argD), mkexpr(argN)),
11626 unop(opNOT, mkexpr(argM)))));
11627 break;
11628 default:
11629 vassert(0);
11631 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
11632 const HChar* nms[4] = { "eor", "bsl", "bit", "bif" };
11633 const HChar* arr = bitQ == 1 ? "16b" : "8b";
11634 DIP("%s %s.%s, %s.%s, %s.%s\n", nms[size],
11635 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
11636 return True;
11639 if (opcode == BITS5(0,0,1,1,0)) {
11640 /* -------- 0,xx,00110 CMGT std7_std7_std7 -------- */ // >s
11641 /* -------- 1,xx,00110 CMHI std7_std7_std7 -------- */ // >u
11642 if (bitQ == 0 && size == X11) return False; // implied 1d case
11643 Bool isGT = bitU == 0;
11644 IRExpr* argL = getQReg128(nn);
11645 IRExpr* argR = getQReg128(mm);
11646 IRTemp res = newTempV128();
11647 assign(res,
11648 isGT ? binop(mkVecCMPGTS(size), argL, argR)
11649 : binop(mkVecCMPGTU(size), argL, argR));
11650 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
11651 const HChar* nm = isGT ? "cmgt" : "cmhi";
11652 const HChar* arr = nameArr_Q_SZ(bitQ, size);
11653 DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
11654 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
11655 return True;
11658 if (opcode == BITS5(0,0,1,1,1)) {
11659 /* -------- 0,xx,00111 CMGE std7_std7_std7 -------- */ // >=s
11660 /* -------- 1,xx,00111 CMHS std7_std7_std7 -------- */ // >=u
11661 if (bitQ == 0 && size == X11) return False; // implied 1d case
11662 Bool isGE = bitU == 0;
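/* Only ">" comparisons exist as IROps, so ">=" is computed as
   NOT(argR > argL). */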
11663 IRExpr* argL = getQReg128(nn);
11664 IRExpr* argR = getQReg128(mm);
11665 IRTemp res = newTempV128();
11666 assign(res,
11667 isGE ? unop(Iop_NotV128, binop(mkVecCMPGTS(size), argR, argL))
11668 : unop(Iop_NotV128, binop(mkVecCMPGTU(size), argR, argL)));
11669 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
11670 const HChar* nm = isGE ? "cmge" : "cmhs";
11671 const HChar* arr = nameArr_Q_SZ(bitQ, size);
11672 DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
11673 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
11674 return True;
11677 if (opcode == BITS5(0,1,0,0,0) || opcode == BITS5(0,1,0,1,0)) {
11678 /* -------- 0,xx,01000 SSHL std7_std7_std7 -------- */
11679 /* -------- 0,xx,01010 SRSHL std7_std7_std7 -------- */
11680 /* -------- 1,xx,01000 USHL std7_std7_std7 -------- */
11681 /* -------- 1,xx,01010 URSHL std7_std7_std7 -------- */
11682 if (bitQ == 0 && size == X11) return False; // implied 1d case
11683 Bool isU = bitU == 1;
11684 Bool isR = opcode == BITS5(0,1,0,1,0);
11685 IROp op = isR ? (isU ? mkVecRSHU(size) : mkVecRSHS(size))
11686 : (isU ? mkVecSHU(size) : mkVecSHS(size));
11687 IRTemp res = newTempV128();
11688 assign(res, binop(op, getQReg128(nn), getQReg128(mm)));
11689 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
11690 const HChar* nm = isR ? (isU ? "urshl" : "srshl")
11691 : (isU ? "ushl" : "sshl");
11692 const HChar* arr = nameArr_Q_SZ(bitQ, size);
11693 DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
11694 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
11695 return True;
11698 if (opcode == BITS5(0,1,0,0,1) || opcode == BITS5(0,1,0,1,1)) {
11699 /* -------- 0,xx,01001 SQSHL std7_std7_std7 -------- */
11700 /* -------- 0,xx,01011 SQRSHL std7_std7_std7 -------- */
11701 /* -------- 1,xx,01001 UQSHL std7_std7_std7 -------- */
11702 /* -------- 1,xx,01011 UQRSHL std7_std7_std7 -------- */
11703 if (bitQ == 0 && size == X11) return False; // implied 1d case
11704 Bool isU = bitU == 1;
11705 Bool isR = opcode == BITS5(0,1,0,1,1);
11706 IROp op = isR ? (isU ? mkVecQANDUQRSH(size) : mkVecQANDSQRSH(size))
11707 : (isU ? mkVecQANDUQSH(size) : mkVecQANDSQSH(size));
11708 /* This is a bit tricky. If we're only interested in the lowest 64 bits
11709 of the result (viz, bitQ == 0), then we must adjust the operands to
11710 ensure that the upper part of the result, that we don't care about,
11711 doesn't pollute the returned Q value. To do this, zero out the upper
11712 operand halves beforehand. This works because it means, for the
11713 lanes we don't care about, we are shifting zero by zero, which can
11714 never saturate. */
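/* These ops return a V256: the lower V128 is the saturating shift
   result, and the upper V128 has nonzero lanes exactly where saturation
   occurred, which is what drives the QC update below. */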
11715 IRTemp res256 = newTemp(Ity_V256);
11716 IRTemp resSH = newTempV128();
11717 IRTemp resQ = newTempV128();
11718 IRTemp zero = newTempV128();
11719 assign(res256, binop(op,
11720 math_MAYBE_ZERO_HI64_fromE(bitQ, getQReg128(nn)),
11721 math_MAYBE_ZERO_HI64_fromE(bitQ, getQReg128(mm))));
11722 assign(resSH, unop(Iop_V256toV128_0, mkexpr(res256)));
11723 assign(resQ, unop(Iop_V256toV128_1, mkexpr(res256)));
11724 assign(zero, mkV128(0x0000));
11725 putQReg128(dd, mkexpr(resSH));
11726 updateQCFLAGwithDifference(resQ, zero);
11727 const HChar* nm = isR ? (isU ? "uqrshl" : "sqrshl")
11728 : (isU ? "uqshl" : "sqshl");
11729 const HChar* arr = nameArr_Q_SZ(bitQ, size);
11730 DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
11731 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
11732 return True;
11735 if (opcode == BITS5(0,1,1,0,0) || opcode == BITS5(0,1,1,0,1)) {
11736 /* -------- 0,xx,01100 SMAX std7_std7_std7 -------- */
11737 /* -------- 1,xx,01100 UMAX std7_std7_std7 -------- */
11738 /* -------- 0,xx,01101 SMIN std7_std7_std7 -------- */
11739 /* -------- 1,xx,01101 UMIN std7_std7_std7 -------- */
11740 if (bitQ == 0 && size == X11) return False; // implied 1d case
11741 Bool isU = bitU == 1;
11742 Bool isMAX = (opcode & 1) == 0;
11743 IROp op = isMAX ? (isU ? mkVecMAXU(size) : mkVecMAXS(size))
11744 : (isU ? mkVecMINU(size) : mkVecMINS(size));
11745 IRTemp t = newTempV128();
11746 assign(t, binop(op, getQReg128(nn), getQReg128(mm)));
11747 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t));
11748 const HChar* nm = isMAX ? (isU ? "umax" : "smax")
11749 : (isU ? "umin" : "smin");
11750 const HChar* arr = nameArr_Q_SZ(bitQ, size);
11751 DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
11752 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
11753 return True;
11756 if (opcode == BITS5(0,1,1,1,0) || opcode == BITS5(0,1,1,1,1)) {
11757 /* -------- 0,xx,01110 SABD std6_std6_std6 -------- */
11758 /* -------- 1,xx,01110 UABD std6_std6_std6 -------- */
11759 /* -------- 0,xx,01111 SABA std6_std6_std6 -------- */
11760 /* -------- 1,xx,01111 UABA std6_std6_std6 -------- */
11761 if (size == X11) return False; // 1d/2d cases not allowed
11762 Bool isU = bitU == 1;
11763 Bool isACC = opcode == BITS5(0,1,1,1,1);
11764 vassert(size <= 2);
11765 IRTemp t1 = math_ABD(isU, size, getQReg128(nn), getQReg128(mm));
11766 IRTemp t2 = newTempV128();
11767 assign(t2, isACC ? binop(mkVecADD(size), mkexpr(t1), getQReg128(dd))
11768 : mkexpr(t1));
11769 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t2));
11770 const HChar* nm = isACC ? (isU ? "uaba" : "saba")
11771 : (isU ? "uabd" : "sabd");
11772 const HChar* arr = nameArr_Q_SZ(bitQ, size);
11773 DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
11774 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
11775 return True;
11778 if (opcode == BITS5(1,0,0,0,0)) {
11779 /* -------- 0,xx,10000 ADD std7_std7_std7 -------- */
11780 /* -------- 1,xx,10000 SUB std7_std7_std7 -------- */
11781 if (bitQ == 0 && size == X11) return False; // implied 1d case
11782 Bool isSUB = bitU == 1;
11783 IROp op = isSUB ? mkVecSUB(size) : mkVecADD(size);
11784 IRTemp t = newTempV128();
11785 assign(t, binop(op, getQReg128(nn), getQReg128(mm)));
11786 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t));
11787 const HChar* nm = isSUB ? "sub" : "add";
11788 const HChar* arr = nameArr_Q_SZ(bitQ, size);
11789 DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
11790 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
11791 return True;
11794 if (opcode == BITS5(1,0,0,0,1)) {
11795 /* -------- 0,xx,10001 CMTST std7_std7_std7 -------- */ // &, != 0
11796 /* -------- 1,xx,10001 CMEQ std7_std7_std7 -------- */ // ==
11797 if (bitQ == 0 && size == X11) return False; // implied 1d case
11798 Bool isEQ = bitU == 1;
11799 IRExpr* argL = getQReg128(nn);
11800 IRExpr* argR = getQReg128(mm);
11801 IRTemp res = newTempV128();
11802 assign(res,
11803 isEQ ? binop(mkVecCMPEQ(size), argL, argR)
11804 : unop(Iop_NotV128, binop(mkVecCMPEQ(size),
11805 binop(Iop_AndV128, argL, argR),
11806 mkV128(0x0000))));
11807 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
11808 const HChar* nm = isEQ ? "cmeq" : "cmtst";
11809 const HChar* arr = nameArr_Q_SZ(bitQ, size);
11810 DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
11811 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
11812 return True;
11815 if (opcode == BITS5(1,0,0,1,0)) {
11816 /* -------- 0,xx,10010 MLA std7_std7_std7 -------- */
11817 /* -------- 1,xx,10010 MLS std7_std7_std7 -------- */
11818 if (bitQ == 0 && size == X11) return False; // implied 1d case
11819 Bool isMLS = bitU == 1;
11820 IROp opMUL = mkVecMUL(size);
11821 IROp opADDSUB = isMLS ? mkVecSUB(size) : mkVecADD(size);
11822 IRTemp res = newTempV128();
11823 if (opMUL != Iop_INVALID && opADDSUB != Iop_INVALID) {
11824 assign(res, binop(opADDSUB,
11825 getQReg128(dd),
11826 binop(opMUL, getQReg128(nn), getQReg128(mm))));
11827 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
11828 const HChar* arr = nameArr_Q_SZ(bitQ, size);
11829 DIP("%s %s.%s, %s.%s, %s.%s\n", isMLS ? "mls" : "mla",
11830 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
11831 return True;
11833 return False;
11836 if (opcode == BITS5(1,0,0,1,1)) {
11837 /* -------- 0,xx,10011 MUL std7_std7_std7 -------- */
11838 /* -------- 1,xx,10011 PMUL 16b_16b_16b, 8b_8b_8b -------- */
11839 if (bitQ == 0 && size == X11) return False; // implied 1d case
11840 Bool isPMUL = bitU == 1;
11841 const IROp opsPMUL[4]
11842 = { Iop_PolynomialMul8x16, Iop_INVALID, Iop_INVALID, Iop_INVALID };
11843 IROp opMUL = isPMUL ? opsPMUL[size] : mkVecMUL(size);
11844 IRTemp res = newTempV128();
11845 if (opMUL != Iop_INVALID) {
11846 assign(res, binop(opMUL, getQReg128(nn), getQReg128(mm)));
11847 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
11848 const HChar* arr = nameArr_Q_SZ(bitQ, size);
11849 DIP("%s %s.%s, %s.%s, %s.%s\n", isPMUL ? "pmul" : "mul",
11850 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
11851 return True;
11853 return False;
11856 if (opcode == BITS5(1,0,1,0,0) || opcode == BITS5(1,0,1,0,1)) {
11857 /* -------- 0,xx,10100 SMAXP std6_std6_std6 -------- */
11858 /* -------- 1,xx,10100 UMAXP std6_std6_std6 -------- */
11859 /* -------- 0,xx,10101 SMINP std6_std6_std6 -------- */
11860 /* -------- 1,xx,10101 UMINP std6_std6_std6 -------- */
11861 if (size == X11) return False;
11862 Bool isU = bitU == 1;
11863 Bool isMAX = opcode == BITS5(1,0,1,0,0);
11864 IRTemp vN = newTempV128();
11865 IRTemp vM = newTempV128();
11866 IROp op = isMAX ? (isU ? mkVecMAXU(size) : mkVecMAXS(size))
11867 : (isU ? mkVecMINU(size) : mkVecMINS(size));
11868 assign(vN, getQReg128(nn));
11869 assign(vM, getQReg128(mm));
11870 IRTemp res128 = newTempV128();
11871 assign(res128,
11872 binop(op,
11873 binop(mkVecCATEVENLANES(size), mkexpr(vM), mkexpr(vN)),
11874 binop(mkVecCATODDLANES(size), mkexpr(vM), mkexpr(vN))));
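/* Pairwise trick (a sketch of what the lines above do): concatenating the
even lanes of (Vm,Vn) and the odd lanes of (Vm,Vn) lines up the two members
of each adjacent source pair in the same column, so a single vertical
min/max over the two concatenations produces all the pairwise results at
once. */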
11875 /* In the half-width case, use CatEL32x4 to extract the half-width
11876 result from the full-width result. */
11877 IRExpr* res
11878 = bitQ == 0 ? unop(Iop_ZeroHI64ofV128,
11879 binop(Iop_CatEvenLanes32x4, mkexpr(res128),
11880 mkexpr(res128)))
11881 : mkexpr(res128);
11882 putQReg128(dd, res);
11883 const HChar* arr = nameArr_Q_SZ(bitQ, size);
11884 const HChar* nm = isMAX ? (isU ? "umaxp" : "smaxp")
11885 : (isU ? "uminp" : "sminp");
11886 DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
11887 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
11888 return True;
11891 if (opcode == BITS5(1,0,1,1,0)) {
11892 /* -------- 0,xx,10110 SQDMULH s and h variants only -------- */
11893 /* -------- 1,xx,10110 SQRDMULH s and h variants only -------- */
11894 if (size == X00 || size == X11) return False;
11895 Bool isR = bitU == 1;
11896 IRTemp res, sat1q, sat1n, vN, vM;
11897 res = sat1q = sat1n = vN = vM = IRTemp_INVALID;
11898 newTempsV128_2(&vN, &vM);
11899 assign(vN, getQReg128(nn));
11900 assign(vM, getQReg128(mm));
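/* Lane-wise, SQDMULH is sat((2*Vn*Vm) >> lane-width); SQRDMULH additionally
adds a rounding constant of 1 << (lane-width - 1) before the shift.
math_SQDMULH also returns saturated and unsaturated intermediates so that
QC can be set below when they differ; the ZHI variant masks off the unused
upper half in the 64-bit (Q=0) case so it cannot cause spurious QC sets. */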
11901 math_SQDMULH(&res, &sat1q, &sat1n, isR, size, vN, vM);
11902 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
11903 IROp opZHI = bitQ == 0 ? Iop_ZeroHI64ofV128 : Iop_INVALID;
11904 updateQCFLAGwithDifferenceZHI(sat1q, sat1n, opZHI);
11905 const HChar* arr = nameArr_Q_SZ(bitQ, size);
11906 const HChar* nm = isR ? "sqrdmulh" : "sqdmulh";
11907 DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
11908 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
11909 return True;
11912 if (bitU == 0 && opcode == BITS5(1,0,1,1,1)) {
11913 /* -------- 0,xx,10111 ADDP std7_std7_std7 -------- */
11914 if (bitQ == 0 && size == X11) return False; // implied 1d case
11915 IRTemp vN = newTempV128();
11916 IRTemp vM = newTempV128();
11917 assign(vN, getQReg128(nn));
11918 assign(vM, getQReg128(mm));
11919 IRTemp res128 = newTempV128();
11920 assign(res128,
11921 binop(mkVecADD(size),
11922 binop(mkVecCATEVENLANES(size), mkexpr(vM), mkexpr(vN)),
11923 binop(mkVecCATODDLANES(size), mkexpr(vM), mkexpr(vN))));
11924 /* In the half-width case, use CatEL32x4 to extract the half-width
11925 result from the full-width result. */
11926 IRExpr* res
11927 = bitQ == 0 ? unop(Iop_ZeroHI64ofV128,
11928 binop(Iop_CatEvenLanes32x4, mkexpr(res128),
11929 mkexpr(res128)))
11930 : mkexpr(res128);
11931 putQReg128(dd, res);
11932 const HChar* arr = nameArr_Q_SZ(bitQ, size);
11933 DIP("addp %s.%s, %s.%s, %s.%s\n",
11934 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
11935 return True;
11938 if (bitU == 0
11939 && (opcode == BITS5(1,1,0,0,0) || opcode == BITS5(1,1,1,1,0))) {
11940 /* -------- 0,0x,11000 FMAXNM 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
11941 /* -------- 0,1x,11000 FMINNM 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
11942 /* -------- 0,0x,11110 FMAX 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
11943 /* -------- 0,1x,11110 FMIN 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
11944 /* FMAXNM, FMINNM: FIXME -- KLUDGED */
11945 Bool isD = (size & 1) == 1;
11946 if (bitQ == 0 && isD) return False; // implied 1d case
11947 Bool isMIN = (size & 2) == 2;
11948 Bool isNM = opcode == BITS5(1,1,0,0,0);
11949 IROp opMXX = (isMIN ? mkVecMINF : mkVecMAXF)(isD ? X11 : X10);
11950 IRTemp res = newTempV128();
11951 assign(res, binop(opMXX, getQReg128(nn), getQReg128(mm)));
11952 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
11953 const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
11954 DIP("%s%s %s.%s, %s.%s, %s.%s\n",
11955 isMIN ? "fmin" : "fmax", isNM ? "nm" : "",
11956 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
11957 return True;
11960 if (bitU == 0 && opcode == BITS5(1,1,0,0,1)) {
11961 /* -------- 0,0x,11001 FMLA 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
11962 /* -------- 0,1x,11001 FMLS 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
11963 Bool isD = (size & 1) == 1;
11964 Bool isSUB = (size & 2) == 2;
11965 if (bitQ == 0 && isD) return False; // implied 1d case
11966 IROp opADD = isD ? Iop_Add64Fx2 : Iop_Add32Fx4;
11967 IROp opSUB = isD ? Iop_Sub64Fx2 : Iop_Sub32Fx4;
11968 IROp opMUL = isD ? Iop_Mul64Fx2 : Iop_Mul32Fx4;
11969 IRTemp rm = mk_get_IR_rounding_mode();
11970 IRTemp t1 = newTempV128();
11971 IRTemp t2 = newTempV128();
11972 // FIXME: double rounding; use FMA primops instead
11973 assign(t1, triop(opMUL,
11974 mkexpr(rm), getQReg128(nn), getQReg128(mm)));
11975 assign(t2, triop(isSUB ? opSUB : opADD,
11976 mkexpr(rm), getQReg128(dd), mkexpr(t1)));
11977 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t2));
11978 const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
11979 DIP("%s %s.%s, %s.%s, %s.%s\n", isSUB ? "fmls" : "fmla",
11980 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
11981 return True;
11984 if (bitU == 0 && opcode == BITS5(1,1,0,1,0)) {
11985 /* -------- 0,0x,11010 FADD 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
11986 /* -------- 0,1x,11010 FSUB 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
11987 Bool isD = (size & 1) == 1;
11988 Bool isSUB = (size & 2) == 2;
11989 if (bitQ == 0 && isD) return False; // implied 1d case
11990 const IROp ops[4]
11991 = { Iop_Add32Fx4, Iop_Add64Fx2, Iop_Sub32Fx4, Iop_Sub64Fx2 };
11992 IROp op = ops[size];
11993 IRTemp rm = mk_get_IR_rounding_mode();
11994 IRTemp t1 = newTempV128();
11995 IRTemp t2 = newTempV128();
11996 assign(t1, triop(op, mkexpr(rm), getQReg128(nn), getQReg128(mm)));
11997 assign(t2, math_MAYBE_ZERO_HI64(bitQ, t1));
11998 putQReg128(dd, mkexpr(t2));
11999 const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
12000 DIP("%s %s.%s, %s.%s, %s.%s\n", isSUB ? "fsub" : "fadd",
12001 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
12002 return True;
12005 if (bitU == 1 && size >= X10 && opcode == BITS5(1,1,0,1,0)) {
12006 /* -------- 1,1x,11010 FABD 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
12007 Bool isD = (size & 1) == 1;
12008 if (bitQ == 0 && isD) return False; // implied 1d case
12009 IROp opSUB = isD ? Iop_Sub64Fx2 : Iop_Sub32Fx4;
12010 IROp opABS = isD ? Iop_Abs64Fx2 : Iop_Abs32Fx4;
12011 IRTemp rm = mk_get_IR_rounding_mode();
12012 IRTemp t1 = newTempV128();
12013 IRTemp t2 = newTempV128();
12014 // FIXME: use Abd primop instead?
12015 assign(t1, triop(opSUB, mkexpr(rm), getQReg128(nn), getQReg128(mm)));
12016 assign(t2, unop(opABS, mkexpr(t1)));
12017 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t2));
12018 const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
12019 DIP("fabd %s.%s, %s.%s, %s.%s\n",
12020 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
12021 return True;
12024 if (size <= X01 && opcode == BITS5(1,1,0,1,1)) {
12025 /* -------- 0,0x,11011 FMULX 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
12026 /* -------- 1,0x,11011 FMUL 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
12027 // KLUDGE: FMULX is treated the same way as FMUL. That can't be right.
12028 Bool isD = (size & 1) == 1;
12029 Bool isMULX = bitU == 0;
12030 if (bitQ == 0 && isD) return False; // implied 1d case
12031 IRTemp rm = mk_get_IR_rounding_mode();
12032 IRTemp t1 = newTempV128();
12033 assign(t1, triop(isD ? Iop_Mul64Fx2 : Iop_Mul32Fx4,
12034 mkexpr(rm), getQReg128(nn), getQReg128(mm)));
12035 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t1));
12036 const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
12037 DIP("%s %s.%s, %s.%s, %s.%s\n", isMULX ? "fmulx" : "fmul",
12038 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
12039 return True;
12042 if (size <= X01 && opcode == BITS5(1,1,1,0,0)) {
12043 /* -------- 0,0x,11100 FCMEQ 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
12044 /* -------- 1,0x,11100 FCMGE 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
12045 Bool isD = (size & 1) == 1;
12046 if (bitQ == 0 && isD) return False; // implied 1d case
12047 Bool isGE = bitU == 1;
12048 IROp opCMP = isGE ? (isD ? Iop_CmpLE64Fx2 : Iop_CmpLE32Fx4)
12049 : (isD ? Iop_CmpEQ64Fx2 : Iop_CmpEQ32Fx4);
12050 IRTemp t1 = newTempV128();
12051 assign(t1, isGE ? binop(opCMP, getQReg128(mm), getQReg128(nn)) // swapd
12052 : binop(opCMP, getQReg128(nn), getQReg128(mm)));
12053 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t1));
12054 const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
12055 DIP("%s %s.%s, %s.%s, %s.%s\n", isGE ? "fcmge" : "fcmeq",
12056 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
12057 return True;
12060 if (bitU == 1 && size >= X10 && opcode == BITS5(1,1,1,0,0)) {
12061 /* -------- 1,1x,11100 FCMGT 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
12062 Bool isD = (size & 1) == 1;
12063 if (bitQ == 0 && isD) return False; // implied 1d case
12064 IROp opCMP = isD ? Iop_CmpLT64Fx2 : Iop_CmpLT32Fx4;
12065 IRTemp t1 = newTempV128();
12066 assign(t1, binop(opCMP, getQReg128(mm), getQReg128(nn))); // swapd
12067 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t1));
12068 const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
12069 DIP("%s %s.%s, %s.%s, %s.%s\n", "fcmgt",
12070 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
12071 return True;
12074 if (bitU == 1 && opcode == BITS5(1,1,1,0,1)) {
12075 /* -------- 1,0x,11101 FACGE 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
12076 /* -------- 1,1x,11101 FACGT 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
12077 Bool isD = (size & 1) == 1;
12078 Bool isGT = (size & 2) == 2;
12079 if (bitQ == 0 && isD) return False; // implied 1d case
12080 IROp opCMP = isGT ? (isD ? Iop_CmpLT64Fx2 : Iop_CmpLT32Fx4)
12081 : (isD ? Iop_CmpLE64Fx2 : Iop_CmpLE32Fx4);
12082 IROp opABS = isD ? Iop_Abs64Fx2 : Iop_Abs32Fx4;
12083 IRTemp t1 = newTempV128();
12084 assign(t1, binop(opCMP, unop(opABS, getQReg128(mm)),
12085 unop(opABS, getQReg128(nn)))); // swapd
12086 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t1));
12087 const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
12088 DIP("%s %s.%s, %s.%s, %s.%s\n", isGT ? "facgt" : "facge",
12089 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
12090 return True;
12093 if (bitU == 1
12094 && (opcode == BITS5(1,1,0,0,0) || opcode == BITS5(1,1,1,1,0))) {
12095 /* -------- 1,0x,11000 FMAXNMP 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
12096 /* -------- 1,1x,11000 FMINNMP 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
12097 /* -------- 1,0x,11110 FMAXP 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
12098 /* -------- 1,1x,11110 FMINP 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
12099 /* FMAXNM, FMINNM: FIXME -- KLUDGED */
12100 Bool isD = (size & 1) == 1;
12101 if (bitQ == 0 && isD) return False; // implied 1d case
12102 Bool isMIN = (size & 2) == 2;
12103 Bool isNM = opcode == BITS5(1,1,0,0,0);
12104 IROp opMXX = (isMIN ? mkVecMINF : mkVecMAXF)(isD ? 3 : 2);
12105 IRTemp srcN = newTempV128();
12106 IRTemp srcM = newTempV128();
12107 IRTemp preL = IRTemp_INVALID;
12108 IRTemp preR = IRTemp_INVALID;
12109 assign(srcN, getQReg128(nn));
12110 assign(srcM, getQReg128(mm));
12111 math_REARRANGE_FOR_FLOATING_PAIRWISE(&preL, &preR,
12112 srcM, srcN, isD, bitQ);
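/* math_REARRANGE_FOR_FLOATING_PAIRWISE shuffles the two sources so that the
members of each adjacent pair land in the same lane of preL and preR; one
vertical op between them then yields all the pairwise results. */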
12113 putQReg128(
12114 dd, math_MAYBE_ZERO_HI64_fromE(
12115 bitQ,
12116 binop(opMXX, mkexpr(preL), mkexpr(preR))));
12117 const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
12118 DIP("%s%sp %s.%s, %s.%s, %s.%s\n",
12119 isMIN ? "fmin" : "fmax", isNM ? "nm" : "",
12120 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
12121 return True;
12124 if (bitU == 1 && size <= X01 && opcode == BITS5(1,1,0,1,0)) {
12125 /* -------- 1,0x,11010 FADDP 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
12126 Bool isD = size == X01;
12127 if (bitQ == 0 && isD) return False; // implied 1d case
12128 IRTemp srcN = newTempV128();
12129 IRTemp srcM = newTempV128();
12130 IRTemp preL = IRTemp_INVALID;
12131 IRTemp preR = IRTemp_INVALID;
12132 assign(srcN, getQReg128(nn));
12133 assign(srcM, getQReg128(mm));
12134 math_REARRANGE_FOR_FLOATING_PAIRWISE(&preL, &preR,
12135 srcM, srcN, isD, bitQ);
12136 putQReg128(
12137 dd, math_MAYBE_ZERO_HI64_fromE(
12138 bitQ,
12139 triop(mkVecADDF(isD ? 3 : 2),
12140 mkexpr(mk_get_IR_rounding_mode()),
12141 mkexpr(preL), mkexpr(preR))));
12142 const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
12143 DIP("%s %s.%s, %s.%s, %s.%s\n", "faddp",
12144 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
12145 return True;
12148 if (bitU == 1 && size <= X01 && opcode == BITS5(1,1,1,1,1)) {
12149 /* -------- 1,0x,11111 FDIV 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
12150 Bool isD = (size & 1) == 1;
12151 if (bitQ == 0 && isD) return False; // implied 1d case
12152 vassert(size <= 1);
12153 const IROp ops[2] = { Iop_Div32Fx4, Iop_Div64Fx2 };
12154 IROp op = ops[size];
12155 IRTemp rm = mk_get_IR_rounding_mode();
12156 IRTemp t1 = newTempV128();
12157 IRTemp t2 = newTempV128();
12158 assign(t1, triop(op, mkexpr(rm), getQReg128(nn), getQReg128(mm)));
12159 assign(t2, math_MAYBE_ZERO_HI64(bitQ, t1));
12160 putQReg128(dd, mkexpr(t2));
12161 const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
12162 DIP("%s %s.%s, %s.%s, %s.%s\n", "fdiv",
12163 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
12164 return True;
12167 if (bitU == 0 && opcode == BITS5(1,1,1,1,1)) {
12168 /* -------- 0,0x,11111: FRECPS 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
12169 /* -------- 0,1x,11111: FRSQRTS 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
12170 Bool isSQRT = (size & 2) == 2;
12171 Bool isD = (size & 1) == 1;
12172 if (bitQ == 0 && isD) return False; // implied 1d case
12173 IROp op = isSQRT ? (isD ? Iop_RSqrtStep64Fx2 : Iop_RSqrtStep32Fx4)
12174 : (isD ? Iop_RecipStep64Fx2 : Iop_RecipStep32Fx4);
12175 IRTemp res = newTempV128();
12176 assign(res, binop(op, getQReg128(nn), getQReg128(mm)));
12177 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
12178 const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
12179 DIP("%s %s.%s, %s.%s, %s.%s\n", isSQRT ? "frsqrts" : "frecps",
12180 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
12181 return True;
12184 return False;
12185 # undef INSN
12189 static
12190 Bool dis_AdvSIMD_two_reg_misc(/*MB_OUT*/DisResult* dres, UInt insn)
12192 /* 31 30 29 28 23 21 16 11 9 4
12193 0 Q U 01110 size 10000 opcode 10 n d
12194 Decode fields: U,size,opcode
12196 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
12197 if (INSN(31,31) != 0
12198 || INSN(28,24) != BITS5(0,1,1,1,0)
12199 || INSN(21,17) != BITS5(1,0,0,0,0)
12200 || INSN(11,10) != BITS2(1,0)) {
12201 return False;
12203 UInt bitQ = INSN(30,30);
12204 UInt bitU = INSN(29,29);
12205 UInt size = INSN(23,22);
12206 UInt opcode = INSN(16,12);
12207 UInt nn = INSN(9,5);
12208 UInt dd = INSN(4,0);
12209 vassert(size < 4);
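/* Illustrative worked decode: 0x4EA0B820 has Q=1, U=0, size=10,
opcode=01011, n=1, d=0, which the ABS case below accepts and prints as
"abs v0.4s, v1.4s". */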
12211 if (bitU == 0 && size <= X10 && opcode == BITS5(0,0,0,0,0)) {
12212 /* -------- 0,00,00000: REV64 16b_16b, 8b_8b -------- */
12213 /* -------- 0,01,00000: REV64 8h_8h, 4h_4h -------- */
12214 /* -------- 0,10,00000: REV64 4s_4s, 2s_2s -------- */
12215 const IROp iops[3] = { Iop_Reverse8sIn64_x2,
12216 Iop_Reverse16sIn64_x2, Iop_Reverse32sIn64_x2 };
12217 vassert(size <= 2);
12218 IRTemp res = newTempV128();
12219 assign(res, unop(iops[size], getQReg128(nn)));
12220 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
12221 const HChar* arr = nameArr_Q_SZ(bitQ, size);
12222 DIP("%s %s.%s, %s.%s\n", "rev64",
12223 nameQReg128(dd), arr, nameQReg128(nn), arr);
12224 return True;
12227 if (bitU == 1 && size <= X01 && opcode == BITS5(0,0,0,0,0)) {
12228 /* -------- 1,00,00000: REV32 16b_16b, 8b_8b -------- */
12229 /* -------- 1,01,00000: REV32 8h_8h, 4h_4h -------- */
12230 Bool isH = size == X01;
12231 IRTemp res = newTempV128();
12232 IROp iop = isH ? Iop_Reverse16sIn32_x4 : Iop_Reverse8sIn32_x4;
12233 assign(res, unop(iop, getQReg128(nn)));
12234 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
12235 const HChar* arr = nameArr_Q_SZ(bitQ, size);
12236 DIP("%s %s.%s, %s.%s\n", "rev32",
12237 nameQReg128(dd), arr, nameQReg128(nn), arr);
12238 return True;
12241 if (bitU == 0 && size == X00 && opcode == BITS5(0,0,0,0,1)) {
12242 /* -------- 0,00,00001: REV16 16b_16b, 8b_8b -------- */
12243 IRTemp res = newTempV128();
12244 assign(res, unop(Iop_Reverse8sIn16_x8, getQReg128(nn)));
12245 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
12246 const HChar* arr = nameArr_Q_SZ(bitQ, size);
12247 DIP("%s %s.%s, %s.%s\n", "rev16",
12248 nameQReg128(dd), arr, nameQReg128(nn), arr);
12249 return True;
12252 if (opcode == BITS5(0,0,0,1,0) || opcode == BITS5(0,0,1,1,0)) {
12253 /* -------- 0,xx,00010: SADDLP std6_std6 -------- */
12254 /* -------- 1,xx,00010: UADDLP std6_std6 -------- */
12255 /* -------- 0,xx,00110: SADALP std6_std6 -------- */
12256 /* -------- 1,xx,00110: UADALP std6_std6 -------- */
12257 /* Widens, and size refers to the narrow size. */
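/* Example: with size == X00 each pair of adjacent byte lanes is sign- or
zero-extended to 16 bits and summed, giving half as many lanes of twice the
width; the ACC forms (SADALP/UADALP) then add those pairwise sums into the
existing lanes of Vd. */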
12258 if (size == X11) return False; // no 1d or 2d cases
12259 Bool isU = bitU == 1;
12260 Bool isACC = opcode == BITS5(0,0,1,1,0);
12261 IRTemp src = newTempV128();
12262 IRTemp sum = newTempV128();
12263 IRTemp res = newTempV128();
12264 assign(src, getQReg128(nn));
12265 assign(sum,
12266 binop(mkVecADD(size+1),
12267 mkexpr(math_WIDEN_EVEN_OR_ODD_LANES(
12268 isU, True/*fromOdd*/, size, mkexpr(src))),
12269 mkexpr(math_WIDEN_EVEN_OR_ODD_LANES(
12270 isU, False/*!fromOdd*/, size, mkexpr(src)))));
12271 assign(res, isACC ? binop(mkVecADD(size+1), mkexpr(sum), getQReg128(dd))
12272 : mkexpr(sum));
12273 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
12274 const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
12275 const HChar* arrWide = nameArr_Q_SZ(bitQ, size+1);
12276 DIP("%s %s.%s, %s.%s\n", isACC ? (isU ? "uadalp" : "sadalp")
12277 : (isU ? "uaddlp" : "saddlp"),
12278 nameQReg128(dd), arrWide, nameQReg128(nn), arrNarrow);
12279 return True;
12282 if (opcode == BITS5(0,0,0,1,1)) {
12283 /* -------- 0,xx,00011: SUQADD std7_std7 -------- */
12284 /* -------- 1,xx,00011: USQADD std7_std7 -------- */
12285 if (bitQ == 0 && size == X11) return False; // implied 1d case
12286 Bool isUSQADD = bitU == 1;
12287 /* This is switched (in the US vs SU sense) deliberately.
12288 SUQADD corresponds to the ExtUSsatSS variants and
12289 USQADD corresponds to the ExtSUsatUU variants.
12290 See libvex_ir for more details. */
12291 IROp qop = isUSQADD ? mkVecQADDEXTSUSATUU(size)
12292 : mkVecQADDEXTUSSATSS(size);
12293 IROp nop = mkVecADD(size);
12294 IRTemp argL = newTempV128();
12295 IRTemp argR = newTempV128();
12296 IRTemp qres = newTempV128();
12297 IRTemp nres = newTempV128();
12298 /* Because the two arguments to the addition are implicitly
12299 extended differently (one signedly, the other unsignedly) it is
12300 important to present them to the primop in the correct order. */
12301 assign(argL, getQReg128(nn));
12302 assign(argR, getQReg128(dd));
12303 assign(qres, math_MAYBE_ZERO_HI64_fromE(
12304 bitQ, binop(qop, mkexpr(argL), mkexpr(argR))));
12305 assign(nres, math_MAYBE_ZERO_HI64_fromE(
12306 bitQ, binop(nop, mkexpr(argL), mkexpr(argR))));
12307 putQReg128(dd, mkexpr(qres));
12308 updateQCFLAGwithDifference(qres, nres);
12309 const HChar* arr = nameArr_Q_SZ(bitQ, size);
12310 DIP("%s %s.%s, %s.%s\n", isUSQADD ? "usqadd" : "suqadd",
12311 nameQReg128(dd), arr, nameQReg128(nn), arr);
12312 return True;
12315 if (opcode == BITS5(0,0,1,0,0)) {
12316 /* -------- 0,xx,00100: CLS std6_std6 -------- */
12317 /* -------- 1,xx,00100: CLZ std6_std6 -------- */
12318 if (size == X11) return False; // no 1d or 2d cases
12319 const IROp opsCLS[3] = { Iop_Cls8x16, Iop_Cls16x8, Iop_Cls32x4 };
12320 const IROp opsCLZ[3] = { Iop_Clz8x16, Iop_Clz16x8, Iop_Clz32x4 };
12321 Bool isCLZ = bitU == 1;
12322 IRTemp res = newTempV128();
12323 vassert(size <= 2);
12324 assign(res, unop(isCLZ ? opsCLZ[size] : opsCLS[size], getQReg128(nn)));
12325 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
12326 const HChar* arr = nameArr_Q_SZ(bitQ, size);
12327 DIP("%s %s.%s, %s.%s\n", isCLZ ? "clz" : "cls",
12328 nameQReg128(dd), arr, nameQReg128(nn), arr);
12329 return True;
12332 if (size == X00 && opcode == BITS5(0,0,1,0,1)) {
12333 /* -------- 0,00,00101: CNT 16b_16b, 8b_8b -------- */
12334 /* -------- 1,00,00101: NOT 16b_16b, 8b_8b -------- */
12335 IRTemp res = newTempV128();
12336 assign(res, unop(bitU == 0 ? Iop_Cnt8x16 : Iop_NotV128, getQReg128(nn)));
12337 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
12338 const HChar* arr = nameArr_Q_SZ(bitQ, 0);
12339 DIP("%s %s.%s, %s.%s\n", bitU == 0 ? "cnt" : "not",
12340 nameQReg128(dd), arr, nameQReg128(nn), arr);
12341 return True;
12344 if (bitU == 1 && size == X01 && opcode == BITS5(0,0,1,0,1)) {
12345 /* -------- 1,01,00101 RBIT 16b_16b, 8b_8b -------- */
12346 IRTemp res = newTempV128();
12347 assign(res, unop(Iop_Reverse1sIn8_x16, getQReg128(nn)));
12348 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
12349 const HChar* arr = nameArr_Q_SZ(bitQ, 0);
12350 DIP("%s %s.%s, %s.%s\n", "rbit",
12351 nameQReg128(dd), arr, nameQReg128(nn), arr);
12352 return True;
12355 if (opcode == BITS5(0,0,1,1,1)) {
12356 /* -------- 0,xx,00111 SQABS std7_std7 -------- */
12357 /* -------- 1,xx,00111 SQNEG std7_std7 -------- */
12358 if (bitQ == 0 && size == X11) return False; // implied 1d case
12359 Bool isNEG = bitU == 1;
12360 IRTemp qresFW = IRTemp_INVALID, nresFW = IRTemp_INVALID;
12361 (isNEG ? math_SQNEG : math_SQABS)( &qresFW, &nresFW,
12362 getQReg128(nn), size );
12363 IRTemp qres = newTempV128(), nres = newTempV128();
12364 assign(qres, math_MAYBE_ZERO_HI64(bitQ, qresFW));
12365 assign(nres, math_MAYBE_ZERO_HI64(bitQ, nresFW));
12366 putQReg128(dd, mkexpr(qres));
12367 updateQCFLAGwithDifference(qres, nres);
12368 const HChar* arr = nameArr_Q_SZ(bitQ, size);
12369 DIP("%s %s.%s, %s.%s\n", isNEG ? "sqneg" : "sqabs",
12370 nameQReg128(dd), arr, nameQReg128(nn), arr);
12371 return True;
12374 if (opcode == BITS5(0,1,0,0,0)) {
12375 /* -------- 0,xx,01000: CMGT std7_std7_#0 -------- */ // >s 0
12376 /* -------- 1,xx,01000: CMGE std7_std7_#0 -------- */ // >=s 0
12377 if (bitQ == 0 && size == X11) return False; // implied 1d case
12378 Bool isGT = bitU == 0;
12379 IRExpr* argL = getQReg128(nn);
12380 IRExpr* argR = mkV128(0x0000);
12381 IRTemp res = newTempV128();
12382 IROp opGTS = mkVecCMPGTS(size);
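/* VEX only provides vector signed-greater-than and equality compares, so
CMGE #0 (x >=s 0) is computed as NOT(0 >s x). */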
12383 assign(res, isGT ? binop(opGTS, argL, argR)
12384 : unop(Iop_NotV128, binop(opGTS, argR, argL)));
12385 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
12386 const HChar* arr = nameArr_Q_SZ(bitQ, size);
12387 DIP("cm%s %s.%s, %s.%s, #0\n", isGT ? "gt" : "ge",
12388 nameQReg128(dd), arr, nameQReg128(nn), arr);
12389 return True;
12392 if (opcode == BITS5(0,1,0,0,1)) {
12393 /* -------- 0,xx,01001: CMEQ std7_std7_#0 -------- */ // == 0
12394 /* -------- 1,xx,01001: CMLE std7_std7_#0 -------- */ // <=s 0
12395 if (bitQ == 0 && size == X11) return False; // implied 1d case
12396 Bool isEQ = bitU == 0;
12397 IRExpr* argL = getQReg128(nn);
12398 IRExpr* argR = mkV128(0x0000);
12399 IRTemp res = newTempV128();
12400 assign(res, isEQ ? binop(mkVecCMPEQ(size), argL, argR)
12401 : unop(Iop_NotV128,
12402 binop(mkVecCMPGTS(size), argL, argR)));
12403 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
12404 const HChar* arr = nameArr_Q_SZ(bitQ, size);
12405 DIP("cm%s %s.%s, %s.%s, #0\n", isEQ ? "eq" : "le",
12406 nameQReg128(dd), arr, nameQReg128(nn), arr);
12407 return True;
12410 if (bitU == 0 && opcode == BITS5(0,1,0,1,0)) {
12411 /* -------- 0,xx,01010: CMLT std7_std7_#0 -------- */ // <s 0
12412 if (bitQ == 0 && size == X11) return False; // implied 1d case
12413 IRExpr* argL = getQReg128(nn);
12414 IRExpr* argR = mkV128(0x0000);
12415 IRTemp res = newTempV128();
12416 assign(res, binop(mkVecCMPGTS(size), argR, argL));
12417 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
12418 const HChar* arr = nameArr_Q_SZ(bitQ, size);
12419 DIP("cm%s %s.%s, %s.%s, #0\n", "lt",
12420 nameQReg128(dd), arr, nameQReg128(nn), arr);
12421 return True;
12424 if (bitU == 0 && opcode == BITS5(0,1,0,1,1)) {
12425 /* -------- 0,xx,01011: ABS std7_std7 -------- */
12426 if (bitQ == 0 && size == X11) return False; // implied 1d case
12427 IRTemp res = newTempV128();
12428 assign(res, unop(mkVecABS(size), getQReg128(nn)));
12429 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
12430 const HChar* arr = nameArr_Q_SZ(bitQ, size);
12431 DIP("abs %s.%s, %s.%s\n", nameQReg128(dd), arr, nameQReg128(nn), arr);
12432 return True;
12435 if (bitU == 1 && opcode == BITS5(0,1,0,1,1)) {
12436 /* -------- 1,xx,01011: NEG std7_std7 -------- */
12437 if (bitQ == 0 && size == X11) return False; // implied 1d case
12438 IRTemp res = newTempV128();
12439 assign(res, binop(mkVecSUB(size), mkV128(0x0000), getQReg128(nn)));
12440 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
12441 const HChar* arr = nameArr_Q_SZ(bitQ, size);
12442 DIP("neg %s.%s, %s.%s\n", nameQReg128(dd), arr, nameQReg128(nn), arr);
12443 return True;
12446 UInt ix = 0; /*INVALID*/
12447 if (size >= X10) {
12448 switch (opcode) {
12449 case BITS5(0,1,1,0,0): ix = (bitU == 1) ? 4 : 1; break;
12450 case BITS5(0,1,1,0,1): ix = (bitU == 1) ? 5 : 2; break;
12451 case BITS5(0,1,1,1,0): if (bitU == 0) ix = 3; break;
12452 default: break;
12455 if (ix > 0) {
12456 /* -------- 0,1x,01100 FCMGT 2d_2d,4s_4s,2s_2s _#0.0 (ix 1) -------- */
12457 /* -------- 0,1x,01101 FCMEQ 2d_2d,4s_4s,2s_2s _#0.0 (ix 2) -------- */
12458 /* -------- 0,1x,01110 FCMLT 2d_2d,4s_4s,2s_2s _#0.0 (ix 3) -------- */
12459 /* -------- 1,1x,01100 FCMGE 2d_2d,4s_4s,2s_2s _#0.0 (ix 4) -------- */
12460 /* -------- 1,1x,01101 FCMLE 2d_2d,4s_4s,2s_2s _#0.0 (ix 5) -------- */
12461 if (bitQ == 0 && size == X11) return False; // implied 1d case
12462 Bool isD = size == X11;
12463 IROp opCmpEQ = isD ? Iop_CmpEQ64Fx2 : Iop_CmpEQ32Fx4;
12464 IROp opCmpLE = isD ? Iop_CmpLE64Fx2 : Iop_CmpLE32Fx4;
12465 IROp opCmpLT = isD ? Iop_CmpLT64Fx2 : Iop_CmpLT32Fx4;
12466 IROp opCmp = Iop_INVALID;
12467 Bool swap = False;
12468 const HChar* nm = "??";
12469 switch (ix) {
12470 case 1: nm = "fcmgt"; opCmp = opCmpLT; swap = True; break;
12471 case 2: nm = "fcmeq"; opCmp = opCmpEQ; break;
12472 case 3: nm = "fcmlt"; opCmp = opCmpLT; break;
12473 case 4: nm = "fcmge"; opCmp = opCmpLE; swap = True; break;
12474 case 5: nm = "fcmle"; opCmp = opCmpLE; break;
12475 default: vassert(0);
12477 IRExpr* zero = mkV128(0x0000);
12478 IRTemp res = newTempV128();
12479 assign(res, swap ? binop(opCmp, zero, getQReg128(nn))
12480 : binop(opCmp, getQReg128(nn), zero));
12481 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
12482 const HChar* arr = bitQ == 0 ? "2s" : (size == X11 ? "2d" : "4s");
12483 DIP("%s %s.%s, %s.%s, #0.0\n", nm,
12484 nameQReg128(dd), arr, nameQReg128(nn), arr);
12485 return True;
12488 if (size >= X10 && opcode == BITS5(0,1,1,1,1)) {
12489 /* -------- 0,1x,01111: FABS 2d_2d, 4s_4s, 2s_2s -------- */
12490 /* -------- 1,1x,01111: FNEG 2d_2d, 4s_4s, 2s_2s -------- */
12491 if (bitQ == 0 && size == X11) return False; // implied 1d case
12492 Bool isFNEG = bitU == 1;
12493 IROp op = isFNEG ? (size == X10 ? Iop_Neg32Fx4 : Iop_Neg64Fx2)
12494 : (size == X10 ? Iop_Abs32Fx4 : Iop_Abs64Fx2);
12495 IRTemp res = newTempV128();
12496 assign(res, unop(op, getQReg128(nn)));
12497 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
12498 const HChar* arr = bitQ == 0 ? "2s" : (size == X11 ? "2d" : "4s");
12499 DIP("%s %s.%s, %s.%s\n", isFNEG ? "fneg" : "fabs",
12500 nameQReg128(dd), arr, nameQReg128(nn), arr);
12501 return True;
12504 if (bitU == 0 && opcode == BITS5(1,0,0,1,0)) {
12505 /* -------- 0,xx,10010: XTN{,2} -------- */
12506 if (size == X11) return False;
12507 vassert(size < 3);
12508 Bool is2 = bitQ == 1;
12509 IROp opN = mkVecNARROWUN(size);
12510 IRTemp resN = newTempV128();
12511 assign(resN, unop(Iop_64UtoV128, unop(opN, getQReg128(nn))));
12512 putLO64andZUorPutHI64(is2, dd, resN);
12513 const HChar* nm = "xtn";
12514 const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
12515 const HChar* arrWide = nameArr_Q_SZ(1, size+1);
12516 DIP("%s%s %s.%s, %s.%s\n", is2 ? "2" : "", nm,
12517 nameQReg128(dd), arrNarrow, nameQReg128(nn), arrWide);
12518 return True;
12521 if (opcode == BITS5(1,0,1,0,0)
12522 || (bitU == 1 && opcode == BITS5(1,0,0,1,0))) {
12523 /* -------- 0,xx,10100: SQXTN{,2} -------- */
12524 /* -------- 1,xx,10100: UQXTN{,2} -------- */
12525 /* -------- 1,xx,10010: SQXTUN{,2} -------- */
12526 if (size == X11) return False;
12527 vassert(size < 3);
12528 Bool is2 = bitQ == 1;
12529 IROp opN = Iop_INVALID;
12530 Bool zWiden = True;
12531 const HChar* nm = "??";
12532 /**/ if (bitU == 0 && opcode == BITS5(1,0,1,0,0)) {
12533 opN = mkVecQNARROWUNSS(size); nm = "sqxtn"; zWiden = False;
12535 else if (bitU == 1 && opcode == BITS5(1,0,1,0,0)) {
12536 opN = mkVecQNARROWUNUU(size); nm = "uqxtn";
12538 else if (bitU == 1 && opcode == BITS5(1,0,0,1,0)) {
12539 opN = mkVecQNARROWUNSU(size); nm = "sqxtun";
12541 else vassert(0);
12542 IRTemp src = newTempV128();
12543 assign(src, getQReg128(nn));
12544 IRTemp resN = newTempV128();
12545 assign(resN, unop(Iop_64UtoV128, unop(opN, mkexpr(src))));
12546 putLO64andZUorPutHI64(is2, dd, resN);
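/* To detect saturation, re-widen the narrowed result (zero- or
sign-extending as appropriate) and compare it with the original source;
any difference sets QC. */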
12547 IRTemp resW = math_WIDEN_LO_OR_HI_LANES(zWiden, False/*!fromUpperHalf*/,
12548 size, mkexpr(resN));
12549 updateQCFLAGwithDifference(src, resW);
12550 const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
12551 const HChar* arrWide = nameArr_Q_SZ(1, size+1);
12552 DIP("%s%s %s.%s, %s.%s\n", is2 ? "2" : "", nm,
12553 nameQReg128(dd), arrNarrow, nameQReg128(nn), arrWide);
12554 return True;
12557 if (bitU == 1 && opcode == BITS5(1,0,0,1,1)) {
12558 /* -------- 1,xx,10011 SHLL{2} #lane-width -------- */
12559 /* Widens, and size is the narrow size. */
12560 if (size == X11) return False;
12561 Bool is2 = bitQ == 1;
12562 IROp opINT = is2 ? mkVecINTERLEAVEHI(size) : mkVecINTERLEAVELO(size);
12563 IROp opSHL = mkVecSHLN(size+1);
12564 IRTemp src = newTempV128();
12565 IRTemp res = newTempV128();
12566 assign(src, getQReg128(nn));
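/* Widen by self-interleave: interleaving the source with itself copies each
narrow lane into both halves of a wide lane, and shifting left by the narrow
lane width (8 << size) then leaves the original bits in the top half with
zeroes below -- i.e. element << esize, which is what SHLL requires. */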
12567 assign(res, binop(opSHL, binop(opINT, mkexpr(src), mkexpr(src)),
12568 mkU8(8 << size)));
12569 putQReg128(dd, mkexpr(res));
12570 const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
12571 const HChar* arrWide = nameArr_Q_SZ(1, size+1);
12572 DIP("shll%s %s.%s, %s.%s, #%d\n", is2 ? "2" : "",
12573 nameQReg128(dd), arrWide, nameQReg128(nn), arrNarrow, 8 << size);
12574 return True;
12577 if (bitU == 0 && size <= X01 && opcode == BITS5(1,0,1,1,0)) {
12578 /* -------- 0,0x,10110: FCVTN 4h/8h_4s, 2s/4s_2d -------- */
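/* FCVTN (bitQ == 0) writes the narrowed lanes to the lower half of Vd and
zeroes the upper 64 bits; FCVTN2 (bitQ == 1) writes them to the upper half
and leaves the lower half unchanged. */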
12579 UInt nLanes = size == X00 ? 4 : 2;
12580 IRType srcTy = size == X00 ? Ity_F32 : Ity_F64;
12581 IROp opCvt = size == X00 ? Iop_F32toF16 : Iop_F64toF32;
12582 IRTemp rm = mk_get_IR_rounding_mode();
12583 IRTemp src[nLanes];
12584 for (UInt i = 0; i < nLanes; i++) {
12585 src[i] = newTemp(srcTy);
12586 assign(src[i], getQRegLane(nn, i, srcTy));
12588 for (UInt i = 0; i < nLanes; i++) {
12589 putQRegLane(dd, nLanes * bitQ + i,
12590 binop(opCvt, mkexpr(rm), mkexpr(src[i])));
12592 if (bitQ == 0) {
12593 putQRegLane(dd, 1, mkU64(0));
12595 const HChar* arrNarrow = nameArr_Q_SZ(bitQ, 1+size);
12596 const HChar* arrWide = nameArr_Q_SZ(1, 1+size+1);
12597 DIP("fcvtn%s %s.%s, %s.%s\n", bitQ ? "2" : "",
12598 nameQReg128(dd), arrNarrow, nameQReg128(nn), arrWide);
12599 return True;
12602 if (bitU == 1 && size == X01 && opcode == BITS5(1,0,1,1,0)) {
12603 /* -------- 1,01,10110: FCVTXN 2s/4s_2d -------- */
12604 /* Using Irrm_NEAREST here isn't right. The docs ask for "round to odd" (von Neumann rounding: if the narrowing is inexact, force the result's lowest mantissa bit to 1), which avoids double rounding when the value is rounded again later.
12605 There appears to be no IR rounding mode for that, so nearest is used as a kludge. */
12606 IRType srcTy = Ity_F64;
12607 IROp opCvt = Iop_F64toF32;
12608 IRTemp src[2];
12609 for (UInt i = 0; i < 2; i++) {
12610 src[i] = newTemp(srcTy);
12611 assign(src[i], getQRegLane(nn, i, srcTy));
12613 for (UInt i = 0; i < 2; i++) {
12614 putQRegLane(dd, 2 * bitQ + i,
12615 binop(opCvt, mkU32(Irrm_NEAREST), mkexpr(src[i])));
12617 if (bitQ == 0) {
12618 putQRegLane(dd, 1, mkU64(0));
12620 const HChar* arrNarrow = nameArr_Q_SZ(bitQ, 1+size);
12621 const HChar* arrWide = nameArr_Q_SZ(1, 1+size+1);
12622 DIP("fcvtxn%s %s.%s, %s.%s\n", bitQ ? "2" : "",
12623 nameQReg128(dd), arrNarrow, nameQReg128(nn), arrWide);
12624 return True;
12627 if (bitU == 0 && size <= X01 && opcode == BITS5(1,0,1,1,1)) {
12628 /* -------- 0,0x,10111: FCVTL 4s_4h/8h, 2d_2s/4s -------- */
12629 UInt nLanes = size == X00 ? 4 : 2;
12630 IRType srcTy = size == X00 ? Ity_F16 : Ity_F32;
12631 IROp opCvt = size == X00 ? Iop_F16toF32 : Iop_F32toF64;
12632 IRTemp src[nLanes];
12633 for (UInt i = 0; i < nLanes; i++) {
12634 src[i] = newTemp(srcTy);
12635 assign(src[i], getQRegLane(nn, nLanes * bitQ + i, srcTy));
12637 for (UInt i = 0; i < nLanes; i++) {
12638 putQRegLane(dd, i, unop(opCvt, mkexpr(src[i])));
12640 const HChar* arrNarrow = nameArr_Q_SZ(bitQ, 1+size);
12641 const HChar* arrWide = nameArr_Q_SZ(1, 1+size+1);
12642 DIP("fcvtl%s %s.%s, %s.%s\n", bitQ ? "2" : "",
12643 nameQReg128(dd), arrWide, nameQReg128(nn), arrNarrow);
12644 return True;
12647 ix = 0;
12648 if (opcode == BITS5(1,1,0,0,0) || opcode == BITS5(1,1,0,0,1)) {
12649 ix = 1 + ((((bitU & 1) << 2) | ((size & 2) << 0)) | ((opcode & 1) << 0));
12650 // = 1 + bitU[0]:size[1]:opcode[0]
12651 vassert(ix >= 1 && ix <= 8);
12652 if (ix == 7) ix = 0;
12654 if (ix > 0) {
12655 /* -------- 0,0x,11000 FRINTN 2d_2d, 4s_4s, 2s_2s (1) -------- */
12656 /* -------- 0,0x,11001 FRINTM 2d_2d, 4s_4s, 2s_2s (2) -------- */
12657 /* -------- 0,1x,11000 FRINTP 2d_2d, 4s_4s, 2s_2s (3) -------- */
12658 /* -------- 0,1x,11001 FRINTZ 2d_2d, 4s_4s, 2s_2s (4) -------- */
12659 /* -------- 1,0x,11000 FRINTA 2d_2d, 4s_4s, 2s_2s (5) -------- */
12660 /* -------- 1,0x,11001 FRINTX 2d_2d, 4s_4s, 2s_2s (6) -------- */
12661 /* -------- 1,1x,11000 (apparently unassigned) (7) -------- */
12662 /* -------- 1,1x,11001 FRINTI 2d_2d, 4s_4s, 2s_2s (8) -------- */
12663 /* rm plan:
12664 FRINTN: tieeven -- !! FIXME KLUDGED !!
12665 FRINTM: -inf
12666 FRINTP: +inf
12667 FRINTZ: zero
12668 FRINTA: tieaway -- !! FIXME KLUDGED !!
12669 FRINTX: per FPCR + "exact = TRUE"
12670 FRINTI: per FPCR
12672 Bool isD = (size & 1) == 1;
12673 if (bitQ == 0 && isD) return False; // implied 1d case
12675 IRTemp irrmRM = mk_get_IR_rounding_mode();
12677 UChar ch = '?';
12678 IRTemp irrm = newTemp(Ity_I32);
12679 switch (ix) {
12680 case 1: ch = 'n'; assign(irrm, mkU32(Irrm_NEAREST)); break;
12681 case 2: ch = 'm'; assign(irrm, mkU32(Irrm_NegINF)); break;
12682 case 3: ch = 'p'; assign(irrm, mkU32(Irrm_PosINF)); break;
12683 case 4: ch = 'z'; assign(irrm, mkU32(Irrm_ZERO)); break;
12684 // The following is a kludge. Should be: Irrm_NEAREST_TIE_AWAY_0
12685 case 5: ch = 'a'; assign(irrm, mkU32(Irrm_NEAREST)); break;
12686 // FRINTX ("round to integral exact") rounds per the FPCR mode, like FRINTI,
12687 // but should also raise the Inexact exception when the result differs from the input; that exception signalling is not modelled here.
12688 case 6: ch = 'x'; assign(irrm, mkexpr(irrmRM)); break;
12689 case 8: ch = 'i'; assign(irrm, mkexpr(irrmRM)); break;
12690 default: vassert(0);
12693 IROp opRND = isD ? Iop_RoundF64toInt : Iop_RoundF32toInt;
12694 if (isD) {
12695 for (UInt i = 0; i < 2; i++) {
12696 putQRegLane(dd, i, binop(opRND, mkexpr(irrm),
12697 getQRegLane(nn, i, Ity_F64)));
12699 } else {
12700 UInt n = bitQ==1 ? 4 : 2;
12701 for (UInt i = 0; i < n; i++) {
12702 putQRegLane(dd, i, binop(opRND, mkexpr(irrm),
12703 getQRegLane(nn, i, Ity_F32)));
12705 if (bitQ == 0)
12706 putQRegLane(dd, 1, mkU64(0)); // zero out lanes 2 and 3
12708 const HChar* arr = nameArr_Q_SZ(bitQ, size);
12709 DIP("frint%c %s.%s, %s.%s\n", ch,
12710 nameQReg128(dd), arr, nameQReg128(nn), arr);
12711 return True;
12714 ix = 0; /*INVALID*/
12715 switch (opcode) {
12716 case BITS5(1,1,0,1,0): ix = ((size & 2) == 2) ? 4 : 1; break;
12717 case BITS5(1,1,0,1,1): ix = ((size & 2) == 2) ? 5 : 2; break;
12718 case BITS5(1,1,1,0,0): if ((size & 2) == 0) ix = 3; break;
12719 default: break;
12721 if (ix > 0) {
12722 /* -------- 0,0x,11010 FCVTNS 2d_2d, 4s_4s, 2s_2s (ix 1) -------- */
12723 /* -------- 0,0x,11011 FCVTMS 2d_2d, 4s_4s, 2s_2s (ix 2) -------- */
12724 /* -------- 0,0x,11100 FCVTAS 2d_2d, 4s_4s, 2s_2s (ix 3) -------- */
12725 /* -------- 0,1x,11010 FCVTPS 2d_2d, 4s_4s, 2s_2s (ix 4) -------- */
12726 /* -------- 0,1x,11011 FCVTZS 2d_2d, 4s_4s, 2s_2s (ix 5) -------- */
12727 /* -------- 1,0x,11010 FCVTNS 2d_2d, 4s_4s, 2s_2s (ix 1) -------- */
12728 /* -------- 1,0x,11011 FCVTMS 2d_2d, 4s_4s, 2s_2s (ix 2) -------- */
12729 /* -------- 1,0x,11100 FCVTAS 2d_2d, 4s_4s, 2s_2s (ix 3) -------- */
12730 /* -------- 1,1x,11010 FCVTPS 2d_2d, 4s_4s, 2s_2s (ix 4) -------- */
12731 /* -------- 1,1x,11011 FCVTZS 2d_2d, 4s_4s, 2s_2s (ix 5) -------- */
12732 Bool isD = (size & 1) == 1;
12733 if (bitQ == 0 && isD) return False; // implied 1d case
12735 IRRoundingMode irrm = 8; /*impossible*/
12736 HChar ch = '?';
12737 switch (ix) {
12738 case 1: ch = 'n'; irrm = Irrm_NEAREST; break;
12739 case 2: ch = 'm'; irrm = Irrm_NegINF; break;
12740 case 3: ch = 'a'; irrm = Irrm_NEAREST; break; /* kludge? */
12741 case 4: ch = 'p'; irrm = Irrm_PosINF; break;
12742 case 5: ch = 'z'; irrm = Irrm_ZERO; break;
12743 default: vassert(0);
12745 IROp cvt = Iop_INVALID;
12746 if (bitU == 1) {
12747 cvt = isD ? Iop_F64toI64U : Iop_F32toI32U;
12748 } else {
12749 cvt = isD ? Iop_F64toI64S : Iop_F32toI32S;
12751 if (isD) {
12752 for (UInt i = 0; i < 2; i++) {
12753 putQRegLane(dd, i, binop(cvt, mkU32(irrm),
12754 getQRegLane(nn, i, Ity_F64)));
12756 } else {
12757 UInt n = bitQ==1 ? 4 : 2;
12758 for (UInt i = 0; i < n; i++) {
12759 putQRegLane(dd, i, binop(cvt, mkU32(irrm),
12760 getQRegLane(nn, i, Ity_F32)));
12762 if (bitQ == 0)
12763 putQRegLane(dd, 1, mkU64(0)); // zero out lanes 2 and 3
12765 const HChar* arr = nameArr_Q_SZ(bitQ, size);
12766 DIP("fcvt%c%c %s.%s, %s.%s\n", ch, bitU == 1 ? 'u' : 's',
12767 nameQReg128(dd), arr, nameQReg128(nn), arr);
12768 return True;
12771 if (size == X10 && opcode == BITS5(1,1,1,0,0)) {
12772 /* -------- 0,10,11100: URECPE 4s_4s, 2s_2s -------- */
12773 /* -------- 1,10,11100: URSQRTE 4s_4s, 2s_2s -------- */
12774 Bool isREC = bitU == 0;
12775 IROp op = isREC ? Iop_RecipEst32Ux4 : Iop_RSqrtEst32Ux4;
12776 IRTemp res = newTempV128();
12777 assign(res, unop(op, getQReg128(nn)));
12778 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
12779 const HChar* nm = isREC ? "urecpe" : "ursqrte";
12780 const HChar* arr = nameArr_Q_SZ(bitQ, size);
12781 DIP("%s %s.%s, %s.%s\n", nm,
12782 nameQReg128(dd), arr, nameQReg128(nn), arr);
12783 return True;
12786 if (size <= X01 && opcode == BITS5(1,1,1,0,1)) {
12787 /* -------- 0,0x,11101: SCVTF -------- */
12788 /* -------- 1,0x,11101: UCVTF -------- */
12789 /* 31 28 22 21 15 9 4
12790 0q0 01110 0 sz 1 00001 110110 n d SCVTF Vd, Vn
12791 0q1 01110 0 sz 1 00001 110110 n d UCVTF Vd, Vn
12792 with laneage:
12793 case sz:Q of 00 -> 2S, zero upper, 01 -> 4S, 10 -> illegal, 11 -> 2D
12795 Bool isQ = bitQ == 1;
12796 Bool isU = bitU == 1;
12797 Bool isF64 = (size & 1) == 1;
12798 if (isQ || !isF64) {
12799 IRType tyF = Ity_INVALID, tyI = Ity_INVALID;
12800 UInt nLanes = 0;
12801 Bool zeroHI = False;
12802 const HChar* arrSpec = NULL;
12803 Bool ok = getLaneInfo_Q_SZ(&tyI, &tyF, &nLanes, &zeroHI, &arrSpec,
12804 isQ, isF64 );
12805 IROp iop = isU ? (isF64 ? Iop_I64UtoF64 : Iop_I32UtoF32)
12806 : (isF64 ? Iop_I64StoF64 : Iop_I32StoF32);
12807 IRTemp rm = mk_get_IR_rounding_mode();
12808 UInt i;
12809 vassert(ok); /* the 'if' above should ensure this */
12810 for (i = 0; i < nLanes; i++) {
12811 putQRegLane(dd, i,
12812 binop(iop, mkexpr(rm), getQRegLane(nn, i, tyI)));
12814 if (zeroHI) {
12815 putQRegLane(dd, 1, mkU64(0));
12817 DIP("%ccvtf %s.%s, %s.%s\n", isU ? 'u' : 's',
12818 nameQReg128(dd), arrSpec, nameQReg128(nn), arrSpec);
12819 return True;
12821 /* else fall through */
12824 if (size >= X10 && opcode == BITS5(1,1,1,0,1)) {
12825 /* -------- 0,1x,11101: FRECPE 2d_2d, 4s_4s, 2s_2s -------- */
12826 /* -------- 1,1x,11101: FRSQRTE 2d_2d, 4s_4s, 2s_2s -------- */
12827 Bool isSQRT = bitU == 1;
12828 Bool isD = (size & 1) == 1;
12829 IROp op = isSQRT ? (isD ? Iop_RSqrtEst64Fx2 : Iop_RSqrtEst32Fx4)
12830 : (isD ? Iop_RecipEst64Fx2 : Iop_RecipEst32Fx4);
12831 if (bitQ == 0 && isD) return False; // implied 1d case
12832 IRTemp resV = newTempV128();
12833 assign(resV, unop(op, getQReg128(nn)));
12834 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, resV));
12835 const HChar* arr = bitQ == 0 ? "2s" : (size == X11 ? "2d" : "4s");
12836 DIP("%s %s.%s, %s.%s\n", isSQRT ? "frsqrte" : "frecpe",
12837 nameQReg128(dd), arr, nameQReg128(nn), arr);
12838 return True;
12841 if (bitU == 1 && size >= X10 && opcode == BITS5(1,1,1,1,1)) {
12842 /* -------- 1,1x,11111: FSQRT 2d_2d, 4s_4s, 2s_2s -------- */
12843 Bool isD = (size & 1) == 1;
12844 IROp op = isD ? Iop_Sqrt64Fx2 : Iop_Sqrt32Fx4;
12845 if (bitQ == 0 && isD) return False; // implied 1d case
12846 IRTemp resV = newTempV128();
12847 assign(resV, binop(op, mkexpr(mk_get_IR_rounding_mode()),
12848 getQReg128(nn)));
12849 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, resV));
12850 const HChar* arr = bitQ == 0 ? "2s" : (size == X11 ? "2d" : "4s");
12851 DIP("%s %s.%s, %s.%s\n", "fsqrt",
12852 nameQReg128(dd), arr, nameQReg128(nn), arr);
12853 return True;
12856 return False;
12857 # undef INSN
12861 static
12862 Bool dis_AdvSIMD_vector_x_indexed_elem(/*MB_OUT*/DisResult* dres, UInt insn)
12864 /* 31 28 23 21 20 19 15 11 9 4
12865 0 Q U 01111 size L M m opcode H 0 n d
12866 Decode fields are: u,size,opcode
12867 M is really part of the mm register number. Individual
12868 cases need to inspect L and H though.
12870 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
12871 if (INSN(31,31) != 0
12872 || INSN(28,24) != BITS5(0,1,1,1,1) || INSN(10,10) !=0) {
12873 return False;
12875 UInt bitQ = INSN(30,30);
12876 UInt bitU = INSN(29,29);
12877 UInt size = INSN(23,22);
12878 UInt bitL = INSN(21,21);
12879 UInt bitM = INSN(20,20);
12880 UInt mmLO4 = INSN(19,16);
12881 UInt opcode = INSN(15,12);
12882 UInt bitH = INSN(11,11);
12883 UInt nn = INSN(9,5);
12884 UInt dd = INSN(4,0);
12885 vassert(size < 4);
12886 vassert(bitH < 2 && bitM < 2 && bitL < 2);
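/* How the element index and the Vm number are assembled varies with size:
h-lane forms use index H:L:M and Vm = mmLO4 (so only V0..V15 are reachable),
s-lane forms use index H:L and Vm = M:mmLO4, and the FP d-lane forms use
index H with L required to be zero.  Each case below does this explicitly. */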
12888 if (bitU == 0 && size >= X10
12889 && (opcode == BITS4(0,0,0,1) || opcode == BITS4(0,1,0,1))) {
12890 /* -------- 0,1x,0001 FMLA 2d_2d_d[], 4s_4s_s[], 2s_2s_s[] -------- */
12891 /* -------- 0,1x,0101 FMLS 2d_2d_d[], 4s_4s_s[], 2s_2s_s[] -------- */
12892 if (bitQ == 0 && size == X11) return False; // implied 1d case
12893 Bool isD = (size & 1) == 1;
12894 Bool isSUB = opcode == BITS4(0,1,0,1);
12895 UInt index;
12896 if (!isD) index = (bitH << 1) | bitL;
12897 else if (isD && bitL == 0) index = bitH;
12898 else return False; // sz:L == x11 => unallocated encoding
12899 vassert(index < (isD ? 2 : 4));
12900 IRType ity = isD ? Ity_F64 : Ity_F32;
12901 IRTemp elem = newTemp(ity);
12902 UInt mm = (bitM << 4) | mmLO4;
12903 assign(elem, getQRegLane(mm, index, ity));
12904 IRTemp dupd = math_DUP_TO_V128(elem, ity);
12905 IROp opADD = isD ? Iop_Add64Fx2 : Iop_Add32Fx4;
12906 IROp opSUB = isD ? Iop_Sub64Fx2 : Iop_Sub32Fx4;
12907 IROp opMUL = isD ? Iop_Mul64Fx2 : Iop_Mul32Fx4;
12908 IRTemp rm = mk_get_IR_rounding_mode();
12909 IRTemp t1 = newTempV128();
12910 IRTemp t2 = newTempV128();
12911 // FIXME: double rounding; use FMA primops instead
12912 assign(t1, triop(opMUL, mkexpr(rm), getQReg128(nn), mkexpr(dupd)));
12913 assign(t2, triop(isSUB ? opSUB : opADD,
12914 mkexpr(rm), getQReg128(dd), mkexpr(t1)));
12915 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t2));
12916 const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
12917 DIP("%s %s.%s, %s.%s, %s.%c[%u]\n", isSUB ? "fmls" : "fmla",
12918 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm),
12919 isD ? 'd' : 's', index);
12920 return True;
12923 if (size >= X10 && opcode == BITS4(1,0,0,1)) {
12924 /* -------- 0,1x,1001 FMUL 2d_2d_d[], 4s_4s_s[], 2s_2s_s[] -------- */
12925 /* -------- 1,1x,1001 FMULX 2d_2d_d[], 4s_4s_s[], 2s_2s_s[] -------- */
12926 if (bitQ == 0 && size == X11) return False; // implied 1d case
12927 Bool isD = (size & 1) == 1;
12928 Bool isMULX = bitU == 1;
12929 UInt index;
12930 if (!isD) index = (bitH << 1) | bitL;
12931 else if (isD && bitL == 0) index = bitH;
12932 else return False; // sz:L == x11 => unallocated encoding
12933 vassert(index < (isD ? 2 : 4));
12934 IRType ity = isD ? Ity_F64 : Ity_F32;
12935 IRTemp elem = newTemp(ity);
12936 UInt mm = (bitM << 4) | mmLO4;
12937 assign(elem, getQRegLane(mm, index, ity));
12938 IRTemp dupd = math_DUP_TO_V128(elem, ity);
12939 // KLUDGE: FMULX is treated the same way as FMUL. That can't be right.
12940 IRTemp res = newTempV128();
12941 assign(res, triop(isD ? Iop_Mul64Fx2 : Iop_Mul32Fx4,
12942 mkexpr(mk_get_IR_rounding_mode()),
12943 getQReg128(nn), mkexpr(dupd)));
12944 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
12945 const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
12946 DIP("%s %s.%s, %s.%s, %s.%c[%u]\n",
12947 isMULX ? "fmulx" : "fmul", nameQReg128(dd), arr,
12948 nameQReg128(nn), arr, nameQReg128(mm), isD ? 'd' : 's', index);
12949 return True;
12952 if ((bitU == 1 && (opcode == BITS4(0,0,0,0) || opcode == BITS4(0,1,0,0)))
12953 || (bitU == 0 && opcode == BITS4(1,0,0,0))) {
12954 /* -------- 1,xx,0000 MLA s/h variants only -------- */
12955 /* -------- 1,xx,0100 MLS s/h variants only -------- */
12956 /* -------- 0,xx,1000 MUL s/h variants only -------- */
12957 Bool isMLA = opcode == BITS4(0,0,0,0);
12958 Bool isMLS = opcode == BITS4(0,1,0,0);
12959 UInt mm = 32; // invalid
12960 UInt ix = 16; // invalid
12961 switch (size) {
12962 case X00:
12963 return False; // b case is not allowed
12964 case X01:
12965 mm = mmLO4; ix = (bitH << 2) | (bitL << 1) | (bitM << 0); break;
12966 case X10:
12967 mm = (bitM << 4) | mmLO4; ix = (bitH << 1) | (bitL << 0); break;
12968 case X11:
12969 return False; // d case is not allowed
12970 default:
12971 vassert(0);
12973 vassert(mm < 32 && ix < 16);
12974 IROp opMUL = mkVecMUL(size);
12975 IROp opADD = mkVecADD(size);
12976 IROp opSUB = mkVecSUB(size);
12977 HChar ch = size == X01 ? 'h' : 's';
12978 IRTemp vecM = math_DUP_VEC_ELEM(getQReg128(mm), size, ix);
12979 IRTemp vecD = newTempV128();
12980 IRTemp vecN = newTempV128();
12981 IRTemp res = newTempV128();
12982 assign(vecD, getQReg128(dd));
12983 assign(vecN, getQReg128(nn));
12984 IRExpr* prod = binop(opMUL, mkexpr(vecN), mkexpr(vecM));
12985 if (isMLA || isMLS) {
12986 assign(res, binop(isMLA ? opADD : opSUB, mkexpr(vecD), prod));
12987 } else {
12988 assign(res, prod);
12990 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
12991 const HChar* arr = nameArr_Q_SZ(bitQ, size);
12992 DIP("%s %s.%s, %s.%s, %s.%c[%u]\n", isMLA ? "mla"
12993 : (isMLS ? "mls" : "mul"),
12994 nameQReg128(dd), arr,
12995 nameQReg128(nn), arr, nameQReg128(mm), ch, ix);
12996 return True;
12999 if (opcode == BITS4(1,0,1,0)
13000 || opcode == BITS4(0,0,1,0) || opcode == BITS4(0,1,1,0)) {
13001 /* -------- 0,xx,1010 SMULL s/h variants only -------- */ // 0 (ks)
13002 /* -------- 1,xx,1010 UMULL s/h variants only -------- */ // 0
13003 /* -------- 0,xx,0010 SMLAL s/h variants only -------- */ // 1
13004 /* -------- 1,xx,0010 UMLAL s/h variants only -------- */ // 1
13005 /* -------- 0,xx,0110 SMLSL s/h variants only -------- */ // 2
13006 /* -------- 1,xx,0110 UMLSL s/h variants only -------- */ // 2
13007 /* Widens, and size refers to the narrowed lanes. */
13008 UInt ks = 3;
13009 switch (opcode) {
13010 case BITS4(1,0,1,0): ks = 0; break;
13011 case BITS4(0,0,1,0): ks = 1; break;
13012 case BITS4(0,1,1,0): ks = 2; break;
13013 default: vassert(0);
13015 vassert(ks >= 0 && ks <= 2);
13016 Bool isU = bitU == 1;
13017 Bool is2 = bitQ == 1;
13018 UInt mm = 32; // invalid
13019 UInt ix = 16; // invalid
13020 switch (size) {
13021 case X00:
13022 return False; // h_b_b[] case is not allowed
13023 case X01:
13024 mm = mmLO4; ix = (bitH << 2) | (bitL << 1) | (bitM << 0); break;
13025 case X10:
13026 mm = (bitM << 4) | mmLO4; ix = (bitH << 1) | (bitL << 0); break;
13027 case X11:
13028 return False; // q_d_d[] case is not allowed
13029 default:
13030 vassert(0);
13032 vassert(mm < 32 && ix < 16);
13033 IRTemp vecN = newTempV128();
13034 IRTemp vecM = math_DUP_VEC_ELEM(getQReg128(mm), size, ix);
13035 IRTemp vecD = newTempV128();
13036 assign(vecN, getQReg128(nn));
13037 assign(vecD, getQReg128(dd));
13038 IRTemp res = IRTemp_INVALID;
13039 math_MULL_ACC(&res, is2, isU, size, "mas"[ks],
13040 vecN, vecM, ks == 0 ? IRTemp_INVALID : vecD);
13041 putQReg128(dd, mkexpr(res));
13042 const HChar* nm = ks == 0 ? "mull" : (ks == 1 ? "mlal" : "mlsl");
13043 const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
13044 const HChar* arrWide = nameArr_Q_SZ(1, size+1);
13045 HChar ch = size == X01 ? 'h' : 's';
13046 DIP("%c%s%s %s.%s, %s.%s, %s.%c[%u]\n",
13047 isU ? 'u' : 's', nm, is2 ? "2" : "",
13048 nameQReg128(dd), arrWide,
13049 nameQReg128(nn), arrNarrow, nameQReg128(mm), ch, ix);
13050 return True;
13053 if (bitU == 0
13054 && (opcode == BITS4(1,0,1,1)
13055 || opcode == BITS4(0,0,1,1) || opcode == BITS4(0,1,1,1))) {
13056 /* -------- 0,xx,1011 SQDMULL s/h variants only -------- */ // 0 (ks)
13057 /* -------- 0,xx,0011 SQDMLAL s/h variants only -------- */ // 1
13058 /* -------- 0,xx,0111 SQDMLSL s/h variants only -------- */ // 2
13059 /* Widens, and size refers to the narrowed lanes. */
13060 UInt ks = 3;
13061 switch (opcode) {
13062 case BITS4(1,0,1,1): ks = 0; break;
13063 case BITS4(0,0,1,1): ks = 1; break;
13064 case BITS4(0,1,1,1): ks = 2; break;
13065 default: vassert(0);
13067 vassert(ks >= 0 && ks <= 2);
13068 Bool is2 = bitQ == 1;
13069 UInt mm = 32; // invalid
13070 UInt ix = 16; // invalid
13071 switch (size) {
13072 case X00:
13073 return False; // h_b_b[] case is not allowed
13074 case X01:
13075 mm = mmLO4; ix = (bitH << 2) | (bitL << 1) | (bitM << 0); break;
13076 case X10:
13077 mm = (bitM << 4) | mmLO4; ix = (bitH << 1) | (bitL << 0); break;
13078 case X11:
13079 return False; // q_d_d[] case is not allowed
13080 default:
13081 vassert(0);
13083 vassert(mm < 32 && ix < 16);
13084 IRTemp vecN, vecD, res, sat1q, sat1n, sat2q, sat2n;
13085 vecN = vecD = res = sat1q = sat1n = sat2q = sat2n = IRTemp_INVALID;
13086 newTempsV128_2(&vecN, &vecD);
13087 assign(vecN, getQReg128(nn));
13088 IRTemp vecM = math_DUP_VEC_ELEM(getQReg128(mm), size, ix);
13089 assign(vecD, getQReg128(dd));
13090 math_SQDMULL_ACC(&res, &sat1q, &sat1n, &sat2q, &sat2n,
13091 is2, size, "mas"[ks],
13092 vecN, vecM, ks == 0 ? IRTemp_INVALID : vecD);
13093 putQReg128(dd, mkexpr(res));
13094 vassert(sat1q != IRTemp_INVALID && sat1n != IRTemp_INVALID);
13095 updateQCFLAGwithDifference(sat1q, sat1n);
13096 if (sat2q != IRTemp_INVALID || sat2n != IRTemp_INVALID) {
13097 updateQCFLAGwithDifference(sat2q, sat2n);
13099 const HChar* nm = ks == 0 ? "sqdmull"
13100 : (ks == 1 ? "sqdmlal" : "sqdmlsl");
13101 const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
13102 const HChar* arrWide = nameArr_Q_SZ(1, size+1);
13103 HChar ch = size == X01 ? 'h' : 's';
13104 DIP("%s%s %s.%s, %s.%s, %s.%c[%u]\n",
13105 nm, is2 ? "2" : "",
13106 nameQReg128(dd), arrWide,
13107 nameQReg128(nn), arrNarrow, nameQReg128(mm), ch, ix);
13108 return True;
13111 if (opcode == BITS4(1,1,0,0) || opcode == BITS4(1,1,0,1)) {
13112 /* -------- 0,xx,1100 SQDMULH s and h variants only -------- */
13113 /* -------- 0,xx,1101 SQRDMULH s and h variants only -------- */
13114 UInt mm = 32; // invalid
13115 UInt ix = 16; // invalid
13116 switch (size) {
13117 case X00:
13118 return False; // b case is not allowed
13119 case X01:
13120 mm = mmLO4; ix = (bitH << 2) | (bitL << 1) | (bitM << 0); break;
13121 case X10:
13122 mm = (bitM << 4) | mmLO4; ix = (bitH << 1) | (bitL << 0); break;
13123 case X11:
13124 return False; // q case is not allowed
13125 default:
13126 vassert(0);
13128 vassert(mm < 32 && ix < 16);
13129 Bool isR = opcode == BITS4(1,1,0,1);
13130 IRTemp res, sat1q, sat1n, vN, vM;
13131 res = sat1q = sat1n = vN = vM = IRTemp_INVALID;
13132 vN = newTempV128();
13133 assign(vN, getQReg128(nn));
13134 vM = math_DUP_VEC_ELEM(getQReg128(mm), size, ix);
13135 math_SQDMULH(&res, &sat1q, &sat1n, isR, size, vN, vM);
13136 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
13137 IROp opZHI = bitQ == 0 ? Iop_ZeroHI64ofV128 : Iop_INVALID;
13138 updateQCFLAGwithDifferenceZHI(sat1q, sat1n, opZHI);
13139 const HChar* nm = isR ? "sqrdmulh" : "sqdmulh";
13140 const HChar* arr = nameArr_Q_SZ(bitQ, size);
13141 HChar ch = size == X01 ? 'h' : 's';
13142 DIP("%s %s.%s, %s.%s, %s.%c[%u]\n", nm,
13143 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), ch, ix);
13144 return True;
13147 return False;
13148 # undef INSN
13152 static
13153 Bool dis_AdvSIMD_crypto_aes(/*MB_OUT*/DisResult* dres, UInt insn)
13155 /* 31 23 21 16 11 9 4
13156 0100 1110 size 10100 opcode 10 n d
13157 Decode fields are: size,opcode
13158 Size is always 00 in ARMv8, it appears.
13160 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
13161 if (INSN(31,24) != BITS8(0,1,0,0,1,1,1,0)
13162 || INSN(21,17) != BITS5(1,0,1,0,0) || INSN(11,10) != BITS2(1,0)) {
13163 return False;
13165 UInt size = INSN(23,22);
13166 UInt opcode = INSN(16,12);
13167 UInt nn = INSN(9,5);
13168 UInt dd = INSN(4,0);
13170 if (size == BITS2(0,0)
13171 && (opcode == BITS5(0,0,1,0,0) || opcode == BITS5(0,0,1,0,1))) {
13172 /* -------- 00,00100: AESE Vd.16b, Vn.16b -------- */
13173 /* -------- 00,00101: AESD Vd.16b, Vn.16b -------- */
13174 Bool isD = opcode == BITS5(0,0,1,0,1);
13175 IRTemp op1 = newTemp(Ity_V128);
13176 IRTemp op2 = newTemp(Ity_V128);
13177 IRTemp xord = newTemp(Ity_V128);
13178 IRTemp res = newTemp(Ity_V128);
13179 void* helper = isD ? &arm64g_dirtyhelper_AESD
13180 : &arm64g_dirtyhelper_AESE;
13181 const HChar* hname = isD ? "arm64g_dirtyhelper_AESD"
13182 : "arm64g_dirtyhelper_AESE";
13183 assign(op1, getQReg128(dd));
13184 assign(op2, getQReg128(nn));
13185 assign(xord, binop(Iop_XorV128, mkexpr(op1), mkexpr(op2)));
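/* Per the ARMv8 definition, AESE/AESD first XOR the state in Vd with the
round key in Vn; the dirty helper then applies SubBytes/ShiftRows (or their
inverses for AESD) to the combined 128-bit value. */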
13186 IRDirty* di
13187 = unsafeIRDirty_1_N( res, 0/*regparms*/, hname, helper,
13188 mkIRExprVec_3(
13189 IRExpr_VECRET(),
13190 unop(Iop_V128HIto64, mkexpr(xord)),
13191 unop(Iop_V128to64, mkexpr(xord)) ) );
13192 stmt(IRStmt_Dirty(di));
13193 putQReg128(dd, mkexpr(res));
13194 DIP("aes%c %s.16b, %s.16b\n", isD ? 'd' : 'e',
13195 nameQReg128(dd), nameQReg128(nn));
13196 return True;
13199 if (size == BITS2(0,0)
13200 && (opcode == BITS5(0,0,1,1,0) || opcode == BITS5(0,0,1,1,1))) {
13201 /* -------- 00,00110: AESMC Vd.16b, Vn.16b -------- */
13202 /* -------- 00,00111: AESIMC Vd.16b, Vn.16b -------- */
13203 Bool isI = opcode == BITS5(0,0,1,1,1);
13204 IRTemp src = newTemp(Ity_V128);
13205 IRTemp res = newTemp(Ity_V128);
13206 void* helper = isI ? &arm64g_dirtyhelper_AESIMC
13207 : &arm64g_dirtyhelper_AESMC;
13208 const HChar* hname = isI ? "arm64g_dirtyhelper_AESIMC"
13209 : "arm64g_dirtyhelper_AESMC";
13210 assign(src, getQReg128(nn));
13211 IRDirty* di
13212 = unsafeIRDirty_1_N( res, 0/*regparms*/, hname, helper,
13213 mkIRExprVec_3(
13214 IRExpr_VECRET(),
13215 unop(Iop_V128HIto64, mkexpr(src)),
13216 unop(Iop_V128to64, mkexpr(src)) ) );
13217 stmt(IRStmt_Dirty(di));
13218 putQReg128(dd, mkexpr(res));
13219 DIP("aes%s %s.16b, %s.16b\n", isI ? "imc" : "mc",
13220 nameQReg128(dd), nameQReg128(nn));
13221 return True;
13224 return False;
13225 # undef INSN
13229 static
13230 Bool dis_AdvSIMD_crypto_three_reg_sha(/*MB_OUT*/DisResult* dres, UInt insn)
13232 /* 31 28 23 21 20 15 14 11 9 4
13233 0101 1110 sz 0 m 0 opc 00 n d
13234 Decode fields are: sz,opc
13236 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
13237 if (INSN(31,24) != BITS8(0,1,0,1,1,1,1,0) || INSN(21,21) != 0
13238 || INSN(15,15) != 0 || INSN(11,10) != BITS2(0,0)) {
13239 return False;
13241 UInt sz = INSN(23,22);
13242 UInt mm = INSN(20,16);
13243 UInt opc = INSN(14,12);
13244 UInt nn = INSN(9,5);
13245 UInt dd = INSN(4,0);
13246 if (sz == BITS2(0,0) && opc <= BITS3(1,1,0)) {
13247 /* -------- 00,000 SHA1C Qd, Sn, Vm.4S -------- */
13248 /* -------- 00,001 SHA1P Qd, Sn, Vm.4S -------- */
13249 /* -------- 00,010 SHA1M Qd, Sn, Vm.4S -------- */
13250 /* -------- 00,011 SHA1SU0 Vd.4S, Vn.4S, Vm.4S -------- */
13251 /* -------- 00,100 SHA256H Qd, Qn, Vm.4S -------- */
13252 /* -------- 00,101 SHA256H2 Qd, Qn, Vm.4S -------- */
13253 /* -------- 00,110 SHA256SU1 Vd.4S, Vn.4S, Vm.4S -------- */
13254 vassert(opc < 7);
13255 const HChar* inames[7]
13256 = { "sha1c", "sha1p", "sha1m", "sha1su0",
13257 "sha256h", "sha256h2", "sha256su1" };
13258 void(*helpers[7])(V128*,ULong,ULong,ULong,ULong,ULong,ULong)
13259 = { &arm64g_dirtyhelper_SHA1C, &arm64g_dirtyhelper_SHA1P,
13260 &arm64g_dirtyhelper_SHA1M, &arm64g_dirtyhelper_SHA1SU0,
13261 &arm64g_dirtyhelper_SHA256H, &arm64g_dirtyhelper_SHA256H2,
13262 &arm64g_dirtyhelper_SHA256SU1 };
13263 const HChar* hnames[7]
13264 = { "arm64g_dirtyhelper_SHA1C", "arm64g_dirtyhelper_SHA1P",
13265 "arm64g_dirtyhelper_SHA1M", "arm64g_dirtyhelper_SHA1SU0",
13266 "arm64g_dirtyhelper_SHA256H", "arm64g_dirtyhelper_SHA256H2",
13267 "arm64g_dirtyhelper_SHA256SU1" };
13268 IRTemp vD = newTemp(Ity_V128);
13269 IRTemp vN = newTemp(Ity_V128);
13270 IRTemp vM = newTemp(Ity_V128);
13271 IRTemp vDhi = newTemp(Ity_I64);
13272 IRTemp vDlo = newTemp(Ity_I64);
13273 IRTemp vNhiPre = newTemp(Ity_I64);
13274 IRTemp vNloPre = newTemp(Ity_I64);
13275 IRTemp vNhi = newTemp(Ity_I64);
13276 IRTemp vNlo = newTemp(Ity_I64);
13277 IRTemp vMhi = newTemp(Ity_I64);
13278 IRTemp vMlo = newTemp(Ity_I64);
13279 assign(vD, getQReg128(dd));
13280 assign(vN, getQReg128(nn));
13281 assign(vM, getQReg128(mm));
13282 assign(vDhi, unop(Iop_V128HIto64, mkexpr(vD)));
13283 assign(vDlo, unop(Iop_V128to64, mkexpr(vD)));
13284 assign(vNhiPre, unop(Iop_V128HIto64, mkexpr(vN)));
13285 assign(vNloPre, unop(Iop_V128to64, mkexpr(vN)));
13286 assign(vMhi, unop(Iop_V128HIto64, mkexpr(vM)));
13287 assign(vMlo, unop(Iop_V128to64, mkexpr(vM)));
13288 /* Mask off any bits of the N register operand that aren't actually
13289 needed, so that Memcheck doesn't complain unnecessarily. */
13290 switch (opc) {
13291 case BITS3(0,0,0): case BITS3(0,0,1): case BITS3(0,1,0):
13292 assign(vNhi, mkU64(0));
13293 assign(vNlo, unop(Iop_32Uto64, unop(Iop_64to32, mkexpr(vNloPre))));
13294 break;
13295 case BITS3(0,1,1): case BITS3(1,0,0):
13296 case BITS3(1,0,1): case BITS3(1,1,0):
13297 assign(vNhi, mkexpr(vNhiPre));
13298 assign(vNlo, mkexpr(vNloPre));
13299 break;
13300 default:
13301 vassert(0);
13303 IRTemp res = newTemp(Ity_V128);
13304 IRDirty* di
13305 = unsafeIRDirty_1_N( res, 0/*regparms*/, hnames[opc], helpers[opc],
13306 mkIRExprVec_7(
13307 IRExpr_VECRET(),
13308 mkexpr(vDhi), mkexpr(vDlo), mkexpr(vNhi),
13309 mkexpr(vNlo), mkexpr(vMhi), mkexpr(vMlo)));
13310 stmt(IRStmt_Dirty(di));
13311 putQReg128(dd, mkexpr(res));
13312 switch (opc) {
13313 case BITS3(0,0,0): case BITS3(0,0,1): case BITS3(0,1,0):
13314 DIP("%s q%u, s%u, v%u.4s\n", inames[opc], dd, nn, mm);
13315 break;
13316 case BITS3(0,1,1): case BITS3(1,1,0):
13317 DIP("%s v%u.4s, v%u.4s, v%u.4s\n", inames[opc], dd, nn, mm);
13318 break;
13319 case BITS3(1,0,0): case BITS3(1,0,1):
13320 DIP("%s q%u, q%u, v%u.4s\n", inames[opc], dd, nn, mm);
13321 break;
13322 default:
13323 vassert(0);
13325 return True;
13328 return False;
13329 # undef INSN
13333 static
13334 Bool dis_AdvSIMD_crypto_two_reg_sha(/*MB_OUT*/DisResult* dres, UInt insn)
13336 /* 31 28 23 21 16 11 9 4
13337 0101 1110 sz 10100 opc 10 n d
13338 Decode fields are: sz,opc
13340 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
13341 if (INSN(31,24) != BITS8(0,1,0,1,1,1,1,0)
13342 || INSN(21,17) != BITS5(1,0,1,0,0) || INSN(11,10) != BITS2(1,0)) {
13343 return False;
13345 UInt sz = INSN(23,22);
13346 UInt opc = INSN(16,12);
13347 UInt nn = INSN(9,5);
13348 UInt dd = INSN(4,0);
13349 if (sz == BITS2(0,0) && opc <= BITS5(0,0,0,1,0)) {
13350 /* -------- 00,00000 SHA1H Sd, Sn -------- */
13351 /* -------- 00,00001 SHA1SU1 Vd.4S, Vn.4S -------- */
13352 /* -------- 00,00010 SHA256SU0 Vd.4S, Vn.4S -------- */
13353 vassert(opc < 3);
13354 const HChar* inames[3] = { "sha1h", "sha1su1", "sha256su0" };
13355 IRTemp vD = newTemp(Ity_V128);
13356 IRTemp vN = newTemp(Ity_V128);
13357 IRTemp vDhi = newTemp(Ity_I64);
13358 IRTemp vDlo = newTemp(Ity_I64);
13359 IRTemp vNhi = newTemp(Ity_I64);
13360 IRTemp vNlo = newTemp(Ity_I64);
13361 assign(vD, getQReg128(dd));
13362 assign(vN, getQReg128(nn));
13363 assign(vDhi, unop(Iop_V128HIto64, mkexpr(vD)));
13364 assign(vDlo, unop(Iop_V128to64, mkexpr(vD)));
13365 assign(vNhi, unop(Iop_V128HIto64, mkexpr(vN)));
13366 assign(vNlo, unop(Iop_V128to64, mkexpr(vN)));
13367 /* Mask off any bits of the N register operand that aren't actually
13368 needed, so that Memcheck doesn't complain unnecessarily. Also
13369 construct the calls, given that the helper functions don't take
13370 the same number of arguments. */
13371 IRDirty* di = NULL;
13372 IRTemp res = newTemp(Ity_V128);
13373 switch (opc) {
13374 case BITS5(0,0,0,0,0): {
13375 IRExpr* vNloMasked = unop(Iop_32Uto64,
13376 unop(Iop_64to32, mkexpr(vNlo)));
13377 di = unsafeIRDirty_1_N( res, 0/*regparms*/,
13378 "arm64g_dirtyhelper_SHA1H",
13379 &arm64g_dirtyhelper_SHA1H,
13380 mkIRExprVec_3(
13381 IRExpr_VECRET(),
13382 mkU64(0), vNloMasked) );
13383 break;
13385 case BITS5(0,0,0,0,1):
13386 di = unsafeIRDirty_1_N( res, 0/*regparms*/,
13387 "arm64g_dirtyhelper_SHA1SU1",
13388 &arm64g_dirtyhelper_SHA1SU1,
13389 mkIRExprVec_5(
13390 IRExpr_VECRET(),
13391 mkexpr(vDhi), mkexpr(vDlo),
13392 mkexpr(vNhi), mkexpr(vNlo)) );
13393 break;
13394 case BITS5(0,0,0,1,0):
13395 di = unsafeIRDirty_1_N( res, 0/*regparms*/,
13396 "arm64g_dirtyhelper_SHA256SU0",
13397 &arm64g_dirtyhelper_SHA256SU0,
13398 mkIRExprVec_5(
13399 IRExpr_VECRET(),
13400 mkexpr(vDhi), mkexpr(vDlo),
13401 mkexpr(vNhi), mkexpr(vNlo)) );
13402 break;
13403 default:
13404 vassert(0);
13406 stmt(IRStmt_Dirty(di));
13407 putQReg128(dd, mkexpr(res));
13408 switch (opc) {
13409 case BITS5(0,0,0,0,0):
13410 DIP("%s s%u, s%u\n", inames[opc], dd, nn);
13411 break;
13412 case BITS5(0,0,0,0,1): case BITS5(0,0,0,1,0):
13413 DIP("%s v%u.4s, v%u.4s\n", inames[opc], dd, nn);
13414 break;
13415 default:
13416 vassert(0);
13418 return True;
13421 return False;
13422 # undef INSN
13426 static
13427 Bool dis_AdvSIMD_fp_compare(/*MB_OUT*/DisResult* dres, UInt insn)
13429 /* 31 28 23 21 20 15 13 9 4
13430 000 11110 ty 1 m op 1000 n opcode2
13431 The first 3 bits are really "M 0 S", but M and S are always zero.
13432 Decode fields are: ty,op,opcode2
13434 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
13435 if (INSN(31,24) != BITS8(0,0,0,1,1,1,1,0)
13436 || INSN(21,21) != 1 || INSN(13,10) != BITS4(1,0,0,0)) {
13437 return False;
13439 UInt ty = INSN(23,22);
13440 UInt mm = INSN(20,16);
13441 UInt op = INSN(15,14);
13442 UInt nn = INSN(9,5);
13443 UInt opcode2 = INSN(4,0);
13444 vassert(ty < 4);
13446 if (ty <= X01 && op == X00
13447 && (opcode2 & BITS5(0,0,1,1,1)) == BITS5(0,0,0,0,0)) {
13448 /* -------- 0x,00,00000 FCMP d_d, s_s -------- */
13449 /* -------- 0x,00,01000 FCMP d_#0, s_#0 -------- */
13450 /* -------- 0x,00,10000 FCMPE d_d, s_s -------- */
13451 /* -------- 0x,00,11000 FCMPE d_#0, s_#0 -------- */
13452 /* 31 23 20 15 9 4
13453 000 11110 01 1 m 00 1000 n 10 000 FCMPE Dn, Dm
13454 000 11110 01 1 00000 00 1000 n 11 000 FCMPE Dn, #0.0
13455 000 11110 01 1 m 00 1000 n 00 000 FCMP Dn, Dm
13456 000 11110 01 1 00000 00 1000 n 01 000 FCMP Dn, #0.0
13458 000 11110 00 1 m 00 1000 n 10 000 FCMPE Sn, Sm
13459 000 11110 00 1 00000 00 1000 n 11 000 FCMPE Sn, #0.0
13460 000 11110 00 1 m 00 1000 n 00 000 FCMP Sn, Sm
13461 000 11110 00 1 00000 00 1000 n 01 000 FCMP Sn, #0.0
13463 FCMPE generates Invalid Operation exn if either arg is any kind
13464 of NaN. FCMP generates Invalid Operation exn if either arg is a
13465 signalling NaN. We ignore this detail here and produce the same
13466 IR for both.
13468 Bool isD = (ty & 1) == 1;
13469 Bool isCMPE = (opcode2 & 16) == 16;
13470 Bool cmpZero = (opcode2 & 8) == 8;
13471 IRType ity = isD ? Ity_F64 : Ity_F32;
13472 Bool valid = True;
13473 if (cmpZero && mm != 0) valid = False;
13474 if (valid) {
13475 IRTemp argL = newTemp(ity);
13476 IRTemp argR = newTemp(ity);
13477 IRTemp irRes = newTemp(Ity_I32);
13478 assign(argL, getQRegLO(nn, ity));
13479 assign(argR,
13480 cmpZero
13481 ? (IRExpr_Const(isD ? IRConst_F64i(0) : IRConst_F32i(0)))
13482 : getQRegLO(mm, ity));
13483 assign(irRes, binop(isD ? Iop_CmpF64 : Iop_CmpF32,
13484 mkexpr(argL), mkexpr(argR)));
13485 IRTemp nzcv = mk_convert_IRCmpF64Result_to_NZCV(irRes);
13486 IRTemp nzcv_28x0 = newTemp(Ity_I64);
13487 assign(nzcv_28x0, binop(Iop_Shl64, mkexpr(nzcv), mkU8(28)));
13488 setFlags_COPY(nzcv_28x0);
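         /* Editorial illustration (not in the original source): the IR
            compare result is mapped by mk_convert_IRCmpF64Result_to_NZCV to
            ARM's NZCV nibble -- EQ -> 0110, LT -> 1000, GT -> 0010,
            unordered -> 0011 -- and the shift by 28 parks that nibble in
            bits 31:28 of the flags value.  For instance, comparing 1.0
            against 2.0 gives LT, so NZCV = 0b1000 and the value copied to
            the flag thunk is 0x80000000. */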
13489 DIP("fcmp%s %s, %s\n", isCMPE ? "e" : "", nameQRegLO(nn, ity),
13490 cmpZero ? "#0.0" : nameQRegLO(mm, ity));
13491 return True;
13493 return False;
13496 return False;
13497 # undef INSN
13501 static
13502 Bool dis_AdvSIMD_fp_conditional_compare(/*MB_OUT*/DisResult* dres, UInt insn)
13504 /* 31 28 23 21 20 15 11 9 4 3
13505 000 11110 ty 1 m cond 01 n op nzcv
13506 The first 3 bits are really "M 0 S", but M and S are always zero.
13507 Decode fields are: ty,op
13509 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
13510 if (INSN(31,24) != BITS8(0,0,0,1,1,1,1,0)
13511 || INSN(21,21) != 1 || INSN(11,10) != BITS2(0,1)) {
13512 return False;
13514 UInt ty = INSN(23,22);
13515 UInt mm = INSN(20,16);
13516 UInt cond = INSN(15,12);
13517 UInt nn = INSN(9,5);
13518 UInt op = INSN(4,4);
13519 UInt nzcv = INSN(3,0);
13520 vassert(ty < 4 && op <= 1);
13522 if (ty <= BITS2(0,1)) {
13523 /* -------- 00,0 FCCMP s_s -------- */
13524 /* -------- 00,1 FCCMPE s_s -------- */
13525 /* -------- 01,0 FCCMP d_d -------- */
13526 /* -------- 01,1 FCCMPE d_d -------- */
13528 /* FCCMPE generates Invalid Operation exn if either arg is any kind
13529 of NaN. FCCMP generates Invalid Operation exn if either arg is a
13530 signalling NaN. We ignore this detail here and produce the same
13531 IR for both.
13533 Bool isD = (ty & 1) == 1;
13534 Bool isCMPE = op == 1;
13535 IRType ity = isD ? Ity_F64 : Ity_F32;
13536 IRTemp argL = newTemp(ity);
13537 IRTemp argR = newTemp(ity);
13538 IRTemp irRes = newTemp(Ity_I32);
13539 assign(argL, getQRegLO(nn, ity));
13540 assign(argR, getQRegLO(mm, ity));
13541 assign(irRes, binop(isD ? Iop_CmpF64 : Iop_CmpF32,
13542 mkexpr(argL), mkexpr(argR)));
13543 IRTemp condT = newTemp(Ity_I1);
13544 assign(condT, unop(Iop_64to1, mk_arm64g_calculate_condition(cond)));
13545 IRTemp nzcvT = mk_convert_IRCmpF64Result_to_NZCV(irRes);
13547 IRTemp nzcvT_28x0 = newTemp(Ity_I64);
13548 assign(nzcvT_28x0, binop(Iop_Shl64, mkexpr(nzcvT), mkU8(28)));
13550 IRExpr* nzcvF_28x0 = mkU64(((ULong)nzcv) << 28);
13552 IRTemp nzcv_28x0 = newTemp(Ity_I64);
13553 assign(nzcv_28x0, IRExpr_ITE(mkexpr(condT),
13554 mkexpr(nzcvT_28x0), nzcvF_28x0));
13555 setFlags_COPY(nzcv_28x0);
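      /* Editorial illustration: if the condition holds, the flags come from
         the comparison just built; otherwise they are the literal nzcv field
         from the instruction.  E.g. "fccmp d0, d1, #0x4, eq" compares d0
         with d1 when EQ is true and otherwise simply sets NZCV = 0100
         (Z set). */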
13556 DIP("fccmp%s %s, %s, #%u, %s\n", isCMPE ? "e" : "",
13557 nameQRegLO(nn, ity), nameQRegLO(mm, ity), nzcv, nameCC(cond));
13558 return True;
13561 return False;
13562 # undef INSN
13566 static
13567 Bool dis_AdvSIMD_fp_conditional_select(/*MB_OUT*/DisResult* dres, UInt insn)
13569 /* 31 23 21 20 15 11 9 5
13570 000 11110 ty 1 m cond 11 n d
13571 The first 3 bits are really "M 0 S", but M and S are always zero.
13572 Decode fields: ty
13574 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
13575 if (INSN(31,24) != BITS8(0,0,0,1,1,1,1,0) || INSN(21,21) != 1
13576 || INSN(11,10) != BITS2(1,1)) {
13577 return False;
13579 UInt ty = INSN(23,22);
13580 UInt mm = INSN(20,16);
13581 UInt cond = INSN(15,12);
13582 UInt nn = INSN(9,5);
13583 UInt dd = INSN(4,0);
13584 if (ty <= X01) {
13585 /* -------- 00: FCSEL s_s -------- */
13586       /* -------- 01: FCSEL d_d -------- */
13587 IRType ity = ty == X01 ? Ity_F64 : Ity_F32;
13588 IRTemp srcT = newTemp(ity);
13589 IRTemp srcF = newTemp(ity);
13590 IRTemp res = newTemp(ity);
13591 assign(srcT, getQRegLO(nn, ity));
13592 assign(srcF, getQRegLO(mm, ity));
13593 assign(res, IRExpr_ITE(
13594 unop(Iop_64to1, mk_arm64g_calculate_condition(cond)),
13595 mkexpr(srcT), mkexpr(srcF)));
13596 putQReg128(dd, mkV128(0x0000));
13597 putQRegLO(dd, mkexpr(res));
13598 DIP("fcsel %s, %s, %s, %s\n",
13599 nameQRegLO(dd, ity), nameQRegLO(nn, ity), nameQRegLO(mm, ity),
13600 nameCC(cond));
13601 return True;
13603 return False;
13604 # undef INSN
13608 static
13609 Bool dis_AdvSIMD_fp_data_proc_1_source(/*MB_OUT*/DisResult* dres, UInt insn)
13611 /* 31 28 23 21 20 14 9 4
13612 000 11110 ty 1 opcode 10000 n d
13613 The first 3 bits are really "M 0 S", but M and S are always zero.
13614 Decode fields: ty,opcode
13616 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
13617 if (INSN(31,24) != BITS8(0,0,0,1,1,1,1,0)
13618 || INSN(21,21) != 1 || INSN(14,10) != BITS5(1,0,0,0,0)) {
13619 return False;
13621 UInt ty = INSN(23,22);
13622 UInt opcode = INSN(20,15);
13623 UInt nn = INSN(9,5);
13624 UInt dd = INSN(4,0);
13626 if (ty <= X01 && opcode <= BITS6(0,0,0,0,1,1)) {
13627 /* -------- 0x,000000: FMOV d_d, s_s -------- */
13628 /* -------- 0x,000001: FABS d_d, s_s -------- */
13629 /* -------- 0x,000010: FNEG d_d, s_s -------- */
13630 /* -------- 0x,000011: FSQRT d_d, s_s -------- */
13631 IRType ity = ty == X01 ? Ity_F64 : Ity_F32;
13632 IRTemp src = newTemp(ity);
13633 IRTemp res = newTemp(ity);
13634 const HChar* nm = "??";
13635 assign(src, getQRegLO(nn, ity));
13636 switch (opcode) {
13637 case BITS6(0,0,0,0,0,0):
13638 nm = "fmov"; assign(res, mkexpr(src)); break;
13639 case BITS6(0,0,0,0,0,1):
13640 nm = "fabs"; assign(res, unop(mkABSF(ity), mkexpr(src))); break;
13641 case BITS6(0,0,0,0,1,0):
13642             nm = "fneg"; assign(res, unop(mkNEGF(ity), mkexpr(src))); break;
13643 case BITS6(0,0,0,0,1,1):
13644 nm = "fsqrt";
13645 assign(res, binop(mkSQRTF(ity),
13646 mkexpr(mk_get_IR_rounding_mode()),
13647 mkexpr(src))); break;
13648 default:
13649 vassert(0);
13651 putQReg128(dd, mkV128(0x0000));
13652 putQRegLO(dd, mkexpr(res));
13653 DIP("%s %s, %s\n", nm, nameQRegLO(dd, ity), nameQRegLO(nn, ity));
13654 return True;
13657 if ( (ty == X11 && (opcode == BITS6(0,0,0,1,0,0)
13658 || opcode == BITS6(0,0,0,1,0,1)))
13659 || (ty == X00 && (opcode == BITS6(0,0,0,1,1,1)
13660 || opcode == BITS6(0,0,0,1,0,1)))
13661 || (ty == X01 && (opcode == BITS6(0,0,0,1,1,1)
13662 || opcode == BITS6(0,0,0,1,0,0)))) {
13663 /* -------- 11,000100: FCVT s_h -------- */
13664 /* -------- 11,000101: FCVT d_h -------- */
13665 /* -------- 00,000111: FCVT h_s -------- */
13666 /* -------- 00,000101: FCVT d_s -------- */
13667 /* -------- 01,000111: FCVT h_d -------- */
13668 /* -------- 01,000100: FCVT s_d -------- */
13669 /* 31 23 21 16 14 9 4
13670 000 11110 11 10001 00 10000 n d FCVT Sd, Hn
13671 --------- 11 ----- 01 --------- FCVT Dd, Hn
13672 --------- 00 ----- 11 --------- FCVT Hd, Sn
13673 --------- 00 ----- 01 --------- FCVT Dd, Sn
13674 --------- 01 ----- 11 --------- FCVT Hd, Dn
13675 --------- 01 ----- 00 --------- FCVT Sd, Dn
13676 Rounding, when dst is smaller than src, is per the FPCR.
13678 UInt b2322 = ty;
13679 UInt b1615 = opcode & BITS2(1,1);
13680 switch ((b2322 << 2) | b1615) {
13681 case BITS4(0,0,0,1): // S -> D
13682 case BITS4(1,1,0,1): { // H -> D
13683 Bool srcIsH = b2322 == BITS2(1,1);
13684 IRType srcTy = srcIsH ? Ity_F16 : Ity_F32;
13685 IRTemp res = newTemp(Ity_F64);
13686 assign(res, unop(srcIsH ? Iop_F16toF64 : Iop_F32toF64,
13687 getQRegLO(nn, srcTy)));
13688 putQReg128(dd, mkV128(0x0000));
13689 putQRegLO(dd, mkexpr(res));
13690 DIP("fcvt %s, %s\n",
13691 nameQRegLO(dd, Ity_F64), nameQRegLO(nn, srcTy));
13692 return True;
13694 case BITS4(0,1,0,0): // D -> S
13695 case BITS4(0,1,1,1): { // D -> H
13696 Bool dstIsH = b1615 == BITS2(1,1);
13697 IRType dstTy = dstIsH ? Ity_F16 : Ity_F32;
13698 IRTemp res = newTemp(dstTy);
13699 assign(res, binop(dstIsH ? Iop_F64toF16 : Iop_F64toF32,
13700 mkexpr(mk_get_IR_rounding_mode()),
13701 getQRegLO(nn, Ity_F64)));
13702 putQReg128(dd, mkV128(0x0000));
13703 putQRegLO(dd, mkexpr(res));
13704 DIP("fcvt %s, %s\n",
13705 nameQRegLO(dd, dstTy), nameQRegLO(nn, Ity_F64));
13706 return True;
13708 case BITS4(0,0,1,1): // S -> H
13709 case BITS4(1,1,0,0): { // H -> S
13710 Bool toH = b1615 == BITS2(1,1);
13711 IRType srcTy = toH ? Ity_F32 : Ity_F16;
13712 IRType dstTy = toH ? Ity_F16 : Ity_F32;
13713 IRTemp res = newTemp(dstTy);
13714 if (toH) {
13715 assign(res, binop(Iop_F32toF16,
13716 mkexpr(mk_get_IR_rounding_mode()),
13717 getQRegLO(nn, srcTy)));
13719 } else {
13720 assign(res, unop(Iop_F16toF32,
13721 getQRegLO(nn, srcTy)));
13723 putQReg128(dd, mkV128(0x0000));
13724 putQRegLO(dd, mkexpr(res));
13725 DIP("fcvt %s, %s\n",
13726 nameQRegLO(dd, dstTy), nameQRegLO(nn, srcTy));
13727 return True;
13729 default:
13730 break;
13732 /* else unhandled */
13733 return False;
13736 if (ty <= X01
13737 && opcode >= BITS6(0,0,1,0,0,0) && opcode <= BITS6(0,0,1,1,1,1)
13738 && opcode != BITS6(0,0,1,1,0,1)) {
13739 /* -------- 0x,001000 FRINTN d_d, s_s -------- */
13740 /* -------- 0x,001001 FRINTP d_d, s_s -------- */
13741 /* -------- 0x,001010 FRINTM d_d, s_s -------- */
13742 /* -------- 0x,001011 FRINTZ d_d, s_s -------- */
13743 /* -------- 0x,001100 FRINTA d_d, s_s -------- */
13744 /* -------- 0x,001110 FRINTX d_d, s_s -------- */
13745 /* -------- 0x,001111 FRINTI d_d, s_s -------- */
13746 /* 31 23 21 17 14 9 4
13747 000 11110 0x 1001 111 10000 n d FRINTI Fd, Fm (round per FPCR)
13749 x==0 => S-registers, x==1 => D-registers
13750 rm (17:15) encodings:
13751 111 per FPCR (FRINTI)
13752 001 +inf (FRINTP)
13753 010 -inf (FRINTM)
13754 011 zero (FRINTZ)
13755 000 tieeven (FRINTN) -- !! FIXME KLUDGED !!
13756 100 tieaway (FRINTA) -- !! FIXME KLUDGED !!
13757 110 per FPCR + "exact = TRUE" (FRINTX)
13758 101 unallocated
13760 Bool isD = (ty & 1) == 1;
13761 UInt rm = opcode & BITS6(0,0,0,1,1,1);
13762 IRType ity = isD ? Ity_F64 : Ity_F32;
13763 IRExpr* irrmE = NULL;
13764 UChar ch = '?';
13765 switch (rm) {
13766 case BITS3(0,1,1): ch = 'z'; irrmE = mkU32(Irrm_ZERO); break;
13767 case BITS3(0,1,0): ch = 'm'; irrmE = mkU32(Irrm_NegINF); break;
13768 case BITS3(0,0,1): ch = 'p'; irrmE = mkU32(Irrm_PosINF); break;
13769 // The following is a kludge. Should be: Irrm_NEAREST_TIE_AWAY_0
13770 case BITS3(1,0,0): ch = 'a'; irrmE = mkU32(Irrm_NEAREST); break;
13771             // "Integral exact" appears to mean that FRINTX additionally raises
13772             // Inexact when the result differs from the operand; that isn't modelled here.
13773 case BITS3(1,1,0):
13774 ch = 'x'; irrmE = mkexpr(mk_get_IR_rounding_mode()); break;
13775 case BITS3(1,1,1):
13776 ch = 'i'; irrmE = mkexpr(mk_get_IR_rounding_mode()); break;
13777 // The following is a kludge. There's no Irrm_ value to represent
13778 // this ("to nearest, with ties to even")
13779 case BITS3(0,0,0): ch = 'n'; irrmE = mkU32(Irrm_NEAREST); break;
13780 default: break;
13782 if (irrmE) {
13783 IRTemp src = newTemp(ity);
13784 IRTemp dst = newTemp(ity);
13785 assign(src, getQRegLO(nn, ity));
13786 assign(dst, binop(isD ? Iop_RoundF64toInt : Iop_RoundF32toInt,
13787 irrmE, mkexpr(src)));
13788 putQReg128(dd, mkV128(0x0000));
13789 putQRegLO(dd, mkexpr(dst));
13790 DIP("frint%c %s, %s\n",
13791 ch, nameQRegLO(dd, ity), nameQRegLO(nn, ity));
13792 return True;
13794 return False;
13797 return False;
13798 # undef INSN
13802 static
13803 Bool dis_AdvSIMD_fp_data_proc_2_source(/*MB_OUT*/DisResult* dres, UInt insn)
13805 /* 31 28 23 21 20 15 11 9 4
13806 000 11110 ty 1 m opcode 10 n d
13807 The first 3 bits are really "M 0 S", but M and S are always zero.
13808 Decode fields: ty, opcode
13810 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
13811 if (INSN(31,24) != BITS8(0,0,0,1,1,1,1,0)
13812 || INSN(21,21) != 1 || INSN(11,10) != BITS2(1,0)) {
13813 return False;
13815 UInt ty = INSN(23,22);
13816 UInt mm = INSN(20,16);
13817 UInt opcode = INSN(15,12);
13818 UInt nn = INSN(9,5);
13819 UInt dd = INSN(4,0);
13821 if (ty <= X01 && opcode <= BITS4(0,1,1,1)) {
13822 /* ------- 0x,0000: FMUL d_d, s_s ------- */
13823 /* ------- 0x,0001: FDIV d_d, s_s ------- */
13824 /* ------- 0x,0010: FADD d_d, s_s ------- */
13825 /* ------- 0x,0011: FSUB d_d, s_s ------- */
13826 /* ------- 0x,0100: FMAX d_d, s_s ------- */
13827 /* ------- 0x,0101: FMIN d_d, s_s ------- */
13828 /* ------- 0x,0110: FMAXNM d_d, s_s ------- (FIXME KLUDGED) */
13829 /* ------- 0x,0111: FMINNM d_d, s_s ------- (FIXME KLUDGED) */
13830 IRType ity = ty == X00 ? Ity_F32 : Ity_F64;
13831 IROp iop = Iop_INVALID;
13832 const HChar* nm = "???";
13833 switch (opcode) {
13834 case BITS4(0,0,0,0): nm = "fmul"; iop = mkMULF(ity); break;
13835 case BITS4(0,0,0,1): nm = "fdiv"; iop = mkDIVF(ity); break;
13836 case BITS4(0,0,1,0): nm = "fadd"; iop = mkADDF(ity); break;
13837 case BITS4(0,0,1,1): nm = "fsub"; iop = mkSUBF(ity); break;
13838 case BITS4(0,1,0,0): nm = "fmax"; iop = mkVecMAXF(ty+2); break;
13839 case BITS4(0,1,0,1): nm = "fmin"; iop = mkVecMINF(ty+2); break;
13840 case BITS4(0,1,1,0): nm = "fmaxnm"; iop = mkVecMAXF(ty+2); break; //!!
13841 case BITS4(0,1,1,1): nm = "fminnm"; iop = mkVecMINF(ty+2); break; //!!
13842 default: vassert(0);
13844 if (opcode <= BITS4(0,0,1,1)) {
13845 // This is really not good code. TODO: avoid width-changing
13846 IRTemp res = newTemp(ity);
13847 assign(res, triop(iop, mkexpr(mk_get_IR_rounding_mode()),
13848 getQRegLO(nn, ity), getQRegLO(mm, ity)));
13849 putQReg128(dd, mkV128(0));
13850 putQRegLO(dd, mkexpr(res));
13851 } else {
13852 putQReg128(dd, unop(mkVecZEROHIxxOFV128(ty+2),
13853 binop(iop, getQReg128(nn), getQReg128(mm))));
13855 DIP("%s %s, %s, %s\n",
13856 nm, nameQRegLO(dd, ity), nameQRegLO(nn, ity), nameQRegLO(mm, ity));
13857 return True;
13860 if (ty <= X01 && opcode == BITS4(1,0,0,0)) {
13861 /* ------- 0x,1000: FNMUL d_d, s_s ------- */
13862 IRType ity = ty == X00 ? Ity_F32 : Ity_F64;
13863 IROp iop = mkMULF(ity);
13864 IROp iopn = mkNEGF(ity);
13865 const HChar* nm = "fnmul";
13866 IRExpr* resE = unop(iopn,
13867 triop(iop, mkexpr(mk_get_IR_rounding_mode()),
13868 getQRegLO(nn, ity), getQRegLO(mm, ity)));
13869 IRTemp res = newTemp(ity);
13870 assign(res, resE);
13871 putQReg128(dd, mkV128(0));
13872 putQRegLO(dd, mkexpr(res));
13873 DIP("%s %s, %s, %s\n",
13874 nm, nameQRegLO(dd, ity), nameQRegLO(nn, ity), nameQRegLO(mm, ity));
13875 return True;
13878 return False;
13879 # undef INSN
13883 static
13884 Bool dis_AdvSIMD_fp_data_proc_3_source(/*MB_OUT*/DisResult* dres, UInt insn)
13886 /* 31 28 23 21 20 15 14 9 4
13887 000 11111 ty o1 m o0 a n d
13888 The first 3 bits are really "M 0 S", but M and S are always zero.
13889 Decode fields: ty,o1,o0
13891 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
13892 if (INSN(31,24) != BITS8(0,0,0,1,1,1,1,1)) {
13893 return False;
13895 UInt ty = INSN(23,22);
13896 UInt bitO1 = INSN(21,21);
13897 UInt mm = INSN(20,16);
13898 UInt bitO0 = INSN(15,15);
13899 UInt aa = INSN(14,10);
13900 UInt nn = INSN(9,5);
13901 UInt dd = INSN(4,0);
13902 vassert(ty < 4);
13904 if (ty <= X01) {
13905 /* -------- 0x,0,0 FMADD d_d_d_d, s_s_s_s -------- */
13906 /* -------- 0x,0,1 FMSUB d_d_d_d, s_s_s_s -------- */
13907 /* -------- 0x,1,0 FNMADD d_d_d_d, s_s_s_s -------- */
13908 /* -------- 0x,1,1 FNMSUB d_d_d_d, s_s_s_s -------- */
13909 /* -------------------- F{N}M{ADD,SUB} -------------------- */
13910 /* 31 22 20 15 14 9 4 ix
13911 000 11111 0 sz 0 m 0 a n d 0 FMADD Fd,Fn,Fm,Fa
13912 000 11111 0 sz 0 m 1 a n d 1 FMSUB Fd,Fn,Fm,Fa
13913 000 11111 0 sz 1 m 0 a n d 2 FNMADD Fd,Fn,Fm,Fa
13914 000 11111 0 sz 1 m 1 a n d 3 FNMSUB Fd,Fn,Fm,Fa
13915 where Fx=Dx when sz=1, Fx=Sx when sz=0
13917 -----SPEC------ ----IMPL----
13918 fmadd a + n * m a + n * m
13919 fmsub a + (-n) * m a - n * m
13920 fnmadd (-a) + (-n) * m -(a + n * m)
13921 fnmsub (-a) + n * m -(a - n * m)
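         Worked example (editorial): FNMSUB has o1=1, o0=1, so ix = 3 below
         and the IR computes -(a - n*m) = -a + n*m, matching the SPEC column.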
13923 Bool isD = (ty & 1) == 1;
13924 UInt ix = (bitO1 << 1) | bitO0;
13925 IRType ity = isD ? Ity_F64 : Ity_F32;
13926 IROp opADD = mkADDF(ity);
13927 IROp opSUB = mkSUBF(ity);
13928 IROp opMUL = mkMULF(ity);
13929 IROp opNEG = mkNEGF(ity);
13930 IRTemp res = newTemp(ity);
13931 IRExpr* eA = getQRegLO(aa, ity);
13932 IRExpr* eN = getQRegLO(nn, ity);
13933 IRExpr* eM = getQRegLO(mm, ity);
13934 IRExpr* rm = mkexpr(mk_get_IR_rounding_mode());
13935 IRExpr* eNxM = triop(opMUL, rm, eN, eM);
13936 switch (ix) {
13937 case 0: assign(res, triop(opADD, rm, eA, eNxM)); break;
13938 case 1: assign(res, triop(opSUB, rm, eA, eNxM)); break;
13939 case 2: assign(res, unop(opNEG, triop(opADD, rm, eA, eNxM))); break;
13940 case 3: assign(res, unop(opNEG, triop(opSUB, rm, eA, eNxM))); break;
13941 default: vassert(0);
13943 putQReg128(dd, mkV128(0x0000));
13944 putQRegLO(dd, mkexpr(res));
13945 const HChar* names[4] = { "fmadd", "fmsub", "fnmadd", "fnmsub" };
13946 DIP("%s %s, %s, %s, %s\n",
13947 names[ix], nameQRegLO(dd, ity), nameQRegLO(nn, ity),
13948 nameQRegLO(mm, ity), nameQRegLO(aa, ity));
13949 return True;
13952 return False;
13953 # undef INSN
13957 static
13958 Bool dis_AdvSIMD_fp_immediate(/*MB_OUT*/DisResult* dres, UInt insn)
13960 /* 31 28 23 21 20 12 9 4
13961 000 11110 ty 1 imm8 100 imm5 d
13962 The first 3 bits are really "M 0 S", but M and S are always zero.
13964 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
13965 if (INSN(31,24) != BITS8(0,0,0,1,1,1,1,0)
13966 || INSN(21,21) != 1 || INSN(12,10) != BITS3(1,0,0)) {
13967 return False;
13969 UInt ty = INSN(23,22);
13970 UInt imm8 = INSN(20,13);
13971 UInt imm5 = INSN(9,5);
13972 UInt dd = INSN(4,0);
13974 /* ------- 00,00000: FMOV s_imm ------- */
13975 /* ------- 01,00000: FMOV d_imm ------- */
13976 if (ty <= X01 && imm5 == BITS5(0,0,0,0,0)) {
13977 Bool isD = (ty & 1) == 1;
13978 ULong imm = VFPExpandImm(imm8, isD ? 64 : 32);
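      /* Editorial example: imm8 is the usual 8-bit VFP immediate; e.g.
         VFPExpandImm(0x70, ..) yields 1.0 in both the 32- and 64-bit cases,
         which is what "fmov s0, #1.0" encodes. */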
13979 if (!isD) {
13980 vassert(0 == (imm & 0xFFFFFFFF00000000ULL));
13982 putQReg128(dd, mkV128(0));
13983 putQRegLO(dd, isD ? mkU64(imm) : mkU32(imm & 0xFFFFFFFFULL));
13984 DIP("fmov %s, #0x%llx\n",
13985 nameQRegLO(dd, isD ? Ity_F64 : Ity_F32), imm);
13986 return True;
13989 return False;
13990 # undef INSN
13994 static
13995 Bool dis_AdvSIMD_fp_to_from_fixedp_conv(/*MB_OUT*/DisResult* dres, UInt insn)
13998 /* 31 30 29 28 23 21 20 18 15 9 4
13999 sf 0 0 11110 type 0 rmode opcode scale n d
14000 The first 3 bits are really "sf 0 S", but S is always zero.
14001 Decode fields: sf,type,rmode,opcode
14003 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
14004 if (INSN(30,29) != BITS2(0,0)
14005 || INSN(28,24) != BITS5(1,1,1,1,0)
14006 || INSN(21,21) != 0) {
14007 return False;
14009 UInt bitSF = INSN(31,31);
14010 UInt ty = INSN(23,22); // type
14011 UInt rm = INSN(20,19); // rmode
14012 UInt op = INSN(18,16); // opcode
14013 UInt sc = INSN(15,10); // scale
14014 UInt nn = INSN(9,5);
14015 UInt dd = INSN(4,0);
14017 if (ty <= X01 && rm == X11
14018 && (op == BITS3(0,0,0) || op == BITS3(0,0,1))) {
14019 /* -------- (ix) sf ty rm opc -------- */
14020 /* -------- 0 0 00 11 000: FCVTZS w_s_#fbits -------- */
14021 /* -------- 1 0 01 11 000: FCVTZS w_d_#fbits -------- */
14022 /* -------- 2 1 00 11 000: FCVTZS x_s_#fbits -------- */
14023 /* -------- 3 1 01 11 000: FCVTZS x_d_#fbits -------- */
14025 /* -------- 4 0 00 11 001: FCVTZU w_s_#fbits -------- */
14026 /* -------- 5 0 01 11 001: FCVTZU w_d_#fbits -------- */
14027 /* -------- 6 1 00 11 001: FCVTZU x_s_#fbits -------- */
14028 /* -------- 7 1 01 11 001: FCVTZU x_d_#fbits -------- */
14029 Bool isI64 = bitSF == 1;
14030 Bool isF64 = (ty & 1) == 1;
14031 Bool isU = (op & 1) == 1;
14032 UInt ix = (isU ? 4 : 0) | (isI64 ? 2 : 0) | (isF64 ? 1 : 0);
14034 Int fbits = 64 - sc;
14035 vassert(fbits >= 1 && fbits <= (isI64 ? 64 : 32));
14037 Double scale = two_to_the_plus(fbits);
14038 IRExpr* scaleE = isF64 ? IRExpr_Const(IRConst_F64(scale))
14039 : IRExpr_Const(IRConst_F32( (Float)scale ));
14040 IROp opMUL = isF64 ? Iop_MulF64 : Iop_MulF32;
14042 const IROp ops[8]
14043 = { Iop_F32toI32S, Iop_F64toI32S, Iop_F32toI64S, Iop_F64toI64S,
14044 Iop_F32toI32U, Iop_F64toI32U, Iop_F32toI64U, Iop_F64toI64U };
14045 IRTemp irrm = newTemp(Ity_I32);
14046 assign(irrm, mkU32(Irrm_ZERO));
14048 IRExpr* src = getQRegLO(nn, isF64 ? Ity_F64 : Ity_F32);
14049 IRExpr* res = binop(ops[ix], mkexpr(irrm),
14050 triop(opMUL, mkexpr(irrm), src, scaleE));
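      /* Editorial worked example: the conversion implemented here is
         trunc(src * 2^fbits).  With fbits = 8, an Sn value of 1.5 becomes
         1.5 * 256.0 = 384.0, so FCVTZS writes 384 -- that is, 1.5 expressed
         with 8 fractional bits. */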
14051 putIRegOrZR(isI64, dd, res);
14053 DIP("fcvtz%c %s, %s, #%d\n",
14054 isU ? 'u' : 's', nameIRegOrZR(isI64, dd),
14055 nameQRegLO(nn, isF64 ? Ity_F64 : Ity_F32), fbits);
14056 return True;
14059 /* ------ sf,ty,rm,opc ------ */
14060 /* ------ x,0x,00,010 SCVTF s/d, w/x, #fbits ------ */
14061 /* ------ x,0x,00,011 UCVTF s/d, w/x, #fbits ------ */
14062 /* (ix) sf S 28 ty rm opc 15 9 4
14063 0 0 0 0 11110 00 0 00 010 scale n d SCVTF Sd, Wn, #fbits
14064 1 0 0 0 11110 01 0 00 010 scale n d SCVTF Dd, Wn, #fbits
14065 2 1 0 0 11110 00 0 00 010 scale n d SCVTF Sd, Xn, #fbits
14066 3 1 0 0 11110 01 0 00 010 scale n d SCVTF Dd, Xn, #fbits
14068 4 0 0 0 11110 00 0 00 011 scale n d UCVTF Sd, Wn, #fbits
14069 5 0 0 0 11110 01 0 00 011 scale n d UCVTF Dd, Wn, #fbits
14070 6 1 0 0 11110 00 0 00 011 scale n d UCVTF Sd, Xn, #fbits
14071 7 1 0 0 11110 01 0 00 011 scale n d UCVTF Dd, Xn, #fbits
14073       These are signed/unsigned conversions from integer registers to
14074 FP registers, all 4 32/64-bit combinations, rounded per FPCR,
14075 scaled per |scale|.
14077 if (ty <= X01 && rm == X00
14078 && (op == BITS3(0,1,0) || op == BITS3(0,1,1))
14079 && (bitSF == 1 || ((sc >> 5) & 1) == 1)) {
14080 Bool isI64 = bitSF == 1;
14081 Bool isF64 = (ty & 1) == 1;
14082 Bool isU = (op & 1) == 1;
14083 UInt ix = (isU ? 4 : 0) | (isI64 ? 2 : 0) | (isF64 ? 1 : 0);
14085 Int fbits = 64 - sc;
14086 vassert(fbits >= 1 && fbits <= (isI64 ? 64 : 32));
14088 Double scale = two_to_the_minus(fbits);
14089 IRExpr* scaleE = isF64 ? IRExpr_Const(IRConst_F64(scale))
14090 : IRExpr_Const(IRConst_F32( (Float)scale ));
14091 IROp opMUL = isF64 ? Iop_MulF64 : Iop_MulF32;
14093 const IROp ops[8]
14094 = { Iop_I32StoF32, Iop_I32StoF64, Iop_I64StoF32, Iop_I64StoF64,
14095 Iop_I32UtoF32, Iop_I32UtoF64, Iop_I64UtoF32, Iop_I64UtoF64 };
14096 IRExpr* src = getIRegOrZR(isI64, nn);
14097 IRExpr* res = (isF64 && !isI64)
14098 ? unop(ops[ix], src)
14099 : binop(ops[ix],
14100 mkexpr(mk_get_IR_rounding_mode()), src);
14101 putQReg128(dd, mkV128(0));
14102 putQRegLO(dd, triop(opMUL, mkU32(Irrm_NEAREST), res, scaleE));
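      /* Editorial worked example: this is the converse scaling -- the
         integer is first converted to FP and then multiplied by 2^-fbits.
         With fbits = 8, Wn = 384 gives 384.0 * (1.0/256.0) = 1.5 in Sd. */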
14104 DIP("%ccvtf %s, %s, #%d\n",
14105 isU ? 'u' : 's', nameQRegLO(dd, isF64 ? Ity_F64 : Ity_F32),
14106 nameIRegOrZR(isI64, nn), fbits);
14107 return True;
14110 return False;
14111 # undef INSN
14115 static
14116 Bool dis_AdvSIMD_fp_to_from_int_conv(/*MB_OUT*/DisResult* dres, UInt insn)
14118 /* 31 30 29 28 23 21 20 18 15 9 4
14119 sf 0 0 11110 type 1 rmode opcode 000000 n d
14120 The first 3 bits are really "sf 0 S", but S is always zero.
14121 Decode fields: sf,type,rmode,opcode
14123 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
14124 if (INSN(30,29) != BITS2(0,0)
14125 || INSN(28,24) != BITS5(1,1,1,1,0)
14126 || INSN(21,21) != 1
14127 || INSN(15,10) != BITS6(0,0,0,0,0,0)) {
14128 return False;
14130 UInt bitSF = INSN(31,31);
14131 UInt ty = INSN(23,22); // type
14132 UInt rm = INSN(20,19); // rmode
14133 UInt op = INSN(18,16); // opcode
14134 UInt nn = INSN(9,5);
14135 UInt dd = INSN(4,0);
14137 // op = 000, 001
14138 /* -------- FCVT{N,P,M,Z,A}{S,U} (scalar, integer) -------- */
14139 /* 30 23 20 18 15 9 4
14140 sf 00 11110 0x 1 00 000 000000 n d FCVTNS Rd, Fn (round to
14141 sf 00 11110 0x 1 00 001 000000 n d FCVTNU Rd, Fn nearest)
14142 ---------------- 01 -------------- FCVTP-------- (round to +inf)
14143 ---------------- 10 -------------- FCVTM-------- (round to -inf)
14144 ---------------- 11 -------------- FCVTZ-------- (round to zero)
14145 ---------------- 00 100 ---------- FCVTAS------- (nearest, ties away)
14146 ---------------- 00 101 ---------- FCVTAU------- (nearest, ties away)
14148 Rd is Xd when sf==1, Wd when sf==0
14149 Fn is Dn when x==1, Sn when x==0
14150 20:19 carry the rounding mode, using the same encoding as FPCR
14152 if (ty <= X01
14153 && ( ((op == BITS3(0,0,0) || op == BITS3(0,0,1)) && True)
14154 || ((op == BITS3(1,0,0) || op == BITS3(1,0,1)) && rm == BITS2(0,0))
14157 Bool isI64 = bitSF == 1;
14158 Bool isF64 = (ty & 1) == 1;
14159 Bool isU = (op & 1) == 1;
14160 /* Decide on the IR rounding mode to use. */
14161 IRRoundingMode irrm = 8; /*impossible*/
14162 HChar ch = '?';
14163 if (op == BITS3(0,0,0) || op == BITS3(0,0,1)) {
14164 switch (rm) {
14165 case BITS2(0,0): ch = 'n'; irrm = Irrm_NEAREST; break;
14166 case BITS2(0,1): ch = 'p'; irrm = Irrm_PosINF; break;
14167 case BITS2(1,0): ch = 'm'; irrm = Irrm_NegINF; break;
14168 case BITS2(1,1): ch = 'z'; irrm = Irrm_ZERO; break;
14169 default: vassert(0);
14171 } else {
14172 vassert(op == BITS3(1,0,0) || op == BITS3(1,0,1));
14173 switch (rm) {
14174 case BITS2(0,0): ch = 'a'; irrm = Irrm_NEAREST; break;
14175 default: vassert(0);
14178 vassert(irrm != 8);
14179 /* Decide on the conversion primop, based on the source size,
14180 dest size and signedness (8 possibilities). Case coding:
14181 F32 ->s I32 0
14182 F32 ->u I32 1
14183 F32 ->s I64 2
14184 F32 ->u I64 3
14185 F64 ->s I32 4
14186 F64 ->u I32 5
14187 F64 ->s I64 6
14188 F64 ->u I64 7
14190 UInt ix = (isF64 ? 4 : 0) | (isI64 ? 2 : 0) | (isU ? 1 : 0);
14191 vassert(ix < 8);
14192 const IROp iops[8]
14193 = { Iop_F32toI32S, Iop_F32toI32U, Iop_F32toI64S, Iop_F32toI64U,
14194 Iop_F64toI32S, Iop_F64toI32U, Iop_F64toI64S, Iop_F64toI64U };
14195 IROp iop = iops[ix];
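      /* Editorial example of the case coding above: FCVTMU Xd, Dn has
         isF64=1, isI64=1, isU=1, hence ix = 4+2+1 = 7, selecting
         Iop_F64toI64U, and rm = 10 selected Irrm_NegINF above. */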
14196 // A bit of ATCery: bounce all cases we haven't seen an example of.
14197 if (/* F32toI32S */
14198 (iop == Iop_F32toI32S && irrm == Irrm_ZERO) /* FCVTZS Wd,Sn */
14199 || (iop == Iop_F32toI32S && irrm == Irrm_NegINF) /* FCVTMS Wd,Sn */
14200 || (iop == Iop_F32toI32S && irrm == Irrm_PosINF) /* FCVTPS Wd,Sn */
14201 || (iop == Iop_F32toI32S && irrm == Irrm_NEAREST)/* FCVT{A,N}S W,S */
14202 /* F32toI32U */
14203 || (iop == Iop_F32toI32U && irrm == Irrm_ZERO) /* FCVTZU Wd,Sn */
14204 || (iop == Iop_F32toI32U && irrm == Irrm_NegINF) /* FCVTMU Wd,Sn */
14205 || (iop == Iop_F32toI32U && irrm == Irrm_PosINF) /* FCVTPU Wd,Sn */
14206 || (iop == Iop_F32toI32U && irrm == Irrm_NEAREST)/* FCVT{A,N}U W,S */
14207 /* F32toI64S */
14208 || (iop == Iop_F32toI64S && irrm == Irrm_ZERO) /* FCVTZS Xd,Sn */
14209 || (iop == Iop_F32toI64S && irrm == Irrm_NegINF) /* FCVTMS Xd,Sn */
14210 || (iop == Iop_F32toI64S && irrm == Irrm_PosINF) /* FCVTPS Xd,Sn */
14211 || (iop == Iop_F32toI64S && irrm == Irrm_NEAREST)/* FCVT{A,N}S X,S */
14212 /* F32toI64U */
14213 || (iop == Iop_F32toI64U && irrm == Irrm_ZERO) /* FCVTZU Xd,Sn */
14214 || (iop == Iop_F32toI64U && irrm == Irrm_NegINF) /* FCVTMU Xd,Sn */
14215 || (iop == Iop_F32toI64U && irrm == Irrm_PosINF) /* FCVTPU Xd,Sn */
14216 || (iop == Iop_F32toI64U && irrm == Irrm_NEAREST)/* FCVT{A,N}U X,S */
14217 /* F64toI32S */
14218 || (iop == Iop_F64toI32S && irrm == Irrm_ZERO) /* FCVTZS Wd,Dn */
14219 || (iop == Iop_F64toI32S && irrm == Irrm_NegINF) /* FCVTMS Wd,Dn */
14220 || (iop == Iop_F64toI32S && irrm == Irrm_PosINF) /* FCVTPS Wd,Dn */
14221 || (iop == Iop_F64toI32S && irrm == Irrm_NEAREST)/* FCVT{A,N}S W,D */
14222 /* F64toI32U */
14223 || (iop == Iop_F64toI32U && irrm == Irrm_ZERO) /* FCVTZU Wd,Dn */
14224 || (iop == Iop_F64toI32U && irrm == Irrm_NegINF) /* FCVTMU Wd,Dn */
14225 || (iop == Iop_F64toI32U && irrm == Irrm_PosINF) /* FCVTPU Wd,Dn */
14226 || (iop == Iop_F64toI32U && irrm == Irrm_NEAREST)/* FCVT{A,N}U W,D */
14227 /* F64toI64S */
14228 || (iop == Iop_F64toI64S && irrm == Irrm_ZERO) /* FCVTZS Xd,Dn */
14229 || (iop == Iop_F64toI64S && irrm == Irrm_NegINF) /* FCVTMS Xd,Dn */
14230 || (iop == Iop_F64toI64S && irrm == Irrm_PosINF) /* FCVTPS Xd,Dn */
14231 || (iop == Iop_F64toI64S && irrm == Irrm_NEAREST)/* FCVT{A,N}S X,D */
14232 /* F64toI64U */
14233 || (iop == Iop_F64toI64U && irrm == Irrm_ZERO) /* FCVTZU Xd,Dn */
14234 || (iop == Iop_F64toI64U && irrm == Irrm_NegINF) /* FCVTMU Xd,Dn */
14235 || (iop == Iop_F64toI64U && irrm == Irrm_PosINF) /* FCVTPU Xd,Dn */
14236 || (iop == Iop_F64toI64U && irrm == Irrm_NEAREST)/* FCVT{A,N}U X,D */
14238 /* validated */
14239 } else {
14240 return False;
14242 IRType srcTy = isF64 ? Ity_F64 : Ity_F32;
14243 IRType dstTy = isI64 ? Ity_I64 : Ity_I32;
14244 IRTemp src = newTemp(srcTy);
14245 IRTemp dst = newTemp(dstTy);
14246 assign(src, getQRegLO(nn, srcTy));
14247 assign(dst, binop(iop, mkU32(irrm), mkexpr(src)));
14248 putIRegOrZR(isI64, dd, mkexpr(dst));
14249 DIP("fcvt%c%c %s, %s\n", ch, isU ? 'u' : 's',
14250 nameIRegOrZR(isI64, dd), nameQRegLO(nn, srcTy));
14251 return True;
14254 // op = 010, 011
14255 /* -------------- {S,U}CVTF (scalar, integer) -------------- */
14256 /* (ix) sf S 28 ty rm op 15 9 4
14257 0 0 0 0 11110 00 1 00 010 000000 n d SCVTF Sd, Wn
14258 1 0 0 0 11110 01 1 00 010 000000 n d SCVTF Dd, Wn
14259 2 1 0 0 11110 00 1 00 010 000000 n d SCVTF Sd, Xn
14260 3 1 0 0 11110 01 1 00 010 000000 n d SCVTF Dd, Xn
14262 4 0 0 0 11110 00 1 00 011 000000 n d UCVTF Sd, Wn
14263 5 0 0 0 11110 01 1 00 011 000000 n d UCVTF Dd, Wn
14264 6 1 0 0 11110 00 1 00 011 000000 n d UCVTF Sd, Xn
14265 7 1 0 0 11110 01 1 00 011 000000 n d UCVTF Dd, Xn
14267       These are signed/unsigned conversions from integer registers to
14268 FP registers, all 4 32/64-bit combinations, rounded per FPCR.
14270 if (ty <= X01 && rm == X00 && (op == BITS3(0,1,0) || op == BITS3(0,1,1))) {
14271 Bool isI64 = bitSF == 1;
14272 Bool isF64 = (ty & 1) == 1;
14273 Bool isU = (op & 1) == 1;
14274 UInt ix = (isU ? 4 : 0) | (isI64 ? 2 : 0) | (isF64 ? 1 : 0);
14275 const IROp ops[8]
14276 = { Iop_I32StoF32, Iop_I32StoF64, Iop_I64StoF32, Iop_I64StoF64,
14277 Iop_I32UtoF32, Iop_I32UtoF64, Iop_I64UtoF32, Iop_I64UtoF64 };
14278 IRExpr* src = getIRegOrZR(isI64, nn);
14279 IRExpr* res = (isF64 && !isI64)
14280 ? unop(ops[ix], src)
14281 : binop(ops[ix],
14282 mkexpr(mk_get_IR_rounding_mode()), src);
14283 putQReg128(dd, mkV128(0));
14284 putQRegLO(dd, res);
14285 DIP("%ccvtf %s, %s\n",
14286 isU ? 'u' : 's', nameQRegLO(dd, isF64 ? Ity_F64 : Ity_F32),
14287 nameIRegOrZR(isI64, nn));
14288 return True;
14291 // op = 110, 111
14292 /* -------- FMOV (general) -------- */
14293 /* case sf S ty rm op 15 9 4
14294 (1) 0 0 0 11110 00 1 00 111 000000 n d FMOV Sd, Wn
14295 (2) 1 0 0 11110 01 1 00 111 000000 n d FMOV Dd, Xn
14296 (3) 1 0 0 11110 10 1 01 111 000000 n d FMOV Vd.D[1], Xn
14298 (4) 0 0 0 11110 00 1 00 110 000000 n d FMOV Wd, Sn
14299 (5) 1 0 0 11110 01 1 00 110 000000 n d FMOV Xd, Dn
14300 (6) 1 0 0 11110 10 1 01 110 000000 n d FMOV Xd, Vn.D[1]
14302 if (1) {
14303 UInt ix = 0; // case
14304 if (bitSF == 0) {
14305 if (ty == BITS2(0,0) && rm == BITS2(0,0) && op == BITS3(1,1,1))
14306 ix = 1;
14307 else
14308 if (ty == BITS2(0,0) && rm == BITS2(0,0) && op == BITS3(1,1,0))
14309 ix = 4;
14310 } else {
14311 vassert(bitSF == 1);
14312 if (ty == BITS2(0,1) && rm == BITS2(0,0) && op == BITS3(1,1,1))
14313 ix = 2;
14314 else
14315 if (ty == BITS2(0,1) && rm == BITS2(0,0) && op == BITS3(1,1,0))
14316 ix = 5;
14317 else
14318 if (ty == BITS2(1,0) && rm == BITS2(0,1) && op == BITS3(1,1,1))
14319 ix = 3;
14320 else
14321 if (ty == BITS2(1,0) && rm == BITS2(0,1) && op == BITS3(1,1,0))
14322 ix = 6;
14324 if (ix > 0) {
14325 switch (ix) {
14326 case 1:
14327 putQReg128(dd, mkV128(0));
14328 putQRegLO(dd, getIReg32orZR(nn));
14329 DIP("fmov s%u, w%u\n", dd, nn);
14330 break;
14331 case 2:
14332 putQReg128(dd, mkV128(0));
14333 putQRegLO(dd, getIReg64orZR(nn));
14334 DIP("fmov d%u, x%u\n", dd, nn);
14335 break;
14336 case 3:
14337 putQRegHI64(dd, getIReg64orZR(nn));
14338 DIP("fmov v%u.d[1], x%u\n", dd, nn);
14339 break;
14340 case 4:
14341 putIReg32orZR(dd, getQRegLO(nn, Ity_I32));
14342 DIP("fmov w%u, s%u\n", dd, nn);
14343 break;
14344 case 5:
14345 putIReg64orZR(dd, getQRegLO(nn, Ity_I64));
14346 DIP("fmov x%u, d%u\n", dd, nn);
14347 break;
14348 case 6:
14349 putIReg64orZR(dd, getQRegHI64(nn));
14350 DIP("fmov x%u, v%u.d[1]\n", dd, nn);
14351 break;
14352 default:
14353 vassert(0);
14355 return True;
14357 /* undecodable; fall through */
14360 return False;
14361 # undef INSN
14365 static
14366 Bool dis_ARM64_simd_and_fp(/*MB_OUT*/DisResult* dres, UInt insn)
14368 Bool ok;
14369 ok = dis_AdvSIMD_EXT(dres, insn);
14370 if (UNLIKELY(ok)) return True;
14371 ok = dis_AdvSIMD_TBL_TBX(dres, insn);
14372 if (UNLIKELY(ok)) return True;
14373 ok = dis_AdvSIMD_ZIP_UZP_TRN(dres, insn);
14374 if (UNLIKELY(ok)) return True;
14375 ok = dis_AdvSIMD_across_lanes(dres, insn);
14376 if (UNLIKELY(ok)) return True;
14377 ok = dis_AdvSIMD_copy(dres, insn);
14378 if (UNLIKELY(ok)) return True;
14379 ok = dis_AdvSIMD_modified_immediate(dres, insn);
14380 if (UNLIKELY(ok)) return True;
14381 ok = dis_AdvSIMD_scalar_copy(dres, insn);
14382 if (UNLIKELY(ok)) return True;
14383 ok = dis_AdvSIMD_scalar_pairwise(dres, insn);
14384 if (UNLIKELY(ok)) return True;
14385 ok = dis_AdvSIMD_scalar_shift_by_imm(dres, insn);
14386 if (UNLIKELY(ok)) return True;
14387 ok = dis_AdvSIMD_scalar_three_different(dres, insn);
14388 if (UNLIKELY(ok)) return True;
14389 ok = dis_AdvSIMD_scalar_three_same(dres, insn);
14390 if (UNLIKELY(ok)) return True;
14391 ok = dis_AdvSIMD_scalar_two_reg_misc(dres, insn);
14392 if (UNLIKELY(ok)) return True;
14393 ok = dis_AdvSIMD_scalar_x_indexed_element(dres, insn);
14394 if (UNLIKELY(ok)) return True;
14395 ok = dis_AdvSIMD_shift_by_immediate(dres, insn);
14396 if (UNLIKELY(ok)) return True;
14397 ok = dis_AdvSIMD_three_different(dres, insn);
14398 if (UNLIKELY(ok)) return True;
14399 ok = dis_AdvSIMD_three_same(dres, insn);
14400 if (UNLIKELY(ok)) return True;
14401 ok = dis_AdvSIMD_two_reg_misc(dres, insn);
14402 if (UNLIKELY(ok)) return True;
14403 ok = dis_AdvSIMD_vector_x_indexed_elem(dres, insn);
14404 if (UNLIKELY(ok)) return True;
14405 ok = dis_AdvSIMD_crypto_aes(dres, insn);
14406 if (UNLIKELY(ok)) return True;
14407 ok = dis_AdvSIMD_crypto_three_reg_sha(dres, insn);
14408 if (UNLIKELY(ok)) return True;
14409 ok = dis_AdvSIMD_crypto_two_reg_sha(dres, insn);
14410 if (UNLIKELY(ok)) return True;
14411 ok = dis_AdvSIMD_fp_compare(dres, insn);
14412 if (UNLIKELY(ok)) return True;
14413 ok = dis_AdvSIMD_fp_conditional_compare(dres, insn);
14414 if (UNLIKELY(ok)) return True;
14415 ok = dis_AdvSIMD_fp_conditional_select(dres, insn);
14416 if (UNLIKELY(ok)) return True;
14417 ok = dis_AdvSIMD_fp_data_proc_1_source(dres, insn);
14418 if (UNLIKELY(ok)) return True;
14419 ok = dis_AdvSIMD_fp_data_proc_2_source(dres, insn);
14420 if (UNLIKELY(ok)) return True;
14421 ok = dis_AdvSIMD_fp_data_proc_3_source(dres, insn);
14422 if (UNLIKELY(ok)) return True;
14423 ok = dis_AdvSIMD_fp_immediate(dres, insn);
14424 if (UNLIKELY(ok)) return True;
14425 ok = dis_AdvSIMD_fp_to_from_fixedp_conv(dres, insn);
14426 if (UNLIKELY(ok)) return True;
14427 ok = dis_AdvSIMD_fp_to_from_int_conv(dres, insn);
14428 if (UNLIKELY(ok)) return True;
14429 return False;
14433 /*------------------------------------------------------------*/
14434 /*--- Disassemble a single ARM64 instruction ---*/
14435 /*------------------------------------------------------------*/
14437 /* Disassemble a single ARM64 instruction into IR. The instruction
14438    is located at |guest_instr| and has guest IP of
14439 |guest_PC_curr_instr|, which will have been set before the call
14440 here. Returns True iff the instruction was decoded, in which case
14441 *dres will be set accordingly, or False, in which case *dres should
14442 be ignored by the caller. */
14444 static
14445 Bool disInstr_ARM64_WRK (
14446 /*MB_OUT*/DisResult* dres,
14447 Bool (*resteerOkFn) ( /*opaque*/void*, Addr ),
14448 Bool resteerCisOk,
14449 void* callback_opaque,
14450 const UChar* guest_instr,
14451 const VexArchInfo* archinfo,
14452 const VexAbiInfo* abiinfo
14455 // A macro to fish bits out of 'insn'.
14456 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
14458 //ZZ DisResult dres;
14459 //ZZ UInt insn;
14460 //ZZ //Bool allow_VFP = False;
14461 //ZZ //UInt hwcaps = archinfo->hwcaps;
14462 //ZZ IRTemp condT; /* :: Ity_I32 */
14463 //ZZ UInt summary;
14464 //ZZ HChar dis_buf[128]; // big enough to hold LDMIA etc text
14465 //ZZ
14466 //ZZ /* What insn variants are we supporting today? */
14467 //ZZ //allow_VFP = (0 != (hwcaps & VEX_HWCAPS_ARM_VFP));
14468 //ZZ // etc etc
14470 /* Set result defaults. */
14471 dres->whatNext = Dis_Continue;
14472 dres->len = 4;
14473 dres->continueAt = 0;
14474 dres->jk_StopHere = Ijk_INVALID;
14475 dres->hint = Dis_HintNone;
14477 /* At least this is simple on ARM64: insns are all 4 bytes long, and
14478 4-aligned. So just fish the whole thing out of memory right now
14479 and have done. */
14480 UInt insn = getUIntLittleEndianly( guest_instr );
14482 if (0) vex_printf("insn: 0x%x\n", insn);
14484 DIP("\t(arm64) 0x%llx: ", (ULong)guest_PC_curr_instr);
14486 vassert(0 == (guest_PC_curr_instr & 3ULL));
14488 /* ----------------------------------------------------------- */
14490 /* Spot "Special" instructions (see comment at top of file). */
14492 const UChar* code = guest_instr;
14493 /* Spot the 16-byte preamble:
14494 93CC0D8C ror x12, x12, #3
14495 93CC358C ror x12, x12, #13
14496 93CCCD8C ror x12, x12, #51
14497 93CCF58C ror x12, x12, #61
14499 UInt word1 = 0x93CC0D8C;
14500 UInt word2 = 0x93CC358C;
14501 UInt word3 = 0x93CCCD8C;
14502 UInt word4 = 0x93CCF58C;
14503 if (getUIntLittleEndianly(code+ 0) == word1 &&
14504 getUIntLittleEndianly(code+ 4) == word2 &&
14505 getUIntLittleEndianly(code+ 8) == word3 &&
14506 getUIntLittleEndianly(code+12) == word4) {
14507 /* Got a "Special" instruction preamble. Which one is it? */
14508 if (getUIntLittleEndianly(code+16) == 0xAA0A014A
14509 /* orr x10,x10,x10 */) {
14510 /* X3 = client_request ( X4 ) */
14511 DIP("x3 = client_request ( x4 )\n");
14512 putPC(mkU64( guest_PC_curr_instr + 20 ));
14513 dres->jk_StopHere = Ijk_ClientReq;
14514 dres->whatNext = Dis_StopHere;
14515 return True;
14517 else
14518 if (getUIntLittleEndianly(code+16) == 0xAA0B016B
14519 /* orr x11,x11,x11 */) {
14520 /* X3 = guest_NRADDR */
14521 DIP("x3 = guest_NRADDR\n");
14522 dres->len = 20;
14523 putIReg64orZR(3, IRExpr_Get( OFFB_NRADDR, Ity_I64 ));
14524 return True;
14526 else
14527 if (getUIntLittleEndianly(code+16) == 0xAA0C018C
14528 /* orr x12,x12,x12 */) {
14529 /* branch-and-link-to-noredir X8 */
14530 DIP("branch-and-link-to-noredir x8\n");
14531 putIReg64orZR(30, mkU64(guest_PC_curr_instr + 20));
14532 putPC(getIReg64orZR(8));
14533 dres->jk_StopHere = Ijk_NoRedir;
14534 dres->whatNext = Dis_StopHere;
14535 return True;
14537 else
14538 if (getUIntLittleEndianly(code+16) == 0xAA090129
14539 /* orr x9,x9,x9 */) {
14540 /* IR injection */
14541 DIP("IR injection\n");
14542 vex_inject_ir(irsb, Iend_LE);
14543          // Invalidate the current insn. The reason is that the IRop we're
14544          // injecting here can change; in that case the translation has to
14545          // be redone.  For ease of handling, we simply invalidate all the
14546          // time.
14547 stmt(IRStmt_Put(OFFB_CMSTART, mkU64(guest_PC_curr_instr)));
14548 stmt(IRStmt_Put(OFFB_CMLEN, mkU64(20)));
14549 putPC(mkU64( guest_PC_curr_instr + 20 ));
14550 dres->whatNext = Dis_StopHere;
14551 dres->jk_StopHere = Ijk_InvalICache;
14552 return True;
14554 /* We don't know what it is. */
14555 return False;
14556 /*NOTREACHED*/
14560 /* ----------------------------------------------------------- */
14562 /* Main ARM64 instruction decoder starts here. */
14564 Bool ok = False;
14566 /* insn[28:25] determines the top-level grouping, so let's start
14567 off with that.
14569 For all of these dis_ARM64_ functions, we pass *dres with the
14570 normal default results "insn OK, 4 bytes long, keep decoding" so
14571 they don't need to change it. However, decodes of control-flow
14572 insns may cause *dres to change.
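      A quick illustration (editorial): a scalar FADD such as "fadd s0,s1,s2"
      (0x1E222820) has insn[28:25] = 0b1111, so it is routed to
      dis_ARM64_simd_and_fp below.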
14574 switch (INSN(28,25)) {
14575 case BITS4(1,0,0,0): case BITS4(1,0,0,1):
14576 // Data processing - immediate
14577 ok = dis_ARM64_data_processing_immediate(dres, insn);
14578 break;
14579 case BITS4(1,0,1,0): case BITS4(1,0,1,1):
14580 // Branch, exception generation and system instructions
14581 ok = dis_ARM64_branch_etc(dres, insn, archinfo, abiinfo);
14582 break;
14583 case BITS4(0,1,0,0): case BITS4(0,1,1,0):
14584 case BITS4(1,1,0,0): case BITS4(1,1,1,0):
14585 // Loads and stores
14586 ok = dis_ARM64_load_store(dres, insn, abiinfo);
14587 break;
14588 case BITS4(0,1,0,1): case BITS4(1,1,0,1):
14589 // Data processing - register
14590 ok = dis_ARM64_data_processing_register(dres, insn);
14591 break;
14592 case BITS4(0,1,1,1): case BITS4(1,1,1,1):
14593 // Data processing - SIMD and floating point
14594 ok = dis_ARM64_simd_and_fp(dres, insn);
14595 break;
14596 case BITS4(0,0,0,0): case BITS4(0,0,0,1):
14597 case BITS4(0,0,1,0): case BITS4(0,0,1,1):
14598 // UNALLOCATED
14599 break;
14600 default:
14601 vassert(0); /* Can't happen */
14604 /* If the next-level down decoders failed, make sure |dres| didn't
14605 get changed. */
14606 if (!ok) {
14607 vassert(dres->whatNext == Dis_Continue);
14608 vassert(dres->len == 4);
14609 vassert(dres->continueAt == 0);
14610 vassert(dres->jk_StopHere == Ijk_INVALID);
14613 return ok;
14615 # undef INSN
14619 /*------------------------------------------------------------*/
14620 /*--- Top-level fn ---*/
14621 /*------------------------------------------------------------*/
14623 /* Disassemble a single instruction into IR. The instruction
14624 is located in host memory at &guest_code[delta]. */
14626 DisResult disInstr_ARM64 ( IRSB* irsb_IN,
14627 Bool (*resteerOkFn) ( void*, Addr ),
14628 Bool resteerCisOk,
14629 void* callback_opaque,
14630 const UChar* guest_code_IN,
14631 Long delta_IN,
14632 Addr guest_IP,
14633 VexArch guest_arch,
14634 const VexArchInfo* archinfo,
14635 const VexAbiInfo* abiinfo,
14636 VexEndness host_endness_IN,
14637 Bool sigill_diag_IN )
14639 DisResult dres;
14640 vex_bzero(&dres, sizeof(dres));
14642 /* Set globals (see top of this file) */
14643 vassert(guest_arch == VexArchARM64);
14645 irsb = irsb_IN;
14646 host_endness = host_endness_IN;
14647 guest_PC_curr_instr = (Addr64)guest_IP;
14649 /* Sanity checks */
14650 /* (x::UInt - 2) <= 15 === x >= 2 && x <= 17 (I hope) */
14651 vassert((archinfo->arm64_dMinLine_lg2_szB - 2) <= 15);
14652 vassert((archinfo->arm64_iMinLine_lg2_szB - 2) <= 15);
14654 /* Try to decode */
14655 Bool ok = disInstr_ARM64_WRK( &dres,
14656 resteerOkFn, resteerCisOk, callback_opaque,
14657 &guest_code_IN[delta_IN],
14658 archinfo, abiinfo );
14659 if (ok) {
14660 /* All decode successes end up here. */
14661 vassert(dres.len == 4 || dres.len == 20);
14662 switch (dres.whatNext) {
14663 case Dis_Continue:
14664 putPC( mkU64(dres.len + guest_PC_curr_instr) );
14665 break;
14666 case Dis_ResteerU:
14667 case Dis_ResteerC:
14668 putPC(mkU64(dres.continueAt));
14669 break;
14670 case Dis_StopHere:
14671 break;
14672 default:
14673 vassert(0);
14675 DIP("\n");
14676 } else {
14677 /* All decode failures end up here. */
14678 if (sigill_diag_IN) {
14679 Int i, j;
14680 UChar buf[64];
14681 UInt insn
14682 = getUIntLittleEndianly( &guest_code_IN[delta_IN] );
14683 vex_bzero(buf, sizeof(buf));
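         /* Editorial note: the loop renders the 32 insn bits MSB first as
            "xxxx'xxxx xxxx'xxxx xxxx'xxxx xxxx'xxxx" -- a ' every 4 bits and
            a space every 8. */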
14684 for (i = j = 0; i < 32; i++) {
14685 if (i > 0) {
14686 if ((i & 7) == 0) buf[j++] = ' ';
14687 else if ((i & 3) == 0) buf[j++] = '\'';
14689 buf[j++] = (insn & (1<<(31-i))) ? '1' : '0';
14691 vex_printf("disInstr(arm64): unhandled instruction 0x%08x\n", insn);
14692 vex_printf("disInstr(arm64): %s\n", buf);
14695 /* Tell the dispatcher that this insn cannot be decoded, and so
14696 has not been executed, and (is currently) the next to be
14697 executed. PC should be up-to-date since it is made so at the
14698 start of each insn, but nevertheless be paranoid and update
14699 it again right now. */
14700 putPC( mkU64(guest_PC_curr_instr) );
14701 dres.len = 0;
14702 dres.whatNext = Dis_StopHere;
14703 dres.jk_StopHere = Ijk_NoDecode;
14704 dres.continueAt = 0;
14706 return dres;
14710 /*--------------------------------------------------------------------*/
14711 /*--- end guest_arm64_toIR.c ---*/
14712 /*--------------------------------------------------------------------*/