Bug 497723 - forgot to restore callgrind output cleanup
[valgrind.git] / VEX / priv / guest_arm64_toIR.c
blobe6b92c7a04f4c5f1f5e8fb3b9ba21cee18edc4b8
1 /* -*- mode: C; c-basic-offset: 3; -*- */
3 /*--------------------------------------------------------------------*/
4 /*--- begin guest_arm64_toIR.c ---*/
5 /*--------------------------------------------------------------------*/
7 /*
8 This file is part of Valgrind, a dynamic binary instrumentation
9 framework.
11 Copyright (C) 2013-2017 OpenWorks
12 info@open-works.net
14 This program is free software; you can redistribute it and/or
15 modify it under the terms of the GNU General Public License as
16 published by the Free Software Foundation; either version 2 of the
17 License, or (at your option) any later version.
19 This program is distributed in the hope that it will be useful, but
20 WITHOUT ANY WARRANTY; without even the implied warranty of
21 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
22 General Public License for more details.
24 You should have received a copy of the GNU General Public License
25 along with this program; if not, see <http://www.gnu.org/licenses/>.
27 The GNU General Public License is contained in the file COPYING.
30 /* KNOWN LIMITATIONS 2014-Nov-16
32 * Correctness: FMAXNM, FMINNM are implemented the same as FMAX/FMIN.
34 Also FP comparison "unordered" .. is implemented as normal FP
35 comparison.
37 Both should be fixed. They behave incorrectly in the presence of
38 NaNs.
40 FMULX is treated the same as FMUL. That's also not correct.
42 * Floating multiply-add (etc) insns. Are split into a multiply and
43 an add, and so suffer double rounding and hence sometimes the
44 least significant mantissa bit is incorrect. Fix: use the IR
45 multiply-add IROps instead.
47 * FRINTX might need updating to set the inexact computation FPSR flag
49 * Ditto FCVTXN. No idea what "round to odd" means. This implementation
50 just rounds to nearest.
53 /* "Special" instructions.
55 This instruction decoder can decode four special instructions
56 which mean nothing natively (are no-ops as far as regs/mem are
57 concerned) but have meaning for supporting Valgrind. A special
58 instruction is flagged by a 16-byte preamble:
60 93CC0D8C 93CC358C 93CCCD8C 93CCF58C
61 (ror x12, x12, #3; ror x12, x12, #13
62 ror x12, x12, #51; ror x12, x12, #61)
64 Following that, one of the following 4 is allowed
65 (standard interpretation in parentheses):
67 AA0A014A (orr x10,x10,x10) X3 = client_request ( X4 )
68 AA0B016B (orr x11,x11,x11) X3 = guest_NRADDR
69 AA0C018C (orr x12,x12,x12) branch-and-link-to-noredir X8
70 AA090129 (orr x9,x9,x9) IR injection
72 Any other bytes following the 16-byte preamble are illegal and
73 constitute a failure in instruction decoding. This all assumes
74 that the preamble will never occur except in specific code
75 fragments designed for Valgrind to catch.
78 /* Translates ARM64 code to IR. */
80 #include "libvex_basictypes.h"
81 #include "libvex_ir.h"
82 #include "libvex.h"
83 #include "libvex_guest_arm64.h"
85 #include "main_util.h"
86 #include "main_globals.h"
87 #include "guest_generic_bb_to_IR.h"
88 #include "guest_arm64_defs.h"
91 /*------------------------------------------------------------*/
92 /*--- Globals ---*/
93 /*------------------------------------------------------------*/
95 /* These are set at the start of the translation of an instruction, so
96 that we don't have to pass them around endlessly. CONST means does
97 not change during translation of the instruction.
100 /* CONST: what is the host's endianness? We need to know this in
101 order to do sub-register accesses to the SIMD/FP registers
102 correctly. */
103 static VexEndness host_endness;
105 /* CONST: The guest address for the instruction currently being
106 translated. */
107 static Addr64 guest_PC_curr_instr;
109 /* MOD: The IRSB* into which we're generating code. */
110 static IRSB* irsb;
113 /*------------------------------------------------------------*/
114 /*--- Debugging output ---*/
115 /*------------------------------------------------------------*/
/* Front-end trace print: forwards to vex_printf, but only when the
   VEX_TRACE_FE bit is set in vex_traceflags.  The body is wrapped in
   do { } while (0) so that the expansion is exactly one statement and
   cannot capture an 'else' that follows the invocation. */
#define DIP(format, args...)                 \
   do {                                      \
      if (vex_traceflags & VEX_TRACE_FE)     \
         vex_printf(format, ## args);        \
   } while (0)
/* Front-end trace format-to-buffer: forwards to vex_sprintf(buf, ...),
   but only when the VEX_TRACE_FE bit is set in vex_traceflags.  The
   body is wrapped in do { } while (0) so that the expansion is exactly
   one statement and cannot capture an 'else' that follows the
   invocation. */
#define DIS(buf, format, args...)            \
   do {                                      \
      if (vex_traceflags & VEX_TRACE_FE)     \
         vex_sprintf(buf, format, ## args);  \
   } while (0)
126 /*------------------------------------------------------------*/
127 /*--- Helper bits and pieces for deconstructing the ---*/
128 /*--- arm insn stream. ---*/
129 /*------------------------------------------------------------*/
131 /* Do a little-endian load of a 32-bit word, regardless of the
132 endianness of the underlying host. */
133 static inline UInt getUIntLittleEndianly ( const UChar* p )
135 UInt w = 0;
136 w = (w << 8) | p[3];
137 w = (w << 8) | p[2];
138 w = (w << 8) | p[1];
139 w = (w << 8) | p[0];
140 return w;
143 /* Sign extend a N-bit value up to 64 bits, by copying
144 bit N-1 into all higher positions. */
145 static ULong sx_to_64 ( ULong x, UInt n )
147 vassert(n > 1 && n < 64);
148 x <<= (64-n);
149 Long r = (Long)x;
150 r >>= (64-n);
151 return (ULong)r;
154 //ZZ /* Do a little-endian load of a 16-bit word, regardless of the
155 //ZZ endianness of the underlying host. */
156 //ZZ static inline UShort getUShortLittleEndianly ( UChar* p )
157 //ZZ {
158 //ZZ UShort w = 0;
159 //ZZ w = (w << 8) | p[1];
160 //ZZ w = (w << 8) | p[0];
161 //ZZ return w;
162 //ZZ }
163 //ZZ
164 //ZZ static UInt ROR32 ( UInt x, UInt sh ) {
165 //ZZ vassert(sh >= 0 && sh < 32);
166 //ZZ if (sh == 0)
167 //ZZ return x;
168 //ZZ else
169 //ZZ return (x << (32-sh)) | (x >> sh);
170 //ZZ }
171 //ZZ
172 //ZZ static Int popcount32 ( UInt x )
173 //ZZ {
174 //ZZ Int res = 0, i;
175 //ZZ for (i = 0; i < 32; i++) {
176 //ZZ res += (x & 1);
177 //ZZ x >>= 1;
178 //ZZ }
179 //ZZ return res;
180 //ZZ }
181 //ZZ
182 //ZZ static UInt setbit32 ( UInt x, Int ix, UInt b )
183 //ZZ {
184 //ZZ UInt mask = 1 << ix;
185 //ZZ x &= ~mask;
186 //ZZ x |= ((b << ix) & mask);
187 //ZZ return x;
188 //ZZ }
/* BITSn(b<n-1>, ..., b0) packs n single-bit arguments into one integer,
   with the first argument landing in the most significant position
   (bit n-1) and the last argument in bit 0. */
#define BITS2(_b1,_b0)  \
   ((_b0) | ((_b1) << 1))

#define BITS3(_b2,_b1,_b0)  \
   ((_b0) | ((_b1) << 1) | ((_b2) << 2))

#define BITS4(_b3,_b2,_b1,_b0)  \
   ((_b0) | ((_b1) << 1) | ((_b2) << 2) | ((_b3) << 3))

/* Two nibbles glued together. */
#define BITS8(_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0)  \
   (BITS4((_b3),(_b2),(_b1),(_b0))  \
    | (BITS4((_b7),(_b6),(_b5),(_b4)) << 4))

/* 5, 6 and 7 bit forms are BITS8 with the unused top bits zeroed. */
#define BITS5(_b4,_b3,_b2,_b1,_b0)  \
   (BITS8(0,0,0,(_b4),(_b3),(_b2),(_b1),(_b0)))

#define BITS6(_b5,_b4,_b3,_b2,_b1,_b0)  \
   (BITS8(0,0,(_b5),(_b4),(_b3),(_b2),(_b1),(_b0)))

#define BITS7(_b6,_b5,_b4,_b3,_b2,_b1,_b0)  \
   (BITS8(0,(_b6),(_b5),(_b4),(_b3),(_b2),(_b1),(_b0)))

/* Wider forms add the extra high bits on top of a narrower form. */
#define BITS9(_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0)  \
   (BITS8((_b7),(_b6),(_b5),(_b4),(_b3),(_b2),(_b1),(_b0))  \
    | ((_b8) << 8))

#define BITS10(_b9,_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0)  \
   (BITS8((_b7),(_b6),(_b5),(_b4),(_b3),(_b2),(_b1),(_b0))  \
    | ((_b8) << 8) | ((_b9) << 9))

#define BITS11(_b10,_b9,_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0)  \
   (BITS10((_b9),(_b8),(_b7),(_b6),(_b5),(_b4),(_b3),(_b2),(_b1),(_b0))  \
    | ((_b10) << 10))

#define BITS12(_b11,_b10,_b9,_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0)  \
   (BITS11((_b10),(_b9),(_b8),(_b7),(_b6),(_b5),(_b4),(_b3),(_b2),(_b1),(_b0))  \
    | ((_b11) << 11))

/* The four possible 2-bit field values, spelled in binary. */
#define X00  BITS2(0,0)
#define X01  BITS2(0,1)
#define X10  BITS2(1,0)
#define X11  BITS2(1,1)
// Yields _uint[_bMax:_bMin]: the inclusive bit range, shifted down so
// that bit _bMin of the input becomes bit 0 of the result.
#define SLICE_UInt(_uint,_bMax,_bMin)  \
   ((UInt)((1ULL << ((_bMax) - (_bMin) + 1)) - 1ULL)  \
    & (((UInt)(_uint)) >> (_bMin)))
237 /*------------------------------------------------------------*/
238 /*--- Helper bits and pieces for creating IR fragments. ---*/
239 /*------------------------------------------------------------*/
241 static IRExpr* mkV128 ( UShort w )
243 return IRExpr_Const(IRConst_V128(w));
246 static IRExpr* mkU64 ( ULong i )
248 return IRExpr_Const(IRConst_U64(i));
251 static IRExpr* mkU32 ( UInt i )
253 return IRExpr_Const(IRConst_U32(i));
256 static IRExpr* mkU16 ( UInt i )
258 vassert(i < 65536);
259 return IRExpr_Const(IRConst_U16(i));
262 static IRExpr* mkU8 ( UInt i )
264 vassert(i < 256);
265 return IRExpr_Const(IRConst_U8( (UChar)i ));
268 static IRExpr* mkexpr ( IRTemp tmp )
270 return IRExpr_RdTmp(tmp);
273 static IRExpr* unop ( IROp op, IRExpr* a )
275 return IRExpr_Unop(op, a);
278 static IRExpr* binop ( IROp op, IRExpr* a1, IRExpr* a2 )
280 return IRExpr_Binop(op, a1, a2);
283 static IRExpr* triop ( IROp op, IRExpr* a1, IRExpr* a2, IRExpr* a3 )
285 return IRExpr_Triop(op, a1, a2, a3);
288 static IRExpr* qop ( IROp op, IRExpr* a1, IRExpr* a2,
289 IRExpr* a3, IRExpr* a4 )
291 return IRExpr_Qop(op, a1, a2, a3, a4);
294 static IRExpr* loadLE ( IRType ty, IRExpr* addr )
296 return IRExpr_Load(Iend_LE, ty, addr);
299 /* Add a statement to the list held by "irbb". */
300 static void stmt ( IRStmt* st )
302 addStmtToIRSB( irsb, st );
305 static void assign ( IRTemp dst, IRExpr* e )
307 stmt( IRStmt_WrTmp(dst, e) );
310 static void storeLE ( IRExpr* addr, IRExpr* data )
312 stmt( IRStmt_Store(Iend_LE, addr, data) );
315 //ZZ static void storeGuardedLE ( IRExpr* addr, IRExpr* data, IRTemp guardT )
316 //ZZ {
317 //ZZ if (guardT == IRTemp_INVALID) {
318 //ZZ /* unconditional */
319 //ZZ storeLE(addr, data);
320 //ZZ } else {
321 //ZZ stmt( IRStmt_StoreG(Iend_LE, addr, data,
322 //ZZ binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0))) );
323 //ZZ }
324 //ZZ }
325 //ZZ
326 //ZZ static void loadGuardedLE ( IRTemp dst, IRLoadGOp cvt,
327 //ZZ IRExpr* addr, IRExpr* alt,
328 //ZZ IRTemp guardT /* :: Ity_I32, 0 or 1 */ )
329 //ZZ {
330 //ZZ if (guardT == IRTemp_INVALID) {
331 //ZZ /* unconditional */
332 //ZZ IRExpr* loaded = NULL;
333 //ZZ switch (cvt) {
334 //ZZ case ILGop_Ident32:
335 //ZZ loaded = loadLE(Ity_I32, addr); break;
336 //ZZ case ILGop_8Uto32:
337 //ZZ loaded = unop(Iop_8Uto32, loadLE(Ity_I8, addr)); break;
338 //ZZ case ILGop_8Sto32:
339 //ZZ loaded = unop(Iop_8Sto32, loadLE(Ity_I8, addr)); break;
340 //ZZ case ILGop_16Uto32:
341 //ZZ loaded = unop(Iop_16Uto32, loadLE(Ity_I16, addr)); break;
342 //ZZ case ILGop_16Sto32:
343 //ZZ loaded = unop(Iop_16Sto32, loadLE(Ity_I16, addr)); break;
344 //ZZ default:
345 //ZZ vassert(0);
346 //ZZ }
347 //ZZ vassert(loaded != NULL);
348 //ZZ assign(dst, loaded);
349 //ZZ } else {
350 //ZZ /* Generate a guarded load into 'dst', but apply 'cvt' to the
351 //ZZ loaded data before putting the data in 'dst'. If the load
352 //ZZ does not take place, 'alt' is placed directly in 'dst'. */
353 //ZZ stmt( IRStmt_LoadG(Iend_LE, cvt, dst, addr, alt,
354 //ZZ binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0))) );
355 //ZZ }
356 //ZZ }
358 /* Generate a new temporary of the given type. */
359 static IRTemp newTemp ( IRType ty )
361 vassert(isPlausibleIRType(ty));
362 return newIRTemp( irsb->tyenv, ty );
365 /* This is used in many places, so the brevity is an advantage. */
366 static IRTemp newTempV128(void)
368 return newTemp(Ity_V128);
371 /* Initialise V128 temporaries en masse. */
372 static
373 void newTempsV128_2(IRTemp* t1, IRTemp* t2)
375 vassert(t1 && *t1 == IRTemp_INVALID);
376 vassert(t2 && *t2 == IRTemp_INVALID);
377 *t1 = newTempV128();
378 *t2 = newTempV128();
381 static
382 void newTempsV128_3(IRTemp* t1, IRTemp* t2, IRTemp* t3)
384 vassert(t1 && *t1 == IRTemp_INVALID);
385 vassert(t2 && *t2 == IRTemp_INVALID);
386 vassert(t3 && *t3 == IRTemp_INVALID);
387 *t1 = newTempV128();
388 *t2 = newTempV128();
389 *t3 = newTempV128();
392 static
393 void newTempsV128_4(IRTemp* t1, IRTemp* t2, IRTemp* t3, IRTemp* t4)
395 vassert(t1 && *t1 == IRTemp_INVALID);
396 vassert(t2 && *t2 == IRTemp_INVALID);
397 vassert(t3 && *t3 == IRTemp_INVALID);
398 vassert(t4 && *t4 == IRTemp_INVALID);
399 *t1 = newTempV128();
400 *t2 = newTempV128();
401 *t3 = newTempV128();
402 *t4 = newTempV128();
405 static
406 void newTempsV128_7(IRTemp* t1, IRTemp* t2, IRTemp* t3,
407 IRTemp* t4, IRTemp* t5, IRTemp* t6, IRTemp* t7)
409 vassert(t1 && *t1 == IRTemp_INVALID);
410 vassert(t2 && *t2 == IRTemp_INVALID);
411 vassert(t3 && *t3 == IRTemp_INVALID);
412 vassert(t4 && *t4 == IRTemp_INVALID);
413 vassert(t5 && *t5 == IRTemp_INVALID);
414 vassert(t6 && *t6 == IRTemp_INVALID);
415 vassert(t7 && *t7 == IRTemp_INVALID);
416 *t1 = newTempV128();
417 *t2 = newTempV128();
418 *t3 = newTempV128();
419 *t4 = newTempV128();
420 *t5 = newTempV128();
421 *t6 = newTempV128();
422 *t7 = newTempV128();
425 //ZZ /* Produces a value in 0 .. 3, which is encoded as per the type
426 //ZZ IRRoundingMode. */
427 //ZZ static IRExpr* /* :: Ity_I32 */ get_FAKE_roundingmode ( void )
428 //ZZ {
429 //ZZ return mkU32(Irrm_NEAREST);
430 //ZZ }
431 //ZZ
432 //ZZ /* Generate an expression for SRC rotated right by ROT. */
433 //ZZ static IRExpr* genROR32( IRTemp src, Int rot )
434 //ZZ {
435 //ZZ vassert(rot >= 0 && rot < 32);
436 //ZZ if (rot == 0)
437 //ZZ return mkexpr(src);
438 //ZZ return
439 //ZZ binop(Iop_Or32,
440 //ZZ binop(Iop_Shl32, mkexpr(src), mkU8(32 - rot)),
441 //ZZ binop(Iop_Shr32, mkexpr(src), mkU8(rot)));
442 //ZZ }
443 //ZZ
444 //ZZ static IRExpr* mkU128 ( ULong i )
445 //ZZ {
446 //ZZ return binop(Iop_64HLtoV128, mkU64(i), mkU64(i));
447 //ZZ }
448 //ZZ
449 //ZZ /* Generate a 4-aligned version of the given expression if
450 //ZZ the given condition is true. Else return it unchanged. */
451 //ZZ static IRExpr* align4if ( IRExpr* e, Bool b )
452 //ZZ {
453 //ZZ if (b)
454 //ZZ return binop(Iop_And32, e, mkU32(~3));
455 //ZZ else
456 //ZZ return e;
457 //ZZ }
459 /* Other IR construction helpers. */
460 static IROp mkAND ( IRType ty ) {
461 switch (ty) {
462 case Ity_I32: return Iop_And32;
463 case Ity_I64: return Iop_And64;
464 default: vpanic("mkAND");
468 static IROp mkOR ( IRType ty ) {
469 switch (ty) {
470 case Ity_I32: return Iop_Or32;
471 case Ity_I64: return Iop_Or64;
472 default: vpanic("mkOR");
476 static IROp mkXOR ( IRType ty ) {
477 switch (ty) {
478 case Ity_I32: return Iop_Xor32;
479 case Ity_I64: return Iop_Xor64;
480 default: vpanic("mkXOR");
484 static IROp mkSHL ( IRType ty ) {
485 switch (ty) {
486 case Ity_I32: return Iop_Shl32;
487 case Ity_I64: return Iop_Shl64;
488 default: vpanic("mkSHL");
492 static IROp mkSHR ( IRType ty ) {
493 switch (ty) {
494 case Ity_I32: return Iop_Shr32;
495 case Ity_I64: return Iop_Shr64;
496 default: vpanic("mkSHR");
500 static IROp mkSAR ( IRType ty ) {
501 switch (ty) {
502 case Ity_I32: return Iop_Sar32;
503 case Ity_I64: return Iop_Sar64;
504 default: vpanic("mkSAR");
508 static IROp mkNOT ( IRType ty ) {
509 switch (ty) {
510 case Ity_I32: return Iop_Not32;
511 case Ity_I64: return Iop_Not64;
512 default: vpanic("mkNOT");
516 static IROp mkADD ( IRType ty ) {
517 switch (ty) {
518 case Ity_I32: return Iop_Add32;
519 case Ity_I64: return Iop_Add64;
520 default: vpanic("mkADD");
524 static IROp mkSUB ( IRType ty ) {
525 switch (ty) {
526 case Ity_I32: return Iop_Sub32;
527 case Ity_I64: return Iop_Sub64;
528 default: vpanic("mkSUB");
532 static IROp mkADDF ( IRType ty ) {
533 switch (ty) {
534 case Ity_F16: return Iop_AddF16;
535 case Ity_F32: return Iop_AddF32;
536 case Ity_F64: return Iop_AddF64;
537 default: vpanic("mkADDF");
541 static IROp mkFMADDF ( IRType ty ) {
542 switch (ty) {
543 case Ity_F32: return Iop_MAddF32;
544 case Ity_F64: return Iop_MAddF64;
545 default: vpanic("mkFMADDF");
549 static IROp mkFMSUBF ( IRType ty ) {
550 switch (ty) {
551 case Ity_F32: return Iop_MSubF32;
552 case Ity_F64: return Iop_MSubF64;
553 default: vpanic("mkFMSUBF");
557 static IROp mkSUBF ( IRType ty ) {
558 switch (ty) {
559 case Ity_F16: return Iop_SubF16;
560 case Ity_F32: return Iop_SubF32;
561 case Ity_F64: return Iop_SubF64;
562 default: vpanic("mkSUBF");
566 static IROp mkMULF ( IRType ty ) {
567 switch (ty) {
568 case Ity_F32: return Iop_MulF32;
569 case Ity_F64: return Iop_MulF64;
570 default: vpanic("mkMULF");
574 static IROp mkDIVF ( IRType ty ) {
575 switch (ty) {
576 case Ity_F32: return Iop_DivF32;
577 case Ity_F64: return Iop_DivF64;
578 default: vpanic("mkDIVF");
582 static IROp mkNEGF ( IRType ty ) {
583 switch (ty) {
584 case Ity_F16: return Iop_NegF16;
585 case Ity_F32: return Iop_NegF32;
586 case Ity_F64: return Iop_NegF64;
587 default: vpanic("mkNEGF");
591 static IROp mkABSF ( IRType ty ) {
592 switch (ty) {
593 case Ity_F16: return Iop_AbsF16;
594 case Ity_F32: return Iop_AbsF32;
595 case Ity_F64: return Iop_AbsF64;
596 default: vpanic("mkABSF");
600 static IROp mkSQRTF ( IRType ty ) {
601 switch (ty) {
602 case Ity_F16: return Iop_SqrtF16;
603 case Ity_F32: return Iop_SqrtF32;
604 case Ity_F64: return Iop_SqrtF64;
605 default: vpanic("mkSQRTF");
609 static IROp mkVecADD ( UInt size ) {
610 const IROp ops[4]
611 = { Iop_Add8x16, Iop_Add16x8, Iop_Add32x4, Iop_Add64x2 };
612 vassert(size < 4);
613 return ops[size];
616 static IROp mkVecQADDU ( UInt size ) {
617 const IROp ops[4]
618 = { Iop_QAdd8Ux16, Iop_QAdd16Ux8, Iop_QAdd32Ux4, Iop_QAdd64Ux2 };
619 vassert(size < 4);
620 return ops[size];
623 static IROp mkVecQADDS ( UInt size ) {
624 const IROp ops[4]
625 = { Iop_QAdd8Sx16, Iop_QAdd16Sx8, Iop_QAdd32Sx4, Iop_QAdd64Sx2 };
626 vassert(size < 4);
627 return ops[size];
630 static IROp mkVecQADDEXTSUSATUU ( UInt size ) {
631 const IROp ops[4]
632 = { Iop_QAddExtSUsatUU8x16, Iop_QAddExtSUsatUU16x8,
633 Iop_QAddExtSUsatUU32x4, Iop_QAddExtSUsatUU64x2 };
634 vassert(size < 4);
635 return ops[size];
638 static IROp mkVecQADDEXTUSSATSS ( UInt size ) {
639 const IROp ops[4]
640 = { Iop_QAddExtUSsatSS8x16, Iop_QAddExtUSsatSS16x8,
641 Iop_QAddExtUSsatSS32x4, Iop_QAddExtUSsatSS64x2 };
642 vassert(size < 4);
643 return ops[size];
646 static IROp mkVecSUB ( UInt size ) {
647 const IROp ops[4]
648 = { Iop_Sub8x16, Iop_Sub16x8, Iop_Sub32x4, Iop_Sub64x2 };
649 vassert(size < 4);
650 return ops[size];
653 static IROp mkVecQSUBU ( UInt size ) {
654 const IROp ops[4]
655 = { Iop_QSub8Ux16, Iop_QSub16Ux8, Iop_QSub32Ux4, Iop_QSub64Ux2 };
656 vassert(size < 4);
657 return ops[size];
660 static IROp mkVecQSUBS ( UInt size ) {
661 const IROp ops[4]
662 = { Iop_QSub8Sx16, Iop_QSub16Sx8, Iop_QSub32Sx4, Iop_QSub64Sx2 };
663 vassert(size < 4);
664 return ops[size];
667 static IROp mkVecSARN ( UInt size ) {
668 const IROp ops[4]
669 = { Iop_SarN8x16, Iop_SarN16x8, Iop_SarN32x4, Iop_SarN64x2 };
670 vassert(size < 4);
671 return ops[size];
674 static IROp mkVecSHRN ( UInt size ) {
675 const IROp ops[4]
676 = { Iop_ShrN8x16, Iop_ShrN16x8, Iop_ShrN32x4, Iop_ShrN64x2 };
677 vassert(size < 4);
678 return ops[size];
681 static IROp mkVecSHLN ( UInt size ) {
682 const IROp ops[4]
683 = { Iop_ShlN8x16, Iop_ShlN16x8, Iop_ShlN32x4, Iop_ShlN64x2 };
684 vassert(size < 4);
685 return ops[size];
688 static IROp mkVecCATEVENLANES ( UInt size ) {
689 const IROp ops[4]
690 = { Iop_CatEvenLanes8x16, Iop_CatEvenLanes16x8,
691 Iop_CatEvenLanes32x4, Iop_InterleaveLO64x2 };
692 vassert(size < 4);
693 return ops[size];
696 static IROp mkVecCATODDLANES ( UInt size ) {
697 const IROp ops[4]
698 = { Iop_CatOddLanes8x16, Iop_CatOddLanes16x8,
699 Iop_CatOddLanes32x4, Iop_InterleaveHI64x2 };
700 vassert(size < 4);
701 return ops[size];
704 static IROp mkVecINTERLEAVELO ( UInt size ) {
705 const IROp ops[4]
706 = { Iop_InterleaveLO8x16, Iop_InterleaveLO16x8,
707 Iop_InterleaveLO32x4, Iop_InterleaveLO64x2 };
708 vassert(size < 4);
709 return ops[size];
712 static IROp mkVecINTERLEAVEHI ( UInt size ) {
713 const IROp ops[4]
714 = { Iop_InterleaveHI8x16, Iop_InterleaveHI16x8,
715 Iop_InterleaveHI32x4, Iop_InterleaveHI64x2 };
716 vassert(size < 4);
717 return ops[size];
720 static IROp mkVecMAXU ( UInt size ) {
721 const IROp ops[4]
722 = { Iop_Max8Ux16, Iop_Max16Ux8, Iop_Max32Ux4, Iop_Max64Ux2 };
723 vassert(size < 4);
724 return ops[size];
727 static IROp mkVecMAXS ( UInt size ) {
728 const IROp ops[4]
729 = { Iop_Max8Sx16, Iop_Max16Sx8, Iop_Max32Sx4, Iop_Max64Sx2 };
730 vassert(size < 4);
731 return ops[size];
734 static IROp mkVecMINU ( UInt size ) {
735 const IROp ops[4]
736 = { Iop_Min8Ux16, Iop_Min16Ux8, Iop_Min32Ux4, Iop_Min64Ux2 };
737 vassert(size < 4);
738 return ops[size];
741 static IROp mkVecMINS ( UInt size ) {
742 const IROp ops[4]
743 = { Iop_Min8Sx16, Iop_Min16Sx8, Iop_Min32Sx4, Iop_Min64Sx2 };
744 vassert(size < 4);
745 return ops[size];
748 static IROp mkVecMUL ( UInt size ) {
749 const IROp ops[4]
750 = { Iop_Mul8x16, Iop_Mul16x8, Iop_Mul32x4, Iop_INVALID };
751 vassert(size < 3);
752 return ops[size];
755 static IROp mkVecMULLU ( UInt sizeNarrow ) {
756 const IROp ops[4]
757 = { Iop_Mull8Ux8, Iop_Mull16Ux4, Iop_Mull32Ux2, Iop_INVALID };
758 vassert(sizeNarrow < 3);
759 return ops[sizeNarrow];
762 static IROp mkVecMULLS ( UInt sizeNarrow ) {
763 const IROp ops[4]
764 = { Iop_Mull8Sx8, Iop_Mull16Sx4, Iop_Mull32Sx2, Iop_INVALID };
765 vassert(sizeNarrow < 3);
766 return ops[sizeNarrow];
769 static IROp mkVecQDMULLS ( UInt sizeNarrow ) {
770 const IROp ops[4]
771 = { Iop_INVALID, Iop_QDMull16Sx4, Iop_QDMull32Sx2, Iop_INVALID };
772 vassert(sizeNarrow < 3);
773 return ops[sizeNarrow];
776 static IROp mkVecCMPEQ ( UInt size ) {
777 const IROp ops[4]
778 = { Iop_CmpEQ8x16, Iop_CmpEQ16x8, Iop_CmpEQ32x4, Iop_CmpEQ64x2 };
779 vassert(size < 4);
780 return ops[size];
783 static IROp mkVecCMPGTU ( UInt size ) {
784 const IROp ops[4]
785 = { Iop_CmpGT8Ux16, Iop_CmpGT16Ux8, Iop_CmpGT32Ux4, Iop_CmpGT64Ux2 };
786 vassert(size < 4);
787 return ops[size];
790 static IROp mkVecCMPGTS ( UInt size ) {
791 const IROp ops[4]
792 = { Iop_CmpGT8Sx16, Iop_CmpGT16Sx8, Iop_CmpGT32Sx4, Iop_CmpGT64Sx2 };
793 vassert(size < 4);
794 return ops[size];
797 static IROp mkVecABS ( UInt size ) {
798 const IROp ops[4]
799 = { Iop_Abs8x16, Iop_Abs16x8, Iop_Abs32x4, Iop_Abs64x2 };
800 vassert(size < 4);
801 return ops[size];
804 static IROp mkVecZEROHIxxOFV128 ( UInt size ) {
805 const IROp ops[4]
806 = { Iop_ZeroHI120ofV128, Iop_ZeroHI112ofV128,
807 Iop_ZeroHI96ofV128, Iop_ZeroHI64ofV128 };
808 vassert(size < 4);
809 return ops[size];
812 static IRExpr* mkU ( IRType ty, ULong imm ) {
813 switch (ty) {
814 case Ity_I32: return mkU32((UInt)(imm & 0xFFFFFFFFULL));
815 case Ity_I64: return mkU64(imm);
816 default: vpanic("mkU");
820 static IROp mkVecQDMULHIS ( UInt size ) {
821 const IROp ops[4]
822 = { Iop_INVALID, Iop_QDMulHi16Sx8, Iop_QDMulHi32Sx4, Iop_INVALID };
823 vassert(size < 4);
824 return ops[size];
827 static IROp mkVecQRDMULHIS ( UInt size ) {
828 const IROp ops[4]
829 = { Iop_INVALID, Iop_QRDMulHi16Sx8, Iop_QRDMulHi32Sx4, Iop_INVALID };
830 vassert(size < 4);
831 return ops[size];
834 static IROp mkVecQANDUQSH ( UInt size ) {
835 const IROp ops[4]
836 = { Iop_QandUQsh8x16, Iop_QandUQsh16x8,
837 Iop_QandUQsh32x4, Iop_QandUQsh64x2 };
838 vassert(size < 4);
839 return ops[size];
842 static IROp mkVecQANDSQSH ( UInt size ) {
843 const IROp ops[4]
844 = { Iop_QandSQsh8x16, Iop_QandSQsh16x8,
845 Iop_QandSQsh32x4, Iop_QandSQsh64x2 };
846 vassert(size < 4);
847 return ops[size];
850 static IROp mkVecQANDUQRSH ( UInt size ) {
851 const IROp ops[4]
852 = { Iop_QandUQRsh8x16, Iop_QandUQRsh16x8,
853 Iop_QandUQRsh32x4, Iop_QandUQRsh64x2 };
854 vassert(size < 4);
855 return ops[size];
858 static IROp mkVecQANDSQRSH ( UInt size ) {
859 const IROp ops[4]
860 = { Iop_QandSQRsh8x16, Iop_QandSQRsh16x8,
861 Iop_QandSQRsh32x4, Iop_QandSQRsh64x2 };
862 vassert(size < 4);
863 return ops[size];
866 static IROp mkVecSHU ( UInt size ) {
867 const IROp ops[4]
868 = { Iop_Sh8Ux16, Iop_Sh16Ux8, Iop_Sh32Ux4, Iop_Sh64Ux2 };
869 vassert(size < 4);
870 return ops[size];
873 static IROp mkVecSHS ( UInt size ) {
874 const IROp ops[4]
875 = { Iop_Sh8Sx16, Iop_Sh16Sx8, Iop_Sh32Sx4, Iop_Sh64Sx2 };
876 vassert(size < 4);
877 return ops[size];
880 static IROp mkVecRSHU ( UInt size ) {
881 const IROp ops[4]
882 = { Iop_Rsh8Ux16, Iop_Rsh16Ux8, Iop_Rsh32Ux4, Iop_Rsh64Ux2 };
883 vassert(size < 4);
884 return ops[size];
887 static IROp mkVecRSHS ( UInt size ) {
888 const IROp ops[4]
889 = { Iop_Rsh8Sx16, Iop_Rsh16Sx8, Iop_Rsh32Sx4, Iop_Rsh64Sx2 };
890 vassert(size < 4);
891 return ops[size];
894 static IROp mkVecNARROWUN ( UInt sizeNarrow ) {
895 const IROp ops[4]
896 = { Iop_NarrowUn16to8x8, Iop_NarrowUn32to16x4,
897 Iop_NarrowUn64to32x2, Iop_INVALID };
898 vassert(sizeNarrow < 4);
899 return ops[sizeNarrow];
902 static IROp mkVecQNARROWUNSU ( UInt sizeNarrow ) {
903 const IROp ops[4]
904 = { Iop_QNarrowUn16Sto8Ux8, Iop_QNarrowUn32Sto16Ux4,
905 Iop_QNarrowUn64Sto32Ux2, Iop_INVALID };
906 vassert(sizeNarrow < 4);
907 return ops[sizeNarrow];
910 static IROp mkVecQNARROWUNSS ( UInt sizeNarrow ) {
911 const IROp ops[4]
912 = { Iop_QNarrowUn16Sto8Sx8, Iop_QNarrowUn32Sto16Sx4,
913 Iop_QNarrowUn64Sto32Sx2, Iop_INVALID };
914 vassert(sizeNarrow < 4);
915 return ops[sizeNarrow];
918 static IROp mkVecQNARROWUNUU ( UInt sizeNarrow ) {
919 const IROp ops[4]
920 = { Iop_QNarrowUn16Uto8Ux8, Iop_QNarrowUn32Uto16Ux4,
921 Iop_QNarrowUn64Uto32Ux2, Iop_INVALID };
922 vassert(sizeNarrow < 4);
923 return ops[sizeNarrow];
926 static IROp mkVecQANDqshrNNARROWUU ( UInt sizeNarrow ) {
927 const IROp ops[4]
928 = { Iop_QandQShrNnarrow16Uto8Ux8, Iop_QandQShrNnarrow32Uto16Ux4,
929 Iop_QandQShrNnarrow64Uto32Ux2, Iop_INVALID };
930 vassert(sizeNarrow < 4);
931 return ops[sizeNarrow];
934 static IROp mkVecQANDqsarNNARROWSS ( UInt sizeNarrow ) {
935 const IROp ops[4]
936 = { Iop_QandQSarNnarrow16Sto8Sx8, Iop_QandQSarNnarrow32Sto16Sx4,
937 Iop_QandQSarNnarrow64Sto32Sx2, Iop_INVALID };
938 vassert(sizeNarrow < 4);
939 return ops[sizeNarrow];
942 static IROp mkVecQANDqsarNNARROWSU ( UInt sizeNarrow ) {
943 const IROp ops[4]
944 = { Iop_QandQSarNnarrow16Sto8Ux8, Iop_QandQSarNnarrow32Sto16Ux4,
945 Iop_QandQSarNnarrow64Sto32Ux2, Iop_INVALID };
946 vassert(sizeNarrow < 4);
947 return ops[sizeNarrow];
950 static IROp mkVecQANDqrshrNNARROWUU ( UInt sizeNarrow ) {
951 const IROp ops[4]
952 = { Iop_QandQRShrNnarrow16Uto8Ux8, Iop_QandQRShrNnarrow32Uto16Ux4,
953 Iop_QandQRShrNnarrow64Uto32Ux2, Iop_INVALID };
954 vassert(sizeNarrow < 4);
955 return ops[sizeNarrow];
958 static IROp mkVecQANDqrsarNNARROWSS ( UInt sizeNarrow ) {
959 const IROp ops[4]
960 = { Iop_QandQRSarNnarrow16Sto8Sx8, Iop_QandQRSarNnarrow32Sto16Sx4,
961 Iop_QandQRSarNnarrow64Sto32Sx2, Iop_INVALID };
962 vassert(sizeNarrow < 4);
963 return ops[sizeNarrow];
966 static IROp mkVecQANDqrsarNNARROWSU ( UInt sizeNarrow ) {
967 const IROp ops[4]
968 = { Iop_QandQRSarNnarrow16Sto8Ux8, Iop_QandQRSarNnarrow32Sto16Ux4,
969 Iop_QandQRSarNnarrow64Sto32Ux2, Iop_INVALID };
970 vassert(sizeNarrow < 4);
971 return ops[sizeNarrow];
974 static IROp mkVecQSHLNSATUU ( UInt size ) {
975 const IROp ops[4]
976 = { Iop_QShlNsatUU8x16, Iop_QShlNsatUU16x8,
977 Iop_QShlNsatUU32x4, Iop_QShlNsatUU64x2 };
978 vassert(size < 4);
979 return ops[size];
982 static IROp mkVecQSHLNSATSS ( UInt size ) {
983 const IROp ops[4]
984 = { Iop_QShlNsatSS8x16, Iop_QShlNsatSS16x8,
985 Iop_QShlNsatSS32x4, Iop_QShlNsatSS64x2 };
986 vassert(size < 4);
987 return ops[size];
990 static IROp mkVecQSHLNSATSU ( UInt size ) {
991 const IROp ops[4]
992 = { Iop_QShlNsatSU8x16, Iop_QShlNsatSU16x8,
993 Iop_QShlNsatSU32x4, Iop_QShlNsatSU64x2 };
994 vassert(size < 4);
995 return ops[size];
998 static IROp mkVecADDF ( UInt size ) {
999 const IROp ops[4]
1000 = { Iop_INVALID, Iop_Add16Fx8, Iop_Add32Fx4, Iop_Add64Fx2 };
1001 vassert(size < 4);
1002 return ops[size];
1005 static IROp mkVecMAXF ( UInt size ) {
1006 const IROp ops[4]
1007 = { Iop_INVALID, Iop_INVALID, Iop_Max32Fx4, Iop_Max64Fx2 };
1008 vassert(size < 4);
1009 return ops[size];
1012 static IROp mkVecMINF ( UInt size ) {
1013 const IROp ops[4]
1014 = { Iop_INVALID, Iop_INVALID, Iop_Min32Fx4, Iop_Min64Fx2 };
1015 vassert(size < 4);
1016 return ops[size];
1019 /* Generate IR to create 'arg rotated right by imm', for sane values
1020 of 'ty' and 'imm'. */
1021 static IRTemp mathROR ( IRType ty, IRTemp arg, UInt imm )
1023 UInt w = 0;
1024 if (ty == Ity_I64) {
1025 w = 64;
1026 } else {
1027 vassert(ty == Ity_I32);
1028 w = 32;
1030 vassert(w != 0);
1031 vassert(imm < w);
1032 if (imm == 0) {
1033 return arg;
1035 IRTemp res = newTemp(ty);
1036 assign(res, binop(mkOR(ty),
1037 binop(mkSHL(ty), mkexpr(arg), mkU8(w - imm)),
1038 binop(mkSHR(ty), mkexpr(arg), mkU8(imm)) ));
1039 return res;
1042 /* Generate IR to set the returned temp to either all-zeroes or
1043 all ones, as a copy of arg<imm>. */
1044 static IRTemp mathREPLICATE ( IRType ty, IRTemp arg, UInt imm )
1046 UInt w = 0;
1047 if (ty == Ity_I64) {
1048 w = 64;
1049 } else {
1050 vassert(ty == Ity_I32);
1051 w = 32;
1053 vassert(w != 0);
1054 vassert(imm < w);
1055 IRTemp res = newTemp(ty);
1056 assign(res, binop(mkSAR(ty),
1057 binop(mkSHL(ty), mkexpr(arg), mkU8(w - 1 - imm)),
1058 mkU8(w - 1)));
1059 return res;
1062 /* S-widen 8/16/32/64 bit int expr to 64. */
1063 static IRExpr* widenSto64 ( IRType srcTy, IRExpr* e )
1065 switch (srcTy) {
1066 case Ity_I64: return e;
1067 case Ity_I32: return unop(Iop_32Sto64, e);
1068 case Ity_I16: return unop(Iop_16Sto64, e);
1069 case Ity_I8: return unop(Iop_8Sto64, e);
1070 default: vpanic("widenSto64(arm64)");
1074 /* U-widen 8/16/32/64 bit int expr to 64. */
1075 static IRExpr* widenUto64 ( IRType srcTy, IRExpr* e )
1077 switch (srcTy) {
1078 case Ity_I64: return e;
1079 case Ity_I32: return unop(Iop_32Uto64, e);
1080 case Ity_I16: return unop(Iop_16Uto64, e);
1081 case Ity_I8: return unop(Iop_8Uto64, e);
1082 default: vpanic("widenUto64(arm64)");
1086 /* Narrow 64 bit int expr to 8/16/32/64. Clearly only some
1087 of these combinations make sense. */
1088 static IRExpr* narrowFrom64 ( IRType dstTy, IRExpr* e )
1090 switch (dstTy) {
1091 case Ity_I64: return e;
1092 case Ity_I32: return unop(Iop_64to32, e);
1093 case Ity_I16: return unop(Iop_64to16, e);
1094 case Ity_I8: return unop(Iop_64to8, e);
1095 default: vpanic("narrowFrom64(arm64)");
1100 /*------------------------------------------------------------*/
1101 /*--- Helpers for accessing guest registers. ---*/
1102 /*------------------------------------------------------------*/
/* Byte offsets of the individual fields of VexGuestARM64State, for use
   as the offset operand of guest-state Get and Put operations. */

/* 64-bit integer registers X0 .. X30. */
#define OFFB_X0             offsetof(VexGuestARM64State,guest_X0)
#define OFFB_X1             offsetof(VexGuestARM64State,guest_X1)
#define OFFB_X2             offsetof(VexGuestARM64State,guest_X2)
#define OFFB_X3             offsetof(VexGuestARM64State,guest_X3)
#define OFFB_X4             offsetof(VexGuestARM64State,guest_X4)
#define OFFB_X5             offsetof(VexGuestARM64State,guest_X5)
#define OFFB_X6             offsetof(VexGuestARM64State,guest_X6)
#define OFFB_X7             offsetof(VexGuestARM64State,guest_X7)
#define OFFB_X8             offsetof(VexGuestARM64State,guest_X8)
#define OFFB_X9             offsetof(VexGuestARM64State,guest_X9)
#define OFFB_X10            offsetof(VexGuestARM64State,guest_X10)
#define OFFB_X11            offsetof(VexGuestARM64State,guest_X11)
#define OFFB_X12            offsetof(VexGuestARM64State,guest_X12)
#define OFFB_X13            offsetof(VexGuestARM64State,guest_X13)
#define OFFB_X14            offsetof(VexGuestARM64State,guest_X14)
#define OFFB_X15            offsetof(VexGuestARM64State,guest_X15)
#define OFFB_X16            offsetof(VexGuestARM64State,guest_X16)
#define OFFB_X17            offsetof(VexGuestARM64State,guest_X17)
#define OFFB_X18            offsetof(VexGuestARM64State,guest_X18)
#define OFFB_X19            offsetof(VexGuestARM64State,guest_X19)
#define OFFB_X20            offsetof(VexGuestARM64State,guest_X20)
#define OFFB_X21            offsetof(VexGuestARM64State,guest_X21)
#define OFFB_X22            offsetof(VexGuestARM64State,guest_X22)
#define OFFB_X23            offsetof(VexGuestARM64State,guest_X23)
#define OFFB_X24            offsetof(VexGuestARM64State,guest_X24)
#define OFFB_X25            offsetof(VexGuestARM64State,guest_X25)
#define OFFB_X26            offsetof(VexGuestARM64State,guest_X26)
#define OFFB_X27            offsetof(VexGuestARM64State,guest_X27)
#define OFFB_X28            offsetof(VexGuestARM64State,guest_X28)
#define OFFB_X29            offsetof(VexGuestARM64State,guest_X29)
#define OFFB_X30            offsetof(VexGuestARM64State,guest_X30)

/* Stack pointer and program counter. */
#define OFFB_XSP            offsetof(VexGuestARM64State,guest_XSP)
#define OFFB_PC             offsetof(VexGuestARM64State,guest_PC)

/* The condition-code (flags) thunk. */
#define OFFB_CC_OP          offsetof(VexGuestARM64State,guest_CC_OP)
#define OFFB_CC_DEP1        offsetof(VexGuestARM64State,guest_CC_DEP1)
#define OFFB_CC_DEP2        offsetof(VexGuestARM64State,guest_CC_DEP2)
#define OFFB_CC_NDEP        offsetof(VexGuestARM64State,guest_CC_NDEP)

#define OFFB_TPIDR_EL0      offsetof(VexGuestARM64State,guest_TPIDR_EL0)
#define OFFB_NRADDR         offsetof(VexGuestARM64State,guest_NRADDR)

/* 128-bit vector registers Q0 .. Q31. */
#define OFFB_Q0             offsetof(VexGuestARM64State,guest_Q0)
#define OFFB_Q1             offsetof(VexGuestARM64State,guest_Q1)
#define OFFB_Q2             offsetof(VexGuestARM64State,guest_Q2)
#define OFFB_Q3             offsetof(VexGuestARM64State,guest_Q3)
#define OFFB_Q4             offsetof(VexGuestARM64State,guest_Q4)
#define OFFB_Q5             offsetof(VexGuestARM64State,guest_Q5)
#define OFFB_Q6             offsetof(VexGuestARM64State,guest_Q6)
#define OFFB_Q7             offsetof(VexGuestARM64State,guest_Q7)
#define OFFB_Q8             offsetof(VexGuestARM64State,guest_Q8)
#define OFFB_Q9             offsetof(VexGuestARM64State,guest_Q9)
#define OFFB_Q10            offsetof(VexGuestARM64State,guest_Q10)
#define OFFB_Q11            offsetof(VexGuestARM64State,guest_Q11)
#define OFFB_Q12            offsetof(VexGuestARM64State,guest_Q12)
#define OFFB_Q13            offsetof(VexGuestARM64State,guest_Q13)
#define OFFB_Q14            offsetof(VexGuestARM64State,guest_Q14)
#define OFFB_Q15            offsetof(VexGuestARM64State,guest_Q15)
#define OFFB_Q16            offsetof(VexGuestARM64State,guest_Q16)
#define OFFB_Q17            offsetof(VexGuestARM64State,guest_Q17)
#define OFFB_Q18            offsetof(VexGuestARM64State,guest_Q18)
#define OFFB_Q19            offsetof(VexGuestARM64State,guest_Q19)
#define OFFB_Q20            offsetof(VexGuestARM64State,guest_Q20)
#define OFFB_Q21            offsetof(VexGuestARM64State,guest_Q21)
#define OFFB_Q22            offsetof(VexGuestARM64State,guest_Q22)
#define OFFB_Q23            offsetof(VexGuestARM64State,guest_Q23)
#define OFFB_Q24            offsetof(VexGuestARM64State,guest_Q24)
#define OFFB_Q25            offsetof(VexGuestARM64State,guest_Q25)
#define OFFB_Q26            offsetof(VexGuestARM64State,guest_Q26)
#define OFFB_Q27            offsetof(VexGuestARM64State,guest_Q27)
#define OFFB_Q28            offsetof(VexGuestARM64State,guest_Q28)
#define OFFB_Q29            offsetof(VexGuestARM64State,guest_Q29)
#define OFFB_Q30            offsetof(VexGuestARM64State,guest_Q30)
#define OFFB_Q31            offsetof(VexGuestARM64State,guest_Q31)

/* Floating point control register and the QC flag. */
#define OFFB_FPCR           offsetof(VexGuestARM64State,guest_FPCR)
#define OFFB_QCFLAG         offsetof(VexGuestARM64State,guest_QCFLAG)

#define OFFB_CMSTART        offsetof(VexGuestARM64State,guest_CMSTART)
#define OFFB_CMLEN          offsetof(VexGuestARM64State,guest_CMLEN)

/* LL/SC bookkeeping fields. */
#define OFFB_LLSC_SIZE      offsetof(VexGuestARM64State,guest_LLSC_SIZE)
#define OFFB_LLSC_ADDR      offsetof(VexGuestARM64State,guest_LLSC_ADDR)
#define OFFB_LLSC_DATA_LO64 offsetof(VexGuestARM64State,guest_LLSC_DATA_LO64)
#define OFFB_LLSC_DATA_HI64 offsetof(VexGuestARM64State,guest_LLSC_DATA_HI64)
1192 /* ---------------- Integer registers ---------------- */
1194 static Int offsetIReg64 ( UInt iregNo )
1196 /* Do we care about endianness here? We do if sub-parts of integer
1197 registers are accessed. */
1198 switch (iregNo) {
1199 case 0: return OFFB_X0;
1200 case 1: return OFFB_X1;
1201 case 2: return OFFB_X2;
1202 case 3: return OFFB_X3;
1203 case 4: return OFFB_X4;
1204 case 5: return OFFB_X5;
1205 case 6: return OFFB_X6;
1206 case 7: return OFFB_X7;
1207 case 8: return OFFB_X8;
1208 case 9: return OFFB_X9;
1209 case 10: return OFFB_X10;
1210 case 11: return OFFB_X11;
1211 case 12: return OFFB_X12;
1212 case 13: return OFFB_X13;
1213 case 14: return OFFB_X14;
1214 case 15: return OFFB_X15;
1215 case 16: return OFFB_X16;
1216 case 17: return OFFB_X17;
1217 case 18: return OFFB_X18;
1218 case 19: return OFFB_X19;
1219 case 20: return OFFB_X20;
1220 case 21: return OFFB_X21;
1221 case 22: return OFFB_X22;
1222 case 23: return OFFB_X23;
1223 case 24: return OFFB_X24;
1224 case 25: return OFFB_X25;
1225 case 26: return OFFB_X26;
1226 case 27: return OFFB_X27;
1227 case 28: return OFFB_X28;
1228 case 29: return OFFB_X29;
1229 case 30: return OFFB_X30;
1230 /* but not 31 */
1231 default: vassert(0);
1235 static Int offsetIReg64orSP ( UInt iregNo )
1237 return iregNo == 31 ? OFFB_XSP : offsetIReg64(iregNo);
1240 static const HChar* nameIReg64orZR ( UInt iregNo )
1242 vassert(iregNo < 32);
1243 static const HChar* names[32]
1244 = { "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
1245 "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
1246 "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
1247 "x24", "x25", "x26", "x27", "x28", "x29", "x30", "xzr" };
1248 return names[iregNo];
1251 static const HChar* nameIReg64orSP ( UInt iregNo )
1253 if (iregNo == 31) {
1254 return "sp";
1256 vassert(iregNo < 31);
1257 return nameIReg64orZR(iregNo);
1260 static IRExpr* getIReg64orSP ( UInt iregNo )
1262 vassert(iregNo < 32);
1263 return IRExpr_Get( offsetIReg64orSP(iregNo), Ity_I64 );
1266 static IRExpr* getIReg64orZR ( UInt iregNo )
1268 if (iregNo == 31) {
1269 return mkU64(0);
1271 vassert(iregNo < 31);
1272 return IRExpr_Get( offsetIReg64orSP(iregNo), Ity_I64 );
1275 static void putIReg64orSP ( UInt iregNo, IRExpr* e )
1277 vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I64);
1278 stmt( IRStmt_Put(offsetIReg64orSP(iregNo), e) );
1281 static void putIReg64orZR ( UInt iregNo, IRExpr* e )
1283 vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I64);
1284 if (iregNo == 31) {
1285 return;
1287 vassert(iregNo < 31);
1288 stmt( IRStmt_Put(offsetIReg64orSP(iregNo), e) );
1291 static const HChar* nameIReg32orZR ( UInt iregNo )
1293 vassert(iregNo < 32);
1294 static const HChar* names[32]
1295 = { "w0", "w1", "w2", "w3", "w4", "w5", "w6", "w7",
1296 "w8", "w9", "w10", "w11", "w12", "w13", "w14", "w15",
1297 "w16", "w17", "w18", "w19", "w20", "w21", "w22", "w23",
1298 "w24", "w25", "w26", "w27", "w28", "w29", "w30", "wzr" };
1299 return names[iregNo];
1302 static const HChar* nameIReg32orSP ( UInt iregNo )
1304 if (iregNo == 31) {
1305 return "wsp";
1307 vassert(iregNo < 31);
1308 return nameIReg32orZR(iregNo);
1311 static IRExpr* getIReg32orSP ( UInt iregNo )
1313 vassert(iregNo < 32);
1314 return unop(Iop_64to32,
1315 IRExpr_Get( offsetIReg64orSP(iregNo), Ity_I64 ));
1318 static IRExpr* getIReg32orZR ( UInt iregNo )
1320 if (iregNo == 31) {
1321 return mkU32(0);
1323 vassert(iregNo < 31);
1324 return unop(Iop_64to32,
1325 IRExpr_Get( offsetIReg64orSP(iregNo), Ity_I64 ));
1328 static void putIReg32orSP ( UInt iregNo, IRExpr* e )
1330 vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32);
1331 stmt( IRStmt_Put(offsetIReg64orSP(iregNo), unop(Iop_32Uto64, e)) );
1334 static void putIReg32orZR ( UInt iregNo, IRExpr* e )
1336 vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32);
1337 if (iregNo == 31) {
1338 return;
1340 vassert(iregNo < 31);
1341 stmt( IRStmt_Put(offsetIReg64orSP(iregNo), unop(Iop_32Uto64, e)) );
1344 static const HChar* nameIRegOrSP ( Bool is64, UInt iregNo )
1346 vassert(is64 == True || is64 == False);
1347 return is64 ? nameIReg64orSP(iregNo) : nameIReg32orSP(iregNo);
1350 static const HChar* nameIRegOrZR ( Bool is64, UInt iregNo )
1352 vassert(is64 == True || is64 == False);
1353 return is64 ? nameIReg64orZR(iregNo) : nameIReg32orZR(iregNo);
1356 static IRExpr* getIRegOrZR ( Bool is64, UInt iregNo )
1358 vassert(is64 == True || is64 == False);
1359 return is64 ? getIReg64orZR(iregNo) : getIReg32orZR(iregNo);
1362 static void putIRegOrZR ( Bool is64, UInt iregNo, IRExpr* e )
1364 vassert(is64 == True || is64 == False);
1365 if (is64) putIReg64orZR(iregNo, e); else putIReg32orZR(iregNo, e);
1368 static void putPC ( IRExpr* e )
1370 vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I64);
1371 stmt( IRStmt_Put(OFFB_PC, e) );
1375 /* ---------------- Vector (Q) registers ---------------- */
1377 static Int offsetQReg128 ( UInt qregNo )
1379 /* We don't care about endianness at this point. It only becomes
1380 relevant when dealing with sections of these registers.*/
1381 switch (qregNo) {
1382 case 0: return OFFB_Q0;
1383 case 1: return OFFB_Q1;
1384 case 2: return OFFB_Q2;
1385 case 3: return OFFB_Q3;
1386 case 4: return OFFB_Q4;
1387 case 5: return OFFB_Q5;
1388 case 6: return OFFB_Q6;
1389 case 7: return OFFB_Q7;
1390 case 8: return OFFB_Q8;
1391 case 9: return OFFB_Q9;
1392 case 10: return OFFB_Q10;
1393 case 11: return OFFB_Q11;
1394 case 12: return OFFB_Q12;
1395 case 13: return OFFB_Q13;
1396 case 14: return OFFB_Q14;
1397 case 15: return OFFB_Q15;
1398 case 16: return OFFB_Q16;
1399 case 17: return OFFB_Q17;
1400 case 18: return OFFB_Q18;
1401 case 19: return OFFB_Q19;
1402 case 20: return OFFB_Q20;
1403 case 21: return OFFB_Q21;
1404 case 22: return OFFB_Q22;
1405 case 23: return OFFB_Q23;
1406 case 24: return OFFB_Q24;
1407 case 25: return OFFB_Q25;
1408 case 26: return OFFB_Q26;
1409 case 27: return OFFB_Q27;
1410 case 28: return OFFB_Q28;
1411 case 29: return OFFB_Q29;
1412 case 30: return OFFB_Q30;
1413 case 31: return OFFB_Q31;
1414 default: vassert(0);
1418 /* Write to a complete Qreg. */
1419 static void putQReg128 ( UInt qregNo, IRExpr* e )
1421 vassert(qregNo < 32);
1422 vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_V128);
1423 stmt( IRStmt_Put(offsetQReg128(qregNo), e) );
1426 /* Read a complete Qreg. */
1427 static IRExpr* getQReg128 ( UInt qregNo )
1429 vassert(qregNo < 32);
1430 return IRExpr_Get(offsetQReg128(qregNo), Ity_V128);
1433 /* Produce the IR type for some sub-part of a vector. For 32- and 64-
1434 bit sub-parts we can choose either integer or float types, and
1435 choose float on the basis that that is the common use case and so
1436 will give least interference with Put-to-Get forwarding later
1437 on. */
1438 static IRType preferredVectorSubTypeFromSize ( UInt szB )
1440 switch (szB) {
1441 case 1: return Ity_I8;
1442 case 2: return Ity_I16;
1443 case 4: return Ity_I32; //Ity_F32;
1444 case 8: return Ity_F64;
1445 case 16: return Ity_V128;
1446 default: vassert(0);
1450 /* Find the offset of the laneNo'th lane of type laneTy in the given
1451 Qreg. Since the host is little-endian, the least significant lane
1452 has the lowest offset. */
1453 static Int offsetQRegLane ( UInt qregNo, IRType laneTy, UInt laneNo )
1455 vassert(host_endness == VexEndnessLE);
1456 Int base = offsetQReg128(qregNo);
1457 /* Since the host is little-endian, the least significant lane
1458 will be at the lowest address. */
1459 /* Restrict this to known types, so as to avoid silently accepting
1460 stupid types. */
1461 UInt laneSzB = 0;
1462 switch (laneTy) {
1463 case Ity_I8: laneSzB = 1; break;
1464 case Ity_F16: case Ity_I16: laneSzB = 2; break;
1465 case Ity_F32: case Ity_I32: laneSzB = 4; break;
1466 case Ity_F64: case Ity_I64: laneSzB = 8; break;
1467 case Ity_V128: laneSzB = 16; break;
1468 default: break;
1470 vassert(laneSzB > 0);
1471 UInt minOff = laneNo * laneSzB;
1472 UInt maxOff = minOff + laneSzB - 1;
1473 vassert(maxOff < 16);
1474 return base + minOff;
1477 /* Put to the least significant lane of a Qreg. */
1478 static void putQRegLO ( UInt qregNo, IRExpr* e )
1480 IRType ty = typeOfIRExpr(irsb->tyenv, e);
1481 Int off = offsetQRegLane(qregNo, ty, 0);
1482 switch (ty) {
1483 case Ity_I8: case Ity_I16: case Ity_I32: case Ity_I64:
1484 case Ity_F16: case Ity_F32: case Ity_F64: case Ity_V128:
1485 break;
1486 default:
1487 vassert(0); // Other cases are probably invalid
1489 stmt(IRStmt_Put(off, e));
1492 /* Get from the least significant lane of a Qreg. */
1493 static IRExpr* getQRegLO ( UInt qregNo, IRType ty )
1495 Int off = offsetQRegLane(qregNo, ty, 0);
1496 switch (ty) {
1497 case Ity_I8:
1498 case Ity_F16: case Ity_I16:
1499 case Ity_I32: case Ity_I64:
1500 case Ity_F32: case Ity_F64: case Ity_V128:
1501 break;
1502 default:
1503 vassert(0); // Other cases are ATC
1505 return IRExpr_Get(off, ty);
1508 static const HChar* nameQRegLO ( UInt qregNo, IRType laneTy )
1510 static const HChar* namesQ[32]
1511 = { "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7",
1512 "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15",
1513 "q16", "q17", "q18", "q19", "q20", "q21", "q22", "q23",
1514 "q24", "q25", "q26", "q27", "q28", "q29", "q30", "q31" };
1515 static const HChar* namesD[32]
1516 = { "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7",
1517 "d8", "d9", "d10", "d11", "d12", "d13", "d14", "d15",
1518 "d16", "d17", "d18", "d19", "d20", "d21", "d22", "d23",
1519 "d24", "d25", "d26", "d27", "d28", "d29", "d30", "d31" };
1520 static const HChar* namesS[32]
1521 = { "s0", "s1", "s2", "s3", "s4", "s5", "s6", "s7",
1522 "s8", "s9", "s10", "s11", "s12", "s13", "s14", "s15",
1523 "s16", "s17", "s18", "s19", "s20", "s21", "s22", "s23",
1524 "s24", "s25", "s26", "s27", "s28", "s29", "s30", "s31" };
1525 static const HChar* namesH[32]
1526 = { "h0", "h1", "h2", "h3", "h4", "h5", "h6", "h7",
1527 "h8", "h9", "h10", "h11", "h12", "h13", "h14", "h15",
1528 "h16", "h17", "h18", "h19", "h20", "h21", "h22", "h23",
1529 "h24", "h25", "h26", "h27", "h28", "h29", "h30", "h31" };
1530 static const HChar* namesB[32]
1531 = { "b0", "b1", "b2", "b3", "b4", "b5", "b6", "b7",
1532 "b8", "b9", "b10", "b11", "b12", "b13", "b14", "b15",
1533 "b16", "b17", "b18", "b19", "b20", "b21", "b22", "b23",
1534 "b24", "b25", "b26", "b27", "b28", "b29", "b30", "b31" };
1535 vassert(qregNo < 32);
1536 switch (sizeofIRType(laneTy)) {
1537 case 1: return namesB[qregNo];
1538 case 2: return namesH[qregNo];
1539 case 4: return namesS[qregNo];
1540 case 8: return namesD[qregNo];
1541 case 16: return namesQ[qregNo];
1542 default: vassert(0);
1544 /*NOTREACHED*/
1547 static const HChar* nameQReg128 ( UInt qregNo )
1549 return nameQRegLO(qregNo, Ity_V128);
1552 /* Find the offset of the most significant half (8 bytes) of the given
1553 Qreg. This requires knowing the endianness of the host. */
1554 static Int offsetQRegHI64 ( UInt qregNo )
1556 return offsetQRegLane(qregNo, Ity_I64, 1);
1559 static IRExpr* getQRegHI64 ( UInt qregNo )
1561 return IRExpr_Get(offsetQRegHI64(qregNo), Ity_I64);
1564 static void putQRegHI64 ( UInt qregNo, IRExpr* e )
1566 IRType ty = typeOfIRExpr(irsb->tyenv, e);
1567 Int off = offsetQRegHI64(qregNo);
1568 switch (ty) {
1569 case Ity_I64: case Ity_F64:
1570 break;
1571 default:
1572 vassert(0); // Other cases are plain wrong
1574 stmt(IRStmt_Put(off, e));
1577 /* Put to a specified lane of a Qreg. */
1578 static void putQRegLane ( UInt qregNo, UInt laneNo, IRExpr* e )
1580 IRType laneTy = typeOfIRExpr(irsb->tyenv, e);
1581 Int off = offsetQRegLane(qregNo, laneTy, laneNo);
1582 switch (laneTy) {
1583 case Ity_F64: case Ity_I64:
1584 case Ity_I32: case Ity_F32:
1585 case Ity_I16: case Ity_F16:
1586 case Ity_I8:
1587 break;
1588 default:
1589 vassert(0); // Other cases are ATC
1591 stmt(IRStmt_Put(off, e));
1594 /* Get from a specified lane of a Qreg. */
1595 static IRExpr* getQRegLane ( UInt qregNo, UInt laneNo, IRType laneTy )
1597 Int off = offsetQRegLane(qregNo, laneTy, laneNo);
1598 switch (laneTy) {
1599 case Ity_I64: case Ity_I32: case Ity_I16: case Ity_I8:
1600 case Ity_F64: case Ity_F32: case Ity_F16:
1601 break;
1602 default:
1603 vassert(0); // Other cases are ATC
1605 return IRExpr_Get(off, laneTy);
1609 //ZZ /* ---------------- Misc registers ---------------- */
1610 //ZZ
1611 //ZZ static void putMiscReg32 ( UInt gsoffset,
1612 //ZZ IRExpr* e, /* :: Ity_I32 */
1613 //ZZ IRTemp guardT /* :: Ity_I32, 0 or 1 */)
1614 //ZZ {
1615 //ZZ switch (gsoffset) {
1616 //ZZ case OFFB_FPSCR: break;
1617 //ZZ case OFFB_QFLAG32: break;
1618 //ZZ case OFFB_GEFLAG0: break;
1619 //ZZ case OFFB_GEFLAG1: break;
1620 //ZZ case OFFB_GEFLAG2: break;
1621 //ZZ case OFFB_GEFLAG3: break;
1622 //ZZ default: vassert(0); /* awaiting more cases */
1623 //ZZ }
1624 //ZZ vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32);
1625 //ZZ
1626 //ZZ if (guardT == IRTemp_INVALID) {
1627 //ZZ /* unconditional write */
1628 //ZZ stmt(IRStmt_Put(gsoffset, e));
1629 //ZZ } else {
1630 //ZZ stmt(IRStmt_Put(
1631 //ZZ gsoffset,
1632 //ZZ IRExpr_ITE( binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0)),
1633 //ZZ e, IRExpr_Get(gsoffset, Ity_I32) )
1634 //ZZ ));
1635 //ZZ }
1636 //ZZ }
1637 //ZZ
1638 //ZZ static IRTemp get_ITSTATE ( void )
1639 //ZZ {
1640 //ZZ ASSERT_IS_THUMB;
1641 //ZZ IRTemp t = newTemp(Ity_I32);
1642 //ZZ assign(t, IRExpr_Get( OFFB_ITSTATE, Ity_I32));
1643 //ZZ return t;
1644 //ZZ }
1645 //ZZ
1646 //ZZ static void put_ITSTATE ( IRTemp t )
1647 //ZZ {
1648 //ZZ ASSERT_IS_THUMB;
1649 //ZZ stmt( IRStmt_Put( OFFB_ITSTATE, mkexpr(t)) );
1650 //ZZ }
1651 //ZZ
1652 //ZZ static IRTemp get_QFLAG32 ( void )
1653 //ZZ {
1654 //ZZ IRTemp t = newTemp(Ity_I32);
1655 //ZZ assign(t, IRExpr_Get( OFFB_QFLAG32, Ity_I32));
1656 //ZZ return t;
1657 //ZZ }
1658 //ZZ
1659 //ZZ static void put_QFLAG32 ( IRTemp t, IRTemp condT )
1660 //ZZ {
1661 //ZZ putMiscReg32( OFFB_QFLAG32, mkexpr(t), condT );
1662 //ZZ }
1663 //ZZ
1664 //ZZ /* Stickily set the 'Q' flag (APSR bit 27) of the APSR (Application Program
1665 //ZZ Status Register) to indicate that overflow or saturation occurred.
1666 //ZZ Nb: t must be zero to denote no saturation, and any nonzero
1667 //ZZ value to indicate saturation. */
1668 //ZZ static void or_into_QFLAG32 ( IRExpr* e, IRTemp condT )
1669 //ZZ {
1670 //ZZ IRTemp old = get_QFLAG32();
1671 //ZZ IRTemp nyu = newTemp(Ity_I32);
1672 //ZZ assign(nyu, binop(Iop_Or32, mkexpr(old), e) );
1673 //ZZ put_QFLAG32(nyu, condT);
1674 //ZZ }
1677 /* ---------------- FPCR stuff ---------------- */
1679 /* Generate IR to get hold of the rounding mode bits in FPCR, and
1680 convert them to IR format. Bind the final result to the
1681 returned temp. */
1682 static IRTemp /* :: Ity_I32 */ mk_get_IR_rounding_mode ( void )
1684 /* The ARMvfp encoding for rounding mode bits is:
1685 00 to nearest
1686 01 to +infinity
1687 10 to -infinity
1688 11 to zero
1689 We need to convert that to the IR encoding:
1690 00 to nearest (the default)
1691 10 to +infinity
1692 01 to -infinity
1693 11 to zero
1694 Which can be done by swapping bits 0 and 1.
1695 The rmode bits are at 23:22 in FPSCR.
1697 IRTemp armEncd = newTemp(Ity_I32);
1698 IRTemp swapped = newTemp(Ity_I32);
1699 /* Fish FPCR[23:22] out, and slide to bottom. Doesn't matter that
1700 we don't zero out bits 24 and above, since the assignment to
1701 'swapped' will mask them out anyway. */
1702 assign(armEncd,
1703 binop(Iop_Shr32, IRExpr_Get(OFFB_FPCR, Ity_I32), mkU8(22)));
1704 /* Now swap them. */
1705 assign(swapped,
1706 binop(Iop_Or32,
1707 binop(Iop_And32,
1708 binop(Iop_Shl32, mkexpr(armEncd), mkU8(1)),
1709 mkU32(2)),
1710 binop(Iop_And32,
1711 binop(Iop_Shr32, mkexpr(armEncd), mkU8(1)),
1712 mkU32(1))
1714 return swapped;
1718 /*------------------------------------------------------------*/
1719 /*--- Helpers for flag handling and conditional insns ---*/
1720 /*------------------------------------------------------------*/
1722 static const HChar* nameARM64Condcode ( ARM64Condcode cond )
1724 switch (cond) {
1725 case ARM64CondEQ: return "eq";
1726 case ARM64CondNE: return "ne";
1727 case ARM64CondCS: return "cs"; // or 'hs'
1728 case ARM64CondCC: return "cc"; // or 'lo'
1729 case ARM64CondMI: return "mi";
1730 case ARM64CondPL: return "pl";
1731 case ARM64CondVS: return "vs";
1732 case ARM64CondVC: return "vc";
1733 case ARM64CondHI: return "hi";
1734 case ARM64CondLS: return "ls";
1735 case ARM64CondGE: return "ge";
1736 case ARM64CondLT: return "lt";
1737 case ARM64CondGT: return "gt";
1738 case ARM64CondLE: return "le";
1739 case ARM64CondAL: return "al";
1740 case ARM64CondNV: return "nv";
1741 default: vpanic("name_ARM64Condcode");
1745 /* and a handy shorthand for it */
1746 static const HChar* nameCC ( ARM64Condcode cond ) {
1747 return nameARM64Condcode(cond);
1751 /* Build IR to calculate some particular condition from stored
1752 CC_OP/CC_DEP1/CC_DEP2/CC_NDEP. Returns an expression of type
1753 Ity_I64, suitable for narrowing. Although the return type is
1754 Ity_I64, the returned value is either 0 or 1. 'cond' must be
1755 :: Ity_I64 and must denote the condition to compute in
1756 bits 7:4, and be zero everywhere else.
1758 static IRExpr* mk_arm64g_calculate_condition_dyn ( IRExpr* cond )
1760 vassert(typeOfIRExpr(irsb->tyenv, cond) == Ity_I64);
1761 /* And 'cond' had better produce a value in which only bits 7:4 are
1762 nonzero. However, obviously we can't assert for that. */
1764 /* So what we're constructing for the first argument is
1765 "(cond << 4) | stored-operation".
1766 However, as per comments above, 'cond' must be supplied
1767 pre-shifted to this function.
1769 This pairing scheme requires that the ARM64_CC_OP_ values all fit
1770 in 4 bits. Hence we are passing a (COND, OP) pair in the lowest
1771 8 bits of the first argument. */
1772 IRExpr** args
1773 = mkIRExprVec_4(
1774 binop(Iop_Or64, IRExpr_Get(OFFB_CC_OP, Ity_I64), cond),
1775 IRExpr_Get(OFFB_CC_DEP1, Ity_I64),
1776 IRExpr_Get(OFFB_CC_DEP2, Ity_I64),
1777 IRExpr_Get(OFFB_CC_NDEP, Ity_I64)
1779 IRExpr* call
1780 = mkIRExprCCall(
1781 Ity_I64,
1782 0/*regparm*/,
1783 "arm64g_calculate_condition", &arm64g_calculate_condition,
1784 args
1787 /* Exclude the requested condition, OP and NDEP from definedness
1788 checking. We're only interested in DEP1 and DEP2. */
1789 call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
1790 return call;
1794 /* Build IR to calculate some particular condition from stored
1795 CC_OP/CC_DEP1/CC_DEP2/CC_NDEP. Returns an expression of type
1796 Ity_I64, suitable for narrowing. Although the return type is
1797 Ity_I64, the returned value is either 0 or 1.
1799 static IRExpr* mk_arm64g_calculate_condition ( ARM64Condcode cond )
1801 /* First arg is "(cond << 4) | condition". This requires that the
1802 ARM64_CC_OP_ values all fit in 4 bits. Hence we are passing a
1803 (COND, OP) pair in the lowest 8 bits of the first argument. */
1804 vassert(cond >= 0 && cond <= 15);
1805 return mk_arm64g_calculate_condition_dyn( mkU64(cond << 4) );
1809 /* Build IR to calculate just the carry flag from stored
1810 CC_OP/CC_DEP1/CC_DEP2/CC_NDEP. Returns an expression ::
1811 Ity_I64. */
1812 static IRExpr* mk_arm64g_calculate_flag_c ( void )
1814 IRExpr** args
1815 = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP, Ity_I64),
1816 IRExpr_Get(OFFB_CC_DEP1, Ity_I64),
1817 IRExpr_Get(OFFB_CC_DEP2, Ity_I64),
1818 IRExpr_Get(OFFB_CC_NDEP, Ity_I64) );
1819 IRExpr* call
1820 = mkIRExprCCall(
1821 Ity_I64,
1822 0/*regparm*/,
1823 "arm64g_calculate_flag_c", &arm64g_calculate_flag_c,
1824 args
1826 /* Exclude OP and NDEP from definedness checking. We're only
1827 interested in DEP1 and DEP2. */
1828 call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
1829 return call;
1833 //ZZ /* Build IR to calculate just the overflow flag from stored
1834 //ZZ CC_OP/CC_DEP1/CC_DEP2/CC_NDEP. Returns an expression ::
1835 //ZZ Ity_I32. */
1836 //ZZ static IRExpr* mk_armg_calculate_flag_v ( void )
1837 //ZZ {
1838 //ZZ IRExpr** args
1839 //ZZ = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP, Ity_I32),
1840 //ZZ IRExpr_Get(OFFB_CC_DEP1, Ity_I32),
1841 //ZZ IRExpr_Get(OFFB_CC_DEP2, Ity_I32),
1842 //ZZ IRExpr_Get(OFFB_CC_NDEP, Ity_I32) );
1843 //ZZ IRExpr* call
1844 //ZZ = mkIRExprCCall(
1845 //ZZ Ity_I32,
1846 //ZZ 0/*regparm*/,
1847 //ZZ "armg_calculate_flag_v", &armg_calculate_flag_v,
1848 //ZZ args
1849 //ZZ );
1850 //ZZ /* Exclude OP and NDEP from definedness checking. We're only
1851 //ZZ interested in DEP1 and DEP2. */
1852 //ZZ call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
1853 //ZZ return call;
1854 //ZZ }
1857 /* Build IR to calculate N Z C V in bits 31:28 of the
1858 returned word. */
1859 static IRExpr* mk_arm64g_calculate_flags_nzcv ( void )
1861 IRExpr** args
1862 = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP, Ity_I64),
1863 IRExpr_Get(OFFB_CC_DEP1, Ity_I64),
1864 IRExpr_Get(OFFB_CC_DEP2, Ity_I64),
1865 IRExpr_Get(OFFB_CC_NDEP, Ity_I64) );
1866 IRExpr* call
1867 = mkIRExprCCall(
1868 Ity_I64,
1869 0/*regparm*/,
1870 "arm64g_calculate_flags_nzcv", &arm64g_calculate_flags_nzcv,
1871 args
1873 /* Exclude OP and NDEP from definedness checking. We're only
1874 interested in DEP1 and DEP2. */
1875 call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
1876 return call;
1880 /* Build IR to set the flags thunk, in the most general case. */
1881 static
1882 void setFlags_D1_D2_ND ( UInt cc_op,
1883 IRTemp t_dep1, IRTemp t_dep2, IRTemp t_ndep )
1885 vassert(typeOfIRTemp(irsb->tyenv, t_dep1 == Ity_I64));
1886 vassert(typeOfIRTemp(irsb->tyenv, t_dep2 == Ity_I64));
1887 vassert(typeOfIRTemp(irsb->tyenv, t_ndep == Ity_I64));
1888 vassert(cc_op >= ARM64G_CC_OP_COPY && cc_op < ARM64G_CC_OP_NUMBER);
1889 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(cc_op) ));
1890 stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(t_dep1) ));
1891 stmt( IRStmt_Put( OFFB_CC_DEP2, mkexpr(t_dep2) ));
1892 stmt( IRStmt_Put( OFFB_CC_NDEP, mkexpr(t_ndep) ));
1895 /* Build IR to set the flags thunk after ADD or SUB. */
1896 static
1897 void setFlags_ADD_SUB ( Bool is64, Bool isSUB, IRTemp argL, IRTemp argR )
1899 IRTemp argL64 = IRTemp_INVALID;
1900 IRTemp argR64 = IRTemp_INVALID;
1901 IRTemp z64 = newTemp(Ity_I64);
1902 if (is64) {
1903 argL64 = argL;
1904 argR64 = argR;
1905 } else {
1906 argL64 = newTemp(Ity_I64);
1907 argR64 = newTemp(Ity_I64);
1908 assign(argL64, unop(Iop_32Uto64, mkexpr(argL)));
1909 assign(argR64, unop(Iop_32Uto64, mkexpr(argR)));
1911 assign(z64, mkU64(0));
1912 UInt cc_op = ARM64G_CC_OP_NUMBER;
1913 /**/ if ( isSUB && is64) { cc_op = ARM64G_CC_OP_SUB64; }
1914 else if ( isSUB && !is64) { cc_op = ARM64G_CC_OP_SUB32; }
1915 else if (!isSUB && is64) { cc_op = ARM64G_CC_OP_ADD64; }
1916 else if (!isSUB && !is64) { cc_op = ARM64G_CC_OP_ADD32; }
1917 else { vassert(0); }
1918 setFlags_D1_D2_ND(cc_op, argL64, argR64, z64);
1921 /* Build IR to set the flags thunk after ADC or SBC. */
1922 static
1923 void setFlags_ADC_SBC ( Bool is64, Bool isSBC,
1924 IRTemp argL, IRTemp argR, IRTemp oldC )
1926 IRTemp argL64 = IRTemp_INVALID;
1927 IRTemp argR64 = IRTemp_INVALID;
1928 IRTemp oldC64 = IRTemp_INVALID;
1929 if (is64) {
1930 argL64 = argL;
1931 argR64 = argR;
1932 oldC64 = oldC;
1933 } else {
1934 argL64 = newTemp(Ity_I64);
1935 argR64 = newTemp(Ity_I64);
1936 oldC64 = newTemp(Ity_I64);
1937 assign(argL64, unop(Iop_32Uto64, mkexpr(argL)));
1938 assign(argR64, unop(Iop_32Uto64, mkexpr(argR)));
1939 assign(oldC64, unop(Iop_32Uto64, mkexpr(oldC)));
1941 UInt cc_op = ARM64G_CC_OP_NUMBER;
1942 /**/ if ( isSBC && is64) { cc_op = ARM64G_CC_OP_SBC64; }
1943 else if ( isSBC && !is64) { cc_op = ARM64G_CC_OP_SBC32; }
1944 else if (!isSBC && is64) { cc_op = ARM64G_CC_OP_ADC64; }
1945 else if (!isSBC && !is64) { cc_op = ARM64G_CC_OP_ADC32; }
1946 else { vassert(0); }
1947 setFlags_D1_D2_ND(cc_op, argL64, argR64, oldC64);
1950 /* Build IR to set the flags thunk after ADD or SUB, if the given
1951 condition evaluates to True at run time. If not, the flags are set
1952 to the specified NZCV value. */
1953 static
1954 void setFlags_ADD_SUB_conditionally (
1955 Bool is64, Bool isSUB,
1956 IRTemp cond, IRTemp argL, IRTemp argR, UInt nzcv
1959 /* Generate IR as follows:
1960 CC_OP = ITE(cond, OP_{ADD,SUB}{32,64}, OP_COPY)
1961 CC_DEP1 = ITE(cond, argL64, nzcv << 28)
1962 CC_DEP2 = ITE(cond, argR64, 0)
1963 CC_NDEP = 0
1966 IRTemp z64 = newTemp(Ity_I64);
1967 assign(z64, mkU64(0));
1969 /* Establish the operation and operands for the True case. */
1970 IRTemp t_dep1 = IRTemp_INVALID;
1971 IRTemp t_dep2 = IRTemp_INVALID;
1972 UInt t_op = ARM64G_CC_OP_NUMBER;
1973 /**/ if ( isSUB && is64) { t_op = ARM64G_CC_OP_SUB64; }
1974 else if ( isSUB && !is64) { t_op = ARM64G_CC_OP_SUB32; }
1975 else if (!isSUB && is64) { t_op = ARM64G_CC_OP_ADD64; }
1976 else if (!isSUB && !is64) { t_op = ARM64G_CC_OP_ADD32; }
1977 else { vassert(0); }
1978 /* */
1979 if (is64) {
1980 t_dep1 = argL;
1981 t_dep2 = argR;
1982 } else {
1983 t_dep1 = newTemp(Ity_I64);
1984 t_dep2 = newTemp(Ity_I64);
1985 assign(t_dep1, unop(Iop_32Uto64, mkexpr(argL)));
1986 assign(t_dep2, unop(Iop_32Uto64, mkexpr(argR)));
1989 /* Establish the operation and operands for the False case. */
1990 IRTemp f_dep1 = newTemp(Ity_I64);
1991 IRTemp f_dep2 = z64;
1992 UInt f_op = ARM64G_CC_OP_COPY;
1993 assign(f_dep1, mkU64(nzcv << 28));
1995 /* Final thunk values */
1996 IRTemp dep1 = newTemp(Ity_I64);
1997 IRTemp dep2 = newTemp(Ity_I64);
1998 IRTemp op = newTemp(Ity_I64);
2000 assign(op, IRExpr_ITE(mkexpr(cond), mkU64(t_op), mkU64(f_op)));
2001 assign(dep1, IRExpr_ITE(mkexpr(cond), mkexpr(t_dep1), mkexpr(f_dep1)));
2002 assign(dep2, IRExpr_ITE(mkexpr(cond), mkexpr(t_dep2), mkexpr(f_dep2)));
2004 /* finally .. */
2005 stmt( IRStmt_Put( OFFB_CC_OP, mkexpr(op) ));
2006 stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(dep1) ));
2007 stmt( IRStmt_Put( OFFB_CC_DEP2, mkexpr(dep2) ));
2008 stmt( IRStmt_Put( OFFB_CC_NDEP, mkexpr(z64) ));
2011 /* Build IR to set the flags thunk after AND/OR/XOR or variants thereof. */
2012 static
2013 void setFlags_LOGIC ( Bool is64, IRTemp res )
2015 IRTemp res64 = IRTemp_INVALID;
2016 IRTemp z64 = newTemp(Ity_I64);
2017 UInt cc_op = ARM64G_CC_OP_NUMBER;
2018 if (is64) {
2019 res64 = res;
2020 cc_op = ARM64G_CC_OP_LOGIC64;
2021 } else {
2022 res64 = newTemp(Ity_I64);
2023 assign(res64, unop(Iop_32Uto64, mkexpr(res)));
2024 cc_op = ARM64G_CC_OP_LOGIC32;
2026 assign(z64, mkU64(0));
2027 setFlags_D1_D2_ND(cc_op, res64, z64, z64);
2030 /* Build IR to set the flags thunk to a given NZCV value. NZCV is
2031 located in bits 31:28 of the supplied value. */
2032 static
2033 void setFlags_COPY ( IRTemp nzcv_28x0 )
2035 IRTemp z64 = newTemp(Ity_I64);
2036 assign(z64, mkU64(0));
2037 setFlags_D1_D2_ND(ARM64G_CC_OP_COPY, nzcv_28x0, z64, z64);
2041 //ZZ /* Minor variant of the above that sets NDEP to zero (if it
2042 //ZZ sets it at all) */
2043 //ZZ static void setFlags_D1_D2 ( UInt cc_op, IRTemp t_dep1,
2044 //ZZ IRTemp t_dep2,
2045 //ZZ IRTemp guardT /* :: Ity_I32, 0 or 1 */ )
2046 //ZZ {
2047 //ZZ IRTemp z32 = newTemp(Ity_I32);
2048 //ZZ assign( z32, mkU32(0) );
2049 //ZZ setFlags_D1_D2_ND( cc_op, t_dep1, t_dep2, z32, guardT );
2050 //ZZ }
2051 //ZZ
2052 //ZZ
2053 //ZZ /* Minor variant of the above that sets DEP2 to zero (if it
2054 //ZZ sets it at all) */
2055 //ZZ static void setFlags_D1_ND ( UInt cc_op, IRTemp t_dep1,
2056 //ZZ IRTemp t_ndep,
2057 //ZZ IRTemp guardT /* :: Ity_I32, 0 or 1 */ )
2058 //ZZ {
2059 //ZZ IRTemp z32 = newTemp(Ity_I32);
2060 //ZZ assign( z32, mkU32(0) );
2061 //ZZ setFlags_D1_D2_ND( cc_op, t_dep1, z32, t_ndep, guardT );
2062 //ZZ }
2063 //ZZ
2064 //ZZ
2065 //ZZ /* Minor variant of the above that sets DEP2 and NDEP to zero (if it
2066 //ZZ sets them at all) */
2067 //ZZ static void setFlags_D1 ( UInt cc_op, IRTemp t_dep1,
2068 //ZZ IRTemp guardT /* :: Ity_I32, 0 or 1 */ )
2069 //ZZ {
2070 //ZZ IRTemp z32 = newTemp(Ity_I32);
2071 //ZZ assign( z32, mkU32(0) );
2072 //ZZ setFlags_D1_D2_ND( cc_op, t_dep1, z32, z32, guardT );
2073 //ZZ }
2076 /*------------------------------------------------------------*/
2077 /*--- Misc math helpers ---*/
2078 /*------------------------------------------------------------*/
2080 /* Generate IR for ((x & mask) >>u sh) | ((x << sh) & mask) */
2081 static IRTemp math_SWAPHELPER ( IRTemp x, ULong mask, Int sh )
2083 IRTemp maskT = newTemp(Ity_I64);
2084 IRTemp res = newTemp(Ity_I64);
2085 vassert(sh >= 1 && sh <= 63);
2086 assign(maskT, mkU64(mask));
2087 assign( res,
2088 binop(Iop_Or64,
2089 binop(Iop_Shr64,
2090 binop(Iop_And64,mkexpr(x),mkexpr(maskT)),
2091 mkU8(sh)),
2092 binop(Iop_And64,
2093 binop(Iop_Shl64,mkexpr(x),mkU8(sh)),
2094 mkexpr(maskT))
2097 return res;
2100 /* Generates byte swaps within 32-bit lanes. */
2101 static IRTemp math_UINTSWAP64 ( IRTemp src )
2103 IRTemp res;
2104 res = math_SWAPHELPER(src, 0xFF00FF00FF00FF00ULL, 8);
2105 res = math_SWAPHELPER(res, 0xFFFF0000FFFF0000ULL, 16);
2106 return res;
2109 /* Generates byte swaps within 16-bit lanes. */
2110 static IRTemp math_USHORTSWAP64 ( IRTemp src )
2112 IRTemp res;
2113 res = math_SWAPHELPER(src, 0xFF00FF00FF00FF00ULL, 8);
2114 return res;
2117 /* Generates a 64-bit byte swap. */
2118 static IRTemp math_BYTESWAP64 ( IRTemp src )
2120 IRTemp res;
2121 res = math_SWAPHELPER(src, 0xFF00FF00FF00FF00ULL, 8);
2122 res = math_SWAPHELPER(res, 0xFFFF0000FFFF0000ULL, 16);
2123 res = math_SWAPHELPER(res, 0xFFFFFFFF00000000ULL, 32);
2124 return res;
2127 /* Generates a 64-bit bit swap. */
2128 static IRTemp math_BITSWAP64 ( IRTemp src )
2130 IRTemp res;
2131 res = math_SWAPHELPER(src, 0xAAAAAAAAAAAAAAAAULL, 1);
2132 res = math_SWAPHELPER(res, 0xCCCCCCCCCCCCCCCCULL, 2);
2133 res = math_SWAPHELPER(res, 0xF0F0F0F0F0F0F0F0ULL, 4);
2134 return math_BYTESWAP64(res);
2137 /* Duplicates the bits at the bottom of the given word to fill the
2138 whole word. src :: Ity_I64 is assumed to have zeroes everywhere
2139 except for the bottom bits. */
2140 static IRTemp math_DUP_TO_64 ( IRTemp src, IRType srcTy )
2142 if (srcTy == Ity_I8) {
2143 IRTemp t16 = newTemp(Ity_I64);
2144 assign(t16, binop(Iop_Or64, mkexpr(src),
2145 binop(Iop_Shl64, mkexpr(src), mkU8(8))));
2146 IRTemp t32 = newTemp(Ity_I64);
2147 assign(t32, binop(Iop_Or64, mkexpr(t16),
2148 binop(Iop_Shl64, mkexpr(t16), mkU8(16))));
2149 IRTemp t64 = newTemp(Ity_I64);
2150 assign(t64, binop(Iop_Or64, mkexpr(t32),
2151 binop(Iop_Shl64, mkexpr(t32), mkU8(32))));
2152 return t64;
2154 if (srcTy == Ity_I16) {
2155 IRTemp t32 = newTemp(Ity_I64);
2156 assign(t32, binop(Iop_Or64, mkexpr(src),
2157 binop(Iop_Shl64, mkexpr(src), mkU8(16))));
2158 IRTemp t64 = newTemp(Ity_I64);
2159 assign(t64, binop(Iop_Or64, mkexpr(t32),
2160 binop(Iop_Shl64, mkexpr(t32), mkU8(32))));
2161 return t64;
2163 if (srcTy == Ity_I32) {
2164 IRTemp t64 = newTemp(Ity_I64);
2165 assign(t64, binop(Iop_Or64, mkexpr(src),
2166 binop(Iop_Shl64, mkexpr(src), mkU8(32))));
2167 return t64;
2169 if (srcTy == Ity_I64) {
2170 return src;
2172 vassert(0);
2176 /* Duplicates the src element exactly so as to fill a V128 value. */
2177 static IRTemp math_DUP_TO_V128 ( IRTemp src, IRType srcTy )
2179 IRTemp res = newTempV128();
2180 if (srcTy == Ity_F64) {
2181 IRTemp i64 = newTemp(Ity_I64);
2182 assign(i64, unop(Iop_ReinterpF64asI64, mkexpr(src)));
2183 assign(res, binop(Iop_64HLtoV128, mkexpr(i64), mkexpr(i64)));
2184 return res;
2186 if (srcTy == Ity_F32) {
2187 IRTemp i64a = newTemp(Ity_I64);
2188 assign(i64a, unop(Iop_32Uto64, unop(Iop_ReinterpF32asI32, mkexpr(src))));
2189 IRTemp i64b = newTemp(Ity_I64);
2190 assign(i64b, binop(Iop_Or64, binop(Iop_Shl64, mkexpr(i64a), mkU8(32)),
2191 mkexpr(i64a)));
2192 assign(res, binop(Iop_64HLtoV128, mkexpr(i64b), mkexpr(i64b)));
2193 return res;
2195 if (srcTy == Ity_I64) {
2196 assign(res, binop(Iop_64HLtoV128, mkexpr(src), mkexpr(src)));
2197 return res;
2199 if (srcTy == Ity_I32 || srcTy == Ity_I16 || srcTy == Ity_I8) {
2200 IRTemp t1 = newTemp(Ity_I64);
2201 assign(t1, widenUto64(srcTy, mkexpr(src)));
2202 IRTemp t2 = math_DUP_TO_64(t1, srcTy);
2203 assign(res, binop(Iop_64HLtoV128, mkexpr(t2), mkexpr(t2)));
2204 return res;
2206 vassert(0);
2210 /* |fullWidth| is a full V128 width result. Depending on bitQ,
2211 zero out the upper half. */
2212 static IRExpr* math_MAYBE_ZERO_HI64 ( UInt bitQ, IRTemp fullWidth )
2214 if (bitQ == 1) return mkexpr(fullWidth);
2215 if (bitQ == 0) return unop(Iop_ZeroHI64ofV128, mkexpr(fullWidth));
2216 vassert(0);
2219 /* The same, but from an expression instead. */
2220 static IRExpr* math_MAYBE_ZERO_HI64_fromE ( UInt bitQ, IRExpr* fullWidth )
2222 IRTemp fullWidthT = newTempV128();
2223 assign(fullWidthT, fullWidth);
2224 return math_MAYBE_ZERO_HI64(bitQ, fullWidthT);
2228 /*------------------------------------------------------------*/
2229 /*--- FP comparison helpers ---*/
2230 /*------------------------------------------------------------*/
2232 /* irRes :: Ity_I32 holds a floating point comparison result encoded
2233 as an IRCmpF64Result. Generate code to convert it to an
2234 ARM64-encoded (N,Z,C,V) group in the lowest 4 bits of an I64 value.
2235 Assign a new temp to hold that value, and return the temp. */
2236 static
2237 IRTemp mk_convert_IRCmpF64Result_to_NZCV ( IRTemp irRes32 )
2239 IRTemp ix = newTemp(Ity_I64);
2240 IRTemp termL = newTemp(Ity_I64);
2241 IRTemp termR = newTemp(Ity_I64);
2242 IRTemp nzcv = newTemp(Ity_I64);
2243 IRTemp irRes = newTemp(Ity_I64);
2245 /* This is where the fun starts. We have to convert 'irRes' from
2246 an IR-convention return result (IRCmpF64Result) to an
2247 ARM-encoded (N,Z,C,V) group. The final result is in the bottom
2248 4 bits of 'nzcv'. */
2249 /* Map compare result from IR to ARM(nzcv) */
2251 FP cmp result | IR | ARM(nzcv)
2252 --------------------------------
2253 UN 0x45 0011
2254 LT 0x01 1000
2255 GT 0x00 0010
2256 EQ 0x40 0110
2258 /* Now since you're probably wondering WTF ..
2260 ix fishes the useful bits out of the IR value, bits 6 and 0, and
2261 places them side by side, giving a number which is 0, 1, 2 or 3.
2263 termL is a sequence cooked up by GNU superopt. It converts ix
2264 into an almost correct value NZCV value (incredibly), except
2265 for the case of UN, where it produces 0100 instead of the
2266 required 0011.
2268 termR is therefore a correction term, also computed from ix. It
2269 is 1 in the UN case and 0 for LT, GT and UN. Hence, to get
2270 the final correct value, we subtract termR from termL.
2272 Don't take my word for it. There's a test program at the bottom
2273 of guest_arm_toIR.c, to try this out with.
2275 assign(irRes, unop(Iop_32Uto64, mkexpr(irRes32)));
2277 assign(
2279 binop(Iop_Or64,
2280 binop(Iop_And64,
2281 binop(Iop_Shr64, mkexpr(irRes), mkU8(5)),
2282 mkU64(3)),
2283 binop(Iop_And64, mkexpr(irRes), mkU64(1))));
2285 assign(
2286 termL,
2287 binop(Iop_Add64,
2288 binop(Iop_Shr64,
2289 binop(Iop_Sub64,
2290 binop(Iop_Shl64,
2291 binop(Iop_Xor64, mkexpr(ix), mkU64(1)),
2292 mkU8(62)),
2293 mkU64(1)),
2294 mkU8(61)),
2295 mkU64(1)));
2297 assign(
2298 termR,
2299 binop(Iop_And64,
2300 binop(Iop_And64,
2301 mkexpr(ix),
2302 binop(Iop_Shr64, mkexpr(ix), mkU8(1))),
2303 mkU64(1)));
2305 assign(nzcv, binop(Iop_Sub64, mkexpr(termL), mkexpr(termR)));
2306 return nzcv;
2310 /*------------------------------------------------------------*/
2311 /*--- Data processing (immediate) ---*/
2312 /*------------------------------------------------------------*/
2314 /* Helper functions for supporting "DecodeBitMasks" */
2316 static ULong dbm_ROR ( Int width, ULong x, Int rot )
2318 vassert(width > 0 && width <= 64);
2319 vassert(rot >= 0 && rot < width);
2320 if (rot == 0) return x;
2321 ULong res = x >> rot;
2322 res |= (x << (width - rot));
2323 if (width < 64)
2324 res &= ((1ULL << width) - 1);
2325 return res;
2328 static ULong dbm_RepTo64( Int esize, ULong x )
2330 switch (esize) {
2331 case 64:
2332 return x;
2333 case 32:
2334 x &= 0xFFFFFFFF; x |= (x << 32);
2335 return x;
2336 case 16:
2337 x &= 0xFFFF; x |= (x << 16); x |= (x << 32);
2338 return x;
2339 case 8:
2340 x &= 0xFF; x |= (x << 8); x |= (x << 16); x |= (x << 32);
2341 return x;
2342 case 4:
2343 x &= 0xF; x |= (x << 4); x |= (x << 8);
2344 x |= (x << 16); x |= (x << 32);
2345 return x;
2346 case 2:
2347 x &= 0x3; x |= (x << 2); x |= (x << 4); x |= (x << 8);
2348 x |= (x << 16); x |= (x << 32);
2349 return x;
2350 default:
2351 break;
2353 vpanic("dbm_RepTo64");
2354 /*NOTREACHED*/
2355 return 0;
2358 static Int dbm_highestSetBit ( ULong x )
2360 Int i;
2361 for (i = 63; i >= 0; i--) {
2362 if (x & (1ULL << i))
2363 return i;
2365 vassert(x == 0);
2366 return -1;
2369 static
2370 Bool dbm_DecodeBitMasks ( /*OUT*/ULong* wmask, /*OUT*/ULong* tmask,
2371 ULong immN, ULong imms, ULong immr, Bool immediate,
2372 UInt M /*32 or 64*/)
2374 vassert(immN < (1ULL << 1));
2375 vassert(imms < (1ULL << 6));
2376 vassert(immr < (1ULL << 6));
2377 vassert(immediate == False || immediate == True);
2378 vassert(M == 32 || M == 64);
2380 Int len = dbm_highestSetBit( ((immN << 6) & 64) | ((~imms) & 63) );
2381 if (len < 1) { /* printf("fail1\n"); */ return False; }
2382 vassert(len <= 6);
2383 vassert(M >= (1 << len));
2385 vassert(len >= 1 && len <= 6);
2386 ULong levels = // (zeroes(6 - len) << (6-len)) | ones(len);
2387 (1UL << len) - 1;
2388 vassert(levels >= 1 && levels <= 63);
2390 if (immediate && ((imms & levels) == levels)) {
2391 /* printf("fail2 imms %llu levels %llu len %d\n", imms, levels, len); */
2392 return False;
2395 ULong S = imms & levels;
2396 ULong R = immr & levels;
2397 Int diff = S - R;
2398 diff &= 63;
2399 Int esize = 1 << len;
2400 vassert(2 <= esize && esize <= 64);
2402 /* Be careful of these (1ULL << (S+1)) - 1 expressions, and the
2403 same below with d. S can be 63 in which case we have an out of
2404 range and hence undefined shift. */
2405 vassert(S <= 63);
2406 vassert(esize >= (S+1));
2407 ULong elem_s = // Zeroes(esize-(S+1)):Ones(S+1)
2408 //(1ULL << (S+1)) - 1;
2409 ((1ULL << S) - 1) + (1ULL << S);
2411 Int d = // diff<len-1:0>
2412 diff & ((1 << len)-1);
2413 vassert(esize >= (d+1));
2414 vassert(d >= 0 && d <= 63);
2416 ULong elem_d = // Zeroes(esize-(d+1)):Ones(d+1)
2417 //(1ULL << (d+1)) - 1;
2418 ((1ULL << d) - 1) + (1ULL << d);
2420 if (esize != 64) vassert(elem_s < (1ULL << esize));
2421 if (esize != 64) vassert(elem_d < (1ULL << esize));
2423 if (wmask) *wmask = dbm_RepTo64(esize, dbm_ROR(esize, elem_s, R));
2424 if (tmask) *tmask = dbm_RepTo64(esize, elem_d);
2426 return True;
// Decode one instruction of the "data processing (immediate)" group
// and generate IR for it.  The cases handled, in order, are:
// ADD/SUB{,S} imm12, ADR/ADRP, logical (immediate), MOV{Z,N,K},
// {U,S,}BFM and EXTR.
// Returns True as soon as one case has accepted the encoding and
// emitted its IR.  Returns False if no case accepted it; in that case
// a diagnostic line is printed when |sigill_diag| is set.
// |dres| is not referenced by any case in this function.
2430 static
2431 Bool dis_ARM64_data_processing_immediate(/*MB_OUT*/DisResult* dres,
2432 UInt insn, Bool sigill_diag)
// INSN(hi,lo) is used throughout to pull a bit field out of |insn|
// (presumably bits hi..lo inclusive; SLICE_UInt is defined elsewhere).
// It is #undef'd again at the end of the function.
2434 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
2436 /* insn[28:23]
2437 10000x PC-rel addressing
2438 10001x Add/subtract (immediate)
2439 100100 Logical (immediate)
2440 100101 Move Wide (immediate)
2441 100110 Bitfield
2442 100111 Extract
2445 /* ------------------ ADD/SUB{,S} imm12 ------------------ */
2446 if (INSN(28,24) == BITS5(1,0,0,0,1)) {
2447 Bool is64 = INSN(31,31) == 1;
2448 Bool isSub = INSN(30,30) == 1;
2449 Bool setCC = INSN(29,29) == 1;
2450 UInt sh = INSN(23,22);
2451 UInt uimm12 = INSN(21,10);
2452 UInt nn = INSN(9,5);
2453 UInt dd = INSN(4,0);
2454 const HChar* nm = isSub ? "sub" : "add";
2455 if (sh >= 2) {
2456 /* Invalid; fall through */
2457 } else {
2458 vassert(sh <= 1);
// sh == 1 selects the form in which the immediate is shifted left by
// 12 bits; sh == 0 leaves it as is.
2459 uimm12 <<= (12 * sh);
2460 if (is64) {
2461 IRTemp argL = newTemp(Ity_I64);
2462 IRTemp argR = newTemp(Ity_I64);
2463 IRTemp res = newTemp(Ity_I64);
2464 assign(argL, getIReg64orSP(nn));
2465 assign(argR, mkU64(uimm12));
2466 assign(res, binop(isSub ? Iop_Sub64 : Iop_Add64,
2467 mkexpr(argL), mkexpr(argR)));
// The flag-setting form writes the destination through the "orZR"
// helper, the plain form through the "orSP" helper.  The flags thunk
// is given the two operands, not the result.
2468 if (setCC) {
2469 putIReg64orZR(dd, mkexpr(res));
2470 setFlags_ADD_SUB(True/*is64*/, isSub, argL, argR);
2471 DIP("%ss %s, %s, 0x%x\n",
2472 nm, nameIReg64orZR(dd), nameIReg64orSP(nn), uimm12);
2473 } else {
2474 putIReg64orSP(dd, mkexpr(res));
2475 DIP("%s %s, %s, 0x%x\n",
2476 nm, nameIReg64orSP(dd), nameIReg64orSP(nn), uimm12);
2478 } else {
2479 IRTemp argL = newTemp(Ity_I32);
2480 IRTemp argR = newTemp(Ity_I32);
2481 IRTemp res = newTemp(Ity_I32);
2482 assign(argL, getIReg32orSP(nn));
2483 assign(argR, mkU32(uimm12));
2484 assign(res, binop(isSub ? Iop_Sub32 : Iop_Add32,
2485 mkexpr(argL), mkexpr(argR)));
2486 if (setCC) {
2487 putIReg32orZR(dd, mkexpr(res));
2488 setFlags_ADD_SUB(False/*!is64*/, isSub, argL, argR);
2489 DIP("%ss %s, %s, 0x%x\n",
2490 nm, nameIReg32orZR(dd), nameIReg32orSP(nn), uimm12);
2491 } else {
2492 putIReg32orSP(dd, mkexpr(res));
2493 DIP("%s %s, %s, 0x%x\n",
2494 nm, nameIReg32orSP(dd), nameIReg32orSP(nn), uimm12);
2497 return True;
2501 /* -------------------- ADR/ADRP -------------------- */
2502 if (INSN(28,24) == BITS5(1,0,0,0,0)) {
2503 UInt bP = INSN(31,31);
2504 UInt immLo = INSN(30,29);
2505 UInt immHi = INSN(23,5);
2506 UInt rD = INSN(4,0);
// immHi:immLo are concatenated into one field, which sx_to_64 then
// (presumably) sign-extends from 21 bits.  With bP set (ADRP) the
// offset is shifted left by 12 and added to the current instruction
// address with its low 12 bits cleared; otherwise (ADR) it is added
// to the current instruction address unscaled.  The target is known
// at translation time, so it is written as a constant.
2507 ULong uimm = (immHi << 2) | immLo;
2508 ULong simm = sx_to_64(uimm, 21);
2509 ULong val;
2510 if (bP) {
2511 val = (guest_PC_curr_instr & 0xFFFFFFFFFFFFF000ULL) + (simm << 12);
2512 } else {
2513 val = guest_PC_curr_instr + simm;
2515 putIReg64orZR(rD, mkU64(val));
2516 DIP("adr%s %s, 0x%llx\n", bP ? "p" : "", nameIReg64orZR(rD), val);
2517 return True;
2520 /* -------------------- LOGIC(imm) -------------------- */
2521 if (INSN(28,23) == BITS6(1,0,0,1,0,0)) {
2522 /* 31 30 28 22 21 15 9 4
2523 sf op 100100 N immr imms Rn Rd
2524 op=00: AND Rd|SP, Rn, #imm
2525 op=01: ORR Rd|SP, Rn, #imm
2526 op=10: EOR Rd|SP, Rn, #imm
2527 op=11: ANDS Rd|ZR, Rn, #imm
2529 Bool is64 = INSN(31,31) == 1;
2530 UInt op = INSN(30,29);
2531 UInt N = INSN(22,22);
2532 UInt immR = INSN(21,16);
2533 UInt immS = INSN(15,10);
2534 UInt nn = INSN(9,5);
2535 UInt dd = INSN(4,0);
2536 ULong imm = 0;
2537 Bool ok;
2538 if (N == 1 && !is64)
2539 goto after_logic_imm; /* not allowed; fall through */
// Only the wmask output is wanted here, hence NULL for tmask.  If the
// decoder rejects the N:immr:imms combination, this case is abandoned
// and decoding continues with the cases below.
2540 ok = dbm_DecodeBitMasks(&imm, NULL,
2541 N, immS, immR, True, is64 ? 64 : 32);
2542 if (!ok)
2543 goto after_logic_imm;
// All three tables are indexed by |op|; entry 3 (ANDS) uses the same
// IROp as entry 0 (AND) and differs only in setting the flags.
2545 const HChar* names[4] = { "and", "orr", "eor", "ands" };
2546 const IROp ops64[4] = { Iop_And64, Iop_Or64, Iop_Xor64, Iop_And64 };
2547 const IROp ops32[4] = { Iop_And32, Iop_Or32, Iop_Xor32, Iop_And32 };
2549 vassert(op < 4);
2550 if (is64) {
2551 IRExpr* argL = getIReg64orZR(nn);
2552 IRExpr* argR = mkU64(imm);
2553 IRTemp res = newTemp(Ity_I64);
2554 assign(res, binop(ops64[op], argL, argR));
2555 if (op < 3) {
2556 putIReg64orSP(dd, mkexpr(res));
2557 DIP("%s %s, %s, 0x%llx\n", names[op],
2558 nameIReg64orSP(dd), nameIReg64orZR(nn), imm);
2559 } else {
2560 putIReg64orZR(dd, mkexpr(res));
2561 setFlags_LOGIC(True/*is64*/, res);
2562 DIP("%s %s, %s, 0x%llx\n", names[op],
2563 nameIReg64orZR(dd), nameIReg64orZR(nn), imm);
2565 } else {
2566 IRExpr* argL = getIReg32orZR(nn);
2567 IRExpr* argR = mkU32((UInt)imm);
2568 IRTemp res = newTemp(Ity_I32);
2569 assign(res, binop(ops32[op], argL, argR));
2570 if (op < 3) {
2571 putIReg32orSP(dd, mkexpr(res));
2572 DIP("%s %s, %s, 0x%x\n", names[op],
2573 nameIReg32orSP(dd), nameIReg32orZR(nn), (UInt)imm);
2574 } else {
2575 putIReg32orZR(dd, mkexpr(res));
2576 setFlags_LOGIC(False/*!is64*/, res);
2577 DIP("%s %s, %s, 0x%x\n", names[op],
2578 nameIReg32orZR(dd), nameIReg32orZR(nn), (UInt)imm);
2581 return True;
2583 after_logic_imm:
2585 /* -------------------- MOV{Z,N,K} -------------------- */
2586 if (INSN(28,23) == BITS6(1,0,0,1,0,1)) {
2587 /* 31 30 28 22 20 4
2588 | | | | | |
2589 sf 10 100 101 hw imm16 Rd MOV(Z) Rd, (imm16 << (16*hw))
2590 sf 00 100 101 hw imm16 Rd MOV(N) Rd, ~(imm16 << (16*hw))
2591 sf 11 100 101 hw imm16 Rd MOV(K) Rd, (imm16 << (16*hw))
2593 Bool is64 = INSN(31,31) == 1;
2594 UInt subopc = INSN(30,29);
2595 UInt hw = INSN(22,21);
2596 UInt imm16 = INSN(20,5);
2597 UInt dd = INSN(4,0);
// subopc 01 has no meaning here, and in the 32-bit form hw may only
// be 0 or 1 so that the shifted immediate stays below 2^32 (asserted
// just below).
2598 if (subopc == BITS2(0,1) || (!is64 && hw >= 2)) {
2599 /* invalid; fall through */
2600 } else {
2601 ULong imm64 = ((ULong)imm16) << (16 * hw);
2602 if (!is64)
2603 vassert(imm64 < 0x100000000ULL);
2604 switch (subopc) {
2605 case BITS2(1,0): // MOVZ
2606 putIRegOrZR(is64, dd, is64 ? mkU64(imm64) : mkU32((UInt)imm64));
2607 DIP("movz %s, 0x%llx\n", nameIRegOrZR(is64, dd), imm64);
2608 break;
2609 case BITS2(0,0): // MOVN
2610 imm64 = ~imm64;
2611 if (!is64)
2612 imm64 &= 0xFFFFFFFFULL;
2613 putIRegOrZR(is64, dd, is64 ? mkU64(imm64) : mkU32((UInt)imm64));
2614 DIP("movn %s, 0x%llx\n", nameIRegOrZR(is64, dd), imm64);
2615 break;
2616 case BITS2(1,1): // MOVK
2617 /* This is more complex. We are inserting a slice into
2618 the destination register, so we need to have the old
2619 value of it. */
2620 if (is64) {
2621 IRTemp old = newTemp(Ity_I64);
2622 assign(old, getIReg64orZR(dd));
2623 ULong mask = 0xFFFFULL << (16 * hw);
2624 IRExpr* res
2625 = binop(Iop_Or64,
2626 binop(Iop_And64, mkexpr(old), mkU64(~mask)),
2627 mkU64(imm64));
2628 putIReg64orZR(dd, res);
2629 DIP("movk %s, 0x%x, lsl %u\n",
2630 nameIReg64orZR(dd), imm16, 16*hw);
2631 } else {
2632 IRTemp old = newTemp(Ity_I32);
2633 assign(old, getIReg32orZR(dd));
2634 vassert(hw <= 1);
2635 UInt mask = ((UInt)0xFFFF) << (16 * hw);
2636 IRExpr* res
2637 = binop(Iop_Or32,
2638 binop(Iop_And32, mkexpr(old), mkU32(~mask)),
2639 mkU32((UInt)imm64));
2640 putIReg32orZR(dd, res);
2641 DIP("movk %s, 0x%x, lsl %u\n",
2642 nameIReg32orZR(dd), imm16, 16*hw);
2644 break;
2645 default:
2646 vassert(0);
2648 return True;
2652 /* -------------------- {U,S,}BFM -------------------- */
2653 /* 30 28 22 21 15 9 4
2655 sf 10 100110 N immr imms nn dd
2656 UBFM Wd, Wn, #immr, #imms when sf=0, N=0, immr[5]=0, imms[5]=0
2657 UBFM Xd, Xn, #immr, #imms when sf=1, N=1
2659 sf 00 100110 N immr imms nn dd
2660 SBFM Wd, Wn, #immr, #imms when sf=0, N=0, immr[5]=0, imms[5]=0
2661 SBFM Xd, Xn, #immr, #imms when sf=1, N=1
2663 sf 01 100110 N immr imms nn dd
2664 BFM Wd, Wn, #immr, #imms when sf=0, N=0, immr[5]=0, imms[5]=0
2665 BFM Xd, Xn, #immr, #imms when sf=1, N=1
2667 if (INSN(28,23) == BITS6(1,0,0,1,1,0)) {
2668 UInt sf = INSN(31,31);
2669 UInt opc = INSN(30,29);
2670 UInt N = INSN(22,22);
2671 UInt immR = INSN(21,16);
2672 UInt immS = INSN(15,10);
2673 UInt nn = INSN(9,5);
2674 UInt dd = INSN(4,0);
// inZero: in the general case below, the starting "dst" value is zero
// rather than the current contents of Rd.
// extend: in the general case below, the bits outside tmask come from
// mathREPLICATE(ty, src, immS) rather than from dst.
2675 Bool inZero = False;
2676 Bool extend = False;
2677 const HChar* nm = "???";
2678 /* skip invalid combinations */
2679 switch (opc) {
2680 case BITS2(0,0):
2681 inZero = True; extend = True; nm = "sbfm"; break;
2682 case BITS2(0,1):
2683 inZero = False; extend = False; nm = "bfm"; break;
2684 case BITS2(1,0):
2685 inZero = True; extend = False; nm = "ubfm"; break;
2686 case BITS2(1,1):
2687 goto after_bfm; /* invalid */
2688 default:
2689 vassert(0);
2691 if (sf == 1 && N != 1) goto after_bfm;
2692 if (sf == 0 && (N != 0 || ((immR >> 5) & 1) != 0
2693 || ((immS >> 5) & 1) != 0)) goto after_bfm;
// Both masks are needed here, and the decoder is called with
// immediate == False.
2694 ULong wmask = 0, tmask = 0;
2695 Bool ok = dbm_DecodeBitMasks(&wmask, &tmask,
2696 N, immS, immR, False, sf == 1 ? 64 : 32);
2697 if (!ok) goto after_bfm; /* hmmm */
2699 Bool is64 = sf == 1;
2700 IRType ty = is64 ? Ity_I64 : Ity_I32;
2702 // Handle plain shifts explicitly. These are functionally identical to
2703 // the general case below, but iropt isn't clever enough to reduce those
2704 // sequences to plain shifts. So give it a hand.
2705 if (is64 && immS == 63 && immR >= 1 && immR <= 63) {
2706 if (opc == BITS2(0,0)) {
2707 // 64-bit signed shift right
2708 putIReg64orZR(dd, binop(Iop_Sar64, getIReg64orZR(nn), mkU8(immR)));
2709 DIP("asr %s, %s, #%u\n",
2710 nameIRegOrZR(is64, dd), nameIRegOrZR(is64, nn), immR);
2711 return True;
2713 if (opc == BITS2(1,0)) {
2714 // 64-bit unsigned shift right
2715 putIReg64orZR(dd, binop(Iop_Shr64, getIReg64orZR(nn), mkU8(immR)));
2716 DIP("lsr %s, %s, #%u\n",
2717 nameIRegOrZR(is64, dd), nameIRegOrZR(is64, nn), immR);
2718 return True;
2722 if (!is64 && immS == 31 && immR >= 1 && immR <= 31) {
2723 if (opc == BITS2(0,0)) {
2724 // 32-bit signed shift right
2725 putIReg32orZR(dd, binop(Iop_Sar32, getIReg32orZR(nn), mkU8(immR)));
2726 DIP("asr %s, %s, #%u\n",
2727 nameIRegOrZR(is64, dd), nameIRegOrZR(is64, nn), immR);
2728 return True;
2730 if (opc == BITS2(1,0)) {
2731 // 32-bit unsigned shift right
2732 putIReg32orZR(dd, binop(Iop_Shr32, getIReg32orZR(nn), mkU8(immR)));
2733 DIP("lsr %s, %s, #%u\n",
2734 nameIRegOrZR(is64, dd), nameIRegOrZR(is64, nn), immR);
2735 return True;
2739 if (is64 && immS <= 62
2740 && immR == immS + 1 && opc == BITS2(1,0)) {
2741 // 64-bit shift left
2742 UInt shift = 64 - immR;
2743 vassert(shift >= 1 && shift <= 63);
2744 putIReg64orZR(dd, binop(Iop_Shl64, getIReg64orZR(nn), mkU8(shift)));
2745 DIP("lsl %s, %s, #%u\n",
2746 nameIRegOrZR(is64, dd), nameIRegOrZR(is64, nn), shift);
2747 return True;
2749 if (!is64 && immS <= 30
2750 && immR == immS + 1 && opc == BITS2(1,0)) {
2751 // 32-bit shift left
2752 UInt shift = 32 - immR;
2753 vassert(shift >= 1 && shift <= 31);
2754 putIReg32orZR(dd, binop(Iop_Shl32, getIReg32orZR(nn), mkU8(shift)));
2755 DIP("lsl %s, %s, #%u\n",
2756 nameIRegOrZR(is64, dd), nameIRegOrZR(is64, nn), shift);
2757 return True;
2760 // Also special-case sxtw.
// (sxth and sxtb are special-cased here as well, for both destination
// sizes: SBFM with immR == 0 and immS of 31, 15 or 7.)
2761 if (opc == BITS2(0,0) && immR == 0) {
2762 if (is64) {
2763 // The destination size is 64 bits.
2764 if (immS == 31) {
2765 putIReg64orZR(dd, unop(Iop_32Sto64, getIReg32orZR(nn)));
2766 DIP("sxtw %s, %s\n", nameIReg64orZR(dd), nameIReg32orZR(nn));
2767 return True;
2769 if (immS == 15) {
2770 putIReg64orZR(dd, unop(Iop_16Sto64,
2771 unop(Iop_64to16, getIReg64orZR(nn))));
2772 DIP("sxth %s, %s\n", nameIReg64orZR(dd), nameIReg32orZR(nn));
2773 return True;
2775 if (immS == 7) {
2776 putIReg64orZR(dd, unop(Iop_8Sto64,
2777 unop(Iop_64to8, getIReg64orZR(nn))));
2778 DIP("sxtb %s, %s\n", nameIReg64orZR(dd), nameIReg32orZR(nn));
2779 return True;
2781 } else {
2782 // The destination size is 32 bits.
2783 if (immS == 15) {
2784 putIReg32orZR(dd, unop(Iop_16Sto32,
2785 unop(Iop_64to16, getIReg64orZR(nn))));
2786 DIP("sxth %s, %s\n", nameIReg32orZR(dd), nameIReg32orZR(nn));
2787 return True;
2789 if (immS == 7) {
2790 putIReg32orZR(dd, unop(Iop_8Sto32,
2791 unop(Iop_64to8, getIReg64orZR(nn))));
2792 DIP("sxtb %s, %s\n", nameIReg32orZR(dd), nameIReg32orZR(nn));
2793 return True;
2798 // None of the special cases apply. We have to use the (slow) general
2799 // case.
2800 IRTemp dst = newTemp(ty);
2801 IRTemp src = newTemp(ty);
2802 IRTemp bot = newTemp(ty);
2803 IRTemp top = newTemp(ty);
2804 IRTemp res = newTemp(ty);
2805 assign(dst, inZero ? mkU(ty,0) : getIRegOrZR(is64, dd));
2806 assign(src, getIRegOrZR(is64, nn));
2807 /* perform bitfield move on low bits */
2808 assign(bot, binop(mkOR(ty),
2809 binop(mkAND(ty), mkexpr(dst), mkU(ty, ~wmask)),
2810 binop(mkAND(ty), mkexpr(mathROR(ty, src, immR)),
2811 mkU(ty, wmask))));
2812 /* determine extension bits (sign, zero or dest register) */
2813 assign(top, mkexpr(extend ? mathREPLICATE(ty, src, immS) : dst));
2814 /* combine extension bits and result bits */
2815 assign(res, binop(mkOR(ty),
2816 binop(mkAND(ty), mkexpr(top), mkU(ty, ~tmask)),
2817 binop(mkAND(ty), mkexpr(bot), mkU(ty, tmask))));
2818 putIRegOrZR(is64, dd, mkexpr(res));
2819 DIP("%s %s, %s, immR=%u, immS=%u\n",
2820 nm, nameIRegOrZR(is64, dd), nameIRegOrZR(is64, nn), immR, immS);
2821 return True;
2823 after_bfm:
2825 /* ---------------------- EXTR ---------------------- */
2826 /* 30 28 22 20 15 9 4
2827 1 00 100111 10 m imm6 n d EXTR Xd, Xn, Xm, #imm6
2828 0 00 100111 00 m imm6 n d EXTR Wd, Wn, Wm, #imm6 when #imm6 < 32
2830 if (INSN(30,23) == BITS8(0,0,1,0,0,1,1,1) && INSN(21,21) == 0) {
2831 Bool is64 = INSN(31,31) == 1;
2832 UInt mm = INSN(20,16);
2833 UInt imm6 = INSN(15,10);
2834 UInt nn = INSN(9,5);
2835 UInt dd = INSN(4,0);
2836 Bool valid = True;
// Bit 22 must equal the sf bit (bit 31), and the 32-bit form requires
// imm6 < 32.
2837 if (INSN(31,31) != INSN(22,22))
2838 valid = False;
2839 if (!is64 && imm6 >= 32)
2840 valid = False;
2841 if (!valid) goto after_extr;
2842 IRType ty = is64 ? Ity_I64 : Ity_I32;
2843 IRTemp srcHi = newTemp(ty);
2844 IRTemp srcLo = newTemp(ty);
2845 IRTemp res = newTemp(ty);
2846 assign(srcHi, getIRegOrZR(is64, nn));
2847 assign(srcLo, getIRegOrZR(is64, mm));
// imm6 == 0 is split out so that the left-shift amount used below,
// szBits - imm6, is always smaller than the operand width.
2848 if (imm6 == 0) {
2849 assign(res, mkexpr(srcLo));
2850 } else {
2851 UInt szBits = 8 * sizeofIRType(ty);
2852 vassert(imm6 > 0 && imm6 < szBits);
2853 assign(res, binop(mkOR(ty),
2854 binop(mkSHL(ty), mkexpr(srcHi), mkU8(szBits-imm6)),
2855 binop(mkSHR(ty), mkexpr(srcLo), mkU8(imm6))));
2857 putIRegOrZR(is64, dd, mkexpr(res));
2858 DIP("extr %s, %s, %s, #%u\n",
2859 nameIRegOrZR(is64,dd),
2860 nameIRegOrZR(is64,nn), nameIRegOrZR(is64,mm), imm6);
2861 return True;
2863 after_extr:
// No case above accepted the encoding.
2865 if (sigill_diag) {
2866 vex_printf("ARM64 front end: data_processing_immediate\n");
2868 return False;
2869 # undef INSN
2873 /*------------------------------------------------------------*/
2874 /*--- Data processing (register) instructions ---*/
2875 /*------------------------------------------------------------*/
2877 static const HChar* nameSH ( UInt sh ) {
2878 switch (sh) {
2879 case 0: return "lsl";
2880 case 1: return "lsr";
2881 case 2: return "asr";
2882 case 3: return "ror";
2883 default: vassert(0);
2887 /* Generate IR to get a register value, possibly shifted by an
2888 immediate. Returns either a 32- or 64-bit temporary holding the
2889 result. After the shift, the value can optionally be NOT-ed
2890 too.
2892 sh_how coding: 00=SHL, 01=SHR, 10=SAR, 11=ROR. sh_amt may only be
2893 in the range 0 to (is64 ? 64 : 32)-1. For some instructions, ROR
2894 isn't allowed, but it's the job of the caller to check that.
2896 static IRTemp getShiftedIRegOrZR ( Bool is64,
2897 UInt sh_how, UInt sh_amt, UInt regNo,
2898 Bool invert )
2900 vassert(sh_how < 4);
2901 vassert(sh_amt < (is64 ? 64 : 32));
2902 IRType ty = is64 ? Ity_I64 : Ity_I32;
2903 IRTemp t0 = newTemp(ty);
2904 assign(t0, getIRegOrZR(is64, regNo));
2905 IRTemp t1 = newTemp(ty);
2906 switch (sh_how) {
2907 case BITS2(0,0):
2908 assign(t1, binop(mkSHL(ty), mkexpr(t0), mkU8(sh_amt)));
2909 break;
2910 case BITS2(0,1):
2911 assign(t1, binop(mkSHR(ty), mkexpr(t0), mkU8(sh_amt)));
2912 break;
2913 case BITS2(1,0):
2914 assign(t1, binop(mkSAR(ty), mkexpr(t0), mkU8(sh_amt)));
2915 break;
2916 case BITS2(1,1):
2917 assign(t1, mkexpr(mathROR(ty, t0, sh_amt)));
2918 break;
2919 default:
2920 vassert(0);
2922 if (invert) {
2923 IRTemp t2 = newTemp(ty);
2924 assign(t2, unop(mkNOT(ty), mkexpr(t1)));
2925 return t2;
2926 } else {
2927 return t1;
2932 static
2933 Bool dis_ARM64_data_processing_register(/*MB_OUT*/DisResult* dres,
2934 UInt insn, Bool sigill_diag)
2936 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
2938 /* ------------------- ADD/SUB(reg) ------------------- */
2939 /* x==0 => 32 bit op x==1 => 64 bit op
2940 sh: 00=LSL, 01=LSR, 10=ASR, 11=ROR(NOT ALLOWED)
2942 31 30 29 28 23 21 20 15 9 4
2943 | | | | | | | | | |
2944 x 0 0 01011 sh 0 Rm imm6 Rn Rd ADD Rd,Rn, sh(Rm,imm6)
2945 x 0 1 01011 sh 0 Rm imm6 Rn Rd ADDS Rd,Rn, sh(Rm,imm6)
2946 x 1 0 01011 sh 0 Rm imm6 Rn Rd SUB Rd,Rn, sh(Rm,imm6)
2947 x 1 1 01011 sh 0 Rm imm6 Rn Rd SUBS Rd,Rn, sh(Rm,imm6)
2949 if (INSN(28,24) == BITS5(0,1,0,1,1) && INSN(21,21) == 0) {
2950 UInt bX = INSN(31,31);
2951 UInt bOP = INSN(30,30); /* 0: ADD, 1: SUB */
2952 UInt bS = INSN(29, 29); /* set flags? */
2953 UInt sh = INSN(23,22);
2954 UInt rM = INSN(20,16);
2955 UInt imm6 = INSN(15,10);
2956 UInt rN = INSN(9,5);
2957 UInt rD = INSN(4,0);
2958 Bool isSUB = bOP == 1;
2959 Bool is64 = bX == 1;
2960 IRType ty = is64 ? Ity_I64 : Ity_I32;
2961 if ((!is64 && imm6 > 31) || sh == BITS2(1,1)) {
2962 /* invalid; fall through */
2963 } else {
2964 IRTemp argL = newTemp(ty);
2965 assign(argL, getIRegOrZR(is64, rN));
2966 IRTemp argR = getShiftedIRegOrZR(is64, sh, imm6, rM, False);
2967 IROp op = isSUB ? mkSUB(ty) : mkADD(ty);
2968 IRTemp res = newTemp(ty);
2969 assign(res, binop(op, mkexpr(argL), mkexpr(argR)));
2970 if (rD != 31) putIRegOrZR(is64, rD, mkexpr(res));
2971 if (bS) {
2972 setFlags_ADD_SUB(is64, isSUB, argL, argR);
2974 DIP("%s%s %s, %s, %s, %s #%u\n",
2975 bOP ? "sub" : "add", bS ? "s" : "",
2976 nameIRegOrZR(is64, rD), nameIRegOrZR(is64, rN),
2977 nameIRegOrZR(is64, rM), nameSH(sh), imm6);
2978 return True;
2982 /* ------------------- ADC/SBC(reg) ------------------- */
2983 /* x==0 => 32 bit op x==1 => 64 bit op
2985 31 30 29 28 23 21 20 15 9 4
2986 | | | | | | | | | |
2987 x 0 0 11010 00 0 Rm 000000 Rn Rd ADC Rd,Rn,Rm
2988 x 0 1 11010 00 0 Rm 000000 Rn Rd ADCS Rd,Rn,Rm
2989 x 1 0 11010 00 0 Rm 000000 Rn Rd SBC Rd,Rn,Rm
2990 x 1 1 11010 00 0 Rm 000000 Rn Rd SBCS Rd,Rn,Rm
2993 if (INSN(28,21) == BITS8(1,1,0,1,0,0,0,0) && INSN(15,10) == 0 ) {
2994 UInt bX = INSN(31,31);
2995 UInt bOP = INSN(30,30); /* 0: ADC, 1: SBC */
2996 UInt bS = INSN(29,29); /* set flags */
2997 UInt rM = INSN(20,16);
2998 UInt rN = INSN(9,5);
2999 UInt rD = INSN(4,0);
3001 Bool isSUB = bOP == 1;
3002 Bool is64 = bX == 1;
3003 IRType ty = is64 ? Ity_I64 : Ity_I32;
3005 IRTemp oldC = newTemp(ty);
3006 assign(oldC,
3007 is64 ? mk_arm64g_calculate_flag_c()
3008 : unop(Iop_64to32, mk_arm64g_calculate_flag_c()) );
3010 IRTemp argL = newTemp(ty);
3011 assign(argL, getIRegOrZR(is64, rN));
3012 IRTemp argR = newTemp(ty);
3013 assign(argR, getIRegOrZR(is64, rM));
3015 IROp op = isSUB ? mkSUB(ty) : mkADD(ty);
3016 IRTemp res = newTemp(ty);
3017 if (isSUB) {
3018 IRExpr* one = is64 ? mkU64(1) : mkU32(1);
3019 IROp xorOp = is64 ? Iop_Xor64 : Iop_Xor32;
3020 assign(res,
3021 binop(op,
3022 binop(op, mkexpr(argL), mkexpr(argR)),
3023 binop(xorOp, mkexpr(oldC), one)));
3024 } else {
3025 assign(res,
3026 binop(op,
3027 binop(op, mkexpr(argL), mkexpr(argR)),
3028 mkexpr(oldC)));
3031 if (rD != 31) putIRegOrZR(is64, rD, mkexpr(res));
3033 if (bS) {
3034 setFlags_ADC_SBC(is64, isSUB, argL, argR, oldC);
3037 DIP("%s%s %s, %s, %s\n",
3038 bOP ? "sbc" : "adc", bS ? "s" : "",
3039 nameIRegOrZR(is64, rD), nameIRegOrZR(is64, rN),
3040 nameIRegOrZR(is64, rM));
3041 return True;
3044 /* -------------------- LOGIC(reg) -------------------- */
3045 /* x==0 => 32 bit op x==1 => 64 bit op
3046 N==0 => inv? is no-op (no inversion)
3047 N==1 => inv? is NOT
3048 sh: 00=LSL, 01=LSR, 10=ASR, 11=ROR
3050 31 30 28 23 21 20 15 9 4
3051 | | | | | | | | |
3052 x 00 01010 sh N Rm imm6 Rn Rd AND Rd,Rn, inv?(sh(Rm,imm6))
3053 x 01 01010 sh N Rm imm6 Rn Rd ORR Rd,Rn, inv?(sh(Rm,imm6))
3054 x 10 01010 sh N Rm imm6 Rn Rd EOR Rd,Rn, inv?(sh(Rm,imm6))
3055 x 11 01010 sh N Rm imm6 Rn Rd ANDS Rd,Rn, inv?(sh(Rm,imm6))
3056 With N=1, the names are: BIC ORN EON BICS
3058 if (INSN(28,24) == BITS5(0,1,0,1,0)) {
3059 UInt bX = INSN(31,31);
3060 UInt sh = INSN(23,22);
3061 UInt bN = INSN(21,21);
3062 UInt rM = INSN(20,16);
3063 UInt imm6 = INSN(15,10);
3064 UInt rN = INSN(9,5);
3065 UInt rD = INSN(4,0);
3066 Bool is64 = bX == 1;
3067 IRType ty = is64 ? Ity_I64 : Ity_I32;
3068 if (!is64 && imm6 > 31) {
3069 /* invalid; fall through */
3070 } else {
3071 IRTemp argL = newTemp(ty);
3072 assign(argL, getIRegOrZR(is64, rN));
3073 IRTemp argR = getShiftedIRegOrZR(is64, sh, imm6, rM, bN == 1);
3074 IROp op = Iop_INVALID;
3075 switch (INSN(30,29)) {
3076 case BITS2(0,0): case BITS2(1,1): op = mkAND(ty); break;
3077 case BITS2(0,1): op = mkOR(ty); break;
3078 case BITS2(1,0): op = mkXOR(ty); break;
3079 default: vassert(0);
3081 IRTemp res = newTemp(ty);
3082 assign(res, binop(op, mkexpr(argL), mkexpr(argR)));
3083 if (INSN(30,29) == BITS2(1,1)) {
3084 setFlags_LOGIC(is64, res);
3086 putIRegOrZR(is64, rD, mkexpr(res));
3088 static const HChar* names_op[8]
3089 = { "and", "orr", "eor", "ands", "bic", "orn", "eon", "bics" };
3090 vassert(((bN << 2) | INSN(30,29)) < 8);
3091 const HChar* nm_op = names_op[(bN << 2) | INSN(30,29)];
3092 /* Special-case the printing of "MOV" */
3093 if (rN == 31/*zr*/ && sh == 0/*LSL*/ && imm6 == 0 && bN == 0) {
3094 DIP("mov %s, %s\n", nameIRegOrZR(is64, rD),
3095 nameIRegOrZR(is64, rM));
3096 } else {
3097 DIP("%s %s, %s, %s, %s #%u\n", nm_op,
3098 nameIRegOrZR(is64, rD), nameIRegOrZR(is64, rN),
3099 nameIRegOrZR(is64, rM), nameSH(sh), imm6);
3101 return True;
3105 /* -------------------- {U,S}MULH -------------------- */
3106 /* 31 23 22 20 15 9 4
3107 10011011 1 10 Rm 011111 Rn Rd UMULH Xd,Xn,Xm
3108 10011011 0 10 Rm 011111 Rn Rd SMULH Xd,Xn,Xm
3110 if (INSN(31,24) == BITS8(1,0,0,1,1,0,1,1)
3111 && INSN(22,21) == BITS2(1,0) && INSN(15,10) == BITS6(0,1,1,1,1,1)) {
3112 Bool isU = INSN(23,23) == 1;
3113 UInt mm = INSN(20,16);
3114 UInt nn = INSN(9,5);
3115 UInt dd = INSN(4,0);
3116 putIReg64orZR(dd, unop(Iop_128HIto64,
3117 binop(isU ? Iop_MullU64 : Iop_MullS64,
3118 getIReg64orZR(nn), getIReg64orZR(mm))));
3119 DIP("%cmulh %s, %s, %s\n",
3120 isU ? 'u' : 's',
3121 nameIReg64orZR(dd), nameIReg64orZR(nn), nameIReg64orZR(mm));
3122 return True;
3125 /* -------------------- M{ADD,SUB} -------------------- */
3126 /* 31 30 20 15 14 9 4
3127 sf 00 11011 000 m 0 a n r MADD Rd,Rn,Rm,Ra d = a+m*n
3128 sf 00 11011 000 m 1 a n r MSUB Rd,Rn,Rm,Ra d = a-m*n
3130 if (INSN(30,21) == BITS10(0,0,1,1,0,1,1,0,0,0)) {
3131 Bool is64 = INSN(31,31) == 1;
3132 UInt mm = INSN(20,16);
3133 Bool isAdd = INSN(15,15) == 0;
3134 UInt aa = INSN(14,10);
3135 UInt nn = INSN(9,5);
3136 UInt dd = INSN(4,0);
3137 if (is64) {
3138 putIReg64orZR(
3140 binop(isAdd ? Iop_Add64 : Iop_Sub64,
3141 getIReg64orZR(aa),
3142 binop(Iop_Mul64, getIReg64orZR(mm), getIReg64orZR(nn))));
3143 } else {
3144 putIReg32orZR(
3146 binop(isAdd ? Iop_Add32 : Iop_Sub32,
3147 getIReg32orZR(aa),
3148 binop(Iop_Mul32, getIReg32orZR(mm), getIReg32orZR(nn))));
3150 DIP("%s %s, %s, %s, %s\n",
3151 isAdd ? "madd" : "msub",
3152 nameIRegOrZR(is64, dd), nameIRegOrZR(is64, nn),
3153 nameIRegOrZR(is64, mm), nameIRegOrZR(is64, aa));
3154 return True;
3157 /* ---------------- CS{EL,INC,INV,NEG} ---------------- */
3158 /* 31 30 28 20 15 11 9 4
3159 sf 00 1101 0100 mm cond 00 nn dd CSEL Rd,Rn,Rm
3160 sf 00 1101 0100 mm cond 01 nn dd CSINC Rd,Rn,Rm
3161 sf 10 1101 0100 mm cond 00 nn dd CSINV Rd,Rn,Rm
3162 sf 10 1101 0100 mm cond 01 nn dd CSNEG Rd,Rn,Rm
3163 In all cases, the operation is: Rd = if cond then Rn else OP(Rm)
3165 if (INSN(29,21) == BITS9(0, 1,1,0,1, 0,1,0,0) && INSN(11,11) == 0) {
3166 Bool is64 = INSN(31,31) == 1;
3167 UInt b30 = INSN(30,30);
3168 UInt mm = INSN(20,16);
3169 UInt cond = INSN(15,12);
3170 UInt b10 = INSN(10,10);
3171 UInt nn = INSN(9,5);
3172 UInt dd = INSN(4,0);
3173 UInt op = (b30 << 1) | b10; /* 00=id 01=inc 10=inv 11=neg */
3174 IRType ty = is64 ? Ity_I64 : Ity_I32;
3175 IRExpr* argL = getIRegOrZR(is64, nn);
3176 IRExpr* argR = getIRegOrZR(is64, mm);
3177 switch (op) {
3178 case BITS2(0,0):
3179 break;
3180 case BITS2(0,1):
3181 argR = binop(mkADD(ty), argR, mkU(ty,1));
3182 break;
3183 case BITS2(1,0):
3184 argR = unop(mkNOT(ty), argR);
3185 break;
3186 case BITS2(1,1):
3187 argR = binop(mkSUB(ty), mkU(ty,0), argR);
3188 break;
3189 default:
3190 vassert(0);
3192 putIRegOrZR(
3193 is64, dd,
3194 IRExpr_ITE(unop(Iop_64to1, mk_arm64g_calculate_condition(cond)),
3195 argL, argR)
3197 const HChar* op_nm[4] = { "csel", "csinc", "csinv", "csneg" };
3198 DIP("%s %s, %s, %s, %s\n", op_nm[op],
3199 nameIRegOrZR(is64, dd), nameIRegOrZR(is64, nn),
3200 nameIRegOrZR(is64, mm), nameCC(cond));
3201 return True;
3204 /* -------------- ADD/SUB(extended reg) -------------- */
3205 /* 28 20 15 12 9 4
3206 000 01011 00 1 m opt imm3 n d ADD Wd|SP, Wn|SP, Wm ext&lsld
3207 100 01011 00 1 m opt imm3 n d ADD Xd|SP, Xn|SP, Rm ext&lsld
3209 001 01011 00 1 m opt imm3 n d ADDS Wd, Wn|SP, Wm ext&lsld
3210 101 01011 00 1 m opt imm3 n d ADDS Xd, Xn|SP, Rm ext&lsld
3212 010 01011 00 1 m opt imm3 n d SUB Wd|SP, Wn|SP, Wm ext&lsld
3213 110 01011 00 1 m opt imm3 n d SUB Xd|SP, Xn|SP, Rm ext&lsld
3215 011 01011 00 1 m opt imm3 n d SUBS Wd, Wn|SP, Wm ext&lsld
3216 111 01011 00 1 m opt imm3 n d SUBS Xd, Xn|SP, Rm ext&lsld
3218 The 'm' operand is extended per opt, thusly:
3220 000 Xm & 0xFF UXTB
3221 001 Xm & 0xFFFF UXTH
3222 010 Xm & (2^32)-1 UXTW
3223 011 Xm UXTX
3225 100 Xm sx from bit 7 SXTB
3226 101 Xm sx from bit 15 SXTH
3227 110 Xm sx from bit 31 SXTW
3228 111 Xm SXTX
3230 In the 64 bit case (bit31 == 1), UXTX and SXTX are the identity
3231 operation on Xm. In the 32 bit case, UXTW, UXTX, SXTW and SXTX
3232 are the identity operation on Wm.
3234 After extension, the value is shifted left by imm3 bits, which
3235 may only be in the range 0 .. 4 inclusive.
3237 if (INSN(28,21) == BITS8(0,1,0,1,1,0,0,1) && INSN(12,10) <= 4) {
3238 Bool is64 = INSN(31,31) == 1;
3239 Bool isSub = INSN(30,30) == 1;
3240 Bool setCC = INSN(29,29) == 1;
3241 UInt mm = INSN(20,16);
3242 UInt opt = INSN(15,13);
3243 UInt imm3 = INSN(12,10);
3244 UInt nn = INSN(9,5);
3245 UInt dd = INSN(4,0);
3246 const HChar* nameExt[8] = { "uxtb", "uxth", "uxtw", "uxtx",
3247 "sxtb", "sxth", "sxtw", "sxtx" };
3248 /* Do almost the same thing in the 32- and 64-bit cases. */
3249 IRTemp xN = newTemp(Ity_I64);
3250 IRTemp xM = newTemp(Ity_I64);
3251 assign(xN, getIReg64orSP(nn));
3252 assign(xM, getIReg64orZR(mm));
3253 IRExpr* xMw = mkexpr(xM); /* "xM widened" */
3254 Int shSX = 0;
3255 /* widen Xm .. */
3256 switch (opt) {
3257 case BITS3(0,0,0): // UXTB
3258 xMw = binop(Iop_And64, xMw, mkU64(0xFF)); break;
3259 case BITS3(0,0,1): // UXTH
3260 xMw = binop(Iop_And64, xMw, mkU64(0xFFFF)); break;
3261 case BITS3(0,1,0): // UXTW -- noop for the 32bit case
3262 if (is64) {
3263 xMw = unop(Iop_32Uto64, unop(Iop_64to32, xMw));
3265 break;
3266 case BITS3(0,1,1): // UXTX -- always a noop
3267 break;
3268 case BITS3(1,0,0): // SXTB
3269 shSX = 56; goto sxTo64;
3270 case BITS3(1,0,1): // SXTH
3271 shSX = 48; goto sxTo64;
3272 case BITS3(1,1,0): // SXTW -- noop for the 32bit case
3273 if (is64) {
3274 shSX = 32; goto sxTo64;
3276 break;
3277 case BITS3(1,1,1): // SXTX -- always a noop
3278 break;
3279 sxTo64:
3280 vassert(shSX >= 32);
3281 xMw = binop(Iop_Sar64, binop(Iop_Shl64, xMw, mkU8(shSX)),
3282 mkU8(shSX));
3283 break;
3284 default:
3285 vassert(0);
3287 /* and now shift */
3288 IRTemp argL = xN;
3289 IRTemp argR = newTemp(Ity_I64);
3290 assign(argR, binop(Iop_Shl64, xMw, mkU8(imm3)));
3291 IRTemp res = newTemp(Ity_I64);
3292 assign(res, binop(isSub ? Iop_Sub64 : Iop_Add64,
3293 mkexpr(argL), mkexpr(argR)));
3294 if (is64) {
3295 if (setCC) {
3296 putIReg64orZR(dd, mkexpr(res));
3297 setFlags_ADD_SUB(True/*is64*/, isSub, argL, argR);
3298 } else {
3299 putIReg64orSP(dd, mkexpr(res));
3301 } else {
3302 if (setCC) {
3303 IRTemp argL32 = newTemp(Ity_I32);
3304 IRTemp argR32 = newTemp(Ity_I32);
3305 putIReg32orZR(dd, unop(Iop_64to32, mkexpr(res)));
3306 assign(argL32, unop(Iop_64to32, mkexpr(argL)));
3307 assign(argR32, unop(Iop_64to32, mkexpr(argR)));
3308 setFlags_ADD_SUB(False/*!is64*/, isSub, argL32, argR32);
3309 } else {
3310 putIReg32orSP(dd, unop(Iop_64to32, mkexpr(res)));
3313 DIP("%s%s %s, %s, %s %s lsl %u\n",
3314 isSub ? "sub" : "add", setCC ? "s" : "",
3315 setCC ? nameIRegOrZR(is64, dd) : nameIRegOrSP(is64, dd),
3316 nameIRegOrSP(is64, nn), nameIRegOrSP(is64, mm),
3317 nameExt[opt], imm3);
3318 return True;
3321 /* ---------------- CCMP/CCMN(imm) ---------------- */
3322 /* Bizarrely, these appear in the "data processing register"
3323 category, even though they are operations against an
3324 immediate. */
3325 /* 31 29 20 15 11 9 3
3326 sf 1 111010010 imm5 cond 10 Rn 0 nzcv CCMP Rn, #imm5, #nzcv, cond
3327 sf 0 111010010 imm5 cond 10 Rn 0 nzcv CCMN Rn, #imm5, #nzcv, cond
3329 Operation is:
3330 (CCMP) flags = if cond then flags-after-sub(Rn,imm5) else nzcv
3331 (CCMN) flags = if cond then flags-after-add(Rn,imm5) else nzcv
3333 if (INSN(29,21) == BITS9(1,1,1,0,1,0,0,1,0)
3334 && INSN(11,10) == BITS2(1,0) && INSN(4,4) == 0) {
3335 Bool is64 = INSN(31,31) == 1;
3336 Bool isSUB = INSN(30,30) == 1;
3337 UInt imm5 = INSN(20,16);
3338 UInt cond = INSN(15,12);
3339 UInt nn = INSN(9,5);
3340 UInt nzcv = INSN(3,0);
3342 IRTemp condT = newTemp(Ity_I1);
3343 assign(condT, unop(Iop_64to1, mk_arm64g_calculate_condition(cond)));
3345 IRType ty = is64 ? Ity_I64 : Ity_I32;
3346 IRTemp argL = newTemp(ty);
3347 IRTemp argR = newTemp(ty);
3349 if (is64) {
3350 assign(argL, getIReg64orZR(nn));
3351 assign(argR, mkU64(imm5));
3352 } else {
3353 assign(argL, getIReg32orZR(nn));
3354 assign(argR, mkU32(imm5));
3356 setFlags_ADD_SUB_conditionally(is64, isSUB, condT, argL, argR, nzcv);
3358 DIP("ccm%c %s, #%u, #%u, %s\n",
3359 isSUB ? 'p' : 'n', nameIRegOrZR(is64, nn),
3360 imm5, nzcv, nameCC(cond));
3361 return True;
3364 /* ---------------- CCMP/CCMN(reg) ---------------- */
3365 /* 31 29 20 15 11 9 3
3366 sf 1 111010010 Rm cond 00 Rn 0 nzcv CCMP Rn, Rm, #nzcv, cond
3367 sf 0 111010010 Rm cond 00 Rn 0 nzcv CCMN Rn, Rm, #nzcv, cond
3368 Operation is:
3369 (CCMP) flags = if cond then flags-after-sub(Rn,Rm) else nzcv
3370 (CCMN) flags = if cond then flags-after-add(Rn,Rm) else nzcv
3372 if (INSN(29,21) == BITS9(1,1,1,0,1,0,0,1,0)
3373 && INSN(11,10) == BITS2(0,0) && INSN(4,4) == 0) {
3374 Bool is64 = INSN(31,31) == 1;
3375 Bool isSUB = INSN(30,30) == 1;
3376 UInt mm = INSN(20,16);
3377 UInt cond = INSN(15,12);
3378 UInt nn = INSN(9,5);
3379 UInt nzcv = INSN(3,0);
3381 IRTemp condT = newTemp(Ity_I1);
3382 assign(condT, unop(Iop_64to1, mk_arm64g_calculate_condition(cond)));
3384 IRType ty = is64 ? Ity_I64 : Ity_I32;
3385 IRTemp argL = newTemp(ty);
3386 IRTemp argR = newTemp(ty);
3388 if (is64) {
3389 assign(argL, getIReg64orZR(nn));
3390 assign(argR, getIReg64orZR(mm));
3391 } else {
3392 assign(argL, getIReg32orZR(nn));
3393 assign(argR, getIReg32orZR(mm));
3395 setFlags_ADD_SUB_conditionally(is64, isSUB, condT, argL, argR, nzcv);
3397 DIP("ccm%c %s, %s, #%u, %s\n",
3398 isSUB ? 'p' : 'n', nameIRegOrZR(is64, nn),
3399 nameIRegOrZR(is64, mm), nzcv, nameCC(cond));
3400 return True;
3404 /* -------------- REV/REV16/REV32/RBIT -------------- */
3405 /* 31 30 28 20 15 11 9 4
3407 1 10 11010110 00000 0000 11 n d (1) REV Xd, Xn
3408 0 10 11010110 00000 0000 10 n d (2) REV Wd, Wn
3410 1 10 11010110 00000 0000 00 n d (3) RBIT Xd, Xn
3411 0 10 11010110 00000 0000 00 n d (4) RBIT Wd, Wn
3413 1 10 11010110 00000 0000 01 n d (5) REV16 Xd, Xn
3414 0 10 11010110 00000 0000 01 n d (6) REV16 Wd, Wn
3416 1 10 11010110 00000 0000 10 n d (7) REV32 Xd, Xn
3418 if (INSN(30,21) == BITS10(1,0,1,1,0,1,0,1,1,0)
3419 && INSN(20,12) == BITS9(0,0,0,0,0,0,0,0,0)) {
3420 UInt b31 = INSN(31,31);
3421 UInt opc = INSN(11,10);
3423 UInt ix = 0;
3424 /**/ if (b31 == 1 && opc == BITS2(1,1)) ix = 1;
3425 else if (b31 == 0 && opc == BITS2(1,0)) ix = 2;
3426 else if (b31 == 1 && opc == BITS2(0,0)) ix = 3;
3427 else if (b31 == 0 && opc == BITS2(0,0)) ix = 4;
3428 else if (b31 == 1 && opc == BITS2(0,1)) ix = 5;
3429 else if (b31 == 0 && opc == BITS2(0,1)) ix = 6;
3430 else if (b31 == 1 && opc == BITS2(1,0)) ix = 7;
3431 if (ix >= 1 && ix <= 7) {
3432 Bool is64 = ix == 1 || ix == 3 || ix == 5 || ix == 7;
3433 UInt nn = INSN(9,5);
3434 UInt dd = INSN(4,0);
3435 IRTemp src = newTemp(Ity_I64);
3436 IRTemp dst = IRTemp_INVALID;
3437 IRTemp (*math)(IRTemp) = NULL;
3438 switch (ix) {
3439 case 1: case 2: math = math_BYTESWAP64; break;
3440 case 3: case 4: math = math_BITSWAP64; break;
3441 case 5: case 6: math = math_USHORTSWAP64; break;
3442 case 7: math = math_UINTSWAP64; break;
3443 default: vassert(0);
3445 const HChar* names[7]
3446 = { "rev", "rev", "rbit", "rbit", "rev16", "rev16", "rev32" };
3447 const HChar* nm = names[ix-1];
3448 vassert(math);
3449 if (ix == 6) {
3450 /* This has to be special cased, since the logic below doesn't
3451 handle it correctly. */
3452 assign(src, getIReg64orZR(nn));
3453 dst = math(src);
3454 putIReg64orZR(dd,
3455 unop(Iop_32Uto64, unop(Iop_64to32, mkexpr(dst))));
3456 } else if (is64) {
3457 assign(src, getIReg64orZR(nn));
3458 dst = math(src);
3459 putIReg64orZR(dd, mkexpr(dst));
3460 } else {
3461 assign(src, binop(Iop_Shl64, getIReg64orZR(nn), mkU8(32)));
3462 dst = math(src);
3463 putIReg32orZR(dd, unop(Iop_64to32, mkexpr(dst)));
3465 DIP("%s %s, %s\n", nm,
3466 nameIRegOrZR(is64,dd), nameIRegOrZR(is64,nn));
3467 return True;
3469 /* else fall through */
3472 /* -------------------- CLZ/CLS -------------------- */
3473 /* 30 28 24 20 15 9 4
3474 sf 10 1101 0110 00000 00010 0 n d CLZ Rd, Rn
3475 sf 10 1101 0110 00000 00010 1 n d CLS Rd, Rn
3477 if (INSN(30,21) == BITS10(1,0,1,1,0,1,0,1,1,0)
3478 && INSN(20,11) == BITS10(0,0,0,0,0,0,0,0,1,0)) {
3479 Bool is64 = INSN(31,31) == 1;
3480 Bool isCLS = INSN(10,10) == 1;
3481 UInt nn = INSN(9,5);
3482 UInt dd = INSN(4,0);
3483 IRTemp src = newTemp(Ity_I64);
3484 IRTemp srcZ = newTemp(Ity_I64);
3485 IRTemp dst = newTemp(Ity_I64);
3486 /* Get the argument, widened out to 64 bit */
3487 if (is64) {
3488 assign(src, getIReg64orZR(nn));
3489 } else {
3490 assign(src, binop(Iop_Shl64,
3491 unop(Iop_32Uto64, getIReg32orZR(nn)), mkU8(32)));
3493 /* If this is CLS, mash the arg around accordingly */
3494 if (isCLS) {
3495 IRExpr* one = mkU8(1);
3496 assign(srcZ,
3497 binop(Iop_Xor64,
3498 binop(Iop_Shl64, mkexpr(src), one),
3499 binop(Iop_Shl64, binop(Iop_Shr64, mkexpr(src), one), one)));
3500 } else {
3501 assign(srcZ, mkexpr(src));
3503 /* And compute CLZ. */
3504 if (is64) {
3505 assign(dst, IRExpr_ITE(binop(Iop_CmpEQ64, mkexpr(srcZ), mkU64(0)),
3506 mkU64(isCLS ? 63 : 64),
3507 unop(Iop_Clz64, mkexpr(srcZ))));
3508 putIReg64orZR(dd, mkexpr(dst));
3509 } else {
3510 assign(dst, IRExpr_ITE(binop(Iop_CmpEQ64, mkexpr(srcZ), mkU64(0)),
3511 mkU64(isCLS ? 31 : 32),
3512 unop(Iop_Clz64, mkexpr(srcZ))));
3513 putIReg32orZR(dd, unop(Iop_64to32, mkexpr(dst)));
3515 DIP("cl%c %s, %s\n", isCLS ? 's' : 'z',
3516 nameIRegOrZR(is64, dd), nameIRegOrZR(is64, nn));
3517 return True;
3520 /* ------------------ LSLV/LSRV/ASRV/RORV ------------------ */
3521 /* 30 28 20 15 11 9 4
3522 sf 00 1101 0110 m 0010 00 n d LSLV Rd,Rn,Rm
3523 sf 00 1101 0110 m 0010 01 n d LSRV Rd,Rn,Rm
3524 sf 00 1101 0110 m 0010 10 n d ASRV Rd,Rn,Rm
3525 sf 00 1101 0110 m 0010 11 n d RORV Rd,Rn,Rm
3527 if (INSN(30,21) == BITS10(0,0,1,1,0,1,0,1,1,0)
3528 && INSN(15,12) == BITS4(0,0,1,0)) {
3529 Bool is64 = INSN(31,31) == 1;
3530 UInt mm = INSN(20,16);
3531 UInt op = INSN(11,10);
3532 UInt nn = INSN(9,5);
3533 UInt dd = INSN(4,0);
3534 IRType ty = is64 ? Ity_I64 : Ity_I32;
3535 IRTemp srcL = newTemp(ty);
3536 IRTemp srcR = newTemp(Ity_I64);
3537 IRTemp res = newTemp(ty);
3538 IROp iop = Iop_INVALID;
3539 assign(srcL, getIRegOrZR(is64, nn));
3540 assign(srcR, binop(Iop_And64, getIReg64orZR(mm),
3541 mkU64(is64 ? 63 : 31)));
3542 if (op < 3) {
3543 // LSLV, LSRV, ASRV
3544 switch (op) {
3545 case BITS2(0,0): iop = mkSHL(ty); break;
3546 case BITS2(0,1): iop = mkSHR(ty); break;
3547 case BITS2(1,0): iop = mkSAR(ty); break;
3548 default: vassert(0);
3550 assign(res, binop(iop, mkexpr(srcL),
3551 unop(Iop_64to8, mkexpr(srcR))));
3552 } else {
3553 // RORV
3554 IROp opSHL = mkSHL(ty);
3555 IROp opSHR = mkSHR(ty);
3556 IROp opOR = mkOR(ty);
3557 IRExpr* width = mkU64(is64 ? 64: 32);
3558 assign(
3559 res,
3560 IRExpr_ITE(
3561 binop(Iop_CmpEQ64, mkexpr(srcR), mkU64(0)),
3562 mkexpr(srcL),
3563 binop(opOR,
3564 binop(opSHL,
3565 mkexpr(srcL),
3566 unop(Iop_64to8, binop(Iop_Sub64, width,
3567 mkexpr(srcR)))),
3568 binop(opSHR,
3569 mkexpr(srcL), unop(Iop_64to8, mkexpr(srcR))))
3572 putIRegOrZR(is64, dd, mkexpr(res));
3573 vassert(op < 4);
3574 const HChar* names[4] = { "lslv", "lsrv", "asrv", "rorv" };
3575 DIP("%s %s, %s, %s\n",
3576 names[op], nameIRegOrZR(is64,dd),
3577 nameIRegOrZR(is64,nn), nameIRegOrZR(is64,mm));
3578 return True;
3581 /* -------------------- SDIV/UDIV -------------------- */
3582 /* 30 28 20 15 10 9 4
3583 sf 00 1101 0110 m 00001 1 n d SDIV Rd,Rn,Rm
3584 sf 00 1101 0110 m 00001 0 n d UDIV Rd,Rn,Rm
3586 if (INSN(30,21) == BITS10(0,0,1,1,0,1,0,1,1,0)
3587 && INSN(15,11) == BITS5(0,0,0,0,1)) {
3588 Bool is64 = INSN(31,31) == 1;
3589 UInt mm = INSN(20,16);
3590 Bool isS = INSN(10,10) == 1;
3591 UInt nn = INSN(9,5);
3592 UInt dd = INSN(4,0);
3593 if (isS) {
3594 putIRegOrZR(is64, dd, binop(is64 ? Iop_DivS64 : Iop_DivS32,
3595 getIRegOrZR(is64, nn),
3596 getIRegOrZR(is64, mm)));
3597 } else {
3598 putIRegOrZR(is64, dd, binop(is64 ? Iop_DivU64 : Iop_DivU32,
3599 getIRegOrZR(is64, nn),
3600 getIRegOrZR(is64, mm)));
3602 DIP("%cdiv %s, %s, %s\n", isS ? 's' : 'u',
3603 nameIRegOrZR(is64, dd),
3604 nameIRegOrZR(is64, nn), nameIRegOrZR(is64, mm));
3605 return True;
3608 /* ------------------ {S,U}M{ADD,SUB}L ------------------ */
3609 /* 31 23 20 15 14 9 4
3610 1001 1011 101 m 0 a n d UMADDL Xd,Wn,Wm,Xa
3611 1001 1011 001 m 0 a n d SMADDL Xd,Wn,Wm,Xa
3612 1001 1011 101 m 1 a n d UMSUBL Xd,Wn,Wm,Xa
3613 1001 1011 001 m 1 a n d SMSUBL Xd,Wn,Wm,Xa
3614 with operation
3615 Xd = Xa +/- (Wn *u/s Wm)
3617 if (INSN(31,24) == BITS8(1,0,0,1,1,0,1,1) && INSN(22,21) == BITS2(0,1)) {
3618 Bool isU = INSN(23,23) == 1;
3619 UInt mm = INSN(20,16);
3620 Bool isAdd = INSN(15,15) == 0;
3621 UInt aa = INSN(14,10);
3622 UInt nn = INSN(9,5);
3623 UInt dd = INSN(4,0);
3624 IRTemp wN = newTemp(Ity_I32);
3625 IRTemp wM = newTemp(Ity_I32);
3626 IRTemp xA = newTemp(Ity_I64);
3627 IRTemp muld = newTemp(Ity_I64);
3628 IRTemp res = newTemp(Ity_I64);
3629 assign(wN, getIReg32orZR(nn));
3630 assign(wM, getIReg32orZR(mm));
3631 assign(xA, getIReg64orZR(aa));
3632 assign(muld, binop(isU ? Iop_MullU32 : Iop_MullS32,
3633 mkexpr(wN), mkexpr(wM)));
3634 assign(res, binop(isAdd ? Iop_Add64 : Iop_Sub64,
3635 mkexpr(xA), mkexpr(muld)));
3636 putIReg64orZR(dd, mkexpr(res));
3637 DIP("%cm%sl %s, %s, %s, %s\n", isU ? 'u' : 's', isAdd ? "add" : "sub",
3638 nameIReg64orZR(dd), nameIReg32orZR(nn),
3639 nameIReg32orZR(mm), nameIReg64orZR(aa));
3640 return True;
3643 /* -------------------- CRC32/CRC32C -------------------- */
3644 /* 31 30 20 15 11 9 4
3645 sf 00 1101 0110 m 0100 sz n d CRC32<sz> Wd, Wn, Wm|Xm
3646 sf 00 1101 0110 m 0101 sz n d CRC32C<sz> Wd, Wn, Wm|Xm
3648 if (INSN(30,21) == BITS10(0,0,1,1,0,1,0,1,1,0)
3649 && INSN(15,13) == BITS3(0,1,0)) {
3650 UInt bitSF = INSN(31,31);
3651 UInt mm = INSN(20,16);
3652 UInt bitC = INSN(12,12);
3653 UInt sz = INSN(11,10);
3654 UInt nn = INSN(9,5);
3655 UInt dd = INSN(4,0);
3656 vassert(sz <= 3);
3657 if ((bitSF == 0 && sz <= BITS2(1,0))
3658 || (bitSF == 1 && sz == BITS2(1,1))) {
3659 UInt ix = (bitC == 1 ? 4 : 0) | sz;
3660 void* helpers[8]
3661 = { &arm64g_calc_crc32b, &arm64g_calc_crc32h,
3662 &arm64g_calc_crc32w, &arm64g_calc_crc32x,
3663 &arm64g_calc_crc32cb, &arm64g_calc_crc32ch,
3664 &arm64g_calc_crc32cw, &arm64g_calc_crc32cx };
3665 const HChar* hNames[8]
3666 = { "arm64g_calc_crc32b", "arm64g_calc_crc32h",
3667 "arm64g_calc_crc32w", "arm64g_calc_crc32x",
3668 "arm64g_calc_crc32cb", "arm64g_calc_crc32ch",
3669 "arm64g_calc_crc32cw", "arm64g_calc_crc32cx" };
3670 const HChar* iNames[8]
3671 = { "crc32b", "crc32h", "crc32w", "crc32x",
3672 "crc32cb", "crc32ch", "crc32cw", "crc32cx" };
3674 IRTemp srcN = newTemp(Ity_I64);
3675 assign(srcN, unop(Iop_32Uto64, unop(Iop_64to32, getIReg64orZR(nn))));
3677 IRTemp srcM = newTemp(Ity_I64);
3678 IRExpr* at64 = getIReg64orZR(mm);
3679 switch (sz) {
3680 case BITS2(0,0):
3681 assign(srcM, binop(Iop_And64, at64, mkU64(0xFF))); break;
3682 case BITS2(0,1):
3683 assign(srcM, binop(Iop_And64, at64, mkU64(0xFFFF))); break;
3684 case BITS2(1,0):
3685 assign(srcM, binop(Iop_And64, at64, mkU64(0xFFFFFFFF))); break;
3686 case BITS2(1,1):
3687 assign(srcM, at64); break;
3688 default:
3689 vassert(0);
3692 vassert(ix <= 7);
3694 putIReg64orZR(
3696 unop(Iop_32Uto64,
3697 unop(Iop_64to32,
3698 mkIRExprCCall(Ity_I64, 0/*regparm*/,
3699 hNames[ix], helpers[ix],
3700 mkIRExprVec_2(mkexpr(srcN),
3701 mkexpr(srcM))))));
3703 DIP("%s %s, %s, %s\n", iNames[ix],
3704 nameIReg32orZR(dd),
3705 nameIReg32orZR(nn), nameIRegOrZR(bitSF == 1, mm));
3706 return True;
3708 /* fall through */
3711 if (sigill_diag) {
3712 vex_printf("ARM64 front end: data_processing_register\n");
3714 return False;
3715 # undef INSN
3719 /*------------------------------------------------------------*/
3720 /*--- Math helpers for vector interleave/deinterleave ---*/
3721 /*------------------------------------------------------------*/
/* Compact constructors for the V128 IR expressions built by the
   interleave/deinterleave helpers.  Each macro expands to an IR
   expression; byte counts are given as plain integers and are wrapped
   in mkU8 here. */

/* The value of temporary t. */
#define EX(t)                    mkexpr(t)

/* Iop_SliceV128 applied to hi:lo at byte offset nb.  An offset of
   zero yields lo itself and no triop is generated. */
#define SL(hi,lo,nb) \
   ( (nb) != 0 ? triop(Iop_SliceV128,(hi),(lo),mkU8(nb)) : (lo) )

/* ROR(v,nb) is SL(v,v,nb); ROL(v,nb) is SL(v,v,16-nb).
   NOTE(review): ROL(v,0) would ask SL for offset 16 rather than taking
   the zero-offset shortcut -- callers presumably pass 1..15; confirm. */
#define ROR(v,nb)                SL((v),(v),(nb))
#define ROL(v,nb)                SL((v),(v),16-(nb))

/* Whole-vector shifts; the count is in bytes and is scaled to bits. */
#define SHR(v,nb)                binop(Iop_ShrV128,(v),mkU8(8*(nb)))
#define SHL(v,nb)                binop(Iop_ShlV128,(v),mkU8(8*(nb)))

/* Interleave the low (ILO) or high (IHI) halves of two vectors at the
   lane size given in the name. */
#define ILO64x2(argL,argR)       binop(Iop_InterleaveLO64x2,(argL),(argR))
#define IHI64x2(argL,argR)       binop(Iop_InterleaveHI64x2,(argL),(argR))
#define ILO32x4(argL,argR)       binop(Iop_InterleaveLO32x4,(argL),(argR))
#define IHI32x4(argL,argR)       binop(Iop_InterleaveHI32x4,(argL),(argR))
#define ILO16x8(argL,argR)       binop(Iop_InterleaveLO16x8,(argL),(argR))
#define IHI16x8(argL,argR)       binop(Iop_InterleaveHI16x8,(argL),(argR))
#define ILO8x16(argL,argR)       binop(Iop_InterleaveLO8x16,(argL),(argR))
#define IHI8x16(argL,argR)       binop(Iop_InterleaveHI8x16,(argL),(argR))

/* Concatenate the even (CEV) or odd (COD) lanes of two vectors. */
#define CEV32x4(argL,argR)       binop(Iop_CatEvenLanes32x4,(argL),(argR))
#define COD32x4(argL,argR)       binop(Iop_CatOddLanes32x4,(argL),(argR))
#define COD16x8(argL,argR)       binop(Iop_CatOddLanes16x8,(argL),(argR))
#define COD8x16(argL,argR)       binop(Iop_CatOddLanes8x16,(argL),(argR))
#define CEV8x16(argL,argR)       binop(Iop_CatEvenLanes8x16,(argL),(argR))

/* Bitwise AND, and OR of two, three or four vectors.  OR3 nests to
   the right, a | (b | c); OR4 is a balanced tree, (a | b) | (c | d). */
#define AND(x,y)                 binop(Iop_AndV128,(x),(y))
#define OR2(x,y)                 binop(Iop_OrV128,(x),(y))
#define OR3(a,b,c)               OR2((a),OR2((b),(c)))
#define OR4(a,b,c,d)             OR2(OR2((a),(b)),OR2((c),(d)))
3775 /* Do interleaving for 1 128 bit vector, for ST1 insns. */
3776 static
3777 void math_INTERLEAVE1_128( /*OUTx1*/ IRTemp* i0,
3778 UInt laneSzBlg2, IRTemp u0 )
3780 assign(*i0, mkexpr(u0));
3784 /* Do interleaving for 2 128 bit vectors, for ST2 insns. */
3785 static
3786 void math_INTERLEAVE2_128( /*OUTx2*/ IRTemp* i0, IRTemp* i1,
3787 UInt laneSzBlg2, IRTemp u0, IRTemp u1 )
3789 /* This is pretty easy, since we have primitives directly to
3790 hand. */
3791 if (laneSzBlg2 == 3) {
3792 // 64x2
3793 // u1 == B1 B0, u0 == A1 A0
3794 // i1 == B1 A1, i0 == B0 A0
3795 assign(*i0, binop(Iop_InterleaveLO64x2, mkexpr(u1), mkexpr(u0)));
3796 assign(*i1, binop(Iop_InterleaveHI64x2, mkexpr(u1), mkexpr(u0)));
3797 return;
3799 if (laneSzBlg2 == 2) {
3800 // 32x4
3801 // u1 == B3 B2 B1 B0, u0 == A3 A2 A1 A0,
3802 // i1 == B3 A3 B2 A2, i0 == B1 A1 B0 A0
3803 assign(*i0, binop(Iop_InterleaveLO32x4, mkexpr(u1), mkexpr(u0)));
3804 assign(*i1, binop(Iop_InterleaveHI32x4, mkexpr(u1), mkexpr(u0)));
3805 return;
3807 if (laneSzBlg2 == 1) {
3808 // 16x8
3809 // u1 == B{7..0}, u0 == A{7..0}
3810 // i0 == B3 A3 B2 A2 B1 A1 B0 A0
3811 // i1 == B7 A7 B6 A6 B5 A5 B4 A4
3812 assign(*i0, binop(Iop_InterleaveLO16x8, mkexpr(u1), mkexpr(u0)));
3813 assign(*i1, binop(Iop_InterleaveHI16x8, mkexpr(u1), mkexpr(u0)));
3814 return;
3816 if (laneSzBlg2 == 0) {
3817 // 8x16
3818 // u1 == B{f..0}, u0 == A{f..0}
3819 // i0 == B7 A7 B6 A6 B5 A5 B4 A4 B3 A3 B2 A2 B1 A1 B0 A0
3820 // i1 == Bf Af Be Ae Bd Ad Bc Ac Bb Ab Ba Aa B9 A9 B8 A8
3821 assign(*i0, binop(Iop_InterleaveLO8x16, mkexpr(u1), mkexpr(u0)));
3822 assign(*i1, binop(Iop_InterleaveHI8x16, mkexpr(u1), mkexpr(u0)));
3823 return;
3825 /*NOTREACHED*/
3826 vassert(0);
3830 /* Do interleaving for 3 128 bit vectors, for ST3 insns. */
3831 static
3832 void math_INTERLEAVE3_128(
3833 /*OUTx3*/ IRTemp* i0, IRTemp* i1, IRTemp* i2,
3834 UInt laneSzBlg2,
3835 IRTemp u0, IRTemp u1, IRTemp u2 )
3837 if (laneSzBlg2 == 3) {
3838 // 64x2
3839 // u2 == C1 C0, u1 == B1 B0, u0 == A1 A0
3840 // i2 == C1 B1, i1 == A1 C0, i0 == B0 A0,
3841 assign(*i2, IHI64x2( EX(u2), EX(u1) ));
3842 assign(*i1, ILO64x2( ROR(EX(u0),8), EX(u2) ));
3843 assign(*i0, ILO64x2( EX(u1), EX(u0) ));
3844 return;
3847 if (laneSzBlg2 == 2) {
3848 // 32x4
3849 // u2 == C3 C2 C1 C0, u1 == B3 B2 B1 B0, u0 == A3 A2 A1 A0
3850 // p2 == C3 C2 B3 B2, p1 == A3 A2 C1 C0, p0 == B1 B0 A1 A0
3851 // i2 == C3 B3 A2 C2, i1 == B2 A2 C1 B1, i0 == A1 C0 B0 A0
3852 IRTemp p0 = newTempV128();
3853 IRTemp p1 = newTempV128();
3854 IRTemp p2 = newTempV128();
3855 IRTemp c1100 = newTempV128();
3856 IRTemp c0011 = newTempV128();
3857 IRTemp c0110 = newTempV128();
3858 assign(c1100, mkV128(0xFF00));
3859 assign(c0011, mkV128(0x00FF));
3860 assign(c0110, mkV128(0x0FF0));
3861 // First interleave them at 64x2 granularity,
3862 // generating partial ("p") values.
3863 math_INTERLEAVE3_128(&p0, &p1, &p2, 3, u0, u1, u2);
3864 // And more shuffling around for the final answer
3865 assign(*i2, OR2( AND( IHI32x4(EX(p2), ROL(EX(p2),8)), EX(c1100) ),
3866 AND( IHI32x4(ROR(EX(p1),4), EX(p2)), EX(c0011) ) ));
3867 assign(*i1, OR3( SHL(EX(p2),12),
3868 AND(EX(p1),EX(c0110)),
3869 SHR(EX(p0),12) ));
3870 assign(*i0, OR2( AND( ILO32x4(EX(p0),ROL(EX(p1),4)), EX(c1100) ),
3871 AND( ILO32x4(ROR(EX(p0),8),EX(p0)), EX(c0011) ) ));
3872 return;
3875 if (laneSzBlg2 == 1) {
3876 // 16x8
3877 // u2 == C7 C6 C5 C4 C3 C2 C1 C0
3878 // u1 == B7 B6 B5 B4 B3 B2 B1 B0
3879 // u0 == A7 A6 A5 A4 A3 A2 A1 A0
3881 // p2 == C7 C6 B7 B6 A7 A6 C5 C4
3882 // p1 == B5 B4 A5 A4 C3 C2 B3 B2
3883 // p0 == A3 A2 C1 C0 B1 B0 A1 A0
3885 // i2 == C7 B7 A7 C6 B6 A6 C5 B5
3886 // i1 == A5 C4 B4 A4 C4 B3 A3 C2
3887 // i0 == B2 A2 C1 B1 A1 C0 B0 A0
3888 IRTemp p0 = newTempV128();
3889 IRTemp p1 = newTempV128();
3890 IRTemp p2 = newTempV128();
3891 IRTemp c1000 = newTempV128();
3892 IRTemp c0100 = newTempV128();
3893 IRTemp c0010 = newTempV128();
3894 IRTemp c0001 = newTempV128();
3895 assign(c1000, mkV128(0xF000));
3896 assign(c0100, mkV128(0x0F00));
3897 assign(c0010, mkV128(0x00F0));
3898 assign(c0001, mkV128(0x000F));
3899 // First interleave them at 32x4 granularity,
3900 // generating partial ("p") values.
3901 math_INTERLEAVE3_128(&p0, &p1, &p2, 2, u0, u1, u2);
3902 // And more shuffling around for the final answer
3903 assign(*i2,
3904 OR4( AND( IHI16x8( EX(p2), ROL(EX(p2),4) ), EX(c1000) ),
3905 AND( IHI16x8( ROL(EX(p2),6), EX(p2) ), EX(c0100) ),
3906 AND( IHI16x8( ROL(EX(p2),2), ROL(EX(p2),6) ), EX(c0010) ),
3907 AND( ILO16x8( ROR(EX(p2),2), ROL(EX(p1),2) ), EX(c0001) )
3909 assign(*i1,
3910 OR4( AND( IHI16x8( ROL(EX(p1),4), ROR(EX(p2),2) ), EX(c1000) ),
3911 AND( IHI16x8( EX(p1), ROL(EX(p1),4) ), EX(c0100) ),
3912 AND( IHI16x8( ROL(EX(p1),4), ROL(EX(p1),8) ), EX(c0010) ),
3913 AND( IHI16x8( ROR(EX(p0),6), ROL(EX(p1),4) ), EX(c0001) )
3915 assign(*i0,
3916 OR4( AND( IHI16x8( ROR(EX(p1),2), ROL(EX(p0),2) ), EX(c1000) ),
3917 AND( IHI16x8( ROL(EX(p0),2), ROL(EX(p0),6) ), EX(c0100) ),
3918 AND( IHI16x8( ROL(EX(p0),8), ROL(EX(p0),2) ), EX(c0010) ),
3919 AND( IHI16x8( ROL(EX(p0),4), ROL(EX(p0),8) ), EX(c0001) )
3921 return;
3924 if (laneSzBlg2 == 0) {
3925 // 8x16. It doesn't seem worth the hassle of first doing a
3926 // 16x8 interleave, so just generate all 24 partial results
3927 // directly :-(
3928 // u2 == Cf .. C0, u1 == Bf .. B0, u0 == Af .. A0
3929 // i2 == Cf Bf Af Ce .. Bb Ab Ca
3930 // i1 == Ba Aa C9 B9 .. A6 C5 B5
3931 // i0 == A5 C4 B4 A4 .. C0 B0 A0
3933 IRTemp i2_FEDC = newTempV128(); IRTemp i2_BA98 = newTempV128();
3934 IRTemp i2_7654 = newTempV128(); IRTemp i2_3210 = newTempV128();
3935 IRTemp i1_FEDC = newTempV128(); IRTemp i1_BA98 = newTempV128();
3936 IRTemp i1_7654 = newTempV128(); IRTemp i1_3210 = newTempV128();
3937 IRTemp i0_FEDC = newTempV128(); IRTemp i0_BA98 = newTempV128();
3938 IRTemp i0_7654 = newTempV128(); IRTemp i0_3210 = newTempV128();
3939 IRTemp i2_hi64 = newTempV128(); IRTemp i2_lo64 = newTempV128();
3940 IRTemp i1_hi64 = newTempV128(); IRTemp i1_lo64 = newTempV128();
3941 IRTemp i0_hi64 = newTempV128(); IRTemp i0_lo64 = newTempV128();
3943 // eg XXXX(qqq, CC, 0xF, BB, 0xA)) sets qqq to be a vector
3944 // of the form 14 bytes junk : CC[0xF] : BB[0xA]
3946 # define XXXX(_tempName,_srcVec1,_srcShift1,_srcVec2,_srcShift2) \
3947 IRTemp t_##_tempName = newTempV128(); \
3948 assign(t_##_tempName, \
3949 ILO8x16( ROR(EX(_srcVec1),(_srcShift1)), \
3950 ROR(EX(_srcVec2),(_srcShift2)) ) )
3952 // Let CC, BB, AA be (handy) aliases of u2, u1, u0 respectively
3953 IRTemp CC = u2; IRTemp BB = u1; IRTemp AA = u0;
3955 // The slicing and reassembly are done as interleavedly as possible,
3956 // so as to minimise the demand for registers in the back end, which
3957 // was observed to be a problem in testing.
3959 XXXX(CfBf, CC, 0xf, BB, 0xf); // i2[15:14]
3960 XXXX(AfCe, AA, 0xf, CC, 0xe);
3961 assign(i2_FEDC, ILO16x8(EX(t_CfBf), EX(t_AfCe)));
3963 XXXX(BeAe, BB, 0xe, AA, 0xe);
3964 XXXX(CdBd, CC, 0xd, BB, 0xd);
3965 assign(i2_BA98, ILO16x8(EX(t_BeAe), EX(t_CdBd)));
3966 assign(i2_hi64, ILO32x4(EX(i2_FEDC), EX(i2_BA98)));
3968 XXXX(AdCc, AA, 0xd, CC, 0xc);
3969 XXXX(BcAc, BB, 0xc, AA, 0xc);
3970 assign(i2_7654, ILO16x8(EX(t_AdCc), EX(t_BcAc)));
3972 XXXX(CbBb, CC, 0xb, BB, 0xb);
3973 XXXX(AbCa, AA, 0xb, CC, 0xa); // i2[1:0]
3974 assign(i2_3210, ILO16x8(EX(t_CbBb), EX(t_AbCa)));
3975 assign(i2_lo64, ILO32x4(EX(i2_7654), EX(i2_3210)));
3976 assign(*i2, ILO64x2(EX(i2_hi64), EX(i2_lo64)));
3978 XXXX(BaAa, BB, 0xa, AA, 0xa); // i1[15:14]
3979 XXXX(C9B9, CC, 0x9, BB, 0x9);
3980 assign(i1_FEDC, ILO16x8(EX(t_BaAa), EX(t_C9B9)));
3982 XXXX(A9C8, AA, 0x9, CC, 0x8);
3983 XXXX(B8A8, BB, 0x8, AA, 0x8);
3984 assign(i1_BA98, ILO16x8(EX(t_A9C8), EX(t_B8A8)));
3985 assign(i1_hi64, ILO32x4(EX(i1_FEDC), EX(i1_BA98)));
3987 XXXX(C7B7, CC, 0x7, BB, 0x7);
3988 XXXX(A7C6, AA, 0x7, CC, 0x6);
3989 assign(i1_7654, ILO16x8(EX(t_C7B7), EX(t_A7C6)));
3991 XXXX(B6A6, BB, 0x6, AA, 0x6);
3992 XXXX(C5B5, CC, 0x5, BB, 0x5); // i1[1:0]
3993 assign(i1_3210, ILO16x8(EX(t_B6A6), EX(t_C5B5)));
3994 assign(i1_lo64, ILO32x4(EX(i1_7654), EX(i1_3210)));
3995 assign(*i1, ILO64x2(EX(i1_hi64), EX(i1_lo64)));
3997 XXXX(A5C4, AA, 0x5, CC, 0x4); // i0[15:14]
3998 XXXX(B4A4, BB, 0x4, AA, 0x4);
3999 assign(i0_FEDC, ILO16x8(EX(t_A5C4), EX(t_B4A4)));
4001 XXXX(C3B3, CC, 0x3, BB, 0x3);
4002 XXXX(A3C2, AA, 0x3, CC, 0x2);
4003 assign(i0_BA98, ILO16x8(EX(t_C3B3), EX(t_A3C2)));
4004 assign(i0_hi64, ILO32x4(EX(i0_FEDC), EX(i0_BA98)));
4006 XXXX(B2A2, BB, 0x2, AA, 0x2);
4007 XXXX(C1B1, CC, 0x1, BB, 0x1);
4008 assign(i0_7654, ILO16x8(EX(t_B2A2), EX(t_C1B1)));
4010 XXXX(A1C0, AA, 0x1, CC, 0x0);
4011 XXXX(B0A0, BB, 0x0, AA, 0x0); // i0[1:0]
4012 assign(i0_3210, ILO16x8(EX(t_A1C0), EX(t_B0A0)));
4013 assign(i0_lo64, ILO32x4(EX(i0_7654), EX(i0_3210)));
4014 assign(*i0, ILO64x2(EX(i0_hi64), EX(i0_lo64)));
4016 # undef XXXX
4017 return;
4020 /*NOTREACHED*/
4021 vassert(0);
4025 /* Do interleaving for 4 128 bit vectors, for ST4 insns. */
4026 static
4027 void math_INTERLEAVE4_128(
4028 /*OUTx4*/ IRTemp* i0, IRTemp* i1, IRTemp* i2, IRTemp* i3,
4029 UInt laneSzBlg2,
4030 IRTemp u0, IRTemp u1, IRTemp u2, IRTemp u3 )
4032 if (laneSzBlg2 == 3) {
4033 // 64x2
4034 assign(*i0, ILO64x2(EX(u1), EX(u0)));
4035 assign(*i1, ILO64x2(EX(u3), EX(u2)));
4036 assign(*i2, IHI64x2(EX(u1), EX(u0)));
4037 assign(*i3, IHI64x2(EX(u3), EX(u2)));
4038 return;
4040 if (laneSzBlg2 == 2) {
4041 // 32x4
4042 // First, interleave at the 64-bit lane size.
4043 IRTemp p0 = newTempV128();
4044 IRTemp p1 = newTempV128();
4045 IRTemp p2 = newTempV128();
4046 IRTemp p3 = newTempV128();
4047 math_INTERLEAVE4_128(&p0, &p1, &p2, &p3, 3, u0, u1, u2, u3);
4048 // And interleave (cat) at the 32 bit size.
4049 assign(*i0, CEV32x4(EX(p1), EX(p0)));
4050 assign(*i1, COD32x4(EX(p1), EX(p0)));
4051 assign(*i2, CEV32x4(EX(p3), EX(p2)));
4052 assign(*i3, COD32x4(EX(p3), EX(p2)));
4053 return;
4055 if (laneSzBlg2 == 1) {
4056 // 16x8
4057 // First, interleave at the 32-bit lane size.
4058 IRTemp p0 = newTempV128();
4059 IRTemp p1 = newTempV128();
4060 IRTemp p2 = newTempV128();
4061 IRTemp p3 = newTempV128();
4062 math_INTERLEAVE4_128(&p0, &p1, &p2, &p3, 2, u0, u1, u2, u3);
4063 // And rearrange within each vector, to get the right 16 bit lanes.
4064 assign(*i0, COD16x8(EX(p0), SHL(EX(p0), 2)));
4065 assign(*i1, COD16x8(EX(p1), SHL(EX(p1), 2)));
4066 assign(*i2, COD16x8(EX(p2), SHL(EX(p2), 2)));
4067 assign(*i3, COD16x8(EX(p3), SHL(EX(p3), 2)));
4068 return;
4070 if (laneSzBlg2 == 0) {
4071 // 8x16
4072 // First, interleave at the 16-bit lane size.
4073 IRTemp p0 = newTempV128();
4074 IRTemp p1 = newTempV128();
4075 IRTemp p2 = newTempV128();
4076 IRTemp p3 = newTempV128();
4077 math_INTERLEAVE4_128(&p0, &p1, &p2, &p3, 1, u0, u1, u2, u3);
4078 // And rearrange within each vector, to get the right 8 bit lanes.
4079 assign(*i0, IHI32x4(COD8x16(EX(p0),EX(p0)), CEV8x16(EX(p0),EX(p0))));
4080 assign(*i1, IHI32x4(COD8x16(EX(p1),EX(p1)), CEV8x16(EX(p1),EX(p1))));
4081 assign(*i2, IHI32x4(COD8x16(EX(p2),EX(p2)), CEV8x16(EX(p2),EX(p2))));
4082 assign(*i3, IHI32x4(COD8x16(EX(p3),EX(p3)), CEV8x16(EX(p3),EX(p3))));
4083 return;
4085 /*NOTREACHED*/
4086 vassert(0);
4090 /* Do deinterleaving for 1 128 bit vector, for LD1 insns. */
4091 static
4092 void math_DEINTERLEAVE1_128( /*OUTx1*/ IRTemp* u0,
4093 UInt laneSzBlg2, IRTemp i0 )
4095 assign(*u0, mkexpr(i0));
4099 /* Do deinterleaving for 2 128 bit vectors, for LD2 insns. */
4100 static
4101 void math_DEINTERLEAVE2_128( /*OUTx2*/ IRTemp* u0, IRTemp* u1,
4102 UInt laneSzBlg2, IRTemp i0, IRTemp i1 )
4104 /* This is pretty easy, since we have primitives directly to
4105 hand. */
4106 if (laneSzBlg2 == 3) {
4107 // 64x2
4108 // i1 == B1 A1, i0 == B0 A0
4109 // u1 == B1 B0, u0 == A1 A0
4110 assign(*u0, binop(Iop_InterleaveLO64x2, mkexpr(i1), mkexpr(i0)));
4111 assign(*u1, binop(Iop_InterleaveHI64x2, mkexpr(i1), mkexpr(i0)));
4112 return;
4114 if (laneSzBlg2 == 2) {
4115 // 32x4
4116 // i1 == B3 A3 B2 A2, i0 == B1 A1 B0 A0
4117 // u1 == B3 B2 B1 B0, u0 == A3 A2 A1 A0,
4118 assign(*u0, binop(Iop_CatEvenLanes32x4, mkexpr(i1), mkexpr(i0)));
4119 assign(*u1, binop(Iop_CatOddLanes32x4, mkexpr(i1), mkexpr(i0)));
4120 return;
4122 if (laneSzBlg2 == 1) {
4123 // 16x8
4124 // i0 == B3 A3 B2 A2 B1 A1 B0 A0
4125 // i1 == B7 A7 B6 A6 B5 A5 B4 A4
4126 // u1 == B{7..0}, u0 == A{7..0}
4127 assign(*u0, binop(Iop_CatEvenLanes16x8, mkexpr(i1), mkexpr(i0)));
4128 assign(*u1, binop(Iop_CatOddLanes16x8, mkexpr(i1), mkexpr(i0)));
4129 return;
4131 if (laneSzBlg2 == 0) {
4132 // 8x16
4133 // i0 == B7 A7 B6 A6 B5 A5 B4 A4 B3 A3 B2 A2 B1 A1 B0 A0
4134 // i1 == Bf Af Be Ae Bd Ad Bc Ac Bb Ab Ba Aa B9 A9 B8 A8
4135 // u1 == B{f..0}, u0 == A{f..0}
4136 assign(*u0, binop(Iop_CatEvenLanes8x16, mkexpr(i1), mkexpr(i0)));
4137 assign(*u1, binop(Iop_CatOddLanes8x16, mkexpr(i1), mkexpr(i0)));
4138 return;
4140 /*NOTREACHED*/
4141 vassert(0);
4145 /* Do deinterleaving for 3 128 bit vectors, for LD3 insns. */
4146 static
4147 void math_DEINTERLEAVE3_128(
4148 /*OUTx3*/ IRTemp* u0, IRTemp* u1, IRTemp* u2,
4149 UInt laneSzBlg2,
4150 IRTemp i0, IRTemp i1, IRTemp i2 )
4152 if (laneSzBlg2 == 3) {
4153 // 64x2
4154 // i2 == C1 B1, i1 == A1 C0, i0 == B0 A0,
4155 // u2 == C1 C0, u1 == B1 B0, u0 == A1 A0
4156 assign(*u2, ILO64x2( ROL(EX(i2),8), EX(i1) ));
4157 assign(*u1, ILO64x2( EX(i2), ROL(EX(i0),8) ));
4158 assign(*u0, ILO64x2( ROL(EX(i1),8), EX(i0) ));
4159 return;
4162 if (laneSzBlg2 == 2) {
4163 // 32x4
4164 // i2 == C3 B3 A2 C2, i1 == B2 A2 C1 B1, i0 == A1 C0 B0 A0
4165 // p2 == C3 C2 B3 B2, p1 == A3 A2 C1 C0, p0 == B1 B0 A1 A0
4166 // u2 == C3 C2 C1 C0, u1 == B3 B2 B1 B0, u0 == A3 A2 A1 A0
4167 IRTemp t_a1c0b0a0 = newTempV128();
4168 IRTemp t_a2c1b1a1 = newTempV128();
4169 IRTemp t_a3c2b2a2 = newTempV128();
4170 IRTemp t_a0c3b3a3 = newTempV128();
4171 IRTemp p0 = newTempV128();
4172 IRTemp p1 = newTempV128();
4173 IRTemp p2 = newTempV128();
4174 // Compute some intermediate values.
4175 assign(t_a1c0b0a0, EX(i0));
4176 assign(t_a2c1b1a1, SL(EX(i1),EX(i0),3*4));
4177 assign(t_a3c2b2a2, SL(EX(i2),EX(i1),2*4));
4178 assign(t_a0c3b3a3, SL(EX(i0),EX(i2),1*4));
4179 // First deinterleave into lane-pairs
4180 assign(p0, ILO32x4(EX(t_a2c1b1a1),EX(t_a1c0b0a0)));
4181 assign(p1, ILO64x2(ILO32x4(EX(t_a0c3b3a3), EX(t_a3c2b2a2)),
4182 IHI32x4(EX(t_a2c1b1a1), EX(t_a1c0b0a0))));
4183 assign(p2, ILO32x4(ROR(EX(t_a0c3b3a3),1*4), ROR(EX(t_a3c2b2a2),1*4)));
4184 // Then deinterleave at 64x2 granularity.
4185 math_DEINTERLEAVE3_128(u0, u1, u2, 3, p0, p1, p2);
4186 return;
4189 if (laneSzBlg2 == 1) {
4190 // 16x8
4191 // u2 == C7 C6 C5 C4 C3 C2 C1 C0
4192 // u1 == B7 B6 B5 B4 B3 B2 B1 B0
4193 // u0 == A7 A6 A5 A4 A3 A2 A1 A0
4195 // i2 == C7 B7 A7 C6 B6 A6 C5 B5
4196 // i1 == A5 C4 B4 A4 C4 B3 A3 C2
4197 // i0 == B2 A2 C1 B1 A1 C0 B0 A0
4199 // p2 == C7 C6 B7 B6 A7 A6 C5 C4
4200 // p1 == B5 B4 A5 A4 C3 C2 B3 B2
4201 // p0 == A3 A2 C1 C0 B1 B0 A1 A0
4203 IRTemp s0, s1, s2, s3, t0, t1, t2, t3, p0, p1, p2, c00111111;
4204 s0 = s1 = s2 = s3
4205 = t0 = t1 = t2 = t3 = p0 = p1 = p2 = c00111111 = IRTemp_INVALID;
4206 newTempsV128_4(&s0, &s1, &s2, &s3);
4207 newTempsV128_4(&t0, &t1, &t2, &t3);
4208 newTempsV128_4(&p0, &p1, &p2, &c00111111);
4210 // s0 == b2a2 c1b1a1 c0b0a0
4211 // s1 == b4a4 c3b3c3 c2b2a2
4212 // s2 == b6a6 c5b5a5 c4b4a4
4213 // s3 == b0a0 c7b7a7 c6b6a6
4214 assign(s0, EX(i0));
4215 assign(s1, SL(EX(i1),EX(i0),6*2));
4216 assign(s2, SL(EX(i2),EX(i1),4*2));
4217 assign(s3, SL(EX(i0),EX(i2),2*2));
4219 // t0 == 0 0 c1c0 b1b0 a1a0
4220 // t1 == 0 0 c3c2 b3b2 a3a2
4221 // t2 == 0 0 c5c4 b5b4 a5a4
4222 // t3 == 0 0 c7c6 b7b6 a7a6
4223 assign(c00111111, mkV128(0x0FFF));
4224 assign(t0, AND( ILO16x8( ROR(EX(s0),3*2), EX(s0)), EX(c00111111)));
4225 assign(t1, AND( ILO16x8( ROR(EX(s1),3*2), EX(s1)), EX(c00111111)));
4226 assign(t2, AND( ILO16x8( ROR(EX(s2),3*2), EX(s2)), EX(c00111111)));
4227 assign(t3, AND( ILO16x8( ROR(EX(s3),3*2), EX(s3)), EX(c00111111)));
4229 assign(p0, OR2(EX(t0), SHL(EX(t1),6*2)));
4230 assign(p1, OR2(SHL(EX(t2),4*2), SHR(EX(t1),2*2)));
4231 assign(p2, OR2(SHL(EX(t3),2*2), SHR(EX(t2),4*2)));
4233 // Then deinterleave at 32x4 granularity.
4234 math_DEINTERLEAVE3_128(u0, u1, u2, 2, p0, p1, p2);
4235 return;
4238 if (laneSzBlg2 == 0) {
4239 // 8x16. This is the same scheme as for 16x8, with twice the
4240 // number of intermediate values.
4242 // u2 == C{f..0}
4243 // u1 == B{f..0}
4244 // u0 == A{f..0}
4246 // i2 == CBA{f} CBA{e} CBA{d} CBA{c} CBA{b} C{a}
4247 // i1 == BA{a} CBA{9} CBA{8} CBA{7} CBA{6} CB{5}
4248 // i0 == A{5} CBA{4} CBA{3} CBA{2} CBA{1} CBA{0}
4250 // p2 == C{fe} B{fe} A{fe} C{dc} B{dc} A{dc} C{ba} B{ba}
4251 // p1 == A{ba} C{98} B{98} A{98} C{76} B{76} A{76} C{54}
4252 // p0 == B{54} A{54} C{32} B{32} A{32} C{10} B{10} A{10}
4254 IRTemp s0, s1, s2, s3, s4, s5, s6, s7,
4255 t0, t1, t2, t3, t4, t5, t6, t7, p0, p1, p2, cMASK;
4256 s0 = s1 = s2 = s3 = s4 = s5 = s6 = s7
4257 = t0 = t1 = t2 = t3 = t4 = t5 = t6 = t7 = p0 = p1 = p2 = cMASK
4258 = IRTemp_INVALID;
4259 newTempsV128_4(&s0, &s1, &s2, &s3);
4260 newTempsV128_4(&s4, &s5, &s6, &s7);
4261 newTempsV128_4(&t0, &t1, &t2, &t3);
4262 newTempsV128_4(&t4, &t5, &t6, &t7);
4263 newTempsV128_4(&p0, &p1, &p2, &cMASK);
4265 // s0 == A{5} CBA{4} CBA{3} CBA{2} CBA{1} CBA{0}
4266 // s1 == A{7} CBA{6} CBA{5} CBA{4} CBA{3} CBA{2}
4267 // s2 == A{9} CBA{8} CBA{7} CBA{6} CBA{5} CBA{4}
4268 // s3 == A{b} CBA{a} CBA{9} CBA{8} CBA{7} CBA{6}
4269 // s4 == A{d} CBA{c} CBA{b} CBA{a} CBA{9} CBA{8}
4270 // s5 == A{f} CBA{e} CBA{d} CBA{c} CBA{b} CBA{a}
4271 // s6 == A{1} CBA{0} CBA{f} CBA{e} CBA{d} CBA{c}
4272 // s7 == A{3} CBA{2} CBA{1} CBA{0} CBA{f} CBA{e}
4273 assign(s0, SL(EX(i1),EX(i0), 0));
4274 assign(s1, SL(EX(i1),EX(i0), 6));
4275 assign(s2, SL(EX(i1),EX(i0),12));
4276 assign(s3, SL(EX(i2),EX(i1), 2));
4277 assign(s4, SL(EX(i2),EX(i1), 8));
4278 assign(s5, SL(EX(i2),EX(i1),14));
4279 assign(s6, SL(EX(i0),EX(i2), 4));
4280 assign(s7, SL(EX(i0),EX(i2),10));
4282 // t0 == 0--(ten)--0 C1 C0 B1 B0 A1 A0
4283 // t1 == 0--(ten)--0 C3 C2 B3 B2 A3 A2
4284 // t2 == 0--(ten)--0 C5 C4 B5 B4 A5 A4
4285 // t3 == 0--(ten)--0 C7 C6 B7 B6 A7 A6
4286 // t4 == 0--(ten)--0 C9 C8 B9 B8 A9 A8
4287 // t5 == 0--(ten)--0 Cb Ca Bb Ba Ab Aa
4288 // t6 == 0--(ten)--0 Cd Cc Bd Bc Ad Ac
4289 // t7 == 0--(ten)--0 Cf Ce Bf Be Af Ae
4290 assign(cMASK, mkV128(0x003F));
4291 assign(t0, AND( ILO8x16( ROR(EX(s0),3), EX(s0)), EX(cMASK)));
4292 assign(t1, AND( ILO8x16( ROR(EX(s1),3), EX(s1)), EX(cMASK)));
4293 assign(t2, AND( ILO8x16( ROR(EX(s2),3), EX(s2)), EX(cMASK)));
4294 assign(t3, AND( ILO8x16( ROR(EX(s3),3), EX(s3)), EX(cMASK)));
4295 assign(t4, AND( ILO8x16( ROR(EX(s4),3), EX(s4)), EX(cMASK)));
4296 assign(t5, AND( ILO8x16( ROR(EX(s5),3), EX(s5)), EX(cMASK)));
4297 assign(t6, AND( ILO8x16( ROR(EX(s6),3), EX(s6)), EX(cMASK)));
4298 assign(t7, AND( ILO8x16( ROR(EX(s7),3), EX(s7)), EX(cMASK)));
4300 assign(p0, OR3( SHL(EX(t2),12), SHL(EX(t1),6), EX(t0) ));
4301 assign(p1, OR4( SHL(EX(t5),14), SHL(EX(t4),8),
4302 SHL(EX(t3),2), SHR(EX(t2),4) ));
4303 assign(p2, OR3( SHL(EX(t7),10), SHL(EX(t6),4), SHR(EX(t5),2) ));
4305 // Then deinterleave at 16x8 granularity.
4306 math_DEINTERLEAVE3_128(u0, u1, u2, 1, p0, p1, p2);
4307 return;
4310 /*NOTREACHED*/
4311 vassert(0);
4315 /* Do deinterleaving for 4 128 bit vectors, for LD4 insns. */
4316 static
4317 void math_DEINTERLEAVE4_128(
4318 /*OUTx4*/ IRTemp* u0, IRTemp* u1, IRTemp* u2, IRTemp* u3,
4319 UInt laneSzBlg2,
4320 IRTemp i0, IRTemp i1, IRTemp i2, IRTemp i3 )
4322 if (laneSzBlg2 == 3) {
4323 // 64x2
4324 assign(*u0, ILO64x2(EX(i2), EX(i0)));
4325 assign(*u1, IHI64x2(EX(i2), EX(i0)));
4326 assign(*u2, ILO64x2(EX(i3), EX(i1)));
4327 assign(*u3, IHI64x2(EX(i3), EX(i1)));
4328 return;
4330 if (laneSzBlg2 == 2) {
4331 // 32x4
4332 IRTemp p0 = newTempV128();
4333 IRTemp p2 = newTempV128();
4334 IRTemp p1 = newTempV128();
4335 IRTemp p3 = newTempV128();
4336 assign(p0, ILO32x4(EX(i1), EX(i0)));
4337 assign(p1, IHI32x4(EX(i1), EX(i0)));
4338 assign(p2, ILO32x4(EX(i3), EX(i2)));
4339 assign(p3, IHI32x4(EX(i3), EX(i2)));
4340 // And now do what we did for the 64-bit case.
4341 math_DEINTERLEAVE4_128(u0, u1, u2, u3, 3, p0, p1, p2, p3);
4342 return;
4344 if (laneSzBlg2 == 1) {
4345 // 16x8
4346 // Deinterleave into 32-bit chunks, then do as the 32-bit case.
4347 IRTemp p0 = newTempV128();
4348 IRTemp p1 = newTempV128();
4349 IRTemp p2 = newTempV128();
4350 IRTemp p3 = newTempV128();
4351 assign(p0, IHI16x8(EX(i0), SHL(EX(i0), 8)));
4352 assign(p1, IHI16x8(EX(i1), SHL(EX(i1), 8)));
4353 assign(p2, IHI16x8(EX(i2), SHL(EX(i2), 8)));
4354 assign(p3, IHI16x8(EX(i3), SHL(EX(i3), 8)));
4355 // From here on is like the 32 bit case.
4356 math_DEINTERLEAVE4_128(u0, u1, u2, u3, 2, p0, p1, p2, p3);
4357 return;
4359 if (laneSzBlg2 == 0) {
4360 // 8x16
4361 // Deinterleave into 16-bit chunks, then do as the 16-bit case.
4362 IRTemp p0 = newTempV128();
4363 IRTemp p1 = newTempV128();
4364 IRTemp p2 = newTempV128();
4365 IRTemp p3 = newTempV128();
4366 assign(p0, IHI64x2( IHI8x16(EX(i0),ROL(EX(i0),4)),
4367 ILO8x16(EX(i0),ROL(EX(i0),4)) ));
4368 assign(p1, IHI64x2( IHI8x16(EX(i1),ROL(EX(i1),4)),
4369 ILO8x16(EX(i1),ROL(EX(i1),4)) ));
4370 assign(p2, IHI64x2( IHI8x16(EX(i2),ROL(EX(i2),4)),
4371 ILO8x16(EX(i2),ROL(EX(i2),4)) ));
4372 assign(p3, IHI64x2( IHI8x16(EX(i3),ROL(EX(i3),4)),
4373 ILO8x16(EX(i3),ROL(EX(i3),4)) ));
4374 // From here on is like the 16 bit case.
4375 math_DEINTERLEAVE4_128(u0, u1, u2, u3, 1, p0, p1, p2, p3);
4376 return;
4378 /*NOTREACHED*/
4379 vassert(0);
/* Wrappers that use the full-width (de)interleavers to do half-width
   (de)interleaving.  The scheme is to clone each input lane in the
   lower half of each incoming value, do a full width (de)interleave
   at the next lane size up, and remove every other lane of the
   result.  The returned values may have any old junk in the upper
   64 bits -- the caller must ignore that. */
4390 /* Helper function -- get doubling and narrowing operations. */
4391 static
4392 void math_get_doubler_and_halver ( /*OUT*/IROp* doubler,
4393 /*OUT*/IROp* halver,
4394 UInt laneSzBlg2 )
4396 switch (laneSzBlg2) {
4397 case 2:
4398 *doubler = Iop_InterleaveLO32x4; *halver = Iop_CatEvenLanes32x4;
4399 break;
4400 case 1:
4401 *doubler = Iop_InterleaveLO16x8; *halver = Iop_CatEvenLanes16x8;
4402 break;
4403 case 0:
4404 *doubler = Iop_InterleaveLO8x16; *halver = Iop_CatEvenLanes8x16;
4405 break;
4406 default:
4407 vassert(0);
4411 /* Do interleaving for 1 64 bit vector, for ST1 insns. */
4412 static
4413 void math_INTERLEAVE1_64( /*OUTx1*/ IRTemp* i0,
4414 UInt laneSzBlg2, IRTemp u0 )
4416 assign(*i0, mkexpr(u0));
4420 /* Do interleaving for 2 64 bit vectors, for ST2 insns. */
4421 static
4422 void math_INTERLEAVE2_64( /*OUTx2*/ IRTemp* i0, IRTemp* i1,
4423 UInt laneSzBlg2, IRTemp u0, IRTemp u1 )
4425 if (laneSzBlg2 == 3) {
4426 // 1x64, degenerate case
4427 assign(*i0, EX(u0));
4428 assign(*i1, EX(u1));
4429 return;
4432 vassert(laneSzBlg2 <= 2);
4433 IROp doubler = Iop_INVALID, halver = Iop_INVALID;
4434 math_get_doubler_and_halver(&doubler, &halver, laneSzBlg2);
4436 IRTemp du0 = newTempV128();
4437 IRTemp du1 = newTempV128();
4438 assign(du0, binop(doubler, EX(u0), EX(u0)));
4439 assign(du1, binop(doubler, EX(u1), EX(u1)));
4440 IRTemp di0 = newTempV128();
4441 IRTemp di1 = newTempV128();
4442 math_INTERLEAVE2_128(&di0, &di1, laneSzBlg2 + 1, du0, du1);
4443 assign(*i0, binop(halver, EX(di0), EX(di0)));
4444 assign(*i1, binop(halver, EX(di1), EX(di1)));
4448 /* Do interleaving for 3 64 bit vectors, for ST3 insns. */
4449 static
4450 void math_INTERLEAVE3_64(
4451 /*OUTx3*/ IRTemp* i0, IRTemp* i1, IRTemp* i2,
4452 UInt laneSzBlg2,
4453 IRTemp u0, IRTemp u1, IRTemp u2 )
4455 if (laneSzBlg2 == 3) {
4456 // 1x64, degenerate case
4457 assign(*i0, EX(u0));
4458 assign(*i1, EX(u1));
4459 assign(*i2, EX(u2));
4460 return;
4463 vassert(laneSzBlg2 <= 2);
4464 IROp doubler = Iop_INVALID, halver = Iop_INVALID;
4465 math_get_doubler_and_halver(&doubler, &halver, laneSzBlg2);
4467 IRTemp du0 = newTempV128();
4468 IRTemp du1 = newTempV128();
4469 IRTemp du2 = newTempV128();
4470 assign(du0, binop(doubler, EX(u0), EX(u0)));
4471 assign(du1, binop(doubler, EX(u1), EX(u1)));
4472 assign(du2, binop(doubler, EX(u2), EX(u2)));
4473 IRTemp di0 = newTempV128();
4474 IRTemp di1 = newTempV128();
4475 IRTemp di2 = newTempV128();
4476 math_INTERLEAVE3_128(&di0, &di1, &di2, laneSzBlg2 + 1, du0, du1, du2);
4477 assign(*i0, binop(halver, EX(di0), EX(di0)));
4478 assign(*i1, binop(halver, EX(di1), EX(di1)));
4479 assign(*i2, binop(halver, EX(di2), EX(di2)));
4483 /* Do interleaving for 4 64 bit vectors, for ST4 insns. */
4484 static
4485 void math_INTERLEAVE4_64(
4486 /*OUTx4*/ IRTemp* i0, IRTemp* i1, IRTemp* i2, IRTemp* i3,
4487 UInt laneSzBlg2,
4488 IRTemp u0, IRTemp u1, IRTemp u2, IRTemp u3 )
4490 if (laneSzBlg2 == 3) {
4491 // 1x64, degenerate case
4492 assign(*i0, EX(u0));
4493 assign(*i1, EX(u1));
4494 assign(*i2, EX(u2));
4495 assign(*i3, EX(u3));
4496 return;
4499 vassert(laneSzBlg2 <= 2);
4500 IROp doubler = Iop_INVALID, halver = Iop_INVALID;
4501 math_get_doubler_and_halver(&doubler, &halver, laneSzBlg2);
4503 IRTemp du0 = newTempV128();
4504 IRTemp du1 = newTempV128();
4505 IRTemp du2 = newTempV128();
4506 IRTemp du3 = newTempV128();
4507 assign(du0, binop(doubler, EX(u0), EX(u0)));
4508 assign(du1, binop(doubler, EX(u1), EX(u1)));
4509 assign(du2, binop(doubler, EX(u2), EX(u2)));
4510 assign(du3, binop(doubler, EX(u3), EX(u3)));
4511 IRTemp di0 = newTempV128();
4512 IRTemp di1 = newTempV128();
4513 IRTemp di2 = newTempV128();
4514 IRTemp di3 = newTempV128();
4515 math_INTERLEAVE4_128(&di0, &di1, &di2, &di3,
4516 laneSzBlg2 + 1, du0, du1, du2, du3);
4517 assign(*i0, binop(halver, EX(di0), EX(di0)));
4518 assign(*i1, binop(halver, EX(di1), EX(di1)));
4519 assign(*i2, binop(halver, EX(di2), EX(di2)));
4520 assign(*i3, binop(halver, EX(di3), EX(di3)));
4524 /* Do deinterleaving for 1 64 bit vector, for LD1 insns. */
4525 static
4526 void math_DEINTERLEAVE1_64( /*OUTx1*/ IRTemp* u0,
4527 UInt laneSzBlg2, IRTemp i0 )
4529 assign(*u0, mkexpr(i0));
4533 /* Do deinterleaving for 2 64 bit vectors, for LD2 insns. */
4534 static
4535 void math_DEINTERLEAVE2_64( /*OUTx2*/ IRTemp* u0, IRTemp* u1,
4536 UInt laneSzBlg2, IRTemp i0, IRTemp i1 )
4538 if (laneSzBlg2 == 3) {
4539 // 1x64, degenerate case
4540 assign(*u0, EX(i0));
4541 assign(*u1, EX(i1));
4542 return;
4545 vassert(laneSzBlg2 <= 2);
4546 IROp doubler = Iop_INVALID, halver = Iop_INVALID;
4547 math_get_doubler_and_halver(&doubler, &halver, laneSzBlg2);
4549 IRTemp di0 = newTempV128();
4550 IRTemp di1 = newTempV128();
4551 assign(di0, binop(doubler, EX(i0), EX(i0)));
4552 assign(di1, binop(doubler, EX(i1), EX(i1)));
4554 IRTemp du0 = newTempV128();
4555 IRTemp du1 = newTempV128();
4556 math_DEINTERLEAVE2_128(&du0, &du1, laneSzBlg2 + 1, di0, di1);
4557 assign(*u0, binop(halver, EX(du0), EX(du0)));
4558 assign(*u1, binop(halver, EX(du1), EX(du1)));
4562 /* Do deinterleaving for 3 64 bit vectors, for LD3 insns. */
4563 static
4564 void math_DEINTERLEAVE3_64(
4565 /*OUTx3*/ IRTemp* u0, IRTemp* u1, IRTemp* u2,
4566 UInt laneSzBlg2,
4567 IRTemp i0, IRTemp i1, IRTemp i2 )
4569 if (laneSzBlg2 == 3) {
4570 // 1x64, degenerate case
4571 assign(*u0, EX(i0));
4572 assign(*u1, EX(i1));
4573 assign(*u2, EX(i2));
4574 return;
4577 vassert(laneSzBlg2 <= 2);
4578 IROp doubler = Iop_INVALID, halver = Iop_INVALID;
4579 math_get_doubler_and_halver(&doubler, &halver, laneSzBlg2);
4581 IRTemp di0 = newTempV128();
4582 IRTemp di1 = newTempV128();
4583 IRTemp di2 = newTempV128();
4584 assign(di0, binop(doubler, EX(i0), EX(i0)));
4585 assign(di1, binop(doubler, EX(i1), EX(i1)));
4586 assign(di2, binop(doubler, EX(i2), EX(i2)));
4587 IRTemp du0 = newTempV128();
4588 IRTemp du1 = newTempV128();
4589 IRTemp du2 = newTempV128();
4590 math_DEINTERLEAVE3_128(&du0, &du1, &du2, laneSzBlg2 + 1, di0, di1, di2);
4591 assign(*u0, binop(halver, EX(du0), EX(du0)));
4592 assign(*u1, binop(halver, EX(du1), EX(du1)));
4593 assign(*u2, binop(halver, EX(du2), EX(du2)));
4597 /* Do deinterleaving for 4 64 bit vectors, for LD4 insns. */
4598 static
4599 void math_DEINTERLEAVE4_64(
4600 /*OUTx4*/ IRTemp* u0, IRTemp* u1, IRTemp* u2, IRTemp* u3,
4601 UInt laneSzBlg2,
4602 IRTemp i0, IRTemp i1, IRTemp i2, IRTemp i3 )
4604 if (laneSzBlg2 == 3) {
4605 // 1x64, degenerate case
4606 assign(*u0, EX(i0));
4607 assign(*u1, EX(i1));
4608 assign(*u2, EX(i2));
4609 assign(*u3, EX(i3));
4610 return;
4613 vassert(laneSzBlg2 <= 2);
4614 IROp doubler = Iop_INVALID, halver = Iop_INVALID;
4615 math_get_doubler_and_halver(&doubler, &halver, laneSzBlg2);
4617 IRTemp di0 = newTempV128();
4618 IRTemp di1 = newTempV128();
4619 IRTemp di2 = newTempV128();
4620 IRTemp di3 = newTempV128();
4621 assign(di0, binop(doubler, EX(i0), EX(i0)));
4622 assign(di1, binop(doubler, EX(i1), EX(i1)));
4623 assign(di2, binop(doubler, EX(i2), EX(i2)));
4624 assign(di3, binop(doubler, EX(i3), EX(i3)));
4625 IRTemp du0 = newTempV128();
4626 IRTemp du1 = newTempV128();
4627 IRTemp du2 = newTempV128();
4628 IRTemp du3 = newTempV128();
4629 math_DEINTERLEAVE4_128(&du0, &du1, &du2, &du3,
4630 laneSzBlg2 + 1, di0, di1, di2, di3);
4631 assign(*u0, binop(halver, EX(du0), EX(du0)));
4632 assign(*u1, binop(halver, EX(du1), EX(du1)));
4633 assign(*u2, binop(halver, EX(du2), EX(du2)));
4634 assign(*u3, binop(halver, EX(du3), EX(du3)));
/* Retire the shorthand macros used by the (de)interleaving helpers
   above, so that none of these very short names leaks into the rest
   of the file.  ILO8x16 and IHI8x16 are included because the helpers
   use them too. */
#undef EX
#undef SL
#undef ROR
#undef ROL
#undef SHR
#undef SHL
#undef ILO64x2
#undef IHI64x2
#undef ILO32x4
#undef IHI32x4
#undef ILO16x8
#undef IHI16x8
#undef ILO8x16
#undef IHI8x16
#undef CEV32x4
#undef COD32x4
#undef COD16x8
#undef COD8x16
#undef CEV8x16
#undef AND
#undef OR2
#undef OR3
#undef OR4
4663 /*------------------------------------------------------------*/
4664 /*--- Load and Store instructions ---*/
4665 /*------------------------------------------------------------*/
4667 /* Generate the EA for a "reg + reg" style amode. This is done from
4668 parts of the insn, but for sanity checking sake it takes the whole
4669 insn. This appears to depend on insn[15:12], with opt=insn[15:13]
4670 and S=insn[12]:
4672 The possible forms, along with their opt:S values, are:
4673 011:0 Xn|SP + Xm
4674 111:0 Xn|SP + Xm
4675 011:1 Xn|SP + Xm * transfer_szB
4676 111:1 Xn|SP + Xm * transfer_szB
4677 010:0 Xn|SP + 32Uto64(Wm)
4678 010:1 Xn|SP + 32Uto64(Wm) * transfer_szB
4679 110:0 Xn|SP + 32Sto64(Wm)
4680 110:1 Xn|SP + 32Sto64(Wm) * transfer_szB
4682 Rm is insn[20:16]. Rn is insn[9:5]. Rt is insn[4:0]. Log2 of
4683 the transfer size is insn[23,31,30]. For integer loads/stores,
4684 insn[23] is zero, hence szLg2 can be at most 3 in such cases.
4686 If the decoding fails, it returns IRTemp_INVALID.
4688 isInt is True iff this is decoding is for transfers to/from integer
4689 registers. If False it is for transfers to/from vector registers.
4691 static IRTemp gen_indexed_EA ( /*OUT*/HChar* buf, UInt insn, Bool isInt )
4693 UInt optS = SLICE_UInt(insn, 15, 12);
4694 UInt mm = SLICE_UInt(insn, 20, 16);
4695 UInt nn = SLICE_UInt(insn, 9, 5);
4696 UInt szLg2 = (isInt ? 0 : (SLICE_UInt(insn, 23, 23) << 2))
4697 | SLICE_UInt(insn, 31, 30); // Log2 of the size
4699 buf[0] = 0;
4701 /* Sanity checks, that this really is a load/store insn. */
4702 if (SLICE_UInt(insn, 11, 10) != BITS2(1,0))
4703 goto fail;
4705 if (isInt
4706 && SLICE_UInt(insn, 29, 21) != BITS9(1,1,1,0,0,0,0,1,1)/*LDR*/
4707 && SLICE_UInt(insn, 29, 21) != BITS9(1,1,1,0,0,0,0,0,1)/*STR*/
4708 && SLICE_UInt(insn, 29, 21) != BITS9(1,1,1,0,0,0,1,0,1)/*LDRSbhw Xt*/
4709 && SLICE_UInt(insn, 29, 21) != BITS9(1,1,1,0,0,0,1,1,1))/*LDRSbhw Wt*/
4710 goto fail;
4712 if (!isInt
4713 && SLICE_UInt(insn, 29, 24) != BITS6(1,1,1,1,0,0)) /*LDR/STR*/
4714 goto fail;
4716 /* Throw out non-verified but possibly valid cases. */
4717 switch (szLg2) {
4718 case BITS3(0,0,0): break; // 8 bit, valid for both int and vec
4719 case BITS3(0,0,1): break; // 16 bit, valid for both int and vec
4720 case BITS3(0,1,0): break; // 32 bit, valid for both int and vec
4721 case BITS3(0,1,1): break; // 64 bit, valid for both int and vec
4722 case BITS3(1,0,0): // can only ever be valid for the vector case
4723 if (isInt) goto fail; else break;
4724 case BITS3(1,0,1): // these sizes are never valid
4725 case BITS3(1,1,0):
4726 case BITS3(1,1,1): goto fail;
4728 default: vassert(0);
4731 IRExpr* rhs = NULL;
4732 switch (optS) {
4733 case BITS4(1,1,1,0): goto fail; //ATC
4734 case BITS4(0,1,1,0):
4735 rhs = getIReg64orZR(mm);
4736 vex_sprintf(buf, "[%s, %s]",
4737 nameIReg64orZR(nn), nameIReg64orZR(mm));
4738 break;
4739 case BITS4(1,1,1,1): goto fail; //ATC
4740 case BITS4(0,1,1,1):
4741 rhs = binop(Iop_Shl64, getIReg64orZR(mm), mkU8(szLg2));
4742 vex_sprintf(buf, "[%s, %s lsl %u]",
4743 nameIReg64orZR(nn), nameIReg64orZR(mm), szLg2);
4744 break;
4745 case BITS4(0,1,0,0):
4746 rhs = unop(Iop_32Uto64, getIReg32orZR(mm));
4747 vex_sprintf(buf, "[%s, %s uxtx]",
4748 nameIReg64orZR(nn), nameIReg32orZR(mm));
4749 break;
4750 case BITS4(0,1,0,1):
4751 rhs = binop(Iop_Shl64,
4752 unop(Iop_32Uto64, getIReg32orZR(mm)), mkU8(szLg2));
4753 vex_sprintf(buf, "[%s, %s uxtx, lsl %u]",
4754 nameIReg64orZR(nn), nameIReg32orZR(mm), szLg2);
4755 break;
4756 case BITS4(1,1,0,0):
4757 rhs = unop(Iop_32Sto64, getIReg32orZR(mm));
4758 vex_sprintf(buf, "[%s, %s sxtx]",
4759 nameIReg64orZR(nn), nameIReg32orZR(mm));
4760 break;
4761 case BITS4(1,1,0,1):
4762 rhs = binop(Iop_Shl64,
4763 unop(Iop_32Sto64, getIReg32orZR(mm)), mkU8(szLg2));
4764 vex_sprintf(buf, "[%s, %s sxtx, lsl %u]",
4765 nameIReg64orZR(nn), nameIReg32orZR(mm), szLg2);
4766 break;
4767 default:
4768 /* The rest appear to be genuinely invalid */
4769 goto fail;
4772 vassert(rhs);
4773 IRTemp res = newTemp(Ity_I64);
4774 assign(res, binop(Iop_Add64, getIReg64orSP(nn), rhs));
4775 return res;
4777 fail:
4778 if (0 /*really, sigill_diag, but that causes too much plumbing*/) {
4779 vex_printf("gen_indexed_EA: unhandled case optS == 0x%x\n", optS);
4781 return IRTemp_INVALID;
4785 /* Generate an 8/16/32/64 bit integer store to ADDR for the lowest
4786 bits of DATAE :: Ity_I64. */
4787 static void gen_narrowing_store ( UInt szB, IRTemp addr, IRExpr* dataE )
4789 IRExpr* addrE = mkexpr(addr);
4790 switch (szB) {
4791 case 8:
4792 storeLE(addrE, dataE);
4793 break;
4794 case 4:
4795 storeLE(addrE, unop(Iop_64to32, dataE));
4796 break;
4797 case 2:
4798 storeLE(addrE, unop(Iop_64to16, dataE));
4799 break;
4800 case 1:
4801 storeLE(addrE, unop(Iop_64to8, dataE));
4802 break;
4803 default:
4804 vassert(0);
4809 /* Generate an 8/16/32/64 bit unsigned widening load from ADDR,
4810 placing the result in an Ity_I64 temporary. */
4811 static IRTemp gen_zwidening_load ( UInt szB, IRTemp addr )
4813 IRTemp res = newTemp(Ity_I64);
4814 IRExpr* addrE = mkexpr(addr);
4815 switch (szB) {
4816 case 8:
4817 assign(res, loadLE(Ity_I64,addrE));
4818 break;
4819 case 4:
4820 assign(res, unop(Iop_32Uto64, loadLE(Ity_I32,addrE)));
4821 break;
4822 case 2:
4823 assign(res, unop(Iop_16Uto64, loadLE(Ity_I16,addrE)));
4824 break;
4825 case 1:
4826 assign(res, unop(Iop_8Uto64, loadLE(Ity_I8,addrE)));
4827 break;
4828 default:
4829 vassert(0);
4831 return res;
4835 /* Generate a SIGBUS followed by a restart of the current instruction if
4836 `effective_addr` is `align`-aligned. This is required behaviour for atomic
4837 instructions. This assumes that guest_RIP_curr_instr is set correctly!
4839 This is hardwired to generate SIGBUS because so far the only supported arm64
4840 (arm64-linux) does that. Should we need to later extend it to generate some
4841 other signal, use the same scheme as with gen_SIGNAL_if_not_XX_aligned in
4842 guest_amd64_toIR.c. */
4843 static
4844 void gen_SIGBUS_if_not_XX_aligned ( IRTemp effective_addr, ULong align )
4846 if (align == 1) {
4847 return;
4849 vassert(align == 16 || align == 8 || align == 4 || align == 2);
4850 stmt(
4851 IRStmt_Exit(
4852 binop(Iop_CmpNE64,
4853 binop(Iop_And64,mkexpr(effective_addr),mkU64(align-1)),
4854 mkU64(0)),
4855 Ijk_SigBUS,
4856 IRConst_U64(guest_PC_curr_instr),
4857 OFFB_PC
4863 /* Generate a "standard 7" name, from bitQ and size. But also
4864 allow ".1d" since that's occasionally useful. */
4865 static
4866 const HChar* nameArr_Q_SZ ( UInt bitQ, UInt size )
4868 vassert(bitQ <= 1 && size <= 3);
4869 const HChar* nms[8]
4870 = { "8b", "4h", "2s", "1d", "16b", "8h", "4s", "2d" };
4871 UInt ix = (bitQ << 2) | size;
4872 vassert(ix < 8);
4873 return nms[ix];
4877 static
4878 Bool dis_ARM64_load_store(/*MB_OUT*/DisResult* dres, UInt insn,
4879 const VexAbiInfo* abiinfo, Bool sigill_diag)
4881 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
4883 /* ------------ LDR,STR (immediate, uimm12) ----------- */
4884 /* uimm12 is scaled by the transfer size
4886 31 29 26 21 9 4
4887 | | | | | |
4888 11 111 00100 imm12 nn tt STR Xt, [Xn|SP, #imm12 * 8]
4889 11 111 00101 imm12 nn tt LDR Xt, [Xn|SP, #imm12 * 8]
4891 10 111 00100 imm12 nn tt STR Wt, [Xn|SP, #imm12 * 4]
4892 10 111 00101 imm12 nn tt LDR Wt, [Xn|SP, #imm12 * 4]
4894 01 111 00100 imm12 nn tt STRH Wt, [Xn|SP, #imm12 * 2]
4895 01 111 00101 imm12 nn tt LDRH Wt, [Xn|SP, #imm12 * 2]
4897 00 111 00100 imm12 nn tt STRB Wt, [Xn|SP, #imm12 * 1]
4898 00 111 00101 imm12 nn tt LDRB Wt, [Xn|SP, #imm12 * 1]
4900 if (INSN(29,23) == BITS7(1,1,1,0,0,1,0)) {
4901 UInt szLg2 = INSN(31,30);
4902 UInt szB = 1 << szLg2;
4903 Bool isLD = INSN(22,22) == 1;
4904 UInt offs = INSN(21,10) * szB;
4905 UInt nn = INSN(9,5);
4906 UInt tt = INSN(4,0);
4907 IRTemp ta = newTemp(Ity_I64);
4908 assign(ta, binop(Iop_Add64, getIReg64orSP(nn), mkU64(offs)));
4909 if (nn == 31) { /* FIXME generate stack alignment check */ }
4910 vassert(szLg2 < 4);
4911 if (isLD) {
4912 putIReg64orZR(tt, mkexpr(gen_zwidening_load(szB, ta)));
4913 } else {
4914 gen_narrowing_store(szB, ta, getIReg64orZR(tt));
4916 const HChar* ld_name[4] = { "ldrb", "ldrh", "ldr", "ldr" };
4917 const HChar* st_name[4] = { "strb", "strh", "str", "str" };
4918 DIP("%s %s, [%s, #%u]\n",
4919 (isLD ? ld_name : st_name)[szLg2], nameIRegOrZR(szB == 8, tt),
4920 nameIReg64orSP(nn), offs);
4921 return True;
4924 /* ------------ LDUR,STUR (immediate, simm9) ----------- */
4926 31 29 26 20 11 9 4
4927 | | | | | | |
4928 (at-Rn-then-Rn=EA) | | |
4929 sz 111 00000 0 imm9 01 Rn Rt STR Rt, [Xn|SP], #simm9
4930 sz 111 00001 0 imm9 01 Rn Rt LDR Rt, [Xn|SP], #simm9
4932 (at-EA-then-Rn=EA)
4933 sz 111 00000 0 imm9 11 Rn Rt STR Rt, [Xn|SP, #simm9]!
4934 sz 111 00001 0 imm9 11 Rn Rt LDR Rt, [Xn|SP, #simm9]!
4936 (at-EA)
4937 sz 111 00000 0 imm9 00 Rn Rt STR Rt, [Xn|SP, #simm9]
4938 sz 111 00001 0 imm9 00 Rn Rt LDR Rt, [Xn|SP, #simm9]
4940 simm9 is unscaled.
4942 The case 'wback && Rn == Rt && Rt != 31' is disallowed. In the
4943 load case this is because would create two competing values for
4944 Rt. In the store case the reason is unclear, but the spec
4945 disallows it anyway.
4947 Stores are narrowing, loads are unsigned widening. sz encodes
4948 the transfer size in the normal way: 00=1, 01=2, 10=4, 11=8.
4950 if ((INSN(29,21) & BITS9(1,1,1, 1,1,1,1,0, 1))
4951 == BITS9(1,1,1, 0,0,0,0,0, 0)) {
4952 UInt szLg2 = INSN(31,30);
4953 UInt szB = 1 << szLg2;
4954 Bool isLoad = INSN(22,22) == 1;
4955 UInt imm9 = INSN(20,12);
4956 UInt nn = INSN(9,5);
4957 UInt tt = INSN(4,0);
4958 Bool wBack = INSN(10,10) == 1;
4959 UInt how = INSN(11,10);
4960 if (how == BITS2(1,0) || (wBack && nn == tt && tt != 31)) {
4961 /* undecodable; fall through */
4962 } else {
4963 if (nn == 31) { /* FIXME generate stack alignment check */ }
4965 // Compute the transfer address TA and the writeback address WA.
4966 IRTemp tRN = newTemp(Ity_I64);
4967 assign(tRN, getIReg64orSP(nn));
4968 IRTemp tEA = newTemp(Ity_I64);
4969 Long simm9 = (Long)sx_to_64(imm9, 9);
4970 assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm9)));
4972 IRTemp tTA = newTemp(Ity_I64);
4973 IRTemp tWA = newTemp(Ity_I64);
4974 switch (how) {
4975 case BITS2(0,1):
4976 assign(tTA, mkexpr(tRN)); assign(tWA, mkexpr(tEA)); break;
4977 case BITS2(1,1):
4978 assign(tTA, mkexpr(tEA)); assign(tWA, mkexpr(tEA)); break;
4979 case BITS2(0,0):
4980 assign(tTA, mkexpr(tEA)); /* tWA is unused */ break;
4981 default:
4982 vassert(0); /* NOTREACHED */
4985 /* Normally rN would be updated after the transfer. However, in
4986 the special cases typifed by
4987 str x30, [sp,#-16]!
4988 str w1, [sp,#-32]!
4989 it is necessary to update SP before the transfer, (1)
4990 because Memcheck will otherwise complain about a write
4991 below the stack pointer, and (2) because the segfault
4992 stack extension mechanism will otherwise extend the stack
4993 only down to SP before the instruction, which might not be
4994 far enough, if the -16/-32 bit takes the actual access
4995 address to the next page.
4997 Bool earlyWBack
4998 = wBack && simm9 < 0
4999 && (szB == 8 || szB == 4 || szB == 2 || szB == 1)
5000 && how == BITS2(1,1) && nn == 31 && !isLoad;
5002 if (wBack && earlyWBack)
5003 putIReg64orSP(nn, mkexpr(tEA));
5005 if (isLoad) {
5006 putIReg64orZR(tt, mkexpr(gen_zwidening_load(szB, tTA)));
5007 } else {
5008 gen_narrowing_store(szB, tTA, getIReg64orZR(tt));
5011 if (wBack && !earlyWBack)
5012 putIReg64orSP(nn, mkexpr(tEA));
5014 const HChar* ld_name[4] = { "ldurb", "ldurh", "ldur", "ldur" };
5015 const HChar* st_name[4] = { "sturb", "sturh", "stur", "stur" };
5016 const HChar* fmt_str = NULL;
5017 switch (how) {
5018 case BITS2(0,1):
5019 fmt_str = "%s %s, [%s], #%lld (at-Rn-then-Rn=EA)\n";
5020 break;
5021 case BITS2(1,1):
5022 fmt_str = "%s %s, [%s, #%lld]! (at-EA-then-Rn=EA)\n";
5023 break;
5024 case BITS2(0,0):
5025 fmt_str = "%s %s, [%s, #%lld] (at-Rn)\n";
5026 break;
5027 default:
5028 vassert(0);
5030 DIP(fmt_str, (isLoad ? ld_name : st_name)[szLg2],
5031 nameIRegOrZR(szB == 8, tt),
5032 nameIReg64orSP(nn), simm9);
5033 return True;
5037 /* -------- LDP,STP (immediate, simm7) (INT REGS) -------- */
5038 /* L==1 => mm==LD
5039 L==0 => mm==ST
5040 x==0 => 32 bit transfers, and zero extended loads
5041 x==1 => 64 bit transfers
5042 simm7 is scaled by the (single-register) transfer size
5044 (at-Rn-then-Rn=EA)
5045 x0 101 0001 L imm7 Rt2 Rn Rt1 mmP Rt1,Rt2, [Xn|SP], #imm
5047 (at-EA-then-Rn=EA)
5048 x0 101 0011 L imm7 Rt2 Rn Rt1 mmP Rt1,Rt2, [Xn|SP, #imm]!
5050 (at-EA)
5051 x0 101 0010 L imm7 Rt2 Rn Rt1 mmP Rt1,Rt2, [Xn|SP, #imm]
5053 UInt insn_30_23 = INSN(30,23);
5054 if (insn_30_23 == BITS8(0,1,0,1,0,0,0,1)
5055 || insn_30_23 == BITS8(0,1,0,1,0,0,1,1)
5056 || insn_30_23 == BITS8(0,1,0,1,0,0,1,0)) {
5057 UInt bL = INSN(22,22);
5058 UInt bX = INSN(31,31);
5059 UInt bWBack = INSN(23,23);
5060 UInt rT1 = INSN(4,0);
5061 UInt rN = INSN(9,5);
5062 UInt rT2 = INSN(14,10);
5063 Long simm7 = (Long)sx_to_64(INSN(21,15), 7);
5064 if ((bWBack && (rT1 == rN || rT2 == rN) && rN != 31)
5065 || (bL && rT1 == rT2)) {
5066 /* undecodable; fall through */
5067 } else {
5068 if (rN == 31) { /* FIXME generate stack alignment check */ }
5070 // Compute the transfer address TA and the writeback address WA.
5071 IRTemp tRN = newTemp(Ity_I64);
5072 assign(tRN, getIReg64orSP(rN));
5073 IRTemp tEA = newTemp(Ity_I64);
5074 simm7 = (bX ? 8 : 4) * simm7;
5075 assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm7)));
5077 IRTemp tTA = newTemp(Ity_I64);
5078 IRTemp tWA = newTemp(Ity_I64);
5079 switch (INSN(24,23)) {
5080 case BITS2(0,1):
5081 assign(tTA, mkexpr(tRN)); assign(tWA, mkexpr(tEA)); break;
5082 case BITS2(1,1):
5083 assign(tTA, mkexpr(tEA)); assign(tWA, mkexpr(tEA)); break;
5084 case BITS2(1,0):
5085 assign(tTA, mkexpr(tEA)); /* tWA is unused */ break;
5086 default:
5087 vassert(0); /* NOTREACHED */
5090 /* Normally rN would be updated after the transfer. However, in
5091 the special case typified by
5092 stp x29, x30, [sp,#-112]!
5093 it is necessary to update SP before the transfer, (1)
5094 because Memcheck will otherwise complain about a write
5095 below the stack pointer, and (2) because the segfault
5096 stack extension mechanism will otherwise extend the stack
5097 only down to SP before the instruction, which might not be
5098 far enough, if the -112 bit takes the actual access
5099 address to the next page.
5101 Bool earlyWBack
5102 = bWBack && simm7 < 0
5103 && INSN(24,23) == BITS2(1,1) && rN == 31 && bL == 0;
5105 if (bWBack && earlyWBack)
5106 putIReg64orSP(rN, mkexpr(tEA));
5108 /**/ if (bL == 1 && bX == 1) {
5109 // 64 bit load
5110 putIReg64orZR(rT1, loadLE(Ity_I64,
5111 binop(Iop_Add64,mkexpr(tTA),mkU64(0))));
5112 putIReg64orZR(rT2, loadLE(Ity_I64,
5113 binop(Iop_Add64,mkexpr(tTA),mkU64(8))));
5114 } else if (bL == 1 && bX == 0) {
5115 // 32 bit load
5116 putIReg32orZR(rT1, loadLE(Ity_I32,
5117 binop(Iop_Add64,mkexpr(tTA),mkU64(0))));
5118 putIReg32orZR(rT2, loadLE(Ity_I32,
5119 binop(Iop_Add64,mkexpr(tTA),mkU64(4))));
5120 } else if (bL == 0 && bX == 1) {
5121 // 64 bit store
5122 storeLE(binop(Iop_Add64,mkexpr(tTA),mkU64(0)),
5123 getIReg64orZR(rT1));
5124 storeLE(binop(Iop_Add64,mkexpr(tTA),mkU64(8)),
5125 getIReg64orZR(rT2));
5126 } else {
5127 vassert(bL == 0 && bX == 0);
5128 // 32 bit store
5129 storeLE(binop(Iop_Add64,mkexpr(tTA),mkU64(0)),
5130 getIReg32orZR(rT1));
5131 storeLE(binop(Iop_Add64,mkexpr(tTA),mkU64(4)),
5132 getIReg32orZR(rT2));
5135 if (bWBack && !earlyWBack)
5136 putIReg64orSP(rN, mkexpr(tEA));
5138 const HChar* fmt_str = NULL;
5139 switch (INSN(24,23)) {
5140 case BITS2(0,1):
5141 fmt_str = "%sp %s, %s, [%s], #%lld (at-Rn-then-Rn=EA)\n";
5142 break;
5143 case BITS2(1,1):
5144 fmt_str = "%sp %s, %s, [%s, #%lld]! (at-EA-then-Rn=EA)\n";
5145 break;
5146 case BITS2(1,0):
5147 fmt_str = "%sp %s, %s, [%s, #%lld] (at-Rn)\n";
5148 break;
5149 default:
5150 vassert(0);
5152 DIP(fmt_str, bL == 0 ? "st" : "ld",
5153 nameIRegOrZR(bX == 1, rT1),
5154 nameIRegOrZR(bX == 1, rT2),
5155 nameIReg64orSP(rN), simm7);
5156 return True;
5160 /* -------- LDPSW (immediate, simm7) (INT REGS) -------- */
5161 /* Does 32 bit transfers which are sign extended to 64 bits.
5162 simm7 is scaled by the (single-register) transfer size
5164 (at-Rn-then-Rn=EA)
5165 01 101 0001 1 imm7 Rt2 Rn Rt1 LDPSW Rt1,Rt2, [Xn|SP], #imm
5167 (at-EA-then-Rn=EA)
5168 01 101 0011 1 imm7 Rt2 Rn Rt1 LDPSW Rt1,Rt2, [Xn|SP, #imm]!
5170 (at-EA)
5171 01 101 0010 1 imm7 Rt2 Rn Rt1 LDPSW Rt1,Rt2, [Xn|SP, #imm]
5173 UInt insn_31_22 = INSN(31,22);
5174 if (insn_31_22 == BITS10(0,1,1,0,1,0,0,0,1,1)
5175 || insn_31_22 == BITS10(0,1,1,0,1,0,0,1,1,1)
5176 || insn_31_22 == BITS10(0,1,1,0,1,0,0,1,0,1)) {
5177 UInt bWBack = INSN(23,23);
5178 UInt rT1 = INSN(4,0);
5179 UInt rN = INSN(9,5);
5180 UInt rT2 = INSN(14,10);
5181 Long simm7 = (Long)sx_to_64(INSN(21,15), 7);
5182 if ((bWBack && (rT1 == rN || rT2 == rN) && rN != 31)
5183 || (rT1 == rT2)) {
5184 /* undecodable; fall through */
5185 } else {
5186 if (rN == 31) { /* FIXME generate stack alignment check */ }
5188 // Compute the transfer address TA and the writeback address WA.
5189 IRTemp tRN = newTemp(Ity_I64);
5190 assign(tRN, getIReg64orSP(rN));
5191 IRTemp tEA = newTemp(Ity_I64);
5192 simm7 = 4 * simm7;
5193 assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm7)));
5195 IRTemp tTA = newTemp(Ity_I64);
5196 IRTemp tWA = newTemp(Ity_I64);
5197 switch (INSN(24,23)) {
5198 case BITS2(0,1):
5199 assign(tTA, mkexpr(tRN)); assign(tWA, mkexpr(tEA)); break;
5200 case BITS2(1,1):
5201 assign(tTA, mkexpr(tEA)); assign(tWA, mkexpr(tEA)); break;
5202 case BITS2(1,0):
5203 assign(tTA, mkexpr(tEA)); /* tWA is unused */ break;
5204 default:
5205 vassert(0); /* NOTREACHED */
5208 // 32 bit load, sign extended to 64 bits
5209 putIReg64orZR(rT1, unop(Iop_32Sto64,
5210 loadLE(Ity_I32, binop(Iop_Add64,
5211 mkexpr(tTA),
5212 mkU64(0)))));
5213 putIReg64orZR(rT2, unop(Iop_32Sto64,
5214 loadLE(Ity_I32, binop(Iop_Add64,
5215 mkexpr(tTA),
5216 mkU64(4)))));
5217 if (bWBack)
5218 putIReg64orSP(rN, mkexpr(tEA));
5220 const HChar* fmt_str = NULL;
5221 switch (INSN(24,23)) {
5222 case BITS2(0,1):
5223 fmt_str = "ldpsw %s, %s, [%s], #%lld (at-Rn-then-Rn=EA)\n";
5224 break;
5225 case BITS2(1,1):
5226 fmt_str = "ldpsw %s, %s, [%s, #%lld]! (at-EA-then-Rn=EA)\n";
5227 break;
5228 case BITS2(1,0):
5229 fmt_str = "ldpsw %s, %s, [%s, #%lld] (at-Rn)\n";
5230 break;
5231 default:
5232 vassert(0);
5234 DIP(fmt_str, nameIReg64orZR(rT1),
5235 nameIReg64orZR(rT2),
5236 nameIReg64orSP(rN), simm7);
5237 return True;
5241 /* ---------------- LDR (literal, int reg) ---------------- */
5242 /* 31 29 23 4
5243 00 011 000 imm19 Rt LDR Wt, [PC + sxTo64(imm19 << 2)]
5244 01 011 000 imm19 Rt LDR Xt, [PC + sxTo64(imm19 << 2)]
5245 10 011 000 imm19 Rt LDRSW Xt, [PC + sxTo64(imm19 << 2)]
5246 11 011 000 imm19 Rt prefetch [PC + sxTo64(imm19 << 2)]
5247 Just handles the first two cases for now.
5249 if (INSN(29,24) == BITS6(0,1,1,0,0,0) && INSN(31,31) == 0) {
5250 UInt imm19 = INSN(23,5);
5251 UInt rT = INSN(4,0);
5252 UInt bX = INSN(30,30);
5253 ULong ea = guest_PC_curr_instr + sx_to_64(imm19 << 2, 21);
5254 if (bX) {
5255 putIReg64orZR(rT, loadLE(Ity_I64, mkU64(ea)));
5256 } else {
5257 putIReg32orZR(rT, loadLE(Ity_I32, mkU64(ea)));
5259 DIP("ldr %s, 0x%llx (literal)\n", nameIRegOrZR(bX == 1, rT), ea);
5260 return True;
5263 /* -------------- {LD,ST}R (integer register) --------------- */
5264 /* 31 29 20 15 12 11 9 4
5265 | | | | | | | |
5266 11 111000011 Rm option S 10 Rn Rt LDR Xt, [Xn|SP, R<m>{ext/sh}]
5267 10 111000011 Rm option S 10 Rn Rt LDR Wt, [Xn|SP, R<m>{ext/sh}]
5268 01 111000011 Rm option S 10 Rn Rt LDRH Wt, [Xn|SP, R<m>{ext/sh}]
5269 00 111000011 Rm option S 10 Rn Rt LDRB Wt, [Xn|SP, R<m>{ext/sh}]
5271 11 111000001 Rm option S 10 Rn Rt STR Xt, [Xn|SP, R<m>{ext/sh}]
5272 10 111000001 Rm option S 10 Rn Rt STR Wt, [Xn|SP, R<m>{ext/sh}]
5273 01 111000001 Rm option S 10 Rn Rt STRH Wt, [Xn|SP, R<m>{ext/sh}]
5274 00 111000001 Rm option S 10 Rn Rt STRB Wt, [Xn|SP, R<m>{ext/sh}]
5276 if (INSN(29,23) == BITS7(1,1,1,0,0,0,0)
5277 && INSN(21,21) == 1 && INSN(11,10) == BITS2(1,0)) {
5278 HChar dis_buf[64];
5279 UInt szLg2 = INSN(31,30);
5280 Bool isLD = INSN(22,22) == 1;
5281 UInt tt = INSN(4,0);
5282 IRTemp ea = gen_indexed_EA(dis_buf, insn, True/*to/from int regs*/);
5283 if (ea != IRTemp_INVALID) {
5284 switch (szLg2) {
5285 case 3: /* 64 bit */
5286 if (isLD) {
5287 putIReg64orZR(tt, loadLE(Ity_I64, mkexpr(ea)));
5288 DIP("ldr %s, %s\n", nameIReg64orZR(tt), dis_buf);
5289 } else {
5290 storeLE(mkexpr(ea), getIReg64orZR(tt));
5291 DIP("str %s, %s\n", nameIReg64orZR(tt), dis_buf);
5293 break;
5294 case 2: /* 32 bit */
5295 if (isLD) {
5296 putIReg32orZR(tt, loadLE(Ity_I32, mkexpr(ea)));
5297 DIP("ldr %s, %s\n", nameIReg32orZR(tt), dis_buf);
5298 } else {
5299 storeLE(mkexpr(ea), getIReg32orZR(tt));
5300 DIP("str %s, %s\n", nameIReg32orZR(tt), dis_buf);
5302 break;
5303 case 1: /* 16 bit */
5304 if (isLD) {
5305 putIReg64orZR(tt, unop(Iop_16Uto64,
5306 loadLE(Ity_I16, mkexpr(ea))));
5307 DIP("ldruh %s, %s\n", nameIReg32orZR(tt), dis_buf);
5308 } else {
5309 storeLE(mkexpr(ea), unop(Iop_64to16, getIReg64orZR(tt)));
5310 DIP("strh %s, %s\n", nameIReg32orZR(tt), dis_buf);
5312 break;
5313 case 0: /* 8 bit */
5314 if (isLD) {
5315 putIReg64orZR(tt, unop(Iop_8Uto64,
5316 loadLE(Ity_I8, mkexpr(ea))));
5317 DIP("ldrub %s, %s\n", nameIReg32orZR(tt), dis_buf);
5318 } else {
5319 storeLE(mkexpr(ea), unop(Iop_64to8, getIReg64orZR(tt)));
5320 DIP("strb %s, %s\n", nameIReg32orZR(tt), dis_buf);
5322 break;
5323 default:
5324 vassert(0);
5326 return True;
5330 /* -------------- LDRS{B,H,W} (uimm12) -------------- */
5331 /* 31 29 26 23 21 9 4
5332 10 111 001 10 imm12 n t LDRSW Xt, [Xn|SP, #pimm12 * 4]
5333 01 111 001 1x imm12 n t LDRSH Rt, [Xn|SP, #pimm12 * 2]
5334 00 111 001 1x imm12 n t LDRSB Rt, [Xn|SP, #pimm12 * 1]
5335 where
5336 Rt is Wt when x==1, Xt when x==0
5338 if (INSN(29,23) == BITS7(1,1,1,0,0,1,1)) {
5339 /* Further checks on bits 31:30 and 22 */
5340 Bool valid = False;
5341 switch ((INSN(31,30) << 1) | INSN(22,22)) {
5342 case BITS3(1,0,0):
5343 case BITS3(0,1,0): case BITS3(0,1,1):
5344 case BITS3(0,0,0): case BITS3(0,0,1):
5345 valid = True;
5346 break;
5348 if (valid) {
5349 UInt szLg2 = INSN(31,30);
5350 UInt bitX = INSN(22,22);
5351 UInt imm12 = INSN(21,10);
5352 UInt nn = INSN(9,5);
5353 UInt tt = INSN(4,0);
5354 UInt szB = 1 << szLg2;
5355 IRExpr* ea = binop(Iop_Add64,
5356 getIReg64orSP(nn), mkU64(imm12 * szB));
5357 switch (szB) {
5358 case 4:
5359 vassert(bitX == 0);
5360 putIReg64orZR(tt, unop(Iop_32Sto64, loadLE(Ity_I32, ea)));
5361 DIP("ldrsw %s, [%s, #%u]\n", nameIReg64orZR(tt),
5362 nameIReg64orSP(nn), imm12 * szB);
5363 break;
5364 case 2:
5365 if (bitX == 1) {
5366 putIReg32orZR(tt, unop(Iop_16Sto32, loadLE(Ity_I16, ea)));
5367 } else {
5368 putIReg64orZR(tt, unop(Iop_16Sto64, loadLE(Ity_I16, ea)));
5370 DIP("ldrsh %s, [%s, #%u]\n",
5371 nameIRegOrZR(bitX == 0, tt),
5372 nameIReg64orSP(nn), imm12 * szB);
5373 break;
5374 case 1:
5375 if (bitX == 1) {
5376 putIReg32orZR(tt, unop(Iop_8Sto32, loadLE(Ity_I8, ea)));
5377 } else {
5378 putIReg64orZR(tt, unop(Iop_8Sto64, loadLE(Ity_I8, ea)));
5380 DIP("ldrsb %s, [%s, #%u]\n",
5381 nameIRegOrZR(bitX == 0, tt),
5382 nameIReg64orSP(nn), imm12 * szB);
5383 break;
5384 default:
5385 vassert(0);
5387 return True;
5389 /* else fall through */
5392 /* -------------- LDRS{B,H,W} (simm9, upd) -------------- */
5393 /* (at-Rn-then-Rn=EA)
5394 31 29 23 21 20 11 9 4
5395 00 111 000 1x 0 imm9 01 n t LDRSB Rt, [Xn|SP], #simm9
5396 01 111 000 1x 0 imm9 01 n t LDRSH Rt, [Xn|SP], #simm9
5397 10 111 000 10 0 imm9 01 n t LDRSW Xt, [Xn|SP], #simm9
5399 (at-EA-then-Rn=EA)
5400 00 111 000 1x 0 imm9 11 n t LDRSB Rt, [Xn|SP, #simm9]!
5401 01 111 000 1x 0 imm9 11 n t LDRSH Rt, [Xn|SP, #simm9]!
5402 10 111 000 10 0 imm9 11 n t LDRSW Xt, [Xn|SP, #simm9]!
5403 where
5404 Rt is Wt when x==1, Xt when x==0
5405 transfer-at-Rn when [11]==0, at EA when [11]==1
5407 if (INSN(29,23) == BITS7(1,1,1,0,0,0,1)
5408 && INSN(21,21) == 0 && INSN(10,10) == 1) {
5409 /* Further checks on bits 31:30 and 22 */
5410 Bool valid = False;
5411 switch ((INSN(31,30) << 1) | INSN(22,22)) {
5412 case BITS3(1,0,0): // LDRSW Xt
5413 case BITS3(0,1,0): case BITS3(0,1,1): // LDRSH Xt, Wt
5414 case BITS3(0,0,0): case BITS3(0,0,1): // LDRSB Xt, Wt
5415 valid = True;
5416 break;
5418 if (valid) {
5419 UInt szLg2 = INSN(31,30);
5420 UInt imm9 = INSN(20,12);
5421 Bool atRN = INSN(11,11) == 0;
5422 UInt nn = INSN(9,5);
5423 UInt tt = INSN(4,0);
5424 IRTemp tRN = newTemp(Ity_I64);
5425 IRTemp tEA = newTemp(Ity_I64);
5426 IRTemp tTA = IRTemp_INVALID;
5427 ULong simm9 = sx_to_64(imm9, 9);
5428 Bool is64 = INSN(22,22) == 0;
5429 assign(tRN, getIReg64orSP(nn));
5430 assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm9)));
5431 tTA = atRN ? tRN : tEA;
5432 HChar ch = '?';
5433 /* There are 5 cases:
5434 byte load, SX to 64
5435 byte load, SX to 32, ZX to 64
5436 halfword load, SX to 64
5437 halfword load, SX to 32, ZX to 64
5438 word load, SX to 64
5439 The ifs below handle them in the listed order.
5441 if (szLg2 == 0) {
5442 ch = 'b';
5443 if (is64) {
5444 putIReg64orZR(tt, unop(Iop_8Sto64,
5445 loadLE(Ity_I8, mkexpr(tTA))));
5446 } else {
5447 putIReg32orZR(tt, unop(Iop_8Sto32,
5448 loadLE(Ity_I8, mkexpr(tTA))));
5451 else if (szLg2 == 1) {
5452 ch = 'h';
5453 if (is64) {
5454 putIReg64orZR(tt, unop(Iop_16Sto64,
5455 loadLE(Ity_I16, mkexpr(tTA))));
5456 } else {
5457 putIReg32orZR(tt, unop(Iop_16Sto32,
5458 loadLE(Ity_I16, mkexpr(tTA))));
5461 else if (szLg2 == 2 && is64) {
5462 ch = 'w';
5463 putIReg64orZR(tt, unop(Iop_32Sto64,
5464 loadLE(Ity_I32, mkexpr(tTA))));
5466 else {
5467 vassert(0);
5469 putIReg64orSP(nn, mkexpr(tEA));
5470 DIP(atRN ? "ldrs%c %s, [%s], #%llu\n" : "ldrs%c %s, [%s, #%llu]!",
5471 ch, nameIRegOrZR(is64, tt), nameIReg64orSP(nn), simm9);
5472 return True;
5474 /* else fall through */
5477 /* -------------- LDURS{B,H,W} (simm9, noUpd) -------------- */
5478 /* 31 29 23 21 20 11 9 4
5479 00 111 000 1x 0 imm9 00 n t LDURSB Rt, [Xn|SP, #simm9]
5480 01 111 000 1x 0 imm9 00 n t LDURSH Rt, [Xn|SP, #simm9]
5481 10 111 000 10 0 imm9 00 n t LDURSW Xt, [Xn|SP, #simm9]
5482 where
5483 Rt is Wt when x==1, Xt when x==0
5485 if (INSN(29,23) == BITS7(1,1,1,0,0,0,1)
5486 && INSN(21,21) == 0 && INSN(11,10) == BITS2(0,0)) {
5487 /* Further checks on bits 31:30 and 22 */
5488 Bool valid = False;
5489 switch ((INSN(31,30) << 1) | INSN(22,22)) {
5490 case BITS3(1,0,0): // LDURSW Xt
5491 case BITS3(0,1,0): case BITS3(0,1,1): // LDURSH Xt, Wt
5492 case BITS3(0,0,0): case BITS3(0,0,1): // LDURSB Xt, Wt
5493 valid = True;
5494 break;
5496 if (valid) {
5497 UInt szLg2 = INSN(31,30);
5498 UInt imm9 = INSN(20,12);
5499 UInt nn = INSN(9,5);
5500 UInt tt = INSN(4,0);
5501 IRTemp tRN = newTemp(Ity_I64);
5502 IRTemp tEA = newTemp(Ity_I64);
5503 ULong simm9 = sx_to_64(imm9, 9);
5504 Bool is64 = INSN(22,22) == 0;
5505 assign(tRN, getIReg64orSP(nn));
5506 assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm9)));
5507 HChar ch = '?';
5508 /* There are 5 cases:
5509 byte load, SX to 64
5510 byte load, SX to 32, ZX to 64
5511 halfword load, SX to 64
5512 halfword load, SX to 32, ZX to 64
5513 word load, SX to 64
5514 The ifs below handle them in the listed order.
5516 if (szLg2 == 0) {
5517 ch = 'b';
5518 if (is64) {
5519 putIReg64orZR(tt, unop(Iop_8Sto64,
5520 loadLE(Ity_I8, mkexpr(tEA))));
5521 } else {
5522 putIReg32orZR(tt, unop(Iop_8Sto32,
5523 loadLE(Ity_I8, mkexpr(tEA))));
5526 else if (szLg2 == 1) {
5527 ch = 'h';
5528 if (is64) {
5529 putIReg64orZR(tt, unop(Iop_16Sto64,
5530 loadLE(Ity_I16, mkexpr(tEA))));
5531 } else {
5532 putIReg32orZR(tt, unop(Iop_16Sto32,
5533 loadLE(Ity_I16, mkexpr(tEA))));
5536 else if (szLg2 == 2 && is64) {
5537 ch = 'w';
5538 putIReg64orZR(tt, unop(Iop_32Sto64,
5539 loadLE(Ity_I32, mkexpr(tEA))));
5541 else {
5542 vassert(0);
5544 DIP("ldurs%c %s, [%s, #%lld]\n",
5545 ch, nameIRegOrZR(is64, tt), nameIReg64orSP(nn), (Long)simm9);
5546 return True;
5548 /* else fall through */
5551 /* -------- LDP,STP (immediate, simm7) (FP&VEC) -------- */
5552 /* L==1 => mm==LD
5553 L==0 => mm==ST
5554 sz==00 => 32 bit (S) transfers
5555 sz==01 => 64 bit (D) transfers
5556 sz==10 => 128 bit (Q) transfers
5557 sz==11 isn't allowed
5558 simm7 is scaled by the (single-register) transfer size
5560 31 29 26 22 21 14 9 4
5562 sz 101 1000 L imm7 t2 n t1 mmNP SDQt1, SDQt2, [Xn|SP, #imm]
5563 (at-EA, with nontemporal hint)
5565 sz 101 1001 L imm7 t2 n t1 mmP SDQt1, SDQt2, [Xn|SP], #imm
5566 (at-Rn-then-Rn=EA)
5568 sz 101 1010 L imm7 t2 n t1 mmP SDQt1, SDQt2, [Xn|SP, #imm]
5569 (at-EA)
5571 sz 101 1011 L imm7 t2 n t1 mmP SDQt1, SDQt2, [Xn|SP, #imm]!
5572 (at-EA-then-Rn=EA)
5574 if (INSN(29,25) == BITS5(1,0,1,1,0)) {
5575 UInt szSlg2 = INSN(31,30); // log2 of the xfer size in 32-bit units
5576 Bool isLD = INSN(22,22) == 1;
5577 Bool wBack = INSN(23,23) == 1;
5578 Long simm7 = (Long)sx_to_64(INSN(21,15), 7);
5579 UInt tt2 = INSN(14,10);
5580 UInt nn = INSN(9,5);
5581 UInt tt1 = INSN(4,0);
5582 if (szSlg2 == BITS2(1,1) || (isLD && tt1 == tt2)) {
5583 /* undecodable; fall through */
5584 } else {
5585 if (nn == 31) { /* FIXME generate stack alignment check */ }
5587 // Compute the transfer address TA and the writeback address WA.
5588 UInt szB = 4 << szSlg2; /* szB is the per-register size */
5589 IRTemp tRN = newTemp(Ity_I64);
5590 assign(tRN, getIReg64orSP(nn));
5591 IRTemp tEA = newTemp(Ity_I64);
5592 simm7 = szB * simm7;
5593 assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm7)));
5595 IRTemp tTA = newTemp(Ity_I64);
5596 IRTemp tWA = newTemp(Ity_I64);
5597 switch (INSN(24,23)) {
5598 case BITS2(0,1):
5599 assign(tTA, mkexpr(tRN)); assign(tWA, mkexpr(tEA)); break;
5600 case BITS2(1,1):
5601 assign(tTA, mkexpr(tEA)); assign(tWA, mkexpr(tEA)); break;
5602 case BITS2(1,0):
5603 case BITS2(0,0):
5604 assign(tTA, mkexpr(tEA)); /* tWA is unused */ break;
5605 default:
5606 vassert(0); /* NOTREACHED */
5609 IRType ty = Ity_INVALID;
5610 switch (szB) {
5611 case 4: ty = Ity_F32; break;
5612 case 8: ty = Ity_F64; break;
5613 case 16: ty = Ity_V128; break;
5614 default: vassert(0);
5617 /* Normally rN would be updated after the transfer. However, in
5618 the special cases typified by
5619 stp q0, q1, [sp,#-512]!
5620 stp d0, d1, [sp,#-512]!
5621 stp s0, s1, [sp,#-512]!
5622 it is necessary to update SP before the transfer, (1)
5623 because Memcheck will otherwise complain about a write
5624 below the stack pointer, and (2) because the segfault
5625 stack extension mechanism will otherwise extend the stack
5626 only down to SP before the instruction, which might not be
5627 far enough, if the -512 bit takes the actual access
5628 address to the next page.
5630 Bool earlyWBack
5631 = wBack && simm7 < 0
5632 && INSN(24,23) == BITS2(1,1) && nn == 31 && !isLD;
5634 if (wBack && earlyWBack)
5635 putIReg64orSP(nn, mkexpr(tEA));
5637 if (isLD) {
5638 if (szB < 16) {
5639 putQReg128(tt1, mkV128(0x0000));
5641 putQRegLO(tt1,
5642 loadLE(ty, binop(Iop_Add64, mkexpr(tTA), mkU64(0))));
5643 if (szB < 16) {
5644 putQReg128(tt2, mkV128(0x0000));
5646 putQRegLO(tt2,
5647 loadLE(ty, binop(Iop_Add64, mkexpr(tTA), mkU64(szB))));
5648 } else {
5649 storeLE(binop(Iop_Add64, mkexpr(tTA), mkU64(0)),
5650 getQRegLO(tt1, ty));
5651 storeLE(binop(Iop_Add64, mkexpr(tTA), mkU64(szB)),
5652 getQRegLO(tt2, ty));
5655 if (wBack && !earlyWBack)
5656 putIReg64orSP(nn, mkexpr(tEA));
5658 const HChar* fmt_str = NULL;
5659 switch (INSN(24,23)) {
5660 case BITS2(0,1):
5661 fmt_str = "%sp %s, %s, [%s], #%lld (at-Rn-then-Rn=EA)\n";
5662 break;
5663 case BITS2(1,1):
5664 fmt_str = "%sp %s, %s, [%s, #%lld]! (at-EA-then-Rn=EA)\n";
5665 break;
5666 case BITS2(1,0):
5667 fmt_str = "%sp %s, %s, [%s, #%lld] (at-Rn)\n";
5668 break;
5669 case BITS2(0,0):
5670 fmt_str = "%snp %s, %s, [%s, #%lld] (at-Rn)\n";
5671 break;
5672 default:
5673 vassert(0);
5675 DIP(fmt_str, isLD ? "ld" : "st",
5676 nameQRegLO(tt1, ty), nameQRegLO(tt2, ty),
5677 nameIReg64orSP(nn), simm7);
5678 return True;
5682 /* -------------- {LD,ST}R (vector register) --------------- */
5683 /* 31 29 23 20 15 12 11 9 4
5684 | | | | | | | | |
5685 00 111100 011 Rm option S 10 Rn Rt LDR Bt, [Xn|SP, R<m>{ext/sh}]
5686 01 111100 011 Rm option S 10 Rn Rt LDR Ht, [Xn|SP, R<m>{ext/sh}]
5687 10 111100 011 Rm option S 10 Rn Rt LDR St, [Xn|SP, R<m>{ext/sh}]
5688 11 111100 011 Rm option S 10 Rn Rt LDR Dt, [Xn|SP, R<m>{ext/sh}]
5689 00 111100 111 Rm option S 10 Rn Rt LDR Qt, [Xn|SP, R<m>{ext/sh}]
5691 00 111100 001 Rm option S 10 Rn Rt STR Bt, [Xn|SP, R<m>{ext/sh}]
5692 01 111100 001 Rm option S 10 Rn Rt STR Ht, [Xn|SP, R<m>{ext/sh}]
5693 10 111100 001 Rm option S 10 Rn Rt STR St, [Xn|SP, R<m>{ext/sh}]
5694 11 111100 001 Rm option S 10 Rn Rt STR Dt, [Xn|SP, R<m>{ext/sh}]
5695 00 111100 101 Rm option S 10 Rn Rt STR Qt, [Xn|SP, R<m>{ext/sh}]
5697 if (INSN(29,24) == BITS6(1,1,1,1,0,0)
5698 && INSN(21,21) == 1 && INSN(11,10) == BITS2(1,0)) {
5699 HChar dis_buf[64];
5700 UInt szLg2 = (INSN(23,23) << 2) | INSN(31,30);
5701 Bool isLD = INSN(22,22) == 1;
5702 UInt tt = INSN(4,0);
5703 if (szLg2 > 4) goto after_LDR_STR_vector_register;
5704 IRTemp ea = gen_indexed_EA(dis_buf, insn, False/*to/from vec regs*/);
5705 if (ea == IRTemp_INVALID) goto after_LDR_STR_vector_register;
5706 switch (szLg2) {
5707 case 0: /* 8 bit */
5708 if (isLD) {
5709 putQReg128(tt, mkV128(0x0000));
5710 putQRegLO(tt, loadLE(Ity_I8, mkexpr(ea)));
5711 DIP("ldr %s, %s\n", nameQRegLO(tt, Ity_I8), dis_buf);
5712 } else {
5713 storeLE(mkexpr(ea), getQRegLO(tt, Ity_I8));
5714 DIP("str %s, %s\n", nameQRegLO(tt, Ity_I8), dis_buf);
5716 break;
5717 case 1:
5718 if (isLD) {
5719 putQReg128(tt, mkV128(0x0000));
5720 putQRegLO(tt, loadLE(Ity_I16, mkexpr(ea)));
5721 DIP("ldr %s, %s\n", nameQRegLO(tt, Ity_I16), dis_buf);
5722 } else {
5723 storeLE(mkexpr(ea), getQRegLO(tt, Ity_I16));
5724 DIP("str %s, %s\n", nameQRegLO(tt, Ity_I16), dis_buf);
5726 break;
5727 case 2: /* 32 bit */
5728 if (isLD) {
5729 putQReg128(tt, mkV128(0x0000));
5730 putQRegLO(tt, loadLE(Ity_I32, mkexpr(ea)));
5731 DIP("ldr %s, %s\n", nameQRegLO(tt, Ity_I32), dis_buf);
5732 } else {
5733 storeLE(mkexpr(ea), getQRegLO(tt, Ity_I32));
5734 DIP("str %s, %s\n", nameQRegLO(tt, Ity_I32), dis_buf);
5736 break;
5737 case 3: /* 64 bit */
5738 if (isLD) {
5739 putQReg128(tt, mkV128(0x0000));
5740 putQRegLO(tt, loadLE(Ity_I64, mkexpr(ea)));
5741 DIP("ldr %s, %s\n", nameQRegLO(tt, Ity_I64), dis_buf);
5742 } else {
5743 storeLE(mkexpr(ea), getQRegLO(tt, Ity_I64));
5744 DIP("str %s, %s\n", nameQRegLO(tt, Ity_I64), dis_buf);
5746 break;
5747 case 4:
5748 if (isLD) {
5749 putQReg128(tt, loadLE(Ity_V128, mkexpr(ea)));
5750 DIP("ldr %s, %s\n", nameQReg128(tt), dis_buf);
5751 } else {
5752 storeLE(mkexpr(ea), getQReg128(tt));
5753 DIP("str %s, %s\n", nameQReg128(tt), dis_buf);
5755 break;
5756 default:
5757 vassert(0);
5759 return True;
5761 after_LDR_STR_vector_register:
5763 /* ---------- LDRS{B,H,W} (integer register, SX) ---------- */
5764 /* 31 29 22 20 15 12 11 9 4
5765 | | | | | | | | |
5766 10 1110001 01 Rm opt S 10 Rn Rt LDRSW Xt, [Xn|SP, R<m>{ext/sh}]
5768 01 1110001 01 Rm opt S 10 Rn Rt LDRSH Xt, [Xn|SP, R<m>{ext/sh}]
5769 01 1110001 11 Rm opt S 10 Rn Rt LDRSH Wt, [Xn|SP, R<m>{ext/sh}]
5771 00 1110001 01 Rm opt S 10 Rn Rt LDRSB Xt, [Xn|SP, R<m>{ext/sh}]
5772 00 1110001 11 Rm opt S 10 Rn Rt LDRSB Wt, [Xn|SP, R<m>{ext/sh}]
5774 if (INSN(29,23) == BITS7(1,1,1,0,0,0,1)
5775 && INSN(21,21) == 1 && INSN(11,10) == BITS2(1,0)) {
5776 HChar dis_buf[64];
5777 UInt szLg2 = INSN(31,30);
5778 Bool sxTo64 = INSN(22,22) == 0; // else sx to 32 and zx to 64
5779 UInt tt = INSN(4,0);
5780 if (szLg2 == 3) goto after_LDRS_integer_register;
5781 IRTemp ea = gen_indexed_EA(dis_buf, insn, True/*to/from int regs*/);
5782 if (ea == IRTemp_INVALID) goto after_LDRS_integer_register;
5783 /* Enumerate the 5 variants explicitly. */
5784 if (szLg2 == 2/*32 bit*/ && sxTo64) {
5785 putIReg64orZR(tt, unop(Iop_32Sto64, loadLE(Ity_I32, mkexpr(ea))));
5786 DIP("ldrsw %s, %s\n", nameIReg64orZR(tt), dis_buf);
5787 return True;
5789 else
5790 if (szLg2 == 1/*16 bit*/) {
5791 if (sxTo64) {
5792 putIReg64orZR(tt, unop(Iop_16Sto64, loadLE(Ity_I16, mkexpr(ea))));
5793 DIP("ldrsh %s, %s\n", nameIReg64orZR(tt), dis_buf);
5794 } else {
5795 putIReg32orZR(tt, unop(Iop_16Sto32, loadLE(Ity_I16, mkexpr(ea))));
5796 DIP("ldrsh %s, %s\n", nameIReg32orZR(tt), dis_buf);
5798 return True;
5800 else
5801 if (szLg2 == 0/*8 bit*/) {
5802 if (sxTo64) {
5803 putIReg64orZR(tt, unop(Iop_8Sto64, loadLE(Ity_I8, mkexpr(ea))));
5804 DIP("ldrsb %s, %s\n", nameIReg64orZR(tt), dis_buf);
5805 } else {
5806 putIReg32orZR(tt, unop(Iop_8Sto32, loadLE(Ity_I8, mkexpr(ea))));
5807 DIP("ldrsb %s, %s\n", nameIReg32orZR(tt), dis_buf);
5809 return True;
5811 /* else it's an invalid combination */
5813 after_LDRS_integer_register:
5815 /* -------- LDR/STR (immediate, SIMD&FP, unsigned offset) -------- */
5816 /* This is the Unsigned offset variant only. The Post-Index and
5817 Pre-Index variants are below.
5819 31 29 23 21 9 4
5820 00 111 101 01 imm12 n t LDR Bt, [Xn|SP + imm12 * 1]
5821 01 111 101 01 imm12 n t LDR Ht, [Xn|SP + imm12 * 2]
5822 10 111 101 01 imm12 n t LDR St, [Xn|SP + imm12 * 4]
5823 11 111 101 01 imm12 n t LDR Dt, [Xn|SP + imm12 * 8]
5824 00 111 101 11 imm12 n t LDR Qt, [Xn|SP + imm12 * 16]
5826 00 111 101 00 imm12 n t STR Bt, [Xn|SP + imm12 * 1]
5827 01 111 101 00 imm12 n t STR Ht, [Xn|SP + imm12 * 2]
5828 10 111 101 00 imm12 n t STR St, [Xn|SP + imm12 * 4]
5829 11 111 101 00 imm12 n t STR Dt, [Xn|SP + imm12 * 8]
5830 00 111 101 10 imm12 n t STR Qt, [Xn|SP + imm12 * 16]
5832 if (INSN(29,24) == BITS6(1,1,1,1,0,1)
5833 && ((INSN(23,23) << 2) | INSN(31,30)) <= 4) {
5834 UInt szLg2 = (INSN(23,23) << 2) | INSN(31,30);
5835 Bool isLD = INSN(22,22) == 1;
5836 UInt pimm12 = INSN(21,10) << szLg2;
5837 UInt nn = INSN(9,5);
5838 UInt tt = INSN(4,0);
5839 IRTemp tEA = newTemp(Ity_I64);
5840 IRType ty = preferredVectorSubTypeFromSize(1 << szLg2);
5841 assign(tEA, binop(Iop_Add64, getIReg64orSP(nn), mkU64(pimm12)));
5842 if (isLD) {
5843 if (szLg2 < 4) {
5844 putQReg128(tt, mkV128(0x0000));
5846 putQRegLO(tt, loadLE(ty, mkexpr(tEA)));
5847 } else {
5848 storeLE(mkexpr(tEA), getQRegLO(tt, ty));
5850 DIP("%s %s, [%s, #%u]\n",
5851 isLD ? "ldr" : "str",
5852 nameQRegLO(tt, ty), nameIReg64orSP(nn), pimm12);
5853 return True;
5856 /* -------- LDR/STR (immediate, SIMD&FP, pre/post index) -------- */
5857 /* These are the Post-Index and Pre-Index variants.
5859 31 29 23 20 11 9 4
5860 (at-Rn-then-Rn=EA)
5861 00 111 100 01 0 imm9 01 n t LDR Bt, [Xn|SP], #simm
5862 01 111 100 01 0 imm9 01 n t LDR Ht, [Xn|SP], #simm
5863 10 111 100 01 0 imm9 01 n t LDR St, [Xn|SP], #simm
5864 11 111 100 01 0 imm9 01 n t LDR Dt, [Xn|SP], #simm
5865 00 111 100 11 0 imm9 01 n t LDR Qt, [Xn|SP], #simm
5867 (at-EA-then-Rn=EA)
5868 00 111 100 01 0 imm9 11 n t LDR Bt, [Xn|SP, #simm]!
5869 01 111 100 01 0 imm9 11 n t LDR Ht, [Xn|SP, #simm]!
5870 10 111 100 01 0 imm9 11 n t LDR St, [Xn|SP, #simm]!
5871 11 111 100 01 0 imm9 11 n t LDR Dt, [Xn|SP, #simm]!
5872 00 111 100 11 0 imm9 11 n t LDR Qt, [Xn|SP, #simm]!
5874 Stores are the same except with bit 22 set to 0.
5876 if (INSN(29,24) == BITS6(1,1,1,1,0,0)
5877 && ((INSN(23,23) << 2) | INSN(31,30)) <= 4
5878 && INSN(21,21) == 0 && INSN(10,10) == 1) {
5879 UInt szLg2 = (INSN(23,23) << 2) | INSN(31,30);
5880 Bool isLD = INSN(22,22) == 1;
5881 UInt imm9 = INSN(20,12);
5882 Bool atRN = INSN(11,11) == 0;
5883 UInt nn = INSN(9,5);
5884 UInt tt = INSN(4,0);
5885 IRTemp tRN = newTemp(Ity_I64);
5886 IRTemp tEA = newTemp(Ity_I64);
5887 IRTemp tTA = IRTemp_INVALID;
5888 IRType ty = preferredVectorSubTypeFromSize(1 << szLg2);
5889 ULong simm9 = sx_to_64(imm9, 9);
5890 assign(tRN, getIReg64orSP(nn));
5891 assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm9)));
5892 tTA = atRN ? tRN : tEA;
5894 /* Do early writeback for the cases typified by
5895 str d8, [sp, #-32]!
5896 str d10, [sp, #-128]!
5897 str q1, [sp, #-32]!
5898 for the same reasons as described in a similar comment in the
5899 "LDP,STP (immediate, simm7) (FP&VEC)" case just above.
5901 Bool earlyWBack
5902 = !atRN && !isLD && (ty == Ity_F64 || ty == Ity_V128)
5903 && nn == 31 && ((Long)simm9) < 0;
5905 if (earlyWBack)
5906 putIReg64orSP(nn, mkexpr(tEA));
5908 if (isLD) {
5909 if (szLg2 < 4) {
5910 putQReg128(tt, mkV128(0x0000));
5912 putQRegLO(tt, loadLE(ty, mkexpr(tTA)));
5913 } else {
5914 storeLE(mkexpr(tTA), getQRegLO(tt, ty));
5917 if (!earlyWBack)
5918 putIReg64orSP(nn, mkexpr(tEA));
5920 DIP(atRN ? "%s %s, [%s], #%lld\n" : "%s %s, [%s, #%lld]!\n",
5921 isLD ? "ldr" : "str",
5922 nameQRegLO(tt, ty), nameIReg64orSP(nn), (Long)simm9);
5923 return True;
5926 /* -------- LDUR/STUR (unscaled offset, SIMD&FP) -------- */
5927 /* 31 29 23 20 11 9 4
5928 00 111 100 01 0 imm9 00 n t LDUR Bt, [Xn|SP, #simm]
5929 01 111 100 01 0 imm9 00 n t LDUR Ht, [Xn|SP, #simm]
5930 10 111 100 01 0 imm9 00 n t LDUR St, [Xn|SP, #simm]
5931 11 111 100 01 0 imm9 00 n t LDUR Dt, [Xn|SP, #simm]
5932 00 111 100 11 0 imm9 00 n t LDUR Qt, [Xn|SP, #simm]
5934 00 111 100 00 0 imm9 00 n t STUR Bt, [Xn|SP, #simm]
5935 01 111 100 00 0 imm9 00 n t STUR Ht, [Xn|SP, #simm]
5936 10 111 100 00 0 imm9 00 n t STUR St, [Xn|SP, #simm]
5937 11 111 100 00 0 imm9 00 n t STUR Dt, [Xn|SP, #simm]
5938 00 111 100 10 0 imm9 00 n t STUR Qt, [Xn|SP, #simm]
5940 if (INSN(29,24) == BITS6(1,1,1,1,0,0)
5941 && ((INSN(23,23) << 2) | INSN(31,30)) <= 4
5942 && INSN(21,21) == 0 && INSN(11,10) == BITS2(0,0)) {
5943 UInt szLg2 = (INSN(23,23) << 2) | INSN(31,30);
5944 Bool isLD = INSN(22,22) == 1;
5945 UInt imm9 = INSN(20,12);
5946 UInt nn = INSN(9,5);
5947 UInt tt = INSN(4,0);
5948 ULong simm9 = sx_to_64(imm9, 9);
5949 IRTemp tEA = newTemp(Ity_I64);
5950 IRType ty = preferredVectorSubTypeFromSize(1 << szLg2);
5951 assign(tEA, binop(Iop_Add64, getIReg64orSP(nn), mkU64(simm9)));
5952 if (isLD) {
5953 if (szLg2 < 4) {
5954 putQReg128(tt, mkV128(0x0000));
5956 putQRegLO(tt, loadLE(ty, mkexpr(tEA)));
5957 } else {
5958 storeLE(mkexpr(tEA), getQRegLO(tt, ty));
5960 DIP("%s %s, [%s, #%lld]\n",
5961 isLD ? "ldur" : "stur",
5962 nameQRegLO(tt, ty), nameIReg64orSP(nn), (Long)simm9);
5963 return True;
5966 /* ---------------- LDR (literal, SIMD&FP) ---------------- */
5967 /* 31 29 23 4
5968 00 011 100 imm19 t LDR St, [PC + sxTo64(imm19 << 2)]
5969 01 011 100 imm19 t LDR Dt, [PC + sxTo64(imm19 << 2)]
5970 10 011 100 imm19 t LDR Qt, [PC + sxTo64(imm19 << 2)]
5972 if (INSN(29,24) == BITS6(0,1,1,1,0,0) && INSN(31,30) < BITS2(1,1)) {
5973 UInt szB = 4 << INSN(31,30);
5974 UInt imm19 = INSN(23,5);
5975 UInt tt = INSN(4,0);
5976 ULong ea = guest_PC_curr_instr + sx_to_64(imm19 << 2, 21);
5977 IRType ty = preferredVectorSubTypeFromSize(szB);
5978 putQReg128(tt, mkV128(0x0000));
5979 putQRegLO(tt, loadLE(ty, mkU64(ea)));
5980 DIP("ldr %s, 0x%llx (literal)\n", nameQRegLO(tt, ty), ea);
5981 return True;
5984 /* ------ LD1/ST1 (multiple 1-elem structs to/from 1 reg) ------ */
5985 /* ------ LD2/ST2 (multiple 2-elem structs to/from 2 regs) ------ */
5986 /* ------ LD3/ST3 (multiple 3-elem structs to/from 3 regs) ------ */
5987 /* ------ LD4/ST4 (multiple 4-elem structs to/from 4 regs) ------ */
5988 /* 31 29 26 22 21 20 15 11 9 4
5990 0q 001 1000 L 0 00000 0000 sz n t xx4 {Vt..t+3.T}, [Xn|SP]
5991 0q 001 1001 L 0 m 0000 sz n t xx4 {Vt..t+3.T}, [Xn|SP], step
5993 0q 001 1000 L 0 00000 0100 sz n t xx3 {Vt..t+2.T}, [Xn|SP]
5994 0q 001 1001 L 0 m 0100 sz n t xx3 {Vt..t+2.T}, [Xn|SP], step
5996 0q 001 1000 L 0 00000 1000 sz n t xx2 {Vt..t+1.T}, [Xn|SP]
5997 0q 001 1001 L 0 m 1000 sz n t xx2 {Vt..t+1.T}, [Xn|SP], step
5999 0q 001 1000 L 0 00000 0111 sz n t xx1 {Vt.T}, [Xn|SP]
6000 0q 001 1001 L 0 m 0111 sz n t xx1 {Vt.T}, [Xn|SP], step
6002 T = defined by Q and sz in the normal way
6003 step = if m == 11111 then transfer-size else Xm
6004 xx = case L of 1 -> LD ; 0 -> ST
6006 if (INSN(31,31) == 0 && INSN(29,24) == BITS6(0,0,1,1,0,0)
6007 && INSN(21,21) == 0) {
6008 Bool bitQ = INSN(30,30);
6009 Bool isPX = INSN(23,23) == 1;
6010 Bool isLD = INSN(22,22) == 1;
6011 UInt mm = INSN(20,16);
6012 UInt opc = INSN(15,12);
6013 UInt sz = INSN(11,10);
6014 UInt nn = INSN(9,5);
6015 UInt tt = INSN(4,0);
6016 Bool isQ = bitQ == 1;
6017 Bool is1d = sz == BITS2(1,1) && !isQ;
6018 UInt nRegs = 0;
6019 switch (opc) {
6020 case BITS4(0,0,0,0): nRegs = 4; break;
6021 case BITS4(0,1,0,0): nRegs = 3; break;
6022 case BITS4(1,0,0,0): nRegs = 2; break;
6023 case BITS4(0,1,1,1): nRegs = 1; break;
6024 default: break;
6027 /* The combination insn[23] == 0 && insn[20:16] != 0 is not allowed.
6028 If we see it, set nRegs to 0 so as to cause the next conditional
6029 to fail. */
6030 if (!isPX && mm != 0)
6031 nRegs = 0;
6033 if (nRegs == 1 /* .1d is allowed */
6034 || (nRegs >= 2 && nRegs <= 4 && !is1d) /* .1d is not allowed */) {
6036 UInt xferSzB = (isQ ? 16 : 8) * nRegs;
6038 /* Generate the transfer address (TA) and if necessary the
6039 writeback address (WB) */
6040 IRTemp tTA = newTemp(Ity_I64);
6041 assign(tTA, getIReg64orSP(nn));
6042 if (nn == 31) { /* FIXME generate stack alignment check */ }
6043 IRTemp tWB = IRTemp_INVALID;
6044 if (isPX) {
6045 tWB = newTemp(Ity_I64);
6046 assign(tWB, binop(Iop_Add64,
6047 mkexpr(tTA),
6048 mm == BITS5(1,1,1,1,1) ? mkU64(xferSzB)
6049 : getIReg64orZR(mm)));
6052 /* -- BEGIN generate the transfers -- */
6054 IRTemp u0, u1, u2, u3, i0, i1, i2, i3;
6055 u0 = u1 = u2 = u3 = i0 = i1 = i2 = i3 = IRTemp_INVALID;
6056 switch (nRegs) {
6057 case 4: u3 = newTempV128(); i3 = newTempV128(); /* fallthru */
6058 case 3: u2 = newTempV128(); i2 = newTempV128(); /* fallthru */
6059 case 2: u1 = newTempV128(); i1 = newTempV128(); /* fallthru */
6060 case 1: u0 = newTempV128(); i0 = newTempV128(); break;
6061 default: vassert(0);
6064 /* -- Multiple 128 or 64 bit stores -- */
6065 if (!isLD) {
6066 switch (nRegs) {
6067 case 4: assign(u3, getQReg128((tt+3) % 32)); /* fallthru */
6068 case 3: assign(u2, getQReg128((tt+2) % 32)); /* fallthru */
6069 case 2: assign(u1, getQReg128((tt+1) % 32)); /* fallthru */
6070 case 1: assign(u0, getQReg128((tt+0) % 32)); break;
6071 default: vassert(0);
6073 switch (nRegs) {
6074 case 4: (isQ ? math_INTERLEAVE4_128 : math_INTERLEAVE4_64)
6075 (&i0, &i1, &i2, &i3, sz, u0, u1, u2, u3);
6076 break;
6077 case 3: (isQ ? math_INTERLEAVE3_128 : math_INTERLEAVE3_64)
6078 (&i0, &i1, &i2, sz, u0, u1, u2);
6079 break;
6080 case 2: (isQ ? math_INTERLEAVE2_128 : math_INTERLEAVE2_64)
6081 (&i0, &i1, sz, u0, u1);
6082 break;
6083 case 1: (isQ ? math_INTERLEAVE1_128 : math_INTERLEAVE1_64)
6084 (&i0, sz, u0);
6085 break;
6086 default: vassert(0);
6088 # define MAYBE_NARROW_TO_64(_expr) \
6089 (isQ ? (_expr) : unop(Iop_V128to64,(_expr)))
6090 UInt step = isQ ? 16 : 8;
6091 switch (nRegs) {
6092 case 4: storeLE( binop(Iop_Add64, mkexpr(tTA), mkU64(3*step)),
6093 MAYBE_NARROW_TO_64(mkexpr(i3)) );
6094 /* fallthru */
6095 case 3: storeLE( binop(Iop_Add64, mkexpr(tTA), mkU64(2*step)),
6096 MAYBE_NARROW_TO_64(mkexpr(i2)) );
6097 /* fallthru */
6098 case 2: storeLE( binop(Iop_Add64, mkexpr(tTA), mkU64(1*step)),
6099 MAYBE_NARROW_TO_64(mkexpr(i1)) );
6100 /* fallthru */
6101 case 1: storeLE( binop(Iop_Add64, mkexpr(tTA), mkU64(0*step)),
6102 MAYBE_NARROW_TO_64(mkexpr(i0)) );
6103 break;
6104 default: vassert(0);
6106 # undef MAYBE_NARROW_TO_64
6109 /* -- Multiple 128 or 64 bit loads -- */
6110 else /* isLD */ {
6111 UInt step = isQ ? 16 : 8;
6112 IRType loadTy = isQ ? Ity_V128 : Ity_I64;
6113 # define MAYBE_WIDEN_FROM_64(_expr) \
6114 (isQ ? (_expr) : unop(Iop_64UtoV128,(_expr)))
6115 switch (nRegs) {
6116 case 4:
6117 assign(i3, MAYBE_WIDEN_FROM_64(
6118 loadLE(loadTy,
6119 binop(Iop_Add64, mkexpr(tTA),
6120 mkU64(3 * step)))));
6121 /* fallthru */
6122 case 3:
6123 assign(i2, MAYBE_WIDEN_FROM_64(
6124 loadLE(loadTy,
6125 binop(Iop_Add64, mkexpr(tTA),
6126 mkU64(2 * step)))));
6127 /* fallthru */
6128 case 2:
6129 assign(i1, MAYBE_WIDEN_FROM_64(
6130 loadLE(loadTy,
6131 binop(Iop_Add64, mkexpr(tTA),
6132 mkU64(1 * step)))));
6133 /* fallthru */
6134 case 1:
6135 assign(i0, MAYBE_WIDEN_FROM_64(
6136 loadLE(loadTy,
6137 binop(Iop_Add64, mkexpr(tTA),
6138 mkU64(0 * step)))));
6139 break;
6140 default:
6141 vassert(0);
6143 # undef MAYBE_WIDEN_FROM_64
6144 switch (nRegs) {
6145 case 4: (isQ ? math_DEINTERLEAVE4_128 : math_DEINTERLEAVE4_64)
6146 (&u0, &u1, &u2, &u3, sz, i0,i1,i2,i3);
6147 break;
6148 case 3: (isQ ? math_DEINTERLEAVE3_128 : math_DEINTERLEAVE3_64)
6149 (&u0, &u1, &u2, sz, i0, i1, i2);
6150 break;
6151 case 2: (isQ ? math_DEINTERLEAVE2_128 : math_DEINTERLEAVE2_64)
6152 (&u0, &u1, sz, i0, i1);
6153 break;
6154 case 1: (isQ ? math_DEINTERLEAVE1_128 : math_DEINTERLEAVE1_64)
6155 (&u0, sz, i0);
6156 break;
6157 default: vassert(0);
6159 switch (nRegs) {
6160 case 4: putQReg128( (tt+3) % 32,
6161 math_MAYBE_ZERO_HI64(bitQ, u3));
6162 /* fallthru */
6163 case 3: putQReg128( (tt+2) % 32,
6164 math_MAYBE_ZERO_HI64(bitQ, u2));
6165 /* fallthru */
6166 case 2: putQReg128( (tt+1) % 32,
6167 math_MAYBE_ZERO_HI64(bitQ, u1));
6168 /* fallthru */
6169 case 1: putQReg128( (tt+0) % 32,
6170 math_MAYBE_ZERO_HI64(bitQ, u0));
6171 break;
6172 default: vassert(0);
6176 /* -- END generate the transfers -- */
6178 /* Do the writeback, if necessary */
6179 if (isPX) {
6180 putIReg64orSP(nn, mkexpr(tWB));
6183 HChar pxStr[20];
6184 pxStr[0] = pxStr[sizeof(pxStr)-1] = 0;
6185 if (isPX) {
6186 if (mm == BITS5(1,1,1,1,1))
6187 vex_sprintf(pxStr, ", #%u", xferSzB);
6188 else
6189 vex_sprintf(pxStr, ", %s", nameIReg64orZR(mm));
6191 const HChar* arr = nameArr_Q_SZ(bitQ, sz);
6192 DIP("%s%u {v%u.%s .. v%u.%s}, [%s]%s\n",
6193 isLD ? "ld" : "st", nRegs,
6194 (tt+0) % 32, arr, (tt+nRegs-1) % 32, arr, nameIReg64orSP(nn),
6195 pxStr);
6197 if (nRegs >= 3) {
6198 dres->hint = Dis_HintVerbose;
6200 return True;
6202 /* else fall through */
6205 /* ------ LD1/ST1 (multiple 1-elem structs to/from 2 regs) ------ */
6206 /* ------ LD1/ST1 (multiple 1-elem structs to/from 3 regs) ------ */
6207 /* ------ LD1/ST1 (multiple 1-elem structs to/from 4 regs) ------ */
6208 /* 31 29 26 22 21 20 15 11 9 4
6210 0q 001 1000 L 0 00000 0010 sz n t xx1 {Vt..t+3.T}, [Xn|SP]
6211 0q 001 1001 L 0 m 0010 sz n t xx1 {Vt..t+3.T}, [Xn|SP], step
6213 0q 001 1000 L 0 00000 0110 sz n t xx1 {Vt..t+2.T}, [Xn|SP]
6214 0q 001 1001 L 0 m 0110 sz n t xx1 {Vt..t+2.T}, [Xn|SP], step
6216 0q 001 1000 L 0 00000 1010 sz n t xx1 {Vt..t+1.T}, [Xn|SP]
6217 0q 001 1001 L 0 m 1010 sz n t xx1 {Vt..t+1.T}, [Xn|SP], step
6219 T = defined by Q and sz in the normal way
6220 step = if m == 11111 then transfer-size else Xm
6221 xx = case L of 1 -> LD ; 0 -> ST
6223 if (INSN(31,31) == 0 && INSN(29,24) == BITS6(0,0,1,1,0,0)
6224 && INSN(21,21) == 0) {
6225 Bool bitQ = INSN(30,30);
6226 Bool isPX = INSN(23,23) == 1;
6227 Bool isLD = INSN(22,22) == 1;
6228 UInt mm = INSN(20,16);
6229 UInt opc = INSN(15,12);
6230 UInt sz = INSN(11,10);
6231 UInt nn = INSN(9,5);
6232 UInt tt = INSN(4,0);
6233 Bool isQ = bitQ == 1;
6234 UInt nRegs = 0;
6235 switch (opc) {
6236 case BITS4(0,0,1,0): nRegs = 4; break;
6237 case BITS4(0,1,1,0): nRegs = 3; break;
6238 case BITS4(1,0,1,0): nRegs = 2; break;
6239 default: break;
6242 /* The combination insn[23] == 0 && insn[20:16] != 0 is not allowed.
6243 If we see it, set nRegs to 0 so as to cause the next conditional
6244 to fail. */
6245 if (!isPX && mm != 0)
6246 nRegs = 0;
6248 if (nRegs >= 2 && nRegs <= 4) {
6250 UInt xferSzB = (isQ ? 16 : 8) * nRegs;
6252 /* Generate the transfer address (TA) and if necessary the
6253 writeback address (WB) */
6254 IRTemp tTA = newTemp(Ity_I64);
6255 assign(tTA, getIReg64orSP(nn));
6256 if (nn == 31) { /* FIXME generate stack alignment check */ }
6257 IRTemp tWB = IRTemp_INVALID;
6258 if (isPX) {
6259 tWB = newTemp(Ity_I64);
6260 assign(tWB, binop(Iop_Add64,
6261 mkexpr(tTA),
6262 mm == BITS5(1,1,1,1,1) ? mkU64(xferSzB)
6263 : getIReg64orZR(mm)));
6266 /* -- BEGIN generate the transfers -- */
6268 IRTemp u0, u1, u2, u3;
6269 u0 = u1 = u2 = u3 = IRTemp_INVALID;
6270 switch (nRegs) {
6271 case 4: u3 = newTempV128(); /* fallthru */
6272 case 3: u2 = newTempV128(); /* fallthru */
6273 case 2: u1 = newTempV128();
6274 u0 = newTempV128(); break;
6275 default: vassert(0);
6278 /* -- Multiple 128 or 64 bit stores -- */
6279 if (!isLD) {
6280 switch (nRegs) {
6281 case 4: assign(u3, getQReg128((tt+3) % 32)); /* fallthru */
6282 case 3: assign(u2, getQReg128((tt+2) % 32)); /* fallthru */
6283 case 2: assign(u1, getQReg128((tt+1) % 32));
6284 assign(u0, getQReg128((tt+0) % 32)); break;
6285 default: vassert(0);
6287 # define MAYBE_NARROW_TO_64(_expr) \
6288 (isQ ? (_expr) : unop(Iop_V128to64,(_expr)))
6289 UInt step = isQ ? 16 : 8;
6290 switch (nRegs) {
6291 case 4: storeLE( binop(Iop_Add64, mkexpr(tTA), mkU64(3*step)),
6292 MAYBE_NARROW_TO_64(mkexpr(u3)) );
6293 /* fallthru */
6294 case 3: storeLE( binop(Iop_Add64, mkexpr(tTA), mkU64(2*step)),
6295 MAYBE_NARROW_TO_64(mkexpr(u2)) );
6296 /* fallthru */
6297 case 2: storeLE( binop(Iop_Add64, mkexpr(tTA), mkU64(1*step)),
6298 MAYBE_NARROW_TO_64(mkexpr(u1)) );
6299 storeLE( binop(Iop_Add64, mkexpr(tTA), mkU64(0*step)),
6300 MAYBE_NARROW_TO_64(mkexpr(u0)) );
6301 break;
6302 default: vassert(0);
6304 # undef MAYBE_NARROW_TO_64
6307 /* -- Multiple 128 or 64 bit loads -- */
6308 else /* isLD */ {
6309 UInt step = isQ ? 16 : 8;
6310 IRType loadTy = isQ ? Ity_V128 : Ity_I64;
6311 # define MAYBE_WIDEN_FROM_64(_expr) \
6312 (isQ ? (_expr) : unop(Iop_64UtoV128,(_expr)))
6313 switch (nRegs) {
6314 case 4:
6315 assign(u3, MAYBE_WIDEN_FROM_64(
6316 loadLE(loadTy,
6317 binop(Iop_Add64, mkexpr(tTA),
6318 mkU64(3 * step)))));
6319 /* fallthru */
6320 case 3:
6321 assign(u2, MAYBE_WIDEN_FROM_64(
6322 loadLE(loadTy,
6323 binop(Iop_Add64, mkexpr(tTA),
6324 mkU64(2 * step)))));
6325 /* fallthru */
6326 case 2:
6327 assign(u1, MAYBE_WIDEN_FROM_64(
6328 loadLE(loadTy,
6329 binop(Iop_Add64, mkexpr(tTA),
6330 mkU64(1 * step)))));
6331 assign(u0, MAYBE_WIDEN_FROM_64(
6332 loadLE(loadTy,
6333 binop(Iop_Add64, mkexpr(tTA),
6334 mkU64(0 * step)))));
6335 break;
6336 default:
6337 vassert(0);
6339 # undef MAYBE_WIDEN_FROM_64
6340 switch (nRegs) {
6341 case 4: putQReg128( (tt+3) % 32,
6342 math_MAYBE_ZERO_HI64(bitQ, u3));
6343 /* fallthru */
6344 case 3: putQReg128( (tt+2) % 32,
6345 math_MAYBE_ZERO_HI64(bitQ, u2));
6346 /* fallthru */
6347 case 2: putQReg128( (tt+1) % 32,
6348 math_MAYBE_ZERO_HI64(bitQ, u1));
6349 putQReg128( (tt+0) % 32,
6350 math_MAYBE_ZERO_HI64(bitQ, u0));
6351 break;
6352 default: vassert(0);
6356 /* -- END generate the transfers -- */
6358 /* Do the writeback, if necessary */
6359 if (isPX) {
6360 putIReg64orSP(nn, mkexpr(tWB));
6363 HChar pxStr[20];
6364 pxStr[0] = pxStr[sizeof(pxStr)-1] = 0;
6365 if (isPX) {
6366 if (mm == BITS5(1,1,1,1,1))
6367 vex_sprintf(pxStr, ", #%u", xferSzB);
6368 else
6369 vex_sprintf(pxStr, ", %s", nameIReg64orZR(mm));
6371 const HChar* arr = nameArr_Q_SZ(bitQ, sz);
6372 DIP("%s1 {v%u.%s .. v%u.%s}, [%s]%s\n",
6373 isLD ? "ld" : "st",
6374 (tt+0) % 32, arr, (tt+nRegs-1) % 32, arr, nameIReg64orSP(nn),
6375 pxStr);
6377 return True;
6379 /* else fall through */
6382 /* ---------- LD1R (single structure, replicate) ---------- */
6383 /* ---------- LD2R (single structure, replicate) ---------- */
6384 /* ---------- LD3R (single structure, replicate) ---------- */
6385 /* ---------- LD4R (single structure, replicate) ---------- */
6386 /* 31 29 22 20 15 11 9 4
6387 0q 001 1010 10 00000 110 0 sz n t LD1R {Vt.T}, [Xn|SP]
6388 0q 001 1011 10 m 110 0 sz n t LD1R {Vt.T}, [Xn|SP], step
6390 0q 001 1010 11 00000 110 0 sz n t LD2R {Vt..t+1.T}, [Xn|SP]
6391 0q 001 1011 11 m 110 0 sz n t LD2R {Vt..t+1.T}, [Xn|SP], step
6393 0q 001 1010 10 00000 111 0 sz n t LD3R {Vt..t+2.T}, [Xn|SP]
6394 0q 001 1011 10 m 111 0 sz n t LD3R {Vt..t+2.T}, [Xn|SP], step
6396 0q 001 1010 11 00000 111 0 sz n t LD4R {Vt..t+3.T}, [Xn|SP]
6397 0q 001 1011 11 m 111 0 sz n t LD4R {Vt..t+3.T}, [Xn|SP], step
6399 step = if m == 11111 then transfer-size else Xm
6401 if (INSN(31,31) == 0 && INSN(29,24) == BITS6(0,0,1,1,0,1)
6402 && INSN(22,22) == 1 && INSN(15,14) == BITS2(1,1)
6403 && INSN(12,12) == 0) {
6404 UInt bitQ = INSN(30,30);
6405 Bool isPX = INSN(23,23) == 1;
6406 UInt nRegs = ((INSN(13,13) << 1) | INSN(21,21)) + 1;
6407 UInt mm = INSN(20,16);
6408 UInt sz = INSN(11,10);
6409 UInt nn = INSN(9,5);
6410 UInt tt = INSN(4,0);
6412 /* The combination insn[23] == 0 && insn[20:16] != 0 is not allowed. */
6413 if (isPX || mm == 0) {
6415 IRType ty = integerIRTypeOfSize(1 << sz);
6417 UInt laneSzB = 1 << sz;
6418 UInt xferSzB = laneSzB * nRegs;
6420 /* Generate the transfer address (TA) and if necessary the
6421 writeback address (WB) */
6422 IRTemp tTA = newTemp(Ity_I64);
6423 assign(tTA, getIReg64orSP(nn));
6424 if (nn == 31) { /* FIXME generate stack alignment check */ }
6425 IRTemp tWB = IRTemp_INVALID;
6426 if (isPX) {
6427 tWB = newTemp(Ity_I64);
6428 assign(tWB, binop(Iop_Add64,
6429 mkexpr(tTA),
6430 mm == BITS5(1,1,1,1,1) ? mkU64(xferSzB)
6431 : getIReg64orZR(mm)));
6434 /* Do the writeback, if necessary */
6435 if (isPX) {
6436 putIReg64orSP(nn, mkexpr(tWB));
6439 IRTemp e0, e1, e2, e3, v0, v1, v2, v3;
6440 e0 = e1 = e2 = e3 = v0 = v1 = v2 = v3 = IRTemp_INVALID;
6441 switch (nRegs) {
6442 case 4:
6443 e3 = newTemp(ty);
6444 assign(e3, loadLE(ty, binop(Iop_Add64, mkexpr(tTA),
6445 mkU64(3 * laneSzB))));
6446 v3 = math_DUP_TO_V128(e3, ty);
6447 putQReg128((tt+3) % 32, math_MAYBE_ZERO_HI64(bitQ, v3));
6448 /* fallthrough */
6449 case 3:
6450 e2 = newTemp(ty);
6451 assign(e2, loadLE(ty, binop(Iop_Add64, mkexpr(tTA),
6452 mkU64(2 * laneSzB))));
6453 v2 = math_DUP_TO_V128(e2, ty);
6454 putQReg128((tt+2) % 32, math_MAYBE_ZERO_HI64(bitQ, v2));
6455 /* fallthrough */
6456 case 2:
6457 e1 = newTemp(ty);
6458 assign(e1, loadLE(ty, binop(Iop_Add64, mkexpr(tTA),
6459 mkU64(1 * laneSzB))));
6460 v1 = math_DUP_TO_V128(e1, ty);
6461 putQReg128((tt+1) % 32, math_MAYBE_ZERO_HI64(bitQ, v1));
6462 /* fallthrough */
6463 case 1:
6464 e0 = newTemp(ty);
6465 assign(e0, loadLE(ty, binop(Iop_Add64, mkexpr(tTA),
6466 mkU64(0 * laneSzB))));
6467 v0 = math_DUP_TO_V128(e0, ty);
6468 putQReg128((tt+0) % 32, math_MAYBE_ZERO_HI64(bitQ, v0));
6469 break;
6470 default:
6471 vassert(0);
6474 HChar pxStr[20];
6475 pxStr[0] = pxStr[sizeof(pxStr)-1] = 0;
6476 if (isPX) {
6477 if (mm == BITS5(1,1,1,1,1))
6478 vex_sprintf(pxStr, ", #%u", xferSzB);
6479 else
6480 vex_sprintf(pxStr, ", %s", nameIReg64orZR(mm));
6482 const HChar* arr = nameArr_Q_SZ(bitQ, sz);
6483 DIP("ld%ur {v%u.%s .. v%u.%s}, [%s]%s\n",
6484 nRegs,
6485 (tt+0) % 32, arr, (tt+nRegs-1) % 32, arr, nameIReg64orSP(nn),
6486 pxStr);
6488 return True;
6490 /* else fall through */
6493 /* ------ LD1/ST1 (single structure, to/from one lane) ------ */
6494 /* ------ LD2/ST2 (single structure, to/from one lane) ------ */
6495 /* ------ LD3/ST3 (single structure, to/from one lane) ------ */
6496 /* ------ LD4/ST4 (single structure, to/from one lane) ------ */
6497 /* 31 29 22 21 20 15 11 9 4
6498 0q 001 1010 L 0 00000 xx0 S sz n t op1 {Vt.T}[ix], [Xn|SP]
6499 0q 001 1011 L 0 m xx0 S sz n t op1 {Vt.T}[ix], [Xn|SP], step
6501 0q 001 1010 L 1 00000 xx0 S sz n t op2 {Vt..t+1.T}[ix], [Xn|SP]
6502 0q 001 1011 L 1 m xx0 S sz n t op2 {Vt..t+1.T}[ix], [Xn|SP], step
6504 0q 001 1010 L 0 00000 xx1 S sz n t op3 {Vt..t+2.T}[ix], [Xn|SP]
6505 0q 001 1011 L 0 m xx1 S sz n t op3 {Vt..t+2.T}[ix], [Xn|SP], step
6507 0q 001 1010 L 1 00000 xx1 S sz n t op4 {Vt..t+3.T}[ix], [Xn|SP]
6508 0q 001 1011 L 1 m xx1 S sz n t op4 {Vt..t+3.T}[ix], [Xn|SP], step
6510 step = if m == 11111 then transfer-size else Xm
6511 op = case L of 1 -> LD ; 0 -> ST
6513 laneszB,ix = case xx:q:S:sz of 00:b:b:bb -> 1, bbbb
6514 01:b:b:b0 -> 2, bbb
6515 10:b:b:00 -> 4, bb
6516 10:b:0:01 -> 8, b
6518 if (INSN(31,31) == 0 && INSN(29,24) == BITS6(0,0,1,1,0,1)) {
6519 UInt bitQ = INSN(30,30);
6520 Bool isPX = INSN(23,23) == 1;
6521 Bool isLD = INSN(22,22) == 1;
6522 UInt nRegs = ((INSN(13,13) << 1) | INSN(21,21)) + 1;
6523 UInt mm = INSN(20,16);
6524 UInt xx = INSN(15,14);
6525 UInt bitS = INSN(12,12);
6526 UInt sz = INSN(11,10);
6527 UInt nn = INSN(9,5);
6528 UInt tt = INSN(4,0);
6530 Bool valid = True;
6532 /* The combination insn[23] == 0 && insn[20:16] != 0 is not allowed. */
6533 if (!isPX && mm != 0)
6534 valid = False;
6536 UInt laneSzB = 0; /* invalid */
6537 UInt ix = 16; /* invalid */
6539 UInt xx_q_S_sz = (xx << 4) | (bitQ << 3) | (bitS << 2) | sz;
6540 switch (xx_q_S_sz) {
6541 case 0x00: case 0x01: case 0x02: case 0x03:
6542 case 0x04: case 0x05: case 0x06: case 0x07:
6543 case 0x08: case 0x09: case 0x0A: case 0x0B:
6544 case 0x0C: case 0x0D: case 0x0E: case 0x0F:
6545 laneSzB = 1; ix = xx_q_S_sz & 0xF;
6546 break;
6547 case 0x10: case 0x12: case 0x14: case 0x16:
6548 case 0x18: case 0x1A: case 0x1C: case 0x1E:
6549 laneSzB = 2; ix = (xx_q_S_sz >> 1) & 7;
6550 break;
6551 case 0x20: case 0x24: case 0x28: case 0x2C:
6552 laneSzB = 4; ix = (xx_q_S_sz >> 2) & 3;
6553 break;
6554 case 0x21: case 0x29:
6555 laneSzB = 8; ix = (xx_q_S_sz >> 3) & 1;
6556 break;
6557 default:
6558 break;
6561 if (valid && laneSzB != 0) {
6563 IRType ty = integerIRTypeOfSize(laneSzB);
6564 UInt xferSzB = laneSzB * nRegs;
6566 /* Generate the transfer address (TA) and if necessary the
6567 writeback address (WB) */
6568 IRTemp tTA = newTemp(Ity_I64);
6569 assign(tTA, getIReg64orSP(nn));
6570 if (nn == 31) { /* FIXME generate stack alignment check */ }
6571 IRTemp tWB = IRTemp_INVALID;
6572 if (isPX) {
6573 tWB = newTemp(Ity_I64);
6574 assign(tWB, binop(Iop_Add64,
6575 mkexpr(tTA),
6576 mm == BITS5(1,1,1,1,1) ? mkU64(xferSzB)
6577 : getIReg64orZR(mm)));
6580 /* Do the writeback, if necessary */
6581 if (isPX) {
6582 putIReg64orSP(nn, mkexpr(tWB));
6585 switch (nRegs) {
6586 case 4: {
6587 IRExpr* addr
6588 = binop(Iop_Add64, mkexpr(tTA), mkU64(3 * laneSzB));
6589 if (isLD) {
6590 putQRegLane((tt+3) % 32, ix, loadLE(ty, addr));
6591 } else {
6592 storeLE(addr, getQRegLane((tt+3) % 32, ix, ty));
6595 /* fallthrough */
6596 case 3: {
6597 IRExpr* addr
6598 = binop(Iop_Add64, mkexpr(tTA), mkU64(2 * laneSzB));
6599 if (isLD) {
6600 putQRegLane((tt+2) % 32, ix, loadLE(ty, addr));
6601 } else {
6602 storeLE(addr, getQRegLane((tt+2) % 32, ix, ty));
6605 /* fallthrough */
6606 case 2: {
6607 IRExpr* addr
6608 = binop(Iop_Add64, mkexpr(tTA), mkU64(1 * laneSzB));
6609 if (isLD) {
6610 putQRegLane((tt+1) % 32, ix, loadLE(ty, addr));
6611 } else {
6612 storeLE(addr, getQRegLane((tt+1) % 32, ix, ty));
6615 /* fallthrough */
6616 case 1: {
6617 IRExpr* addr
6618 = binop(Iop_Add64, mkexpr(tTA), mkU64(0 * laneSzB));
6619 if (isLD) {
6620 putQRegLane((tt+0) % 32, ix, loadLE(ty, addr));
6621 } else {
6622 storeLE(addr, getQRegLane((tt+0) % 32, ix, ty));
6624 break;
6626 default:
6627 vassert(0);
6630 HChar pxStr[20];
6631 pxStr[0] = pxStr[sizeof(pxStr)-1] = 0;
6632 if (isPX) {
6633 if (mm == BITS5(1,1,1,1,1))
6634 vex_sprintf(pxStr, ", #%u", xferSzB);
6635 else
6636 vex_sprintf(pxStr, ", %s", nameIReg64orZR(mm));
6638 const HChar* arr = nameArr_Q_SZ(bitQ, sz);
6639 DIP("%s%u {v%u.%s .. v%u.%s}[%u], [%s]%s\n",
6640 isLD ? "ld" : "st", nRegs,
6641 (tt+0) % 32, arr, (tt+nRegs-1) % 32, arr,
6642 ix, nameIReg64orSP(nn), pxStr);
6644 return True;
6646 /* else fall through */
6649 /* ------------------ LD{,A}X{R,RH,RB} ------------------ */
6650 /* ------------------ ST{,L}X{R,RH,RB} ------------------ */
6651 /* 31 29 23 20 14 9 4
6652 sz 001000 010 11111 0 11111 n t LDX{R,RH,RB} Rt, [Xn|SP]
6653 sz 001000 010 11111 1 11111 n t LDAX{R,RH,RB} Rt, [Xn|SP]
6654 sz 001000 000 s 0 11111 n t STX{R,RH,RB} Ws, Rt, [Xn|SP]
6655 sz 001000 000 s 1 11111 n t STLX{R,RH,RB} Ws, Rt, [Xn|SP]
6657 /* For the "standard" implementation we pass through the LL and SC to
6658 the host. For the "fallback" implementation, for details see
6659 https://bugs.kde.org/show_bug.cgi?id=344524 and
6660 https://bugs.kde.org/show_bug.cgi?id=369459,
6661 but in short:
6663 LoadLinked(addr)
6664 gs.LLsize = load_size // 1, 2, 4 or 8
6665 gs.LLaddr = addr
6666 gs.LLdata = zeroExtend(*addr)
6668 StoreCond(addr, data)
6669 tmp_LLsize = gs.LLsize
6670 gs.LLsize = 0 // "no transaction"
6671 if tmp_LLsize != store_size -> fail
6672 if addr != gs.LLaddr -> fail
6673 if zeroExtend(*addr) != gs.LLdata -> fail
6674 cas_ok = CAS(store_size, addr, gs.LLdata -> data)
6675 if !cas_ok -> fail
6676 succeed
6678 When thread scheduled
6679 gs.LLsize = 0 // "no transaction"
6680 (coregrind/m_scheduler/scheduler.c, run_thread_for_a_while()
6681 has to do this bit)
6683 if (INSN(29,24) == BITS6(0,0,1,0,0,0)
6684 && (INSN(23,21) & BITS3(1,0,1)) == BITS3(0,0,0)
6685 && INSN(14,10) == BITS5(1,1,1,1,1)) {
6686 UInt szBlg2 = INSN(31,30);
6687 Bool isLD = INSN(22,22) == 1;
6688 Bool isAcqOrRel = INSN(15,15) == 1;
6689 UInt ss = INSN(20,16);
6690 UInt nn = INSN(9,5);
6691 UInt tt = INSN(4,0);
6693 vassert(szBlg2 < 4);
6694 UInt szB = 1 << szBlg2; /* 1, 2, 4 or 8 */
6695 IRType ty = integerIRTypeOfSize(szB);
6696 const HChar* suffix[4] = { "rb", "rh", "r", "r" };
6698 IRTemp ea = newTemp(Ity_I64);
6699 assign(ea, getIReg64orSP(nn));
6700 gen_SIGBUS_if_not_XX_aligned(ea, szB);
6702 if (isLD && ss == BITS5(1,1,1,1,1)) {
6703 IRTemp res = newTemp(ty);
6704 if (abiinfo->guest__use_fallback_LLSC) {
6705 // Do the load first so we don't update any guest state
6706 // if it faults.
6707 IRTemp loaded_data64 = newTemp(Ity_I64);
6708 assign(loaded_data64, widenUto64(ty, loadLE(ty, mkexpr(ea))));
6709 stmt( IRStmt_Put( OFFB_LLSC_DATA_LO64, mkexpr(loaded_data64) ));
6710 stmt( IRStmt_Put( OFFB_LLSC_DATA_HI64, mkU64(0) ));
6711 stmt( IRStmt_Put( OFFB_LLSC_ADDR, mkexpr(ea) ));
6712 stmt( IRStmt_Put( OFFB_LLSC_SIZE, mkU64(szB) ));
6713 putIReg64orZR(tt, mkexpr(loaded_data64));
6714 } else {
6715 stmt(IRStmt_LLSC(Iend_LE, res, mkexpr(ea), NULL/*LL*/));
6716 putIReg64orZR(tt, widenUto64(ty, mkexpr(res)));
6718 if (isAcqOrRel) {
6719 stmt(IRStmt_MBE(Imbe_Fence));
6721 DIP("ld%sx%s %s, [%s] %s\n", isAcqOrRel ? "a" : "", suffix[szBlg2],
6722 nameIRegOrZR(szB == 8, tt), nameIReg64orSP(nn),
6723 abiinfo->guest__use_fallback_LLSC
6724 ? "(fallback implementation)" : "");
6725 return True;
6727 if (!isLD) {
6728 if (isAcqOrRel) {
6729 stmt(IRStmt_MBE(Imbe_Fence));
6731 IRExpr* data = narrowFrom64(ty, getIReg64orZR(tt));
6732 if (abiinfo->guest__use_fallback_LLSC) {
6733 // This is really ugly, since we don't have any way to do
6734 // proper if-then-else. First, set up as if the SC failed,
6735 // and jump forwards if it really has failed.
6737 // Continuation address
6738 IRConst* nia = IRConst_U64(guest_PC_curr_instr + 4);
6740 // "the SC failed". Any non-zero value means failure.
6741 putIReg64orZR(ss, mkU64(1));
6743 IRTemp tmp_LLsize = newTemp(Ity_I64);
6744 assign(tmp_LLsize, IRExpr_Get(OFFB_LLSC_SIZE, Ity_I64));
6745 stmt( IRStmt_Put( OFFB_LLSC_SIZE, mkU64(0) // "no transaction"
6747 // Fail if no or wrong-size transaction
6748 vassert(szB == 8 || szB == 4 || szB == 2 || szB == 1);
6749 stmt( IRStmt_Exit(
6750 binop(Iop_CmpNE64, mkexpr(tmp_LLsize), mkU64(szB)),
6751 Ijk_Boring, nia, OFFB_PC
6753 // Fail if the address doesn't match the LL address
6754 stmt( IRStmt_Exit(
6755 binop(Iop_CmpNE64, mkexpr(ea),
6756 IRExpr_Get(OFFB_LLSC_ADDR, Ity_I64)),
6757 Ijk_Boring, nia, OFFB_PC
6759 // Fail if the data doesn't match the LL data
6760 IRTemp llsc_data64 = newTemp(Ity_I64);
6761 assign(llsc_data64, IRExpr_Get(OFFB_LLSC_DATA_LO64, Ity_I64));
6762 stmt( IRStmt_Exit(
6763 binop(Iop_CmpNE64, widenUto64(ty, loadLE(ty, mkexpr(ea))),
6764 mkexpr(llsc_data64)),
6765 Ijk_Boring, nia, OFFB_PC
6767 // Try to CAS the new value in.
6768 IRTemp old = newTemp(ty);
6769 IRTemp expd = newTemp(ty);
6770 assign(expd, narrowFrom64(ty, mkexpr(llsc_data64)));
6771 stmt( IRStmt_CAS(mkIRCAS(/*oldHi*/IRTemp_INVALID, old,
6772 Iend_LE, mkexpr(ea),
6773 /*expdHi*/NULL, mkexpr(expd),
6774 /*dataHi*/NULL, data
6775 )));
6776 // Fail if the CAS failed (viz, old != expd)
6777 stmt( IRStmt_Exit(
6778 binop(Iop_CmpNE64,
6779 widenUto64(ty, mkexpr(old)),
6780 widenUto64(ty, mkexpr(expd))),
6781 Ijk_Boring, nia, OFFB_PC
6783 // Otherwise we succeeded (!)
6784 putIReg64orZR(ss, mkU64(0));
6785 } else {
6786 IRTemp res = newTemp(Ity_I1);
6787 stmt(IRStmt_LLSC(Iend_LE, res, mkexpr(ea), data));
6788 /* IR semantics: res is 1 if store succeeds, 0 if it fails.
6789 Need to set rS to 1 on failure, 0 on success. */
6790 putIReg64orZR(ss, binop(Iop_Xor64, unop(Iop_1Uto64, mkexpr(res)),
6791 mkU64(1)));
/* Disassembly trace for ST{,L}X{R,RH,RB}.  The release variant of a
   store-exclusive is spelled "stlx..." (see the encoding table at the
   top of this section), so the optional letter is "l", not the "a"
   used by the load-acquire form. */
6793 DIP("st%sx%s %s, %s, [%s] %s\n", isAcqOrRel ? "l" : "", suffix[szBlg2],
6794 nameIRegOrZR(False, ss),
6795 nameIRegOrZR(szB == 8, tt), nameIReg64orSP(nn),
6796 abiinfo->guest__use_fallback_LLSC
6797 ? "(fallback implementation)" : "");
6798 return True;
6800 /* else fall through */
6803 /* -------------------- LD{,A}XP -------------------- */
6804 /* -------------------- ST{,L}XP -------------------- */
6805 /* 31 30 29 23 20 15 14 9 4
6806 1 sz 001000 011 11111 0 t2 n t1 LDXP Rt1, Rt2, [Xn|SP]
6807 1 sz 001000 011 11111 1 t2 n t1 LDAXP Rt1, Rt2, [Xn|SP]
6808 1 sz 001000 001 s 0 t2 n t1 STXP Ws, Rt1, Rt2, [Xn|SP]
6809 1 sz 001000 001 s 1 t2 n t1 STLXP Ws, Rt1, Rt2, [Xn|SP]
6811 /* See just above, "LD{,A}X{R,RH,RB} / ST{,L}X{R,RH,RB}", for detailed
6812 comments about this implementation. Note the 'sz' field here is only 1
6813 bit; above, it is 2 bits, and has a different encoding.
6815 if (INSN(31,31) == 1
6816 && INSN(29,24) == BITS6(0,0,1,0,0,0)
6817 && (INSN(23,21) & BITS3(1,0,1)) == BITS3(0,0,1)) {
6818 Bool elemIs64 = INSN(30,30) == 1;
6819 Bool isLD = INSN(22,22) == 1;
6820 Bool isAcqOrRel = INSN(15,15) == 1;
6821 UInt ss = INSN(20,16);
6822 UInt tt2 = INSN(14,10);
6823 UInt nn = INSN(9,5);
6824 UInt tt1 = INSN(4,0);
6826 UInt elemSzB = elemIs64 ? 8 : 4;
6827 UInt fullSzB = 2 * elemSzB;
6828 IRType elemTy = integerIRTypeOfSize(elemSzB);
6829 IRType fullTy = integerIRTypeOfSize(fullSzB);
6831 IRTemp ea = newTemp(Ity_I64);
6832 assign(ea, getIReg64orSP(nn));
6833 gen_SIGBUS_if_not_XX_aligned(ea, fullSzB);
6835 if (isLD && ss == BITS5(1,1,1,1,1)) {
6836 if (abiinfo->guest__use_fallback_LLSC) {
6837 // Fallback implementation of LL.
6838 // Do the load first so we don't update any guest state if it
6839 // faults. Assumes little-endian guest.
6840 if (fullTy == Ity_I64) {
6841 vassert(elemSzB == 4);
6842 IRTemp loaded_data64 = newTemp(Ity_I64);
6843 assign(loaded_data64, loadLE(fullTy, mkexpr(ea)));
6844 stmt( IRStmt_Put( OFFB_LLSC_DATA_LO64, mkexpr(loaded_data64) ));
6845 stmt( IRStmt_Put( OFFB_LLSC_DATA_HI64, mkU64(0) ));
6846 stmt( IRStmt_Put( OFFB_LLSC_ADDR, mkexpr(ea) ));
6847 stmt( IRStmt_Put( OFFB_LLSC_SIZE, mkU64(8) ));
6848 putIReg64orZR(tt1, unop(Iop_32Uto64,
6849 unop(Iop_64to32,
6850 mkexpr(loaded_data64))));
6851 putIReg64orZR(tt2, unop(Iop_32Uto64,
6852 unop(Iop_64HIto32,
6853 mkexpr(loaded_data64))));
6854 } else {
6855 vassert(elemSzB == 8 && fullTy == Ity_I128);
6856 IRTemp loaded_data128 = newTemp(Ity_I128);
6857 // Hack: do the load as V128 rather than I128 so as to avoid
6858 // having to implement I128 loads in the arm64 back end.
6859 assign(loaded_data128, unop(Iop_ReinterpV128asI128,
6860 loadLE(Ity_V128, mkexpr(ea))));
6861 IRTemp loaded_data_lo64 = newTemp(Ity_I64);
6862 IRTemp loaded_data_hi64 = newTemp(Ity_I64);
6863 assign(loaded_data_lo64, unop(Iop_128to64,
6864 mkexpr(loaded_data128)));
6865 assign(loaded_data_hi64, unop(Iop_128HIto64,
6866 mkexpr(loaded_data128)));
6867 stmt( IRStmt_Put( OFFB_LLSC_DATA_LO64,
6868 mkexpr(loaded_data_lo64) ));
6869 stmt( IRStmt_Put( OFFB_LLSC_DATA_HI64,
6870 mkexpr(loaded_data_hi64) ));
6871 stmt( IRStmt_Put( OFFB_LLSC_ADDR, mkexpr(ea) ));
6872 stmt( IRStmt_Put( OFFB_LLSC_SIZE, mkU64(16) ));
6873 putIReg64orZR(tt1, mkexpr(loaded_data_lo64));
6874 putIReg64orZR(tt2, mkexpr(loaded_data_hi64));
6876 } else {
6877 // Non-fallback implementation of LL.
6878 IRTemp res = newTemp(fullTy); // I64 or I128
6879 stmt(IRStmt_LLSC(Iend_LE, res, mkexpr(ea), NULL/*LL*/));
6880 // Assuming a little-endian guest here. Rt1 goes at the lower
6881 // address, so it must live in the least significant half of `res`.
6882 IROp opGetLO = fullTy == Ity_I128 ? Iop_128to64 : Iop_64to32;
6883 IROp opGetHI = fullTy == Ity_I128 ? Iop_128HIto64 : Iop_64HIto32;
6884 putIReg64orZR(tt1, widenUto64(elemTy, unop(opGetLO, mkexpr(res))));
6885 putIReg64orZR(tt2, widenUto64(elemTy, unop(opGetHI, mkexpr(res))));
6887 if (isAcqOrRel) {
6888 stmt(IRStmt_MBE(Imbe_Fence));
6890 DIP("ld%sxp %s, %s, [%s] %s\n",
6891 isAcqOrRel ? (isLD ? "a" : "l") : "",
6892 nameIRegOrZR(elemSzB == 8, tt1),
6893 nameIRegOrZR(elemSzB == 8, tt2),
6894 nameIReg64orSP(nn),
6895 abiinfo->guest__use_fallback_LLSC
6896 ? "(fallback implementation)" : "");
6897 return True;
6899 if (!isLD) {
6900 if (isAcqOrRel) {
6901 stmt(IRStmt_MBE(Imbe_Fence));
6903 if (abiinfo->guest__use_fallback_LLSC) {
6904 // Fallback implementation of SC.
6905 // This is really ugly, since we don't have any way to do
6906 // proper if-then-else. First, set up as if the SC failed,
6907 // and jump forwards if it really has failed.
6909 // Continuation address
6910 IRConst* nia = IRConst_U64(guest_PC_curr_instr + 4);
6912 // "the SC failed". Any non-zero value means failure.
6913 putIReg64orZR(ss, mkU64(1));
6915 IRTemp tmp_LLsize = newTemp(Ity_I64);
6916 assign(tmp_LLsize, IRExpr_Get(OFFB_LLSC_SIZE, Ity_I64));
6917 stmt( IRStmt_Put( OFFB_LLSC_SIZE, mkU64(0) // "no transaction"
6919 // Fail if no or wrong-size transaction
6920 vassert((fullSzB == 8 && fullTy == Ity_I64)
6921 || (fullSzB == 16 && fullTy == Ity_I128));
6922 stmt( IRStmt_Exit(
6923 binop(Iop_CmpNE64, mkexpr(tmp_LLsize), mkU64(fullSzB)),
6924 Ijk_Boring, nia, OFFB_PC
6926 // Fail if the address doesn't match the LL address
6927 stmt( IRStmt_Exit(
6928 binop(Iop_CmpNE64, mkexpr(ea),
6929 IRExpr_Get(OFFB_LLSC_ADDR, Ity_I64)),
6930 Ijk_Boring, nia, OFFB_PC
6932 // The data to be stored.
6933 IRTemp store_data = newTemp(fullTy);
6934 if (fullTy == Ity_I64) {
6935 assign(store_data,
6936 binop(Iop_32HLto64,
6937 narrowFrom64(Ity_I32, getIReg64orZR(tt2)),
6938 narrowFrom64(Ity_I32, getIReg64orZR(tt1))));
6939 } else {
6940 assign(store_data,
6941 binop(Iop_64HLto128,
6942 getIReg64orZR(tt2), getIReg64orZR(tt1)));
6945 if (fullTy == Ity_I64) {
6946 // 64 bit (2x32 bit) path
6947 // Fail if the data in memory doesn't match the data stashed by
6948 // the LL.
6949 IRTemp llsc_data_lo64 = newTemp(Ity_I64);
6950 assign(llsc_data_lo64,
6951 IRExpr_Get(OFFB_LLSC_DATA_LO64, Ity_I64));
6952 stmt( IRStmt_Exit(
6953 binop(Iop_CmpNE64, loadLE(Ity_I64, mkexpr(ea)),
6954 mkexpr(llsc_data_lo64)),
6955 Ijk_Boring, nia, OFFB_PC
6957 // Try to CAS the new value in.
6958 IRTemp old = newTemp(Ity_I64);
6959 IRTemp expd = newTemp(Ity_I64);
6960 assign(expd, mkexpr(llsc_data_lo64));
6961 stmt( IRStmt_CAS(mkIRCAS(/*oldHi*/IRTemp_INVALID, old,
6962 Iend_LE, mkexpr(ea),
6963 /*expdHi*/NULL, mkexpr(expd),
6964 /*dataHi*/NULL, mkexpr(store_data)
6965 )));
6966 // Fail if the CAS failed (viz, old != expd)
6967 stmt( IRStmt_Exit(
6968 binop(Iop_CmpNE64, mkexpr(old), mkexpr(expd)),
6969 Ijk_Boring, nia, OFFB_PC
6971 } else {
6972 // 128 bit (2x64 bit) path
6973 // Fail if the data in memory doesn't match the data stashed by
6974 // the LL.
6975 IRTemp llsc_data_lo64 = newTemp(Ity_I64);
6976 assign(llsc_data_lo64,
6977 IRExpr_Get(OFFB_LLSC_DATA_LO64, Ity_I64));
6978 IRTemp llsc_data_hi64 = newTemp(Ity_I64);
6979 assign(llsc_data_hi64,
6980 IRExpr_Get(OFFB_LLSC_DATA_HI64, Ity_I64));
6981 IRTemp data_at_ea = newTemp(Ity_I128);
6982 assign(data_at_ea,
6983 unop(Iop_ReinterpV128asI128,
6984 loadLE(Ity_V128, mkexpr(ea))));
6985 stmt( IRStmt_Exit(
6986 binop(Iop_CmpNE64,
6987 unop(Iop_128to64, mkexpr(data_at_ea)),
6988 mkexpr(llsc_data_lo64)),
6989 Ijk_Boring, nia, OFFB_PC
6991 stmt( IRStmt_Exit(
6992 binop(Iop_CmpNE64,
6993 unop(Iop_128HIto64, mkexpr(data_at_ea)),
6994 mkexpr(llsc_data_hi64)),
6995 Ijk_Boring, nia, OFFB_PC
6997 // Try to CAS the new value in.
6998 IRTemp old_lo64 = newTemp(Ity_I64);
6999 IRTemp old_hi64 = newTemp(Ity_I64);
7000 IRTemp expd_lo64 = newTemp(Ity_I64);
7001 IRTemp expd_hi64 = newTemp(Ity_I64);
7002 IRTemp store_data_lo64 = newTemp(Ity_I64);
7003 IRTemp store_data_hi64 = newTemp(Ity_I64);
7004 assign(expd_lo64, mkexpr(llsc_data_lo64));
7005 assign(expd_hi64, mkexpr(llsc_data_hi64));
7006 assign(store_data_lo64, unop(Iop_128to64, mkexpr(store_data)));
7007 assign(store_data_hi64, unop(Iop_128HIto64, mkexpr(store_data)));
7008 stmt( IRStmt_CAS(mkIRCAS(old_hi64, old_lo64,
7009 Iend_LE, mkexpr(ea),
7010 mkexpr(expd_hi64), mkexpr(expd_lo64),
7011 mkexpr(store_data_hi64),
7012 mkexpr(store_data_lo64)
7013 )));
7014 // Fail if the CAS failed (viz, old != expd)
7015 stmt( IRStmt_Exit(
7016 binop(Iop_CmpNE64, mkexpr(old_lo64), mkexpr(expd_lo64)),
7017 Ijk_Boring, nia, OFFB_PC
7019 stmt( IRStmt_Exit(
7020 binop(Iop_CmpNE64, mkexpr(old_hi64), mkexpr(expd_hi64)),
7021 Ijk_Boring, nia, OFFB_PC
7024 // Otherwise we succeeded (!)
7025 putIReg64orZR(ss, mkU64(0));
7026 } else {
7027 // Non-fallback implementation of SC.
7028 IRTemp res = newTemp(Ity_I1);
7029 IRExpr* dataLO = narrowFrom64(elemTy, getIReg64orZR(tt1));
7030 IRExpr* dataHI = narrowFrom64(elemTy, getIReg64orZR(tt2));
7031 IROp opMerge = fullTy == Ity_I128 ? Iop_64HLto128 : Iop_32HLto64;
7032 IRExpr* data = binop(opMerge, dataHI, dataLO);
7033 // Assuming a little-endian guest here. Rt1 goes at the lower
7034 // address, so it must live in the least significant half of `data`.
7035 stmt(IRStmt_LLSC(Iend_LE, res, mkexpr(ea), data));
7036 /* IR semantics: res is 1 if store succeeds, 0 if it fails.
7037 Need to set rS to 1 on failure, 0 on success. */
7038 putIReg64orZR(ss, binop(Iop_Xor64, unop(Iop_1Uto64, mkexpr(res)),
7039 mkU64(1)));
7041 DIP("st%sxp %s, %s, %s, [%s] %s\n",
7042 isAcqOrRel ? (isLD ? "a" : "l") : "",
7043 nameIRegOrZR(False, ss),
7044 nameIRegOrZR(elemSzB == 8, tt1),
7045 nameIRegOrZR(elemSzB == 8, tt2),
7046 nameIReg64orSP(nn),
7047 abiinfo->guest__use_fallback_LLSC
7048 ? "(fallback implementation)" : "");
7049 return True;
7051 /* else fall through */
7054 /* ------------------ LDA{R,RH,RB} ------------------ */
7055 /* ------------------ STL{R,RH,RB} ------------------ */
7056 /* 31 29 23 20 14 9 4
7057 sz 001000 110 11111 1 11111 n t LDAR<sz> Rt, [Xn|SP]
7058 sz 001000 100 11111 1 11111 n t STLR<sz> Rt, [Xn|SP]
7060 if (INSN(29,23) == BITS7(0,0,1,0,0,0,1)
7061 && INSN(21,10) == BITS12(0,1,1,1,1,1,1,1,1,1,1,1)) {
7062 UInt szBlg2 = INSN(31,30);
7063 Bool isLD = INSN(22,22) == 1;
7064 UInt nn = INSN(9,5);
7065 UInt tt = INSN(4,0);
7067 vassert(szBlg2 < 4);
7068 UInt szB = 1 << szBlg2; /* 1, 2, 4 or 8 */
7069 IRType ty = integerIRTypeOfSize(szB);
7070 const HChar* suffix[4] = { "rb", "rh", "r", "r" };
7072 IRTemp ea = newTemp(Ity_I64);
7073 assign(ea, getIReg64orSP(nn));
7074 gen_SIGBUS_if_not_XX_aligned(ea, szB);
7076 if (isLD) {
7077 IRTemp res = newTemp(ty);
7078 assign(res, loadLE(ty, mkexpr(ea)));
7079 putIReg64orZR(tt, widenUto64(ty, mkexpr(res)));
7080 stmt(IRStmt_MBE(Imbe_Fence));
7081 DIP("lda%s %s, [%s]\n", suffix[szBlg2],
7082 nameIRegOrZR(szB == 8, tt), nameIReg64orSP(nn));
7083 } else {
7084 stmt(IRStmt_MBE(Imbe_Fence));
7085 IRExpr* data = narrowFrom64(ty, getIReg64orZR(tt));
7086 storeLE(mkexpr(ea), data);
7087 DIP("stl%s %s, [%s]\n", suffix[szBlg2],
7088 nameIRegOrZR(szB == 8, tt), nameIReg64orSP(nn));
7090 return True;
7093 /* The PRFM cases that follow possibly allow Rt values (the
7094 prefetch operation) which are not allowed by the documentation.
7095 This should be looked into. */
7096 /* ------------------ PRFM (immediate) ------------------ */
7097 /* 31 21 9 4
7098 11 111 00110 imm12 n t PRFM prfop=Rt, [Xn|SP, #pimm]
7100 if (INSN(31,22) == BITS10(1,1,1,1,1,0,0,1,1,0)) {
7101 UInt imm12 = INSN(21,10);
7102 UInt nn = INSN(9,5);
7103 UInt tt = INSN(4,0);
7104 /* Generating any IR here is pointless, except for documentation
7105 purposes, as it will get optimised away later. */
7106 IRTemp ea = newTemp(Ity_I64);
7107 assign(ea, binop(Iop_Add64, getIReg64orSP(nn), mkU64(imm12 * 8)));
7108 DIP("prfm prfop=%u, [%s, #%u]\n", tt, nameIReg64orSP(nn), imm12 * 8);
7109 return True;
7112 /* ------------------ PRFM (register) ------------------ */
7113 /* 31 29 22 20 15 12 11 9 4
7114 11 1110001 01 Rm opt S 10 Rn Rt PRFM prfop=Rt, [Xn|SP, R<m>{ext/sh}]
7116 if (INSN(31,21) == BITS11(1,1,1,1,1,0,0,0,1,0,1)
7117 && INSN(11,10) == BITS2(1,0)) {
7118 HChar dis_buf[64];
7119 UInt tt = INSN(4,0);
7120 IRTemp ea = gen_indexed_EA(dis_buf, insn, True/*to/from int regs*/);
7121 if (ea != IRTemp_INVALID) {
7122 /* No actual code to generate. */
7123 DIP("prfm prfop=%u, %s\n", tt, dis_buf);
7124 return True;
7128 /* ------------------ PRFM (unscaled offset) ------------------ */
7129 /* 31 29 22 20 11 9 4
7130 11 1110001 00 imm9 00 Rn Rt PRFUM prfop=Rt, [Xn|SP, #simm]
7132 if (INSN(31,21) == BITS11(1,1, 1,1,1,0,0,0,1, 0,0)
7133 && INSN(11,10) == BITS2(0,0)) {
7134 ULong imm9 = INSN(20,12);
7135 UInt nn = INSN(9,5);
7136 UInt tt = INSN(4,0);
7137 ULong offset = sx_to_64(imm9, 9);
7138 IRTemp ea = newTemp(Ity_I64);
7139 assign(ea, binop(Iop_Add64, getIReg64orSP(nn), mkU64(offset)));
7140 /* No actual code to generate. */
7141 DIP("prfum prfop=%u, [%s, #0x%llx]\n", tt, nameIReg64orSP(nn), offset);
7142 return True;
7145 /* ---------------- ARMv8.1-LSE: Atomic Memory Operations ---------------- */
7146 /* 31 29 23 22 21 20 15 11 9 4
7147 sz 111000 A R 1 s 0000 00 n t LDADD{,A}{,L}<sz> <Rs>, <Rt>, [<Xn|SP>]
7148 sz 111000 A R 1 s 0001 00 n t LDCLR{,A}{,L}<sz> <Rs>, <Rt>, [<Xn|SP>]
7149 sz 111000 A R 1 s 0010 00 n t LDEOR{,A}{,L}<sz> <Rs>, <Rt>, [<Xn|SP>]
7150 sz 111000 A R 1 s 0011 00 n t LDSET{,A}{,L}<sz> <Rs>, <Rt>, [<Xn|SP>]
7151 sz 111000 A R 1 s 0100 00 n t LDSMAX{,A}{,L}<sz> <Rs>, <Rt>, [<Xn|SP>]
7152 sz 111000 A R 1 s 0101 00 n t LDSMIN{,A}{,L}<sz> <Rs>, <Rt>, [<Xn|SP>]
7153 sz 111000 A R 1 s 0110 00 n t LDUMAX{,A}{,L}<sz> <Rs>, <Rt>, [<Xn|SP>]
7154 sz 111000 A R 1 s 0111 00 n t LDUMIN{,A}{,L}<sz> <Rs>, <Rt>, [<Xn|SP>]
7155 sz 111000 A R 1 s 1000 00 n t SWP{,A}{,L}<sz> <Rs>, <Rt>, [<Xn|SP>]
7157 if (INSN(29,24) == BITS6(1,1,1,0,0,0)
7158 && INSN(21,21) == 1
7159 && (INSN(15,12) <= BITS4(1,0,0,0))
7160 && INSN(11,10) == BITS2(0,0)) {
7161 UInt szBlg2 = INSN(31,30);
7162 Bool isAcq = INSN(23,23) == 1;
7163 Bool isRel = INSN(22,22) == 1;
7164 UInt ss = INSN(20,16);
7165 UInt opc = INSN(15,12);
7166 UInt nn = INSN(9,5);
7167 UInt tt = INSN(4,0);
7169 const HChar* nm = NULL;
7170 const HChar* suffix[4] = { "b", "h", "", "" };
7172 vassert(szBlg2 < 4);
7173 UInt szB = 1 << szBlg2; /* 1, 2, 4 or 8 bytes*/
7174 IRType ty = integerIRTypeOfSize(szB);
7175 Bool is64 = szB == 8;
7176 Bool isSigned = (opc == 4) || (opc == 5) /*smax || smin*/;
7178 // IR used to emulate these atomic memory ops:
7179 // 1) barrier
7180 // 2) load
7181 // 3) widen operands and do arithmetic/logic op
7182 // 4) cas to see if target memory updated
7183 // 5) barrier
7184 // 6) repeat from 1) if cas says target memory not updated
7185 // 7) update register
7187 IRTemp ea = newTemp(Ity_I64);
7188 assign(ea, getIReg64orSP(nn));
7189 gen_SIGBUS_if_not_XX_aligned(ea, szB);
7191 // Insert barrier before loading for acquire and acquire-release variants:
7192 // A and AL.
7193 if (isAcq && (tt != 31))
7194 stmt(IRStmt_MBE(Imbe_Fence));
7196 // Load LHS from memory, RHS from register.
7197 IRTemp orig = newTemp(ty);
7198 assign(orig, loadLE(ty, mkexpr(ea)));
7199 IRExpr *lhs = mkexpr(orig);
7200 IRExpr *rhs = narrowFrom64(ty, getIReg64orZR(ss));
7201 IRExpr *res = NULL;
7203 lhs = isSigned ? widenSto64(ty, lhs) : widenUto64(ty, lhs);
7204 rhs = isSigned ? widenSto64(ty, rhs) : widenUto64(ty, rhs);
7206 // Perform the operation.
7207 switch (opc) {
7208 case 0:
7209 nm = "ldadd";
7210 res = binop(Iop_Add64, lhs, rhs);
7211 break;
7212 case 1:
7213 nm = "ldclr";
7214 res = binop(Iop_And64, lhs, unop(mkNOT(Ity_I64), rhs));
7215 break;
7216 case 2:
7217 nm = "ldeor";
7218 res = binop(Iop_Xor64, lhs, rhs);
7219 break;
7220 case 3:
7221 nm = "ldset";
7222 res = binop(Iop_Or64, lhs, rhs);
7223 break;
7224 case 4:
7225 nm = "ldsmax";
7226 res = IRExpr_ITE(binop(Iop_CmpLT64S, lhs, rhs), rhs, lhs);
7227 break;
7228 case 5:
7229 nm = "ldsmin";
7230 res = IRExpr_ITE(binop(Iop_CmpLT64S, lhs, rhs), lhs, rhs);
7231 break;
7232 case 6:
7233 nm = "ldumax";
7234 res = IRExpr_ITE(binop(Iop_CmpLT64U, lhs, rhs), rhs, lhs);
7235 break;
7236 case 7:
7237 nm = "ldumin";
7238 res = IRExpr_ITE(binop(Iop_CmpLT64U, lhs, rhs), lhs, rhs);
7239 break;
7240 case 8:
7241 nm = "swp";
7242 res = rhs;
7243 break;
7244 default:
7245 vassert(0);
7246 break;
7249 // Store the result back if LHS remains unchanged in memory.
7250 IRTemp old = newTemp(ty);
7251 stmt( IRStmt_CAS(mkIRCAS(/*oldHi*/IRTemp_INVALID, old,
7252 Iend_LE, mkexpr(ea),
7253 /*expdHi*/NULL, mkexpr(orig),
7254 /*dataHi*/NULL, narrowFrom64(ty, res))) );
7256 // Insert barrier after storing for release and acquire-release variants:
7257 // L and AL.
7258 if (isRel)
7259 stmt(IRStmt_MBE(Imbe_Fence));
7261 // Retry if the CAS failed (i.e. when old != orig).
7262 IRConst* nia = IRConst_U64(guest_PC_curr_instr);
7263 stmt( IRStmt_Exit(
7264 binop(Iop_CasCmpNE64,
7265 widenUto64(ty, mkexpr(old)),
7266 widenUto64(ty, mkexpr(orig))),
7267 Ijk_Boring, nia, OFFB_PC ));
7268 // Otherwise we succeeded.
7269 putIReg64orZR(tt, widenUto64(ty, mkexpr(old)));
7271 DIP("%s%s%s%s %s, %s, [%s]\n", nm, isAcq ? "a" : "", isRel ? "l" : "",
7272 suffix[szBlg2], nameIRegOrZR(is64, ss), nameIRegOrZR(is64, tt),
7273 nameIReg64orSP(nn));
7274 return True;
7277 /* ------------------ ARMv8.1-LSE: Compare-and-Swap ------------------ */
7278 /* 31 29 22 21 20 15 14 9 4
7279 sz 0010001 A 1 s R 11111 n t CAS{,A}{,L}<sz> <Rs>, <Rt>, [<Xn|SP>]
7281 if (INSN(29,23) == BITS7(0,0,1,0,0,0,1)
7282 && INSN(21,21) == 1
7283 && INSN(14,10) == BITS5(1,1,1,1,1)) {
7284 UInt szBlg2 = INSN(31,30);
7285 Bool isAcq = INSN(22,22) == 1;
7286 Bool isRel = INSN(15,15) == 1;
7287 UInt ss = INSN(20,16);
7288 UInt nn = INSN(9,5);
7289 UInt tt = INSN(4,0);
7291 const HChar* suffix[4] = { "b", "h", "", "" };
7293 UInt szB = 1 << szBlg2; /* 1, 2, 4 or 8 */
7294 IRType ty = integerIRTypeOfSize(szB);
7295 Bool is64 = szB == 8;
7297 IRTemp ea = newTemp(Ity_I64);
7298 assign(ea, getIReg64orSP(nn));
7299 gen_SIGBUS_if_not_XX_aligned(ea, szB);
7301 IRExpr *exp = narrowFrom64(ty, getIReg64orZR(ss));
7302 IRExpr *new = narrowFrom64(ty, getIReg64orZR(tt));
7304 if (isAcq)
7305 stmt(IRStmt_MBE(Imbe_Fence));
7307 // Store the result back if LHS remains unchanged in memory.
7308 IRTemp old = newTemp(ty);
7309 stmt( IRStmt_CAS(mkIRCAS(/*oldHi*/IRTemp_INVALID, old,
7310 Iend_LE, mkexpr(ea),
7311 /*expdHi*/NULL, exp,
7312 /*dataHi*/NULL, new)) );
7314 if (isRel)
7315 stmt(IRStmt_MBE(Imbe_Fence));
7317 putIReg64orZR(ss, widenUto64(ty, mkexpr(old)));
7318 DIP("cas%s%s%s %s, %s, [%s]\n",
7319 isAcq ? "a" : "", isRel ? "l" : "", suffix[szBlg2],
7320 nameIRegOrZR(is64, ss), nameIRegOrZR(is64, tt), nameIReg64orSP(nn));
7321 return True;
7324 /* ---------------- ARMv8.1-LSE: Compare-and-Swap Pair --------------- */
7325 /* 31 30 29 22 21 20 15 14 9 4
7326 0 sz 0010000 A 1 s R 11111 n t CASP{,A}{,L} <Rs>, <Rt>, [<Xn|SP>]
7328 if (INSN(31,31) == 0
7329 && INSN(29,23) == BITS7(0,0,1,0,0,0,0)
7330 && INSN(21,21) == 1
7331 && INSN(14,10) == BITS5(1,1,1,1,1)) {
7332 UInt is64 = INSN(30,30);
7333 Bool isAcq = INSN(22,22) == 1;
7334 Bool isRel = INSN(15,15) == 1;
7335 UInt ss = INSN(20,16);
7336 UInt nn = INSN(9,5);
7337 UInt tt = INSN(4,0);
7339 if ((ss & 0x1) || (tt & 0x1)) {
7340 /* undefined; fall through */
7341 } else {
7342 IRTemp ea = newTemp(Ity_I64);
7343 assign(ea, getIReg64orSP(nn));
7344 gen_SIGBUS_if_not_XX_aligned(ea, is64 ? 16 : 8);
7346 IRExpr *expLo = getIRegOrZR(is64, ss);
7347 IRExpr *expHi = getIRegOrZR(is64, ss + 1);
7348 IRExpr *newLo = getIRegOrZR(is64, tt);
7349 IRExpr *newHi = getIRegOrZR(is64, tt + 1);
7350 IRTemp oldLo = newTemp(is64 ? Ity_I64 : Ity_I32);
7351 IRTemp oldHi = newTemp(is64 ? Ity_I64 : Ity_I32);
7353 if (isAcq)
7354 stmt(IRStmt_MBE(Imbe_Fence));
7356 stmt( IRStmt_CAS(mkIRCAS(oldHi, oldLo,
7357 Iend_LE, mkexpr(ea),
7358 expHi, expLo,
7359 newHi, newLo)) );
7361 if (isRel)
7362 stmt(IRStmt_MBE(Imbe_Fence));
7364 putIRegOrZR(is64, ss, mkexpr(oldLo));
7365 putIRegOrZR(is64, ss+1, mkexpr(oldHi));
7366 DIP("casp%s%s %s, %s, %s, %s, [%s]\n",
7367 isAcq ? "a" : "", isRel ? "l" : "",
7368 nameIRegOrZR(is64, ss), nameIRegOrZR(is64, ss+1),
7369 nameIRegOrZR(is64, tt), nameIRegOrZR(is64, tt+1),
7370 nameIReg64orSP(nn));
7371 return True;
7375 if (sigill_diag) {
7376 vex_printf("ARM64 front end: load_store\n");
7379 return False;
7380 # undef INSN
7384 /*------------------------------------------------------------*/
7385 /*--- Control flow and misc instructions ---*/
7386 /*------------------------------------------------------------*/
7388 static
7389 Bool dis_ARM64_branch_etc(/*MB_OUT*/DisResult* dres, UInt insn,
7390 const VexArchInfo* archinfo,
7391 const VexAbiInfo* abiinfo, Bool sigill_diag)
7393 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
7395 /* ---------------------- B cond ----------------------- */
7396 /* 31 24 4 3
7397 0101010 0 imm19 0 cond */
7398 if (INSN(31,24) == BITS8(0,1,0,1,0,1,0,0) && INSN(4,4) == 0) {
7399 UInt cond = INSN(3,0);
7400 ULong uimm64 = INSN(23,5) << 2;
7401 Long simm64 = (Long)sx_to_64(uimm64, 21);
7402 vassert(dres->whatNext == Dis_Continue);
7403 vassert(dres->len == 4);
7404 vassert(dres->jk_StopHere == Ijk_INVALID);
7405 stmt( IRStmt_Exit(unop(Iop_64to1, mk_arm64g_calculate_condition(cond)),
7406 Ijk_Boring,
7407 IRConst_U64(guest_PC_curr_instr + simm64),
7408 OFFB_PC) );
7409 putPC(mkU64(guest_PC_curr_instr + 4));
7410 dres->whatNext = Dis_StopHere;
7411 dres->jk_StopHere = Ijk_Boring;
7412 DIP("b.%s 0x%llx\n", nameCC(cond), guest_PC_curr_instr + simm64);
7413 return True;
7416 /* -------------------- B{L} uncond -------------------- */
7417 if (INSN(30,26) == BITS5(0,0,1,0,1)) {
7418 /* 000101 imm26 B (PC + sxTo64(imm26 << 2))
7419 100101 imm26 BL (PC + sxTo64(imm26 << 2))
7421 UInt bLink = INSN(31,31);
7422 ULong uimm64 = INSN(25,0) << 2;
7423 Long simm64 = (Long)sx_to_64(uimm64, 28);
7424 if (bLink) {
7425 putIReg64orSP(30, mkU64(guest_PC_curr_instr + 4));
7427 putPC(mkU64(guest_PC_curr_instr + simm64));
7428 dres->whatNext = Dis_StopHere;
7429 dres->jk_StopHere = Ijk_Call;
7430 DIP("b%s 0x%llx\n", bLink == 1 ? "l" : "",
7431 guest_PC_curr_instr + simm64);
7432 return True;
7435 /* --------------------- B{L} reg --------------------- */
7436 /* 31 24 22 20 15 9 4
7437 1101011 00 10 11111 000000 nn 00000 RET Rn
7438 1101011 00 01 11111 000000 nn 00000 CALL Rn
7439 1101011 00 00 11111 000000 nn 00000 JMP Rn
7441 if (INSN(31,23) == BITS9(1,1,0,1,0,1,1,0,0)
7442 && INSN(20,16) == BITS5(1,1,1,1,1)
7443 && INSN(15,10) == BITS6(0,0,0,0,0,0)
7444 && INSN(4,0) == BITS5(0,0,0,0,0)) {
7445 UInt branch_type = INSN(22,21);
7446 UInt nn = INSN(9,5);
7447 if (branch_type == BITS2(1,0) /* RET */) {
7448 putPC(getIReg64orZR(nn));
7449 dres->whatNext = Dis_StopHere;
7450 dres->jk_StopHere = Ijk_Ret;
7451 DIP("ret %s\n", nameIReg64orZR(nn));
7452 return True;
7454 if (branch_type == BITS2(0,1) /* CALL */) {
7455 IRTemp dst = newTemp(Ity_I64);
7456 assign(dst, getIReg64orZR(nn));
7457 putIReg64orSP(30, mkU64(guest_PC_curr_instr + 4));
7458 putPC(mkexpr(dst));
7459 dres->whatNext = Dis_StopHere;
7460 dres->jk_StopHere = Ijk_Call;
7461 DIP("blr %s\n", nameIReg64orZR(nn));
7462 return True;
7464 if (branch_type == BITS2(0,0) /* JMP */) {
7465 putPC(getIReg64orZR(nn));
7466 dres->whatNext = Dis_StopHere;
7467 dres->jk_StopHere = Ijk_Boring;
7468 DIP("jmp %s\n", nameIReg64orZR(nn));
7469 return True;
7473 /* -------------------- CB{N}Z -------------------- */
7474 /* sf 011 010 1 imm19 Rt CBNZ Xt|Wt, (PC + sxTo64(imm19 << 2))
7475 sf 011 010 0 imm19 Rt CBZ Xt|Wt, (PC + sxTo64(imm19 << 2))
7477 if (INSN(30,25) == BITS6(0,1,1,0,1,0)) {
7478 Bool is64 = INSN(31,31) == 1;
7479 Bool bIfZ = INSN(24,24) == 0;
7480 ULong uimm64 = INSN(23,5) << 2;
7481 UInt rT = INSN(4,0);
7482 Long simm64 = (Long)sx_to_64(uimm64, 21);
7483 IRExpr* cond = NULL;
7484 if (is64) {
7485 cond = binop(bIfZ ? Iop_CmpEQ64 : Iop_CmpNE64,
7486 getIReg64orZR(rT), mkU64(0));
7487 } else {
7488 cond = binop(bIfZ ? Iop_CmpEQ32 : Iop_CmpNE32,
7489 getIReg32orZR(rT), mkU32(0));
7491 stmt( IRStmt_Exit(cond,
7492 Ijk_Boring,
7493 IRConst_U64(guest_PC_curr_instr + simm64),
7494 OFFB_PC) );
7495 putPC(mkU64(guest_PC_curr_instr + 4));
7496 dres->whatNext = Dis_StopHere;
7497 dres->jk_StopHere = Ijk_Boring;
7498 DIP("cb%sz %s, 0x%llx\n",
7499 bIfZ ? "" : "n", nameIRegOrZR(is64, rT),
7500 guest_PC_curr_instr + simm64);
7501 return True;
7504 /* -------------------- TB{N}Z -------------------- */
7505 /* 31 30 24 23 18 5 4
7506 b5 011 011 1 b40 imm14 t TBNZ Xt, #(b5:b40), (PC + sxTo64(imm14 << 2))
7507 b5 011 011 0 b40 imm14 t TBZ Xt, #(b5:b40), (PC + sxTo64(imm14 << 2))
7509 if (INSN(30,25) == BITS6(0,1,1,0,1,1)) {
7510 UInt b5 = INSN(31,31);
7511 Bool bIfZ = INSN(24,24) == 0;
7512 UInt b40 = INSN(23,19);
7513 UInt imm14 = INSN(18,5);
7514 UInt tt = INSN(4,0);
7515 UInt bitNo = (b5 << 5) | b40;
7516 ULong uimm64 = imm14 << 2;
7517 Long simm64 = sx_to_64(uimm64, 16);
7518 IRExpr* cond
7519 = binop(bIfZ ? Iop_CmpEQ64 : Iop_CmpNE64,
7520 binop(Iop_And64,
7521 binop(Iop_Shr64, getIReg64orZR(tt), mkU8(bitNo)),
7522 mkU64(1)),
7523 mkU64(0));
7524 stmt( IRStmt_Exit(cond,
7525 Ijk_Boring,
7526 IRConst_U64(guest_PC_curr_instr + simm64),
7527 OFFB_PC) );
7528 putPC(mkU64(guest_PC_curr_instr + 4));
7529 dres->whatNext = Dis_StopHere;
7530 dres->jk_StopHere = Ijk_Boring;
7531 DIP("tb%sz %s, #%u, 0x%llx\n",
7532 bIfZ ? "" : "n", nameIReg64orZR(tt), bitNo,
7533 guest_PC_curr_instr + simm64);
7534 return True;
7537 /* -------------------- SVC -------------------- */
7538 /* 11010100 000 imm16 000 01
7539 Don't bother with anything except the imm16==0 case.
7541 if (INSN(31,0) == 0xD4000001) {
7542 putPC(mkU64(guest_PC_curr_instr + 4));
7543 dres->whatNext = Dis_StopHere;
7544 dres->jk_StopHere = Ijk_Sys_syscall;
7545 DIP("svc #0\n");
7546 return True;
7549 /* ------------------ M{SR,RS} ------------------ */
7550 /* ---- Case for MIDR_EL1 (RO) ----
7551 Read the Main ID register from host.
7552 0xD53800 000 Rt MRS rT, midr_el1
7554 if ((INSN(31,0) & 0xFFFFFFE0) == 0xD5380000 /*MRS*/) {
7555 UInt tt = INSN(4,0);
7556 IRTemp val = newTemp(Ity_I64);
7557 IRExpr** args = mkIRExprVec_0();
7558 IRDirty* d = unsafeIRDirty_1_N (
7559 val,
7560 0/*regparms*/,
7561 "arm64g_dirtyhelper_MRS_MIDR_EL1",
7562 &arm64g_dirtyhelper_MRS_MIDR_EL1,
7563 args
7565 /* execute the dirty call, dumping the result in val. */
7566 stmt( IRStmt_Dirty(d) );
7567 putIReg64orZR(tt, mkexpr(val));
7568 DIP("mrs %s, midr_el1\n", nameIReg64orZR(tt));
7569 return True;
7571 /* ---- Case for MPIDR_EL1 (RO) ----
7572 Instead of returning a fake register, we use the same
7573 value as does the kernel emulation.
7574 0xD53800 101 Rt MRS rT, mpidr_el1
7576 if ((INSN(31,0) & 0xFFFFFFE0) == 0xD53800A0 /*MRS*/) {
7577 UInt tt = INSN(4,0);
7578 putIReg64orZR(tt, mkU64((1UL<<31)));
7579 DIP("mrs %s, mpidr_el1 (FAKED)\n", nameIReg64orZR(tt));
7580 return True;
7582 /* ---- Case for REVIDR_EL1 (RO) ----
7583 Instead of emulating the register, we just return the same
7584 value as does the kernel emulation.
7585 0xD53800 110 Rt MRS rT, revidr_el1
7587 if ((INSN(31,0) & 0xFFFFFFE0) == 0xD53800C0 /*MRS*/) {
7588 UInt tt = INSN(4,0);
7589 putIReg32orZR(tt, mkU32(0x0));
7590 DIP("mrs %s, revdir_el1 (FAKED)\n", nameIReg32orZR(tt));
7591 return True;
7593 /* ---- Case for ID_AA64PFR0_EL1 (RO) ----
7594 Instead of returning a fake register, we use the same
7595 value as does the kernel emulation. We downgrade half-precision
7596 floating-point support to normal floating-point support.
7597 We set all other values to zero.
7598 0xD53804 000 Rt MRS rT, id_aa64pfr0_el1
7600 if ((INSN(31,0) & 0xFFFFFFE0) == 0xD5380400 /*MRS*/) {
7601 UInt tt = INSN(4,0);
7602 IRTemp val = newTemp(Ity_I64);
7603 IRExpr** args = mkIRExprVec_0();
7604 IRDirty* d = unsafeIRDirty_1_N (
7605 val,
7606 0/*regparms*/,
7607 "arm64g_dirtyhelper_MRS_ID_AA64PFR0_EL1",
7608 &arm64g_dirtyhelper_MRS_ID_AA64PFR0_EL1,
7609 args
7611 /* execute the dirty call, dumping the result in val. */
7612 stmt( IRStmt_Dirty(d) );
7614 putIReg64orZR(tt, mkexpr(val));
7615 return True;
7617 /* ---- Case for ID_AA64PFR1_EL1 (RO) ----
7618 We just return 0x0 here, as we don't support the opcodes of
7619 new commands in the emulation environment.
7620 0xD53804 001 Rt MRS rT, id_aa64pfr1_el1
7622 if ((INSN(31,0) & 0xFFFFFFE0) == 0xD5380420 /*MRS*/) {
7623 UInt tt = INSN(4,0);
7624 putIReg64orZR(tt, mkU64(0x0));
7625 DIP("mrs %s, id_aa64pfr1_el1 (FAKED)\n", nameIReg32orZR(tt));
7626 return True;
7628 /* ---- Case for ID_AA64ZFR0_EL1 (RO) ----
7629 We just return 0x0 here, as we don't support the opcodes of
7630 new commands in the emulation environment.
7631 0xD53804 010 Rt MRS rT, id_aa64zfr0_el1
7633 if ((INSN(31,0) & 0xFFFFFFE0) == 0xD5380440 /*MRS*/) {
7634 UInt tt = INSN(4,0);
7635 putIReg64orZR(tt, mkU64(0x0));
7636 DIP("mrs %s, id_aa64zfr0_el1 (FAKED)\n", nameIReg64orZR(tt));
7637 return True;
7639 /* ---- Case for ID_AA64DFR0_EL1 (RO) ----
7640 Just return the value indicating the implementation of the
7641 ARMv8 debug architecture without any extensions.
7642 0xD53805 000 Rt MRS rT, id_aa64dfr0_el1
7644 if ((INSN(31,0) & 0xFFFFFFE0) == 0xD5380500 /*MRS*/) {
7645 UInt tt = INSN(4,0);
7646 putIReg64orZR(tt, mkU64(0x6));
7647 DIP("mrs %s, id_aa64dfr0_el1 (FAKED)\n", nameIReg64orZR(tt));
7648 return True;
7650 /* ---- Case for ID_AA64DFR1_EL1 (RO) ----
7651 We just return 0x0 here, as we don't support the opcodes of
7652 new commands in the emulation environment.
7653 0xD53805 001 Rt MRS rT, id_aa64dfr1_el1
7655 if ((INSN(31,0) & 0xFFFFFFE0) == 0xD5380520 /*MRS*/) {
7656 UInt tt = INSN(4,0);
7657 putIReg64orZR(tt, mkU64(0x0));
7658 DIP("mrs %s, id_aa64dfr1_el1 (FAKED)\n", nameIReg64orZR(tt));
7659 return True;
7661 /* ---- Case for ID_AA64AFR0_EL1 (RO) ----
7662 We just return 0x0 here, as we don't support the opcodes of
7663 new commands in the emulation environment.
7664 0xD53805 100 Rt MRS rT, id_aa64afr0_el1
7666 if ((INSN(31,0) & 0xFFFFFFE0) == 0xD5380580 /*MRS*/) {
7667 UInt tt = INSN(4,0);
7668 putIReg64orZR(tt, mkU64(0x0));
7669 DIP("mrs %s, id_aa64afr0_el1 (FAKED)\n", nameIReg64orZR(tt));
7670 return True;
7672 /* ---- Case for ID_AA64AFR1_EL1 (RO) ----
7673 We just return 0x0 here, as we don't support the opcodes of
7674 new commands in the emulation environment.
7675 0xD53805 101 Rt MRS rT, id_aa64afr1_el1
7677 if ((INSN(31,0) & 0xFFFFFFE0) == 0xD53805A0 /*MRS*/) {
7678 UInt tt = INSN(4,0);
7679 putIReg64orZR(tt, mkU64(0x0));
7680 DIP("mrs %s, id_aa64afr1_el1 (FAKED)\n", nameIReg64orZR(tt));
7681 return True;
7683 /* ---- Case for ID_AA64ISAR0_EL1 (RO) ----
7684 We only take care of SHA2, SHA1 and AES bits, as all the other
7685 commands are not part of the emulation environment.
7686 We downgrade SHA2 from 0x2 to 0x1 as we don't support the commands.
7687 0xD53806 000 Rt MRS rT, id_aa64isar0_el1
7689 if ((INSN(31,0) & 0xFFFFFFE0) == 0xD5380600 /*MRS*/) {
7690 UInt tt = INSN(4,0);
7691 IRTemp val = newTemp(Ity_I64);
7692 IRExpr** args = mkIRExprVec_0();
7693 IRDirty* d = unsafeIRDirty_1_N (
7694 val,
7695 0/*regparms*/,
7696 "arm64g_dirtyhelper_MRS_ID_AA64ISAR0_EL1",
7697 &arm64g_dirtyhelper_MRS_ID_AA64ISAR0_EL1,
7698 args
7700 /* execute the dirty call, dumping the result in val. */
7701 stmt( IRStmt_Dirty(d) );
7702 putIReg64orZR(tt, mkexpr(val));
7703 DIP("mrs %s, id_aa64isar0_el1 (FAKED)\n", nameIReg64orZR(tt));
7704 return True;
7706 /* ---- Case for ID_AA64ISAR1_EL1 (RO) ----
7707 We just return 0x0 here, as we don't support the opcodes of
7708 new commands in the emulation environment.
7709 0xD53806 001 Rt MRS rT, id_aa64isar1_el1
7711 if ((INSN(31,0) & 0xFFFFFFE0) == 0xD5380620 /*MRS*/) {
7712 UInt tt = INSN(4,0);
7713 IRTemp val = newTemp(Ity_I64);
7714 IRExpr** args = mkIRExprVec_0();
7715 IRDirty* d = unsafeIRDirty_1_N (
7716 val,
7717 0/*regparms*/,
7718 "arm64g_dirtyhelper_MRS_ID_AA64ISAR1_EL1",
7719 &arm64g_dirtyhelper_MRS_ID_AA64ISAR1_EL1,
7720 args
7722 /* execute the dirty call, dumping the result in val. */
7723 stmt( IRStmt_Dirty(d) );
7724 putIReg64orZR(tt, mkexpr(val));
7725 DIP("mrs %s, id_aa64isar1_el1 (FAKED)\n", nameIReg64orZR(tt));
7726 return True;
7728 /* ---- Case for ID_AA64MMFR0_EL1 (RO) ----
7729 Instead of returning a fake register, we use the same
7730 value as does the kernel emulation.
7731 0xD53807 000 Rt MRS rT, id_aa64mmfr0_el1
7733 if ((INSN(31,0) & 0xFFFFFFE0) == 0xD5380700 /*MRS*/) {
7734 UInt tt = INSN(4,0);
7735 IRTemp val = newTemp(Ity_I64);
7736 IRExpr** args = mkIRExprVec_0();
7737 IRDirty* d = unsafeIRDirty_1_N (
7738 val,
7739 0/*regparms*/,
7740 "arm64g_dirtyhelper_MRS_ID_AA64MMFR0_EL1",
7741 &arm64g_dirtyhelper_MRS_ID_AA64MMFR0_EL1,
7742 args
7744 /* execute the dirty call, dumping the result in val. */
7745 stmt( IRStmt_Dirty(d) );
7746 putIReg64orZR(tt, mkexpr(val));
7747 DIP("mrs %s, id_aa64mmfr0_el1 (FAKED)\n", nameIReg64orZR(tt));
7748 return True;
7750 /* ---- Case for ID_AA64MMFR1_EL1 (RO) ----
7751 Instead of returning a fake register, we use the same
7752 value as does the kernel emulation. Set VHE and HAFDBS
7753 to not implemented.
7754 0xD53807 001 Rt MRS rT, id_aa64mmfr1_el1
7756 if ((INSN(31,0) & 0xFFFFFFE0) == 0xD5380720 /*MRS*/) {
7757 UInt tt = INSN(4,0);
7758 IRTemp val = newTemp(Ity_I64);
7759 IRExpr** args = mkIRExprVec_0();
7760 IRDirty* d = unsafeIRDirty_1_N (
7761 val,
7762 0/*regparms*/,
7763 "arm64g_dirtyhelper_MRS_ID_AA64MMFR1_EL1",
7764 &arm64g_dirtyhelper_MRS_ID_AA64MMFR1_EL1,
7765 args
7767 /* execute the dirty call, dumping the result in val. */
7768 stmt( IRStmt_Dirty(d) );
7769 putIReg64orZR(tt, mkexpr(val));
7770 DIP("mrs %s, id_aa64mmfr1_el1 (FAKED)\n", nameIReg64orZR(tt));
7771 return True;
7773 /* ---- Case for ID_AA64MMFR2_EL1 (RO) ----
7774 Return faked value of not implemented ARMv8.2 and ARMv8.3
7775 0xD53807 010 Rt MRS rT, id_aa64mmfr2_el1
7777 if ((INSN(31,0) & 0xFFFFFFE0) == 0xD5380740 /*MRS*/) {
7778 UInt tt = INSN(4,0);
7779 putIReg64orZR(tt, mkU64(0x0));
7780 DIP("mrs %s, id_aa64mmfr2_el1 (FAKED)\n", nameIReg64orZR(tt));
7781 return True;
7783 /* ---- Cases for TPIDR_EL0 ----
7784 0xD51BD0 010 Rt MSR tpidr_el0, rT
7785 0xD53BD0 010 Rt MRS rT, tpidr_el0
7787 if ( (INSN(31,0) & 0xFFFFFFE0) == 0xD51BD040 /*MSR*/
7788 || (INSN(31,0) & 0xFFFFFFE0) == 0xD53BD040 /*MRS*/) {
7789 Bool toSys = INSN(21,21) == 0;
7790 UInt tt = INSN(4,0);
7791 if (toSys) {
7792 stmt( IRStmt_Put( OFFB_TPIDR_EL0, getIReg64orZR(tt)) );
7793 DIP("msr tpidr_el0, %s\n", nameIReg64orZR(tt));
7794 } else {
7795 putIReg64orZR(tt, IRExpr_Get( OFFB_TPIDR_EL0, Ity_I64 ));
7796 DIP("mrs %s, tpidr_el0\n", nameIReg64orZR(tt));
7798 return True;
7800 /* ---- Cases for FPCR ----
7801 0xD51B44 000 Rt MSR fpcr, rT
7802 0xD53B44 000 Rt MRS rT, fpcr
7804 if ( (INSN(31,0) & 0xFFFFFFE0) == 0xD51B4400 /*MSR*/
7805 || (INSN(31,0) & 0xFFFFFFE0) == 0xD53B4400 /*MRS*/) {
7806 Bool toSys = INSN(21,21) == 0;
7807 UInt tt = INSN(4,0);
7808 if (toSys) {
7809 stmt( IRStmt_Put( OFFB_FPCR, getIReg32orZR(tt)) );
7810 DIP("msr fpcr, %s\n", nameIReg64orZR(tt));
7811 } else {
7812 putIReg32orZR(tt, IRExpr_Get(OFFB_FPCR, Ity_I32));
7813 DIP("mrs %s, fpcr\n", nameIReg64orZR(tt));
7815 return True;
7817 /* ---- Cases for FPSR ----
7818 0xD51B44 001 Rt MSR fpsr, rT
7819 0xD53B44 001 Rt MRS rT, fpsr
7820 The only part of this we model is FPSR.QC. All other bits
7821 are ignored when writing to it and RAZ when reading from it.
7823 if ( (INSN(31,0) & 0xFFFFFFE0) == 0xD51B4420 /*MSR*/
7824 || (INSN(31,0) & 0xFFFFFFE0) == 0xD53B4420 /*MRS*/) {
7825 Bool toSys = INSN(21,21) == 0;
7826 UInt tt = INSN(4,0);
7827 if (toSys) {
7828 /* Just deal with FPSR.QC. Make up a V128 value which is
7829 zero if Xt[27] is zero and any other value if Xt[27] is
7830 nonzero. */
7831 IRTemp qc64 = newTemp(Ity_I64);
7832 assign(qc64, binop(Iop_And64,
7833 binop(Iop_Shr64, getIReg64orZR(tt), mkU8(27)),
7834 mkU64(1)));
7835 IRExpr* qcV128 = binop(Iop_64HLtoV128, mkexpr(qc64), mkexpr(qc64));
7836 stmt( IRStmt_Put( OFFB_QCFLAG, qcV128 ) );
7837 DIP("msr fpsr, %s\n", nameIReg64orZR(tt));
7838 } else {
7839 /* Generate a value which is all zeroes except for bit 27,
7840 which must be zero if QCFLAG is all zeroes and one otherwise. */
7841 IRTemp qcV128 = newTempV128();
7842 assign(qcV128, IRExpr_Get( OFFB_QCFLAG, Ity_V128 ));
7843 IRTemp qc64 = newTemp(Ity_I64);
7844 assign(qc64, binop(Iop_Or64, unop(Iop_V128HIto64, mkexpr(qcV128)),
7845 unop(Iop_V128to64, mkexpr(qcV128))));
7846 IRExpr* res = binop(Iop_Shl64,
7847 unop(Iop_1Uto64,
7848 binop(Iop_CmpNE64, mkexpr(qc64), mkU64(0))),
7849 mkU8(27));
7850 putIReg64orZR(tt, res);
7851 DIP("mrs %s, fpsr\n", nameIReg64orZR(tt));
7853 return True;
7855 /* ---- Cases for NZCV ----
7856 D51B42 000 Rt MSR nzcv, rT
7857 D53B42 000 Rt MRS rT, nzcv
7858 The only parts of NZCV that actually exist are bits 31:28, which
7859 are the N Z C and V bits themselves. Hence the flags thunk provides
7860 all the state we need.
7862 if ( (INSN(31,0) & 0xFFFFFFE0) == 0xD51B4200 /*MSR*/
7863 || (INSN(31,0) & 0xFFFFFFE0) == 0xD53B4200 /*MRS*/) {
7864 Bool toSys = INSN(21,21) == 0;
7865 UInt tt = INSN(4,0);
7866 if (toSys) {
7867 IRTemp t = newTemp(Ity_I64);
7868 assign(t, binop(Iop_And64, getIReg64orZR(tt), mkU64(0xF0000000ULL)));
7869 setFlags_COPY(t);
7870 DIP("msr %s, nzcv\n", nameIReg32orZR(tt));
7871 } else {
7872 IRTemp res = newTemp(Ity_I64);
7873 assign(res, mk_arm64g_calculate_flags_nzcv());
7874 putIReg32orZR(tt, unop(Iop_64to32, mkexpr(res)));
7875 DIP("mrs %s, nzcv\n", nameIReg64orZR(tt));
7877 return True;
7879 /* ---- Cases for DCZID_EL0 ----
7880 This is the data cache zero ID register. It controls whether
7881 DC ZVA is supported and if so the block size used. Support reads of it
7882 only by passing through to the host.
7883 D5 3B 00 111 Rt MRS rT, dczid_el0
7885 if ((INSN(31,0) & 0xFFFFFFE0) == 0xD53B00E0) {
7886 UInt tt = INSN(4,0);
7887 IRTemp val = newTemp(Ity_I64);
7888 IRExpr** args = mkIRExprVec_0();
7889 IRDirty* d = unsafeIRDirty_1_N (
7890 val,
7891 0/*regparms*/,
7892 "arm64g_dirtyhelper_MRS_DCZID_EL0",
7893 &arm64g_dirtyhelper_MRS_DCZID_EL0,
7894 args
7896 /* execute the dirty call, dumping the result in val. */
7897 stmt( IRStmt_Dirty(d) );
7898 putIReg64orZR(tt, mkexpr(val));
7899 DIP("mrs %s, dczid_el0 (FAKED)\n", nameIReg64orZR(tt));
7900 return True;
7902 /* ---- Cases for CTR_EL0 ----
7903 We just handle reads, and make up a value from the D and I line
7904 sizes in the VexArchInfo we are given, and patch in the following
7905 fields that the Foundation model gives ("natively"):
7906 CWG = 0b0100, ERG = 0b0100, L1Ip = 0b11
7907 D5 3B 00 001 Rt MRS rT, ctr_el0
7909 if ((INSN(31,0) & 0xFFFFFFE0) == 0xD53B0020) {
7910 UInt tt = INSN(4,0);
7911 /* Need to generate a value from dMinLine_lg2_szB and
7912 iMinLine_lg2_szB. The value in the register is in 32-bit
7913 units, so need to subtract 2 from the values in the
7914 VexArchInfo. We can assume that the values here are valid --
7915 disInstr_ARM64 checks them -- so there's no need to deal with
7916 out-of-range cases. */
7917 vassert(archinfo->arm64_dMinLine_lg2_szB >= 2
7918 && archinfo->arm64_dMinLine_lg2_szB <= 17
7919 && archinfo->arm64_iMinLine_lg2_szB >= 2
7920 && archinfo->arm64_iMinLine_lg2_szB <= 17);
7921 UInt val
7922 = 0x8440c000 | ((0xF & (archinfo->arm64_dMinLine_lg2_szB - 2)) << 16)
7923 | ((0xF & (archinfo->arm64_iMinLine_lg2_szB - 2)) << 0);
7924 putIReg64orZR(tt, mkU64(val));
7925 DIP("mrs %s, ctr_el0\n", nameIReg64orZR(tt));
7926 return True;
7928 /* ---- Cases for CNTVCT_EL0 ----
7929 This is a timestamp counter of some sort. Support reads of it only
7930 by passing through to the host.
7931 D5 3B E0 010 Rt MRS Xt, cntvct_el0
7933 if ((INSN(31,0) & 0xFFFFFFE0) == 0xD53BE040) {
7934 UInt tt = INSN(4,0);
7935 IRTemp val = newTemp(Ity_I64);
7936 IRExpr** args = mkIRExprVec_0();
7937 IRDirty* d = unsafeIRDirty_1_N (
7938 val,
7939 0/*regparms*/,
7940 "arm64g_dirtyhelper_MRS_CNTVCT_EL0",
7941 &arm64g_dirtyhelper_MRS_CNTVCT_EL0,
7942 args
7944 /* execute the dirty call, dumping the result in val. */
7945 stmt( IRStmt_Dirty(d) );
7946 putIReg64orZR(tt, mkexpr(val));
7947 DIP("mrs %s, cntvct_el0\n", nameIReg64orZR(tt));
7948 return True;
7950 /* ---- Cases for CNTFRQ_EL0 ----
7951 This is always RO at EL0, so it's safe to pass through to the host.
7952 D5 3B E0 000 Rt MRS Xt, cntfrq_el0
7954 if ((INSN(31,0) & 0xFFFFFFE0) == 0xD53BE000) {
7955 UInt tt = INSN(4,0);
7956 IRTemp val = newTemp(Ity_I64);
7957 IRExpr** args = mkIRExprVec_0();
7958 IRDirty* d = unsafeIRDirty_1_N (
7959 val,
7960 0/*regparms*/,
7961 "arm64g_dirtyhelper_MRS_CNTFRQ_EL0",
7962 &arm64g_dirtyhelper_MRS_CNTFRQ_EL0,
7963 args
7965 /* execute the dirty call, dumping the result in val. */
7966 stmt( IRStmt_Dirty(d) );
7967 putIReg64orZR(tt, mkexpr(val));
7968 DIP("mrs %s, cntfrq_el0\n", nameIReg64orZR(tt));
7969 return True;
7972 /* ------------------ IC_IVAU ------------------ */
7973 /* D5 0B 75 001 Rt ic ivau, rT
7975 if ((INSN(31,0) & 0xFFFFFFE0) == 0xD50B7520) {
7976 /* We will always be provided with a valid iMinLine value. */
7977 vassert(archinfo->arm64_iMinLine_lg2_szB >= 2
7978 && archinfo->arm64_iMinLine_lg2_szB <= 17);
7979 /* Round the requested address, in rT, down to the start of the
7980 containing block. */
7981 UInt tt = INSN(4,0);
7982 ULong lineszB = 1ULL << archinfo->arm64_iMinLine_lg2_szB;
7983 IRTemp addr = newTemp(Ity_I64);
7984 assign( addr, binop( Iop_And64,
7985 getIReg64orZR(tt),
7986 mkU64(~(lineszB - 1))) );
7987 /* Set the invalidation range, request exit-and-invalidate, with
7988 continuation at the next instruction. */
7989 stmt(IRStmt_Put(OFFB_CMSTART, mkexpr(addr)));
7990 stmt(IRStmt_Put(OFFB_CMLEN, mkU64(lineszB)));
7991 /* be paranoid ... */
7992 stmt( IRStmt_MBE(Imbe_Fence) );
7993 putPC(mkU64( guest_PC_curr_instr + 4 ));
7994 dres->whatNext = Dis_StopHere;
7995 dres->jk_StopHere = Ijk_InvalICache;
7996 DIP("ic ivau, %s\n", nameIReg64orZR(tt));
7997 return True;
8000 /* ------------------ DC_ZVA ------------------ */
8001 /* D5 0B 74 001 Rt dc zva, rT
8003 if ((INSN(31,0) & 0xFFFFFFE0) == 0xD50B7420) {
8004 /* Round the requested address, in rT, down to the start of the
8005 containing block. */
8006 UInt tt = INSN(4,0);
8007 ULong clearszB = 1UL << (archinfo->arm64_cache_block_size + 2);
8008 IRTemp addr = newTemp(Ity_I64);
8009 assign( addr, binop( Iop_And64,
8010 getIReg64orZR(tt),
8011 mkU64(~(clearszB - 1))) );
8012 for (ULong o = 0; o < clearszB; o += 8) {
8013 storeLE(binop(Iop_Add64,mkexpr(addr),mkU64(o)), mkU64(0));
8015 DIP("dc zva, %s\n", nameIReg64orZR(tt));
8016 return True;
8019 /* ------------------ DC_CVAU ------------------ */
8020 /* D5 0B 7A 001 Rt dc cvac, rT
8021 D5 0B 7B 001 Rt dc cvau, rT
8022 D5 0B 7C 001 Rt dc cvap, rT
8023 D5 0B 7D 001 Rt dc cvadp, rT
8024 D5 0B 7E 001 Rt dc civac, rT
8026 if ( (INSN(31,0) & 0xFFFFFFE0) == 0xD50B7A20
8027 || (INSN(31,0) & 0xFFFFFFE0) == 0xD50B7B20
8028 || (INSN(31,0) & 0xFFFFFFE0) == 0xD50B7C20
8029 || (INSN(31,0) & 0xFFFFFFE0) == 0xD50B7D20
8030 || (INSN(31,0) & 0xFFFFFFE0) == 0xD50B7E20) {
8031 /* Exactly the same scheme as for IC IVAU, except we observe the
8032 dMinLine size, and request an Ijk_FlushDCache instead of
8033 Ijk_InvalICache. */
8034 /* We will always be provided with a valid dMinLine value. */
8035 vassert(archinfo->arm64_dMinLine_lg2_szB >= 2
8036 && archinfo->arm64_dMinLine_lg2_szB <= 17);
8037 /* Round the requested address, in rT, down to the start of the
8038 containing block. */
8039 UInt tt = INSN(4,0);
8040 ULong lineszB = 1ULL << archinfo->arm64_dMinLine_lg2_szB;
8041 IRTemp addr = newTemp(Ity_I64);
8042 assign( addr, binop( Iop_And64,
8043 getIReg64orZR(tt),
8044 mkU64(~(lineszB - 1))) );
8045 /* Set the flush range, request exit-and-flush, with
8046 continuation at the next instruction. */
8047 stmt(IRStmt_Put(OFFB_CMSTART, mkexpr(addr)));
8048 stmt(IRStmt_Put(OFFB_CMLEN, mkU64(lineszB)));
8049 /* be paranoid ... */
8050 stmt( IRStmt_MBE(Imbe_Fence) );
8051 putPC(mkU64( guest_PC_curr_instr + 4 ));
8052 dres->whatNext = Dis_StopHere;
8053 dres->jk_StopHere = Ijk_FlushDCache;
8054 DIP("dc cvau, %s\n", nameIReg64orZR(tt));
8055 return True;
8058 /* ------------------ ISB, DMB, DSB ------------------ */
8059 /* 31 21 11 7 6 4
8060 11010 10100 0 00 011 0011 CRm 1 01 11111 DMB opt
8061 11010 10100 0 00 011 0011 CRm 1 00 11111 DSB opt
8062 11010 10100 0 00 011 0011 CRm 1 10 11111 ISB opt
8064 if (INSN(31,22) == BITS10(1,1,0,1,0,1,0,1,0,0)
8065 && INSN(21,12) == BITS10(0,0,0,0,1,1,0,0,1,1)
8066 && INSN(7,7) == 1
8067 && INSN(6,5) <= BITS2(1,0) && INSN(4,0) == BITS5(1,1,1,1,1)) {
8068 UInt opc = INSN(6,5);
8069 UInt CRm = INSN(11,8);
8070 vassert(opc <= 2 && CRm <= 15);
8071 stmt(IRStmt_MBE(Imbe_Fence));
8072 const HChar* opNames[3]
8073 = { "dsb", "dmb", "isb" };
8074 const HChar* howNames[16]
8075 = { "#0", "oshld", "oshst", "osh", "#4", "nshld", "nshst", "nsh",
8076 "#8", "ishld", "ishst", "ish", "#12", "ld", "st", "sy" };
8077 DIP("%s %s\n", opNames[opc], howNames[CRm]);
8078 return True;
8081 /* -------------------- NOP -------------------- */
8082 if (INSN(31,0) == 0xD503201F) {
8083 DIP("nop\n");
8084 return True;
8087 /* -------------------- BRK -------------------- */
8088 /* 31 23 20 4
8089 1101 0100 001 imm16 00000 BRK #imm16
8091 if (INSN(31,24) == BITS8(1,1,0,1,0,1,0,0)
8092 && INSN(23,21) == BITS3(0,0,1) && INSN(4,0) == BITS5(0,0,0,0,0)) {
8093 UInt imm16 = INSN(20,5);
8094 /* Request SIGTRAP and then restart of this insn. */
8095 putPC(mkU64(guest_PC_curr_instr + 0));
8096 dres->whatNext = Dis_StopHere;
8097 dres->jk_StopHere = Ijk_SigTRAP;
8098 DIP("brk #%u\n", imm16);
8099 return True;
8102 /* ------------------- YIELD ------------------- */
8103 /* 31 23 15 7
8104 1101 0101 0000 0011 0010 0000 0011 1111
8106 if (INSN(31,0) == 0xD503203F) {
8107 /* Request yield followed by continuation at the next insn. */
8108 putPC(mkU64(guest_PC_curr_instr + 4));
8109 dres->whatNext = Dis_StopHere;
8110 dres->jk_StopHere = Ijk_Yield;
8111 DIP("yield\n");
8112 return True;
8115 /* -------------------- HINT ------------------- */
8116 /* 31 23 15 11 4 3
8117 1101 0101 0000 0011 0010 imm7 1 1111
8118 Catch otherwise unhandled HINT instructions - any
8119 like YIELD which are explicitly handled should go
8120 above this case.
8122 if (INSN(31,24) == BITS8(1,1,0,1,0,1,0,1)
8123 && INSN(23,16) == BITS8(0,0,0,0,0,0,1,1)
8124 && INSN(15,12) == BITS4(0,0,1,0)
8125 && INSN(4,0) == BITS5(1,1,1,1,1)) {
8126 UInt imm7 = INSN(11,5);
8127 DIP("hint #%u\n", imm7);
8128 return True;
8131 /* ------------------- CLREX ------------------ */
8132 /* 31 23 15 11 7
8133 1101 0101 0000 0011 0011 m 0101 1111 CLREX CRm
8134 CRm is apparently ignored.
8136 if ((INSN(31,0) & 0xFFFFF0FF) == 0xD503305F) {
8137 UInt mm = INSN(11,8);
8138 /* AFAICS, this simply cancels a (all?) reservations made by a
8139 (any?) preceding LDREX(es). Arrange to hand it through to
8140 the back end. */
8141 if (abiinfo->guest__use_fallback_LLSC) {
8142 stmt( IRStmt_Put( OFFB_LLSC_SIZE, mkU64(0) )); // "no transaction"
8143 } else {
8144 stmt( IRStmt_MBE(Imbe_CancelReservation) );
8146 DIP("clrex #%u\n", mm);
8147 return True;
8150 if (sigill_diag) {
8151 vex_printf("ARM64 front end: branch_etc\n");
8153 return False;
8154 # undef INSN
8158 /*------------------------------------------------------------*/
8159 /*--- SIMD and FP instructions: helper functions ---*/
8160 /*------------------------------------------------------------*/
8162 /* Some constructors for interleave/deinterleave expressions. */
8164 static IRExpr* mk_CatEvenLanes64x2 ( IRTemp a10, IRTemp b10 ) {
8165 // returns a0 b0
8166 return binop(Iop_InterleaveLO64x2, mkexpr(a10), mkexpr(b10));
8169 static IRExpr* mk_CatOddLanes64x2 ( IRTemp a10, IRTemp b10 ) {
8170 // returns a1 b1
8171 return binop(Iop_InterleaveHI64x2, mkexpr(a10), mkexpr(b10));
8174 static IRExpr* mk_CatEvenLanes32x4 ( IRTemp a3210, IRTemp b3210 ) {
8175 // returns a2 a0 b2 b0
8176 return binop(Iop_CatEvenLanes32x4, mkexpr(a3210), mkexpr(b3210));
8179 static IRExpr* mk_CatOddLanes32x4 ( IRTemp a3210, IRTemp b3210 ) {
8180 // returns a3 a1 b3 b1
8181 return binop(Iop_CatOddLanes32x4, mkexpr(a3210), mkexpr(b3210));
8184 static IRExpr* mk_InterleaveLO32x4 ( IRTemp a3210, IRTemp b3210 ) {
8185 // returns a1 b1 a0 b0
8186 return binop(Iop_InterleaveLO32x4, mkexpr(a3210), mkexpr(b3210));
8189 static IRExpr* mk_InterleaveHI32x4 ( IRTemp a3210, IRTemp b3210 ) {
8190 // returns a3 b3 a2 b2
8191 return binop(Iop_InterleaveHI32x4, mkexpr(a3210), mkexpr(b3210));
8194 static IRExpr* mk_CatEvenLanes16x8 ( IRTemp a76543210, IRTemp b76543210 ) {
8195 // returns a6 a4 a2 a0 b6 b4 b2 b0
8196 return binop(Iop_CatEvenLanes16x8, mkexpr(a76543210), mkexpr(b76543210));
8199 static IRExpr* mk_CatOddLanes16x8 ( IRTemp a76543210, IRTemp b76543210 ) {
8200 // returns a7 a5 a3 a1 b7 b5 b3 b1
8201 return binop(Iop_CatOddLanes16x8, mkexpr(a76543210), mkexpr(b76543210));
8204 static IRExpr* mk_InterleaveLO16x8 ( IRTemp a76543210, IRTemp b76543210 ) {
8205 // returns a3 b3 a2 b2 a1 b1 a0 b0
8206 return binop(Iop_InterleaveLO16x8, mkexpr(a76543210), mkexpr(b76543210));
8209 static IRExpr* mk_InterleaveHI16x8 ( IRTemp a76543210, IRTemp b76543210 ) {
8210 // returns a7 b7 a6 b6 a5 b5 a4 b4
8211 return binop(Iop_InterleaveHI16x8, mkexpr(a76543210), mkexpr(b76543210));
8214 static IRExpr* mk_CatEvenLanes8x16 ( IRTemp aFEDCBA9876543210,
8215 IRTemp bFEDCBA9876543210 ) {
8216 // returns aE aC aA a8 a6 a4 a2 a0 bE bC bA b8 b6 b4 b2 b0
8217 return binop(Iop_CatEvenLanes8x16, mkexpr(aFEDCBA9876543210),
8218 mkexpr(bFEDCBA9876543210));
8221 static IRExpr* mk_CatOddLanes8x16 ( IRTemp aFEDCBA9876543210,
8222 IRTemp bFEDCBA9876543210 ) {
8223 // returns aF aD aB a9 a7 a5 a3 a1 bF bD bB b9 b7 b5 b3 b1
8224 return binop(Iop_CatOddLanes8x16, mkexpr(aFEDCBA9876543210),
8225 mkexpr(bFEDCBA9876543210));
8228 static IRExpr* mk_InterleaveLO8x16 ( IRTemp aFEDCBA9876543210,
8229 IRTemp bFEDCBA9876543210 ) {
8230 // returns a7 b7 a6 b6 a5 b5 a4 b4 a3 b3 a2 b2 a1 b1 a0 b0
8231 return binop(Iop_InterleaveLO8x16, mkexpr(aFEDCBA9876543210),
8232 mkexpr(bFEDCBA9876543210));
8235 static IRExpr* mk_InterleaveHI8x16 ( IRTemp aFEDCBA9876543210,
8236 IRTemp bFEDCBA9876543210 ) {
8237 // returns aF bF aE bE aD bD aC bC aB bB aA bA a9 b9 a8 b8
8238 return binop(Iop_InterleaveHI8x16, mkexpr(aFEDCBA9876543210),
8239 mkexpr(bFEDCBA9876543210));
8242 /* Generate N copies of |bit| in the bottom of a ULong. */
8243 static ULong Replicate ( ULong bit, Int N )
8245 vassert(bit <= 1 && N >= 1 && N < 64);
8246 if (bit == 0) {
8247 return 0;
8248 } else {
8249 /* Careful. This won't work for N == 64. */
8250 return (1ULL << N) - 1;
8254 static ULong Replicate32x2 ( ULong bits32 )
8256 vassert(0 == (bits32 & ~0xFFFFFFFFULL));
8257 return (bits32 << 32) | bits32;
8260 static ULong Replicate16x4 ( ULong bits16 )
8262 vassert(0 == (bits16 & ~0xFFFFULL));
8263 return Replicate32x2((bits16 << 16) | bits16);
8266 static ULong Replicate8x8 ( ULong bits8 )
8268 vassert(0 == (bits8 & ~0xFFULL));
8269 return Replicate16x4((bits8 << 8) | bits8);
8272 /* Expand the VFPExpandImm-style encoding in the bottom 8 bits of
8273 |imm8| to either a 32-bit value if N is 32 or a 64 bit value if N
8274 is 64. In the former case, the upper 32 bits of the returned value
8275 are guaranteed to be zero. */
8276 static ULong VFPExpandImm ( ULong imm8, Int N )
8278 vassert(imm8 <= 0xFF);
8279 vassert(N == 32 || N == 64);
8280 Int E = ((N == 32) ? 8 : 11) - 2; // The spec incorrectly omits the -2.
8281 Int F = N - E - 1;
8282 ULong imm8_6 = (imm8 >> 6) & 1;
8283 /* sign: 1 bit */
8284 /* exp: E bits */
8285 /* frac: F bits */
8286 ULong sign = (imm8 >> 7) & 1;
8287 ULong exp = ((imm8_6 ^ 1) << (E-1)) | Replicate(imm8_6, E-1);
8288 ULong frac = ((imm8 & 63) << (F-6)) | Replicate(0, F-6);
8289 vassert(sign < (1ULL << 1));
8290 vassert(exp < (1ULL << E));
8291 vassert(frac < (1ULL << F));
8292 vassert(1 + E + F == N);
8293 ULong res = (sign << (E+F)) | (exp << F) | frac;
8294 return res;
8297 /* Expand an AdvSIMDExpandImm-style encoding into a 64-bit value.
8298 This might fail, as indicated by the returned Bool. Page 2530 of
8299 the manual. */
8300 static Bool AdvSIMDExpandImm ( /*OUT*/ULong* res,
8301 UInt op, UInt cmode, UInt imm8 )
8303 vassert(op <= 1);
8304 vassert(cmode <= 15);
8305 vassert(imm8 <= 255);
8307 *res = 0; /* will overwrite iff returning True */
8309 ULong imm64 = 0;
8310 Bool testimm8 = False;
8312 switch (cmode >> 1) {
8313 case 0:
8314 testimm8 = False; imm64 = Replicate32x2(imm8); break;
8315 case 1:
8316 testimm8 = True; imm64 = Replicate32x2(imm8 << 8UL); break;
8317 case 2:
8318 // Coverity is right but we want the overflow, Replicate32x2 expects
8319 // (and asserts) that the top 32bits be zero so if we get rid of the
8320 // overflow we would need to add a mask. So just let it overflow.
8321 // coverity[OVERFLOW_BEFORE_WIDEN:FALSE]
8322 testimm8 = True; imm64 = Replicate32x2(imm8 << 16UL); break;
8323 case 3:
8324 // coverity[OVERFLOW_BEFORE_WIDEN:FALSE]
8325 testimm8 = True; imm64 = Replicate32x2(imm8 << 24UL); break;
8326 case 4:
8327 testimm8 = False; imm64 = Replicate16x4(imm8); break;
8328 case 5:
8329 testimm8 = True; imm64 = Replicate16x4(imm8 << 8); break;
8330 case 6:
8331 testimm8 = True;
8332 if ((cmode & 1) == 0)
8333 imm64 = Replicate32x2((imm8 << 8) | 0xFF);
8334 else
8335 imm64 = Replicate32x2((imm8 << 16) | 0xFFFF);
8336 break;
8337 case 7:
8338 testimm8 = False;
8339 if ((cmode & 1) == 0 && op == 0)
8340 imm64 = Replicate8x8(imm8);
8341 if ((cmode & 1) == 0 && op == 1) {
8342 imm64 = 0; imm64 |= (imm8 & 0x80) ? 0xFF : 0x00;
8343 imm64 <<= 8; imm64 |= (imm8 & 0x40) ? 0xFF : 0x00;
8344 imm64 <<= 8; imm64 |= (imm8 & 0x20) ? 0xFF : 0x00;
8345 imm64 <<= 8; imm64 |= (imm8 & 0x10) ? 0xFF : 0x00;
8346 imm64 <<= 8; imm64 |= (imm8 & 0x08) ? 0xFF : 0x00;
8347 imm64 <<= 8; imm64 |= (imm8 & 0x04) ? 0xFF : 0x00;
8348 imm64 <<= 8; imm64 |= (imm8 & 0x02) ? 0xFF : 0x00;
8349 imm64 <<= 8; imm64 |= (imm8 & 0x01) ? 0xFF : 0x00;
8351 if ((cmode & 1) == 1 && op == 0) {
8352 ULong imm8_7 = (imm8 >> 7) & 1;
8353 ULong imm8_6 = (imm8 >> 6) & 1;
8354 ULong imm8_50 = imm8 & 63;
8355 ULong imm32 = (imm8_7 << (1 + 5 + 6 + 19))
8356 | ((imm8_6 ^ 1) << (5 + 6 + 19))
8357 | (Replicate(imm8_6, 5) << (6 + 19))
8358 | (imm8_50 << 19);
8359 imm64 = Replicate32x2(imm32);
8361 if ((cmode & 1) == 1 && op == 1) {
8362 // imm64 = imm8<7>:NOT(imm8<6>)
8363 // :Replicate(imm8<6>,8):imm8<5:0>:Zeros(48);
8364 ULong imm8_7 = (imm8 >> 7) & 1;
8365 ULong imm8_6 = (imm8 >> 6) & 1;
8366 ULong imm8_50 = imm8 & 63;
8367 imm64 = (imm8_7 << 63) | ((imm8_6 ^ 1) << 62)
8368 | (Replicate(imm8_6, 8) << 54)
8369 | (imm8_50 << 48);
8371 break;
8372 default:
8373 vassert(0);
8376 if (testimm8 && imm8 == 0)
8377 return False;
8379 *res = imm64;
8380 return True;
8383 /* Help a bit for decoding laneage for vector operations that can be
8384 of the form 4x32, 2x64 or 2x32-and-zero-upper-half, as encoded by Q
8385 and SZ bits, typically for vector floating point. */
8386 static Bool getLaneInfo_Q_SZ ( /*OUT*/IRType* tyI, /*OUT*/IRType* tyF,
8387 /*OUT*/UInt* nLanes, /*OUT*/Bool* zeroUpper,
8388 /*OUT*/const HChar** arrSpec,
8389 Bool bitQ, Bool bitSZ )
8391 vassert(bitQ == True || bitQ == False);
8392 vassert(bitSZ == True || bitSZ == False);
8393 if (bitQ && bitSZ) { // 2x64
8394 if (tyI) *tyI = Ity_I64;
8395 if (tyF) *tyF = Ity_F64;
8396 if (nLanes) *nLanes = 2;
8397 if (zeroUpper) *zeroUpper = False;
8398 if (arrSpec) *arrSpec = "2d";
8399 return True;
8401 if (bitQ && !bitSZ) { // 4x32
8402 if (tyI) *tyI = Ity_I32;
8403 if (tyF) *tyF = Ity_F32;
8404 if (nLanes) *nLanes = 4;
8405 if (zeroUpper) *zeroUpper = False;
8406 if (arrSpec) *arrSpec = "4s";
8407 return True;
8409 if (!bitQ && !bitSZ) { // 2x32
8410 if (tyI) *tyI = Ity_I32;
8411 if (tyF) *tyF = Ity_F32;
8412 if (nLanes) *nLanes = 2;
8413 if (zeroUpper) *zeroUpper = True;
8414 if (arrSpec) *arrSpec = "2s";
8415 return True;
8417 // Else impliedly 1x64, which isn't allowed.
8418 return False;
8421 /* Helper for decoding laneage for shift-style vector operations
8422 that involve an immediate shift amount. */
8423 static Bool getLaneInfo_IMMH_IMMB ( /*OUT*/UInt* shift, /*OUT*/UInt* szBlg2,
8424 UInt immh, UInt immb )
8426 vassert(immh < (1<<4));
8427 vassert(immb < (1<<3));
8428 UInt immhb = (immh << 3) | immb;
8429 if (immh & 8) {
8430 if (shift) *shift = 128 - immhb;
8431 if (szBlg2) *szBlg2 = 3;
8432 return True;
8434 if (immh & 4) {
8435 if (shift) *shift = 64 - immhb;
8436 if (szBlg2) *szBlg2 = 2;
8437 return True;
8439 if (immh & 2) {
8440 if (shift) *shift = 32 - immhb;
8441 if (szBlg2) *szBlg2 = 1;
8442 return True;
8444 if (immh & 1) {
8445 if (shift) *shift = 16 - immhb;
8446 if (szBlg2) *szBlg2 = 0;
8447 return True;
8449 return False;
8452 /* Generate IR to fold all lanes of the V128 value in 'src' as
8453 characterised by the operator 'op', and return the result in the
8454 bottom bits of a V128, with all other bits set to zero. */
8455 static IRTemp math_FOLDV ( IRTemp src, IROp op )
8457 /* The basic idea is to use repeated applications of Iop_CatEven*
8458 and Iop_CatOdd* operators to 'src' so as to clone each lane into
8459 a complete vector. Then fold all those vectors with 'op' and
8460 zero out all but the least significant lane. */
8461 switch (op) {
8462 case Iop_Min8Sx16: case Iop_Min8Ux16:
8463 case Iop_Max8Sx16: case Iop_Max8Ux16: case Iop_Add8x16: {
8464 /* NB: temp naming here is misleading -- the naming is for 8
8465 lanes of 16 bit, whereas what is being operated on is 16
8466 lanes of 8 bits. */
8467 IRTemp x76543210 = src;
8468 IRTemp x76547654 = newTempV128();
8469 IRTemp x32103210 = newTempV128();
8470 assign(x76547654, mk_CatOddLanes64x2 (x76543210, x76543210));
8471 assign(x32103210, mk_CatEvenLanes64x2(x76543210, x76543210));
8472 IRTemp x76767676 = newTempV128();
8473 IRTemp x54545454 = newTempV128();
8474 IRTemp x32323232 = newTempV128();
8475 IRTemp x10101010 = newTempV128();
8476 assign(x76767676, mk_CatOddLanes32x4 (x76547654, x76547654));
8477 assign(x54545454, mk_CatEvenLanes32x4(x76547654, x76547654));
8478 assign(x32323232, mk_CatOddLanes32x4 (x32103210, x32103210));
8479 assign(x10101010, mk_CatEvenLanes32x4(x32103210, x32103210));
8480 IRTemp x77777777 = newTempV128();
8481 IRTemp x66666666 = newTempV128();
8482 IRTemp x55555555 = newTempV128();
8483 IRTemp x44444444 = newTempV128();
8484 IRTemp x33333333 = newTempV128();
8485 IRTemp x22222222 = newTempV128();
8486 IRTemp x11111111 = newTempV128();
8487 IRTemp x00000000 = newTempV128();
8488 assign(x77777777, mk_CatOddLanes16x8 (x76767676, x76767676));
8489 assign(x66666666, mk_CatEvenLanes16x8(x76767676, x76767676));
8490 assign(x55555555, mk_CatOddLanes16x8 (x54545454, x54545454));
8491 assign(x44444444, mk_CatEvenLanes16x8(x54545454, x54545454));
8492 assign(x33333333, mk_CatOddLanes16x8 (x32323232, x32323232));
8493 assign(x22222222, mk_CatEvenLanes16x8(x32323232, x32323232));
8494 assign(x11111111, mk_CatOddLanes16x8 (x10101010, x10101010));
8495 assign(x00000000, mk_CatEvenLanes16x8(x10101010, x10101010));
8496 /* Naming not misleading after here. */
8497 IRTemp xAllF = newTempV128();
8498 IRTemp xAllE = newTempV128();
8499 IRTemp xAllD = newTempV128();
8500 IRTemp xAllC = newTempV128();
8501 IRTemp xAllB = newTempV128();
8502 IRTemp xAllA = newTempV128();
8503 IRTemp xAll9 = newTempV128();
8504 IRTemp xAll8 = newTempV128();
8505 IRTemp xAll7 = newTempV128();
8506 IRTemp xAll6 = newTempV128();
8507 IRTemp xAll5 = newTempV128();
8508 IRTemp xAll4 = newTempV128();
8509 IRTemp xAll3 = newTempV128();
8510 IRTemp xAll2 = newTempV128();
8511 IRTemp xAll1 = newTempV128();
8512 IRTemp xAll0 = newTempV128();
8513 assign(xAllF, mk_CatOddLanes8x16 (x77777777, x77777777));
8514 assign(xAllE, mk_CatEvenLanes8x16(x77777777, x77777777));
8515 assign(xAllD, mk_CatOddLanes8x16 (x66666666, x66666666));
8516 assign(xAllC, mk_CatEvenLanes8x16(x66666666, x66666666));
8517 assign(xAllB, mk_CatOddLanes8x16 (x55555555, x55555555));
8518 assign(xAllA, mk_CatEvenLanes8x16(x55555555, x55555555));
8519 assign(xAll9, mk_CatOddLanes8x16 (x44444444, x44444444));
8520 assign(xAll8, mk_CatEvenLanes8x16(x44444444, x44444444));
8521 assign(xAll7, mk_CatOddLanes8x16 (x33333333, x33333333));
8522 assign(xAll6, mk_CatEvenLanes8x16(x33333333, x33333333));
8523 assign(xAll5, mk_CatOddLanes8x16 (x22222222, x22222222));
8524 assign(xAll4, mk_CatEvenLanes8x16(x22222222, x22222222));
8525 assign(xAll3, mk_CatOddLanes8x16 (x11111111, x11111111));
8526 assign(xAll2, mk_CatEvenLanes8x16(x11111111, x11111111));
8527 assign(xAll1, mk_CatOddLanes8x16 (x00000000, x00000000));
8528 assign(xAll0, mk_CatEvenLanes8x16(x00000000, x00000000));
8529 IRTemp maxFE = newTempV128();
8530 IRTemp maxDC = newTempV128();
8531 IRTemp maxBA = newTempV128();
8532 IRTemp max98 = newTempV128();
8533 IRTemp max76 = newTempV128();
8534 IRTemp max54 = newTempV128();
8535 IRTemp max32 = newTempV128();
8536 IRTemp max10 = newTempV128();
8537 assign(maxFE, binop(op, mkexpr(xAllF), mkexpr(xAllE)));
8538 assign(maxDC, binop(op, mkexpr(xAllD), mkexpr(xAllC)));
8539 assign(maxBA, binop(op, mkexpr(xAllB), mkexpr(xAllA)));
8540 assign(max98, binop(op, mkexpr(xAll9), mkexpr(xAll8)));
8541 assign(max76, binop(op, mkexpr(xAll7), mkexpr(xAll6)));
8542 assign(max54, binop(op, mkexpr(xAll5), mkexpr(xAll4)));
8543 assign(max32, binop(op, mkexpr(xAll3), mkexpr(xAll2)));
8544 assign(max10, binop(op, mkexpr(xAll1), mkexpr(xAll0)));
8545 IRTemp maxFEDC = newTempV128();
8546 IRTemp maxBA98 = newTempV128();
8547 IRTemp max7654 = newTempV128();
8548 IRTemp max3210 = newTempV128();
8549 assign(maxFEDC, binop(op, mkexpr(maxFE), mkexpr(maxDC)));
8550 assign(maxBA98, binop(op, mkexpr(maxBA), mkexpr(max98)));
8551 assign(max7654, binop(op, mkexpr(max76), mkexpr(max54)));
8552 assign(max3210, binop(op, mkexpr(max32), mkexpr(max10)));
8553 IRTemp maxFEDCBA98 = newTempV128();
8554 IRTemp max76543210 = newTempV128();
8555 assign(maxFEDCBA98, binop(op, mkexpr(maxFEDC), mkexpr(maxBA98)));
8556 assign(max76543210, binop(op, mkexpr(max7654), mkexpr(max3210)));
8557 IRTemp maxAllLanes = newTempV128();
8558 assign(maxAllLanes, binop(op, mkexpr(maxFEDCBA98),
8559 mkexpr(max76543210)));
8560 IRTemp res = newTempV128();
8561 assign(res, unop(Iop_ZeroHI120ofV128, mkexpr(maxAllLanes)));
8562 return res;
8564 case Iop_Min16Sx8: case Iop_Min16Ux8:
8565 case Iop_Max16Sx8: case Iop_Max16Ux8: case Iop_Add16x8: {
8566 IRTemp x76543210 = src;
8567 IRTemp x76547654 = newTempV128();
8568 IRTemp x32103210 = newTempV128();
8569 assign(x76547654, mk_CatOddLanes64x2 (x76543210, x76543210));
8570 assign(x32103210, mk_CatEvenLanes64x2(x76543210, x76543210));
8571 IRTemp x76767676 = newTempV128();
8572 IRTemp x54545454 = newTempV128();
8573 IRTemp x32323232 = newTempV128();
8574 IRTemp x10101010 = newTempV128();
8575 assign(x76767676, mk_CatOddLanes32x4 (x76547654, x76547654));
8576 assign(x54545454, mk_CatEvenLanes32x4(x76547654, x76547654));
8577 assign(x32323232, mk_CatOddLanes32x4 (x32103210, x32103210));
8578 assign(x10101010, mk_CatEvenLanes32x4(x32103210, x32103210));
8579 IRTemp x77777777 = newTempV128();
8580 IRTemp x66666666 = newTempV128();
8581 IRTemp x55555555 = newTempV128();
8582 IRTemp x44444444 = newTempV128();
8583 IRTemp x33333333 = newTempV128();
8584 IRTemp x22222222 = newTempV128();
8585 IRTemp x11111111 = newTempV128();
8586 IRTemp x00000000 = newTempV128();
8587 assign(x77777777, mk_CatOddLanes16x8 (x76767676, x76767676));
8588 assign(x66666666, mk_CatEvenLanes16x8(x76767676, x76767676));
8589 assign(x55555555, mk_CatOddLanes16x8 (x54545454, x54545454));
8590 assign(x44444444, mk_CatEvenLanes16x8(x54545454, x54545454));
8591 assign(x33333333, mk_CatOddLanes16x8 (x32323232, x32323232));
8592 assign(x22222222, mk_CatEvenLanes16x8(x32323232, x32323232));
8593 assign(x11111111, mk_CatOddLanes16x8 (x10101010, x10101010));
8594 assign(x00000000, mk_CatEvenLanes16x8(x10101010, x10101010));
8595 IRTemp max76 = newTempV128();
8596 IRTemp max54 = newTempV128();
8597 IRTemp max32 = newTempV128();
8598 IRTemp max10 = newTempV128();
8599 assign(max76, binop(op, mkexpr(x77777777), mkexpr(x66666666)));
8600 assign(max54, binop(op, mkexpr(x55555555), mkexpr(x44444444)));
8601 assign(max32, binop(op, mkexpr(x33333333), mkexpr(x22222222)));
8602 assign(max10, binop(op, mkexpr(x11111111), mkexpr(x00000000)));
8603 IRTemp max7654 = newTempV128();
8604 IRTemp max3210 = newTempV128();
8605 assign(max7654, binop(op, mkexpr(max76), mkexpr(max54)));
8606 assign(max3210, binop(op, mkexpr(max32), mkexpr(max10)));
8607 IRTemp max76543210 = newTempV128();
8608 assign(max76543210, binop(op, mkexpr(max7654), mkexpr(max3210)));
8609 IRTemp res = newTempV128();
8610 assign(res, unop(Iop_ZeroHI112ofV128, mkexpr(max76543210)));
8611 return res;
8613 case Iop_Max32Fx4: case Iop_Min32Fx4:
8614 case Iop_Min32Sx4: case Iop_Min32Ux4:
8615 case Iop_Max32Sx4: case Iop_Max32Ux4: case Iop_Add32x4: {
8616 IRTemp x3210 = src;
8617 IRTemp x3232 = newTempV128();
8618 IRTemp x1010 = newTempV128();
8619 assign(x3232, mk_CatOddLanes64x2 (x3210, x3210));
8620 assign(x1010, mk_CatEvenLanes64x2(x3210, x3210));
8621 IRTemp x3333 = newTempV128();
8622 IRTemp x2222 = newTempV128();
8623 IRTemp x1111 = newTempV128();
8624 IRTemp x0000 = newTempV128();
8625 assign(x3333, mk_CatOddLanes32x4 (x3232, x3232));
8626 assign(x2222, mk_CatEvenLanes32x4(x3232, x3232));
8627 assign(x1111, mk_CatOddLanes32x4 (x1010, x1010));
8628 assign(x0000, mk_CatEvenLanes32x4(x1010, x1010));
8629 IRTemp max32 = newTempV128();
8630 IRTemp max10 = newTempV128();
8631 assign(max32, binop(op, mkexpr(x3333), mkexpr(x2222)));
8632 assign(max10, binop(op, mkexpr(x1111), mkexpr(x0000)));
8633 IRTemp max3210 = newTempV128();
8634 assign(max3210, binop(op, mkexpr(max32), mkexpr(max10)));
8635 IRTemp res = newTempV128();
8636 assign(res, unop(Iop_ZeroHI96ofV128, mkexpr(max3210)));
8637 return res;
8639 case Iop_Add64x2: {
8640 IRTemp x10 = src;
8641 IRTemp x00 = newTempV128();
8642 IRTemp x11 = newTempV128();
8643 assign(x11, binop(Iop_InterleaveHI64x2, mkexpr(x10), mkexpr(x10)));
8644 assign(x00, binop(Iop_InterleaveLO64x2, mkexpr(x10), mkexpr(x10)));
8645 IRTemp max10 = newTempV128();
8646 assign(max10, binop(op, mkexpr(x11), mkexpr(x00)));
8647 IRTemp res = newTempV128();
8648 assign(res, unop(Iop_ZeroHI64ofV128, mkexpr(max10)));
8649 return res;
8651 default:
8652 vassert(0);
8657 /* Generate IR for TBL and TBX. This deals with the 128 bit case
8658 only. */
8659 static IRTemp math_TBL_TBX ( IRTemp tab[4], UInt len, IRTemp src,
8660 IRTemp oor_values )
8662 vassert(len <= 3);
8664 /* Generate some useful constants as concisely as possible. */
8665 IRTemp half15 = newTemp(Ity_I64);
8666 assign(half15, mkU64(0x0F0F0F0F0F0F0F0FULL));
8667 IRTemp half16 = newTemp(Ity_I64);
8668 assign(half16, mkU64(0x1010101010101010ULL));
8670 /* A zero vector */
8671 IRTemp allZero = newTempV128();
8672 assign(allZero, mkV128(0x0000));
8673 /* A vector containing 15 in each 8-bit lane */
8674 IRTemp all15 = newTempV128();
8675 assign(all15, binop(Iop_64HLtoV128, mkexpr(half15), mkexpr(half15)));
8676 /* A vector containing 16 in each 8-bit lane */
8677 IRTemp all16 = newTempV128();
8678 assign(all16, binop(Iop_64HLtoV128, mkexpr(half16), mkexpr(half16)));
8679 /* A vector containing 32 in each 8-bit lane */
8680 IRTemp all32 = newTempV128();
8681 assign(all32, binop(Iop_Add8x16, mkexpr(all16), mkexpr(all16)));
8682 /* A vector containing 48 in each 8-bit lane */
8683 IRTemp all48 = newTempV128();
8684 assign(all48, binop(Iop_Add8x16, mkexpr(all16), mkexpr(all32)));
8685 /* A vector containing 64 in each 8-bit lane */
8686 IRTemp all64 = newTempV128();
8687 assign(all64, binop(Iop_Add8x16, mkexpr(all32), mkexpr(all32)));
8689 /* Group the 16/32/48/64 vectors so as to be indexable. */
8690 IRTemp allXX[4] = { all16, all32, all48, all64 };
8692 /* Compute the result for each table vector, with zeroes in places
8693 where the index values are out of range, and OR them into the
8694 running vector. */
8695 IRTemp running_result = newTempV128();
8696 assign(running_result, mkV128(0));
8698 UInt tabent;
8699 for (tabent = 0; tabent <= len; tabent++) {
8700 vassert(tabent < 4);
8701 IRTemp bias = newTempV128();
8702 assign(bias,
8703 mkexpr(tabent == 0 ? allZero : allXX[tabent-1]));
8704 IRTemp biased_indices = newTempV128();
8705 assign(biased_indices,
8706 binop(Iop_Sub8x16, mkexpr(src), mkexpr(bias)));
8707 IRTemp valid_mask = newTempV128();
8708 assign(valid_mask,
8709 binop(Iop_CmpGT8Ux16, mkexpr(all16), mkexpr(biased_indices)));
8710 IRTemp safe_biased_indices = newTempV128();
8711 assign(safe_biased_indices,
8712 binop(Iop_AndV128, mkexpr(biased_indices), mkexpr(all15)));
8713 IRTemp results_or_junk = newTempV128();
8714 assign(results_or_junk,
8715 binop(Iop_Perm8x16, mkexpr(tab[tabent]),
8716 mkexpr(safe_biased_indices)));
8717 IRTemp results_or_zero = newTempV128();
8718 assign(results_or_zero,
8719 binop(Iop_AndV128, mkexpr(results_or_junk), mkexpr(valid_mask)));
8720 /* And OR that into the running result. */
8721 IRTemp tmp = newTempV128();
8722 assign(tmp, binop(Iop_OrV128, mkexpr(results_or_zero),
8723 mkexpr(running_result)));
8724 running_result = tmp;
8727 /* So now running_result holds the overall result where the indices
8728 are in range, and zero in out-of-range lanes. Now we need to
8729 compute an overall validity mask and use this to copy in the
8730 lanes in the oor_values for out of range indices. This is
8731 unnecessary for TBL but will get folded out by iropt, so we lean
8732 on that and generate the same code for TBL and TBX here. */
8733 IRTemp overall_valid_mask = newTempV128();
8734 assign(overall_valid_mask,
8735 binop(Iop_CmpGT8Ux16, mkexpr(allXX[len]), mkexpr(src)));
8736 IRTemp result = newTempV128();
8737 assign(result,
8738 binop(Iop_OrV128,
8739 mkexpr(running_result),
8740 binop(Iop_AndV128,
8741 mkexpr(oor_values),
8742 unop(Iop_NotV128, mkexpr(overall_valid_mask)))));
8743 return result;
8747 /* Let |argL| and |argR| be V128 values, and let |opI64x2toV128| be
8748 an op which takes two I64s and produces a V128. That is, a widening
8749 operator. Generate IR which applies |opI64x2toV128| to either the
8750 lower (if |is2| is False) or upper (if |is2| is True) halves of
8751 |argL| and |argR|, and return the value in a new IRTemp.
8753 static
8754 IRTemp math_BINARY_WIDENING_V128 ( Bool is2, IROp opI64x2toV128,
8755 IRExpr* argL, IRExpr* argR )
8757 IRTemp res = newTempV128();
8758 IROp slice = is2 ? Iop_V128HIto64 : Iop_V128to64;
8759 assign(res, binop(opI64x2toV128, unop(slice, argL),
8760 unop(slice, argR)));
8761 return res;
8765 /* Generate signed/unsigned absolute difference vector IR. */
8766 static
8767 IRTemp math_ABD ( Bool isU, UInt size, IRExpr* argLE, IRExpr* argRE )
8769 vassert(size <= 3);
8770 IRTemp argL = newTempV128();
8771 IRTemp argR = newTempV128();
8772 IRTemp msk = newTempV128();
8773 IRTemp res = newTempV128();
8774 assign(argL, argLE);
8775 assign(argR, argRE);
8776 assign(msk, binop(isU ? mkVecCMPGTU(size) : mkVecCMPGTS(size),
8777 mkexpr(argL), mkexpr(argR)));
8778 assign(res,
8779 binop(Iop_OrV128,
8780 binop(Iop_AndV128,
8781 binop(mkVecSUB(size), mkexpr(argL), mkexpr(argR)),
8782 mkexpr(msk)),
8783 binop(Iop_AndV128,
8784 binop(mkVecSUB(size), mkexpr(argR), mkexpr(argL)),
8785 unop(Iop_NotV128, mkexpr(msk)))));
8786 return res;
8790 /* Generate IR that takes a V128 and sign- or zero-widens
8791 either the lower or upper set of lanes to twice-as-wide,
8792 resulting in a new V128 value. */
8793 static
8794 IRTemp math_WIDEN_LO_OR_HI_LANES ( Bool zWiden, Bool fromUpperHalf,
8795 UInt sizeNarrow, IRExpr* srcE )
8797 IRTemp src = newTempV128();
8798 IRTemp res = newTempV128();
8799 assign(src, srcE);
8800 switch (sizeNarrow) {
8801 case X10:
8802 assign(res,
8803 binop(zWiden ? Iop_ShrN64x2 : Iop_SarN64x2,
8804 binop(fromUpperHalf ? Iop_InterleaveHI32x4
8805 : Iop_InterleaveLO32x4,
8806 mkexpr(src),
8807 mkexpr(src)),
8808 mkU8(32)));
8809 break;
8810 case X01:
8811 assign(res,
8812 binop(zWiden ? Iop_ShrN32x4 : Iop_SarN32x4,
8813 binop(fromUpperHalf ? Iop_InterleaveHI16x8
8814 : Iop_InterleaveLO16x8,
8815 mkexpr(src),
8816 mkexpr(src)),
8817 mkU8(16)));
8818 break;
8819 case X00:
8820 assign(res,
8821 binop(zWiden ? Iop_ShrN16x8 : Iop_SarN16x8,
8822 binop(fromUpperHalf ? Iop_InterleaveHI8x16
8823 : Iop_InterleaveLO8x16,
8824 mkexpr(src),
8825 mkexpr(src)),
8826 mkU8(8)));
8827 break;
8828 default:
8829 vassert(0);
8831 return res;
8835 /* Generate IR that takes a V128 and sign- or zero-widens
8836 either the even or odd lanes to twice-as-wide,
8837 resulting in a new V128 value. */
8838 static
8839 IRTemp math_WIDEN_EVEN_OR_ODD_LANES ( Bool zWiden, Bool fromOdd,
8840 UInt sizeNarrow, IRExpr* srcE )
8842 IRTemp src = newTempV128();
8843 IRTemp res = newTempV128();
8844 IROp opSAR = mkVecSARN(sizeNarrow+1);
8845 IROp opSHR = mkVecSHRN(sizeNarrow+1);
8846 IROp opSHL = mkVecSHLN(sizeNarrow+1);
8847 IROp opSxR = zWiden ? opSHR : opSAR;
8848 UInt amt = 0;
8849 switch (sizeNarrow) {
8850 case X10: amt = 32; break;
8851 case X01: amt = 16; break;
8852 case X00: amt = 8; break;
8853 default: vassert(0);
8855 assign(src, srcE);
8856 if (fromOdd) {
8857 assign(res, binop(opSxR, mkexpr(src), mkU8(amt)));
8858 } else {
8859 assign(res, binop(opSxR, binop(opSHL, mkexpr(src), mkU8(amt)),
8860 mkU8(amt)));
8862 return res;
8866 /* Generate IR that takes two V128s and narrows (takes lower half)
8867 of each lane, producing a single V128 value. */
8868 static
8869 IRTemp math_NARROW_LANES ( IRTemp argHi, IRTemp argLo, UInt sizeNarrow )
8871 IRTemp res = newTempV128();
8872 assign(res, binop(mkVecCATEVENLANES(sizeNarrow),
8873 mkexpr(argHi), mkexpr(argLo)));
8874 return res;
8878 /* Return a temp which holds the vector dup of the lane of width
8879 (1 << size) obtained from src[laneNo]. */
8880 static
8881 IRTemp math_DUP_VEC_ELEM ( IRExpr* src, UInt size, UInt laneNo )
8883 vassert(size <= 3);
8884 /* Normalise |laneNo| so it is of the form
8885 x000 for D, xx00 for S, xxx0 for H, and xxxx for B.
8886 This puts the bits we want to inspect at constant offsets
8887 regardless of the value of |size|.
8889 UInt ix = laneNo << size;
8890 vassert(ix <= 15);
8891 IROp ops[4] = { Iop_INVALID, Iop_INVALID, Iop_INVALID, Iop_INVALID };
8892 switch (size) {
8893 case 0: /* B */
8894 ops[0] = (ix & 1) ? Iop_CatOddLanes8x16 : Iop_CatEvenLanes8x16;
8895 /* fallthrough */
8896 case 1: /* H */
8897 ops[1] = (ix & 2) ? Iop_CatOddLanes16x8 : Iop_CatEvenLanes16x8;
8898 /* fallthrough */
8899 case 2: /* S */
8900 ops[2] = (ix & 4) ? Iop_CatOddLanes32x4 : Iop_CatEvenLanes32x4;
8901 /* fallthrough */
8902 case 3: /* D */
8903 ops[3] = (ix & 8) ? Iop_InterleaveHI64x2 : Iop_InterleaveLO64x2;
8904 break;
8905 default:
8906 vassert(0);
8908 IRTemp res = newTempV128();
8909 assign(res, src);
8910 Int i;
8911 for (i = 3; i >= 0; i--) {
8912 if (ops[i] == Iop_INVALID)
8913 break;
8914 IRTemp tmp = newTempV128();
8915 assign(tmp, binop(ops[i], mkexpr(res), mkexpr(res)));
8916 res = tmp;
8918 return res;
8922 /* Let |srcV| be a V128 value, and let |imm5| be a lane-and-size
8923 selector encoded as shown below. Return a new V128 holding the
8924 selected lane from |srcV| dup'd out to V128, and also return the
8925 lane number, log2 of the lane size in bytes, and width-character via
8926 *laneNo, *laneSzLg2 and *laneCh respectively. It may be that imm5
8927 is an invalid selector, in which case return
8928 IRTemp_INVALID, 0, 0 and '?' respectively.
8930 imm5 = xxxx1 signifies .b[xxxx]
8931 = xxx10 .h[xxx]
8932 = xx100 .s[xx]
8933 = x1000 .d[x]
8934 otherwise invalid
8936 static
8937 IRTemp handle_DUP_VEC_ELEM ( /*OUT*/UInt* laneNo,
8938 /*OUT*/UInt* laneSzLg2, /*OUT*/HChar* laneCh,
8939 IRExpr* srcV, UInt imm5 )
8941 *laneNo = 0;
8942 *laneSzLg2 = 0;
8943 *laneCh = '?';
8945 if (imm5 & 1) {
8946 *laneNo = (imm5 >> 1) & 15;
8947 *laneSzLg2 = 0;
8948 *laneCh = 'b';
8950 else if (imm5 & 2) {
8951 *laneNo = (imm5 >> 2) & 7;
8952 *laneSzLg2 = 1;
8953 *laneCh = 'h';
8955 else if (imm5 & 4) {
8956 *laneNo = (imm5 >> 3) & 3;
8957 *laneSzLg2 = 2;
8958 *laneCh = 's';
8960 else if (imm5 & 8) {
8961 *laneNo = (imm5 >> 4) & 1;
8962 *laneSzLg2 = 3;
8963 *laneCh = 'd';
8965 else {
8966 /* invalid */
8967 return IRTemp_INVALID;
8970 return math_DUP_VEC_ELEM(srcV, *laneSzLg2, *laneNo);
8974 /* Clone |imm| to every lane of a V128, with lane size log2 of |size|. */
8975 static
8976 IRTemp math_VEC_DUP_IMM ( UInt size, ULong imm )
8978 IRType ty = Ity_INVALID;
8979 IRTemp rcS = IRTemp_INVALID;
8980 switch (size) {
8981 case X01:
8982 vassert(imm <= 0xFFFFULL);
8983 ty = Ity_I16;
8984 rcS = newTemp(ty); assign(rcS, mkU16( (UShort)imm ));
8985 break;
8986 case X10:
8987 vassert(imm <= 0xFFFFFFFFULL);
8988 ty = Ity_I32;
8989 rcS = newTemp(ty); assign(rcS, mkU32( (UInt)imm ));
8990 break;
8991 case X11:
8992 ty = Ity_I64;
8993 rcS = newTemp(ty); assign(rcS, mkU64(imm)); break;
8994 default:
8995 vassert(0);
8997 IRTemp rcV = math_DUP_TO_V128(rcS, ty);
8998 return rcV;
9002 /* Let |new64| be a V128 in which only the lower 64 bits are interesting,
9003 and the upper can contain any value -- it is ignored. If |is2| is False,
9004 generate IR to put |new64| in the lower half of vector reg |dd| and zero
9005 the upper half. If |is2| is True, generate IR to put |new64| in the upper
9006 half of vector reg |dd| and leave the lower half unchanged. This
9007 simulates the behaviour of the "foo/foo2" instructions in which the
9008 destination is half the width of sources, for example addhn/addhn2.
9010 static
9011 void putLO64andZUorPutHI64 ( Bool is2, UInt dd, IRTemp new64 )
9013 if (is2) {
9014 /* Get the old contents of Vdd, zero the upper half, and replace
9015 it with 'x'. */
9016 IRTemp t_zero_oldLO = newTempV128();
9017 assign(t_zero_oldLO, unop(Iop_ZeroHI64ofV128, getQReg128(dd)));
9018 IRTemp t_newHI_zero = newTempV128();
9019 assign(t_newHI_zero, binop(Iop_InterleaveLO64x2, mkexpr(new64),
9020 mkV128(0x0000)));
9021 IRTemp res = newTempV128();
9022 assign(res, binop(Iop_OrV128, mkexpr(t_zero_oldLO),
9023 mkexpr(t_newHI_zero)));
9024 putQReg128(dd, mkexpr(res));
9025 } else {
9026 /* This is simple. */
9027 putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(new64)));
9032 /* Compute vector SQABS at lane size |size| for |srcE|, returning
9033 the q result in |*qabs| and the normal result in |*nabs|. */
9034 static
9035 void math_SQABS ( /*OUT*/IRTemp* qabs, /*OUT*/IRTemp* nabs,
9036 IRExpr* srcE, UInt size )
9038 IRTemp src, mask, maskn, nsub, qsub;
9039 src = mask = maskn = nsub = qsub = IRTemp_INVALID;
9040 newTempsV128_7(&src, &mask, &maskn, &nsub, &qsub, nabs, qabs);
9041 assign(src, srcE);
9042 assign(mask, binop(mkVecCMPGTS(size), mkV128(0x0000), mkexpr(src)));
9043 assign(maskn, unop(Iop_NotV128, mkexpr(mask)));
9044 assign(nsub, binop(mkVecSUB(size), mkV128(0x0000), mkexpr(src)));
9045 assign(qsub, binop(mkVecQSUBS(size), mkV128(0x0000), mkexpr(src)));
9046 assign(*nabs, binop(Iop_OrV128,
9047 binop(Iop_AndV128, mkexpr(nsub), mkexpr(mask)),
9048 binop(Iop_AndV128, mkexpr(src), mkexpr(maskn))));
9049 assign(*qabs, binop(Iop_OrV128,
9050 binop(Iop_AndV128, mkexpr(qsub), mkexpr(mask)),
9051 binop(Iop_AndV128, mkexpr(src), mkexpr(maskn))));
9055 /* Compute vector SQNEG at lane size |size| for |srcE|, returning
9056 the q result in |*qneg| and the normal result in |*nneg|. */
9057 static
9058 void math_SQNEG ( /*OUT*/IRTemp* qneg, /*OUT*/IRTemp* nneg,
9059 IRExpr* srcE, UInt size )
9061 IRTemp src = IRTemp_INVALID;
9062 newTempsV128_3(&src, nneg, qneg);
9063 assign(src, srcE);
9064 assign(*nneg, binop(mkVecSUB(size), mkV128(0x0000), mkexpr(src)));
9065 assign(*qneg, binop(mkVecQSUBS(size), mkV128(0x0000), mkexpr(src)));
9069 /* Zero all except the least significant lane of |srcE|, where |size|
9070 indicates the lane size in the usual way. */
9071 static IRTemp math_ZERO_ALL_EXCEPT_LOWEST_LANE ( UInt size, IRExpr* srcE )
9073 vassert(size < 4);
9074 IRTemp t = newTempV128();
9075 assign(t, unop(mkVecZEROHIxxOFV128(size), srcE));
9076 return t;
9080 /* Generate IR to compute vector widening MULL from either the lower
9081 (is2==False) or upper (is2==True) halves of vecN and vecM. The
9082 widening multiplies are unsigned when isU==True and signed when
9083 isU==False. |size| is the narrow lane size indication. Optionally,
9084 the product may be added to or subtracted from vecD, at the wide lane
9085 size. This happens when |mas| is 'a' (add) or 's' (sub). When |mas|
9086 is 'm' (only multiply) then the accumulate part does not happen, and
9087 |vecD| is expected to == IRTemp_INVALID.
9089 Only size==0 (h_b_b), size==1 (s_h_h) and size==2 (d_s_s) variants
9090 are allowed. The result is returned in a new IRTemp, which is
9091 returned in *res. */
9092 static
9093 void math_MULL_ACC ( /*OUT*/IRTemp* res,
9094 Bool is2, Bool isU, UInt size, HChar mas,
9095 IRTemp vecN, IRTemp vecM, IRTemp vecD )
9097 vassert(res && *res == IRTemp_INVALID);
9098 vassert(size <= 2);
9099 vassert(mas == 'm' || mas == 'a' || mas == 's');
9100 if (mas == 'm') vassert(vecD == IRTemp_INVALID);
9101 IROp mulOp = isU ? mkVecMULLU(size) : mkVecMULLS(size);
9102 IROp accOp = (mas == 'a') ? mkVecADD(size+1)
9103 : (mas == 's' ? mkVecSUB(size+1)
9104 : Iop_INVALID);
9105 IRTemp mul = math_BINARY_WIDENING_V128(is2, mulOp,
9106 mkexpr(vecN), mkexpr(vecM));
9107 *res = newTempV128();
9108 assign(*res, mas == 'm' ? mkexpr(mul)
9109 : binop(accOp, mkexpr(vecD), mkexpr(mul)));
9113 /* Same as math_MULL_ACC, except the multiply is signed widening,
9114 the multiplied value is then doubled, before being added to or
9115 subtracted from the accumulated value. And everything is
9116 saturated. In all cases, saturation residuals are returned
9117 via (sat1q, sat1n), and in the accumulate cases,
9118 via (sat2q, sat2n) too. All results are returned in new temporaries.
9119 In the no-accumulate case, *sat2q and *sat2n are never instantiated,
9120 so the caller can tell this has happened. */
9121 static
9122 void math_SQDMULL_ACC ( /*OUT*/IRTemp* res,
9123 /*OUT*/IRTemp* sat1q, /*OUT*/IRTemp* sat1n,
9124 /*OUT*/IRTemp* sat2q, /*OUT*/IRTemp* sat2n,
9125 Bool is2, UInt size, HChar mas,
9126 IRTemp vecN, IRTemp vecM, IRTemp vecD )
9128 vassert(size <= 2);
9129 vassert(mas == 'm' || mas == 'a' || mas == 's');
9130 /* Compute
9131 sat1q = vecN.D[is2] *sq vecM.d[is2] *q 2
9132 sat1n = vecN.D[is2] *s vecM.d[is2] * 2
9133 IOW take either the low or high halves of vecN and vecM, signed widen,
9134 multiply, double that, and signedly saturate. Also compute the same
9135 but without saturation.
9137 vassert(sat2q && *sat2q == IRTemp_INVALID);
9138 vassert(sat2n && *sat2n == IRTemp_INVALID);
9139 newTempsV128_3(sat1q, sat1n, res);
9140 IRTemp tq = math_BINARY_WIDENING_V128(is2, mkVecQDMULLS(size),
9141 mkexpr(vecN), mkexpr(vecM));
9142 IRTemp tn = math_BINARY_WIDENING_V128(is2, mkVecMULLS(size),
9143 mkexpr(vecN), mkexpr(vecM));
9144 assign(*sat1q, mkexpr(tq));
9145 assign(*sat1n, binop(mkVecADD(size+1), mkexpr(tn), mkexpr(tn)));
9147 /* If there is no accumulation, the final result is sat1q,
9148 and there's no assignment to sat2q or sat2n. */
9149 if (mas == 'm') {
9150 assign(*res, mkexpr(*sat1q));
9151 return;
9154 /* Compute
9155 sat2q = vecD +sq/-sq sat1q
9156 sat2n = vecD +/- sat1n
9157 result = sat2q
9159 newTempsV128_2(sat2q, sat2n);
9160 assign(*sat2q, binop(mas == 'a' ? mkVecQADDS(size+1) : mkVecQSUBS(size+1),
9161 mkexpr(vecD), mkexpr(*sat1q)));
9162 assign(*sat2n, binop(mas == 'a' ? mkVecADD(size+1) : mkVecSUB(size+1),
9163 mkexpr(vecD), mkexpr(*sat1n)));
9164 assign(*res, mkexpr(*sat2q));
9168 /* Generate IR for widening signed vector multiplies. The operands
9169 have their lane width signedly widened, and they are then multiplied
9170 at the wider width, returning results in two new IRTemps. */
9171 static
9172 void math_MULLS ( /*OUT*/IRTemp* resHI, /*OUT*/IRTemp* resLO,
9173 UInt sizeNarrow, IRTemp argL, IRTemp argR )
9175 vassert(sizeNarrow <= 2);
9176 newTempsV128_2(resHI, resLO);
9177 IRTemp argLhi = newTemp(Ity_I64);
9178 IRTemp argLlo = newTemp(Ity_I64);
9179 IRTemp argRhi = newTemp(Ity_I64);
9180 IRTemp argRlo = newTemp(Ity_I64);
9181 assign(argLhi, unop(Iop_V128HIto64, mkexpr(argL)));
9182 assign(argLlo, unop(Iop_V128to64, mkexpr(argL)));
9183 assign(argRhi, unop(Iop_V128HIto64, mkexpr(argR)));
9184 assign(argRlo, unop(Iop_V128to64, mkexpr(argR)));
9185 IROp opMulls = mkVecMULLS(sizeNarrow);
9186 assign(*resHI, binop(opMulls, mkexpr(argLhi), mkexpr(argRhi)));
9187 assign(*resLO, binop(opMulls, mkexpr(argLlo), mkexpr(argRlo)));
9191 /* Generate IR for SQDMULH and SQRDMULH: signedly wideningly multiply,
9192 double that, possibly add a rounding constant (R variants), and take
9193 the high half. */
9194 static
9195 void math_SQDMULH ( /*OUT*/IRTemp* res,
9196 /*OUT*/IRTemp* sat1q, /*OUT*/IRTemp* sat1n,
9197 Bool isR, UInt size, IRTemp vN, IRTemp vM )
9199 vassert(size == X01 || size == X10); /* s or h only */
9201 newTempsV128_3(res, sat1q, sat1n);
9203 IRTemp mullsHI = IRTemp_INVALID, mullsLO = IRTemp_INVALID;
9204 math_MULLS(&mullsHI, &mullsLO, size, vN, vM);
9206 IRTemp addWide = mkVecADD(size+1);
9208 if (isR) {
9209 assign(*sat1q, binop(mkVecQRDMULHIS(size), mkexpr(vN), mkexpr(vM)));
9211 Int rcShift = size == X01 ? 15 : 31;
9212 IRTemp roundConst = math_VEC_DUP_IMM(size+1, 1ULL << rcShift);
9213 assign(*sat1n,
9214 binop(mkVecCATODDLANES(size),
9215 binop(addWide,
9216 binop(addWide, mkexpr(mullsHI), mkexpr(mullsHI)),
9217 mkexpr(roundConst)),
9218 binop(addWide,
9219 binop(addWide, mkexpr(mullsLO), mkexpr(mullsLO)),
9220 mkexpr(roundConst))));
9221 } else {
9222 assign(*sat1q, binop(mkVecQDMULHIS(size), mkexpr(vN), mkexpr(vM)));
9224 assign(*sat1n,
9225 binop(mkVecCATODDLANES(size),
9226 binop(addWide, mkexpr(mullsHI), mkexpr(mullsHI)),
9227 binop(addWide, mkexpr(mullsLO), mkexpr(mullsLO))));
9230 assign(*res, mkexpr(*sat1q));
9233 /* Generate IR for SQRDMLAH and SQRDMLSH: signedly wideningly multiply,
9234 double, add a rounding constant, take the high half and accumulate. */
9235 static
9236 void math_SQRDMLAH ( /*OUT*/IRTemp* res, /*OUT*/IRTemp* res_nosat, Bool isAdd,
9237 UInt size, IRTemp vD, IRTemp vN, IRTemp vM )
9239 vassert(size == X01 || size == X10); /* s or h only */
9241 /* SQRDMLAH = SQADD(A, SQRDMULH(B, C)) */
9243 IRTemp mul, mul_nosat, dummy;
9244 mul = mul_nosat = dummy = IRTemp_INVALID;
9245 math_SQDMULH(&mul, &dummy, &mul_nosat, True/*R*/, size, vN, vM);
9247 IROp op = isAdd ? mkVecADD(size) : mkVecSUB(size);
9248 IROp qop = isAdd ? mkVecQADDS(size) : mkVecQSUBS(size);
9249 newTempsV128_2(res, res_nosat);
9250 assign(*res, binop(qop, mkexpr(vD), mkexpr(mul)));
9251 assign(*res_nosat, binop(op, mkexpr(vD), mkexpr(mul_nosat)));
9255 /* Generate IR for SQSHL, UQSHL, SQSHLU by imm. Put the result in
9256 a new temp in *res, and the Q difference pair in new temps in
9257 *qDiff1 and *qDiff2 respectively. |nm| denotes which of the
9258 three operations it is. */
9259 static
9260 void math_QSHL_IMM ( /*OUT*/IRTemp* res,
9261 /*OUT*/IRTemp* qDiff1, /*OUT*/IRTemp* qDiff2,
9262 IRTemp src, UInt size, UInt shift, const HChar* nm )
9264 vassert(size <= 3);
9265 UInt laneBits = 8 << size;
9266 vassert(shift < laneBits);
9267 newTempsV128_3(res, qDiff1, qDiff2);
9268 IRTemp z128 = newTempV128();
9269 assign(z128, mkV128(0x0000));
9271 /* UQSHL */
9272 if (vex_streq(nm, "uqshl")) {
9273 IROp qop = mkVecQSHLNSATUU(size);
9274 assign(*res, binop(qop, mkexpr(src), mkU8(shift)));
9275 if (shift == 0) {
9276 /* No shift means no saturation. */
9277 assign(*qDiff1, mkexpr(z128));
9278 assign(*qDiff2, mkexpr(z128));
9279 } else {
9280 /* Saturation has occurred if any of the shifted-out bits are
9281 nonzero. We get the shifted-out bits by right-shifting the
9282 original value. */
9283 UInt rshift = laneBits - shift;
9284 vassert(rshift >= 1 && rshift < laneBits);
9285 assign(*qDiff1, binop(mkVecSHRN(size), mkexpr(src), mkU8(rshift)));
9286 assign(*qDiff2, mkexpr(z128));
9288 return;
9291 /* SQSHL */
9292 if (vex_streq(nm, "sqshl")) {
9293 IROp qop = mkVecQSHLNSATSS(size);
9294 assign(*res, binop(qop, mkexpr(src), mkU8(shift)));
9295 if (shift == 0) {
9296 /* No shift means no saturation. */
9297 assign(*qDiff1, mkexpr(z128));
9298 assign(*qDiff2, mkexpr(z128));
9299 } else {
9300 /* Saturation has occurred if any of the shifted-out bits are
9301 different from the top bit of the original value. */
9302 UInt rshift = laneBits - 1 - shift;
9303 vassert(rshift < laneBits-1);
9304 /* qDiff1 is the shifted out bits, and the top bit of the original
9305 value, preceded by zeroes. */
9306 assign(*qDiff1, binop(mkVecSHRN(size), mkexpr(src), mkU8(rshift)));
9307 /* qDiff2 is the top bit of the original value, cloned the
9308 correct number of times. */
9309 assign(*qDiff2, binop(mkVecSHRN(size),
9310 binop(mkVecSARN(size), mkexpr(src),
9311 mkU8(laneBits-1)),
9312 mkU8(rshift)));
9313 /* This also succeeds in comparing the top bit of the original
9314 value to itself, which is a bit stupid, but not wrong. */
9316 return;
9319 /* SQSHLU */
9320 if (vex_streq(nm, "sqshlu")) {
9321 IROp qop = mkVecQSHLNSATSU(size);
9322 assign(*res, binop(qop, mkexpr(src), mkU8(shift)));
9323 if (shift == 0) {
9324 /* If there's no shift, saturation depends on the top bit
9325 of the source. */
9326 assign(*qDiff1, binop(mkVecSHRN(size), mkexpr(src), mkU8(laneBits-1)));
9327 assign(*qDiff2, mkexpr(z128));
9328 } else {
9329 /* Saturation has occurred if any of the shifted-out bits are
9330 nonzero. We get the shifted-out bits by right-shifting the
9331 original value. */
9332 UInt rshift = laneBits - shift;
9333 vassert(rshift >= 1 && rshift < laneBits);
9334 assign(*qDiff1, binop(mkVecSHRN(size), mkexpr(src), mkU8(rshift)));
9335 assign(*qDiff2, mkexpr(z128));
9337 return;
9340 vassert(0);
9344 /* Generate IR to do SRHADD and URHADD. */
9345 static
9346 IRTemp math_RHADD ( UInt size, Bool isU, IRTemp aa, IRTemp bb )
9348 /* Generate this:
9349 (A >> 1) + (B >> 1) + (((A & 1) + (B & 1) + 1) >> 1)
9351 vassert(size <= 3);
9352 IROp opSHR = isU ? mkVecSHRN(size) : mkVecSARN(size);
9353 IROp opADD = mkVecADD(size);
9354 /* The only tricky bit is to generate the correct vector 1 constant. */
9355 const ULong ones64[4]
9356 = { 0x0101010101010101ULL, 0x0001000100010001ULL,
9357 0x0000000100000001ULL, 0x0000000000000001ULL };
9358 IRTemp imm64 = newTemp(Ity_I64);
9359 assign(imm64, mkU64(ones64[size]));
9360 IRTemp vecOne = newTempV128();
9361 assign(vecOne, binop(Iop_64HLtoV128, mkexpr(imm64), mkexpr(imm64)));
9362 IRTemp scaOne = newTemp(Ity_I8);
9363 assign(scaOne, mkU8(1));
9364 IRTemp res = newTempV128();
9365 assign(res,
9366 binop(opADD,
9367 binop(opSHR, mkexpr(aa), mkexpr(scaOne)),
9368 binop(opADD,
9369 binop(opSHR, mkexpr(bb), mkexpr(scaOne)),
9370 binop(opSHR,
9371 binop(opADD,
9372 binop(opADD,
9373 binop(Iop_AndV128, mkexpr(aa),
9374 mkexpr(vecOne)),
9375 binop(Iop_AndV128, mkexpr(bb),
9376 mkexpr(vecOne))
9378 mkexpr(vecOne)
9380 mkexpr(scaOne)
9385 return res;
9389 /* Generate IR to do {U,S}ADDLP */
9390 static
9391 IRTemp math_ADDLP ( UInt sizeNarrow, Bool isU, IRTemp src )
9393 IRTemp res = newTempV128();
9394 assign(res,
9395 binop(mkVecADD(sizeNarrow+1),
9396 mkexpr(math_WIDEN_EVEN_OR_ODD_LANES(
9397 isU, True/*fromOdd*/, sizeNarrow, mkexpr(src))),
9398 mkexpr(math_WIDEN_EVEN_OR_ODD_LANES(
9399 isU, False/*!fromOdd*/, sizeNarrow, mkexpr(src)))));
9400 return res;
9404 /* QCFLAG tracks the SIMD sticky saturation status. Update the status
9405 thusly: if, after application of |opZHI| to both |qres| and |nres|,
9406 they have the same value, leave QCFLAG unchanged. Otherwise, set it
9407 (implicitly) to 1. |opZHI| may only be one of the Iop_ZeroHIxxofV128
9408 operators, or Iop_INVALID, in which case |qres| and |nres| are used
9409 unmodified. The presence |opZHI| means this function can be used to
9410 generate QCFLAG update code for both scalar and vector SIMD operations.
9412 static
9413 void updateQCFLAGwithDifferenceZHI ( IRTemp qres, IRTemp nres, IROp opZHI )
9415 IRTemp diff = newTempV128();
9416 IRTemp oldQCFLAG = newTempV128();
9417 IRTemp newQCFLAG = newTempV128();
9418 if (opZHI == Iop_INVALID) {
9419 assign(diff, binop(Iop_XorV128, mkexpr(qres), mkexpr(nres)));
9420 } else {
9421 vassert(opZHI == Iop_ZeroHI64ofV128
9422 || opZHI == Iop_ZeroHI96ofV128 || opZHI == Iop_ZeroHI112ofV128);
9423 assign(diff, unop(opZHI, binop(Iop_XorV128, mkexpr(qres), mkexpr(nres))));
9425 assign(oldQCFLAG, IRExpr_Get(OFFB_QCFLAG, Ity_V128));
9426 assign(newQCFLAG, binop(Iop_OrV128, mkexpr(oldQCFLAG), mkexpr(diff)));
9427 stmt(IRStmt_Put(OFFB_QCFLAG, mkexpr(newQCFLAG)));
9431 /* A variant of updateQCFLAGwithDifferenceZHI in which |qres| and |nres|
9432 are used unmodified, hence suitable for QCFLAG updates for whole-vector
9433 operations. */
9434 static
9435 void updateQCFLAGwithDifference ( IRTemp qres, IRTemp nres )
9437 updateQCFLAGwithDifferenceZHI(qres, nres, Iop_INVALID);
9441 /* Generate IR to rearrange two vector values in a way which is useful
9442 for doing S/D/H add-pair etc operations. There are 5 cases:
9444 2d: [m1 m0] [n1 n0] --> [m1 n1] [m0 n0]
9446 4s: [m3 m2 m1 m0] [n3 n2 n1 n0] --> [m3 m1 n3 n1] [m2 m0 n2 n0]
9448 8h: [m7 m6 m5 m4 m3 m2 m1 m0] [n7 n6 n5 n4 n3 n2 n1 n0] -->
9449 [m7 m5 n7 n5 m3 m1 n3 n1] [m6 m4 n6 n4 m2 m0 n2 n0]
9451 2s: [m3 m2 m1 m0] [n3 n2 n1 n0] --> [0 0 m1 n1] [0 0 m0 n0]
9453 4h: [m7 m6 m5 m4 m3 m2 m1 m0] [n7 n6 n5 n4 n3 n2 n1 n0] -->
9454 [ 0 0 0 0 m3 m1 n3 n1] [ 0 0 0 0 m2 m0 n2 n0]
9456 static
9457 void math_REARRANGE_FOR_FLOATING_PAIRWISE (
9458 /*OUT*/IRTemp* rearrL, /*OUT*/IRTemp* rearrR,
9459 IRTemp vecM, IRTemp vecN, ARM64VecESize sz, UInt bitQ
9462 vassert(rearrL && *rearrL == IRTemp_INVALID);
9463 vassert(rearrR && *rearrR == IRTemp_INVALID);
9464 *rearrL = newTempV128();
9465 *rearrR = newTempV128();
9467 switch (sz) {
9468 case ARM64VSizeD:
9469 // 2d case
9470 vassert(bitQ == 1);
9471 assign(*rearrL, binop(Iop_InterleaveHI64x2, mkexpr(vecM), mkexpr(vecN)));
9472 assign(*rearrR, binop(Iop_InterleaveLO64x2, mkexpr(vecM), mkexpr(vecN)));
9473 break;
9475 case ARM64VSizeS:
9476 if (bitQ == 1) {
9477 // 4s case
9478 assign(*rearrL, binop(Iop_CatOddLanes32x4, mkexpr(vecM), mkexpr(vecN)));
9479 assign(*rearrR, binop(Iop_CatEvenLanes32x4, mkexpr(vecM), mkexpr(vecN)));
9480 } else {
9481 // 2s case
9482 IRTemp m1n1m0n0 = newTempV128();
9483 IRTemp m0n0m1n1 = newTempV128();
9484 assign(m1n1m0n0, binop(Iop_InterleaveLO32x4,
9485 mkexpr(vecM), mkexpr(vecN)));
9486 assign(m0n0m1n1, triop(Iop_SliceV128,
9487 mkexpr(m1n1m0n0), mkexpr(m1n1m0n0), mkU8(8)));
9488 assign(*rearrL, unop(Iop_ZeroHI64ofV128, mkexpr(m1n1m0n0)));
9489 assign(*rearrR, unop(Iop_ZeroHI64ofV128, mkexpr(m0n0m1n1)));
9491 break;
9493 case ARM64VSizeH:
9494 if (bitQ == 1) {
9495 // 8h case
9496 assign(*rearrL, binop(Iop_CatOddLanes16x8, mkexpr(vecM), mkexpr(vecN)));
9497 assign(*rearrR, binop(Iop_CatEvenLanes16x8, mkexpr(vecM), mkexpr(vecN)));
9498 } else {
9499 // 4h case
9500 IRTemp m3m1n3n1 = newTempV128();
9501 IRTemp m2m0n2n0 = newTempV128();
9502 assign(m3m1n3n1, binop(Iop_CatOddLanes16x8, mkexpr(vecM), mkexpr(vecN)));
9503 assign(m2m0n2n0, binop(Iop_CatEvenLanes16x8, mkexpr(vecM), mkexpr(vecN)));
9504 assign(*rearrL, unop(Iop_ZeroHI64ofV128,
9505 binop(Iop_CatEvenLanes32x4, mkexpr(m3m1n3n1),
9506 mkexpr(m3m1n3n1))));
9507 assign(*rearrR, unop(Iop_ZeroHI64ofV128,
9508 binop(Iop_CatEvenLanes32x4, mkexpr(m2m0n2n0),
9509 mkexpr(m2m0n2n0))));
9511 break;
9513 default: vpanic("math_REARRANGE_FOR_FLOATING_PAIRWISE");
9518 /* Returns 2.0 ^ (-n) for n in 1 .. 64 */
9519 static Double two_to_the_minus ( Int n )
9521 if (n == 1) return 0.5;
9522 vassert(n >= 2 && n <= 64);
9523 Int half = n / 2;
9524 return two_to_the_minus(half) * two_to_the_minus(n - half);
9528 /* Returns 2.0 ^ n for n in 1 .. 64 */
9529 static Double two_to_the_plus ( Int n )
9531 if (n == 1) return 2.0;
9532 vassert(n >= 2 && n <= 64);
9533 Int half = n / 2;
9534 return two_to_the_plus(half) * two_to_the_plus(n - half);
9538 /*------------------------------------------------------------*/
9539 /*--- SIMD and FP instructions ---*/
9540 /*------------------------------------------------------------*/
9542 static
9543 Bool dis_AdvSIMD_EXT(/*MB_OUT*/DisResult* dres, UInt insn)
9545 /* 31 29 23 21 20 15 14 10 9 4
9546 0 q 101110 op2 0 m 0 imm4 0 n d
9547 Decode fields: op2
9549 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
9550 if (INSN(31,31) != 0
9551 || INSN(29,24) != BITS6(1,0,1,1,1,0)
9552 || INSN(21,21) != 0 || INSN(15,15) != 0 || INSN(10,10) != 0) {
9553 return False;
9555 UInt bitQ = INSN(30,30);
9556 UInt op2 = INSN(23,22);
9557 UInt mm = INSN(20,16);
9558 UInt imm4 = INSN(14,11);
9559 UInt nn = INSN(9,5);
9560 UInt dd = INSN(4,0);
9562 if (op2 == BITS2(0,0)) {
9563 /* -------- 00: EXT 16b_16b_16b, 8b_8b_8b -------- */
9564 IRTemp sHi = newTempV128();
9565 IRTemp sLo = newTempV128();
9566 IRTemp res = newTempV128();
9567 assign(sHi, getQReg128(mm));
9568 assign(sLo, getQReg128(nn));
9569 if (bitQ == 1) {
9570 if (imm4 == 0) {
9571 assign(res, mkexpr(sLo));
9572 } else {
9573 vassert(imm4 >= 1 && imm4 <= 15);
9574 assign(res, triop(Iop_SliceV128,
9575 mkexpr(sHi), mkexpr(sLo), mkU8(imm4)));
9577 putQReg128(dd, mkexpr(res));
9578 DIP("ext v%u.16b, v%u.16b, v%u.16b, #%u\n", dd, nn, mm, imm4);
9579 } else {
9580 if (imm4 >= 8) return False;
9581 if (imm4 == 0) {
9582 assign(res, mkexpr(sLo));
9583 } else {
9584 vassert(imm4 >= 1 && imm4 <= 7);
9585 IRTemp hi64lo64 = newTempV128();
9586 assign(hi64lo64, binop(Iop_InterleaveLO64x2,
9587 mkexpr(sHi), mkexpr(sLo)));
9588 assign(res, triop(Iop_SliceV128,
9589 mkexpr(hi64lo64), mkexpr(hi64lo64), mkU8(imm4)));
9591 putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
9592 DIP("ext v%u.8b, v%u.8b, v%u.8b, #%u\n", dd, nn, mm, imm4);
9594 return True;
9597 return False;
9598 # undef INSN
9602 static
9603 Bool dis_AdvSIMD_TBL_TBX(/*MB_OUT*/DisResult* dres, UInt insn)
9605 /* 31 29 23 21 20 15 14 12 11 9 4
9606 0 q 001110 op2 0 m 0 len op 00 n d
9607 Decode fields: op2,len,op
9609 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
9610 if (INSN(31,31) != 0
9611 || INSN(29,24) != BITS6(0,0,1,1,1,0)
9612 || INSN(21,21) != 0
9613 || INSN(15,15) != 0
9614 || INSN(11,10) != BITS2(0,0)) {
9615 return False;
9617 UInt bitQ = INSN(30,30);
9618 UInt op2 = INSN(23,22);
9619 UInt mm = INSN(20,16);
9620 UInt len = INSN(14,13);
9621 UInt bitOP = INSN(12,12);
9622 UInt nn = INSN(9,5);
9623 UInt dd = INSN(4,0);
9625 if (op2 == X00) {
9626 /* -------- 00,xx,0 TBL, xx register table -------- */
9627 /* -------- 00,xx,1 TBX, xx register table -------- */
9628 /* 31 28 20 15 14 12 9 4
9629 0q0 01110 000 m 0 len 000 n d TBL Vd.Ta, {Vn .. V(n+len)%32}, Vm.Ta
9630 0q0 01110 000 m 0 len 100 n d TBX Vd.Ta, {Vn .. V(n+len)%32}, Vm.Ta
9631 where Ta = 16b(q=1) or 8b(q=0)
9633 Bool isTBX = bitOP == 1;
9634 /* The out-of-range values to use. */
9635 IRTemp oor_values = newTempV128();
9636 assign(oor_values, isTBX ? getQReg128(dd) : mkV128(0));
9637 /* src value */
9638 IRTemp src = newTempV128();
9639 assign(src, getQReg128(mm));
9640 /* The table values */
9641 IRTemp tab[4];
9642 UInt i;
9643 for (i = 0; i <= len; i++) {
9644 vassert(i < 4);
9645 tab[i] = newTempV128();
9646 assign(tab[i], getQReg128((nn + i) % 32));
9648 IRTemp res = math_TBL_TBX(tab, len, src, oor_values);
9649 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
9650 const HChar* Ta = bitQ ==1 ? "16b" : "8b";
9651 const HChar* nm = isTBX ? "tbx" : "tbl";
9652 DIP("%s %s.%s, {v%u.16b .. v%u.16b}, %s.%s\n",
9653 nm, nameQReg128(dd), Ta, nn, (nn + len) % 32, nameQReg128(mm), Ta);
9654 return True;
9657 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
9658 return False;
9659 # undef INSN
9663 static
9664 Bool dis_AdvSIMD_ZIP_UZP_TRN(/*MB_OUT*/DisResult* dres, UInt insn)
9666 /* 31 29 23 21 20 15 14 11 9 4
9667 0 q 001110 size 0 m 0 opcode 10 n d
9668 Decode fields: opcode
9670 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
9671 if (INSN(31,31) != 0
9672 || INSN(29,24) != BITS6(0,0,1,1,1,0)
9673 || INSN(21,21) != 0 || INSN(15,15) != 0 || INSN(11,10) != BITS2(1,0)) {
9674 return False;
9676 UInt bitQ = INSN(30,30);
9677 UInt size = INSN(23,22);
9678 UInt mm = INSN(20,16);
9679 UInt opcode = INSN(14,12);
9680 UInt nn = INSN(9,5);
9681 UInt dd = INSN(4,0);
9683 if (opcode == BITS3(0,0,1) || opcode == BITS3(1,0,1)) {
9684 /* -------- 001 UZP1 std7_std7_std7 -------- */
9685 /* -------- 101 UZP2 std7_std7_std7 -------- */
9686 if (bitQ == 0 && size == X11) return False; // implied 1d case
9687 Bool isUZP1 = opcode == BITS3(0,0,1);
9688 IROp op = isUZP1 ? mkVecCATEVENLANES(size)
9689 : mkVecCATODDLANES(size);
9690 IRTemp preL = newTempV128();
9691 IRTemp preR = newTempV128();
9692 IRTemp res = newTempV128();
9693 if (bitQ == 0) {
9694 assign(preL, binop(Iop_InterleaveLO64x2, getQReg128(mm),
9695 getQReg128(nn)));
9696 assign(preR, mkexpr(preL));
9697 } else {
9698 assign(preL, getQReg128(mm));
9699 assign(preR, getQReg128(nn));
9701 assign(res, binop(op, mkexpr(preL), mkexpr(preR)));
9702 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
9703 const HChar* nm = isUZP1 ? "uzp1" : "uzp2";
9704 const HChar* arr = nameArr_Q_SZ(bitQ, size);
9705 DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
9706 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
9707 return True;
9710 if (opcode == BITS3(0,1,0) || opcode == BITS3(1,1,0)) {
9711 /* -------- 010 TRN1 std7_std7_std7 -------- */
9712 /* -------- 110 TRN2 std7_std7_std7 -------- */
9713 if (bitQ == 0 && size == X11) return False; // implied 1d case
9714 Bool isTRN1 = opcode == BITS3(0,1,0);
9715 IROp op1 = isTRN1 ? mkVecCATEVENLANES(size)
9716 : mkVecCATODDLANES(size);
9717 IROp op2 = mkVecINTERLEAVEHI(size);
9718 IRTemp srcM = newTempV128();
9719 IRTemp srcN = newTempV128();
9720 IRTemp res = newTempV128();
9721 assign(srcM, getQReg128(mm));
9722 assign(srcN, getQReg128(nn));
9723 assign(res, binop(op2, binop(op1, mkexpr(srcM), mkexpr(srcM)),
9724 binop(op1, mkexpr(srcN), mkexpr(srcN))));
9725 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
9726 const HChar* nm = isTRN1 ? "trn1" : "trn2";
9727 const HChar* arr = nameArr_Q_SZ(bitQ, size);
9728 DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
9729 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
9730 return True;
9733 if (opcode == BITS3(0,1,1) || opcode == BITS3(1,1,1)) {
9734 /* -------- 011 ZIP1 std7_std7_std7 -------- */
9735 /* -------- 111 ZIP2 std7_std7_std7 -------- */
9736 if (bitQ == 0 && size == X11) return False; // implied 1d case
9737 Bool isZIP1 = opcode == BITS3(0,1,1);
9738 IROp op = isZIP1 ? mkVecINTERLEAVELO(size)
9739 : mkVecINTERLEAVEHI(size);
9740 IRTemp preL = newTempV128();
9741 IRTemp preR = newTempV128();
9742 IRTemp res = newTempV128();
9743 if (bitQ == 0 && !isZIP1) {
9744 IRTemp z128 = newTempV128();
9745 assign(z128, mkV128(0x0000));
9746 // preL = Vm shifted left 32 bits
9747 // preR = Vn shifted left 32 bits
9748 assign(preL, triop(Iop_SliceV128,
9749 getQReg128(mm), mkexpr(z128), mkU8(12)));
9750 assign(preR, triop(Iop_SliceV128,
9751 getQReg128(nn), mkexpr(z128), mkU8(12)));
9753 } else {
9754 assign(preL, getQReg128(mm));
9755 assign(preR, getQReg128(nn));
9757 assign(res, binop(op, mkexpr(preL), mkexpr(preR)));
9758 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
9759 const HChar* nm = isZIP1 ? "zip1" : "zip2";
9760 const HChar* arr = nameArr_Q_SZ(bitQ, size);
9761 DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
9762 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
9763 return True;
9766 return False;
9767 # undef INSN
9771 static
9772 Bool dis_AdvSIMD_across_lanes(/*MB_OUT*/DisResult* dres, UInt insn)
9774 /* 31 28 23 21 16 11 9 4
9775 0 q u 01110 size 11000 opcode 10 n d
9776 Decode fields: u,size,opcode
9778 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
9779 if (INSN(31,31) != 0
9780 || INSN(28,24) != BITS5(0,1,1,1,0)
9781 || INSN(21,17) != BITS5(1,1,0,0,0) || INSN(11,10) != BITS2(1,0)) {
9782 return False;
9784 UInt bitQ = INSN(30,30);
9785 UInt bitU = INSN(29,29);
9786 UInt size = INSN(23,22);
9787 UInt opcode = INSN(16,12);
9788 UInt nn = INSN(9,5);
9789 UInt dd = INSN(4,0);
9791 if (opcode == BITS5(0,0,0,1,1)) {
9792 /* -------- 0,xx,00011 SADDLV -------- */
9793 /* -------- 1,xx,00011 UADDLV -------- */
9794 /* size is the narrow size */
9795 if (size == X11 || (size == X10 && bitQ == 0)) return False;
9796 Bool isU = bitU == 1;
9797 IRTemp src = newTempV128();
9798 assign(src, getQReg128(nn));
9799 /* The basic plan is to widen the lower half, and if Q = 1,
9800 the upper half too. Add them together (if Q = 1), and in
9801 either case fold with add at twice the lane width.
9803 IRExpr* widened
9804 = mkexpr(math_WIDEN_LO_OR_HI_LANES(
9805 isU, False/*!fromUpperHalf*/, size, mkexpr(src)));
9806 if (bitQ == 1) {
9807 widened
9808 = binop(mkVecADD(size+1),
9809 widened,
9810 mkexpr(math_WIDEN_LO_OR_HI_LANES(
9811 isU, True/*fromUpperHalf*/, size, mkexpr(src)))
9814 /* Now fold. */
9815 IRTemp tWi = newTempV128();
9816 assign(tWi, widened);
9817 IRTemp res = math_FOLDV(tWi, mkVecADD(size+1));
9818 putQReg128(dd, mkexpr(res));
9819 const HChar* arr = nameArr_Q_SZ(bitQ, size);
9820 const HChar ch = "bhsd"[size];
9821 DIP("%s %s.%c, %s.%s\n", isU ? "uaddlv" : "saddlv",
9822 nameQReg128(dd), ch, nameQReg128(nn), arr);
9823 return True;
9826 UInt ix = 0;
9827 /**/ if (opcode == BITS5(0,1,0,1,0)) { ix = bitU == 0 ? 1 : 2; }
9828 else if (opcode == BITS5(1,1,0,1,0)) { ix = bitU == 0 ? 3 : 4; }
9829 else if (opcode == BITS5(1,1,0,1,1) && bitU == 0) { ix = 5; }
9830 /**/
9831 if (ix != 0) {
9832 /* -------- 0,xx,01010: SMAXV -------- (1) */
9833 /* -------- 1,xx,01010: UMAXV -------- (2) */
9834 /* -------- 0,xx,11010: SMINV -------- (3) */
9835 /* -------- 1,xx,11010: UMINV -------- (4) */
9836 /* -------- 0,xx,11011: ADDV -------- (5) */
9837 vassert(ix >= 1 && ix <= 5);
9838 if (size == X11) return False; // 1d,2d cases not allowed
9839 if (size == X10 && bitQ == 0) return False; // 2s case not allowed
9840 const IROp opMAXS[3]
9841 = { Iop_Max8Sx16, Iop_Max16Sx8, Iop_Max32Sx4 };
9842 const IROp opMAXU[3]
9843 = { Iop_Max8Ux16, Iop_Max16Ux8, Iop_Max32Ux4 };
9844 const IROp opMINS[3]
9845 = { Iop_Min8Sx16, Iop_Min16Sx8, Iop_Min32Sx4 };
9846 const IROp opMINU[3]
9847 = { Iop_Min8Ux16, Iop_Min16Ux8, Iop_Min32Ux4 };
9848 const IROp opADD[3]
9849 = { Iop_Add8x16, Iop_Add16x8, Iop_Add32x4 };
9850 vassert(size < 3);
9851 IROp op = Iop_INVALID;
9852 const HChar* nm = NULL;
9853 switch (ix) {
9854 case 1: op = opMAXS[size]; nm = "smaxv"; break;
9855 case 2: op = opMAXU[size]; nm = "umaxv"; break;
9856 case 3: op = opMINS[size]; nm = "sminv"; break;
9857 case 4: op = opMINU[size]; nm = "uminv"; break;
9858 case 5: op = opADD[size]; nm = "addv"; break;
9859 default: vassert(0);
9861 vassert(op != Iop_INVALID && nm != NULL);
9862 IRTemp tN1 = newTempV128();
9863 assign(tN1, getQReg128(nn));
9864 /* If Q == 0, we're just folding lanes in the lower half of
9865 the value. In which case, copy the lower half of the
9866 source into the upper half, so we can then treat it the
9867 same as the full width case. Except for the addition case,
9868 in which we have to zero out the upper half. */
9869 IRTemp tN2 = newTempV128();
9870 assign(tN2, bitQ == 0
9871 ? (ix == 5 ? unop(Iop_ZeroHI64ofV128, mkexpr(tN1))
9872 : mk_CatEvenLanes64x2(tN1,tN1))
9873 : mkexpr(tN1));
9874 IRTemp res = math_FOLDV(tN2, op);
9875 if (res == IRTemp_INVALID)
9876 return False; /* means math_FOLDV
9877 doesn't handle this case yet */
9878 putQReg128(dd, mkexpr(res));
9879 const IRType tys[3] = { Ity_I8, Ity_I16, Ity_I32 };
9880 IRType laneTy = tys[size];
9881 const HChar* arr = nameArr_Q_SZ(bitQ, size);
9882 DIP("%s %s, %s.%s\n", nm,
9883 nameQRegLO(dd, laneTy), nameQReg128(nn), arr);
9884 return True;
9887 if ((size == X00 || size == X10)
9888 && (opcode == BITS5(0,1,1,0,0) || opcode == BITS5(0,1,1,1,1))) {
9889 /* -------- 0,00,01100: FMAXMNV s_4s -------- */
9890 /* -------- 0,10,01100: FMINMNV s_4s -------- */
9891 /* -------- 1,00,01111: FMAXV s_4s -------- */
9892 /* -------- 1,10,01111: FMINV s_4s -------- */
9893 /* FMAXNM, FMINNM: FIXME -- KLUDGED */
9894 if (bitQ == 0) return False; // Only 4s is allowed
9895 Bool isMIN = (size & 2) == 2;
9896 Bool isNM = opcode == BITS5(0,1,1,0,0);
9897 IROp opMXX = (isMIN ? mkVecMINF : mkVecMAXF)(2);
9898 IRTemp src = newTempV128();
9899 assign(src, getQReg128(nn));
9900 IRTemp res = math_FOLDV(src, opMXX);
9901 putQReg128(dd, mkexpr(res));
9902 DIP("%s%sv s%u, %u.4s\n",
9903 isMIN ? "fmin" : "fmax", isNM ? "nm" : "", dd, nn);
9904 return True;
9907 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
9908 return False;
9909 # undef INSN
9913 static
9914 Bool dis_AdvSIMD_copy(/*MB_OUT*/DisResult* dres, UInt insn)
9916 /* 31 28 20 15 14 10 9 4
9917 0 q op 01110000 imm5 0 imm4 1 n d
9918 Decode fields: q,op,imm4
9920 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
9921 if (INSN(31,31) != 0
9922 || INSN(28,21) != BITS8(0,1,1,1,0,0,0,0)
9923 || INSN(15,15) != 0 || INSN(10,10) != 1) {
9924 return False;
9926 UInt bitQ = INSN(30,30);
9927 UInt bitOP = INSN(29,29);
9928 UInt imm5 = INSN(20,16);
9929 UInt imm4 = INSN(14,11);
9930 UInt nn = INSN(9,5);
9931 UInt dd = INSN(4,0);
9933 /* -------- x,0,0000: DUP (element, vector) -------- */
9934 /* 31 28 20 15 9 4
9935 0q0 01110000 imm5 000001 n d DUP Vd.T, Vn.Ts[index]
9937 if (bitOP == 0 && imm4 == BITS4(0,0,0,0)) {
9938 UInt laneNo = 0;
9939 UInt laneSzLg2 = 0;
9940 HChar laneCh = '?';
9941 IRTemp res = handle_DUP_VEC_ELEM(&laneNo, &laneSzLg2, &laneCh,
9942 getQReg128(nn), imm5);
9943 if (res == IRTemp_INVALID)
9944 return False;
9945 if (bitQ == 0 && laneSzLg2 == X11)
9946 return False; /* .1d case */
9947 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
9948 const HChar* arT = nameArr_Q_SZ(bitQ, laneSzLg2);
9949 DIP("dup %s.%s, %s.%c[%u]\n",
9950 nameQReg128(dd), arT, nameQReg128(nn), laneCh, laneNo);
9951 return True;
9954 /* -------- x,0,0001: DUP (general, vector) -------- */
9955 /* 31 28 20 15 9 4
9956 0q0 01110000 imm5 0 0001 1 n d DUP Vd.T, Rn
9957 Q=0 writes 64, Q=1 writes 128
9958 imm5: xxxx1 8B(q=0) or 16b(q=1), R=W
9959 xxx10 4H(q=0) or 8H(q=1), R=W
9960 xx100 2S(q=0) or 4S(q=1), R=W
9961 x1000 Invalid(q=0) or 2D(q=1), R=X
9962 x0000 Invalid(q=0) or Invalid(q=1)
9963 Require op=0, imm4=0001
9965 if (bitOP == 0 && imm4 == BITS4(0,0,0,1)) {
9966 Bool isQ = bitQ == 1;
9967 IRTemp w0 = newTemp(Ity_I64);
9968 const HChar* arT = "??";
9969 IRType laneTy = Ity_INVALID;
9970 if (imm5 & 1) {
9971 arT = isQ ? "16b" : "8b";
9972 laneTy = Ity_I8;
9973 assign(w0, unop(Iop_8Uto64, unop(Iop_64to8, getIReg64orZR(nn))));
9975 else if (imm5 & 2) {
9976 arT = isQ ? "8h" : "4h";
9977 laneTy = Ity_I16;
9978 assign(w0, unop(Iop_16Uto64, unop(Iop_64to16, getIReg64orZR(nn))));
9980 else if (imm5 & 4) {
9981 arT = isQ ? "4s" : "2s";
9982 laneTy = Ity_I32;
9983 assign(w0, unop(Iop_32Uto64, unop(Iop_64to32, getIReg64orZR(nn))));
9985 else if ((imm5 & 8) && isQ) {
9986 arT = "2d";
9987 laneTy = Ity_I64;
9988 assign(w0, getIReg64orZR(nn));
9990 else {
9991 /* invalid; leave laneTy unchanged. */
9993 /* */
9994 if (laneTy != Ity_INVALID) {
9995 IRTemp w1 = math_DUP_TO_64(w0, laneTy);
9996 putQReg128(dd, binop(Iop_64HLtoV128,
9997 isQ ? mkexpr(w1) : mkU64(0), mkexpr(w1)));
9998 DIP("dup %s.%s, %s\n",
9999 nameQReg128(dd), arT, nameIRegOrZR(laneTy == Ity_I64, nn));
10000 return True;
10002 /* invalid */
10003 return False;
10006 /* -------- 1,0,0011: INS (general) -------- */
10007 /* 31 28 20 15 9 4
10008 010 01110000 imm5 000111 n d INS Vd.Ts[ix], Rn
10009 where Ts,ix = case imm5 of xxxx1 -> B, xxxx
10010 xxx10 -> H, xxx
10011 xx100 -> S, xx
10012 x1000 -> D, x
10014 if (bitQ == 1 && bitOP == 0 && imm4 == BITS4(0,0,1,1)) {
10015 HChar ts = '?';
10016 UInt laneNo = 16;
10017 IRExpr* src = NULL;
10018 if (imm5 & 1) {
10019 src = unop(Iop_64to8, getIReg64orZR(nn));
10020 laneNo = (imm5 >> 1) & 15;
10021 ts = 'b';
10023 else if (imm5 & 2) {
10024 src = unop(Iop_64to16, getIReg64orZR(nn));
10025 laneNo = (imm5 >> 2) & 7;
10026 ts = 'h';
10028 else if (imm5 & 4) {
10029 src = unop(Iop_64to32, getIReg64orZR(nn));
10030 laneNo = (imm5 >> 3) & 3;
10031 ts = 's';
10033 else if (imm5 & 8) {
10034 src = getIReg64orZR(nn);
10035 laneNo = (imm5 >> 4) & 1;
10036 ts = 'd';
10038 /* */
10039 if (src) {
10040 vassert(laneNo < 16);
10041 putQRegLane(dd, laneNo, src);
10042 DIP("ins %s.%c[%u], %s\n",
10043 nameQReg128(dd), ts, laneNo, nameIReg64orZR(nn));
10044 return True;
10046 /* invalid */
10047 return False;
10050 /* -------- x,0,0101: SMOV -------- */
10051 /* -------- x,0,0111: UMOV -------- */
10052 /* 31 28 20 15 9 4
10053 0q0 01110 000 imm5 001111 n d UMOV Xd/Wd, Vn.Ts[index]
10054 0q0 01110 000 imm5 001011 n d SMOV Xd/Wd, Vn.Ts[index]
10055 dest is Xd when q==1, Wd when q==0
10056 UMOV:
10057 Ts,index,ops = case q:imm5 of
10058 0:xxxx1 -> B, xxxx, 8Uto64
10059 1:xxxx1 -> invalid
10060 0:xxx10 -> H, xxx, 16Uto64
10061 1:xxx10 -> invalid
10062 0:xx100 -> S, xx, 32Uto64
10063 1:xx100 -> invalid
10064 1:x1000 -> D, x, copy64
10065 other -> invalid
10066 SMOV:
10067 Ts,index,ops = case q:imm5 of
10068 0:xxxx1 -> B, xxxx, (32Uto64 . 8Sto32)
10069 1:xxxx1 -> B, xxxx, 8Sto64
10070 0:xxx10 -> H, xxx, (32Uto64 . 16Sto32)
10071 1:xxx10 -> H, xxx, 16Sto64
10072 0:xx100 -> invalid
10073 1:xx100 -> S, xx, 32Sto64
10074 1:x1000 -> invalid
10075 other -> invalid
10077 if (bitOP == 0 && (imm4 == BITS4(0,1,0,1) || imm4 == BITS4(0,1,1,1))) {
10078 Bool isU = (imm4 & 2) == 2;
10079 const HChar* arTs = "??";
10080 UInt laneNo = 16; /* invalid */
10081 // Setting 'res' to non-NULL determines valid/invalid
10082 IRExpr* res = NULL;
10083 if (!bitQ && (imm5 & 1)) { // 0:xxxx1
10084 laneNo = (imm5 >> 1) & 15;
10085 IRExpr* lane = getQRegLane(nn, laneNo, Ity_I8);
10086 res = isU ? unop(Iop_8Uto64, lane)
10087 : unop(Iop_32Uto64, unop(Iop_8Sto32, lane));
10088 arTs = "b";
10090 else if (bitQ && (imm5 & 1)) { // 1:xxxx1
10091 laneNo = (imm5 >> 1) & 15;
10092 IRExpr* lane = getQRegLane(nn, laneNo, Ity_I8);
10093 res = isU ? NULL
10094 : unop(Iop_8Sto64, lane);
10095 arTs = "b";
10097 else if (!bitQ && (imm5 & 2)) { // 0:xxx10
10098 laneNo = (imm5 >> 2) & 7;
10099 IRExpr* lane = getQRegLane(nn, laneNo, Ity_I16);
10100 res = isU ? unop(Iop_16Uto64, lane)
10101 : unop(Iop_32Uto64, unop(Iop_16Sto32, lane));
10102 arTs = "h";
10104 else if (bitQ && (imm5 & 2)) { // 1:xxx10
10105 laneNo = (imm5 >> 2) & 7;
10106 IRExpr* lane = getQRegLane(nn, laneNo, Ity_I16);
10107 res = isU ? NULL
10108 : unop(Iop_16Sto64, lane);
10109 arTs = "h";
10111 else if (!bitQ && (imm5 & 4)) { // 0:xx100
10112 laneNo = (imm5 >> 3) & 3;
10113 IRExpr* lane = getQRegLane(nn, laneNo, Ity_I32);
10114 res = isU ? unop(Iop_32Uto64, lane)
10115 : NULL;
10116 arTs = "s";
10118 else if (bitQ && (imm5 & 4)) { // 1:xxx10
10119 laneNo = (imm5 >> 3) & 3;
10120 IRExpr* lane = getQRegLane(nn, laneNo, Ity_I32);
10121 res = isU ? NULL
10122 : unop(Iop_32Sto64, lane);
10123 arTs = "s";
10125 else if (bitQ && (imm5 & 8)) { // 1:x1000
10126 laneNo = (imm5 >> 4) & 1;
10127 IRExpr* lane = getQRegLane(nn, laneNo, Ity_I64);
10128 res = isU ? lane
10129 : NULL;
10130 arTs = "d";
10132 /* */
10133 if (res) {
10134 vassert(laneNo < 16);
10135 putIReg64orZR(dd, res);
10136 DIP("%cmov %s, %s.%s[%u]\n", isU ? 'u' : 's',
10137 nameIRegOrZR(bitQ == 1, dd),
10138 nameQReg128(nn), arTs, laneNo);
10139 return True;
10141 /* invalid */
10142 return False;
10145 /* -------- 1,1,xxxx: INS (element) -------- */
10146 /* 31 28 20 14 9 4
10147 011 01110000 imm5 0 imm4 n d INS Vd.Ts[ix1], Vn.Ts[ix2]
10148 where Ts,ix1,ix2
10149 = case imm5 of xxxx1 -> B, xxxx, imm4[3:0]
10150 xxx10 -> H, xxx, imm4[3:1]
10151 xx100 -> S, xx, imm4[3:2]
10152 x1000 -> D, x, imm4[3:3]
10154 if (bitQ == 1 && bitOP == 1) {
10155 HChar ts = '?';
10156 IRType ity = Ity_INVALID;
10157 UInt ix1 = 16;
10158 UInt ix2 = 16;
10159 if (imm5 & 1) {
10160 ts = 'b';
10161 ity = Ity_I8;
10162 ix1 = (imm5 >> 1) & 15;
10163 ix2 = (imm4 >> 0) & 15;
10165 else if (imm5 & 2) {
10166 ts = 'h';
10167 ity = Ity_I16;
10168 ix1 = (imm5 >> 2) & 7;
10169 ix2 = (imm4 >> 1) & 7;
10171 else if (imm5 & 4) {
10172 ts = 's';
10173 ity = Ity_I32;
10174 ix1 = (imm5 >> 3) & 3;
10175 ix2 = (imm4 >> 2) & 3;
10177 else if (imm5 & 8) {
10178 ts = 'd';
10179 ity = Ity_I64;
10180 ix1 = (imm5 >> 4) & 1;
10181 ix2 = (imm4 >> 3) & 1;
10183 /* */
10184 if (ity != Ity_INVALID) {
10185 vassert(ix1 < 16);
10186 vassert(ix2 < 16);
10187 putQRegLane(dd, ix1, getQRegLane(nn, ix2, ity));
10188 DIP("ins %s.%c[%u], %s.%c[%u]\n",
10189 nameQReg128(dd), ts, ix1, nameQReg128(nn), ts, ix2);
10190 return True;
10192 /* invalid */
10193 return False;
10196 return False;
10197 # undef INSN
10201 static
10202 Bool dis_AdvSIMD_modified_immediate(/*MB_OUT*/DisResult* dres, UInt insn)
10204 /* 31 28 18 15 11 9 4
10205 0q op 01111 00000 abc cmode 01 defgh d
10206 Decode fields: q,op,cmode
10207 Bit 11 is really "o2", but it is always zero.
10209 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
10210 if (INSN(31,31) != 0
10211 || INSN(28,19) != BITS10(0,1,1,1,1,0,0,0,0,0)
10212 || INSN(11,10) != BITS2(0,1)) {
10213 return False;
10215 UInt bitQ = INSN(30,30);
10216 UInt bitOP = INSN(29,29);
10217 UInt cmode = INSN(15,12);
10218 UInt abcdefgh = (INSN(18,16) << 5) | INSN(9,5);
10219 UInt dd = INSN(4,0);
10221 ULong imm64lo = 0;
10222 UInt op_cmode = (bitOP << 4) | cmode;
10223 Bool ok = False;
10224 Bool isORR = False;
10225 Bool isBIC = False;
10226 Bool isMOV = False;
10227 Bool isMVN = False;
10228 Bool isFMOV = False;
10229 switch (op_cmode) {
10230 /* -------- x,0,0000 MOVI 32-bit shifted imm -------- */
10231 /* -------- x,0,0010 MOVI 32-bit shifted imm -------- */
10232 /* -------- x,0,0100 MOVI 32-bit shifted imm -------- */
10233 /* -------- x,0,0110 MOVI 32-bit shifted imm -------- */
10234 case BITS5(0,0,0,0,0): case BITS5(0,0,0,1,0):
10235 case BITS5(0,0,1,0,0): case BITS5(0,0,1,1,0): // 0:0xx0
10236 ok = True; isMOV = True; break;
10238 /* -------- x,0,0001 ORR (vector, immediate) 32-bit -------- */
10239 /* -------- x,0,0011 ORR (vector, immediate) 32-bit -------- */
10240 /* -------- x,0,0101 ORR (vector, immediate) 32-bit -------- */
10241 /* -------- x,0,0111 ORR (vector, immediate) 32-bit -------- */
10242 case BITS5(0,0,0,0,1): case BITS5(0,0,0,1,1):
10243 case BITS5(0,0,1,0,1): case BITS5(0,0,1,1,1): // 0:0xx1
10244 ok = True; isORR = True; break;
10246 /* -------- x,0,1000 MOVI 16-bit shifted imm -------- */
10247 /* -------- x,0,1010 MOVI 16-bit shifted imm -------- */
10248 case BITS5(0,1,0,0,0): case BITS5(0,1,0,1,0): // 0:10x0
10249 ok = True; isMOV = True; break;
10251 /* -------- x,0,1001 ORR (vector, immediate) 16-bit -------- */
10252 /* -------- x,0,1011 ORR (vector, immediate) 16-bit -------- */
10253 case BITS5(0,1,0,0,1): case BITS5(0,1,0,1,1): // 0:10x1
10254 ok = True; isORR = True; break;
10256 /* -------- x,0,1100 MOVI 32-bit shifting ones -------- */
10257 /* -------- x,0,1101 MOVI 32-bit shifting ones -------- */
10258 case BITS5(0,1,1,0,0): case BITS5(0,1,1,0,1): // 0:110x
10259 ok = True; isMOV = True; break;
10261 /* -------- x,0,1110 MOVI 8-bit -------- */
10262 case BITS5(0,1,1,1,0):
10263 ok = True; isMOV = True; break;
10265 /* -------- x,0,1111 FMOV (vector, immediate, F32) -------- */
10266 case BITS5(0,1,1,1,1): // 0:1111
10267 ok = True; isFMOV = True; break;
10269 /* -------- x,1,0000 MVNI 32-bit shifted imm -------- */
10270 /* -------- x,1,0010 MVNI 32-bit shifted imm -------- */
10271 /* -------- x,1,0100 MVNI 32-bit shifted imm -------- */
10272 /* -------- x,1,0110 MVNI 32-bit shifted imm -------- */
10273 case BITS5(1,0,0,0,0): case BITS5(1,0,0,1,0):
10274 case BITS5(1,0,1,0,0): case BITS5(1,0,1,1,0): // 1:0xx0
10275 ok = True; isMVN = True; break;
10277 /* -------- x,1,0001 BIC (vector, immediate) 32-bit -------- */
10278 /* -------- x,1,0011 BIC (vector, immediate) 32-bit -------- */
10279 /* -------- x,1,0101 BIC (vector, immediate) 32-bit -------- */
10280 /* -------- x,1,0111 BIC (vector, immediate) 32-bit -------- */
10281 case BITS5(1,0,0,0,1): case BITS5(1,0,0,1,1):
10282 case BITS5(1,0,1,0,1): case BITS5(1,0,1,1,1): // 1:0xx1
10283 ok = True; isBIC = True; break;
10285 /* -------- x,1,1000 MVNI 16-bit shifted imm -------- */
10286 /* -------- x,1,1010 MVNI 16-bit shifted imm -------- */
10287 case BITS5(1,1,0,0,0): case BITS5(1,1,0,1,0): // 1:10x0
10288 ok = True; isMVN = True; break;
10290 /* -------- x,1,1001 BIC (vector, immediate) 16-bit -------- */
10291 /* -------- x,1,1011 BIC (vector, immediate) 16-bit -------- */
10292 case BITS5(1,1,0,0,1): case BITS5(1,1,0,1,1): // 1:10x1
10293 ok = True; isBIC = True; break;
10295 /* -------- x,1,1100 MVNI 32-bit shifting ones -------- */
10296 /* -------- x,1,1101 MVNI 32-bit shifting ones -------- */
10297 case BITS5(1,1,1,0,0): case BITS5(1,1,1,0,1): // 1:110x
10298 ok = True; isMVN = True; break;
10300 /* -------- 0,1,1110 MOVI 64-bit scalar -------- */
10301 /* -------- 1,1,1110 MOVI 64-bit vector -------- */
10302 case BITS5(1,1,1,1,0):
10303 ok = True; isMOV = True; break;
10305 /* -------- 1,1,1111 FMOV (vector, immediate, F64) -------- */
10306 case BITS5(1,1,1,1,1): // 1:1111
10307 ok = bitQ == 1; isFMOV = True; break;
10309 default:
10310 break;
10312 if (ok) {
10313 vassert(1 == (isMOV ? 1 : 0) + (isMVN ? 1 : 0)
10314 + (isORR ? 1 : 0) + (isBIC ? 1 : 0) + (isFMOV ? 1 : 0));
10315 ok = AdvSIMDExpandImm(&imm64lo, bitOP, cmode, abcdefgh);
10317 if (ok) {
10318 if (isORR || isBIC) {
10319 ULong inv
10320 = isORR ? 0ULL : ~0ULL;
10321 IRExpr* immV128
10322 = binop(Iop_64HLtoV128, mkU64(inv ^ imm64lo), mkU64(inv ^ imm64lo));
10323 IRExpr* res
10324 = binop(isORR ? Iop_OrV128 : Iop_AndV128, getQReg128(dd), immV128);
10325 const HChar* nm = isORR ? "orr" : "bic";
10326 if (bitQ == 0) {
10327 putQReg128(dd, unop(Iop_ZeroHI64ofV128, res));
10328 DIP("%s %s.1d, %016llx\n", nm, nameQReg128(dd), imm64lo);
10329 } else {
10330 putQReg128(dd, res);
10331 DIP("%s %s.2d, #0x%016llx'%016llx\n", nm,
10332 nameQReg128(dd), imm64lo, imm64lo);
10335 else if (isMOV || isMVN || isFMOV) {
10336 if (isMVN) imm64lo = ~imm64lo;
10337 ULong imm64hi = bitQ == 0 ? 0 : imm64lo;
10338 IRExpr* immV128 = binop(Iop_64HLtoV128, mkU64(imm64hi),
10339 mkU64(imm64lo));
10340 putQReg128(dd, immV128);
10341 DIP("mov %s, #0x%016llx'%016llx\n", nameQReg128(dd), imm64hi, imm64lo);
10343 return True;
10345 /* else fall through */
10347 return False;
10348 # undef INSN
10352 static
10353 Bool dis_AdvSIMD_scalar_copy(/*MB_OUT*/DisResult* dres, UInt insn)
10355 /* 31 28 20 15 14 10 9 4
10356 01 op 11110000 imm5 0 imm4 1 n d
10357 Decode fields: op,imm4
10359 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
10360 if (INSN(31,30) != BITS2(0,1)
10361 || INSN(28,21) != BITS8(1,1,1,1,0,0,0,0)
10362 || INSN(15,15) != 0 || INSN(10,10) != 1) {
10363 return False;
10365 UInt bitOP = INSN(29,29);
10366 UInt imm5 = INSN(20,16);
10367 UInt imm4 = INSN(14,11);
10368 UInt nn = INSN(9,5);
10369 UInt dd = INSN(4,0);
10371 if (bitOP == 0 && imm4 == BITS4(0,0,0,0)) {
10372 /* -------- 0,0000 DUP (element, scalar) -------- */
10373 IRTemp w0 = newTemp(Ity_I64);
10374 const HChar* arTs = "??";
10375 IRType laneTy = Ity_INVALID;
10376 UInt laneNo = 16; /* invalid */
10377 if (imm5 & 1) {
10378 arTs = "b";
10379 laneNo = (imm5 >> 1) & 15;
10380 laneTy = Ity_I8;
10381 assign(w0, unop(Iop_8Uto64, getQRegLane(nn, laneNo, laneTy)));
10383 else if (imm5 & 2) {
10384 arTs = "h";
10385 laneNo = (imm5 >> 2) & 7;
10386 laneTy = Ity_I16;
10387 assign(w0, unop(Iop_16Uto64, getQRegLane(nn, laneNo, laneTy)));
10389 else if (imm5 & 4) {
10390 arTs = "s";
10391 laneNo = (imm5 >> 3) & 3;
10392 laneTy = Ity_I32;
10393 assign(w0, unop(Iop_32Uto64, getQRegLane(nn, laneNo, laneTy)));
10395 else if (imm5 & 8) {
10396 arTs = "d";
10397 laneNo = (imm5 >> 4) & 1;
10398 laneTy = Ity_I64;
10399 assign(w0, getQRegLane(nn, laneNo, laneTy));
10401 else {
10402 /* invalid; leave laneTy unchanged. */
10404 /* */
10405 if (laneTy != Ity_INVALID) {
10406 vassert(laneNo < 16);
10407 putQReg128(dd, binop(Iop_64HLtoV128, mkU64(0), mkexpr(w0)));
10408 DIP("dup %s, %s.%s[%u]\n",
10409 nameQRegLO(dd, laneTy), nameQReg128(nn), arTs, laneNo);
10410 return True;
10412 /* else fall through */
10415 return False;
10416 # undef INSN
10420 static
10421 Bool dis_AdvSIMD_scalar_pairwise(/*MB_OUT*/DisResult* dres, UInt insn,
10422 const VexArchInfo* archinfo)
10424 /* 31 28 23 21 16 11 9 4
10425 01 u 11110 sz 11000 opcode 10 n d
10426 Decode fields: u,sz,opcode
10428 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
10429 if (INSN(31,30) != BITS2(0,1)
10430 || INSN(28,24) != BITS5(1,1,1,1,0)
10431 || INSN(21,17) != BITS5(1,1,0,0,0)
10432 || INSN(11,10) != BITS2(1,0)) {
10433 return False;
10435 UInt bitU = INSN(29,29);
10436 UInt sz = INSN(23,22);
10437 UInt opcode = INSN(16,12);
10438 UInt nn = INSN(9,5);
10439 UInt dd = INSN(4,0);
10441 if (bitU == 0 && sz == X11 && opcode == BITS5(1,1,0,1,1)) {
10442 /* -------- 0,11,11011 ADDP d_2d -------- */
10443 IRTemp xy = newTempV128();
10444 IRTemp xx = newTempV128();
10445 assign(xy, getQReg128(nn));
10446 assign(xx, binop(Iop_InterleaveHI64x2, mkexpr(xy), mkexpr(xy)));
10447 putQReg128(dd, unop(Iop_ZeroHI64ofV128,
10448 binop(Iop_Add64x2, mkexpr(xy), mkexpr(xx))));
10449 DIP("addp d%u, %s.2d\n", dd, nameQReg128(nn));
10450 return True;
10453 if (bitU == 1 && sz <= X01 && opcode == BITS5(0,1,1,0,1)) {
10454 /* -------- 1,00,01101 ADDP s_2s -------- */
10455 /* -------- 1,01,01101 ADDP d_2d -------- */
10456 Bool isD = sz == X01;
10457 IROp opZHI = mkVecZEROHIxxOFV128(isD ? 3 : 2);
10458 IROp opADD = mkVecADDF(isD ? 3 : 2);
10459 IRTemp src = newTempV128();
10460 IRTemp argL = newTempV128();
10461 IRTemp argR = newTempV128();
10462 assign(src, getQReg128(nn));
10463 assign(argL, unop(opZHI, mkexpr(src)));
10464 assign(argR, unop(opZHI, triop(Iop_SliceV128, mkexpr(src), mkexpr(src),
10465 mkU8(isD ? 8 : 4))));
10466 putQReg128(dd, unop(opZHI,
10467 triop(opADD, mkexpr(mk_get_IR_rounding_mode()),
10468 mkexpr(argL), mkexpr(argR))));
10469 DIP(isD ? "faddp d%u, v%u.2d\n" : "faddp s%u, v%u.2s\n", dd, nn);
10470 return True;
10473 /* Half-precision floating point ADDP (v8.2). */
10474 if (bitU == 0 && sz <= X00 && opcode == BITS5(0,1,1,0,1)) {
10475 /* -------- 0,00,01101 ADDP h_2h -------- */
10476 if ((archinfo->hwcaps & VEX_HWCAPS_ARM64_FP16) == 0)
10477 return False;
10478 IROp opZHI = mkVecZEROHIxxOFV128(1);
10479 IROp opADD = mkVecADDF(1);
10480 IRTemp src = newTempV128();
10481 IRTemp argL = newTempV128();
10482 IRTemp argR = newTempV128();
10483 assign(src, getQReg128(nn));
10484 assign(argL, unop(opZHI, mkexpr(src)));
10485 assign(argR, unop(opZHI, triop(Iop_SliceV128, mkexpr(src), mkexpr(src),
10486 mkU8(2))));
10487 putQReg128(dd, unop(opZHI,
10488 triop(opADD, mkexpr(mk_get_IR_rounding_mode()),
10489 mkexpr(argL), mkexpr(argR))));
10490 DIP("faddp h%u, v%u.2h\n", dd, nn);
10491 return True;
10494 if (bitU == 1
10495 && (opcode == BITS5(0,1,1,0,0) || opcode == BITS5(0,1,1,1,1))) {
10496 /* -------- 1,0x,01100 FMAXNMP d_2d, s_2s -------- */
10497 /* -------- 1,1x,01100 FMINNMP d_2d, s_2s -------- */
10498 /* -------- 1,0x,01111 FMAXP d_2d, s_2s -------- */
10499 /* -------- 1,1x,01111 FMINP d_2d, s_2s -------- */
10500 /* FMAXNM, FMINNM: FIXME -- KLUDGED */
10501 Bool isD = (sz & 1) == 1;
10502 Bool isMIN = (sz & 2) == 2;
10503 Bool isNM = opcode == BITS5(0,1,1,0,0);
10504 IROp opZHI = mkVecZEROHIxxOFV128(isD ? 3 : 2);
10505 IROp opMXX = (isMIN ? mkVecMINF : mkVecMAXF)(isD ? 3 : 2);
10506 IRTemp src = newTempV128();
10507 IRTemp argL = newTempV128();
10508 IRTemp argR = newTempV128();
10509 assign(src, getQReg128(nn));
10510 assign(argL, unop(opZHI, mkexpr(src)));
10511 assign(argR, unop(opZHI, triop(Iop_SliceV128, mkexpr(src), mkexpr(src),
10512 mkU8(isD ? 8 : 4))));
10513 putQReg128(dd, unop(opZHI,
10514 binop(opMXX, mkexpr(argL), mkexpr(argR))));
10515 HChar c = isD ? 'd' : 's';
10516 DIP("%s%sp %c%u, v%u.2%c\n",
10517 isMIN ? "fmin" : "fmax", isNM ? "nm" : "", c, dd, nn, c);
10518 return True;
10521 return False;
10522 # undef INSN
10526 static
10527 Bool dis_AdvSIMD_scalar_shift_by_imm(/*MB_OUT*/DisResult* dres, UInt insn)
10529 /* 31 28 22 18 15 10 9 4
10530 01 u 111110 immh immb opcode 1 n d
10531 Decode fields: u,immh,opcode
10533 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
10534 if (INSN(31,30) != BITS2(0,1)
10535 || INSN(28,23) != BITS6(1,1,1,1,1,0) || INSN(10,10) != 1) {
10536 return False;
10538 UInt bitU = INSN(29,29);
10539 UInt immh = INSN(22,19);
10540 UInt immb = INSN(18,16);
10541 UInt opcode = INSN(15,11);
10542 UInt nn = INSN(9,5);
10543 UInt dd = INSN(4,0);
10544 UInt immhb = (immh << 3) | immb;
10546 if ((immh & 8) == 8
10547 && (opcode == BITS5(0,0,0,0,0) || opcode == BITS5(0,0,0,1,0))) {
10548 /* -------- 0,1xxx,00000 SSHR d_d_#imm -------- */
10549 /* -------- 1,1xxx,00000 USHR d_d_#imm -------- */
10550 /* -------- 0,1xxx,00010 SSRA d_d_#imm -------- */
10551 /* -------- 1,1xxx,00010 USRA d_d_#imm -------- */
10552 Bool isU = bitU == 1;
10553 Bool isAcc = opcode == BITS5(0,0,0,1,0);
10554 UInt sh = 128 - immhb;
10555 vassert(sh >= 1 && sh <= 64);
10556 IROp op = isU ? Iop_ShrN64x2 : Iop_SarN64x2;
10557 IRExpr* src = getQReg128(nn);
10558 IRTemp shf = newTempV128();
10559 IRTemp res = newTempV128();
10560 if (sh == 64 && isU) {
10561 assign(shf, mkV128(0x0000));
10562 } else {
10563 UInt nudge = 0;
10564 if (sh == 64) {
10565 vassert(!isU);
10566 nudge = 1;
10568 assign(shf, binop(op, src, mkU8(sh - nudge)));
10570 assign(res, isAcc ? binop(Iop_Add64x2, getQReg128(dd), mkexpr(shf))
10571 : mkexpr(shf));
10572 putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
10573 const HChar* nm = isAcc ? (isU ? "usra" : "ssra")
10574 : (isU ? "ushr" : "sshr");
10575 DIP("%s d%u, d%u, #%u\n", nm, dd, nn, sh);
10576 return True;
10579 if ((immh & 8) == 8
10580 && (opcode == BITS5(0,0,1,0,0) || opcode == BITS5(0,0,1,1,0))) {
10581 /* -------- 0,1xxx,00100 SRSHR d_d_#imm -------- */
10582 /* -------- 1,1xxx,00100 URSHR d_d_#imm -------- */
10583 /* -------- 0,1xxx,00110 SRSRA d_d_#imm -------- */
10584 /* -------- 1,1xxx,00110 URSRA d_d_#imm -------- */
10585 Bool isU = bitU == 1;
10586 Bool isAcc = opcode == BITS5(0,0,1,1,0);
10587 UInt sh = 128 - immhb;
10588 vassert(sh >= 1 && sh <= 64);
10589 IROp op = isU ? Iop_Rsh64Ux2 : Iop_Rsh64Sx2;
10590 vassert(sh >= 1 && sh <= 64);
10591 IRExpr* src = getQReg128(nn);
10592 IRTemp imm8 = newTemp(Ity_I8);
10593 assign(imm8, mkU8((UChar)(-sh)));
10594 IRExpr* amt = mkexpr(math_DUP_TO_V128(imm8, Ity_I8));
10595 IRTemp shf = newTempV128();
10596 IRTemp res = newTempV128();
10597 assign(shf, binop(op, src, amt));
10598 assign(res, isAcc ? binop(Iop_Add64x2, getQReg128(dd), mkexpr(shf))
10599 : mkexpr(shf));
10600 putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
10601 const HChar* nm = isAcc ? (isU ? "ursra" : "srsra")
10602 : (isU ? "urshr" : "srshr");
10603 DIP("%s d%u, d%u, #%u\n", nm, dd, nn, sh);
10604 return True;
10607 if (bitU == 1 && (immh & 8) == 8 && opcode == BITS5(0,1,0,0,0)) {
10608 /* -------- 1,1xxx,01000 SRI d_d_#imm -------- */
10609 UInt sh = 128 - immhb;
10610 vassert(sh >= 1 && sh <= 64);
10611 if (sh == 64) {
10612 putQReg128(dd, unop(Iop_ZeroHI64ofV128, getQReg128(dd)));
10613 } else {
10614 /* sh is in range 1 .. 63 */
10615 ULong nmask = (ULong)(((Long)0x8000000000000000ULL) >> (sh-1));
10616 IRExpr* nmaskV = binop(Iop_64HLtoV128, mkU64(nmask), mkU64(nmask));
10617 IRTemp res = newTempV128();
10618 assign(res, binop(Iop_OrV128,
10619 binop(Iop_AndV128, getQReg128(dd), nmaskV),
10620 binop(Iop_ShrN64x2, getQReg128(nn), mkU8(sh))));
10621 putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
10623 DIP("sri d%u, d%u, #%u\n", dd, nn, sh);
10624 return True;
10627 if (bitU == 0 && (immh & 8) == 8 && opcode == BITS5(0,1,0,1,0)) {
10628 /* -------- 0,1xxx,01010 SHL d_d_#imm -------- */
10629 UInt sh = immhb - 64;
10630 vassert(sh < 64);
10631 putQReg128(dd,
10632 unop(Iop_ZeroHI64ofV128,
10633 sh == 0 ? getQReg128(nn)
10634 : binop(Iop_ShlN64x2, getQReg128(nn), mkU8(sh))));
10635 DIP("shl d%u, d%u, #%u\n", dd, nn, sh);
10636 return True;
10639 if (bitU == 1 && (immh & 8) == 8 && opcode == BITS5(0,1,0,1,0)) {
10640 /* -------- 1,1xxx,01010 SLI d_d_#imm -------- */
10641 UInt sh = immhb - 64;
10642 vassert(sh < 64);
10643 if (sh == 0) {
10644 putQReg128(dd, unop(Iop_ZeroHI64ofV128, getQReg128(nn)));
10645 } else {
10646 /* sh is in range 1 .. 63 */
10647 ULong nmask = (1ULL << sh) - 1;
10648 IRExpr* nmaskV = binop(Iop_64HLtoV128, mkU64(nmask), mkU64(nmask));
10649 IRTemp res = newTempV128();
10650 assign(res, binop(Iop_OrV128,
10651 binop(Iop_AndV128, getQReg128(dd), nmaskV),
10652 binop(Iop_ShlN64x2, getQReg128(nn), mkU8(sh))));
10653 putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
10655 DIP("sli d%u, d%u, #%u\n", dd, nn, sh);
10656 return True;
10659 if (opcode == BITS5(0,1,1,1,0)
10660 || (bitU == 1 && opcode == BITS5(0,1,1,0,0))) {
10661 /* -------- 0,01110 SQSHL #imm -------- */
10662 /* -------- 1,01110 UQSHL #imm -------- */
10663 /* -------- 1,01100 SQSHLU #imm -------- */
10664 UInt size = 0;
10665 UInt shift = 0;
10666 Bool ok = getLaneInfo_IMMH_IMMB(&shift, &size, immh, immb);
10667 if (!ok) return False;
10668 vassert(size <= 3);
10669 /* The shift encoding has opposite sign for the leftwards case.
10670 Adjust shift to compensate. */
10671 UInt lanebits = 8 << size;
10672 shift = lanebits - shift;
10673 vassert(shift < lanebits);
10674 const HChar* nm = NULL;
10675 /**/ if (bitU == 0 && opcode == BITS5(0,1,1,1,0)) nm = "sqshl";
10676 else if (bitU == 1 && opcode == BITS5(0,1,1,1,0)) nm = "uqshl";
10677 else if (bitU == 1 && opcode == BITS5(0,1,1,0,0)) nm = "sqshlu";
10678 else vassert(0);
10679 IRTemp qDiff1 = IRTemp_INVALID;
10680 IRTemp qDiff2 = IRTemp_INVALID;
10681 IRTemp res = IRTemp_INVALID;
10682 IRTemp src = math_ZERO_ALL_EXCEPT_LOWEST_LANE(size, getQReg128(nn));
10683 /* This relies on the fact that the zeroed out lanes generate zeroed
10684 result lanes and don't saturate, so there's no point in trimming
10685 the resulting res, qDiff1 or qDiff2 values. */
10686 math_QSHL_IMM(&res, &qDiff1, &qDiff2, src, size, shift, nm);
10687 putQReg128(dd, mkexpr(res));
10688 updateQCFLAGwithDifference(qDiff1, qDiff2);
10689 const HChar arr = "bhsd"[size];
10690 DIP("%s %c%u, %c%u, #%u\n", nm, arr, dd, arr, nn, shift);
10691 return True;
10694 if (opcode == BITS5(1,0,0,1,0) || opcode == BITS5(1,0,0,1,1)
10695 || (bitU == 1
10696 && (opcode == BITS5(1,0,0,0,0) || opcode == BITS5(1,0,0,0,1)))) {
10697 /* -------- 0,10010 SQSHRN #imm -------- */
10698 /* -------- 1,10010 UQSHRN #imm -------- */
10699 /* -------- 0,10011 SQRSHRN #imm -------- */
10700 /* -------- 1,10011 UQRSHRN #imm -------- */
10701 /* -------- 1,10000 SQSHRUN #imm -------- */
10702 /* -------- 1,10001 SQRSHRUN #imm -------- */
10703 UInt size = 0;
10704 UInt shift = 0;
10705 Bool ok = getLaneInfo_IMMH_IMMB(&shift, &size, immh, immb);
10706 if (!ok || size == X11) return False;
10707 // always true, size is unsigned int
10708 //vassert(size >= X00);
10709 vassert(size <= X10);
10710 vassert(shift >= 1 && shift <= (8 << size));
10711 const HChar* nm = "??";
10712 IROp op = Iop_INVALID;
10713 /* Decide on the name and the operation. */
10714 /**/ if (bitU == 0 && opcode == BITS5(1,0,0,1,0)) {
10715 nm = "sqshrn"; op = mkVecQANDqsarNNARROWSS(size);
10717 else if (bitU == 1 && opcode == BITS5(1,0,0,1,0)) {
10718 nm = "uqshrn"; op = mkVecQANDqshrNNARROWUU(size);
10720 else if (bitU == 0 && opcode == BITS5(1,0,0,1,1)) {
10721 nm = "sqrshrn"; op = mkVecQANDqrsarNNARROWSS(size);
10723 else if (bitU == 1 && opcode == BITS5(1,0,0,1,1)) {
10724 nm = "uqrshrn"; op = mkVecQANDqrshrNNARROWUU(size);
10726 else if (bitU == 1 && opcode == BITS5(1,0,0,0,0)) {
10727 nm = "sqshrun"; op = mkVecQANDqsarNNARROWSU(size);
10729 else if (bitU == 1 && opcode == BITS5(1,0,0,0,1)) {
10730 nm = "sqrshrun"; op = mkVecQANDqrsarNNARROWSU(size);
10732 else vassert(0);
10733 /* Compute the result (Q, shifted value) pair. */
10734 IRTemp src128 = math_ZERO_ALL_EXCEPT_LOWEST_LANE(size+1, getQReg128(nn));
10735 IRTemp pair = newTempV128();
10736 assign(pair, binop(op, mkexpr(src128), mkU8(shift)));
10737 /* Update the result reg */
10738 IRTemp res64in128 = newTempV128();
10739 assign(res64in128, unop(Iop_ZeroHI64ofV128, mkexpr(pair)));
10740 putQReg128(dd, mkexpr(res64in128));
10741 /* Update the Q flag. */
10742 IRTemp q64q64 = newTempV128();
10743 assign(q64q64, binop(Iop_InterleaveHI64x2, mkexpr(pair), mkexpr(pair)));
10744 IRTemp z128 = newTempV128();
10745 assign(z128, mkV128(0x0000));
10746 updateQCFLAGwithDifference(q64q64, z128);
10747 /* */
10748 const HChar arrNarrow = "bhsd"[size];
10749 const HChar arrWide = "bhsd"[size+1];
10750 DIP("%s %c%u, %c%u, #%u\n", nm, arrNarrow, dd, arrWide, nn, shift);
10751 return True;
10754 if (immh >= BITS4(0,1,0,0) && opcode == BITS5(1,1,1,0,0)) {
10755 /* -------- 0,!=00xx,11100 SCVTF d_d_imm, s_s_imm -------- */
10756 /* -------- 1,!=00xx,11100 UCVTF d_d_imm, s_s_imm -------- */
10757 UInt size = 0;
10758 UInt fbits = 0;
10759 Bool ok = getLaneInfo_IMMH_IMMB(&fbits, &size, immh, immb);
10760 /* The following holds because immh is never zero. */
10761 vassert(ok);
10762 /* The following holds because immh >= 0100. */
10763 vassert(size == X10 || size == X11);
10764 Bool isD = size == X11;
10765 Bool isU = bitU == 1;
10766 vassert(fbits >= 1 && fbits <= (isD ? 64 : 32));
10767 Double scale = two_to_the_minus(fbits);
10768 IRExpr* scaleE = isD ? IRExpr_Const(IRConst_F64(scale))
10769 : IRExpr_Const(IRConst_F32( (Float)scale ));
10770 IROp opMUL = isD ? Iop_MulF64 : Iop_MulF32;
10771 IROp opCVT = isU ? (isD ? Iop_I64UtoF64 : Iop_I32UtoF32)
10772 : (isD ? Iop_I64StoF64 : Iop_I32StoF32);
10773 IRType tyF = isD ? Ity_F64 : Ity_F32;
10774 IRType tyI = isD ? Ity_I64 : Ity_I32;
10775 IRTemp src = newTemp(tyI);
10776 IRTemp res = newTemp(tyF);
10777 IRTemp rm = mk_get_IR_rounding_mode();
10778 assign(src, getQRegLane(nn, 0, tyI));
10779 assign(res, triop(opMUL, mkexpr(rm),
10780 binop(opCVT, mkexpr(rm), mkexpr(src)), scaleE));
10781 putQRegLane(dd, 0, mkexpr(res));
10782 if (!isD) {
10783 putQRegLane(dd, 1, mkU32(0));
10785 putQRegLane(dd, 1, mkU64(0));
10786 const HChar ch = isD ? 'd' : 's';
10787 DIP("%s %c%u, %c%u, #%u\n", isU ? "ucvtf" : "scvtf",
10788 ch, dd, ch, nn, fbits);
10789 return True;
10792 if (immh >= BITS4(0,1,0,0) && opcode == BITS5(1,1,1,1,1)) {
10793 /* -------- 0,!=00xx,11111 FCVTZS d_d_imm, s_s_imm -------- */
10794 /* -------- 1,!=00xx,11111 FCVTZU d_d_imm, s_s_imm -------- */
10795 UInt size = 0;
10796 UInt fbits = 0;
10797 Bool ok = getLaneInfo_IMMH_IMMB(&fbits, &size, immh, immb);
10798 /* The following holds because immh is never zero. */
10799 vassert(ok);
10800 /* The following holds because immh >= 0100. */
10801 vassert(size == X10 || size == X11);
10802 Bool isD = size == X11;
10803 Bool isU = bitU == 1;
10804 vassert(fbits >= 1 && fbits <= (isD ? 64 : 32));
10805 Double scale = two_to_the_plus(fbits);
10806 IRExpr* scaleE = isD ? IRExpr_Const(IRConst_F64(scale))
10807 : IRExpr_Const(IRConst_F32( (Float)scale ));
10808 IROp opMUL = isD ? Iop_MulF64 : Iop_MulF32;
10809 IROp opCVT = isU ? (isD ? Iop_F64toI64U : Iop_F32toI32U)
10810 : (isD ? Iop_F64toI64S : Iop_F32toI32S);
10811 IRType tyF = isD ? Ity_F64 : Ity_F32;
10812 IRType tyI = isD ? Ity_I64 : Ity_I32;
10813 IRTemp src = newTemp(tyF);
10814 IRTemp res = newTemp(tyI);
10815 IRTemp rm = newTemp(Ity_I32);
10816 assign(src, getQRegLane(nn, 0, tyF));
10817 assign(rm, mkU32(Irrm_ZERO));
10818 assign(res, binop(opCVT, mkexpr(rm),
10819 triop(opMUL, mkexpr(rm), mkexpr(src), scaleE)));
10820 putQRegLane(dd, 0, mkexpr(res));
10821 if (!isD) {
10822 putQRegLane(dd, 1, mkU32(0));
10824 putQRegLane(dd, 1, mkU64(0));
10825 const HChar ch = isD ? 'd' : 's';
10826 DIP("%s %c%u, %c%u, #%u\n", isU ? "fcvtzu" : "fcvtzs",
10827 ch, dd, ch, nn, fbits);
10828 return True;
10831 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
10832 return False;
10833 # undef INSN
10837 static
10838 Bool dis_AdvSIMD_scalar_three_different(/*MB_OUT*/DisResult* dres, UInt insn)
10840 /* 31 29 28 23 21 20 15 11 9 4
10841 01 U 11110 size 1 m opcode 00 n d
10842 Decode fields: u,opcode
10844 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
10845 if (INSN(31,30) != BITS2(0,1)
10846 || INSN(28,24) != BITS5(1,1,1,1,0)
10847 || INSN(21,21) != 1
10848 || INSN(11,10) != BITS2(0,0)) {
10849 return False;
10851 UInt bitU = INSN(29,29);
10852 UInt size = INSN(23,22);
10853 UInt mm = INSN(20,16);
10854 UInt opcode = INSN(15,12);
10855 UInt nn = INSN(9,5);
10856 UInt dd = INSN(4,0);
10857 vassert(size < 4);
10859 if (bitU == 0
10860 && (opcode == BITS4(1,1,0,1)
10861 || opcode == BITS4(1,0,0,1) || opcode == BITS4(1,0,1,1))) {
10862 /* -------- 0,1101 SQDMULL -------- */ // 0 (ks)
10863 /* -------- 0,1001 SQDMLAL -------- */ // 1
10864 /* -------- 0,1011 SQDMLSL -------- */ // 2
10865 /* Widens, and size refers to the narrowed lanes. */
10866 UInt ks = 3;
10867 switch (opcode) {
10868 case BITS4(1,1,0,1): ks = 0; break;
10869 case BITS4(1,0,0,1): ks = 1; break;
10870 case BITS4(1,0,1,1): ks = 2; break;
10871 default: vassert(0);
10873 vassert(ks <= 2);
10874 if (size == X00 || size == X11) return False;
10875 vassert(size <= 2);
10876 IRTemp vecN, vecM, vecD, res, sat1q, sat1n, sat2q, sat2n;
10877 vecN = vecM = vecD = res = sat1q = sat1n = sat2q = sat2n = IRTemp_INVALID;
10878 newTempsV128_3(&vecN, &vecM, &vecD);
10879 assign(vecN, getQReg128(nn));
10880 assign(vecM, getQReg128(mm));
10881 assign(vecD, getQReg128(dd));
10882 math_SQDMULL_ACC(&res, &sat1q, &sat1n, &sat2q, &sat2n,
10883 False/*!is2*/, size, "mas"[ks],
10884 vecN, vecM, ks == 0 ? IRTemp_INVALID : vecD);
10885 IROp opZHI = mkVecZEROHIxxOFV128(size+1);
10886 putQReg128(dd, unop(opZHI, mkexpr(res)));
10887 vassert(sat1q != IRTemp_INVALID && sat1n != IRTemp_INVALID);
10888 updateQCFLAGwithDifferenceZHI(sat1q, sat1n, opZHI);
10889 if (sat2q != IRTemp_INVALID || sat2n != IRTemp_INVALID) {
10890 updateQCFLAGwithDifferenceZHI(sat2q, sat2n, opZHI);
10892 const HChar* nm = ks == 0 ? "sqdmull"
10893 : (ks == 1 ? "sqdmlal" : "sqdmlsl");
10894 const HChar arrNarrow = "bhsd"[size];
10895 const HChar arrWide = "bhsd"[size+1];
10896 DIP("%s %c%u, %c%u, %c%u\n",
10897 nm, arrWide, dd, arrNarrow, nn, arrNarrow, mm);
10898 return True;
10901 return False;
10902 # undef INSN
10906 static
10907 Bool dis_AdvSIMD_scalar_three_same(/*MB_OUT*/DisResult* dres, UInt insn)
10909 /* 31 29 28 23 21 20 15 10 9 4
10910 01 U 11110 size 1 m opcode 1 n d
10911 Decode fields: u,size,opcode
10913 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
10914 if (INSN(31,30) != BITS2(0,1)
10915 || INSN(28,24) != BITS5(1,1,1,1,0)
10916 || INSN(21,21) != 1
10917 || INSN(10,10) != 1) {
10918 return False;
10920 UInt bitU = INSN(29,29);
10921 UInt size = INSN(23,22);
10922 UInt mm = INSN(20,16);
10923 UInt opcode = INSN(15,11);
10924 UInt nn = INSN(9,5);
10925 UInt dd = INSN(4,0);
10926 vassert(size < 4);
10928 if (opcode == BITS5(0,0,0,0,1) || opcode == BITS5(0,0,1,0,1)) {
10929 /* -------- 0,xx,00001 SQADD std4_std4_std4 -------- */
10930 /* -------- 1,xx,00001 UQADD std4_std4_std4 -------- */
10931 /* -------- 0,xx,00101 SQSUB std4_std4_std4 -------- */
10932 /* -------- 1,xx,00101 UQSUB std4_std4_std4 -------- */
10933 Bool isADD = opcode == BITS5(0,0,0,0,1);
10934 Bool isU = bitU == 1;
10935 IROp qop = Iop_INVALID;
10936 IROp nop = Iop_INVALID;
10937 if (isADD) {
10938 qop = isU ? mkVecQADDU(size) : mkVecQADDS(size);
10939 nop = mkVecADD(size);
10940 } else {
10941 qop = isU ? mkVecQSUBU(size) : mkVecQSUBS(size);
10942 nop = mkVecSUB(size);
10944 IRTemp argL = newTempV128();
10945 IRTemp argR = newTempV128();
10946 IRTemp qres = newTempV128();
10947 IRTemp nres = newTempV128();
10948 assign(argL, getQReg128(nn));
10949 assign(argR, getQReg128(mm));
10950 assign(qres, mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(
10951 size, binop(qop, mkexpr(argL), mkexpr(argR)))));
10952 assign(nres, mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(
10953 size, binop(nop, mkexpr(argL), mkexpr(argR)))));
10954 putQReg128(dd, mkexpr(qres));
10955 updateQCFLAGwithDifference(qres, nres);
10956 const HChar* nm = isADD ? (isU ? "uqadd" : "sqadd")
10957 : (isU ? "uqsub" : "sqsub");
10958 const HChar arr = "bhsd"[size];
10959 DIP("%s %c%u, %c%u, %c%u\n", nm, arr, dd, arr, nn, arr, mm);
10960 return True;
10963 if (size == X11 && opcode == BITS5(0,0,1,1,0)) {
10964 /* -------- 0,11,00110 CMGT d_d_d -------- */ // >s
10965 /* -------- 1,11,00110 CMHI d_d_d -------- */ // >u
10966 Bool isGT = bitU == 0;
10967 IRExpr* argL = getQReg128(nn);
10968 IRExpr* argR = getQReg128(mm);
10969 IRTemp res = newTempV128();
10970 assign(res,
10971 isGT ? binop(Iop_CmpGT64Sx2, argL, argR)
10972 : binop(Iop_CmpGT64Ux2, argL, argR));
10973 putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
10974 DIP("%s %s, %s, %s\n",isGT ? "cmgt" : "cmhi",
10975 nameQRegLO(dd, Ity_I64),
10976 nameQRegLO(nn, Ity_I64), nameQRegLO(mm, Ity_I64));
10977 return True;
10980 if (size == X11 && opcode == BITS5(0,0,1,1,1)) {
10981 /* -------- 0,11,00111 CMGE d_d_d -------- */ // >=s
10982 /* -------- 1,11,00111 CMHS d_d_d -------- */ // >=u
10983 Bool isGE = bitU == 0;
10984 IRExpr* argL = getQReg128(nn);
10985 IRExpr* argR = getQReg128(mm);
10986 IRTemp res = newTempV128();
10987 assign(res,
10988 isGE ? unop(Iop_NotV128, binop(Iop_CmpGT64Sx2, argR, argL))
10989 : unop(Iop_NotV128, binop(Iop_CmpGT64Ux2, argR, argL)));
10990 putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
10991 DIP("%s %s, %s, %s\n", isGE ? "cmge" : "cmhs",
10992 nameQRegLO(dd, Ity_I64),
10993 nameQRegLO(nn, Ity_I64), nameQRegLO(mm, Ity_I64));
10994 return True;
10997 if (size == X11 && (opcode == BITS5(0,1,0,0,0)
10998 || opcode == BITS5(0,1,0,1,0))) {
10999 /* -------- 0,xx,01000 SSHL d_d_d -------- */
11000 /* -------- 0,xx,01010 SRSHL d_d_d -------- */
11001 /* -------- 1,xx,01000 USHL d_d_d -------- */
11002 /* -------- 1,xx,01010 URSHL d_d_d -------- */
11003 Bool isU = bitU == 1;
11004 Bool isR = opcode == BITS5(0,1,0,1,0);
11005 IROp op = isR ? (isU ? mkVecRSHU(size) : mkVecRSHS(size))
11006 : (isU ? mkVecSHU(size) : mkVecSHS(size));
11007 IRTemp res = newTempV128();
11008 assign(res, binop(op, getQReg128(nn), getQReg128(mm)));
11009 putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
11010 const HChar* nm = isR ? (isU ? "urshl" : "srshl")
11011 : (isU ? "ushl" : "sshl");
11012 DIP("%s %s, %s, %s\n", nm,
11013 nameQRegLO(dd, Ity_I64),
11014 nameQRegLO(nn, Ity_I64), nameQRegLO(mm, Ity_I64));
11015 return True;
11018 if (opcode == BITS5(0,1,0,0,1) || opcode == BITS5(0,1,0,1,1)) {
11019 /* -------- 0,xx,01001 SQSHL std4_std4_std4 -------- */
11020 /* -------- 0,xx,01011 SQRSHL std4_std4_std4 -------- */
11021 /* -------- 1,xx,01001 UQSHL std4_std4_std4 -------- */
11022 /* -------- 1,xx,01011 UQRSHL std4_std4_std4 -------- */
11023 Bool isU = bitU == 1;
11024 Bool isR = opcode == BITS5(0,1,0,1,1);
11025 IROp op = isR ? (isU ? mkVecQANDUQRSH(size) : mkVecQANDSQRSH(size))
11026 : (isU ? mkVecQANDUQSH(size) : mkVecQANDSQSH(size));
11027 /* This is a bit tricky. Since we're only interested in the lowest
11028 lane of the result, we zero out all the rest in the operands, so
11029 as to ensure that other lanes don't pollute the returned Q value.
11030 This works because it means, for the lanes we don't care about, we
11031 are shifting zero by zero, which can never saturate. */
11032 IRTemp res256 = newTemp(Ity_V256);
11033 IRTemp resSH = newTempV128();
11034 IRTemp resQ = newTempV128();
11035 IRTemp zero = newTempV128();
11036 assign(
11037 res256,
11038 binop(op,
11039 mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(size, getQReg128(nn))),
11040 mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(size, getQReg128(mm)))));
11041 assign(resSH, unop(Iop_V256toV128_0, mkexpr(res256)));
11042 assign(resQ, unop(Iop_V256toV128_1, mkexpr(res256)));
11043 assign(zero, mkV128(0x0000));
11044 putQReg128(dd, mkexpr(resSH));
11045 updateQCFLAGwithDifference(resQ, zero);
11046 const HChar* nm = isR ? (isU ? "uqrshl" : "sqrshl")
11047 : (isU ? "uqshl" : "sqshl");
11048 const HChar arr = "bhsd"[size];
11049 DIP("%s %c%u, %c%u, %c%u\n", nm, arr, dd, arr, nn, arr, mm);
11050 return True;
11053 if (size == X11 && opcode == BITS5(1,0,0,0,0)) {
11054 /* -------- 0,11,10000 ADD d_d_d -------- */
11055 /* -------- 1,11,10000 SUB d_d_d -------- */
11056 Bool isSUB = bitU == 1;
11057 IRTemp res = newTemp(Ity_I64);
11058 assign(res, binop(isSUB ? Iop_Sub64 : Iop_Add64,
11059 getQRegLane(nn, 0, Ity_I64),
11060 getQRegLane(mm, 0, Ity_I64)));
11061 putQRegLane(dd, 0, mkexpr(res));
11062 putQRegLane(dd, 1, mkU64(0));
11063 DIP("%s %s, %s, %s\n", isSUB ? "sub" : "add",
11064 nameQRegLO(dd, Ity_I64),
11065 nameQRegLO(nn, Ity_I64), nameQRegLO(mm, Ity_I64));
11066 return True;
11069 if (size == X11 && opcode == BITS5(1,0,0,0,1)) {
11070 /* -------- 0,11,10001 CMTST d_d_d -------- */ // &, != 0
11071 /* -------- 1,11,10001 CMEQ d_d_d -------- */ // ==
11072 Bool isEQ = bitU == 1;
11073 IRExpr* argL = getQReg128(nn);
11074 IRExpr* argR = getQReg128(mm);
11075 IRTemp res = newTempV128();
11076 assign(res,
11077 isEQ ? binop(Iop_CmpEQ64x2, argL, argR)
11078 : unop(Iop_NotV128, binop(Iop_CmpEQ64x2,
11079 binop(Iop_AndV128, argL, argR),
11080 mkV128(0x0000))));
11081 putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
11082 DIP("%s %s, %s, %s\n", isEQ ? "cmeq" : "cmtst",
11083 nameQRegLO(dd, Ity_I64),
11084 nameQRegLO(nn, Ity_I64), nameQRegLO(mm, Ity_I64));
11085 return True;
11088 if (opcode == BITS5(1,0,1,1,0)) {
11089 /* -------- 0,xx,10110 SQDMULH s and h variants only -------- */
11090 /* -------- 1,xx,10110 SQRDMULH s and h variants only -------- */
11091 if (size == X00 || size == X11) return False;
11092 Bool isR = bitU == 1;
11093 IRTemp res, sat1q, sat1n, vN, vM;
11094 res = sat1q = sat1n = vN = vM = IRTemp_INVALID;
11095 newTempsV128_2(&vN, &vM);
11096 assign(vN, getQReg128(nn));
11097 assign(vM, getQReg128(mm));
11098 math_SQDMULH(&res, &sat1q, &sat1n, isR, size, vN, vM);
11099 putQReg128(dd,
11100 mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(size, mkexpr(res))));
11101 updateQCFLAGwithDifference(
11102 math_ZERO_ALL_EXCEPT_LOWEST_LANE(size, mkexpr(sat1q)),
11103 math_ZERO_ALL_EXCEPT_LOWEST_LANE(size, mkexpr(sat1n)));
11104 const HChar arr = "bhsd"[size];
11105 const HChar* nm = isR ? "sqrdmulh" : "sqdmulh";
11106 DIP("%s %c%u, %c%u, %c%u\n", nm, arr, dd, arr, nn, arr, mm);
11107 return True;
11110 if (bitU == 1 && size >= X10 && opcode == BITS5(1,1,0,1,0)) {
11111 /* -------- 1,1x,11010 FABD d_d_d, s_s_s -------- */
11112 IRType ity = size == X11 ? Ity_F64 : Ity_F32;
11113 IRTemp res = newTemp(ity);
11114 assign(res, unop(mkABSF(ity),
11115 triop(mkSUBF(ity),
11116 mkexpr(mk_get_IR_rounding_mode()),
11117 getQRegLO(nn,ity), getQRegLO(mm,ity))));
11118 putQReg128(dd, mkV128(0x0000));
11119 putQRegLO(dd, mkexpr(res));
11120 DIP("fabd %s, %s, %s\n",
11121 nameQRegLO(dd, ity), nameQRegLO(nn, ity), nameQRegLO(mm, ity));
11122 return True;
11125 if (bitU == 0 && size <= X01 && opcode == BITS5(1,1,0,1,1)) {
11126 /* -------- 0,0x,11011 FMULX d_d_d, s_s_s -------- */
11127 // KLUDGE: FMULX is treated the same way as FMUL. That can't be right.
11128 IRType ity = size == X01 ? Ity_F64 : Ity_F32;
11129 IRTemp res = newTemp(ity);
11130 assign(res, triop(mkMULF(ity),
11131 mkexpr(mk_get_IR_rounding_mode()),
11132 getQRegLO(nn,ity), getQRegLO(mm,ity)));
11133 putQReg128(dd, mkV128(0x0000));
11134 putQRegLO(dd, mkexpr(res));
11135 DIP("fmulx %s, %s, %s\n",
11136 nameQRegLO(dd, ity), nameQRegLO(nn, ity), nameQRegLO(mm, ity));
11137 return True;
11140 if (size <= X01 && opcode == BITS5(1,1,1,0,0)) {
11141 /* -------- 0,0x,11100 FCMEQ d_d_d, s_s_s -------- */
11142 /* -------- 1,0x,11100 FCMGE d_d_d, s_s_s -------- */
11143 Bool isD = size == X01;
11144 IRType ity = isD ? Ity_F64 : Ity_F32;
11145 Bool isGE = bitU == 1;
11146 IROp opCMP = isGE ? (isD ? Iop_CmpLE64Fx2 : Iop_CmpLE32Fx4)
11147 : (isD ? Iop_CmpEQ64Fx2 : Iop_CmpEQ32Fx4);
11148 IRTemp res = newTempV128();
11149 assign(res, isGE ? binop(opCMP, getQReg128(mm), getQReg128(nn)) // swapd
11150 : binop(opCMP, getQReg128(nn), getQReg128(mm)));
11151 putQReg128(dd, mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(isD ? X11 : X10,
11152 mkexpr(res))));
11153 DIP("%s %s, %s, %s\n", isGE ? "fcmge" : "fcmeq",
11154 nameQRegLO(dd, ity), nameQRegLO(nn, ity), nameQRegLO(mm, ity));
11155 return True;
11158 if (bitU == 1 && size >= X10 && opcode == BITS5(1,1,1,0,0)) {
11159 /* -------- 1,1x,11100 FCMGT d_d_d, s_s_s -------- */
11160 Bool isD = size == X11;
11161 IRType ity = isD ? Ity_F64 : Ity_F32;
11162 IROp opCMP = isD ? Iop_CmpLT64Fx2 : Iop_CmpLT32Fx4;
11163 IRTemp res = newTempV128();
11164 assign(res, binop(opCMP, getQReg128(mm), getQReg128(nn))); // swapd
11165 putQReg128(dd, mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(isD ? X11 : X10,
11166 mkexpr(res))));
11167 DIP("%s %s, %s, %s\n", "fcmgt",
11168 nameQRegLO(dd, ity), nameQRegLO(nn, ity), nameQRegLO(mm, ity));
11169 return True;
11172 if (bitU == 1 && opcode == BITS5(1,1,1,0,1)) {
11173 /* -------- 1,0x,11101 FACGE d_d_d, s_s_s -------- */
11174 /* -------- 1,1x,11101 FACGT d_d_d, s_s_s -------- */
11175 Bool isD = (size & 1) == 1;
11176 IRType ity = isD ? Ity_F64 : Ity_F32;
11177 Bool isGT = (size & 2) == 2;
11178 IROp opCMP = isGT ? (isD ? Iop_CmpLT64Fx2 : Iop_CmpLT32Fx4)
11179 : (isD ? Iop_CmpLE64Fx2 : Iop_CmpLE32Fx4);
11180 IROp opABS = isD ? Iop_Abs64Fx2 : Iop_Abs32Fx4;
11181 IRTemp res = newTempV128();
11182 assign(res, binop(opCMP, unop(opABS, getQReg128(mm)),
11183 unop(opABS, getQReg128(nn)))); // swapd
11184 putQReg128(dd, mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(isD ? X11 : X10,
11185 mkexpr(res))));
11186 DIP("%s %s, %s, %s\n", isGT ? "facgt" : "facge",
11187 nameQRegLO(dd, ity), nameQRegLO(nn, ity), nameQRegLO(mm, ity));
11188 return True;
11191 if (bitU == 0 && opcode == BITS5(1,1,1,1,1)) {
11192 /* -------- 0,0x,11111: FRECPS d_d_d, s_s_s -------- */
11193 /* -------- 0,1x,11111: FRSQRTS d_d_d, s_s_s -------- */
11194 Bool isSQRT = (size & 2) == 2;
11195 Bool isD = (size & 1) == 1;
11196 IROp op = isSQRT ? (isD ? Iop_RSqrtStep64Fx2 : Iop_RSqrtStep32Fx4)
11197 : (isD ? Iop_RecipStep64Fx2 : Iop_RecipStep32Fx4);
11198 IRTemp res = newTempV128();
11199 assign(res, binop(op, getQReg128(nn), getQReg128(mm)));
11200 putQReg128(dd, mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(isD ? X11 : X10,
11201 mkexpr(res))));
11202 HChar c = isD ? 'd' : 's';
11203 DIP("%s %c%u, %c%u, %c%u\n", isSQRT ? "frsqrts" : "frecps",
11204 c, dd, c, nn, c, mm);
11205 return True;
11208 return False;
11209 # undef INSN
11212 static
11213 Bool dis_AdvSIMD_scalar_three_same_extra(/*MB_OUT*/DisResult* dres, UInt insn,
11214 const VexArchInfo* archinfo)
11216 /* 31 29 28 23 21 20 15 10 9 4
11217 01 U 11110 size 0 m opcode 1 n d
11218 Decode fields: u,size,opcode
11220 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
11221 if (INSN(31,30) != BITS2(0,1)
11222 || INSN(28,24) != BITS5(1,1,1,1,0)
11223 || INSN(21,21) != 0
11224 || INSN(10,10) != 1) {
11225 return False;
11227 UInt bitU = INSN(29,29);
11228 UInt size = INSN(23,22);
11229 UInt mm = INSN(20,16);
11230 UInt opcode = INSN(15,11);
11231 UInt nn = INSN(9,5);
11232 UInt dd = INSN(4,0);
11233 vassert(size < 4);
11234 vassert(mm < 32 && nn < 32 && dd < 32);
11236 if (bitU == 1 && (opcode == BITS5(1,0,0,0,0) || opcode == BITS5(1,0,0,0,1))) {
11237 /* -------- xx,10000 SQRDMLAH s and h variants only -------- */
11238 /* -------- xx,10001 SQRDMLSH s and h variants only -------- */
11239 if (size == X00 || size == X11) return False;
11240 Bool isAdd = opcode == BITS5(1,0,0,0,0);
11242 IRTemp res, res_nosat, vD, vN, vM;
11243 res = res_nosat = vD = vN = vM = IRTemp_INVALID;
11244 newTempsV128_3(&vD, &vN, &vM);
11245 assign(vD, getQReg128(dd));
11246 assign(vN, getQReg128(nn));
11247 assign(vM, getQReg128(mm));
11249 math_SQRDMLAH(&res, &res_nosat, isAdd, size, vD, vN, vM);
11250 putQReg128(dd,
11251 mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(size, mkexpr(res))));
11252 updateQCFLAGwithDifference(
11253 math_ZERO_ALL_EXCEPT_LOWEST_LANE(size, mkexpr(res)),
11254 math_ZERO_ALL_EXCEPT_LOWEST_LANE(size, mkexpr(res_nosat)));
11256 const HChar arr = "hs"[size];
11257 const HChar* nm = isAdd ? "sqrdmlah" : "sqrdmlsh";
11258 DIP("%s %c%u, %c%u, %c%u\n", nm, arr, dd, arr, nn, arr, mm);
11259 return True;
11262 if (bitU == 1 && size == X11 && opcode == BITS5(0,0,0,1,0)) {
11263 /* -------- 1,11,00010 FABD h_h_h -------- */
11264 if ((archinfo->hwcaps & VEX_HWCAPS_ARM64_FP16) == 0)
11265 return False;
11266 IRTemp res = newTemp(Ity_F16);
11267 assign(res, unop(mkABSF(Ity_F16),
11268 triop(mkSUBF(Ity_F16),
11269 mkexpr(mk_get_IR_rounding_mode()),
11270 getQRegLO(nn,Ity_F16), getQRegLO(mm,Ity_F16))));
11271 putQReg128(dd, mkV128(0x0000));
11272 putQRegLO(dd, mkexpr(res));
11273 DIP("fabd %s, %s, %s\n",
11274 nameQRegLO(dd, Ity_F16), nameQRegLO(nn, Ity_F16), nameQRegLO(mm, Ity_F16));
11275 return True;
11278 if (size == X01 && opcode == BITS5(0,0,1,0,0)) {
11279 /* -------- 0,01,00100 FCMEQ h_h_h -------- */
11280 /* -------- 1,01,00100 FCMGE h_h_h -------- */
11281 if ((archinfo->hwcaps & VEX_HWCAPS_ARM64_FP16) == 0)
11282 return False;
11283 Bool isGE = bitU == 1;
11284 IROp opCMP = isGE ? Iop_CmpLE16Fx8 : Iop_CmpEQ16Fx8;
11285 IRTemp res = newTempV128();
11286 /* Swap source and destination in order to use existing LE IR op for GE. */
11287 assign(res, isGE ? binop(opCMP, getQReg128(mm), getQReg128(nn))
11288 : binop(opCMP, getQReg128(nn), getQReg128(mm)));
11289 putQReg128(dd, mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(X01, mkexpr(res))));
11290 DIP("%s %s, %s, %s\n", isGE ? "fcmge" : "fcmeq",
11291 nameQRegLO(dd, Ity_F16), nameQRegLO(nn, Ity_F16), nameQRegLO(mm, Ity_F16));
11292 return True;
11295 if (bitU == 1 && size == X11 && opcode == BITS5(0,0,1,0,0)) {
11296 /* -------- 1,11,00100 FCMGT h_h_h -------- */
11297 if ((archinfo->hwcaps & VEX_HWCAPS_ARM64_FP16) == 0)
11298 return False;
11299 IRTemp res = newTempV128();
11300 /* Swap source and destination in order to use existing LT IR op for GT. */
11301 assign(res, binop(Iop_CmpLT16Fx8, getQReg128(mm), getQReg128(nn)));
11302 putQReg128(dd, mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(X01, mkexpr(res))));
11303 DIP("%s %s, %s, %s\n", "fcmgt",
11304 nameQRegLO(dd, Ity_F16), nameQRegLO(nn, Ity_F16), nameQRegLO(mm, Ity_F16));
11305 return True;
11308 if (bitU == 1 && opcode == BITS5(0,0,1,0,1)) {
11309 /* -------- 1,01,00101 FACGE h_h_h -------- */
11310 /* -------- 1,01,00101 FACGT h_h_h -------- */
11311 if ((archinfo->hwcaps & VEX_HWCAPS_ARM64_FP16) == 0)
11312 return False;
11313 IRType ity = Ity_F16;
11314 Bool isGT = (size & 2) == 2;
11315 IROp opCMP = isGT ? Iop_CmpLT16Fx8 : Iop_CmpLE16Fx8;
11316 IROp opABS = Iop_Abs16Fx8;
11317 IRTemp res = newTempV128();
11318 assign(res, binop(opCMP, unop(opABS, getQReg128(mm)),
11319 unop(opABS, getQReg128(nn))));
11320 putQReg128(dd, mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(X01,
11321 mkexpr(res))));
11322 DIP("%s %s, %s, %s\n", isGT ? "facgt" : "facge",
11323 nameQRegLO(dd, ity), nameQRegLO(nn, ity), nameQRegLO(mm, ity));
11324 return True;
11327 return False;
11328 # undef INSN
11332 static
11333 Bool dis_AdvSIMD_scalar_two_reg_misc(/*MB_OUT*/DisResult* dres, UInt insn)
11335 /* 31 29 28 23 21 16 11 9 4
11336 01 U 11110 size 10000 opcode 10 n d
11337 Decode fields: u,size,opcode
11339 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
11340 if (INSN(31,30) != BITS2(0,1)
11341 || INSN(28,24) != BITS5(1,1,1,1,0)
11342 || INSN(21,17) != BITS5(1,0,0,0,0)
11343 || INSN(11,10) != BITS2(1,0)) {
11344 return False;
11346 UInt bitU = INSN(29,29);
11347 UInt size = INSN(23,22);
11348 UInt opcode = INSN(16,12);
11349 UInt nn = INSN(9,5);
11350 UInt dd = INSN(4,0);
11351 vassert(size < 4);
11353 if (opcode == BITS5(0,0,0,1,1)) {
11354 /* -------- 0,xx,00011: SUQADD std4_std4 -------- */
11355 /* -------- 1,xx,00011: USQADD std4_std4 -------- */
11356 /* These are a bit tricky (to say the least). See comments on
11357 the vector variants (in dis_AdvSIMD_two_reg_misc) below for
11358 details. */
11359 Bool isUSQADD = bitU == 1;
11360 IROp qop = isUSQADD ? mkVecQADDEXTSUSATUU(size)
11361 : mkVecQADDEXTUSSATSS(size);
11362 IROp nop = mkVecADD(size);
11363 IRTemp argL = newTempV128();
11364 IRTemp argR = newTempV128();
11365 assign(argL, getQReg128(nn));
11366 assign(argR, getQReg128(dd));
11367 IRTemp qres = math_ZERO_ALL_EXCEPT_LOWEST_LANE(
11368 size, binop(qop, mkexpr(argL), mkexpr(argR)));
11369 IRTemp nres = math_ZERO_ALL_EXCEPT_LOWEST_LANE(
11370 size, binop(nop, mkexpr(argL), mkexpr(argR)));
11371 putQReg128(dd, mkexpr(qres));
11372 updateQCFLAGwithDifference(qres, nres);
11373 const HChar arr = "bhsd"[size];
11374 DIP("%s %c%u, %c%u\n", isUSQADD ? "usqadd" : "suqadd", arr, dd, arr, nn);
11375 return True;
11378 if (opcode == BITS5(0,0,1,1,1)) {
11379 /* -------- 0,xx,00111 SQABS std4_std4 -------- */
11380 /* -------- 1,xx,00111 SQNEG std4_std4 -------- */
11381 Bool isNEG = bitU == 1;
11382 IRTemp qresFW = IRTemp_INVALID, nresFW = IRTemp_INVALID;
11383 (isNEG ? math_SQNEG : math_SQABS)( &qresFW, &nresFW,
11384 getQReg128(nn), size );
11385 IRTemp qres = math_ZERO_ALL_EXCEPT_LOWEST_LANE(size, mkexpr(qresFW));
11386 IRTemp nres = math_ZERO_ALL_EXCEPT_LOWEST_LANE(size, mkexpr(nresFW));
11387 putQReg128(dd, mkexpr(qres));
11388 updateQCFLAGwithDifference(qres, nres);
11389 const HChar arr = "bhsd"[size];
11390 DIP("%s %c%u, %c%u\n", isNEG ? "sqneg" : "sqabs", arr, dd, arr, nn);
11391 return True;
11394 if (size == X11 && opcode == BITS5(0,1,0,0,0)) {
11395 /* -------- 0,11,01000: CMGT d_d_#0 -------- */ // >s 0
11396 /* -------- 1,11,01000: CMGE d_d_#0 -------- */ // >=s 0
11397 Bool isGT = bitU == 0;
11398 IRExpr* argL = getQReg128(nn);
11399 IRExpr* argR = mkV128(0x0000);
11400 IRTemp res = newTempV128();
11401 assign(res, isGT ? binop(Iop_CmpGT64Sx2, argL, argR)
11402 : unop(Iop_NotV128, binop(Iop_CmpGT64Sx2, argR, argL)));
11403 putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
11404 DIP("cm%s d%u, d%u, #0\n", isGT ? "gt" : "ge", dd, nn);
11405 return True;
11408 if (size == X11 && opcode == BITS5(0,1,0,0,1)) {
11409 /* -------- 0,11,01001: CMEQ d_d_#0 -------- */ // == 0
11410 /* -------- 1,11,01001: CMLE d_d_#0 -------- */ // <=s 0
11411 Bool isEQ = bitU == 0;
11412 IRExpr* argL = getQReg128(nn);
11413 IRExpr* argR = mkV128(0x0000);
11414 IRTemp res = newTempV128();
11415 assign(res, isEQ ? binop(Iop_CmpEQ64x2, argL, argR)
11416 : unop(Iop_NotV128,
11417 binop(Iop_CmpGT64Sx2, argL, argR)));
11418 putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
11419 DIP("cm%s d%u, d%u, #0\n", isEQ ? "eq" : "le", dd, nn);
11420 return True;
11423 if (bitU == 0 && size == X11 && opcode == BITS5(0,1,0,1,0)) {
11424 /* -------- 0,11,01010: CMLT d_d_#0 -------- */ // <s 0
11425 putQReg128(dd, unop(Iop_ZeroHI64ofV128,
11426 binop(Iop_CmpGT64Sx2, mkV128(0x0000),
11427 getQReg128(nn))));
11428 DIP("cm%s d%u, d%u, #0\n", "lt", dd, nn);
11429 return True;
11432 if (bitU == 0 && size == X11 && opcode == BITS5(0,1,0,1,1)) {
11433 /* -------- 0,11,01011 ABS d_d -------- */
11434 putQReg128(dd, unop(Iop_ZeroHI64ofV128,
11435 unop(Iop_Abs64x2, getQReg128(nn))));
11436 DIP("abs d%u, d%u\n", dd, nn);
11437 return True;
11440 if (bitU == 1 && size == X11 && opcode == BITS5(0,1,0,1,1)) {
11441 /* -------- 1,11,01011 NEG d_d -------- */
11442 putQReg128(dd, unop(Iop_ZeroHI64ofV128,
11443 binop(Iop_Sub64x2, mkV128(0x0000), getQReg128(nn))));
11444 DIP("neg d%u, d%u\n", dd, nn);
11445 return True;
11448 UInt ix = 0; /*INVALID*/
11449 if (size >= X10) {
11450 switch (opcode) {
11451 case BITS5(0,1,1,0,0): ix = (bitU == 1) ? 4 : 1; break;
11452 case BITS5(0,1,1,0,1): ix = (bitU == 1) ? 5 : 2; break;
11453 case BITS5(0,1,1,1,0): if (bitU == 0) ix = 3; break;
11454 default: break;
11457 if (ix > 0) {
11458 /* -------- 0,1x,01100 FCMGT d_d_#0.0, s_s_#0.0 (ix 1) -------- */
11459 /* -------- 0,1x,01101 FCMEQ d_d_#0.0, s_s_#0.0 (ix 2) -------- */
11460 /* -------- 0,1x,01110 FCMLT d_d_#0.0, s_s_#0.0 (ix 3) -------- */
11461 /* -------- 1,1x,01100 FCMGE d_d_#0.0, s_s_#0.0 (ix 4) -------- */
11462 /* -------- 1,1x,01101 FCMLE d_d_#0.0, s_s_#0.0 (ix 5) -------- */
11463 Bool isD = size == X11;
11464 IRType ity = isD ? Ity_F64 : Ity_F32;
11465 IROp opCmpEQ = isD ? Iop_CmpEQ64Fx2 : Iop_CmpEQ32Fx4;
11466 IROp opCmpLE = isD ? Iop_CmpLE64Fx2 : Iop_CmpLE32Fx4;
11467 IROp opCmpLT = isD ? Iop_CmpLT64Fx2 : Iop_CmpLT32Fx4;
11468 IROp opCmp = Iop_INVALID;
11469 Bool swap = False;
11470 const HChar* nm = "??";
11471 switch (ix) {
11472 case 1: nm = "fcmgt"; opCmp = opCmpLT; swap = True; break;
11473 case 2: nm = "fcmeq"; opCmp = opCmpEQ; break;
11474 case 3: nm = "fcmlt"; opCmp = opCmpLT; break;
11475 case 4: nm = "fcmge"; opCmp = opCmpLE; swap = True; break;
11476 case 5: nm = "fcmle"; opCmp = opCmpLE; break;
11477 default: vassert(0);
11479 IRExpr* zero = mkV128(0x0000);
11480 IRTemp res = newTempV128();
11481 assign(res, swap ? binop(opCmp, zero, getQReg128(nn))
11482 : binop(opCmp, getQReg128(nn), zero));
11483 putQReg128(dd, mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(isD ? X11 : X10,
11484 mkexpr(res))));
11486 DIP("%s %s, %s, #0.0\n", nm, nameQRegLO(dd, ity), nameQRegLO(nn, ity));
11487 return True;
11490 if (opcode == BITS5(1,0,1,0,0)
11491 || (bitU == 1 && opcode == BITS5(1,0,0,1,0))) {
11492 /* -------- 0,xx,10100: SQXTN -------- */
11493 /* -------- 1,xx,10100: UQXTN -------- */
11494 /* -------- 1,xx,10010: SQXTUN -------- */
11495 if (size == X11) return False;
11496 vassert(size < 3);
11497 IROp opN = Iop_INVALID;
11498 Bool zWiden = True;
11499 const HChar* nm = "??";
11500 /**/ if (bitU == 0 && opcode == BITS5(1,0,1,0,0)) {
11501 opN = mkVecQNARROWUNSS(size); nm = "sqxtn"; zWiden = False;
11503 else if (bitU == 1 && opcode == BITS5(1,0,1,0,0)) {
11504 opN = mkVecQNARROWUNUU(size); nm = "uqxtn";
11506 else if (bitU == 1 && opcode == BITS5(1,0,0,1,0)) {
11507 opN = mkVecQNARROWUNSU(size); nm = "sqxtun";
11509 else vassert(0);
11510 IRTemp src = math_ZERO_ALL_EXCEPT_LOWEST_LANE(
11511 size+1, getQReg128(nn));
11512 IRTemp resN = math_ZERO_ALL_EXCEPT_LOWEST_LANE(
11513 size, unop(Iop_64UtoV128, unop(opN, mkexpr(src))));
11514 putQReg128(dd, mkexpr(resN));
11515 /* This widens zero lanes to zero, and compares it against zero, so all
11516 of the non-participating lanes make no contribution to the
11517 Q flag state. */
11518 IRTemp resW = math_WIDEN_LO_OR_HI_LANES(zWiden, False/*!fromUpperHalf*/,
11519 size, mkexpr(resN));
11520 updateQCFLAGwithDifference(src, resW);
11521 const HChar arrNarrow = "bhsd"[size];
11522 const HChar arrWide = "bhsd"[size+1];
11523 DIP("%s %c%u, %c%u\n", nm, arrNarrow, dd, arrWide, nn);
11524 return True;
11527 if (opcode == BITS5(1,0,1,1,0) && bitU == 1 && size == X01) {
11528 /* -------- 1,01,10110 FCVTXN s_d -------- */
11529 /* Using Irrm_NEAREST here isn't right. The docs say "round to
11530 odd" but I don't know what that really means. */
11531 putQRegLO(dd,
11532 binop(Iop_F64toF32, mkU32(Irrm_NEAREST),
11533 getQRegLO(nn, Ity_F64)));
11534 putQRegLane(dd, 1, mkU32(0));
11535 putQRegLane(dd, 1, mkU64(0));
11536 DIP("fcvtxn s%u, d%u\n", dd, nn);
11537 return True;
11540 ix = 0; /*INVALID*/
11541 switch (opcode) {
11542 case BITS5(1,1,0,1,0): ix = ((size & 2) == 2) ? 4 : 1; break;
11543 case BITS5(1,1,0,1,1): ix = ((size & 2) == 2) ? 5 : 2; break;
11544 case BITS5(1,1,1,0,0): if ((size & 2) == 0) ix = 3; break;
11545 default: break;
11547 if (ix > 0) {
11548 /* -------- 0,0x,11010 FCVTNS d_d, s_s (ix 1) -------- */
11549 /* -------- 0,0x,11011 FCVTMS d_d, s_s (ix 2) -------- */
11550 /* -------- 0,0x,11100 FCVTAS d_d, s_s (ix 3) -------- */
11551 /* -------- 0,1x,11010 FCVTPS d_d, s_s (ix 4) -------- */
11552 /* -------- 0,1x,11011 FCVTZS d_d, s_s (ix 5) -------- */
11553 /* -------- 1,0x,11010 FCVTNS d_d, s_s (ix 1) -------- */
11554 /* -------- 1,0x,11011 FCVTMS d_d, s_s (ix 2) -------- */
11555 /* -------- 1,0x,11100 FCVTAS d_d, s_s (ix 3) -------- */
11556 /* -------- 1,1x,11010 FCVTPS d_d, s_s (ix 4) -------- */
11557 /* -------- 1,1x,11011 FCVTZS d_d, s_s (ix 5) -------- */
11558 Bool isD = (size & 1) == 1;
11559 IRType tyF = isD ? Ity_F64 : Ity_F32;
11560 IRType tyI = isD ? Ity_I64 : Ity_I32;
11561 IRRoundingMode irrm = 8; /*impossible*/
11562 HChar ch = '?';
11563 switch (ix) {
11564 case 1: ch = 'n'; irrm = Irrm_NEAREST; break;
11565 case 2: ch = 'm'; irrm = Irrm_NegINF; break;
11566 case 3: ch = 'a'; irrm = Irrm_NEAREST; break; /* kludge? */
11567 case 4: ch = 'p'; irrm = Irrm_PosINF; break;
11568 case 5: ch = 'z'; irrm = Irrm_ZERO; break;
11569 default: vassert(0);
11571 IROp cvt = Iop_INVALID;
11572 if (bitU == 1) {
11573 cvt = isD ? Iop_F64toI64U : Iop_F32toI32U;
11574 } else {
11575 cvt = isD ? Iop_F64toI64S : Iop_F32toI32S;
11577 IRTemp src = newTemp(tyF);
11578 IRTemp res = newTemp(tyI);
11579 assign(src, getQRegLane(nn, 0, tyF));
11580 assign(res, binop(cvt, mkU32(irrm), mkexpr(src)));
11581 putQRegLane(dd, 0, mkexpr(res)); /* bits 31-0 or 63-0 */
11582 if (!isD) {
11583 putQRegLane(dd, 1, mkU32(0)); /* bits 63-32 */
11585 putQRegLane(dd, 1, mkU64(0)); /* bits 127-64 */
11586 HChar sOrD = isD ? 'd' : 's';
11587 DIP("fcvt%c%c %c%u, %c%u\n", ch, bitU == 1 ? 'u' : 's',
11588 sOrD, dd, sOrD, nn);
11589 return True;
11592 if (size <= X01 && opcode == BITS5(1,1,1,0,1)) {
11593 /* -------- 0,0x,11101: SCVTF d_d, s_s -------- */
11594 /* -------- 1,0x,11101: UCVTF d_d, s_s -------- */
11595 Bool isU = bitU == 1;
11596 Bool isD = (size & 1) == 1;
11597 IRType tyI = isD ? Ity_I64 : Ity_I32;
11598 IROp iop = isU ? (isD ? Iop_I64UtoF64 : Iop_I32UtoF32)
11599 : (isD ? Iop_I64StoF64 : Iop_I32StoF32);
11600 IRTemp rm = mk_get_IR_rounding_mode();
11601 putQRegLO(dd, binop(iop, mkexpr(rm), getQRegLO(nn, tyI)));
11602 if (!isD) {
11603 putQRegLane(dd, 1, mkU32(0)); /* bits 63-32 */
11605 putQRegLane(dd, 1, mkU64(0)); /* bits 127-64 */
11606 HChar c = isD ? 'd' : 's';
11607 DIP("%ccvtf %c%u, %c%u\n", isU ? 'u' : 's', c, dd, c, nn);
11608 return True;
11611 if (size >= X10 && opcode == BITS5(1,1,1,0,1)) {
11612 /* -------- 0,1x,11101: FRECPE d_d, s_s -------- */
11613 /* -------- 1,1x,11101: FRSQRTE d_d, s_s -------- */
11614 Bool isSQRT = bitU == 1;
11615 Bool isD = (size & 1) == 1;
11616 IROp op = isSQRT ? (isD ? Iop_RSqrtEst64Fx2 : Iop_RSqrtEst32Fx4)
11617 : (isD ? Iop_RecipEst64Fx2 : Iop_RecipEst32Fx4);
11618 IRTemp resV = newTempV128();
11619 assign(resV, unop(op, getQReg128(nn)));
11620 putQReg128(dd, mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(isD ? X11 : X10,
11621 mkexpr(resV))));
11622 HChar c = isD ? 'd' : 's';
11623 DIP("%s %c%u, %c%u\n", isSQRT ? "frsqrte" : "frecpe", c, dd, c, nn);
11624 return True;
11627 if (bitU == 0 && size >= X10 && opcode == BITS5(1,1,1,1,1)) {
11628 /* -------- 0,1x,11111: FRECPX d_d, s_s -------- */
11629 Bool isD = (size & 1) == 1;
11630 IRType ty = isD ? Ity_F64 : Ity_F32;
11631 IROp op = isD ? Iop_RecpExpF64 : Iop_RecpExpF32;
11632 IRTemp res = newTemp(ty);
11633 IRTemp rm = mk_get_IR_rounding_mode();
11634 assign(res, binop(op, mkexpr(rm), getQRegLane(nn, 0, ty)));
11635 putQReg128(dd, mkV128(0x0000));
11636 putQRegLane(dd, 0, mkexpr(res));
11637 HChar c = isD ? 'd' : 's';
11638 DIP("%s %c%u, %c%u\n", "frecpx", c, dd, c, nn);
11639 return True;
11642 return False;
11643 # undef INSN
11647 static
11648 Bool dis_AdvSIMD_scalar_two_reg_misc_fp16(/*MB_OUT*/DisResult* dres, UInt insn,
11649 const VexArchInfo* archinfo)
11651 /* This decode function only handles instructions with half-precision
11652 floating-point (fp16) operands.
11654 if ((archinfo->hwcaps & VEX_HWCAPS_ARM64_FP16) == 0)
11655 return False;
11657 /* 31 29 28 23 21 16 11 9 4
11658 01 U 11110 size 11100 opcode 10 n d
11659 Decode fields: u,size,opcode
11661 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
11662 if (INSN(31,30) != BITS2(0,1)
11663 || INSN(28,24) != BITS5(1,1,1,1,0)
11664 || INSN(21,17) != BITS5(1,1,1,0,0)
11665 || INSN(11,10) != BITS2(1,0)) {
11666 return False;
11668 UInt bitU = INSN(29,29);
11669 UInt size = INSN(23,22);
11670 UInt opcode = INSN(16,12);
11671 UInt nn = INSN(9,5);
11672 UInt dd = INSN(4,0);
11673 vassert(size == 3);
11675 /* Decoding FCM<condtion> based on opcode and bitU. ix used to select
11676 * <condition>
11678 UInt ix = 0; // Invalid <condition>
11679 switch (opcode) {
11680 case BITS5(0,1,1,0,1): ix = (bitU == 1) ? 4 : 1; break; // FCMLE=4,FCMEQ=1
11681 case BITS5(0,1,1,0,0): ix = (bitU == 1) ? 5 : 2; break; // FCMGE=5,FCMGT=2
11682 case BITS5(0,1,1,1,0): if (bitU == 0) ix = 3; break; // FCMLT=3
11683 default: break;
11685 if (ix > 0) {
11686 /* -------- 0,01101 FCMEQ h_h_#0.0 (ix 1) -------- */
11687 /* -------- 0,01100 FCMGT h_h_#0.0 (ix 2) -------- */
11688 /* -------- 0,01110 FCMLT h_h_#0.0 (ix 3) -------- */
11689 /* -------- 1,01101 FCMLE h_h_#0.0 (ix 4) -------- */
11690 /* -------- 1,01100 FCMGE h_h_#0.0 (ix 5) -------- */
11691 IRType ity = Ity_F16;
11692 IROp opCmp = Iop_INVALID;
11693 Bool swap = False;
11694 const HChar* nm = "??";
11695 switch (ix) {
11696 case 1: nm = "fcmeq"; opCmp = Iop_CmpEQ16Fx8; break;
11697 case 2: nm = "fcmgt"; opCmp = Iop_CmpLT16Fx8; swap = True; break;
11698 case 3: nm = "fcmlt"; opCmp = Iop_CmpLT16Fx8; break;
11699 case 4: nm = "fcmle"; opCmp = Iop_CmpLE16Fx8; break;
11700 case 5: nm = "fcmge"; opCmp = Iop_CmpLE16Fx8; swap = True; break;
11701 default: vassert(0);
11703 IRExpr* zero = mkV128(0x0000);
11704 IRTemp res = newTempV128();
11705 assign(res, swap ? binop(opCmp, zero, getQReg128(nn))
11706 : binop(opCmp, getQReg128(nn), zero));
11707 putQReg128(dd, mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(X01, mkexpr(res))));
11709 DIP("%s %s, %s, #0.0\n", nm, nameQRegLO(dd, ity), nameQRegLO(nn, ity));
11710 return True;
11713 return False;
11714 # undef INSN
11718 static
11719 Bool dis_AdvSIMD_scalar_x_indexed_element(/*MB_OUT*/DisResult* dres, UInt insn)
11721 /* 31 28 23 21 20 19 15 11 9 4
11722 01 U 11111 size L M m opcode H 0 n d
11723 Decode fields are: u,size,opcode
11724 M is really part of the mm register number. Individual
11725 cases need to inspect L and H though.
11727 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
11728 if (INSN(31,30) != BITS2(0,1)
11729 || INSN(28,24) != BITS5(1,1,1,1,1) || INSN(10,10) != 0) {
11730 return False;
11732 UInt bitU = INSN(29,29);
11733 UInt size = INSN(23,22);
11734 UInt bitL = INSN(21,21);
11735 UInt bitM = INSN(20,20);
11736 UInt mmLO4 = INSN(19,16);
11737 UInt opcode = INSN(15,12);
11738 UInt bitH = INSN(11,11);
11739 UInt nn = INSN(9,5);
11740 UInt dd = INSN(4,0);
11741 vassert(size < 4);
11742 vassert(bitH < 2 && bitM < 2 && bitL < 2);
11744 if (bitU == 0 && size >= X10
11745 && (opcode == BITS4(0,0,0,1) || opcode == BITS4(0,1,0,1))) {
11746 /* -------- 0,1x,0001 FMLA d_d_d[], s_s_s[] -------- */
11747 /* -------- 0,1x,0101 FMLS d_d_d[], s_s_s[] -------- */
11748 Bool isD = (size & 1) == 1;
11749 Bool isSUB = opcode == BITS4(0,1,0,1);
11750 UInt index;
11751 if (!isD) index = (bitH << 1) | bitL;
11752 else if (isD && bitL == 0) index = bitH;
11753 else return False; // sz:L == x11 => unallocated encoding
11754 vassert(index < (isD ? 2 : 4));
11755 IRType ity = isD ? Ity_F64 : Ity_F32;
11756 IRTemp elem = newTemp(ity);
11757 UInt mm = (bitM << 4) | mmLO4;
11758 assign(elem, getQRegLane(mm, index, ity));
11759 IRTemp dupd = math_DUP_TO_V128(elem, ity);
11760 IROp opADD = isD ? Iop_Add64Fx2 : Iop_Add32Fx4;
11761 IROp opSUB = isD ? Iop_Sub64Fx2 : Iop_Sub32Fx4;
11762 IROp opMUL = isD ? Iop_Mul64Fx2 : Iop_Mul32Fx4;
11763 IRTemp rm = mk_get_IR_rounding_mode();
11764 IRTemp t1 = newTempV128();
11765 IRTemp t2 = newTempV128();
11766 // FIXME: double rounding; use FMA primops instead
11767 assign(t1, triop(opMUL, mkexpr(rm), getQReg128(nn), mkexpr(dupd)));
11768 assign(t2, triop(isSUB ? opSUB : opADD,
11769 mkexpr(rm), getQReg128(dd), mkexpr(t1)));
11770 putQReg128(dd,
11771 mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(isD ? 3 : 2,
11772 mkexpr(t2))));
11773 const HChar c = isD ? 'd' : 's';
11774 DIP("%s %c%u, %c%u, %s.%c[%u]\n", isSUB ? "fmls" : "fmla",
11775 c, dd, c, nn, nameQReg128(mm), c, index);
11776 return True;
11779 if (size >= X10 && opcode == BITS4(1,0,0,1)) {
11780 /* -------- 0,1x,1001 FMUL d_d_d[], s_s_s[] -------- */
11781 /* -------- 1,1x,1001 FMULX d_d_d[], s_s_s[] -------- */
11782 Bool isD = (size & 1) == 1;
11783 Bool isMULX = bitU == 1;
11784 UInt index;
11785 if (!isD) index = (bitH << 1) | bitL;
11786 else if (isD && bitL == 0) index = bitH;
11787 else return False; // sz:L == x11 => unallocated encoding
11788 vassert(index < (isD ? 2 : 4));
11789 IRType ity = isD ? Ity_F64 : Ity_F32;
11790 IRTemp elem = newTemp(ity);
11791 UInt mm = (bitM << 4) | mmLO4;
11792 assign(elem, getQRegLane(mm, index, ity));
11793 IRTemp dupd = math_DUP_TO_V128(elem, ity);
11794 IROp opMUL = isD ? Iop_Mul64Fx2 : Iop_Mul32Fx4;
11795 IRTemp rm = mk_get_IR_rounding_mode();
11796 IRTemp t1 = newTempV128();
11797 // KLUDGE: FMULX is treated the same way as FMUL. That can't be right.
11798 assign(t1, triop(opMUL, mkexpr(rm), getQReg128(nn), mkexpr(dupd)));
11799 putQReg128(dd,
11800 mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(isD ? 3 : 2,
11801 mkexpr(t1))));
11802 const HChar c = isD ? 'd' : 's';
11803 DIP("%s %c%u, %c%u, %s.%c[%u]\n", isMULX ? "fmulx" : "fmul",
11804 c, dd, c, nn, nameQReg128(mm), c, index);
11805 return True;
11808 if (bitU == 0
11809 && (opcode == BITS4(1,0,1,1)
11810 || opcode == BITS4(0,0,1,1) || opcode == BITS4(0,1,1,1))) {
11811 /* -------- 0,xx,1011 SQDMULL s/h variants only -------- */ // 0 (ks)
11812 /* -------- 0,xx,0011 SQDMLAL s/h variants only -------- */ // 1
11813 /* -------- 0,xx,0111 SQDMLSL s/h variants only -------- */ // 2
11814 /* Widens, and size refers to the narrowed lanes. */
11815 UInt ks = 3;
11816 switch (opcode) {
11817 case BITS4(1,0,1,1): ks = 0; break;
11818 case BITS4(0,0,1,1): ks = 1; break;
11819 case BITS4(0,1,1,1): ks = 2; break;
11820 default: vassert(0);
11822 vassert(ks <= 2);
11823 UInt mm = 32; // invalid
11824 UInt ix = 16; // invalid
11825 switch (size) {
11826 case X00:
11827 return False; // h_b_b[] case is not allowed
11828 case X01:
11829 mm = mmLO4; ix = (bitH << 2) | (bitL << 1) | (bitM << 0); break;
11830 case X10:
11831 mm = (bitM << 4) | mmLO4; ix = (bitH << 1) | (bitL << 0); break;
11832 case X11:
11833 return False; // q_d_d[] case is not allowed
11834 default:
11835 vassert(0);
11837 vassert(mm < 32 && ix < 16);
11838 IRTemp vecN, vecD, res, sat1q, sat1n, sat2q, sat2n;
11839 vecN = vecD = res = sat1q = sat1n = sat2q = sat2n = IRTemp_INVALID;
11840 newTempsV128_2(&vecN, &vecD);
11841 assign(vecN, getQReg128(nn));
11842 IRTemp vecM = math_DUP_VEC_ELEM(getQReg128(mm), size, ix);
11843 assign(vecD, getQReg128(dd));
11844 math_SQDMULL_ACC(&res, &sat1q, &sat1n, &sat2q, &sat2n,
11845 False/*!is2*/, size, "mas"[ks],
11846 vecN, vecM, ks == 0 ? IRTemp_INVALID : vecD);
11847 IROp opZHI = mkVecZEROHIxxOFV128(size+1);
11848 putQReg128(dd, unop(opZHI, mkexpr(res)));
11849 vassert(sat1q != IRTemp_INVALID && sat1n != IRTemp_INVALID);
11850 updateQCFLAGwithDifferenceZHI(sat1q, sat1n, opZHI);
11851 if (sat2q != IRTemp_INVALID || sat2n != IRTemp_INVALID) {
11852 updateQCFLAGwithDifferenceZHI(sat2q, sat2n, opZHI);
11854 const HChar* nm = ks == 0 ? "sqmull"
11855 : (ks == 1 ? "sqdmlal" : "sqdmlsl");
11856 const HChar arrNarrow = "bhsd"[size];
11857 const HChar arrWide = "bhsd"[size+1];
11858 DIP("%s %c%u, %c%u, v%u.%c[%u]\n",
11859 nm, arrWide, dd, arrNarrow, nn, dd, arrNarrow, ix);
11860 return True;
11863 if (bitU == 0 && (opcode == BITS4(1,1,0,0) || opcode == BITS4(1,1,0,1))) {
11864 /* -------- 0,xx,1100 SQDMULH s and h variants only -------- */
11865 /* -------- 0,xx,1101 SQRDMULH s and h variants only -------- */
11866 UInt mm = 32; // invalid
11867 UInt ix = 16; // invalid
11868 switch (size) {
11869 case X00:
11870 return False; // b case is not allowed
11871 case X01:
11872 mm = mmLO4; ix = (bitH << 2) | (bitL << 1) | (bitM << 0); break;
11873 case X10:
11874 mm = (bitM << 4) | mmLO4; ix = (bitH << 1) | (bitL << 0); break;
11875 case X11:
11876 return False; // q case is not allowed
11877 default:
11878 vassert(0);
11880 vassert(mm < 32 && ix < 16);
11881 Bool isR = opcode == BITS4(1,1,0,1);
11882 IRTemp res, sat1q, sat1n, vN, vM;
11883 res = sat1q = sat1n = vN = vM = IRTemp_INVALID;
11884 vN = newTempV128();
11885 assign(vN, getQReg128(nn));
11886 vM = math_DUP_VEC_ELEM(getQReg128(mm), size, ix);
11887 math_SQDMULH(&res, &sat1q, &sat1n, isR, size, vN, vM);
11888 IROp opZHI = mkVecZEROHIxxOFV128(size);
11889 putQReg128(dd, unop(opZHI, mkexpr(res)));
11890 updateQCFLAGwithDifferenceZHI(sat1q, sat1n, opZHI);
11891 const HChar* nm = isR ? "sqrdmulh" : "sqdmulh";
11892 HChar ch = size == X01 ? 'h' : 's';
11893 DIP("%s %c%u, %c%u, v%d.%c[%u]\n", nm, ch, dd, ch, nn, ch, (Int)dd, ix);
11894 return True;
11897 if (bitU == 1 && (opcode == BITS4(1,1,0,1) || opcode == BITS4(1,1,1,1))) {
11898 /* -------- 0,xx,1101 SQRDMLAH s and h variants only -------- */
11899 /* -------- 0,xx,1111 SQRDMLSH s and h variants only -------- */
11900 UInt mm = 32; // invalid
11901 UInt ix = 16; // invalid
11902 switch (size) {
11903 case X00:
11904 return False; // b case is not allowed
11905 case X01:
11906 mm = mmLO4; ix = (bitH << 2) | (bitL << 1) | (bitM << 0); break;
11907 case X10:
11908 mm = (bitM << 4) | mmLO4; ix = (bitH << 1) | (bitL << 0); break;
11909 case X11:
11910 return False; // d case is not allowed
11911 default:
11912 vassert(0);
11914 vassert(size < 4);
11915 vassert(mm < 32 && ix < 16);
11916 Bool isAdd = opcode == BITS4(1,1,0,1);
11918 IRTemp res, res_nosat, vD, vN, vM;
11919 res = res_nosat = vD = vN = vM = IRTemp_INVALID;
11920 newTempsV128_2(&vD, &vN);
11921 assign(vD, getQReg128(dd));
11922 assign(vN, getQReg128(nn));
11923 vM = math_DUP_VEC_ELEM(getQReg128(mm), size, ix);
11925 math_SQRDMLAH(&res, &res_nosat, isAdd, size, vD, vN, vM);
11926 IROp opZHI = mkVecZEROHIxxOFV128(size);
11927 putQReg128(dd, unop(opZHI, mkexpr(res)));
11928 updateQCFLAGwithDifferenceZHI(res, res_nosat, opZHI);
11930 const HChar* nm = isAdd ? "sqrdmlah" : "sqrdmlsh";
11931 HChar ch = size == X01 ? 'h' : 's';
11932 DIP("%s %c%u, %c%u, v%d.%c[%u]\n", nm, ch, dd, ch, nn, ch, (Int)dd, ix);
11933 return True;
11936 return False;
11937 # undef INSN
11941 static
11942 Bool dis_AdvSIMD_shift_by_immediate(/*MB_OUT*/DisResult* dres, UInt insn)
11944 /* 31 28 22 18 15 10 9 4
11945 0 q u 011110 immh immb opcode 1 n d
11946 Decode fields: u,opcode
11948 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
11949 if (INSN(31,31) != 0
11950 || INSN(28,23) != BITS6(0,1,1,1,1,0) || INSN(10,10) != 1) {
11951 return False;
11953 UInt bitQ = INSN(30,30);
11954 UInt bitU = INSN(29,29);
11955 UInt immh = INSN(22,19);
11956 UInt immb = INSN(18,16);
11957 UInt opcode = INSN(15,11);
11958 UInt nn = INSN(9,5);
11959 UInt dd = INSN(4,0);
11961 if (opcode == BITS5(0,0,0,0,0) || opcode == BITS5(0,0,0,1,0)) {
11962 /* -------- 0,00000 SSHR std7_std7_#imm -------- */
11963 /* -------- 1,00000 USHR std7_std7_#imm -------- */
11964 /* -------- 0,00010 SSRA std7_std7_#imm -------- */
11965 /* -------- 1,00010 USRA std7_std7_#imm -------- */
11966 /* laneTy, shift = case immh:immb of
11967 0001:xxx -> B, SHR:8-xxx
11968 001x:xxx -> H, SHR:16-xxxx
11969 01xx:xxx -> S, SHR:32-xxxxx
11970 1xxx:xxx -> D, SHR:64-xxxxxx
11971 other -> invalid
11973 UInt size = 0;
11974 UInt shift = 0;
11975 Bool isQ = bitQ == 1;
11976 Bool isU = bitU == 1;
11977 Bool isAcc = opcode == BITS5(0,0,0,1,0);
11978 Bool ok = getLaneInfo_IMMH_IMMB(&shift, &size, immh, immb);
11979 if (!ok || (bitQ == 0 && size == X11)) return False;
11980 vassert(size <= 3);
11981 UInt lanebits = 8 << size;
11982 vassert(shift >= 1 && shift <= lanebits);
11983 IROp op = isU ? mkVecSHRN(size) : mkVecSARN(size);
11984 IRExpr* src = getQReg128(nn);
11985 IRTemp shf = newTempV128();
11986 IRTemp res = newTempV128();
11987 if (shift == lanebits && isU) {
11988 assign(shf, mkV128(0x0000));
11989 } else {
11990 UInt nudge = 0;
11991 if (shift == lanebits) {
11992 vassert(!isU);
11993 nudge = 1;
11995 assign(shf, binop(op, src, mkU8(shift - nudge)));
11997 assign(res, isAcc ? binop(mkVecADD(size), getQReg128(dd), mkexpr(shf))
11998 : mkexpr(shf));
11999 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
12000 HChar laneCh = "bhsd"[size];
12001 UInt nLanes = (isQ ? 128 : 64) / lanebits;
12002 const HChar* nm = isAcc ? (isU ? "usra" : "ssra")
12003 : (isU ? "ushr" : "sshr");
12004 DIP("%s %s.%u%c, %s.%u%c, #%u\n", nm,
12005 nameQReg128(dd), nLanes, laneCh,
12006 nameQReg128(nn), nLanes, laneCh, shift);
12007 return True;
12010 if (opcode == BITS5(0,0,1,0,0) || opcode == BITS5(0,0,1,1,0)) {
12011 /* -------- 0,00100 SRSHR std7_std7_#imm -------- */
12012 /* -------- 1,00100 URSHR std7_std7_#imm -------- */
12013 /* -------- 0,00110 SRSRA std7_std7_#imm -------- */
12014 /* -------- 1,00110 URSRA std7_std7_#imm -------- */
12015 /* laneTy, shift = case immh:immb of
12016 0001:xxx -> B, SHR:8-xxx
12017 001x:xxx -> H, SHR:16-xxxx
12018 01xx:xxx -> S, SHR:32-xxxxx
12019 1xxx:xxx -> D, SHR:64-xxxxxx
12020 other -> invalid
12022 UInt size = 0;
12023 UInt shift = 0;
12024 Bool isQ = bitQ == 1;
12025 Bool isU = bitU == 1;
12026 Bool isAcc = opcode == BITS5(0,0,1,1,0);
12027 Bool ok = getLaneInfo_IMMH_IMMB(&shift, &size, immh, immb);
12028 if (!ok || (bitQ == 0 && size == X11)) return False;
12029 vassert(size <= 3);
12030 UInt lanebits = 8 << size;
12031 vassert(shift >= 1 && shift <= lanebits);
12032 IROp op = isU ? mkVecRSHU(size) : mkVecRSHS(size);
12033 IRExpr* src = getQReg128(nn);
12034 IRTemp imm8 = newTemp(Ity_I8);
12035 assign(imm8, mkU8((UChar)(-shift)));
12036 IRExpr* amt = mkexpr(math_DUP_TO_V128(imm8, Ity_I8));
12037 IRTemp shf = newTempV128();
12038 IRTemp res = newTempV128();
12039 assign(shf, binop(op, src, amt));
12040 assign(res, isAcc ? binop(mkVecADD(size), getQReg128(dd), mkexpr(shf))
12041 : mkexpr(shf));
12042 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
12043 HChar laneCh = "bhsd"[size];
12044 UInt nLanes = (isQ ? 128 : 64) / lanebits;
12045 const HChar* nm = isAcc ? (isU ? "ursra" : "srsra")
12046 : (isU ? "urshr" : "srshr");
12047 DIP("%s %s.%u%c, %s.%u%c, #%u\n", nm,
12048 nameQReg128(dd), nLanes, laneCh,
12049 nameQReg128(nn), nLanes, laneCh, shift);
12050 return True;
12053 if (bitU == 1 && opcode == BITS5(0,1,0,0,0)) {
12054 /* -------- 1,01000 SRI std7_std7_#imm -------- */
12055 /* laneTy, shift = case immh:immb of
12056 0001:xxx -> B, SHR:8-xxx
12057 001x:xxx -> H, SHR:16-xxxx
12058 01xx:xxx -> S, SHR:32-xxxxx
12059 1xxx:xxx -> D, SHR:64-xxxxxx
12060 other -> invalid
12062 UInt size = 0;
12063 UInt shift = 0;
12064 Bool isQ = bitQ == 1;
12065 Bool ok = getLaneInfo_IMMH_IMMB(&shift, &size, immh, immb);
12066 if (!ok || (bitQ == 0 && size == X11)) return False;
12067 vassert(size <= 3);
12068 UInt lanebits = 8 << size;
12069 vassert(shift >= 1 && shift <= lanebits);
12070 IRExpr* src = getQReg128(nn);
12071 IRTemp res = newTempV128();
12072 if (shift == lanebits) {
12073 assign(res, getQReg128(dd));
12074 } else {
12075 assign(res, binop(mkVecSHRN(size), src, mkU8(shift)));
12076 IRExpr* nmask = binop(mkVecSHLN(size),
12077 mkV128(0xFFFF), mkU8(lanebits - shift));
12078 IRTemp tmp = newTempV128();
12079 assign(tmp, binop(Iop_OrV128,
12080 mkexpr(res),
12081 binop(Iop_AndV128, getQReg128(dd), nmask)));
12082 res = tmp;
12084 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
12085 HChar laneCh = "bhsd"[size];
12086 UInt nLanes = (isQ ? 128 : 64) / lanebits;
12087 DIP("%s %s.%u%c, %s.%u%c, #%u\n", "sri",
12088 nameQReg128(dd), nLanes, laneCh,
12089 nameQReg128(nn), nLanes, laneCh, shift);
12090 return True;
12093 if (opcode == BITS5(0,1,0,1,0)) {
12094 /* -------- 0,01010 SHL std7_std7_#imm -------- */
12095 /* -------- 1,01010 SLI std7_std7_#imm -------- */
12096 /* laneTy, shift = case immh:immb of
12097 0001:xxx -> B, xxx
12098 001x:xxx -> H, xxxx
12099 01xx:xxx -> S, xxxxx
12100 1xxx:xxx -> D, xxxxxx
12101 other -> invalid
12103 UInt size = 0;
12104 UInt shift = 0;
12105 Bool isSLI = bitU == 1;
12106 Bool isQ = bitQ == 1;
12107 Bool ok = getLaneInfo_IMMH_IMMB(&shift, &size, immh, immb);
12108 if (!ok || (bitQ == 0 && size == X11)) return False;
12109 vassert(size <= 3);
12110 /* The shift encoding has opposite sign for the leftwards case.
12111 Adjust shift to compensate. */
12112 UInt lanebits = 8 << size;
12113 shift = lanebits - shift;
12114 vassert(shift < lanebits);
12115 IROp op = mkVecSHLN(size);
12116 IRExpr* src = getQReg128(nn);
12117 IRTemp res = newTempV128();
12118 if (shift == 0) {
12119 assign(res, src);
12120 } else {
12121 assign(res, binop(op, src, mkU8(shift)));
12122 if (isSLI) {
12123 IRExpr* nmask = binop(mkVecSHRN(size),
12124 mkV128(0xFFFF), mkU8(lanebits - shift));
12125 IRTemp tmp = newTempV128();
12126 assign(tmp, binop(Iop_OrV128,
12127 mkexpr(res),
12128 binop(Iop_AndV128, getQReg128(dd), nmask)));
12129 res = tmp;
12132 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
12133 HChar laneCh = "bhsd"[size];
12134 UInt nLanes = (isQ ? 128 : 64) / lanebits;
12135 const HChar* nm = isSLI ? "sli" : "shl";
12136 DIP("%s %s.%u%c, %s.%u%c, #%u\n", nm,
12137 nameQReg128(dd), nLanes, laneCh,
12138 nameQReg128(nn), nLanes, laneCh, shift);
12139 return True;
12142 if (opcode == BITS5(0,1,1,1,0)
12143 || (bitU == 1 && opcode == BITS5(0,1,1,0,0))) {
12144 /* -------- 0,01110 SQSHL std7_std7_#imm -------- */
12145 /* -------- 1,01110 UQSHL std7_std7_#imm -------- */
12146 /* -------- 1,01100 SQSHLU std7_std7_#imm -------- */
12147 UInt size = 0;
12148 UInt shift = 0;
12149 Bool isQ = bitQ == 1;
12150 Bool ok = getLaneInfo_IMMH_IMMB(&shift, &size, immh, immb);
12151 if (!ok || (bitQ == 0 && size == X11)) return False;
12152 vassert(size >= 0 && size <= 3);
12153 /* The shift encoding has opposite sign for the leftwards case.
12154 Adjust shift to compensate. */
12155 UInt lanebits = 8 << size;
12156 shift = lanebits - shift;
12157 vassert(shift >= 0 && shift < lanebits);
12158 const HChar* nm = NULL;
12159 /**/ if (bitU == 0 && opcode == BITS5(0,1,1,1,0)) nm = "sqshl";
12160 else if (bitU == 1 && opcode == BITS5(0,1,1,1,0)) nm = "uqshl";
12161 else if (bitU == 1 && opcode == BITS5(0,1,1,0,0)) nm = "sqshlu";
12162 else vassert(0);
12163 IRTemp qDiff1 = IRTemp_INVALID;
12164 IRTemp qDiff2 = IRTemp_INVALID;
12165 IRTemp res = IRTemp_INVALID;
12166 IRTemp src = newTempV128();
12167 assign(src, getQReg128(nn));
12168 math_QSHL_IMM(&res, &qDiff1, &qDiff2, src, size, shift, nm);
12169 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
12170 updateQCFLAGwithDifferenceZHI(qDiff1, qDiff2,
12171 isQ ? Iop_INVALID : Iop_ZeroHI64ofV128);
12172 const HChar* arr = nameArr_Q_SZ(bitQ, size);
12173 DIP("%s %s.%s, %s.%s, #%u\n", nm,
12174 nameQReg128(dd), arr, nameQReg128(nn), arr, shift);
12175 return True;
12178 if (bitU == 0
12179 && (opcode == BITS5(1,0,0,0,0) || opcode == BITS5(1,0,0,0,1))) {
12180 /* -------- 0,10000 SHRN{,2} #imm -------- */
12181 /* -------- 0,10001 RSHRN{,2} #imm -------- */
12182 /* Narrows, and size is the narrow size. */
12183 UInt size = 0;
12184 UInt shift = 0;
12185 Bool is2 = bitQ == 1;
12186 Bool isR = opcode == BITS5(1,0,0,0,1);
12187 Bool ok = getLaneInfo_IMMH_IMMB(&shift, &size, immh, immb);
12188 if (!ok || size == X11) return False;
12189 vassert(shift >= 1);
12190 IRTemp t1 = newTempV128();
12191 IRTemp t2 = newTempV128();
12192 IRTemp t3 = newTempV128();
12193 assign(t1, getQReg128(nn));
12194 assign(t2, isR ? binop(mkVecADD(size+1),
12195 mkexpr(t1),
12196 mkexpr(math_VEC_DUP_IMM(size+1, 1ULL<<(shift-1))))
12197 : mkexpr(t1));
12198 assign(t3, binop(mkVecSHRN(size+1), mkexpr(t2), mkU8(shift)));
12199 IRTemp t4 = math_NARROW_LANES(t3, t3, size);
12200 putLO64andZUorPutHI64(is2, dd, t4);
12201 const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
12202 const HChar* arrWide = nameArr_Q_SZ(1, size+1);
12203 DIP("%s %s.%s, %s.%s, #%u\n", isR ? "rshrn" : "shrn",
12204 nameQReg128(dd), arrNarrow, nameQReg128(nn), arrWide, shift);
12205 return True;
12208 if (opcode == BITS5(1,0,0,1,0) || opcode == BITS5(1,0,0,1,1)
12209 || (bitU == 1
12210 && (opcode == BITS5(1,0,0,0,0) || opcode == BITS5(1,0,0,0,1)))) {
12211 /* -------- 0,10010 SQSHRN{,2} #imm -------- */
12212 /* -------- 1,10010 UQSHRN{,2} #imm -------- */
12213 /* -------- 0,10011 SQRSHRN{,2} #imm -------- */
12214 /* -------- 1,10011 UQRSHRN{,2} #imm -------- */
12215 /* -------- 1,10000 SQSHRUN{,2} #imm -------- */
12216 /* -------- 1,10001 SQRSHRUN{,2} #imm -------- */
12217 UInt size = 0;
12218 UInt shift = 0;
12219 Bool is2 = bitQ == 1;
12220 Bool ok = getLaneInfo_IMMH_IMMB(&shift, &size, immh, immb);
12221 if (!ok || size == X11) return False;
12222 vassert(shift >= 1 && shift <= (8 << size));
12223 const HChar* nm = "??";
12224 IROp op = Iop_INVALID;
12225 /* Decide on the name and the operation. */
12226 /**/ if (bitU == 0 && opcode == BITS5(1,0,0,1,0)) {
12227 nm = "sqshrn"; op = mkVecQANDqsarNNARROWSS(size);
12229 else if (bitU == 1 && opcode == BITS5(1,0,0,1,0)) {
12230 nm = "uqshrn"; op = mkVecQANDqshrNNARROWUU(size);
12232 else if (bitU == 0 && opcode == BITS5(1,0,0,1,1)) {
12233 nm = "sqrshrn"; op = mkVecQANDqrsarNNARROWSS(size);
12235 else if (bitU == 1 && opcode == BITS5(1,0,0,1,1)) {
12236 nm = "uqrshrn"; op = mkVecQANDqrshrNNARROWUU(size);
12238 else if (bitU == 1 && opcode == BITS5(1,0,0,0,0)) {
12239 nm = "sqshrun"; op = mkVecQANDqsarNNARROWSU(size);
12241 else if (bitU == 1 && opcode == BITS5(1,0,0,0,1)) {
12242 nm = "sqrshrun"; op = mkVecQANDqrsarNNARROWSU(size);
12244 else vassert(0);
12245 /* Compute the result (Q, shifted value) pair. */
12246 IRTemp src128 = newTempV128();
12247 assign(src128, getQReg128(nn));
12248 IRTemp pair = newTempV128();
12249 assign(pair, binop(op, mkexpr(src128), mkU8(shift)));
12250 /* Update the result reg */
12251 IRTemp res64in128 = newTempV128();
12252 assign(res64in128, unop(Iop_ZeroHI64ofV128, mkexpr(pair)));
12253 putLO64andZUorPutHI64(is2, dd, res64in128);
12254 /* Update the Q flag. */
12255 IRTemp q64q64 = newTempV128();
12256 assign(q64q64, binop(Iop_InterleaveHI64x2, mkexpr(pair), mkexpr(pair)));
12257 IRTemp z128 = newTempV128();
12258 assign(z128, mkV128(0x0000));
12259 updateQCFLAGwithDifference(q64q64, z128);
12260 /* */
12261 const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
12262 const HChar* arrWide = nameArr_Q_SZ(1, size+1);
12263 DIP("%s %s.%s, %s.%s, #%u\n", nm,
12264 nameQReg128(dd), arrNarrow, nameQReg128(nn), arrWide, shift);
12265 return True;
12268 if (opcode == BITS5(1,0,1,0,0)) {
12269 /* -------- 0,10100 SSHLL{,2} #imm -------- */
12270 /* -------- 1,10100 USHLL{,2} #imm -------- */
12271 /* 31 28 22 18 15 9 4
12272 0q0 011110 immh immb 101001 n d SSHLL Vd.Ta, Vn.Tb, #sh
12273 0q1 011110 immh immb 101001 n d USHLL Vd.Ta, Vn.Tb, #sh
12274 where Ta,Tb,sh
12275 = case immh of 1xxx -> invalid
12276 01xx -> 2d, 2s(q0)/4s(q1), immh:immb - 32 (0..31)
12277 001x -> 4s, 4h(q0)/8h(q1), immh:immb - 16 (0..15)
12278 0001 -> 8h, 8b(q0)/16b(q1), immh:immb - 8 (0..7)
12279 0000 -> AdvSIMD modified immediate (???)
12281 Bool isQ = bitQ == 1;
12282 Bool isU = bitU == 1;
12283 UInt immhb = (immh << 3) | immb;
12284 IRTemp src = newTempV128();
12285 IRTemp zero = newTempV128();
12286 IRExpr* res = NULL;
12287 UInt sh = 0;
12288 const HChar* ta = "??";
12289 const HChar* tb = "??";
12290 assign(src, getQReg128(nn));
12291 assign(zero, mkV128(0x0000));
12292 if (immh & 8) {
12293 /* invalid; don't assign to res */
12295 else if (immh & 4) {
12296 sh = immhb - 32;
12297 vassert(sh < 32); /* so 32-sh is 1..32 */
12298 ta = "2d";
12299 tb = isQ ? "4s" : "2s";
12300 IRExpr* tmp = isQ ? mk_InterleaveHI32x4(src, zero)
12301 : mk_InterleaveLO32x4(src, zero);
12302 res = binop(isU ? Iop_ShrN64x2 : Iop_SarN64x2, tmp, mkU8(32-sh));
12304 else if (immh & 2) {
12305 sh = immhb - 16;
12306 vassert(sh < 16); /* so 16-sh is 1..16 */
12307 ta = "4s";
12308 tb = isQ ? "8h" : "4h";
12309 IRExpr* tmp = isQ ? mk_InterleaveHI16x8(src, zero)
12310 : mk_InterleaveLO16x8(src, zero);
12311 res = binop(isU ? Iop_ShrN32x4 : Iop_SarN32x4, tmp, mkU8(16-sh));
12313 else if (immh & 1) {
12314 sh = immhb - 8;
12315 vassert(sh < 8); /* so 8-sh is 1..8 */
12316 ta = "8h";
12317 tb = isQ ? "16b" : "8b";
12318 IRExpr* tmp = isQ ? mk_InterleaveHI8x16(src, zero)
12319 : mk_InterleaveLO8x16(src, zero);
12320 res = binop(isU ? Iop_ShrN16x8 : Iop_SarN16x8, tmp, mkU8(8-sh));
12321 } else {
12322 vassert(immh == 0);
12323 /* invalid; don't assign to res */
12325 /* */
12326 if (res) {
12327 putQReg128(dd, res);
12328 DIP("%cshll%s %s.%s, %s.%s, #%u\n",
12329 isU ? 'u' : 's', isQ ? "2" : "",
12330 nameQReg128(dd), ta, nameQReg128(nn), tb, sh);
12331 return True;
12333 return False;
12336 if (opcode == BITS5(1,1,1,0,0)) {
12337 /* -------- 0,11100 SCVTF {2d_2d,4s_4s,2s_2s}_imm -------- */
12338 /* -------- 1,11100 UCVTF {2d_2d,4s_4s,2s_2s}_imm -------- */
12339 /* If immh is of the form 00xx, the insn is invalid. */
12340 if (immh < BITS4(0,1,0,0)) return False;
12341 UInt size = 0;
12342 UInt fbits = 0;
12343 Bool ok = getLaneInfo_IMMH_IMMB(&fbits, &size, immh, immb);
12344 /* The following holds because immh is never zero. */
12345 vassert(ok);
12346 /* The following holds because immh >= 0100. */
12347 vassert(size == X10 || size == X11);
12348 Bool isD = size == X11;
12349 Bool isU = bitU == 1;
12350 Bool isQ = bitQ == 1;
12351 if (isD && !isQ) return False; /* reject .1d case */
12352 vassert(fbits >= 1 && fbits <= (isD ? 64 : 32));
12353 Double scale = two_to_the_minus(fbits);
12354 IRExpr* scaleE = isD ? IRExpr_Const(IRConst_F64(scale))
12355 : IRExpr_Const(IRConst_F32( (Float)scale ));
12356 IROp opMUL = isD ? Iop_MulF64 : Iop_MulF32;
12357 IROp opCVT = isU ? (isD ? Iop_I64UtoF64 : Iop_I32UtoF32)
12358 : (isD ? Iop_I64StoF64 : Iop_I32StoF32);
12359 IRType tyF = isD ? Ity_F64 : Ity_F32;
12360 IRType tyI = isD ? Ity_I64 : Ity_I32;
12361 UInt nLanes = (isQ ? 2 : 1) * (isD ? 1 : 2);
12362 vassert(nLanes == 2 || nLanes == 4);
12363 for (UInt i = 0; i < nLanes; i++) {
12364 IRTemp src = newTemp(tyI);
12365 IRTemp res = newTemp(tyF);
12366 IRTemp rm = mk_get_IR_rounding_mode();
12367 assign(src, getQRegLane(nn, i, tyI));
12368 assign(res, triop(opMUL, mkexpr(rm),
12369 binop(opCVT, mkexpr(rm), mkexpr(src)),
12370 scaleE));
12371 putQRegLane(dd, i, mkexpr(res));
12373 if (!isQ) {
12374 putQRegLane(dd, 1, mkU64(0));
12376 const HChar* arr = nameArr_Q_SZ(bitQ, size);
12377 DIP("%s %s.%s, %s.%s, #%u\n", isU ? "ucvtf" : "scvtf",
12378 nameQReg128(dd), arr, nameQReg128(nn), arr, fbits);
12379 return True;
12382 if (opcode == BITS5(1,1,1,1,1)) {
12383 /* -------- 0,11111 FCVTZS {2d_2d,4s_4s,2s_2s}_imm -------- */
12384 /* -------- 1,11111 FCVTZU {2d_2d,4s_4s,2s_2s}_imm -------- */
12385 /* If immh is of the form 00xx, the insn is invalid. */
12386 if (immh < BITS4(0,1,0,0)) return False;
12387 UInt size = 0;
12388 UInt fbits = 0;
12389 Bool ok = getLaneInfo_IMMH_IMMB(&fbits, &size, immh, immb);
12390 /* The following holds because immh is never zero. */
12391 vassert(ok);
12392 /* The following holds because immh >= 0100. */
12393 vassert(size == X10 || size == X11);
12394 Bool isD = size == X11;
12395 Bool isU = bitU == 1;
12396 Bool isQ = bitQ == 1;
12397 if (isD && !isQ) return False; /* reject .1d case */
12398 vassert(fbits >= 1 && fbits <= (isD ? 64 : 32));
12399 Double scale = two_to_the_plus(fbits);
12400 IRExpr* scaleE = isD ? IRExpr_Const(IRConst_F64(scale))
12401 : IRExpr_Const(IRConst_F32( (Float)scale ));
12402 IROp opMUL = isD ? Iop_MulF64 : Iop_MulF32;
12403 IROp opCVT = isU ? (isD ? Iop_F64toI64U : Iop_F32toI32U)
12404 : (isD ? Iop_F64toI64S : Iop_F32toI32S);
12405 IRType tyF = isD ? Ity_F64 : Ity_F32;
12406 IRType tyI = isD ? Ity_I64 : Ity_I32;
12407 UInt nLanes = (isQ ? 2 : 1) * (isD ? 1 : 2);
12408 vassert(nLanes == 2 || nLanes == 4);
12409 for (UInt i = 0; i < nLanes; i++) {
12410 IRTemp src = newTemp(tyF);
12411 IRTemp res = newTemp(tyI);
12412 IRTemp rm = newTemp(Ity_I32);
12413 assign(src, getQRegLane(nn, i, tyF));
12414 assign(rm, mkU32(Irrm_ZERO));
12415 assign(res, binop(opCVT, mkexpr(rm),
12416 triop(opMUL, mkexpr(rm),
12417 mkexpr(src), scaleE)));
12418 putQRegLane(dd, i, mkexpr(res));
12420 if (!isQ) {
12421 putQRegLane(dd, 1, mkU64(0));
12423 const HChar* arr = nameArr_Q_SZ(bitQ, size);
12424 DIP("%s %s.%s, %s.%s, #%u\n", isU ? "fcvtzu" : "fcvtzs",
12425 nameQReg128(dd), arr, nameQReg128(nn), arr, fbits);
12426 return True;
12429 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
12430 return False;
12431 # undef INSN
12435 static
12436 Bool dis_AdvSIMD_three_different(/*MB_OUT*/DisResult* dres, UInt insn)
12438 /* 31 30 29 28 23 21 20 15 11 9 4
12439 0 Q U 01110 size 1 m opcode 00 n d
12440 Decode fields: u,opcode
12442 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
12443 if (INSN(31,31) != 0
12444 || INSN(28,24) != BITS5(0,1,1,1,0)
12445 || INSN(21,21) != 1
12446 || INSN(11,10) != BITS2(0,0)) {
12447 return False;
12449 UInt bitQ = INSN(30,30);
12450 UInt bitU = INSN(29,29);
12451 UInt size = INSN(23,22);
12452 UInt mm = INSN(20,16);
12453 UInt opcode = INSN(15,12);
12454 UInt nn = INSN(9,5);
12455 UInt dd = INSN(4,0);
12456 vassert(size < 4);
12457 Bool is2 = bitQ == 1;
12459 if (opcode == BITS4(0,0,0,0) || opcode == BITS4(0,0,1,0)) {
12460 /* -------- 0,0000 SADDL{2} -------- */
12461 /* -------- 1,0000 UADDL{2} -------- */
12462 /* -------- 0,0010 SSUBL{2} -------- */
12463 /* -------- 1,0010 USUBL{2} -------- */
12464 /* Widens, and size refers to the narrow lanes. */
12465 if (size == X11) return False;
12466 vassert(size <= 2);
12467 Bool isU = bitU == 1;
12468 Bool isADD = opcode == BITS4(0,0,0,0);
12469 IRTemp argL = math_WIDEN_LO_OR_HI_LANES(isU, is2, size, getQReg128(nn));
12470 IRTemp argR = math_WIDEN_LO_OR_HI_LANES(isU, is2, size, getQReg128(mm));
12471 IRTemp res = newTempV128();
12472 assign(res, binop(isADD ? mkVecADD(size+1) : mkVecSUB(size+1),
12473 mkexpr(argL), mkexpr(argR)));
12474 putQReg128(dd, mkexpr(res));
12475 const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
12476 const HChar* arrWide = nameArr_Q_SZ(1, size+1);
12477 const HChar* nm = isADD ? (isU ? "uaddl" : "saddl")
12478 : (isU ? "usubl" : "ssubl");
12479 DIP("%s%s %s.%s, %s.%s, %s.%s\n", nm, is2 ? "2" : "",
12480 nameQReg128(dd), arrWide,
12481 nameQReg128(nn), arrNarrow, nameQReg128(mm), arrNarrow);
12482 return True;
12485 if (opcode == BITS4(0,0,0,1) || opcode == BITS4(0,0,1,1)) {
12486 /* -------- 0,0001 SADDW{2} -------- */
12487 /* -------- 1,0001 UADDW{2} -------- */
12488 /* -------- 0,0011 SSUBW{2} -------- */
12489 /* -------- 1,0011 USUBW{2} -------- */
12490 /* Widens, and size refers to the narrow lanes. */
12491 if (size == X11) return False;
12492 vassert(size <= 2);
12493 Bool isU = bitU == 1;
12494 Bool isADD = opcode == BITS4(0,0,0,1);
12495 IRTemp argR = math_WIDEN_LO_OR_HI_LANES(isU, is2, size, getQReg128(mm));
12496 IRTemp res = newTempV128();
12497 assign(res, binop(isADD ? mkVecADD(size+1) : mkVecSUB(size+1),
12498 getQReg128(nn), mkexpr(argR)));
12499 putQReg128(dd, mkexpr(res));
12500 const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
12501 const HChar* arrWide = nameArr_Q_SZ(1, size+1);
12502 const HChar* nm = isADD ? (isU ? "uaddw" : "saddw")
12503 : (isU ? "usubw" : "ssubw");
12504 DIP("%s%s %s.%s, %s.%s, %s.%s\n", nm, is2 ? "2" : "",
12505 nameQReg128(dd), arrWide,
12506 nameQReg128(nn), arrWide, nameQReg128(mm), arrNarrow);
12507 return True;
12510 if (opcode == BITS4(0,1,0,0) || opcode == BITS4(0,1,1,0)) {
12511 /* -------- 0,0100 ADDHN{2} -------- */
12512 /* -------- 1,0100 RADDHN{2} -------- */
12513 /* -------- 0,0110 SUBHN{2} -------- */
12514 /* -------- 1,0110 RSUBHN{2} -------- */
12515 /* Narrows, and size refers to the narrowed lanes. */
12516 if (size == X11) return False;
12517 vassert(size <= 2);
12518 const UInt shift[3] = { 8, 16, 32 };
12519 Bool isADD = opcode == BITS4(0,1,0,0);
12520 Bool isR = bitU == 1;
12521 /* Combined elements in wide lanes */
12522 IRTemp wide = newTempV128();
12523 IRExpr* wideE = binop(isADD ? mkVecADD(size+1) : mkVecSUB(size+1),
12524 getQReg128(nn), getQReg128(mm));
12525 if (isR) {
12526 wideE = binop(mkVecADD(size+1),
12527 wideE,
12528 mkexpr(math_VEC_DUP_IMM(size+1,
12529 1ULL << (shift[size]-1))));
12531 assign(wide, wideE);
12532 /* Top halves of elements, still in wide lanes */
12533 IRTemp shrd = newTempV128();
12534 assign(shrd, binop(mkVecSHRN(size+1), mkexpr(wide), mkU8(shift[size])));
12535 /* Elements now compacted into lower 64 bits */
12536 IRTemp new64 = newTempV128();
12537 assign(new64, binop(mkVecCATEVENLANES(size), mkexpr(shrd), mkexpr(shrd)));
12538 putLO64andZUorPutHI64(is2, dd, new64);
12539 const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
12540 const HChar* arrWide = nameArr_Q_SZ(1, size+1);
12541 const HChar* nm = isADD ? (isR ? "raddhn" : "addhn")
12542 : (isR ? "rsubhn" : "subhn");
12543 DIP("%s%s %s.%s, %s.%s, %s.%s\n", nm, is2 ? "2" : "",
12544 nameQReg128(dd), arrNarrow,
12545 nameQReg128(nn), arrWide, nameQReg128(mm), arrWide);
12546 return True;
12549 if (opcode == BITS4(0,1,0,1) || opcode == BITS4(0,1,1,1)) {
12550 /* -------- 0,0101 SABAL{2} -------- */
12551 /* -------- 1,0101 UABAL{2} -------- */
12552 /* -------- 0,0111 SABDL{2} -------- */
12553 /* -------- 1,0111 UABDL{2} -------- */
12554 /* Widens, and size refers to the narrow lanes. */
12555 if (size == X11) return False;
12556 vassert(size <= 2);
12557 Bool isU = bitU == 1;
12558 Bool isACC = opcode == BITS4(0,1,0,1);
12559 IRTemp argL = math_WIDEN_LO_OR_HI_LANES(isU, is2, size, getQReg128(nn));
12560 IRTemp argR = math_WIDEN_LO_OR_HI_LANES(isU, is2, size, getQReg128(mm));
12561 IRTemp abd = math_ABD(isU, size+1, mkexpr(argL), mkexpr(argR));
12562 IRTemp res = newTempV128();
12563 assign(res, isACC ? binop(mkVecADD(size+1), mkexpr(abd), getQReg128(dd))
12564 : mkexpr(abd));
12565 putQReg128(dd, mkexpr(res));
12566 const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
12567 const HChar* arrWide = nameArr_Q_SZ(1, size+1);
12568 const HChar* nm = isACC ? (isU ? "uabal" : "sabal")
12569 : (isU ? "uabdl" : "sabdl");
12570 DIP("%s%s %s.%s, %s.%s, %s.%s\n", nm, is2 ? "2" : "",
12571 nameQReg128(dd), arrWide,
12572 nameQReg128(nn), arrNarrow, nameQReg128(mm), arrNarrow);
12573 return True;
12576 if (opcode == BITS4(1,1,0,0)
12577 || opcode == BITS4(1,0,0,0) || opcode == BITS4(1,0,1,0)) {
12578 /* -------- 0,1100 SMULL{2} -------- */ // 0 (ks)
12579 /* -------- 1,1100 UMULL{2} -------- */ // 0
12580 /* -------- 0,1000 SMLAL{2} -------- */ // 1
12581 /* -------- 1,1000 UMLAL{2} -------- */ // 1
12582 /* -------- 0,1010 SMLSL{2} -------- */ // 2
12583 /* -------- 1,1010 UMLSL{2} -------- */ // 2
12584 /* Widens, and size refers to the narrow lanes. */
12585 UInt ks = 3;
12586 switch (opcode) {
12587 case BITS4(1,1,0,0): ks = 0; break;
12588 case BITS4(1,0,0,0): ks = 1; break;
12589 case BITS4(1,0,1,0): ks = 2; break;
12590 default: vassert(0);
12592 vassert(ks <= 2);
12593 if (size == X11) return False;
12594 vassert(size <= 2);
12595 Bool isU = bitU == 1;
12596 IRTemp vecN = newTempV128();
12597 IRTemp vecM = newTempV128();
12598 IRTemp vecD = newTempV128();
12599 assign(vecN, getQReg128(nn));
12600 assign(vecM, getQReg128(mm));
12601 assign(vecD, getQReg128(dd));
12602 IRTemp res = IRTemp_INVALID;
12603 math_MULL_ACC(&res, is2, isU, size, "mas"[ks],
12604 vecN, vecM, ks == 0 ? IRTemp_INVALID : vecD);
12605 putQReg128(dd, mkexpr(res));
12606 const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
12607 const HChar* arrWide = nameArr_Q_SZ(1, size+1);
12608 const HChar* nm = ks == 0 ? "mull" : (ks == 1 ? "mlal" : "mlsl");
12609 DIP("%c%s%s %s.%s, %s.%s, %s.%s\n", isU ? 'u' : 's', nm, is2 ? "2" : "",
12610 nameQReg128(dd), arrWide,
12611 nameQReg128(nn), arrNarrow, nameQReg128(mm), arrNarrow);
12612 return True;
12615 if (bitU == 0
12616 && (opcode == BITS4(1,1,0,1)
12617 || opcode == BITS4(1,0,0,1) || opcode == BITS4(1,0,1,1))) {
12618 /* -------- 0,1101 SQDMULL{2} -------- */ // 0 (ks)
12619 /* -------- 0,1001 SQDMLAL{2} -------- */ // 1
12620 /* -------- 0,1011 SQDMLSL{2} -------- */ // 2
12621 /* Widens, and size refers to the narrow lanes. */
12622 UInt ks = 3;
12623 switch (opcode) {
12624 case BITS4(1,1,0,1): ks = 0; break;
12625 case BITS4(1,0,0,1): ks = 1; break;
12626 case BITS4(1,0,1,1): ks = 2; break;
12627 default: vassert(0);
12629 vassert(ks <= 2);
12630 if (size == X00 || size == X11) return False;
12631 vassert(size <= 2);
12632 IRTemp vecN, vecM, vecD, res, sat1q, sat1n, sat2q, sat2n;
12633 vecN = vecM = vecD = res = sat1q = sat1n = sat2q = sat2n = IRTemp_INVALID;
12634 newTempsV128_3(&vecN, &vecM, &vecD);
12635 assign(vecN, getQReg128(nn));
12636 assign(vecM, getQReg128(mm));
12637 assign(vecD, getQReg128(dd));
12638 math_SQDMULL_ACC(&res, &sat1q, &sat1n, &sat2q, &sat2n,
12639 is2, size, "mas"[ks],
12640 vecN, vecM, ks == 0 ? IRTemp_INVALID : vecD);
12641 putQReg128(dd, mkexpr(res));
12642 vassert(sat1q != IRTemp_INVALID && sat1n != IRTemp_INVALID);
12643 updateQCFLAGwithDifference(sat1q, sat1n);
12644 if (sat2q != IRTemp_INVALID || sat2n != IRTemp_INVALID) {
12645 updateQCFLAGwithDifference(sat2q, sat2n);
12647 const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
12648 const HChar* arrWide = nameArr_Q_SZ(1, size+1);
12649 const HChar* nm = ks == 0 ? "sqdmull"
12650 : (ks == 1 ? "sqdmlal" : "sqdmlsl");
12651 DIP("%s%s %s.%s, %s.%s, %s.%s\n", nm, is2 ? "2" : "",
12652 nameQReg128(dd), arrWide,
12653 nameQReg128(nn), arrNarrow, nameQReg128(mm), arrNarrow);
12654 return True;
12657 if (bitU == 0 && opcode == BITS4(1,1,1,0)) {
12658 /* -------- 0,1110 PMULL{2} -------- */
12659 /* Widens, and size refers to the narrow lanes. */
12660 if (size != X00 && size != X11) return False;
12661 IRTemp res = IRTemp_INVALID;
12662 IRExpr* srcN = getQReg128(nn);
12663 IRExpr* srcM = getQReg128(mm);
12664 const HChar* arrNarrow = NULL;
12665 const HChar* arrWide = NULL;
12666 if (size == X00) {
12667 res = math_BINARY_WIDENING_V128(is2, Iop_PolynomialMull8x8,
12668 srcN, srcM);
12669 arrNarrow = nameArr_Q_SZ(bitQ, size);
12670 arrWide = nameArr_Q_SZ(1, size+1);
12671 } else {
12672 /* The same thing as the X00 case, except we have to call
12673 a helper to do it. */
12674 vassert(size == X11);
12675 res = newTemp(Ity_V128);
12676 IROp slice
12677 = is2 ? Iop_V128HIto64 : Iop_V128to64;
12678 IRExpr** args
12679 = mkIRExprVec_3( IRExpr_VECRET(),
12680 unop(slice, srcN), unop(slice, srcM));
12681 IRDirty* di
12682 = unsafeIRDirty_1_N( res, 0/*regparms*/,
12683 "arm64g_dirtyhelper_PMULLQ",
12684 &arm64g_dirtyhelper_PMULLQ, args);
12685 stmt(IRStmt_Dirty(di));
12686 /* We can't use nameArr_Q_SZ for this because it can't deal with
12687 Q-sized (128 bit) results. Hence do it by hand. */
12688 arrNarrow = bitQ == 0 ? "1d" : "2d";
12689 arrWide = "1q";
12691 putQReg128(dd, mkexpr(res));
12692 DIP("%s%s %s.%s, %s.%s, %s.%s\n", "pmull", is2 ? "2" : "",
12693 nameQReg128(dd), arrWide,
12694 nameQReg128(nn), arrNarrow, nameQReg128(mm), arrNarrow);
12695 return True;
12698 return False;
12699 # undef INSN
12703 static
12704 Bool dis_AdvSIMD_three_same(/*MB_OUT*/DisResult* dres, UInt insn)
12706 /* 31 30 29 28 23 21 20 15 10 9 4
12707 0 Q U 01110 size 1 m opcode 1 n d
12708 Decode fields: u,size,opcode
12710 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
12711 if (INSN(31,31) != 0
12712 || INSN(28,24) != BITS5(0,1,1,1,0)
12713 || INSN(21,21) != 1
12714 || INSN(10,10) != 1) {
12715 return False;
12717 UInt bitQ = INSN(30,30);
12718 UInt bitU = INSN(29,29);
12719 UInt size = INSN(23,22);
12720 UInt mm = INSN(20,16);
12721 UInt opcode = INSN(15,11);
12722 UInt nn = INSN(9,5);
12723 UInt dd = INSN(4,0);
12724 vassert(size < 4);
12726 if (opcode == BITS5(0,0,0,0,0) || opcode == BITS5(0,0,1,0,0)) {
12727 /* -------- 0,xx,00000 SHADD std6_std6_std6 -------- */
12728 /* -------- 1,xx,00000 UHADD std6_std6_std6 -------- */
12729 /* -------- 0,xx,00100 SHSUB std6_std6_std6 -------- */
12730 /* -------- 1,xx,00100 UHSUB std6_std6_std6 -------- */
12731 if (size == X11) return False;
12732 Bool isADD = opcode == BITS5(0,0,0,0,0);
12733 Bool isU = bitU == 1;
12734 /* Widen both args out, do the math, narrow to final result. */
12735 IRTemp argL = newTempV128();
12736 IRTemp argLhi = IRTemp_INVALID;
12737 IRTemp argLlo = IRTemp_INVALID;
12738 IRTemp argR = newTempV128();
12739 IRTemp argRhi = IRTemp_INVALID;
12740 IRTemp argRlo = IRTemp_INVALID;
12741 IRTemp resHi = newTempV128();
12742 IRTemp resLo = newTempV128();
12743 IRTemp res = IRTemp_INVALID;
12744 assign(argL, getQReg128(nn));
12745 argLlo = math_WIDEN_LO_OR_HI_LANES(isU, False, size, mkexpr(argL));
12746 argLhi = math_WIDEN_LO_OR_HI_LANES(isU, True, size, mkexpr(argL));
12747 assign(argR, getQReg128(mm));
12748 argRlo = math_WIDEN_LO_OR_HI_LANES(isU, False, size, mkexpr(argR));
12749 argRhi = math_WIDEN_LO_OR_HI_LANES(isU, True, size, mkexpr(argR));
12750 IROp opADDSUB = isADD ? mkVecADD(size+1) : mkVecSUB(size+1);
12751 IROp opSxR = isU ? mkVecSHRN(size+1) : mkVecSARN(size+1);
12752 assign(resHi, binop(opSxR,
12753 binop(opADDSUB, mkexpr(argLhi), mkexpr(argRhi)),
12754 mkU8(1)));
12755 assign(resLo, binop(opSxR,
12756 binop(opADDSUB, mkexpr(argLlo), mkexpr(argRlo)),
12757 mkU8(1)));
12758 res = math_NARROW_LANES ( resHi, resLo, size );
12759 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
12760 const HChar* nm = isADD ? (isU ? "uhadd" : "shadd")
12761 : (isU ? "uhsub" : "shsub");
12762 const HChar* arr = nameArr_Q_SZ(bitQ, size);
12763 DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
12764 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
12765 return True;
12768 if (opcode == BITS5(0,0,0,1,0)) {
12769 /* -------- 0,xx,00010 SRHADD std7_std7_std7 -------- */
12770 /* -------- 1,xx,00010 URHADD std7_std7_std7 -------- */
12771 if (bitQ == 0 && size == X11) return False; // implied 1d case
12772 Bool isU = bitU == 1;
12773 IRTemp argL = newTempV128();
12774 IRTemp argR = newTempV128();
12775 assign(argL, getQReg128(nn));
12776 assign(argR, getQReg128(mm));
12777 IRTemp res = math_RHADD(size, isU, argL, argR);
12778 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
12779 const HChar* arr = nameArr_Q_SZ(bitQ, size);
12780 DIP("%s %s.%s, %s.%s, %s.%s\n", isU ? "urhadd" : "srhadd",
12781 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
12782 return True;
12785 if (opcode == BITS5(0,0,0,0,1) || opcode == BITS5(0,0,1,0,1)) {
12786 /* -------- 0,xx,00001 SQADD std7_std7_std7 -------- */
12787 /* -------- 1,xx,00001 UQADD std7_std7_std7 -------- */
12788 /* -------- 0,xx,00101 SQSUB std7_std7_std7 -------- */
12789 /* -------- 1,xx,00101 UQSUB std7_std7_std7 -------- */
12790 if (bitQ == 0 && size == X11) return False; // implied 1d case
12791 Bool isADD = opcode == BITS5(0,0,0,0,1);
12792 Bool isU = bitU == 1;
12793 IROp qop = Iop_INVALID;
12794 IROp nop = Iop_INVALID;
12795 if (isADD) {
12796 qop = isU ? mkVecQADDU(size) : mkVecQADDS(size);
12797 nop = mkVecADD(size);
12798 } else {
12799 qop = isU ? mkVecQSUBU(size) : mkVecQSUBS(size);
12800 nop = mkVecSUB(size);
12802 IRTemp argL = newTempV128();
12803 IRTemp argR = newTempV128();
12804 IRTemp qres = newTempV128();
12805 IRTemp nres = newTempV128();
12806 assign(argL, getQReg128(nn));
12807 assign(argR, getQReg128(mm));
12808 assign(qres, math_MAYBE_ZERO_HI64_fromE(
12809 bitQ, binop(qop, mkexpr(argL), mkexpr(argR))));
12810 assign(nres, math_MAYBE_ZERO_HI64_fromE(
12811 bitQ, binop(nop, mkexpr(argL), mkexpr(argR))));
12812 putQReg128(dd, mkexpr(qres));
12813 updateQCFLAGwithDifference(qres, nres);
12814 const HChar* nm = isADD ? (isU ? "uqadd" : "sqadd")
12815 : (isU ? "uqsub" : "sqsub");
12816 const HChar* arr = nameArr_Q_SZ(bitQ, size);
12817 DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
12818 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
12819 return True;
12822 if (bitU == 0 && opcode == BITS5(0,0,0,1,1)) {
12823 /* -------- 0,00,00011 AND 16b_16b_16b, 8b_8b_8b -------- */
12824 /* -------- 0,01,00011 BIC 16b_16b_16b, 8b_8b_8b -------- */
12825 /* -------- 0,10,00011 ORR 16b_16b_16b, 8b_8b_8b -------- */
12826 /* -------- 0,11,00011 ORN 16b_16b_16b, 8b_8b_8b -------- */
12827 Bool isORx = (size & 2) == 2;
12828 Bool invert = (size & 1) == 1;
12829 IRTemp res = newTempV128();
12830 assign(res, binop(isORx ? Iop_OrV128 : Iop_AndV128,
12831 getQReg128(nn),
12832 invert ? unop(Iop_NotV128, getQReg128(mm))
12833 : getQReg128(mm)));
12834 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
12835 const HChar* names[4] = { "and", "bic", "orr", "orn" };
12836 const HChar* ar = bitQ == 1 ? "16b" : "8b";
12837 DIP("%s %s.%s, %s.%s, %s.%s\n", names[INSN(23,22)],
12838 nameQReg128(dd), ar, nameQReg128(nn), ar, nameQReg128(mm), ar);
12839 return True;
12842 if (bitU == 1 && opcode == BITS5(0,0,0,1,1)) {
12843 /* -------- 1,00,00011 EOR 16b_16b_16b, 8b_8b_8b -------- */
12844 /* -------- 1,01,00011 BSL 16b_16b_16b, 8b_8b_8b -------- */
12845 /* -------- 1,10,00011 BIT 16b_16b_16b, 8b_8b_8b -------- */
12846 /* -------- 1,11,00011 BIF 16b_16b_16b, 8b_8b_8b -------- */
12847 IRTemp argD = newTempV128();
12848 IRTemp argN = newTempV128();
12849 IRTemp argM = newTempV128();
12850 assign(argD, getQReg128(dd));
12851 assign(argN, getQReg128(nn));
12852 assign(argM, getQReg128(mm));
12853 const IROp opXOR = Iop_XorV128;
12854 const IROp opAND = Iop_AndV128;
12855 const IROp opNOT = Iop_NotV128;
12856 IRTemp res = newTempV128();
12857 switch (size) {
12858 case BITS2(0,0): /* EOR */
12859 assign(res, binop(opXOR, mkexpr(argM), mkexpr(argN)));
12860 break;
12861 case BITS2(0,1): /* BSL */
12862 assign(res, binop(opXOR, mkexpr(argM),
12863 binop(opAND,
12864 binop(opXOR, mkexpr(argM), mkexpr(argN)),
12865 mkexpr(argD))));
12866 break;
12867 case BITS2(1,0): /* BIT */
12868 assign(res, binop(opXOR, mkexpr(argD),
12869 binop(opAND,
12870 binop(opXOR, mkexpr(argD), mkexpr(argN)),
12871 mkexpr(argM))));
12872 break;
12873 case BITS2(1,1): /* BIF */
12874 assign(res, binop(opXOR, mkexpr(argD),
12875 binop(opAND,
12876 binop(opXOR, mkexpr(argD), mkexpr(argN)),
12877 unop(opNOT, mkexpr(argM)))));
12878 break;
12879 default:
12880 vassert(0);
12882 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
12883 const HChar* nms[4] = { "eor", "bsl", "bit", "bif" };
12884 const HChar* arr = bitQ == 1 ? "16b" : "8b";
12885 DIP("%s %s.%s, %s.%s, %s.%s\n", nms[size],
12886 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
12887 return True;
12890 if (opcode == BITS5(0,0,1,1,0)) {
12891 /* -------- 0,xx,00110 CMGT std7_std7_std7 -------- */ // >s
12892 /* -------- 1,xx,00110 CMHI std7_std7_std7 -------- */ // >u
12893 if (bitQ == 0 && size == X11) return False; // implied 1d case
12894 Bool isGT = bitU == 0;
12895 IRExpr* argL = getQReg128(nn);
12896 IRExpr* argR = getQReg128(mm);
12897 IRTemp res = newTempV128();
12898 assign(res,
12899 isGT ? binop(mkVecCMPGTS(size), argL, argR)
12900 : binop(mkVecCMPGTU(size), argL, argR));
12901 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
12902 const HChar* nm = isGT ? "cmgt" : "cmhi";
12903 const HChar* arr = nameArr_Q_SZ(bitQ, size);
12904 DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
12905 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
12906 return True;
12909 if (opcode == BITS5(0,0,1,1,1)) {
12910 /* -------- 0,xx,00111 CMGE std7_std7_std7 -------- */ // >=s
12911 /* -------- 1,xx,00111 CMHS std7_std7_std7 -------- */ // >=u
12912 if (bitQ == 0 && size == X11) return False; // implied 1d case
12913 Bool isGE = bitU == 0;
12914 IRExpr* argL = getQReg128(nn);
12915 IRExpr* argR = getQReg128(mm);
12916 IRTemp res = newTempV128();
12917 assign(res,
12918 isGE ? unop(Iop_NotV128, binop(mkVecCMPGTS(size), argR, argL))
12919 : unop(Iop_NotV128, binop(mkVecCMPGTU(size), argR, argL)));
12920 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
12921 const HChar* nm = isGE ? "cmge" : "cmhs";
12922 const HChar* arr = nameArr_Q_SZ(bitQ, size);
12923 DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
12924 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
12925 return True;
12928 if (opcode == BITS5(0,1,0,0,0) || opcode == BITS5(0,1,0,1,0)) {
12929 /* -------- 0,xx,01000 SSHL std7_std7_std7 -------- */
12930 /* -------- 0,xx,01010 SRSHL std7_std7_std7 -------- */
12931 /* -------- 1,xx,01000 USHL std7_std7_std7 -------- */
12932 /* -------- 1,xx,01010 URSHL std7_std7_std7 -------- */
12933 if (bitQ == 0 && size == X11) return False; // implied 1d case
12934 Bool isU = bitU == 1;
12935 Bool isR = opcode == BITS5(0,1,0,1,0);
12936 IROp op = isR ? (isU ? mkVecRSHU(size) : mkVecRSHS(size))
12937 : (isU ? mkVecSHU(size) : mkVecSHS(size));
12938 IRTemp res = newTempV128();
12939 assign(res, binop(op, getQReg128(nn), getQReg128(mm)));
12940 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
12941 const HChar* nm = isR ? (isU ? "urshl" : "srshl")
12942 : (isU ? "ushl" : "sshl");
12943 const HChar* arr = nameArr_Q_SZ(bitQ, size);
12944 DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
12945 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
12946 return True;
12949 if (opcode == BITS5(0,1,0,0,1) || opcode == BITS5(0,1,0,1,1)) {
12950 /* -------- 0,xx,01001 SQSHL std7_std7_std7 -------- */
12951 /* -------- 0,xx,01011 SQRSHL std7_std7_std7 -------- */
12952 /* -------- 1,xx,01001 UQSHL std7_std7_std7 -------- */
12953 /* -------- 1,xx,01011 UQRSHL std7_std7_std7 -------- */
12954 if (bitQ == 0 && size == X11) return False; // implied 1d case
12955 Bool isU = bitU == 1;
12956 Bool isR = opcode == BITS5(0,1,0,1,1);
12957 IROp op = isR ? (isU ? mkVecQANDUQRSH(size) : mkVecQANDSQRSH(size))
12958 : (isU ? mkVecQANDUQSH(size) : mkVecQANDSQSH(size));
12959 /* This is a bit tricky. If we're only interested in the lowest 64 bits
12960 of the result (viz, bitQ == 0), then we must adjust the operands to
12961 ensure that the upper part of the result, that we don't care about,
12962 doesn't pollute the returned Q value. To do this, zero out the upper
12963 operand halves beforehand. This works because it means, for the
12964 lanes we don't care about, we are shifting zero by zero, which can
12965 never saturate. */
12966 IRTemp res256 = newTemp(Ity_V256);
12967 IRTemp resSH = newTempV128();
12968 IRTemp resQ = newTempV128();
12969 IRTemp zero = newTempV128();
12970 assign(res256, binop(op,
12971 math_MAYBE_ZERO_HI64_fromE(bitQ, getQReg128(nn)),
12972 math_MAYBE_ZERO_HI64_fromE(bitQ, getQReg128(mm))));
12973 assign(resSH, unop(Iop_V256toV128_0, mkexpr(res256)));
12974 assign(resQ, unop(Iop_V256toV128_1, mkexpr(res256)));
12975 assign(zero, mkV128(0x0000));
12976 putQReg128(dd, mkexpr(resSH));
12977 updateQCFLAGwithDifference(resQ, zero);
12978 const HChar* nm = isR ? (isU ? "uqrshl" : "sqrshl")
12979 : (isU ? "uqshl" : "sqshl");
12980 const HChar* arr = nameArr_Q_SZ(bitQ, size);
12981 DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
12982 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
12983 return True;
12986 if (opcode == BITS5(0,1,1,0,0) || opcode == BITS5(0,1,1,0,1)) {
12987 /* -------- 0,xx,01100 SMAX std7_std7_std7 -------- */
12988 /* -------- 1,xx,01100 UMAX std7_std7_std7 -------- */
12989 /* -------- 0,xx,01101 SMIN std7_std7_std7 -------- */
12990 /* -------- 1,xx,01101 UMIN std7_std7_std7 -------- */
12991 if (bitQ == 0 && size == X11) return False; // implied 1d case
12992 Bool isU = bitU == 1;
12993 Bool isMAX = (opcode & 1) == 0;
12994 IROp op = isMAX ? (isU ? mkVecMAXU(size) : mkVecMAXS(size))
12995 : (isU ? mkVecMINU(size) : mkVecMINS(size));
12996 IRTemp t = newTempV128();
12997 assign(t, binop(op, getQReg128(nn), getQReg128(mm)));
12998 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t));
12999 const HChar* nm = isMAX ? (isU ? "umax" : "smax")
13000 : (isU ? "umin" : "smin");
13001 const HChar* arr = nameArr_Q_SZ(bitQ, size);
13002 DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
13003 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
13004 return True;
13007 if (opcode == BITS5(0,1,1,1,0) || opcode == BITS5(0,1,1,1,1)) {
13008 /* -------- 0,xx,01110 SABD std6_std6_std6 -------- */
13009 /* -------- 1,xx,01110 UABD std6_std6_std6 -------- */
13010 /* -------- 0,xx,01111 SABA std6_std6_std6 -------- */
13011 /* -------- 1,xx,01111 UABA std6_std6_std6 -------- */
13012 if (size == X11) return False; // 1d/2d cases not allowed
13013 Bool isU = bitU == 1;
13014 Bool isACC = opcode == BITS5(0,1,1,1,1);
13015 vassert(size <= 2);
13016 IRTemp t1 = math_ABD(isU, size, getQReg128(nn), getQReg128(mm));
13017 IRTemp t2 = newTempV128();
13018 assign(t2, isACC ? binop(mkVecADD(size), mkexpr(t1), getQReg128(dd))
13019 : mkexpr(t1));
13020 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t2));
13021 const HChar* nm = isACC ? (isU ? "uaba" : "saba")
13022 : (isU ? "uabd" : "sabd");
13023 const HChar* arr = nameArr_Q_SZ(bitQ, size);
13024 DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
13025 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
13026 return True;
13029 if (opcode == BITS5(1,0,0,0,0)) {
13030 /* -------- 0,xx,10000 ADD std7_std7_std7 -------- */
13031 /* -------- 1,xx,10000 SUB std7_std7_std7 -------- */
13032 if (bitQ == 0 && size == X11) return False; // implied 1d case
13033 Bool isSUB = bitU == 1;
13034 IROp op = isSUB ? mkVecSUB(size) : mkVecADD(size);
13035 IRTemp t = newTempV128();
13036 assign(t, binop(op, getQReg128(nn), getQReg128(mm)));
13037 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t));
13038 const HChar* nm = isSUB ? "sub" : "add";
13039 const HChar* arr = nameArr_Q_SZ(bitQ, size);
13040 DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
13041 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
13042 return True;
13045 if (opcode == BITS5(1,0,0,0,1)) {
13046 /* -------- 0,xx,10001 CMTST std7_std7_std7 -------- */ // &, != 0
13047 /* -------- 1,xx,10001 CMEQ std7_std7_std7 -------- */ // ==
13048 if (bitQ == 0 && size == X11) return False; // implied 1d case
13049 Bool isEQ = bitU == 1;
13050 IRExpr* argL = getQReg128(nn);
13051 IRExpr* argR = getQReg128(mm);
13052 IRTemp res = newTempV128();
13053 assign(res,
13054 isEQ ? binop(mkVecCMPEQ(size), argL, argR)
13055 : unop(Iop_NotV128, binop(mkVecCMPEQ(size),
13056 binop(Iop_AndV128, argL, argR),
13057 mkV128(0x0000))));
13058 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
13059 const HChar* nm = isEQ ? "cmeq" : "cmtst";
13060 const HChar* arr = nameArr_Q_SZ(bitQ, size);
13061 DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
13062 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
13063 return True;
13066 if (opcode == BITS5(1,0,0,1,0)) {
13067 /* -------- 0,xx,10010 MLA std7_std7_std7 -------- */
13068 /* -------- 1,xx,10010 MLS std7_std7_std7 -------- */
13069 if (bitQ == 0 && size == X11) return False; // implied 1d case
13070 Bool isMLS = bitU == 1;
13071 IROp opMUL = mkVecMUL(size);
13072 IROp opADDSUB = isMLS ? mkVecSUB(size) : mkVecADD(size);
13073 IRTemp res = newTempV128();
13074 if (opMUL != Iop_INVALID && opADDSUB != Iop_INVALID) {
13075 assign(res, binop(opADDSUB,
13076 getQReg128(dd),
13077 binop(opMUL, getQReg128(nn), getQReg128(mm))));
13078 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
13079 const HChar* arr = nameArr_Q_SZ(bitQ, size);
13080 DIP("%s %s.%s, %s.%s, %s.%s\n", isMLS ? "mls" : "mla",
13081 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
13082 return True;
13084 return False;
13087 if (opcode == BITS5(1,0,0,1,1)) {
13088 /* -------- 0,xx,10011 MUL std7_std7_std7 -------- */
13089 /* -------- 1,xx,10011 PMUL 16b_16b_16b, 8b_8b_8b -------- */
13090 if (bitQ == 0 && size == X11) return False; // implied 1d case
13091 Bool isPMUL = bitU == 1;
13092 const IROp opsPMUL[4]
13093 = { Iop_PolynomialMul8x16, Iop_INVALID, Iop_INVALID, Iop_INVALID };
13094 IROp opMUL = isPMUL ? opsPMUL[size] : mkVecMUL(size);
13095 IRTemp res = newTempV128();
13096 if (opMUL != Iop_INVALID) {
13097 assign(res, binop(opMUL, getQReg128(nn), getQReg128(mm)));
13098 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
13099 const HChar* arr = nameArr_Q_SZ(bitQ, size);
13100 DIP("%s %s.%s, %s.%s, %s.%s\n", isPMUL ? "pmul" : "mul",
13101 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
13102 return True;
13104 return False;
13107 if (opcode == BITS5(1,0,1,0,0) || opcode == BITS5(1,0,1,0,1)) {
13108 /* -------- 0,xx,10100 SMAXP std6_std6_std6 -------- */
13109 /* -------- 1,xx,10100 UMAXP std6_std6_std6 -------- */
13110 /* -------- 0,xx,10101 SMINP std6_std6_std6 -------- */
13111 /* -------- 1,xx,10101 UMINP std6_std6_std6 -------- */
13112 if (size == X11) return False;
13113 Bool isU = bitU == 1;
13114 Bool isMAX = opcode == BITS5(1,0,1,0,0);
13115 IRTemp vN = newTempV128();
13116 IRTemp vM = newTempV128();
13117 IROp op = isMAX ? (isU ? mkVecMAXU(size) : mkVecMAXS(size))
13118 : (isU ? mkVecMINU(size) : mkVecMINS(size));
13119 assign(vN, getQReg128(nn));
13120 assign(vM, getQReg128(mm));
13121 IRTemp res128 = newTempV128();
13122 assign(res128,
13123 binop(op,
13124 binop(mkVecCATEVENLANES(size), mkexpr(vM), mkexpr(vN)),
13125 binop(mkVecCATODDLANES(size), mkexpr(vM), mkexpr(vN))));
13126 /* In the half-width case, use CatEL32x4 to extract the half-width
13127 result from the full-width result. */
13128 IRExpr* res
13129 = bitQ == 0 ? unop(Iop_ZeroHI64ofV128,
13130 binop(Iop_CatEvenLanes32x4, mkexpr(res128),
13131 mkexpr(res128)))
13132 : mkexpr(res128);
13133 putQReg128(dd, res);
13134 const HChar* arr = nameArr_Q_SZ(bitQ, size);
13135 const HChar* nm = isMAX ? (isU ? "umaxp" : "smaxp")
13136 : (isU ? "uminp" : "sminp");
13137 DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
13138 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
13139 return True;
13142 if (opcode == BITS5(1,0,1,1,0)) {
13143 /* -------- 0,xx,10110 SQDMULH s and h variants only -------- */
13144 /* -------- 1,xx,10110 SQRDMULH s and h variants only -------- */
13145 if (size == X00 || size == X11) return False;
13146 Bool isR = bitU == 1;
13147 IRTemp res, sat1q, sat1n, vN, vM;
13148 res = sat1q = sat1n = vN = vM = IRTemp_INVALID;
13149 newTempsV128_2(&vN, &vM);
13150 assign(vN, getQReg128(nn));
13151 assign(vM, getQReg128(mm));
13152 math_SQDMULH(&res, &sat1q, &sat1n, isR, size, vN, vM);
13153 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
13154 IROp opZHI = bitQ == 0 ? Iop_ZeroHI64ofV128 : Iop_INVALID;
13155 updateQCFLAGwithDifferenceZHI(sat1q, sat1n, opZHI);
13156 const HChar* arr = nameArr_Q_SZ(bitQ, size);
13157 const HChar* nm = isR ? "sqrdmulh" : "sqdmulh";
13158 DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
13159 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
13160 return True;
13163 if (bitU == 0 && opcode == BITS5(1,0,1,1,1)) {
13164 /* -------- 0,xx,10111 ADDP std7_std7_std7 -------- */
13165 if (bitQ == 0 && size == X11) return False; // implied 1d case
13166 IRTemp vN = newTempV128();
13167 IRTemp vM = newTempV128();
13168 assign(vN, getQReg128(nn));
13169 assign(vM, getQReg128(mm));
13170 IRTemp res128 = newTempV128();
13171 assign(res128,
13172 binop(mkVecADD(size),
13173 binop(mkVecCATEVENLANES(size), mkexpr(vM), mkexpr(vN)),
13174 binop(mkVecCATODDLANES(size), mkexpr(vM), mkexpr(vN))));
13175 /* In the half-width case, use CatEL32x4 to extract the half-width
13176 result from the full-width result. */
13177 IRExpr* res
13178 = bitQ == 0 ? unop(Iop_ZeroHI64ofV128,
13179 binop(Iop_CatEvenLanes32x4, mkexpr(res128),
13180 mkexpr(res128)))
13181 : mkexpr(res128);
13182 putQReg128(dd, res);
13183 const HChar* arr = nameArr_Q_SZ(bitQ, size);
13184 DIP("addp %s.%s, %s.%s, %s.%s\n",
13185 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
13186 return True;
13189 if (bitU == 0
13190 && (opcode == BITS5(1,1,0,0,0) || opcode == BITS5(1,1,1,1,0))) {
13191 /* -------- 0,0x,11000 FMAXNM 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
13192 /* -------- 0,1x,11000 FMINNM 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
13193 /* -------- 0,0x,11110 FMAX 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
13194 /* -------- 0,1x,11110 FMIN 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
13195 /* FMAXNM, FMINNM: FIXME -- KLUDGED */
13196 Bool isD = (size & 1) == 1;
13197 if (bitQ == 0 && isD) return False; // implied 1d case
13198 Bool isMIN = (size & 2) == 2;
13199 Bool isNM = opcode == BITS5(1,1,0,0,0);
13200 IROp opMXX = (isMIN ? mkVecMINF : mkVecMAXF)(isD ? X11 : X10);
13201 IRTemp res = newTempV128();
13202 assign(res, binop(opMXX, getQReg128(nn), getQReg128(mm)));
13203 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
13204 const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
13205 DIP("%s%s %s.%s, %s.%s, %s.%s\n",
13206 isMIN ? "fmin" : "fmax", isNM ? "nm" : "",
13207 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
13208 return True;
13211 if (bitU == 0 && opcode == BITS5(1,1,0,0,1)) {
13212 /* -------- 0,0x,11001 FMLA 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
13213 /* -------- 0,1x,11001 FMLS 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
13214 Bool isD = (size & 1) == 1;
13215 Bool isSUB = (size & 2) == 2;
13216 if (bitQ == 0 && isD) return False; // implied 1d case
13217 IROp opADD = isD ? Iop_Add64Fx2 : Iop_Add32Fx4;
13218 IROp opSUB = isD ? Iop_Sub64Fx2 : Iop_Sub32Fx4;
13219 IROp opMUL = isD ? Iop_Mul64Fx2 : Iop_Mul32Fx4;
13220 IRTemp rm = mk_get_IR_rounding_mode();
13221 IRTemp t1 = newTempV128();
13222 IRTemp t2 = newTempV128();
13223 // FIXME: double rounding; use FMA primops instead
13224 assign(t1, triop(opMUL,
13225 mkexpr(rm), getQReg128(nn), getQReg128(mm)));
13226 assign(t2, triop(isSUB ? opSUB : opADD,
13227 mkexpr(rm), getQReg128(dd), mkexpr(t1)));
13228 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t2));
13229 const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
13230 DIP("%s %s.%s, %s.%s, %s.%s\n", isSUB ? "fmls" : "fmla",
13231 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
13232 return True;
13235 if (bitU == 0 && opcode == BITS5(1,1,0,1,0)) {
13236 /* -------- 0,0x,11010 FADD 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
13237 /* -------- 0,1x,11010 FSUB 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
13238 Bool isD = (size & 1) == 1;
13239 Bool isSUB = (size & 2) == 2;
13240 if (bitQ == 0 && isD) return False; // implied 1d case
13241 const IROp ops[4]
13242 = { Iop_Add32Fx4, Iop_Add64Fx2, Iop_Sub32Fx4, Iop_Sub64Fx2 };
13243 IROp op = ops[size];
13244 IRTemp rm = mk_get_IR_rounding_mode();
13245 IRTemp t1 = newTempV128();
13246 IRTemp t2 = newTempV128();
13247 assign(t1, triop(op, mkexpr(rm), getQReg128(nn), getQReg128(mm)));
13248 assign(t2, math_MAYBE_ZERO_HI64(bitQ, t1));
13249 putQReg128(dd, mkexpr(t2));
13250 const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
13251 DIP("%s %s.%s, %s.%s, %s.%s\n", isSUB ? "fsub" : "fadd",
13252 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
13253 return True;
13256 if (bitU == 1 && size >= X10 && opcode == BITS5(1,1,0,1,0)) {
13257 /* -------- 1,1x,11010 FABD 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
13258 Bool isD = (size & 1) == 1;
13259 if (bitQ == 0 && isD) return False; // implied 1d case
13260 IROp opSUB = isD ? Iop_Sub64Fx2 : Iop_Sub32Fx4;
13261 IROp opABS = isD ? Iop_Abs64Fx2 : Iop_Abs32Fx4;
13262 IRTemp rm = mk_get_IR_rounding_mode();
13263 IRTemp t1 = newTempV128();
13264 IRTemp t2 = newTempV128();
13265 // FIXME: use Abd primop instead?
13266 assign(t1, triop(opSUB, mkexpr(rm), getQReg128(nn), getQReg128(mm)));
13267 assign(t2, unop(opABS, mkexpr(t1)));
13268 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t2));
13269 const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
13270 DIP("fabd %s.%s, %s.%s, %s.%s\n",
13271 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
13272 return True;
13275 if (size <= X01 && opcode == BITS5(1,1,0,1,1)) {
13276 /* -------- 0,0x,11011 FMULX 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
13277 /* -------- 1,0x,11011 FMUL 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
13278 // KLUDGE: FMULX is treated the same way as FMUL. That can't be right.
13279 Bool isD = (size & 1) == 1;
13280 Bool isMULX = bitU == 0;
13281 if (bitQ == 0 && isD) return False; // implied 1d case
13282 IRTemp rm = mk_get_IR_rounding_mode();
13283 IRTemp t1 = newTempV128();
13284 assign(t1, triop(isD ? Iop_Mul64Fx2 : Iop_Mul32Fx4,
13285 mkexpr(rm), getQReg128(nn), getQReg128(mm)));
13286 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t1));
13287 const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
13288 DIP("%s %s.%s, %s.%s, %s.%s\n", isMULX ? "fmulx" : "fmul",
13289 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
13290 return True;
13293 if (size <= X01 && opcode == BITS5(1,1,1,0,0)) {
13294 /* -------- 0,0x,11100 FCMEQ 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
13295 /* -------- 1,0x,11100 FCMGE 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
13296 Bool isD = (size & 1) == 1;
13297 if (bitQ == 0 && isD) return False; // implied 1d case
13298 Bool isGE = bitU == 1;
13299 IROp opCMP = isGE ? (isD ? Iop_CmpLE64Fx2 : Iop_CmpLE32Fx4)
13300 : (isD ? Iop_CmpEQ64Fx2 : Iop_CmpEQ32Fx4);
13301 IRTemp t1 = newTempV128();
13302 assign(t1, isGE ? binop(opCMP, getQReg128(mm), getQReg128(nn)) // swapd
13303 : binop(opCMP, getQReg128(nn), getQReg128(mm)));
13304 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t1));
13305 const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
13306 DIP("%s %s.%s, %s.%s, %s.%s\n", isGE ? "fcmge" : "fcmeq",
13307 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
13308 return True;
13311 if (bitU == 1 && size >= X10 && opcode == BITS5(1,1,1,0,0)) {
13312 /* -------- 1,1x,11100 FCMGT 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
13313 Bool isD = (size & 1) == 1;
13314 if (bitQ == 0 && isD) return False; // implied 1d case
13315 IROp opCMP = isD ? Iop_CmpLT64Fx2 : Iop_CmpLT32Fx4;
13316 IRTemp t1 = newTempV128();
13317 assign(t1, binop(opCMP, getQReg128(mm), getQReg128(nn))); // swapd
13318 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t1));
13319 const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
13320 DIP("%s %s.%s, %s.%s, %s.%s\n", "fcmgt",
13321 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
13322 return True;
13325 if (bitU == 1 && opcode == BITS5(1,1,1,0,1)) {
13326 /* -------- 1,0x,11101 FACGE 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
13327 /* -------- 1,1x,11101 FACGT 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
13328 Bool isD = (size & 1) == 1;
13329 Bool isGT = (size & 2) == 2;
13330 if (bitQ == 0 && isD) return False; // implied 1d case
13331 IROp opCMP = isGT ? (isD ? Iop_CmpLT64Fx2 : Iop_CmpLT32Fx4)
13332 : (isD ? Iop_CmpLE64Fx2 : Iop_CmpLE32Fx4);
13333 IROp opABS = isD ? Iop_Abs64Fx2 : Iop_Abs32Fx4;
13334 IRTemp t1 = newTempV128();
13335 assign(t1, binop(opCMP, unop(opABS, getQReg128(mm)),
13336 unop(opABS, getQReg128(nn)))); // swapd
13337 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t1));
13338 const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
13339 DIP("%s %s.%s, %s.%s, %s.%s\n", isGT ? "facgt" : "facge",
13340 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
13341 return True;
13344 if (bitU == 1
13345 && (opcode == BITS5(1,1,0,0,0) || opcode == BITS5(1,1,1,1,0))) {
13346 /* -------- 1,0x,11000 FMAXNMP 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
13347 /* -------- 1,1x,11000 FMINNMP 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
13348 /* -------- 1,0x,11110 FMAXP 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
13349 /* -------- 1,1x,11110 FMINP 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
13350 /* FMAXNM, FMINNM: FIXME -- KLUDGED */
13351 Bool isD = (size & 1) == 1;
13352 if (bitQ == 0 && isD) return False; // implied 1d case
13353 Bool isMIN = (size & 2) == 2;
13354 Bool isNM = opcode == BITS5(1,1,0,0,0);
13355 IROp opMXX = (isMIN ? mkVecMINF : mkVecMAXF)(isD ? 3 : 2);
13356 IRTemp srcN = newTempV128();
13357 IRTemp srcM = newTempV128();
13358 IRTemp preL = IRTemp_INVALID;
13359 IRTemp preR = IRTemp_INVALID;
13360 assign(srcN, getQReg128(nn));
13361 assign(srcM, getQReg128(mm));
13362 math_REARRANGE_FOR_FLOATING_PAIRWISE(&preL, &preR, srcM, srcN,
13363 isD ? ARM64VSizeD : ARM64VSizeS, bitQ);
13364 putQReg128(
13365 dd, math_MAYBE_ZERO_HI64_fromE(
13366 bitQ,
13367 binop(opMXX, mkexpr(preL), mkexpr(preR))));
13368 const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
13369 DIP("%s%sp %s.%s, %s.%s, %s.%s\n",
13370 isMIN ? "fmin" : "fmax", isNM ? "nm" : "",
13371 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
13372 return True;
13375 if (bitU == 1 && size <= X01 && opcode == BITS5(1,1,0,1,0)) {
13376 /* -------- 1,0x,11010 FADDP 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
13377 Bool isD = size == X01;
13378 if (bitQ == 0 && isD) return False; // implied 1d case
13379 IRTemp srcN = newTempV128();
13380 IRTemp srcM = newTempV128();
13381 IRTemp preL = IRTemp_INVALID;
13382 IRTemp preR = IRTemp_INVALID;
13383 assign(srcN, getQReg128(nn));
13384 assign(srcM, getQReg128(mm));
13385 math_REARRANGE_FOR_FLOATING_PAIRWISE(&preL, &preR, srcM, srcN,
13386 isD ? ARM64VSizeD : ARM64VSizeS, bitQ);
13387 putQReg128(
13388 dd, math_MAYBE_ZERO_HI64_fromE(
13389 bitQ,
13390 triop(mkVecADDF(isD ? 3 : 2),
13391 mkexpr(mk_get_IR_rounding_mode()),
13392 mkexpr(preL), mkexpr(preR))));
13393 const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
13394 DIP("%s %s.%s, %s.%s, %s.%s\n", "faddp",
13395 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
13396 return True;
13399 if (bitU == 1 && size <= X01 && opcode == BITS5(1,1,1,1,1)) {
13400 /* -------- 1,0x,11111 FDIV 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
13401 Bool isD = (size & 1) == 1;
13402 if (bitQ == 0 && isD) return False; // implied 1d case
13403 vassert(size <= 1);
13404 const IROp ops[2] = { Iop_Div32Fx4, Iop_Div64Fx2 };
13405 IROp op = ops[size];
13406 IRTemp rm = mk_get_IR_rounding_mode();
13407 IRTemp t1 = newTempV128();
13408 IRTemp t2 = newTempV128();
13409 assign(t1, triop(op, mkexpr(rm), getQReg128(nn), getQReg128(mm)));
13410 assign(t2, math_MAYBE_ZERO_HI64(bitQ, t1));
13411 putQReg128(dd, mkexpr(t2));
13412 const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
13413 DIP("%s %s.%s, %s.%s, %s.%s\n", "fdiv",
13414 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
13415 return True;
13418 if (bitU == 0 && opcode == BITS5(1,1,1,1,1)) {
13419 /* -------- 0,0x,11111: FRECPS 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
13420 /* -------- 0,1x,11111: FRSQRTS 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
13421 Bool isSQRT = (size & 2) == 2;
13422 Bool isD = (size & 1) == 1;
13423 if (bitQ == 0 && isD) return False; // implied 1d case
13424 IROp op = isSQRT ? (isD ? Iop_RSqrtStep64Fx2 : Iop_RSqrtStep32Fx4)
13425 : (isD ? Iop_RecipStep64Fx2 : Iop_RecipStep32Fx4);
13426 IRTemp res = newTempV128();
13427 assign(res, binop(op, getQReg128(nn), getQReg128(mm)));
13428 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
13429 const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
13430 DIP("%s %s.%s, %s.%s, %s.%s\n", isSQRT ? "frsqrts" : "frecps",
13431 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
13432 return True;
13435 return False;
13436 # undef INSN
13440 static
13441 Bool dis_AdvSIMD_three_same_extra(/*MB_OUT*/DisResult* dres, UInt insn)
13443 /* 31 30 29 28 23 21 20 15 14 10 9 4
13444 0 Q U 01110 size 0 m 1 opcode 1 n d
13445 Decode fields: u,size,opcode
13447 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
13448 if (INSN(31,31) != 0
13449 || INSN(28,24) != BITS5(0,1,1,1,0)
13450 || INSN(21,21) != 0
13451 || INSN(15,15) != 1
13452 || INSN(10,10) != 1) {
13453 return False;
13455 UInt bitQ = INSN(30,30);
13456 UInt bitU = INSN(29,29);
13457 UInt size = INSN(23,22);
13458 UInt mm = INSN(20,16);
13459 UInt opcode = INSN(14,11);
13460 UInt nn = INSN(9,5);
13461 UInt dd = INSN(4,0);
13462 vassert(size < 4);
13463 vassert(mm < 32 && nn < 32 && dd < 32);
13465 if (bitU == 1 && (opcode == BITS4(0,0,0,0) || opcode == BITS4(0,0,0,1))) {
13466 /* -------- 0,xx,10110 SQRDMLAH s and h variants only -------- */
13467 /* -------- 1,xx,10110 SQRDMLSH s and h variants only -------- */
13468 if (size == X00 || size == X11) return False;
13469 Bool isAdd = opcode == BITS4(0,0,0,0);
13471 IRTemp res, res_nosat, vD, vN, vM;
13472 res = res_nosat = vD = vN = vM = IRTemp_INVALID;
13473 newTempsV128_3(&vD, &vN, &vM);
13474 assign(vD, getQReg128(dd));
13475 assign(vN, getQReg128(nn));
13476 assign(vM, getQReg128(mm));
13478 math_SQRDMLAH(&res, &res_nosat, isAdd, size, vD, vN, vM);
13479 IROp opZHI = bitQ == 0 ? Iop_ZeroHI64ofV128 : Iop_INVALID;
13480 updateQCFLAGwithDifferenceZHI(res, res_nosat, opZHI);
13481 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
13483 const HChar* arr = nameArr_Q_SZ(bitQ, size);
13484 const HChar* nm = isAdd ? "sqrdmlah" : "sqrdmlsh";
13485 DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
13486 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
13487 return True;
13490 return False;
13491 # undef INSN
13494 static
13495 Bool dis_AdvSIMD_three_same_fp16(/*MB_OUT*/DisResult* dres, UInt insn,
13496 const VexArchInfo* archinfo)
13498 /* This decode function only handles instructions with half-precision
13499 floating-point (fp16) operands.
13501 if ((archinfo->hwcaps & VEX_HWCAPS_ARM64_FP16) == 0)
13502 return False;
13504 /* 31 30 29 28 23 21 20 15 10 9 4
13505 0 Q U 01110 size 0 m opcode 1 n d
13506 Decode fields: u,size,opcode
13508 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
13509 if (INSN(31,31) != 0
13510 || INSN(28,24) != BITS5(0,1,1,1,0)
13511 || INSN(21,21) != 0
13512 || INSN(10,10) != 1) {
13513 return False;
13515 UInt bitQ = INSN(30,30);
13516 UInt bitU = INSN(29,29);
13517 UInt size = INSN(23,22);
13518 UInt mm = INSN(20,16);
13519 UInt opcode = INSN(15,11);
13520 UInt nn = INSN(9,5);
13521 UInt dd = INSN(4,0);
13522 vassert(size < 4);
13523 vassert(mm < 32 && nn < 32 && dd < 32);
13525 if (bitU == 1 && size == X01 && opcode == BITS5(0,0,0,1,0)) {
13526 /* -------- 1,01,00010 FADDP 4h_4h_4h, 8h_8h_8h -------- */
13527 IROp opADD = mkVecADDF(1); //bitQ == 0 ? 0 : 1);
13528 IRTemp srcN = newTempV128();
13529 IRTemp srcM = newTempV128();
13530 IRTemp preL = IRTemp_INVALID;
13531 IRTemp preR = IRTemp_INVALID;
13532 assign(srcN, getQReg128(nn));
13533 assign(srcM, getQReg128(mm));
13534 math_REARRANGE_FOR_FLOATING_PAIRWISE(&preL, &preR, srcM, srcN,
13535 ARM64VSizeH, bitQ);
13536 putQReg128(
13537 dd, math_MAYBE_ZERO_HI64_fromE(
13538 bitQ,
13539 triop(opADD, mkexpr(mk_get_IR_rounding_mode()),
13540 mkexpr(preL), mkexpr(preR))));
13541 const HChar* arr = bitQ == 0 ? "4h" : "8h";
13542 DIP("%s %s.%s, %s.%s, %s.%s\n", "faddp",
13543 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
13544 return True;
13547 if (bitU == 1 && size == X11 && opcode == BITS5(0,0,0,1,0)) {
13548 /* -------- 1,11,00010 FABD 4h_4h_4h, 8h_8h_8h -------- */
13549 IRTemp rm = mk_get_IR_rounding_mode();
13550 IRTemp t1 = newTempV128();
13551 IRTemp t2 = newTempV128();
13552 assign(t1, triop(Iop_Sub16Fx8, mkexpr(rm), getQReg128(nn), getQReg128(mm)));
13553 assign(t2, unop(Iop_Abs16Fx8, mkexpr(t1)));
13554 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t2));
13555 const HChar* arr = bitQ == 0 ? "4h" : "8h";
13556 DIP("%s %s.%s, %s.%s, %s.%s\n", "fabd",
13557 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
13558 return True;
13561 if (size == X01 && opcode == BITS5(0,0,1,0,0)) {
13562 /* -------- 0,01,00100 FCMEQ 4h_4h_4h, 8h_8h_8h -------- */
13563 /* -------- 1,01,00100 FCMGE 4h_4h_4h, 8h_8h_8h -------- */
13564 Bool isGE = bitU == 1;
13565 IRTemp t1 = newTempV128();
13566 /* Swap source and destination in order to use existing LE IR op for GE. */
13567 assign(t1, isGE ? binop(Iop_CmpLE16Fx8, getQReg128(mm), getQReg128(nn))
13568 : binop(Iop_CmpEQ16Fx8, getQReg128(nn), getQReg128(mm)));
13569 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t1));
13570 const HChar* arr = bitQ == 0 ? "4h" : "8h";
13571 DIP("%s %s.%s, %s.%s, %s.%s\n", isGE ? "fcmge" : "fcmeq",
13572 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
13573 return True;
13576 if (size == X11 && opcode == BITS5(0,0,1,0,0)) {
13577 /* -------- 1,11,00100 FCMGT 4h_4h_4h, 8h_8h_8h -------- */
13578 IRTemp t1 = newTempV128();
13579 /* Swap source and destination in order to use existing LT IR op for GT. */
13580 assign(t1, binop(Iop_CmpLT16Fx8, getQReg128(mm), getQReg128(nn)));
13581 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t1));
13582 const HChar* arr = bitQ == 0 ? "4h" : "8h";
13583 DIP("%s %s.%s, %s.%s, %s.%s\n", "fcmgt",
13584 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
13585 return True;
13588 if (bitU == 1 && opcode == BITS5(0,0,1,0,1)) {
13589 /* -------- 1,01,00101 FACGE 4h_4h_4h 8h_8h_8h -------- */
13590 /* -------- 1,11,00101 FACGT 4h_4h_4h 8h_8h_8h -------- */
13591 Bool isGT = (size & 3) == 3;
13592 IROp opCMP = isGT ? Iop_CmpLT16Fx8 : Iop_CmpLE16Fx8;
13593 IROp opABS = Iop_Abs16Fx8;
13594 IRTemp t1 = newTempV128();
13595 assign(t1, binop(opCMP, unop(opABS, getQReg128(mm)),
13596 unop(opABS, getQReg128(nn))));
13597 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t1));
13598 const HChar* arr = bitQ == 0 ? "4h" : "8h";
13599 DIP("%s %s.%s, %s.%s, %s.%s\n", isGT ? "facgt" : "facge",
13600 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
13601 return True;
13604 if (bitU == 0 && size == X01 && opcode == BITS5(0,0,0,1,0)) {
13605 /* -------- 0,01,00010 FADD 4h_4h_4h, 8h_8h_8h -------- */
13606 IRTemp rm = mk_get_IR_rounding_mode();
13607 IRTemp t1 = newTempV128();
13608 IRTemp t2 = newTempV128();
13609 assign(t1, triop(Iop_Add16Fx8, mkexpr(rm), getQReg128(nn), getQReg128(mm)));
13610 assign(t2, math_MAYBE_ZERO_HI64(bitQ, t1));
13611 putQReg128(dd, mkexpr(t2));
13612 const HChar* arr = bitQ == 0 ? "4h" : "8h";
13613 DIP("%s %s.%s, %s.%s, %s.%s\n", "fadd",
13614 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
13615 return True;
13618 return False;
13619 # undef INSN
13623 static
13624 Bool dis_AdvSIMD_two_reg_misc(/*MB_OUT*/DisResult* dres, UInt insn)
13626 /* 31 30 29 28 23 21 16 11 9 4
13627 0 Q U 01110 size 10000 opcode 10 n d
13628 Decode fields: U,size,opcode
13630 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
13631 if (INSN(31,31) != 0
13632 || INSN(28,24) != BITS5(0,1,1,1,0)
13633 || INSN(21,17) != BITS5(1,0,0,0,0)
13634 || INSN(11,10) != BITS2(1,0)) {
13635 return False;
13637 UInt bitQ = INSN(30,30);
13638 UInt bitU = INSN(29,29);
13639 UInt size = INSN(23,22);
13640 UInt opcode = INSN(16,12);
13641 UInt nn = INSN(9,5);
13642 UInt dd = INSN(4,0);
13643 vassert(size < 4);
13645 if (bitU == 0 && size <= X10 && opcode == BITS5(0,0,0,0,0)) {
13646 /* -------- 0,00,00000: REV64 16b_16b, 8b_8b -------- */
13647 /* -------- 0,01,00000: REV64 8h_8h, 4h_4h -------- */
13648 /* -------- 0,10,00000: REV64 4s_4s, 2s_2s -------- */
13649 const IROp iops[3] = { Iop_Reverse8sIn64_x2,
13650 Iop_Reverse16sIn64_x2, Iop_Reverse32sIn64_x2 };
13651 vassert(size <= 2);
13652 IRTemp res = newTempV128();
13653 assign(res, unop(iops[size], getQReg128(nn)));
13654 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
13655 const HChar* arr = nameArr_Q_SZ(bitQ, size);
13656 DIP("%s %s.%s, %s.%s\n", "rev64",
13657 nameQReg128(dd), arr, nameQReg128(nn), arr);
13658 return True;
13661 if (bitU == 1 && size <= X01 && opcode == BITS5(0,0,0,0,0)) {
13662 /* -------- 1,00,00000: REV32 16b_16b, 8b_8b -------- */
13663 /* -------- 1,01,00000: REV32 8h_8h, 4h_4h -------- */
13664 Bool isH = size == X01;
13665 IRTemp res = newTempV128();
13666 IROp iop = isH ? Iop_Reverse16sIn32_x4 : Iop_Reverse8sIn32_x4;
13667 assign(res, unop(iop, getQReg128(nn)));
13668 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
13669 const HChar* arr = nameArr_Q_SZ(bitQ, size);
13670 DIP("%s %s.%s, %s.%s\n", "rev32",
13671 nameQReg128(dd), arr, nameQReg128(nn), arr);
13672 return True;
13675 if (bitU == 0 && size == X00 && opcode == BITS5(0,0,0,0,1)) {
13676 /* -------- 0,00,00001: REV16 16b_16b, 8b_8b -------- */
13677 IRTemp res = newTempV128();
13678 assign(res, unop(Iop_Reverse8sIn16_x8, getQReg128(nn)));
13679 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
13680 const HChar* arr = nameArr_Q_SZ(bitQ, size);
13681 DIP("%s %s.%s, %s.%s\n", "rev16",
13682 nameQReg128(dd), arr, nameQReg128(nn), arr);
13683 return True;
13686 if (opcode == BITS5(0,0,0,1,0) || opcode == BITS5(0,0,1,1,0)) {
13687 /* -------- 0,xx,00010: SADDLP std6_std6 -------- */
13688 /* -------- 1,xx,00010: UADDLP std6_std6 -------- */
13689 /* -------- 0,xx,00110: SADALP std6_std6 -------- */
13690 /* -------- 1,xx,00110: UADALP std6_std6 -------- */
13691 /* Widens, and size refers to the narrow size. */
13692 if (size == X11) return False; // no 1d or 2d cases
13693 Bool isU = bitU == 1;
13694 Bool isACC = opcode == BITS5(0,0,1,1,0);
13695 IRTemp src = newTempV128();
13696 IRTemp sum = newTempV128();
13697 IRTemp res = newTempV128();
13698 assign(src, getQReg128(nn));
13699 sum = math_ADDLP(size, isU, src);
13700 assign(res, isACC ? binop(mkVecADD(size+1), mkexpr(sum), getQReg128(dd))
13701 : mkexpr(sum));
13702 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
13703 const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
13704 const HChar* arrWide = nameArr_Q_SZ(bitQ, size+1);
13705 DIP("%s %s.%s, %s.%s\n", isACC ? (isU ? "uadalp" : "sadalp")
13706 : (isU ? "uaddlp" : "saddlp"),
13707 nameQReg128(dd), arrWide, nameQReg128(nn), arrNarrow);
13708 return True;
13711 if (opcode == BITS5(0,0,0,1,1)) {
13712 /* -------- 0,xx,00011: SUQADD std7_std7 -------- */
13713 /* -------- 1,xx,00011: USQADD std7_std7 -------- */
13714 if (bitQ == 0 && size == X11) return False; // implied 1d case
13715 Bool isUSQADD = bitU == 1;
13716 /* This is switched (in the US vs SU sense) deliberately.
13717 SUQADD corresponds to the ExtUSsatSS variants and
13718 USQADD corresponds to the ExtSUsatUU variants.
13719 See libvex_ir for more details. */
13720 IROp qop = isUSQADD ? mkVecQADDEXTSUSATUU(size)
13721 : mkVecQADDEXTUSSATSS(size);
13722 IROp nop = mkVecADD(size);
13723 IRTemp argL = newTempV128();
13724 IRTemp argR = newTempV128();
13725 IRTemp qres = newTempV128();
13726 IRTemp nres = newTempV128();
13727 /* Because the two arguments to the addition are implicitly
13728 extended differently (one signedly, the other unsignedly) it is
13729 important to present them to the primop in the correct order. */
13730 assign(argL, getQReg128(nn));
13731 assign(argR, getQReg128(dd));
13732 assign(qres, math_MAYBE_ZERO_HI64_fromE(
13733 bitQ, binop(qop, mkexpr(argL), mkexpr(argR))));
13734 assign(nres, math_MAYBE_ZERO_HI64_fromE(
13735 bitQ, binop(nop, mkexpr(argL), mkexpr(argR))));
13736 putQReg128(dd, mkexpr(qres));
13737 updateQCFLAGwithDifference(qres, nres);
13738 const HChar* arr = nameArr_Q_SZ(bitQ, size);
13739 DIP("%s %s.%s, %s.%s\n", isUSQADD ? "usqadd" : "suqadd",
13740 nameQReg128(dd), arr, nameQReg128(nn), arr);
13741 return True;
13744 if (opcode == BITS5(0,0,1,0,0)) {
13745 /* -------- 0,xx,00100: CLS std6_std6 -------- */
13746 /* -------- 1,xx,00100: CLZ std6_std6 -------- */
13747 if (size == X11) return False; // no 1d or 2d cases
13748 const IROp opsCLS[3] = { Iop_Cls8x16, Iop_Cls16x8, Iop_Cls32x4 };
13749 const IROp opsCLZ[3] = { Iop_Clz8x16, Iop_Clz16x8, Iop_Clz32x4 };
13750 Bool isCLZ = bitU == 1;
13751 IRTemp res = newTempV128();
13752 vassert(size <= 2);
13753 assign(res, unop(isCLZ ? opsCLZ[size] : opsCLS[size], getQReg128(nn)));
13754 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
13755 const HChar* arr = nameArr_Q_SZ(bitQ, size);
13756 DIP("%s %s.%s, %s.%s\n", isCLZ ? "clz" : "cls",
13757 nameQReg128(dd), arr, nameQReg128(nn), arr);
13758 return True;
13761 if (size == X00 && opcode == BITS5(0,0,1,0,1)) {
13762 /* -------- 0,00,00101: CNT 16b_16b, 8b_8b -------- */
13763 /* -------- 1,00,00101: NOT 16b_16b, 8b_8b -------- */
13764 IRTemp res = newTempV128();
13765 assign(res, unop(bitU == 0 ? Iop_Cnt8x16 : Iop_NotV128, getQReg128(nn)));
13766 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
13767 const HChar* arr = nameArr_Q_SZ(bitQ, 0);
13768 DIP("%s %s.%s, %s.%s\n", bitU == 0 ? "cnt" : "not",
13769 nameQReg128(dd), arr, nameQReg128(nn), arr);
13770 return True;
13773 if (bitU == 1 && size == X01 && opcode == BITS5(0,0,1,0,1)) {
13774 /* -------- 1,01,00101 RBIT 16b_16b, 8b_8b -------- */
13775 IRTemp res = newTempV128();
13776 assign(res, unop(Iop_Reverse1sIn8_x16, getQReg128(nn)));
13777 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
13778 const HChar* arr = nameArr_Q_SZ(bitQ, 0);
13779 DIP("%s %s.%s, %s.%s\n", "rbit",
13780 nameQReg128(dd), arr, nameQReg128(nn), arr);
13781 return True;
13784 if (opcode == BITS5(0,0,1,1,1)) {
13785 /* -------- 0,xx,00111 SQABS std7_std7 -------- */
13786 /* -------- 1,xx,00111 SQNEG std7_std7 -------- */
13787 if (bitQ == 0 && size == X11) return False; // implied 1d case
13788 Bool isNEG = bitU == 1;
13789 IRTemp qresFW = IRTemp_INVALID, nresFW = IRTemp_INVALID;
13790 (isNEG ? math_SQNEG : math_SQABS)( &qresFW, &nresFW,
13791 getQReg128(nn), size );
13792 IRTemp qres = newTempV128(), nres = newTempV128();
13793 assign(qres, math_MAYBE_ZERO_HI64(bitQ, qresFW));
13794 assign(nres, math_MAYBE_ZERO_HI64(bitQ, nresFW));
13795 putQReg128(dd, mkexpr(qres));
13796 updateQCFLAGwithDifference(qres, nres);
13797 const HChar* arr = nameArr_Q_SZ(bitQ, size);
13798 DIP("%s %s.%s, %s.%s\n", isNEG ? "sqneg" : "sqabs",
13799 nameQReg128(dd), arr, nameQReg128(nn), arr);
13800 return True;
13803 if (opcode == BITS5(0,1,0,0,0)) {
13804 /* -------- 0,xx,01000: CMGT std7_std7_#0 -------- */ // >s 0
13805 /* -------- 1,xx,01000: CMGE std7_std7_#0 -------- */ // >=s 0
13806 if (bitQ == 0 && size == X11) return False; // implied 1d case
13807 Bool isGT = bitU == 0;
13808 IRExpr* argL = getQReg128(nn);
13809 IRExpr* argR = mkV128(0x0000);
13810 IRTemp res = newTempV128();
13811 IROp opGTS = mkVecCMPGTS(size);
13812 assign(res, isGT ? binop(opGTS, argL, argR)
13813 : unop(Iop_NotV128, binop(opGTS, argR, argL)));
13814 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
13815 const HChar* arr = nameArr_Q_SZ(bitQ, size);
13816 DIP("cm%s %s.%s, %s.%s, #0\n", isGT ? "gt" : "ge",
13817 nameQReg128(dd), arr, nameQReg128(nn), arr);
13818 return True;
13821 if (opcode == BITS5(0,1,0,0,1)) {
13822 /* -------- 0,xx,01001: CMEQ std7_std7_#0 -------- */ // == 0
13823 /* -------- 1,xx,01001: CMLE std7_std7_#0 -------- */ // <=s 0
13824 if (bitQ == 0 && size == X11) return False; // implied 1d case
13825 Bool isEQ = bitU == 0;
13826 IRExpr* argL = getQReg128(nn);
13827 IRExpr* argR = mkV128(0x0000);
13828 IRTemp res = newTempV128();
13829 assign(res, isEQ ? binop(mkVecCMPEQ(size), argL, argR)
13830 : unop(Iop_NotV128,
13831 binop(mkVecCMPGTS(size), argL, argR)));
13832 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
13833 const HChar* arr = nameArr_Q_SZ(bitQ, size);
13834 DIP("cm%s %s.%s, %s.%s, #0\n", isEQ ? "eq" : "le",
13835 nameQReg128(dd), arr, nameQReg128(nn), arr);
13836 return True;
13839 if (bitU == 0 && opcode == BITS5(0,1,0,1,0)) {
13840 /* -------- 0,xx,01010: CMLT std7_std7_#0 -------- */ // <s 0
13841 if (bitQ == 0 && size == X11) return False; // implied 1d case
13842 IRExpr* argL = getQReg128(nn);
13843 IRExpr* argR = mkV128(0x0000);
13844 IRTemp res = newTempV128();
13845 assign(res, binop(mkVecCMPGTS(size), argR, argL));
13846 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
13847 const HChar* arr = nameArr_Q_SZ(bitQ, size);
13848 DIP("cm%s %s.%s, %s.%s, #0\n", "lt",
13849 nameQReg128(dd), arr, nameQReg128(nn), arr);
13850 return True;
13853 if (bitU == 0 && opcode == BITS5(0,1,0,1,1)) {
13854 /* -------- 0,xx,01011: ABS std7_std7 -------- */
13855 if (bitQ == 0 && size == X11) return False; // implied 1d case
13856 IRTemp res = newTempV128();
13857 assign(res, unop(mkVecABS(size), getQReg128(nn)));
13858 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
13859 const HChar* arr = nameArr_Q_SZ(bitQ, size);
13860 DIP("abs %s.%s, %s.%s\n", nameQReg128(dd), arr, nameQReg128(nn), arr);
13861 return True;
13864 if (bitU == 1 && opcode == BITS5(0,1,0,1,1)) {
13865 /* -------- 1,xx,01011: NEG std7_std7 -------- */
13866 if (bitQ == 0 && size == X11) return False; // implied 1d case
13867 IRTemp res = newTempV128();
13868 assign(res, binop(mkVecSUB(size), mkV128(0x0000), getQReg128(nn)));
13869 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
13870 const HChar* arr = nameArr_Q_SZ(bitQ, size);
13871 DIP("neg %s.%s, %s.%s\n", nameQReg128(dd), arr, nameQReg128(nn), arr);
13872 return True;
13875 UInt ix = 0; /*INVALID*/
13876 if (size >= X10) {
13877 switch (opcode) {
13878 case BITS5(0,1,1,0,0): ix = (bitU == 1) ? 4 : 1; break;
13879 case BITS5(0,1,1,0,1): ix = (bitU == 1) ? 5 : 2; break;
13880 case BITS5(0,1,1,1,0): if (bitU == 0) ix = 3; break;
13881 default: break;
13884 if (ix > 0) {
13885 /* -------- 0,1x,01100 FCMGT 2d_2d,4s_4s,2s_2s _#0.0 (ix 1) -------- */
13886 /* -------- 0,1x,01101 FCMEQ 2d_2d,4s_4s,2s_2s _#0.0 (ix 2) -------- */
13887 /* -------- 0,1x,01110 FCMLT 2d_2d,4s_4s,2s_2s _#0.0 (ix 3) -------- */
13888 /* -------- 1,1x,01100 FCMGE 2d_2d,4s_4s,2s_2s _#0.0 (ix 4) -------- */
13889 /* -------- 1,1x,01101 FCMLE 2d_2d,4s_4s,2s_2s _#0.0 (ix 5) -------- */
13890 if (bitQ == 0 && size == X11) return False; // implied 1d case
13891 Bool isD = size == X11;
13892 IROp opCmpEQ = isD ? Iop_CmpEQ64Fx2 : Iop_CmpEQ32Fx4;
13893 IROp opCmpLE = isD ? Iop_CmpLE64Fx2 : Iop_CmpLE32Fx4;
13894 IROp opCmpLT = isD ? Iop_CmpLT64Fx2 : Iop_CmpLT32Fx4;
13895 IROp opCmp = Iop_INVALID;
13896 Bool swap = False;
13897 const HChar* nm = "??";
13898 switch (ix) {
13899 case 1: nm = "fcmgt"; opCmp = opCmpLT; swap = True; break;
13900 case 2: nm = "fcmeq"; opCmp = opCmpEQ; break;
13901 case 3: nm = "fcmlt"; opCmp = opCmpLT; break;
13902 case 4: nm = "fcmge"; opCmp = opCmpLE; swap = True; break;
13903 case 5: nm = "fcmle"; opCmp = opCmpLE; break;
13904 default: vassert(0);
13906 IRExpr* zero = mkV128(0x0000);
13907 IRTemp res = newTempV128();
13908 assign(res, swap ? binop(opCmp, zero, getQReg128(nn))
13909 : binop(opCmp, getQReg128(nn), zero));
13910 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
13911 const HChar* arr = bitQ == 0 ? "2s" : (size == X11 ? "2d" : "4s");
13912 DIP("%s %s.%s, %s.%s, #0.0\n", nm,
13913 nameQReg128(dd), arr, nameQReg128(nn), arr);
13914 return True;
13917 if (size >= X10 && opcode == BITS5(0,1,1,1,1)) {
13918 /* -------- 0,1x,01111: FABS 2d_2d, 4s_4s, 2s_2s -------- */
13919 /* -------- 1,1x,01111: FNEG 2d_2d, 4s_4s, 2s_2s -------- */
13920 if (bitQ == 0 && size == X11) return False; // implied 1d case
13921 Bool isFNEG = bitU == 1;
13922 IROp op = isFNEG ? (size == X10 ? Iop_Neg32Fx4 : Iop_Neg64Fx2)
13923 : (size == X10 ? Iop_Abs32Fx4 : Iop_Abs64Fx2);
13924 IRTemp res = newTempV128();
13925 assign(res, unop(op, getQReg128(nn)));
13926 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
13927 const HChar* arr = bitQ == 0 ? "2s" : (size == X11 ? "2d" : "4s");
13928 DIP("%s %s.%s, %s.%s\n", isFNEG ? "fneg" : "fabs",
13929 nameQReg128(dd), arr, nameQReg128(nn), arr);
13930 return True;
13933 if (bitU == 0 && opcode == BITS5(1,0,0,1,0)) {
13934 /* -------- 0,xx,10010: XTN{,2} -------- */
13935 if (size == X11) return False;
13936 vassert(size < 3);
13937 Bool is2 = bitQ == 1;
13938 IROp opN = mkVecNARROWUN(size);
13939 IRTemp resN = newTempV128();
13940 assign(resN, unop(Iop_64UtoV128, unop(opN, getQReg128(nn))));
13941 putLO64andZUorPutHI64(is2, dd, resN);
13942 const HChar* nm = "xtn";
13943 const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
13944 const HChar* arrWide = nameArr_Q_SZ(1, size+1);
13945 DIP("%s%s %s.%s, %s.%s\n", is2 ? "2" : "", nm,
13946 nameQReg128(dd), arrNarrow, nameQReg128(nn), arrWide);
13947 return True;
13950 if (opcode == BITS5(1,0,1,0,0)
13951 || (bitU == 1 && opcode == BITS5(1,0,0,1,0))) {
13952 /* -------- 0,xx,10100: SQXTN{,2} -------- */
13953 /* -------- 1,xx,10100: UQXTN{,2} -------- */
13954 /* -------- 1,xx,10010: SQXTUN{,2} -------- */
13955 if (size == X11) return False;
13956 vassert(size < 3);
13957 Bool is2 = bitQ == 1;
13958 IROp opN = Iop_INVALID;
13959 Bool zWiden = True;
13960 const HChar* nm = "??";
13961 /**/ if (bitU == 0 && opcode == BITS5(1,0,1,0,0)) {
13962 opN = mkVecQNARROWUNSS(size); nm = "sqxtn"; zWiden = False;
13964 else if (bitU == 1 && opcode == BITS5(1,0,1,0,0)) {
13965 opN = mkVecQNARROWUNUU(size); nm = "uqxtn";
13967 else if (bitU == 1 && opcode == BITS5(1,0,0,1,0)) {
13968 opN = mkVecQNARROWUNSU(size); nm = "sqxtun";
13970 else vassert(0);
13971 IRTemp src = newTempV128();
13972 assign(src, getQReg128(nn));
13973 IRTemp resN = newTempV128();
13974 assign(resN, unop(Iop_64UtoV128, unop(opN, mkexpr(src))));
13975 putLO64andZUorPutHI64(is2, dd, resN);
13976 IRTemp resW = math_WIDEN_LO_OR_HI_LANES(zWiden, False/*!fromUpperHalf*/,
13977 size, mkexpr(resN));
13978 updateQCFLAGwithDifference(src, resW);
13979 const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
13980 const HChar* arrWide = nameArr_Q_SZ(1, size+1);
13981 DIP("%s%s %s.%s, %s.%s\n", is2 ? "2" : "", nm,
13982 nameQReg128(dd), arrNarrow, nameQReg128(nn), arrWide);
13983 return True;
13986 if (bitU == 1 && opcode == BITS5(1,0,0,1,1)) {
13987 /* -------- 1,xx,10011 SHLL{2} #lane-width -------- */
13988 /* Widens, and size is the narrow size. */
13989 if (size == X11) return False;
13990 Bool is2 = bitQ == 1;
13991 IROp opINT = is2 ? mkVecINTERLEAVEHI(size) : mkVecINTERLEAVELO(size);
13992 IROp opSHL = mkVecSHLN(size+1);
13993 IRTemp src = newTempV128();
13994 IRTemp res = newTempV128();
13995 assign(src, getQReg128(nn));
13996 assign(res, binop(opSHL, binop(opINT, mkexpr(src), mkexpr(src)),
13997 mkU8(8 << size)));
13998 putQReg128(dd, mkexpr(res));
13999 const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
14000 const HChar* arrWide = nameArr_Q_SZ(1, size+1);
14001 DIP("shll%s %s.%s, %s.%s, #%d\n", is2 ? "2" : "",
14002 nameQReg128(dd), arrWide, nameQReg128(nn), arrNarrow, 8 << size);
14003 return True;
14006 if (bitU == 0 && size <= X01 && opcode == BITS5(1,0,1,1,0)) {
14007 /* -------- 0,0x,10110: FCVTN 4h/8h_4s, 2s/4s_2d -------- */
14008 UInt nLanes = size == X00 ? 4 : 2;
14009 IRType srcTy = size == X00 ? Ity_F32 : Ity_F64;
14010 IROp opCvt = size == X00 ? Iop_F32toF16 : Iop_F64toF32;
14011 IRTemp rm = mk_get_IR_rounding_mode();
14012 IRTemp src[nLanes];
14013 for (UInt i = 0; i < nLanes; i++) {
14014 src[i] = newTemp(srcTy);
14015 assign(src[i], getQRegLane(nn, i, srcTy));
14017 for (UInt i = 0; i < nLanes; i++) {
14018 putQRegLane(dd, nLanes * bitQ + i,
14019 binop(opCvt, mkexpr(rm), mkexpr(src[i])));
14021 if (bitQ == 0) {
14022 putQRegLane(dd, 1, mkU64(0));
14024 const HChar* arrNarrow = nameArr_Q_SZ(bitQ, 1+size);
14025 const HChar* arrWide = nameArr_Q_SZ(1, 1+size+1);
14026 DIP("fcvtn%s %s.%s, %s.%s\n", bitQ ? "2" : "",
14027 nameQReg128(dd), arrNarrow, nameQReg128(nn), arrWide);
14028 return True;
14031 if (bitU == 1 && size == X01 && opcode == BITS5(1,0,1,1,0)) {
14032 /* -------- 1,01,10110: FCVTXN 2s/4s_2d -------- */
14033 /* Using Irrm_NEAREST here isn't right. The docs say "round to
14034 odd" but I don't know what that really means. */
14035 IRType srcTy = Ity_F64;
14036 IROp opCvt = Iop_F64toF32;
14037 IRTemp src[2];
14038 for (UInt i = 0; i < 2; i++) {
14039 src[i] = newTemp(srcTy);
14040 assign(src[i], getQRegLane(nn, i, srcTy));
14042 for (UInt i = 0; i < 2; i++) {
14043 putQRegLane(dd, 2 * bitQ + i,
14044 binop(opCvt, mkU32(Irrm_NEAREST), mkexpr(src[i])));
14046 if (bitQ == 0) {
14047 putQRegLane(dd, 1, mkU64(0));
14049 const HChar* arrNarrow = nameArr_Q_SZ(bitQ, 1+size);
14050 const HChar* arrWide = nameArr_Q_SZ(1, 1+size+1);
14051 DIP("fcvtxn%s %s.%s, %s.%s\n", bitQ ? "2" : "",
14052 nameQReg128(dd), arrNarrow, nameQReg128(nn), arrWide);
14053 return True;
14056 if (bitU == 0 && size <= X01 && opcode == BITS5(1,0,1,1,1)) {
14057 /* -------- 0,0x,10111: FCVTL 4s_4h/8h, 2d_2s/4s -------- */
14058 UInt nLanes = size == X00 ? 4 : 2;
14059 IRType srcTy = size == X00 ? Ity_F16 : Ity_F32;
14060 IROp opCvt = size == X00 ? Iop_F16toF32 : Iop_F32toF64;
14061 IRTemp src[nLanes];
14062 for (UInt i = 0; i < nLanes; i++) {
14063 src[i] = newTemp(srcTy);
14064 assign(src[i], getQRegLane(nn, nLanes * bitQ + i, srcTy));
14066 for (UInt i = 0; i < nLanes; i++) {
14067 putQRegLane(dd, i, unop(opCvt, mkexpr(src[i])));
14069 const HChar* arrNarrow = nameArr_Q_SZ(bitQ, 1+size);
14070 const HChar* arrWide = nameArr_Q_SZ(1, 1+size+1);
14071 DIP("fcvtl%s %s.%s, %s.%s\n", bitQ ? "2" : "",
14072 nameQReg128(dd), arrWide, nameQReg128(nn), arrNarrow);
14073 return True;
14076 ix = 0;
14077 if (opcode == BITS5(1,1,0,0,0) || opcode == BITS5(1,1,0,0,1)) {
14078 ix = 1 + ((((bitU & 1) << 2) | ((size & 2) << 0)) | ((opcode & 1) << 0));
14079 // = 1 + bitU[0]:size[1]:opcode[0]
14080 vassert(ix >= 1 && ix <= 8);
14081 if (ix == 7) ix = 0;
14083 if (ix > 0) {
14084 /* -------- 0,0x,11000 FRINTN 2d_2d, 4s_4s, 2s_2s (1) -------- */
14085 /* -------- 0,0x,11001 FRINTM 2d_2d, 4s_4s, 2s_2s (2) -------- */
14086 /* -------- 0,1x,11000 FRINTP 2d_2d, 4s_4s, 2s_2s (3) -------- */
14087 /* -------- 0,1x,11001 FRINTZ 2d_2d, 4s_4s, 2s_2s (4) -------- */
14088 /* -------- 1,0x,11000 FRINTA 2d_2d, 4s_4s, 2s_2s (5) -------- */
14089 /* -------- 1,0x,11001 FRINTX 2d_2d, 4s_4s, 2s_2s (6) -------- */
14090 /* -------- 1,1x,11000 (apparently unassigned) (7) -------- */
14091 /* -------- 1,1x,11001 FRINTI 2d_2d, 4s_4s, 2s_2s (8) -------- */
14092 /* rm plan:
14093 FRINTN: tieeven
14094 FRINTM: -inf
14095 FRINTP: +inf
14096 FRINTZ: zero
14097 FRINTA: tieaway
14098 FRINTX: per FPCR + "exact = TRUE"
14099 FRINTI: per FPCR
14101 Bool isD = (size & 1) == 1;
14102 if (bitQ == 0 && isD) return False; // implied 1d case
14104 UChar ch = '?';
14105 IROp op = isD ? Iop_RoundF64toInt : Iop_RoundF32toInt;
14106 Bool isBinop = True;
14107 IRExpr* irrmE = NULL;
14108 switch (ix) {
14109 case 1: ch = 'n'; isBinop = False; op = isD ? Iop_RoundF64toIntE : Iop_RoundF32toIntE; break;
14110 case 2: ch = 'm'; irrmE = mkU32(Irrm_NegINF); break;
14111 case 3: ch = 'p'; irrmE = mkU32(Irrm_PosINF); break;
14112 case 4: ch = 'z'; irrmE = mkU32(Irrm_ZERO); break;
14113 case 5: ch = 'a'; isBinop = False; op = isD ? Iop_RoundF64toIntA0 : Iop_RoundF32toIntA0; break;
14114 // I am unsure about the following, due to the "integral exact"
14115 // description in the manual. What does it mean? (frintx, that is)
14116 case 6: ch = 'x'; irrmE = mkexpr(mk_get_IR_rounding_mode()); break;
14117 case 8: ch = 'i'; irrmE = mkexpr(mk_get_IR_rounding_mode()); break;
14118 default: vassert(0);
14121 if (isD) {
14122 for (UInt i = 0; i < 2; i++) {
14123 if (isBinop) {
14124 IRTemp irrm = newTemp(Ity_I32);
14125 assign(irrm, irrmE);
14126 putQRegLane(dd, i, binop(op, mkexpr(irrm),
14127 getQRegLane(nn, i, Ity_F64)));
14128 } else {
14129 putQRegLane(dd, i, unop(op, getQRegLane(nn, i, Ity_F64)));
14132 } else {
14133 UInt n = bitQ==1 ? 4 : 2;
14134 for (UInt i = 0; i < n; i++) {
14135 if (isBinop) {
14136 IRTemp irrm = newTemp(Ity_I32);
14137 assign(irrm, irrmE);
14138 putQRegLane(dd, i, binop(op, mkexpr(irrm),
14139 getQRegLane(nn, i, Ity_F32)));
14140 } else {
14141 putQRegLane(dd, i, unop(op, getQRegLane(nn, i, Ity_F32)));
14144 if (bitQ == 0)
14145 putQRegLane(dd, 1, mkU64(0)); // zero out lanes 2 and 3
14147 const HChar* arr = nameArr_Q_SZ(bitQ, size);
14148 DIP("frint%c %s.%s, %s.%s\n", ch,
14149 nameQReg128(dd), arr, nameQReg128(nn), arr);
14150 return True;
14153 ix = 0; /*INVALID*/
14154 switch (opcode) {
14155 case BITS5(1,1,0,1,0): ix = ((size & 2) == 2) ? 4 : 1; break;
14156 case BITS5(1,1,0,1,1): ix = ((size & 2) == 2) ? 5 : 2; break;
14157 case BITS5(1,1,1,0,0): if ((size & 2) == 0) ix = 3; break;
14158 default: break;
14160 if (ix > 0) {
14161 /* -------- 0,0x,11010 FCVTNS 2d_2d, 4s_4s, 2s_2s (ix 1) -------- */
14162 /* -------- 0,0x,11011 FCVTMS 2d_2d, 4s_4s, 2s_2s (ix 2) -------- */
14163 /* -------- 0,0x,11100 FCVTAS 2d_2d, 4s_4s, 2s_2s (ix 3) -------- */
14164 /* -------- 0,1x,11010 FCVTPS 2d_2d, 4s_4s, 2s_2s (ix 4) -------- */
14165 /* -------- 0,1x,11011 FCVTZS 2d_2d, 4s_4s, 2s_2s (ix 5) -------- */
14166 /* -------- 1,0x,11010 FCVTNU 2d_2d, 4s_4s, 2s_2s (ix 1) -------- */
14167 /* -------- 1,0x,11011 FCVTMU 2d_2d, 4s_4s, 2s_2s (ix 2) -------- */
14168 /* -------- 1,0x,11100 FCVTAU 2d_2d, 4s_4s, 2s_2s (ix 3) -------- */
14169 /* -------- 1,1x,11010 FCVTPU 2d_2d, 4s_4s, 2s_2s (ix 4) -------- */
14170 /* -------- 1,1x,11011 FCVTZU 2d_2d, 4s_4s, 2s_2s (ix 5) -------- */
14171 Bool isD = (size & 1) == 1;
14172 if (bitQ == 0 && isD) return False; // implied 1d case
14174 IRRoundingMode irrm = 8; /*impossible*/
14175 HChar ch = '?';
14176 switch (ix) {
14177 case 1: ch = 'n'; irrm = Irrm_NEAREST; break;
14178 case 2: ch = 'm'; irrm = Irrm_NegINF; break;
14179 case 3: ch = 'a'; irrm = Irrm_NEAREST; break; /* kludge? */
14180 case 4: ch = 'p'; irrm = Irrm_PosINF; break;
14181 case 5: ch = 'z'; irrm = Irrm_ZERO; break;
14182 default: vassert(0);
14184 IROp cvt = Iop_INVALID;
14185 if (bitU == 1) {
14186 cvt = isD ? Iop_F64toI64U : Iop_F32toI32U;
14187 } else {
14188 cvt = isD ? Iop_F64toI64S : Iop_F32toI32S;
14190 if (isD) {
14191 for (UInt i = 0; i < 2; i++) {
14192 putQRegLane(dd, i, binop(cvt, mkU32(irrm),
14193 getQRegLane(nn, i, Ity_F64)));
14195 } else {
14196 UInt n = bitQ==1 ? 4 : 2;
14197 for (UInt i = 0; i < n; i++) {
14198 putQRegLane(dd, i, binop(cvt, mkU32(irrm),
14199 getQRegLane(nn, i, Ity_F32)));
14201 if (bitQ == 0)
14202 putQRegLane(dd, 1, mkU64(0)); // zero out lanes 2 and 3
14204 const HChar* arr = nameArr_Q_SZ(bitQ, size);
14205 DIP("fcvt%c%c %s.%s, %s.%s\n", ch, bitU == 1 ? 'u' : 's',
14206 nameQReg128(dd), arr, nameQReg128(nn), arr);
14207 return True;
14210 if (size == X10 && opcode == BITS5(1,1,1,0,0)) {
14211 /* -------- 0,10,11100: URECPE 4s_4s, 2s_2s -------- */
14212 /* -------- 1,10,11100: URSQRTE 4s_4s, 2s_2s -------- */
14213 Bool isREC = bitU == 0;
14214 IROp op = isREC ? Iop_RecipEst32Ux4 : Iop_RSqrtEst32Ux4;
14215 IRTemp res = newTempV128();
14216 assign(res, unop(op, getQReg128(nn)));
14217 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
14218 const HChar* nm = isREC ? "urecpe" : "ursqrte";
14219 const HChar* arr = nameArr_Q_SZ(bitQ, size);
14220 DIP("%s %s.%s, %s.%s\n", nm,
14221 nameQReg128(dd), arr, nameQReg128(nn), arr);
14222 return True;
14225 if (size <= X01 && opcode == BITS5(1,1,1,0,1)) {
14226 /* -------- 0,0x,11101: SCVTF -------- */
14227 /* -------- 1,0x,11101: UCVTF -------- */
14228 /* 31 28 22 21 15 9 4
14229 0q0 01110 0 sz 1 00001 110110 n d SCVTF Vd, Vn
14230 0q1 01110 0 sz 1 00001 110110 n d UCVTF Vd, Vn
14231 with laneage:
14232 case sz:Q of 00 -> 2S, zero upper, 01 -> 4S, 10 -> illegal, 11 -> 2D
14234 Bool isQ = bitQ == 1;
14235 Bool isU = bitU == 1;
14236 Bool isF64 = (size & 1) == 1;
14237 if (isQ || !isF64) {
14238 IRType tyF = Ity_INVALID, tyI = Ity_INVALID;
14239 UInt nLanes = 0;
14240 Bool zeroHI = False;
14241 const HChar* arrSpec = NULL;
14242 Bool ok = getLaneInfo_Q_SZ(&tyI, &tyF, &nLanes, &zeroHI, &arrSpec,
14243 isQ, isF64 );
14244 IROp iop = isU ? (isF64 ? Iop_I64UtoF64 : Iop_I32UtoF32)
14245 : (isF64 ? Iop_I64StoF64 : Iop_I32StoF32);
14246 IRTemp rm = mk_get_IR_rounding_mode();
14247 UInt i;
14248 vassert(ok); /* the 'if' above should ensure this */
14249 for (i = 0; i < nLanes; i++) {
14250 putQRegLane(dd, i,
14251 binop(iop, mkexpr(rm), getQRegLane(nn, i, tyI)));
14253 if (zeroHI) {
14254 putQRegLane(dd, 1, mkU64(0));
14256 DIP("%ccvtf %s.%s, %s.%s\n", isU ? 'u' : 's',
14257 nameQReg128(dd), arrSpec, nameQReg128(nn), arrSpec);
14258 return True;
14260 /* else fall through */
14263 if (size >= X10 && opcode == BITS5(1,1,1,0,1)) {
14264 /* -------- 0,1x,11101: FRECPE 2d_2d, 4s_4s, 2s_2s -------- */
14265 /* -------- 1,1x,11101: FRSQRTE 2d_2d, 4s_4s, 2s_2s -------- */
14266 Bool isSQRT = bitU == 1;
14267 Bool isD = (size & 1) == 1;
14268 IROp op = isSQRT ? (isD ? Iop_RSqrtEst64Fx2 : Iop_RSqrtEst32Fx4)
14269 : (isD ? Iop_RecipEst64Fx2 : Iop_RecipEst32Fx4);
14270 if (bitQ == 0 && isD) return False; // implied 1d case
14271 IRTemp resV = newTempV128();
14272 assign(resV, unop(op, getQReg128(nn)));
14273 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, resV));
14274 const HChar* arr = bitQ == 0 ? "2s" : (size == X11 ? "2d" : "4s");
14275 DIP("%s %s.%s, %s.%s\n", isSQRT ? "frsqrte" : "frecpe",
14276 nameQReg128(dd), arr, nameQReg128(nn), arr);
14277 return True;
14280 if (bitU == 1 && size >= X10 && opcode == BITS5(1,1,1,1,1)) {
14281 /* -------- 1,1x,11111: FSQRT 2d_2d, 4s_4s, 2s_2s -------- */
14282 Bool isD = (size & 1) == 1;
14283 IROp op = isD ? Iop_Sqrt64Fx2 : Iop_Sqrt32Fx4;
14284 if (bitQ == 0 && isD) return False; // implied 1d case
14285 IRTemp resV = newTempV128();
14286 assign(resV, binop(op, mkexpr(mk_get_IR_rounding_mode()),
14287 getQReg128(nn)));
14288 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, resV));
14289 const HChar* arr = bitQ == 0 ? "2s" : (size == X11 ? "2d" : "4s");
14290 DIP("%s %s.%s, %s.%s\n", "fsqrt",
14291 nameQReg128(dd), arr, nameQReg128(nn), arr);
14292 return True;
14295 return False;
14296 # undef INSN
14300 static
14301 Bool dis_AdvSIMD_two_reg_misc_fp16(/*MB_OUT*/DisResult* dres, UInt insn,
14302 const VexArchInfo* archinfo)
14304 /* This decode function only handles instructions with half-precision
14305 floating-point (fp16) operands.
14307 if ((archinfo->hwcaps & VEX_HWCAPS_ARM64_FP16) == 0)
14308 return False;
14310 /* 31 30 29 28 23 21 16 11 9 4
14311 0 Q U 01110 size 11100 opcode 10 n d
14312 Decode fields: U,size,opcode
14314 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
14315 if (INSN(31,31) != 0
14316 || INSN(28,24) != BITS5(0,1,1,1,0)
14317 || INSN(21,17) != BITS5(1,1,1,0,0)
14318 || INSN(11,10) != BITS2(1,0)) {
14319 return False;
14321 UInt bitQ = INSN(30,30);
14322 UInt bitU = INSN(29,29);
14323 UInt size = INSN(23,22);
14324 UInt opcode = INSN(16,12);
14325 UInt nn = INSN(9,5);
14326 UInt dd = INSN(4,0);
14327 vassert(size < 4);
14329 if (size == X11 && opcode == BITS5(0,1,1,1,1)) {
14330 /* -------- Q,0,11,01111: FABS 4h_4h, 8h_8h -------- */
14331 /* -------- Q,1,11,01111: FNEG 4h_4h, 8h_8h -------- */
14332 Bool isFNEG = bitU == 1;
14333 IROp op = isFNEG ? Iop_Neg16Fx8 : Iop_Abs16Fx8;
14334 IRTemp res = newTempV128();
14335 assign(res, unop(op, getQReg128(nn)));
14336 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
14337 const HChar* arr = bitQ == 0 ? "4h" : "8h";
14338 DIP("%s %s.%s, %s.%s\n", isFNEG ? "fneg" : "fabs",
14339 nameQReg128(dd), arr, nameQReg128(nn), arr);
14340 return True;
14343 if (bitU == 1 && size == X11 && opcode == BITS5(1,1,1,1,1)) {
14344 /* -------- 1,11,11111: FSQRT 4h_4h, 8h_8h -------- */
14345 IRTemp resV = newTempV128();
14346 assign(resV, binop(Iop_Sqrt16Fx8, mkexpr(mk_get_IR_rounding_mode()),
14347 getQReg128(nn)));
14348 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, resV));
14349 const HChar* arr = bitQ == 0 ? "4h" : "8h";
14350 DIP("%s %s.%s, %s.%s\n", "fsqrt",
14351 nameQReg128(dd), arr, nameQReg128(nn), arr);
14352 return True;
14355 /* Decoding FCM<condtion> based on opcode and bitU. ix used to select
14356 * <condition>
14358 UInt ix = 0; // Invalid <condition>
14359 switch (opcode) {
14360 case BITS5(0,1,1,0,1): ix = (bitU == 1) ? 4 : 1; break; // FCMLE=4,FCMEQ=1
14361 case BITS5(0,1,1,0,0): ix = (bitU == 1) ? 5 : 2; break; // FCMGE=5,FCMGT=2
14362 case BITS5(0,1,1,1,0): if (bitU == 0) ix = 3; break; // FCMLT=3
14363 default: break;
14365 if (ix > 0) {
14366 /* -------- 0,01101 FCMEQ 4h_4h,8h_8h _#0.0 (ix 1) -------- */
14367 /* -------- 0,01100 FCMGT 4h_4h,8h_8h _#0.0 (ix 2) -------- */
14368 /* -------- 0,01110 FCMLT 4h_4h,8h_8h _#0.0 (ix 3) -------- */
14369 /* -------- 1,01101 FCMLE 4h_4h,8h_8h _#0.0 (ix 4) -------- */
14370 /* -------- 1,01100 FCMGE 4h_4h,8h_8h _#0.0 (ix 5) -------- */
14371 IROp opCmp = Iop_INVALID;
14372 Bool swap = False;
14373 const HChar* nm = "??";
14374 switch (ix) {
14375 case 1: nm = "fcmeq"; opCmp = Iop_CmpEQ16Fx8; break;
14376 case 2: nm = "fcmgt"; opCmp = Iop_CmpLT16Fx8; swap = True; break;
14377 case 3: nm = "fcmlt"; opCmp = Iop_CmpLT16Fx8; break;
14378 case 4: nm = "fcmle"; opCmp = Iop_CmpLE16Fx8; break;
14379 case 5: nm = "fcmge"; opCmp = Iop_CmpLE16Fx8; swap = True; break;
14380 default: vassert(0);
14382 IRExpr* zero = mkV128(0x0000);
14383 IRTemp res = newTempV128();
14384 assign(res, swap ? binop(opCmp, zero, getQReg128(nn))
14385 : binop(opCmp, getQReg128(nn), zero));
14386 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
14387 const HChar* arr = bitQ == 0 ? "4h" : "8h";
14388 DIP("%s %s.%s, %s.%s, #0.0\n", nm,
14389 nameQReg128(dd), arr, nameQReg128(nn), arr);
14390 return True;
14393 return False;
14394 # undef INSN
14397 static
14398 Bool dis_AdvSIMD_vector_x_indexed_elem(/*MB_OUT*/DisResult* dres, UInt insn)
14400 /* 31 28 23 21 20 19 15 11 9 4
14401 0 Q U 01111 size L M m opcode H 0 n d
14402 Decode fields are: u,size,opcode
14403 M is really part of the mm register number. Individual
14404 cases need to inspect L and H though.
14406 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
14407 if (INSN(31,31) != 0
14408 || INSN(28,24) != BITS5(0,1,1,1,1) || INSN(10,10) !=0) {
14409 return False;
14411 UInt bitQ = INSN(30,30);
14412 UInt bitU = INSN(29,29);
14413 UInt size = INSN(23,22);
14414 UInt bitL = INSN(21,21);
14415 UInt bitM = INSN(20,20);
14416 UInt mmLO4 = INSN(19,16);
14417 UInt opcode = INSN(15,12);
14418 UInt bitH = INSN(11,11);
14419 UInt nn = INSN(9,5);
14420 UInt dd = INSN(4,0);
14421 vassert(size < 4);
14422 vassert(bitH < 2 && bitM < 2 && bitL < 2);
14424 if (bitU == 0 && size >= X10
14425 && (opcode == BITS4(0,0,0,1) || opcode == BITS4(0,1,0,1))) {
14426 /* -------- 0,1x,0001 FMLA 2d_2d_d[], 4s_4s_s[], 2s_2s_s[] -------- */
14427 /* -------- 0,1x,0101 FMLS 2d_2d_d[], 4s_4s_s[], 2s_2s_s[] -------- */
14428 if (bitQ == 0 && size == X11) return False; // implied 1d case
14429 Bool isD = (size & 1) == 1;
14430 Bool isSUB = opcode == BITS4(0,1,0,1);
14431 UInt index;
14432 if (!isD) index = (bitH << 1) | bitL;
14433 else if (isD && bitL == 0) index = bitH;
14434 else return False; // sz:L == x11 => unallocated encoding
14435 vassert(index < (isD ? 2 : 4));
14436 IRType ity = isD ? Ity_F64 : Ity_F32;
14437 IRTemp elem = newTemp(ity);
14438 UInt mm = (bitM << 4) | mmLO4;
14439 assign(elem, getQRegLane(mm, index, ity));
14440 IRTemp dupd = math_DUP_TO_V128(elem, ity);
14441 IROp opADD = isD ? Iop_Add64Fx2 : Iop_Add32Fx4;
14442 IROp opSUB = isD ? Iop_Sub64Fx2 : Iop_Sub32Fx4;
14443 IROp opMUL = isD ? Iop_Mul64Fx2 : Iop_Mul32Fx4;
14444 IRTemp rm = mk_get_IR_rounding_mode();
14445 IRTemp t1 = newTempV128();
14446 IRTemp t2 = newTempV128();
14447 // FIXME: double rounding; use FMA primops instead
14448 assign(t1, triop(opMUL, mkexpr(rm), getQReg128(nn), mkexpr(dupd)));
14449 assign(t2, triop(isSUB ? opSUB : opADD,
14450 mkexpr(rm), getQReg128(dd), mkexpr(t1)));
14451 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t2));
14452 const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
14453 DIP("%s %s.%s, %s.%s, %s.%c[%u]\n", isSUB ? "fmls" : "fmla",
14454 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm),
14455 isD ? 'd' : 's', index);
14456 return True;
14459 if (size >= X10 && opcode == BITS4(1,0,0,1)) {
14460 /* -------- 0,1x,1001 FMUL 2d_2d_d[], 4s_4s_s[], 2s_2s_s[] -------- */
14461 /* -------- 1,1x,1001 FMULX 2d_2d_d[], 4s_4s_s[], 2s_2s_s[] -------- */
14462 if (bitQ == 0 && size == X11) return False; // implied 1d case
14463 Bool isD = (size & 1) == 1;
14464 Bool isMULX = bitU == 1;
14465 UInt index;
14466 if (!isD) index = (bitH << 1) | bitL;
14467 else if (isD && bitL == 0) index = bitH;
14468 else return False; // sz:L == x11 => unallocated encoding
14469 vassert(index < (isD ? 2 : 4));
14470 IRType ity = isD ? Ity_F64 : Ity_F32;
14471 IRTemp elem = newTemp(ity);
14472 UInt mm = (bitM << 4) | mmLO4;
14473 assign(elem, getQRegLane(mm, index, ity));
14474 IRTemp dupd = math_DUP_TO_V128(elem, ity);
14475 // KLUDGE: FMULX is treated the same way as FMUL. That can't be right.
14476 IRTemp res = newTempV128();
14477 assign(res, triop(isD ? Iop_Mul64Fx2 : Iop_Mul32Fx4,
14478 mkexpr(mk_get_IR_rounding_mode()),
14479 getQReg128(nn), mkexpr(dupd)));
14480 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
14481 const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
14482 DIP("%s %s.%s, %s.%s, %s.%c[%u]\n",
14483 isMULX ? "fmulx" : "fmul", nameQReg128(dd), arr,
14484 nameQReg128(nn), arr, nameQReg128(mm), isD ? 'd' : 's', index);
14485 return True;
14488 if ((bitU == 1 && (opcode == BITS4(0,0,0,0) || opcode == BITS4(0,1,0,0)))
14489 || (bitU == 0 && opcode == BITS4(1,0,0,0))) {
14490 /* -------- 1,xx,0000 MLA s/h variants only -------- */
14491 /* -------- 1,xx,0100 MLS s/h variants only -------- */
14492 /* -------- 0,xx,1000 MUL s/h variants only -------- */
14493 Bool isMLA = opcode == BITS4(0,0,0,0);
14494 Bool isMLS = opcode == BITS4(0,1,0,0);
14495 UInt mm = 32; // invalid
14496 UInt ix = 16; // invalid
14497 switch (size) {
14498 case X00:
14499 return False; // b case is not allowed
14500 case X01:
14501 mm = mmLO4; ix = (bitH << 2) | (bitL << 1) | (bitM << 0); break;
14502 case X10:
14503 mm = (bitM << 4) | mmLO4; ix = (bitH << 1) | (bitL << 0); break;
14504 case X11:
14505 return False; // d case is not allowed
14506 default:
14507 vassert(0);
14509 vassert(mm < 32 && ix < 16);
14510 IROp opMUL = mkVecMUL(size);
14511 IROp opADD = mkVecADD(size);
14512 IROp opSUB = mkVecSUB(size);
14513 HChar ch = size == X01 ? 'h' : 's';
14514 IRTemp vecM = math_DUP_VEC_ELEM(getQReg128(mm), size, ix);
14515 IRTemp vecD = newTempV128();
14516 IRTemp vecN = newTempV128();
14517 IRTemp res = newTempV128();
14518 assign(vecD, getQReg128(dd));
14519 assign(vecN, getQReg128(nn));
14520 IRExpr* prod = binop(opMUL, mkexpr(vecN), mkexpr(vecM));
14521 if (isMLA || isMLS) {
14522 assign(res, binop(isMLA ? opADD : opSUB, mkexpr(vecD), prod));
14523 } else {
14524 assign(res, prod);
14526 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
14527 const HChar* arr = nameArr_Q_SZ(bitQ, size);
14528 DIP("%s %s.%s, %s.%s, %s.%c[%u]\n", isMLA ? "mla"
14529 : (isMLS ? "mls" : "mul"),
14530 nameQReg128(dd), arr,
14531 nameQReg128(nn), arr, nameQReg128(dd), ch, ix);
14532 return True;
14535 if (opcode == BITS4(1,0,1,0)
14536 || opcode == BITS4(0,0,1,0) || opcode == BITS4(0,1,1,0)) {
14537 /* -------- 0,xx,1010 SMULL s/h variants only -------- */ // 0 (ks)
14538 /* -------- 1,xx,1010 UMULL s/h variants only -------- */ // 0
14539 /* -------- 0,xx,0010 SMLAL s/h variants only -------- */ // 1
14540 /* -------- 1,xx,0010 UMLAL s/h variants only -------- */ // 1
14541 /* -------- 0,xx,0110 SMLSL s/h variants only -------- */ // 2
14542 /* -------- 1,xx,0110 SMLSL s/h variants only -------- */ // 2
14543 /* Widens, and size refers to the narrowed lanes. */
14544 UInt ks = 3;
14545 switch (opcode) {
14546 case BITS4(1,0,1,0): ks = 0; break;
14547 case BITS4(0,0,1,0): ks = 1; break;
14548 case BITS4(0,1,1,0): ks = 2; break;
14549 default: vassert(0);
14551 vassert(ks <= 2);
14552 Bool isU = bitU == 1;
14553 Bool is2 = bitQ == 1;
14554 UInt mm = 32; // invalid
14555 UInt ix = 16; // invalid
14556 switch (size) {
14557 case X00:
14558 return False; // h_b_b[] case is not allowed
14559 case X01:
14560 mm = mmLO4; ix = (bitH << 2) | (bitL << 1) | (bitM << 0); break;
14561 case X10:
14562 mm = (bitM << 4) | mmLO4; ix = (bitH << 1) | (bitL << 0); break;
14563 case X11:
14564 return False; // q_d_d[] case is not allowed
14565 default:
14566 vassert(0);
14568 vassert(mm < 32 && ix < 16);
14569 IRTemp vecN = newTempV128();
14570 IRTemp vecM = math_DUP_VEC_ELEM(getQReg128(mm), size, ix);
14571 IRTemp vecD = newTempV128();
14572 assign(vecN, getQReg128(nn));
14573 assign(vecD, getQReg128(dd));
14574 IRTemp res = IRTemp_INVALID;
14575 math_MULL_ACC(&res, is2, isU, size, "mas"[ks],
14576 vecN, vecM, ks == 0 ? IRTemp_INVALID : vecD);
14577 putQReg128(dd, mkexpr(res));
14578 const HChar* nm = ks == 0 ? "mull" : (ks == 1 ? "mlal" : "mlsl");
14579 const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
14580 const HChar* arrWide = nameArr_Q_SZ(1, size+1);
14581 HChar ch = size == X01 ? 'h' : 's';
14582 DIP("%c%s%s %s.%s, %s.%s, %s.%c[%u]\n",
14583 isU ? 'u' : 's', nm, is2 ? "2" : "",
14584 nameQReg128(dd), arrWide,
14585 nameQReg128(nn), arrNarrow, nameQReg128(dd), ch, ix);
14586 return True;
14589 if (bitU == 0
14590 && (opcode == BITS4(1,0,1,1)
14591 || opcode == BITS4(0,0,1,1) || opcode == BITS4(0,1,1,1))) {
14592 /* -------- 0,xx,1011 SQDMULL s/h variants only -------- */ // 0 (ks)
14593 /* -------- 0,xx,0011 SQDMLAL s/h variants only -------- */ // 1
14594 /* -------- 0,xx,0111 SQDMLSL s/h variants only -------- */ // 2
14595 /* Widens, and size refers to the narrowed lanes. */
14596 UInt ks = 3;
14597 switch (opcode) {
14598 case BITS4(1,0,1,1): ks = 0; break;
14599 case BITS4(0,0,1,1): ks = 1; break;
14600 case BITS4(0,1,1,1): ks = 2; break;
14601 default: vassert(0);
14603 vassert(ks <= 2);
14604 Bool is2 = bitQ == 1;
14605 UInt mm = 32; // invalid
14606 UInt ix = 16; // invalid
14607 switch (size) {
14608 case X00:
14609 return False; // h_b_b[] case is not allowed
14610 case X01:
14611 mm = mmLO4; ix = (bitH << 2) | (bitL << 1) | (bitM << 0); break;
14612 case X10:
14613 mm = (bitM << 4) | mmLO4; ix = (bitH << 1) | (bitL << 0); break;
14614 case X11:
14615 return False; // q_d_d[] case is not allowed
14616 default:
14617 vassert(0);
14619 vassert(mm < 32 && ix < 16);
14620 IRTemp vecN, vecD, res, sat1q, sat1n, sat2q, sat2n;
14621 vecN = vecD = res = sat1q = sat1n = sat2q = sat2n = IRTemp_INVALID;
14622 newTempsV128_2(&vecN, &vecD);
14623 assign(vecN, getQReg128(nn));
14624 IRTemp vecM = math_DUP_VEC_ELEM(getQReg128(mm), size, ix);
14625 assign(vecD, getQReg128(dd));
14626 math_SQDMULL_ACC(&res, &sat1q, &sat1n, &sat2q, &sat2n,
14627 is2, size, "mas"[ks],
14628 vecN, vecM, ks == 0 ? IRTemp_INVALID : vecD);
14629 putQReg128(dd, mkexpr(res));
14630 vassert(sat1q != IRTemp_INVALID && sat1n != IRTemp_INVALID);
14631 updateQCFLAGwithDifference(sat1q, sat1n);
14632 if (sat2q != IRTemp_INVALID || sat2n != IRTemp_INVALID) {
14633 updateQCFLAGwithDifference(sat2q, sat2n);
14635 const HChar* nm = ks == 0 ? "sqdmull"
14636 : (ks == 1 ? "sqdmlal" : "sqdmlsl");
14637 const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
14638 const HChar* arrWide = nameArr_Q_SZ(1, size+1);
14639 HChar ch = size == X01 ? 'h' : 's';
14640 DIP("%s%s %s.%s, %s.%s, %s.%c[%u]\n",
14641 nm, is2 ? "2" : "",
14642 nameQReg128(dd), arrWide,
14643 nameQReg128(nn), arrNarrow, nameQReg128(dd), ch, ix);
14644 return True;
14647 if (bitU == 0 && (opcode == BITS4(1,1,0,0) || opcode == BITS4(1,1,0,1))) {
14648 /* -------- 0,xx,1100 SQDMULH s and h variants only -------- */
14649 /* -------- 0,xx,1101 SQRDMULH s and h variants only -------- */
14650 UInt mm = 32; // invalid
14651 UInt ix = 16; // invalid
14652 switch (size) {
14653 case X00:
14654 return False; // b case is not allowed
14655 case X01:
14656 mm = mmLO4; ix = (bitH << 2) | (bitL << 1) | (bitM << 0); break;
14657 case X10:
14658 mm = (bitM << 4) | mmLO4; ix = (bitH << 1) | (bitL << 0); break;
14659 case X11:
14660 return False; // q case is not allowed
14661 default:
14662 vassert(0);
14664 vassert(mm < 32 && ix < 16);
14665 Bool isR = opcode == BITS4(1,1,0,1);
14666 IRTemp res, sat1q, sat1n, vN, vM;
14667 res = sat1q = sat1n = vN = vM = IRTemp_INVALID;
14668 vN = newTempV128();
14669 assign(vN, getQReg128(nn));
14670 vM = math_DUP_VEC_ELEM(getQReg128(mm), size, ix);
14671 math_SQDMULH(&res, &sat1q, &sat1n, isR, size, vN, vM);
14672 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
14673 IROp opZHI = bitQ == 0 ? Iop_ZeroHI64ofV128 : Iop_INVALID;
14674 updateQCFLAGwithDifferenceZHI(sat1q, sat1n, opZHI);
14675 const HChar* nm = isR ? "sqrdmulh" : "sqdmulh";
14676 const HChar* arr = nameArr_Q_SZ(bitQ, size);
14677 HChar ch = size == X01 ? 'h' : 's';
14678 DIP("%s %s.%s, %s.%s, %s.%c[%u]\n", nm,
14679 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(dd), ch, ix);
14680 return True;
14683 if (bitU == 1 && (opcode == BITS4(1,1,0,1) || opcode == BITS4(1,1,1,1))) {
14684 /* -------- 0,xx,1101 SQRDMLAH s and h variants only -------- */
14685 /* -------- 0,xx,1111 SQRDMLSH s and h variants only -------- */
14686 UInt mm = 32; // invalid
14687 UInt ix = 16; // invalid
14688 switch (size) {
14689 case X00:
14690 return False; // b case is not allowed
14691 case X01: // h
14692 mm = mmLO4; ix = (bitH << 2) | (bitL << 1) | (bitM << 0); break;
14693 case X10: // s
14694 mm = (bitM << 4) | mmLO4; ix = (bitH << 1) | (bitL << 0); break;
14695 case X11:
14696 return False; // d case is not allowed
14697 default:
14698 vassert(0);
14700 vassert(mm < 32 && ix < 16);
14702 IRTemp res, res_nosat, vD, vN, vM;
14703 res = res_nosat = vD = vN = vM = IRTemp_INVALID;
14704 newTempsV128_2(&vD, &vN);
14705 assign(vD, getQReg128(dd));
14706 assign(vN, getQReg128(nn));
14708 vM = math_DUP_VEC_ELEM(getQReg128(mm), size, ix);
14709 Bool isAdd = opcode == BITS4(1,1,0,1);
14710 math_SQRDMLAH(&res, &res_nosat, isAdd, size, vD, vN, vM);
14711 IROp opZHI = bitQ == 0 ? Iop_ZeroHI64ofV128 : Iop_INVALID;
14712 updateQCFLAGwithDifferenceZHI(res, res_nosat, opZHI);
14713 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
14715 const HChar* arr = nameArr_Q_SZ(bitQ, size);
14716 const HChar* nm = isAdd ? "sqrdmlah" : "sqrdmlsh";
14717 HChar ch = size == X01 ? 'h' : 's';
14718 DIP("%s %s.%s, %s.%s, %s.%c[%u]\n", nm,
14719 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), ch, ix);
14720 return True;
14723 return False;
14724 # undef INSN
14728 static
14729 Bool dis_AdvSIMD_crypto_aes(/*MB_OUT*/DisResult* dres, UInt insn)
14731 /* 31 23 21 16 11 9 4
14732 0100 1110 size 10100 opcode 10 n d
14733 Decode fields are: size,opcode
14734 Size is always 00 in ARMv8, it appears.
14736 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
14737 if (INSN(31,24) != BITS8(0,1,0,0,1,1,1,0)
14738 || INSN(21,17) != BITS5(1,0,1,0,0) || INSN(11,10) != BITS2(1,0)) {
14739 return False;
14741 UInt size = INSN(23,22);
14742 UInt opcode = INSN(16,12);
14743 UInt nn = INSN(9,5);
14744 UInt dd = INSN(4,0);
14746 if (size == BITS2(0,0)
14747 && (opcode == BITS5(0,0,1,0,0) || opcode == BITS5(0,0,1,0,1))) {
14748 /* -------- 00,00100: AESE Vd.16b, Vn.16b -------- */
14749 /* -------- 00,00101: AESD Vd.16b, Vn.16b -------- */
14750 Bool isD = opcode == BITS5(0,0,1,0,1);
14751 IRTemp op1 = newTemp(Ity_V128);
14752 IRTemp op2 = newTemp(Ity_V128);
14753 IRTemp xord = newTemp(Ity_V128);
14754 IRTemp res = newTemp(Ity_V128);
14755 void* helper = isD ? &arm64g_dirtyhelper_AESD
14756 : &arm64g_dirtyhelper_AESE;
14757 const HChar* hname = isD ? "arm64g_dirtyhelper_AESD"
14758 : "arm64g_dirtyhelper_AESE";
14759 assign(op1, getQReg128(dd));
14760 assign(op2, getQReg128(nn));
14761 assign(xord, binop(Iop_XorV128, mkexpr(op1), mkexpr(op2)));
14762 IRDirty* di
14763 = unsafeIRDirty_1_N( res, 0/*regparms*/, hname, helper,
14764 mkIRExprVec_3(
14765 IRExpr_VECRET(),
14766 unop(Iop_V128HIto64, mkexpr(xord)),
14767 unop(Iop_V128to64, mkexpr(xord)) ) );
14768 stmt(IRStmt_Dirty(di));
14769 putQReg128(dd, mkexpr(res));
14770 DIP("aes%c %s.16b, %s.16b\n", isD ? 'd' : 'e',
14771 nameQReg128(dd), nameQReg128(nn));
14772 return True;
14775 if (size == BITS2(0,0)
14776 && (opcode == BITS5(0,0,1,1,0) || opcode == BITS5(0,0,1,1,1))) {
14777 /* -------- 00,00110: AESMC Vd.16b, Vn.16b -------- */
14778 /* -------- 00,00111: AESIMC Vd.16b, Vn.16b -------- */
14779 Bool isI = opcode == BITS5(0,0,1,1,1);
14780 IRTemp src = newTemp(Ity_V128);
14781 IRTemp res = newTemp(Ity_V128);
14782 void* helper = isI ? &arm64g_dirtyhelper_AESIMC
14783 : &arm64g_dirtyhelper_AESMC;
14784 const HChar* hname = isI ? "arm64g_dirtyhelper_AESIMC"
14785 : "arm64g_dirtyhelper_AESMC";
14786 assign(src, getQReg128(nn));
14787 IRDirty* di
14788 = unsafeIRDirty_1_N( res, 0/*regparms*/, hname, helper,
14789 mkIRExprVec_3(
14790 IRExpr_VECRET(),
14791 unop(Iop_V128HIto64, mkexpr(src)),
14792 unop(Iop_V128to64, mkexpr(src)) ) );
14793 stmt(IRStmt_Dirty(di));
14794 putQReg128(dd, mkexpr(res));
14795 DIP("aes%s %s.16b, %s.16b\n", isI ? "imc" : "mc",
14796 nameQReg128(dd), nameQReg128(nn));
14797 return True;
14800 return False;
14801 # undef INSN
14805 static
14806 Bool dis_AdvSIMD_crypto_three_reg_sha(/*MB_OUT*/DisResult* dres, UInt insn)
14808 /* 31 27 23 21 20 15 14 11 9 4
14809 0101 1110 sz 0 m 0 opc 00 n d
14810 Decode fields are: sz,opc
14812 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
14813 if (INSN(31,24) != BITS8(0,1,0,1,1,1,1,0) || INSN(21,21) != 0
14814 || INSN(15,15) != 0 || INSN(11,10) != BITS2(0,0)) {
14815 return False;
14817 UInt sz = INSN(23,22);
14818 UInt mm = INSN(20,16);
14819 UInt opc = INSN(14,12);
14820 UInt nn = INSN(9,5);
14821 UInt dd = INSN(4,0);
14822 if (sz == BITS2(0,0) && opc <= BITS3(1,1,0)) {
14823 /* -------- 00,000 SHA1C Qd, Sn, Vm.4S -------- */
14824 /* -------- 00,001 SHA1P Qd, Sn, Vm.4S -------- */
14825 /* -------- 00,010 SHA1M Qd, Sn, Vm.4S -------- */
14826 /* -------- 00,011 SHA1SU0 Vd.4S, Vn.4S, Vm.4S -------- */
14827 /* -------- 00,100 SHA256H Qd, Qn, Vm.4S -------- */
14828 /* -------- 00,101 SHA256H2 Qd, Qn, Vm.4S -------- */
14829 /* -------- 00,110 SHA256SU1 Vd.4S, Vn.4S, Vm.4S -------- */
14830 vassert(opc < 7);
14831 const HChar* inames[7]
14832 = { "sha1c", "sha1p", "sha1m", "sha1su0",
14833 "sha256h", "sha256h2", "sha256su1" };
14834 void(*helpers[7])(V128*,ULong,ULong,ULong,ULong,ULong,ULong)
14835 = { &arm64g_dirtyhelper_SHA1C, &arm64g_dirtyhelper_SHA1P,
14836 &arm64g_dirtyhelper_SHA1M, &arm64g_dirtyhelper_SHA1SU0,
14837 &arm64g_dirtyhelper_SHA256H, &arm64g_dirtyhelper_SHA256H2,
14838 &arm64g_dirtyhelper_SHA256SU1 };
14839 const HChar* hnames[7]
14840 = { "arm64g_dirtyhelper_SHA1C", "arm64g_dirtyhelper_SHA1P",
14841 "arm64g_dirtyhelper_SHA1M", "arm64g_dirtyhelper_SHA1SU0",
14842 "arm64g_dirtyhelper_SHA256H", "arm64g_dirtyhelper_SHA256H2",
14843 "arm64g_dirtyhelper_SHA256SU1" };
14844 IRTemp vD = newTemp(Ity_V128);
14845 IRTemp vN = newTemp(Ity_V128);
14846 IRTemp vM = newTemp(Ity_V128);
14847 IRTemp vDhi = newTemp(Ity_I64);
14848 IRTemp vDlo = newTemp(Ity_I64);
14849 IRTemp vNhiPre = newTemp(Ity_I64);
14850 IRTemp vNloPre = newTemp(Ity_I64);
14851 IRTemp vNhi = newTemp(Ity_I64);
14852 IRTemp vNlo = newTemp(Ity_I64);
14853 IRTemp vMhi = newTemp(Ity_I64);
14854 IRTemp vMlo = newTemp(Ity_I64);
14855 assign(vD, getQReg128(dd));
14856 assign(vN, getQReg128(nn));
14857 assign(vM, getQReg128(mm));
14858 assign(vDhi, unop(Iop_V128HIto64, mkexpr(vD)));
14859 assign(vDlo, unop(Iop_V128to64, mkexpr(vD)));
14860 assign(vNhiPre, unop(Iop_V128HIto64, mkexpr(vN)));
14861 assign(vNloPre, unop(Iop_V128to64, mkexpr(vN)));
14862 assign(vMhi, unop(Iop_V128HIto64, mkexpr(vM)));
14863 assign(vMlo, unop(Iop_V128to64, mkexpr(vM)));
14864 /* Mask off any bits of the N register operand that aren't actually
14865 needed, so that Memcheck doesn't complain unnecessarily. */
14866 switch (opc) {
14867 case BITS3(0,0,0): case BITS3(0,0,1): case BITS3(0,1,0):
14868 assign(vNhi, mkU64(0));
14869 assign(vNlo, unop(Iop_32Uto64, unop(Iop_64to32, mkexpr(vNloPre))));
14870 break;
14871 case BITS3(0,1,1): case BITS3(1,0,0):
14872 case BITS3(1,0,1): case BITS3(1,1,0):
14873 assign(vNhi, mkexpr(vNhiPre));
14874 assign(vNlo, mkexpr(vNloPre));
14875 break;
14876 default:
14877 vassert(0);
14879 IRTemp res = newTemp(Ity_V128);
14880 IRDirty* di
14881 = unsafeIRDirty_1_N( res, 0/*regparms*/, hnames[opc], helpers[opc],
14882 mkIRExprVec_7(
14883 IRExpr_VECRET(),
14884 mkexpr(vDhi), mkexpr(vDlo), mkexpr(vNhi),
14885 mkexpr(vNlo), mkexpr(vMhi), mkexpr(vMlo)));
14886 stmt(IRStmt_Dirty(di));
14887 putQReg128(dd, mkexpr(res));
14888 switch (opc) {
14889 case BITS3(0,0,0): case BITS3(0,0,1): case BITS3(0,1,0):
14890 DIP("%s q%u, s%u, v%u.4s\n", inames[opc], dd, nn, mm);
14891 break;
14892 case BITS3(0,1,1): case BITS3(1,1,0):
14893 DIP("%s v%u.4s, v%u.4s, v%u.4s\n", inames[opc], dd, nn, mm);
14894 break;
14895 case BITS3(1,0,0): case BITS3(1,0,1):
14896 DIP("%s q%u, q%u, v%u.4s\n", inames[opc], dd, nn, mm);
14897 break;
14898 default:
14899 vassert(0);
14901 return True;
14904 return False;
14905 # undef INSN
14909 static
14910 Bool dis_AdvSIMD_crypto_two_reg_sha(/*MB_OUT*/DisResult* dres, UInt insn)
14912 /* 31 27 23 21 16 11 9 4
14913 0101 1110 sz 10100 opc 10 n d
14914 Decode fields are: sz,opc
14916 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
14917 if (INSN(31,24) != BITS8(0,1,0,1,1,1,1,0)
14918 || INSN(21,17) != BITS5(1,0,1,0,0) || INSN(11,10) != BITS2(1,0)) {
14919 return False;
14921 UInt sz = INSN(23,22);
14922 UInt opc = INSN(16,12);
14923 UInt nn = INSN(9,5);
14924 UInt dd = INSN(4,0);
14925 if (sz == BITS2(0,0) && opc <= BITS5(0,0,0,1,0)) {
14926 /* -------- 00,00000 SHA1H Sd, Sn -------- */
14927 /* -------- 00,00001 SHA1SU1 Vd.4S, Vn.4S -------- */
14928 /* -------- 00,00010 SHA256SU0 Vd.4S, Vn.4S -------- */
14929 vassert(opc < 3);
14930 const HChar* inames[3] = { "sha1h", "sha1su1", "sha256su0" };
14931 IRTemp vD = newTemp(Ity_V128);
14932 IRTemp vN = newTemp(Ity_V128);
14933 IRTemp vDhi = newTemp(Ity_I64);
14934 IRTemp vDlo = newTemp(Ity_I64);
14935 IRTemp vNhi = newTemp(Ity_I64);
14936 IRTemp vNlo = newTemp(Ity_I64);
14937 assign(vD, getQReg128(dd));
14938 assign(vN, getQReg128(nn));
14939 assign(vDhi, unop(Iop_V128HIto64, mkexpr(vD)));
14940 assign(vDlo, unop(Iop_V128to64, mkexpr(vD)));
14941 assign(vNhi, unop(Iop_V128HIto64, mkexpr(vN)));
14942 assign(vNlo, unop(Iop_V128to64, mkexpr(vN)));
14943 /* Mask off any bits of the N register operand that aren't actually
14944 needed, so that Memcheck doesn't complain unnecessarily. Also
14945 construct the calls, given that the helper functions don't take
14946 the same number of arguments. */
14947 IRDirty* di = NULL;
14948 IRTemp res = newTemp(Ity_V128);
14949 switch (opc) {
14950 case BITS5(0,0,0,0,0): {
14951 IRExpr* vNloMasked = unop(Iop_32Uto64,
14952 unop(Iop_64to32, mkexpr(vNlo)));
14953 di = unsafeIRDirty_1_N( res, 0/*regparms*/,
14954 "arm64g_dirtyhelper_SHA1H",
14955 &arm64g_dirtyhelper_SHA1H,
14956 mkIRExprVec_3(
14957 IRExpr_VECRET(),
14958 mkU64(0), vNloMasked) );
14959 break;
14961 case BITS5(0,0,0,0,1):
14962 di = unsafeIRDirty_1_N( res, 0/*regparms*/,
14963 "arm64g_dirtyhelper_SHA1SU1",
14964 &arm64g_dirtyhelper_SHA1SU1,
14965 mkIRExprVec_5(
14966 IRExpr_VECRET(),
14967 mkexpr(vDhi), mkexpr(vDlo),
14968 mkexpr(vNhi), mkexpr(vNlo)) );
14969 break;
14970 case BITS5(0,0,0,1,0):
14971 di = unsafeIRDirty_1_N( res, 0/*regparms*/,
14972 "arm64g_dirtyhelper_SHA256SU0",
14973 &arm64g_dirtyhelper_SHA256SU0,
14974 mkIRExprVec_5(
14975 IRExpr_VECRET(),
14976 mkexpr(vDhi), mkexpr(vDlo),
14977 mkexpr(vNhi), mkexpr(vNlo)) );
14978 break;
14979 default:
14980 vassert(0);
14982 stmt(IRStmt_Dirty(di));
14983 putQReg128(dd, mkexpr(res));
14984 switch (opc) {
14985 case BITS5(0,0,0,0,0):
14986 DIP("%s s%u, s%u\n", inames[opc], dd, nn);
14987 break;
14988 case BITS5(0,0,0,0,1): case BITS5(0,0,0,1,0):
14989 DIP("%s v%u.4s, v%u.4s\n", inames[opc], dd, nn);
14990 break;
14991 default:
14992 vassert(0);
14994 return True;
14997 return False;
14998 # undef INSN
15002 static
15003 Bool dis_AdvSIMD_crypto_three_reg_sha512(/*MB_OUT*/DisResult* dres, UInt insn)
15005 /* 31 27 23 20 15 14 13 11 9 4
15006 1100 1110 011 m 1 o 00 opc n d
15007 Decode fields are: o,opc
15009 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
15010 if (INSN(31,21) != BITS11(1,1,0,0,1,1,1,0,0,1,1) || INSN(15,15) != 1
15011 || INSN(13,12) != BITS2(0,0)) {
15012 return False;
15014 UInt mm = INSN(20,16);
15015 UInt bitO = INSN(14,14);
15016 UInt opc = INSN(11,10);
15017 UInt nn = INSN(9,5);
15018 UInt dd = INSN(4,0);
15019 if (bitO == 0 && opc <= BITS2(1,0)) {
15020 /* -------- 0,00 SHA512H Qd, Qn, Vm.2D -------- */
15021 /* -------- 0,01 SHA512H2 Qd, Qn, Vm.2D -------- */
15022 /* -------- 0,10 SHA512SU1 Vd.2D, Vn.2D, Vm.2D -------- */
15023 vassert(opc < 3);
15024 const HChar* inames[3] = { "sha512h", "sha512h2", "sha512su1" };
15025 void(*helpers[3])(V128*,ULong,ULong,ULong,ULong,ULong,ULong)
15026 = { &arm64g_dirtyhelper_SHA512H, &arm64g_dirtyhelper_SHA512H2,
15027 &arm64g_dirtyhelper_SHA512SU1 };
15028 const HChar* hnames[3]
15029 = { "arm64g_dirtyhelper_SHA512H", "arm64g_dirtyhelper_SHA512H2",
15030 "arm64g_dirtyhelper_SHA512SU1" };
15031 IRTemp vD = newTemp(Ity_V128);
15032 IRTemp vN = newTemp(Ity_V128);
15033 IRTemp vM = newTemp(Ity_V128);
15034 IRTemp vDhi = newTemp(Ity_I64);
15035 IRTemp vDlo = newTemp(Ity_I64);
15036 IRTemp vNhi = newTemp(Ity_I64);
15037 IRTemp vNlo = newTemp(Ity_I64);
15038 IRTemp vMhi = newTemp(Ity_I64);
15039 IRTemp vMlo = newTemp(Ity_I64);
15040 assign(vD, getQReg128(dd));
15041 assign(vN, getQReg128(nn));
15042 assign(vM, getQReg128(mm));
15043 assign(vDhi, unop(Iop_V128HIto64, mkexpr(vD)));
15044 assign(vDlo, unop(Iop_V128to64, mkexpr(vD)));
15045 /* vNhi is initialized below. */
15046 assign(vNlo, unop(Iop_V128to64, mkexpr(vN)));
15047 assign(vMhi, unop(Iop_V128HIto64, mkexpr(vM)));
15048 assign(vMlo, unop(Iop_V128to64, mkexpr(vM)));
15049 /* SHA512H2 does not use the upper half of the N register. Mask it off so
15050 that Memcheck doesn't complain unnecessarily. */
15051 switch (opc) {
15052 case BITS2(0,1):
15053 assign(vNhi, mkU64(0));
15054 break;
15055 case BITS2(0,0): case BITS2(1,0):
15056 assign(vNhi, unop(Iop_V128HIto64, mkexpr(vN)));
15057 break;
15058 default:
15059 vassert(0);
15061 IRTemp res = newTemp(Ity_V128);
15062 IRDirty* di
15063 = unsafeIRDirty_1_N( res, 0/*regparms*/, hnames[opc], helpers[opc],
15064 mkIRExprVec_7(
15065 IRExpr_VECRET(),
15066 mkexpr(vDhi), mkexpr(vDlo), mkexpr(vNhi),
15067 mkexpr(vNlo), mkexpr(vMhi), mkexpr(vMlo)));
15068 stmt(IRStmt_Dirty(di));
15069 putQReg128(dd, mkexpr(res));
15070 switch (opc) {
15071 case BITS2(0,0): case BITS2(0,1):
15072 DIP("%s q%u, q%u, v%u.2d\n", inames[opc], dd, nn, mm);
15073 break;
15074 case BITS2(1,0):
15075 DIP("%s v%u.2d, v%u.2d, v%u.2d\n", inames[opc], dd, nn, mm);
15076 break;
15077 default:
15078 vassert(0);
15080 return True;
15083 return False;
15084 # undef INSN
15088 static
15089 Bool dis_AdvSIMD_crypto_two_reg_sha512(/*MB_OUT*/DisResult* dres, UInt insn)
15091 /* 31 27 23 19 15 11 9 4
15092 1100 1110 1100 0000 1000 opc n d
15093 Decode fields are: opc
15095 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
15096 if (INSN(31,20) != BITS12(1,1,0,0,1,1,1,0,1,1,0,0)
15097 || INSN(19,12) != BITS8(0,0,0,0,1,0,0,0)) {
15098 return False;
15100 UInt opc = INSN(11,10);
15101 UInt nn = INSN(9,5);
15102 UInt dd = INSN(4,0);
15103 if (opc == BITS2(0,0)) {
15104 /* -------- 00 SHA512SU0 Vd.2D, Vn.2D -------- */
15105 IRTemp vD = newTemp(Ity_V128);
15106 IRTemp vN = newTemp(Ity_V128);
15107 IRTemp vDhi = newTemp(Ity_I64);
15108 IRTemp vDlo = newTemp(Ity_I64);
15109 IRTemp vNhi = newTemp(Ity_I64);
15110 IRTemp vNlo = newTemp(Ity_I64);
15111 assign(vD, getQReg128(dd));
15112 assign(vN, getQReg128(nn));
15113 /* SHA512SU0 ignores the upper half of the N register. Mask it off, so
15114 that Memcheck doesn't complain unnecessarily. */
15115 assign(vDhi, unop(Iop_V128HIto64, mkexpr(vD)));
15116 assign(vDlo, unop(Iop_V128to64, mkexpr(vD)));
15117 assign(vNhi, mkU64(0));
15118 assign(vNlo, unop(Iop_V128to64, mkexpr(vN)));
15119 IRTemp res = newTemp(Ity_V128);
15120 IRDirty* di = unsafeIRDirty_1_N( res, 0/*regparms*/,
15121 "arm64g_dirtyhelper_SHA512SU0",
15122 &arm64g_dirtyhelper_SHA512SU0,
15123 mkIRExprVec_5(
15124 IRExpr_VECRET(),
15125 mkexpr(vDhi), mkexpr(vDlo),
15126 mkexpr(vNhi), mkexpr(vNlo)) );
15127 stmt(IRStmt_Dirty(di));
15128 putQReg128(dd, mkexpr(res));
15129 DIP("sha512su0 v%u.2d, v%u.2d\n", dd, nn);
15130 return True;
15133 return False;
15134 # undef INSN
15138 static
15139 Bool dis_AdvSIMD_fp_compare(/*MB_OUT*/DisResult* dres, UInt insn)
15141 /* 31 28 23 21 20 15 13 9 4
15142 000 11110 ty 1 m op 1000 n opcode2
15143 The first 3 bits are really "M 0 S", but M and S are always zero.
15144 Decode fields are: ty,op,opcode2
15146 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
15147 if (INSN(31,24) != BITS8(0,0,0,1,1,1,1,0)
15148 || INSN(21,21) != 1 || INSN(13,10) != BITS4(1,0,0,0)) {
15149 return False;
15151 UInt ty = INSN(23,22);
15152 UInt mm = INSN(20,16);
15153 UInt op = INSN(15,14);
15154 UInt nn = INSN(9,5);
15155 UInt opcode2 = INSN(4,0);
15156 vassert(ty < 4);
15158 if (ty <= X01 && op == X00
15159 && (opcode2 & BITS5(0,0,1,1,1)) == BITS5(0,0,0,0,0)) {
15160 /* -------- 0x,00,00000 FCMP d_d, s_s -------- */
15161 /* -------- 0x,00,01000 FCMP d_#0, s_#0 -------- */
15162 /* -------- 0x,00,10000 FCMPE d_d, s_s -------- */
15163 /* -------- 0x,00,11000 FCMPE d_#0, s_#0 -------- */
15164 /* 31 23 20 15 9 4
15165 000 11110 01 1 m 00 1000 n 10 000 FCMPE Dn, Dm
15166 000 11110 01 1 00000 00 1000 n 11 000 FCMPE Dn, #0.0
15167 000 11110 01 1 m 00 1000 n 00 000 FCMP Dn, Dm
15168 000 11110 01 1 00000 00 1000 n 01 000 FCMP Dn, #0.0
15170 000 11110 00 1 m 00 1000 n 10 000 FCMPE Sn, Sm
15171 000 11110 00 1 00000 00 1000 n 11 000 FCMPE Sn, #0.0
15172 000 11110 00 1 m 00 1000 n 00 000 FCMP Sn, Sm
15173 000 11110 00 1 00000 00 1000 n 01 000 FCMP Sn, #0.0
15175 FCMPE generates Invalid Operation exn if either arg is any kind
15176 of NaN. FCMP generates Invalid Operation exn if either arg is a
15177 signalling NaN. We ignore this detail here and produce the same
15178 IR for both.
15180 Bool isD = (ty & 1) == 1;
15181 Bool isCMPE = (opcode2 & 16) == 16;
15182 Bool cmpZero = (opcode2 & 8) == 8;
15183 IRType ity = isD ? Ity_F64 : Ity_F32;
15184 Bool valid = True;
15185 if (cmpZero && mm != 0) valid = False;
15186 if (valid) {
15187 IRTemp argL = newTemp(ity);
15188 IRTemp argR = newTemp(ity);
15189 IRTemp irRes = newTemp(Ity_I32);
15190 assign(argL, getQRegLO(nn, ity));
15191 assign(argR,
15192 cmpZero
15193 ? (IRExpr_Const(isD ? IRConst_F64i(0) : IRConst_F32i(0)))
15194 : getQRegLO(mm, ity));
15195 assign(irRes, binop(isD ? Iop_CmpF64 : Iop_CmpF32,
15196 mkexpr(argL), mkexpr(argR)));
15197 IRTemp nzcv = mk_convert_IRCmpF64Result_to_NZCV(irRes);
15198 IRTemp nzcv_28x0 = newTemp(Ity_I64);
15199 assign(nzcv_28x0, binop(Iop_Shl64, mkexpr(nzcv), mkU8(28)));
15200 setFlags_COPY(nzcv_28x0);
15201 DIP("fcmp%s %s, %s\n", isCMPE ? "e" : "", nameQRegLO(nn, ity),
15202 cmpZero ? "#0.0" : nameQRegLO(mm, ity));
15203 return True;
15205 return False;
15208 return False;
15209 # undef INSN
15213 static
15214 Bool dis_AdvSIMD_fp_conditional_compare(/*MB_OUT*/DisResult* dres, UInt insn,
15215 const VexArchInfo* archinfo, Bool sigill_diag)
15217 /* 31 28 23 21 20 15 11 9 4 3
15218 000 11110 ty 1 m cond 01 n op nzcv
15219 The first 3 bits are really "M 0 S", but M and S are always zero.
15220 Decode fields are: ty,op
15222 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
15223 if (INSN(31,24) != BITS8(0,0,0,1,1,1,1,0)
15224 || INSN(21,21) != 1 || INSN(11,10) != BITS2(0,1)) {
15225 return False;
15227 UInt ty = INSN(23,22);
15228 UInt mm = INSN(20,16);
15229 UInt cond = INSN(15,12);
15230 UInt nn = INSN(9,5);
15231 UInt op = INSN(4,4);
15232 UInt nzcv = INSN(3,0);
15233 vassert(ty < 4 && op <= 1);
15235 /* -------- 00,0 FCCMP s_s -------- */
15236 /* -------- 00,1 FCCMPE s_s -------- */
15237 /* -------- 01,0 FCCMP d_d -------- */
15238 /* -------- 01,1 FCCMPE d_d -------- */
15239 /* -------- 11,0 FCCMP h_h -------- */
15240 /* -------- 11,1 FCCMPE h_h -------- */
15242 /* FCCMPE generates Invalid Operation exn if either arg is any kind
15243 of NaN. FCCMP generates Invalid Operation exn if either arg is a
15244 signalling NaN. We ignore this detail here and produce the same
15245 IR for both.
15247 Bool isCMPE = op == 1;
15248 IRType ity;
15249 IROp irop;
15250 if (ty == 0) {
15251 ity = Ity_F32;
15252 irop = Iop_CmpF32;
15254 else if (ty == 1) {
15255 ity = Ity_F64;
15256 irop = Iop_CmpF64;
15258 else if (ty == 3) {
15259 if ((archinfo->hwcaps & VEX_HWCAPS_ARM64_FP16) == 0)
15260 return False;
15261 ity = Ity_F16;
15262 irop = Iop_CmpF16;
15264 else {
15265 /* ty = 2 is an illegal encoding */
15266 if (sigill_diag) {
15267 vex_printf("ARM64 front end: dis_AdvSIMD_fp_conditional_compare\n");
15269 return False;
15271 IRTemp argL = newTemp(ity);
15272 IRTemp argR = newTemp(ity);
15273 IRTemp irRes = newTemp(Ity_I32);
15274 assign(argL, getQRegLO(nn, ity));
15275 assign(argR, getQRegLO(mm, ity));
15276 assign(irRes, binop(irop, mkexpr(argL), mkexpr(argR)));
15277 IRTemp condT = newTemp(Ity_I1);
15278 assign(condT, unop(Iop_64to1, mk_arm64g_calculate_condition(cond)));
15279 IRTemp nzcvT = mk_convert_IRCmpF64Result_to_NZCV(irRes);
15281 IRTemp nzcvT_28x0 = newTemp(Ity_I64);
15282 assign(nzcvT_28x0, binop(Iop_Shl64, mkexpr(nzcvT), mkU8(28)));
15284 IRExpr* nzcvF_28x0 = mkU64(((ULong)nzcv) << 28);
15286 IRTemp nzcv_28x0 = newTemp(Ity_I64);
15287 assign(nzcv_28x0, IRExpr_ITE(mkexpr(condT),
15288 mkexpr(nzcvT_28x0), nzcvF_28x0));
15289 setFlags_COPY(nzcv_28x0);
15290 DIP("fccmp%s %s, %s, #%u, %s\n", isCMPE ? "e" : "",
15291 nameQRegLO(nn, ity), nameQRegLO(mm, ity), nzcv, nameCC(cond));
15292 return True;
15294 # undef INSN
15298 static
15299 Bool dis_AdvSIMD_fp_conditional_select(/*MB_OUT*/DisResult* dres, UInt insn)
15301 /* 31 23 21 20 15 11 9 5
15302 000 11110 ty 1 m cond 11 n d
15303 The first 3 bits are really "M 0 S", but M and S are always zero.
15304 Decode fields: ty
15306 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
15307 if (INSN(31,24) != BITS8(0,0,0,1,1,1,1,0) || INSN(21,21) != 1
15308 || INSN(11,10) != BITS2(1,1)) {
15309 return False;
15311 UInt ty = INSN(23,22);
15312 UInt mm = INSN(20,16);
15313 UInt cond = INSN(15,12);
15314 UInt nn = INSN(9,5);
15315 UInt dd = INSN(4,0);
15316 if (ty <= X01) {
15317 /* -------- 00: FCSEL s_s -------- */
15318 /* -------- 00: FCSEL d_d -------- */
15319 IRType ity = ty == X01 ? Ity_F64 : Ity_F32;
15320 IRTemp srcT = newTemp(ity);
15321 IRTemp srcF = newTemp(ity);
15322 IRTemp res = newTemp(ity);
15323 assign(srcT, getQRegLO(nn, ity));
15324 assign(srcF, getQRegLO(mm, ity));
15325 assign(res, IRExpr_ITE(
15326 unop(Iop_64to1, mk_arm64g_calculate_condition(cond)),
15327 mkexpr(srcT), mkexpr(srcF)));
15328 putQReg128(dd, mkV128(0x0000));
15329 putQRegLO(dd, mkexpr(res));
15330 DIP("fcsel %s, %s, %s, %s\n",
15331 nameQRegLO(dd, ity), nameQRegLO(nn, ity), nameQRegLO(mm, ity),
15332 nameCC(cond));
15333 return True;
15335 return False;
15336 # undef INSN
15340 static
15341 Bool dis_AdvSIMD_fp_data_proc_1_source(/*MB_OUT*/DisResult* dres, UInt insn)
15343 /* 31 28 23 21 20 14 9 4
15344 000 11110 ty 1 opcode 10000 n d
15345 The first 3 bits are really "M 0 S", but M and S are always zero.
15346 Decode fields: ty,opcode
15348 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
15349 if (INSN(31,24) != BITS8(0,0,0,1,1,1,1,0)
15350 || INSN(21,21) != 1 || INSN(14,10) != BITS5(1,0,0,0,0)) {
15351 return False;
15353 UInt ty = INSN(23,22);
15354 UInt opcode = INSN(20,15);
15355 UInt nn = INSN(9,5);
15356 UInt dd = INSN(4,0);
15358 if (opcode <= BITS6(0,0,0,0,1,1)) {
15359 /* -------- 0x,000000: FMOV d_d, s_s -------- */
15360 /* -------- 0x,000001: FABS d_d, s_s, h_h --- */
15361 /* -------- 0x,000010: FNEG d_d, s_s, h_h --- */
15362 /* -------- 0x,000011: FSQRT d_d, s_s, h_h --- */
15363 IRType ity;
15364 if (ty == X01) ity = Ity_F64;
15365 else if (ty == X00) ity = Ity_F32;
15366 else if (ty == X11) ity = Ity_F16;
15367 else vassert(0);
15368 IRTemp src = newTemp(ity);
15369 IRTemp res = newTemp(ity);
15370 const HChar* nm = "??";
15371 assign(src, getQRegLO(nn, ity));
15372 switch (opcode) {
15373 case BITS6(0,0,0,0,0,0):
15374 nm = "fmov"; assign(res, mkexpr(src)); break;
15375 case BITS6(0,0,0,0,0,1):
15376 nm = "fabs"; assign(res, unop(mkABSF(ity), mkexpr(src))); break;
15377 case BITS6(0,0,0,0,1,0):
15378 nm = "fneg"; assign(res, unop(mkNEGF(ity), mkexpr(src))); break;
15379 case BITS6(0,0,0,0,1,1):
15380 nm = "fsqrt";
15381 assign(res, binop(mkSQRTF(ity),
15382 mkexpr(mk_get_IR_rounding_mode()),
15383 mkexpr(src))); break;
15384 default:
15385 vassert(0);
15387 putQReg128(dd, mkV128(0x0000));
15388 putQRegLO(dd, mkexpr(res));
15389 DIP("%s %s, %s\n", nm, nameQRegLO(dd, ity), nameQRegLO(nn, ity));
15390 return True;
15393 if ( (ty == X11 && (opcode == BITS6(0,0,0,1,0,0)
15394 || opcode == BITS6(0,0,0,1,0,1)))
15395 || (ty == X00 && (opcode == BITS6(0,0,0,1,1,1)
15396 || opcode == BITS6(0,0,0,1,0,1)))
15397 || (ty == X01 && (opcode == BITS6(0,0,0,1,1,1)
15398 || opcode == BITS6(0,0,0,1,0,0)))) {
15399 /* -------- 11,000100: FCVT s_h -------- */
15400 /* -------- 11,000101: FCVT d_h -------- */
15401 /* -------- 00,000111: FCVT h_s -------- */
15402 /* -------- 00,000101: FCVT d_s -------- */
15403 /* -------- 01,000111: FCVT h_d -------- */
15404 /* -------- 01,000100: FCVT s_d -------- */
15405 /* 31 23 21 16 14 9 4
15406 000 11110 11 10001 00 10000 n d FCVT Sd, Hn
15407 --------- 11 ----- 01 --------- FCVT Dd, Hn
15408 --------- 00 ----- 11 --------- FCVT Hd, Sn
15409 --------- 00 ----- 01 --------- FCVT Dd, Sn
15410 --------- 01 ----- 11 --------- FCVT Hd, Dn
15411 --------- 01 ----- 00 --------- FCVT Sd, Dn
15412 Rounding, when dst is smaller than src, is per the FPCR.
15414 UInt b2322 = ty;
15415 UInt b1615 = opcode & BITS2(1,1);
15416 switch ((b2322 << 2) | b1615) {
15417 case BITS4(0,0,0,1): // S -> D
15418 case BITS4(1,1,0,1): { // H -> D
15419 Bool srcIsH = b2322 == BITS2(1,1);
15420 IRType srcTy = srcIsH ? Ity_F16 : Ity_F32;
15421 IRTemp res = newTemp(Ity_F64);
15422 assign(res, unop(srcIsH ? Iop_F16toF64 : Iop_F32toF64,
15423 getQRegLO(nn, srcTy)));
15424 putQReg128(dd, mkV128(0x0000));
15425 putQRegLO(dd, mkexpr(res));
15426 DIP("fcvt %s, %s\n",
15427 nameQRegLO(dd, Ity_F64), nameQRegLO(nn, srcTy));
15428 return True;
15430 case BITS4(0,1,0,0): // D -> S
15431 case BITS4(0,1,1,1): { // D -> H
15432 Bool dstIsH = b1615 == BITS2(1,1);
15433 IRType dstTy = dstIsH ? Ity_F16 : Ity_F32;
15434 IRTemp res = newTemp(dstTy);
15435 assign(res, binop(dstIsH ? Iop_F64toF16 : Iop_F64toF32,
15436 mkexpr(mk_get_IR_rounding_mode()),
15437 getQRegLO(nn, Ity_F64)));
15438 putQReg128(dd, mkV128(0x0000));
15439 putQRegLO(dd, mkexpr(res));
15440 DIP("fcvt %s, %s\n",
15441 nameQRegLO(dd, dstTy), nameQRegLO(nn, Ity_F64));
15442 return True;
15444 case BITS4(0,0,1,1): // S -> H
15445 case BITS4(1,1,0,0): { // H -> S
15446 Bool toH = b1615 == BITS2(1,1);
15447 IRType srcTy = toH ? Ity_F32 : Ity_F16;
15448 IRType dstTy = toH ? Ity_F16 : Ity_F32;
15449 IRTemp res = newTemp(dstTy);
15450 if (toH) {
15451 assign(res, binop(Iop_F32toF16,
15452 mkexpr(mk_get_IR_rounding_mode()),
15453 getQRegLO(nn, srcTy)));
15455 } else {
15456 assign(res, unop(Iop_F16toF32,
15457 getQRegLO(nn, srcTy)));
15459 putQReg128(dd, mkV128(0x0000));
15460 putQRegLO(dd, mkexpr(res));
15461 DIP("fcvt %s, %s\n",
15462 nameQRegLO(dd, dstTy), nameQRegLO(nn, srcTy));
15463 return True;
15465 default:
15466 break;
15468 /* else unhandled */
15469 return False;
15472 if (ty <= X01
15473 && opcode >= BITS6(0,0,1,0,0,0) && opcode <= BITS6(0,0,1,1,1,1)
15474 && opcode != BITS6(0,0,1,1,0,1)) {
15475 /* -------- 0x,001000 FRINTN d_d, s_s -------- */
15476 /* -------- 0x,001001 FRINTP d_d, s_s -------- */
15477 /* -------- 0x,001010 FRINTM d_d, s_s -------- */
15478 /* -------- 0x,001011 FRINTZ d_d, s_s -------- */
15479 /* -------- 0x,001100 FRINTA d_d, s_s -------- */
15480 /* -------- 0x,001110 FRINTX d_d, s_s -------- */
15481 /* -------- 0x,001111 FRINTI d_d, s_s -------- */
15482 /* 31 23 21 17 14 9 4
15483 000 11110 0x 1001 111 10000 n d FRINTI Fd, Fm (round per FPCR)
15485 x==0 => S-registers, x==1 => D-registers
15486 rm (17:15) encodings:
15487 111 per FPCR (FRINTI)
15488 001 +inf (FRINTP)
15489 010 -inf (FRINTM)
15490 011 zero (FRINTZ)
15491 000 tieeven (FRINTN)
15492 100 tieaway (FRINTA)
15493 110 per FPCR + "exact = TRUE" (FRINTX)
15494 101 unallocated
15496 Bool isD = (ty & 1) == 1;
15497 UInt rm = opcode & BITS6(0,0,0,1,1,1);
15498 IRType ity = isD ? Ity_F64 : Ity_F32;
15499 IRExpr* irrmE = NULL;
15500 UChar ch = '?';
15501 IROp op = isD ? Iop_RoundF64toInt : Iop_RoundF32toInt;
15502 Bool isBinop = True;
15503 switch (rm) {
15504 case BITS3(0,1,1): ch = 'z'; irrmE = mkU32(Irrm_ZERO); break;
15505 case BITS3(0,1,0): ch = 'm'; irrmE = mkU32(Irrm_NegINF); break;
15506 case BITS3(0,0,1): ch = 'p'; irrmE = mkU32(Irrm_PosINF); break;
15507 case BITS3(1,0,0): ch = 'a'; isBinop = False; op = isD ? Iop_RoundF64toIntA0 : Iop_RoundF32toIntA0; break;
15508 // I am unsure about the following, due to the "integral exact"
15509 // description in the manual. What does it mean? (frintx, that is)
15510 // PJF exact means that if the rounding can't be done without
15511 // precision loss (dst numerically equal to src after the rounding)
15512 // then an exception is raised / the IXC bit gets set in the FPSR
15513 case BITS3(1,1,0):
15514 ch = 'x'; irrmE = mkexpr(mk_get_IR_rounding_mode()); break;
15515 case BITS3(1,1,1):
15516 ch = 'i'; irrmE = mkexpr(mk_get_IR_rounding_mode()); break;
15517 case BITS3(0,0,0): ch = 'n'; isBinop = False; op = isD ? Iop_RoundF64toIntE : Iop_RoundF32toIntE; break;
15518 default: break;
15520 if (irrmE || !isBinop) {
15521 IRTemp src = newTemp(ity);
15522 IRTemp dst = newTemp(ity);
15523 assign(src, getQRegLO(nn, ity));
15524 if (isBinop) {
15525 assign(dst, binop(op, irrmE, mkexpr(src)));
15526 } else {
15527 assign(dst, unop(op, mkexpr(src)));
15529 putQReg128(dd, mkV128(0x0000));
15530 putQRegLO(dd, mkexpr(dst));
15531 DIP("frint%c %s, %s\n",
15532 ch, nameQRegLO(dd, ity), nameQRegLO(nn, ity));
15533 return True;
15535 return False;
15538 return False;
15539 # undef INSN
15543 static
15544 Bool dis_AdvSIMD_fp_data_proc_2_source(/*MB_OUT*/DisResult* dres, UInt insn,
15545 const VexArchInfo* archinfo)
15547 /* 31 28 23 21 20 15 11 9 4
15548 000 11110 ty 1 m opcode 10 n d
15549 The first 3 bits are really "M 0 S", but M and S are always zero.
15550 Decode fields: ty, opcode
15552 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
15553 if (INSN(31,24) != BITS8(0,0,0,1,1,1,1,0)
15554 || INSN(21,21) != 1 || INSN(11,10) != BITS2(1,0)) {
15555 return False;
15557 UInt ty = INSN(23,22);
15558 UInt mm = INSN(20,16);
15559 UInt opcode = INSN(15,12);
15560 UInt nn = INSN(9,5);
15561 UInt dd = INSN(4,0);
15563 if (ty <= X01 && opcode <= BITS4(0,1,1,1)) {
15564 /* ------- 0x,0000: FMUL d_d, s_s ------- */
15565 /* ------- 0x,0001: FDIV d_d, s_s ------- */
15566 /* ------- 0x,0010: FADD d_d, s_s ------- */
15567 /* ------- 0x,0011: FSUB d_d, s_s ------- */
15568 /* ------- 0x,0100: FMAX d_d, s_s ------- */
15569 /* ------- 0x,0101: FMIN d_d, s_s ------- */
15570 /* ------- 0x,0110: FMAXNM d_d, s_s ------- (FIXME KLUDGED) */
15571 /* ------- 0x,0111: FMINNM d_d, s_s ------- (FIXME KLUDGED) */
15572 IRType ity = ty == X00 ? Ity_F32 : Ity_F64;
15573 IROp iop = Iop_INVALID;
15574 const HChar* nm = "???";
15575 switch (opcode) {
15576 case BITS4(0,0,0,0): nm = "fmul"; iop = mkMULF(ity); break;
15577 case BITS4(0,0,0,1): nm = "fdiv"; iop = mkDIVF(ity); break;
15578 case BITS4(0,0,1,0): nm = "fadd"; iop = mkADDF(ity); break;
15579 case BITS4(0,0,1,1): nm = "fsub"; iop = mkSUBF(ity); break;
15580 case BITS4(0,1,0,0): nm = "fmax"; iop = mkVecMAXF(ty+2); break;
15581 case BITS4(0,1,0,1): nm = "fmin"; iop = mkVecMINF(ty+2); break;
15582 case BITS4(0,1,1,0): nm = "fmaxnm"; iop = mkVecMAXF(ty+2); break; //!!
15583 case BITS4(0,1,1,1): nm = "fminnm"; iop = mkVecMINF(ty+2); break; //!!
15584 default: vassert(0);
15586 if (opcode <= BITS4(0,0,1,1)) {
15587 // This is really not good code. TODO: avoid width-changing
15588 IRTemp res = newTemp(ity);
15589 assign(res, triop(iop, mkexpr(mk_get_IR_rounding_mode()),
15590 getQRegLO(nn, ity), getQRegLO(mm, ity)));
15591 putQReg128(dd, mkV128(0));
15592 putQRegLO(dd, mkexpr(res));
15593 } else {
15594 putQReg128(dd, unop(mkVecZEROHIxxOFV128(ty+2),
15595 binop(iop, getQReg128(nn), getQReg128(mm))));
15597 DIP("%s %s, %s, %s\n",
15598 nm, nameQRegLO(dd, ity), nameQRegLO(nn, ity), nameQRegLO(mm, ity));
15599 return True;
15602 if (ty <= X01 && opcode == BITS4(1,0,0,0)) {
15603 /* ------- 0x,1000: FNMUL d_d, s_s ------- */
15604 IRType ity = ty == X00 ? Ity_F32 : Ity_F64;
15605 IROp iop = mkMULF(ity);
15606 IROp iopn = mkNEGF(ity);
15607 const HChar* nm = "fnmul";
15608 IRExpr* resE = unop(iopn,
15609 triop(iop, mkexpr(mk_get_IR_rounding_mode()),
15610 getQRegLO(nn, ity), getQRegLO(mm, ity)));
15611 IRTemp res = newTemp(ity);
15612 assign(res, resE);
15613 putQReg128(dd, mkV128(0));
15614 putQRegLO(dd, mkexpr(res));
15615 DIP("%s %s, %s, %s\n",
15616 nm, nameQRegLO(dd, ity), nameQRegLO(nn, ity), nameQRegLO(mm, ity));
15617 return True;
15620 if (ty == X11 && opcode <= BITS4(0,0,1,0)) {
15621 /* ------- 11,0010: FADD h_h ------- */
15622 if ((archinfo->hwcaps & VEX_HWCAPS_ARM64_FP16) == 0)
15623 return False;
15624 IRTemp res = newTemp(Ity_F16);
15625 assign(res, triop(mkADDF(Ity_F16), mkexpr(mk_get_IR_rounding_mode()),
15626 getQRegLO(nn, Ity_F16), getQRegLO(mm, Ity_F16)));
15627 putQReg128(dd, mkV128(0));
15628 putQRegLO(dd, mkexpr(res));
15629 DIP("fadd %s, %s, %s\n",
15630 nameQRegLO(dd, Ity_F16), nameQRegLO(nn, Ity_F16), nameQRegLO(mm, Ity_F16));
15631 return True;
15634 return False;
15635 # undef INSN
15639 static
15640 Bool dis_AdvSIMD_fp_data_proc_3_source(/*MB_OUT*/DisResult* dres, UInt insn)
15642 /* 31 28 23 21 20 15 14 9 4
15643 000 11111 ty o1 m o0 a n d
15644 The first 3 bits are really "M 0 S", but M and S are always zero.
15645 Decode fields: ty,o1,o0
15647 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
15648 if (INSN(31,24) != BITS8(0,0,0,1,1,1,1,1)) {
15649 return False;
15651 UInt ty = INSN(23,22);
15652 UInt bitO1 = INSN(21,21);
15653 UInt mm = INSN(20,16);
15654 UInt bitO0 = INSN(15,15);
15655 UInt aa = INSN(14,10);
15656 UInt nn = INSN(9,5);
15657 UInt dd = INSN(4,0);
15658 vassert(ty < 4);
15660 if (ty <= X01) {
15661 /* -------- 0x,0,0 FMADD d_d_d_d, s_s_s_s -------- */
15662 /* -------- 0x,0,1 FMSUB d_d_d_d, s_s_s_s -------- */
15663 /* -------- 0x,1,0 FNMADD d_d_d_d, s_s_s_s -------- */
15664 /* -------- 0x,1,1 FNMSUB d_d_d_d, s_s_s_s -------- */
15665 /* -------------------- F{N}M{ADD,SUB} -------------------- */
15666 /* 31 22 20 15 14 9 4 ix
15667 000 11111 0 sz 0 m 0 a n d 0 FMADD Fd,Fn,Fm,Fa
15668 000 11111 0 sz 0 m 1 a n d 1 FMSUB Fd,Fn,Fm,Fa
15669 000 11111 0 sz 1 m 0 a n d 2 FNMADD Fd,Fn,Fm,Fa
15670 000 11111 0 sz 1 m 1 a n d 3 FNMSUB Fd,Fn,Fm,Fa
15671 where Fx=Dx when sz=1, Fx=Sx when sz=0
15673 -----SPEC------ ----IMPL----
15674 fmadd a + n * m fmadd (a, n, m)
15675 fmsub a + (-n) * m fmsub (a, n, m)
15676 fnmadd (-a) + (-n) * m fmadd (-a, -n, m)
15677 fnmsub (-a) + n * m fmadd (-a, n, m)
15679 Note Iop_MAdd/SubF32/64 take arguments in the order: rm, N, M, A
15681 Bool isD = (ty & 1) == 1;
15682 UInt ix = (bitO1 << 1) | bitO0;
15683 IRType ity = isD ? Ity_F64 : Ity_F32;
15684 IROp opFMADD = mkFMADDF(ity);
15685 IROp opFMSUB = mkFMSUBF(ity);
15686 IROp opNEG = mkNEGF(ity);
15687 IRTemp res = newTemp(ity);
15688 IRExpr* eA = getQRegLO(aa, ity);
15689 IRExpr* eN = getQRegLO(nn, ity);
15690 IRExpr* eM = getQRegLO(mm, ity);
15691 IRExpr* rm = mkexpr(mk_get_IR_rounding_mode());
15692 switch (ix) {
15693 case 0: /* FMADD */
15694 assign(res, qop(opFMADD, rm, eN, eM, eA));
15695 break;
15696 case 1: /* FMSUB */
15697 assign(res, qop(opFMSUB, rm, eN, eM, eA));
15698 break;
15699 case 2: /* FNMADD */
15700 assign(res, qop(opFMADD, rm, unop(opNEG, eN), eM,
15701 unop(opNEG,eA)));
15702 break;
15703 case 3: /* FNMSUB */
15704 assign(res, qop(opFMADD, rm, eN, eM, unop(opNEG, eA)));
15705 break;
15706 default:
15707 vassert(0);
15709 putQReg128(dd, mkV128(0x0000));
15710 putQRegLO(dd, mkexpr(res));
15711 const HChar* names[4] = { "fmadd", "fmsub", "fnmadd", "fnmsub" };
15712 DIP("%s %s, %s, %s, %s\n",
15713 names[ix], nameQRegLO(dd, ity), nameQRegLO(nn, ity),
15714 nameQRegLO(mm, ity), nameQRegLO(aa, ity));
15715 return True;
15718 return False;
15719 # undef INSN
15723 static
15724 Bool dis_AdvSIMD_fp_immediate(/*MB_OUT*/DisResult* dres, UInt insn)
15726 /* 31 28 23 21 20 12 9 4
15727 000 11110 ty 1 imm8 100 imm5 d
15728 The first 3 bits are really "M 0 S", but M and S are always zero.
15730 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
15731 if (INSN(31,24) != BITS8(0,0,0,1,1,1,1,0)
15732 || INSN(21,21) != 1 || INSN(12,10) != BITS3(1,0,0)) {
15733 return False;
15735 UInt ty = INSN(23,22);
15736 UInt imm8 = INSN(20,13);
15737 UInt imm5 = INSN(9,5);
15738 UInt dd = INSN(4,0);
15740 /* ------- 00,00000: FMOV s_imm ------- */
15741 /* ------- 01,00000: FMOV d_imm ------- */
15742 if (ty <= X01 && imm5 == BITS5(0,0,0,0,0)) {
15743 Bool isD = (ty & 1) == 1;
15744 ULong imm = VFPExpandImm(imm8, isD ? 64 : 32);
15745 if (!isD) {
15746 vassert(0 == (imm & 0xFFFFFFFF00000000ULL));
15748 putQReg128(dd, mkV128(0));
15749 putQRegLO(dd, isD ? mkU64(imm) : mkU32(imm & 0xFFFFFFFFULL));
15750 DIP("fmov %s, #0x%llx\n",
15751 nameQRegLO(dd, isD ? Ity_F64 : Ity_F32), imm);
15752 return True;
15755 return False;
15756 # undef INSN
15760 static
15761 Bool dis_AdvSIMD_fp_to_from_fixedp_conv(/*MB_OUT*/DisResult* dres, UInt insn)
15763 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
15764 /* 31 30 29 28 23 21 20 18 15 9 4
15765 sf 0 0 11110 type 0 rmode opcode scale n d
15766 The first 3 bits are really "sf 0 S", but S is always zero.
15767 Decode fields: sf,type,rmode,opcode
15769 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
15770 if (INSN(30,29) != BITS2(0,0)
15771 || INSN(28,24) != BITS5(1,1,1,1,0)
15772 || INSN(21,21) != 0) {
15773 return False;
15775 UInt bitSF = INSN(31,31);
15776 UInt ty = INSN(23,22); // type
15777 UInt rm = INSN(20,19); // rmode
15778 UInt op = INSN(18,16); // opcode
15779 UInt sc = INSN(15,10); // scale
15780 UInt nn = INSN(9,5);
15781 UInt dd = INSN(4,0);
15783 if (ty <= X01 && rm == X11
15784 && (op == BITS3(0,0,0) || op == BITS3(0,0,1))) {
15785 /* -------- (ix) sf ty rm opc -------- */
15786 /* -------- 0 0 00 11 000: FCVTZS w_s_#fbits -------- */
15787 /* -------- 1 0 01 11 000: FCVTZS w_d_#fbits -------- */
15788 /* -------- 2 1 00 11 000: FCVTZS x_s_#fbits -------- */
15789 /* -------- 3 1 01 11 000: FCVTZS x_d_#fbits -------- */
15791 /* -------- 4 0 00 11 001: FCVTZU w_s_#fbits -------- */
15792 /* -------- 5 0 01 11 001: FCVTZU w_d_#fbits -------- */
15793 /* -------- 6 1 00 11 001: FCVTZU x_s_#fbits -------- */
15794 /* -------- 7 1 01 11 001: FCVTZU x_d_#fbits -------- */
15795 Bool isI64 = bitSF == 1;
15796 Bool isF64 = (ty & 1) == 1;
15797 Bool isU = (op & 1) == 1;
15798 UInt ix = (isU ? 4 : 0) | (isI64 ? 2 : 0) | (isF64 ? 1 : 0);
15800 Int fbits = 64 - sc;
15801 vassert(fbits >= 1 && fbits <= (isI64 ? 64 : 32));
15803 Double scale = two_to_the_plus(fbits);
15804 IRExpr* scaleE = isF64 ? IRExpr_Const(IRConst_F64(scale))
15805 : IRExpr_Const(IRConst_F32( (Float)scale ));
15806 IROp opMUL = isF64 ? Iop_MulF64 : Iop_MulF32;
15808 const IROp ops[8]
15809 = { Iop_F32toI32S, Iop_F64toI32S, Iop_F32toI64S, Iop_F64toI64S,
15810 Iop_F32toI32U, Iop_F64toI32U, Iop_F32toI64U, Iop_F64toI64U };
15811 IRTemp irrm = newTemp(Ity_I32);
15812 assign(irrm, mkU32(Irrm_ZERO));
15814 IRExpr* src = getQRegLO(nn, isF64 ? Ity_F64 : Ity_F32);
15815 IRExpr* res = binop(ops[ix], mkexpr(irrm),
15816 triop(opMUL, mkexpr(irrm), src, scaleE));
15817 putIRegOrZR(isI64, dd, res);
15819 DIP("fcvtz%c %s, %s, #%d\n",
15820 isU ? 'u' : 's', nameIRegOrZR(isI64, dd),
15821 nameQRegLO(nn, isF64 ? Ity_F64 : Ity_F32), fbits);
15822 return True;
15825 /* ------ sf,ty,rm,opc ------ */
15826 /* ------ x,0x,00,010 SCVTF s/d, w/x, #fbits ------ */
15827 /* ------ x,0x,00,011 UCVTF s/d, w/x, #fbits ------ */
15828 /* (ix) sf S 28 ty rm opc 15 9 4
15829 0 0 0 0 11110 00 0 00 010 scale n d SCVTF Sd, Wn, #fbits
15830 1 0 0 0 11110 01 0 00 010 scale n d SCVTF Dd, Wn, #fbits
15831 2 1 0 0 11110 00 0 00 010 scale n d SCVTF Sd, Xn, #fbits
15832 3 1 0 0 11110 01 0 00 010 scale n d SCVTF Dd, Xn, #fbits
15834 4 0 0 0 11110 00 0 00 011 scale n d UCVTF Sd, Wn, #fbits
15835 5 0 0 0 11110 01 0 00 011 scale n d UCVTF Dd, Wn, #fbits
15836 6 1 0 0 11110 00 0 00 011 scale n d UCVTF Sd, Xn, #fbits
15837 7 1 0 0 11110 01 0 00 011 scale n d UCVTF Dd, Xn, #fbits
15839 These are signed/unsigned conversion from integer registers to
15840 FP registers, all 4 32/64-bit combinations, rounded per FPCR,
15841 scaled per |scale|.
15843 if (ty <= X01 && rm == X00
15844 && (op == BITS3(0,1,0) || op == BITS3(0,1,1))
15845 && (bitSF == 1 || ((sc >> 5) & 1) == 1)) {
15846 Bool isI64 = bitSF == 1;
15847 Bool isF64 = (ty & 1) == 1;
15848 Bool isU = (op & 1) == 1;
15849 UInt ix = (isU ? 4 : 0) | (isI64 ? 2 : 0) | (isF64 ? 1 : 0);
15851 Int fbits = 64 - sc;
15852 vassert(fbits >= 1 && fbits <= (isI64 ? 64 : 32));
15854 Double scale = two_to_the_minus(fbits);
15855 IRExpr* scaleE = isF64 ? IRExpr_Const(IRConst_F64(scale))
15856 : IRExpr_Const(IRConst_F32( (Float)scale ));
15857 IROp opMUL = isF64 ? Iop_MulF64 : Iop_MulF32;
15859 const IROp ops[8]
15860 = { Iop_I32StoF32, Iop_I32StoF64, Iop_I64StoF32, Iop_I64StoF64,
15861 Iop_I32UtoF32, Iop_I32UtoF64, Iop_I64UtoF32, Iop_I64UtoF64 };
15862 IRExpr* src = getIRegOrZR(isI64, nn);
15863 IRExpr* res = (isF64 && !isI64)
15864 ? unop(ops[ix], src)
15865 : binop(ops[ix],
15866 mkexpr(mk_get_IR_rounding_mode()), src);
15867 putQReg128(dd, mkV128(0));
15868 putQRegLO(dd, triop(opMUL, mkU32(Irrm_NEAREST), res, scaleE));
15870 DIP("%ccvtf %s, %s, #%d\n",
15871 isU ? 'u' : 's', nameQRegLO(dd, isF64 ? Ity_F64 : Ity_F32),
15872 nameIRegOrZR(isI64, nn), fbits);
15873 return True;
15876 return False;
15877 # undef INSN
15881 static
15882 Bool dis_AdvSIMD_fp_to_from_int_conv(/*MB_OUT*/DisResult* dres, UInt insn)
15884 /* 31 30 29 28 23 21 20 18 15 9 4
15885 sf 0 0 11110 type 1 rmode opcode 000000 n d
15886 The first 3 bits are really "sf 0 S", but S is always zero.
15887 Decode fields: sf,type,rmode,opcode
15889 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
15890 if (INSN(30,29) != BITS2(0,0)
15891 || INSN(28,24) != BITS5(1,1,1,1,0)
15892 || INSN(21,21) != 1
15893 || INSN(15,10) != BITS6(0,0,0,0,0,0)) {
15894 return False;
15896 UInt bitSF = INSN(31,31);
15897 UInt ty = INSN(23,22); // type
15898 UInt rm = INSN(20,19); // rmode
15899 UInt op = INSN(18,16); // opcode
15900 UInt nn = INSN(9,5);
15901 UInt dd = INSN(4,0);
15903 // op = 000, 001
15904 /* -------- FCVT{N,P,M,Z,A}{S,U} (scalar, integer) -------- */
15905 /* 30 23 20 18 15 9 4
15906 sf 00 11110 0x 1 00 000 000000 n d FCVTNS Rd, Fn (round to
15907 sf 00 11110 0x 1 00 001 000000 n d FCVTNU Rd, Fn nearest)
15908 ---------------- 01 -------------- FCVTP-------- (round to +inf)
15909 ---------------- 10 -------------- FCVTM-------- (round to -inf)
15910 ---------------- 11 -------------- FCVTZ-------- (round to zero)
15911 ---------------- 00 100 ---------- FCVTAS------- (nearest, ties away)
15912 ---------------- 00 101 ---------- FCVTAU------- (nearest, ties away)
15914 Rd is Xd when sf==1, Wd when sf==0
15915 Fn is Dn when x==1, Sn when x==0
15916 20:19 carry the rounding mode, using the same encoding as FPCR
15918 if (ty <= X01
15919 && ( ((op == BITS3(0,0,0) || op == BITS3(0,0,1)) && True)
15920 || ((op == BITS3(1,0,0) || op == BITS3(1,0,1)) && rm == BITS2(0,0))
15923 Bool isI64 = bitSF == 1;
15924 Bool isF64 = (ty & 1) == 1;
15925 Bool isU = (op & 1) == 1;
15926 /* Decide on the IR rounding mode to use. */
15927 IRRoundingMode irrm = 8; /*impossible*/
15928 HChar ch = '?';
15929 if (op == BITS3(0,0,0) || op == BITS3(0,0,1)) {
15930 switch (rm) {
15931 case BITS2(0,0): ch = 'n'; irrm = Irrm_NEAREST; break;
15932 case BITS2(0,1): ch = 'p'; irrm = Irrm_PosINF; break;
15933 case BITS2(1,0): ch = 'm'; irrm = Irrm_NegINF; break;
15934 case BITS2(1,1): ch = 'z'; irrm = Irrm_ZERO; break;
15935 default: vassert(0);
15937 } else {
15938 vassert(op == BITS3(1,0,0) || op == BITS3(1,0,1));
15939 switch (rm) {
15940 case BITS2(0,0): ch = 'a'; irrm = Irrm_NEAREST_TIE_AWAY_0; break;
15941 default: vassert(0);
15944 vassert(irrm != 8);
15945 /* Decide on the conversion primop, based on the source size,
15946 dest size and signedness (8 possibilities). Case coding:
15947 F32 ->s I32 0
15948 F32 ->u I32 1
15949 F32 ->s I64 2
15950 F32 ->u I64 3
15951 F64 ->s I32 4
15952 F64 ->u I32 5
15953 F64 ->s I64 6
15954 F64 ->u I64 7
15956 UInt ix = (isF64 ? 4 : 0) | (isI64 ? 2 : 0) | (isU ? 1 : 0);
15957 vassert(ix < 8);
15958 const IROp iops[8]
15959 = { Iop_F32toI32S, Iop_F32toI32U, Iop_F32toI64S, Iop_F32toI64U,
15960 Iop_F64toI32S, Iop_F64toI32U, Iop_F64toI64S, Iop_F64toI64U };
15961 IROp iop = iops[ix];
15962 // A bit of ATCery: bounce all cases we haven't seen an example of.
15963 if (/* F32toI32S */
15964 (iop == Iop_F32toI32S && irrm == Irrm_ZERO) /* FCVTZS Wd,Sn */
15965 || (iop == Iop_F32toI32S && irrm == Irrm_NegINF) /* FCVTMS Wd,Sn */
15966 || (iop == Iop_F32toI32S && irrm == Irrm_PosINF) /* FCVTPS Wd,Sn */
15967 || (iop == Iop_F32toI32S && irrm == Irrm_NEAREST) /* FCVTNS W,S */
15968 || (iop == Iop_F32toI32S && irrm == Irrm_NEAREST_TIE_AWAY_0)/* FCVTAS W,S */
15969 /* F32toI32U */
15970 || (iop == Iop_F32toI32U && irrm == Irrm_ZERO) /* FCVTZU Wd,Sn */
15971 || (iop == Iop_F32toI32U && irrm == Irrm_NegINF) /* FCVTMU Wd,Sn */
15972 || (iop == Iop_F32toI32U && irrm == Irrm_PosINF) /* FCVTPU Wd,Sn */
15973 || (iop == Iop_F32toI32U && irrm == Irrm_NEAREST) /* FCVTNU W,S */
15974 || (iop == Iop_F32toI32U && irrm == Irrm_NEAREST_TIE_AWAY_0)/* FCVTAU W,S */
15975 /* F32toI64S */
15976 || (iop == Iop_F32toI64S && irrm == Irrm_ZERO) /* FCVTZS Xd,Sn */
15977 || (iop == Iop_F32toI64S && irrm == Irrm_NegINF) /* FCVTMS Xd,Sn */
15978 || (iop == Iop_F32toI64S && irrm == Irrm_PosINF) /* FCVTPS Xd,Sn */
15979 || (iop == Iop_F32toI64S && irrm == Irrm_NEAREST) /* FCVTNS X,S */
15980 || (iop == Iop_F32toI64S && irrm == Irrm_NEAREST_TIE_AWAY_0)/* FCVTAS X,S */
15981 /* F32toI64U */
15982 || (iop == Iop_F32toI64U && irrm == Irrm_ZERO) /* FCVTZU Xd,Sn */
15983 || (iop == Iop_F32toI64U && irrm == Irrm_NegINF) /* FCVTMU Xd,Sn */
15984 || (iop == Iop_F32toI64U && irrm == Irrm_PosINF) /* FCVTPU Xd,Sn */
15985 || (iop == Iop_F32toI64U && irrm == Irrm_NEAREST) /* FCVTNU X,S */
15986 || (iop == Iop_F32toI64U && irrm == Irrm_NEAREST_TIE_AWAY_0)/* FCVTAU X,S */
15987 /* F64toI32S */
15988 || (iop == Iop_F64toI32S && irrm == Irrm_ZERO) /* FCVTZS Wd,Dn */
15989 || (iop == Iop_F64toI32S && irrm == Irrm_NegINF) /* FCVTMS Wd,Dn */
15990 || (iop == Iop_F64toI32S && irrm == Irrm_PosINF) /* FCVTPS Wd,Dn */
15991 || (iop == Iop_F64toI32S && irrm == Irrm_NEAREST) /* FCVTNS W,D */
15992 || (iop == Iop_F64toI32S && irrm == Irrm_NEAREST_TIE_AWAY_0)/* FCVTAS W,D */
15993 /* F64toI32U */
15994 || (iop == Iop_F64toI32U && irrm == Irrm_ZERO) /* FCVTZU Wd,Dn */
15995 || (iop == Iop_F64toI32U && irrm == Irrm_NegINF) /* FCVTMU Wd,Dn */
15996 || (iop == Iop_F64toI32U && irrm == Irrm_PosINF) /* FCVTPU Wd,Dn */
15997 || (iop == Iop_F64toI32U && irrm == Irrm_NEAREST) /* FCVTNU W,D */
15998 || (iop == Iop_F64toI32U && irrm == Irrm_NEAREST_TIE_AWAY_0)/* FCVTAU W,D */
15999 /* F64toI64S */
16000 || (iop == Iop_F64toI64S && irrm == Irrm_ZERO) /* FCVTZS Xd,Dn */
16001 || (iop == Iop_F64toI64S && irrm == Irrm_NegINF) /* FCVTMS Xd,Dn */
16002 || (iop == Iop_F64toI64S && irrm == Irrm_PosINF) /* FCVTPS Xd,Dn */
16003 || (iop == Iop_F64toI64S && irrm == Irrm_NEAREST) /* FCVTNS X,D */
16004 || (iop == Iop_F64toI64S && irrm == Irrm_NEAREST_TIE_AWAY_0)/* FCVTAS X,D */
16005 /* F64toI64U */
16006 || (iop == Iop_F64toI64U && irrm == Irrm_ZERO) /* FCVTZU Xd,Dn */
16007 || (iop == Iop_F64toI64U && irrm == Irrm_NegINF) /* FCVTMU Xd,Dn */
16008 || (iop == Iop_F64toI64U && irrm == Irrm_PosINF) /* FCVTPU Xd,Dn */
16009 || (iop == Iop_F64toI64U && irrm == Irrm_NEAREST) /* FCVTNU X,D */
16010 || (iop == Iop_F64toI64U && irrm == Irrm_NEAREST_TIE_AWAY_0)/* FCVTAU X,D */
16012 /* validated */
16013 } else {
16014 return False;
16016 IRType srcTy = isF64 ? Ity_F64 : Ity_F32;
16017 IRType dstTy = isI64 ? Ity_I64 : Ity_I32;
16018 IRTemp src = newTemp(srcTy);
16019 IRTemp dst = newTemp(dstTy);
16020 assign(src, getQRegLO(nn, srcTy));
16021 assign(dst, binop(iop, mkU32(irrm), mkexpr(src)));
16022 putIRegOrZR(isI64, dd, mkexpr(dst));
16023 DIP("fcvt%c%c %s, %s\n", ch, isU ? 'u' : 's',
16024 nameIRegOrZR(isI64, dd), nameQRegLO(nn, srcTy));
16025 return True;
16028 // op = 010, 011
16029 /* -------------- {S,U}CVTF (scalar, integer) -------------- */
16030 /* (ix) sf S 28 ty rm op 15 9 4
16031 0 0 0 0 11110 00 1 00 010 000000 n d SCVTF Sd, Wn
16032 1 0 0 0 11110 01 1 00 010 000000 n d SCVTF Dd, Wn
16033 2 1 0 0 11110 00 1 00 010 000000 n d SCVTF Sd, Xn
16034 3 1 0 0 11110 01 1 00 010 000000 n d SCVTF Dd, Xn
16036 4 0 0 0 11110 00 1 00 011 000000 n d UCVTF Sd, Wn
16037 5 0 0 0 11110 01 1 00 011 000000 n d UCVTF Dd, Wn
16038 6 1 0 0 11110 00 1 00 011 000000 n d UCVTF Sd, Xn
16039 7 1 0 0 11110 01 1 00 011 000000 n d UCVTF Dd, Xn
16041 These are signed/unsigned conversion from integer registers to
16042 FP registers, all 4 32/64-bit combinations, rounded per FPCR.
16044 if (ty <= X01 && rm == X00 && (op == BITS3(0,1,0) || op == BITS3(0,1,1))) {
16045 Bool isI64 = bitSF == 1;
16046 Bool isF64 = (ty & 1) == 1;
16047 Bool isU = (op & 1) == 1;
16048 UInt ix = (isU ? 4 : 0) | (isI64 ? 2 : 0) | (isF64 ? 1 : 0);
16049 const IROp ops[8]
16050 = { Iop_I32StoF32, Iop_I32StoF64, Iop_I64StoF32, Iop_I64StoF64,
16051 Iop_I32UtoF32, Iop_I32UtoF64, Iop_I64UtoF32, Iop_I64UtoF64 };
16052 IRExpr* src = getIRegOrZR(isI64, nn);
16053 IRExpr* res = (isF64 && !isI64)
16054 ? unop(ops[ix], src)
16055 : binop(ops[ix],
16056 mkexpr(mk_get_IR_rounding_mode()), src);
16057 putQReg128(dd, mkV128(0));
16058 putQRegLO(dd, res);
16059 DIP("%ccvtf %s, %s\n",
16060 isU ? 'u' : 's', nameQRegLO(dd, isF64 ? Ity_F64 : Ity_F32),
16061 nameIRegOrZR(isI64, nn));
16062 return True;
16065 // op = 110, 111
16066 /* -------- FMOV (general) -------- */
16067 /* case sf S ty rm op 15 9 4
16068 (1) 0 0 0 11110 00 1 00 111 000000 n d FMOV Sd, Wn
16069 (2) 1 0 0 11110 01 1 00 111 000000 n d FMOV Dd, Xn
16070 (3) 1 0 0 11110 10 1 01 111 000000 n d FMOV Vd.D[1], Xn
16072 (4) 0 0 0 11110 00 1 00 110 000000 n d FMOV Wd, Sn
16073 (5) 1 0 0 11110 01 1 00 110 000000 n d FMOV Xd, Dn
16074 (6) 1 0 0 11110 10 1 01 110 000000 n d FMOV Xd, Vn.D[1]
16076 if (1) {
16077 UInt ix = 0; // case
16078 if (bitSF == 0) {
16079 if (ty == BITS2(0,0) && rm == BITS2(0,0) && op == BITS3(1,1,1))
16080 ix = 1;
16081 else
16082 if (ty == BITS2(0,0) && rm == BITS2(0,0) && op == BITS3(1,1,0))
16083 ix = 4;
16084 } else {
16085 vassert(bitSF == 1);
16086 if (ty == BITS2(0,1) && rm == BITS2(0,0) && op == BITS3(1,1,1))
16087 ix = 2;
16088 else
16089 if (ty == BITS2(0,1) && rm == BITS2(0,0) && op == BITS3(1,1,0))
16090 ix = 5;
16091 else
16092 if (ty == BITS2(1,0) && rm == BITS2(0,1) && op == BITS3(1,1,1))
16093 ix = 3;
16094 else
16095 if (ty == BITS2(1,0) && rm == BITS2(0,1) && op == BITS3(1,1,0))
16096 ix = 6;
16098 if (ix > 0) {
16099 switch (ix) {
16100 case 1:
16101 putQReg128(dd, mkV128(0));
16102 putQRegLO(dd, getIReg32orZR(nn));
16103 DIP("fmov s%u, w%u\n", dd, nn);
16104 break;
16105 case 2:
16106 putQReg128(dd, mkV128(0));
16107 putQRegLO(dd, getIReg64orZR(nn));
16108 DIP("fmov d%u, x%u\n", dd, nn);
16109 break;
16110 case 3:
16111 putQRegHI64(dd, getIReg64orZR(nn));
16112 DIP("fmov v%u.d[1], x%u\n", dd, nn);
16113 break;
16114 case 4:
16115 putIReg32orZR(dd, getQRegLO(nn, Ity_I32));
16116 DIP("fmov w%u, s%u\n", dd, nn);
16117 break;
16118 case 5:
16119 putIReg64orZR(dd, getQRegLO(nn, Ity_I64));
16120 DIP("fmov x%u, d%u\n", dd, nn);
16121 break;
16122 case 6:
16123 putIReg64orZR(dd, getQRegHI64(nn));
16124 DIP("fmov x%u, v%u.d[1]\n", dd, nn);
16125 break;
16126 default:
16127 vassert(0);
16129 return True;
16131 /* undecodable; fall through */
16134 return False;
16135 # undef INSN
16139 static
16140 Bool dis_AdvSIMD_dot_product(/*MB_OUT*/DisResult* dres, UInt insn)
16142 /* by element
16143 31 30 29 28 23 21 20 15 11 10 9 4
16144 0 Q U 01111 size L m 1110 H 0 n d
16145 vector
16146 31 30 29 28 23 21 20 15 11 10 9 4
16147 0 Q U 01110 size 0 m 1001 0 1 n d
16149 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
16150 if (INSN(31,31) != 0) {
16151 return False;
16153 UInt bitQ = INSN(30,30);
16154 UInt bitU = INSN(29,29);
16155 UInt opcode1 = INSN(28,24);
16156 UInt size = INSN(23,22);
16157 UInt bitL = INSN(21,21);
16158 UInt mm = INSN(20,16);
16159 UInt opcode2 = INSN(15,12);
16160 UInt bitH = INSN(11,11);
16161 UInt opcode3 = INSN(10,10);
16162 UInt nn = INSN(9,5);
16163 UInt dd = INSN(4,0);
16164 UInt index = (bitH << 1) + bitL;
16165 vassert(index <= 3);
16167 Bool byElement;
16168 if (opcode1 == BITS5(0,1,1,1,1)
16169 && opcode2 == BITS4(1,1,1,0)
16170 && opcode3 == 0) {
16171 byElement = True;
16172 } else if (opcode1 == BITS5(0,1,1,1,0)
16173 && opcode2 == BITS4(1,0,0,1)
16174 && opcode3 == 1
16175 && bitL == 0 && bitH == 0) {
16176 byElement = False;
16177 } else {
16178 return False;
16181 // '10' is the only valid size
16182 if (size != X10) return False;
16184 IRExpr* src1 = math_MAYBE_ZERO_HI64_fromE(bitQ, getQReg128(nn));
16185 IRExpr* src2 = getQReg128(mm);
16186 if (byElement) {
16187 src2 = mkexpr(math_DUP_VEC_ELEM(src2, X10, index));
16190 IROp mulOp = bitU ? Iop_Mull8Ux8 : Iop_Mull8Sx8;
16191 IRTemp loProductSums = math_ADDLP(
16192 X01, bitU, math_BINARY_WIDENING_V128(False, mulOp, src1, src2));
16193 IRTemp hiProductSums = math_ADDLP(
16194 X01, bitU, math_BINARY_WIDENING_V128(True, mulOp, src1, src2));
16196 IRTemp res = newTempV128();
16197 assign(res, binop(Iop_Add32x4,
16198 mk_CatEvenLanes32x4(hiProductSums, loProductSums),
16199 mk_CatOddLanes32x4(hiProductSums, loProductSums)));
16201 // These instructions accumulate into the destination, but in non-q
16202 // form the upper 64 bits get forced to 0
16203 IRExpr* accVal = math_MAYBE_ZERO_HI64_fromE(bitQ, getQReg128(dd));
16204 putQReg128(dd, binop(mkVecADD(size), mkexpr(res), accVal));
16206 const HChar* nm = bitU ? "udot" : "sdot";
16207 const HChar* destWidth = nameArr_Q_SZ(bitQ, size);
16208 const HChar* srcWidth = nameArr_Q_SZ(bitQ, X00);
16209 if (byElement) {
16210 DIP("%s v%u.%s, v%u.%s, v%u.4b[%u]\n", nm,
16211 dd, destWidth,
16212 nn, srcWidth, mm, index);
16213 } else {
16214 DIP("%s v%u.%s, v%u.%s, v%u.%s\n", nm,
16215 dd, destWidth,
16216 nn, srcWidth, mm, srcWidth);
16219 return True;
16220 # undef INSN
16224 static
16225 Bool dis_ARM64_simd_and_fp(/*MB_OUT*/DisResult* dres, UInt insn,
16226 const VexArchInfo* archinfo, Bool sigill_diag)
16228 Bool ok;
16229 ok = dis_AdvSIMD_EXT(dres, insn);
16230 if (UNLIKELY(ok)) return True;
16231 ok = dis_AdvSIMD_TBL_TBX(dres, insn);
16232 if (UNLIKELY(ok)) return True;
16233 ok = dis_AdvSIMD_ZIP_UZP_TRN(dres, insn);
16234 if (UNLIKELY(ok)) return True;
16235 ok = dis_AdvSIMD_across_lanes(dres, insn);
16236 if (UNLIKELY(ok)) return True;
16237 ok = dis_AdvSIMD_copy(dres, insn);
16238 if (UNLIKELY(ok)) return True;
16239 ok = dis_AdvSIMD_modified_immediate(dres, insn);
16240 if (UNLIKELY(ok)) return True;
16241 ok = dis_AdvSIMD_scalar_copy(dres, insn);
16242 if (UNLIKELY(ok)) return True;
16243 ok = dis_AdvSIMD_scalar_pairwise(dres, insn, archinfo);
16244 if (UNLIKELY(ok)) return True;
16245 ok = dis_AdvSIMD_scalar_shift_by_imm(dres, insn);
16246 if (UNLIKELY(ok)) return True;
16247 ok = dis_AdvSIMD_scalar_three_different(dres, insn);
16248 if (UNLIKELY(ok)) return True;
16249 ok = dis_AdvSIMD_scalar_three_same(dres, insn);
16250 if (UNLIKELY(ok)) return True;
16251 ok = dis_AdvSIMD_scalar_three_same_extra(dres, insn, archinfo);
16252 if (UNLIKELY(ok)) return True;
16253 ok = dis_AdvSIMD_scalar_two_reg_misc(dres, insn);
16254 if (UNLIKELY(ok)) return True;
16255 ok = dis_AdvSIMD_scalar_two_reg_misc_fp16(dres, insn, archinfo);
16256 if (UNLIKELY(ok)) return True;
16257 ok = dis_AdvSIMD_scalar_x_indexed_element(dres, insn);
16258 if (UNLIKELY(ok)) return True;
16259 ok = dis_AdvSIMD_shift_by_immediate(dres, insn);
16260 if (UNLIKELY(ok)) return True;
16261 ok = dis_AdvSIMD_three_different(dres, insn);
16262 if (UNLIKELY(ok)) return True;
16263 ok = dis_AdvSIMD_three_same(dres, insn);
16264 if (UNLIKELY(ok)) return True;
16265 ok = dis_AdvSIMD_three_same_extra(dres, insn);
16266 if (UNLIKELY(ok)) return True;
16267 ok = dis_AdvSIMD_three_same_fp16(dres, insn, archinfo);
16268 if (UNLIKELY(ok)) return True;
16269 ok = dis_AdvSIMD_two_reg_misc(dres, insn);
16270 if (UNLIKELY(ok)) return True;
16271 ok = dis_AdvSIMD_two_reg_misc_fp16(dres, insn, archinfo);
16272 if (UNLIKELY(ok)) return True;
16273 ok = dis_AdvSIMD_vector_x_indexed_elem(dres, insn);
16274 if (UNLIKELY(ok)) return True;
16275 ok = dis_AdvSIMD_crypto_aes(dres, insn);
16276 if (UNLIKELY(ok)) return True;
16277 ok = dis_AdvSIMD_crypto_three_reg_sha(dres, insn);
16278 if (UNLIKELY(ok)) return True;
16279 ok = dis_AdvSIMD_crypto_two_reg_sha(dres, insn);
16280 if (UNLIKELY(ok)) return True;
16281 ok = dis_AdvSIMD_crypto_three_reg_sha512(dres, insn);
16282 if (UNLIKELY(ok)) return True;
16283 ok = dis_AdvSIMD_crypto_two_reg_sha512(dres, insn);
16284 if (UNLIKELY(ok)) return True;
16285 ok = dis_AdvSIMD_fp_compare(dres, insn);
16286 if (UNLIKELY(ok)) return True;
16287 ok = dis_AdvSIMD_fp_conditional_compare(dres, insn, archinfo, sigill_diag);
16288 if (UNLIKELY(ok)) return True;
16289 ok = dis_AdvSIMD_fp_conditional_select(dres, insn);
16290 if (UNLIKELY(ok)) return True;
16291 ok = dis_AdvSIMD_fp_data_proc_1_source(dres, insn);
16292 if (UNLIKELY(ok)) return True;
16293 ok = dis_AdvSIMD_fp_data_proc_2_source(dres, insn, archinfo);
16294 if (UNLIKELY(ok)) return True;
16295 ok = dis_AdvSIMD_fp_data_proc_3_source(dres, insn);
16296 if (UNLIKELY(ok)) return True;
16297 ok = dis_AdvSIMD_fp_immediate(dres, insn);
16298 if (UNLIKELY(ok)) return True;
16299 ok = dis_AdvSIMD_fp_to_from_fixedp_conv(dres, insn);
16300 if (UNLIKELY(ok)) return True;
16301 ok = dis_AdvSIMD_fp_to_from_int_conv(dres, insn);
16302 if (UNLIKELY(ok)) return True;
16303 ok = dis_AdvSIMD_dot_product(dres, insn);
16304 if (UNLIKELY(ok)) return True;
16305 return False;
16309 /*------------------------------------------------------------*/
16310 /*--- Disassemble a single ARM64 instruction ---*/
16311 /*------------------------------------------------------------*/
/* Disassemble a single ARM64 instruction into IR.  The instruction
   is located at |guest_instr| and has guest IP of
16315 |guest_PC_curr_instr|, which will have been set before the call
16316 here. Returns True iff the instruction was decoded, in which case
16317 *dres will be set accordingly, or False, in which case *dres should
16318 be ignored by the caller. */
16320 static
16321 Bool disInstr_ARM64_WRK (
16322 /*MB_OUT*/DisResult* dres,
16323 const UChar* guest_instr,
16324 const VexArchInfo* archinfo,
16325 const VexAbiInfo* abiinfo,
16326 Bool sigill_diag
16329 // A macro to fish bits out of 'insn'.
16330 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
16332 //ZZ DisResult dres;
16333 //ZZ UInt insn;
16334 //ZZ //Bool allow_VFP = False;
16335 //ZZ //UInt hwcaps = archinfo->hwcaps;
16336 //ZZ IRTemp condT; /* :: Ity_I32 */
16337 //ZZ UInt summary;
16338 //ZZ HChar dis_buf[128]; // big enough to hold LDMIA etc text
16339 //ZZ
16340 //ZZ /* What insn variants are we supporting today? */
16341 //ZZ //allow_VFP = (0 != (hwcaps & VEX_HWCAPS_ARM_VFP));
16342 //ZZ // etc etc
16344 /* Set result defaults. */
16345 dres->whatNext = Dis_Continue;
16346 dres->len = 4;
16347 dres->jk_StopHere = Ijk_INVALID;
16348 dres->hint = Dis_HintNone;
16350 /* At least this is simple on ARM64: insns are all 4 bytes long, and
16351 4-aligned. So just fish the whole thing out of memory right now
16352 and have done. */
16353 UInt insn = getUIntLittleEndianly( guest_instr );
16355 if (0) vex_printf("insn: 0x%x\n", insn);
16357 DIP("\t(arm64) 0x%llx: ", (ULong)guest_PC_curr_instr);
16359 vassert(0 == (guest_PC_curr_instr & 3ULL));
16361 /* ----------------------------------------------------------- */
16363 /* Spot "Special" instructions (see comment at top of file). */
16365 const UChar* code = guest_instr;
16366 /* Spot the 16-byte preamble:
16367 93CC0D8C ror x12, x12, #3
16368 93CC358C ror x12, x12, #13
16369 93CCCD8C ror x12, x12, #51
16370 93CCF58C ror x12, x12, #61
16372 UInt word1 = 0x93CC0D8C;
16373 UInt word2 = 0x93CC358C;
16374 UInt word3 = 0x93CCCD8C;
16375 UInt word4 = 0x93CCF58C;
16376 if (getUIntLittleEndianly(code+ 0) == word1 &&
16377 getUIntLittleEndianly(code+ 4) == word2 &&
16378 getUIntLittleEndianly(code+ 8) == word3 &&
16379 getUIntLittleEndianly(code+12) == word4) {
16380 /* Got a "Special" instruction preamble. Which one is it? */
16381 if (getUIntLittleEndianly(code+16) == 0xAA0A014A
16382 /* orr x10,x10,x10 */) {
16383 /* X3 = client_request ( X4 ) */
16384 DIP("x3 = client_request ( x4 )\n");
16385 putPC(mkU64( guest_PC_curr_instr + 20 ));
16386 dres->jk_StopHere = Ijk_ClientReq;
16387 dres->whatNext = Dis_StopHere;
16388 return True;
16390 else
16391 if (getUIntLittleEndianly(code+16) == 0xAA0B016B
16392 /* orr x11,x11,x11 */) {
16393 /* X3 = guest_NRADDR */
16394 DIP("x3 = guest_NRADDR\n");
16395 dres->len = 20;
16396 putIReg64orZR(3, IRExpr_Get( OFFB_NRADDR, Ity_I64 ));
16397 return True;
16399 else
16400 if (getUIntLittleEndianly(code+16) == 0xAA0C018C
16401 /* orr x12,x12,x12 */) {
16402 /* branch-and-link-to-noredir X8 */
16403 DIP("branch-and-link-to-noredir x8\n");
16404 putIReg64orZR(30, mkU64(guest_PC_curr_instr + 20));
16405 putPC(getIReg64orZR(8));
16406 dres->jk_StopHere = Ijk_NoRedir;
16407 dres->whatNext = Dis_StopHere;
16408 return True;
16410 else
16411 if (getUIntLittleEndianly(code+16) == 0xAA090129
16412 /* orr x9,x9,x9 */) {
16413 /* IR injection */
16414 DIP("IR injection\n");
16415 vex_inject_ir(irsb, Iend_LE);
16416 // Invalidate the current insn. The reason is that the IRop we're
16417 // injecting here can change. In which case the translation has to
16418 // be redone. For ease of handling, we simply invalidate all the
16419 // time.
16420 stmt(IRStmt_Put(OFFB_CMSTART, mkU64(guest_PC_curr_instr)));
16421 stmt(IRStmt_Put(OFFB_CMLEN, mkU64(20)));
16422 putPC(mkU64( guest_PC_curr_instr + 20 ));
16423 dres->whatNext = Dis_StopHere;
16424 dres->jk_StopHere = Ijk_InvalICache;
16425 return True;
16427 /* We don't know what it is. */
16428 return False;
16429 /*NOTREACHED*/
16433 /* ----------------------------------------------------------- */
16435 /* Main ARM64 instruction decoder starts here. */
16437 Bool ok = False;
16439 /* insn[28:25] determines the top-level grouping, so let's start
16440 off with that.
16442 For all of these dis_ARM64_ functions, we pass *dres with the
16443 normal default results "insn OK, 4 bytes long, keep decoding" so
16444 they don't need to change it. However, decodes of control-flow
16445 insns may cause *dres to change.
16447 switch (INSN(28,25)) {
16448 case BITS4(1,0,0,0): case BITS4(1,0,0,1):
16449 // Data processing - immediate
16450 ok = dis_ARM64_data_processing_immediate(dres, insn, sigill_diag);
16451 break;
16452 case BITS4(1,0,1,0): case BITS4(1,0,1,1):
16453 // Branch, exception generation and system instructions
16454 ok = dis_ARM64_branch_etc(dres, insn, archinfo, abiinfo, sigill_diag);
16455 break;
16456 case BITS4(0,1,0,0): case BITS4(0,1,1,0):
16457 case BITS4(1,1,0,0): case BITS4(1,1,1,0):
16458 // Loads and stores
16459 ok = dis_ARM64_load_store(dres, insn, abiinfo, sigill_diag);
16460 break;
16461 case BITS4(0,1,0,1): case BITS4(1,1,0,1):
16462 // Data processing - register
16463 ok = dis_ARM64_data_processing_register(dres, insn, sigill_diag);
16464 break;
16465 case BITS4(0,1,1,1): case BITS4(1,1,1,1):
16466 // Data processing - SIMD and floating point
16467 ok = dis_ARM64_simd_and_fp(dres, insn, archinfo, sigill_diag);
16468 break;
16469 case BITS4(0,0,0,0): case BITS4(0,0,0,1):
16470 case BITS4(0,0,1,0): case BITS4(0,0,1,1):
16471 // UNALLOCATED
16472 break;
16473 default:
16474 vassert(0); /* Can't happen */
16477 /* If the next-level down decoders failed, make sure |dres| didn't
16478 get changed. */
16479 if (!ok) {
16480 vassert(dres->whatNext == Dis_Continue);
16481 vassert(dres->len == 4);
16482 vassert(dres->jk_StopHere == Ijk_INVALID);
16485 return ok;
16487 # undef INSN
16491 /*------------------------------------------------------------*/
16492 /*--- Top-level fn ---*/
16493 /*------------------------------------------------------------*/
16495 /* Disassemble a single instruction into IR. The instruction
16496 is located in host memory at &guest_code[delta]. */
16498 DisResult disInstr_ARM64 ( IRSB* irsb_IN,
16499 const UChar* guest_code_IN,
16500 Long delta_IN,
16501 Addr guest_IP,
16502 VexArch guest_arch,
16503 const VexArchInfo* archinfo,
16504 const VexAbiInfo* abiinfo,
16505 VexEndness host_endness_IN,
16506 Bool sigill_diag_IN )
16508 DisResult dres;
16509 vex_bzero(&dres, sizeof(dres));
16511 /* Set globals (see top of this file) */
16512 vassert(guest_arch == VexArchARM64);
16514 irsb = irsb_IN;
16515 host_endness = host_endness_IN;
16516 guest_PC_curr_instr = (Addr64)guest_IP;
16518 /* Sanity checks */
16519 /* (x::UInt - 2) <= 15 === x >= 2 && x <= 17 (I hope) */
16520 vassert((archinfo->arm64_dMinLine_lg2_szB - 2) <= 15);
16521 vassert((archinfo->arm64_iMinLine_lg2_szB - 2) <= 15);
16523 /* Try to decode */
16524 Bool ok = disInstr_ARM64_WRK( &dres,
16525 &guest_code_IN[delta_IN],
16526 archinfo, abiinfo, sigill_diag_IN );
16527 if (ok) {
16528 /* All decode successes end up here. */
16529 vassert(dres.len == 4 || dres.len == 20);
16530 switch (dres.whatNext) {
16531 case Dis_Continue:
16532 putPC( mkU64(dres.len + guest_PC_curr_instr) );
16533 break;
16534 case Dis_StopHere:
16535 break;
16536 default:
16537 vassert(0);
16539 DIP("\n");
16540 } else {
16541 /* All decode failures end up here. */
16542 if (sigill_diag_IN) {
16543 Int i, j;
16544 UChar buf[64];
16545 UInt insn
16546 = getUIntLittleEndianly( &guest_code_IN[delta_IN] );
16547 vex_bzero(buf, sizeof(buf));
16548 for (i = j = 0; i < 32; i++) {
16549 if (i > 0) {
16550 if ((i & 7) == 0) buf[j++] = ' ';
16551 else if ((i & 3) == 0) buf[j++] = '\'';
16553 buf[j++] = (insn & (1<<(31-i))) ? '1' : '0';
16555 vex_printf("disInstr(arm64): unhandled instruction 0x%08x\n", insn);
16556 vex_printf("disInstr(arm64): %s\n", buf);
16559 /* Tell the dispatcher that this insn cannot be decoded, and so
16560 has not been executed, and (is currently) the next to be
16561 executed. PC should be up-to-date since it is made so at the
16562 start of each insn, but nevertheless be paranoid and update
16563 it again right now. */
16564 putPC( mkU64(guest_PC_curr_instr) );
16565 dres.len = 0;
16566 dres.whatNext = Dis_StopHere;
16567 dres.jk_StopHere = Ijk_NoDecode;
16569 return dres;
16573 /*--------------------------------------------------------------------*/
16574 /*--- end guest_arm64_toIR.c ---*/
16575 /*--------------------------------------------------------------------*/