/* Extraction provenance (gitweb header, not part of the original source):
   valgrind.git — VEX/priv/guest_arm_toIR.c
   blob d858c85e0bcca21da5f96d052d85e402a46dcffb */
2 /*--------------------------------------------------------------------*/
3 /*--- begin guest_arm_toIR.c ---*/
4 /*--------------------------------------------------------------------*/
6 /*
7 This file is part of Valgrind, a dynamic binary instrumentation
8 framework.
10 Copyright (C) 2004-2017 OpenWorks LLP
11 info@open-works.net
13 NEON support is
14 Copyright (C) 2010-2017 Samsung Electronics
15 contributed by Dmitry Zhurikhin <zhur@ispras.ru>
16 and Kirill Batuzov <batuzovk@ispras.ru>
18 This program is free software; you can redistribute it and/or
19 modify it under the terms of the GNU General Public License as
20 published by the Free Software Foundation; either version 2 of the
21 License, or (at your option) any later version.
23 This program is distributed in the hope that it will be useful, but
24 WITHOUT ANY WARRANTY; without even the implied warranty of
25 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
26 General Public License for more details.
28 You should have received a copy of the GNU General Public License
29 along with this program; if not, write to the Free Software
30 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
31 02110-1301, USA.
33 The GNU General Public License is contained in the file COPYING.
36 /* XXXX thumb to check:
37 that all cases where putIRegT writes r15, we generate a jump.
39 All uses of newTemp assign to an IRTemp and not a UInt
41 For all thumb loads and stores, including VFP ones, new-ITSTATE is
42 backed out before the memory op, and restored afterwards. This
43 needs to happen even after we go uncond. (and for sure it doesn't
44 happen for VFP loads/stores right now).
46 VFP on thumb: check that we exclude all r13/r15 cases that we
47 should.
49 XXXX thumb to do: improve the ITSTATE-zeroing optimisation by
50 taking into account the number of insns guarded by an IT.
52 remove the nasty hack, in the spechelper, of looking for Or32(...,
53 0xE0) in as the first arg to armg_calculate_condition, and instead
54 use Slice44 as specified in comments in the spechelper.
56 add specialisations for armg_calculate_flag_c and _v, as they
57 are moderately often needed in Thumb code.
59 Correctness: ITSTATE handling in Thumb SVCs is wrong.
61 Correctness (obscure): in m_transtab, when invalidating code
62 address ranges, invalidate up to 18 bytes after the end of the
63 range. This is because the ITSTATE optimisation at the top of
64 _THUMB_WRK below analyses up to 18 bytes before the start of any
65 given instruction, and so might depend on the invalidated area.
68 /* Limitations, etc
70 - pretty dodgy exception semantics for {LD,ST}Mxx and {LD,ST}RD.
71 These instructions are non-restartable in the case where the
72 transfer(s) fault.
74 - SWP: the restart jump back is Ijk_Boring; it should be
75 Ijk_NoRedir but that's expensive. See comments on casLE() in
76 guest_x86_toIR.c.
79 /* "Special" instructions.
81 This instruction decoder can decode four special instructions
82 which mean nothing natively (are no-ops as far as regs/mem are
83 concerned) but have meaning for supporting Valgrind. A special
84 instruction is flagged by a 16-byte preamble:
86 E1A0C1EC E1A0C6EC E1A0CEEC E1A0C9EC
87 (mov r12, r12, ROR #3; mov r12, r12, ROR #13;
88 mov r12, r12, ROR #29; mov r12, r12, ROR #19)
90 Following that, one of the following 3 are allowed
91 (standard interpretation in parentheses):
93 E18AA00A (orr r10,r10,r10) R3 = client_request ( R4 )
94 E18BB00B (orr r11,r11,r11) R3 = guest_NRADDR
95 E18CC00C (orr r12,r12,r12) branch-and-link-to-noredir R4
96 E1899009 (orr r9,r9,r9) IR injection
98 Any other bytes following the 16-byte preamble are illegal and
99 constitute a failure in instruction decoding. This all assumes
100 that the preamble will never occur except in specific code
101 fragments designed for Valgrind to catch.
104 /* Translates ARM(v5) code to IR. */
106 #include "libvex_basictypes.h"
107 #include "libvex_ir.h"
108 #include "libvex.h"
109 #include "libvex_guest_arm.h"
111 #include "main_util.h"
112 #include "main_globals.h"
113 #include "guest_generic_bb_to_IR.h"
114 #include "guest_arm_defs.h"
117 /*------------------------------------------------------------*/
118 /*--- Globals ---*/
119 /*------------------------------------------------------------*/
121 /* These are set at the start of the translation of a instruction, so
122 that we don't have to pass them around endlessly. CONST means does
123 not change during translation of the instruction.
126 /* CONST: what is the host's endianness? This has to do with float vs
127 double register accesses on VFP, but it's complex and not properly
128 thought out. */
129 static VexEndness host_endness;
131 /* CONST: The guest address for the instruction currently being
132 translated. This is the real, "decoded" address (not subject
133 to the CPSR.T kludge). */
134 static Addr32 guest_R15_curr_instr_notENC;
136 /* CONST, FOR ASSERTIONS ONLY. Indicates whether currently processed
137 insn is Thumb (True) or ARM (False). */
138 static Bool __curr_is_Thumb;
140 /* MOD: The IRSB* into which we're generating code. */
141 static IRSB* irsb;
143 /* These are to do with handling writes to r15. They are initially
144 set at the start of disInstr_ARM_WRK to indicate no update,
145 possibly updated during the routine, and examined again at the end.
146 If they have been set to indicate a r15 update then a jump is
147 generated. Note, "explicit" jumps (b, bx, etc) are generated
148 directly, not using this mechanism -- this is intended to handle
149 the implicit-style jumps resulting from (eg) assigning to r15 as
150 the result of insns we wouldn't normally consider branchy. */
152 /* MOD. Initially False; set to True iff abovementioned handling is
153 required. */
154 static Bool r15written;
156 /* MOD. Initially IRTemp_INVALID. If the r15 branch to be generated
157 is conditional, this holds the gating IRTemp :: Ity_I32. If the
158 branch to be generated is unconditional, this remains
159 IRTemp_INVALID. */
160 static IRTemp r15guard; /* :: Ity_I32, 0 or 1 */
162 /* MOD. Initially Ijk_Boring. If an r15 branch is to be generated,
163 this holds the jump kind. */
164 static IRTemp r15kind;
167 /*------------------------------------------------------------*/
168 /*--- Debugging output ---*/
169 /*------------------------------------------------------------*/
/* Trace-output macros.  Wrapped in do { } while (0) so they behave as
   a single statement: the bare 'if' form creates a dangling-else
   hazard when used as the body of an outer 'if'. */
#define DIP(format, args...)                    \
   do {                                         \
      if (vex_traceflags & VEX_TRACE_FE)        \
         vex_printf(format, ## args);           \
   } while (0)

#define DIS(buf, format, args...)               \
   do {                                         \
      if (vex_traceflags & VEX_TRACE_FE)        \
         vex_sprintf(buf, format, ## args);     \
   } while (0)

/* Assertion helpers: check that the current insn really is in the
   mode (Thumb vs ARM) the decoder believes it is in. */
#define ASSERT_IS_THUMB \
   do { vassert(__curr_is_Thumb); } while (0)

#define ASSERT_IS_ARM \
   do { vassert(! __curr_is_Thumb); } while (0)
186 /*------------------------------------------------------------*/
187 /*--- Helper bits and pieces for deconstructing the ---*/
188 /*--- arm insn stream. ---*/
189 /*------------------------------------------------------------*/
191 /* Do a little-endian load of a 32-bit word, regardless of the
192 endianness of the underlying host. */
193 static inline UInt getUIntLittleEndianly ( const UChar* p )
195 UInt w = 0;
196 w = (w << 8) | p[3];
197 w = (w << 8) | p[2];
198 w = (w << 8) | p[1];
199 w = (w << 8) | p[0];
200 return w;
203 /* Do a little-endian load of a 16-bit word, regardless of the
204 endianness of the underlying host. */
205 static inline UShort getUShortLittleEndianly ( const UChar* p )
207 UShort w = 0;
208 w = (w << 8) | p[1];
209 w = (w << 8) | p[0];
210 return w;
213 static UInt ROR32 ( UInt x, UInt sh ) {
214 vassert(sh >= 0 && sh < 32);
215 if (sh == 0)
216 return x;
217 else
218 return (x << (32-sh)) | (x >> sh);
221 static Int popcount32 ( UInt x )
223 Int res = 0, i;
224 for (i = 0; i < 32; i++) {
225 res += (x & 1);
226 x >>= 1;
228 return res;
231 static UInt setbit32 ( UInt x, Int ix, UInt b )
233 UInt mask = 1 << ix;
234 x &= ~mask;
235 x |= ((b << ix) & mask);
236 return x;
/* Assemble small bit-vectors from individual 0/1 arguments, MSB
   first.  Used by the decoder to spell out opcode fields literally. */
#define BITS2(_b1,_b0) \
   (((_b1) << 1) | (_b0))

#define BITS3(_b2,_b1,_b0) \
   (((_b2) << 2) | ((_b1) << 1) | (_b0))

#define BITS4(_b3,_b2,_b1,_b0) \
   (((_b3) << 3) | ((_b2) << 2) | ((_b1) << 1) | (_b0))

#define BITS8(_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0)  \
   ((BITS4((_b7),(_b6),(_b5),(_b4)) << 4) \
    | BITS4((_b3),(_b2),(_b1),(_b0)))

/* 5/6/7-bit variants are just zero-extended BITS8. */
#define BITS5(_b4,_b3,_b2,_b1,_b0)  \
   (BITS8(0,0,0,(_b4),(_b3),(_b2),(_b1),(_b0)))
#define BITS6(_b5,_b4,_b3,_b2,_b1,_b0)  \
   (BITS8(0,0,(_b5),(_b4),(_b3),(_b2),(_b1),(_b0)))
#define BITS7(_b6,_b5,_b4,_b3,_b2,_b1,_b0)  \
   (BITS8(0,(_b6),(_b5),(_b4),(_b3),(_b2),(_b1),(_b0)))

#define BITS9(_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0)  \
   (((_b8) << 8) \
    | BITS8((_b7),(_b6),(_b5),(_b4),(_b3),(_b2),(_b1),(_b0)))

#define BITS10(_b9,_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0)  \
   (((_b9) << 9) | ((_b8) << 8) \
    | BITS8((_b7),(_b6),(_b5),(_b4),(_b3),(_b2),(_b1),(_b0)))

#define BITS11(_b10,_b9,_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0)  \
   ( ((_b10) << 10) | ((_b9) << 9) | ((_b8) << 8) \
     | BITS8((_b7),(_b6),(_b5),(_b4),(_b3),(_b2),(_b1),(_b0)))

#define BITS12(_b11,_b10,_b9,_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0) \
   ( ((_b11) << 11) | ((_b10) << 10) | ((_b9) << 9) | ((_b8) << 8) \
     | BITS8((_b7),(_b6),(_b5),(_b4),(_b3),(_b2),(_b1),(_b0)))
/* Produce _uint[_bMax:_bMin], i.e. the inclusive bitfield from bit
   _bMin up to bit _bMax, right-justified.  The mask is computed in
   64-bit arithmetic so that a full-width slice (31:0) is safe. */
#define SLICE_UInt(_uint,_bMax,_bMin)                       \
   (( ((UInt)(_uint)) >> (_bMin))                           \
    & (UInt)((1ULL << ((_bMax) - (_bMin) + 1)) - 1ULL))
281 /*------------------------------------------------------------*/
282 /*--- Helper bits and pieces for creating IR fragments. ---*/
283 /*------------------------------------------------------------*/
285 static IRExpr* mkU64 ( ULong i )
287 return IRExpr_Const(IRConst_U64(i));
290 static IRExpr* mkU32 ( UInt i )
292 return IRExpr_Const(IRConst_U32(i));
295 static IRExpr* mkU8 ( UInt i )
297 vassert(i < 256);
298 return IRExpr_Const(IRConst_U8( (UChar)i ));
301 static IRExpr* mkexpr ( IRTemp tmp )
303 return IRExpr_RdTmp(tmp);
306 static IRExpr* unop ( IROp op, IRExpr* a )
308 return IRExpr_Unop(op, a);
311 static IRExpr* binop ( IROp op, IRExpr* a1, IRExpr* a2 )
313 return IRExpr_Binop(op, a1, a2);
316 static IRExpr* triop ( IROp op, IRExpr* a1, IRExpr* a2, IRExpr* a3 )
318 return IRExpr_Triop(op, a1, a2, a3);
321 static IRExpr* loadLE ( IRType ty, IRExpr* addr )
323 return IRExpr_Load(Iend_LE, ty, addr);
326 /* Add a statement to the list held by "irbb". */
327 static void stmt ( IRStmt* st )
329 addStmtToIRSB( irsb, st );
332 static void assign ( IRTemp dst, IRExpr* e )
334 stmt( IRStmt_WrTmp(dst, e) );
337 static void storeLE ( IRExpr* addr, IRExpr* data )
339 stmt( IRStmt_Store(Iend_LE, addr, data) );
342 static void storeGuardedLE ( IRExpr* addr, IRExpr* data, IRTemp guardT )
344 if (guardT == IRTemp_INVALID) {
345 /* unconditional */
346 storeLE(addr, data);
347 } else {
348 stmt( IRStmt_StoreG(Iend_LE, addr, data,
349 binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0))) );
353 static void loadGuardedLE ( IRTemp dst, IRLoadGOp cvt,
354 IRExpr* addr, IRExpr* alt,
355 IRTemp guardT /* :: Ity_I32, 0 or 1 */ )
357 if (guardT == IRTemp_INVALID) {
358 /* unconditional */
359 IRExpr* loaded = NULL;
360 switch (cvt) {
361 case ILGop_Ident32:
362 loaded = loadLE(Ity_I32, addr); break;
363 case ILGop_8Uto32:
364 loaded = unop(Iop_8Uto32, loadLE(Ity_I8, addr)); break;
365 case ILGop_8Sto32:
366 loaded = unop(Iop_8Sto32, loadLE(Ity_I8, addr)); break;
367 case ILGop_16Uto32:
368 loaded = unop(Iop_16Uto32, loadLE(Ity_I16, addr)); break;
369 case ILGop_16Sto32:
370 loaded = unop(Iop_16Sto32, loadLE(Ity_I16, addr)); break;
371 default:
372 vassert(0);
374 vassert(loaded != NULL);
375 assign(dst, loaded);
376 } else {
377 /* Generate a guarded load into 'dst', but apply 'cvt' to the
378 loaded data before putting the data in 'dst'. If the load
379 does not take place, 'alt' is placed directly in 'dst'. */
380 stmt( IRStmt_LoadG(Iend_LE, cvt, dst, addr, alt,
381 binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0))) );
385 /* Generate a new temporary of the given type. */
386 static IRTemp newTemp ( IRType ty )
388 vassert(isPlausibleIRType(ty));
389 return newIRTemp( irsb->tyenv, ty );
392 /* Produces a value in 0 .. 3, which is encoded as per the type
393 IRRoundingMode. */
394 static IRExpr* /* :: Ity_I32 */ get_FAKE_roundingmode ( void )
396 return mkU32(Irrm_NEAREST);
399 /* Generate an expression for SRC rotated right by ROT. */
400 static IRExpr* genROR32( IRTemp src, Int rot )
402 vassert(rot >= 0 && rot < 32);
403 if (rot == 0)
404 return mkexpr(src);
405 return
406 binop(Iop_Or32,
407 binop(Iop_Shl32, mkexpr(src), mkU8(32 - rot)),
408 binop(Iop_Shr32, mkexpr(src), mkU8(rot)));
411 static IRExpr* mkU128 ( ULong i )
413 return binop(Iop_64HLtoV128, mkU64(i), mkU64(i));
416 /* Generate a 4-aligned version of the given expression if
417 the given condition is true. Else return it unchanged. */
418 static IRExpr* align4if ( IRExpr* e, Bool b )
420 if (b)
421 return binop(Iop_And32, e, mkU32(~3));
422 else
423 return e;
427 /*------------------------------------------------------------*/
428 /*--- Helpers for accessing guest registers. ---*/
429 /*------------------------------------------------------------*/
/* Offsets of the guest-state fields in VexGuestARMState, used with
   IRExpr_Get / IRStmt_Put below. */

/* Integer registers r0..r14, plus r15 with its Thumb-bit encoding. */
#define OFFB_R0       offsetof(VexGuestARMState,guest_R0)
#define OFFB_R1       offsetof(VexGuestARMState,guest_R1)
#define OFFB_R2       offsetof(VexGuestARMState,guest_R2)
#define OFFB_R3       offsetof(VexGuestARMState,guest_R3)
#define OFFB_R4       offsetof(VexGuestARMState,guest_R4)
#define OFFB_R5       offsetof(VexGuestARMState,guest_R5)
#define OFFB_R6       offsetof(VexGuestARMState,guest_R6)
#define OFFB_R7       offsetof(VexGuestARMState,guest_R7)
#define OFFB_R8       offsetof(VexGuestARMState,guest_R8)
#define OFFB_R9       offsetof(VexGuestARMState,guest_R9)
#define OFFB_R10      offsetof(VexGuestARMState,guest_R10)
#define OFFB_R11      offsetof(VexGuestARMState,guest_R11)
#define OFFB_R12      offsetof(VexGuestARMState,guest_R12)
#define OFFB_R13      offsetof(VexGuestARMState,guest_R13)
#define OFFB_R14      offsetof(VexGuestARMState,guest_R14)
#define OFFB_R15T     offsetof(VexGuestARMState,guest_R15T)

/* Thunk for lazy condition-code evaluation, and the no-redirect addr. */
#define OFFB_CC_OP    offsetof(VexGuestARMState,guest_CC_OP)
#define OFFB_CC_DEP1  offsetof(VexGuestARMState,guest_CC_DEP1)
#define OFFB_CC_DEP2  offsetof(VexGuestARMState,guest_CC_DEP2)
#define OFFB_CC_NDEP  offsetof(VexGuestARMState,guest_CC_NDEP)
#define OFFB_NRADDR   offsetof(VexGuestARMState,guest_NRADDR)

/* VFP/Neon double registers d0..d31. */
#define OFFB_D0       offsetof(VexGuestARMState,guest_D0)
#define OFFB_D1       offsetof(VexGuestARMState,guest_D1)
#define OFFB_D2       offsetof(VexGuestARMState,guest_D2)
#define OFFB_D3       offsetof(VexGuestARMState,guest_D3)
#define OFFB_D4       offsetof(VexGuestARMState,guest_D4)
#define OFFB_D5       offsetof(VexGuestARMState,guest_D5)
#define OFFB_D6       offsetof(VexGuestARMState,guest_D6)
#define OFFB_D7       offsetof(VexGuestARMState,guest_D7)
#define OFFB_D8       offsetof(VexGuestARMState,guest_D8)
#define OFFB_D9       offsetof(VexGuestARMState,guest_D9)
#define OFFB_D10      offsetof(VexGuestARMState,guest_D10)
#define OFFB_D11      offsetof(VexGuestARMState,guest_D11)
#define OFFB_D12      offsetof(VexGuestARMState,guest_D12)
#define OFFB_D13      offsetof(VexGuestARMState,guest_D13)
#define OFFB_D14      offsetof(VexGuestARMState,guest_D14)
#define OFFB_D15      offsetof(VexGuestARMState,guest_D15)
#define OFFB_D16      offsetof(VexGuestARMState,guest_D16)
#define OFFB_D17      offsetof(VexGuestARMState,guest_D17)
#define OFFB_D18      offsetof(VexGuestARMState,guest_D18)
#define OFFB_D19      offsetof(VexGuestARMState,guest_D19)
#define OFFB_D20      offsetof(VexGuestARMState,guest_D20)
#define OFFB_D21      offsetof(VexGuestARMState,guest_D21)
#define OFFB_D22      offsetof(VexGuestARMState,guest_D22)
#define OFFB_D23      offsetof(VexGuestARMState,guest_D23)
#define OFFB_D24      offsetof(VexGuestARMState,guest_D24)
#define OFFB_D25      offsetof(VexGuestARMState,guest_D25)
#define OFFB_D26      offsetof(VexGuestARMState,guest_D26)
#define OFFB_D27      offsetof(VexGuestARMState,guest_D27)
#define OFFB_D28      offsetof(VexGuestARMState,guest_D28)
#define OFFB_D29      offsetof(VexGuestARMState,guest_D29)
#define OFFB_D30      offsetof(VexGuestARMState,guest_D30)
#define OFFB_D31      offsetof(VexGuestARMState,guest_D31)

/* Miscellaneous state: FP status, thread registers, IT state, and the
   saturation (Q) / greater-or-equal (GE) APSR flags. */
#define OFFB_FPSCR    offsetof(VexGuestARMState,guest_FPSCR)
#define OFFB_TPIDRURO offsetof(VexGuestARMState,guest_TPIDRURO)
#define OFFB_TPIDRURW offsetof(VexGuestARMState,guest_TPIDRURW)
#define OFFB_ITSTATE  offsetof(VexGuestARMState,guest_ITSTATE)
#define OFFB_QFLAG32  offsetof(VexGuestARMState,guest_QFLAG32)
#define OFFB_GEFLAG0  offsetof(VexGuestARMState,guest_GEFLAG0)
#define OFFB_GEFLAG1  offsetof(VexGuestARMState,guest_GEFLAG1)
#define OFFB_GEFLAG2  offsetof(VexGuestARMState,guest_GEFLAG2)
#define OFFB_GEFLAG3  offsetof(VexGuestARMState,guest_GEFLAG3)

/* Cache-maintenance (translation invalidation) range. */
#define OFFB_CMSTART  offsetof(VexGuestARMState,guest_CMSTART)
#define OFFB_CMLEN    offsetof(VexGuestARMState,guest_CMLEN)
501 /* ---------------- Integer registers ---------------- */
503 static Int integerGuestRegOffset ( UInt iregNo )
505 /* Do we care about endianness here? We do if sub-parts of integer
506 registers are accessed, but I don't think that ever happens on
507 ARM. */
508 switch (iregNo) {
509 case 0: return OFFB_R0;
510 case 1: return OFFB_R1;
511 case 2: return OFFB_R2;
512 case 3: return OFFB_R3;
513 case 4: return OFFB_R4;
514 case 5: return OFFB_R5;
515 case 6: return OFFB_R6;
516 case 7: return OFFB_R7;
517 case 8: return OFFB_R8;
518 case 9: return OFFB_R9;
519 case 10: return OFFB_R10;
520 case 11: return OFFB_R11;
521 case 12: return OFFB_R12;
522 case 13: return OFFB_R13;
523 case 14: return OFFB_R14;
524 case 15: return OFFB_R15T;
525 default: vassert(0);
529 /* Plain ("low level") read from a reg; no +8 offset magic for r15. */
530 static IRExpr* llGetIReg ( UInt iregNo )
532 vassert(iregNo < 16);
533 return IRExpr_Get( integerGuestRegOffset(iregNo), Ity_I32 );
536 /* Architected read from a reg in ARM mode. This automagically adds 8
537 to all reads of r15. */
538 static IRExpr* getIRegA ( UInt iregNo )
540 IRExpr* e;
541 ASSERT_IS_ARM;
542 vassert(iregNo < 16);
543 if (iregNo == 15) {
544 /* If asked for r15, don't read the guest state value, as that
545 may not be up to date in the case where loop unrolling has
546 happened, because the first insn's write to the block is
547 omitted; hence in the 2nd and subsequent unrollings we don't
548 have a correct value in guest r15. Instead produce the
549 constant that we know would be produced at this point. */
550 vassert(0 == (guest_R15_curr_instr_notENC & 3));
551 e = mkU32(guest_R15_curr_instr_notENC + 8);
552 } else {
553 e = IRExpr_Get( integerGuestRegOffset(iregNo), Ity_I32 );
555 return e;
558 /* Architected read from a reg in Thumb mode. This automagically adds
559 4 to all reads of r15. */
560 static IRExpr* getIRegT ( UInt iregNo )
562 IRExpr* e;
563 ASSERT_IS_THUMB;
564 vassert(iregNo < 16);
565 if (iregNo == 15) {
566 /* Ditto comment in getIReg. */
567 vassert(0 == (guest_R15_curr_instr_notENC & 1));
568 e = mkU32(guest_R15_curr_instr_notENC + 4);
569 } else {
570 e = IRExpr_Get( integerGuestRegOffset(iregNo), Ity_I32 );
572 return e;
575 /* Plain ("low level") write to a reg; no jump or alignment magic for
576 r15. */
577 static void llPutIReg ( UInt iregNo, IRExpr* e )
579 vassert(iregNo < 16);
580 vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32);
581 stmt( IRStmt_Put(integerGuestRegOffset(iregNo), e) );
584 /* Architected write to an integer register in ARM mode. If it is to
585 r15, record info so at the end of this insn's translation, a branch
586 to it can be made. Also handles conditional writes to the
587 register: if guardT == IRTemp_INVALID then the write is
588 unconditional. If writing r15, also 4-align it. */
589 static void putIRegA ( UInt iregNo,
590 IRExpr* e,
591 IRTemp guardT /* :: Ity_I32, 0 or 1 */,
592 IRJumpKind jk /* if a jump is generated */ )
594 /* if writing r15, force e to be 4-aligned. */
595 // INTERWORKING FIXME. this needs to be relaxed so that
596 // puts caused by LDMxx which load r15 interwork right.
597 // but is no aligned too relaxed?
598 //if (iregNo == 15)
599 // e = binop(Iop_And32, e, mkU32(~3));
600 ASSERT_IS_ARM;
601 /* So, generate either an unconditional or a conditional write to
602 the reg. */
603 if (guardT == IRTemp_INVALID) {
604 /* unconditional write */
605 llPutIReg( iregNo, e );
606 } else {
607 llPutIReg( iregNo,
608 IRExpr_ITE( binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0)),
609 e, llGetIReg(iregNo) ));
611 if (iregNo == 15) {
612 // assert against competing r15 updates. Shouldn't
613 // happen; should be ruled out by the instr matching
614 // logic.
615 vassert(r15written == False);
616 vassert(r15guard == IRTemp_INVALID);
617 vassert(r15kind == Ijk_Boring);
618 r15written = True;
619 r15guard = guardT;
620 r15kind = jk;
625 /* Architected write to an integer register in Thumb mode. Writes to
626 r15 are not allowed. Handles conditional writes to the register:
627 if guardT == IRTemp_INVALID then the write is unconditional. */
628 static void putIRegT ( UInt iregNo,
629 IRExpr* e,
630 IRTemp guardT /* :: Ity_I32, 0 or 1 */ )
632 /* So, generate either an unconditional or a conditional write to
633 the reg. */
634 ASSERT_IS_THUMB;
635 vassert(iregNo >= 0 && iregNo <= 14);
636 if (guardT == IRTemp_INVALID) {
637 /* unconditional write */
638 llPutIReg( iregNo, e );
639 } else {
640 llPutIReg( iregNo,
641 IRExpr_ITE( binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0)),
642 e, llGetIReg(iregNo) ));
647 /* Thumb16 and Thumb32 only.
648 Returns true if reg is 13 or 15. Implements the BadReg
649 predicate in the ARM ARM. */
650 static Bool isBadRegT ( UInt r )
652 vassert(r <= 15);
653 ASSERT_IS_THUMB;
654 return r == 13 || r == 15;
658 /* ---------------- Double registers ---------------- */
660 static Int doubleGuestRegOffset ( UInt dregNo )
662 /* Do we care about endianness here? Probably do if we ever get
663 into the situation of dealing with the single-precision VFP
664 registers. */
665 switch (dregNo) {
666 case 0: return OFFB_D0;
667 case 1: return OFFB_D1;
668 case 2: return OFFB_D2;
669 case 3: return OFFB_D3;
670 case 4: return OFFB_D4;
671 case 5: return OFFB_D5;
672 case 6: return OFFB_D6;
673 case 7: return OFFB_D7;
674 case 8: return OFFB_D8;
675 case 9: return OFFB_D9;
676 case 10: return OFFB_D10;
677 case 11: return OFFB_D11;
678 case 12: return OFFB_D12;
679 case 13: return OFFB_D13;
680 case 14: return OFFB_D14;
681 case 15: return OFFB_D15;
682 case 16: return OFFB_D16;
683 case 17: return OFFB_D17;
684 case 18: return OFFB_D18;
685 case 19: return OFFB_D19;
686 case 20: return OFFB_D20;
687 case 21: return OFFB_D21;
688 case 22: return OFFB_D22;
689 case 23: return OFFB_D23;
690 case 24: return OFFB_D24;
691 case 25: return OFFB_D25;
692 case 26: return OFFB_D26;
693 case 27: return OFFB_D27;
694 case 28: return OFFB_D28;
695 case 29: return OFFB_D29;
696 case 30: return OFFB_D30;
697 case 31: return OFFB_D31;
698 default: vassert(0);
702 /* Plain ("low level") read from a VFP Dreg. */
703 static IRExpr* llGetDReg ( UInt dregNo )
705 vassert(dregNo < 32);
706 return IRExpr_Get( doubleGuestRegOffset(dregNo), Ity_F64 );
709 /* Architected read from a VFP Dreg. */
710 static IRExpr* getDReg ( UInt dregNo ) {
711 return llGetDReg( dregNo );
714 /* Plain ("low level") write to a VFP Dreg. */
715 static void llPutDReg ( UInt dregNo, IRExpr* e )
717 vassert(dregNo < 32);
718 vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_F64);
719 stmt( IRStmt_Put(doubleGuestRegOffset(dregNo), e) );
722 /* Architected write to a VFP Dreg. Handles conditional writes to the
723 register: if guardT == IRTemp_INVALID then the write is
724 unconditional. */
725 static void putDReg ( UInt dregNo,
726 IRExpr* e,
727 IRTemp guardT /* :: Ity_I32, 0 or 1 */)
729 /* So, generate either an unconditional or a conditional write to
730 the reg. */
731 if (guardT == IRTemp_INVALID) {
732 /* unconditional write */
733 llPutDReg( dregNo, e );
734 } else {
735 llPutDReg( dregNo,
736 IRExpr_ITE( binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0)),
737 e, llGetDReg(dregNo) ));
741 /* And now exactly the same stuff all over again, but this time
742 taking/returning I64 rather than F64, to support 64-bit Neon
743 ops. */
745 /* Plain ("low level") read from a Neon Integer Dreg. */
746 static IRExpr* llGetDRegI64 ( UInt dregNo )
748 vassert(dregNo < 32);
749 return IRExpr_Get( doubleGuestRegOffset(dregNo), Ity_I64 );
752 /* Architected read from a Neon Integer Dreg. */
753 static IRExpr* getDRegI64 ( UInt dregNo ) {
754 return llGetDRegI64( dregNo );
757 /* Plain ("low level") write to a Neon Integer Dreg. */
758 static void llPutDRegI64 ( UInt dregNo, IRExpr* e )
760 vassert(dregNo < 32);
761 vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I64);
762 stmt( IRStmt_Put(doubleGuestRegOffset(dregNo), e) );
765 /* Architected write to a Neon Integer Dreg. Handles conditional
766 writes to the register: if guardT == IRTemp_INVALID then the write
767 is unconditional. */
768 static void putDRegI64 ( UInt dregNo,
769 IRExpr* e,
770 IRTemp guardT /* :: Ity_I32, 0 or 1 */)
772 /* So, generate either an unconditional or a conditional write to
773 the reg. */
774 if (guardT == IRTemp_INVALID) {
775 /* unconditional write */
776 llPutDRegI64( dregNo, e );
777 } else {
778 llPutDRegI64( dregNo,
779 IRExpr_ITE( binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0)),
780 e, llGetDRegI64(dregNo) ));
784 /* ---------------- Quad registers ---------------- */
786 static Int quadGuestRegOffset ( UInt qregNo )
788 /* Do we care about endianness here? Probably do if we ever get
789 into the situation of dealing with the 64 bit Neon registers. */
790 switch (qregNo) {
791 case 0: return OFFB_D0;
792 case 1: return OFFB_D2;
793 case 2: return OFFB_D4;
794 case 3: return OFFB_D6;
795 case 4: return OFFB_D8;
796 case 5: return OFFB_D10;
797 case 6: return OFFB_D12;
798 case 7: return OFFB_D14;
799 case 8: return OFFB_D16;
800 case 9: return OFFB_D18;
801 case 10: return OFFB_D20;
802 case 11: return OFFB_D22;
803 case 12: return OFFB_D24;
804 case 13: return OFFB_D26;
805 case 14: return OFFB_D28;
806 case 15: return OFFB_D30;
807 default: vassert(0);
811 /* Plain ("low level") read from a Neon Qreg. */
812 static IRExpr* llGetQReg ( UInt qregNo )
814 vassert(qregNo < 16);
815 return IRExpr_Get( quadGuestRegOffset(qregNo), Ity_V128 );
818 /* Architected read from a Neon Qreg. */
819 static IRExpr* getQReg ( UInt qregNo ) {
820 return llGetQReg( qregNo );
823 /* Plain ("low level") write to a Neon Qreg. */
824 static void llPutQReg ( UInt qregNo, IRExpr* e )
826 vassert(qregNo < 16);
827 vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_V128);
828 stmt( IRStmt_Put(quadGuestRegOffset(qregNo), e) );
831 /* Architected write to a Neon Qreg. Handles conditional writes to the
832 register: if guardT == IRTemp_INVALID then the write is
833 unconditional. */
834 static void putQReg ( UInt qregNo,
835 IRExpr* e,
836 IRTemp guardT /* :: Ity_I32, 0 or 1 */)
838 /* So, generate either an unconditional or a conditional write to
839 the reg. */
840 if (guardT == IRTemp_INVALID) {
841 /* unconditional write */
842 llPutQReg( qregNo, e );
843 } else {
844 llPutQReg( qregNo,
845 IRExpr_ITE( binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0)),
846 e, llGetQReg(qregNo) ));
851 /* ---------------- Float registers ---------------- */
853 static Int floatGuestRegOffset ( UInt fregNo )
855 /* Start with the offset of the containing double, and then correct
856 for endianness. Actually this is completely bogus and needs
857 careful thought. */
858 Int off;
859 /* NB! Limit is 64, not 32, because we might be pulling F32 bits
860 out of SIMD registers, and there are 16 SIMD registers each of
861 128 bits (4 x F32). */
862 vassert(fregNo < 64);
863 off = doubleGuestRegOffset(fregNo >> 1);
864 if (host_endness == VexEndnessLE) {
865 if (fregNo & 1)
866 off += 4;
867 } else {
868 vassert(0);
870 return off;
873 /* Plain ("low level") read from a VFP Freg. */
874 static IRExpr* llGetFReg ( UInt fregNo )
876 vassert(fregNo < 32);
877 return IRExpr_Get( floatGuestRegOffset(fregNo), Ity_F32 );
880 static IRExpr* llGetFReg_up_to_64 ( UInt fregNo )
882 vassert(fregNo < 64);
883 return IRExpr_Get( floatGuestRegOffset(fregNo), Ity_F32 );
886 /* Architected read from a VFP Freg. */
887 static IRExpr* getFReg ( UInt fregNo ) {
888 return llGetFReg( fregNo );
891 /* Plain ("low level") write to a VFP Freg. */
892 static void llPutFReg ( UInt fregNo, IRExpr* e )
894 vassert(fregNo < 32);
895 vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_F32);
896 stmt( IRStmt_Put(floatGuestRegOffset(fregNo), e) );
899 static void llPutFReg_up_to_64 ( UInt fregNo, IRExpr* e )
901 vassert(fregNo < 64);
902 vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_F32);
903 stmt( IRStmt_Put(floatGuestRegOffset(fregNo), e) );
906 /* Architected write to a VFP Freg. Handles conditional writes to the
907 register: if guardT == IRTemp_INVALID then the write is
908 unconditional. */
909 static void putFReg ( UInt fregNo,
910 IRExpr* e,
911 IRTemp guardT /* :: Ity_I32, 0 or 1 */)
913 /* So, generate either an unconditional or a conditional write to
914 the reg. */
915 if (guardT == IRTemp_INVALID) {
916 /* unconditional write */
917 llPutFReg( fregNo, e );
918 } else {
919 llPutFReg( fregNo,
920 IRExpr_ITE( binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0)),
921 e, llGetFReg(fregNo) ));
926 /* ---------------- Misc registers ---------------- */
928 static void putMiscReg32 ( UInt gsoffset,
929 IRExpr* e, /* :: Ity_I32 */
930 IRTemp guardT /* :: Ity_I32, 0 or 1 */)
932 switch (gsoffset) {
933 case OFFB_FPSCR: break;
934 case OFFB_QFLAG32: break;
935 case OFFB_GEFLAG0: break;
936 case OFFB_GEFLAG1: break;
937 case OFFB_GEFLAG2: break;
938 case OFFB_GEFLAG3: break;
939 case OFFB_TPIDRURW: break;
940 default: vassert(0); /* awaiting more cases */
942 vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32);
944 if (guardT == IRTemp_INVALID) {
945 /* unconditional write */
946 stmt(IRStmt_Put(gsoffset, e));
947 } else {
948 stmt(IRStmt_Put(
949 gsoffset,
950 IRExpr_ITE( binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0)),
951 e, IRExpr_Get(gsoffset, Ity_I32) )
956 static IRTemp get_ITSTATE ( void )
958 ASSERT_IS_THUMB;
959 IRTemp t = newTemp(Ity_I32);
960 assign(t, IRExpr_Get( OFFB_ITSTATE, Ity_I32));
961 return t;
964 static void put_ITSTATE ( IRTemp t )
966 ASSERT_IS_THUMB;
967 stmt( IRStmt_Put( OFFB_ITSTATE, mkexpr(t)) );
970 static IRTemp get_QFLAG32 ( void )
972 IRTemp t = newTemp(Ity_I32);
973 assign(t, IRExpr_Get( OFFB_QFLAG32, Ity_I32));
974 return t;
977 static void put_QFLAG32 ( IRTemp t, IRTemp condT )
979 putMiscReg32( OFFB_QFLAG32, mkexpr(t), condT );
982 /* Stickily set the 'Q' flag (APSR bit 27) of the APSR (Application Program
983 Status Register) to indicate that overflow or saturation occurred.
984 Nb: t must be zero to denote no saturation, and any nonzero
985 value to indicate saturation. */
986 static void or_into_QFLAG32 ( IRExpr* e, IRTemp condT )
988 IRTemp old = get_QFLAG32();
989 IRTemp nyu = newTemp(Ity_I32);
990 assign(nyu, binop(Iop_Or32, mkexpr(old), e) );
991 put_QFLAG32(nyu, condT);
994 /* Generate code to set APSR.GE[flagNo]. Each fn call sets 1 bit.
995 flagNo: which flag bit to set [3...0]
996 lowbits_to_ignore: 0 = look at all 32 bits
997 8 = look at top 24 bits only
998 16 = look at top 16 bits only
999 31 = look at the top bit only
1000 e: input value to be evaluated.
1001 The new value is taken from 'e' with the lowest 'lowbits_to_ignore'
1002 masked out. If the resulting value is zero then the GE flag is
1003 set to 0; any other value sets the flag to 1. */
1004 static void put_GEFLAG32 ( Int flagNo, /* 0, 1, 2 or 3 */
1005 Int lowbits_to_ignore, /* 0, 8, 16 or 31 */
1006 IRExpr* e, /* Ity_I32 */
1007 IRTemp condT )
1009 vassert( flagNo >= 0 && flagNo <= 3 );
1010 vassert( lowbits_to_ignore == 0 ||
1011 lowbits_to_ignore == 8 ||
1012 lowbits_to_ignore == 16 ||
1013 lowbits_to_ignore == 31 );
1014 IRTemp masked = newTemp(Ity_I32);
1015 assign(masked, binop(Iop_Shr32, e, mkU8(lowbits_to_ignore)));
1017 switch (flagNo) {
1018 case 0: putMiscReg32(OFFB_GEFLAG0, mkexpr(masked), condT); break;
1019 case 1: putMiscReg32(OFFB_GEFLAG1, mkexpr(masked), condT); break;
1020 case 2: putMiscReg32(OFFB_GEFLAG2, mkexpr(masked), condT); break;
1021 case 3: putMiscReg32(OFFB_GEFLAG3, mkexpr(masked), condT); break;
1022 default: vassert(0);
1026 /* Return the (32-bit, zero-or-nonzero representation scheme) of
1027 the specified GE flag. */
1028 static IRExpr* get_GEFLAG32( Int flagNo /* 0, 1, 2, 3 */ )
1030 switch (flagNo) {
1031 case 0: return IRExpr_Get( OFFB_GEFLAG0, Ity_I32 );
1032 case 1: return IRExpr_Get( OFFB_GEFLAG1, Ity_I32 );
1033 case 2: return IRExpr_Get( OFFB_GEFLAG2, Ity_I32 );
1034 case 3: return IRExpr_Get( OFFB_GEFLAG3, Ity_I32 );
1035 default: vassert(0);
1039 /* Set all 4 GE flags from the given 32-bit value as follows: GE 3 and
1040 2 are set from bit 31 of the value, and GE 1 and 0 are set from bit
1041 15 of the value. All other bits are ignored. */
1042 static void set_GE_32_10_from_bits_31_15 ( IRTemp t32, IRTemp condT )
1044 IRTemp ge10 = newTemp(Ity_I32);
1045 IRTemp ge32 = newTemp(Ity_I32);
1046 assign(ge10, binop(Iop_And32, mkexpr(t32), mkU32(0x00008000)));
1047 assign(ge32, binop(Iop_And32, mkexpr(t32), mkU32(0x80000000)));
1048 put_GEFLAG32( 0, 0, mkexpr(ge10), condT );
1049 put_GEFLAG32( 1, 0, mkexpr(ge10), condT );
1050 put_GEFLAG32( 2, 0, mkexpr(ge32), condT );
1051 put_GEFLAG32( 3, 0, mkexpr(ge32), condT );
1055 /* Set all 4 GE flags from the given 32-bit value as follows: GE 3
1056 from bit 31, GE 2 from bit 23, GE 1 from bit 15, and GE0 from
1057 bit 7. All other bits are ignored. */
1058 static void set_GE_3_2_1_0_from_bits_31_23_15_7 ( IRTemp t32, IRTemp condT )
1060 IRTemp ge0 = newTemp(Ity_I32);
1061 IRTemp ge1 = newTemp(Ity_I32);
1062 IRTemp ge2 = newTemp(Ity_I32);
1063 IRTemp ge3 = newTemp(Ity_I32);
1064 assign(ge0, binop(Iop_And32, mkexpr(t32), mkU32(0x00000080)));
1065 assign(ge1, binop(Iop_And32, mkexpr(t32), mkU32(0x00008000)));
1066 assign(ge2, binop(Iop_And32, mkexpr(t32), mkU32(0x00800000)));
1067 assign(ge3, binop(Iop_And32, mkexpr(t32), mkU32(0x80000000)));
1068 put_GEFLAG32( 0, 0, mkexpr(ge0), condT );
1069 put_GEFLAG32( 1, 0, mkexpr(ge1), condT );
1070 put_GEFLAG32( 2, 0, mkexpr(ge2), condT );
1071 put_GEFLAG32( 3, 0, mkexpr(ge3), condT );
1075 /* ---------------- FPSCR stuff ---------------- */
1077 /* Generate IR to get hold of the rounding mode bits in FPSCR, and
1078 convert them to IR format. Bind the final result to the
1079 returned temp. */
1080 static IRTemp /* :: Ity_I32 */ mk_get_IR_rounding_mode ( void )
1082 /* The ARMvfp encoding for rounding mode bits is:
1083 00 to nearest
1084 01 to +infinity
1085 10 to -infinity
1086 11 to zero
1087 We need to convert that to the IR encoding:
1088 00 to nearest (the default)
1089 10 to +infinity
1090 01 to -infinity
1091 11 to zero
1092 Which can be done by swapping bits 0 and 1.
1093 The rmode bits are at 23:22 in FPSCR.
1095 IRTemp armEncd = newTemp(Ity_I32);
1096 IRTemp swapped = newTemp(Ity_I32);
1097 /* Fish FPSCR[23:22] out, and slide to bottom. Doesn't matter that
1098 we don't zero out bits 24 and above, since the assignment to
1099 'swapped' will mask them out anyway. */
1100 assign(armEncd,
1101 binop(Iop_Shr32, IRExpr_Get(OFFB_FPSCR, Ity_I32), mkU8(22)));
1102 /* Now swap them. */
1103 assign(swapped,
1104 binop(Iop_Or32,
1105 binop(Iop_And32,
1106 binop(Iop_Shl32, mkexpr(armEncd), mkU8(1)),
1107 mkU32(2)),
1108 binop(Iop_And32,
1109 binop(Iop_Shr32, mkexpr(armEncd), mkU8(1)),
1110 mkU32(1))
1112 return swapped;
1116 /*------------------------------------------------------------*/
1117 /*--- Helpers for flag handling and conditional insns ---*/
1118 /*------------------------------------------------------------*/
/* Produce the printable assembler suffix for 'cond', in braces,
   e.g. "{eq}".  ARMCondAL yields the empty string since AL is the
   default condition. */
1120 static const HChar* name_ARMCondcode ( ARMCondcode cond )
1122 switch (cond) {
1123 case ARMCondEQ: return "{eq}";
1124 case ARMCondNE: return "{ne}";
1125 case ARMCondHS: return "{hs}"; // or 'cs'
1126 case ARMCondLO: return "{lo}"; // or 'cc'
1127 case ARMCondMI: return "{mi}";
1128 case ARMCondPL: return "{pl}";
1129 case ARMCondVS: return "{vs}";
1130 case ARMCondVC: return "{vc}";
1131 case ARMCondHI: return "{hi}";
1132 case ARMCondLS: return "{ls}";
1133 case ARMCondGE: return "{ge}";
1134 case ARMCondLT: return "{lt}";
1135 case ARMCondGT: return "{gt}";
1136 case ARMCondLE: return "{le}";
1137 case ARMCondAL: return ""; // {al}: is the default
1138 case ARMCondNV: return "{nv}";
1139 default: vpanic("name_ARMCondcode");
1142 /* and a handy shorthand for it */
1143 static const HChar* nCC ( ARMCondcode cond ) {
1144 return name_ARMCondcode(cond);
1148 /* Build IR to calculate some particular condition from stored
1149 CC_OP/CC_DEP1/CC_DEP2/CC_NDEP. Returns an expression of type
1150 Ity_I32, suitable for narrowing. Although the return type is
1151 Ity_I32, the returned value is either 0 or 1. 'cond' must be
1152 :: Ity_I32 and must denote the condition to compute in
1153 bits 7:4, and be zero everywhere else.
1155 static IRExpr* mk_armg_calculate_condition_dyn ( IRExpr* cond )
1157 vassert(typeOfIRExpr(irsb->tyenv, cond) == Ity_I32);
1158 /* And 'cond' had better produce a value in which only bits 7:4 are
1159 nonzero. However, obviously we can't assert for that. */
1161 /* So what we're constructing for the first argument is
1162 "(cond << 4) | stored-operation".
1163 However, as per comments above, 'cond' must be supplied
1164 pre-shifted to this function.
1166 This pairing scheme requires that the ARM_CC_OP_ values all fit
1167 in 4 bits. Hence we are passing a (COND, OP) pair in the lowest
1168 8 bits of the first argument. */
1169 IRExpr** args
1170 = mkIRExprVec_4(
1171 binop(Iop_Or32, IRExpr_Get(OFFB_CC_OP, Ity_I32), cond),
1172 IRExpr_Get(OFFB_CC_DEP1, Ity_I32),
1173 IRExpr_Get(OFFB_CC_DEP2, Ity_I32),
1174 IRExpr_Get(OFFB_CC_NDEP, Ity_I32)
1176 IRExpr* call
1177 = mkIRExprCCall(
1178 Ity_I32,
1179 0/*regparm*/,
1180 "armg_calculate_condition", &armg_calculate_condition,
1181 args
1184 /* Exclude the requested condition, OP and NDEP from definedness
1185 checking. We're only interested in DEP1 and DEP2. */
/* mcx_mask bit i marks helper argument i as exempt from Memcheck's
   definedness tracking: bit 0 is args[0] (the cond|op word) and
   bit 3 is args[3] (NDEP). */
1186 call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
1187 return call;
1191 /* Build IR to calculate some particular condition from stored
1192 CC_OP/CC_DEP1/CC_DEP2/CC_NDEP. Returns an expression of type
1193 Ity_I32, suitable for narrowing. Although the return type is
1194 Ity_I32, the returned value is either 0 or 1.
1196 static IRExpr* mk_armg_calculate_condition ( ARMCondcode cond )
1198 /* First arg is "(cond << 4) | condition". This requires that the
1199 ARM_CC_OP_ values all fit in 4 bits. Hence we are passing a
1200 (COND, OP) pair in the lowest 8 bits of the first argument. */
/* The OP half of the pair is OR'd in by the _dyn variant; here we
   only supply the (static) condition, pre-shifted into bits 7:4. */
1201 vassert(cond >= 0 && cond <= 15);
1202 return mk_armg_calculate_condition_dyn( mkU32(cond << 4) );
1206 /* Build IR to calculate just the carry flag from stored
1207 CC_OP/CC_DEP1/CC_DEP2/CC_NDEP. Returns an expression ::
1208 Ity_I32. */
1209 static IRExpr* mk_armg_calculate_flag_c ( void )
1211 IRExpr** args
1212 = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP, Ity_I32),
1213 IRExpr_Get(OFFB_CC_DEP1, Ity_I32),
1214 IRExpr_Get(OFFB_CC_DEP2, Ity_I32),
1215 IRExpr_Get(OFFB_CC_NDEP, Ity_I32) );
1216 IRExpr* call
1217 = mkIRExprCCall(
1218 Ity_I32,
1219 0/*regparm*/,
1220 "armg_calculate_flag_c", &armg_calculate_flag_c,
1221 args
1223 /* Exclude OP and NDEP from definedness checking. We're only
1224 interested in DEP1 and DEP2. */
/* mcx_mask bits 0 and 3 correspond to args[0] (OP) and args[3] (NDEP). */
1225 call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
1226 return call;
1230 /* Build IR to calculate just the overflow flag from stored
1231 CC_OP/CC_DEP1/CC_DEP2/CC_NDEP. Returns an expression ::
1232 Ity_I32. */
1233 static IRExpr* mk_armg_calculate_flag_v ( void )
1235 IRExpr** args
1236 = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP, Ity_I32),
1237 IRExpr_Get(OFFB_CC_DEP1, Ity_I32),
1238 IRExpr_Get(OFFB_CC_DEP2, Ity_I32),
1239 IRExpr_Get(OFFB_CC_NDEP, Ity_I32) );
1240 IRExpr* call
1241 = mkIRExprCCall(
1242 Ity_I32,
1243 0/*regparm*/,
1244 "armg_calculate_flag_v", &armg_calculate_flag_v,
1245 args
1247 /* Exclude OP and NDEP from definedness checking. We're only
1248 interested in DEP1 and DEP2. */
/* mcx_mask bits 0 and 3 correspond to args[0] (OP) and args[3] (NDEP). */
1249 call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
1250 return call;
1254 /* Build IR to calculate N Z C V in bits 31:28 of the
1255 returned word. */
1256 static IRExpr* mk_armg_calculate_flags_nzcv ( void )
1258 IRExpr** args
1259 = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP, Ity_I32),
1260 IRExpr_Get(OFFB_CC_DEP1, Ity_I32),
1261 IRExpr_Get(OFFB_CC_DEP2, Ity_I32),
1262 IRExpr_Get(OFFB_CC_NDEP, Ity_I32) );
1263 IRExpr* call
1264 = mkIRExprCCall(
1265 Ity_I32,
1266 0/*regparm*/,
1267 "armg_calculate_flags_nzcv", &armg_calculate_flags_nzcv,
1268 args
1270 /* Exclude OP and NDEP from definedness checking. We're only
1271 interested in DEP1 and DEP2. */
/* mcx_mask bits 0 and 3 correspond to args[0] (OP) and args[3] (NDEP). */
1272 call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
1273 return call;
1276 static IRExpr* mk_armg_calculate_flag_qc ( IRExpr* resL, IRExpr* resR, Bool Q )
1278 IRExpr** args1;
1279 IRExpr** args2;
1280 IRExpr *call1, *call2, *res;
1282 if (Q) {
1283 args1 = mkIRExprVec_4 ( binop(Iop_GetElem32x4, resL, mkU8(0)),
1284 binop(Iop_GetElem32x4, resL, mkU8(1)),
1285 binop(Iop_GetElem32x4, resR, mkU8(0)),
1286 binop(Iop_GetElem32x4, resR, mkU8(1)) );
1287 args2 = mkIRExprVec_4 ( binop(Iop_GetElem32x4, resL, mkU8(2)),
1288 binop(Iop_GetElem32x4, resL, mkU8(3)),
1289 binop(Iop_GetElem32x4, resR, mkU8(2)),
1290 binop(Iop_GetElem32x4, resR, mkU8(3)) );
1291 } else {
1292 args1 = mkIRExprVec_4 ( binop(Iop_GetElem32x2, resL, mkU8(0)),
1293 binop(Iop_GetElem32x2, resL, mkU8(1)),
1294 binop(Iop_GetElem32x2, resR, mkU8(0)),
1295 binop(Iop_GetElem32x2, resR, mkU8(1)) );
1298 call1 = mkIRExprCCall(
1299 Ity_I32,
1300 0/*regparm*/,
1301 "armg_calculate_flag_qc", &armg_calculate_flag_qc,
1302 args1
1304 if (Q) {
1305 call2 = mkIRExprCCall(
1306 Ity_I32,
1307 0/*regparm*/,
1308 "armg_calculate_flag_qc", &armg_calculate_flag_qc,
1309 args2
1312 if (Q) {
1313 res = binop(Iop_Or32, call1, call2);
1314 } else {
1315 res = call1;
1317 return res;
1320 // FIXME: this is named wrongly .. looks like a sticky set of
1321 // QC, not a write to it.
1322 static void setFlag_QC ( IRExpr* resL, IRExpr* resR, Bool Q,
1323 IRTemp condT )
1325 putMiscReg32 (OFFB_FPSCR,
1326 binop(Iop_Or32,
1327 IRExpr_Get(OFFB_FPSCR, Ity_I32),
1328 binop(Iop_Shl32,
1329 mk_armg_calculate_flag_qc(resL, resR, Q),
1330 mkU8(27))),
1331 condT);
1334 /* Build IR to conditionally set the flags thunk. As with putIReg, if
1335 guard is IRTemp_INVALID then it's unconditional, else it holds a
1336 condition :: Ity_I32. */
1337 static
1338 void setFlags_D1_D2_ND ( UInt cc_op, IRTemp t_dep1,
1339 IRTemp t_dep2, IRTemp t_ndep,
1340 IRTemp guardT /* :: Ity_I32, 0 or 1 */ )
1342 vassert(typeOfIRTemp(irsb->tyenv, t_dep1 == Ity_I32));
1343 vassert(typeOfIRTemp(irsb->tyenv, t_dep2 == Ity_I32));
1344 vassert(typeOfIRTemp(irsb->tyenv, t_ndep == Ity_I32));
1345 vassert(cc_op >= ARMG_CC_OP_COPY && cc_op < ARMG_CC_OP_NUMBER);
1346 if (guardT == IRTemp_INVALID) {
1347 /* unconditional */
1348 stmt( IRStmt_Put( OFFB_CC_OP, mkU32(cc_op) ));
1349 stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(t_dep1) ));
1350 stmt( IRStmt_Put( OFFB_CC_DEP2, mkexpr(t_dep2) ));
1351 stmt( IRStmt_Put( OFFB_CC_NDEP, mkexpr(t_ndep) ));
1352 } else {
1353 /* conditional */
1354 IRTemp c1 = newTemp(Ity_I1);
1355 assign( c1, binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0)) );
1356 stmt( IRStmt_Put(
1357 OFFB_CC_OP,
1358 IRExpr_ITE( mkexpr(c1),
1359 mkU32(cc_op),
1360 IRExpr_Get(OFFB_CC_OP, Ity_I32) ) ));
1361 stmt( IRStmt_Put(
1362 OFFB_CC_DEP1,
1363 IRExpr_ITE( mkexpr(c1),
1364 mkexpr(t_dep1),
1365 IRExpr_Get(OFFB_CC_DEP1, Ity_I32) ) ));
1366 stmt( IRStmt_Put(
1367 OFFB_CC_DEP2,
1368 IRExpr_ITE( mkexpr(c1),
1369 mkexpr(t_dep2),
1370 IRExpr_Get(OFFB_CC_DEP2, Ity_I32) ) ));
1371 stmt( IRStmt_Put(
1372 OFFB_CC_NDEP,
1373 IRExpr_ITE( mkexpr(c1),
1374 mkexpr(t_ndep),
1375 IRExpr_Get(OFFB_CC_NDEP, Ity_I32) ) ));
1380 /* Minor variant of the above that sets NDEP to zero (if it
1381 sets it at all) */
1382 static void setFlags_D1_D2 ( UInt cc_op, IRTemp t_dep1,
1383 IRTemp t_dep2,
1384 IRTemp guardT /* :: Ity_I32, 0 or 1 */ )
1386 IRTemp z32 = newTemp(Ity_I32);
1387 assign( z32, mkU32(0) );
1388 setFlags_D1_D2_ND( cc_op, t_dep1, t_dep2, z32, guardT );
1392 /* Minor variant of the above that sets DEP2 to zero (if it
1393 sets it at all) */
1394 static void setFlags_D1_ND ( UInt cc_op, IRTemp t_dep1,
1395 IRTemp t_ndep,
1396 IRTemp guardT /* :: Ity_I32, 0 or 1 */ )
1398 IRTemp z32 = newTemp(Ity_I32);
1399 assign( z32, mkU32(0) );
1400 setFlags_D1_D2_ND( cc_op, t_dep1, z32, t_ndep, guardT );
1404 /* Minor variant of the above that sets DEP2 and NDEP to zero (if it
1405 sets them at all) */
1406 static void setFlags_D1 ( UInt cc_op, IRTemp t_dep1,
1407 IRTemp guardT /* :: Ity_I32, 0 or 1 */ )
1409 IRTemp z32 = newTemp(Ity_I32);
1410 assign( z32, mkU32(0) );
1411 setFlags_D1_D2_ND( cc_op, t_dep1, z32, z32, guardT );
1415 /* ARM only */
1416 /* Generate a side-exit to the next instruction, if the given guard
1417 expression :: Ity_I32 is 0 (note! the side exit is taken if the
1418 condition is false!) This is used to skip over conditional
1419 instructions which we can't generate straight-line code for, either
1420 because they are too complex or (more likely) they potentially
1421 generate exceptions.
1423 static void mk_skip_over_A32_if_cond_is_false (
1424 IRTemp guardT /* :: Ity_I32, 0 or 1 */
1427 ASSERT_IS_ARM;
1428 vassert(guardT != IRTemp_INVALID);
1429 vassert(0 == (guest_R15_curr_instr_notENC & 3));
1430 stmt( IRStmt_Exit(
1431 unop(Iop_Not1, unop(Iop_32to1, mkexpr(guardT))),
1432 Ijk_Boring,
1433 IRConst_U32(toUInt(guest_R15_curr_instr_notENC + 4)),
1434 OFFB_R15T
1438 /* Thumb16 only */
1439 /* ditto, but jump over a 16-bit thumb insn */
1440 static void mk_skip_over_T16_if_cond_is_false (
1441 IRTemp guardT /* :: Ity_I32, 0 or 1 */
1444 ASSERT_IS_THUMB;
1445 vassert(guardT != IRTemp_INVALID);
1446 vassert(0 == (guest_R15_curr_instr_notENC & 1));
1447 stmt( IRStmt_Exit(
1448 unop(Iop_Not1, unop(Iop_32to1, mkexpr(guardT))),
1449 Ijk_Boring,
1450 IRConst_U32(toUInt((guest_R15_curr_instr_notENC + 2) | 1)),
1451 OFFB_R15T
1456 /* Thumb32 only */
1457 /* ditto, but jump over a 32-bit thumb insn */
1458 static void mk_skip_over_T32_if_cond_is_false (
1459 IRTemp guardT /* :: Ity_I32, 0 or 1 */
1462 ASSERT_IS_THUMB;
1463 vassert(guardT != IRTemp_INVALID);
1464 vassert(0 == (guest_R15_curr_instr_notENC & 1));
1465 stmt( IRStmt_Exit(
1466 unop(Iop_Not1, unop(Iop_32to1, mkexpr(guardT))),
1467 Ijk_Boring,
1468 IRConst_U32(toUInt((guest_R15_curr_instr_notENC + 4) | 1)),
1469 OFFB_R15T
1474 /* Thumb16 and Thumb32 only
1475 Generate a SIGILL followed by a restart of the current instruction
1476 if the given temp is nonzero. */
1477 static void gen_SIGILL_T_if_nonzero ( IRTemp t /* :: Ity_I32 */ )
1479 ASSERT_IS_THUMB;
1480 vassert(t != IRTemp_INVALID);
1481 vassert(0 == (guest_R15_curr_instr_notENC & 1));
1482 stmt(
1483 IRStmt_Exit(
1484 binop(Iop_CmpNE32, mkexpr(t), mkU32(0)),
1485 Ijk_NoDecode,
1486 IRConst_U32(toUInt(guest_R15_curr_instr_notENC | 1)),
1487 OFFB_R15T
1493 /* Inspect the old_itstate, and generate a SIGILL if it indicates that
1494 we are currently in an IT block and are not the last in the block.
1495 This also rolls back guest_ITSTATE to its old value before the exit
1496 and restores it to its new value afterwards. This is so that if
1497 the exit is taken, we have an up to date version of ITSTATE
1498 available. Without doing that, we have no hope of making precise
1499 exceptions work. */
1500 static void gen_SIGILL_T_if_in_but_NLI_ITBlock (
1501 IRTemp old_itstate /* :: Ity_I32 */,
1502 IRTemp new_itstate /* :: Ity_I32 */
1505 ASSERT_IS_THUMB;
1506 put_ITSTATE(old_itstate); // backout
1507 IRTemp guards_for_next3 = newTemp(Ity_I32);
1508 assign(guards_for_next3,
1509 binop(Iop_Shr32, mkexpr(old_itstate), mkU8(8)));
1510 gen_SIGILL_T_if_nonzero(guards_for_next3);
1511 put_ITSTATE(new_itstate); //restore
1515 /* Simpler version of the above, which generates a SIGILL if
1516 we're anywhere within an IT block. */
1517 static void gen_SIGILL_T_if_in_ITBlock (
1518 IRTemp old_itstate /* :: Ity_I32 */,
1519 IRTemp new_itstate /* :: Ity_I32 */
1522 put_ITSTATE(old_itstate); // backout
1523 gen_SIGILL_T_if_nonzero(old_itstate);
1524 put_ITSTATE(new_itstate); //restore
1528 /* Generate an APSR value, from the NZCV thunk, and
1529 from QFLAG32 and GEFLAG0 .. GEFLAG3. */
1530 static IRTemp synthesise_APSR ( void )
1532 IRTemp res1 = newTemp(Ity_I32);
1533 // Get NZCV
1534 assign( res1, mk_armg_calculate_flags_nzcv() );
1535 // OR in the Q value
// (normalised from the zero/nonzero shadow word to a single bit,
// then shifted to its APSR position, ARMG_CC_SHIFT_Q)
1536 IRTemp res2 = newTemp(Ity_I32);
1537 assign(
1538 res2,
1539 binop(Iop_Or32,
1540 mkexpr(res1),
1541 binop(Iop_Shl32,
1542 unop(Iop_1Uto32,
1543 binop(Iop_CmpNE32,
1544 mkexpr(get_QFLAG32()),
1545 mkU32(0))),
1546 mkU8(ARMG_CC_SHIFT_Q)))
1548 // OR in GE0 .. GE3
// Each GE flag is stored zero/nonzero; normalise to 1/0 here and
// place GE0..GE3 at APSR bits 16..19 respectively.
1549 IRExpr* ge0
1550 = unop(Iop_1Uto32, binop(Iop_CmpNE32, get_GEFLAG32(0), mkU32(0)));
1551 IRExpr* ge1
1552 = unop(Iop_1Uto32, binop(Iop_CmpNE32, get_GEFLAG32(1), mkU32(0)));
1553 IRExpr* ge2
1554 = unop(Iop_1Uto32, binop(Iop_CmpNE32, get_GEFLAG32(2), mkU32(0)));
1555 IRExpr* ge3
1556 = unop(Iop_1Uto32, binop(Iop_CmpNE32, get_GEFLAG32(3), mkU32(0)));
1557 IRTemp res3 = newTemp(Ity_I32);
1558 assign(res3,
1559 binop(Iop_Or32,
1560 mkexpr(res2),
1561 binop(Iop_Or32,
1562 binop(Iop_Or32,
1563 binop(Iop_Shl32, ge0, mkU8(16)),
1564 binop(Iop_Shl32, ge1, mkU8(17))),
1565 binop(Iop_Or32,
1566 binop(Iop_Shl32, ge2, mkU8(18)),
1567 binop(Iop_Shl32, ge3, mkU8(19))) )));
1568 return res3;
1572 /* and the inverse transformation: given an APSR value,
1573 set the NZCV thunk, the Q flag, and the GE flags. */
1574 static void desynthesise_APSR ( Bool write_nzcvq, Bool write_ge,
1575 IRTemp apsrT, IRTemp condT )
1577 vassert(write_nzcvq || write_ge);
1578 if (write_nzcvq) {
1579 // Do NZCV
1580 IRTemp immT = newTemp(Ity_I32);
1581 assign(immT, binop(Iop_And32, mkexpr(apsrT), mkU32(0xF0000000)) );
1582 setFlags_D1(ARMG_CC_OP_COPY, immT, condT);
1583 // Do Q
1584 IRTemp qnewT = newTemp(Ity_I32);
1585 assign(qnewT, binop(Iop_And32, mkexpr(apsrT), mkU32(ARMG_CC_MASK_Q)));
1586 put_QFLAG32(qnewT, condT);
1588 if (write_ge) {
1589 // Do GE3..0
1590 put_GEFLAG32(0, 0, binop(Iop_And32, mkexpr(apsrT), mkU32(1<<16)),
1591 condT);
1592 put_GEFLAG32(1, 0, binop(Iop_And32, mkexpr(apsrT), mkU32(1<<17)),
1593 condT);
1594 put_GEFLAG32(2, 0, binop(Iop_And32, mkexpr(apsrT), mkU32(1<<18)),
1595 condT);
1596 put_GEFLAG32(3, 0, binop(Iop_And32, mkexpr(apsrT), mkU32(1<<19)),
1597 condT);
1602 /*------------------------------------------------------------*/
1603 /*--- Helpers for saturation ---*/
1604 /*------------------------------------------------------------*/
1606 /* FIXME: absolutely the only diff. between (a) armUnsignedSatQ and
1607 (b) armSignedSatQ is that in (a) the floor is set to 0, whereas in
1608 (b) the floor is computed from the value of imm5. these two fnsn
1609 should be commoned up. */
1611 /* UnsignedSatQ(): 'clamp' each value so it lies between 0 <= x <= (2^N)-1
1612 Optionally return flag resQ saying whether saturation occurred.
1613 See definition in manual, section A2.2.1, page 41
1614 (bits(N), boolean) UnsignedSatQ( integer i, integer N )
1616 if ( i > (2^N)-1 ) { result = (2^N)-1; saturated = TRUE; }
1617 elsif ( i < 0 ) { result = 0; saturated = TRUE; }
1618 else { result = i; saturated = FALSE; }
1619 return ( result<N-1:0>, saturated );
1622 static void armUnsignedSatQ( IRTemp* res, /* OUT - Ity_I32 */
1623 IRTemp* resQ, /* OUT - Ity_I32 */
1624 IRTemp regT, /* value to clamp - Ity_I32 */
1625 UInt imm5 ) /* saturation ceiling */
1627 ULong ceil64 = (1ULL << imm5) - 1; // (2^imm5)-1
1628 UInt ceil = (UInt)ceil64;
1629 UInt floor = 0;
1631 IRTemp nd0 = newTemp(Ity_I32);
1632 IRTemp nd1 = newTemp(Ity_I32);
1633 IRTemp nd2 = newTemp(Ity_I1);
1634 IRTemp nd3 = newTemp(Ity_I32);
1635 IRTemp nd4 = newTemp(Ity_I32);
1636 IRTemp nd5 = newTemp(Ity_I1);
1637 IRTemp nd6 = newTemp(Ity_I32);
1639 assign( nd0, mkexpr(regT) );
1640 assign( nd1, mkU32(ceil) );
1641 assign( nd2, binop( Iop_CmpLT32S, mkexpr(nd1), mkexpr(nd0) ) );
1642 assign( nd3, IRExpr_ITE(mkexpr(nd2), mkexpr(nd1), mkexpr(nd0)) );
1643 assign( nd4, mkU32(floor) );
1644 assign( nd5, binop( Iop_CmpLT32S, mkexpr(nd3), mkexpr(nd4) ) );
1645 assign( nd6, IRExpr_ITE(mkexpr(nd5), mkexpr(nd4), mkexpr(nd3)) );
1646 assign( *res, mkexpr(nd6) );
1648 /* if saturation occurred, then resQ is set to some nonzero value
1649 if sat did not occur, resQ is guaranteed to be zero. */
1650 if (resQ) {
1651 assign( *resQ, binop(Iop_Xor32, mkexpr(*res), mkexpr(regT)) );
1656 /* SignedSatQ(): 'clamp' each value so it lies between -2^N <= x <= (2^N) - 1
1657 Optionally return flag resQ saying whether saturation occurred.
1658 - see definition in manual, section A2.2.1, page 41
1659 (bits(N), boolean ) SignedSatQ( integer i, integer N )
1661 if ( i > 2^(N-1) - 1 ) { result = 2^(N-1) - 1; saturated = TRUE; }
1662 elsif ( i < -(2^(N-1)) ) { result = -(2^(N-1)); saturated = FALSE; }
1663 else { result = i; saturated = FALSE; }
1664 return ( result[N-1:0], saturated );
1667 static void armSignedSatQ( IRTemp regT, /* value to clamp - Ity_I32 */
1668 UInt imm5, /* saturation ceiling */
1669 IRTemp* res, /* OUT - Ity_I32 */
1670 IRTemp* resQ ) /* OUT - Ity_I32 */
1672 Long ceil64 = (1LL << (imm5-1)) - 1; // (2^(imm5-1))-1
1673 Long floor64 = -(1LL << (imm5-1)); // -(2^(imm5-1))
1674 Int ceil = (Int)ceil64;
1675 Int floor = (Int)floor64;
1677 IRTemp nd0 = newTemp(Ity_I32);
1678 IRTemp nd1 = newTemp(Ity_I32);
1679 IRTemp nd2 = newTemp(Ity_I1);
1680 IRTemp nd3 = newTemp(Ity_I32);
1681 IRTemp nd4 = newTemp(Ity_I32);
1682 IRTemp nd5 = newTemp(Ity_I1);
1683 IRTemp nd6 = newTemp(Ity_I32);
1685 assign( nd0, mkexpr(regT) );
1686 assign( nd1, mkU32(ceil) );
1687 assign( nd2, binop( Iop_CmpLT32S, mkexpr(nd1), mkexpr(nd0) ) );
1688 assign( nd3, IRExpr_ITE( mkexpr(nd2), mkexpr(nd1), mkexpr(nd0) ) );
1689 assign( nd4, mkU32(floor) );
1690 assign( nd5, binop( Iop_CmpLT32S, mkexpr(nd3), mkexpr(nd4) ) );
1691 assign( nd6, IRExpr_ITE( mkexpr(nd5), mkexpr(nd4), mkexpr(nd3) ) );
1692 assign( *res, mkexpr(nd6) );
1694 /* if saturation occurred, then resQ is set to some nonzero value
1695 if sat did not occur, resQ is guaranteed to be zero. */
1696 if (resQ) {
1697 assign( *resQ, binop(Iop_Xor32, mkexpr(*res), mkexpr(regT)) );
1702 /* Compute a value 0 :: I32 or 1 :: I32, indicating whether signed
1703 overflow occurred for 32-bit addition. Needs both args and the
1704 result. HD p27. */
1705 static
1706 IRExpr* signed_overflow_after_Add32 ( IRExpr* resE,
1707 IRTemp argL, IRTemp argR )
1709 IRTemp res = newTemp(Ity_I32);
1710 assign(res, resE);
1711 return
1712 binop( Iop_Shr32,
1713 binop( Iop_And32,
1714 binop( Iop_Xor32, mkexpr(res), mkexpr(argL) ),
1715 binop( Iop_Xor32, mkexpr(res), mkexpr(argR) )),
1716 mkU8(31) );
1719 /* Similarly .. also from HD p27 .. */
1720 static
1721 IRExpr* signed_overflow_after_Sub32 ( IRExpr* resE,
1722 IRTemp argL, IRTemp argR )
1724 IRTemp res = newTemp(Ity_I32);
1725 assign(res, resE);
1726 return
1727 binop( Iop_Shr32,
1728 binop( Iop_And32,
1729 binop( Iop_Xor32, mkexpr(argL), mkexpr(argR) ),
1730 binop( Iop_Xor32, mkexpr(res), mkexpr(argL) )),
1731 mkU8(31) );
1735 /*------------------------------------------------------------*/
1736 /*--- Larger helpers ---*/
1737 /*------------------------------------------------------------*/
1739 /* Compute both the result and new C flag value for a LSL by an imm5
1740 or by a register operand. May generate reads of the old C value
1741 (hence only safe to use before any writes to guest state happen).
1742 Are factored out so can be used by both ARM and Thumb.
1744 Note that in compute_result_and_C_after_{LSL,LSR,ASR}_by{imm5,reg},
1745 "res" (the result) is a.k.a. "shop", shifter operand
1746 "newC" (the new C) is a.k.a. "shco", shifter carry out
1748 The calling convention for res and newC is a bit funny. They could
1749 be passed by value, but instead are passed by ref.
1751 The C (shco) value computed must be zero in bits 31:1, as the IR
1752 optimisations for flag handling (guest_arm_spechelper) rely on
1753 that, and the slow-path handlers (armg_calculate_flags_nzcv) assert
1754 for it. Same applies to all these functions that compute shco
1755 after a shift or rotate, not just this one.
1758 static void compute_result_and_C_after_LSL_by_imm5 (
1759 /*OUT*/HChar* buf,
1760 IRTemp* res,
1761 IRTemp* newC,
1762 IRTemp rMt, UInt shift_amt, /* operands */
1763 UInt rM /* only for debug printing */
1766 if (shift_amt == 0) {
1767 if (newC) {
1768 assign( *newC, mk_armg_calculate_flag_c() );
1770 assign( *res, mkexpr(rMt) );
1771 DIS(buf, "r%u", rM);
1772 } else {
1773 vassert(shift_amt >= 1 && shift_amt <= 31);
1774 if (newC) {
1775 assign( *newC,
1776 binop(Iop_And32,
1777 binop(Iop_Shr32, mkexpr(rMt),
1778 mkU8(32 - shift_amt)),
1779 mkU32(1)));
1781 assign( *res,
1782 binop(Iop_Shl32, mkexpr(rMt), mkU8(shift_amt)) );
1783 DIS(buf, "r%u, LSL #%u", rM, shift_amt);
/* Register-controlled LSL.  See the block comment above
   compute_result_and_C_after_LSL_by_imm5 for the res/newC
   conventions and the bits-31:1-must-be-zero requirement on newC. */
1788 static void compute_result_and_C_after_LSL_by_reg (
1789 /*OUT*/HChar* buf,
1790 IRTemp* res,
1791 IRTemp* newC,
1792 IRTemp rMt, IRTemp rSt, /* operands */
1793 UInt rM, UInt rS /* only for debug printing */
1796 // shift left in range 0 .. 255
1797 // amt = rS & 255
1798 // res = amt < 32 ? Rm << amt : 0
1799 // newC = amt == 0 ? oldC :
1800 // amt in 1..32 ? Rm[32-amt] : 0
1801 IRTemp amtT = newTemp(Ity_I32);
1802 assign( amtT, binop(Iop_And32, mkexpr(rSt), mkU32(255)) );
1803 if (newC) {
1804 /* mux0X(amt == 0,
1805 mux0X(amt < 32,
1807 Rm[(32-amt) & 31]),
1808 oldC)
1810 /* About the best you can do is pray that iropt is able
1811 to nuke most or all of the following junk. */
1812 IRTemp oldC = newTemp(Ity_I32);
1813 assign(oldC, mk_armg_calculate_flag_c() );
1814 assign(
1815 *newC,
1816 IRExpr_ITE(
1817 binop(Iop_CmpEQ32, mkexpr(amtT), mkU32(0)),
1818 mkexpr(oldC),
1819 IRExpr_ITE(
1820 binop(Iop_CmpLE32U, mkexpr(amtT), mkU32(32)),
/* The "& 31" keeps the IR shift amount well-defined; for amt == 32
   it selects Rm[0], which is the required carry-out for LSL #32. */
1821 binop(Iop_And32,
1822 binop(Iop_Shr32,
1823 mkexpr(rMt),
1824 unop(Iop_32to8,
1825 binop(Iop_And32,
1826 binop(Iop_Sub32,
1827 mkU32(32),
1828 mkexpr(amtT)),
1829 mkU32(31)
1833 mkU32(1)
1835 mkU32(0)
1840 // (Rm << (Rs & 31)) & (((Rs & 255) - 32) >>s 31)
1841 // Lhs of the & limits the shift to 31 bits, so as to
1842 // give known IR semantics. Rhs of the & is all 1s for
1843 // Rs <= 31 and all 0s for Rs >= 32.
1844 assign(
1845 *res,
1846 binop(
1847 Iop_And32,
1848 binop(Iop_Shl32,
1849 mkexpr(rMt),
1850 unop(Iop_32to8,
1851 binop(Iop_And32, mkexpr(rSt), mkU32(31)))),
1852 binop(Iop_Sar32,
1853 binop(Iop_Sub32,
1854 mkexpr(amtT),
1855 mkU32(32)),
1856 mkU8(31))));
1857 DIS(buf, "r%u, LSL r%u", rM, rS);
1861 static void compute_result_and_C_after_LSR_by_imm5 (
1862 /*OUT*/HChar* buf,
1863 IRTemp* res,
1864 IRTemp* newC,
1865 IRTemp rMt, UInt shift_amt, /* operands */
1866 UInt rM /* only for debug printing */
1869 if (shift_amt == 0) {
1870 // conceptually a 32-bit shift, however:
1871 // res = 0
1872 // newC = Rm[31]
1873 if (newC) {
1874 assign( *newC,
1875 binop(Iop_And32,
1876 binop(Iop_Shr32, mkexpr(rMt), mkU8(31)),
1877 mkU32(1)));
1879 assign( *res, mkU32(0) );
1880 DIS(buf, "r%u, LSR #0(a.k.a. 32)", rM);
1881 } else {
1882 // shift in range 1..31
1883 // res = Rm >>u shift_amt
1884 // newC = Rm[shift_amt - 1]
1885 vassert(shift_amt >= 1 && shift_amt <= 31);
1886 if (newC) {
1887 assign( *newC,
1888 binop(Iop_And32,
1889 binop(Iop_Shr32, mkexpr(rMt),
1890 mkU8(shift_amt - 1)),
1891 mkU32(1)));
1893 assign( *res,
1894 binop(Iop_Shr32, mkexpr(rMt), mkU8(shift_amt)) );
1895 DIS(buf, "r%u, LSR #%u", rM, shift_amt);
/* Register-controlled LSR.  See the block comment above
   compute_result_and_C_after_LSL_by_imm5 for the res/newC
   conventions. */
1900 static void compute_result_and_C_after_LSR_by_reg (
1901 /*OUT*/HChar* buf,
1902 IRTemp* res,
1903 IRTemp* newC,
1904 IRTemp rMt, IRTemp rSt, /* operands */
1905 UInt rM, UInt rS /* only for debug printing */
1908 // shift right in range 0 .. 255
1909 // amt = rS & 255
1910 // res = amt < 32 ? Rm >>u amt : 0
1911 // newC = amt == 0 ? oldC :
1912 // amt in 1..32 ? Rm[amt-1] : 0
1913 IRTemp amtT = newTemp(Ity_I32);
1914 assign( amtT, binop(Iop_And32, mkexpr(rSt), mkU32(255)) );
1915 if (newC) {
1916 /* mux0X(amt == 0,
1917 mux0X(amt < 32,
1919 Rm[(amt-1) & 31]),
1920 oldC)
1922 IRTemp oldC = newTemp(Ity_I32);
1923 assign(oldC, mk_armg_calculate_flag_c() );
1924 assign(
1925 *newC,
1926 IRExpr_ITE(
1927 binop(Iop_CmpEQ32, mkexpr(amtT), mkU32(0)),
1928 mkexpr(oldC),
1929 IRExpr_ITE(
1930 binop(Iop_CmpLE32U, mkexpr(amtT), mkU32(32)),
/* The "& 31" keeps the IR shift amount well-defined; for amt == 32
   it selects Rm[31], the required carry-out for LSR #32. */
1931 binop(Iop_And32,
1932 binop(Iop_Shr32,
1933 mkexpr(rMt),
1934 unop(Iop_32to8,
1935 binop(Iop_And32,
1936 binop(Iop_Sub32,
1937 mkexpr(amtT),
1938 mkU32(1)),
1939 mkU32(31)
1943 mkU32(1)
1945 mkU32(0)
1950 // (Rm >>u (Rs & 31)) & (((Rs & 255) - 32) >>s 31)
1951 // Lhs of the & limits the shift to 31 bits, so as to
1952 // give known IR semantics. Rhs of the & is all 1s for
1953 // Rs <= 31 and all 0s for Rs >= 32.
1954 assign(
1955 *res,
1956 binop(
1957 Iop_And32,
1958 binop(Iop_Shr32,
1959 mkexpr(rMt),
1960 unop(Iop_32to8,
1961 binop(Iop_And32, mkexpr(rSt), mkU32(31)))),
1962 binop(Iop_Sar32,
1963 binop(Iop_Sub32,
1964 mkexpr(amtT),
1965 mkU32(32)),
1966 mkU8(31))));
1967 DIS(buf, "r%u, LSR r%u", rM, rS);
1971 static void compute_result_and_C_after_ASR_by_imm5 (
1972 /*OUT*/HChar* buf,
1973 IRTemp* res,
1974 IRTemp* newC,
1975 IRTemp rMt, UInt shift_amt, /* operands */
1976 UInt rM /* only for debug printing */
1979 if (shift_amt == 0) {
1980 // conceptually a 32-bit shift, however:
1981 // res = Rm >>s 31
1982 // newC = Rm[31]
1983 if (newC) {
1984 assign( *newC,
1985 binop(Iop_And32,
1986 binop(Iop_Shr32, mkexpr(rMt), mkU8(31)),
1987 mkU32(1)));
1989 assign( *res, binop(Iop_Sar32, mkexpr(rMt), mkU8(31)) );
1990 DIS(buf, "r%u, ASR #0(a.k.a. 32)", rM);
1991 } else {
1992 // shift in range 1..31
1993 // res = Rm >>s shift_amt
1994 // newC = Rm[shift_amt - 1]
1995 vassert(shift_amt >= 1 && shift_amt <= 31);
1996 if (newC) {
1997 assign( *newC,
1998 binop(Iop_And32,
1999 binop(Iop_Shr32, mkexpr(rMt),
2000 mkU8(shift_amt - 1)),
2001 mkU32(1)));
2003 assign( *res,
2004 binop(Iop_Sar32, mkexpr(rMt), mkU8(shift_amt)) );
2005 DIS(buf, "r%u, ASR #%u", rM, shift_amt);
/* Register-controlled ASR.  See the block comment above
   compute_result_and_C_after_LSL_by_imm5 for the res/newC
   conventions. */
2010 static void compute_result_and_C_after_ASR_by_reg (
2011 /*OUT*/HChar* buf,
2012 IRTemp* res,
2013 IRTemp* newC,
2014 IRTemp rMt, IRTemp rSt, /* operands */
2015 UInt rM, UInt rS /* only for debug printing */
2018 // arithmetic shift right in range 0 .. 255
2019 // amt = rS & 255
2020 // res = amt < 32 ? Rm >>s amt : Rm >>s 31
2021 // newC = amt == 0 ? oldC :
2022 // amt in 1..32 ? Rm[amt-1] : Rm[31]
2023 IRTemp amtT = newTemp(Ity_I32);
2024 assign( amtT, binop(Iop_And32, mkexpr(rSt), mkU32(255)) );
2025 if (newC) {
2026 /* mux0X(amt == 0,
2027 mux0X(amt < 32,
2028 Rm[31],
2029 Rm[(amt-1) & 31])
2030 oldC)
2032 IRTemp oldC = newTemp(Ity_I32);
2033 assign(oldC, mk_armg_calculate_flag_c() );
2034 assign(
2035 *newC,
2036 IRExpr_ITE(
2037 binop(Iop_CmpEQ32, mkexpr(amtT), mkU32(0)),
2038 mkexpr(oldC),
2039 IRExpr_ITE(
2040 binop(Iop_CmpLE32U, mkexpr(amtT), mkU32(32)),
2041 binop(Iop_And32,
2042 binop(Iop_Shr32,
2043 mkexpr(rMt),
2044 unop(Iop_32to8,
2045 binop(Iop_And32,
2046 binop(Iop_Sub32,
2047 mkexpr(amtT),
2048 mkU32(1)),
2049 mkU32(31)
2053 mkU32(1)
/* amt > 32: the carry-out saturates to the sign bit, Rm[31]. */
2055 binop(Iop_And32,
2056 binop(Iop_Shr32,
2057 mkexpr(rMt),
2058 mkU8(31)
2060 mkU32(1)
2066 // (Rm >>s (amt <u 32 ? amt : 31))
2067 assign(
2068 *res,
2069 binop(
2070 Iop_Sar32,
2071 mkexpr(rMt),
2072 unop(
2073 Iop_32to8,
2074 IRExpr_ITE(
2075 binop(Iop_CmpLT32U, mkexpr(amtT), mkU32(32)),
2076 mkexpr(amtT),
2077 mkU32(31)))));
2078 DIS(buf, "r%u, ASR r%u", rM, rS);
/* Compute the result, and optionally the carry-out, of an
   "Rm, ROR Rs" (rotate-right-by-register) shifter operand.  Binds
   the rotated value via *res; if newC is non-NULL, also binds the
   carry-out (bit 0 of a word) via *newC.  rM/rS are used only for
   the disassembly text. */
2082 static void compute_result_and_C_after_ROR_by_reg (
2083 /*OUT*/HChar* buf,
2084 IRTemp* res,
2085 IRTemp* newC,
2086 IRTemp rMt, IRTemp rSt, /* operands */
2087 UInt rM, UInt rS /* only for debug printing */
2090 // rotate right in range 0 .. 255
2091 // amt = rS & 255
2092 // shop = Rm `ror` (amt & 31)
2093 // shco = amt == 0 ? oldC : Rm[(amt-1) & 31]
2094 IRTemp amtT = newTemp(Ity_I32);
2095 assign( amtT, binop(Iop_And32, mkexpr(rSt), mkU32(255)) );
2096 IRTemp amt5T = newTemp(Ity_I32);
2097 assign( amt5T, binop(Iop_And32, mkexpr(rSt), mkU32(31)) );
2098 IRTemp oldC = newTemp(Ity_I32);
2099 assign(oldC, mk_armg_calculate_flag_c() );
     /* Carry-out: unchanged when amt == 0, else bit (amt-1) & 31
        of Rm, as per the shco spec above. */
2100 if (newC) {
2101 assign(
2102 *newC,
2103 IRExpr_ITE(
2104 binop(Iop_CmpNE32, mkexpr(amtT), mkU32(0)),
2105 binop(Iop_And32,
2106 binop(Iop_Shr32,
2107 mkexpr(rMt),
2108 unop(Iop_32to8,
2109 binop(Iop_And32,
2110 binop(Iop_Sub32,
2111 mkexpr(amtT),
2112 mkU32(1)
2114 mkU32(31)
2118 mkU32(1)
2120 mkexpr(oldC)
     /* Result: Rm unchanged when (amt & 31) == 0, else
        (Rm >>u amt5) | (Rm << (32 - amt5)). */
2124 assign(
2125 *res,
2126 IRExpr_ITE(
2127 binop(Iop_CmpNE32, mkexpr(amt5T), mkU32(0)),
2128 binop(Iop_Or32,
2129 binop(Iop_Shr32,
2130 mkexpr(rMt),
2131 unop(Iop_32to8, mkexpr(amt5T))
2133 binop(Iop_Shl32,
2134 mkexpr(rMt),
2135 unop(Iop_32to8,
2136 binop(Iop_Sub32, mkU32(32), mkexpr(amt5T))
2140 mkexpr(rMt)
2143 DIS(buf, "r%u, ROR r#%u", rM, rS);
     /* NOTE(review): "r#%u" above looks like a typo for "r%u" --
        debug-print text only, harmless. */
2147 /* Generate an expression corresponding to the immediate-shift case of
2148 a shifter operand. This is used both for ARM and Thumb2.
2150 Bind it to a temporary, and return that via *res. If newC is
2151 non-NULL, also compute a value for the shifter's carry out (in the
2152 LSB of a word), bind it to a temporary, and return that via *newC.
2154 Generates GETs from the guest state and is therefore not safe to
2155 use once we start doing PUTs to it, for any given instruction.
2157 'how' is encoded thusly:
2158 00b LSL, 01b LSR, 10b ASR, 11b ROR
2159 Most but not all ARM and Thumb integer insns use this encoding.
2160 Be careful to ensure the right value is passed here.
/* Dispatch on 'how' (00b=LSL, 01b=LSR, 10b=ASR, 11b=ROR/RRX) to the
   matching immediate-shift helper.  The ROR case is handled inline:
   an immediate rotate amount of 0 encodes RRX (rotate right one bit
   through carry); otherwise it is a plain rotate by 1..31. */
2162 static void compute_result_and_C_after_shift_by_imm5 (
2163 /*OUT*/HChar* buf,
2164 /*OUT*/IRTemp* res,
2165 /*OUT*/IRTemp* newC,
2166 IRTemp rMt, /* reg to shift */
2167 UInt how, /* what kind of shift */
2168 UInt shift_amt, /* shift amount (0..31) */
2169 UInt rM /* only for debug printing */
2172 vassert(shift_amt < 32);
2173 vassert(how < 4);
2175 switch (how) {
2177 case 0:
2178 compute_result_and_C_after_LSL_by_imm5(
2179 buf, res, newC, rMt, shift_amt, rM
2181 break;
2183 case 1:
2184 compute_result_and_C_after_LSR_by_imm5(
2185 buf, res, newC, rMt, shift_amt, rM
2187 break;
2189 case 2:
2190 compute_result_and_C_after_ASR_by_imm5(
2191 buf, res, newC, rMt, shift_amt, rM
2193 break;
2195 case 3:
2196 if (shift_amt == 0) {
2197 IRTemp oldcT = newTemp(Ity_I32);
2198 // rotate right 1 bit through carry (?)
2199 // RRX -- described at ARM ARM A5-17
2200 // res = (oldC << 31) | (Rm >>u 1)
2201 // newC = Rm[0]
2202 if (newC) {
2203 assign( *newC,
2204 binop(Iop_And32, mkexpr(rMt), mkU32(1)));
2206 assign( oldcT, mk_armg_calculate_flag_c() );
2207 assign( *res,
2208 binop(Iop_Or32,
2209 binop(Iop_Shl32, mkexpr(oldcT), mkU8(31)),
2210 binop(Iop_Shr32, mkexpr(rMt), mkU8(1))) );
2211 DIS(buf, "r%u, RRX", rM);
2212 } else {
2213 // rotate right in range 1..31
2214 // res = Rm `ror` shift_amt
2215 // newC = Rm[shift_amt - 1]
2216 vassert(shift_amt >= 1 && shift_amt <= 31);
             /* Carry-out is the last bit rotated off the bottom. */
2217 if (newC) {
2218 assign( *newC,
2219 binop(Iop_And32,
2220 binop(Iop_Shr32, mkexpr(rMt),
2221 mkU8(shift_amt - 1)),
2222 mkU32(1)));
2224 assign( *res,
2225 binop(Iop_Or32,
2226 binop(Iop_Shr32, mkexpr(rMt), mkU8(shift_amt)),
2227 binop(Iop_Shl32, mkexpr(rMt),
2228 mkU8(32-shift_amt))));
2229 DIS(buf, "r%u, ROR #%u", rM, shift_amt);
2231 break;
2233 default:
2234 /*NOTREACHED*/
2235 vassert(0);
2240 /* Generate an expression corresponding to the register-shift case of
2241 a shifter operand. This is used both for ARM and Thumb2.
2243 Bind it to a temporary, and return that via *res. If newC is
2244 non-NULL, also compute a value for the shifter's carry out (in the
2245 LSB of a word), bind it to a temporary, and return that via *newC.
2247 Generates GETs from the guest state and is therefore not safe to
2248 use once we start doing PUTs to it, for any given instruction.
2250 'how' is encoded thusly:
2251 00b LSL, 01b LSR, 10b ASR, 11b ROR
2252 Most but not all ARM and Thumb integer insns use this encoding.
2253 Be careful to ensure the right value is passed here.
2255 static void compute_result_and_C_after_shift_by_reg (
2256 /*OUT*/HChar* buf,
2257 /*OUT*/IRTemp* res,
2258 /*OUT*/IRTemp* newC,
2259 IRTemp rMt, /* reg to shift */
2260 UInt how, /* what kind of shift */
2261 IRTemp rSt, /* shift amount */
2262 UInt rM, /* only for debug printing */
2263 UInt rS /* only for debug printing */
2266 vassert(how < 4);
2267 switch (how) {
2268 case 0: { /* LSL */
2269 compute_result_and_C_after_LSL_by_reg(
2270 buf, res, newC, rMt, rSt, rM, rS
2272 break;
2274 case 1: { /* LSR */
2275 compute_result_and_C_after_LSR_by_reg(
2276 buf, res, newC, rMt, rSt, rM, rS
2278 break;
2280 case 2: { /* ASR */
2281 compute_result_and_C_after_ASR_by_reg(
2282 buf, res, newC, rMt, rSt, rM, rS
2284 break;
2286 case 3: { /* ROR */
2287 compute_result_and_C_after_ROR_by_reg(
2288 buf, res, newC, rMt, rSt, rM, rS
2290 break;
2292 default:
2293 /*NOTREACHED*/
2294 vassert(0);
2299 /* Generate an expression corresponding to a shifter_operand, bind it
2300 to a temporary, and return that via *shop. If shco is non-NULL,
2301 also compute a value for the shifter's carry out (in the LSB of a
2302 word), bind it to a temporary, and return that via *shco.
2304 If for some reason we can't come up with a shifter operand (missing
2305 case? not really a shifter operand?) return False.
2307 Generates GETs from the guest state and is therefore not safe to
2308 use once we start doing PUTs to it, for any given instruction.
2310 For ARM insns only; not for Thumb.
/* Decode the shifter operand of an ARM data-processing insn from
   bit 25 (immediate flag) and bits 11:0.  Returns False if the
   encoding is not actually a shifter operand. */
2312 static Bool mk_shifter_operand ( UInt insn_25, UInt insn_11_0,
2313 /*OUT*/IRTemp* shop,
2314 /*OUT*/IRTemp* shco,
2315 /*OUT*/HChar* buf )
2317 UInt insn_4 = (insn_11_0 >> 4) & 1;
2318 UInt insn_7 = (insn_11_0 >> 7) & 1;
2319 vassert(insn_25 <= 0x1);
2320 vassert(insn_11_0 <= 0xFFF);
     /* Caller must hand in unset (INVALID) temps; we allocate them. */
2322 vassert(shop && *shop == IRTemp_INVALID);
2323 *shop = newTemp(Ity_I32);
2325 if (shco) {
2326 vassert(*shco == IRTemp_INVALID);
2327 *shco = newTemp(Ity_I32);
2330 /* 32-bit immediate */
2332 if (insn_25 == 1) {
2333 /* immediate: (7:0) rotated right by 2 * (11:8) */
2334 UInt imm = (insn_11_0 >> 0) & 0xFF;
2335 UInt rot = 2 * ((insn_11_0 >> 8) & 0xF);
2336 vassert(rot <= 30);
2337 imm = ROR32(imm, rot);
        /* Carry-out: unchanged for a zero rotate, else bit 31 of the
           rotated immediate. */
2338 if (shco) {
2339 if (rot == 0) {
2340 assign( *shco, mk_armg_calculate_flag_c() );
2341 } else {
2342 assign( *shco, mkU32( (imm >> 31) & 1 ) );
2345 DIS(buf, "#0x%x", imm);
2346 assign( *shop, mkU32(imm) );
2347 return True;
2350 /* Shift/rotate by immediate */
2352 if (insn_25 == 0 && insn_4 == 0) {
2353 /* Rm (3:0) shifted (6:5) by immediate (11:7) */
2354 UInt shift_amt = (insn_11_0 >> 7) & 0x1F;
2355 UInt rM = (insn_11_0 >> 0) & 0xF;
2356 UInt how = (insn_11_0 >> 5) & 3;
2357 /* how: 00 = Shl, 01 = Shr, 10 = Sar, 11 = Ror */
2358 IRTemp rMt = newTemp(Ity_I32);
2359 assign(rMt, getIRegA(rM));
2361 vassert(shift_amt <= 31);
2363 compute_result_and_C_after_shift_by_imm5(
2364 buf, shop, shco, rMt, how, shift_amt, rM
2366 return True;
2369 /* Shift/rotate by register */
2370 if (insn_25 == 0 && insn_4 == 1) {
2371 /* Rm (3:0) shifted (6:5) by Rs (11:8) */
2372 UInt rM = (insn_11_0 >> 0) & 0xF;
2373 UInt rS = (insn_11_0 >> 8) & 0xF;
2374 UInt how = (insn_11_0 >> 5) & 3;
2375 /* how: 00 = Shl, 01 = Shr, 10 = Sar, 11 = Ror */
2376 IRTemp rMt = newTemp(Ity_I32);
2377 IRTemp rSt = newTemp(Ity_I32);
        /* Bit 7 set together with bit 4 is not a shifter-operand
           encoding (NOTE(review): presumably a multiply or extra
           load/store form -- confirm against the ARM ARM tables). */
2379 if (insn_7 == 1)
2380 return False; /* not really a shifter operand */
2382 assign(rMt, getIRegA(rM));
2383 assign(rSt, getIRegA(rS));
2385 compute_result_and_C_after_shift_by_reg(
2386 buf, shop, shco, rMt, how, rSt, rM, rS
2388 return True;
2391 vex_printf("mk_shifter_operand(0x%x,0x%x)\n", insn_25, insn_11_0 );
2392 return False;
2396 /* ARM only */
2397 static
2398 IRExpr* mk_EA_reg_plusminus_imm12 ( UInt rN, UInt bU, UInt imm12,
2399 /*OUT*/HChar* buf )
2401 vassert(rN < 16);
2402 vassert(bU < 2);
2403 vassert(imm12 < 0x1000);
2404 HChar opChar = bU == 1 ? '+' : '-';
2405 DIS(buf, "[r%u, #%c%u]", rN, opChar, imm12);
2406 return
2407 binop( (bU == 1 ? Iop_Add32 : Iop_Sub32),
2408 getIRegA(rN),
2409 mkU32(imm12) );
2413 /* ARM only.
2414 NB: This is "DecodeImmShift" in newer versions of the ARM ARM.
/* Effective address "[rN, +/-rM, <shift> #imm5]": rN plus or minus
   rM shifted by an immediate, where sh2 selects LSL/LSR/ASR/ROR and
   imm5 == 0 has the special meanings from DecodeImmShift (LSR/ASR
   by 32, RRX for ROR).  Also writes the disassembly text to buf. */
2416 static
2417 IRExpr* mk_EA_reg_plusminus_shifted_reg ( UInt rN, UInt bU, UInt rM,
2418 UInt sh2, UInt imm5,
2419 /*OUT*/HChar* buf )
2421 vassert(rN < 16);
2422 vassert(bU < 2);
2423 vassert(rM < 16);
2424 vassert(sh2 < 4);
2425 vassert(imm5 < 32);
2426 HChar opChar = bU == 1 ? '+' : '-';
2427 IRExpr* index = NULL;
2428 switch (sh2) {
2429 case 0: /* LSL */
2430 /* imm5 can be in the range 0 .. 31 inclusive. */
2431 index = binop(Iop_Shl32, getIRegA(rM), mkU8(imm5));
2432 DIS(buf, "[r%u, %c r%u LSL #%u]", rN, opChar, rM, imm5);
2433 break;
2434 case 1: /* LSR */
        /* imm5 == 0 means LSR #32, i.e. a zero index.
           NOTE(review): the vassert(0) "ATC" paths below assert
           deliberately -- presumably awaiting a test case. */
2435 if (imm5 == 0) {
2436 index = mkU32(0);
2437 vassert(0); // ATC
2438 } else {
2439 index = binop(Iop_Shr32, getIRegA(rM), mkU8(imm5));
2441 DIS(buf, "[r%u, %cr%u, LSR #%u]",
2442 rN, opChar, rM, imm5 == 0 ? 32 : imm5);
2443 break;
2444 case 2: /* ASR */
2445 /* Doesn't this just mean that the behaviour with imm5 == 0
2446 is the same as if it had been 31 ? */
2447 if (imm5 == 0) {
2448 index = binop(Iop_Sar32, getIRegA(rM), mkU8(31));
2449 vassert(0); // ATC
2450 } else {
2451 index = binop(Iop_Sar32, getIRegA(rM), mkU8(imm5));
2453 DIS(buf, "[r%u, %cr%u, ASR #%u]",
2454 rN, opChar, rM, imm5 == 0 ? 32 : imm5);
2455 break;
2456 case 3: /* ROR or RRX */
        /* imm5 == 0 encodes RRX: (C << 31) | (Rm >>u 1). */
2457 if (imm5 == 0) {
2458 IRTemp rmT = newTemp(Ity_I32);
2459 IRTemp cflagT = newTemp(Ity_I32);
2460 assign(rmT, getIRegA(rM));
2461 assign(cflagT, mk_armg_calculate_flag_c());
2462 index = binop(Iop_Or32,
2463 binop(Iop_Shl32, mkexpr(cflagT), mkU8(31)),
2464 binop(Iop_Shr32, mkexpr(rmT), mkU8(1)));
2465 DIS(buf, "[r%u, %cr%u, RRX]", rN, opChar, rM);
2466 } else {
2467 IRTemp rmT = newTemp(Ity_I32);
2468 assign(rmT, getIRegA(rM));
2469 vassert(imm5 >= 1 && imm5 <= 31);
2470 index = binop(Iop_Or32,
2471 binop(Iop_Shl32, mkexpr(rmT), mkU8(32-imm5)),
2472 binop(Iop_Shr32, mkexpr(rmT), mkU8(imm5)));
2473 DIS(buf, "[r%u, %cr%u, ROR #%u]", rN, opChar, rM, imm5);
2475 break;
2476 default:
2477 vassert(0);
2479 vassert(index);
2480 return binop(bU == 1 ? Iop_Add32 : Iop_Sub32,
2481 getIRegA(rN), index);
2485 /* ARM only */
2486 static
2487 IRExpr* mk_EA_reg_plusminus_imm8 ( UInt rN, UInt bU, UInt imm8,
2488 /*OUT*/HChar* buf )
2490 vassert(rN < 16);
2491 vassert(bU < 2);
2492 vassert(imm8 < 0x100);
2493 HChar opChar = bU == 1 ? '+' : '-';
2494 DIS(buf, "[r%u, #%c%u]", rN, opChar, imm8);
2495 return
2496 binop( (bU == 1 ? Iop_Add32 : Iop_Sub32),
2497 getIRegA(rN),
2498 mkU32(imm8) );
2502 /* ARM only */
2503 static
2504 IRExpr* mk_EA_reg_plusminus_reg ( UInt rN, UInt bU, UInt rM,
2505 /*OUT*/HChar* buf )
2507 vassert(rN < 16);
2508 vassert(bU < 2);
2509 vassert(rM < 16);
2510 HChar opChar = bU == 1 ? '+' : '-';
2511 IRExpr* index = getIRegA(rM);
2512 DIS(buf, "[r%u, %c r%u]", rN, opChar, rM);
2513 return binop(bU == 1 ? Iop_Add32 : Iop_Sub32,
2514 getIRegA(rN), index);
2518 /* irRes :: Ity_I32 holds a floating point comparison result encoded
2519 as an IRCmpF64Result. Generate code to convert it to an
2520 ARM-encoded (N,Z,C,V) group in the lowest 4 bits of an I32 value.
2521 Assign a new temp to hold that value, and return the temp. */
2522 static
2523 IRTemp mk_convert_IRCmpF64Result_to_NZCV ( IRTemp irRes )
2525 IRTemp ix = newTemp(Ity_I32);
2526 IRTemp termL = newTemp(Ity_I32);
2527 IRTemp termR = newTemp(Ity_I32);
2528 IRTemp nzcv = newTemp(Ity_I32);
2530 /* This is where the fun starts. We have to convert 'irRes' from
2531 an IR-convention return result (IRCmpF64Result) to an
2532 ARM-encoded (N,Z,C,V) group. The final result is in the bottom
2533 4 bits of 'nzcv'. */
2534 /* Map compare result from IR to ARM(nzcv) */
2536 FP cmp result | IR | ARM(nzcv)
2537 --------------------------------
2538 UN 0x45 0011
2539 LT 0x01 1000
2540 GT 0x00 0010
2541 EQ 0x40 0110
2543 /* Now since you're probably wondering WTF ..
2545 ix fishes the useful bits out of the IR value, bits 6 and 0, and
2546 places them side by side, giving a number which is 0, 1, 2 or 3.
2548 termL is a sequence cooked up by GNU superopt. It converts ix
2549 into an almost correct NZCV value (incredibly), except
2550 for the case of UN, where it produces 0100 instead of the
2551 required 0011.
2553 termR is therefore a correction term, also computed from ix. It
2554 is 1 in the UN case and 0 for LT, GT and EQ. Hence, to get
2555 the final correct value, we subtract termR from termL.
2557 Don't take my word for it. There's a test program at the bottom
2558 of this file, to try this out with.
     /* ix = ((irRes >> 5) & 3) | (irRes & 1); maps GT->0, LT->1,
        EQ->2, UN->3 given the IR encodings in the table above. */
2560 assign(
2562 binop(Iop_Or32,
2563 binop(Iop_And32,
2564 binop(Iop_Shr32, mkexpr(irRes), mkU8(5)),
2565 mkU32(3)),
2566 binop(Iop_And32, mkexpr(irRes), mkU32(1))));
2568 assign(
2569 termL,
2570 binop(Iop_Add32,
2571 binop(Iop_Shr32,
2572 binop(Iop_Sub32,
2573 binop(Iop_Shl32,
2574 binop(Iop_Xor32, mkexpr(ix), mkU32(1)),
2575 mkU8(30)),
2576 mkU32(1)),
2577 mkU8(29)),
2578 mkU32(1)));
     /* termR = (ix & (ix >> 1)) & 1, i.e. 1 exactly when ix == 3 (UN). */
2580 assign(
2581 termR,
2582 binop(Iop_And32,
2583 binop(Iop_And32,
2584 mkexpr(ix),
2585 binop(Iop_Shr32, mkexpr(ix), mkU8(1))),
2586 mkU32(1)));
     /* nzcv = termL - termR, per the explanation above. */
2588 assign(nzcv, binop(Iop_Sub32, mkexpr(termL), mkexpr(termR)));
2589 return nzcv;
2593 /* Thumb32 only. This is "ThumbExpandImm" in the ARM ARM. If
2594 updatesC is non-NULL, a boolean is written to it indicating whether
2595 or not the C flag is updated, as per ARM ARM "ThumbExpandImm_C".
2597 static UInt thumbExpandImm ( Bool* updatesC,
2598 UInt imm1, UInt imm3, UInt imm8 )
2600 vassert(imm1 < (1<<1));
2601 vassert(imm3 < (1<<3));
2602 vassert(imm8 < (1<<8));
2603 UInt i_imm3_a = (imm1 << 4) | (imm3 << 1) | ((imm8 >> 7) & 1);
2604 UInt abcdefgh = imm8;
2605 UInt lbcdefgh = imm8 | 0x80;
2606 if (updatesC) {
2607 *updatesC = i_imm3_a >= 8;
2609 switch (i_imm3_a) {
2610 case 0: case 1:
2611 return abcdefgh;
2612 case 2: case 3:
2613 return (abcdefgh << 16) | abcdefgh;
2614 case 4: case 5:
2615 return (abcdefgh << 24) | (abcdefgh << 8);
2616 case 6: case 7:
2617 return (abcdefgh << 24) | (abcdefgh << 16)
2618 | (abcdefgh << 8) | abcdefgh;
2619 case 8 ... 31:
2620 return lbcdefgh << (32 - i_imm3_a);
2621 default:
2622 break;
2624 /*NOTREACHED*/vassert(0);
2628 /* Version of thumbExpandImm where we simply feed it the
2629 instruction halfwords (the lowest addressed one is I0). */
2630 static UInt thumbExpandImm_from_I0_I1 ( Bool* updatesC,
2631 UShort i0s, UShort i1s )
2633 UInt i0 = (UInt)i0s;
2634 UInt i1 = (UInt)i1s;
2635 UInt imm1 = SLICE_UInt(i0,10,10);
2636 UInt imm3 = SLICE_UInt(i1,14,12);
2637 UInt imm8 = SLICE_UInt(i1,7,0);
2638 return thumbExpandImm(updatesC, imm1, imm3, imm8);
2642 /* Thumb16 only. Given the firstcond and mask fields from an IT
2643 instruction, compute the 32-bit ITSTATE value implied, as described
2644 in libvex_guest_arm.h. This is not the ARM ARM representation.
2645 Also produce the t/e chars for the 2nd, 3rd, 4th insns, for
2646 disassembly printing. Returns False if firstcond or mask
2647 denote something invalid.
2649 The number and conditions for the instructions to be
2650 conditionalised depend on firstcond and mask:
2652 mask cond 1 cond 2 cond 3 cond 4
2654 1000 fc[3:0]
2655 x100 fc[3:0] fc[3:1]:x
2656 xy10 fc[3:0] fc[3:1]:x fc[3:1]:y
2657 xyz1 fc[3:0] fc[3:1]:x fc[3:1]:y fc[3:1]:z
2659 The condition fields are assembled in *itstate backwards (cond 4 at
2660 the top, cond 1 at the bottom). Conditions are << 4'd and then
2661 ^0xE'd, and those fields that correspond to instructions in the IT
2662 block are tagged with a 1 bit.
/* See the big comment above for the encoding scheme.  Builds the
   32-bit ITSTATE word for an IT instruction and the t/e suffix
   chars for insns 2..4; returns False for invalid firstcond/mask
   combinations. */
2664 static Bool compute_ITSTATE ( /*OUT*/UInt* itstate,
2665 /*OUT*/HChar* ch1,
2666 /*OUT*/HChar* ch2,
2667 /*OUT*/HChar* ch3,
2668 UInt firstcond, UInt mask )
2670 vassert(firstcond <= 0xF);
2671 vassert(mask <= 0xF);
2672 *itstate = 0;
2673 *ch1 = *ch2 = *ch3 = '.';
2674 if (mask == 0)
2675 return False; /* the logic below actually ensures this anyway,
2676 but clearer to make it explicit. */
2677 if (firstcond == 0xF)
2678 return False; /* NV is not allowed */
2679 if (firstcond == 0xE && popcount32(mask) != 1)
2680 return False; /* if firstcond is AL then all the rest must be too */
2682 UInt m3 = (mask >> 3) & 1;
2683 UInt m2 = (mask >> 2) & 1;
2684 UInt m1 = (mask >> 1) & 1;
2685 UInt m0 = (mask >> 0) & 1;
     /* Each 8-bit lane of ITSTATE is (cond << 4) | in-IT-block bit;
        'fc' is the lane for a guarded insn, 'ni' for an unguarded
        one.  The whole word is XORed with 0xE0E0E0E0 at the end so
        that an all-zero ITSTATE means "unconditional". */
2687 UInt fc = (firstcond << 4) | 1/*in-IT-block*/;
2688 UInt ni = (0xE/*AL*/ << 4) | 0/*not-in-IT-block*/;
     /* One case per IT-block length, keyed off the lowest set bit
        of the mask (see the table in the comment above). */
2690 if (m3 == 1 && (m2|m1|m0) == 0) {
2691 *itstate = (ni << 24) | (ni << 16) | (ni << 8) | fc;
2692 *itstate ^= 0xE0E0E0E0;
2693 return True;
2696 if (m2 == 1 && (m1|m0) == 0) {
2697 *itstate = (ni << 24) | (ni << 16) | (setbit32(fc, 4, m3) << 8) | fc;
2698 *itstate ^= 0xE0E0E0E0;
2699 *ch1 = m3 == (firstcond & 1) ? 't' : 'e';
2700 return True;
2703 if (m1 == 1 && m0 == 0) {
2704 *itstate = (ni << 24)
2705 | (setbit32(fc, 4, m2) << 16)
2706 | (setbit32(fc, 4, m3) << 8) | fc;
2707 *itstate ^= 0xE0E0E0E0;
2708 *ch1 = m3 == (firstcond & 1) ? 't' : 'e';
2709 *ch2 = m2 == (firstcond & 1) ? 't' : 'e';
2710 return True;
2713 if (m0 == 1) {
2714 *itstate = (setbit32(fc, 4, m1) << 24)
2715 | (setbit32(fc, 4, m2) << 16)
2716 | (setbit32(fc, 4, m3) << 8) | fc;
2717 *itstate ^= 0xE0E0E0E0;
2718 *ch1 = m3 == (firstcond & 1) ? 't' : 'e';
2719 *ch2 = m2 == (firstcond & 1) ? 't' : 'e';
2720 *ch3 = m1 == (firstcond & 1) ? 't' : 'e';
2721 return True;
2724 return False;
2728 /* Generate IR to do 32-bit bit reversal, a la Hacker's Delight
2729 Chapter 7 Section 1. */
2730 static IRTemp gen_BITREV ( IRTemp x0 )
2732 IRTemp x1 = newTemp(Ity_I32);
2733 IRTemp x2 = newTemp(Ity_I32);
2734 IRTemp x3 = newTemp(Ity_I32);
2735 IRTemp x4 = newTemp(Ity_I32);
2736 IRTemp x5 = newTemp(Ity_I32);
2737 UInt c1 = 0x55555555;
2738 UInt c2 = 0x33333333;
2739 UInt c3 = 0x0F0F0F0F;
2740 UInt c4 = 0x00FF00FF;
2741 UInt c5 = 0x0000FFFF;
2742 assign(x1,
2743 binop(Iop_Or32,
2744 binop(Iop_Shl32,
2745 binop(Iop_And32, mkexpr(x0), mkU32(c1)),
2746 mkU8(1)),
2747 binop(Iop_Shr32,
2748 binop(Iop_And32, mkexpr(x0), mkU32(~c1)),
2749 mkU8(1))
2751 assign(x2,
2752 binop(Iop_Or32,
2753 binop(Iop_Shl32,
2754 binop(Iop_And32, mkexpr(x1), mkU32(c2)),
2755 mkU8(2)),
2756 binop(Iop_Shr32,
2757 binop(Iop_And32, mkexpr(x1), mkU32(~c2)),
2758 mkU8(2))
2760 assign(x3,
2761 binop(Iop_Or32,
2762 binop(Iop_Shl32,
2763 binop(Iop_And32, mkexpr(x2), mkU32(c3)),
2764 mkU8(4)),
2765 binop(Iop_Shr32,
2766 binop(Iop_And32, mkexpr(x2), mkU32(~c3)),
2767 mkU8(4))
2769 assign(x4,
2770 binop(Iop_Or32,
2771 binop(Iop_Shl32,
2772 binop(Iop_And32, mkexpr(x3), mkU32(c4)),
2773 mkU8(8)),
2774 binop(Iop_Shr32,
2775 binop(Iop_And32, mkexpr(x3), mkU32(~c4)),
2776 mkU8(8))
2778 assign(x5,
2779 binop(Iop_Or32,
2780 binop(Iop_Shl32,
2781 binop(Iop_And32, mkexpr(x4), mkU32(c5)),
2782 mkU8(16)),
2783 binop(Iop_Shr32,
2784 binop(Iop_And32, mkexpr(x4), mkU32(~c5)),
2785 mkU8(16))
2787 return x5;
2791 /* Generate IR to do rearrange bytes 3:2:1:0 in a word in to the order
2792 0:1:2:3 (aka byte-swap). */
2793 static IRTemp gen_REV ( IRTemp arg )
2795 IRTemp res = newTemp(Ity_I32);
2796 assign(res,
2797 binop(Iop_Or32,
2798 binop(Iop_Shl32, mkexpr(arg), mkU8(24)),
2799 binop(Iop_Or32,
2800 binop(Iop_And32, binop(Iop_Shl32, mkexpr(arg), mkU8(8)),
2801 mkU32(0x00FF0000)),
2802 binop(Iop_Or32,
2803 binop(Iop_And32, binop(Iop_Shr32, mkexpr(arg), mkU8(8)),
2804 mkU32(0x0000FF00)),
2805 binop(Iop_And32, binop(Iop_Shr32, mkexpr(arg), mkU8(24)),
2806 mkU32(0x000000FF) )
2807 ))));
2808 return res;
2812 /* Generate IR to do rearrange bytes 3:2:1:0 in a word in to the order
2813 2:3:0:1 (swap within lo and hi halves). */
2814 static IRTemp gen_REV16 ( IRTemp arg )
2816 IRTemp res = newTemp(Ity_I32);
2817 assign(res,
2818 binop(Iop_Or32,
2819 binop(Iop_And32,
2820 binop(Iop_Shl32, mkexpr(arg), mkU8(8)),
2821 mkU32(0xFF00FF00)),
2822 binop(Iop_And32,
2823 binop(Iop_Shr32, mkexpr(arg), mkU8(8)),
2824 mkU32(0x00FF00FF))));
2825 return res;
2829 /*------------------------------------------------------------*/
2830 /*--- Advanced SIMD (NEON) instructions ---*/
2831 /*------------------------------------------------------------*/
2833 /*------------------------------------------------------------*/
2834 /*--- NEON data processing ---*/
2835 /*------------------------------------------------------------*/
2837 /* For all NEON DP ops, we use the normal scheme to handle conditional
2838 writes to registers -- pass in condT and hand that on to the
2839 put*Reg functions. In ARM mode condT is always IRTemp_INVALID
2840 since NEON is unconditional for ARM. In Thumb mode condT is
2841 derived from the ITSTATE shift register in the normal way. */
2843 static
2844 UInt get_neon_d_regno(UInt theInstr)
2846 UInt x = ((theInstr >> 18) & 0x10) | ((theInstr >> 12) & 0xF);
2847 if (theInstr & 0x40) {
2848 if (x & 1) {
2849 x = x + 0x100;
2850 } else {
2851 x = x >> 1;
2854 return x;
2857 static
2858 UInt get_neon_n_regno(UInt theInstr)
2860 UInt x = ((theInstr >> 3) & 0x10) | ((theInstr >> 16) & 0xF);
2861 if (theInstr & 0x40) {
2862 if (x & 1) {
2863 x = x + 0x100;
2864 } else {
2865 x = x >> 1;
2868 return x;
2871 static
2872 UInt get_neon_m_regno(UInt theInstr)
2874 UInt x = ((theInstr >> 1) & 0x10) | (theInstr & 0xF);
2875 if (theInstr & 0x40) {
2876 if (x & 1) {
2877 x = x + 0x100;
2878 } else {
2879 x = x >> 1;
2882 return x;
2885 static
2886 Bool dis_neon_vext ( UInt theInstr, IRTemp condT )
2888 UInt dreg = get_neon_d_regno(theInstr);
2889 UInt mreg = get_neon_m_regno(theInstr);
2890 UInt nreg = get_neon_n_regno(theInstr);
2891 UInt imm4 = (theInstr >> 8) & 0xf;
2892 UInt Q = (theInstr >> 6) & 1;
2893 HChar reg_t = Q ? 'q' : 'd';
2895 if (Q) {
2896 putQReg(dreg, triop(Iop_SliceV128, /*hiV128*/getQReg(mreg),
2897 /*loV128*/getQReg(nreg), mkU8(imm4)), condT);
2898 } else {
2899 putDRegI64(dreg, triop(Iop_Slice64, /*hiI64*/getDRegI64(mreg),
2900 /*loI64*/getDRegI64(nreg), mkU8(imm4)), condT);
2902 DIP("vext.8 %c%u, %c%u, %c%u, #%u\n", reg_t, dreg, reg_t, nreg,
2903 reg_t, mreg, imm4);
2904 return True;
2907 /* Generate specific vector FP binary ops, possibly with a fake
2908 rounding mode as required by the primop. */
2909 static
2910 IRExpr* binop_w_fake_RM ( IROp op, IRExpr* argL, IRExpr* argR )
2912 switch (op) {
2913 case Iop_Add32Fx4:
2914 case Iop_Sub32Fx4:
2915 case Iop_Mul32Fx4:
2916 return triop(op, get_FAKE_roundingmode(), argL, argR );
2917 case Iop_Add32x4: case Iop_Add16x8:
2918 case Iop_Sub32x4: case Iop_Sub16x8:
2919 case Iop_Mul32x4: case Iop_Mul16x8:
2920 case Iop_Mul32x2: case Iop_Mul16x4:
2921 case Iop_Add32Fx2:
2922 case Iop_Sub32Fx2:
2923 case Iop_Mul32Fx2:
2924 case Iop_PwAdd32Fx2:
2925 return binop(op, argL, argR);
2926 default:
2927 ppIROp(op);
2928 vassert(0);
2932 /* VTBL, VTBX */
/* Table lookup: each byte of Dm indexes into a table of 1..4
   consecutive D registers starting at Dn (len+1 regs).  The loop
   handles one table register per iteration, subtracting 8 from the
   indices each time and accumulating (a) the looked-up bytes in
   old_res and (b) a mask of already-satisfied byte lanes in
   old_mask.  For VTBX (op==1) unmatched destination bytes are
   preserved; for VTBL (op==0) they end up 0. */
2933 static
2934 Bool dis_neon_vtb ( UInt theInstr, IRTemp condT )
2936 UInt op = (theInstr >> 6) & 1;
2937 UInt dreg = get_neon_d_regno(theInstr & ~(1 << 6));
2938 UInt nreg = get_neon_n_regno(theInstr & ~(1 << 6));
2939 UInt mreg = get_neon_m_regno(theInstr & ~(1 << 6));
2940 UInt len = (theInstr >> 8) & 3;
2941 Int i;
2942 IROp cmp;
2943 ULong imm;
2944 IRTemp arg_l;
2945 IRTemp old_mask, new_mask, cur_mask;
2946 IRTemp old_res, new_res;
2947 IRTemp old_arg, new_arg;
     /* 0x100 is the "invalid register" flag from get_neon_*_regno. */
2949 if (dreg >= 0x100 || mreg >= 0x100 || nreg >= 0x100)
2950 return False;
2951 if (nreg + len > 31)
2952 return False;
2954 cmp = Iop_CmpGT8Ux8;
2956 old_mask = newTemp(Ity_I64);
2957 old_res = newTemp(Ity_I64);
2958 old_arg = newTemp(Ity_I64);
2959 assign(old_mask, mkU64(0));
2960 assign(old_res, mkU64(0));
2961 assign(old_arg, getDRegI64(mreg));
     /* imm = 0x0808..08: per-byte constant 8, the per-register stride. */
2962 imm = 8;
2963 imm = (imm << 8) | imm;
2964 imm = (imm << 16) | imm;
2965 imm = (imm << 32) | imm;
2967 for (i = 0; i <= len; i++) {
2968 arg_l = newTemp(Ity_I64);
2969 new_mask = newTemp(Ity_I64);
2970 cur_mask = newTemp(Ity_I64);
2971 new_res = newTemp(Ity_I64);
2972 new_arg = newTemp(Ity_I64);
2973 assign(arg_l, getDRegI64(nreg+i));
        /* Shift the index window down by 8 for the next table reg;
           cur_mask marks the byte lanes whose index (8 >u idx)
           falls inside the current register. */
2974 assign(new_arg, binop(Iop_Sub8x8, mkexpr(old_arg), mkU64(imm)));
2975 assign(cur_mask, binop(cmp, mkU64(imm), mkexpr(old_arg)));
2976 assign(new_mask, binop(Iop_Or64, mkexpr(old_mask), mkexpr(cur_mask)));
2977 assign(new_res, binop(Iop_Or64,
2978 mkexpr(old_res),
2979 binop(Iop_And64,
2980 binop(Iop_Perm8x8,
2981 mkexpr(arg_l),
2982 binop(Iop_And64,
2983 mkexpr(old_arg),
2984 mkexpr(cur_mask))),
2985 mkexpr(cur_mask))));
2987 old_arg = new_arg;
2988 old_mask = new_mask;
2989 old_res = new_res;
     /* VTBX: keep the old destination bytes where no table entry
        matched (i.e. outside old_mask). */
2991 if (op) {
2992 new_res = newTemp(Ity_I64);
2993 assign(new_res, binop(Iop_Or64,
2994 binop(Iop_And64,
2995 getDRegI64(dreg),
2996 unop(Iop_Not64, mkexpr(old_mask))),
2997 mkexpr(old_res)));
2998 old_res = new_res;
3001 putDRegI64(dreg, mkexpr(old_res), condT);
3002 DIP("vtb%c.8 d%u, {", op ? 'x' : 'l', dreg);
3003 if (len > 0) {
3004 DIP("d%u-d%u", nreg, nreg + len);
3005 } else {
3006 DIP("d%u", nreg);
3008 DIP("}, d%u\n", mreg);
3009 return True;
3012 /* VDUP (scalar) */
/* Duplicate one scalar lane of Dm across every lane of Dd or Qd.
   imm4 encodes both lane size and index by its lowest set bit:
      xxx1 ->  8-bit lane, index = imm4 >> 1
      xx10 -> 16-bit lane, index = imm4 >> 2
      x100 -> 32-bit lane, index = imm4 >> 3
   imm4 == 0 or 8 (no size bit set) is not decodable. */
3013 static
3014 Bool dis_neon_vdup ( UInt theInstr, IRTemp condT )
3016 UInt Q = (theInstr >> 6) & 1;
3017 UInt dreg = ((theInstr >> 18) & 0x10) | ((theInstr >> 12) & 0xF);
3018 UInt mreg = ((theInstr >> 1) & 0x10) | (theInstr & 0xF);
3019 UInt imm4 = (theInstr >> 16) & 0xF;
3020 UInt index;
3021 UInt size;
3022 IRTemp arg_m;
3023 IRTemp res;
3024 IROp op, op2;
3026 if ((imm4 == 0) || (imm4 == 8))
3027 return False;
     /* Q form needs an even D register pair. */
3028 if ((Q == 1) && ((dreg & 1) == 1))
3029 return False;
3030 if (Q)
3031 dreg >>= 1;
3032 arg_m = newTemp(Ity_I64);
3033 assign(arg_m, getDRegI64(mreg));
3034 if (Q)
3035 res = newTemp(Ity_V128);
3036 else
3037 res = newTemp(Ity_I64);
     /* Pick Dup/GetElem ops from the lane size; the GetElem is
        always on the 64-bit source (arg_m is a D register). */
3038 if ((imm4 & 1) == 1) {
3039 op = Q ? Iop_Dup8x16 : Iop_Dup8x8;
3040 op2 = Iop_GetElem8x8;
3041 index = imm4 >> 1;
3042 size = 8;
3043 } else if ((imm4 & 3) == 2) {
3044 op = Q ? Iop_Dup16x8 : Iop_Dup16x4;
3045 op2 = Iop_GetElem16x4;
3046 index = imm4 >> 2;
3047 size = 16;
3048 } else if ((imm4 & 7) == 4) {
3049 op = Q ? Iop_Dup32x4 : Iop_Dup32x2;
3050 op2 = Iop_GetElem32x2;
3051 index = imm4 >> 3;
3052 size = 32;
3053 } else {
3054 return False; // can this ever happen?
3056 assign(res, unop(op, binop(op2, mkexpr(arg_m), mkU8(index))));
3057 if (Q) {
3058 putQReg(dreg, mkexpr(res), condT);
3059 } else {
3060 putDRegI64(dreg, mkexpr(res), condT);
3062 DIP("vdup.%u %c%u, d%u[%u]\n", size, Q ? 'q' : 'd', dreg, mreg, index);
3063 return True;
3066 /* A7.4.1 Three registers of the same length */
3067 static
3068 Bool dis_neon_data_3same ( UInt theInstr, IRTemp condT )
3070 /* In paths where this returns False, indicating a non-decodable
3071 instruction, there may still be some IR assignments to temporaries
3072 generated. This is inconvenient but harmless, and the post-front-end
3073 IR optimisation pass will just remove them anyway. So there's no
3074 effort made here to tidy it up.
3076 UInt Q = (theInstr >> 6) & 1;
3077 UInt dreg = get_neon_d_regno(theInstr);
3078 UInt nreg = get_neon_n_regno(theInstr);
3079 UInt mreg = get_neon_m_regno(theInstr);
3080 UInt A = (theInstr >> 8) & 0xF;
3081 UInt B = (theInstr >> 4) & 1;
3082 UInt C = (theInstr >> 20) & 0x3;
3083 UInt U = (theInstr >> 24) & 1;
3084 UInt size = C;
3086 IRTemp arg_n;
3087 IRTemp arg_m;
3088 IRTemp res;
3090 if (Q) {
3091 arg_n = newTemp(Ity_V128);
3092 arg_m = newTemp(Ity_V128);
3093 res = newTemp(Ity_V128);
3094 assign(arg_n, getQReg(nreg));
3095 assign(arg_m, getQReg(mreg));
3096 } else {
3097 arg_n = newTemp(Ity_I64);
3098 arg_m = newTemp(Ity_I64);
3099 res = newTemp(Ity_I64);
3100 assign(arg_n, getDRegI64(nreg));
3101 assign(arg_m, getDRegI64(mreg));
3104 switch(A) {
3105 case 0:
3106 if (B == 0) {
3107 /* VHADD */
3108 ULong imm = 0;
3109 IRExpr *imm_val;
3110 IROp addOp;
3111 IROp andOp;
3112 IROp shOp;
3113 HChar regType = Q ? 'q' : 'd';
3115 if (size == 3)
3116 return False;
3117 switch(size) {
3118 case 0: imm = 0x101010101010101LL; break;
3119 case 1: imm = 0x1000100010001LL; break;
3120 case 2: imm = 0x100000001LL; break;
3121 default: vassert(0);
3123 if (Q) {
3124 imm_val = binop(Iop_64HLtoV128, mkU64(imm), mkU64(imm));
3125 andOp = Iop_AndV128;
3126 } else {
3127 imm_val = mkU64(imm);
3128 andOp = Iop_And64;
3130 if (U) {
3131 switch(size) {
3132 case 0:
3133 addOp = Q ? Iop_Add8x16 : Iop_Add8x8;
3134 shOp = Q ? Iop_ShrN8x16 : Iop_ShrN8x8;
3135 break;
3136 case 1:
3137 addOp = Q ? Iop_Add16x8 : Iop_Add16x4;
3138 shOp = Q ? Iop_ShrN16x8 : Iop_ShrN16x4;
3139 break;
3140 case 2:
3141 addOp = Q ? Iop_Add32x4 : Iop_Add32x2;
3142 shOp = Q ? Iop_ShrN32x4 : Iop_ShrN32x2;
3143 break;
3144 default:
3145 vassert(0);
3147 } else {
3148 switch(size) {
3149 case 0:
3150 addOp = Q ? Iop_Add8x16 : Iop_Add8x8;
3151 shOp = Q ? Iop_SarN8x16 : Iop_SarN8x8;
3152 break;
3153 case 1:
3154 addOp = Q ? Iop_Add16x8 : Iop_Add16x4;
3155 shOp = Q ? Iop_SarN16x8 : Iop_SarN16x4;
3156 break;
3157 case 2:
3158 addOp = Q ? Iop_Add32x4 : Iop_Add32x2;
3159 shOp = Q ? Iop_SarN32x4 : Iop_SarN32x2;
3160 break;
3161 default:
3162 vassert(0);
3165 assign(res,
3166 binop(addOp,
3167 binop(addOp,
3168 binop(shOp, mkexpr(arg_m), mkU8(1)),
3169 binop(shOp, mkexpr(arg_n), mkU8(1))),
3170 binop(shOp,
3171 binop(addOp,
3172 binop(andOp, mkexpr(arg_m), imm_val),
3173 binop(andOp, mkexpr(arg_n), imm_val)),
3174 mkU8(1))));
3175 DIP("vhadd.%c%d %c%u, %c%u, %c%u\n",
3176 U ? 'u' : 's', 8 << size, regType,
3177 dreg, regType, nreg, regType, mreg);
3178 } else {
3179 /* VQADD */
3180 IROp op, op2;
3181 IRTemp tmp;
3182 HChar reg_t = Q ? 'q' : 'd';
3183 if (Q) {
3184 switch (size) {
3185 case 0:
3186 op = U ? Iop_QAdd8Ux16 : Iop_QAdd8Sx16;
3187 op2 = Iop_Add8x16;
3188 break;
3189 case 1:
3190 op = U ? Iop_QAdd16Ux8 : Iop_QAdd16Sx8;
3191 op2 = Iop_Add16x8;
3192 break;
3193 case 2:
3194 op = U ? Iop_QAdd32Ux4 : Iop_QAdd32Sx4;
3195 op2 = Iop_Add32x4;
3196 break;
3197 case 3:
3198 op = U ? Iop_QAdd64Ux2 : Iop_QAdd64Sx2;
3199 op2 = Iop_Add64x2;
3200 break;
3201 default:
3202 vassert(0);
3204 } else {
3205 switch (size) {
3206 case 0:
3207 op = U ? Iop_QAdd8Ux8 : Iop_QAdd8Sx8;
3208 op2 = Iop_Add8x8;
3209 break;
3210 case 1:
3211 op = U ? Iop_QAdd16Ux4 : Iop_QAdd16Sx4;
3212 op2 = Iop_Add16x4;
3213 break;
3214 case 2:
3215 op = U ? Iop_QAdd32Ux2 : Iop_QAdd32Sx2;
3216 op2 = Iop_Add32x2;
3217 break;
3218 case 3:
3219 op = U ? Iop_QAdd64Ux1 : Iop_QAdd64Sx1;
3220 op2 = Iop_Add64;
3221 break;
3222 default:
3223 vassert(0);
3226 if (Q) {
3227 tmp = newTemp(Ity_V128);
3228 } else {
3229 tmp = newTemp(Ity_I64);
3231 assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
3232 assign(tmp, binop(op2, mkexpr(arg_n), mkexpr(arg_m)));
3233 setFlag_QC(mkexpr(res), mkexpr(tmp), Q, condT);
3234 DIP("vqadd.%c%d %c%u %c%u, %c%u\n",
3235 U ? 'u' : 's',
3236 8 << size, reg_t, dreg, reg_t, nreg, reg_t, mreg);
3238 break;
3239 case 1:
3240 if (B == 0) {
3241 /* VRHADD */
3242 /* VRHADD C, A, B ::=
3243 C = (A >> 1) + (B >> 1) + (((A & 1) + (B & 1) + 1) >> 1) */
3244 IROp shift_op, add_op;
3245 IRTemp cc;
3246 ULong one = 1;
3247 HChar reg_t = Q ? 'q' : 'd';
3248 switch (size) {
3249 case 0: one = (one << 8) | one; /* fall through */
3250 case 1: one = (one << 16) | one; /* fall through */
3251 case 2: one = (one << 32) | one; break;
3252 case 3: return False;
3253 default: vassert(0);
3255 if (Q) {
3256 switch (size) {
3257 case 0:
3258 shift_op = U ? Iop_ShrN8x16 : Iop_SarN8x16;
3259 add_op = Iop_Add8x16;
3260 break;
3261 case 1:
3262 shift_op = U ? Iop_ShrN16x8 : Iop_SarN16x8;
3263 add_op = Iop_Add16x8;
3264 break;
3265 case 2:
3266 shift_op = U ? Iop_ShrN32x4 : Iop_SarN32x4;
3267 add_op = Iop_Add32x4;
3268 break;
3269 case 3:
3270 return False;
3271 default:
3272 vassert(0);
3274 } else {
3275 switch (size) {
3276 case 0:
3277 shift_op = U ? Iop_ShrN8x8 : Iop_SarN8x8;
3278 add_op = Iop_Add8x8;
3279 break;
3280 case 1:
3281 shift_op = U ? Iop_ShrN16x4 : Iop_SarN16x4;
3282 add_op = Iop_Add16x4;
3283 break;
3284 case 2:
3285 shift_op = U ? Iop_ShrN32x2 : Iop_SarN32x2;
3286 add_op = Iop_Add32x2;
3287 break;
3288 case 3:
3289 return False;
3290 default:
3291 vassert(0);
3294 if (Q) {
3295 cc = newTemp(Ity_V128);
3296 assign(cc, binop(shift_op,
3297 binop(add_op,
3298 binop(add_op,
3299 binop(Iop_AndV128,
3300 mkexpr(arg_n),
3301 binop(Iop_64HLtoV128,
3302 mkU64(one),
3303 mkU64(one))),
3304 binop(Iop_AndV128,
3305 mkexpr(arg_m),
3306 binop(Iop_64HLtoV128,
3307 mkU64(one),
3308 mkU64(one)))),
3309 binop(Iop_64HLtoV128,
3310 mkU64(one),
3311 mkU64(one))),
3312 mkU8(1)));
3313 assign(res, binop(add_op,
3314 binop(add_op,
3315 binop(shift_op,
3316 mkexpr(arg_n),
3317 mkU8(1)),
3318 binop(shift_op,
3319 mkexpr(arg_m),
3320 mkU8(1))),
3321 mkexpr(cc)));
3322 } else {
3323 cc = newTemp(Ity_I64);
3324 assign(cc, binop(shift_op,
3325 binop(add_op,
3326 binop(add_op,
3327 binop(Iop_And64,
3328 mkexpr(arg_n),
3329 mkU64(one)),
3330 binop(Iop_And64,
3331 mkexpr(arg_m),
3332 mkU64(one))),
3333 mkU64(one)),
3334 mkU8(1)));
3335 assign(res, binop(add_op,
3336 binop(add_op,
3337 binop(shift_op,
3338 mkexpr(arg_n),
3339 mkU8(1)),
3340 binop(shift_op,
3341 mkexpr(arg_m),
3342 mkU8(1))),
3343 mkexpr(cc)));
3345 DIP("vrhadd.%c%d %c%u, %c%u, %c%u\n",
3346 U ? 'u' : 's',
3347 8 << size, reg_t, dreg, reg_t, nreg, reg_t, mreg);
3348 } else {
3349 if (U == 0) {
3350 switch(C) {
3351 case 0: {
3352 /* VAND */
3353 HChar reg_t = Q ? 'q' : 'd';
3354 if (Q) {
3355 assign(res, binop(Iop_AndV128, mkexpr(arg_n),
3356 mkexpr(arg_m)));
3357 } else {
3358 assign(res, binop(Iop_And64, mkexpr(arg_n),
3359 mkexpr(arg_m)));
3361 DIP("vand %c%u, %c%u, %c%u\n",
3362 reg_t, dreg, reg_t, nreg, reg_t, mreg);
3363 break;
3365 case 1: {
3366 /* VBIC */
3367 HChar reg_t = Q ? 'q' : 'd';
3368 if (Q) {
3369 assign(res, binop(Iop_AndV128,mkexpr(arg_n),
3370 unop(Iop_NotV128, mkexpr(arg_m))));
3371 } else {
3372 assign(res, binop(Iop_And64, mkexpr(arg_n),
3373 unop(Iop_Not64, mkexpr(arg_m))));
3375 DIP("vbic %c%u, %c%u, %c%u\n",
3376 reg_t, dreg, reg_t, nreg, reg_t, mreg);
3377 break;
3379 case 2:
3380 if ( nreg != mreg) {
3381 /* VORR */
3382 HChar reg_t = Q ? 'q' : 'd';
3383 if (Q) {
3384 assign(res, binop(Iop_OrV128, mkexpr(arg_n),
3385 mkexpr(arg_m)));
3386 } else {
3387 assign(res, binop(Iop_Or64, mkexpr(arg_n),
3388 mkexpr(arg_m)));
3390 DIP("vorr %c%u, %c%u, %c%u\n",
3391 reg_t, dreg, reg_t, nreg, reg_t, mreg);
3392 } else {
3393 /* VMOV */
3394 HChar reg_t = Q ? 'q' : 'd';
3395 assign(res, mkexpr(arg_m));
3396 DIP("vmov %c%u, %c%u\n", reg_t, dreg, reg_t, mreg);
3398 break;
3399 case 3:{
3400 /* VORN */
3401 HChar reg_t = Q ? 'q' : 'd';
3402 if (Q) {
3403 assign(res, binop(Iop_OrV128,mkexpr(arg_n),
3404 unop(Iop_NotV128, mkexpr(arg_m))));
3405 } else {
3406 assign(res, binop(Iop_Or64, mkexpr(arg_n),
3407 unop(Iop_Not64, mkexpr(arg_m))));
3409 DIP("vorn %c%u, %c%u, %c%u\n",
3410 reg_t, dreg, reg_t, nreg, reg_t, mreg);
3411 break;
3413 default:
3414 vassert(0);
3416 } else {
3417 switch(C) {
3418 case 0:
3419 /* VEOR (XOR) */
3420 if (Q) {
3421 assign(res, binop(Iop_XorV128, mkexpr(arg_n),
3422 mkexpr(arg_m)));
3423 } else {
3424 assign(res, binop(Iop_Xor64, mkexpr(arg_n),
3425 mkexpr(arg_m)));
3427 DIP("veor %c%u, %c%u, %c%u\n", Q ? 'q' : 'd', dreg,
3428 Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
3429 break;
3430 case 1:
3431 /* VBSL */
3432 if (Q) {
3433 IRTemp reg_d = newTemp(Ity_V128);
3434 assign(reg_d, getQReg(dreg));
3435 assign(res,
3436 binop(Iop_OrV128,
3437 binop(Iop_AndV128, mkexpr(arg_n),
3438 mkexpr(reg_d)),
3439 binop(Iop_AndV128,
3440 mkexpr(arg_m),
3441 unop(Iop_NotV128,
3442 mkexpr(reg_d)) ) ) );
3443 } else {
3444 IRTemp reg_d = newTemp(Ity_I64);
3445 assign(reg_d, getDRegI64(dreg));
3446 assign(res,
3447 binop(Iop_Or64,
3448 binop(Iop_And64, mkexpr(arg_n),
3449 mkexpr(reg_d)),
3450 binop(Iop_And64,
3451 mkexpr(arg_m),
3452 unop(Iop_Not64, mkexpr(reg_d)))));
3454 DIP("vbsl %c%u, %c%u, %c%u\n",
3455 Q ? 'q' : 'd', dreg,
3456 Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
3457 break;
3458 case 2:
3459 /* VBIT */
3460 if (Q) {
3461 IRTemp reg_d = newTemp(Ity_V128);
3462 assign(reg_d, getQReg(dreg));
3463 assign(res,
3464 binop(Iop_OrV128,
3465 binop(Iop_AndV128, mkexpr(arg_n),
3466 mkexpr(arg_m)),
3467 binop(Iop_AndV128,
3468 mkexpr(reg_d),
3469 unop(Iop_NotV128, mkexpr(arg_m)))));
3470 } else {
3471 IRTemp reg_d = newTemp(Ity_I64);
3472 assign(reg_d, getDRegI64(dreg));
3473 assign(res,
3474 binop(Iop_Or64,
3475 binop(Iop_And64, mkexpr(arg_n),
3476 mkexpr(arg_m)),
3477 binop(Iop_And64,
3478 mkexpr(reg_d),
3479 unop(Iop_Not64, mkexpr(arg_m)))));
3481 DIP("vbit %c%u, %c%u, %c%u\n",
3482 Q ? 'q' : 'd', dreg,
3483 Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
3484 break;
3485 case 3:
3486 /* VBIF */
3487 if (Q) {
3488 IRTemp reg_d = newTemp(Ity_V128);
3489 assign(reg_d, getQReg(dreg));
3490 assign(res,
3491 binop(Iop_OrV128,
3492 binop(Iop_AndV128, mkexpr(reg_d),
3493 mkexpr(arg_m)),
3494 binop(Iop_AndV128,
3495 mkexpr(arg_n),
3496 unop(Iop_NotV128, mkexpr(arg_m)))));
3497 } else {
3498 IRTemp reg_d = newTemp(Ity_I64);
3499 assign(reg_d, getDRegI64(dreg));
3500 assign(res,
3501 binop(Iop_Or64,
3502 binop(Iop_And64, mkexpr(reg_d),
3503 mkexpr(arg_m)),
3504 binop(Iop_And64,
3505 mkexpr(arg_n),
3506 unop(Iop_Not64, mkexpr(arg_m)))));
3508 DIP("vbif %c%u, %c%u, %c%u\n",
3509 Q ? 'q' : 'd', dreg,
3510 Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
3511 break;
3512 default:
3513 vassert(0);
3517 break;
3518 case 2:
3519 if (B == 0) {
3520 /* VHSUB */
3521 /* (A >> 1) - (B >> 1) - (NOT (A) & B & 1) */
3522 ULong imm = 0;
3523 IRExpr *imm_val;
3524 IROp subOp;
3525 IROp notOp;
3526 IROp andOp;
3527 IROp shOp;
3528 if (size == 3)
3529 return False;
3530 switch(size) {
3531 case 0: imm = 0x101010101010101LL; break;
3532 case 1: imm = 0x1000100010001LL; break;
3533 case 2: imm = 0x100000001LL; break;
3534 default: vassert(0);
3536 if (Q) {
3537 imm_val = binop(Iop_64HLtoV128, mkU64(imm), mkU64(imm));
3538 andOp = Iop_AndV128;
3539 notOp = Iop_NotV128;
3540 } else {
3541 imm_val = mkU64(imm);
3542 andOp = Iop_And64;
3543 notOp = Iop_Not64;
3545 if (U) {
3546 switch(size) {
3547 case 0:
3548 subOp = Q ? Iop_Sub8x16 : Iop_Sub8x8;
3549 shOp = Q ? Iop_ShrN8x16 : Iop_ShrN8x8;
3550 break;
3551 case 1:
3552 subOp = Q ? Iop_Sub16x8 : Iop_Sub16x4;
3553 shOp = Q ? Iop_ShrN16x8 : Iop_ShrN16x4;
3554 break;
3555 case 2:
3556 subOp = Q ? Iop_Sub32x4 : Iop_Sub32x2;
3557 shOp = Q ? Iop_ShrN32x4 : Iop_ShrN32x2;
3558 break;
3559 default:
3560 vassert(0);
3562 } else {
3563 switch(size) {
3564 case 0:
3565 subOp = Q ? Iop_Sub8x16 : Iop_Sub8x8;
3566 shOp = Q ? Iop_SarN8x16 : Iop_SarN8x8;
3567 break;
3568 case 1:
3569 subOp = Q ? Iop_Sub16x8 : Iop_Sub16x4;
3570 shOp = Q ? Iop_SarN16x8 : Iop_SarN16x4;
3571 break;
3572 case 2:
3573 subOp = Q ? Iop_Sub32x4 : Iop_Sub32x2;
3574 shOp = Q ? Iop_SarN32x4 : Iop_SarN32x2;
3575 break;
3576 default:
3577 vassert(0);
3580 assign(res,
3581 binop(subOp,
3582 binop(subOp,
3583 binop(shOp, mkexpr(arg_n), mkU8(1)),
3584 binop(shOp, mkexpr(arg_m), mkU8(1))),
3585 binop(andOp,
3586 binop(andOp,
3587 unop(notOp, mkexpr(arg_n)),
3588 mkexpr(arg_m)),
3589 imm_val)));
3590 DIP("vhsub.%c%d %c%u, %c%u, %c%u\n",
3591 U ? 'u' : 's', 8 << size,
3592 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd',
3593 mreg);
3594 } else {
3595 /* VQSUB */
3596 IROp op, op2;
3597 IRTemp tmp;
3598 if (Q) {
3599 switch (size) {
3600 case 0:
3601 op = U ? Iop_QSub8Ux16 : Iop_QSub8Sx16;
3602 op2 = Iop_Sub8x16;
3603 break;
3604 case 1:
3605 op = U ? Iop_QSub16Ux8 : Iop_QSub16Sx8;
3606 op2 = Iop_Sub16x8;
3607 break;
3608 case 2:
3609 op = U ? Iop_QSub32Ux4 : Iop_QSub32Sx4;
3610 op2 = Iop_Sub32x4;
3611 break;
3612 case 3:
3613 op = U ? Iop_QSub64Ux2 : Iop_QSub64Sx2;
3614 op2 = Iop_Sub64x2;
3615 break;
3616 default:
3617 vassert(0);
3619 } else {
3620 switch (size) {
3621 case 0:
3622 op = U ? Iop_QSub8Ux8 : Iop_QSub8Sx8;
3623 op2 = Iop_Sub8x8;
3624 break;
3625 case 1:
3626 op = U ? Iop_QSub16Ux4 : Iop_QSub16Sx4;
3627 op2 = Iop_Sub16x4;
3628 break;
3629 case 2:
3630 op = U ? Iop_QSub32Ux2 : Iop_QSub32Sx2;
3631 op2 = Iop_Sub32x2;
3632 break;
3633 case 3:
3634 op = U ? Iop_QSub64Ux1 : Iop_QSub64Sx1;
3635 op2 = Iop_Sub64;
3636 break;
3637 default:
3638 vassert(0);
3641 if (Q)
3642 tmp = newTemp(Ity_V128);
3643 else
3644 tmp = newTemp(Ity_I64);
3645 assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
3646 assign(tmp, binop(op2, mkexpr(arg_n), mkexpr(arg_m)));
3647 setFlag_QC(mkexpr(res), mkexpr(tmp), Q, condT);
3648 DIP("vqsub.%c%d %c%u, %c%u, %c%u\n",
3649 U ? 'u' : 's', 8 << size,
3650 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd',
3651 mreg);
3653 break;
3654 case 3: {
3655 IROp op;
3656 if (Q) {
3657 switch (size) {
3658 case 0: op = U ? Iop_CmpGT8Ux16 : Iop_CmpGT8Sx16; break;
3659 case 1: op = U ? Iop_CmpGT16Ux8 : Iop_CmpGT16Sx8; break;
3660 case 2: op = U ? Iop_CmpGT32Ux4 : Iop_CmpGT32Sx4; break;
3661 case 3: return False;
3662 default: vassert(0);
3664 } else {
3665 switch (size) {
3666 case 0: op = U ? Iop_CmpGT8Ux8 : Iop_CmpGT8Sx8; break;
3667 case 1: op = U ? Iop_CmpGT16Ux4 : Iop_CmpGT16Sx4; break;
3668 case 2: op = U ? Iop_CmpGT32Ux2: Iop_CmpGT32Sx2; break;
3669 case 3: return False;
3670 default: vassert(0);
3673 if (B == 0) {
3674 /* VCGT */
3675 assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
3676 DIP("vcgt.%c%d %c%u, %c%u, %c%u\n",
3677 U ? 'u' : 's', 8 << size,
3678 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd',
3679 mreg);
3680 } else {
3681 /* VCGE */
3682 /* VCGE res, argn, argm
3683 is equal to
3684 VCGT tmp, argm, argn
3685 VNOT res, tmp */
3686 assign(res,
3687 unop(Q ? Iop_NotV128 : Iop_Not64,
3688 binop(op, mkexpr(arg_m), mkexpr(arg_n))));
3689 DIP("vcge.%c%d %c%u, %c%u, %c%u\n",
3690 U ? 'u' : 's', 8 << size,
3691 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd',
3692 mreg);
3695 break;
3696 case 4:
3697 if (B == 0) {
3698 /* VSHL */
3699 IROp op = Iop_INVALID, sub_op = Iop_INVALID;
3700 IRTemp tmp = IRTemp_INVALID;
3701 if (U) {
3702 switch (size) {
3703 case 0: op = Q ? Iop_Shl8x16 : Iop_Shl8x8; break;
3704 case 1: op = Q ? Iop_Shl16x8 : Iop_Shl16x4; break;
3705 case 2: op = Q ? Iop_Shl32x4 : Iop_Shl32x2; break;
3706 case 3: op = Q ? Iop_Shl64x2 : Iop_Shl64; break;
3707 default: vassert(0);
3709 } else {
3710 tmp = newTemp(Q ? Ity_V128 : Ity_I64);
3711 switch (size) {
3712 case 0:
3713 op = Q ? Iop_Sar8x16 : Iop_Sar8x8;
3714 sub_op = Q ? Iop_Sub8x16 : Iop_Sub8x8;
3715 break;
3716 case 1:
3717 op = Q ? Iop_Sar16x8 : Iop_Sar16x4;
3718 sub_op = Q ? Iop_Sub16x8 : Iop_Sub16x4;
3719 break;
3720 case 2:
3721 op = Q ? Iop_Sar32x4 : Iop_Sar32x2;
3722 sub_op = Q ? Iop_Sub32x4 : Iop_Sub32x2;
3723 break;
3724 case 3:
3725 op = Q ? Iop_Sar64x2 : Iop_Sar64;
3726 sub_op = Q ? Iop_Sub64x2 : Iop_Sub64;
3727 break;
3728 default:
3729 vassert(0);
3732 if (U) {
3733 if (!Q && (size == 3))
3734 assign(res, binop(op, mkexpr(arg_m),
3735 unop(Iop_64to8, mkexpr(arg_n))));
3736 else
3737 assign(res, binop(op, mkexpr(arg_m), mkexpr(arg_n)));
3738 } else {
3739 if (Q)
3740 assign(tmp, binop(sub_op,
3741 binop(Iop_64HLtoV128, mkU64(0), mkU64(0)),
3742 mkexpr(arg_n)));
3743 else
3744 assign(tmp, binop(sub_op, mkU64(0), mkexpr(arg_n)));
3745 if (!Q && (size == 3))
3746 assign(res, binop(op, mkexpr(arg_m),
3747 unop(Iop_64to8, mkexpr(tmp))));
3748 else
3749 assign(res, binop(op, mkexpr(arg_m), mkexpr(tmp)));
3751 DIP("vshl.%c%d %c%u, %c%u, %c%u\n",
3752 U ? 'u' : 's', 8 << size,
3753 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg, Q ? 'q' : 'd',
3754 nreg);
3755 } else {
3756 /* VQSHL */
3757 IROp op, op_rev, op_shrn, op_shln, cmp_neq, cmp_gt;
3758 IRTemp tmp, shval, mask, old_shval;
3759 UInt i;
3760 ULong esize;
3761 cmp_neq = Q ? Iop_CmpNEZ8x16 : Iop_CmpNEZ8x8;
3762 cmp_gt = Q ? Iop_CmpGT8Sx16 : Iop_CmpGT8Sx8;
3763 if (U) {
3764 switch (size) {
3765 case 0:
3766 op = Q ? Iop_QShl8x16 : Iop_QShl8x8;
3767 op_rev = Q ? Iop_Shr8x16 : Iop_Shr8x8;
3768 op_shrn = Q ? Iop_ShrN8x16 : Iop_ShrN8x8;
3769 op_shln = Q ? Iop_ShlN8x16 : Iop_ShlN8x8;
3770 break;
3771 case 1:
3772 op = Q ? Iop_QShl16x8 : Iop_QShl16x4;
3773 op_rev = Q ? Iop_Shr16x8 : Iop_Shr16x4;
3774 op_shrn = Q ? Iop_ShrN16x8 : Iop_ShrN16x4;
3775 op_shln = Q ? Iop_ShlN16x8 : Iop_ShlN16x4;
3776 break;
3777 case 2:
3778 op = Q ? Iop_QShl32x4 : Iop_QShl32x2;
3779 op_rev = Q ? Iop_Shr32x4 : Iop_Shr32x2;
3780 op_shrn = Q ? Iop_ShrN32x4 : Iop_ShrN32x2;
3781 op_shln = Q ? Iop_ShlN32x4 : Iop_ShlN32x2;
3782 break;
3783 case 3:
3784 op = Q ? Iop_QShl64x2 : Iop_QShl64x1;
3785 op_rev = Q ? Iop_Shr64x2 : Iop_Shr64;
3786 op_shrn = Q ? Iop_ShrN64x2 : Iop_Shr64;
3787 op_shln = Q ? Iop_ShlN64x2 : Iop_Shl64;
3788 break;
3789 default:
3790 vassert(0);
3792 } else {
3793 switch (size) {
3794 case 0:
3795 op = Q ? Iop_QSal8x16 : Iop_QSal8x8;
3796 op_rev = Q ? Iop_Sar8x16 : Iop_Sar8x8;
3797 op_shrn = Q ? Iop_ShrN8x16 : Iop_ShrN8x8;
3798 op_shln = Q ? Iop_ShlN8x16 : Iop_ShlN8x8;
3799 break;
3800 case 1:
3801 op = Q ? Iop_QSal16x8 : Iop_QSal16x4;
3802 op_rev = Q ? Iop_Sar16x8 : Iop_Sar16x4;
3803 op_shrn = Q ? Iop_ShrN16x8 : Iop_ShrN16x4;
3804 op_shln = Q ? Iop_ShlN16x8 : Iop_ShlN16x4;
3805 break;
3806 case 2:
3807 op = Q ? Iop_QSal32x4 : Iop_QSal32x2;
3808 op_rev = Q ? Iop_Sar32x4 : Iop_Sar32x2;
3809 op_shrn = Q ? Iop_ShrN32x4 : Iop_ShrN32x2;
3810 op_shln = Q ? Iop_ShlN32x4 : Iop_ShlN32x2;
3811 break;
3812 case 3:
3813 op = Q ? Iop_QSal64x2 : Iop_QSal64x1;
3814 op_rev = Q ? Iop_Sar64x2 : Iop_Sar64;
3815 op_shrn = Q ? Iop_ShrN64x2 : Iop_Shr64;
3816 op_shln = Q ? Iop_ShlN64x2 : Iop_Shl64;
3817 break;
3818 default:
3819 vassert(0);
3822 if (Q) {
3823 tmp = newTemp(Ity_V128);
3824 shval = newTemp(Ity_V128);
3825 mask = newTemp(Ity_V128);
3826 } else {
3827 tmp = newTemp(Ity_I64);
3828 shval = newTemp(Ity_I64);
3829 mask = newTemp(Ity_I64);
3831 assign(res, binop(op, mkexpr(arg_m), mkexpr(arg_n)));
3832 /* Only least significant byte from second argument is used.
3833 Copy this byte to the whole vector element. */
3834 assign(shval, binop(op_shrn,
3835 binop(op_shln,
3836 mkexpr(arg_n),
3837 mkU8((8 << size) - 8)),
3838 mkU8((8 << size) - 8)));
3839 for(i = 0; i < size; i++) {
3840 old_shval = shval;
3841 shval = newTemp(Q ? Ity_V128 : Ity_I64);
3842 assign(shval, binop(Q ? Iop_OrV128 : Iop_Or64,
3843 mkexpr(old_shval),
3844 binop(op_shln,
3845 mkexpr(old_shval),
3846 mkU8(8 << i))));
3848 /* If shift is greater or equal to the element size and
3849 element is non-zero, then QC flag should be set. */
3850 esize = (8 << size) - 1;
3851 esize = (esize << 8) | esize;
3852 esize = (esize << 16) | esize;
3853 esize = (esize << 32) | esize;
3854 setFlag_QC(binop(Q ? Iop_AndV128 : Iop_And64,
3855 binop(cmp_gt, mkexpr(shval),
3856 Q ? mkU128(esize) : mkU64(esize)),
3857 unop(cmp_neq, mkexpr(arg_m))),
3858 Q ? mkU128(0) : mkU64(0),
3859 Q, condT);
 3860                /* Otherwise the QC flag should be set if the shift value is
 3861                   positive and the result, right-shifted by the same value,
 3862                   is not equal to the left argument. */
3863 assign(mask, binop(cmp_gt, mkexpr(shval),
3864 Q ? mkU128(0) : mkU64(0)));
3865 if (!Q && size == 3)
3866 assign(tmp, binop(op_rev, mkexpr(res),
3867 unop(Iop_64to8, mkexpr(arg_n))));
3868 else
3869 assign(tmp, binop(op_rev, mkexpr(res), mkexpr(arg_n)));
3870 setFlag_QC(binop(Q ? Iop_AndV128 : Iop_And64,
3871 mkexpr(tmp), mkexpr(mask)),
3872 binop(Q ? Iop_AndV128 : Iop_And64,
3873 mkexpr(arg_m), mkexpr(mask)),
3874 Q, condT);
3875 DIP("vqshl.%c%d %c%u, %c%u, %c%u\n",
3876 U ? 'u' : 's', 8 << size,
3877 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg, Q ? 'q' : 'd',
3878 nreg);
3880 break;
3881 case 5:
3882 if (B == 0) {
3883 /* VRSHL */
3884 IROp op, op_shrn, op_shln, cmp_gt, op_add;
3885 IRTemp shval, old_shval, imm_val, round;
3886 UInt i;
3887 ULong imm;
3888 cmp_gt = Q ? Iop_CmpGT8Sx16 : Iop_CmpGT8Sx8;
3889 imm = 1L;
3890 switch (size) {
3891 case 0: imm = (imm << 8) | imm; /* fall through */
3892 case 1: imm = (imm << 16) | imm; /* fall through */
3893 case 2: imm = (imm << 32) | imm; /* fall through */
3894 case 3: break;
3895 default: vassert(0);
3897 imm_val = newTemp(Q ? Ity_V128 : Ity_I64);
3898 round = newTemp(Q ? Ity_V128 : Ity_I64);
3899 assign(imm_val, Q ? mkU128(imm) : mkU64(imm));
3900 if (U) {
3901 switch (size) {
3902 case 0:
3903 op = Q ? Iop_Shl8x16 : Iop_Shl8x8;
3904 op_add = Q ? Iop_Add8x16 : Iop_Add8x8;
3905 op_shrn = Q ? Iop_ShrN8x16 : Iop_ShrN8x8;
3906 op_shln = Q ? Iop_ShlN8x16 : Iop_ShlN8x8;
3907 break;
3908 case 1:
3909 op = Q ? Iop_Shl16x8 : Iop_Shl16x4;
3910 op_add = Q ? Iop_Add16x8 : Iop_Add16x4;
3911 op_shrn = Q ? Iop_ShrN16x8 : Iop_ShrN16x4;
3912 op_shln = Q ? Iop_ShlN16x8 : Iop_ShlN16x4;
3913 break;
3914 case 2:
3915 op = Q ? Iop_Shl32x4 : Iop_Shl32x2;
3916 op_add = Q ? Iop_Add32x4 : Iop_Add32x2;
3917 op_shrn = Q ? Iop_ShrN32x4 : Iop_ShrN32x2;
3918 op_shln = Q ? Iop_ShlN32x4 : Iop_ShlN32x2;
3919 break;
3920 case 3:
3921 op = Q ? Iop_Shl64x2 : Iop_Shl64;
3922 op_add = Q ? Iop_Add64x2 : Iop_Add64;
3923 op_shrn = Q ? Iop_ShrN64x2 : Iop_Shr64;
3924 op_shln = Q ? Iop_ShlN64x2 : Iop_Shl64;
3925 break;
3926 default:
3927 vassert(0);
3929 } else {
3930 switch (size) {
3931 case 0:
3932 op = Q ? Iop_Sal8x16 : Iop_Sal8x8;
3933 op_add = Q ? Iop_Add8x16 : Iop_Add8x8;
3934 op_shrn = Q ? Iop_ShrN8x16 : Iop_ShrN8x8;
3935 op_shln = Q ? Iop_ShlN8x16 : Iop_ShlN8x8;
3936 break;
3937 case 1:
3938 op = Q ? Iop_Sal16x8 : Iop_Sal16x4;
3939 op_add = Q ? Iop_Add16x8 : Iop_Add16x4;
3940 op_shrn = Q ? Iop_ShrN16x8 : Iop_ShrN16x4;
3941 op_shln = Q ? Iop_ShlN16x8 : Iop_ShlN16x4;
3942 break;
3943 case 2:
3944 op = Q ? Iop_Sal32x4 : Iop_Sal32x2;
3945 op_add = Q ? Iop_Add32x4 : Iop_Add32x2;
3946 op_shrn = Q ? Iop_ShrN32x4 : Iop_ShrN32x2;
3947 op_shln = Q ? Iop_ShlN32x4 : Iop_ShlN32x2;
3948 break;
3949 case 3:
3950 op = Q ? Iop_Sal64x2 : Iop_Sal64x1;
3951 op_add = Q ? Iop_Add64x2 : Iop_Add64;
3952 op_shrn = Q ? Iop_ShrN64x2 : Iop_Shr64;
3953 op_shln = Q ? Iop_ShlN64x2 : Iop_Shl64;
3954 break;
3955 default:
3956 vassert(0);
3959 if (Q) {
3960 shval = newTemp(Ity_V128);
3961 } else {
3962 shval = newTemp(Ity_I64);
3964 /* Only least significant byte from second argument is used.
3965 Copy this byte to the whole vector element. */
3966 assign(shval, binop(op_shrn,
3967 binop(op_shln,
3968 mkexpr(arg_n),
3969 mkU8((8 << size) - 8)),
3970 mkU8((8 << size) - 8)));
3971 for (i = 0; i < size; i++) {
3972 old_shval = shval;
3973 shval = newTemp(Q ? Ity_V128 : Ity_I64);
3974 assign(shval, binop(Q ? Iop_OrV128 : Iop_Or64,
3975 mkexpr(old_shval),
3976 binop(op_shln,
3977 mkexpr(old_shval),
3978 mkU8(8 << i))));
3980 /* Compute the result */
3981 if (!Q && size == 3 && U) {
3982 assign(round, binop(Q ? Iop_AndV128 : Iop_And64,
3983 binop(op,
3984 mkexpr(arg_m),
3985 unop(Iop_64to8,
3986 binop(op_add,
3987 mkexpr(arg_n),
3988 mkexpr(imm_val)))),
3989 binop(Q ? Iop_AndV128 : Iop_And64,
3990 mkexpr(imm_val),
3991 binop(cmp_gt,
3992 Q ? mkU128(0) : mkU64(0),
3993 mkexpr(arg_n)))));
3994 assign(res, binop(op_add,
3995 binop(op,
3996 mkexpr(arg_m),
3997 unop(Iop_64to8, mkexpr(arg_n))),
3998 mkexpr(round)));
3999 } else {
4000 assign(round, binop(Q ? Iop_AndV128 : Iop_And64,
4001 binop(op,
4002 mkexpr(arg_m),
4003 binop(op_add,
4004 mkexpr(arg_n),
4005 mkexpr(imm_val))),
4006 binop(Q ? Iop_AndV128 : Iop_And64,
4007 mkexpr(imm_val),
4008 binop(cmp_gt,
4009 Q ? mkU128(0) : mkU64(0),
4010 mkexpr(arg_n)))));
4011 assign(res, binop(op_add,
4012 binop(op, mkexpr(arg_m), mkexpr(arg_n)),
4013 mkexpr(round)));
4015 DIP("vrshl.%c%d %c%u, %c%u, %c%u\n",
4016 U ? 'u' : 's', 8 << size,
4017 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg, Q ? 'q' : 'd',
4018 nreg);
4019 } else {
4020 /* VQRSHL */
4021 IROp op, op_rev, op_shrn, op_shln, cmp_neq, cmp_gt, op_add;
4022 IRTemp tmp, shval, mask, old_shval, imm_val, round;
4023 UInt i;
4024 ULong esize, imm;
4025 cmp_neq = Q ? Iop_CmpNEZ8x16 : Iop_CmpNEZ8x8;
4026 cmp_gt = Q ? Iop_CmpGT8Sx16 : Iop_CmpGT8Sx8;
4027 imm = 1L;
4028 switch (size) {
4029 case 0: imm = (imm << 8) | imm; /* fall through */
4030 case 1: imm = (imm << 16) | imm; /* fall through */
4031 case 2: imm = (imm << 32) | imm; /* fall through */
4032 case 3: break;
4033 default: vassert(0);
4035 imm_val = newTemp(Q ? Ity_V128 : Ity_I64);
4036 round = newTemp(Q ? Ity_V128 : Ity_I64);
4037 assign(imm_val, Q ? mkU128(imm) : mkU64(imm));
4038 if (U) {
4039 switch (size) {
4040 case 0:
4041 op = Q ? Iop_QShl8x16 : Iop_QShl8x8;
4042 op_add = Q ? Iop_Add8x16 : Iop_Add8x8;
4043 op_rev = Q ? Iop_Shr8x16 : Iop_Shr8x8;
4044 op_shrn = Q ? Iop_ShrN8x16 : Iop_ShrN8x8;
4045 op_shln = Q ? Iop_ShlN8x16 : Iop_ShlN8x8;
4046 break;
4047 case 1:
4048 op = Q ? Iop_QShl16x8 : Iop_QShl16x4;
4049 op_add = Q ? Iop_Add16x8 : Iop_Add16x4;
4050 op_rev = Q ? Iop_Shr16x8 : Iop_Shr16x4;
4051 op_shrn = Q ? Iop_ShrN16x8 : Iop_ShrN16x4;
4052 op_shln = Q ? Iop_ShlN16x8 : Iop_ShlN16x4;
4053 break;
4054 case 2:
4055 op = Q ? Iop_QShl32x4 : Iop_QShl32x2;
4056 op_add = Q ? Iop_Add32x4 : Iop_Add32x2;
4057 op_rev = Q ? Iop_Shr32x4 : Iop_Shr32x2;
4058 op_shrn = Q ? Iop_ShrN32x4 : Iop_ShrN32x2;
4059 op_shln = Q ? Iop_ShlN32x4 : Iop_ShlN32x2;
4060 break;
4061 case 3:
4062 op = Q ? Iop_QShl64x2 : Iop_QShl64x1;
4063 op_add = Q ? Iop_Add64x2 : Iop_Add64;
4064 op_rev = Q ? Iop_Shr64x2 : Iop_Shr64;
4065 op_shrn = Q ? Iop_ShrN64x2 : Iop_Shr64;
4066 op_shln = Q ? Iop_ShlN64x2 : Iop_Shl64;
4067 break;
4068 default:
4069 vassert(0);
4071 } else {
4072 switch (size) {
4073 case 0:
4074 op = Q ? Iop_QSal8x16 : Iop_QSal8x8;
4075 op_add = Q ? Iop_Add8x16 : Iop_Add8x8;
4076 op_rev = Q ? Iop_Sar8x16 : Iop_Sar8x8;
4077 op_shrn = Q ? Iop_ShrN8x16 : Iop_ShrN8x8;
4078 op_shln = Q ? Iop_ShlN8x16 : Iop_ShlN8x8;
4079 break;
4080 case 1:
4081 op = Q ? Iop_QSal16x8 : Iop_QSal16x4;
4082 op_add = Q ? Iop_Add16x8 : Iop_Add16x4;
4083 op_rev = Q ? Iop_Sar16x8 : Iop_Sar16x4;
4084 op_shrn = Q ? Iop_ShrN16x8 : Iop_ShrN16x4;
4085 op_shln = Q ? Iop_ShlN16x8 : Iop_ShlN16x4;
4086 break;
4087 case 2:
4088 op = Q ? Iop_QSal32x4 : Iop_QSal32x2;
4089 op_add = Q ? Iop_Add32x4 : Iop_Add32x2;
4090 op_rev = Q ? Iop_Sar32x4 : Iop_Sar32x2;
4091 op_shrn = Q ? Iop_ShrN32x4 : Iop_ShrN32x2;
4092 op_shln = Q ? Iop_ShlN32x4 : Iop_ShlN32x2;
4093 break;
4094 case 3:
4095 op = Q ? Iop_QSal64x2 : Iop_QSal64x1;
4096 op_add = Q ? Iop_Add64x2 : Iop_Add64;
4097 op_rev = Q ? Iop_Sar64x2 : Iop_Sar64;
4098 op_shrn = Q ? Iop_ShrN64x2 : Iop_Shr64;
4099 op_shln = Q ? Iop_ShlN64x2 : Iop_Shl64;
4100 break;
4101 default:
4102 vassert(0);
4105 if (Q) {
4106 tmp = newTemp(Ity_V128);
4107 shval = newTemp(Ity_V128);
4108 mask = newTemp(Ity_V128);
4109 } else {
4110 tmp = newTemp(Ity_I64);
4111 shval = newTemp(Ity_I64);
4112 mask = newTemp(Ity_I64);
4114 /* Only least significant byte from second argument is used.
4115 Copy this byte to the whole vector element. */
4116 assign(shval, binop(op_shrn,
4117 binop(op_shln,
4118 mkexpr(arg_n),
4119 mkU8((8 << size) - 8)),
4120 mkU8((8 << size) - 8)));
4121 for (i = 0; i < size; i++) {
4122 old_shval = shval;
4123 shval = newTemp(Q ? Ity_V128 : Ity_I64);
4124 assign(shval, binop(Q ? Iop_OrV128 : Iop_Or64,
4125 mkexpr(old_shval),
4126 binop(op_shln,
4127 mkexpr(old_shval),
4128 mkU8(8 << i))));
4130 /* Compute the result */
4131 assign(round, binop(Q ? Iop_AndV128 : Iop_And64,
4132 binop(op,
4133 mkexpr(arg_m),
4134 binop(op_add,
4135 mkexpr(arg_n),
4136 mkexpr(imm_val))),
4137 binop(Q ? Iop_AndV128 : Iop_And64,
4138 mkexpr(imm_val),
4139 binop(cmp_gt,
4140 Q ? mkU128(0) : mkU64(0),
4141 mkexpr(arg_n)))));
4142 assign(res, binop(op_add,
4143 binop(op, mkexpr(arg_m), mkexpr(arg_n)),
4144 mkexpr(round)));
4145 /* If shift is greater or equal to the element size and element is
4146 non-zero, then QC flag should be set. */
4147 esize = (8 << size) - 1;
4148 esize = (esize << 8) | esize;
4149 esize = (esize << 16) | esize;
4150 esize = (esize << 32) | esize;
4151 setFlag_QC(binop(Q ? Iop_AndV128 : Iop_And64,
4152 binop(cmp_gt, mkexpr(shval),
4153 Q ? mkU128(esize) : mkU64(esize)),
4154 unop(cmp_neq, mkexpr(arg_m))),
4155 Q ? mkU128(0) : mkU64(0),
4156 Q, condT);
 4157             /* Otherwise the QC flag should be set if the shift value is
 4158                positive and the result, right-shifted by the same value,
 4159                is not equal to the left argument. */
4160 assign(mask, binop(cmp_gt, mkexpr(shval),
4161 Q ? mkU128(0) : mkU64(0)));
4162 if (!Q && size == 3)
4163 assign(tmp, binop(op_rev, mkexpr(res),
4164 unop(Iop_64to8, mkexpr(arg_n))));
4165 else
4166 assign(tmp, binop(op_rev, mkexpr(res), mkexpr(arg_n)));
4167 setFlag_QC(binop(Q ? Iop_AndV128 : Iop_And64,
4168 mkexpr(tmp), mkexpr(mask)),
4169 binop(Q ? Iop_AndV128 : Iop_And64,
4170 mkexpr(arg_m), mkexpr(mask)),
4171 Q, condT);
4172 DIP("vqrshl.%c%d %c%u, %c%u, %c%u\n",
4173 U ? 'u' : 's', 8 << size,
4174 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg, Q ? 'q' : 'd',
4175 nreg);
4177 break;
4178 case 6:
4179 /* VMAX, VMIN */
4180 if (B == 0) {
4181 /* VMAX */
4182 IROp op;
4183 if (U == 0) {
4184 switch (size) {
4185 case 0: op = Q ? Iop_Max8Sx16 : Iop_Max8Sx8; break;
4186 case 1: op = Q ? Iop_Max16Sx8 : Iop_Max16Sx4; break;
4187 case 2: op = Q ? Iop_Max32Sx4 : Iop_Max32Sx2; break;
4188 case 3: return False;
4189 default: vassert(0);
4191 } else {
4192 switch (size) {
4193 case 0: op = Q ? Iop_Max8Ux16 : Iop_Max8Ux8; break;
4194 case 1: op = Q ? Iop_Max16Ux8 : Iop_Max16Ux4; break;
4195 case 2: op = Q ? Iop_Max32Ux4 : Iop_Max32Ux2; break;
4196 case 3: return False;
4197 default: vassert(0);
4200 assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
4201 DIP("vmax.%c%d %c%u, %c%u, %c%u\n",
4202 U ? 'u' : 's', 8 << size,
4203 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd',
4204 mreg);
4205 } else {
4206 /* VMIN */
4207 IROp op;
4208 if (U == 0) {
4209 switch (size) {
4210 case 0: op = Q ? Iop_Min8Sx16 : Iop_Min8Sx8; break;
4211 case 1: op = Q ? Iop_Min16Sx8 : Iop_Min16Sx4; break;
4212 case 2: op = Q ? Iop_Min32Sx4 : Iop_Min32Sx2; break;
4213 case 3: return False;
4214 default: vassert(0);
4216 } else {
4217 switch (size) {
4218 case 0: op = Q ? Iop_Min8Ux16 : Iop_Min8Ux8; break;
4219 case 1: op = Q ? Iop_Min16Ux8 : Iop_Min16Ux4; break;
4220 case 2: op = Q ? Iop_Min32Ux4 : Iop_Min32Ux2; break;
4221 case 3: return False;
4222 default: vassert(0);
4225 assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
4226 DIP("vmin.%c%d %c%u, %c%u, %c%u\n",
4227 U ? 'u' : 's', 8 << size,
4228 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd',
4229 mreg);
4231 break;
4232 case 7:
4233 if (B == 0) {
4234 /* VABD */
4235 IROp op_cmp, op_sub;
4236 IRTemp cond;
4237 if ((theInstr >> 23) & 1) {
4238 vpanic("VABDL should not be in dis_neon_data_3same\n");
4240 if (Q) {
4241 switch (size) {
4242 case 0:
4243 op_cmp = U ? Iop_CmpGT8Ux16 : Iop_CmpGT8Sx16;
4244 op_sub = Iop_Sub8x16;
4245 break;
4246 case 1:
4247 op_cmp = U ? Iop_CmpGT16Ux8 : Iop_CmpGT16Sx8;
4248 op_sub = Iop_Sub16x8;
4249 break;
4250 case 2:
4251 op_cmp = U ? Iop_CmpGT32Ux4 : Iop_CmpGT32Sx4;
4252 op_sub = Iop_Sub32x4;
4253 break;
4254 case 3:
4255 return False;
4256 default:
4257 vassert(0);
4259 } else {
4260 switch (size) {
4261 case 0:
4262 op_cmp = U ? Iop_CmpGT8Ux8 : Iop_CmpGT8Sx8;
4263 op_sub = Iop_Sub8x8;
4264 break;
4265 case 1:
4266 op_cmp = U ? Iop_CmpGT16Ux4 : Iop_CmpGT16Sx4;
4267 op_sub = Iop_Sub16x4;
4268 break;
4269 case 2:
4270 op_cmp = U ? Iop_CmpGT32Ux2 : Iop_CmpGT32Sx2;
4271 op_sub = Iop_Sub32x2;
4272 break;
4273 case 3:
4274 return False;
4275 default:
4276 vassert(0);
4279 if (Q) {
4280 cond = newTemp(Ity_V128);
4281 } else {
4282 cond = newTemp(Ity_I64);
4284 assign(cond, binop(op_cmp, mkexpr(arg_n), mkexpr(arg_m)));
4285 assign(res, binop(Q ? Iop_OrV128 : Iop_Or64,
4286 binop(Q ? Iop_AndV128 : Iop_And64,
4287 binop(op_sub, mkexpr(arg_n),
4288 mkexpr(arg_m)),
4289 mkexpr(cond)),
4290 binop(Q ? Iop_AndV128 : Iop_And64,
4291 binop(op_sub, mkexpr(arg_m),
4292 mkexpr(arg_n)),
4293 unop(Q ? Iop_NotV128 : Iop_Not64,
4294 mkexpr(cond)))));
4295 DIP("vabd.%c%d %c%u, %c%u, %c%u\n",
4296 U ? 'u' : 's', 8 << size,
4297 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd',
4298 mreg);
4299 } else {
4300 /* VABA */
4301 IROp op_cmp, op_sub, op_add;
4302 IRTemp cond, acc, tmp;
4303 if ((theInstr >> 23) & 1) {
4304 vpanic("VABAL should not be in dis_neon_data_3same");
4306 if (Q) {
4307 switch (size) {
4308 case 0:
4309 op_cmp = U ? Iop_CmpGT8Ux16 : Iop_CmpGT8Sx16;
4310 op_sub = Iop_Sub8x16;
4311 op_add = Iop_Add8x16;
4312 break;
4313 case 1:
4314 op_cmp = U ? Iop_CmpGT16Ux8 : Iop_CmpGT16Sx8;
4315 op_sub = Iop_Sub16x8;
4316 op_add = Iop_Add16x8;
4317 break;
4318 case 2:
4319 op_cmp = U ? Iop_CmpGT32Ux4 : Iop_CmpGT32Sx4;
4320 op_sub = Iop_Sub32x4;
4321 op_add = Iop_Add32x4;
4322 break;
4323 case 3:
4324 return False;
4325 default:
4326 vassert(0);
4328 } else {
4329 switch (size) {
4330 case 0:
4331 op_cmp = U ? Iop_CmpGT8Ux8 : Iop_CmpGT8Sx8;
4332 op_sub = Iop_Sub8x8;
4333 op_add = Iop_Add8x8;
4334 break;
4335 case 1:
4336 op_cmp = U ? Iop_CmpGT16Ux4 : Iop_CmpGT16Sx4;
4337 op_sub = Iop_Sub16x4;
4338 op_add = Iop_Add16x4;
4339 break;
4340 case 2:
4341 op_cmp = U ? Iop_CmpGT32Ux2 : Iop_CmpGT32Sx2;
4342 op_sub = Iop_Sub32x2;
4343 op_add = Iop_Add32x2;
4344 break;
4345 case 3:
4346 return False;
4347 default:
4348 vassert(0);
4351 if (Q) {
4352 cond = newTemp(Ity_V128);
4353 acc = newTemp(Ity_V128);
4354 tmp = newTemp(Ity_V128);
4355 assign(acc, getQReg(dreg));
4356 } else {
4357 cond = newTemp(Ity_I64);
4358 acc = newTemp(Ity_I64);
4359 tmp = newTemp(Ity_I64);
4360 assign(acc, getDRegI64(dreg));
4362 assign(cond, binop(op_cmp, mkexpr(arg_n), mkexpr(arg_m)));
4363 assign(tmp, binop(Q ? Iop_OrV128 : Iop_Or64,
4364 binop(Q ? Iop_AndV128 : Iop_And64,
4365 binop(op_sub, mkexpr(arg_n),
4366 mkexpr(arg_m)),
4367 mkexpr(cond)),
4368 binop(Q ? Iop_AndV128 : Iop_And64,
4369 binop(op_sub, mkexpr(arg_m),
4370 mkexpr(arg_n)),
4371 unop(Q ? Iop_NotV128 : Iop_Not64,
4372 mkexpr(cond)))));
4373 assign(res, binop(op_add, mkexpr(acc), mkexpr(tmp)));
4374 DIP("vaba.%c%d %c%u, %c%u, %c%u\n",
4375 U ? 'u' : 's', 8 << size,
4376 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd',
4377 mreg);
4379 break;
4380 case 8:
4381 if (B == 0) {
4382 IROp op;
4383 if (U == 0) {
4384 /* VADD */
4385 switch (size) {
4386 case 0: op = Q ? Iop_Add8x16 : Iop_Add8x8; break;
4387 case 1: op = Q ? Iop_Add16x8 : Iop_Add16x4; break;
4388 case 2: op = Q ? Iop_Add32x4 : Iop_Add32x2; break;
4389 case 3: op = Q ? Iop_Add64x2 : Iop_Add64; break;
4390 default: vassert(0);
4392 DIP("vadd.i%d %c%u, %c%u, %c%u\n",
4393 8 << size, Q ? 'q' : 'd',
4394 dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
4395 } else {
4396 /* VSUB */
4397 switch (size) {
4398 case 0: op = Q ? Iop_Sub8x16 : Iop_Sub8x8; break;
4399 case 1: op = Q ? Iop_Sub16x8 : Iop_Sub16x4; break;
4400 case 2: op = Q ? Iop_Sub32x4 : Iop_Sub32x2; break;
4401 case 3: op = Q ? Iop_Sub64x2 : Iop_Sub64; break;
4402 default: vassert(0);
4404 DIP("vsub.i%d %c%u, %c%u, %c%u\n",
4405 8 << size, Q ? 'q' : 'd',
4406 dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
4408 assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
4409 } else {
4410 IROp op;
4411 switch (size) {
4412 case 0: op = Q ? Iop_CmpNEZ8x16 : Iop_CmpNEZ8x8; break;
4413 case 1: op = Q ? Iop_CmpNEZ16x8 : Iop_CmpNEZ16x4; break;
4414 case 2: op = Q ? Iop_CmpNEZ32x4 : Iop_CmpNEZ32x2; break;
4415 case 3: op = Q ? Iop_CmpNEZ64x2 : Iop_CmpwNEZ64; break;
4416 default: vassert(0);
4418 if (U == 0) {
4419 /* VTST */
4420 assign(res, unop(op, binop(Q ? Iop_AndV128 : Iop_And64,
4421 mkexpr(arg_n),
4422 mkexpr(arg_m))));
4423 DIP("vtst.%d %c%u, %c%u, %c%u\n",
4424 8 << size, Q ? 'q' : 'd',
4425 dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
4426 } else {
4427 /* VCEQ */
4428 assign(res, unop(Q ? Iop_NotV128 : Iop_Not64,
4429 unop(op,
4430 binop(Q ? Iop_XorV128 : Iop_Xor64,
4431 mkexpr(arg_n),
4432 mkexpr(arg_m)))));
4433 DIP("vceq.i%d %c%u, %c%u, %c%u\n",
4434 8 << size, Q ? 'q' : 'd',
4435 dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
4438 break;
4439 case 9:
4440 if (B == 0) {
4441 /* VMLA, VMLS (integer) */
4442 IROp op, op2;
4443 UInt P = (theInstr >> 24) & 1;
4444 if (P) {
4445 switch (size) {
4446 case 0:
4447 op = Q ? Iop_Mul8x16 : Iop_Mul8x8;
4448 op2 = Q ? Iop_Sub8x16 : Iop_Sub8x8;
4449 break;
4450 case 1:
4451 op = Q ? Iop_Mul16x8 : Iop_Mul16x4;
4452 op2 = Q ? Iop_Sub16x8 : Iop_Sub16x4;
4453 break;
4454 case 2:
4455 op = Q ? Iop_Mul32x4 : Iop_Mul32x2;
4456 op2 = Q ? Iop_Sub32x4 : Iop_Sub32x2;
4457 break;
4458 case 3:
4459 return False;
4460 default:
4461 vassert(0);
4463 } else {
4464 switch (size) {
4465 case 0:
4466 op = Q ? Iop_Mul8x16 : Iop_Mul8x8;
4467 op2 = Q ? Iop_Add8x16 : Iop_Add8x8;
4468 break;
4469 case 1:
4470 op = Q ? Iop_Mul16x8 : Iop_Mul16x4;
4471 op2 = Q ? Iop_Add16x8 : Iop_Add16x4;
4472 break;
4473 case 2:
4474 op = Q ? Iop_Mul32x4 : Iop_Mul32x2;
4475 op2 = Q ? Iop_Add32x4 : Iop_Add32x2;
4476 break;
4477 case 3:
4478 return False;
4479 default:
4480 vassert(0);
4483 assign(res, binop(op2,
4484 Q ? getQReg(dreg) : getDRegI64(dreg),
4485 binop(op, mkexpr(arg_n), mkexpr(arg_m))));
4486 DIP("vml%c.i%d %c%u, %c%u, %c%u\n",
4487 P ? 's' : 'a', 8 << size,
4488 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd',
4489 mreg);
4490 } else {
4491 /* VMUL */
4492 IROp op;
4493 UInt P = (theInstr >> 24) & 1;
4494 if (P) {
4495 switch (size) {
4496 case 0:
4497 op = Q ? Iop_PolynomialMul8x16 : Iop_PolynomialMul8x8;
4498 break;
4499 case 1: case 2: case 3: return False;
4500 default: vassert(0);
4502 } else {
4503 switch (size) {
4504 case 0: op = Q ? Iop_Mul8x16 : Iop_Mul8x8; break;
4505 case 1: op = Q ? Iop_Mul16x8 : Iop_Mul16x4; break;
4506 case 2: op = Q ? Iop_Mul32x4 : Iop_Mul32x2; break;
4507 case 3: return False;
4508 default: vassert(0);
4511 assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
4512 DIP("vmul.%c%d %c%u, %c%u, %c%u\n",
4513 P ? 'p' : 'i', 8 << size,
4514 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd',
4515 mreg);
4517 break;
4518 case 10: {
4519 /* VPMAX, VPMIN */
4520 UInt P = (theInstr >> 4) & 1;
4521 IROp op;
4522 if (Q)
4523 return False;
4524 if (P) {
4525 switch (size) {
4526 case 0: op = U ? Iop_PwMin8Ux8 : Iop_PwMin8Sx8; break;
4527 case 1: op = U ? Iop_PwMin16Ux4 : Iop_PwMin16Sx4; break;
4528 case 2: op = U ? Iop_PwMin32Ux2 : Iop_PwMin32Sx2; break;
4529 case 3: return False;
4530 default: vassert(0);
4532 } else {
4533 switch (size) {
4534 case 0: op = U ? Iop_PwMax8Ux8 : Iop_PwMax8Sx8; break;
4535 case 1: op = U ? Iop_PwMax16Ux4 : Iop_PwMax16Sx4; break;
4536 case 2: op = U ? Iop_PwMax32Ux2 : Iop_PwMax32Sx2; break;
4537 case 3: return False;
4538 default: vassert(0);
4541 assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
4542 DIP("vp%s.%c%d %c%u, %c%u, %c%u\n",
4543 P ? "min" : "max", U ? 'u' : 's',
4544 8 << size, Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg,
4545 Q ? 'q' : 'd', mreg);
4546 break;
4548 case 11:
4549 if (B == 0) {
4550 if (U == 0) {
4551 /* VQDMULH */
4552 IROp op ,op2;
4553 ULong imm;
4554 switch (size) {
4555 case 0: case 3:
4556 return False;
4557 case 1:
4558 op = Q ? Iop_QDMulHi16Sx8 : Iop_QDMulHi16Sx4;
4559 op2 = Q ? Iop_CmpEQ16x8 : Iop_CmpEQ16x4;
4560 imm = 1LL << 15;
4561 imm = (imm << 16) | imm;
4562 imm = (imm << 32) | imm;
4563 break;
4564 case 2:
4565 op = Q ? Iop_QDMulHi32Sx4 : Iop_QDMulHi32Sx2;
4566 op2 = Q ? Iop_CmpEQ32x4 : Iop_CmpEQ32x2;
4567 imm = 1LL << 31;
4568 imm = (imm << 32) | imm;
4569 break;
4570 default:
4571 vassert(0);
4573 assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
4574 setFlag_QC(binop(Q ? Iop_AndV128 : Iop_And64,
4575 binop(op2, mkexpr(arg_n),
4576 Q ? mkU128(imm) : mkU64(imm)),
4577 binop(op2, mkexpr(arg_m),
4578 Q ? mkU128(imm) : mkU64(imm))),
4579 Q ? mkU128(0) : mkU64(0),
4580 Q, condT);
4581 DIP("vqdmulh.s%d %c%u, %c%u, %c%u\n",
4582 8 << size, Q ? 'q' : 'd',
4583 dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
4584 } else {
4585 /* VQRDMULH */
4586 IROp op ,op2;
4587 ULong imm;
4588 switch(size) {
4589 case 0: case 3:
4590 return False;
4591 case 1:
4592 imm = 1LL << 15;
4593 imm = (imm << 16) | imm;
4594 imm = (imm << 32) | imm;
4595 op = Q ? Iop_QRDMulHi16Sx8 : Iop_QRDMulHi16Sx4;
4596 op2 = Q ? Iop_CmpEQ16x8 : Iop_CmpEQ16x4;
4597 break;
4598 case 2:
4599 imm = 1LL << 31;
4600 imm = (imm << 32) | imm;
4601 op = Q ? Iop_QRDMulHi32Sx4 : Iop_QRDMulHi32Sx2;
4602 op2 = Q ? Iop_CmpEQ32x4 : Iop_CmpEQ32x2;
4603 break;
4604 default:
4605 vassert(0);
4607 assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
4608 setFlag_QC(binop(Q ? Iop_AndV128 : Iop_And64,
4609 binop(op2, mkexpr(arg_n),
4610 Q ? mkU128(imm) : mkU64(imm)),
4611 binop(op2, mkexpr(arg_m),
4612 Q ? mkU128(imm) : mkU64(imm))),
4613 Q ? mkU128(0) : mkU64(0),
4614 Q, condT);
4615 DIP("vqrdmulh.s%d %c%u, %c%u, %c%u\n",
4616 8 << size, Q ? 'q' : 'd',
4617 dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
4619 } else {
4620 if (U == 0) {
4621 /* VPADD */
4622 IROp op;
4623 if (Q)
4624 return False;
4625 switch (size) {
4626 case 0: op = Q ? Iop_PwAdd8x16 : Iop_PwAdd8x8; break;
4627 case 1: op = Q ? Iop_PwAdd16x8 : Iop_PwAdd16x4; break;
4628 case 2: op = Q ? Iop_PwAdd32x4 : Iop_PwAdd32x2; break;
4629 case 3: return False;
4630 default: vassert(0);
4632 assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
4633 DIP("vpadd.i%d %c%u, %c%u, %c%u\n",
4634 8 << size, Q ? 'q' : 'd',
4635 dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
4636 } else {
4637 return False;
4640 break;
4641 case 12: {
4642 return False;
4644 /* Starting from here these are FP SIMD cases */
4645 case 13:
4646 if (B == 0) {
4647 IROp op;
4648 if (U == 0) {
4649 if ((C >> 1) == 0) {
4650 /* VADD */
4651 op = Q ? Iop_Add32Fx4 : Iop_Add32Fx2 ;
4652 DIP("vadd.f32 %c%u, %c%u, %c%u\n",
4653 Q ? 'q' : 'd', dreg,
4654 Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
4655 } else {
4656 /* VSUB */
4657 op = Q ? Iop_Sub32Fx4 : Iop_Sub32Fx2 ;
4658 DIP("vsub.f32 %c%u, %c%u, %c%u\n",
4659 Q ? 'q' : 'd', dreg,
4660 Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
4662 } else {
4663 if ((C >> 1) == 0) {
4664 /* VPADD */
4665 if (Q)
4666 return False;
4667 op = Iop_PwAdd32Fx2;
4668 DIP("vpadd.f32 d%u, d%u, d%u\n", dreg, nreg, mreg);
4669 } else {
4670 /* VABD */
4671 if (Q) {
4672 assign(res, unop(Iop_Abs32Fx4,
4673 triop(Iop_Sub32Fx4,
4674 get_FAKE_roundingmode(),
4675 mkexpr(arg_n),
4676 mkexpr(arg_m))));
4677 } else {
4678 assign(res, unop(Iop_Abs32Fx2,
4679 binop(Iop_Sub32Fx2,
4680 mkexpr(arg_n),
4681 mkexpr(arg_m))));
4683 DIP("vabd.f32 %c%u, %c%u, %c%u\n",
4684 Q ? 'q' : 'd', dreg,
4685 Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
4686 break;
4689 assign(res, binop_w_fake_RM(op, mkexpr(arg_n), mkexpr(arg_m)));
4690 } else {
4691 if (U == 0) {
4692 /* VMLA, VMLS */
4693 IROp op, op2;
4694 UInt P = (theInstr >> 21) & 1;
4695 if (P) {
4696 switch (size & 1) {
4697 case 0:
4698 op = Q ? Iop_Mul32Fx4 : Iop_Mul32Fx2;
4699 op2 = Q ? Iop_Sub32Fx4 : Iop_Sub32Fx2;
4700 break;
4701 case 1: return False;
4702 default: vassert(0);
4704 } else {
4705 switch (size & 1) {
4706 case 0:
4707 op = Q ? Iop_Mul32Fx4 : Iop_Mul32Fx2;
4708 op2 = Q ? Iop_Add32Fx4 : Iop_Add32Fx2;
4709 break;
4710 case 1: return False;
4711 default: vassert(0);
4714 assign(res, binop_w_fake_RM(
4715 op2,
4716 Q ? getQReg(dreg) : getDRegI64(dreg),
4717 binop_w_fake_RM(op, mkexpr(arg_n),
4718 mkexpr(arg_m))));
4720 DIP("vml%c.f32 %c%u, %c%u, %c%u\n",
4721 P ? 's' : 'a', Q ? 'q' : 'd',
4722 dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
4723 } else {
4724 /* VMUL */
4725 IROp op;
4726 if ((C >> 1) != 0)
4727 return False;
4728 op = Q ? Iop_Mul32Fx4 : Iop_Mul32Fx2 ;
4729 assign(res, binop_w_fake_RM(op, mkexpr(arg_n), mkexpr(arg_m)));
4730 DIP("vmul.f32 %c%u, %c%u, %c%u\n",
4731 Q ? 'q' : 'd', dreg,
4732 Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
4735 break;
4736 case 14:
4737 if (B == 0) {
4738 if (U == 0) {
4739 if ((C >> 1) == 0) {
4740 /* VCEQ */
4741 IROp op;
4742 if ((theInstr >> 20) & 1)
4743 return False;
4744 op = Q ? Iop_CmpEQ32Fx4 : Iop_CmpEQ32Fx2;
4745 assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
4746 DIP("vceq.f32 %c%u, %c%u, %c%u\n",
4747 Q ? 'q' : 'd', dreg,
4748 Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
4749 } else {
4750 return False;
4752 } else {
4753 if ((C >> 1) == 0) {
4754 /* VCGE */
4755 IROp op;
4756 if ((theInstr >> 20) & 1)
4757 return False;
4758 op = Q ? Iop_CmpGE32Fx4 : Iop_CmpGE32Fx2;
4759 assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
4760 DIP("vcge.f32 %c%u, %c%u, %c%u\n",
4761 Q ? 'q' : 'd', dreg,
4762 Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
4763 } else {
4764 /* VCGT */
4765 IROp op;
4766 if ((theInstr >> 20) & 1)
4767 return False;
4768 op = Q ? Iop_CmpGT32Fx4 : Iop_CmpGT32Fx2;
4769 assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
4770 DIP("vcgt.f32 %c%u, %c%u, %c%u\n",
4771 Q ? 'q' : 'd', dreg,
4772 Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
4775 } else {
4776 if (U == 1) {
4777 /* VACGE, VACGT */
4778 UInt op_bit = (theInstr >> 21) & 1;
4779 IROp op, op2;
4780 op2 = Q ? Iop_Abs32Fx4 : Iop_Abs32Fx2;
4781 if (op_bit) {
4782 op = Q ? Iop_CmpGT32Fx4 : Iop_CmpGT32Fx2;
4783 assign(res, binop(op,
4784 unop(op2, mkexpr(arg_n)),
4785 unop(op2, mkexpr(arg_m))));
4786 } else {
4787 op = Q ? Iop_CmpGE32Fx4 : Iop_CmpGE32Fx2;
4788 assign(res, binop(op,
4789 unop(op2, mkexpr(arg_n)),
4790 unop(op2, mkexpr(arg_m))));
4792 DIP("vacg%c.f32 %c%u, %c%u, %c%u\n", op_bit ? 't' : 'e',
4793 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg,
4794 Q ? 'q' : 'd', mreg);
4795 } else {
4796 return False;
4799 break;
4800 case 15:
4801 if (B == 0) {
4802 if (U == 0) {
4803 /* VMAX, VMIN */
4804 IROp op;
4805 if ((theInstr >> 20) & 1)
4806 return False;
4807 if ((theInstr >> 21) & 1) {
4808 op = Q ? Iop_Min32Fx4 : Iop_Min32Fx2;
4809 DIP("vmin.f32 %c%u, %c%u, %c%u\n", Q ? 'q' : 'd', dreg,
4810 Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
4811 } else {
4812 op = Q ? Iop_Max32Fx4 : Iop_Max32Fx2;
4813 DIP("vmax.f32 %c%u, %c%u, %c%u\n", Q ? 'q' : 'd', dreg,
4814 Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
4816 assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
4817 } else {
4818 /* VPMAX, VPMIN */
4819 IROp op;
4820 if (Q)
4821 return False;
4822 if ((theInstr >> 20) & 1)
4823 return False;
4824 if ((theInstr >> 21) & 1) {
4825 op = Iop_PwMin32Fx2;
4826 DIP("vpmin.f32 d%u, d%u, d%u\n", dreg, nreg, mreg);
4827 } else {
4828 op = Iop_PwMax32Fx2;
4829 DIP("vpmax.f32 d%u, d%u, d%u\n", dreg, nreg, mreg);
4831 assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
4833 } else {
4834 if (U == 0) {
4835 if ((C >> 1) == 0) {
4836 /* VRECPS */
4837 if ((theInstr >> 20) & 1)
4838 return False;
4839 assign(res, binop(Q ? Iop_RecipStep32Fx4
4840 : Iop_RecipStep32Fx2,
4841 mkexpr(arg_n),
4842 mkexpr(arg_m)));
4843 DIP("vrecps.f32 %c%u, %c%u, %c%u\n", Q ? 'q' : 'd', dreg,
4844 Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
4845 } else {
4846 /* VRSQRTS */
4847 if ((theInstr >> 20) & 1)
4848 return False;
4849 assign(res, binop(Q ? Iop_RSqrtStep32Fx4
4850 : Iop_RSqrtStep32Fx2,
4851 mkexpr(arg_n),
4852 mkexpr(arg_m)));
4853 DIP("vrsqrts.f32 %c%u, %c%u, %c%u\n", Q ? 'q' : 'd', dreg,
4854 Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
4856 } else {
4857 return False;
4860 break;
4861 default:
4862 /*NOTREACHED*/
4863 vassert(0);
4866 if (Q) {
4867 putQReg(dreg, mkexpr(res), condT);
4868 } else {
4869 putDRegI64(dreg, mkexpr(res), condT);
4872 return True;
4875 /* A7.4.2 Three registers of different length */
4876 static
4877 Bool dis_neon_data_3diff ( UInt theInstr, IRTemp condT )
4879 /* In paths where this returns False, indicating a non-decodable
4880 instruction, there may still be some IR assignments to temporaries
4881 generated. This is inconvenient but harmless, and the post-front-end
4882 IR optimisation pass will just remove them anyway. So there's no
4883 effort made here to tidy it up.
4885 UInt A = (theInstr >> 8) & 0xf;
4886 UInt B = (theInstr >> 20) & 3;
4887 UInt U = (theInstr >> 24) & 1;
4888 UInt P = (theInstr >> 9) & 1;
4889 UInt mreg = get_neon_m_regno(theInstr);
4890 UInt nreg = get_neon_n_regno(theInstr);
4891 UInt dreg = get_neon_d_regno(theInstr);
4892 UInt size = B;
4893 ULong imm;
4894 IRTemp res, arg_m, arg_n, cond, tmp;
4895 IROp cvt, cvt2, cmp, op, op2, sh, add;
4896 switch (A) {
4897 case 0: case 1: case 2: case 3:
4898 /* VADDL, VADDW, VSUBL, VSUBW */
4899 if (dreg & 1)
4900 return False;
4901 dreg >>= 1;
4902 size = B;
4903 switch (size) {
4904 case 0:
4905 cvt = U ? Iop_Widen8Uto16x8 : Iop_Widen8Sto16x8;
4906 op = (A & 2) ? Iop_Sub16x8 : Iop_Add16x8;
4907 break;
4908 case 1:
4909 cvt = U ? Iop_Widen16Uto32x4 : Iop_Widen16Sto32x4;
4910 op = (A & 2) ? Iop_Sub32x4 : Iop_Add32x4;
4911 break;
4912 case 2:
4913 cvt = U ? Iop_Widen32Uto64x2 : Iop_Widen32Sto64x2;
4914 op = (A & 2) ? Iop_Sub64x2 : Iop_Add64x2;
4915 break;
4916 case 3:
4917 return False;
4918 default:
4919 vassert(0);
4921 arg_n = newTemp(Ity_V128);
4922 arg_m = newTemp(Ity_V128);
4923 if (A & 1) {
4924 if (nreg & 1)
4925 return False;
4926 nreg >>= 1;
4927 assign(arg_n, getQReg(nreg));
4928 } else {
4929 assign(arg_n, unop(cvt, getDRegI64(nreg)));
4931 assign(arg_m, unop(cvt, getDRegI64(mreg)));
4932 putQReg(dreg, binop(op, mkexpr(arg_n), mkexpr(arg_m)),
4933 condT);
4934 DIP("v%s%c.%c%d q%u, %c%u, d%u\n", (A & 2) ? "sub" : "add",
4935 (A & 1) ? 'w' : 'l', U ? 'u' : 's', 8 << size, dreg,
4936 (A & 1) ? 'q' : 'd', nreg, mreg);
4937 return True;
4938 case 4:
4939 /* VADDHN, VRADDHN */
4940 if (mreg & 1)
4941 return False;
4942 mreg >>= 1;
4943 if (nreg & 1)
4944 return False;
4945 nreg >>= 1;
4946 size = B;
4947 switch (size) {
4948 case 0:
4949 op = Iop_Add16x8;
4950 cvt = Iop_NarrowUn16to8x8;
4951 sh = Iop_ShrN16x8;
4952 imm = 1U << 7;
4953 imm = (imm << 16) | imm;
4954 imm = (imm << 32) | imm;
4955 break;
4956 case 1:
4957 op = Iop_Add32x4;
4958 cvt = Iop_NarrowUn32to16x4;
4959 sh = Iop_ShrN32x4;
4960 imm = 1U << 15;
4961 imm = (imm << 32) | imm;
4962 break;
4963 case 2:
4964 op = Iop_Add64x2;
4965 cvt = Iop_NarrowUn64to32x2;
4966 sh = Iop_ShrN64x2;
4967 imm = 1U << 31;
4968 break;
4969 case 3:
4970 return False;
4971 default:
4972 vassert(0);
4974 tmp = newTemp(Ity_V128);
4975 res = newTemp(Ity_V128);
4976 assign(tmp, binop(op, getQReg(nreg), getQReg(mreg)));
4977 if (U) {
4978 /* VRADDHN */
4979 assign(res, binop(op, mkexpr(tmp),
4980 binop(Iop_64HLtoV128, mkU64(imm), mkU64(imm))));
4981 } else {
4982 assign(res, mkexpr(tmp));
4984 putDRegI64(dreg, unop(cvt, binop(sh, mkexpr(res), mkU8(8 << size))),
4985 condT);
4986 DIP("v%saddhn.i%d d%u, q%u, q%u\n", U ? "r" : "", 16 << size, dreg,
4987 nreg, mreg);
4988 return True;
4989 case 5:
4990 /* VABAL */
4991 if (!((theInstr >> 23) & 1)) {
4992 vpanic("VABA should not be in dis_neon_data_3diff\n");
4994 if (dreg & 1)
4995 return False;
4996 dreg >>= 1;
4997 switch (size) {
4998 case 0:
4999 cmp = U ? Iop_CmpGT8Ux8 : Iop_CmpGT8Sx8;
5000 cvt = U ? Iop_Widen8Uto16x8 : Iop_Widen8Sto16x8;
5001 cvt2 = Iop_Widen8Sto16x8;
5002 op = Iop_Sub16x8;
5003 op2 = Iop_Add16x8;
5004 break;
5005 case 1:
5006 cmp = U ? Iop_CmpGT16Ux4 : Iop_CmpGT16Sx4;
5007 cvt = U ? Iop_Widen16Uto32x4 : Iop_Widen16Sto32x4;
5008 cvt2 = Iop_Widen16Sto32x4;
5009 op = Iop_Sub32x4;
5010 op2 = Iop_Add32x4;
5011 break;
5012 case 2:
5013 cmp = U ? Iop_CmpGT32Ux2 : Iop_CmpGT32Sx2;
5014 cvt = U ? Iop_Widen32Uto64x2 : Iop_Widen32Sto64x2;
5015 cvt2 = Iop_Widen32Sto64x2;
5016 op = Iop_Sub64x2;
5017 op2 = Iop_Add64x2;
5018 break;
5019 case 3:
5020 return False;
5021 default:
5022 vassert(0);
5024 arg_n = newTemp(Ity_V128);
5025 arg_m = newTemp(Ity_V128);
5026 cond = newTemp(Ity_V128);
5027 res = newTemp(Ity_V128);
5028 assign(arg_n, unop(cvt, getDRegI64(nreg)));
5029 assign(arg_m, unop(cvt, getDRegI64(mreg)));
5030 assign(cond, unop(cvt2, binop(cmp, getDRegI64(nreg),
5031 getDRegI64(mreg))));
5032 assign(res, binop(op2,
5033 binop(Iop_OrV128,
5034 binop(Iop_AndV128,
5035 binop(op, mkexpr(arg_n), mkexpr(arg_m)),
5036 mkexpr(cond)),
5037 binop(Iop_AndV128,
5038 binop(op, mkexpr(arg_m), mkexpr(arg_n)),
5039 unop(Iop_NotV128, mkexpr(cond)))),
5040 getQReg(dreg)));
5041 putQReg(dreg, mkexpr(res), condT);
5042 DIP("vabal.%c%d q%u, d%u, d%u\n", U ? 'u' : 's', 8 << size, dreg,
5043 nreg, mreg);
5044 return True;
5045 case 6:
5046 /* VSUBHN, VRSUBHN */
5047 if (mreg & 1)
5048 return False;
5049 mreg >>= 1;
5050 if (nreg & 1)
5051 return False;
5052 nreg >>= 1;
5053 size = B;
5054 switch (size) {
5055 case 0:
5056 op = Iop_Sub16x8;
5057 op2 = Iop_Add16x8;
5058 cvt = Iop_NarrowUn16to8x8;
5059 sh = Iop_ShrN16x8;
5060 imm = 1U << 7;
5061 imm = (imm << 16) | imm;
5062 imm = (imm << 32) | imm;
5063 break;
5064 case 1:
5065 op = Iop_Sub32x4;
5066 op2 = Iop_Add32x4;
5067 cvt = Iop_NarrowUn32to16x4;
5068 sh = Iop_ShrN32x4;
5069 imm = 1U << 15;
5070 imm = (imm << 32) | imm;
5071 break;
5072 case 2:
5073 op = Iop_Sub64x2;
5074 op2 = Iop_Add64x2;
5075 cvt = Iop_NarrowUn64to32x2;
5076 sh = Iop_ShrN64x2;
5077 imm = 1U << 31;
5078 break;
5079 case 3:
5080 return False;
5081 default:
5082 vassert(0);
5084 tmp = newTemp(Ity_V128);
5085 res = newTemp(Ity_V128);
5086 assign(tmp, binop(op, getQReg(nreg), getQReg(mreg)));
5087 if (U) {
5088 /* VRSUBHN */
5089 assign(res, binop(op2, mkexpr(tmp),
5090 binop(Iop_64HLtoV128, mkU64(imm), mkU64(imm))));
5091 } else {
5092 assign(res, mkexpr(tmp));
5094 putDRegI64(dreg, unop(cvt, binop(sh, mkexpr(res), mkU8(8 << size))),
5095 condT);
5096 DIP("v%ssubhn.i%d d%u, q%u, q%u\n", U ? "r" : "", 16 << size, dreg,
5097 nreg, mreg);
5098 return True;
5099 case 7:
5100 /* VABDL */
5101 if (!((theInstr >> 23) & 1)) {
5102 vpanic("VABL should not be in dis_neon_data_3diff\n");
5104 if (dreg & 1)
5105 return False;
5106 dreg >>= 1;
5107 switch (size) {
5108 case 0:
5109 cmp = U ? Iop_CmpGT8Ux8 : Iop_CmpGT8Sx8;
5110 cvt = U ? Iop_Widen8Uto16x8 : Iop_Widen8Sto16x8;
5111 cvt2 = Iop_Widen8Sto16x8;
5112 op = Iop_Sub16x8;
5113 break;
5114 case 1:
5115 cmp = U ? Iop_CmpGT16Ux4 : Iop_CmpGT16Sx4;
5116 cvt = U ? Iop_Widen16Uto32x4 : Iop_Widen16Sto32x4;
5117 cvt2 = Iop_Widen16Sto32x4;
5118 op = Iop_Sub32x4;
5119 break;
5120 case 2:
5121 cmp = U ? Iop_CmpGT32Ux2 : Iop_CmpGT32Sx2;
5122 cvt = U ? Iop_Widen32Uto64x2 : Iop_Widen32Sto64x2;
5123 cvt2 = Iop_Widen32Sto64x2;
5124 op = Iop_Sub64x2;
5125 break;
5126 case 3:
5127 return False;
5128 default:
5129 vassert(0);
5131 arg_n = newTemp(Ity_V128);
5132 arg_m = newTemp(Ity_V128);
5133 cond = newTemp(Ity_V128);
5134 res = newTemp(Ity_V128);
5135 assign(arg_n, unop(cvt, getDRegI64(nreg)));
5136 assign(arg_m, unop(cvt, getDRegI64(mreg)));
5137 assign(cond, unop(cvt2, binop(cmp, getDRegI64(nreg),
5138 getDRegI64(mreg))));
5139 assign(res, binop(Iop_OrV128,
5140 binop(Iop_AndV128,
5141 binop(op, mkexpr(arg_n), mkexpr(arg_m)),
5142 mkexpr(cond)),
5143 binop(Iop_AndV128,
5144 binop(op, mkexpr(arg_m), mkexpr(arg_n)),
5145 unop(Iop_NotV128, mkexpr(cond)))));
5146 putQReg(dreg, mkexpr(res), condT);
5147 DIP("vabdl.%c%d q%u, d%u, d%u\n", U ? 'u' : 's', 8 << size, dreg,
5148 nreg, mreg);
5149 return True;
5150 case 8:
5151 case 10:
5152 /* VMLAL, VMLSL (integer) */
5153 if (dreg & 1)
5154 return False;
5155 dreg >>= 1;
5156 size = B;
5157 switch (size) {
5158 case 0:
5159 op = U ? Iop_Mull8Ux8 : Iop_Mull8Sx8;
5160 op2 = P ? Iop_Sub16x8 : Iop_Add16x8;
5161 break;
5162 case 1:
5163 op = U ? Iop_Mull16Ux4 : Iop_Mull16Sx4;
5164 op2 = P ? Iop_Sub32x4 : Iop_Add32x4;
5165 break;
5166 case 2:
5167 op = U ? Iop_Mull32Ux2 : Iop_Mull32Sx2;
5168 op2 = P ? Iop_Sub64x2 : Iop_Add64x2;
5169 break;
5170 case 3:
5171 return False;
5172 default:
5173 vassert(0);
5175 res = newTemp(Ity_V128);
5176 assign(res, binop(op, getDRegI64(nreg),getDRegI64(mreg)));
5177 putQReg(dreg, binop(op2, getQReg(dreg), mkexpr(res)), condT);
5178 DIP("vml%cl.%c%d q%u, d%u, d%u\n", P ? 's' : 'a', U ? 'u' : 's',
5179 8 << size, dreg, nreg, mreg);
5180 return True;
5181 case 9:
5182 case 11:
5183 /* VQDMLAL, VQDMLSL */
5184 if (U)
5185 return False;
5186 if (dreg & 1)
5187 return False;
5188 dreg >>= 1;
5189 size = B;
5190 switch (size) {
5191 case 0: case 3:
5192 return False;
5193 case 1:
5194 op = Iop_QDMull16Sx4;
5195 cmp = Iop_CmpEQ16x4;
5196 add = P ? Iop_QSub32Sx4 : Iop_QAdd32Sx4;
5197 op2 = P ? Iop_Sub32x4 : Iop_Add32x4;
5198 imm = 1LL << 15;
5199 imm = (imm << 16) | imm;
5200 imm = (imm << 32) | imm;
5201 break;
5202 case 2:
5203 op = Iop_QDMull32Sx2;
5204 cmp = Iop_CmpEQ32x2;
5205 add = P ? Iop_QSub64Sx2 : Iop_QAdd64Sx2;
5206 op2 = P ? Iop_Sub64x2 : Iop_Add64x2;
5207 imm = 1LL << 31;
5208 imm = (imm << 32) | imm;
5209 break;
5210 default:
5211 vassert(0);
5213 res = newTemp(Ity_V128);
5214 tmp = newTemp(Ity_V128);
5215 assign(res, binop(op, getDRegI64(nreg), getDRegI64(mreg)));
5216 assign(tmp, binop(op2, getQReg(dreg), mkexpr(res)));
5217 setFlag_QC(mkexpr(tmp), binop(add, getQReg(dreg), mkexpr(res)),
5218 True, condT);
5219 setFlag_QC(binop(Iop_And64,
5220 binop(cmp, getDRegI64(nreg), mkU64(imm)),
5221 binop(cmp, getDRegI64(mreg), mkU64(imm))),
5222 mkU64(0),
5223 False, condT);
5224 putQReg(dreg, binop(add, getQReg(dreg), mkexpr(res)), condT);
5225 DIP("vqdml%cl.s%d q%u, d%u, d%u\n", P ? 's' : 'a', 8 << size, dreg,
5226 nreg, mreg);
5227 return True;
5228 case 12:
5229 case 14:
5230 /* VMULL (integer or polynomial) */
5231 if (dreg & 1)
5232 return False;
5233 dreg >>= 1;
5234 size = B;
5235 switch (size) {
5236 case 0:
5237 op = (U) ? Iop_Mull8Ux8 : Iop_Mull8Sx8;
5238 if (P)
5239 op = Iop_PolynomialMull8x8;
5240 break;
5241 case 1:
5242 if (P) return False;
5243 op = (U) ? Iop_Mull16Ux4 : Iop_Mull16Sx4;
5244 break;
5245 case 2:
5246 if (P) return False;
5247 op = (U) ? Iop_Mull32Ux2 : Iop_Mull32Sx2;
5248 break;
5249 case 3:
5250 return False;
5251 default:
5252 vassert(0);
5254 putQReg(dreg, binop(op, getDRegI64(nreg),
5255 getDRegI64(mreg)), condT);
5256 DIP("vmull.%c%d q%u, d%u, d%u\n", P ? 'p' : (U ? 'u' : 's'),
5257 8 << size, dreg, nreg, mreg);
5258 return True;
5259 case 13:
5260 /* VQDMULL */
5261 if (U)
5262 return False;
5263 if (dreg & 1)
5264 return False;
5265 dreg >>= 1;
5266 size = B;
5267 switch (size) {
5268 case 0:
5269 case 3:
5270 return False;
5271 case 1:
5272 op = Iop_QDMull16Sx4;
5273 op2 = Iop_CmpEQ16x4;
5274 imm = 1LL << 15;
5275 imm = (imm << 16) | imm;
5276 imm = (imm << 32) | imm;
5277 break;
5278 case 2:
5279 op = Iop_QDMull32Sx2;
5280 op2 = Iop_CmpEQ32x2;
5281 imm = 1LL << 31;
5282 imm = (imm << 32) | imm;
5283 break;
5284 default:
5285 vassert(0);
5287 putQReg(dreg, binop(op, getDRegI64(nreg), getDRegI64(mreg)),
5288 condT);
5289 setFlag_QC(binop(Iop_And64,
5290 binop(op2, getDRegI64(nreg), mkU64(imm)),
5291 binop(op2, getDRegI64(mreg), mkU64(imm))),
5292 mkU64(0),
5293 False, condT);
5294 DIP("vqdmull.s%d q%u, d%u, d%u\n", 8 << size, dreg, nreg, mreg);
5295 return True;
5296 default:
5297 return False;
5299 return False;
5302 /* A7.4.3 Two registers and a scalar */
5303 static
5304 Bool dis_neon_data_2reg_and_scalar ( UInt theInstr, IRTemp condT )
5306 # define INSN(_bMax,_bMin) SLICE_UInt(theInstr, (_bMax), (_bMin))
5307 UInt U = INSN(24,24);
5308 UInt dreg = get_neon_d_regno(theInstr & ~(1 << 6));
5309 UInt nreg = get_neon_n_regno(theInstr & ~(1 << 6));
5310 UInt mreg = get_neon_m_regno(theInstr & ~(1 << 6));
5311 UInt size = INSN(21,20);
5312 UInt index;
5313 UInt Q = INSN(24,24);
5315 if (INSN(27,25) != 1 || INSN(23,23) != 1
5316 || INSN(6,6) != 1 || INSN(4,4) != 0)
5317 return False;
5319 /* VMLA, VMLS (scalar) */
5320 if ((INSN(11,8) & BITS4(1,0,1,0)) == BITS4(0,0,0,0)) {
5321 IRTemp res, arg_m, arg_n;
5322 IROp dup, get, op, op2, add, sub;
5323 if (Q) {
5324 if ((dreg & 1) || (nreg & 1))
5325 return False;
5326 dreg >>= 1;
5327 nreg >>= 1;
5328 res = newTemp(Ity_V128);
5329 arg_m = newTemp(Ity_V128);
5330 arg_n = newTemp(Ity_V128);
5331 assign(arg_n, getQReg(nreg));
5332 switch(size) {
5333 case 1:
5334 dup = Iop_Dup16x8;
5335 get = Iop_GetElem16x4;
5336 index = mreg >> 3;
5337 mreg &= 7;
5338 break;
5339 case 2:
5340 dup = Iop_Dup32x4;
5341 get = Iop_GetElem32x2;
5342 index = mreg >> 4;
5343 mreg &= 0xf;
5344 break;
5345 case 0:
5346 case 3:
5347 return False;
5348 default:
5349 vassert(0);
5351 assign(arg_m, unop(dup, binop(get, getDRegI64(mreg), mkU8(index))));
5352 } else {
5353 res = newTemp(Ity_I64);
5354 arg_m = newTemp(Ity_I64);
5355 arg_n = newTemp(Ity_I64);
5356 assign(arg_n, getDRegI64(nreg));
5357 switch(size) {
5358 case 1:
5359 dup = Iop_Dup16x4;
5360 get = Iop_GetElem16x4;
5361 index = mreg >> 3;
5362 mreg &= 7;
5363 break;
5364 case 2:
5365 dup = Iop_Dup32x2;
5366 get = Iop_GetElem32x2;
5367 index = mreg >> 4;
5368 mreg &= 0xf;
5369 break;
5370 case 0:
5371 case 3:
5372 return False;
5373 default:
5374 vassert(0);
5376 assign(arg_m, unop(dup, binop(get, getDRegI64(mreg), mkU8(index))));
5378 if (INSN(8,8)) {
5379 switch (size) {
5380 case 2:
5381 op = Q ? Iop_Mul32Fx4 : Iop_Mul32Fx2;
5382 add = Q ? Iop_Add32Fx4 : Iop_Add32Fx2;
5383 sub = Q ? Iop_Sub32Fx4 : Iop_Sub32Fx2;
5384 break;
5385 case 0:
5386 case 1:
5387 case 3:
5388 return False;
5389 default:
5390 vassert(0);
5392 } else {
5393 switch (size) {
5394 case 1:
5395 op = Q ? Iop_Mul16x8 : Iop_Mul16x4;
5396 add = Q ? Iop_Add16x8 : Iop_Add16x4;
5397 sub = Q ? Iop_Sub16x8 : Iop_Sub16x4;
5398 break;
5399 case 2:
5400 op = Q ? Iop_Mul32x4 : Iop_Mul32x2;
5401 add = Q ? Iop_Add32x4 : Iop_Add32x2;
5402 sub = Q ? Iop_Sub32x4 : Iop_Sub32x2;
5403 break;
5404 case 0:
5405 case 3:
5406 return False;
5407 default:
5408 vassert(0);
5411 op2 = INSN(10,10) ? sub : add;
5412 assign(res, binop_w_fake_RM(op, mkexpr(arg_n), mkexpr(arg_m)));
5413 if (Q)
5414 putQReg(dreg, binop_w_fake_RM(op2, getQReg(dreg), mkexpr(res)),
5415 condT);
5416 else
5417 putDRegI64(dreg, binop(op2, getDRegI64(dreg), mkexpr(res)),
5418 condT);
5419 DIP("vml%c.%c%d %c%u, %c%u, d%u[%u]\n", INSN(10,10) ? 's' : 'a',
5420 INSN(8,8) ? 'f' : 'i', 8 << size,
5421 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg, mreg, index);
5422 return True;
5425 /* VMLAL, VMLSL (scalar) */
5426 if ((INSN(11,8) & BITS4(1,0,1,1)) == BITS4(0,0,1,0)) {
5427 IRTemp res, arg_m, arg_n;
5428 IROp dup, get, op, op2, add, sub;
5429 if (dreg & 1)
5430 return False;
5431 dreg >>= 1;
5432 res = newTemp(Ity_V128);
5433 arg_m = newTemp(Ity_I64);
5434 arg_n = newTemp(Ity_I64);
5435 assign(arg_n, getDRegI64(nreg));
5436 switch(size) {
5437 case 1:
5438 dup = Iop_Dup16x4;
5439 get = Iop_GetElem16x4;
5440 index = mreg >> 3;
5441 mreg &= 7;
5442 break;
5443 case 2:
5444 dup = Iop_Dup32x2;
5445 get = Iop_GetElem32x2;
5446 index = mreg >> 4;
5447 mreg &= 0xf;
5448 break;
5449 case 0:
5450 case 3:
5451 return False;
5452 default:
5453 vassert(0);
5455 assign(arg_m, unop(dup, binop(get, getDRegI64(mreg), mkU8(index))));
5456 switch (size) {
5457 case 1:
5458 op = U ? Iop_Mull16Ux4 : Iop_Mull16Sx4;
5459 add = Iop_Add32x4;
5460 sub = Iop_Sub32x4;
5461 break;
5462 case 2:
5463 op = U ? Iop_Mull32Ux2 : Iop_Mull32Sx2;
5464 add = Iop_Add64x2;
5465 sub = Iop_Sub64x2;
5466 break;
5467 case 0:
5468 case 3:
5469 return False;
5470 default:
5471 vassert(0);
5473 op2 = INSN(10,10) ? sub : add;
5474 assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
5475 putQReg(dreg, binop(op2, getQReg(dreg), mkexpr(res)), condT);
5476 DIP("vml%cl.%c%d q%u, d%u, d%u[%u]\n",
5477 INSN(10,10) ? 's' : 'a', U ? 'u' : 's',
5478 8 << size, dreg, nreg, mreg, index);
5479 return True;
5482 /* VQDMLAL, VQDMLSL (scalar) */
5483 if ((INSN(11,8) & BITS4(1,0,1,1)) == BITS4(0,0,1,1) && !U) {
5484 IRTemp res, arg_m, arg_n, tmp;
5485 IROp dup, get, op, op2, add, cmp;
5486 UInt P = INSN(10,10);
5487 ULong imm;
5488 if (dreg & 1)
5489 return False;
5490 dreg >>= 1;
5491 res = newTemp(Ity_V128);
5492 arg_m = newTemp(Ity_I64);
5493 arg_n = newTemp(Ity_I64);
5494 assign(arg_n, getDRegI64(nreg));
5495 switch(size) {
5496 case 1:
5497 dup = Iop_Dup16x4;
5498 get = Iop_GetElem16x4;
5499 index = mreg >> 3;
5500 mreg &= 7;
5501 break;
5502 case 2:
5503 dup = Iop_Dup32x2;
5504 get = Iop_GetElem32x2;
5505 index = mreg >> 4;
5506 mreg &= 0xf;
5507 break;
5508 case 0:
5509 case 3:
5510 return False;
5511 default:
5512 vassert(0);
5514 assign(arg_m, unop(dup, binop(get, getDRegI64(mreg), mkU8(index))));
5515 switch (size) {
5516 case 0:
5517 case 3:
5518 return False;
5519 case 1:
5520 op = Iop_QDMull16Sx4;
5521 cmp = Iop_CmpEQ16x4;
5522 add = P ? Iop_QSub32Sx4 : Iop_QAdd32Sx4;
5523 op2 = P ? Iop_Sub32x4 : Iop_Add32x4;
5524 imm = 1LL << 15;
5525 imm = (imm << 16) | imm;
5526 imm = (imm << 32) | imm;
5527 break;
5528 case 2:
5529 op = Iop_QDMull32Sx2;
5530 cmp = Iop_CmpEQ32x2;
5531 add = P ? Iop_QSub64Sx2 : Iop_QAdd64Sx2;
5532 op2 = P ? Iop_Sub64x2 : Iop_Add64x2;
5533 imm = 1LL << 31;
5534 imm = (imm << 32) | imm;
5535 break;
5536 default:
5537 vassert(0);
5539 res = newTemp(Ity_V128);
5540 tmp = newTemp(Ity_V128);
5541 assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
5542 assign(tmp, binop(op2, getQReg(dreg), mkexpr(res)));
5543 setFlag_QC(binop(Iop_And64,
5544 binop(cmp, mkexpr(arg_n), mkU64(imm)),
5545 binop(cmp, mkexpr(arg_m), mkU64(imm))),
5546 mkU64(0),
5547 False, condT);
5548 setFlag_QC(mkexpr(tmp), binop(add, getQReg(dreg), mkexpr(res)),
5549 True, condT);
5550 putQReg(dreg, binop(add, getQReg(dreg), mkexpr(res)), condT);
5551 DIP("vqdml%cl.s%d q%u, d%u, d%u[%u]\n", P ? 's' : 'a', 8 << size,
5552 dreg, nreg, mreg, index);
5553 return True;
5556 /* VMUL (by scalar) */
5557 if ((INSN(11,8) & BITS4(1,1,1,0)) == BITS4(1,0,0,0)) {
5558 IRTemp res, arg_m, arg_n;
5559 IROp dup, get, op;
5560 if (Q) {
5561 if ((dreg & 1) || (nreg & 1))
5562 return False;
5563 dreg >>= 1;
5564 nreg >>= 1;
5565 res = newTemp(Ity_V128);
5566 arg_m = newTemp(Ity_V128);
5567 arg_n = newTemp(Ity_V128);
5568 assign(arg_n, getQReg(nreg));
5569 switch(size) {
5570 case 1:
5571 dup = Iop_Dup16x8;
5572 get = Iop_GetElem16x4;
5573 index = mreg >> 3;
5574 mreg &= 7;
5575 break;
5576 case 2:
5577 dup = Iop_Dup32x4;
5578 get = Iop_GetElem32x2;
5579 index = mreg >> 4;
5580 mreg &= 0xf;
5581 break;
5582 case 0:
5583 case 3:
5584 return False;
5585 default:
5586 vassert(0);
5588 assign(arg_m, unop(dup, binop(get, getDRegI64(mreg), mkU8(index))));
5589 } else {
5590 res = newTemp(Ity_I64);
5591 arg_m = newTemp(Ity_I64);
5592 arg_n = newTemp(Ity_I64);
5593 assign(arg_n, getDRegI64(nreg));
5594 switch(size) {
5595 case 1:
5596 dup = Iop_Dup16x4;
5597 get = Iop_GetElem16x4;
5598 index = mreg >> 3;
5599 mreg &= 7;
5600 break;
5601 case 2:
5602 dup = Iop_Dup32x2;
5603 get = Iop_GetElem32x2;
5604 index = mreg >> 4;
5605 mreg &= 0xf;
5606 break;
5607 case 0:
5608 case 3:
5609 return False;
5610 default:
5611 vassert(0);
5613 assign(arg_m, unop(dup, binop(get, getDRegI64(mreg), mkU8(index))));
5615 if (INSN(8,8)) {
5616 switch (size) {
5617 case 2:
5618 op = Q ? Iop_Mul32Fx4 : Iop_Mul32Fx2;
5619 break;
5620 case 0:
5621 case 1:
5622 case 3:
5623 return False;
5624 default:
5625 vassert(0);
5627 } else {
5628 switch (size) {
5629 case 1:
5630 op = Q ? Iop_Mul16x8 : Iop_Mul16x4;
5631 break;
5632 case 2:
5633 op = Q ? Iop_Mul32x4 : Iop_Mul32x2;
5634 break;
5635 case 0:
5636 case 3:
5637 return False;
5638 default:
5639 vassert(0);
5642 assign(res, binop_w_fake_RM(op, mkexpr(arg_n), mkexpr(arg_m)));
5643 if (Q)
5644 putQReg(dreg, mkexpr(res), condT);
5645 else
5646 putDRegI64(dreg, mkexpr(res), condT);
5647 DIP("vmul.%c%d %c%u, %c%u, d%u[%u]\n", INSN(8,8) ? 'f' : 'i',
5648 8 << size, Q ? 'q' : 'd', dreg,
5649 Q ? 'q' : 'd', nreg, mreg, index);
5650 return True;
5653 /* VMULL (scalar) */
5654 if (INSN(11,8) == BITS4(1,0,1,0)) {
5655 IRTemp res, arg_m, arg_n;
5656 IROp dup, get, op;
5657 if (dreg & 1)
5658 return False;
5659 dreg >>= 1;
5660 res = newTemp(Ity_V128);
5661 arg_m = newTemp(Ity_I64);
5662 arg_n = newTemp(Ity_I64);
5663 assign(arg_n, getDRegI64(nreg));
5664 switch(size) {
5665 case 1:
5666 dup = Iop_Dup16x4;
5667 get = Iop_GetElem16x4;
5668 index = mreg >> 3;
5669 mreg &= 7;
5670 break;
5671 case 2:
5672 dup = Iop_Dup32x2;
5673 get = Iop_GetElem32x2;
5674 index = mreg >> 4;
5675 mreg &= 0xf;
5676 break;
5677 case 0:
5678 case 3:
5679 return False;
5680 default:
5681 vassert(0);
5683 assign(arg_m, unop(dup, binop(get, getDRegI64(mreg), mkU8(index))));
5684 switch (size) {
5685 case 1: op = U ? Iop_Mull16Ux4 : Iop_Mull16Sx4; break;
5686 case 2: op = U ? Iop_Mull32Ux2 : Iop_Mull32Sx2; break;
5687 case 0: case 3: return False;
5688 default: vassert(0);
5690 assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
5691 putQReg(dreg, mkexpr(res), condT);
5692 DIP("vmull.%c%d q%u, d%u, d%u[%u]\n", U ? 'u' : 's', 8 << size, dreg,
5693 nreg, mreg, index);
5694 return True;
5697 /* VQDMULL */
5698 if (INSN(11,8) == BITS4(1,0,1,1) && !U) {
5699 IROp op ,op2, dup, get;
5700 ULong imm;
5701 IRTemp arg_m, arg_n;
5702 if (dreg & 1)
5703 return False;
5704 dreg >>= 1;
5705 arg_m = newTemp(Ity_I64);
5706 arg_n = newTemp(Ity_I64);
5707 assign(arg_n, getDRegI64(nreg));
5708 switch(size) {
5709 case 1:
5710 dup = Iop_Dup16x4;
5711 get = Iop_GetElem16x4;
5712 index = mreg >> 3;
5713 mreg &= 7;
5714 break;
5715 case 2:
5716 dup = Iop_Dup32x2;
5717 get = Iop_GetElem32x2;
5718 index = mreg >> 4;
5719 mreg &= 0xf;
5720 break;
5721 case 0:
5722 case 3:
5723 return False;
5724 default:
5725 vassert(0);
5727 assign(arg_m, unop(dup, binop(get, getDRegI64(mreg), mkU8(index))));
5728 switch (size) {
5729 case 0:
5730 case 3:
5731 return False;
5732 case 1:
5733 op = Iop_QDMull16Sx4;
5734 op2 = Iop_CmpEQ16x4;
5735 imm = 1LL << 15;
5736 imm = (imm << 16) | imm;
5737 imm = (imm << 32) | imm;
5738 break;
5739 case 2:
5740 op = Iop_QDMull32Sx2;
5741 op2 = Iop_CmpEQ32x2;
5742 imm = 1LL << 31;
5743 imm = (imm << 32) | imm;
5744 break;
5745 default:
5746 vassert(0);
5748 putQReg(dreg, binop(op, mkexpr(arg_n), mkexpr(arg_m)),
5749 condT);
5750 setFlag_QC(binop(Iop_And64,
5751 binop(op2, mkexpr(arg_n), mkU64(imm)),
5752 binop(op2, mkexpr(arg_m), mkU64(imm))),
5753 mkU64(0),
5754 False, condT);
5755 DIP("vqdmull.s%d q%u, d%u, d%u[%u]\n", 8 << size, dreg, nreg, mreg,
5756 index);
5757 return True;
5760 /* VQDMULH */
5761 if (INSN(11,8) == BITS4(1,1,0,0)) {
5762 IROp op ,op2, dup, get;
5763 ULong imm;
5764 IRTemp res, arg_m, arg_n;
5765 if (Q) {
5766 if ((dreg & 1) || (nreg & 1))
5767 return False;
5768 dreg >>= 1;
5769 nreg >>= 1;
5770 res = newTemp(Ity_V128);
5771 arg_m = newTemp(Ity_V128);
5772 arg_n = newTemp(Ity_V128);
5773 assign(arg_n, getQReg(nreg));
5774 switch(size) {
5775 case 1:
5776 dup = Iop_Dup16x8;
5777 get = Iop_GetElem16x4;
5778 index = mreg >> 3;
5779 mreg &= 7;
5780 break;
5781 case 2:
5782 dup = Iop_Dup32x4;
5783 get = Iop_GetElem32x2;
5784 index = mreg >> 4;
5785 mreg &= 0xf;
5786 break;
5787 case 0:
5788 case 3:
5789 return False;
5790 default:
5791 vassert(0);
5793 assign(arg_m, unop(dup, binop(get, getDRegI64(mreg), mkU8(index))));
5794 } else {
5795 res = newTemp(Ity_I64);
5796 arg_m = newTemp(Ity_I64);
5797 arg_n = newTemp(Ity_I64);
5798 assign(arg_n, getDRegI64(nreg));
5799 switch(size) {
5800 case 1:
5801 dup = Iop_Dup16x4;
5802 get = Iop_GetElem16x4;
5803 index = mreg >> 3;
5804 mreg &= 7;
5805 break;
5806 case 2:
5807 dup = Iop_Dup32x2;
5808 get = Iop_GetElem32x2;
5809 index = mreg >> 4;
5810 mreg &= 0xf;
5811 break;
5812 case 0:
5813 case 3:
5814 return False;
5815 default:
5816 vassert(0);
5818 assign(arg_m, unop(dup, binop(get, getDRegI64(mreg), mkU8(index))));
5820 switch (size) {
5821 case 0:
5822 case 3:
5823 return False;
5824 case 1:
5825 op = Q ? Iop_QDMulHi16Sx8 : Iop_QDMulHi16Sx4;
5826 op2 = Q ? Iop_CmpEQ16x8 : Iop_CmpEQ16x4;
5827 imm = 1LL << 15;
5828 imm = (imm << 16) | imm;
5829 imm = (imm << 32) | imm;
5830 break;
5831 case 2:
5832 op = Q ? Iop_QDMulHi32Sx4 : Iop_QDMulHi32Sx2;
5833 op2 = Q ? Iop_CmpEQ32x4 : Iop_CmpEQ32x2;
5834 imm = 1LL << 31;
5835 imm = (imm << 32) | imm;
5836 break;
5837 default:
5838 vassert(0);
5840 assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
5841 setFlag_QC(binop(Q ? Iop_AndV128 : Iop_And64,
5842 binop(op2, mkexpr(arg_n),
5843 Q ? mkU128(imm) : mkU64(imm)),
5844 binop(op2, mkexpr(arg_m),
5845 Q ? mkU128(imm) : mkU64(imm))),
5846 Q ? mkU128(0) : mkU64(0),
5847 Q, condT);
5848 if (Q)
5849 putQReg(dreg, mkexpr(res), condT);
5850 else
5851 putDRegI64(dreg, mkexpr(res), condT);
5852 DIP("vqdmulh.s%d %c%u, %c%u, d%u[%u]\n",
5853 8 << size, Q ? 'q' : 'd', dreg,
5854 Q ? 'q' : 'd', nreg, mreg, index);
5855 return True;
5858 /* VQRDMULH (scalar) */
5859 if (INSN(11,8) == BITS4(1,1,0,1)) {
5860 IROp op ,op2, dup, get;
5861 ULong imm;
5862 IRTemp res, arg_m, arg_n;
5863 if (Q) {
5864 if ((dreg & 1) || (nreg & 1))
5865 return False;
5866 dreg >>= 1;
5867 nreg >>= 1;
5868 res = newTemp(Ity_V128);
5869 arg_m = newTemp(Ity_V128);
5870 arg_n = newTemp(Ity_V128);
5871 assign(arg_n, getQReg(nreg));
5872 switch(size) {
5873 case 1:
5874 dup = Iop_Dup16x8;
5875 get = Iop_GetElem16x4;
5876 index = mreg >> 3;
5877 mreg &= 7;
5878 break;
5879 case 2:
5880 dup = Iop_Dup32x4;
5881 get = Iop_GetElem32x2;
5882 index = mreg >> 4;
5883 mreg &= 0xf;
5884 break;
5885 case 0:
5886 case 3:
5887 return False;
5888 default:
5889 vassert(0);
5891 assign(arg_m, unop(dup, binop(get, getDRegI64(mreg), mkU8(index))));
5892 } else {
5893 res = newTemp(Ity_I64);
5894 arg_m = newTemp(Ity_I64);
5895 arg_n = newTemp(Ity_I64);
5896 assign(arg_n, getDRegI64(nreg));
5897 switch(size) {
5898 case 1:
5899 dup = Iop_Dup16x4;
5900 get = Iop_GetElem16x4;
5901 index = mreg >> 3;
5902 mreg &= 7;
5903 break;
5904 case 2:
5905 dup = Iop_Dup32x2;
5906 get = Iop_GetElem32x2;
5907 index = mreg >> 4;
5908 mreg &= 0xf;
5909 break;
5910 case 0:
5911 case 3:
5912 return False;
5913 default:
5914 vassert(0);
5916 assign(arg_m, unop(dup, binop(get, getDRegI64(mreg), mkU8(index))));
5918 switch (size) {
5919 case 0:
5920 case 3:
5921 return False;
5922 case 1:
5923 op = Q ? Iop_QRDMulHi16Sx8 : Iop_QRDMulHi16Sx4;
5924 op2 = Q ? Iop_CmpEQ16x8 : Iop_CmpEQ16x4;
5925 imm = 1LL << 15;
5926 imm = (imm << 16) | imm;
5927 imm = (imm << 32) | imm;
5928 break;
5929 case 2:
5930 op = Q ? Iop_QRDMulHi32Sx4 : Iop_QRDMulHi32Sx2;
5931 op2 = Q ? Iop_CmpEQ32x4 : Iop_CmpEQ32x2;
5932 imm = 1LL << 31;
5933 imm = (imm << 32) | imm;
5934 break;
5935 default:
5936 vassert(0);
5938 assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
5939 setFlag_QC(binop(Q ? Iop_AndV128 : Iop_And64,
5940 binop(op2, mkexpr(arg_n),
5941 Q ? mkU128(imm) : mkU64(imm)),
5942 binop(op2, mkexpr(arg_m),
5943 Q ? mkU128(imm) : mkU64(imm))),
5944 Q ? mkU128(0) : mkU64(0),
5945 Q, condT);
5946 if (Q)
5947 putQReg(dreg, mkexpr(res), condT);
5948 else
5949 putDRegI64(dreg, mkexpr(res), condT);
5950 DIP("vqrdmulh.s%d %c%u, %c%u, d%u[%u]\n",
5951 8 << size, Q ? 'q' : 'd', dreg,
5952 Q ? 'q' : 'd', nreg, mreg, index);
5953 return True;
5956 return False;
5957 # undef INSN
5960 /* A7.4.4 Two registers and a shift amount */
/* Translate one NEON "two registers and a shift amount" instruction
   (ARM ARM section A7.4.4) into IR, with all register writes guarded
   by condT.  Returns True iff the instruction was recognised and
   translated; False means "not decoded here" and the caller tries
   other decoders.
   NOTE(review): this listing is a lossy blob extraction — every code
   line below is prefixed with its original file line number, and lines
   holding only a closing brace or a blank were dropped by the
   extraction.  Code tokens are preserved byte-for-byte; only comment
   lines have been added. */
5961 static
5962 Bool dis_neon_data_2reg_and_shift ( UInt theInstr, IRTemp condT )
/* Field extraction: A is the opcode (instr bits 11:8), B = bit 6 doubles
   as the Q (quad) flag for most encodings, L = bit 7 extends imm6,
   U = bit 24 selects unsigned/alternate forms. */
5964 UInt A = (theInstr >> 8) & 0xf;
5965 UInt B = (theInstr >> 6) & 1;
5966 UInt L = (theInstr >> 7) & 1;
5967 UInt U = (theInstr >> 24) & 1;
5968 UInt Q = B;
5969 UInt imm6 = (theInstr >> 16) & 0x3f;
5970 UInt shift_imm;
5971 UInt size = 4;
5972 UInt tmp;
5973 UInt mreg = get_neon_m_regno(theInstr);
5974 UInt dreg = get_neon_d_regno(theInstr);
5975 ULong imm = 0;
5976 IROp op, cvt, add = Iop_INVALID, cvt2, op_rev;
5977 IRTemp reg_m, res, mask;
5979 if (L == 0 && ((theInstr >> 19) & 7) == 0)
5980 /* It is one reg and immediate */
5981 return False;
/* The element size and shift amount are jointly encoded in L:imm6:
   the position of the highest set bit selects the lane size
   (0 = 8-bit .. 3 = 64-bit) and the remaining bits give the shift. */
5983 tmp = (L << 6) | imm6;
5984 if (tmp & 0x40) {
5985 size = 3;
5986 shift_imm = 64 - imm6;
5987 } else if (tmp & 0x20) {
5988 size = 2;
5989 shift_imm = 64 - imm6;
5990 } else if (tmp & 0x10) {
5991 size = 1;
5992 shift_imm = 32 - imm6;
5993 } else if (tmp & 0x8) {
5994 size = 0;
5995 shift_imm = 16 - imm6;
5996 } else {
5997 return False;
/* Dispatch on the opcode field A. */
6000 switch (A) {
6001 case 3:
6002 case 2:
6003 /* VRSHR, VRSRA */
6004 if (shift_imm > 0) {
6005 IRExpr *imm_val;
/* Replicate the constant 1 into every lane of imm; it is used below
   to mask out the "rounding bit" (bit shift_imm-1) of each lane. */
6006 imm = 1L;
6007 switch (size) {
6008 case 0:
6009 imm = (imm << 8) | imm;
6010 /* fall through */
6011 case 1:
6012 imm = (imm << 16) | imm;
6013 /* fall through */
6014 case 2:
6015 imm = (imm << 32) | imm;
6016 /* fall through */
6017 case 3:
6018 break;
6019 default:
6020 vassert(0);
6022 if (Q) {
6023 reg_m = newTemp(Ity_V128);
6024 res = newTemp(Ity_V128);
6025 imm_val = binop(Iop_64HLtoV128, mkU64(imm), mkU64(imm));
6026 assign(reg_m, getQReg(mreg));
6027 switch (size) {
6028 case 0:
6029 add = Iop_Add8x16;
6030 op = U ? Iop_ShrN8x16 : Iop_SarN8x16;
6031 break;
6032 case 1:
6033 add = Iop_Add16x8;
6034 op = U ? Iop_ShrN16x8 : Iop_SarN16x8;
6035 break;
6036 case 2:
6037 add = Iop_Add32x4;
6038 op = U ? Iop_ShrN32x4 : Iop_SarN32x4;
6039 break;
6040 case 3:
6041 add = Iop_Add64x2;
6042 op = U ? Iop_ShrN64x2 : Iop_SarN64x2;
6043 break;
6044 default:
6045 vassert(0);
6047 } else {
6048 reg_m = newTemp(Ity_I64);
6049 res = newTemp(Ity_I64);
6050 imm_val = mkU64(imm);
6051 assign(reg_m, getDRegI64(mreg));
6052 switch (size) {
6053 case 0:
6054 add = Iop_Add8x8;
6055 op = U ? Iop_ShrN8x8 : Iop_SarN8x8;
6056 break;
6057 case 1:
6058 add = Iop_Add16x4;
6059 op = U ? Iop_ShrN16x4 : Iop_SarN16x4;
6060 break;
6061 case 2:
6062 add = Iop_Add32x2;
6063 op = U ? Iop_ShrN32x2 : Iop_SarN32x2;
6064 break;
6065 case 3:
6066 add = Iop_Add64;
6067 op = U ? Iop_Shr64 : Iop_Sar64;
6068 break;
6069 default:
6070 vassert(0);
/* Rounded shift: res = (m >> n) + ((m >> (n-1)) & 1) per lane,
   i.e. the result plus the last bit shifted out. */
6073 assign(res,
6074 binop(add,
6075 binop(op,
6076 mkexpr(reg_m),
6077 mkU8(shift_imm)),
6078 binop(Q ? Iop_AndV128 : Iop_And64,
6079 binop(op,
6080 mkexpr(reg_m),
6081 mkU8(shift_imm - 1)),
6082 imm_val)));
6083 } else {
/* shift_imm == 0: the rounded shift degenerates to a plain copy. */
6084 if (Q) {
6085 res = newTemp(Ity_V128);
6086 assign(res, getQReg(mreg));
6087 } else {
6088 res = newTemp(Ity_I64);
6089 assign(res, getDRegI64(mreg));
/* A == 3 is the accumulating form (VRSRA): add into dreg. */
6092 if (A == 3) {
6093 if (Q) {
6094 putQReg(dreg, binop(add, mkexpr(res), getQReg(dreg)),
6095 condT);
6096 } else {
6097 putDRegI64(dreg, binop(add, mkexpr(res), getDRegI64(dreg)),
6098 condT);
6100 DIP("vrsra.%c%d %c%u, %c%u, #%u\n",
6101 U ? 'u' : 's', 8 << size,
6102 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg, shift_imm);
6103 } else {
6104 if (Q) {
6105 putQReg(dreg, mkexpr(res), condT);
6106 } else {
6107 putDRegI64(dreg, mkexpr(res), condT);
6109 DIP("vrshr.%c%d %c%u, %c%u, #%u\n", U ? 'u' : 's', 8 << size,
6110 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg, shift_imm);
6112 return True;
6113 case 1:
6114 case 0:
6115 /* VSHR, VSRA */
6116 if (Q) {
6117 reg_m = newTemp(Ity_V128);
6118 assign(reg_m, getQReg(mreg));
6119 res = newTemp(Ity_V128);
6120 } else {
6121 reg_m = newTemp(Ity_I64);
6122 assign(reg_m, getDRegI64(mreg));
6123 res = newTemp(Ity_I64);
6125 if (Q) {
6126 switch (size) {
6127 case 0:
6128 op = U ? Iop_ShrN8x16 : Iop_SarN8x16;
6129 add = Iop_Add8x16;
6130 break;
6131 case 1:
6132 op = U ? Iop_ShrN16x8 : Iop_SarN16x8;
6133 add = Iop_Add16x8;
6134 break;
6135 case 2:
6136 op = U ? Iop_ShrN32x4 : Iop_SarN32x4;
6137 add = Iop_Add32x4;
6138 break;
6139 case 3:
6140 op = U ? Iop_ShrN64x2 : Iop_SarN64x2;
6141 add = Iop_Add64x2;
6142 break;
6143 default:
6144 vassert(0);
6146 } else {
6147 switch (size) {
6148 case 0:
6149 op = U ? Iop_ShrN8x8 : Iop_SarN8x8;
6150 add = Iop_Add8x8;
6151 break;
6152 case 1:
6153 op = U ? Iop_ShrN16x4 : Iop_SarN16x4;
6154 add = Iop_Add16x4;
6155 break;
6156 case 2:
6157 op = U ? Iop_ShrN32x2 : Iop_SarN32x2;
6158 add = Iop_Add32x2;
6159 break;
6160 case 3:
6161 op = U ? Iop_Shr64 : Iop_Sar64;
6162 add = Iop_Add64;
6163 break;
6164 default:
6165 vassert(0);
6168 assign(res, binop(op, mkexpr(reg_m), mkU8(shift_imm)));
/* A == 1 is the accumulating form (VSRA): add into dreg. */
6169 if (A == 1) {
6170 if (Q) {
6171 putQReg(dreg, binop(add, mkexpr(res), getQReg(dreg)),
6172 condT);
6173 } else {
6174 putDRegI64(dreg, binop(add, mkexpr(res), getDRegI64(dreg)),
6175 condT);
6177 DIP("vsra.%c%d %c%u, %c%u, #%u\n", U ? 'u' : 's', 8 << size,
6178 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg, shift_imm);
6179 } else {
6180 if (Q) {
6181 putQReg(dreg, mkexpr(res), condT);
6182 } else {
6183 putDRegI64(dreg, mkexpr(res), condT);
6185 DIP("vshr.%c%d %c%u, %c%u, #%u\n", U ? 'u' : 's', 8 << size,
6186 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg, shift_imm);
6188 return True;
6189 case 4:
6190 /* VSRI */
6191 if (!U)
6192 return False;
6193 if (Q) {
6194 res = newTemp(Ity_V128);
6195 mask = newTemp(Ity_V128);
6196 } else {
6197 res = newTemp(Ity_I64);
6198 mask = newTemp(Ity_I64);
6200 switch (size) {
6201 case 0: op = Q ? Iop_ShrN8x16 : Iop_ShrN8x8; break;
6202 case 1: op = Q ? Iop_ShrN16x8 : Iop_ShrN16x4; break;
6203 case 2: op = Q ? Iop_ShrN32x4 : Iop_ShrN32x2; break;
6204 case 3: op = Q ? Iop_ShrN64x2 : Iop_Shr64; break;
6205 default: vassert(0);
/* Insert: keep the bits of dreg that the shifted-in value does not
   cover (via ~mask), OR in the shifted mreg. */
6207 if (Q) {
6208 assign(mask, binop(op, binop(Iop_64HLtoV128,
6209 mkU64(0xFFFFFFFFFFFFFFFFLL),
6210 mkU64(0xFFFFFFFFFFFFFFFFLL)),
6211 mkU8(shift_imm)));
6212 assign(res, binop(Iop_OrV128,
6213 binop(Iop_AndV128,
6214 getQReg(dreg),
6215 unop(Iop_NotV128,
6216 mkexpr(mask))),
6217 binop(op,
6218 getQReg(mreg),
6219 mkU8(shift_imm))));
6220 putQReg(dreg, mkexpr(res), condT);
6221 } else {
6222 assign(mask, binop(op, mkU64(0xFFFFFFFFFFFFFFFFLL),
6223 mkU8(shift_imm)));
6224 assign(res, binop(Iop_Or64,
6225 binop(Iop_And64,
6226 getDRegI64(dreg),
6227 unop(Iop_Not64,
6228 mkexpr(mask))),
6229 binop(op,
6230 getDRegI64(mreg),
6231 mkU8(shift_imm))));
6232 putDRegI64(dreg, mkexpr(res), condT);
6234 DIP("vsri.%d %c%u, %c%u, #%u\n",
6235 8 << size, Q ? 'q' : 'd', dreg,
6236 Q ? 'q' : 'd', mreg, shift_imm);
6237 return True;
6238 case 5:
6239 if (U) {
6240 /* VSLI */
/* Left-shift encodings store the amount as (lane_bits - shift);
   recover the actual left-shift count here. */
6241 shift_imm = 8 * (1 << size) - shift_imm;
6242 if (Q) {
6243 res = newTemp(Ity_V128);
6244 mask = newTemp(Ity_V128);
6245 } else {
6246 res = newTemp(Ity_I64);
6247 mask = newTemp(Ity_I64);
6249 switch (size) {
6250 case 0: op = Q ? Iop_ShlN8x16 : Iop_ShlN8x8; break;
6251 case 1: op = Q ? Iop_ShlN16x8 : Iop_ShlN16x4; break;
6252 case 2: op = Q ? Iop_ShlN32x4 : Iop_ShlN32x2; break;
6253 case 3: op = Q ? Iop_ShlN64x2 : Iop_Shl64; break;
6254 default: vassert(0);
6256 if (Q) {
6257 assign(mask, binop(op, binop(Iop_64HLtoV128,
6258 mkU64(0xFFFFFFFFFFFFFFFFLL),
6259 mkU64(0xFFFFFFFFFFFFFFFFLL)),
6260 mkU8(shift_imm)));
6261 assign(res, binop(Iop_OrV128,
6262 binop(Iop_AndV128,
6263 getQReg(dreg),
6264 unop(Iop_NotV128,
6265 mkexpr(mask))),
6266 binop(op,
6267 getQReg(mreg),
6268 mkU8(shift_imm))));
6269 putQReg(dreg, mkexpr(res), condT);
6270 } else {
6271 assign(mask, binop(op, mkU64(0xFFFFFFFFFFFFFFFFLL),
6272 mkU8(shift_imm)));
6273 assign(res, binop(Iop_Or64,
6274 binop(Iop_And64,
6275 getDRegI64(dreg),
6276 unop(Iop_Not64,
6277 mkexpr(mask))),
6278 binop(op,
6279 getDRegI64(mreg),
6280 mkU8(shift_imm))));
6281 putDRegI64(dreg, mkexpr(res), condT);
6283 DIP("vsli.%d %c%u, %c%u, #%u\n",
6284 8 << size, Q ? 'q' : 'd', dreg,
6285 Q ? 'q' : 'd', mreg, shift_imm);
6286 return True;
6287 } else {
6288 /* VSHL #imm */
6289 shift_imm = 8 * (1 << size) - shift_imm;
6290 if (Q) {
6291 res = newTemp(Ity_V128);
6292 } else {
6293 res = newTemp(Ity_I64);
6295 switch (size) {
6296 case 0: op = Q ? Iop_ShlN8x16 : Iop_ShlN8x8; break;
6297 case 1: op = Q ? Iop_ShlN16x8 : Iop_ShlN16x4; break;
6298 case 2: op = Q ? Iop_ShlN32x4 : Iop_ShlN32x2; break;
6299 case 3: op = Q ? Iop_ShlN64x2 : Iop_Shl64; break;
6300 default: vassert(0);
6302 assign(res, binop(op, Q ? getQReg(mreg) : getDRegI64(mreg),
6303 mkU8(shift_imm)));
6304 if (Q) {
6305 putQReg(dreg, mkexpr(res), condT);
6306 } else {
6307 putDRegI64(dreg, mkexpr(res), condT);
6309 DIP("vshl.i%d %c%u, %c%u, #%u\n",
6310 8 << size, Q ? 'q' : 'd', dreg,
6311 Q ? 'q' : 'd', mreg, shift_imm);
6312 return True;
6314 break;
6315 case 6:
6316 case 7:
6317 /* VQSHL, VQSHLU */
6318 shift_imm = 8 * (1 << size) - shift_imm;
6319 if (U) {
6320 if (A & 1) {
6321 switch (size) {
6322 case 0:
6323 op = Q ? Iop_QShlNsatUU8x16 : Iop_QShlNsatUU8x8;
6324 op_rev = Q ? Iop_ShrN8x16 : Iop_ShrN8x8;
6325 break;
6326 case 1:
6327 op = Q ? Iop_QShlNsatUU16x8 : Iop_QShlNsatUU16x4;
6328 op_rev = Q ? Iop_ShrN16x8 : Iop_ShrN16x4;
6329 break;
6330 case 2:
6331 op = Q ? Iop_QShlNsatUU32x4 : Iop_QShlNsatUU32x2;
6332 op_rev = Q ? Iop_ShrN32x4 : Iop_ShrN32x2;
6333 break;
6334 case 3:
6335 op = Q ? Iop_QShlNsatUU64x2 : Iop_QShlNsatUU64x1;
6336 op_rev = Q ? Iop_ShrN64x2 : Iop_Shr64;
6337 break;
6338 default:
6339 vassert(0);
6341 DIP("vqshl.u%d %c%u, %c%u, #%u\n",
6342 8 << size,
6343 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg, shift_imm);
6344 } else {
6345 switch (size) {
6346 case 0:
6347 op = Q ? Iop_QShlNsatSU8x16 : Iop_QShlNsatSU8x8;
6348 op_rev = Q ? Iop_ShrN8x16 : Iop_ShrN8x8;
6349 break;
6350 case 1:
6351 op = Q ? Iop_QShlNsatSU16x8 : Iop_QShlNsatSU16x4;
6352 op_rev = Q ? Iop_ShrN16x8 : Iop_ShrN16x4;
6353 break;
6354 case 2:
6355 op = Q ? Iop_QShlNsatSU32x4 : Iop_QShlNsatSU32x2;
6356 op_rev = Q ? Iop_ShrN32x4 : Iop_ShrN32x2;
6357 break;
6358 case 3:
6359 op = Q ? Iop_QShlNsatSU64x2 : Iop_QShlNsatSU64x1;
6360 op_rev = Q ? Iop_ShrN64x2 : Iop_Shr64;
6361 break;
6362 default:
6363 vassert(0);
6365 DIP("vqshlu.s%d %c%u, %c%u, #%u\n",
6366 8 << size,
6367 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg, shift_imm);
6369 } else {
6370 if (!(A & 1))
6371 return False;
6372 switch (size) {
6373 case 0:
6374 op = Q ? Iop_QShlNsatSS8x16 : Iop_QShlNsatSS8x8;
6375 op_rev = Q ? Iop_SarN8x16 : Iop_SarN8x8;
6376 break;
6377 case 1:
6378 op = Q ? Iop_QShlNsatSS16x8 : Iop_QShlNsatSS16x4;
6379 op_rev = Q ? Iop_SarN16x8 : Iop_SarN16x4;
6380 break;
6381 case 2:
6382 op = Q ? Iop_QShlNsatSS32x4 : Iop_QShlNsatSS32x2;
6383 op_rev = Q ? Iop_SarN32x4 : Iop_SarN32x2;
6384 break;
6385 case 3:
6386 op = Q ? Iop_QShlNsatSS64x2 : Iop_QShlNsatSS64x1;
6387 op_rev = Q ? Iop_SarN64x2 : Iop_Sar64;
6388 break;
6389 default:
6390 vassert(0);
6392 DIP("vqshl.s%d %c%u, %c%u, #%u\n",
6393 8 << size,
6394 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg, shift_imm);
6396 if (Q) {
6397 tmp = newTemp(Ity_V128);
6398 res = newTemp(Ity_V128);
6399 reg_m = newTemp(Ity_V128);
6400 assign(reg_m, getQReg(mreg));
6401 } else {
6402 tmp = newTemp(Ity_I64);
6403 res = newTemp(Ity_I64);
6404 reg_m = newTemp(Ity_I64);
6405 assign(reg_m, getDRegI64(mreg));
/* QC detection: saturating-shift, then reverse-shift the result;
   if that does not reproduce the input, saturation occurred. */
6407 assign(res, binop(op, mkexpr(reg_m), mkU8(shift_imm)));
6408 assign(tmp, binop(op_rev, mkexpr(res), mkU8(shift_imm)));
6409 setFlag_QC(mkexpr(tmp), mkexpr(reg_m), Q, condT);
6410 if (Q)
6411 putQReg(dreg, mkexpr(res), condT);
6412 else
6413 putDRegI64(dreg, mkexpr(res), condT);
6414 return True;
6415 case 8:
/* A == 8, !U: narrowing shifts VSHRN / VRSHRN (Q source -> D dest).
   A == 8 with U set falls through to the saturating-narrow cases
   handled together with case 9 below. */
6416 if (!U) {
6417 if (L == 1)
6418 return False;
6419 size++;
6420 dreg = ((theInstr >> 18) & 0x10) | ((theInstr >> 12) & 0xF);
6421 mreg = ((theInstr >> 1) & 0x10) | (theInstr & 0xF);
6422 if (mreg & 1)
6423 return False;
6424 mreg >>= 1;
6425 if (!B) {
6426 /* VSHRN*/
6427 IROp narOp;
6428 reg_m = newTemp(Ity_V128);
6429 assign(reg_m, getQReg(mreg));
6430 res = newTemp(Ity_I64);
6431 switch (size) {
6432 case 1:
6433 op = Iop_ShrN16x8;
6434 narOp = Iop_NarrowUn16to8x8;
6435 break;
6436 case 2:
6437 op = Iop_ShrN32x4;
6438 narOp = Iop_NarrowUn32to16x4;
6439 break;
6440 case 3:
6441 op = Iop_ShrN64x2;
6442 narOp = Iop_NarrowUn64to32x2;
6443 break;
6444 default:
6445 vassert(0);
6447 assign(res, unop(narOp,
6448 binop(op,
6449 mkexpr(reg_m),
6450 mkU8(shift_imm))));
6451 putDRegI64(dreg, mkexpr(res), condT);
6452 DIP("vshrn.i%d d%u, q%u, #%u\n", 8 << size, dreg, mreg,
6453 shift_imm);
6454 return True;
6455 } else {
6456 /* VRSHRN */
6457 IROp addOp, shOp, narOp;
6458 IRExpr *imm_val;
6459 reg_m = newTemp(Ity_V128);
6460 assign(reg_m, getQReg(mreg));
6461 res = newTemp(Ity_I64);
6462 imm = 1L;
6463 switch (size) {
6464 case 0: imm = (imm << 8) | imm; /* fall through */
6465 case 1: imm = (imm << 16) | imm; /* fall through */
6466 case 2: imm = (imm << 32) | imm; /* fall through */
6467 case 3: break;
6468 default: vassert(0);
6470 imm_val = binop(Iop_64HLtoV128, mkU64(imm), mkU64(imm));
6471 switch (size) {
6472 case 1:
6473 addOp = Iop_Add16x8;
6474 shOp = Iop_ShrN16x8;
6475 narOp = Iop_NarrowUn16to8x8;
6476 break;
6477 case 2:
6478 addOp = Iop_Add32x4;
6479 shOp = Iop_ShrN32x4;
6480 narOp = Iop_NarrowUn32to16x4;
6481 break;
6482 case 3:
6483 addOp = Iop_Add64x2;
6484 shOp = Iop_ShrN64x2;
6485 narOp = Iop_NarrowUn64to32x2;
6486 break;
6487 default:
6488 vassert(0);
/* Rounded narrow: add back the per-lane rounding bit before
   narrowing, same scheme as VRSHR above. */
6490 assign(res, unop(narOp,
6491 binop(addOp,
6492 binop(shOp,
6493 mkexpr(reg_m),
6494 mkU8(shift_imm)),
6495 binop(Iop_AndV128,
6496 binop(shOp,
6497 mkexpr(reg_m),
6498 mkU8(shift_imm - 1)),
6499 imm_val))));
6500 putDRegI64(dreg, mkexpr(res), condT);
6501 if (shift_imm == 0) {
6502 DIP("vmov%d d%u, q%u, #%u\n", 8 << size, dreg, mreg,
6503 shift_imm);
6504 } else {
6505 DIP("vrshrn.i%d d%u, q%u, #%u\n", 8 << size, dreg, mreg,
6506 shift_imm);
6508 return True;
6510 } else {
6511 /* fall through */
6513 case 9:
/* VQSHRN / VQRSHRN / VQSHRUN / VQRSHRUN: saturating shift right and
   narrow.  B selects the rounding ("r") variants; instr bit 8
   distinguishes same-signedness narrowing from signed->unsigned. */
6514 dreg = ((theInstr >> 18) & 0x10) | ((theInstr >> 12) & 0xF);
6515 mreg = ((theInstr >> 1) & 0x10) | (theInstr & 0xF);
6516 if (mreg & 1)
6517 return False;
6518 mreg >>= 1;
6519 size++;
6520 if ((theInstr >> 8) & 1) {
6521 switch (size) {
6522 case 1:
6523 op = U ? Iop_ShrN16x8 : Iop_SarN16x8;
6524 cvt = U ? Iop_QNarrowUn16Uto8Ux8 : Iop_QNarrowUn16Sto8Sx8;
6525 cvt2 = U ? Iop_Widen8Uto16x8 : Iop_Widen8Sto16x8;
6526 break;
6527 case 2:
6528 op = U ? Iop_ShrN32x4 : Iop_SarN32x4;
6529 cvt = U ? Iop_QNarrowUn32Uto16Ux4 : Iop_QNarrowUn32Sto16Sx4;
6530 cvt2 = U ? Iop_Widen16Uto32x4 : Iop_Widen16Sto32x4;
6531 break;
6532 case 3:
6533 op = U ? Iop_ShrN64x2 : Iop_SarN64x2;
6534 cvt = U ? Iop_QNarrowUn64Uto32Ux2 : Iop_QNarrowUn64Sto32Sx2;
6535 cvt2 = U ? Iop_Widen32Uto64x2 : Iop_Widen32Sto64x2;
6536 break;
6537 default:
6538 vassert(0);
6540 DIP("vq%sshrn.%c%d d%u, q%u, #%u\n", B ? "r" : "",
6541 U ? 'u' : 's', 8 << size, dreg, mreg, shift_imm);
6542 } else {
6543 vassert(U);
6544 switch (size) {
6545 case 1:
6546 op = Iop_SarN16x8;
6547 cvt = Iop_QNarrowUn16Sto8Ux8;
6548 cvt2 = Iop_Widen8Uto16x8;
6549 break;
6550 case 2:
6551 op = Iop_SarN32x4;
6552 cvt = Iop_QNarrowUn32Sto16Ux4;
6553 cvt2 = Iop_Widen16Uto32x4;
6554 break;
6555 case 3:
6556 op = Iop_SarN64x2;
6557 cvt = Iop_QNarrowUn64Sto32Ux2;
6558 cvt2 = Iop_Widen32Uto64x2;
6559 break;
6560 default:
6561 vassert(0);
6563 DIP("vq%sshrun.s%d d%u, q%u, #%u\n", B ? "r" : "",
6564 8 << size, dreg, mreg, shift_imm);
6566 if (B) {
6567 if (shift_imm > 0) {
6568 imm = 1;
6569 switch (size) {
6570 case 1: imm = (imm << 16) | imm; /* fall through */
6571 case 2: imm = (imm << 32) | imm; /* fall through */
6572 case 3: break;
6573 case 0: default: vassert(0);
6575 switch (size) {
6576 case 1: add = Iop_Add16x8; break;
6577 case 2: add = Iop_Add32x4; break;
6578 case 3: add = Iop_Add64x2; break;
6579 case 0: default: vassert(0);
6583 reg_m = newTemp(Ity_V128);
6584 res = newTemp(Ity_V128);
6585 assign(reg_m, getQReg(mreg));
6586 if (B) {
6587 /* VQRSHRN, VQRSHRUN */
6588 assign(res, binop(add,
6589 binop(op, mkexpr(reg_m), mkU8(shift_imm)),
6590 binop(Iop_AndV128,
6591 binop(op,
6592 mkexpr(reg_m),
6593 mkU8(shift_imm - 1)),
6594 mkU128(imm))));
6595 } else {
6596 /* VQSHRN, VQSHRUN */
6597 assign(res, binop(op, mkexpr(reg_m), mkU8(shift_imm)));
/* QC is set when narrowing changed the value, detected by comparing
   widen(narrow(res)) against res. */
6599 setFlag_QC(unop(cvt2, unop(cvt, mkexpr(res))), mkexpr(res),
6600 True, condT);
6601 putDRegI64(dreg, unop(cvt, mkexpr(res)), condT);
6602 return True;
6603 case 10:
6604 /* VSHLL
6605 VMOVL ::= VSHLL #0 */
6606 if (B)
6607 return False;
6608 if (dreg & 1)
6609 return False;
6610 dreg >>= 1;
6611 shift_imm = (8 << size) - shift_imm;
6612 res = newTemp(Ity_V128);
6613 switch (size) {
6614 case 0:
6615 op = Iop_ShlN16x8;
6616 cvt = U ? Iop_Widen8Uto16x8 : Iop_Widen8Sto16x8;
6617 break;
6618 case 1:
6619 op = Iop_ShlN32x4;
6620 cvt = U ? Iop_Widen16Uto32x4 : Iop_Widen16Sto32x4;
6621 break;
6622 case 2:
6623 op = Iop_ShlN64x2;
6624 cvt = U ? Iop_Widen32Uto64x2 : Iop_Widen32Sto64x2;
6625 break;
6626 case 3:
6627 return False;
6628 default:
6629 vassert(0);
6631 assign(res, binop(op, unop(cvt, getDRegI64(mreg)), mkU8(shift_imm)))
;
6632 putQReg(dreg, mkexpr(res), condT);
6633 if (shift_imm == 0) {
6634 DIP("vmovl.%c%d q%u, d%u\n", U ? 'u' : 's', 8 << size,
6635 dreg, mreg);
6636 } else {
6637 DIP("vshll.%c%d q%u, d%u, #%u\n", U ? 'u' : 's', 8 << size,
6638 dreg, mreg, shift_imm);
6640 return True;
6641 case 14:
6642 case 15:
6643 /* VCVT floating-point <-> fixed-point */
6644 if ((theInstr >> 8) & 1) {
6645 if (U) {
6646 op = Q ? Iop_F32ToFixed32Ux4_RZ : Iop_F32ToFixed32Ux2_RZ;
6647 } else {
6648 op = Q ? Iop_F32ToFixed32Sx4_RZ : Iop_F32ToFixed32Sx2_RZ;
6650 DIP("vcvt.%c32.f32 %c%u, %c%u, #%u\n", U ? 'u' : 's',
6651 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg,
6652 64 - ((theInstr >> 16) & 0x3f));
6653 } else {
6654 if (U) {
6655 op = Q ? Iop_Fixed32UToF32x4_RN : Iop_Fixed32UToF32x2_RN;
6656 } else {
6657 op = Q ? Iop_Fixed32SToF32x4_RN : Iop_Fixed32SToF32x2_RN;
6659 DIP("vcvt.f32.%c32 %c%u, %c%u, #%u\n", U ? 'u' : 's',
6660 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg,
6661 64 - ((theInstr >> 16) & 0x3f));
/* Instr bit 21 must be set for a valid fixed-point fraction width;
   note this check runs after the DIP above, which is harmless since
   DIP only emits disassembly trace text. */
6663 if (((theInstr >> 21) & 1) == 0)
6664 return False;
6665 if (Q) {
6666 putQReg(dreg, binop(op, getQReg(mreg),
6667 mkU8(64 - ((theInstr >> 16) & 0x3f))), condT);
6668 } else {
6669 putDRegI64(dreg, binop(op, getDRegI64(mreg),
6670 mkU8(64 - ((theInstr >> 16) & 0x3f))), condT);
6672 return True;
6673 default:
6674 return False;
6677 return False;
6680 /* A7.4.5 Two registers, miscellaneous */
6681 static
6682 Bool dis_neon_data_2reg_misc ( UInt theInstr, IRTemp condT )
6684 UInt A = (theInstr >> 16) & 3;
6685 UInt B = (theInstr >> 6) & 0x1f;
6686 UInt Q = (theInstr >> 6) & 1;
6687 UInt U = (theInstr >> 24) & 1;
6688 UInt size = (theInstr >> 18) & 3;
6689 UInt dreg = get_neon_d_regno(theInstr);
6690 UInt mreg = get_neon_m_regno(theInstr);
6691 UInt F = (theInstr >> 10) & 1;
6692 IRTemp arg_d = IRTemp_INVALID;
6693 IRTemp arg_m = IRTemp_INVALID;
6694 IRTemp res = IRTemp_INVALID;
6695 switch (A) {
6696 case 0:
6697 if (Q) {
6698 arg_m = newTemp(Ity_V128);
6699 res = newTemp(Ity_V128);
6700 assign(arg_m, getQReg(mreg));
6701 } else {
6702 arg_m = newTemp(Ity_I64);
6703 res = newTemp(Ity_I64);
6704 assign(arg_m, getDRegI64(mreg));
6706 switch (B >> 1) {
6707 case 0: {
6708 /* VREV64 */
6709 IROp op;
6710 switch (size) {
6711 case 0:
6712 op = Q ? Iop_Reverse8sIn64_x2 : Iop_Reverse8sIn64_x1;
6713 break;
6714 case 1:
6715 op = Q ? Iop_Reverse16sIn64_x2 : Iop_Reverse16sIn64_x1;
6716 break;
6717 case 2:
6718 op = Q ? Iop_Reverse32sIn64_x2 : Iop_Reverse32sIn64_x1;
6719 break;
6720 case 3:
6721 return False;
6722 default:
6723 vassert(0);
6725 assign(res, unop(op, mkexpr(arg_m)));
6726 DIP("vrev64.%d %c%u, %c%u\n", 8 << size,
6727 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
6728 break;
6730 case 1: {
6731 /* VREV32 */
6732 IROp op;
6733 switch (size) {
6734 case 0:
6735 op = Q ? Iop_Reverse8sIn32_x4 : Iop_Reverse8sIn32_x2;
6736 break;
6737 case 1:
6738 op = Q ? Iop_Reverse16sIn32_x4 : Iop_Reverse16sIn32_x2;
6739 break;
6740 case 2:
6741 case 3:
6742 return False;
6743 default:
6744 vassert(0);
6746 assign(res, unop(op, mkexpr(arg_m)));
6747 DIP("vrev32.%d %c%u, %c%u\n", 8 << size,
6748 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
6749 break;
6751 case 2: {
6752 /* VREV16 */
6753 IROp op;
6754 switch (size) {
6755 case 0:
6756 op = Q ? Iop_Reverse8sIn16_x8 : Iop_Reverse8sIn16_x4;
6757 break;
6758 case 1:
6759 case 2:
6760 case 3:
6761 return False;
6762 default:
6763 vassert(0);
6765 assign(res, unop(op, mkexpr(arg_m)));
6766 DIP("vrev16.%d %c%u, %c%u\n", 8 << size,
6767 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
6768 break;
6770 case 3:
6771 return False;
6772 case 4:
6773 case 5: {
6774 /* VPADDL */
6775 IROp op;
6776 U = (theInstr >> 7) & 1;
6777 if (Q) {
6778 switch (size) {
6779 case 0: op = U ? Iop_PwAddL8Ux16 : Iop_PwAddL8Sx16; break;
6780 case 1: op = U ? Iop_PwAddL16Ux8 : Iop_PwAddL16Sx8; break;
6781 case 2: op = U ? Iop_PwAddL32Ux4 : Iop_PwAddL32Sx4; break;
6782 case 3: return False;
6783 default: vassert(0);
6785 } else {
6786 switch (size) {
6787 case 0: op = U ? Iop_PwAddL8Ux8 : Iop_PwAddL8Sx8; break;
6788 case 1: op = U ? Iop_PwAddL16Ux4 : Iop_PwAddL16Sx4; break;
6789 case 2: op = U ? Iop_PwAddL32Ux2 : Iop_PwAddL32Sx2; break;
6790 case 3: return False;
6791 default: vassert(0);
6794 assign(res, unop(op, mkexpr(arg_m)));
6795 DIP("vpaddl.%c%d %c%u, %c%u\n", U ? 'u' : 's', 8 << size,
6796 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
6797 break;
6799 case 6:
6800 case 7:
6801 return False;
6802 case 8: {
6803 /* VCLS */
6804 IROp op;
6805 switch (size) {
6806 case 0: op = Q ? Iop_Cls8x16 : Iop_Cls8x8; break;
6807 case 1: op = Q ? Iop_Cls16x8 : Iop_Cls16x4; break;
6808 case 2: op = Q ? Iop_Cls32x4 : Iop_Cls32x2; break;
6809 case 3: return False;
6810 default: vassert(0);
6812 assign(res, unop(op, mkexpr(arg_m)));
6813 DIP("vcls.s%d %c%u, %c%u\n", 8 << size, Q ? 'q' : 'd', dreg,
6814 Q ? 'q' : 'd', mreg);
6815 break;
6817 case 9: {
6818 /* VCLZ */
6819 IROp op;
6820 switch (size) {
6821 case 0: op = Q ? Iop_Clz8x16 : Iop_Clz8x8; break;
6822 case 1: op = Q ? Iop_Clz16x8 : Iop_Clz16x4; break;
6823 case 2: op = Q ? Iop_Clz32x4 : Iop_Clz32x2; break;
6824 case 3: return False;
6825 default: vassert(0);
6827 assign(res, unop(op, mkexpr(arg_m)));
6828 DIP("vclz.i%d %c%u, %c%u\n", 8 << size, Q ? 'q' : 'd', dreg,
6829 Q ? 'q' : 'd', mreg);
6830 break;
6832 case 10:
6833 /* VCNT */
6834 assign(res, unop(Q ? Iop_Cnt8x16 : Iop_Cnt8x8, mkexpr(arg_m)));
6835 DIP("vcnt.8 %c%u, %c%u\n", Q ? 'q' : 'd', dreg, Q ? 'q' : 'd',
6836 mreg);
6837 break;
6838 case 11:
6839 /* VMVN */
6840 if (Q)
6841 assign(res, unop(Iop_NotV128, mkexpr(arg_m)));
6842 else
6843 assign(res, unop(Iop_Not64, mkexpr(arg_m)));
6844 DIP("vmvn %c%u, %c%u\n", Q ? 'q' : 'd', dreg, Q ? 'q' : 'd',
6845 mreg);
6846 break;
6847 case 12:
6848 case 13: {
6849 /* VPADAL */
6850 IROp op, add_op;
6851 U = (theInstr >> 7) & 1;
6852 if (Q) {
6853 switch (size) {
6854 case 0:
6855 op = U ? Iop_PwAddL8Ux16 : Iop_PwAddL8Sx16;
6856 add_op = Iop_Add16x8;
6857 break;
6858 case 1:
6859 op = U ? Iop_PwAddL16Ux8 : Iop_PwAddL16Sx8;
6860 add_op = Iop_Add32x4;
6861 break;
6862 case 2:
6863 op = U ? Iop_PwAddL32Ux4 : Iop_PwAddL32Sx4;
6864 add_op = Iop_Add64x2;
6865 break;
6866 case 3:
6867 return False;
6868 default:
6869 vassert(0);
6871 } else {
6872 switch (size) {
6873 case 0:
6874 op = U ? Iop_PwAddL8Ux8 : Iop_PwAddL8Sx8;
6875 add_op = Iop_Add16x4;
6876 break;
6877 case 1:
6878 op = U ? Iop_PwAddL16Ux4 : Iop_PwAddL16Sx4;
6879 add_op = Iop_Add32x2;
6880 break;
6881 case 2:
6882 op = U ? Iop_PwAddL32Ux2 : Iop_PwAddL32Sx2;
6883 add_op = Iop_Add64;
6884 break;
6885 case 3:
6886 return False;
6887 default:
6888 vassert(0);
6891 if (Q) {
6892 arg_d = newTemp(Ity_V128);
6893 assign(arg_d, getQReg(dreg));
6894 } else {
6895 arg_d = newTemp(Ity_I64);
6896 assign(arg_d, getDRegI64(dreg));
6898 assign(res, binop(add_op, unop(op, mkexpr(arg_m)),
6899 mkexpr(arg_d)));
6900 DIP("vpadal.%c%d %c%u, %c%u\n", U ? 'u' : 's', 8 << size,
6901 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
6902 break;
6904 case 14: {
6905 /* VQABS */
6906 IROp op_sub, op_qsub, op_cmp;
6907 IRTemp mask, tmp;
6908 IRExpr *zero1, *zero2;
6909 IRExpr *neg, *neg2;
6910 if (Q) {
6911 zero1 = binop(Iop_64HLtoV128, mkU64(0), mkU64(0));
6912 zero2 = binop(Iop_64HLtoV128, mkU64(0), mkU64(0));
6913 mask = newTemp(Ity_V128);
6914 tmp = newTemp(Ity_V128);
6915 } else {
6916 zero1 = mkU64(0);
6917 zero2 = mkU64(0);
6918 mask = newTemp(Ity_I64);
6919 tmp = newTemp(Ity_I64);
6921 switch (size) {
6922 case 0:
6923 op_sub = Q ? Iop_Sub8x16 : Iop_Sub8x8;
6924 op_qsub = Q ? Iop_QSub8Sx16 : Iop_QSub8Sx8;
6925 op_cmp = Q ? Iop_CmpGT8Sx16 : Iop_CmpGT8Sx8;
6926 break;
6927 case 1:
6928 op_sub = Q ? Iop_Sub16x8 : Iop_Sub16x4;
6929 op_qsub = Q ? Iop_QSub16Sx8 : Iop_QSub16Sx4;
6930 op_cmp = Q ? Iop_CmpGT16Sx8 : Iop_CmpGT16Sx4;
6931 break;
6932 case 2:
6933 op_sub = Q ? Iop_Sub32x4 : Iop_Sub32x2;
6934 op_qsub = Q ? Iop_QSub32Sx4 : Iop_QSub32Sx2;
6935 op_cmp = Q ? Iop_CmpGT32Sx4 : Iop_CmpGT32Sx2;
6936 break;
6937 case 3:
6938 return False;
6939 default:
6940 vassert(0);
6942 assign(mask, binop(op_cmp, mkexpr(arg_m), zero1));
6943 neg = binop(op_qsub, zero2, mkexpr(arg_m));
6944 neg2 = binop(op_sub, zero2, mkexpr(arg_m));
6945 assign(res, binop(Q ? Iop_OrV128 : Iop_Or64,
6946 binop(Q ? Iop_AndV128 : Iop_And64,
6947 mkexpr(mask),
6948 mkexpr(arg_m)),
6949 binop(Q ? Iop_AndV128 : Iop_And64,
6950 unop(Q ? Iop_NotV128 : Iop_Not64,
6951 mkexpr(mask)),
6952 neg)));
6953 assign(tmp, binop(Q ? Iop_OrV128 : Iop_Or64,
6954 binop(Q ? Iop_AndV128 : Iop_And64,
6955 mkexpr(mask),
6956 mkexpr(arg_m)),
6957 binop(Q ? Iop_AndV128 : Iop_And64,
6958 unop(Q ? Iop_NotV128 : Iop_Not64,
6959 mkexpr(mask)),
6960 neg2)));
6961 setFlag_QC(mkexpr(res), mkexpr(tmp), Q, condT);
6962 DIP("vqabs.s%d %c%u, %c%u\n", 8 << size, Q ? 'q' : 'd', dreg,
6963 Q ? 'q' : 'd', mreg);
6964 break;
6966 case 15: {
6967 /* VQNEG */
6968 IROp op, op2;
6969 IRExpr *zero;
6970 if (Q) {
6971 zero = binop(Iop_64HLtoV128, mkU64(0), mkU64(0));
6972 } else {
6973 zero = mkU64(0);
6975 switch (size) {
6976 case 0:
6977 op = Q ? Iop_QSub8Sx16 : Iop_QSub8Sx8;
6978 op2 = Q ? Iop_Sub8x16 : Iop_Sub8x8;
6979 break;
6980 case 1:
6981 op = Q ? Iop_QSub16Sx8 : Iop_QSub16Sx4;
6982 op2 = Q ? Iop_Sub16x8 : Iop_Sub16x4;
6983 break;
6984 case 2:
6985 op = Q ? Iop_QSub32Sx4 : Iop_QSub32Sx2;
6986 op2 = Q ? Iop_Sub32x4 : Iop_Sub32x2;
6987 break;
6988 case 3:
6989 return False;
6990 default:
6991 vassert(0);
6993 assign(res, binop(op, zero, mkexpr(arg_m)));
6994 setFlag_QC(mkexpr(res), binop(op2, zero, mkexpr(arg_m)),
6995 Q, condT);
6996 DIP("vqneg.s%d %c%u, %c%u\n", 8 << size, Q ? 'q' : 'd', dreg,
6997 Q ? 'q' : 'd', mreg);
6998 break;
7000 default:
7001 vassert(0);
7003 if (Q) {
7004 putQReg(dreg, mkexpr(res), condT);
7005 } else {
7006 putDRegI64(dreg, mkexpr(res), condT);
7008 return True;
7009 case 1:
7010 if (Q) {
7011 arg_m = newTemp(Ity_V128);
7012 res = newTemp(Ity_V128);
7013 assign(arg_m, getQReg(mreg));
7014 } else {
7015 arg_m = newTemp(Ity_I64);
7016 res = newTemp(Ity_I64);
7017 assign(arg_m, getDRegI64(mreg));
7019 switch ((B >> 1) & 0x7) {
7020 case 0: {
7021 /* VCGT #0 */
7022 IRExpr *zero;
7023 IROp op;
7024 if (Q) {
7025 zero = binop(Iop_64HLtoV128, mkU64(0), mkU64(0));
7026 } else {
7027 zero = mkU64(0);
7029 if (F) {
7030 switch (size) {
7031 case 0: case 1: case 3: return False;
7032 case 2: op = Q ? Iop_CmpGT32Fx4 : Iop_CmpGT32Fx2; break;
7033 default: vassert(0);
7035 } else {
7036 switch (size) {
7037 case 0: op = Q ? Iop_CmpGT8Sx16 : Iop_CmpGT8Sx8; break;
7038 case 1: op = Q ? Iop_CmpGT16Sx8 : Iop_CmpGT16Sx4; break;
7039 case 2: op = Q ? Iop_CmpGT32Sx4 : Iop_CmpGT32Sx2; break;
7040 case 3: return False;
7041 default: vassert(0);
7044 assign(res, binop(op, mkexpr(arg_m), zero));
7045 DIP("vcgt.%c%d %c%u, %c%u, #0\n", F ? 'f' : 's', 8 << size,
7046 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
7047 break;
7049 case 1: {
7050 /* VCGE #0 */
7051 IROp op;
7052 IRExpr *zero;
7053 if (Q) {
7054 zero = binop(Iop_64HLtoV128, mkU64(0), mkU64(0));
7055 } else {
7056 zero = mkU64(0);
7058 if (F) {
7059 switch (size) {
7060 case 0: case 1: case 3: return False;
7061 case 2: op = Q ? Iop_CmpGE32Fx4 : Iop_CmpGE32Fx2; break;
7062 default: vassert(0);
7064 assign(res, binop(op, mkexpr(arg_m), zero));
7065 } else {
7066 switch (size) {
7067 case 0: op = Q ? Iop_CmpGT8Sx16 : Iop_CmpGT8Sx8; break;
7068 case 1: op = Q ? Iop_CmpGT16Sx8 : Iop_CmpGT16Sx4; break;
7069 case 2: op = Q ? Iop_CmpGT32Sx4 : Iop_CmpGT32Sx2; break;
7070 case 3: return False;
7071 default: vassert(0);
7073 assign(res, unop(Q ? Iop_NotV128 : Iop_Not64,
7074 binop(op, zero, mkexpr(arg_m))));
7076 DIP("vcge.%c%d %c%u, %c%u, #0\n", F ? 'f' : 's', 8 << size,
7077 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
7078 break;
7080 case 2: {
7081 /* VCEQ #0 */
7082 IROp op;
7083 IRExpr *zero;
7084 if (F) {
7085 if (Q) {
7086 zero = binop(Iop_64HLtoV128, mkU64(0), mkU64(0));
7087 } else {
7088 zero = mkU64(0);
7090 switch (size) {
7091 case 0: case 1: case 3: return False;
7092 case 2: op = Q ? Iop_CmpEQ32Fx4 : Iop_CmpEQ32Fx2; break;
7093 default: vassert(0);
7095 assign(res, binop(op, zero, mkexpr(arg_m)));
7096 } else {
7097 switch (size) {
7098 case 0: op = Q ? Iop_CmpNEZ8x16 : Iop_CmpNEZ8x8; break;
7099 case 1: op = Q ? Iop_CmpNEZ16x8 : Iop_CmpNEZ16x4; break;
7100 case 2: op = Q ? Iop_CmpNEZ32x4 : Iop_CmpNEZ32x2; break;
7101 case 3: return False;
7102 default: vassert(0);
7104 assign(res, unop(Q ? Iop_NotV128 : Iop_Not64,
7105 unop(op, mkexpr(arg_m))));
7107 DIP("vceq.%c%d %c%u, %c%u, #0\n", F ? 'f' : 'i', 8 << size,
7108 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
7109 break;
7111 case 3: {
7112 /* VCLE #0 */
7113 IRExpr *zero;
7114 IROp op;
7115 if (Q) {
7116 zero = binop(Iop_64HLtoV128, mkU64(0), mkU64(0));
7117 } else {
7118 zero = mkU64(0);
7120 if (F) {
7121 switch (size) {
7122 case 0: case 1: case 3: return False;
7123 case 2: op = Q ? Iop_CmpGE32Fx4 : Iop_CmpGE32Fx2; break;
7124 default: vassert(0);
7126 assign(res, binop(op, zero, mkexpr(arg_m)));
7127 } else {
7128 switch (size) {
7129 case 0: op = Q ? Iop_CmpGT8Sx16 : Iop_CmpGT8Sx8; break;
7130 case 1: op = Q ? Iop_CmpGT16Sx8 : Iop_CmpGT16Sx4; break;
7131 case 2: op = Q ? Iop_CmpGT32Sx4 : Iop_CmpGT32Sx2; break;
7132 case 3: return False;
7133 default: vassert(0);
7135 assign(res, unop(Q ? Iop_NotV128 : Iop_Not64,
7136 binop(op, mkexpr(arg_m), zero)));
7138 DIP("vcle.%c%d %c%u, %c%u, #0\n", F ? 'f' : 's', 8 << size,
7139 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
7140 break;
7142 case 4: {
7143 /* VCLT #0 */
7144 IROp op;
7145 IRExpr *zero;
7146 if (Q) {
7147 zero = binop(Iop_64HLtoV128, mkU64(0), mkU64(0));
7148 } else {
7149 zero = mkU64(0);
7151 if (F) {
7152 switch (size) {
7153 case 0: case 1: case 3: return False;
7154 case 2: op = Q ? Iop_CmpGT32Fx4 : Iop_CmpGT32Fx2; break;
7155 default: vassert(0);
7157 assign(res, binop(op, zero, mkexpr(arg_m)));
7158 } else {
7159 switch (size) {
7160 case 0: op = Q ? Iop_CmpGT8Sx16 : Iop_CmpGT8Sx8; break;
7161 case 1: op = Q ? Iop_CmpGT16Sx8 : Iop_CmpGT16Sx4; break;
7162 case 2: op = Q ? Iop_CmpGT32Sx4 : Iop_CmpGT32Sx2; break;
7163 case 3: return False;
7164 default: vassert(0);
7166 assign(res, binop(op, zero, mkexpr(arg_m)));
7168 DIP("vclt.%c%d %c%u, %c%u, #0\n", F ? 'f' : 's', 8 << size,
7169 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
7170 break;
7172 case 5:
7173 return False;
7174 case 6: {
7175 /* VABS */
7176 if (!F) {
7177 IROp op;
7178 switch(size) {
7179 case 0: op = Q ? Iop_Abs8x16 : Iop_Abs8x8; break;
7180 case 1: op = Q ? Iop_Abs16x8 : Iop_Abs16x4; break;
7181 case 2: op = Q ? Iop_Abs32x4 : Iop_Abs32x2; break;
7182 case 3: return False;
7183 default: vassert(0);
7185 assign(res, unop(op, mkexpr(arg_m)));
7186 } else {
7187 assign(res, unop(Q ? Iop_Abs32Fx4 : Iop_Abs32Fx2,
7188 mkexpr(arg_m)));
7190 DIP("vabs.%c%d %c%u, %c%u\n",
7191 F ? 'f' : 's', 8 << size, Q ? 'q' : 'd', dreg,
7192 Q ? 'q' : 'd', mreg);
7193 break;
7195 case 7: {
7196 /* VNEG */
7197 IROp op;
7198 IRExpr *zero;
7199 if (F) {
7200 switch (size) {
7201 case 0: case 1: case 3: return False;
7202 case 2: op = Q ? Iop_Neg32Fx4 : Iop_Neg32Fx2; break;
7203 default: vassert(0);
7205 assign(res, unop(op, mkexpr(arg_m)));
7206 } else {
7207 if (Q) {
7208 zero = binop(Iop_64HLtoV128, mkU64(0), mkU64(0));
7209 } else {
7210 zero = mkU64(0);
7212 switch (size) {
7213 case 0: op = Q ? Iop_Sub8x16 : Iop_Sub8x8; break;
7214 case 1: op = Q ? Iop_Sub16x8 : Iop_Sub16x4; break;
7215 case 2: op = Q ? Iop_Sub32x4 : Iop_Sub32x2; break;
7216 case 3: return False;
7217 default: vassert(0);
7219 assign(res, binop(op, zero, mkexpr(arg_m)));
7221 DIP("vneg.%c%d %c%u, %c%u\n",
7222 F ? 'f' : 's', 8 << size, Q ? 'q' : 'd', dreg,
7223 Q ? 'q' : 'd', mreg);
7224 break;
7226 default:
7227 vassert(0);
7229 if (Q) {
7230 putQReg(dreg, mkexpr(res), condT);
7231 } else {
7232 putDRegI64(dreg, mkexpr(res), condT);
7234 return True;
7235 case 2:
7236 if ((B >> 1) == 0) {
7237 /* VSWP */
7238 if (Q) {
7239 arg_m = newTemp(Ity_V128);
7240 assign(arg_m, getQReg(mreg));
7241 putQReg(mreg, getQReg(dreg), condT);
7242 putQReg(dreg, mkexpr(arg_m), condT);
7243 } else {
7244 arg_m = newTemp(Ity_I64);
7245 assign(arg_m, getDRegI64(mreg));
7246 putDRegI64(mreg, getDRegI64(dreg), condT);
7247 putDRegI64(dreg, mkexpr(arg_m), condT);
7249 DIP("vswp %c%u, %c%u\n",
7250 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
7251 return True;
7252 } else if ((B >> 1) == 1) {
7253 /* VTRN */
7254 IROp op_odd = Iop_INVALID, op_even = Iop_INVALID;
7255 IRTemp old_m, old_d, new_d, new_m;
7256 if (Q) {
7257 old_m = newTemp(Ity_V128);
7258 old_d = newTemp(Ity_V128);
7259 new_m = newTemp(Ity_V128);
7260 new_d = newTemp(Ity_V128);
7261 assign(old_m, getQReg(mreg));
7262 assign(old_d, getQReg(dreg));
7263 } else {
7264 old_m = newTemp(Ity_I64);
7265 old_d = newTemp(Ity_I64);
7266 new_m = newTemp(Ity_I64);
7267 new_d = newTemp(Ity_I64);
7268 assign(old_m, getDRegI64(mreg));
7269 assign(old_d, getDRegI64(dreg));
7271 if (Q) {
7272 switch (size) {
7273 case 0:
7274 op_odd = Iop_InterleaveOddLanes8x16;
7275 op_even = Iop_InterleaveEvenLanes8x16;
7276 break;
7277 case 1:
7278 op_odd = Iop_InterleaveOddLanes16x8;
7279 op_even = Iop_InterleaveEvenLanes16x8;
7280 break;
7281 case 2:
7282 op_odd = Iop_InterleaveOddLanes32x4;
7283 op_even = Iop_InterleaveEvenLanes32x4;
7284 break;
7285 case 3:
7286 return False;
7287 default:
7288 vassert(0);
7290 } else {
7291 switch (size) {
7292 case 0:
7293 op_odd = Iop_InterleaveOddLanes8x8;
7294 op_even = Iop_InterleaveEvenLanes8x8;
7295 break;
7296 case 1:
7297 op_odd = Iop_InterleaveOddLanes16x4;
7298 op_even = Iop_InterleaveEvenLanes16x4;
7299 break;
7300 case 2:
7301 op_odd = Iop_InterleaveHI32x2;
7302 op_even = Iop_InterleaveLO32x2;
7303 break;
7304 case 3:
7305 return False;
7306 default:
7307 vassert(0);
7310 assign(new_d, binop(op_even, mkexpr(old_m), mkexpr(old_d)));
7311 assign(new_m, binop(op_odd, mkexpr(old_m), mkexpr(old_d)));
7312 if (Q) {
7313 putQReg(dreg, mkexpr(new_d), condT);
7314 putQReg(mreg, mkexpr(new_m), condT);
7315 } else {
7316 putDRegI64(dreg, mkexpr(new_d), condT);
7317 putDRegI64(mreg, mkexpr(new_m), condT);
7319 DIP("vtrn.%d %c%u, %c%u\n",
7320 8 << size, Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
7321 return True;
7322 } else if ((B >> 1) == 2) {
7323 /* VUZP */
7324 IROp op_even, op_odd;
7325 IRTemp old_m, old_d, new_m, new_d;
7326 if (!Q && size == 2)
7327 return False;
7328 if (Q) {
7329 old_m = newTemp(Ity_V128);
7330 old_d = newTemp(Ity_V128);
7331 new_m = newTemp(Ity_V128);
7332 new_d = newTemp(Ity_V128);
7333 assign(old_m, getQReg(mreg));
7334 assign(old_d, getQReg(dreg));
7335 } else {
7336 old_m = newTemp(Ity_I64);
7337 old_d = newTemp(Ity_I64);
7338 new_m = newTemp(Ity_I64);
7339 new_d = newTemp(Ity_I64);
7340 assign(old_m, getDRegI64(mreg));
7341 assign(old_d, getDRegI64(dreg));
7343 switch (size) {
7344 case 0:
7345 op_odd = Q ? Iop_CatOddLanes8x16 : Iop_CatOddLanes8x8;
7346 op_even = Q ? Iop_CatEvenLanes8x16 : Iop_CatEvenLanes8x8;
7347 break;
7348 case 1:
7349 op_odd = Q ? Iop_CatOddLanes16x8 : Iop_CatOddLanes16x4;
7350 op_even = Q ? Iop_CatEvenLanes16x8 : Iop_CatEvenLanes16x4;
7351 break;
7352 case 2:
7353 op_odd = Iop_CatOddLanes32x4;
7354 op_even = Iop_CatEvenLanes32x4;
7355 break;
7356 case 3:
7357 return False;
7358 default:
7359 vassert(0);
7361 assign(new_d, binop(op_even, mkexpr(old_m), mkexpr(old_d)));
7362 assign(new_m, binop(op_odd, mkexpr(old_m), mkexpr(old_d)));
7363 if (Q) {
7364 putQReg(dreg, mkexpr(new_d), condT);
7365 putQReg(mreg, mkexpr(new_m), condT);
7366 } else {
7367 putDRegI64(dreg, mkexpr(new_d), condT);
7368 putDRegI64(mreg, mkexpr(new_m), condT);
7370 DIP("vuzp.%d %c%u, %c%u\n",
7371 8 << size, Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
7372 return True;
7373 } else if ((B >> 1) == 3) {
7374 /* VZIP */
7375 IROp op_lo, op_hi;
7376 IRTemp old_m, old_d, new_m, new_d;
7377 if (!Q && size == 2)
7378 return False;
7379 if (Q) {
7380 old_m = newTemp(Ity_V128);
7381 old_d = newTemp(Ity_V128);
7382 new_m = newTemp(Ity_V128);
7383 new_d = newTemp(Ity_V128);
7384 assign(old_m, getQReg(mreg));
7385 assign(old_d, getQReg(dreg));
7386 } else {
7387 old_m = newTemp(Ity_I64);
7388 old_d = newTemp(Ity_I64);
7389 new_m = newTemp(Ity_I64);
7390 new_d = newTemp(Ity_I64);
7391 assign(old_m, getDRegI64(mreg));
7392 assign(old_d, getDRegI64(dreg));
7394 switch (size) {
7395 case 0:
7396 op_hi = Q ? Iop_InterleaveHI8x16 : Iop_InterleaveHI8x8;
7397 op_lo = Q ? Iop_InterleaveLO8x16 : Iop_InterleaveLO8x8;
7398 break;
7399 case 1:
7400 op_hi = Q ? Iop_InterleaveHI16x8 : Iop_InterleaveHI16x4;
7401 op_lo = Q ? Iop_InterleaveLO16x8 : Iop_InterleaveLO16x4;
7402 break;
7403 case 2:
7404 op_hi = Iop_InterleaveHI32x4;
7405 op_lo = Iop_InterleaveLO32x4;
7406 break;
7407 case 3:
7408 return False;
7409 default:
7410 vassert(0);
7412 assign(new_d, binop(op_lo, mkexpr(old_m), mkexpr(old_d)));
7413 assign(new_m, binop(op_hi, mkexpr(old_m), mkexpr(old_d)));
7414 if (Q) {
7415 putQReg(dreg, mkexpr(new_d), condT);
7416 putQReg(mreg, mkexpr(new_m), condT);
7417 } else {
7418 putDRegI64(dreg, mkexpr(new_d), condT);
7419 putDRegI64(mreg, mkexpr(new_m), condT);
7421 DIP("vzip.%d %c%u, %c%u\n",
7422 8 << size, Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
7423 return True;
7424 } else if (B == 8) {
7425 /* VMOVN */
7426 IROp op;
7427 mreg >>= 1;
7428 switch (size) {
7429 case 0: op = Iop_NarrowUn16to8x8; break;
7430 case 1: op = Iop_NarrowUn32to16x4; break;
7431 case 2: op = Iop_NarrowUn64to32x2; break;
7432 case 3: return False;
7433 default: vassert(0);
7435 putDRegI64(dreg, unop(op, getQReg(mreg)), condT);
7436 DIP("vmovn.i%d d%u, q%u\n", 16 << size, dreg, mreg);
7437 return True;
7438 } else if (B == 9 || (B >> 1) == 5) {
7439 /* VQMOVN, VQMOVUN */
7440 IROp op, op2;
7441 IRTemp tmp;
7442 dreg = ((theInstr >> 18) & 0x10) | ((theInstr >> 12) & 0xF);
7443 mreg = ((theInstr >> 1) & 0x10) | (theInstr & 0xF);
7444 if (mreg & 1)
7445 return False;
7446 mreg >>= 1;
7447 switch (size) {
7448 case 0: op2 = Iop_NarrowUn16to8x8; break;
7449 case 1: op2 = Iop_NarrowUn32to16x4; break;
7450 case 2: op2 = Iop_NarrowUn64to32x2; break;
7451 case 3: return False;
7452 default: vassert(0);
7454 switch (B & 3) {
7455 case 0:
7456 vassert(0);
7457 case 1:
7458 switch (size) {
7459 case 0: op = Iop_QNarrowUn16Sto8Ux8; break;
7460 case 1: op = Iop_QNarrowUn32Sto16Ux4; break;
7461 case 2: op = Iop_QNarrowUn64Sto32Ux2; break;
7462 case 3: return False;
7463 default: vassert(0);
7465 DIP("vqmovun.s%d d%u, q%u\n", 16 << size, dreg, mreg);
7466 break;
7467 case 2:
7468 switch (size) {
7469 case 0: op = Iop_QNarrowUn16Sto8Sx8; break;
7470 case 1: op = Iop_QNarrowUn32Sto16Sx4; break;
7471 case 2: op = Iop_QNarrowUn64Sto32Sx2; break;
7472 case 3: return False;
7473 default: vassert(0);
7475 DIP("vqmovn.s%d d%u, q%u\n", 16 << size, dreg, mreg);
7476 break;
7477 case 3:
7478 switch (size) {
7479 case 0: op = Iop_QNarrowUn16Uto8Ux8; break;
7480 case 1: op = Iop_QNarrowUn32Uto16Ux4; break;
7481 case 2: op = Iop_QNarrowUn64Uto32Ux2; break;
7482 case 3: return False;
7483 default: vassert(0);
7485 DIP("vqmovn.u%d d%u, q%u\n", 16 << size, dreg, mreg);
7486 break;
7487 default:
7488 vassert(0);
7490 res = newTemp(Ity_I64);
7491 tmp = newTemp(Ity_I64);
7492 assign(res, unop(op, getQReg(mreg)));
7493 assign(tmp, unop(op2, getQReg(mreg)));
7494 setFlag_QC(mkexpr(res), mkexpr(tmp), False, condT);
7495 putDRegI64(dreg, mkexpr(res), condT);
7496 return True;
7497 } else if (B == 12) {
7498 /* VSHLL (maximum shift) */
7499 IROp op, cvt;
7500 UInt shift_imm;
7501 if (Q)
7502 return False;
7503 if (dreg & 1)
7504 return False;
7505 dreg >>= 1;
7506 shift_imm = 8 << size;
7507 res = newTemp(Ity_V128);
7508 switch (size) {
7509 case 0: op = Iop_ShlN16x8; cvt = Iop_Widen8Uto16x8; break;
7510 case 1: op = Iop_ShlN32x4; cvt = Iop_Widen16Uto32x4; break;
7511 case 2: op = Iop_ShlN64x2; cvt = Iop_Widen32Uto64x2; break;
7512 case 3: return False;
7513 default: vassert(0);
7515 assign(res, binop(op, unop(cvt, getDRegI64(mreg)),
7516 mkU8(shift_imm)));
7517 putQReg(dreg, mkexpr(res), condT);
7518 DIP("vshll.i%d q%u, d%u, #%d\n", 8 << size, dreg, mreg, 8 << size);
7519 return True;
7520 } else if ((B >> 3) == 3 && (B & 3) == 0) {
7521 /* VCVT (half<->single) */
7522 /* Half-precision extensions are needed to run this */
7523 vassert(0); // ATC
7524 if (((theInstr >> 18) & 3) != 1)
7525 return False;
7526 if ((theInstr >> 8) & 1) {
7527 if (dreg & 1)
7528 return False;
7529 dreg >>= 1;
7530 putQReg(dreg, unop(Iop_F16toF32x4, getDRegI64(mreg)),
7531 condT);
7532 DIP("vcvt.f32.f16 q%u, d%u\n", dreg, mreg);
7533 } else {
7534 if (mreg & 1)
7535 return False;
7536 mreg >>= 1;
7537 putDRegI64(dreg, unop(Iop_F32toF16x4, getQReg(mreg)),
7538 condT);
7539 DIP("vcvt.f16.f32 d%u, q%u\n", dreg, mreg);
7541 return True;
7542 } else {
7543 return False;
7545 vassert(0);
7546 return True;
7547 case 3:
7548 if (((B >> 1) & BITS4(1,1,0,1)) == BITS4(1,0,0,0)) {
7549 /* VRECPE */
7550 IROp op;
7551 F = (theInstr >> 8) & 1;
7552 if (size != 2)
7553 return False;
7554 if (Q) {
7555 op = F ? Iop_RecipEst32Fx4 : Iop_RecipEst32Ux4;
7556 putQReg(dreg, unop(op, getQReg(mreg)), condT);
7557 DIP("vrecpe.%c32 q%u, q%u\n", F ? 'f' : 'u', dreg, mreg);
7558 } else {
7559 op = F ? Iop_RecipEst32Fx2 : Iop_RecipEst32Ux2;
7560 putDRegI64(dreg, unop(op, getDRegI64(mreg)), condT);
7561 DIP("vrecpe.%c32 d%u, d%u\n", F ? 'f' : 'u', dreg, mreg);
7563 return True;
7564 } else if (((B >> 1) & BITS4(1,1,0,1)) == BITS4(1,0,0,1)) {
7565 /* VRSQRTE */
7566 IROp op;
7567 F = (B >> 2) & 1;
7568 if (size != 2)
7569 return False;
7570 if (F) {
7571 /* fp */
7572 op = Q ? Iop_RSqrtEst32Fx4 : Iop_RSqrtEst32Fx2;
7573 } else {
7574 /* unsigned int */
7575 op = Q ? Iop_RSqrtEst32Ux4 : Iop_RSqrtEst32Ux2;
7577 if (Q) {
7578 putQReg(dreg, unop(op, getQReg(mreg)), condT);
7579 DIP("vrsqrte.%c32 q%u, q%u\n", F ? 'f' : 'u', dreg, mreg);
7580 } else {
7581 putDRegI64(dreg, unop(op, getDRegI64(mreg)), condT);
7582 DIP("vrsqrte.%c32 d%u, d%u\n", F ? 'f' : 'u', dreg, mreg);
7584 return True;
7585 } else if ((B >> 3) == 3) {
7586 /* VCVT (fp<->integer) */
7587 IROp op;
7588 if (size != 2)
7589 return False;
7590 switch ((B >> 1) & 3) {
7591 case 0:
7592 op = Q ? Iop_I32StoFx4 : Iop_I32StoFx2;
7593 DIP("vcvt.f32.s32 %c%u, %c%u\n",
7594 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
7595 break;
7596 case 1:
7597 op = Q ? Iop_I32UtoFx4 : Iop_I32UtoFx2;
7598 DIP("vcvt.f32.u32 %c%u, %c%u\n",
7599 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
7600 break;
7601 case 2:
7602 op = Q ? Iop_FtoI32Sx4_RZ : Iop_FtoI32Sx2_RZ;
7603 DIP("vcvt.s32.f32 %c%u, %c%u\n",
7604 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
7605 break;
7606 case 3:
7607 op = Q ? Iop_FtoI32Ux4_RZ : Iop_FtoI32Ux2_RZ;
7608 DIP("vcvt.u32.f32 %c%u, %c%u\n",
7609 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
7610 break;
7611 default:
7612 vassert(0);
7614 if (Q) {
7615 putQReg(dreg, unop(op, getQReg(mreg)), condT);
7616 } else {
7617 putDRegI64(dreg, unop(op, getDRegI64(mreg)), condT);
7619 return True;
7620 } else {
7621 return False;
7623 vassert(0);
7624 return True;
7625 default:
7626 vassert(0);
7628 return False;
/* A7.4.6 One register and a modified immediate value */
7632 static
7633 void ppNeonImm(UInt imm, UInt cmode, UInt op)
7635 int i;
7636 switch (cmode) {
7637 case 0: case 1: case 8: case 9:
7638 vex_printf("0x%x", imm);
7639 break;
7640 case 2: case 3: case 10: case 11:
7641 vex_printf("0x%x00", imm);
7642 break;
7643 case 4: case 5:
7644 vex_printf("0x%x0000", imm);
7645 break;
7646 case 6: case 7:
7647 vex_printf("0x%x000000", imm);
7648 break;
7649 case 12:
7650 vex_printf("0x%xff", imm);
7651 break;
7652 case 13:
7653 vex_printf("0x%xffff", imm);
7654 break;
7655 case 14:
7656 if (op) {
7657 vex_printf("0x");
7658 for (i = 7; i >= 0; i--)
7659 vex_printf("%s", (imm & (1 << i)) ? "ff" : "00");
7660 } else {
7661 vex_printf("0x%x", imm);
7663 break;
7664 case 15:
7665 vex_printf("0x%x", imm);
7666 break;
7670 static
7671 const char *ppNeonImmType(UInt cmode, UInt op)
7673 switch (cmode) {
7674 case 0 ... 7:
7675 case 12: case 13:
7676 return "i32";
7677 case 8 ... 11:
7678 return "i16";
7679 case 14:
7680 if (op)
7681 return "i64";
7682 else
7683 return "i8";
7684 case 15:
7685 if (op)
7686 vassert(0);
7687 else
7688 return "f32";
7689 default:
7690 vassert(0);
7694 static
7695 void DIPimm(UInt imm, UInt cmode, UInt op,
7696 const char *instr, UInt Q, UInt dreg)
7698 if (vex_traceflags & VEX_TRACE_FE) {
7699 vex_printf("%s.%s %c%u, #", instr,
7700 ppNeonImmType(cmode, op), Q ? 'q' : 'd', dreg);
7701 ppNeonImm(imm, cmode, op);
7702 vex_printf("\n");
7706 static
7707 Bool dis_neon_data_1reg_and_imm ( UInt theInstr, IRTemp condT )
7709 UInt dreg = get_neon_d_regno(theInstr);
7710 ULong imm_raw = ((theInstr >> 17) & 0x80) | ((theInstr >> 12) & 0x70) |
7711 (theInstr & 0xf);
7712 ULong imm_raw_pp = imm_raw;
7713 UInt cmode = (theInstr >> 8) & 0xf;
7714 UInt op_bit = (theInstr >> 5) & 1;
7715 ULong imm = 0;
7716 UInt Q = (theInstr >> 6) & 1;
7717 int i, j;
7718 UInt tmp;
7719 IRExpr *imm_val;
7720 IRExpr *expr;
7721 IRTemp tmp_var;
7722 switch(cmode) {
7723 case 7: case 6:
7724 imm_raw = imm_raw << 8;
7725 /* fallthrough */
7726 case 5: case 4:
7727 imm_raw = imm_raw << 8;
7728 /* fallthrough */
7729 case 3: case 2:
7730 imm_raw = imm_raw << 8;
7731 /* fallthrough */
7732 case 0: case 1:
7733 imm = (imm_raw << 32) | imm_raw;
7734 break;
7735 case 11: case 10:
7736 imm_raw = imm_raw << 8;
7737 /* fallthrough */
7738 case 9: case 8:
7739 imm_raw = (imm_raw << 16) | imm_raw;
7740 imm = (imm_raw << 32) | imm_raw;
7741 break;
7742 case 13:
7743 imm_raw = (imm_raw << 8) | 0xff;
7744 /* fallthrough */
7745 case 12:
7746 imm_raw = (imm_raw << 8) | 0xff;
7747 imm = (imm_raw << 32) | imm_raw;
7748 break;
7749 case 14:
7750 if (! op_bit) {
7751 for(i = 0; i < 8; i++) {
7752 imm = (imm << 8) | imm_raw;
7754 } else {
7755 for(i = 7; i >= 0; i--) {
7756 tmp = 0;
7757 for(j = 0; j < 8; j++) {
7758 tmp = (tmp << 1) | ((imm_raw >> i) & 1);
7760 imm = (imm << 8) | tmp;
7763 break;
7764 case 15:
7765 imm = (imm_raw & 0x80) << 5;
7766 imm |= ((~imm_raw & 0x40) << 5);
7767 for(i = 1; i <= 4; i++)
7768 imm |= (imm_raw & 0x40) << i;
7769 imm |= (imm_raw & 0x7f);
7770 imm = imm << 19;
7771 imm = (imm << 32) | imm;
7772 break;
7773 default:
7774 return False;
7776 if (Q) {
7777 imm_val = binop(Iop_64HLtoV128, mkU64(imm), mkU64(imm));
7778 } else {
7779 imm_val = mkU64(imm);
7781 if (((op_bit == 0) &&
7782 (((cmode & 9) == 0) || ((cmode & 13) == 8) || ((cmode & 12) == 12))) ||
7783 ((op_bit == 1) && (cmode == 14))) {
7784 /* VMOV (immediate) */
7785 if (Q) {
7786 putQReg(dreg, imm_val, condT);
7787 } else {
7788 putDRegI64(dreg, imm_val, condT);
7790 DIPimm(imm_raw_pp, cmode, op_bit, "vmov", Q, dreg);
7791 return True;
7793 if ((op_bit == 1) &&
7794 (((cmode & 9) == 0) || ((cmode & 13) == 8) || ((cmode & 14) == 12))) {
7795 /* VMVN (immediate) */
7796 if (Q) {
7797 putQReg(dreg, unop(Iop_NotV128, imm_val), condT);
7798 } else {
7799 putDRegI64(dreg, unop(Iop_Not64, imm_val), condT);
7801 DIPimm(imm_raw_pp, cmode, op_bit, "vmvn", Q, dreg);
7802 return True;
7804 if (Q) {
7805 tmp_var = newTemp(Ity_V128);
7806 assign(tmp_var, getQReg(dreg));
7807 } else {
7808 tmp_var = newTemp(Ity_I64);
7809 assign(tmp_var, getDRegI64(dreg));
7811 if ((op_bit == 0) && (((cmode & 9) == 1) || ((cmode & 13) == 9))) {
7812 /* VORR (immediate) */
7813 if (Q)
7814 expr = binop(Iop_OrV128, mkexpr(tmp_var), imm_val);
7815 else
7816 expr = binop(Iop_Or64, mkexpr(tmp_var), imm_val);
7817 DIPimm(imm_raw_pp, cmode, op_bit, "vorr", Q, dreg);
7818 } else if ((op_bit == 1) && (((cmode & 9) == 1) || ((cmode & 13) == 9))) {
7819 /* VBIC (immediate) */
7820 if (Q)
7821 expr = binop(Iop_AndV128, mkexpr(tmp_var),
7822 unop(Iop_NotV128, imm_val));
7823 else
7824 expr = binop(Iop_And64, mkexpr(tmp_var), unop(Iop_Not64, imm_val));
7825 DIPimm(imm_raw_pp, cmode, op_bit, "vbic", Q, dreg);
7826 } else {
7827 return False;
7829 if (Q)
7830 putQReg(dreg, expr, condT);
7831 else
7832 putDRegI64(dreg, expr, condT);
7833 return True;
/* A7.4 Advanced SIMD data-processing instructions */
7837 static
7838 Bool dis_neon_data_processing ( UInt theInstr, IRTemp condT )
7840 UInt A = (theInstr >> 19) & 0x1F;
7841 UInt B = (theInstr >> 8) & 0xF;
7842 UInt C = (theInstr >> 4) & 0xF;
7843 UInt U = (theInstr >> 24) & 0x1;
7845 if (! (A & 0x10)) {
7846 return dis_neon_data_3same(theInstr, condT);
7848 if (((A & 0x17) == 0x10) && ((C & 0x9) == 0x1)) {
7849 return dis_neon_data_1reg_and_imm(theInstr, condT);
7851 if ((C & 1) == 1) {
7852 return dis_neon_data_2reg_and_shift(theInstr, condT);
7854 if (((C & 5) == 0) && (((A & 0x14) == 0x10) || ((A & 0x16) == 0x14))) {
7855 return dis_neon_data_3diff(theInstr, condT);
7857 if (((C & 5) == 4) && (((A & 0x14) == 0x10) || ((A & 0x16) == 0x14))) {
7858 return dis_neon_data_2reg_and_scalar(theInstr, condT);
7860 if ((A & 0x16) == 0x16) {
7861 if ((U == 0) && ((C & 1) == 0)) {
7862 return dis_neon_vext(theInstr, condT);
7864 if ((U != 1) || ((C & 1) == 1))
7865 return False;
7866 if ((B & 8) == 0) {
7867 return dis_neon_data_2reg_misc(theInstr, condT);
7869 if ((B & 12) == 8) {
7870 return dis_neon_vtb(theInstr, condT);
7872 if ((B == 12) && ((C & 9) == 0)) {
7873 return dis_neon_vdup(theInstr, condT);
7876 return False;
/*------------------------------------------------------------*/
/*--- NEON loads and stores                                ---*/
/*------------------------------------------------------------*/

/* For NEON memory operations, we use the standard scheme to handle
   conditionalisation: generate a jump around the instruction if the
   condition is false.  That's only necessary in Thumb mode, however,
   since in ARM mode NEON instructions are unconditional. */

/* A helper function for what follows.  It assumes we already went
   uncond as per comments at the top of this section. */
7891 static
7892 void mk_neon_elem_load_to_one_lane( UInt rD, UInt inc, UInt index,
7893 UInt N, UInt size, IRTemp addr )
7895 UInt i;
7896 switch (size) {
7897 case 0:
7898 putDRegI64(rD, triop(Iop_SetElem8x8, getDRegI64(rD), mkU8(index),
7899 loadLE(Ity_I8, mkexpr(addr))), IRTemp_INVALID);
7900 break;
7901 case 1:
7902 putDRegI64(rD, triop(Iop_SetElem16x4, getDRegI64(rD), mkU8(index),
7903 loadLE(Ity_I16, mkexpr(addr))), IRTemp_INVALID);
7904 break;
7905 case 2:
7906 putDRegI64(rD, triop(Iop_SetElem32x2, getDRegI64(rD), mkU8(index),
7907 loadLE(Ity_I32, mkexpr(addr))), IRTemp_INVALID);
7908 break;
7909 default:
7910 vassert(0);
7912 for (i = 1; i <= N; i++) {
7913 switch (size) {
7914 case 0:
7915 putDRegI64(rD + i * inc,
7916 triop(Iop_SetElem8x8,
7917 getDRegI64(rD + i * inc),
7918 mkU8(index),
7919 loadLE(Ity_I8, binop(Iop_Add32,
7920 mkexpr(addr),
7921 mkU32(i * 1)))),
7922 IRTemp_INVALID);
7923 break;
7924 case 1:
7925 putDRegI64(rD + i * inc,
7926 triop(Iop_SetElem16x4,
7927 getDRegI64(rD + i * inc),
7928 mkU8(index),
7929 loadLE(Ity_I16, binop(Iop_Add32,
7930 mkexpr(addr),
7931 mkU32(i * 2)))),
7932 IRTemp_INVALID);
7933 break;
7934 case 2:
7935 putDRegI64(rD + i * inc,
7936 triop(Iop_SetElem32x2,
7937 getDRegI64(rD + i * inc),
7938 mkU8(index),
7939 loadLE(Ity_I32, binop(Iop_Add32,
7940 mkexpr(addr),
7941 mkU32(i * 4)))),
7942 IRTemp_INVALID);
7943 break;
7944 default:
7945 vassert(0);
/* A(nother) helper function for what follows.  It assumes we already
   went uncond as per comments at the top of this section. */
7952 static
7953 void mk_neon_elem_store_from_one_lane( UInt rD, UInt inc, UInt index,
7954 UInt N, UInt size, IRTemp addr )
7956 UInt i;
7957 switch (size) {
7958 case 0:
7959 storeLE(mkexpr(addr),
7960 binop(Iop_GetElem8x8, getDRegI64(rD), mkU8(index)));
7961 break;
7962 case 1:
7963 storeLE(mkexpr(addr),
7964 binop(Iop_GetElem16x4, getDRegI64(rD), mkU8(index)));
7965 break;
7966 case 2:
7967 storeLE(mkexpr(addr),
7968 binop(Iop_GetElem32x2, getDRegI64(rD), mkU8(index)));
7969 break;
7970 default:
7971 vassert(0);
7973 for (i = 1; i <= N; i++) {
7974 switch (size) {
7975 case 0:
7976 storeLE(binop(Iop_Add32, mkexpr(addr), mkU32(i * 1)),
7977 binop(Iop_GetElem8x8, getDRegI64(rD + i * inc),
7978 mkU8(index)));
7979 break;
7980 case 1:
7981 storeLE(binop(Iop_Add32, mkexpr(addr), mkU32(i * 2)),
7982 binop(Iop_GetElem16x4, getDRegI64(rD + i * inc),
7983 mkU8(index)));
7984 break;
7985 case 2:
7986 storeLE(binop(Iop_Add32, mkexpr(addr), mkU32(i * 4)),
7987 binop(Iop_GetElem32x2, getDRegI64(rD + i * inc),
7988 mkU8(index)));
7989 break;
7990 default:
7991 vassert(0);
/* Generate 2x64 -> 2x64 deinterleave code, for VLD2.  Caller must
   make *u0 and *u1 be valid IRTemps before the call. */
7998 static void math_DEINTERLEAVE_2 (/*OUT*/IRTemp* u0, /*OUT*/IRTemp* u1,
7999 IRTemp i0, IRTemp i1, Int laneszB)
8001 /* The following assumes that the guest is little endian, and hence
8002 that the memory-side (interleaved) data is stored
8003 little-endianly. */
8004 vassert(u0 && u1);
8005 /* This is pretty easy, since we have primitives directly to
8006 hand. */
8007 if (laneszB == 4) {
8008 // memLE(128 bits) == A0 B0 A1 B1
8009 // i0 == B0 A0, i1 == B1 A1
8010 // u0 == A1 A0, u1 == B1 B0
8011 assign(*u0, binop(Iop_InterleaveLO32x2, mkexpr(i1), mkexpr(i0)));
8012 assign(*u1, binop(Iop_InterleaveHI32x2, mkexpr(i1), mkexpr(i0)));
8013 } else if (laneszB == 2) {
8014 // memLE(128 bits) == A0 B0 A1 B1 A2 B2 A3 B3
8015 // i0 == B1 A1 B0 A0, i1 == B3 A3 B2 A2
8016 // u0 == A3 A2 A1 A0, u1 == B3 B2 B1 B0
8017 assign(*u0, binop(Iop_CatEvenLanes16x4, mkexpr(i1), mkexpr(i0)));
8018 assign(*u1, binop(Iop_CatOddLanes16x4, mkexpr(i1), mkexpr(i0)));
8019 } else if (laneszB == 1) {
8020 // memLE(128 bits) == A0 B0 A1 B1 A2 B2 A3 B3 A4 B4 A5 B5 A6 B6 A7 B7
8021 // i0 == B3 A3 B2 A2 B1 A1 B0 A0, i1 == B7 A7 B6 A6 B5 A5 B4 A4
8022 // u0 == A7 A6 A5 A4 A3 A2 A1 A0, u1 == B7 B6 B5 B4 B3 B2 B1 B0
8023 assign(*u0, binop(Iop_CatEvenLanes8x8, mkexpr(i1), mkexpr(i0)));
8024 assign(*u1, binop(Iop_CatOddLanes8x8, mkexpr(i1), mkexpr(i0)));
8025 } else {
8026 // Can never happen, since VLD2 only has valid lane widths of 32,
8027 // 16 or 8 bits.
8028 vpanic("math_DEINTERLEAVE_2");
/* Generate 2x64 -> 2x64 interleave code, for VST2.  Caller must make
   *u0 and *u1 be valid IRTemps before the call. */
8034 static void math_INTERLEAVE_2 (/*OUT*/IRTemp* i0, /*OUT*/IRTemp* i1,
8035 IRTemp u0, IRTemp u1, Int laneszB)
8037 /* The following assumes that the guest is little endian, and hence
8038 that the memory-side (interleaved) data is stored
8039 little-endianly. */
8040 vassert(i0 && i1);
8041 /* This is pretty easy, since we have primitives directly to
8042 hand. */
8043 if (laneszB == 4) {
8044 // memLE(128 bits) == A0 B0 A1 B1
8045 // i0 == B0 A0, i1 == B1 A1
8046 // u0 == A1 A0, u1 == B1 B0
8047 assign(*i0, binop(Iop_InterleaveLO32x2, mkexpr(u1), mkexpr(u0)));
8048 assign(*i1, binop(Iop_InterleaveHI32x2, mkexpr(u1), mkexpr(u0)));
8049 } else if (laneszB == 2) {
8050 // memLE(128 bits) == A0 B0 A1 B1 A2 B2 A3 B3
8051 // i0 == B1 A1 B0 A0, i1 == B3 A3 B2 A2
8052 // u0 == A3 A2 A1 A0, u1 == B3 B2 B1 B0
8053 assign(*i0, binop(Iop_InterleaveLO16x4, mkexpr(u1), mkexpr(u0)));
8054 assign(*i1, binop(Iop_InterleaveHI16x4, mkexpr(u1), mkexpr(u0)));
8055 } else if (laneszB == 1) {
8056 // memLE(128 bits) == A0 B0 A1 B1 A2 B2 A3 B3 A4 B4 A5 B5 A6 B6 A7 B7
8057 // i0 == B3 A3 B2 A2 B1 A1 B0 A0, i1 == B7 A7 B6 A6 B5 A5 B4 A4
8058 // u0 == A7 A6 A5 A4 A3 A2 A1 A0, u1 == B7 B6 B5 B4 B3 B2 B1 B0
8059 assign(*i0, binop(Iop_InterleaveLO8x8, mkexpr(u1), mkexpr(u0)));
8060 assign(*i1, binop(Iop_InterleaveHI8x8, mkexpr(u1), mkexpr(u0)));
8061 } else {
8062 // Can never happen, since VST2 only has valid lane widths of 32,
8063 // 16 or 8 bits.
8064 vpanic("math_INTERLEAVE_2");
// Helper function for generating arbitrary slicing 'n' dicing of
// 3 8x8 vectors, as needed for VLD3.8 and VST3.8.
8070 static IRExpr* math_PERM_8x8x3(const UChar* desc,
8071 IRTemp s0, IRTemp s1, IRTemp s2)
8073 // desc is an array of 8 pairs, encoded as 16 bytes,
8074 // that describe how to assemble the result lanes, starting with
8075 // lane 7. Each pair is: first component (0..2) says which of
8076 // s0/s1/s2 to use. Second component (0..7) is the lane number
8077 // in the source to use.
8078 UInt si;
8079 for (si = 0; si < 7; si++) {
8080 vassert(desc[2 * si + 0] <= 2);
8081 vassert(desc[2 * si + 1] <= 7);
8083 IRTemp h3 = newTemp(Ity_I64);
8084 IRTemp h2 = newTemp(Ity_I64);
8085 IRTemp h1 = newTemp(Ity_I64);
8086 IRTemp h0 = newTemp(Ity_I64);
8087 IRTemp srcs[3] = {s0, s1, s2};
8088 # define SRC_VEC(_lane) mkexpr(srcs[desc[2 * (7-(_lane)) + 0]])
8089 # define SRC_SHIFT(_lane) mkU8(56-8*(desc[2 * (7-(_lane)) + 1]))
8090 assign(h3, binop(Iop_InterleaveHI8x8,
8091 binop(Iop_Shl64, SRC_VEC(7), SRC_SHIFT(7)),
8092 binop(Iop_Shl64, SRC_VEC(6), SRC_SHIFT(6))));
8093 assign(h2, binop(Iop_InterleaveHI8x8,
8094 binop(Iop_Shl64, SRC_VEC(5), SRC_SHIFT(5)),
8095 binop(Iop_Shl64, SRC_VEC(4), SRC_SHIFT(4))));
8096 assign(h1, binop(Iop_InterleaveHI8x8,
8097 binop(Iop_Shl64, SRC_VEC(3), SRC_SHIFT(3)),
8098 binop(Iop_Shl64, SRC_VEC(2), SRC_SHIFT(2))));
8099 assign(h0, binop(Iop_InterleaveHI8x8,
8100 binop(Iop_Shl64, SRC_VEC(1), SRC_SHIFT(1)),
8101 binop(Iop_Shl64, SRC_VEC(0), SRC_SHIFT(0))));
8102 # undef SRC_VEC
8103 # undef SRC_SHIFT
8104 // Now h3..h0 are 64 bit vectors with useful information only
8105 // in the top 16 bits. We now concatentate those four 16-bit
8106 // groups so as to produce the final result.
8107 IRTemp w1 = newTemp(Ity_I64);
8108 IRTemp w0 = newTemp(Ity_I64);
8109 assign(w1, binop(Iop_InterleaveHI16x4, mkexpr(h3), mkexpr(h2)));
8110 assign(w0, binop(Iop_InterleaveHI16x4, mkexpr(h1), mkexpr(h0)));
8111 return binop(Iop_InterleaveHI32x2, mkexpr(w1), mkexpr(w0));
/* Generate 3x64 -> 3x64 deinterleave code, for VLD3.  Caller must
   make *u0, *u1 and *u2 be valid IRTemps before the call. */
static void math_DEINTERLEAVE_3 (
        /*OUT*/IRTemp* u0, /*OUT*/IRTemp* u1, /*OUT*/IRTemp* u2,
        IRTemp i0, IRTemp i1, IRTemp i2, Int laneszB
     )
{
   /* Local shorthands for the 64-bit interleave/shift IR builders
      used below; #undef'd at the end of the function. */
#  define IHI32x2(_e1, _e2) binop(Iop_InterleaveHI32x2, (_e1), (_e2))
#  define IHI16x4(_e1, _e2) binop(Iop_InterleaveHI16x4, (_e1), (_e2))
#  define SHL64(_tmp, _amt) binop(Iop_Shl64, mkexpr(_tmp), mkU8(_amt))
   /* The following assumes that the guest is little endian, and hence
      that the memory-side (interleaved) data is stored
      little-endianly. */
   vassert(u0 && u1 && u2);
   if (laneszB == 4) {
      // memLE(192 bits) == A0 B0 C0 A1 B1 C1
      // i0 == B0 A0, i1 == A1 C0, i2 == C1 B1
      // u0 == A1 A0, u1 == B1 B0, u2 == C1 C0
      // Each source lane is shifted into the top 32 bits of a 64-bit
      // value, and IHI32x2 then pairs two such picks per output.
      assign(*u0, IHI32x2(SHL64(i1, 0), SHL64(i0, 32)));
      assign(*u1, IHI32x2(SHL64(i2, 32), SHL64(i0, 0)));
      assign(*u2, IHI32x2(SHL64(i2, 0), SHL64(i1, 32)));
   } else if (laneszB == 2) {
      // memLE(192 bits) == A0 B0 C0 A1, B1 C1 A2 B2, C2 A3 B3 C3
      // i0 == A1 C0 B0 A0, i1 == B2 A2 C1 B1, i2 == C3 B3 A3 C2
      // u0 == A3 A2 A1 A0, u1 == B3 B2 B1 B0, u2 == C3 C2 C1 C0
      // XXX(tN,lN) extracts 16-bit lane lN of temp tN into the top 16
      // bits; the IHI16x4/IHI32x2 tree packs the four picks (result
      // lanes 3..0) into one 64-bit vector.
#     define XXX(_tmp3,_la3,_tmp2,_la2,_tmp1,_la1,_tmp0,_la0) \
                IHI32x2( \
                   IHI16x4(SHL64((_tmp3),48-16*(_la3)), \
                           SHL64((_tmp2),48-16*(_la2))), \
                   IHI16x4(SHL64((_tmp1),48-16*(_la1)), \
                           SHL64((_tmp0),48-16*(_la0))))
      assign(*u0, XXX(i2,1, i1,2, i0,3, i0,0));
      assign(*u1, XXX(i2,2, i1,3, i1,0, i0,1));
      assign(*u2, XXX(i2,3, i2,0, i1,1, i0,2));
#     undef XXX
   } else if (laneszB == 1) {
      // These describe how the result vectors [7..0] are
      // assembled from the source vectors.  Each pair is
      // (source vector number, lane number).
      static const UChar de0[16] = {2,5, 2,2, 1,7, 1,4, 1,1, 0,6, 0,3, 0,0};
      static const UChar de1[16] = {2,6, 2,3, 2,0, 1,5, 1,2, 0,7, 0,4, 0,1};
      static const UChar de2[16] = {2,7, 2,4, 2,1, 1,6, 1,3, 1,0, 0,5, 0,2};
      assign(*u0, math_PERM_8x8x3(de0, i0, i1, i2));
      assign(*u1, math_PERM_8x8x3(de1, i0, i1, i2));
      assign(*u2, math_PERM_8x8x3(de2, i0, i1, i2));
   } else {
      // Can never happen, since VLD3 only has valid lane widths of 32,
      // 16 or 8 bits.
      vpanic("math_DEINTERLEAVE_3");
   }
#  undef SHL64
#  undef IHI16x4
#  undef IHI32x2
}
/* Generate 3x64 -> 3x64 interleave code, for VST3.  Caller must
   make *i0, *i1 and *i2 be valid IRTemps before the call. */
static void math_INTERLEAVE_3 (
        /*OUT*/IRTemp* i0, /*OUT*/IRTemp* i1, /*OUT*/IRTemp* i2,
        IRTemp u0, IRTemp u1, IRTemp u2, Int laneszB
     )
{
   /* Local shorthands for the 64-bit interleave/shift IR builders
      used below; #undef'd at the end of the function. */
#  define IHI32x2(_e1, _e2) binop(Iop_InterleaveHI32x2, (_e1), (_e2))
#  define IHI16x4(_e1, _e2) binop(Iop_InterleaveHI16x4, (_e1), (_e2))
#  define SHL64(_tmp, _amt) binop(Iop_Shl64, mkexpr(_tmp), mkU8(_amt))
   /* The following assumes that the guest is little endian, and hence
      that the memory-side (interleaved) data is stored
      little-endianly. */
   vassert(i0 && i1 && i2);
   if (laneszB == 4) {
      // memLE(192 bits) == A0 B0 C0 A1 B1 C1
      // i0 == B0 A0, i1 == A1 C0, i2 == C1 B1
      // u0 == A1 A0, u1 == B1 B0, u2 == C1 C0
      // Each source lane is shifted into the top 32 bits of a 64-bit
      // value, and IHI32x2 then pairs two such picks per output.
      assign(*i0, IHI32x2(SHL64(u1, 32), SHL64(u0, 32)));
      assign(*i1, IHI32x2(SHL64(u0, 0), SHL64(u2, 32)));
      assign(*i2, IHI32x2(SHL64(u2, 0), SHL64(u1, 0)));
   } else if (laneszB == 2) {
      // memLE(192 bits) == A0 B0 C0 A1, B1 C1 A2 B2, C2 A3 B3 C3
      // i0 == A1 C0 B0 A0, i1 == B2 A2 C1 B1, i2 == C3 B3 A3 C2
      // u0 == A3 A2 A1 A0, u1 == B3 B2 B1 B0, u2 == C3 C2 C1 C0
      // XXX(tN,lN) extracts 16-bit lane lN of temp tN into the top 16
      // bits; the IHI16x4/IHI32x2 tree packs the four picks (result
      // lanes 3..0) into one 64-bit vector.
#     define XXX(_tmp3,_la3,_tmp2,_la2,_tmp1,_la1,_tmp0,_la0) \
                IHI32x2( \
                   IHI16x4(SHL64((_tmp3),48-16*(_la3)), \
                           SHL64((_tmp2),48-16*(_la2))), \
                   IHI16x4(SHL64((_tmp1),48-16*(_la1)), \
                           SHL64((_tmp0),48-16*(_la0))))
      assign(*i0, XXX(u0,1, u2,0, u1,0, u0,0));
      assign(*i1, XXX(u1,2, u0,2, u2,1, u1,1));
      assign(*i2, XXX(u2,3, u1,3, u0,3, u2,2));
#     undef XXX
   } else if (laneszB == 1) {
      // These describe how the result vectors [7..0] are
      // assembled from the source vectors.  Each pair is
      // (source vector number, lane number).
      static const UChar in0[16] = {1,2, 0,2, 2,1, 1,1, 0,1, 2,0, 1,0, 0,0};
      static const UChar in1[16] = {0,5, 2,4, 1,4, 0,4, 2,3, 1,3, 0,3, 2,2};
      static const UChar in2[16] = {2,7, 1,7, 0,7, 2,6, 1,6, 0,6, 2,5, 1,5};
      assign(*i0, math_PERM_8x8x3(in0, u0, u1, u2));
      assign(*i1, math_PERM_8x8x3(in1, u0, u1, u2));
      assign(*i2, math_PERM_8x8x3(in2, u0, u1, u2));
   } else {
      // Can never happen, since VST3 only has valid lane widths of 32,
      // 16 or 8 bits.
      vpanic("math_INTERLEAVE_3");
   }
#  undef SHL64
#  undef IHI16x4
#  undef IHI32x2
}
8224 /* Generate 4x64 -> 4x64 deinterleave code, for VLD4. Caller must
8225 make *u0, *u1, *u2 and *u3 be valid IRTemps before the call. */
8226 static void math_DEINTERLEAVE_4 (
8227 /*OUT*/IRTemp* u0, /*OUT*/IRTemp* u1,
8228 /*OUT*/IRTemp* u2, /*OUT*/IRTemp* u3,
8229 IRTemp i0, IRTemp i1, IRTemp i2, IRTemp i3, Int laneszB
8232 # define IHI32x2(_t1, _t2) \
8233 binop(Iop_InterleaveHI32x2, mkexpr(_t1), mkexpr(_t2))
8234 # define ILO32x2(_t1, _t2) \
8235 binop(Iop_InterleaveLO32x2, mkexpr(_t1), mkexpr(_t2))
8236 # define IHI16x4(_t1, _t2) \
8237 binop(Iop_InterleaveHI16x4, mkexpr(_t1), mkexpr(_t2))
8238 # define ILO16x4(_t1, _t2) \
8239 binop(Iop_InterleaveLO16x4, mkexpr(_t1), mkexpr(_t2))
8240 # define IHI8x8(_t1, _e2) \
8241 binop(Iop_InterleaveHI8x8, mkexpr(_t1), _e2)
8242 # define SHL64(_tmp, _amt) \
8243 binop(Iop_Shl64, mkexpr(_tmp), mkU8(_amt))
8244 /* The following assumes that the guest is little endian, and hence
8245 that the memory-side (interleaved) data is stored
8246 little-endianly. */
8247 vassert(u0 && u1 && u2 && u3);
8248 if (laneszB == 4) {
8249 assign(*u0, ILO32x2(i2, i0));
8250 assign(*u1, IHI32x2(i2, i0));
8251 assign(*u2, ILO32x2(i3, i1));
8252 assign(*u3, IHI32x2(i3, i1));
8253 } else if (laneszB == 2) {
8254 IRTemp b1b0a1a0 = newTemp(Ity_I64);
8255 IRTemp b3b2a3a2 = newTemp(Ity_I64);
8256 IRTemp d1d0c1c0 = newTemp(Ity_I64);
8257 IRTemp d3d2c3c2 = newTemp(Ity_I64);
8258 assign(b1b0a1a0, ILO16x4(i1, i0));
8259 assign(b3b2a3a2, ILO16x4(i3, i2));
8260 assign(d1d0c1c0, IHI16x4(i1, i0));
8261 assign(d3d2c3c2, IHI16x4(i3, i2));
8262 // And now do what we did for the 32-bit case.
8263 assign(*u0, ILO32x2(b3b2a3a2, b1b0a1a0));
8264 assign(*u1, IHI32x2(b3b2a3a2, b1b0a1a0));
8265 assign(*u2, ILO32x2(d3d2c3c2, d1d0c1c0));
8266 assign(*u3, IHI32x2(d3d2c3c2, d1d0c1c0));
8267 } else if (laneszB == 1) {
8268 // Deinterleave into 16-bit chunks, then do as the 16-bit case.
8269 IRTemp i0x = newTemp(Ity_I64);
8270 IRTemp i1x = newTemp(Ity_I64);
8271 IRTemp i2x = newTemp(Ity_I64);
8272 IRTemp i3x = newTemp(Ity_I64);
8273 assign(i0x, IHI8x8(i0, SHL64(i0, 32)));
8274 assign(i1x, IHI8x8(i1, SHL64(i1, 32)));
8275 assign(i2x, IHI8x8(i2, SHL64(i2, 32)));
8276 assign(i3x, IHI8x8(i3, SHL64(i3, 32)));
8277 // From here on is like the 16 bit case.
8278 IRTemp b1b0a1a0 = newTemp(Ity_I64);
8279 IRTemp b3b2a3a2 = newTemp(Ity_I64);
8280 IRTemp d1d0c1c0 = newTemp(Ity_I64);
8281 IRTemp d3d2c3c2 = newTemp(Ity_I64);
8282 assign(b1b0a1a0, ILO16x4(i1x, i0x));
8283 assign(b3b2a3a2, ILO16x4(i3x, i2x));
8284 assign(d1d0c1c0, IHI16x4(i1x, i0x));
8285 assign(d3d2c3c2, IHI16x4(i3x, i2x));
8286 // And now do what we did for the 32-bit case.
8287 assign(*u0, ILO32x2(b3b2a3a2, b1b0a1a0));
8288 assign(*u1, IHI32x2(b3b2a3a2, b1b0a1a0));
8289 assign(*u2, ILO32x2(d3d2c3c2, d1d0c1c0));
8290 assign(*u3, IHI32x2(d3d2c3c2, d1d0c1c0));
8291 } else {
8292 // Can never happen, since VLD4 only has valid lane widths of 32,
8293 // 16 or 8 bits.
8294 vpanic("math_DEINTERLEAVE_4");
8296 # undef SHL64
8297 # undef IHI8x8
8298 # undef ILO16x4
8299 # undef IHI16x4
8300 # undef ILO32x2
8301 # undef IHI32x2
8304 /* Generate 4x64 -> 4x64 interleave code, for VST4. Caller must
8305 make *i0, *i1, *i2 and *i3 be valid IRTemps before the call. */
8306 static void math_INTERLEAVE_4 (
8307 /*OUT*/IRTemp* i0, /*OUT*/IRTemp* i1,
8308 /*OUT*/IRTemp* i2, /*OUT*/IRTemp* i3,
8309 IRTemp u0, IRTemp u1, IRTemp u2, IRTemp u3, Int laneszB
8312 # define IHI32x2(_t1, _t2) \
8313 binop(Iop_InterleaveHI32x2, mkexpr(_t1), mkexpr(_t2))
8314 # define ILO32x2(_t1, _t2) \
8315 binop(Iop_InterleaveLO32x2, mkexpr(_t1), mkexpr(_t2))
8316 # define CEV16x4(_t1, _t2) \
8317 binop(Iop_CatEvenLanes16x4, mkexpr(_t1), mkexpr(_t2))
8318 # define COD16x4(_t1, _t2) \
8319 binop(Iop_CatOddLanes16x4, mkexpr(_t1), mkexpr(_t2))
8320 # define COD8x8(_t1, _e2) \
8321 binop(Iop_CatOddLanes8x8, mkexpr(_t1), _e2)
8322 # define SHL64(_tmp, _amt) \
8323 binop(Iop_Shl64, mkexpr(_tmp), mkU8(_amt))
8324 /* The following assumes that the guest is little endian, and hence
8325 that the memory-side (interleaved) data is stored
8326 little-endianly. */
8327 vassert(u0 && u1 && u2 && u3);
8328 if (laneszB == 4) {
8329 assign(*i0, ILO32x2(u1, u0));
8330 assign(*i1, ILO32x2(u3, u2));
8331 assign(*i2, IHI32x2(u1, u0));
8332 assign(*i3, IHI32x2(u3, u2));
8333 } else if (laneszB == 2) {
8334 // First, interleave at the 32-bit lane size.
8335 IRTemp b1b0a1a0 = newTemp(Ity_I64);
8336 IRTemp b3b2a3a2 = newTemp(Ity_I64);
8337 IRTemp d1d0c1c0 = newTemp(Ity_I64);
8338 IRTemp d3d2c3c2 = newTemp(Ity_I64);
8339 assign(b1b0a1a0, ILO32x2(u1, u0));
8340 assign(b3b2a3a2, IHI32x2(u1, u0));
8341 assign(d1d0c1c0, ILO32x2(u3, u2));
8342 assign(d3d2c3c2, IHI32x2(u3, u2));
8343 // And interleave (cat) at the 16 bit size.
8344 assign(*i0, CEV16x4(d1d0c1c0, b1b0a1a0));
8345 assign(*i1, COD16x4(d1d0c1c0, b1b0a1a0));
8346 assign(*i2, CEV16x4(d3d2c3c2, b3b2a3a2));
8347 assign(*i3, COD16x4(d3d2c3c2, b3b2a3a2));
8348 } else if (laneszB == 1) {
8349 // First, interleave at the 32-bit lane size.
8350 IRTemp b1b0a1a0 = newTemp(Ity_I64);
8351 IRTemp b3b2a3a2 = newTemp(Ity_I64);
8352 IRTemp d1d0c1c0 = newTemp(Ity_I64);
8353 IRTemp d3d2c3c2 = newTemp(Ity_I64);
8354 assign(b1b0a1a0, ILO32x2(u1, u0));
8355 assign(b3b2a3a2, IHI32x2(u1, u0));
8356 assign(d1d0c1c0, ILO32x2(u3, u2));
8357 assign(d3d2c3c2, IHI32x2(u3, u2));
8358 // And interleave (cat) at the 16 bit size.
8359 IRTemp i0x = newTemp(Ity_I64);
8360 IRTemp i1x = newTemp(Ity_I64);
8361 IRTemp i2x = newTemp(Ity_I64);
8362 IRTemp i3x = newTemp(Ity_I64);
8363 assign(i0x, CEV16x4(d1d0c1c0, b1b0a1a0));
8364 assign(i1x, COD16x4(d1d0c1c0, b1b0a1a0));
8365 assign(i2x, CEV16x4(d3d2c3c2, b3b2a3a2));
8366 assign(i3x, COD16x4(d3d2c3c2, b3b2a3a2));
8367 // And rearrange within each word, to get the right 8 bit lanes.
8368 assign(*i0, COD8x8(i0x, SHL64(i0x, 8)));
8369 assign(*i1, COD8x8(i1x, SHL64(i1x, 8)));
8370 assign(*i2, COD8x8(i2x, SHL64(i2x, 8)));
8371 assign(*i3, COD8x8(i3x, SHL64(i3x, 8)));
8372 } else {
8373 // Can never happen, since VLD4 only has valid lane widths of 32,
8374 // 16 or 8 bits.
8375 vpanic("math_DEINTERLEAVE_4");
8377 # undef SHL64
8378 # undef COD8x8
8379 # undef COD16x4
8380 # undef CEV16x4
8381 # undef ILO32x2
8382 # undef IHI32x2
/* A7.7 Advanced SIMD element or structure load/store instructions */
/* Decode one NEON element/structure load/store (VLDn/VSTn).
   'theInstr' must be in ARM-style encoding (Thumb callers pre-swizzle
   the top bits -- see decode_NEON_instruction_ARMv7_and_below).
   'isT' says whether we are decoding Thumb code; 'condT' is the
   gating condition (IRTemp_INVALID for ARM, a valid temp for Thumb).
   Returns True iff the instruction was recognised and IR emitted. */
static
Bool dis_neon_load_or_store ( UInt theInstr,
                              Bool isT, IRTemp condT )
{
#  define INSN(_bMax,_bMin) SLICE_UInt(theInstr, (_bMax), (_bMin))
   UInt bA = INSN(23,23);  // 1 => single-lane / all-lanes forms, 0 => multiple structures
   UInt fB = INSN(11,8);   // type/size field; meaning depends on the form
   UInt bL = INSN(21,21);  // 1 => load (VLDn), 0 => store (VSTn)
   UInt rD = (INSN(22,22) << 4) | INSN(15,12);  // first D register (0..31)
   UInt rN = INSN(19,16);  // base register
   UInt rM = INSN(3,0);    // 15 => no writeback, 13 => writeback by xfer size,
                           // otherwise writeback by register rM
   UInt N, size, i, j;
   UInt inc;
   UInt regs = 1;

   if (isT) {
      vassert(condT != IRTemp_INVALID);
   } else {
      vassert(condT == IRTemp_INVALID);
   }
   /* So now, if condT is not IRTemp_INVALID, we know we're
      dealing with Thumb code. */

   if (INSN(20,20) != 0)
      return False;

   /* Snapshot Rn/Rm before any guest-state writes, for use in the
      writeback computation at the end. */
   IRTemp initialRn = newTemp(Ity_I32);
   assign(initialRn, isT ? getIRegT(rN) : getIRegA(rN));

   IRTemp initialRm = newTemp(Ity_I32);
   assign(initialRm, isT ? getIRegT(rM) : getIRegA(rM));

   /* There are 3 cases:
      (1) VSTn / VLDn (n-element structure from/to one lane)
      (2) VLDn (single element to all lanes)
      (3) VSTn / VLDn (multiple n-element structures)
   */
   if (bA) {
      N = fB & 3;
      if ((fB >> 2) < 3) {
         /* ------------ Case (1) ------------
            VSTn / VLDn (n-element structure from/to one lane) */

         size = fB >> 2;

         /* Extract lane index 'i' and register step 'inc' from the
            size-dependent index encoding. */
         switch (size) {
            case 0: i = INSN(7,5); inc = 1; break;
            case 1: i = INSN(7,6); inc = INSN(5,5) ? 2 : 1; break;
            case 2: i = INSN(7,7); inc = INSN(6,6) ? 2 : 1; break;
            case 3: return False;
            default: vassert(0);
         }

         IRTemp addr = newTemp(Ity_I32);
         assign(addr, mkexpr(initialRn));

         // go uncond
         if (condT != IRTemp_INVALID)
            mk_skip_over_T32_if_cond_is_false(condT);
         // now uncond

         if (bL)
            mk_neon_elem_load_to_one_lane(rD, inc, i, N, size, addr);
         else
            mk_neon_elem_store_from_one_lane(rD, inc, i, N, size, addr);
         DIP("v%s%u.%d {", bL ? "ld" : "st", N + 1, 8 << size);
         for (j = 0; j <= N; j++) {
            if (j)
               DIP(", ");
            DIP("d%u[%u]", rD + j * inc, i);
         }
         DIP("}, [r%u]", rN);
         if (rM != 13 && rM != 15) {
            DIP(", r%u\n", rM);
         } else {
            DIP("%s\n", (rM != 15) ? "!" : "");
         }
      } else {
         /* ------------ Case (2) ------------
            VLDn (single element to all lanes) */
         UInt r;
         if (bL == 0)
            return False;

         inc = INSN(5,5) + 1;
         size = INSN(7,6);

         /* size == 3 and size == 2 cases differ in alignment constraints */
         if (size == 3 && N == 3 && INSN(4,4) == 1)
            size = 2;

         if (size == 0 && N == 0 && INSN(4,4) == 1)
            return False;
         if (N == 2 && INSN(4,4) == 1)
            return False;
         if (size == 3)
            return False;

         // go uncond
         if (condT != IRTemp_INVALID)
            mk_skip_over_T32_if_cond_is_false(condT);
         // now uncond

         IRTemp addr = newTemp(Ity_I32);
         assign(addr, mkexpr(initialRn));

         if (N == 0 && INSN(5,5))
            regs = 2;

         for (r = 0; r < regs; r++) {
            /* Load the element at 'addr' and replicate it across all
               lanes of the first register of this group... */
            switch (size) {
               case 0:
                  putDRegI64(rD + r, unop(Iop_Dup8x8,
                                          loadLE(Ity_I8, mkexpr(addr))),
                             IRTemp_INVALID);
                  break;
               case 1:
                  putDRegI64(rD + r, unop(Iop_Dup16x4,
                                          loadLE(Ity_I16, mkexpr(addr))),
                             IRTemp_INVALID);
                  break;
               case 2:
                  putDRegI64(rD + r, unop(Iop_Dup32x2,
                                          loadLE(Ity_I32, mkexpr(addr))),
                             IRTemp_INVALID);
                  break;
               default:
                  vassert(0);
            }
            /* ...and likewise for the remaining N structure elements,
               each located one element-size further on. */
            for (i = 1; i <= N; i++) {
               switch (size) {
                  case 0:
                     putDRegI64(rD + r + i * inc,
                                unop(Iop_Dup8x8,
                                     loadLE(Ity_I8, binop(Iop_Add32,
                                                          mkexpr(addr),
                                                          mkU32(i * 1)))),
                                IRTemp_INVALID);
                     break;
                  case 1:
                     putDRegI64(rD + r + i * inc,
                                unop(Iop_Dup16x4,
                                     loadLE(Ity_I16, binop(Iop_Add32,
                                                           mkexpr(addr),
                                                           mkU32(i * 2)))),
                                IRTemp_INVALID);
                     break;
                  case 2:
                     putDRegI64(rD + r + i * inc,
                                unop(Iop_Dup32x2,
                                     loadLE(Ity_I32, binop(Iop_Add32,
                                                           mkexpr(addr),
                                                           mkU32(i * 4)))),
                                IRTemp_INVALID);
                     break;
                  default:
                     vassert(0);
               }
            }
         }
         DIP("vld%u.%d {", N + 1, 8 << size);
         for (r = 0; r < regs; r++) {
            for (i = 0; i <= N; i++) {
               if (i || r)
                  DIP(", ");
               DIP("d%u[]", rD + r + i * inc);
            }
         }
         DIP("}, [r%u]", rN);
         if (rM != 13 && rM != 15) {
            DIP(", r%u\n", rM);
         } else {
            DIP("%s\n", (rM != 15) ? "!" : "");
         }
      }
      /* Writeback.  We're uncond here, so no condT-ing.
         Shared by cases (1) and (2): rM == 13 means post-increment by
         the transfer size, (element size) * (N+1) bytes; any other
         rM != 15 means post-increment by Rm. */
      if (rM != 15) {
         if (rM == 13) {
            IRExpr* e = binop(Iop_Add32,
                              mkexpr(initialRn),
                              mkU32((1 << size) * (N + 1)));
            if (isT)
               putIRegT(rN, e, IRTemp_INVALID);
            else
               putIRegA(rN, e, IRTemp_INVALID, Ijk_Boring);
         } else {
            IRExpr* e = binop(Iop_Add32,
                              mkexpr(initialRn),
                              mkexpr(initialRm));
            if (isT)
               putIRegT(rN, e, IRTemp_INVALID);
            else
               putIRegA(rN, e, IRTemp_INVALID, Ijk_Boring);
         }
      }
      return True;
   } else {
      /* ------------ Case (3) ------------
         VSTn / VLDn (multiple n-element structures) */
      inc = (fB & 1) + 1;

      if (fB == BITS4(0,0,1,0)       // Dd, Dd+1, Dd+2, Dd+3  inc = 1  regs = 4
          || fB == BITS4(0,1,1,0)    // Dd, Dd+1, Dd+2        inc = 1  regs = 3
          || fB == BITS4(0,1,1,1)    // Dd                    inc = 2  regs = 1
          || fB == BITS4(1,0,1,0)) { // Dd, Dd+1              inc = 1  regs = 2
         N = 0; // VLD1/VST1.  'inc' does not appear to have any
                // meaning for the VLD1/VST1 cases.  'regs' is the number of
                // registers involved.
         if (rD + regs > 32) return False;
      }
      else
      if (fB == BITS4(0,0,1,1)       // Dd, Dd+1, Dd+2, Dd+3  inc=2  regs = 2
          || fB == BITS4(1,0,0,0)    // Dd, Dd+1               inc=1  regs = 1
          || fB == BITS4(1,0,0,1)) { // Dd, Dd+2               inc=2  regs = 1
         N = 1; // VLD2/VST2.  'regs' is the number of register-pairs involved
         if (regs == 1 && inc == 1 && rD + 1 >= 32) return False;
         if (regs == 1 && inc == 2 && rD + 2 >= 32) return False;
         if (regs == 2 && inc == 2 && rD + 3 >= 32) return False;
      } else if (fB == BITS4(0,1,0,0) || fB == BITS4(0,1,0,1)) {
         N = 2; // VLD3/VST3
         if (inc == 1 && rD + 2 >= 32) return False;
         if (inc == 2 && rD + 4 >= 32) return False;
      } else if (fB == BITS4(0,0,0,0) || fB == BITS4(0,0,0,1)) {
         N = 3; // VLD4/VST4
         if (inc == 1 && rD + 3 >= 32) return False;
         if (inc == 2 && rD + 6 >= 32) return False;
      } else {
         return False;
      }

      /* Fill in the true group/register count implied by fB ('regs'
         still holds its default of 1 at this point). */
      if (N == 1 && fB == BITS4(0,0,1,1)) {
         regs = 2;
      } else if (N == 0) {
         if (fB == BITS4(1,0,1,0)) {
            regs = 2;
         } else if (fB == BITS4(0,1,1,0)) {
            regs = 3;
         } else if (fB == BITS4(0,0,1,0)) {
            regs = 4;
         }
      }

      size = INSN(7,6);
      if (N == 0 && size == 3)
         size = 2;
      if (size == 3)
         return False;

      // go uncond
      if (condT != IRTemp_INVALID)
         mk_skip_over_T32_if_cond_is_false(condT);
      // now uncond

      IRTemp addr = newTemp(Ity_I32);
      assign(addr, mkexpr(initialRn));

      if (N == 0 /* No interleaving -- VLD1/VST1 */) {
         UInt r;
         vassert(regs == 1 || regs == 2 || regs == 3 || regs == 4);
         /* inc has no relevance here */
         for (r = 0; r < regs; r++) {
            if (bL)
               putDRegI64(rD+r, loadLE(Ity_I64, mkexpr(addr)), IRTemp_INVALID);
            else
               storeLE(mkexpr(addr), getDRegI64(rD+r));
            /* Advance 'addr' by 8 bytes via a fresh temp (IR temps are
               single-assignment). */
            IRTemp tmp = newTemp(Ity_I32);
            assign(tmp, binop(Iop_Add32, mkexpr(addr), mkU32(8)));
            addr = tmp;
         }
      }
      else
      if (N == 1 /* 2-interleaving -- VLD2/VST2 */) {
         vassert( (regs == 1 && (inc == 1 || inc == 2))
                  || (regs == 2 && inc == 2) );
         // Make 'nregs' be the number of registers and 'regstep'
         // equal the actual register-step.  The ARM encoding, using 'regs'
         // and 'inc', is bizarre.  After this, we have:
         //    Dd, Dd+1              regs = 1, inc = 1, nregs = 2, regstep = 1
         //    Dd, Dd+2              regs = 1, inc = 2, nregs = 2, regstep = 2
         //    Dd, Dd+1, Dd+2, Dd+3  regs = 2, inc = 2, nregs = 4, regstep = 1
         UInt nregs = 2;
         UInt regstep = 1;
         if (regs == 1 && inc == 1) {
            /* nothing */
         } else if (regs == 1 && inc == 2) {
            regstep = 2;
         } else if (regs == 2 && inc == 2) {
            nregs = 4;
         } else {
            vassert(0);
         }
         // 'a' is address,
         // 'di' is interleaved data, 'du' is uninterleaved data
         if (nregs == 2) {
            IRExpr* a0 = binop(Iop_Add32, mkexpr(addr), mkU32(0));
            IRExpr* a1 = binop(Iop_Add32, mkexpr(addr), mkU32(8));
            IRTemp di0 = newTemp(Ity_I64);
            IRTemp di1 = newTemp(Ity_I64);
            IRTemp du0 = newTemp(Ity_I64);
            IRTemp du1 = newTemp(Ity_I64);
            if (bL) {
               assign(di0, loadLE(Ity_I64, a0));
               assign(di1, loadLE(Ity_I64, a1));
               math_DEINTERLEAVE_2(&du0, &du1, di0, di1, 1 << size);
               putDRegI64(rD + 0 * regstep, mkexpr(du0), IRTemp_INVALID);
               putDRegI64(rD + 1 * regstep, mkexpr(du1), IRTemp_INVALID);
            } else {
               assign(du0, getDRegI64(rD + 0 * regstep));
               assign(du1, getDRegI64(rD + 1 * regstep));
               math_INTERLEAVE_2(&di0, &di1, du0, du1, 1 << size);
               storeLE(a0, mkexpr(di0));
               storeLE(a1, mkexpr(di1));
            }
            IRTemp tmp = newTemp(Ity_I32);
            assign(tmp, binop(Iop_Add32, mkexpr(addr), mkU32(16)));
            addr = tmp;
         } else {
            vassert(nregs == 4);
            vassert(regstep == 1);
            IRExpr* a0 = binop(Iop_Add32, mkexpr(addr), mkU32(0));
            IRExpr* a1 = binop(Iop_Add32, mkexpr(addr), mkU32(8));
            IRExpr* a2 = binop(Iop_Add32, mkexpr(addr), mkU32(16));
            IRExpr* a3 = binop(Iop_Add32, mkexpr(addr), mkU32(24));
            IRTemp di0 = newTemp(Ity_I64);
            IRTemp di1 = newTemp(Ity_I64);
            IRTemp di2 = newTemp(Ity_I64);
            IRTemp di3 = newTemp(Ity_I64);
            IRTemp du0 = newTemp(Ity_I64);
            IRTemp du1 = newTemp(Ity_I64);
            IRTemp du2 = newTemp(Ity_I64);
            IRTemp du3 = newTemp(Ity_I64);
            if (bL) {
               assign(di0, loadLE(Ity_I64, a0));
               assign(di1, loadLE(Ity_I64, a1));
               assign(di2, loadLE(Ity_I64, a2));
               assign(di3, loadLE(Ity_I64, a3));
               // Note spooky interleaving: du0, du2, di0, di1 etc
               math_DEINTERLEAVE_2(&du0, &du2, di0, di1, 1 << size);
               math_DEINTERLEAVE_2(&du1, &du3, di2, di3, 1 << size);
               putDRegI64(rD + 0 * regstep, mkexpr(du0), IRTemp_INVALID);
               putDRegI64(rD + 1 * regstep, mkexpr(du1), IRTemp_INVALID);
               putDRegI64(rD + 2 * regstep, mkexpr(du2), IRTemp_INVALID);
               putDRegI64(rD + 3 * regstep, mkexpr(du3), IRTemp_INVALID);
            } else {
               assign(du0, getDRegI64(rD + 0 * regstep));
               assign(du1, getDRegI64(rD + 1 * regstep));
               assign(du2, getDRegI64(rD + 2 * regstep));
               assign(du3, getDRegI64(rD + 3 * regstep));
               // Note spooky interleaving: du0, du2, di0, di1 etc
               math_INTERLEAVE_2(&di0, &di1, du0, du2, 1 << size);
               math_INTERLEAVE_2(&di2, &di3, du1, du3, 1 << size);
               storeLE(a0, mkexpr(di0));
               storeLE(a1, mkexpr(di1));
               storeLE(a2, mkexpr(di2));
               storeLE(a3, mkexpr(di3));
            }
            IRTemp tmp = newTemp(Ity_I32);
            assign(tmp, binop(Iop_Add32, mkexpr(addr), mkU32(32)));
            addr = tmp;
         }
      }
      else
      if (N == 2 /* 3-interleaving -- VLD3/VST3 */) {
         // Dd, Dd+1, Dd+2   regs = 1, inc = 1
         // Dd, Dd+2, Dd+4   regs = 1, inc = 2
         vassert(regs == 1 && (inc == 1 || inc == 2));
         IRExpr* a0 = binop(Iop_Add32, mkexpr(addr), mkU32(0));
         IRExpr* a1 = binop(Iop_Add32, mkexpr(addr), mkU32(8));
         IRExpr* a2 = binop(Iop_Add32, mkexpr(addr), mkU32(16));
         IRTemp di0 = newTemp(Ity_I64);
         IRTemp di1 = newTemp(Ity_I64);
         IRTemp di2 = newTemp(Ity_I64);
         IRTemp du0 = newTemp(Ity_I64);
         IRTemp du1 = newTemp(Ity_I64);
         IRTemp du2 = newTemp(Ity_I64);
         if (bL) {
            assign(di0, loadLE(Ity_I64, a0));
            assign(di1, loadLE(Ity_I64, a1));
            assign(di2, loadLE(Ity_I64, a2));
            math_DEINTERLEAVE_3(&du0, &du1, &du2, di0, di1, di2, 1 << size);
            putDRegI64(rD + 0 * inc, mkexpr(du0), IRTemp_INVALID);
            putDRegI64(rD + 1 * inc, mkexpr(du1), IRTemp_INVALID);
            putDRegI64(rD + 2 * inc, mkexpr(du2), IRTemp_INVALID);
         } else {
            assign(du0, getDRegI64(rD + 0 * inc));
            assign(du1, getDRegI64(rD + 1 * inc));
            assign(du2, getDRegI64(rD + 2 * inc));
            math_INTERLEAVE_3(&di0, &di1, &di2, du0, du1, du2, 1 << size);
            storeLE(a0, mkexpr(di0));
            storeLE(a1, mkexpr(di1));
            storeLE(a2, mkexpr(di2));
         }
         IRTemp tmp = newTemp(Ity_I32);
         assign(tmp, binop(Iop_Add32, mkexpr(addr), mkU32(24)));
         addr = tmp;
      }
      else
      if (N == 3 /* 4-interleaving -- VLD4/VST4 */) {
         // Dd, Dd+1, Dd+2, Dd+3   regs = 1, inc = 1
         // Dd, Dd+2, Dd+4, Dd+6   regs = 1, inc = 2
         vassert(regs == 1 && (inc == 1 || inc == 2));
         IRExpr* a0 = binop(Iop_Add32, mkexpr(addr), mkU32(0));
         IRExpr* a1 = binop(Iop_Add32, mkexpr(addr), mkU32(8));
         IRExpr* a2 = binop(Iop_Add32, mkexpr(addr), mkU32(16));
         IRExpr* a3 = binop(Iop_Add32, mkexpr(addr), mkU32(24));
         IRTemp di0 = newTemp(Ity_I64);
         IRTemp di1 = newTemp(Ity_I64);
         IRTemp di2 = newTemp(Ity_I64);
         IRTemp di3 = newTemp(Ity_I64);
         IRTemp du0 = newTemp(Ity_I64);
         IRTemp du1 = newTemp(Ity_I64);
         IRTemp du2 = newTemp(Ity_I64);
         IRTemp du3 = newTemp(Ity_I64);
         if (bL) {
            assign(di0, loadLE(Ity_I64, a0));
            assign(di1, loadLE(Ity_I64, a1));
            assign(di2, loadLE(Ity_I64, a2));
            assign(di3, loadLE(Ity_I64, a3));
            math_DEINTERLEAVE_4(&du0, &du1, &du2, &du3,
                                di0, di1, di2, di3, 1 << size);
            putDRegI64(rD + 0 * inc, mkexpr(du0), IRTemp_INVALID);
            putDRegI64(rD + 1 * inc, mkexpr(du1), IRTemp_INVALID);
            putDRegI64(rD + 2 * inc, mkexpr(du2), IRTemp_INVALID);
            putDRegI64(rD + 3 * inc, mkexpr(du3), IRTemp_INVALID);
         } else {
            assign(du0, getDRegI64(rD + 0 * inc));
            assign(du1, getDRegI64(rD + 1 * inc));
            assign(du2, getDRegI64(rD + 2 * inc));
            assign(du3, getDRegI64(rD + 3 * inc));
            math_INTERLEAVE_4(&di0, &di1, &di2, &di3,
                              du0, du1, du2, du3, 1 << size);
            storeLE(a0, mkexpr(di0));
            storeLE(a1, mkexpr(di1));
            storeLE(a2, mkexpr(di2));
            storeLE(a3, mkexpr(di3));
         }
         IRTemp tmp = newTemp(Ity_I32);
         assign(tmp, binop(Iop_Add32, mkexpr(addr), mkU32(32)));
         addr = tmp;
      }
      else {
         vassert(0);
      }

      /* Writeback */
      if (rM != 15) {
         IRExpr* e;
         if (rM == 13) {
            e = binop(Iop_Add32, mkexpr(initialRn),
                                 mkU32(8 * (N + 1) * regs));
         } else {
            e = binop(Iop_Add32, mkexpr(initialRn),
                                 mkexpr(initialRm));
         }
         if (isT)
            putIRegT(rN, e, IRTemp_INVALID);
         else
            putIRegA(rN, e, IRTemp_INVALID, Ijk_Boring);
      }

      DIP("v%s%u.%d {", bL ? "ld" : "st", N + 1, 8 << INSN(7,6));
      if ((inc == 1 && regs * (N + 1) > 1)
          || (inc == 2 && regs > 1 && N > 0)) {
         DIP("d%u-d%u", rD, rD + regs * (N + 1) - 1);
      } else {
         UInt r;
         for (r = 0; r < regs; r++) {
            for (i = 0; i <= N; i++) {
               if (i || r)
                  DIP(", ");
               DIP("d%u", rD + r + i * inc);
            }
         }
      }
      DIP("}, [r%u]", rN);
      if (rM != 13 && rM != 15) {
         DIP(", r%u\n", rM);
      } else {
         DIP("%s\n", (rM != 15) ? "!" : "");
      }
      return True;
   }
#  undef INSN
}
8873 /*------------------------------------------------------------*/
8874 /*--- NEON, top level control ---*/
8875 /*------------------------------------------------------------*/
8877 /* Both ARM and Thumb */
8879 /* Translate a NEON instruction. If successful, returns
8880 True and *dres may or may not be updated. If failure, returns
8881 False and doesn't change *dres nor create any IR.
8883 The Thumb and ARM encodings are similar for the 24 bottom bits, but
8884 the top 8 bits are slightly different. In both cases, the caller
8885 must pass the entire 32 bits. Callers may pass any instruction;
8886 this ignores non-NEON ones.
8888 Caller must supply an IRTemp 'condT' holding the gating condition,
8889 or IRTemp_INVALID indicating the insn is always executed. In ARM
8890 code, this must always be IRTemp_INVALID because NEON insns are
8891 unconditional for ARM.
8893 Finally, the caller must indicate whether this occurs in ARM or in
8894 Thumb code.
8896 This only handles NEON for ARMv7 and below. The NEON extensions
8897 for v8 are handled by decode_V8_instruction.
8899 static Bool decode_NEON_instruction_ARMv7_and_below (
8900 /*MOD*/DisResult* dres,
8901 UInt insn32,
8902 IRTemp condT,
8903 Bool isT
8906 # define INSN(_bMax,_bMin) SLICE_UInt(insn32, (_bMax), (_bMin))
8908 /* There are two kinds of instruction to deal with: load/store and
8909 data processing. In each case, in ARM mode we merely identify
8910 the kind, and pass it on to the relevant sub-handler. In Thumb
8911 mode we identify the kind, swizzle the bits around to make it
8912 have the same encoding as in ARM, and hand it on to the
8913 sub-handler.
8916 /* In ARM mode, NEON instructions can't be conditional. */
8917 if (!isT)
8918 vassert(condT == IRTemp_INVALID);
8920 /* Data processing:
8921 Thumb: 111U 1111 AAAA Axxx xxxx BBBB CCCC xxxx
8922 ARM: 1111 001U AAAA Axxx xxxx BBBB CCCC xxxx
8924 if (!isT && INSN(31,25) == BITS7(1,1,1,1,0,0,1)) {
8925 // ARM, DP
8926 return dis_neon_data_processing(INSN(31,0), condT);
8928 if (isT && INSN(31,29) == BITS3(1,1,1)
8929 && INSN(27,24) == BITS4(1,1,1,1)) {
8930 // Thumb, DP
8931 UInt reformatted = INSN(23,0);
8932 reformatted |= (((UInt)INSN(28,28)) << 24); // U bit
8933 reformatted |= (((UInt)BITS7(1,1,1,1,0,0,1)) << 25);
8934 return dis_neon_data_processing(reformatted, condT);
8937 /* Load/store:
8938 Thumb: 1111 1001 AxL0 xxxx xxxx BBBB xxxx xxxx
8939 ARM: 1111 0100 AxL0 xxxx xxxx BBBB xxxx xxxx
8941 if (!isT && INSN(31,24) == BITS8(1,1,1,1,0,1,0,0)) {
8942 // ARM, memory
8943 return dis_neon_load_or_store(INSN(31,0), isT, condT);
8945 if (isT && INSN(31,24) == BITS8(1,1,1,1,1,0,0,1)) {
8946 UInt reformatted = INSN(23,0);
8947 reformatted |= (((UInt)BITS8(1,1,1,1,0,1,0,0)) << 24);
8948 return dis_neon_load_or_store(reformatted, isT, condT);
8951 /* Doesn't match. */
8952 return False;
8954 # undef INSN
8958 /*------------------------------------------------------------*/
8959 /*--- V6 MEDIA instructions ---*/
8960 /*------------------------------------------------------------*/
8962 /* Both ARM and Thumb */
8964 /* Translate a V6 media instruction. If successful, returns
8965 True and *dres may or may not be updated. If failure, returns
8966 False and doesn't change *dres nor create any IR.
8968 The Thumb and ARM encodings are completely different. In Thumb
8969 mode, the caller must pass the entire 32 bits. In ARM mode it must
8970 pass the lower 28 bits. Apart from that, callers may pass any
8971 instruction; this function ignores anything it doesn't recognise.
8973 Caller must supply an IRTemp 'condT' holding the gating condition,
8974 or IRTemp_INVALID indicating the insn is always executed.
8976 Caller must also supply an ARMCondcode 'conq'. This is only used
8977 for debug printing, no other purpose. For ARM, this is simply the
8978 top 4 bits of the original instruction. For Thumb, the condition
8979 is not (really) known until run time, and so ARMCondAL should be
8980 passed, only so that printing of these instructions does not show
8981 any condition.
8983 Finally, the caller must indicate whether this occurs in ARM or in
8984 Thumb code.
8986 static Bool decode_V6MEDIA_instruction (
8987 /*MOD*/DisResult* dres,
8988 UInt insnv6m,
8989 IRTemp condT,
8990 ARMCondcode conq,
8991 Bool isT
8994 # define INSNA(_bMax,_bMin) SLICE_UInt(insnv6m, (_bMax), (_bMin))
8995 # define INSNT0(_bMax,_bMin) SLICE_UInt( ((insnv6m >> 16) & 0xFFFF), \
8996 (_bMax), (_bMin) )
8997 # define INSNT1(_bMax,_bMin) SLICE_UInt( ((insnv6m >> 0) & 0xFFFF), \
8998 (_bMax), (_bMin) )
8999 HChar dis_buf[128];
9000 dis_buf[0] = 0;
9002 if (isT) {
9003 vassert(conq == ARMCondAL);
9004 } else {
9005 vassert(INSNA(31,28) == BITS4(0,0,0,0)); // caller's obligation
9006 vassert(conq >= ARMCondEQ && conq <= ARMCondAL);
9009 /* ----------- smulbb, smulbt, smultb, smultt ----------- */
9011 UInt regD = 99, regM = 99, regN = 99, bitM = 0, bitN = 0;
9012 Bool gate = False;
9014 if (isT) {
9015 if (INSNT0(15,4) == 0xFB1 && INSNT1(15,12) == BITS4(1,1,1,1)
9016 && INSNT1(7,6) == BITS2(0,0)) {
9017 regD = INSNT1(11,8);
9018 regM = INSNT1(3,0);
9019 regN = INSNT0(3,0);
9020 bitM = INSNT1(4,4);
9021 bitN = INSNT1(5,5);
9022 if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
9023 gate = True;
9025 } else {
9026 if (BITS8(0,0,0,1,0,1,1,0) == INSNA(27,20) &&
9027 BITS4(0,0,0,0) == INSNA(15,12) &&
9028 BITS4(1,0,0,0) == (INSNA(7,4) & BITS4(1,0,0,1)) ) {
9029 regD = INSNA(19,16);
9030 regM = INSNA(11,8);
9031 regN = INSNA(3,0);
9032 bitM = INSNA(6,6);
9033 bitN = INSNA(5,5);
9034 if (regD != 15 && regN != 15 && regM != 15)
9035 gate = True;
9039 if (gate) {
9040 IRTemp srcN = newTemp(Ity_I32);
9041 IRTemp srcM = newTemp(Ity_I32);
9042 IRTemp res = newTemp(Ity_I32);
9044 assign( srcN, binop(Iop_Sar32,
9045 binop(Iop_Shl32,
9046 isT ? getIRegT(regN) : getIRegA(regN),
9047 mkU8(bitN ? 0 : 16)), mkU8(16)) );
9048 assign( srcM, binop(Iop_Sar32,
9049 binop(Iop_Shl32,
9050 isT ? getIRegT(regM) : getIRegA(regM),
9051 mkU8(bitM ? 0 : 16)), mkU8(16)) );
9052 assign( res, binop(Iop_Mul32, mkexpr(srcN), mkexpr(srcM)) );
9054 if (isT)
9055 putIRegT( regD, mkexpr(res), condT );
9056 else
9057 putIRegA( regD, mkexpr(res), condT, Ijk_Boring );
9059 DIP( "smul%c%c%s r%u, r%u, r%u\n", bitN ? 't' : 'b', bitM ? 't' : 'b',
9060 nCC(conq), regD, regN, regM );
9061 return True;
9063 /* fall through */
9066 /* ------------ smulwb<y><c> <Rd>,<Rn>,<Rm> ------------- */
9067 /* ------------ smulwt<y><c> <Rd>,<Rn>,<Rm> ------------- */
9069 UInt regD = 99, regN = 99, regM = 99, bitM = 0;
9070 Bool gate = False;
9072 if (isT) {
9073 if (INSNT0(15,4) == 0xFB3 && INSNT1(15,12) == BITS4(1,1,1,1)
9074 && INSNT1(7,5) == BITS3(0,0,0)) {
9075 regN = INSNT0(3,0);
9076 regD = INSNT1(11,8);
9077 regM = INSNT1(3,0);
9078 bitM = INSNT1(4,4);
9079 if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
9080 gate = True;
9082 } else {
9083 if (INSNA(27,20) == BITS8(0,0,0,1,0,0,1,0) &&
9084 INSNA(15,12) == BITS4(0,0,0,0) &&
9085 (INSNA(7,4) & BITS4(1,0,1,1)) == BITS4(1,0,1,0)) {
9086 regD = INSNA(19,16);
9087 regN = INSNA(3,0);
9088 regM = INSNA(11,8);
9089 bitM = INSNA(6,6);
9090 if (regD != 15 && regN != 15 && regM != 15)
9091 gate = True;
9095 if (gate) {
9096 IRTemp irt_prod = newTemp(Ity_I64);
9098 assign( irt_prod,
9099 binop(Iop_MullS32,
9100 isT ? getIRegT(regN) : getIRegA(regN),
9101 binop(Iop_Sar32,
9102 binop(Iop_Shl32,
9103 isT ? getIRegT(regM) : getIRegA(regM),
9104 mkU8(bitM ? 0 : 16)),
9105 mkU8(16))) );
9107 IRExpr* ire_result = binop(Iop_Or32,
9108 binop( Iop_Shl32,
9109 unop(Iop_64HIto32, mkexpr(irt_prod)),
9110 mkU8(16) ),
9111 binop( Iop_Shr32,
9112 unop(Iop_64to32, mkexpr(irt_prod)),
9113 mkU8(16) ) );
9115 if (isT)
9116 putIRegT( regD, ire_result, condT );
9117 else
9118 putIRegA( regD, ire_result, condT, Ijk_Boring );
9120 DIP("smulw%c%s r%u, r%u, r%u\n",
9121 bitM ? 't' : 'b', nCC(conq),regD,regN,regM);
9122 return True;
9124 /* fall through */
9127 /* ------------ pkhbt<c> Rd, Rn, Rm {,LSL #imm} ------------- */
9128 /* ------------ pkhtb<c> Rd, Rn, Rm {,ASR #imm} ------------- */
9130 UInt regD = 99, regN = 99, regM = 99, imm5 = 99, shift_type = 99;
9131 Bool tbform = False;
9132 Bool gate = False;
9134 if (isT) {
9135 if (INSNT0(15,4) == 0xEAC
9136 && INSNT1(15,15) == 0 && INSNT1(4,4) == 0) {
9137 regN = INSNT0(3,0);
9138 regD = INSNT1(11,8);
9139 regM = INSNT1(3,0);
9140 imm5 = (INSNT1(14,12) << 2) | INSNT1(7,6);
9141 shift_type = (INSNT1(5,5) << 1) | 0;
9142 tbform = (INSNT1(5,5) == 0) ? False : True;
9143 if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
9144 gate = True;
9146 } else {
9147 if (INSNA(27,20) == BITS8(0,1,1,0,1,0,0,0) &&
9148 INSNA(5,4) == BITS2(0,1) /* &&
9149 (INSNA(6,6) == 0 || INSNA(6,6) == 1)
9150 This last bit with INSNA(6,6) is correct, but gcc 8 complains
9151 (correctly) that it is always true. So I commented it out
9152 to keep gcc quiet. */ ) {
9153 regD = INSNA(15,12);
9154 regN = INSNA(19,16);
9155 regM = INSNA(3,0);
9156 imm5 = INSNA(11,7);
9157 shift_type = (INSNA(6,6) << 1) | 0;
9158 tbform = (INSNA(6,6) == 0) ? False : True;
9159 if (regD != 15 && regN != 15 && regM != 15)
9160 gate = True;
9164 if (gate) {
9165 IRTemp irt_regM = newTemp(Ity_I32);
9166 IRTemp irt_regM_shift = newTemp(Ity_I32);
9167 assign( irt_regM, isT ? getIRegT(regM) : getIRegA(regM) );
9168 compute_result_and_C_after_shift_by_imm5(
9169 dis_buf, &irt_regM_shift, NULL, irt_regM, shift_type, imm5, regM );
9171 UInt mask = (tbform == True) ? 0x0000FFFF : 0xFFFF0000;
9172 IRExpr* ire_result
9173 = binop( Iop_Or32,
9174 binop(Iop_And32, mkexpr(irt_regM_shift), mkU32(mask)),
9175 binop(Iop_And32, isT ? getIRegT(regN) : getIRegA(regN),
9176 unop(Iop_Not32, mkU32(mask))) );
9178 if (isT)
9179 putIRegT( regD, ire_result, condT );
9180 else
9181 putIRegA( regD, ire_result, condT, Ijk_Boring );
9183 DIP( "pkh%s%s r%u, r%u, r%u %s\n", tbform ? "tb" : "bt",
9184 nCC(conq), regD, regN, regM, dis_buf );
9186 return True;
9188 /* fall through */
9191 /* ---------- usat<c> <Rd>,#<imm5>,<Rn>{,<shift>} ----------- */
9193 UInt regD = 99, regN = 99, shift_type = 99, imm5 = 99, sat_imm = 99;
9194 Bool gate = False;
9196 if (isT) {
9197 if (INSNT0(15,6) == BITS10(1,1,1,1,0,0,1,1,1,0)
9198 && INSNT0(4,4) == 0
9199 && INSNT1(15,15) == 0 && INSNT1(5,5) == 0) {
9200 regD = INSNT1(11,8);
9201 regN = INSNT0(3,0);
9202 shift_type = (INSNT0(5,5) << 1) | 0;
9203 imm5 = (INSNT1(14,12) << 2) | INSNT1(7,6);
9204 sat_imm = INSNT1(4,0);
9205 if (!isBadRegT(regD) && !isBadRegT(regN))
9206 gate = True;
9207 if (shift_type == BITS2(1,0) && imm5 == 0)
9208 gate = False;
9210 } else {
9211 if (INSNA(27,21) == BITS7(0,1,1,0,1,1,1) &&
9212 INSNA(5,4) == BITS2(0,1)) {
9213 regD = INSNA(15,12);
9214 regN = INSNA(3,0);
9215 shift_type = (INSNA(6,6) << 1) | 0;
9216 imm5 = INSNA(11,7);
9217 sat_imm = INSNA(20,16);
9218 if (regD != 15 && regN != 15)
9219 gate = True;
9223 if (gate) {
9224 IRTemp irt_regN = newTemp(Ity_I32);
9225 IRTemp irt_regN_shift = newTemp(Ity_I32);
9226 IRTemp irt_sat_Q = newTemp(Ity_I32);
9227 IRTemp irt_result = newTemp(Ity_I32);
9229 assign( irt_regN, isT ? getIRegT(regN) : getIRegA(regN) );
9230 compute_result_and_C_after_shift_by_imm5(
9231 dis_buf, &irt_regN_shift, NULL,
9232 irt_regN, shift_type, imm5, regN );
9234 armUnsignedSatQ( &irt_result, &irt_sat_Q, irt_regN_shift, sat_imm );
9235 or_into_QFLAG32( mkexpr(irt_sat_Q), condT );
9237 if (isT)
9238 putIRegT( regD, mkexpr(irt_result), condT );
9239 else
9240 putIRegA( regD, mkexpr(irt_result), condT, Ijk_Boring );
9242 DIP("usat%s r%u, #0x%04x, %s\n",
9243 nCC(conq), regD, imm5, dis_buf);
9244 return True;
9246 /* fall through */
9249 /* ----------- ssat<c> <Rd>,#<imm5>,<Rn>{,<shift>} ----------- */
9251 UInt regD = 99, regN = 99, shift_type = 99, imm5 = 99, sat_imm = 99;
9252 Bool gate = False;
9254 if (isT) {
9255 if (INSNT0(15,6) == BITS10(1,1,1,1,0,0,1,1,0,0)
9256 && INSNT0(4,4) == 0
9257 && INSNT1(15,15) == 0 && INSNT1(5,5) == 0) {
9258 regD = INSNT1(11,8);
9259 regN = INSNT0(3,0);
9260 shift_type = (INSNT0(5,5) << 1) | 0;
9261 imm5 = (INSNT1(14,12) << 2) | INSNT1(7,6);
9262 sat_imm = INSNT1(4,0) + 1;
9263 if (!isBadRegT(regD) && !isBadRegT(regN))
9264 gate = True;
9265 if (shift_type == BITS2(1,0) && imm5 == 0)
9266 gate = False;
9268 } else {
9269 if (INSNA(27,21) == BITS7(0,1,1,0,1,0,1) &&
9270 INSNA(5,4) == BITS2(0,1)) {
9271 regD = INSNA(15,12);
9272 regN = INSNA(3,0);
9273 shift_type = (INSNA(6,6) << 1) | 0;
9274 imm5 = INSNA(11,7);
9275 sat_imm = INSNA(20,16) + 1;
9276 if (regD != 15 && regN != 15)
9277 gate = True;
9281 if (gate) {
9282 IRTemp irt_regN = newTemp(Ity_I32);
9283 IRTemp irt_regN_shift = newTemp(Ity_I32);
9284 IRTemp irt_sat_Q = newTemp(Ity_I32);
9285 IRTemp irt_result = newTemp(Ity_I32);
9287 assign( irt_regN, isT ? getIRegT(regN) : getIRegA(regN) );
9288 compute_result_and_C_after_shift_by_imm5(
9289 dis_buf, &irt_regN_shift, NULL,
9290 irt_regN, shift_type, imm5, regN );
9292 armSignedSatQ( irt_regN_shift, sat_imm, &irt_result, &irt_sat_Q );
9293 or_into_QFLAG32( mkexpr(irt_sat_Q), condT );
9295 if (isT)
9296 putIRegT( regD, mkexpr(irt_result), condT );
9297 else
9298 putIRegA( regD, mkexpr(irt_result), condT, Ijk_Boring );
9300 DIP( "ssat%s r%u, #0x%04x, %s\n",
9301 nCC(conq), regD, imm5, dis_buf);
9302 return True;
9304 /* fall through */
9307 /* ----------- ssat16<c> <Rd>,#<imm>,<Rn> ----------- */
9309 UInt regD = 99, regN = 99, sat_imm = 99;
9310 Bool gate = False;
9312 if (isT) {
9313 if (INSNT0(15,6) == BITS10(1,1,1,1,0,0,1,1,0,0)
9314 && INSNT0(5,4) == BITS2(1,0)
9315 && INSNT1(15,12) == BITS4(0,0,0,0)
9316 && INSNT1(7,4) == BITS4(0,0,0,0)) {
9317 regD = INSNT1(11,8);
9318 regN = INSNT0(3,0);
9319 sat_imm = INSNT1(3,0) + 1;
9320 if (!isBadRegT(regD) && !isBadRegT(regN))
9321 gate = True;
9323 } else {
9324 if (INSNA(27,20) == BITS8(0,1,1,0,1,0,1,0) &&
9325 INSNA(11,4) == BITS8(1,1,1,1,0,0,1,1)) {
9326 regD = INSNA(15,12);
9327 regN = INSNA(3,0);
9328 sat_imm = INSNA(19,16) + 1;
9329 if (regD != 15 && regN != 15)
9330 gate = True;
9334 if (gate) {
9335 IRTemp irt_regN = newTemp(Ity_I32);
9336 IRTemp irt_regN_lo = newTemp(Ity_I32);
9337 IRTemp irt_regN_hi = newTemp(Ity_I32);
9338 IRTemp irt_Q_lo = newTemp(Ity_I32);
9339 IRTemp irt_Q_hi = newTemp(Ity_I32);
9340 IRTemp irt_res_lo = newTemp(Ity_I32);
9341 IRTemp irt_res_hi = newTemp(Ity_I32);
9343 assign( irt_regN, isT ? getIRegT(regN) : getIRegA(regN) );
9344 assign( irt_regN_lo,
9345 binop( Iop_Sar32,
9346 binop(Iop_Shl32, mkexpr(irt_regN), mkU8(16)),
9347 mkU8(16)) );
9348 assign( irt_regN_hi, binop(Iop_Sar32, mkexpr(irt_regN), mkU8(16)) );
9350 armSignedSatQ( irt_regN_lo, sat_imm, &irt_res_lo, &irt_Q_lo );
9351 or_into_QFLAG32( mkexpr(irt_Q_lo), condT );
9353 armSignedSatQ( irt_regN_hi, sat_imm, &irt_res_hi, &irt_Q_hi );
9354 or_into_QFLAG32( mkexpr(irt_Q_hi), condT );
9356 IRExpr* ire_result
9357 = binop(Iop_Or32,
9358 binop(Iop_And32, mkexpr(irt_res_lo), mkU32(0xFFFF)),
9359 binop(Iop_Shl32, mkexpr(irt_res_hi), mkU8(16)));
9360 if (isT)
9361 putIRegT( regD, ire_result, condT );
9362 else
9363 putIRegA( regD, ire_result, condT, Ijk_Boring );
9365 DIP( "ssat16%s r%u, #0x%04x, r%u\n", nCC(conq), regD, sat_imm, regN );
9366 return True;
9368 /* fall through */
9371 /* -------------- usat16<c> <Rd>,#<imm4>,<Rn> --------------- */
9373 UInt regD = 99, regN = 99, sat_imm = 99;
9374 Bool gate = False;
9376 if (isT) {
9377 if (INSNT0(15,4) == 0xF3A && (INSNT1(15,0) & 0xF0F0) == 0x0000) {
9378 regN = INSNT0(3,0);
9379 regD = INSNT1(11,8);
9380 sat_imm = INSNT1(3,0);
9381 if (!isBadRegT(regD) && !isBadRegT(regN))
9382 gate = True;
9384 } else {
9385 if (INSNA(27,20) == BITS8(0,1,1,0,1,1,1,0) &&
9386 INSNA(11,8) == BITS4(1,1,1,1) &&
9387 INSNA(7,4) == BITS4(0,0,1,1)) {
9388 regD = INSNA(15,12);
9389 regN = INSNA(3,0);
9390 sat_imm = INSNA(19,16);
9391 if (regD != 15 && regN != 15)
9392 gate = True;
9396 if (gate) {
9397 IRTemp irt_regN = newTemp(Ity_I32);
9398 IRTemp irt_regN_lo = newTemp(Ity_I32);
9399 IRTemp irt_regN_hi = newTemp(Ity_I32);
9400 IRTemp irt_Q_lo = newTemp(Ity_I32);
9401 IRTemp irt_Q_hi = newTemp(Ity_I32);
9402 IRTemp irt_res_lo = newTemp(Ity_I32);
9403 IRTemp irt_res_hi = newTemp(Ity_I32);
9405 assign( irt_regN, isT ? getIRegT(regN) : getIRegA(regN) );
9406 assign( irt_regN_lo, binop( Iop_Sar32,
9407 binop(Iop_Shl32, mkexpr(irt_regN), mkU8(16)),
9408 mkU8(16)) );
9409 assign( irt_regN_hi, binop(Iop_Sar32, mkexpr(irt_regN), mkU8(16)) );
9411 armUnsignedSatQ( &irt_res_lo, &irt_Q_lo, irt_regN_lo, sat_imm );
9412 or_into_QFLAG32( mkexpr(irt_Q_lo), condT );
9414 armUnsignedSatQ( &irt_res_hi, &irt_Q_hi, irt_regN_hi, sat_imm );
9415 or_into_QFLAG32( mkexpr(irt_Q_hi), condT );
9417 IRExpr* ire_result = binop( Iop_Or32,
9418 binop(Iop_Shl32, mkexpr(irt_res_hi), mkU8(16)),
9419 mkexpr(irt_res_lo) );
9421 if (isT)
9422 putIRegT( regD, ire_result, condT );
9423 else
9424 putIRegA( regD, ire_result, condT, Ijk_Boring );
9426 DIP( "usat16%s r%u, #0x%04x, r%u\n", nCC(conq), regD, sat_imm, regN );
9427 return True;
9429 /* fall through */
9432 /* -------------- uadd16<c> <Rd>,<Rn>,<Rm> -------------- */
9434 UInt regD = 99, regN = 99, regM = 99;
9435 Bool gate = False;
9437 if (isT) {
9438 if (INSNT0(15,4) == 0xFA9 && (INSNT1(15,0) & 0xF0F0) == 0xF040) {
9439 regN = INSNT0(3,0);
9440 regD = INSNT1(11,8);
9441 regM = INSNT1(3,0);
9442 if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
9443 gate = True;
9445 } else {
9446 if (INSNA(27,20) == BITS8(0,1,1,0,0,1,0,1) &&
9447 INSNA(11,8) == BITS4(1,1,1,1) &&
9448 INSNA(7,4) == BITS4(0,0,0,1)) {
9449 regD = INSNA(15,12);
9450 regN = INSNA(19,16);
9451 regM = INSNA(3,0);
9452 if (regD != 15 && regN != 15 && regM != 15)
9453 gate = True;
9457 if (gate) {
9458 IRTemp rNt = newTemp(Ity_I32);
9459 IRTemp rMt = newTemp(Ity_I32);
9460 IRTemp res = newTemp(Ity_I32);
9461 IRTemp reso = newTemp(Ity_I32);
9463 assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
9464 assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
9466 assign(res, binop(Iop_Add16x2, mkexpr(rNt), mkexpr(rMt)));
9467 if (isT)
9468 putIRegT( regD, mkexpr(res), condT );
9469 else
9470 putIRegA( regD, mkexpr(res), condT, Ijk_Boring );
9472 assign(reso, binop(Iop_HAdd16Ux2, mkexpr(rNt), mkexpr(rMt)));
9473 set_GE_32_10_from_bits_31_15(reso, condT);
9475 DIP("uadd16%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
9476 return True;
9478 /* fall through */
9481 /* -------------- sadd16<c> <Rd>,<Rn>,<Rm> -------------- */
9483 UInt regD = 99, regN = 99, regM = 99;
9484 Bool gate = False;
9486 if (isT) {
9487 if (INSNT0(15,4) == 0xFA9 && (INSNT1(15,0) & 0xF0F0) == 0xF000) {
9488 regN = INSNT0(3,0);
9489 regD = INSNT1(11,8);
9490 regM = INSNT1(3,0);
9491 if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
9492 gate = True;
9494 } else {
9495 if (INSNA(27,20) == BITS8(0,1,1,0,0,0,0,1) &&
9496 INSNA(11,8) == BITS4(1,1,1,1) &&
9497 INSNA(7,4) == BITS4(0,0,0,1)) {
9498 regD = INSNA(15,12);
9499 regN = INSNA(19,16);
9500 regM = INSNA(3,0);
9501 if (regD != 15 && regN != 15 && regM != 15)
9502 gate = True;
9506 if (gate) {
9507 IRTemp rNt = newTemp(Ity_I32);
9508 IRTemp rMt = newTemp(Ity_I32);
9509 IRTemp res = newTemp(Ity_I32);
9510 IRTemp reso = newTemp(Ity_I32);
9512 assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
9513 assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
9515 assign(res, binop(Iop_Add16x2, mkexpr(rNt), mkexpr(rMt)));
9516 if (isT)
9517 putIRegT( regD, mkexpr(res), condT );
9518 else
9519 putIRegA( regD, mkexpr(res), condT, Ijk_Boring );
9521 assign(reso, unop(Iop_Not32,
9522 binop(Iop_HAdd16Sx2, mkexpr(rNt), mkexpr(rMt))));
9523 set_GE_32_10_from_bits_31_15(reso, condT);
9525 DIP("sadd16%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
9526 return True;
9528 /* fall through */
9531 /* ---------------- usub16<c> <Rd>,<Rn>,<Rm> ---------------- */
9533 UInt regD = 99, regN = 99, regM = 99;
9534 Bool gate = False;
9536 if (isT) {
9537 if (INSNT0(15,4) == 0xFAD && (INSNT1(15,0) & 0xF0F0) == 0xF040) {
9538 regN = INSNT0(3,0);
9539 regD = INSNT1(11,8);
9540 regM = INSNT1(3,0);
9541 if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
9542 gate = True;
9544 } else {
9545 if (INSNA(27,20) == BITS8(0,1,1,0,0,1,0,1) &&
9546 INSNA(11,8) == BITS4(1,1,1,1) &&
9547 INSNA(7,4) == BITS4(0,1,1,1)) {
9548 regD = INSNA(15,12);
9549 regN = INSNA(19,16);
9550 regM = INSNA(3,0);
9551 if (regD != 15 && regN != 15 && regM != 15)
9552 gate = True;
9556 if (gate) {
9557 IRTemp rNt = newTemp(Ity_I32);
9558 IRTemp rMt = newTemp(Ity_I32);
9559 IRTemp res = newTemp(Ity_I32);
9560 IRTemp reso = newTemp(Ity_I32);
9562 assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
9563 assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
9565 assign(res, binop(Iop_Sub16x2, mkexpr(rNt), mkexpr(rMt)));
9566 if (isT)
9567 putIRegT( regD, mkexpr(res), condT );
9568 else
9569 putIRegA( regD, mkexpr(res), condT, Ijk_Boring );
9571 assign(reso, unop(Iop_Not32,
9572 binop(Iop_HSub16Ux2, mkexpr(rNt), mkexpr(rMt))));
9573 set_GE_32_10_from_bits_31_15(reso, condT);
9575 DIP("usub16%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
9576 return True;
9578 /* fall through */
9581 /* -------------- ssub16<c> <Rd>,<Rn>,<Rm> -------------- */
9583 UInt regD = 99, regN = 99, regM = 99;
9584 Bool gate = False;
9586 if (isT) {
9587 if (INSNT0(15,4) == 0xFAD && (INSNT1(15,0) & 0xF0F0) == 0xF000) {
9588 regN = INSNT0(3,0);
9589 regD = INSNT1(11,8);
9590 regM = INSNT1(3,0);
9591 if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
9592 gate = True;
9594 } else {
9595 if (INSNA(27,20) == BITS8(0,1,1,0,0,0,0,1) &&
9596 INSNA(11,8) == BITS4(1,1,1,1) &&
9597 INSNA(7,4) == BITS4(0,1,1,1)) {
9598 regD = INSNA(15,12);
9599 regN = INSNA(19,16);
9600 regM = INSNA(3,0);
9601 if (regD != 15 && regN != 15 && regM != 15)
9602 gate = True;
9606 if (gate) {
9607 IRTemp rNt = newTemp(Ity_I32);
9608 IRTemp rMt = newTemp(Ity_I32);
9609 IRTemp res = newTemp(Ity_I32);
9610 IRTemp reso = newTemp(Ity_I32);
9612 assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
9613 assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
9615 assign(res, binop(Iop_Sub16x2, mkexpr(rNt), mkexpr(rMt)));
9616 if (isT)
9617 putIRegT( regD, mkexpr(res), condT );
9618 else
9619 putIRegA( regD, mkexpr(res), condT, Ijk_Boring );
9621 assign(reso, unop(Iop_Not32,
9622 binop(Iop_HSub16Sx2, mkexpr(rNt), mkexpr(rMt))));
9623 set_GE_32_10_from_bits_31_15(reso, condT);
9625 DIP("ssub16%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
9626 return True;
9628 /* fall through */
9631 /* ----------------- uadd8<c> <Rd>,<Rn>,<Rm> ---------------- */
9633 UInt regD = 99, regN = 99, regM = 99;
9634 Bool gate = False;
9636 if (isT) {
9637 if (INSNT0(15,4) == 0xFA8 && (INSNT1(15,0) & 0xF0F0) == 0xF040) {
9638 regN = INSNT0(3,0);
9639 regD = INSNT1(11,8);
9640 regM = INSNT1(3,0);
9641 if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
9642 gate = True;
9644 } else {
9645 if (INSNA(27,20) == BITS8(0,1,1,0,0,1,0,1) &&
9646 INSNA(11,8) == BITS4(1,1,1,1) &&
9647 (INSNA(7,4) == BITS4(1,0,0,1))) {
9648 regD = INSNA(15,12);
9649 regN = INSNA(19,16);
9650 regM = INSNA(3,0);
9651 if (regD != 15 && regN != 15 && regM != 15)
9652 gate = True;
9656 if (gate) {
9657 IRTemp rNt = newTemp(Ity_I32);
9658 IRTemp rMt = newTemp(Ity_I32);
9659 IRTemp res = newTemp(Ity_I32);
9660 IRTemp reso = newTemp(Ity_I32);
9662 assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
9663 assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
9665 assign(res, binop(Iop_Add8x4, mkexpr(rNt), mkexpr(rMt)));
9666 if (isT)
9667 putIRegT( regD, mkexpr(res), condT );
9668 else
9669 putIRegA( regD, mkexpr(res), condT, Ijk_Boring );
9671 assign(reso, binop(Iop_HAdd8Ux4, mkexpr(rNt), mkexpr(rMt)));
9672 set_GE_3_2_1_0_from_bits_31_23_15_7(reso, condT);
9674 DIP("uadd8%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
9675 return True;
9677 /* fall through */
9680 /* ------------------- sadd8<c> <Rd>,<Rn>,<Rm> ------------------ */
9682 UInt regD = 99, regN = 99, regM = 99;
9683 Bool gate = False;
9685 if (isT) {
9686 if (INSNT0(15,4) == 0xFA8 && (INSNT1(15,0) & 0xF0F0) == 0xF000) {
9687 regN = INSNT0(3,0);
9688 regD = INSNT1(11,8);
9689 regM = INSNT1(3,0);
9690 if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
9691 gate = True;
9693 } else {
9694 if (INSNA(27,20) == BITS8(0,1,1,0,0,0,0,1) &&
9695 INSNA(11,8) == BITS4(1,1,1,1) &&
9696 (INSNA(7,4) == BITS4(1,0,0,1))) {
9697 regD = INSNA(15,12);
9698 regN = INSNA(19,16);
9699 regM = INSNA(3,0);
9700 if (regD != 15 && regN != 15 && regM != 15)
9701 gate = True;
9705 if (gate) {
9706 IRTemp rNt = newTemp(Ity_I32);
9707 IRTemp rMt = newTemp(Ity_I32);
9708 IRTemp res = newTemp(Ity_I32);
9709 IRTemp reso = newTemp(Ity_I32);
9711 assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
9712 assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
9714 assign(res, binop(Iop_Add8x4, mkexpr(rNt), mkexpr(rMt)));
9715 if (isT)
9716 putIRegT( regD, mkexpr(res), condT );
9717 else
9718 putIRegA( regD, mkexpr(res), condT, Ijk_Boring );
9720 assign(reso, unop(Iop_Not32,
9721 binop(Iop_HAdd8Sx4, mkexpr(rNt), mkexpr(rMt))));
9722 set_GE_3_2_1_0_from_bits_31_23_15_7(reso, condT);
9724 DIP("sadd8%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
9725 return True;
9727 /* fall through */
9730 /* ------------------- usub8<c> <Rd>,<Rn>,<Rm> ------------------ */
9732 UInt regD = 99, regN = 99, regM = 99;
9733 Bool gate = False;
9735 if (isT) {
9736 if (INSNT0(15,4) == 0xFAC && (INSNT1(15,0) & 0xF0F0) == 0xF040) {
9737 regN = INSNT0(3,0);
9738 regD = INSNT1(11,8);
9739 regM = INSNT1(3,0);
9740 if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
9741 gate = True;
9743 } else {
9744 if (INSNA(27,20) == BITS8(0,1,1,0,0,1,0,1) &&
9745 INSNA(11,8) == BITS4(1,1,1,1) &&
9746 (INSNA(7,4) == BITS4(1,1,1,1))) {
9747 regD = INSNA(15,12);
9748 regN = INSNA(19,16);
9749 regM = INSNA(3,0);
9750 if (regD != 15 && regN != 15 && regM != 15)
9751 gate = True;
9755 if (gate) {
9756 IRTemp rNt = newTemp(Ity_I32);
9757 IRTemp rMt = newTemp(Ity_I32);
9758 IRTemp res = newTemp(Ity_I32);
9759 IRTemp reso = newTemp(Ity_I32);
9761 assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
9762 assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
9764 assign(res, binop(Iop_Sub8x4, mkexpr(rNt), mkexpr(rMt)));
9765 if (isT)
9766 putIRegT( regD, mkexpr(res), condT );
9767 else
9768 putIRegA( regD, mkexpr(res), condT, Ijk_Boring );
9770 assign(reso, unop(Iop_Not32,
9771 binop(Iop_HSub8Ux4, mkexpr(rNt), mkexpr(rMt))));
9772 set_GE_3_2_1_0_from_bits_31_23_15_7(reso, condT);
9774 DIP("usub8%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
9775 return True;
9777 /* fall through */
9780 /* ------------------- ssub8<c> <Rd>,<Rn>,<Rm> ------------------ */
9782 UInt regD = 99, regN = 99, regM = 99;
9783 Bool gate = False;
9785 if (isT) {
9786 if (INSNT0(15,4) == 0xFAC && (INSNT1(15,0) & 0xF0F0) == 0xF000) {
9787 regN = INSNT0(3,0);
9788 regD = INSNT1(11,8);
9789 regM = INSNT1(3,0);
9790 if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
9791 gate = True;
9793 } else {
9794 if (INSNA(27,20) == BITS8(0,1,1,0,0,0,0,1) &&
9795 INSNA(11,8) == BITS4(1,1,1,1) &&
9796 INSNA(7,4) == BITS4(1,1,1,1)) {
9797 regD = INSNA(15,12);
9798 regN = INSNA(19,16);
9799 regM = INSNA(3,0);
9800 if (regD != 15 && regN != 15 && regM != 15)
9801 gate = True;
9805 if (gate) {
9806 IRTemp rNt = newTemp(Ity_I32);
9807 IRTemp rMt = newTemp(Ity_I32);
9808 IRTemp res = newTemp(Ity_I32);
9809 IRTemp reso = newTemp(Ity_I32);
9811 assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
9812 assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
9814 assign(res, binop(Iop_Sub8x4, mkexpr(rNt), mkexpr(rMt)));
9815 if (isT)
9816 putIRegT( regD, mkexpr(res), condT );
9817 else
9818 putIRegA( regD, mkexpr(res), condT, Ijk_Boring );
9820 assign(reso, unop(Iop_Not32,
9821 binop(Iop_HSub8Sx4, mkexpr(rNt), mkexpr(rMt))));
9822 set_GE_3_2_1_0_from_bits_31_23_15_7(reso, condT);
9824 DIP("ssub8%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
9825 return True;
9827 /* fall through */
9830 /* ------------------ qadd8<c> <Rd>,<Rn>,<Rm> ------------------- */
9832 UInt regD = 99, regN = 99, regM = 99;
9833 Bool gate = False;
9835 if (isT) {
9836 if (INSNT0(15,4) == 0xFA8 && (INSNT1(15,0) & 0xF0F0) == 0xF010) {
9837 regN = INSNT0(3,0);
9838 regD = INSNT1(11,8);
9839 regM = INSNT1(3,0);
9840 if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
9841 gate = True;
9843 } else {
9844 if (INSNA(27,20) == BITS8(0,1,1,0,0,0,1,0) &&
9845 INSNA(11,8) == BITS4(1,1,1,1) &&
9846 INSNA(7,4) == BITS4(1,0,0,1)) {
9847 regD = INSNA(15,12);
9848 regN = INSNA(19,16);
9849 regM = INSNA(3,0);
9850 if (regD != 15 && regN != 15 && regM != 15)
9851 gate = True;
9855 if (gate) {
9856 IRTemp rNt = newTemp(Ity_I32);
9857 IRTemp rMt = newTemp(Ity_I32);
9858 IRTemp res_q = newTemp(Ity_I32);
9860 assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
9861 assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
9863 assign(res_q, binop(Iop_QAdd8Sx4, mkexpr(rNt), mkexpr(rMt)));
9864 if (isT)
9865 putIRegT( regD, mkexpr(res_q), condT );
9866 else
9867 putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
9869 DIP("qadd8%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
9870 return True;
9872 /* fall through */
9875 /* ------------------ qsub8<c> <Rd>,<Rn>,<Rm> ------------------- */
9877 UInt regD = 99, regN = 99, regM = 99;
9878 Bool gate = False;
9880 if (isT) {
9881 if (INSNT0(15,4) == 0xFAC && (INSNT1(15,0) & 0xF0F0) == 0xF010) {
9882 regN = INSNT0(3,0);
9883 regD = INSNT1(11,8);
9884 regM = INSNT1(3,0);
9885 if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
9886 gate = True;
9888 } else {
9889 if (INSNA(27,20) == BITS8(0,1,1,0,0,0,1,0) &&
9890 INSNA(11,8) == BITS4(1,1,1,1) &&
9891 INSNA(7,4) == BITS4(1,1,1,1)) {
9892 regD = INSNA(15,12);
9893 regN = INSNA(19,16);
9894 regM = INSNA(3,0);
9895 if (regD != 15 && regN != 15 && regM != 15)
9896 gate = True;
9900 if (gate) {
9901 IRTemp rNt = newTemp(Ity_I32);
9902 IRTemp rMt = newTemp(Ity_I32);
9903 IRTemp res_q = newTemp(Ity_I32);
9905 assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
9906 assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
9908 assign(res_q, binop(Iop_QSub8Sx4, mkexpr(rNt), mkexpr(rMt)));
9909 if (isT)
9910 putIRegT( regD, mkexpr(res_q), condT );
9911 else
9912 putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
9914 DIP("qsub8%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
9915 return True;
9917 /* fall through */
9920 /* ------------------ uqadd8<c> <Rd>,<Rn>,<Rm> ------------------ */
9922 UInt regD = 99, regN = 99, regM = 99;
9923 Bool gate = False;
9925 if (isT) {
9926 if (INSNT0(15,4) == 0xFA8 && (INSNT1(15,0) & 0xF0F0) == 0xF050) {
9927 regN = INSNT0(3,0);
9928 regD = INSNT1(11,8);
9929 regM = INSNT1(3,0);
9930 if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
9931 gate = True;
9933 } else {
9934 if (INSNA(27,20) == BITS8(0,1,1,0,0,1,1,0) &&
9935 INSNA(11,8) == BITS4(1,1,1,1) &&
9936 (INSNA(7,4) == BITS4(1,0,0,1))) {
9937 regD = INSNA(15,12);
9938 regN = INSNA(19,16);
9939 regM = INSNA(3,0);
9940 if (regD != 15 && regN != 15 && regM != 15)
9941 gate = True;
9945 if (gate) {
9946 IRTemp rNt = newTemp(Ity_I32);
9947 IRTemp rMt = newTemp(Ity_I32);
9948 IRTemp res_q = newTemp(Ity_I32);
9950 assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
9951 assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
9953 assign(res_q, binop(Iop_QAdd8Ux4, mkexpr(rNt), mkexpr(rMt)));
9954 if (isT)
9955 putIRegT( regD, mkexpr(res_q), condT );
9956 else
9957 putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
9959 DIP("uqadd8%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
9960 return True;
9962 /* fall through */
9965 /* ------------------ uqsub8<c> <Rd>,<Rn>,<Rm> ------------------ */
9967 UInt regD = 99, regN = 99, regM = 99;
9968 Bool gate = False;
9970 if (isT) {
9971 if (INSNT0(15,4) == 0xFAC && (INSNT1(15,0) & 0xF0F0) == 0xF050) {
9972 regN = INSNT0(3,0);
9973 regD = INSNT1(11,8);
9974 regM = INSNT1(3,0);
9975 if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
9976 gate = True;
9978 } else {
9979 if (INSNA(27,20) == BITS8(0,1,1,0,0,1,1,0) &&
9980 INSNA(11,8) == BITS4(1,1,1,1) &&
9981 (INSNA(7,4) == BITS4(1,1,1,1))) {
9982 regD = INSNA(15,12);
9983 regN = INSNA(19,16);
9984 regM = INSNA(3,0);
9985 if (regD != 15 && regN != 15 && regM != 15)
9986 gate = True;
9990 if (gate) {
9991 IRTemp rNt = newTemp(Ity_I32);
9992 IRTemp rMt = newTemp(Ity_I32);
9993 IRTemp res_q = newTemp(Ity_I32);
9995 assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
9996 assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
9998 assign(res_q, binop(Iop_QSub8Ux4, mkexpr(rNt), mkexpr(rMt)));
9999 if (isT)
10000 putIRegT( regD, mkexpr(res_q), condT );
10001 else
10002 putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
10004 DIP("uqsub8%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
10005 return True;
10007 /* fall through */
10010 /* ----------------- uhadd8<c> <Rd>,<Rn>,<Rm> ------------------- */
10012 UInt regD = 99, regN = 99, regM = 99;
10013 Bool gate = False;
10015 if (isT) {
10016 if (INSNT0(15,4) == 0xFA8 && (INSNT1(15,0) & 0xF0F0) == 0xF060) {
10017 regN = INSNT0(3,0);
10018 regD = INSNT1(11,8);
10019 regM = INSNT1(3,0);
10020 if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
10021 gate = True;
10023 } else {
10024 if (INSNA(27,20) == BITS8(0,1,1,0,0,1,1,1) &&
10025 INSNA(11,8) == BITS4(1,1,1,1) &&
10026 INSNA(7,4) == BITS4(1,0,0,1)) {
10027 regD = INSNA(15,12);
10028 regN = INSNA(19,16);
10029 regM = INSNA(3,0);
10030 if (regD != 15 && regN != 15 && regM != 15)
10031 gate = True;
10035 if (gate) {
10036 IRTemp rNt = newTemp(Ity_I32);
10037 IRTemp rMt = newTemp(Ity_I32);
10038 IRTemp res_q = newTemp(Ity_I32);
10040 assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
10041 assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
10043 assign(res_q, binop(Iop_HAdd8Ux4, mkexpr(rNt), mkexpr(rMt)));
10044 if (isT)
10045 putIRegT( regD, mkexpr(res_q), condT );
10046 else
10047 putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
10049 DIP("uhadd8%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
10050 return True;
10052 /* fall through */
10055 /* ----------------- uhadd16<c> <Rd>,<Rn>,<Rm> ------------------- */
10057 UInt regD = 99, regN = 99, regM = 99;
10058 Bool gate = False;
10060 if (isT) {
10061 if (INSNT0(15,4) == 0xFA9 && (INSNT1(15,0) & 0xF0F0) == 0xF060) {
10062 regN = INSNT0(3,0);
10063 regD = INSNT1(11,8);
10064 regM = INSNT1(3,0);
10065 if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
10066 gate = True;
10068 } else {
10069 if (INSNA(27,20) == BITS8(0,1,1,0,0,1,1,1) &&
10070 INSNA(11,8) == BITS4(1,1,1,1) &&
10071 INSNA(7,4) == BITS4(0,0,0,1)) {
10072 regD = INSNA(15,12);
10073 regN = INSNA(19,16);
10074 regM = INSNA(3,0);
10075 if (regD != 15 && regN != 15 && regM != 15)
10076 gate = True;
10080 if (gate) {
10081 IRTemp rNt = newTemp(Ity_I32);
10082 IRTemp rMt = newTemp(Ity_I32);
10083 IRTemp res_q = newTemp(Ity_I32);
10085 assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
10086 assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
10088 assign(res_q, binop(Iop_HAdd16Ux2, mkexpr(rNt), mkexpr(rMt)));
10089 if (isT)
10090 putIRegT( regD, mkexpr(res_q), condT );
10091 else
10092 putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
10094 DIP("uhadd16%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
10095 return True;
10097 /* fall through */
10100 /* ----------------- shadd8<c> <Rd>,<Rn>,<Rm> ------------------- */
10102 UInt regD = 99, regN = 99, regM = 99;
10103 Bool gate = False;
10105 if (isT) {
10106 if (INSNT0(15,4) == 0xFA8 && (INSNT1(15,0) & 0xF0F0) == 0xF020) {
10107 regN = INSNT0(3,0);
10108 regD = INSNT1(11,8);
10109 regM = INSNT1(3,0);
10110 if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
10111 gate = True;
10113 } else {
10114 if (INSNA(27,20) == BITS8(0,1,1,0,0,0,1,1) &&
10115 INSNA(11,8) == BITS4(1,1,1,1) &&
10116 INSNA(7,4) == BITS4(1,0,0,1)) {
10117 regD = INSNA(15,12);
10118 regN = INSNA(19,16);
10119 regM = INSNA(3,0);
10120 if (regD != 15 && regN != 15 && regM != 15)
10121 gate = True;
10125 if (gate) {
10126 IRTemp rNt = newTemp(Ity_I32);
10127 IRTemp rMt = newTemp(Ity_I32);
10128 IRTemp res_q = newTemp(Ity_I32);
10130 assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
10131 assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
10133 assign(res_q, binop(Iop_HAdd8Sx4, mkexpr(rNt), mkexpr(rMt)));
10134 if (isT)
10135 putIRegT( regD, mkexpr(res_q), condT );
10136 else
10137 putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
10139 DIP("shadd8%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
10140 return True;
10142 /* fall through */
10145 /* ------------------ qadd16<c> <Rd>,<Rn>,<Rm> ------------------ */
10147 UInt regD = 99, regN = 99, regM = 99;
10148 Bool gate = False;
10150 if (isT) {
10151 if (INSNT0(15,4) == 0xFA9 && (INSNT1(15,0) & 0xF0F0) == 0xF010) {
10152 regN = INSNT0(3,0);
10153 regD = INSNT1(11,8);
10154 regM = INSNT1(3,0);
10155 if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
10156 gate = True;
10158 } else {
10159 if (INSNA(27,20) == BITS8(0,1,1,0,0,0,1,0) &&
10160 INSNA(11,8) == BITS4(1,1,1,1) &&
10161 INSNA(7,4) == BITS4(0,0,0,1)) {
10162 regD = INSNA(15,12);
10163 regN = INSNA(19,16);
10164 regM = INSNA(3,0);
10165 if (regD != 15 && regN != 15 && regM != 15)
10166 gate = True;
10170 if (gate) {
10171 IRTemp rNt = newTemp(Ity_I32);
10172 IRTemp rMt = newTemp(Ity_I32);
10173 IRTemp res_q = newTemp(Ity_I32);
10175 assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
10176 assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
10178 assign(res_q, binop(Iop_QAdd16Sx2, mkexpr(rNt), mkexpr(rMt)));
10179 if (isT)
10180 putIRegT( regD, mkexpr(res_q), condT );
10181 else
10182 putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
10184 DIP("qadd16%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
10185 return True;
10187 /* fall through */
10190 /* ------------------ qsub16<c> <Rd>,<Rn>,<Rm> ------------------ */
10192 UInt regD = 99, regN = 99, regM = 99;
10193 Bool gate = False;
10195 if (isT) {
10196 if (INSNT0(15,4) == 0xFAD && (INSNT1(15,0) & 0xF0F0) == 0xF010) {
10197 regN = INSNT0(3,0);
10198 regD = INSNT1(11,8);
10199 regM = INSNT1(3,0);
10200 if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
10201 gate = True;
10203 } else {
10204 if (INSNA(27,20) == BITS8(0,1,1,0,0,0,1,0) &&
10205 INSNA(11,8) == BITS4(1,1,1,1) &&
10206 INSNA(7,4) == BITS4(0,1,1,1)) {
10207 regD = INSNA(15,12);
10208 regN = INSNA(19,16);
10209 regM = INSNA(3,0);
10210 if (regD != 15 && regN != 15 && regM != 15)
10211 gate = True;
10215 if (gate) {
10216 IRTemp rNt = newTemp(Ity_I32);
10217 IRTemp rMt = newTemp(Ity_I32);
10218 IRTemp res_q = newTemp(Ity_I32);
10220 assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
10221 assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
10223 assign(res_q, binop(Iop_QSub16Sx2, mkexpr(rNt), mkexpr(rMt)));
10224 if (isT)
10225 putIRegT( regD, mkexpr(res_q), condT );
10226 else
10227 putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
10229 DIP("qsub16%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
10230 return True;
10232 /* fall through */
10235 /* ------------------- qsax<c> <Rd>,<Rn>,<Rm> ------------------- */
10236 /* note: the hardware seems to construct the result differently
10237 from wot the manual says. */
10239 UInt regD = 99, regN = 99, regM = 99;
10240 Bool gate = False;
10242 if (isT) {
10243 if (INSNT0(15,4) == 0xFAE && (INSNT1(15,0) & 0xF0F0) == 0xF010) {
10244 regN = INSNT0(3,0);
10245 regD = INSNT1(11,8);
10246 regM = INSNT1(3,0);
10247 if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
10248 gate = True;
10250 } else {
10251 if (INSNA(27,20) == BITS8(0,1,1,0,0,0,1,0) &&
10252 INSNA(11,8) == BITS4(1,1,1,1) &&
10253 INSNA(7,4) == BITS4(0,1,0,1)) {
10254 regD = INSNA(15,12);
10255 regN = INSNA(19,16);
10256 regM = INSNA(3,0);
10257 if (regD != 15 && regN != 15 && regM != 15)
10258 gate = True;
10262 if (gate) {
10263 IRTemp irt_regN = newTemp(Ity_I32);
10264 IRTemp irt_regM = newTemp(Ity_I32);
10265 IRTemp irt_sum = newTemp(Ity_I32);
10266 IRTemp irt_diff = newTemp(Ity_I32);
10267 IRTemp irt_sum_res = newTemp(Ity_I32);
10268 IRTemp irt_diff_res = newTemp(Ity_I32);
10270 assign( irt_regN, isT ? getIRegT(regN) : getIRegA(regN) );
10271 assign( irt_regM, isT ? getIRegT(regM) : getIRegA(regM) );
10273 assign( irt_diff,
10274 binop( Iop_Sub32,
10275 binop( Iop_Sar32, mkexpr(irt_regN), mkU8(16) ),
10276 binop( Iop_Sar32,
10277 binop(Iop_Shl32, mkexpr(irt_regM), mkU8(16)),
10278 mkU8(16) ) ) );
10279 armSignedSatQ( irt_diff, 0x10, &irt_diff_res, NULL);
10281 assign( irt_sum,
10282 binop( Iop_Add32,
10283 binop( Iop_Sar32,
10284 binop( Iop_Shl32, mkexpr(irt_regN), mkU8(16) ),
10285 mkU8(16) ),
10286 binop( Iop_Sar32, mkexpr(irt_regM), mkU8(16) )) );
10287 armSignedSatQ( irt_sum, 0x10, &irt_sum_res, NULL );
10289 IRExpr* ire_result = binop( Iop_Or32,
10290 binop( Iop_Shl32, mkexpr(irt_diff_res),
10291 mkU8(16) ),
10292 binop( Iop_And32, mkexpr(irt_sum_res),
10293 mkU32(0xFFFF)) );
10295 if (isT)
10296 putIRegT( regD, ire_result, condT );
10297 else
10298 putIRegA( regD, ire_result, condT, Ijk_Boring );
10300 DIP( "qsax%s r%u, r%u, r%u\n", nCC(conq), regD, regN, regM );
10301 return True;
10303 /* fall through */
10306 /* ------------------- qasx<c> <Rd>,<Rn>,<Rm> ------------------- */
10308 UInt regD = 99, regN = 99, regM = 99;
10309 Bool gate = False;
10311 if (isT) {
10312 if (INSNT0(15,4) == 0xFAA && (INSNT1(15,0) & 0xF0F0) == 0xF010) {
10313 regN = INSNT0(3,0);
10314 regD = INSNT1(11,8);
10315 regM = INSNT1(3,0);
10316 if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
10317 gate = True;
10319 } else {
10320 if (INSNA(27,20) == BITS8(0,1,1,0,0,0,1,0) &&
10321 INSNA(11,8) == BITS4(1,1,1,1) &&
10322 INSNA(7,4) == BITS4(0,0,1,1)) {
10323 regD = INSNA(15,12);
10324 regN = INSNA(19,16);
10325 regM = INSNA(3,0);
10326 if (regD != 15 && regN != 15 && regM != 15)
10327 gate = True;
10331 if (gate) {
10332 IRTemp irt_regN = newTemp(Ity_I32);
10333 IRTemp irt_regM = newTemp(Ity_I32);
10334 IRTemp irt_sum = newTemp(Ity_I32);
10335 IRTemp irt_diff = newTemp(Ity_I32);
10336 IRTemp irt_res_sum = newTemp(Ity_I32);
10337 IRTemp irt_res_diff = newTemp(Ity_I32);
10339 assign( irt_regN, isT ? getIRegT(regN) : getIRegA(regN) );
10340 assign( irt_regM, isT ? getIRegT(regM) : getIRegA(regM) );
10342 assign( irt_diff,
10343 binop( Iop_Sub32,
10344 binop( Iop_Sar32,
10345 binop( Iop_Shl32, mkexpr(irt_regN), mkU8(16) ),
10346 mkU8(16) ),
10347 binop( Iop_Sar32, mkexpr(irt_regM), mkU8(16) ) ) );
10348 armSignedSatQ( irt_diff, 0x10, &irt_res_diff, NULL );
10350 assign( irt_sum,
10351 binop( Iop_Add32,
10352 binop( Iop_Sar32, mkexpr(irt_regN), mkU8(16) ),
10353 binop( Iop_Sar32,
10354 binop( Iop_Shl32, mkexpr(irt_regM), mkU8(16) ),
10355 mkU8(16) ) ) );
10356 armSignedSatQ( irt_sum, 0x10, &irt_res_sum, NULL );
10358 IRExpr* ire_result
10359 = binop( Iop_Or32,
10360 binop( Iop_Shl32, mkexpr(irt_res_sum), mkU8(16) ),
10361 binop( Iop_And32, mkexpr(irt_res_diff), mkU32(0xFFFF) ) );
10363 if (isT)
10364 putIRegT( regD, ire_result, condT );
10365 else
10366 putIRegA( regD, ire_result, condT, Ijk_Boring );
10368 DIP( "qasx%s r%u, r%u, r%u\n", nCC(conq), regD, regN, regM );
10369 return True;
10371 /* fall through */
10374 /* ------------------- sasx<c> <Rd>,<Rn>,<Rm> ------------------- */
10376 UInt regD = 99, regN = 99, regM = 99;
10377 Bool gate = False;
10379 if (isT) {
10380 if (INSNT0(15,4) == 0xFAA && (INSNT1(15,0) & 0xF0F0) == 0xF000) {
10381 regN = INSNT0(3,0);
10382 regD = INSNT1(11,8);
10383 regM = INSNT1(3,0);
10384 if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
10385 gate = True;
10387 } else {
10388 if (INSNA(27,20) == BITS8(0,1,1,0,0,0,0,1) &&
10389 INSNA(11,8) == BITS4(1,1,1,1) &&
10390 INSNA(7,4) == BITS4(0,0,1,1)) {
10391 regD = INSNA(15,12);
10392 regN = INSNA(19,16);
10393 regM = INSNA(3,0);
10394 if (regD != 15 && regN != 15 && regM != 15)
10395 gate = True;
10399 if (gate) {
10400 IRTemp irt_regN = newTemp(Ity_I32);
10401 IRTemp irt_regM = newTemp(Ity_I32);
10402 IRTemp irt_sum = newTemp(Ity_I32);
10403 IRTemp irt_diff = newTemp(Ity_I32);
10405 assign( irt_regN, isT ? getIRegT(regN) : getIRegA(regN) );
10406 assign( irt_regM, isT ? getIRegT(regM) : getIRegA(regM) );
10408 assign( irt_diff,
10409 binop( Iop_Sub32,
10410 binop( Iop_Sar32,
10411 binop( Iop_Shl32, mkexpr(irt_regN), mkU8(16) ),
10412 mkU8(16) ),
10413 binop( Iop_Sar32, mkexpr(irt_regM), mkU8(16) ) ) );
10415 assign( irt_sum,
10416 binop( Iop_Add32,
10417 binop( Iop_Sar32, mkexpr(irt_regN), mkU8(16) ),
10418 binop( Iop_Sar32,
10419 binop( Iop_Shl32, mkexpr(irt_regM), mkU8(16) ),
10420 mkU8(16) ) ) );
10422 IRExpr* ire_result
10423 = binop( Iop_Or32,
10424 binop( Iop_Shl32, mkexpr(irt_sum), mkU8(16) ),
10425 binop( Iop_And32, mkexpr(irt_diff), mkU32(0xFFFF) ) );
10427 IRTemp ge10 = newTemp(Ity_I32);
10428 assign(ge10, unop(Iop_Not32, mkexpr(irt_diff)));
10429 put_GEFLAG32( 0, 31, mkexpr(ge10), condT );
10430 put_GEFLAG32( 1, 31, mkexpr(ge10), condT );
10432 IRTemp ge32 = newTemp(Ity_I32);
10433 assign(ge32, unop(Iop_Not32, mkexpr(irt_sum)));
10434 put_GEFLAG32( 2, 31, mkexpr(ge32), condT );
10435 put_GEFLAG32( 3, 31, mkexpr(ge32), condT );
10437 if (isT)
10438 putIRegT( regD, ire_result, condT );
10439 else
10440 putIRegA( regD, ire_result, condT, Ijk_Boring );
10442 DIP( "sasx%s r%u, r%u, r%u\n", nCC(conq), regD, regN, regM );
10443 return True;
10445 /* fall through */
10448 /* --------------- smuad, smuadx<c><Rd>,<Rn>,<Rm> --------------- */
10449 /* --------------- smsad, smsadx<c><Rd>,<Rn>,<Rm> --------------- */
10451 UInt regD = 99, regN = 99, regM = 99, bitM = 99;
10452 Bool gate = False, isAD = False;
10454 if (isT) {
10455 if ((INSNT0(15,4) == 0xFB2 || INSNT0(15,4) == 0xFB4)
10456 && (INSNT1(15,0) & 0xF0E0) == 0xF000) {
10457 regN = INSNT0(3,0);
10458 regD = INSNT1(11,8);
10459 regM = INSNT1(3,0);
10460 bitM = INSNT1(4,4);
10461 isAD = INSNT0(15,4) == 0xFB2;
10462 if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
10463 gate = True;
10465 } else {
10466 if (INSNA(27,20) == BITS8(0,1,1,1,0,0,0,0) &&
10467 INSNA(15,12) == BITS4(1,1,1,1) &&
10468 (INSNA(7,4) & BITS4(1,0,0,1)) == BITS4(0,0,0,1) ) {
10469 regD = INSNA(19,16);
10470 regN = INSNA(3,0);
10471 regM = INSNA(11,8);
10472 bitM = INSNA(5,5);
10473 isAD = INSNA(6,6) == 0;
10474 if (regD != 15 && regN != 15 && regM != 15)
10475 gate = True;
10479 if (gate) {
10480 IRTemp irt_regN = newTemp(Ity_I32);
10481 IRTemp irt_regM = newTemp(Ity_I32);
10482 IRTemp irt_prod_lo = newTemp(Ity_I32);
10483 IRTemp irt_prod_hi = newTemp(Ity_I32);
10484 IRTemp tmpM = newTemp(Ity_I32);
10486 assign( irt_regN, isT ? getIRegT(regN) : getIRegA(regN) );
10488 assign( tmpM, isT ? getIRegT(regM) : getIRegA(regM) );
10489 assign( irt_regM, genROR32(tmpM, (bitM & 1) ? 16 : 0) );
10491 assign( irt_prod_lo,
10492 binop( Iop_Mul32,
10493 binop( Iop_Sar32,
10494 binop(Iop_Shl32, mkexpr(irt_regN), mkU8(16)),
10495 mkU8(16) ),
10496 binop( Iop_Sar32,
10497 binop(Iop_Shl32, mkexpr(irt_regM), mkU8(16)),
10498 mkU8(16) ) ) );
10499 assign( irt_prod_hi, binop(Iop_Mul32,
10500 binop(Iop_Sar32, mkexpr(irt_regN), mkU8(16)),
10501 binop(Iop_Sar32, mkexpr(irt_regM), mkU8(16))) );
10502 IRExpr* ire_result
10503 = binop( isAD ? Iop_Add32 : Iop_Sub32,
10504 mkexpr(irt_prod_lo), mkexpr(irt_prod_hi) );
10506 if (isT)
10507 putIRegT( regD, ire_result, condT );
10508 else
10509 putIRegA( regD, ire_result, condT, Ijk_Boring );
10511 if (isAD) {
10512 or_into_QFLAG32(
10513 signed_overflow_after_Add32( ire_result,
10514 irt_prod_lo, irt_prod_hi ),
10515 condT
10519 DIP("smu%cd%s%s r%u, r%u, r%u\n",
10520 isAD ? 'a' : 's',
10521 bitM ? "x" : "", nCC(conq), regD, regN, regM);
10522 return True;
10524 /* fall through */
10527 /* --------------- smlad{X}<c> <Rd>,<Rn>,<Rm>,<Ra> -------------- */
10528 /* --------------- smlsd{X}<c> <Rd>,<Rn>,<Rm>,<Ra> -------------- */
10530 UInt regD = 99, regN = 99, regM = 99, regA = 99, bitM = 99;
10531 Bool gate = False, isAD = False;
10533 if (isT) {
10534 if ((INSNT0(15,4) == 0xFB2 || INSNT0(15,4) == 0xFB4)
10535 && INSNT1(7,5) == BITS3(0,0,0)) {
10536 regN = INSNT0(3,0);
10537 regD = INSNT1(11,8);
10538 regM = INSNT1(3,0);
10539 regA = INSNT1(15,12);
10540 bitM = INSNT1(4,4);
10541 isAD = INSNT0(15,4) == 0xFB2;
10542 if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM)
10543 && !isBadRegT(regA))
10544 gate = True;
10546 } else {
10547 if (INSNA(27,20) == BITS8(0,1,1,1,0,0,0,0) &&
10548 (INSNA(7,4) & BITS4(1,0,0,1)) == BITS4(0,0,0,1)) {
10549 regD = INSNA(19,16);
10550 regA = INSNA(15,12);
10551 regN = INSNA(3,0);
10552 regM = INSNA(11,8);
10553 bitM = INSNA(5,5);
10554 isAD = INSNA(6,6) == 0;
10555 if (regD != 15 && regN != 15 && regM != 15 && regA != 15)
10556 gate = True;
10560 if (gate) {
10561 IRTemp irt_regN = newTemp(Ity_I32);
10562 IRTemp irt_regM = newTemp(Ity_I32);
10563 IRTemp irt_regA = newTemp(Ity_I32);
10564 IRTemp irt_prod_lo = newTemp(Ity_I32);
10565 IRTemp irt_prod_hi = newTemp(Ity_I32);
10566 IRTemp irt_sum = newTemp(Ity_I32);
10567 IRTemp tmpM = newTemp(Ity_I32);
10569 assign( irt_regN, isT ? getIRegT(regN) : getIRegA(regN) );
10570 assign( irt_regA, isT ? getIRegT(regA) : getIRegA(regA) );
10572 assign( tmpM, isT ? getIRegT(regM) : getIRegA(regM) );
10573 assign( irt_regM, genROR32(tmpM, (bitM & 1) ? 16 : 0) );
10575 assign( irt_prod_lo,
10576 binop(Iop_Mul32,
10577 binop(Iop_Sar32,
10578 binop( Iop_Shl32, mkexpr(irt_regN), mkU8(16) ),
10579 mkU8(16)),
10580 binop(Iop_Sar32,
10581 binop( Iop_Shl32, mkexpr(irt_regM), mkU8(16) ),
10582 mkU8(16))) );
10583 assign( irt_prod_hi,
10584 binop( Iop_Mul32,
10585 binop( Iop_Sar32, mkexpr(irt_regN), mkU8(16) ),
10586 binop( Iop_Sar32, mkexpr(irt_regM), mkU8(16) ) ) );
10587 assign( irt_sum, binop( isAD ? Iop_Add32 : Iop_Sub32,
10588 mkexpr(irt_prod_lo), mkexpr(irt_prod_hi) ) );
10590 IRExpr* ire_result = binop(Iop_Add32, mkexpr(irt_sum), mkexpr(irt_regA));
10592 if (isT)
10593 putIRegT( regD, ire_result, condT );
10594 else
10595 putIRegA( regD, ire_result, condT, Ijk_Boring );
10597 if (isAD) {
10598 or_into_QFLAG32(
10599 signed_overflow_after_Add32( mkexpr(irt_sum),
10600 irt_prod_lo, irt_prod_hi ),
10601 condT
10605 or_into_QFLAG32(
10606 signed_overflow_after_Add32( ire_result, irt_sum, irt_regA ),
10607 condT
10610 DIP("sml%cd%s%s r%u, r%u, r%u, r%u\n",
10611 isAD ? 'a' : 's',
10612 bitM ? "x" : "", nCC(conq), regD, regN, regM, regA);
10613 return True;
10615 /* fall through */
10618 /* ----- smlabb, smlabt, smlatb, smlatt <Rd>,<Rn>,<Rm>,<Ra> ----- */
10620 UInt regD = 99, regN = 99, regM = 99, regA = 99, bitM = 99, bitN = 99;
10621 Bool gate = False;
10623 if (isT) {
10624 if (INSNT0(15,4) == 0xFB1 && INSNT1(7,6) == BITS2(0,0)) {
10625 regN = INSNT0(3,0);
10626 regD = INSNT1(11,8);
10627 regM = INSNT1(3,0);
10628 regA = INSNT1(15,12);
10629 bitM = INSNT1(4,4);
10630 bitN = INSNT1(5,5);
10631 if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM)
10632 && !isBadRegT(regA))
10633 gate = True;
10635 } else {
10636 if (INSNA(27,20) == BITS8(0,0,0,1,0,0,0,0) &&
10637 (INSNA(7,4) & BITS4(1,0,0,1)) == BITS4(1,0,0,0)) {
10638 regD = INSNA(19,16);
10639 regN = INSNA(3,0);
10640 regM = INSNA(11,8);
10641 regA = INSNA(15,12);
10642 bitM = INSNA(6,6);
10643 bitN = INSNA(5,5);
10644 if (regD != 15 && regN != 15 && regM != 15 && regA != 15)
10645 gate = True;
10649 if (gate) {
10650 IRTemp irt_regA = newTemp(Ity_I32);
10651 IRTemp irt_prod = newTemp(Ity_I32);
10653 assign( irt_prod,
10654 binop(Iop_Mul32,
10655 binop(Iop_Sar32,
10656 binop(Iop_Shl32,
10657 isT ? getIRegT(regN) : getIRegA(regN),
10658 mkU8(bitN ? 0 : 16)),
10659 mkU8(16)),
10660 binop(Iop_Sar32,
10661 binop(Iop_Shl32,
10662 isT ? getIRegT(regM) : getIRegA(regM),
10663 mkU8(bitM ? 0 : 16)),
10664 mkU8(16))) );
10666 assign( irt_regA, isT ? getIRegT(regA) : getIRegA(regA) );
10668 IRExpr* ire_result = binop(Iop_Add32, mkexpr(irt_prod), mkexpr(irt_regA));
10670 if (isT)
10671 putIRegT( regD, ire_result, condT );
10672 else
10673 putIRegA( regD, ire_result, condT, Ijk_Boring );
10675 or_into_QFLAG32(
10676 signed_overflow_after_Add32( ire_result, irt_prod, irt_regA ),
10677 condT
10680 DIP( "smla%c%c%s r%u, r%u, r%u, r%u\n",
10681 bitN ? 't' : 'b', bitM ? 't' : 'b',
10682 nCC(conq), regD, regN, regM, regA );
10683 return True;
10685 /* fall through */
10688 /* ----- smlalbb, smlalbt, smlaltb, smlaltt <Rd>,<Rn>,<Rm>,<Ra> ----- */
10690 UInt regDHi = 99, regN = 99, regM = 99, regDLo = 99, bitM = 99, bitN = 99;
10691 Bool gate = False;
10693 if (isT) {
10694 if (INSNT0(15,4) == 0xFBC && INSNT1(7,6) == BITS2(1,0)) {
10695 regN = INSNT0(3,0);
10696 regDHi = INSNT1(11,8);
10697 regM = INSNT1(3,0);
10698 regDLo = INSNT1(15,12);
10699 bitM = INSNT1(4,4);
10700 bitN = INSNT1(5,5);
10701 if (!isBadRegT(regDHi) && !isBadRegT(regN) && !isBadRegT(regM)
10702 && !isBadRegT(regDLo) && regDHi != regDLo)
10703 gate = True;
10705 } else {
10706 if (INSNA(27,20) == BITS8(0,0,0,1,0,1,0,0) &&
10707 (INSNA(7,4) & BITS4(1,0,0,1)) == BITS4(1,0,0,0)) {
10708 regDHi = INSNA(19,16);
10709 regN = INSNA(3,0);
10710 regM = INSNA(11,8);
10711 regDLo = INSNA(15,12);
10712 bitM = INSNA(6,6);
10713 bitN = INSNA(5,5);
10714 if (regDHi != 15 && regN != 15 && regM != 15 && regDLo != 15 &&
10715 regDHi != regDLo)
10716 gate = True;
10720 if (gate) {
10721 IRTemp irt_regD = newTemp(Ity_I64);
10722 IRTemp irt_prod = newTemp(Ity_I64);
10723 IRTemp irt_res = newTemp(Ity_I64);
10724 IRTemp irt_resHi = newTemp(Ity_I32);
10725 IRTemp irt_resLo = newTemp(Ity_I32);
10727 assign( irt_prod,
10728 binop(Iop_MullS32,
10729 binop(Iop_Sar32,
10730 binop(Iop_Shl32,
10731 isT ? getIRegT(regN) : getIRegA(regN),
10732 mkU8(bitN ? 0 : 16)),
10733 mkU8(16)),
10734 binop(Iop_Sar32,
10735 binop(Iop_Shl32,
10736 isT ? getIRegT(regM) : getIRegA(regM),
10737 mkU8(bitM ? 0 : 16)),
10738 mkU8(16))) );
10740 assign( irt_regD, binop(Iop_32HLto64,
10741 isT ? getIRegT(regDHi) : getIRegA(regDHi),
10742 isT ? getIRegT(regDLo) : getIRegA(regDLo)) );
10743 assign( irt_res, binop(Iop_Add64, mkexpr(irt_regD), mkexpr(irt_prod)) );
10744 assign( irt_resHi, unop(Iop_64HIto32, mkexpr(irt_res)) );
10745 assign( irt_resLo, unop(Iop_64to32, mkexpr(irt_res)) );
10747 if (isT) {
10748 putIRegT( regDHi, mkexpr(irt_resHi), condT );
10749 putIRegT( regDLo, mkexpr(irt_resLo), condT );
10750 } else {
10751 putIRegA( regDHi, mkexpr(irt_resHi), condT, Ijk_Boring );
10752 putIRegA( regDLo, mkexpr(irt_resLo), condT, Ijk_Boring );
10755 DIP( "smlal%c%c%s r%u, r%u, r%u, r%u\n",
10756 bitN ? 't' : 'b', bitM ? 't' : 'b',
10757 nCC(conq), regDHi, regN, regM, regDLo );
10758 return True;
10760 /* fall through */
10763 /* ----- smlawb, smlawt <Rd>,<Rn>,<Rm>,<Ra> ----- */
10765 UInt regD = 99, regN = 99, regM = 99, regA = 99, bitM = 99;
10766 Bool gate = False;
10768 if (isT) {
10769 if (INSNT0(15,4) == 0xFB3 && INSNT1(7,5) == BITS3(0,0,0)) {
10770 regN = INSNT0(3,0);
10771 regD = INSNT1(11,8);
10772 regM = INSNT1(3,0);
10773 regA = INSNT1(15,12);
10774 bitM = INSNT1(4,4);
10775 if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM)
10776 && !isBadRegT(regA))
10777 gate = True;
10779 } else {
10780 if (INSNA(27,20) == BITS8(0,0,0,1,0,0,1,0) &&
10781 (INSNA(7,4) & BITS4(1,0,1,1)) == BITS4(1,0,0,0)) {
10782 regD = INSNA(19,16);
10783 regN = INSNA(3,0);
10784 regM = INSNA(11,8);
10785 regA = INSNA(15,12);
10786 bitM = INSNA(6,6);
10787 if (regD != 15 && regN != 15 && regM != 15 && regA != 15)
10788 gate = True;
10792 if (gate) {
10793 IRTemp irt_regA = newTemp(Ity_I32);
10794 IRTemp irt_prod = newTemp(Ity_I64);
10796 assign( irt_prod,
10797 binop(Iop_MullS32,
10798 isT ? getIRegT(regN) : getIRegA(regN),
10799 binop(Iop_Sar32,
10800 binop(Iop_Shl32,
10801 isT ? getIRegT(regM) : getIRegA(regM),
10802 mkU8(bitM ? 0 : 16)),
10803 mkU8(16))) );
10805 assign( irt_regA, isT ? getIRegT(regA) : getIRegA(regA) );
10807 IRTemp prod32 = newTemp(Ity_I32);
10808 assign(prod32,
10809 binop(Iop_Or32,
10810 binop(Iop_Shl32, unop(Iop_64HIto32, mkexpr(irt_prod)), mkU8(16)),
10811 binop(Iop_Shr32, unop(Iop_64to32, mkexpr(irt_prod)), mkU8(16))
10814 IRExpr* ire_result = binop(Iop_Add32, mkexpr(prod32), mkexpr(irt_regA));
10816 if (isT)
10817 putIRegT( regD, ire_result, condT );
10818 else
10819 putIRegA( regD, ire_result, condT, Ijk_Boring );
10821 or_into_QFLAG32(
10822 signed_overflow_after_Add32( ire_result, prod32, irt_regA ),
10823 condT
10826 DIP( "smlaw%c%s r%u, r%u, r%u, r%u\n",
10827 bitM ? 't' : 'b',
10828 nCC(conq), regD, regN, regM, regA );
10829 return True;
10831 /* fall through */
10834 /* ------------------- sel<c> <Rd>,<Rn>,<Rm> -------------------- */
10835 /* fixme: fix up the test in v6media.c so that we can pass the ge
10836 flags as part of the test. */
10838 UInt regD = 99, regN = 99, regM = 99;
10839 Bool gate = False;
10841 if (isT) {
10842 if (INSNT0(15,4) == 0xFAA && (INSNT1(15,0) & 0xF0F0) == 0xF080) {
10843 regN = INSNT0(3,0);
10844 regD = INSNT1(11,8);
10845 regM = INSNT1(3,0);
10846 if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
10847 gate = True;
10849 } else {
10850 if (INSNA(27,20) == BITS8(0,1,1,0,1,0,0,0) &&
10851 INSNA(11,8) == BITS4(1,1,1,1) &&
10852 INSNA(7,4) == BITS4(1,0,1,1)) {
10853 regD = INSNA(15,12);
10854 regN = INSNA(19,16);
10855 regM = INSNA(3,0);
10856 if (regD != 15 && regN != 15 && regM != 15)
10857 gate = True;
10861 if (gate) {
10862 IRTemp irt_ge_flag0 = newTemp(Ity_I32);
10863 IRTemp irt_ge_flag1 = newTemp(Ity_I32);
10864 IRTemp irt_ge_flag2 = newTemp(Ity_I32);
10865 IRTemp irt_ge_flag3 = newTemp(Ity_I32);
10867 assign( irt_ge_flag0, get_GEFLAG32(0) );
10868 assign( irt_ge_flag1, get_GEFLAG32(1) );
10869 assign( irt_ge_flag2, get_GEFLAG32(2) );
10870 assign( irt_ge_flag3, get_GEFLAG32(3) );
10872 IRExpr* ire_ge_flag0_or
10873 = binop(Iop_Or32, mkexpr(irt_ge_flag0),
10874 binop(Iop_Sub32, mkU32(0), mkexpr(irt_ge_flag0)));
10875 IRExpr* ire_ge_flag1_or
10876 = binop(Iop_Or32, mkexpr(irt_ge_flag1),
10877 binop(Iop_Sub32, mkU32(0), mkexpr(irt_ge_flag1)));
10878 IRExpr* ire_ge_flag2_or
10879 = binop(Iop_Or32, mkexpr(irt_ge_flag2),
10880 binop(Iop_Sub32, mkU32(0), mkexpr(irt_ge_flag2)));
10881 IRExpr* ire_ge_flag3_or
10882 = binop(Iop_Or32, mkexpr(irt_ge_flag3),
10883 binop(Iop_Sub32, mkU32(0), mkexpr(irt_ge_flag3)));
10885 IRExpr* ire_ge_flags
10886 = binop( Iop_Or32,
10887 binop(Iop_Or32,
10888 binop(Iop_And32,
10889 binop(Iop_Sar32, ire_ge_flag0_or, mkU8(31)),
10890 mkU32(0x000000ff)),
10891 binop(Iop_And32,
10892 binop(Iop_Sar32, ire_ge_flag1_or, mkU8(31)),
10893 mkU32(0x0000ff00))),
10894 binop(Iop_Or32,
10895 binop(Iop_And32,
10896 binop(Iop_Sar32, ire_ge_flag2_or, mkU8(31)),
10897 mkU32(0x00ff0000)),
10898 binop(Iop_And32,
10899 binop(Iop_Sar32, ire_ge_flag3_or, mkU8(31)),
10900 mkU32(0xff000000))) );
10902 IRExpr* ire_result
10903 = binop(Iop_Or32,
10904 binop(Iop_And32,
10905 isT ? getIRegT(regN) : getIRegA(regN),
10906 ire_ge_flags ),
10907 binop(Iop_And32,
10908 isT ? getIRegT(regM) : getIRegA(regM),
10909 unop(Iop_Not32, ire_ge_flags)));
10911 if (isT)
10912 putIRegT( regD, ire_result, condT );
10913 else
10914 putIRegA( regD, ire_result, condT, Ijk_Boring );
10916 DIP("sel%s r%u, r%u, r%u\n", nCC(conq), regD, regN, regM );
10917 return True;
10919 /* fall through */
10922 /* ----------------- uxtab16<c> Rd,Rn,Rm{,rot} ------------------ */
10924 UInt regD = 99, regN = 99, regM = 99, rotate = 99;
10925 Bool gate = False;
10927 if (isT) {
10928 if (INSNT0(15,4) == 0xFA3 && (INSNT1(15,0) & 0xF0C0) == 0xF080) {
10929 regN = INSNT0(3,0);
10930 regD = INSNT1(11,8);
10931 regM = INSNT1(3,0);
10932 rotate = INSNT1(5,4);
10933 if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
10934 gate = True;
10936 } else {
10937 if (INSNA(27,20) == BITS8(0,1,1,0,1,1,0,0) &&
10938 INSNA(9,4) == BITS6(0,0,0,1,1,1) ) {
10939 regD = INSNA(15,12);
10940 regN = INSNA(19,16);
10941 regM = INSNA(3,0);
10942 rotate = INSNA(11,10);
10943 if (regD != 15 && regN != 15 && regM != 15)
10944 gate = True;
10948 if (gate) {
10949 IRTemp irt_regN = newTemp(Ity_I32);
10950 assign( irt_regN, isT ? getIRegT(regN) : getIRegA(regN) );
10952 IRTemp irt_regM = newTemp(Ity_I32);
10953 assign( irt_regM, isT ? getIRegT(regM) : getIRegA(regM) );
10955 IRTemp irt_rot = newTemp(Ity_I32);
10956 assign( irt_rot, binop(Iop_And32,
10957 genROR32(irt_regM, 8 * rotate),
10958 mkU32(0x00FF00FF)) );
10960 IRExpr* resLo
10961 = binop(Iop_And32,
10962 binop(Iop_Add32, mkexpr(irt_regN), mkexpr(irt_rot)),
10963 mkU32(0x0000FFFF));
10965 IRExpr* resHi
10966 = binop(Iop_Add32,
10967 binop(Iop_And32, mkexpr(irt_regN), mkU32(0xFFFF0000)),
10968 binop(Iop_And32, mkexpr(irt_rot), mkU32(0xFFFF0000)));
10970 IRExpr* ire_result
10971 = binop( Iop_Or32, resHi, resLo );
10973 if (isT)
10974 putIRegT( regD, ire_result, condT );
10975 else
10976 putIRegA( regD, ire_result, condT, Ijk_Boring );
10978 DIP( "uxtab16%s r%u, r%u, r%u, ROR #%u\n",
10979 nCC(conq), regD, regN, regM, 8 * rotate );
10980 return True;
10982 /* fall through */
10985 /* --------------- usad8 Rd,Rn,Rm ---------------- */
10986 /* --------------- usada8 Rd,Rn,Rm,Ra ---------------- */
10988 UInt rD = 99, rN = 99, rM = 99, rA = 99;
10989 Bool gate = False;
10991 if (isT) {
10992 if (INSNT0(15,4) == 0xFB7 && INSNT1(7,4) == BITS4(0,0,0,0)) {
10993 rN = INSNT0(3,0);
10994 rA = INSNT1(15,12);
10995 rD = INSNT1(11,8);
10996 rM = INSNT1(3,0);
10997 if (!isBadRegT(rD) && !isBadRegT(rN) && !isBadRegT(rM) && rA != 13)
10998 gate = True;
11000 } else {
11001 if (INSNA(27,20) == BITS8(0,1,1,1,1,0,0,0) &&
11002 INSNA(7,4) == BITS4(0,0,0,1) ) {
11003 rD = INSNA(19,16);
11004 rA = INSNA(15,12);
11005 rM = INSNA(11,8);
11006 rN = INSNA(3,0);
11007 if (rD != 15 && rN != 15 && rM != 15 /* but rA can be 15 */)
11008 gate = True;
11011 /* We allow rA == 15, to denote the usad8 (no accumulator) case. */
11013 if (gate) {
11014 IRExpr* rNe = isT ? getIRegT(rN) : getIRegA(rN);
11015 IRExpr* rMe = isT ? getIRegT(rM) : getIRegA(rM);
11016 IRExpr* rAe = rA == 15 ? mkU32(0)
11017 : (isT ? getIRegT(rA) : getIRegA(rA));
11018 IRExpr* res = binop(Iop_Add32,
11019 binop(Iop_Sad8Ux4, rNe, rMe),
11020 rAe);
11021 if (isT)
11022 putIRegT( rD, res, condT );
11023 else
11024 putIRegA( rD, res, condT, Ijk_Boring );
11026 if (rA == 15) {
11027 DIP( "usad8%s r%u, r%u, r%u\n",
11028 nCC(conq), rD, rN, rM );
11029 } else {
11030 DIP( "usada8%s r%u, r%u, r%u, r%u\n",
11031 nCC(conq), rD, rN, rM, rA );
11033 return True;
11035 /* fall through */
11038 /* ------------------ qadd<c> <Rd>,<Rn>,<Rm> ------------------- */
11040 UInt regD = 99, regN = 99, regM = 99;
11041 Bool gate = False;
11043 if (isT) {
11044 if (INSNT0(15,4) == 0xFA8 && (INSNT1(15,0) & 0xF0F0) == 0xF080) {
11045 regN = INSNT0(3,0);
11046 regD = INSNT1(11,8);
11047 regM = INSNT1(3,0);
11048 if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
11049 gate = True;
11051 } else {
11052 if (INSNA(27,20) == BITS8(0,0,0,1,0,0,0,0) &&
11053 INSNA(11,8) == BITS4(0,0,0,0) &&
11054 INSNA(7,4) == BITS4(0,1,0,1)) {
11055 regD = INSNA(15,12);
11056 regN = INSNA(19,16);
11057 regM = INSNA(3,0);
11058 if (regD != 15 && regN != 15 && regM != 15)
11059 gate = True;
11063 if (gate) {
11064 IRTemp rNt = newTemp(Ity_I32);
11065 IRTemp rMt = newTemp(Ity_I32);
11066 IRTemp res_q = newTemp(Ity_I32);
11068 assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
11069 assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
11071 assign(res_q, binop(Iop_QAdd32S, mkexpr(rMt), mkexpr(rNt)));
11072 if (isT)
11073 putIRegT( regD, mkexpr(res_q), condT );
11074 else
11075 putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
11077 or_into_QFLAG32(
11078 signed_overflow_after_Add32(
11079 binop(Iop_Add32, mkexpr(rMt), mkexpr(rNt)), rMt, rNt),
11080 condT
11083 DIP("qadd%s r%u, r%u, r%u\n", nCC(conq),regD,regM,regN);
11084 return True;
11086 /* fall through */
11089 /* ------------------ qdadd<c> <Rd>,<Rm>,<Rn> ------------------- */
11091 UInt regD = 99, regN = 99, regM = 99;
11092 Bool gate = False;
11094 if (isT) {
11095 if (INSNT0(15,4) == 0xFA8 && (INSNT1(15,0) & 0xF0F0) == 0xF090) {
11096 regN = INSNT0(3,0);
11097 regD = INSNT1(11,8);
11098 regM = INSNT1(3,0);
11099 if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
11100 gate = True;
11102 } else {
11103 if (INSNA(27,20) == BITS8(0,0,0,1,0,1,0,0) &&
11104 INSNA(11,8) == BITS4(0,0,0,0) &&
11105 INSNA(7,4) == BITS4(0,1,0,1)) {
11106 regD = INSNA(15,12);
11107 regN = INSNA(19,16);
11108 regM = INSNA(3,0);
11109 if (regD != 15 && regN != 15 && regM != 15)
11110 gate = True;
11114 if (gate) {
11115 IRTemp rNt = newTemp(Ity_I32);
11116 IRTemp rMt = newTemp(Ity_I32);
11117 IRTemp rN_d = newTemp(Ity_I32);
11118 IRTemp res_q = newTemp(Ity_I32);
11120 assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
11121 assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
11123 or_into_QFLAG32(
11124 signed_overflow_after_Add32(
11125 binop(Iop_Add32, mkexpr(rNt), mkexpr(rNt)), rNt, rNt),
11126 condT
11129 assign(rN_d, binop(Iop_QAdd32S, mkexpr(rNt), mkexpr(rNt)));
11130 assign(res_q, binop(Iop_QAdd32S, mkexpr(rMt), mkexpr(rN_d)));
11131 if (isT)
11132 putIRegT( regD, mkexpr(res_q), condT );
11133 else
11134 putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
11136 or_into_QFLAG32(
11137 signed_overflow_after_Add32(
11138 binop(Iop_Add32, mkexpr(rMt), mkexpr(rN_d)), rMt, rN_d),
11139 condT
11142 DIP("qdadd%s r%u, r%u, r%u\n", nCC(conq),regD,regM,regN);
11143 return True;
11145 /* fall through */
11148 /* ------------------ qsub<c> <Rd>,<Rn>,<Rm> ------------------- */
11150 UInt regD = 99, regN = 99, regM = 99;
11151 Bool gate = False;
11153 if (isT) {
11154 if (INSNT0(15,4) == 0xFA8 && (INSNT1(15,0) & 0xF0F0) == 0xF0A0) {
11155 regN = INSNT0(3,0);
11156 regD = INSNT1(11,8);
11157 regM = INSNT1(3,0);
11158 if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
11159 gate = True;
11161 } else {
11162 if (INSNA(27,20) == BITS8(0,0,0,1,0,0,1,0) &&
11163 INSNA(11,8) == BITS4(0,0,0,0) &&
11164 INSNA(7,4) == BITS4(0,1,0,1)) {
11165 regD = INSNA(15,12);
11166 regN = INSNA(19,16);
11167 regM = INSNA(3,0);
11168 if (regD != 15 && regN != 15 && regM != 15)
11169 gate = True;
11173 if (gate) {
11174 IRTemp rNt = newTemp(Ity_I32);
11175 IRTemp rMt = newTemp(Ity_I32);
11176 IRTemp res_q = newTemp(Ity_I32);
11178 assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
11179 assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
11181 assign(res_q, binop(Iop_QSub32S, mkexpr(rMt), mkexpr(rNt)));
11182 if (isT)
11183 putIRegT( regD, mkexpr(res_q), condT );
11184 else
11185 putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
11187 or_into_QFLAG32(
11188 signed_overflow_after_Sub32(
11189 binop(Iop_Sub32, mkexpr(rMt), mkexpr(rNt)), rMt, rNt),
11190 condT
11193 DIP("qsub%s r%u, r%u, r%u\n", nCC(conq),regD,regM,regN);
11194 return True;
11196 /* fall through */
11199 /* ------------------ qdsub<c> <Rd>,<Rm>,<Rn> ------------------- */
11201 UInt regD = 99, regN = 99, regM = 99;
11202 Bool gate = False;
11204 if (isT) {
11205 if (INSNT0(15,4) == 0xFA8 && (INSNT1(15,0) & 0xF0F0) == 0xF0B0) {
11206 regN = INSNT0(3,0);
11207 regD = INSNT1(11,8);
11208 regM = INSNT1(3,0);
11209 if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
11210 gate = True;
11212 } else {
11213 if (INSNA(27,20) == BITS8(0,0,0,1,0,1,1,0) &&
11214 INSNA(11,8) == BITS4(0,0,0,0) &&
11215 INSNA(7,4) == BITS4(0,1,0,1)) {
11216 regD = INSNA(15,12);
11217 regN = INSNA(19,16);
11218 regM = INSNA(3,0);
11219 if (regD != 15 && regN != 15 && regM != 15)
11220 gate = True;
11224 if (gate) {
11225 IRTemp rNt = newTemp(Ity_I32);
11226 IRTemp rMt = newTemp(Ity_I32);
11227 IRTemp rN_d = newTemp(Ity_I32);
11228 IRTemp res_q = newTemp(Ity_I32);
11230 assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
11231 assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
11233 or_into_QFLAG32(
11234 signed_overflow_after_Add32(
11235 binop(Iop_Add32, mkexpr(rNt), mkexpr(rNt)), rNt, rNt),
11236 condT
11239 assign(rN_d, binop(Iop_QAdd32S, mkexpr(rNt), mkexpr(rNt)));
11240 assign(res_q, binop(Iop_QSub32S, mkexpr(rMt), mkexpr(rN_d)));
11241 if (isT)
11242 putIRegT( regD, mkexpr(res_q), condT );
11243 else
11244 putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
11246 or_into_QFLAG32(
11247 signed_overflow_after_Sub32(
11248 binop(Iop_Sub32, mkexpr(rMt), mkexpr(rN_d)), rMt, rN_d),
11249 condT
11252 DIP("qdsub%s r%u, r%u, r%u\n", nCC(conq),regD,regM,regN);
11253 return True;
11255 /* fall through */
11258 /* ------------------ uqsub16<c> <Rd>,<Rn>,<Rm> ------------------ */
11260 UInt regD = 99, regN = 99, regM = 99;
11261 Bool gate = False;
11263 if (isT) {
11264 if (INSNT0(15,4) == 0xFAD && (INSNT1(15,0) & 0xF0F0) == 0xF050) {
11265 regN = INSNT0(3,0);
11266 regD = INSNT1(11,8);
11267 regM = INSNT1(3,0);
11268 if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
11269 gate = True;
11271 } else {
11272 if (INSNA(27,20) == BITS8(0,1,1,0,0,1,1,0) &&
11273 INSNA(11,8) == BITS4(1,1,1,1) &&
11274 INSNA(7,4) == BITS4(0,1,1,1)) {
11275 regD = INSNA(15,12);
11276 regN = INSNA(19,16);
11277 regM = INSNA(3,0);
11278 if (regD != 15 && regN != 15 && regM != 15)
11279 gate = True;
11283 if (gate) {
11284 IRTemp rNt = newTemp(Ity_I32);
11285 IRTemp rMt = newTemp(Ity_I32);
11286 IRTemp res_q = newTemp(Ity_I32);
11288 assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
11289 assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
11291 assign(res_q, binop(Iop_QSub16Ux2, mkexpr(rNt), mkexpr(rMt)));
11292 if (isT)
11293 putIRegT( regD, mkexpr(res_q), condT );
11294 else
11295 putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
11297 DIP("uqsub16%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
11298 return True;
11300 /* fall through */
11303 /* ----------------- shadd16<c> <Rd>,<Rn>,<Rm> ------------------- */
11305 UInt regD = 99, regN = 99, regM = 99;
11306 Bool gate = False;
11308 if (isT) {
11309 if (INSNT0(15,4) == 0xFA9 && (INSNT1(15,0) & 0xF0F0) == 0xF020) {
11310 regN = INSNT0(3,0);
11311 regD = INSNT1(11,8);
11312 regM = INSNT1(3,0);
11313 if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
11314 gate = True;
11316 } else {
11317 if (INSNA(27,20) == BITS8(0,1,1,0,0,0,1,1) &&
11318 INSNA(11,8) == BITS4(1,1,1,1) &&
11319 INSNA(7,4) == BITS4(0,0,0,1)) {
11320 regD = INSNA(15,12);
11321 regN = INSNA(19,16);
11322 regM = INSNA(3,0);
11323 if (regD != 15 && regN != 15 && regM != 15)
11324 gate = True;
11328 if (gate) {
11329 IRTemp rNt = newTemp(Ity_I32);
11330 IRTemp rMt = newTemp(Ity_I32);
11331 IRTemp res_q = newTemp(Ity_I32);
11333 assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
11334 assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
11336 assign(res_q, binop(Iop_HAdd16Sx2, mkexpr(rNt), mkexpr(rMt)));
11337 if (isT)
11338 putIRegT( regD, mkexpr(res_q), condT );
11339 else
11340 putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
11342 DIP("shadd16%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
11343 return True;
11345 /* fall through */
11348 /* ----------------- uhsub8<c> <Rd>,<Rn>,<Rm> ------------------- */
11350 UInt regD = 99, regN = 99, regM = 99;
11351 Bool gate = False;
11353 if (isT) {
11354 if (INSNT0(15,4) == 0xFAC && (INSNT1(15,0) & 0xF0F0) == 0xF060) {
11355 regN = INSNT0(3,0);
11356 regD = INSNT1(11,8);
11357 regM = INSNT1(3,0);
11358 if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
11359 gate = True;
11361 } else {
11362 if (INSNA(27,20) == BITS8(0,1,1,0,0,1,1,1) &&
11363 INSNA(11,8) == BITS4(1,1,1,1) &&
11364 INSNA(7,4) == BITS4(1,1,1,1)) {
11365 regD = INSNA(15,12);
11366 regN = INSNA(19,16);
11367 regM = INSNA(3,0);
11368 if (regD != 15 && regN != 15 && regM != 15)
11369 gate = True;
11373 if (gate) {
11374 IRTemp rNt = newTemp(Ity_I32);
11375 IRTemp rMt = newTemp(Ity_I32);
11376 IRTemp res_q = newTemp(Ity_I32);
11378 assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
11379 assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
11381 assign(res_q, binop(Iop_HSub8Ux4, mkexpr(rNt), mkexpr(rMt)));
11382 if (isT)
11383 putIRegT( regD, mkexpr(res_q), condT );
11384 else
11385 putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
11387 DIP("uhsub8%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
11388 return True;
11390 /* fall through */
11393 /* ----------------- uhsub16<c> <Rd>,<Rn>,<Rm> ------------------- */
11395 UInt regD = 99, regN = 99, regM = 99;
11396 Bool gate = False;
11398 if (isT) {
11399 if (INSNT0(15,4) == 0xFAD && (INSNT1(15,0) & 0xF0F0) == 0xF060) {
11400 regN = INSNT0(3,0);
11401 regD = INSNT1(11,8);
11402 regM = INSNT1(3,0);
11403 if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
11404 gate = True;
11406 } else {
11407 if (INSNA(27,20) == BITS8(0,1,1,0,0,1,1,1) &&
11408 INSNA(11,8) == BITS4(1,1,1,1) &&
11409 INSNA(7,4) == BITS4(0,1,1,1)) {
11410 regD = INSNA(15,12);
11411 regN = INSNA(19,16);
11412 regM = INSNA(3,0);
11413 if (regD != 15 && regN != 15 && regM != 15)
11414 gate = True;
11418 if (gate) {
11419 IRTemp rNt = newTemp(Ity_I32);
11420 IRTemp rMt = newTemp(Ity_I32);
11421 IRTemp res_q = newTemp(Ity_I32);
11423 assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
11424 assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
11426 assign(res_q, binop(Iop_HSub16Ux2, mkexpr(rNt), mkexpr(rMt)));
11427 if (isT)
11428 putIRegT( regD, mkexpr(res_q), condT );
11429 else
11430 putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
11432 DIP("uhsub16%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
11433 return True;
11435 /* fall through */
11438 /* ------------------ uqadd16<c> <Rd>,<Rn>,<Rm> ------------------ */
11440 UInt regD = 99, regN = 99, regM = 99;
11441 Bool gate = False;
11443 if (isT) {
11444 if (INSNT0(15,4) == 0xFA9 && (INSNT1(15,0) & 0xF0F0) == 0xF050) {
11445 regN = INSNT0(3,0);
11446 regD = INSNT1(11,8);
11447 regM = INSNT1(3,0);
11448 if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
11449 gate = True;
11451 } else {
11452 if (INSNA(27,20) == BITS8(0,1,1,0,0,1,1,0) &&
11453 INSNA(11,8) == BITS4(1,1,1,1) &&
11454 INSNA(7,4) == BITS4(0,0,0,1)) {
11455 regD = INSNA(15,12);
11456 regN = INSNA(19,16);
11457 regM = INSNA(3,0);
11458 if (regD != 15 && regN != 15 && regM != 15)
11459 gate = True;
11463 if (gate) {
11464 IRTemp rNt = newTemp(Ity_I32);
11465 IRTemp rMt = newTemp(Ity_I32);
11466 IRTemp res_q = newTemp(Ity_I32);
11468 assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
11469 assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
11471 assign(res_q, binop(Iop_QAdd16Ux2, mkexpr(rNt), mkexpr(rMt)));
11472 if (isT)
11473 putIRegT( regD, mkexpr(res_q), condT );
11474 else
11475 putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
11477 DIP("uqadd16%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
11478 return True;
11480 /* fall through */
11483 /* ------------------- uqsax<c> <Rd>,<Rn>,<Rm> ------------------- */
11485 UInt regD = 99, regN = 99, regM = 99;
11486 Bool gate = False;
11488 if (isT) {
11489 if (INSNT0(15,4) == 0xFAE && (INSNT1(15,0) & 0xF0F0) == 0xF050) {
11490 regN = INSNT0(3,0);
11491 regD = INSNT1(11,8);
11492 regM = INSNT1(3,0);
11493 if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
11494 gate = True;
11496 } else {
11497 if (INSNA(27,20) == BITS8(0,1,1,0,0,1,1,0) &&
11498 INSNA(11,8) == BITS4(1,1,1,1) &&
11499 INSNA(7,4) == BITS4(0,1,0,1)) {
11500 regD = INSNA(15,12);
11501 regN = INSNA(19,16);
11502 regM = INSNA(3,0);
11503 if (regD != 15 && regN != 15 && regM != 15)
11504 gate = True;
11508 if (gate) {
11509 IRTemp irt_regN = newTemp(Ity_I32);
11510 IRTemp irt_regM = newTemp(Ity_I32);
11511 IRTemp irt_sum = newTemp(Ity_I32);
11512 IRTemp irt_diff = newTemp(Ity_I32);
11513 IRTemp irt_sum_res = newTemp(Ity_I32);
11514 IRTemp irt_diff_res = newTemp(Ity_I32);
11516 assign( irt_regN, isT ? getIRegT(regN) : getIRegA(regN) );
11517 assign( irt_regM, isT ? getIRegT(regM) : getIRegA(regM) );
11519 assign( irt_diff,
11520 binop( Iop_Sub32,
11521 binop( Iop_Shr32, mkexpr(irt_regN), mkU8(16) ),
11522 binop( Iop_Shr32,
11523 binop(Iop_Shl32, mkexpr(irt_regM), mkU8(16)),
11524 mkU8(16) ) ) );
11525 armUnsignedSatQ( &irt_diff_res, NULL, irt_diff, 0x10);
11527 assign( irt_sum,
11528 binop( Iop_Add32,
11529 binop( Iop_Shr32,
11530 binop( Iop_Shl32, mkexpr(irt_regN), mkU8(16) ),
11531 mkU8(16) ),
11532 binop( Iop_Shr32, mkexpr(irt_regM), mkU8(16) )) );
11533 armUnsignedSatQ( &irt_sum_res, NULL, irt_sum, 0x10 );
11535 IRExpr* ire_result = binop( Iop_Or32,
11536 binop( Iop_Shl32, mkexpr(irt_diff_res),
11537 mkU8(16) ),
11538 binop( Iop_And32, mkexpr(irt_sum_res),
11539 mkU32(0xFFFF)) );
11541 if (isT)
11542 putIRegT( regD, ire_result, condT );
11543 else
11544 putIRegA( regD, ire_result, condT, Ijk_Boring );
11546 DIP( "uqsax%s r%u, r%u, r%u\n", nCC(conq), regD, regN, regM );
11547 return True;
11549 /* fall through */
11552 /* ------------------- uqasx<c> <Rd>,<Rn>,<Rm> ------------------- */
11554 UInt regD = 99, regN = 99, regM = 99;
11555 Bool gate = False;
11557 if (isT) {
11558 if (INSNT0(15,4) == 0xFAA && (INSNT1(15,0) & 0xF0F0) == 0xF050) {
11559 regN = INSNT0(3,0);
11560 regD = INSNT1(11,8);
11561 regM = INSNT1(3,0);
11562 if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
11563 gate = True;
11565 } else {
11566 if (INSNA(27,20) == BITS8(0,1,1,0,0,1,1,0) &&
11567 INSNA(11,8) == BITS4(1,1,1,1) &&
11568 INSNA(7,4) == BITS4(0,0,1,1)) {
11569 regD = INSNA(15,12);
11570 regN = INSNA(19,16);
11571 regM = INSNA(3,0);
11572 if (regD != 15 && regN != 15 && regM != 15)
11573 gate = True;
11577 if (gate) {
11578 IRTemp irt_regN = newTemp(Ity_I32);
11579 IRTemp irt_regM = newTemp(Ity_I32);
11580 IRTemp irt_sum = newTemp(Ity_I32);
11581 IRTemp irt_diff = newTemp(Ity_I32);
11582 IRTemp irt_res_sum = newTemp(Ity_I32);
11583 IRTemp irt_res_diff = newTemp(Ity_I32);
11585 assign( irt_regN, isT ? getIRegT(regN) : getIRegA(regN) );
11586 assign( irt_regM, isT ? getIRegT(regM) : getIRegA(regM) );
11588 assign( irt_diff,
11589 binop( Iop_Sub32,
11590 binop( Iop_Shr32,
11591 binop( Iop_Shl32, mkexpr(irt_regN), mkU8(16) ),
11592 mkU8(16) ),
11593 binop( Iop_Shr32, mkexpr(irt_regM), mkU8(16) ) ) );
11594 armUnsignedSatQ( &irt_res_diff, NULL, irt_diff, 0x10 );
11596 assign( irt_sum,
11597 binop( Iop_Add32,
11598 binop( Iop_Shr32, mkexpr(irt_regN), mkU8(16) ),
11599 binop( Iop_Shr32,
11600 binop( Iop_Shl32, mkexpr(irt_regM), mkU8(16) ),
11601 mkU8(16) ) ) );
11602 armUnsignedSatQ( &irt_res_sum, NULL, irt_sum, 0x10 );
11604 IRExpr* ire_result
11605 = binop( Iop_Or32,
11606 binop( Iop_Shl32, mkexpr(irt_res_sum), mkU8(16) ),
11607 binop( Iop_And32, mkexpr(irt_res_diff), mkU32(0xFFFF) ) );
11609 if (isT)
11610 putIRegT( regD, ire_result, condT );
11611 else
11612 putIRegA( regD, ire_result, condT, Ijk_Boring );
11614 DIP( "uqasx%s r%u, r%u, r%u\n", nCC(conq), regD, regN, regM );
11615 return True;
11617 /* fall through */
11620 /* ------------------- usax<c> <Rd>,<Rn>,<Rm> ------------------- */
11622 UInt regD = 99, regN = 99, regM = 99;
11623 Bool gate = False;
11625 if (isT) {
11626 if (INSNT0(15,4) == 0xFAE && (INSNT1(15,0) & 0xF0F0) == 0xF040) {
11627 regN = INSNT0(3,0);
11628 regD = INSNT1(11,8);
11629 regM = INSNT1(3,0);
11630 if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
11631 gate = True;
11633 } else {
11634 if (INSNA(27,20) == BITS8(0,1,1,0,0,1,0,1) &&
11635 INSNA(11,8) == BITS4(1,1,1,1) &&
11636 INSNA(7,4) == BITS4(0,1,0,1)) {
11637 regD = INSNA(15,12);
11638 regN = INSNA(19,16);
11639 regM = INSNA(3,0);
11640 if (regD != 15 && regN != 15 && regM != 15)
11641 gate = True;
11645 if (gate) {
11646 IRTemp irt_regN = newTemp(Ity_I32);
11647 IRTemp irt_regM = newTemp(Ity_I32);
11648 IRTemp irt_sum = newTemp(Ity_I32);
11649 IRTemp irt_diff = newTemp(Ity_I32);
11651 assign( irt_regN, isT ? getIRegT(regN) : getIRegA(regN) );
11652 assign( irt_regM, isT ? getIRegT(regM) : getIRegA(regM) );
11654 assign( irt_sum,
11655 binop( Iop_Add32,
11656 unop( Iop_16Uto32,
11657 unop( Iop_32to16, mkexpr(irt_regN) )
11659 binop( Iop_Shr32, mkexpr(irt_regM), mkU8(16) ) ) );
11661 assign( irt_diff,
11662 binop( Iop_Sub32,
11663 binop( Iop_Shr32, mkexpr(irt_regN), mkU8(16) ),
11664 unop( Iop_16Uto32,
11665 unop( Iop_32to16, mkexpr(irt_regM) )
11670 IRExpr* ire_result
11671 = binop( Iop_Or32,
11672 binop( Iop_Shl32, mkexpr(irt_diff), mkU8(16) ),
11673 binop( Iop_And32, mkexpr(irt_sum), mkU32(0xFFFF) ) );
11675 IRTemp ge10 = newTemp(Ity_I32);
11676 assign( ge10, IRExpr_ITE( binop( Iop_CmpLE32U,
11677 mkU32(0x10000), mkexpr(irt_sum) ),
11678 mkU32(1), mkU32(0) ) );
11679 put_GEFLAG32( 0, 0, mkexpr(ge10), condT );
11680 put_GEFLAG32( 1, 0, mkexpr(ge10), condT );
11682 IRTemp ge32 = newTemp(Ity_I32);
11683 assign(ge32, unop(Iop_Not32, mkexpr(irt_diff)));
11684 put_GEFLAG32( 2, 31, mkexpr(ge32), condT );
11685 put_GEFLAG32( 3, 31, mkexpr(ge32), condT );
11687 if (isT)
11688 putIRegT( regD, ire_result, condT );
11689 else
11690 putIRegA( regD, ire_result, condT, Ijk_Boring );
11692 DIP( "usax%s r%u, r%u, r%u\n", nCC(conq), regD, regN, regM );
11693 return True;
11695 /* fall through */
11698 /* ------------------- uasx<c> <Rd>,<Rn>,<Rm> ------------------- */
11700 UInt regD = 99, regN = 99, regM = 99;
11701 Bool gate = False;
11703 if (isT) {
11704 if (INSNT0(15,4) == 0xFAA && (INSNT1(15,0) & 0xF0F0) == 0xF040) {
11705 regN = INSNT0(3,0);
11706 regD = INSNT1(11,8);
11707 regM = INSNT1(3,0);
11708 if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
11709 gate = True;
11711 } else {
11712 if (INSNA(27,20) == BITS8(0,1,1,0,0,1,0,1) &&
11713 INSNA(11,8) == BITS4(1,1,1,1) &&
11714 INSNA(7,4) == BITS4(0,0,1,1)) {
11715 regD = INSNA(15,12);
11716 regN = INSNA(19,16);
11717 regM = INSNA(3,0);
11718 if (regD != 15 && regN != 15 && regM != 15)
11719 gate = True;
11723 if (gate) {
11724 IRTemp irt_regN = newTemp(Ity_I32);
11725 IRTemp irt_regM = newTemp(Ity_I32);
11726 IRTemp irt_sum = newTemp(Ity_I32);
11727 IRTemp irt_diff = newTemp(Ity_I32);
11729 assign( irt_regN, isT ? getIRegT(regN) : getIRegA(regN) );
11730 assign( irt_regM, isT ? getIRegT(regM) : getIRegA(regM) );
11732 assign( irt_diff,
11733 binop( Iop_Sub32,
11734 unop( Iop_16Uto32,
11735 unop( Iop_32to16, mkexpr(irt_regN) )
11737 binop( Iop_Shr32, mkexpr(irt_regM), mkU8(16) ) ) );
11739 assign( irt_sum,
11740 binop( Iop_Add32,
11741 binop( Iop_Shr32, mkexpr(irt_regN), mkU8(16) ),
11742 unop( Iop_16Uto32,
11743 unop( Iop_32to16, mkexpr(irt_regM) )
11744 ) ) );
11746 IRExpr* ire_result
11747 = binop( Iop_Or32,
11748 binop( Iop_Shl32, mkexpr(irt_sum), mkU8(16) ),
11749 binop( Iop_And32, mkexpr(irt_diff), mkU32(0xFFFF) ) );
11751 IRTemp ge10 = newTemp(Ity_I32);
11752 assign(ge10, unop(Iop_Not32, mkexpr(irt_diff)));
11753 put_GEFLAG32( 0, 31, mkexpr(ge10), condT );
11754 put_GEFLAG32( 1, 31, mkexpr(ge10), condT );
11756 IRTemp ge32 = newTemp(Ity_I32);
11757 assign( ge32, IRExpr_ITE( binop( Iop_CmpLE32U,
11758 mkU32(0x10000), mkexpr(irt_sum) ),
11759 mkU32(1), mkU32(0) ) );
11760 put_GEFLAG32( 2, 0, mkexpr(ge32), condT );
11761 put_GEFLAG32( 3, 0, mkexpr(ge32), condT );
11763 if (isT)
11764 putIRegT( regD, ire_result, condT );
11765 else
11766 putIRegA( regD, ire_result, condT, Ijk_Boring );
11768 DIP( "uasx%s r%u, r%u, r%u\n", nCC(conq), regD, regN, regM );
11769 return True;
11771 /* fall through */
11774 /* ------------------- ssax<c> <Rd>,<Rn>,<Rm> ------------------- */
11776 UInt regD = 99, regN = 99, regM = 99;
11777 Bool gate = False;
11779 if (isT) {
11780 if (INSNT0(15,4) == 0xFAE && (INSNT1(15,0) & 0xF0F0) == 0xF000) {
11781 regN = INSNT0(3,0);
11782 regD = INSNT1(11,8);
11783 regM = INSNT1(3,0);
11784 if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
11785 gate = True;
11787 } else {
11788 if (INSNA(27,20) == BITS8(0,1,1,0,0,0,0,1) &&
11789 INSNA(11,8) == BITS4(1,1,1,1) &&
11790 INSNA(7,4) == BITS4(0,1,0,1)) {
11791 regD = INSNA(15,12);
11792 regN = INSNA(19,16);
11793 regM = INSNA(3,0);
11794 if (regD != 15 && regN != 15 && regM != 15)
11795 gate = True;
11799 if (gate) {
11800 IRTemp irt_regN = newTemp(Ity_I32);
11801 IRTemp irt_regM = newTemp(Ity_I32);
11802 IRTemp irt_sum = newTemp(Ity_I32);
11803 IRTemp irt_diff = newTemp(Ity_I32);
11805 assign( irt_regN, isT ? getIRegT(regN) : getIRegA(regN) );
11806 assign( irt_regM, isT ? getIRegT(regM) : getIRegA(regM) );
11808 assign( irt_sum,
11809 binop( Iop_Add32,
11810 binop( Iop_Sar32,
11811 binop( Iop_Shl32, mkexpr(irt_regN), mkU8(16) ),
11812 mkU8(16) ),
11813 binop( Iop_Sar32, mkexpr(irt_regM), mkU8(16) ) ) );
11815 assign( irt_diff,
11816 binop( Iop_Sub32,
11817 binop( Iop_Sar32, mkexpr(irt_regN), mkU8(16) ),
11818 binop( Iop_Sar32,
11819 binop( Iop_Shl32, mkexpr(irt_regM), mkU8(16) ),
11820 mkU8(16) ) ) );
11822 IRExpr* ire_result
11823 = binop( Iop_Or32,
11824 binop( Iop_Shl32, mkexpr(irt_diff), mkU8(16) ),
11825 binop( Iop_And32, mkexpr(irt_sum), mkU32(0xFFFF) ) );
11827 IRTemp ge10 = newTemp(Ity_I32);
11828 assign(ge10, unop(Iop_Not32, mkexpr(irt_sum)));
11829 put_GEFLAG32( 0, 31, mkexpr(ge10), condT );
11830 put_GEFLAG32( 1, 31, mkexpr(ge10), condT );
11832 IRTemp ge32 = newTemp(Ity_I32);
11833 assign(ge32, unop(Iop_Not32, mkexpr(irt_diff)));
11834 put_GEFLAG32( 2, 31, mkexpr(ge32), condT );
11835 put_GEFLAG32( 3, 31, mkexpr(ge32), condT );
11837 if (isT)
11838 putIRegT( regD, ire_result, condT );
11839 else
11840 putIRegA( regD, ire_result, condT, Ijk_Boring );
11842 DIP( "ssax%s r%u, r%u, r%u\n", nCC(conq), regD, regN, regM );
11843 return True;
11845 /* fall through */
11848 /* ----------------- shsub8<c> <Rd>,<Rn>,<Rm> ------------------- */
11850 UInt regD = 99, regN = 99, regM = 99;
11851 Bool gate = False;
11853 if (isT) {
11854 if (INSNT0(15,4) == 0xFAC && (INSNT1(15,0) & 0xF0F0) == 0xF020) {
11855 regN = INSNT0(3,0);
11856 regD = INSNT1(11,8);
11857 regM = INSNT1(3,0);
11858 if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
11859 gate = True;
11861 } else {
11862 if (INSNA(27,20) == BITS8(0,1,1,0,0,0,1,1) &&
11863 INSNA(11,8) == BITS4(1,1,1,1) &&
11864 INSNA(7,4) == BITS4(1,1,1,1)) {
11865 regD = INSNA(15,12);
11866 regN = INSNA(19,16);
11867 regM = INSNA(3,0);
11868 if (regD != 15 && regN != 15 && regM != 15)
11869 gate = True;
11873 if (gate) {
11874 IRTemp rNt = newTemp(Ity_I32);
11875 IRTemp rMt = newTemp(Ity_I32);
11876 IRTemp res_q = newTemp(Ity_I32);
11878 assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
11879 assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
11881 assign(res_q, binop(Iop_HSub8Sx4, mkexpr(rNt), mkexpr(rMt)));
11882 if (isT)
11883 putIRegT( regD, mkexpr(res_q), condT );
11884 else
11885 putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
11887 DIP("shsub8%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
11888 return True;
11890 /* fall through */
11893 /* ----------------- sxtab16<c> Rd,Rn,Rm{,rot} ------------------ */
11895 UInt regD = 99, regN = 99, regM = 99, rotate = 99;
11896 Bool gate = False;
11898 if (isT) {
11899 if (INSNT0(15,4) == 0xFA2 && (INSNT1(15,0) & 0xF0C0) == 0xF080) {
11900 regN = INSNT0(3,0);
11901 regD = INSNT1(11,8);
11902 regM = INSNT1(3,0);
11903 rotate = INSNT1(5,4);
11904 if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
11905 gate = True;
11907 } else {
11908 if (INSNA(27,20) == BITS8(0,1,1,0,1,0,0,0) &&
11909 INSNA(9,4) == BITS6(0,0,0,1,1,1) ) {
11910 regD = INSNA(15,12);
11911 regN = INSNA(19,16);
11912 regM = INSNA(3,0);
11913 rotate = INSNA(11,10);
11914 if (regD != 15 && regN != 15 && regM != 15)
11915 gate = True;
11919 if (gate) {
11920 IRTemp irt_regN = newTemp(Ity_I32);
11921 assign( irt_regN, isT ? getIRegT(regN) : getIRegA(regN) );
11923 IRTemp irt_regM = newTemp(Ity_I32);
11924 assign( irt_regM, isT ? getIRegT(regM) : getIRegA(regM) );
11926 IRTemp irt_rot = newTemp(Ity_I32);
11927 assign( irt_rot, genROR32(irt_regM, 8 * rotate) );
11929 /* FIXME Maybe we can write this arithmetic in shorter form. */
11930 IRExpr* resLo
11931 = binop(Iop_And32,
11932 binop(Iop_Add32,
11933 mkexpr(irt_regN),
11934 unop(Iop_16Uto32,
11935 unop(Iop_8Sto16,
11936 unop(Iop_32to8, mkexpr(irt_rot))))),
11937 mkU32(0x0000FFFF));
11939 IRExpr* resHi
11940 = binop(Iop_And32,
11941 binop(Iop_Add32,
11942 mkexpr(irt_regN),
11943 binop(Iop_Shl32,
11944 unop(Iop_16Uto32,
11945 unop(Iop_8Sto16,
11946 unop(Iop_32to8,
11947 binop(Iop_Shr32,
11948 mkexpr(irt_rot),
11949 mkU8(16))))),
11950 mkU8(16))),
11951 mkU32(0xFFFF0000));
11953 IRExpr* ire_result
11954 = binop( Iop_Or32, resHi, resLo );
11956 if (isT)
11957 putIRegT( regD, ire_result, condT );
11958 else
11959 putIRegA( regD, ire_result, condT, Ijk_Boring );
11961 DIP( "sxtab16%s r%u, r%u, r%u, ROR #%u\n",
11962 nCC(conq), regD, regN, regM, 8 * rotate );
11963 return True;
11965 /* fall through */
11968 /* ----------------- shasx<c> <Rd>,<Rn>,<Rm> ------------------- */
11970 UInt regD = 99, regN = 99, regM = 99;
11971 Bool gate = False;
11973 if (isT) {
11974 if (INSNT0(15,4) == 0xFAA && (INSNT1(15,0) & 0xF0F0) == 0xF020) {
11975 regN = INSNT0(3,0);
11976 regD = INSNT1(11,8);
11977 regM = INSNT1(3,0);
11978 if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
11979 gate = True;
11981 } else {
11982 if (INSNA(27,20) == BITS8(0,1,1,0,0,0,1,1) &&
11983 INSNA(11,8) == BITS4(1,1,1,1) &&
11984 INSNA(7,4) == BITS4(0,0,1,1)) {
11985 regD = INSNA(15,12);
11986 regN = INSNA(19,16);
11987 regM = INSNA(3,0);
11988 if (regD != 15 && regN != 15 && regM != 15)
11989 gate = True;
11993 if (gate) {
11994 IRTemp rNt = newTemp(Ity_I32);
11995 IRTemp rMt = newTemp(Ity_I32);
11996 IRTemp irt_diff = newTemp(Ity_I32);
11997 IRTemp irt_sum = newTemp(Ity_I32);
11998 IRTemp res_q = newTemp(Ity_I32);
12000 assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
12001 assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
12003 assign( irt_diff,
12004 binop(Iop_Sub32,
12005 unop(Iop_16Sto32,
12006 unop(Iop_32to16,
12007 mkexpr(rNt)
12010 unop(Iop_16Sto32,
12011 unop(Iop_32to16,
12012 binop(Iop_Shr32,
12013 mkexpr(rMt), mkU8(16)
12020 assign( irt_sum,
12021 binop(Iop_Add32,
12022 unop(Iop_16Sto32,
12023 unop(Iop_32to16,
12024 binop(Iop_Shr32,
12025 mkexpr(rNt), mkU8(16)
12029 unop(Iop_16Sto32,
12030 unop(Iop_32to16, mkexpr(rMt)
12036 assign( res_q,
12037 binop(Iop_Or32,
12038 unop(Iop_16Uto32,
12039 unop(Iop_32to16,
12040 binop(Iop_Shr32,
12041 mkexpr(irt_diff), mkU8(1)
12045 binop(Iop_Shl32,
12046 binop(Iop_Shr32,
12047 mkexpr(irt_sum), mkU8(1)
12049 mkU8(16)
12054 if (isT)
12055 putIRegT( regD, mkexpr(res_q), condT );
12056 else
12057 putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
12059 DIP("shasx%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
12060 return True;
12062 /* fall through */
12065 /* ----------------- uhasx<c> <Rd>,<Rn>,<Rm> ------------------- */
12067 UInt regD = 99, regN = 99, regM = 99;
12068 Bool gate = False;
12070 if (isT) {
12071 if (INSNT0(15,4) == 0xFAA && (INSNT1(15,0) & 0xF0F0) == 0xF060) {
12072 regN = INSNT0(3,0);
12073 regD = INSNT1(11,8);
12074 regM = INSNT1(3,0);
12075 if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
12076 gate = True;
12078 } else {
12079 if (INSNA(27,20) == BITS8(0,1,1,0,0,1,1,1) &&
12080 INSNA(11,8) == BITS4(1,1,1,1) &&
12081 INSNA(7,4) == BITS4(0,0,1,1)) {
12082 regD = INSNA(15,12);
12083 regN = INSNA(19,16);
12084 regM = INSNA(3,0);
12085 if (regD != 15 && regN != 15 && regM != 15)
12086 gate = True;
12090 if (gate) {
12091 IRTemp rNt = newTemp(Ity_I32);
12092 IRTemp rMt = newTemp(Ity_I32);
12093 IRTemp irt_diff = newTemp(Ity_I32);
12094 IRTemp irt_sum = newTemp(Ity_I32);
12095 IRTemp res_q = newTemp(Ity_I32);
12097 assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
12098 assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
12100 assign( irt_diff,
12101 binop(Iop_Sub32,
12102 unop(Iop_16Uto32,
12103 unop(Iop_32to16,
12104 mkexpr(rNt)
12107 unop(Iop_16Uto32,
12108 unop(Iop_32to16,
12109 binop(Iop_Shr32,
12110 mkexpr(rMt), mkU8(16)
12117 assign( irt_sum,
12118 binop(Iop_Add32,
12119 unop(Iop_16Uto32,
12120 unop(Iop_32to16,
12121 binop(Iop_Shr32,
12122 mkexpr(rNt), mkU8(16)
12126 unop(Iop_16Uto32,
12127 unop(Iop_32to16, mkexpr(rMt)
12133 assign( res_q,
12134 binop(Iop_Or32,
12135 unop(Iop_16Uto32,
12136 unop(Iop_32to16,
12137 binop(Iop_Shr32,
12138 mkexpr(irt_diff), mkU8(1)
12142 binop(Iop_Shl32,
12143 binop(Iop_Shr32,
12144 mkexpr(irt_sum), mkU8(1)
12146 mkU8(16)
12151 if (isT)
12152 putIRegT( regD, mkexpr(res_q), condT );
12153 else
12154 putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
12156 DIP("uhasx%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
12157 return True;
12159 /* fall through */
12162 /* ----------------- shsax<c> <Rd>,<Rn>,<Rm> ------------------- */
12164 UInt regD = 99, regN = 99, regM = 99;
12165 Bool gate = False;
12167 if (isT) {
12168 if (INSNT0(15,4) == 0xFAE && (INSNT1(15,0) & 0xF0F0) == 0xF020) {
12169 regN = INSNT0(3,0);
12170 regD = INSNT1(11,8);
12171 regM = INSNT1(3,0);
12172 if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
12173 gate = True;
12175 } else {
12176 if (INSNA(27,20) == BITS8(0,1,1,0,0,0,1,1) &&
12177 INSNA(11,8) == BITS4(1,1,1,1) &&
12178 INSNA(7,4) == BITS4(0,1,0,1)) {
12179 regD = INSNA(15,12);
12180 regN = INSNA(19,16);
12181 regM = INSNA(3,0);
12182 if (regD != 15 && regN != 15 && regM != 15)
12183 gate = True;
12187 if (gate) {
12188 IRTemp rNt = newTemp(Ity_I32);
12189 IRTemp rMt = newTemp(Ity_I32);
12190 IRTemp irt_diff = newTemp(Ity_I32);
12191 IRTemp irt_sum = newTemp(Ity_I32);
12192 IRTemp res_q = newTemp(Ity_I32);
12194 assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
12195 assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
12197 assign( irt_sum,
12198 binop(Iop_Add32,
12199 unop(Iop_16Sto32,
12200 unop(Iop_32to16,
12201 mkexpr(rNt)
12204 unop(Iop_16Sto32,
12205 unop(Iop_32to16,
12206 binop(Iop_Shr32,
12207 mkexpr(rMt), mkU8(16)
12214 assign( irt_diff,
12215 binop(Iop_Sub32,
12216 unop(Iop_16Sto32,
12217 unop(Iop_32to16,
12218 binop(Iop_Shr32,
12219 mkexpr(rNt), mkU8(16)
12223 unop(Iop_16Sto32,
12224 unop(Iop_32to16, mkexpr(rMt)
12230 assign( res_q,
12231 binop(Iop_Or32,
12232 unop(Iop_16Uto32,
12233 unop(Iop_32to16,
12234 binop(Iop_Shr32,
12235 mkexpr(irt_sum), mkU8(1)
12239 binop(Iop_Shl32,
12240 binop(Iop_Shr32,
12241 mkexpr(irt_diff), mkU8(1)
12243 mkU8(16)
12248 if (isT)
12249 putIRegT( regD, mkexpr(res_q), condT );
12250 else
12251 putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
12253 DIP("shsax%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
12254 return True;
12256 /* fall through */
12259 /* ----------------- uhsax<c> <Rd>,<Rn>,<Rm> ------------------- */
12261 UInt regD = 99, regN = 99, regM = 99;
12262 Bool gate = False;
12264 if (isT) {
12265 if (INSNT0(15,4) == 0xFAE && (INSNT1(15,0) & 0xF0F0) == 0xF060) {
12266 regN = INSNT0(3,0);
12267 regD = INSNT1(11,8);
12268 regM = INSNT1(3,0);
12269 if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
12270 gate = True;
12272 } else {
12273 if (INSNA(27,20) == BITS8(0,1,1,0,0,1,1,1) &&
12274 INSNA(11,8) == BITS4(1,1,1,1) &&
12275 INSNA(7,4) == BITS4(0,1,0,1)) {
12276 regD = INSNA(15,12);
12277 regN = INSNA(19,16);
12278 regM = INSNA(3,0);
12279 if (regD != 15 && regN != 15 && regM != 15)
12280 gate = True;
12284 if (gate) {
12285 IRTemp rNt = newTemp(Ity_I32);
12286 IRTemp rMt = newTemp(Ity_I32);
12287 IRTemp irt_diff = newTemp(Ity_I32);
12288 IRTemp irt_sum = newTemp(Ity_I32);
12289 IRTemp res_q = newTemp(Ity_I32);
12291 assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
12292 assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
12294 assign( irt_sum,
12295 binop(Iop_Add32,
12296 unop(Iop_16Uto32,
12297 unop(Iop_32to16,
12298 mkexpr(rNt)
12301 unop(Iop_16Uto32,
12302 unop(Iop_32to16,
12303 binop(Iop_Shr32,
12304 mkexpr(rMt), mkU8(16)
12311 assign( irt_diff,
12312 binop(Iop_Sub32,
12313 unop(Iop_16Uto32,
12314 unop(Iop_32to16,
12315 binop(Iop_Shr32,
12316 mkexpr(rNt), mkU8(16)
12320 unop(Iop_16Uto32,
12321 unop(Iop_32to16, mkexpr(rMt)
12327 assign( res_q,
12328 binop(Iop_Or32,
12329 unop(Iop_16Uto32,
12330 unop(Iop_32to16,
12331 binop(Iop_Shr32,
12332 mkexpr(irt_sum), mkU8(1)
12336 binop(Iop_Shl32,
12337 binop(Iop_Shr32,
12338 mkexpr(irt_diff), mkU8(1)
12340 mkU8(16)
12345 if (isT)
12346 putIRegT( regD, mkexpr(res_q), condT );
12347 else
12348 putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
12350 DIP("uhsax%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
12351 return True;
12353 /* fall through */
12356 /* ----------------- shsub16<c> <Rd>,<Rn>,<Rm> ------------------- */
12358 UInt regD = 99, regN = 99, regM = 99;
12359 Bool gate = False;
12361 if (isT) {
12362 if (INSNT0(15,4) == 0xFAD && (INSNT1(15,0) & 0xF0F0) == 0xF020) {
12363 regN = INSNT0(3,0);
12364 regD = INSNT1(11,8);
12365 regM = INSNT1(3,0);
12366 if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
12367 gate = True;
12369 } else {
12370 if (INSNA(27,20) == BITS8(0,1,1,0,0,0,1,1) &&
12371 INSNA(11,8) == BITS4(1,1,1,1) &&
12372 INSNA(7,4) == BITS4(0,1,1,1)) {
12373 regD = INSNA(15,12);
12374 regN = INSNA(19,16);
12375 regM = INSNA(3,0);
12376 if (regD != 15 && regN != 15 && regM != 15)
12377 gate = True;
12381 if (gate) {
12382 IRTemp rNt = newTemp(Ity_I32);
12383 IRTemp rMt = newTemp(Ity_I32);
12384 IRTemp res_q = newTemp(Ity_I32);
12386 assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
12387 assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
12389 assign(res_q, binop(Iop_HSub16Sx2, mkexpr(rNt), mkexpr(rMt)));
12390 if (isT)
12391 putIRegT( regD, mkexpr(res_q), condT );
12392 else
12393 putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
12395 DIP("shsub16%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
12396 return True;
12398 /* fall through */
12401 /* ----------------- smmls{r}<c> <Rd>,<Rn>,<Rm>,<Ra> ------------------- */
12403 UInt rD = 99, rN = 99, rM = 99, rA = 99;
12404 Bool round = False;
12405 Bool gate = False;
12407 if (isT) {
12408 if (INSNT0(15,7) == BITS9(1,1,1,1,1,0,1,1,0)
12409 && INSNT0(6,4) == BITS3(1,1,0)
12410 && INSNT1(7,5) == BITS3(0,0,0)) {
12411 round = INSNT1(4,4);
12412 rA = INSNT1(15,12);
12413 rD = INSNT1(11,8);
12414 rM = INSNT1(3,0);
12415 rN = INSNT0(3,0);
12416 if (!isBadRegT(rD)
12417 && !isBadRegT(rN) && !isBadRegT(rM) && !isBadRegT(rA))
12418 gate = True;
12420 } else {
12421 if (INSNA(27,20) == BITS8(0,1,1,1,0,1,0,1)
12422 && INSNA(15,12) != BITS4(1,1,1,1)
12423 && (INSNA(7,4) & BITS4(1,1,0,1)) == BITS4(1,1,0,1)) {
12424 round = INSNA(5,5);
12425 rD = INSNA(19,16);
12426 rA = INSNA(15,12);
12427 rM = INSNA(11,8);
12428 rN = INSNA(3,0);
12429 if (rD != 15 && rM != 15 && rN != 15)
12430 gate = True;
12433 if (gate) {
12434 IRTemp irt_rA = newTemp(Ity_I32);
12435 IRTemp irt_rN = newTemp(Ity_I32);
12436 IRTemp irt_rM = newTemp(Ity_I32);
12437 assign( irt_rA, isT ? getIRegT(rA) : getIRegA(rA) );
12438 assign( irt_rN, isT ? getIRegT(rN) : getIRegA(rN) );
12439 assign( irt_rM, isT ? getIRegT(rM) : getIRegA(rM) );
12440 IRExpr* res
12441 = unop(Iop_64HIto32,
12442 binop(Iop_Add64,
12443 binop(Iop_Sub64,
12444 binop(Iop_32HLto64, mkexpr(irt_rA), mkU32(0)),
12445 binop(Iop_MullS32, mkexpr(irt_rN), mkexpr(irt_rM))),
12446 mkU64(round ? 0x80000000ULL : 0ULL)));
12447 if (isT)
12448 putIRegT( rD, res, condT );
12449 else
12450 putIRegA(rD, res, condT, Ijk_Boring);
12451 DIP("smmls%s%s r%u, r%u, r%u, r%u\n",
12452 round ? "r" : "", nCC(conq), rD, rN, rM, rA);
12453 return True;
12455 /* fall through */
12458 /* -------------- smlald{x}<c> <RdLo>,<RdHi>,<Rn>,<Rm> ---------------- */
12460 UInt rN = 99, rDlo = 99, rDhi = 99, rM = 99;
12461 Bool m_swap = False;
12462 Bool gate = False;
12464 if (isT) {
12465 if (INSNT0(15,4) == 0xFBC &&
12466 (INSNT1(7,4) & BITS4(1,1,1,0)) == BITS4(1,1,0,0)) {
12467 rN = INSNT0(3,0);
12468 rDlo = INSNT1(15,12);
12469 rDhi = INSNT1(11,8);
12470 rM = INSNT1(3,0);
12471 m_swap = (INSNT1(4,4) & 1) == 1;
12472 if (!isBadRegT(rDlo) && !isBadRegT(rDhi) && !isBadRegT(rN)
12473 && !isBadRegT(rM) && rDhi != rDlo)
12474 gate = True;
12476 } else {
12477 if (INSNA(27,20) == BITS8(0,1,1,1,0,1,0,0)
12478 && (INSNA(7,4) & BITS4(1,1,0,1)) == BITS4(0,0,0,1)) {
12479 rN = INSNA(3,0);
12480 rDlo = INSNA(15,12);
12481 rDhi = INSNA(19,16);
12482 rM = INSNA(11,8);
12483 m_swap = ( INSNA(5,5) & 1 ) == 1;
12484 if (rDlo != 15 && rDhi != 15
12485 && rN != 15 && rM != 15 && rDlo != rDhi)
12486 gate = True;
12490 if (gate) {
12491 IRTemp irt_rM = newTemp(Ity_I32);
12492 IRTemp irt_rN = newTemp(Ity_I32);
12493 IRTemp irt_rDhi = newTemp(Ity_I32);
12494 IRTemp irt_rDlo = newTemp(Ity_I32);
12495 IRTemp op_2 = newTemp(Ity_I32);
12496 IRTemp pr_1 = newTemp(Ity_I64);
12497 IRTemp pr_2 = newTemp(Ity_I64);
12498 IRTemp result = newTemp(Ity_I64);
12499 IRTemp resHi = newTemp(Ity_I32);
12500 IRTemp resLo = newTemp(Ity_I32);
12501 assign( irt_rM, isT ? getIRegT(rM) : getIRegA(rM));
12502 assign( irt_rN, isT ? getIRegT(rN) : getIRegA(rN));
12503 assign( irt_rDhi, isT ? getIRegT(rDhi) : getIRegA(rDhi));
12504 assign( irt_rDlo, isT ? getIRegT(rDlo) : getIRegA(rDlo));
12505 assign( op_2, genROR32(irt_rM, m_swap ? 16 : 0) );
12506 assign( pr_1, binop(Iop_MullS32,
12507 unop(Iop_16Sto32,
12508 unop(Iop_32to16, mkexpr(irt_rN))
12510 unop(Iop_16Sto32,
12511 unop(Iop_32to16, mkexpr(op_2))
12515 assign( pr_2, binop(Iop_MullS32,
12516 binop(Iop_Sar32, mkexpr(irt_rN), mkU8(16)),
12517 binop(Iop_Sar32, mkexpr(op_2), mkU8(16))
12520 assign( result, binop(Iop_Add64,
12521 binop(Iop_Add64,
12522 mkexpr(pr_1),
12523 mkexpr(pr_2)
12525 binop(Iop_32HLto64,
12526 mkexpr(irt_rDhi),
12527 mkexpr(irt_rDlo)
12531 assign( resHi, unop(Iop_64HIto32, mkexpr(result)) );
12532 assign( resLo, unop(Iop_64to32, mkexpr(result)) );
12533 if (isT) {
12534 putIRegT( rDhi, mkexpr(resHi), condT );
12535 putIRegT( rDlo, mkexpr(resLo), condT );
12536 } else {
12537 putIRegA( rDhi, mkexpr(resHi), condT, Ijk_Boring );
12538 putIRegA( rDlo, mkexpr(resLo), condT, Ijk_Boring );
12540 DIP("smlald%c%s r%u, r%u, r%u, r%u\n",
12541 m_swap ? 'x' : ' ', nCC(conq), rDlo, rDhi, rN, rM);
12542 return True;
12544 /* fall through */
12547 /* -------------- smlsld{x}<c> <RdLo>,<RdHi>,<Rn>,<Rm> ---------------- */
12549 UInt rN = 99, rDlo = 99, rDhi = 99, rM = 99;
12550 Bool m_swap = False;
12551 Bool gate = False;
12553 if (isT) {
12554 if ((INSNT0(15,4) == 0xFBD &&
12555 (INSNT1(7,4) & BITS4(1,1,1,0)) == BITS4(1,1,0,0))) {
12556 rN = INSNT0(3,0);
12557 rDlo = INSNT1(15,12);
12558 rDhi = INSNT1(11,8);
12559 rM = INSNT1(3,0);
12560 m_swap = (INSNT1(4,4) & 1) == 1;
12561 if (!isBadRegT(rDlo) && !isBadRegT(rDhi) && !isBadRegT(rN) &&
12562 !isBadRegT(rM) && rDhi != rDlo)
12563 gate = True;
12565 } else {
12566 if (INSNA(27,20) == BITS8(0,1,1,1,0,1,0,0) &&
12567 (INSNA(7,4) & BITS4(1,1,0,1)) == BITS4(0,1,0,1)) {
12568 rN = INSNA(3,0);
12569 rDlo = INSNA(15,12);
12570 rDhi = INSNA(19,16);
12571 rM = INSNA(11,8);
12572 m_swap = (INSNA(5,5) & 1) == 1;
12573 if (rDlo != 15 && rDhi != 15 &&
12574 rN != 15 && rM != 15 && rDlo != rDhi)
12575 gate = True;
12578 if (gate) {
12579 IRTemp irt_rM = newTemp(Ity_I32);
12580 IRTemp irt_rN = newTemp(Ity_I32);
12581 IRTemp irt_rDhi = newTemp(Ity_I32);
12582 IRTemp irt_rDlo = newTemp(Ity_I32);
12583 IRTemp op_2 = newTemp(Ity_I32);
12584 IRTemp pr_1 = newTemp(Ity_I64);
12585 IRTemp pr_2 = newTemp(Ity_I64);
12586 IRTemp result = newTemp(Ity_I64);
12587 IRTemp resHi = newTemp(Ity_I32);
12588 IRTemp resLo = newTemp(Ity_I32);
12589 assign( irt_rM, isT ? getIRegT(rM) : getIRegA(rM) );
12590 assign( irt_rN, isT ? getIRegT(rN) : getIRegA(rN) );
12591 assign( irt_rDhi, isT ? getIRegT(rDhi) : getIRegA(rDhi) );
12592 assign( irt_rDlo, isT ? getIRegT(rDlo) : getIRegA(rDlo) );
12593 assign( op_2, genROR32(irt_rM, m_swap ? 16 : 0) );
12594 assign( pr_1, binop(Iop_MullS32,
12595 unop(Iop_16Sto32,
12596 unop(Iop_32to16, mkexpr(irt_rN))
12598 unop(Iop_16Sto32,
12599 unop(Iop_32to16, mkexpr(op_2))
12603 assign( pr_2, binop(Iop_MullS32,
12604 binop(Iop_Sar32, mkexpr(irt_rN), mkU8(16)),
12605 binop(Iop_Sar32, mkexpr(op_2), mkU8(16))
12608 assign( result, binop(Iop_Add64,
12609 binop(Iop_Sub64,
12610 mkexpr(pr_1),
12611 mkexpr(pr_2)
12613 binop(Iop_32HLto64,
12614 mkexpr(irt_rDhi),
12615 mkexpr(irt_rDlo)
12619 assign( resHi, unop(Iop_64HIto32, mkexpr(result)) );
12620 assign( resLo, unop(Iop_64to32, mkexpr(result)) );
12621 if (isT) {
12622 putIRegT( rDhi, mkexpr(resHi), condT );
12623 putIRegT( rDlo, mkexpr(resLo), condT );
12624 } else {
12625 putIRegA( rDhi, mkexpr(resHi), condT, Ijk_Boring );
12626 putIRegA( rDlo, mkexpr(resLo), condT, Ijk_Boring );
12628 DIP("smlsld%c%s r%u, r%u, r%u, r%u\n",
12629 m_swap ? 'x' : ' ', nCC(conq), rDlo, rDhi, rN, rM);
12630 return True;
12632 /* fall through */
12635 /* ---------- Doesn't match anything. ---------- */
12636 return False;
12638 # undef INSNA
12639 # undef INSNT0
12640 # undef INSNT1
12644 /*------------------------------------------------------------*/
12645 /*--- V8 instructions ---*/
12646 /*------------------------------------------------------------*/
12648 /* Break a V128-bit value up into four 32-bit ints. */
12650 static void breakupV128to32s ( IRTemp t128,
12651 /*OUTs*/
12652 IRTemp* t3, IRTemp* t2,
12653 IRTemp* t1, IRTemp* t0 )
12655 IRTemp hi64 = newTemp(Ity_I64);
12656 IRTemp lo64 = newTemp(Ity_I64);
12657 assign( hi64, unop(Iop_V128HIto64, mkexpr(t128)) );
12658 assign( lo64, unop(Iop_V128to64, mkexpr(t128)) );
12660 vassert(t0 && *t0 == IRTemp_INVALID);
12661 vassert(t1 && *t1 == IRTemp_INVALID);
12662 vassert(t2 && *t2 == IRTemp_INVALID);
12663 vassert(t3 && *t3 == IRTemp_INVALID);
12665 *t0 = newTemp(Ity_I32);
12666 *t1 = newTemp(Ity_I32);
12667 *t2 = newTemp(Ity_I32);
12668 *t3 = newTemp(Ity_I32);
12669 assign( *t0, unop(Iop_64to32, mkexpr(lo64)) );
12670 assign( *t1, unop(Iop_64HIto32, mkexpr(lo64)) );
12671 assign( *t2, unop(Iop_64to32, mkexpr(hi64)) );
12672 assign( *t3, unop(Iop_64HIto32, mkexpr(hi64)) );
12676 /* Both ARM and Thumb */
12678 /* Translate a V8 instruction. If successful, returns True and *dres
12679 may or may not be updated. If unsuccessful, returns False and
12680 doesn't change *dres nor create any IR.
12682 The Thumb and ARM encodings are potentially different. In both
12683 ARM and Thumb mode, the caller must pass the entire 32 bits of
12684 the instruction. Callers may pass any instruction; this function
12685 ignores anything it doesn't recognise.
12687 Caller must supply an IRTemp 'condT' holding the gating condition,
12688 or IRTemp_INVALID indicating the insn is always executed.
12690 If we are decoding an ARM instruction which is in the NV space
12691 then it is expected that condT will be IRTemp_INVALID, and that is
12692 asserted for. That condition is ensured by the logic near the top
12693 of disInstr_ARM_WRK, that sets up condT.
12695 When decoding for Thumb, the caller must pass the ITState pre/post
12696 this instruction, so that we can generate a SIGILL in the cases where
12697 the instruction may not be in an IT block. When decoding for ARM,
12698 both of these must be IRTemp_INVALID.
12700 Finally, the caller must indicate whether this occurs in ARM or in
12701 Thumb code.
12703 static Bool decode_V8_instruction (
12704 /*MOD*/DisResult* dres,
12705 UInt insnv8,
12706 IRTemp condT,
12707 Bool isT,
12708 IRTemp old_itstate,
12709 IRTemp new_itstate
12712 # define INSN(_bMax,_bMin) SLICE_UInt(insnv8, (_bMax), (_bMin))
12714 if (isT) {
12715 vassert(old_itstate != IRTemp_INVALID);
12716 vassert(new_itstate != IRTemp_INVALID);
12717 } else {
12718 vassert(old_itstate == IRTemp_INVALID);
12719 vassert(new_itstate == IRTemp_INVALID);
12722 /* ARMCondcode 'conq' is only used for debug printing and for no other
12723 purpose. For ARM, this is simply the top 4 bits of the instruction.
12724 For Thumb, the condition is not (really) known until run time, and so
12725 we set it to ARMCondAL in order that printing of these instructions
12726 does not show any condition. */
12727 ARMCondcode conq;
12728 if (isT) {
12729 conq = ARMCondAL;
12730 } else {
12731 conq = (ARMCondcode)INSN(31,28);
12732 if (conq == ARMCondNV || conq == ARMCondAL) {
12733 vassert(condT == IRTemp_INVALID);
12734 } else {
12735 vassert(condT != IRTemp_INVALID);
12737 vassert(conq >= ARMCondEQ && conq <= ARMCondNV);
12740 /* ----------- {AESD, AESE, AESMC, AESIMC}.8 q_q ----------- */
12741 /* 31 27 23 21 19 17 15 11 7 3
12742 T1: 1111 1111 1 D 11 sz 00 d 0011 00 M 0 m AESE Qd, Qm
12743 A1: 1111 0011 1 D 11 sz 00 d 0011 00 M 0 m AESE Qd, Qm
12745 T1: 1111 1111 1 D 11 sz 00 d 0011 01 M 0 m AESD Qd, Qm
12746 A1: 1111 0011 1 D 11 sz 00 d 0011 01 M 0 m AESD Qd, Qm
12748 T1: 1111 1111 1 D 11 sz 00 d 0011 10 M 0 m AESMC Qd, Qm
12749 A1: 1111 0011 1 D 11 sz 00 d 0011 10 M 0 m AESMC Qd, Qm
12751 T1: 1111 1111 1 D 11 sz 00 d 0011 11 M 0 m AESIMC Qd, Qm
12752 A1: 1111 0011 1 D 11 sz 00 d 0011 11 M 0 m AESIMC Qd, Qm
12754 sz must be 00
12755 ARM encoding is in NV space.
12756 In Thumb mode, we must not be in an IT block.
12759 UInt regD = 99, regM = 99, opc = 4/*invalid*/;
12760 Bool gate = True;
12762 UInt high9 = isT ? BITS9(1,1,1,1,1,1,1,1,1) : BITS9(1,1,1,1,0,0,1,1,1);
12763 if (INSN(31,23) == high9 && INSN(21,16) == BITS6(1,1,0,0,0,0)
12764 && INSN(11,8) == BITS4(0,0,1,1) && INSN(4,4) == 0) {
12765 UInt bitD = INSN(22,22);
12766 UInt fldD = INSN(15,12);
12767 UInt bitM = INSN(5,5);
12768 UInt fldM = INSN(3,0);
12769 opc = INSN(7,6);
12770 regD = (bitD << 4) | fldD;
12771 regM = (bitM << 4) | fldM;
12773 if ((regD & 1) == 1 || (regM & 1) == 1)
12774 gate = False;
12776 if (gate) {
12777 if (isT) {
12778 gen_SIGILL_T_if_in_ITBlock(old_itstate, new_itstate);
12780 /* In ARM mode, this is statically unconditional. In Thumb mode,
12781 this must be dynamically unconditional, and we've SIGILLd if not.
12782 In either case we can create unconditional IR. */
12783 IRTemp op1 = newTemp(Ity_V128);
12784 IRTemp op2 = newTemp(Ity_V128);
12785 IRTemp src = newTemp(Ity_V128);
12786 IRTemp res = newTemp(Ity_V128);
12787 assign(op1, getQReg(regD >> 1));
12788 assign(op2, getQReg(regM >> 1));
12789 assign(src, opc == BITS2(0,0) || opc == BITS2(0,1)
12790 ? binop(Iop_XorV128, mkexpr(op1), mkexpr(op2))
12791 : mkexpr(op2));
12793 void* helpers[4]
12794 = { &armg_dirtyhelper_AESE, &armg_dirtyhelper_AESD,
12795 &armg_dirtyhelper_AESMC, &armg_dirtyhelper_AESIMC };
12796 const HChar* hNames[4]
12797 = { "armg_dirtyhelper_AESE", "armg_dirtyhelper_AESD",
12798 "armg_dirtyhelper_AESMC", "armg_dirtyhelper_AESIMC" };
12799 const HChar* iNames[4]
12800 = { "aese", "aesd", "aesmc", "aesimc" };
12802 vassert(opc >= 0 && opc <= 3);
12803 void* helper = helpers[opc];
12804 const HChar* hname = hNames[opc];
12806 IRTemp w32_3, w32_2, w32_1, w32_0;
12807 w32_3 = w32_2 = w32_1 = w32_0 = IRTemp_INVALID;
12808 breakupV128to32s( src, &w32_3, &w32_2, &w32_1, &w32_0 );
12810 IRDirty* di
12811 = unsafeIRDirty_1_N( res, 0/*regparms*/, hname, helper,
12812 mkIRExprVec_5(
12813 IRExpr_VECRET(),
12814 mkexpr(w32_3), mkexpr(w32_2),
12815 mkexpr(w32_1), mkexpr(w32_0)) );
12816 stmt(IRStmt_Dirty(di));
12818 putQReg(regD >> 1, mkexpr(res), IRTemp_INVALID);
12819 DIP("%s.8 q%d, q%d\n", iNames[opc], regD >> 1, regM >> 1);
12820 return True;
12822 /* fall through */
12825 /* ----------- SHA 3-reg insns q_q_q ----------- */
12827 31 27 23 19 15 11 7 3
12828 T1: 1110 1111 0 D 00 n d 1100 N Q M 0 m SHA1C Qd, Qn, Qm ix=0
12829 A1: 1111 0010 ----------------------------
12831 T1: 1110 1111 0 D 01 n d 1100 N Q M 0 m SHA1P Qd, Qn, Qm ix=1
12832 A1: 1111 0010 ----------------------------
12834 T1: 1110 1111 0 D 10 n d 1100 N Q M 0 m SHA1M Qd, Qn, Qm ix=2
12835 A1: 1111 0010 ----------------------------
12837 T1: 1110 1111 0 D 11 n d 1100 N Q M 0 m SHA1SU0 Qd, Qn, Qm ix=3
12838 A1: 1111 0010 ----------------------------
12839 (that's a complete set of 4, based on insn[21,20])
12841 T1: 1111 1111 0 D 00 n d 1100 N Q M 0 m SHA256H Qd, Qn, Qm ix=4
12842 A1: 1111 0011 ----------------------------
12844 T1: 1111 1111 0 D 01 n d 1100 N Q M 0 m SHA256H2 Qd, Qn, Qm ix=5
12845 A1: 1111 0011 ----------------------------
12847 T1: 1111 1111 0 D 10 n d 1100 N Q M 0 m SHA256SU1 Qd, Qn, Qm ix=6
12848 A1: 1111 0011 ----------------------------
12849 (3/4 of a complete set of 4, based on insn[21,20])
12851 Q must be 1. Same comments about conditionalisation as for the AES
12852 group above apply.
12855 UInt ix = 8; /* invalid */
12856 Bool gate = False;
12858 UInt hi9_sha1 = isT ? BITS9(1,1,1,0,1,1,1,1,0)
12859 : BITS9(1,1,1,1,0,0,1,0,0);
12860 UInt hi9_sha256 = isT ? BITS9(1,1,1,1,1,1,1,1,0)
12861 : BITS9(1,1,1,1,0,0,1,1,0);
12862 if ((INSN(31,23) == hi9_sha1 || INSN(31,23) == hi9_sha256)
12863 && INSN(11,8) == BITS4(1,1,0,0)
12864 && INSN(6,6) == 1 && INSN(4,4) == 0) {
12865 ix = INSN(21,20);
12866 if (INSN(31,23) == hi9_sha256)
12867 ix |= 4;
12868 if (ix < 7)
12869 gate = True;
12872 UInt regN = (INSN(7,7) << 4) | INSN(19,16);
12873 UInt regD = (INSN(22,22) << 4) | INSN(15,12);
12874 UInt regM = (INSN(5,5) << 4) | INSN(3,0);
12875 if ((regD & 1) == 1 || (regM & 1) == 1 || (regN & 1) == 1)
12876 gate = False;
12878 if (gate) {
12879 vassert(ix >= 0 && ix < 7);
12880 const HChar* inames[7]
12881 = { "sha1c", "sha1p", "sha1m", "sha1su0",
12882 "sha256h", "sha256h2", "sha256su1" };
12883 void(*helpers[7])(V128*,UInt,UInt,UInt,UInt,UInt,UInt,
12884 UInt,UInt,UInt,UInt,UInt,UInt)
12885 = { &armg_dirtyhelper_SHA1C, &armg_dirtyhelper_SHA1P,
12886 &armg_dirtyhelper_SHA1M, &armg_dirtyhelper_SHA1SU0,
12887 &armg_dirtyhelper_SHA256H, &armg_dirtyhelper_SHA256H2,
12888 &armg_dirtyhelper_SHA256SU1 };
12889 const HChar* hnames[7]
12890 = { "armg_dirtyhelper_SHA1C", "armg_dirtyhelper_SHA1P",
12891 "armg_dirtyhelper_SHA1M", "armg_dirtyhelper_SHA1SU0",
12892 "armg_dirtyhelper_SHA256H", "armg_dirtyhelper_SHA256H2",
12893 "armg_dirtyhelper_SHA256SU1" };
12895 /* This is a really lame way to implement this, even worse than
12896 the arm64 version. But at least it works. */
12898 if (isT) {
12899 gen_SIGILL_T_if_in_ITBlock(old_itstate, new_itstate);
12902 IRTemp vD = newTemp(Ity_V128);
12903 IRTemp vN = newTemp(Ity_V128);
12904 IRTemp vM = newTemp(Ity_V128);
12905 assign(vD, getQReg(regD >> 1));
12906 assign(vN, getQReg(regN >> 1));
12907 assign(vM, getQReg(regM >> 1));
12909 IRTemp d32_3, d32_2, d32_1, d32_0;
12910 d32_3 = d32_2 = d32_1 = d32_0 = IRTemp_INVALID;
12911 breakupV128to32s( vD, &d32_3, &d32_2, &d32_1, &d32_0 );
12913 IRTemp n32_3_pre, n32_2_pre, n32_1_pre, n32_0_pre;
12914 n32_3_pre = n32_2_pre = n32_1_pre = n32_0_pre = IRTemp_INVALID;
12915 breakupV128to32s( vN, &n32_3_pre, &n32_2_pre, &n32_1_pre, &n32_0_pre );
12917 IRTemp m32_3, m32_2, m32_1, m32_0;
12918 m32_3 = m32_2 = m32_1 = m32_0 = IRTemp_INVALID;
12919 breakupV128to32s( vM, &m32_3, &m32_2, &m32_1, &m32_0 );
12921 IRTemp n32_3 = newTemp(Ity_I32);
12922 IRTemp n32_2 = newTemp(Ity_I32);
12923 IRTemp n32_1 = newTemp(Ity_I32);
12924 IRTemp n32_0 = newTemp(Ity_I32);
12926 /* Mask off any bits of the N register operand that aren't actually
12927 needed, so that Memcheck doesn't complain unnecessarily. */
12928 switch (ix) {
12929 case 0: case 1: case 2:
12930 assign(n32_3, mkU32(0));
12931 assign(n32_2, mkU32(0));
12932 assign(n32_1, mkU32(0));
12933 assign(n32_0, mkexpr(n32_0_pre));
12934 break;
12935 case 3: case 4: case 5: case 6:
12936 assign(n32_3, mkexpr(n32_3_pre));
12937 assign(n32_2, mkexpr(n32_2_pre));
12938 assign(n32_1, mkexpr(n32_1_pre));
12939 assign(n32_0, mkexpr(n32_0_pre));
12940 break;
12941 default:
12942 vassert(0);
12945 IRExpr** argvec
12946 = mkIRExprVec_13(
12947 IRExpr_VECRET(),
12948 mkexpr(d32_3), mkexpr(d32_2), mkexpr(d32_1), mkexpr(d32_0),
12949 mkexpr(n32_3), mkexpr(n32_2), mkexpr(n32_1), mkexpr(n32_0),
12950 mkexpr(m32_3), mkexpr(m32_2), mkexpr(m32_1), mkexpr(m32_0)
12953 IRTemp res = newTemp(Ity_V128);
12954 IRDirty* di = unsafeIRDirty_1_N( res, 0/*regparms*/,
12955 hnames[ix], helpers[ix], argvec );
12956 stmt(IRStmt_Dirty(di));
12957 putQReg(regD >> 1, mkexpr(res), IRTemp_INVALID);
12959 DIP("%s.8 q%u, q%u, q%u\n",
12960 inames[ix], regD >> 1, regN >> 1, regM >> 1);
12961 return True;
12963 /* fall through */
12966 /* ----------- SHA1SU1, SHA256SU0 ----------- */
12968 31 27 23 21 19 15 11 7 3
12969 T1: 1111 1111 1 D 11 1010 d 0011 10 M 0 m SHA1SU1 Qd, Qm
12970 A1: 1111 0011 ----------------------------
12972 T1: 1111 1111 1 D 11 1010 d 0011 11 M 0 m SHA256SU0 Qd, Qm
12973 A1: 1111 0011 ----------------------------
12975 Same comments about conditionalisation as for the AES group above apply.
12978 Bool gate = False;
12980 UInt hi9 = isT ? BITS9(1,1,1,1,1,1,1,1,1) : BITS9(1,1,1,1,0,0,1,1,1);
12981 if (INSN(31,23) == hi9 && INSN(21,16) == BITS6(1,1,1,0,1,0)
12982 && INSN(11,7) == BITS5(0,0,1,1,1) && INSN(4,4) == 0) {
12983 gate = True;
12986 UInt regD = (INSN(22,22) << 4) | INSN(15,12);
12987 UInt regM = (INSN(5,5) << 4) | INSN(3,0);
12988 if ((regD & 1) == 1 || (regM & 1) == 1)
12989 gate = False;
12991 Bool is_1SU1 = INSN(6,6) == 0;
12993 if (gate) {
12994 const HChar* iname
12995 = is_1SU1 ? "sha1su1" : "sha256su0";
12996 void (*helper)(V128*,UInt,UInt,UInt,UInt,UInt,UInt,UInt,UInt)
12997 = is_1SU1 ? &armg_dirtyhelper_SHA1SU1
12998 : *armg_dirtyhelper_SHA256SU0;
12999 const HChar* hname
13000 = is_1SU1 ? "armg_dirtyhelper_SHA1SU1"
13001 : "armg_dirtyhelper_SHA256SU0";
13003 if (isT) {
13004 gen_SIGILL_T_if_in_ITBlock(old_itstate, new_itstate);
13007 IRTemp vD = newTemp(Ity_V128);
13008 IRTemp vM = newTemp(Ity_V128);
13009 assign(vD, getQReg(regD >> 1));
13010 assign(vM, getQReg(regM >> 1));
13012 IRTemp d32_3, d32_2, d32_1, d32_0;
13013 d32_3 = d32_2 = d32_1 = d32_0 = IRTemp_INVALID;
13014 breakupV128to32s( vD, &d32_3, &d32_2, &d32_1, &d32_0 );
13016 IRTemp m32_3, m32_2, m32_1, m32_0;
13017 m32_3 = m32_2 = m32_1 = m32_0 = IRTemp_INVALID;
13018 breakupV128to32s( vM, &m32_3, &m32_2, &m32_1, &m32_0 );
13020 IRExpr** argvec
13021 = mkIRExprVec_9(
13022 IRExpr_VECRET(),
13023 mkexpr(d32_3), mkexpr(d32_2), mkexpr(d32_1), mkexpr(d32_0),
13024 mkexpr(m32_3), mkexpr(m32_2), mkexpr(m32_1), mkexpr(m32_0)
13027 IRTemp res = newTemp(Ity_V128);
13028 IRDirty* di = unsafeIRDirty_1_N( res, 0/*regparms*/,
13029 hname, helper, argvec );
13030 stmt(IRStmt_Dirty(di));
13031 putQReg(regD >> 1, mkexpr(res), IRTemp_INVALID);
13033 DIP("%s.8 q%u, q%u\n", iname, regD >> 1, regM >> 1);
13034 return True;
13036 /* fall through */
13039 /* ----------- SHA1H ----------- */
13041 31 27 23 21 19 15 11 7 3
13042 T1: 1111 1111 1 D 11 1001 d 0010 11 M 0 m SHA1H Qd, Qm
13043 A1: 1111 0011 ----------------------------
13045 Same comments about conditionalisation as for the AES group above apply.
13048 Bool gate = False;
13050 UInt hi9 = isT ? BITS9(1,1,1,1,1,1,1,1,1) : BITS9(1,1,1,1,0,0,1,1,1);
13051 if (INSN(31,23) == hi9 && INSN(21,16) == BITS6(1,1,1,0,0,1)
13052 && INSN(11,6) == BITS6(0,0,1,0,1,1) && INSN(4,4) == 0) {
13053 gate = True;
13056 UInt regD = (INSN(22,22) << 4) | INSN(15,12);
13057 UInt regM = (INSN(5,5) << 4) | INSN(3,0);
13058 if ((regD & 1) == 1 || (regM & 1) == 1)
13059 gate = False;
13061 if (gate) {
13062 const HChar* iname = "sha1h";
13063 void (*helper)(V128*,UInt,UInt,UInt,UInt) = &armg_dirtyhelper_SHA1H;
13064 const HChar* hname = "armg_dirtyhelper_SHA1H";
13066 if (isT) {
13067 gen_SIGILL_T_if_in_ITBlock(old_itstate, new_itstate);
13070 IRTemp vM = newTemp(Ity_V128);
13071 assign(vM, getQReg(regM >> 1));
13073 IRTemp m32_3, m32_2, m32_1, m32_0;
13074 m32_3 = m32_2 = m32_1 = m32_0 = IRTemp_INVALID;
13075 breakupV128to32s( vM, &m32_3, &m32_2, &m32_1, &m32_0 );
13076 /* m32_3, m32_2, m32_1 are just abandoned. No harm; iropt will
13077 remove them. */
13079 IRExpr* zero = mkU32(0);
13080 IRExpr** argvec = mkIRExprVec_5(IRExpr_VECRET(),
13081 zero, zero, zero, mkexpr(m32_0));
13083 IRTemp res = newTemp(Ity_V128);
13084 IRDirty* di = unsafeIRDirty_1_N( res, 0/*regparms*/,
13085 hname, helper, argvec );
13086 stmt(IRStmt_Dirty(di));
13087 putQReg(regD >> 1, mkexpr(res), IRTemp_INVALID);
13089 DIP("%s.8 q%u, q%u\n", iname, regD >> 1, regM >> 1);
13090 return True;
13092 /* fall through */
13095 /* ----------- VMULL.P64 ----------- */
13097 31 27 23 21 19 15 11 7 3
13098 T2: 1110 1111 1 D 10 n d 1110 N 0 M 0 m
13099 A2: 1111 0010 -------------------------
13101 The ARM documentation is pretty difficult to follow here.
13102 Same comments about conditionalisation as for the AES group above apply.
13105 Bool gate = False;
13107 UInt hi9 = isT ? BITS9(1,1,1,0,1,1,1,1,1) : BITS9(1,1,1,1,0,0,1,0,1);
13108 if (INSN(31,23) == hi9 && INSN(21,20) == BITS2(1,0)
13109 && INSN(11,8) == BITS4(1,1,1,0)
13110 && INSN(6,6) == 0 && INSN(4,4) == 0) {
13111 gate = True;
13114 UInt regN = (INSN(7,7) << 4) | INSN(19,16);
13115 UInt regD = (INSN(22,22) << 4) | INSN(15,12);
13116 UInt regM = (INSN(5,5) << 4) | INSN(3,0);
13118 if ((regD & 1) == 1)
13119 gate = False;
13121 if (gate) {
13122 const HChar* iname = "vmull";
13123 void (*helper)(V128*,UInt,UInt,UInt,UInt) = &armg_dirtyhelper_VMULLP64;
13124 const HChar* hname = "armg_dirtyhelper_VMULLP64";
13126 if (isT) {
13127 gen_SIGILL_T_if_in_ITBlock(old_itstate, new_itstate);
13130 IRTemp srcN = newTemp(Ity_I64);
13131 IRTemp srcM = newTemp(Ity_I64);
13132 assign(srcN, getDRegI64(regN));
13133 assign(srcM, getDRegI64(regM));
13135 IRExpr** argvec = mkIRExprVec_5(IRExpr_VECRET(),
13136 unop(Iop_64HIto32, mkexpr(srcN)),
13137 unop(Iop_64to32, mkexpr(srcN)),
13138 unop(Iop_64HIto32, mkexpr(srcM)),
13139 unop(Iop_64to32, mkexpr(srcM)));
13141 IRTemp res = newTemp(Ity_V128);
13142 IRDirty* di = unsafeIRDirty_1_N( res, 0/*regparms*/,
13143 hname, helper, argvec );
13144 stmt(IRStmt_Dirty(di));
13145 putQReg(regD >> 1, mkexpr(res), IRTemp_INVALID);
13147 DIP("%s.p64 q%u, q%u, w%u\n", iname, regD >> 1, regN, regM);
13148 return True;
13150 /* fall through */
13153 /* ----------- LDA{,B,H}, STL{,B,H} ----------- */
13154 /* 31 27 23 19 15 11 7 3
13155 A1: cond 0001 1001 n t 1100 1001 1111 LDA Rt, [Rn]
13156 A1: cond 0001 1111 n t 1100 1001 1111 LDAH Rt, [Rn]
13157 A1: cond 0001 1101 n t 1100 1001 1111 LDAB Rt, [Rn]
13159 A1: cond 0001 1000 n 1111 1100 1001 t STL Rt, [Rn]
13160 A1: cond 0001 1110 n 1111 1100 1001 t STLH Rt, [Rn]
13161 A1: cond 0001 1100 n 1111 1100 1001 t STLB Rt, [Rn]
13163 T1: 1110 1000 1101 n t 1111 1010 1111 LDA Rt, [Rn]
13164 T1: 1110 1000 1101 n t 1111 1001 1111 LDAH Rt, [Rn]
13165 T1: 1110 1000 1101 n t 1111 1000 1111 LDAB Rt, [Rn]
13167 T1: 1110 1000 1100 n t 1111 1010 1111 STL Rt, [Rn]
13168 T1: 1110 1000 1100 n t 1111 1001 1111 STLH Rt, [Rn]
13169 T1: 1110 1000 1100 n t 1111 1000 1111 STLB Rt, [Rn]
13172 UInt nn = 16; // invalid
13173 UInt tt = 16; // invalid
13174 UInt szBlg2 = 4; // invalid
13175 Bool isLoad = False;
13176 Bool gate = False;
13177 if (isT) {
13178 if (INSN(31,21) == BITS11(1,1,1,0,1,0,0,0,1,1,0)
13179 && INSN(11,6) == BITS6(1,1,1,1,1,0)
13180 && INSN(3,0) == BITS4(1,1,1,1)) {
13181 nn = INSN(19,16);
13182 tt = INSN(15,12);
13183 isLoad = INSN(20,20) == 1;
13184 szBlg2 = INSN(5,4); // 00:B 01:H 10:W 11:invalid
13185 gate = szBlg2 != BITS2(1,1) && tt != 15 && nn != 15;
13187 } else {
13188 if (INSN(27,23) == BITS5(0,0,0,1,1) && INSN(20,20) == 1
13189 && INSN(11,0) == BITS12(1,1,0,0,1,0,0,1,1,1,1,1)) {
13190 nn = INSN(19,16);
13191 tt = INSN(15,12);
13192 isLoad = True;
13193 szBlg2 = INSN(22,21); // 10:B 11:H 00:W 01:invalid
13194 gate = szBlg2 != BITS2(0,1) && tt != 15 && nn != 15;
13196 else
13197 if (INSN(27,23) == BITS5(0,0,0,1,1) && INSN(20,20) == 0
13198 && INSN(15,4) == BITS12(1,1,1,1,1,1,0,0,1,0,0,1)) {
13199 nn = INSN(19,16);
13200 tt = INSN(3,0);
13201 isLoad = False;
13202 szBlg2 = INSN(22,21); // 10:B 11:H 00:W 01:invalid
13203 gate = szBlg2 != BITS2(0,1) && tt != 15 && nn != 15;
13205 if (gate) {
13206 // Rearrange szBlg2 bits to be the same as the Thumb case
13207 switch (szBlg2) {
13208 case 2: szBlg2 = 0; break;
13209 case 3: szBlg2 = 1; break;
13210 case 0: szBlg2 = 2; break;
13211 default: /*NOTREACHED*/vassert(0);
13215 // For both encodings, the instruction is guarded by condT, which
 13216 // is passed in by the caller.  Note that the loads and stores
13217 // are conditional, so we don't have to truncate the IRSB at this
13218 // point, but the fence is unconditional. There's no way to
13219 // represent a conditional fence without a side exit, but it
13220 // doesn't matter from a correctness standpoint that it is
13221 // unconditional -- it just loses a bit of performance in the
13222 // case where the condition doesn't hold.
13223 if (gate) {
13224 vassert(szBlg2 <= 2 && nn <= 14 && tt <= 14);
13225 IRExpr* ea = llGetIReg(nn);
13226 if (isLoad) {
13227 static IRLoadGOp cvt[3]
13228 = { ILGop_8Uto32, ILGop_16Uto32, ILGop_Ident32 };
13229 IRTemp data = newTemp(Ity_I32);
13230 loadGuardedLE(data, cvt[szBlg2], ea, mkU32(0)/*alt*/, condT);
13231 if (isT) {
13232 putIRegT(tt, mkexpr(data), condT);
13233 } else {
13234 putIRegA(tt, mkexpr(data), condT, Ijk_INVALID);
13236 stmt(IRStmt_MBE(Imbe_Fence));
13237 } else {
13238 stmt(IRStmt_MBE(Imbe_Fence));
13239 IRExpr* data = llGetIReg(tt);
13240 switch (szBlg2) {
13241 case 0: data = unop(Iop_32to8, data); break;
13242 case 1: data = unop(Iop_32to16, data); break;
13243 case 2: break;
13244 default: vassert(0);
13246 storeGuardedLE(ea, data, condT);
13248 const HChar* ldNames[3] = { "ldab", "ldah", "lda" };
13249 const HChar* stNames[3] = { "stlb", "stlh", "stl" };
13250 DIP("%s r%u, [r%u]", (isLoad ? ldNames : stNames)[szBlg2], tt, nn);
13251 return True;
13253 /* else fall through */
13256 /* ----------- LDAEX{,B,H,D}, STLEX{,B,H,D} ----------- */
13257 /* 31 27 23 19 15 11 7 3
13258 A1: cond 0001 1101 n t 1110 1001 1111 LDAEXB Rt, [Rn]
13259 A1: cond 0001 1111 n t 1110 1001 1111 LDAEXH Rt, [Rn]
13260 A1: cond 0001 1001 n t 1110 1001 1111 LDAEX Rt, [Rn]
13261 A1: cond 0001 1011 n t 1110 1001 1111 LDAEXD Rt, Rt+1, [Rn]
13263 A1: cond 0001 1100 n d 1110 1001 t STLEXB Rd, Rt, [Rn]
13264 A1: cond 0001 1110 n d 1110 1001 t STLEXH Rd, Rt, [Rn]
13265 A1: cond 0001 1000 n d 1110 1001 t STLEX Rd, Rt, [Rn]
13266 A1: cond 0001 1010 n d 1110 1001 t STLEXD Rd, Rt, Rt+1, [Rn]
13268 31 28 24 19 15 11 7 3
13269 T1: 111 0100 01101 n t 1111 1100 1111 LDAEXB Rt, [Rn]
13270 T1: 111 0100 01101 n t 1111 1101 1111 LDAEXH Rt, [Rn]
13271 T1: 111 0100 01101 n t 1111 1110 1111 LDAEX Rt, [Rn]
13272 T1: 111 0100 01101 n t t2 1111 1111 LDAEXD Rt, Rt2, [Rn]
13274 T1: 111 0100 01100 n t 1111 1100 d STLEXB Rd, Rt, [Rn]
13275 T1: 111 0100 01100 n t 1111 1101 d STLEXH Rd, Rt, [Rn]
13276 T1: 111 0100 01100 n t 1111 1110 d STLEX Rd, Rt, [Rn]
13277 T1: 111 0100 01100 n t t2 1111 d STLEXD Rd, Rt, Rt2, [Rn]
13280 UInt nn = 16; // invalid
13281 UInt tt = 16; // invalid
13282 UInt tt2 = 16; // invalid
13283 UInt dd = 16; // invalid
13284 UInt szBlg2 = 4; // invalid
13285 Bool isLoad = False;
13286 Bool gate = False;
13287 if (isT) {
13288 if (INSN(31,21) == BITS11(1,1,1,0,1,0,0,0,1,1,0)
13289 && INSN(7,6) == BITS2(1,1)) {
13290 isLoad = INSN(20,20) == 1;
13291 nn = INSN(19,16);
13292 tt = INSN(15,12);
13293 tt2 = INSN(11,8);
13294 szBlg2 = INSN(5,4);
13295 dd = INSN(3,0);
13296 gate = True;
13297 if (szBlg2 < BITS2(1,1) && tt2 != BITS4(1,1,1,1)) gate = False;
13298 if (isLoad && dd != BITS4(1,1,1,1)) gate = False;
13299 // re-set not-used register values to invalid
13300 if (szBlg2 < BITS2(1,1)) tt2 = 16;
13301 if (isLoad) dd = 16;
13303 } else {
13304 /* ARM encoding. Do the load and store cases separately as
13305 the register numbers are in different places and a combined decode
13306 is too confusing. */
13307 if (INSN(27,23) == BITS5(0,0,0,1,1) && INSN(20,20) == 1
13308 && INSN(11,0) == BITS12(1,1,1,0,1,0,0,1,1,1,1,1)) {
13309 szBlg2 = INSN(22,21);
13310 isLoad = True;
13311 nn = INSN(19,16);
13312 tt = INSN(15,12);
13313 gate = True;
13315 else
13316 if (INSN(27,23) == BITS5(0,0,0,1,1) && INSN(20,20) == 0
13317 && INSN(11,4) == BITS8(1,1,1,0,1,0,0,1)) {
13318 szBlg2 = INSN(22,21);
13319 isLoad = False;
13320 nn = INSN(19,16);
13321 dd = INSN(15,12);
13322 tt = INSN(3,0);
13323 gate = True;
13325 if (gate) {
13326 // Rearrange szBlg2 bits to be the same as the Thumb case
13327 switch (szBlg2) {
13328 case 2: szBlg2 = 0; break;
13329 case 3: szBlg2 = 1; break;
13330 case 0: szBlg2 = 2; break;
13331 case 1: szBlg2 = 3; break;
13332 default: /*NOTREACHED*/vassert(0);
13336 // Perform further checks on register numbers
13337 if (gate) {
13338 /**/ if (isT && isLoad) {
13339 // Thumb load
13340 if (szBlg2 < 3) {
13341 if (! (tt != 13 && tt != 15 && nn != 15)) gate = False;
13342 } else {
13343 if (! (tt != 13 && tt != 15 && tt2 != 13 && tt2 != 15
13344 && tt != tt2 && nn != 15)) gate = False;
13347 else if (isT && !isLoad) {
13348 // Thumb store
13349 if (szBlg2 < 3) {
13350 if (! (dd != 13 && dd != 15 && tt != 13 && tt != 15
13351 && nn != 15 && dd != nn && dd != tt)) gate = False;
13352 } else {
13353 if (! (dd != 13 && dd != 15 && tt != 13 && tt != 15
13354 && tt2 != 13 && tt2 != 15 && nn != 15 && dd != nn
13355 && dd != tt && dd != tt2)) gate = False;
13358 else if (!isT && isLoad) {
13359 // ARM Load
13360 if (szBlg2 < 3) {
13361 if (! (tt != 15 && nn != 15)) gate = False;
13362 } else {
13363 if (! ((tt & 1) == 0 && tt != 14 && nn != 15)) gate = False;
13364 vassert(tt2 == 16/*invalid*/);
13365 tt2 = tt + 1;
13368 else if (!isT && !isLoad) {
13369 // ARM Store
13370 if (szBlg2 < 3) {
13371 if (! (dd != 15 && tt != 15 && nn != 15
13372 && dd != nn && dd != tt)) gate = False;
13373 } else {
13374 if (! (dd != 15 && (tt & 1) == 0 && tt != 14 && nn != 15
13375 && dd != nn && dd != tt && dd != tt+1)) gate = False;
13376 vassert(tt2 == 16/*invalid*/);
13377 tt2 = tt + 1;
13380 else /*NOTREACHED*/vassert(0);
13382 if (gate) {
13383 // Paranoia ..
13384 vassert(szBlg2 <= 3);
13385 if (szBlg2 < 3) { vassert(tt2 == 16/*invalid*/); }
13386 else { vassert(tt2 <= 14); }
13387 if (isLoad) { vassert(dd == 16/*invalid*/); }
13388 else { vassert(dd <= 14); }
13390 // If we're still good even after all that, generate the IR.
13391 if (gate) {
13392 /* First, go unconditional. Staying in-line is too complex. */
13393 if (isT) {
13394 vassert(condT != IRTemp_INVALID);
13395 mk_skip_over_T32_if_cond_is_false( condT );
13396 } else {
13397 if (condT != IRTemp_INVALID) {
13398 mk_skip_over_A32_if_cond_is_false( condT );
13399 condT = IRTemp_INVALID;
13402 /* Now the load or store. */
13403 IRType ty = Ity_INVALID; /* the type of the transferred data */
13404 const HChar* nm = NULL;
13405 switch (szBlg2) {
13406 case 0: nm = "b"; ty = Ity_I8; break;
13407 case 1: nm = "h"; ty = Ity_I16; break;
13408 case 2: nm = ""; ty = Ity_I32; break;
13409 case 3: nm = "d"; ty = Ity_I64; break;
13410 default: vassert(0);
13412 IRExpr* ea = isT ? getIRegT(nn) : getIRegA(nn);
13413 if (isLoad) {
13414 // LOAD. Transaction, then fence.
13415 IROp widen = Iop_INVALID;
13416 switch (szBlg2) {
13417 case 0: widen = Iop_8Uto32; break;
13418 case 1: widen = Iop_16Uto32; break;
13419 case 2: case 3: break;
13420 default: vassert(0);
13422 IRTemp res = newTemp(ty);
13423 // FIXME: assumes little-endian guest
13424 stmt( IRStmt_LLSC(Iend_LE, res, ea, NULL/*this is a load*/) );
13426 # define PUT_IREG(_nnz, _eez) \
13427 do { vassert((_nnz) <= 14); /* no writes to the PC */ \
13428 if (isT) { putIRegT((_nnz), (_eez), IRTemp_INVALID); } \
13429 else { putIRegA((_nnz), (_eez), \
13430 IRTemp_INVALID, Ijk_Boring); } } while(0)
13431 if (ty == Ity_I64) {
13432 // FIXME: assumes little-endian guest
13433 PUT_IREG(tt, unop(Iop_64to32, mkexpr(res)));
13434 PUT_IREG(tt2, unop(Iop_64HIto32, mkexpr(res)));
13435 } else {
13436 PUT_IREG(tt, widen == Iop_INVALID
13437 ? mkexpr(res) : unop(widen, mkexpr(res)));
13439 stmt(IRStmt_MBE(Imbe_Fence));
13440 if (ty == Ity_I64) {
13441 DIP("ldrex%s%s r%u, r%u, [r%u]\n",
13442 nm, isT ? "" : nCC(conq), tt, tt2, nn);
13443 } else {
13444 DIP("ldrex%s%s r%u, [r%u]\n", nm, isT ? "" : nCC(conq), tt, nn);
13446 # undef PUT_IREG
13447 } else {
13448 // STORE. Fence, then transaction.
13449 IRTemp resSC1, resSC32, data;
13450 IROp narrow = Iop_INVALID;
13451 switch (szBlg2) {
13452 case 0: narrow = Iop_32to8; break;
13453 case 1: narrow = Iop_32to16; break;
13454 case 2: case 3: break;
13455 default: vassert(0);
13457 stmt(IRStmt_MBE(Imbe_Fence));
13458 data = newTemp(ty);
13459 # define GET_IREG(_nnz) (isT ? getIRegT(_nnz) : getIRegA(_nnz))
13460 assign(data,
13461 ty == Ity_I64
13462 // FIXME: assumes little-endian guest
13463 ? binop(Iop_32HLto64, GET_IREG(tt2), GET_IREG(tt))
13464 : narrow == Iop_INVALID
13465 ? GET_IREG(tt)
13466 : unop(narrow, GET_IREG(tt)));
13467 # undef GET_IREG
13468 resSC1 = newTemp(Ity_I1);
13469 // FIXME: assumes little-endian guest
13470 stmt( IRStmt_LLSC(Iend_LE, resSC1, ea, mkexpr(data)) );
13472 /* Set rDD to 1 on failure, 0 on success. Currently we have
13473 resSC1 == 0 on failure, 1 on success. */
13474 resSC32 = newTemp(Ity_I32);
13475 assign(resSC32,
13476 unop(Iop_1Uto32, unop(Iop_Not1, mkexpr(resSC1))));
13477 vassert(dd <= 14); /* no writes to the PC */
13478 if (isT) {
13479 putIRegT(dd, mkexpr(resSC32), IRTemp_INVALID);
13480 } else {
13481 putIRegA(dd, mkexpr(resSC32), IRTemp_INVALID, Ijk_Boring);
13483 if (ty == Ity_I64) {
13484 DIP("strex%s%s r%u, r%u, r%u, [r%u]\n",
13485 nm, isT ? "" : nCC(conq), dd, tt, tt2, nn);
13486 } else {
13487 DIP("strex%s%s r%u, r%u, [r%u]\n",
13488 nm, isT ? "" : nCC(conq), dd, tt, nn);
13490 } /* if (isLoad) */
13491 return True;
13492 } /* if (gate) */
13493 /* else fall through */
13496 /* ----------- VSEL<c>.F64 d_d_d, VSEL<c>.F32 s_s_s ----------- */
13497 /* 31 27 22 21 19 15 11 8 7 6 5 4 3
13498 T1/A1: 1111 11100 D cc n d 101 1 N 0 M 0 m VSEL<c>.F64 Dd, Dn, Dm
13499 T1/A1: 1111 11100 D cc n d 101 0 N 0 M 0 m VSEL<c>.F32 Sd, Sn, Sm
13501 ARM encoding is in NV space.
13502 In Thumb mode, we must not be in an IT block.
13504 if (INSN(31,23) == BITS9(1,1,1,1,1,1,1,0,0) && INSN(11,9) == BITS3(1,0,1)
13505 && INSN(6,6) == 0 && INSN(4,4) == 0) {
13506 UInt bit_D = INSN(22,22);
13507 UInt fld_cc = INSN(21,20);
13508 UInt fld_n = INSN(19,16);
13509 UInt fld_d = INSN(15,12);
13510 Bool isF64 = INSN(8,8) == 1;
13511 UInt bit_N = INSN(7,7);
13512 UInt bit_M = INSN(5,5);
13513 UInt fld_m = INSN(3,0);
13515 UInt dd = isF64 ? ((bit_D << 4) | fld_d) : ((fld_d << 1) | bit_D);
13516 UInt nn = isF64 ? ((bit_N << 4) | fld_n) : ((fld_n << 1) | bit_N);
13517 UInt mm = isF64 ? ((bit_M << 4) | fld_m) : ((fld_m << 1) | bit_M);
13519 UInt cc_1 = (fld_cc >> 1) & 1;
13520 UInt cc_0 = (fld_cc >> 0) & 1;
13521 UInt cond = (fld_cc << 2) | ((cc_1 ^ cc_0) << 1) | 0;
13523 if (isT) {
13524 gen_SIGILL_T_if_in_ITBlock(old_itstate, new_itstate);
13526 /* In ARM mode, this is statically unconditional. In Thumb mode,
13527 this must be dynamically unconditional, and we've SIGILLd if not.
13528 In either case we can create unconditional IR. */
13530 IRTemp guard = newTemp(Ity_I32);
13531 assign(guard, mk_armg_calculate_condition(cond));
13532 IRExpr* srcN = (isF64 ? llGetDReg : llGetFReg)(nn);
13533 IRExpr* srcM = (isF64 ? llGetDReg : llGetFReg)(mm);
13534 IRExpr* res = IRExpr_ITE(unop(Iop_32to1, mkexpr(guard)), srcN, srcM);
13535 (isF64 ? llPutDReg : llPutFReg)(dd, res);
13537 UChar rch = isF64 ? 'd' : 'f';
13538 DIP("vsel%s.%s %c%u, %c%u, %c%u\n",
13539 nCC(cond), isF64 ? "f64" : "f32", rch, dd, rch, nn, rch, mm);
13540 return True;
13543 /* -------- VRINT{A,N,P,M}.F64 d_d, VRINT{A,N,P,M}.F32 s_s -------- */
13544 /* 31 22 21 17 15 11 8 7 5 4 3
13545 T1/A1: 111111101 D 1110 rm Vd 101 1 01 M 0 Vm VRINT{A,N,P,M}.F64 Dd, Dm
13546 T1/A1: 111111101 D 1110 rm Vd 101 0 01 M 0 Vm VRINT{A,N,P,M}.F32 Sd, Sm
13548 ARM encoding is in NV space.
13549 In Thumb mode, we must not be in an IT block.
13551 if (INSN(31,23) == BITS9(1,1,1,1,1,1,1,0,1)
13552 && INSN(21,18) == BITS4(1,1,1,0) && INSN(11,9) == BITS3(1,0,1)
13553 && INSN(7,6) == BITS2(0,1) && INSN(4,4) == 0) {
13554 UInt bit_D = INSN(22,22);
13555 UInt fld_rm = INSN(17,16);
13556 UInt fld_d = INSN(15,12);
13557 Bool isF64 = INSN(8,8) == 1;
13558 UInt bit_M = INSN(5,5);
13559 UInt fld_m = INSN(3,0);
13561 UInt dd = isF64 ? ((bit_D << 4) | fld_d) : ((fld_d << 1) | bit_D);
13562 UInt mm = isF64 ? ((bit_M << 4) | fld_m) : ((fld_m << 1) | bit_M);
13564 if (isT) {
13565 gen_SIGILL_T_if_in_ITBlock(old_itstate, new_itstate);
13567 /* In ARM mode, this is statically unconditional. In Thumb mode,
13568 this must be dynamically unconditional, and we've SIGILLd if not.
13569 In either case we can create unconditional IR. */
13571 UChar c = '?';
13572 IRRoundingMode rm = Irrm_NEAREST;
13573 switch (fld_rm) {
13574 /* The use of NEAREST for both the 'a' and 'n' cases is a bit of a
13575 kludge since it doesn't take into account the nearest-even vs
13576 nearest-away semantics. */
13577 case BITS2(0,0): c = 'a'; rm = Irrm_NEAREST; break;
13578 case BITS2(0,1): c = 'n'; rm = Irrm_NEAREST; break;
13579 case BITS2(1,0): c = 'p'; rm = Irrm_PosINF; break;
13580 case BITS2(1,1): c = 'm'; rm = Irrm_NegINF; break;
13581 default: vassert(0);
13584 IRExpr* srcM = (isF64 ? llGetDReg : llGetFReg)(mm);
13585 IRExpr* res = binop(isF64 ? Iop_RoundF64toInt : Iop_RoundF32toInt,
13586 mkU32((UInt)rm), srcM);
13587 (isF64 ? llPutDReg : llPutFReg)(dd, res);
13589 UChar rch = isF64 ? 'd' : 'f';
13590 DIP("vrint%c.%s.%s %c%u, %c%u\n",
13591 c, isF64 ? "f64" : "f32", isF64 ? "f64" : "f32", rch, dd, rch, mm);
13592 return True;
13595 /* -------- VRINT{Z,R}.F64.F64 d_d, VRINT{Z,R}.F32.F32 s_s -------- */
13596 /* 31 27 22 21 15 11 7 6 5 4 3
13597 T1: 1110 11101 D 110110 Vd 1011 op 1 M 0 Vm VRINT<r><c>.F64.F64 Dd, Dm
13598 A1: cond 11101 D 110110 Vd 1011 op 1 M 0 Vm
13600 T1: 1110 11101 D 110110 Vd 1010 op 1 M 0 Vm VRINT<r><c>.F32.F32 Sd, Sm
13601 A1: cond 11101 D 110110 Vd 1010 op 1 M 0 Vm
13603 In contrast to the VRINT variants just above, this can be conditional.
13605 if ((isT ? (INSN(31,28) == BITS4(1,1,1,0)) : True)
13606 && INSN(27,23) == BITS5(1,1,1,0,1) && INSN(21,16) == BITS6(1,1,0,1,1,0)
13607 && INSN(11,9) == BITS3(1,0,1) && INSN(6,6) == 1 && INSN(4,4) == 0) {
13608 UInt bit_D = INSN(22,22);
13609 UInt fld_Vd = INSN(15,12);
13610 Bool isF64 = INSN(8,8) == 1;
13611 Bool rToZero = INSN(7,7) == 1;
13612 UInt bit_M = INSN(5,5);
13613 UInt fld_Vm = INSN(3,0);
13614 UInt dd = isF64 ? ((bit_D << 4) | fld_Vd) : ((fld_Vd << 1) | bit_D);
13615 UInt mm = isF64 ? ((bit_M << 4) | fld_Vm) : ((fld_Vm << 1) | bit_M);
13617 if (isT) vassert(condT != IRTemp_INVALID);
13618 IRType ty = isF64 ? Ity_F64 : Ity_F32;
13619 IRTemp src = newTemp(ty);
13620 IRTemp res = newTemp(ty);
13621 assign(src, (isF64 ? getDReg : getFReg)(mm));
13623 IRTemp rm = newTemp(Ity_I32);
13624 assign(rm, rToZero ? mkU32(Irrm_ZERO)
13625 : mkexpr(mk_get_IR_rounding_mode()));
13626 assign(res, binop(isF64 ? Iop_RoundF64toInt : Iop_RoundF32toInt,
13627 mkexpr(rm), mkexpr(src)));
13628 (isF64 ? putDReg : putFReg)(dd, mkexpr(res), condT);
13630 UChar rch = isF64 ? 'd' : 'f';
13631 DIP("vrint%c.%s.%s %c%u, %c%u\n",
13632 rToZero ? 'z' : 'r',
13633 isF64 ? "f64" : "f32", isF64 ? "f64" : "f32", rch, dd, rch, mm);
13634 return True;
13637 /* ----------- VCVT{A,N,P,M}{.S32,.U32}{.F64,.F32} ----------- */
13638 /* 31 27 22 21 17 15 11 8 7 6 5 4 3
13639 T1/A1: 1111 11101 D 1111 rm Vd 101 sz op 1 M 0 Vm
13640 VCVT{A,N,P,M}{.S32,.U32}.F64 Sd, Dm
13641 VCVT{A,N,P,M}{.S32,.U32}.F32 Sd, Sm
13643 ARM encoding is in NV space.
13644 In Thumb mode, we must not be in an IT block.
13646 if (INSN(31,23) == BITS9(1,1,1,1,1,1,1,0,1) && INSN(21,18) == BITS4(1,1,1,1)
13647 && INSN(11,9) == BITS3(1,0,1) && INSN(6,6) == 1 && INSN(4,4) == 0) {
13648 UInt bit_D = INSN(22,22);
13649 UInt fld_rm = INSN(17,16);
13650 UInt fld_Vd = INSN(15,12);
13651 Bool isF64 = INSN(8,8) == 1;
13652 Bool isU = INSN(7,7) == 0;
13653 UInt bit_M = INSN(5,5);
13654 UInt fld_Vm = INSN(3,0);
13656 UInt dd = (fld_Vd << 1) | bit_D;
13657 UInt mm = isF64 ? ((bit_M << 4) | fld_Vm) : ((fld_Vm << 1) | bit_M);
13659 if (isT) {
13660 gen_SIGILL_T_if_in_ITBlock(old_itstate, new_itstate);
13662 /* In ARM mode, this is statically unconditional. In Thumb mode,
13663 this must be dynamically unconditional, and we've SIGILLd if not.
13664 In either case we can create unconditional IR. */
13666 UChar c = '?';
13667 IRRoundingMode rm = Irrm_NEAREST;
13668 switch (fld_rm) {
13669 /* The use of NEAREST for both the 'a' and 'n' cases is a bit of a
13670 kludge since it doesn't take into account the nearest-even vs
13671 nearest-away semantics. */
13672 case BITS2(0,0): c = 'a'; rm = Irrm_NEAREST; break;
13673 case BITS2(0,1): c = 'n'; rm = Irrm_NEAREST; break;
13674 case BITS2(1,0): c = 'p'; rm = Irrm_PosINF; break;
13675 case BITS2(1,1): c = 'm'; rm = Irrm_NegINF; break;
13676 default: vassert(0);
13679 IRExpr* srcM = (isF64 ? llGetDReg : llGetFReg)(mm);
13680 IRTemp res = newTemp(Ity_I32);
13682 /* The arm back end doesn't support use of Iop_F32toI32U or
13683 Iop_F32toI32S, so for those cases we widen the F32 to F64
13684 and then follow the F64 route. */
13685 if (!isF64) {
13686 srcM = unop(Iop_F32toF64, srcM);
13688 assign(res, binop(isU ? Iop_F64toI32U : Iop_F64toI32S,
13689 mkU32((UInt)rm), srcM));
13691 llPutFReg(dd, unop(Iop_ReinterpI32asF32, mkexpr(res)));
13693 UChar rch = isF64 ? 'd' : 'f';
13694 DIP("vcvt%c.%s.%s %c%u, %c%u\n",
13695 c, isU ? "u32" : "s32", isF64 ? "f64" : "f32", 's', dd, rch, mm);
13696 return True;
13699 /* ----------- V{MAX,MIN}NM{.F64 d_d_d, .F32 s_s_s} ----------- */
13700 /* 31 27 22 21 19 15 11 8 7 6 5 4 3
13701 1111 11101 D 00 Vn Vd 101 1 N op M 0 Vm V{MIN,MAX}NM.F64 Dd, Dn, Dm
13702 1111 11101 D 00 Vn Vd 101 0 N op M 0 Vm V{MIN,MAX}NM.F32 Sd, Sn, Sm
13704 ARM encoding is in NV space.
13705 In Thumb mode, we must not be in an IT block.
13707 if (INSN(31,23) == BITS9(1,1,1,1,1,1,1,0,1) && INSN(21,20) == BITS2(0,0)
13708 && INSN(11,9) == BITS3(1,0,1) && INSN(4,4) == 0) {
13709 UInt bit_D = INSN(22,22);
13710 UInt fld_Vn = INSN(19,16);
13711 UInt fld_Vd = INSN(15,12);
13712 Bool isF64 = INSN(8,8) == 1;
13713 UInt bit_N = INSN(7,7);
13714 Bool isMAX = INSN(6,6) == 0;
13715 UInt bit_M = INSN(5,5);
13716 UInt fld_Vm = INSN(3,0);
13718 UInt dd = isF64 ? ((bit_D << 4) | fld_Vd) : ((fld_Vd << 1) | bit_D);
13719 UInt nn = isF64 ? ((bit_N << 4) | fld_Vn) : ((fld_Vn << 1) | bit_N);
13720 UInt mm = isF64 ? ((bit_M << 4) | fld_Vm) : ((fld_Vm << 1) | bit_M);
13722 if (isT) {
13723 gen_SIGILL_T_if_in_ITBlock(old_itstate, new_itstate);
13725 /* In ARM mode, this is statically unconditional. In Thumb mode,
13726 this must be dynamically unconditional, and we've SIGILLd if not.
13727 In either case we can create unconditional IR. */
13729 IROp op = isF64 ? (isMAX ? Iop_MaxNumF64 : Iop_MinNumF64)
13730 : (isMAX ? Iop_MaxNumF32 : Iop_MinNumF32);
13731 IRExpr* srcN = (isF64 ? llGetDReg : llGetFReg)(nn);
13732 IRExpr* srcM = (isF64 ? llGetDReg : llGetFReg)(mm);
13733 IRExpr* res = binop(op, srcN, srcM);
13734 (isF64 ? llPutDReg : llPutFReg)(dd, res);
13736 UChar rch = isF64 ? 'd' : 'f';
13737 DIP("v%snm.%s %c%u, %c%u, %c%u\n",
13738 isMAX ? "max" : "min", isF64 ? "f64" : "f32",
13739 rch, dd, rch, nn, rch, mm);
13740 return True;
13743 /* ----------- VRINTX.F64.F64 d_d, VRINTX.F32.F32 s_s ----------- */
13744 /* 31 27 22 21 15 11 8 7 5 4 3
13745 T1: 1110 11101 D 110111 Vd 101 1 01 M 0 Vm VRINTX<c>.F64.F64 Dd, Dm
13746 A1: cond 11101 D 110111 Vd 101 1 01 M 0 Vm
13748 T1: 1110 11101 D 110111 Vd 101 0 01 M 0 Vm VRINTX<c>.F32.F32 Sd, Sm
13749 A1: cond 11101 D 110111 Vd 101 0 01 M 0 Vm
13751 Like VRINT{Z,R}{.F64.F64, .F32.F32} just above, this can be conditional.
13752 This produces the same code as the VRINTR case since we ignore the
13753 requirement to signal inexactness.
13755 if ((isT ? (INSN(31,28) == BITS4(1,1,1,0)) : True)
13756 && INSN(27,23) == BITS5(1,1,1,0,1) && INSN(21,16) == BITS6(1,1,0,1,1,1)
13757 && INSN(11,9) == BITS3(1,0,1) && INSN(7,6) == BITS2(0,1)
13758 && INSN(4,4) == 0) {
13759 UInt bit_D = INSN(22,22);
13760 UInt fld_Vd = INSN(15,12);
13761 Bool isF64 = INSN(8,8) == 1;
13762 UInt bit_M = INSN(5,5);
13763 UInt fld_Vm = INSN(3,0);
13764 UInt dd = isF64 ? ((bit_D << 4) | fld_Vd) : ((fld_Vd << 1) | bit_D);
13765 UInt mm = isF64 ? ((bit_M << 4) | fld_Vm) : ((fld_Vm << 1) | bit_M);
13767 if (isT) vassert(condT != IRTemp_INVALID);
13768 IRType ty = isF64 ? Ity_F64 : Ity_F32;
13769 IRTemp src = newTemp(ty);
13770 IRTemp res = newTemp(ty);
13771 assign(src, (isF64 ? getDReg : getFReg)(mm));
13773 IRTemp rm = newTemp(Ity_I32);
13774 assign(rm, mkexpr(mk_get_IR_rounding_mode()));
13775 assign(res, binop(isF64 ? Iop_RoundF64toInt : Iop_RoundF32toInt,
13776 mkexpr(rm), mkexpr(src)));
13777 (isF64 ? putDReg : putFReg)(dd, mkexpr(res), condT);
13779 UChar rch = isF64 ? 'd' : 'f';
13780 DIP("vrint%c.%s.%s %c%u, %c%u\n",
13781 'x',
13782 isF64 ? "f64" : "f32", isF64 ? "f64" : "f32", rch, dd, rch, mm);
13783 return True;
13786 /* ----------- V{MAX,MIN}NM{.F32 d_d_d, .F32 q_q_q} ----------- */
13787 /* 31 27 22 21 20 19 15 11 7 6 5 4 3
13788 T1: 1111 11110 D op 0 Vn Vd 1111 N 1 M 1 Vm V{MIN,MAX}NM.F32 Qd,Qn,Qm
13789 A1: 1111 00110 D op 0 Vn Vd 1111 N 1 M 1 Vm
13791 T1: 1111 11110 D op 0 Vn Vd 1111 N 0 M 1 Vm V{MIN,MAX}NM.F32 Dd,Dn,Dm
13792 A1: 1111 00110 D op 0 Vn Vd 1111 N 0 M 1 Vm
13794 ARM encoding is in NV space.
13795 In Thumb mode, we must not be in an IT block.
13797 if (INSN(31,23) == (isT ? BITS9(1,1,1,1,1,1,1,1,0)
13798 : BITS9(1,1,1,1,0,0,1,1,0))
13799 && INSN(20,20) == 0 && INSN(11,8) == BITS4(1,1,1,1) && INSN(4,4) == 1) {
13800 UInt bit_D = INSN(22,22);
13801 Bool isMax = INSN(21,21) == 0;
13802 UInt fld_Vn = INSN(19,16);
13803 UInt fld_Vd = INSN(15,12);
13804 UInt bit_N = INSN(7,7);
13805 Bool isQ = INSN(6,6) == 1;
13806 UInt bit_M = INSN(5,5);
13807 UInt fld_Vm = INSN(3,0);
13809 /* dd, nn, mm are D-register numbers. */
13810 UInt dd = (bit_D << 4) | fld_Vd;
13811 UInt nn = (bit_N << 4) | fld_Vn;
13812 UInt mm = (bit_M << 4) | fld_Vm;
13814 if (! (isQ && ((dd & 1) == 1 || (nn & 1) == 1 || (mm & 1) == 1))) {
13815 /* Do this piecewise on f regs. This is a bit tricky
13816 though because we are dealing with the full 16 x Q == 32 x D
13817 register set, so the implied F reg numbers are 0 to 63. But
13818 ll{Get,Put}FReg only allow the 0 .. 31 as those are the only
13819 architected F regs. */
13820 UInt ddF = dd << 1;
13821 UInt nnF = nn << 1;
13822 UInt mmF = mm << 1;
13824 if (isT) {
13825 gen_SIGILL_T_if_in_ITBlock(old_itstate, new_itstate);
13827 /* In ARM mode, this is statically unconditional. In Thumb mode,
13828 this must be dynamically unconditional, and we've SIGILLd if not.
13829 In either case we can create unconditional IR. */
13831 IROp op = isMax ? Iop_MaxNumF32 : Iop_MinNumF32;
13833 IRTemp r0 = newTemp(Ity_F32);
13834 IRTemp r1 = newTemp(Ity_F32);
13835 IRTemp r2 = isQ ? newTemp(Ity_F32) : IRTemp_INVALID;
13836 IRTemp r3 = isQ ? newTemp(Ity_F32) : IRTemp_INVALID;
13838 assign(r0, binop(op, llGetFReg_up_to_64(nnF+0),
13839 llGetFReg_up_to_64(mmF+0)));
13840 assign(r1, binop(op, llGetFReg_up_to_64(nnF+1),
13841 llGetFReg_up_to_64(mmF+1)));
13842 if (isQ) {
13843 assign(r2, binop(op, llGetFReg_up_to_64(nnF+2),
13844 llGetFReg_up_to_64(mmF+2)));
13845 assign(r3, binop(op, llGetFReg_up_to_64(nnF+3),
13846 llGetFReg_up_to_64(mmF+3)));
13848 llPutFReg_up_to_64(ddF+0, mkexpr(r0));
13849 llPutFReg_up_to_64(ddF+1, mkexpr(r1));
13850 if (isQ) {
13851 llPutFReg_up_to_64(ddF+2, mkexpr(r2));
13852 llPutFReg_up_to_64(ddF+3, mkexpr(r3));
13855 HChar rch = isQ ? 'q' : 'd';
13856 UInt sh = isQ ? 1 : 0;
13857 DIP("v%snm.f32 %c%u, %c%u, %c%u\n",
13858 isMax ? "max" : "min", rch,
13859 dd >> sh, rch, nn >> sh, rch, mm >> sh);
13860 return True;
13862 /* else fall through */
13865 /* ----------- VCVT{A,N,P,M}{.F32 d_d, .F32 q_q} ----------- */
13866 /* 31 27 22 21 15 11 9 7 6 5 4 3
13867 T1: 1111 11111 D 111011 Vd 00 rm op Q M 0 Vm
13868 A1: 1111 00111 D 111011 Vd 00 rm op Q M 0 Vm
13870 ARM encoding is in NV space.
13871 In Thumb mode, we must not be in an IT block.
13873 if (INSN(31,23) == (isT ? BITS9(1,1,1,1,1,1,1,1,1)
13874 : BITS9(1,1,1,1,0,0,1,1,1))
13875 && INSN(21,16) == BITS6(1,1,1,0,1,1) && INSN(11,10) == BITS2(0,0)
13876 && INSN(4,4) == 0) {
13877 UInt bit_D = INSN(22,22);
13878 UInt fld_Vd = INSN(15,12);
13879 UInt fld_rm = INSN(9,8);
13880 Bool isU = INSN(7,7) == 1;
13881 Bool isQ = INSN(6,6) == 1;
13882 UInt bit_M = INSN(5,5);
13883 UInt fld_Vm = INSN(3,0);
13885 /* dd, nn, mm are D-register numbers. */
13886 UInt dd = (bit_D << 4) | fld_Vd;
13887 UInt mm = (bit_M << 4) | fld_Vm;
13889 if (! (isQ && ((dd & 1) == 1 || (mm & 1) == 1))) {
13890 /* Do this piecewise on f regs. */
13891 UInt ddF = dd << 1;
13892 UInt mmF = mm << 1;
13894 if (isT) {
13895 gen_SIGILL_T_if_in_ITBlock(old_itstate, new_itstate);
13897 /* In ARM mode, this is statically unconditional. In Thumb mode,
13898 this must be dynamically unconditional, and we've SIGILLd if not.
13899 In either case we can create unconditional IR. */
13901 UChar cvtc = '?';
13902 IRRoundingMode rm = Irrm_NEAREST;
13903 switch (fld_rm) {
13904 /* The use of NEAREST for both the 'a' and 'n' cases is a bit of a
13905 kludge since it doesn't take into account the nearest-even vs
13906 nearest-away semantics. */
13907 case BITS2(0,0): cvtc = 'a'; rm = Irrm_NEAREST; break;
13908 case BITS2(0,1): cvtc = 'n'; rm = Irrm_NEAREST; break;
13909 case BITS2(1,0): cvtc = 'p'; rm = Irrm_PosINF; break;
13910 case BITS2(1,1): cvtc = 'm'; rm = Irrm_NegINF; break;
13911 default: vassert(0);
13914 IROp cvt = isU ? Iop_F64toI32U : Iop_F64toI32S;
13916 IRTemp r0 = newTemp(Ity_F32);
13917 IRTemp r1 = newTemp(Ity_F32);
13918 IRTemp r2 = isQ ? newTemp(Ity_F32) : IRTemp_INVALID;
13919 IRTemp r3 = isQ ? newTemp(Ity_F32) : IRTemp_INVALID;
13921 IRExpr* rmE = mkU32((UInt)rm);
13923 assign(r0, unop(Iop_ReinterpI32asF32,
13924 binop(cvt, rmE, unop(Iop_F32toF64,
13925 llGetFReg_up_to_64(mmF+0)))));
13926 assign(r1, unop(Iop_ReinterpI32asF32,
13927 binop(cvt, rmE, unop(Iop_F32toF64,
13928 llGetFReg_up_to_64(mmF+1)))));
13929 if (isQ) {
13930 assign(r2, unop(Iop_ReinterpI32asF32,
13931 binop(cvt, rmE, unop(Iop_F32toF64,
13932 llGetFReg_up_to_64(mmF+2)))));
13933 assign(r3, unop(Iop_ReinterpI32asF32,
13934 binop(cvt, rmE, unop(Iop_F32toF64,
13935 llGetFReg_up_to_64(mmF+3)))));
13938 llPutFReg_up_to_64(ddF+0, mkexpr(r0));
13939 llPutFReg_up_to_64(ddF+1, mkexpr(r1));
13940 if (isQ) {
13941 llPutFReg_up_to_64(ddF+2, mkexpr(r2));
13942 llPutFReg_up_to_64(ddF+3, mkexpr(r3));
13945 HChar rch = isQ ? 'q' : 'd';
13946 UInt sh = isQ ? 1 : 0;
13947 DIP("vcvt%c.%c32.f32 %c%u, %c%u\n",
13948 cvtc, isU ? 'u' : 's', rch, dd >> sh, rch, mm >> sh);
13949 return True;
13951 /* else fall through */
13954 /* ----------- VRINT{A,N,P,M,X,Z}{.F32 d_d, .F32 q_q} ----------- */
13955 /* 31 27 22 21 15 11 9 6 5 4 3
13956 T1: 1111 11111 D 111010 Vd 01 op Q M 0 Vm
13957 A1: 1111 00111 D 111010 Vd 01 op Q M 0 Vm
13959 ARM encoding is in NV space.
13960 In Thumb mode, we must not be in an IT block.
13962 if (INSN(31,23) == (isT ? BITS9(1,1,1,1,1,1,1,1,1)
13963 : BITS9(1,1,1,1,0,0,1,1,1))
13964 && INSN(21,16) == BITS6(1,1,1,0,1,0) && INSN(11,10) == BITS2(0,1)
13965 && INSN(4,4) == 0) {
13966 UInt bit_D = INSN(22,22);
13967 UInt fld_Vd = INSN(15,12);
13968 UInt fld_op = INSN(9,7);
13969 Bool isQ = INSN(6,6) == 1;
13970 UInt bit_M = INSN(5,5);
13971 UInt fld_Vm = INSN(3,0);
13973 /* dd, nn, mm are D-register numbers. */
13974 UInt dd = (bit_D << 4) | fld_Vd;
13975 UInt mm = (bit_M << 4) | fld_Vm;
13977 if (! (fld_op == BITS3(1,0,0) || fld_op == BITS3(1,1,0))
13978 && ! (isQ && ((dd & 1) == 1 || (mm & 1) == 1))) {
13979 /* Do this piecewise on f regs. */
13980 UInt ddF = dd << 1;
13981 UInt mmF = mm << 1;
13983 if (isT) {
13984 gen_SIGILL_T_if_in_ITBlock(old_itstate, new_itstate);
13986 /* In ARM mode, this is statically unconditional. In Thumb mode,
13987 this must be dynamically unconditional, and we've SIGILLd if not.
13988 In either case we can create unconditional IR. */
13990 UChar cvtc = '?';
13991 IRRoundingMode rm = Irrm_NEAREST;
13992 switch (fld_op) {
13993 /* Various kludges:
13994 - The use of NEAREST for both the 'a' and 'n' cases,
13995 since it doesn't take into account the nearest-even vs
13996 nearest-away semantics.
13997 - For the 'x' case, we don't signal inexactness.
13999 case BITS3(0,1,0): cvtc = 'a'; rm = Irrm_NEAREST; break;
14000 case BITS3(0,0,0): cvtc = 'n'; rm = Irrm_NEAREST; break;
14001 case BITS3(1,1,1): cvtc = 'p'; rm = Irrm_PosINF; break;
14002 case BITS3(1,0,1): cvtc = 'm'; rm = Irrm_NegINF; break;
14003 case BITS3(0,1,1): cvtc = 'z'; rm = Irrm_ZERO; break;
14004 case BITS3(0,0,1): cvtc = 'x'; rm = Irrm_NEAREST; break;
14005 case BITS3(1,0,0):
14006 case BITS3(1,1,0):
14007 default: vassert(0);
14010 IRTemp r0 = newTemp(Ity_F32);
14011 IRTemp r1 = newTemp(Ity_F32);
14012 IRTemp r2 = isQ ? newTemp(Ity_F32) : IRTemp_INVALID;
14013 IRTemp r3 = isQ ? newTemp(Ity_F32) : IRTemp_INVALID;
14015 IRExpr* rmE = mkU32((UInt)rm);
14016 IROp rnd = Iop_RoundF32toInt;
14018 assign(r0, binop(rnd, rmE, llGetFReg_up_to_64(mmF+0)));
14019 assign(r1, binop(rnd, rmE, llGetFReg_up_to_64(mmF+1)));
14020 if (isQ) {
14021 assign(r2, binop(rnd, rmE, llGetFReg_up_to_64(mmF+2)));
14022 assign(r3, binop(rnd, rmE, llGetFReg_up_to_64(mmF+3)));
14025 llPutFReg_up_to_64(ddF+0, mkexpr(r0));
14026 llPutFReg_up_to_64(ddF+1, mkexpr(r1));
14027 if (isQ) {
14028 llPutFReg_up_to_64(ddF+2, mkexpr(r2));
14029 llPutFReg_up_to_64(ddF+3, mkexpr(r3));
14032 HChar rch = isQ ? 'q' : 'd';
14033 UInt sh = isQ ? 1 : 0;
14034 DIP("vrint%c.f32.f32 %c%u, %c%u\n",
14035 cvtc, rch, dd >> sh, rch, mm >> sh);
14036 return True;
14038 /* else fall through */
14041 /* ---------- Doesn't match anything. ---------- */
14042 return False;
14044 # undef INSN
14048 /*------------------------------------------------------------*/
14049 /*--- LDMxx/STMxx helper (both ARM and Thumb32) ---*/
14050 /*------------------------------------------------------------*/
14052 /* Generate IR for LDMxx and STMxx. This is complex. Assumes it's
14053 unconditional, so the caller must produce a jump-around before
14054 calling this, if the insn is to be conditional. Caller is
14055 responsible for all validation of parameters. For LDMxx, if PC is
14056 amongst the values loaded, caller is also responsible for
14057 generating the jump. */
14058 static void mk_ldm_stm ( Bool arm, /* True: ARM, False: Thumb */
14059 UInt rN, /* base reg */
14060 UInt bINC, /* 1: inc, 0: dec */
14061 UInt bBEFORE, /* 1: inc/dec before, 0: after */
14062 UInt bW, /* 1: writeback to Rn */
14063 UInt bL, /* 1: load, 0: store */
14064 UInt regList )
14066 Int i, r, m, nRegs;
/* NOTE(review): 'jk' is declared as IRTemp but is used throughout as an
   IRJumpKind (assigned Ijk_Boring here and Ijk_Ret below, then passed as
   the jump-kind argument of putIRegA). Both are integral so this compiles,
   but the declared type is misleading — confirm against the IRTemp /
   IRJumpKind definitions in libvex_ir.h before changing it. */
14067 IRTemp jk = Ijk_Boring;
14069 /* Get hold of the old Rn value. We might need to write its value
14070 to memory during a store, and if it's also the writeback
14071 register then we need to get its value now. We can't treat it
14072 exactly like the other registers we're going to transfer,
14073 because for xxMDA and xxMDB writeback forms, the generated IR
14074 updates Rn in the guest state before any transfers take place.
14075 We have to do this as per comments below, in order that if Rn is
14076 the stack pointer then it always has a value that is below or equal
14077 to any of the transfer addresses. Ick. */
14078 IRTemp oldRnT = newTemp(Ity_I32);
14079 assign(oldRnT, arm ? getIRegA(rN) : getIRegT(rN));
14081 IRTemp anchorT = newTemp(Ity_I32);
14082 /* The old (Addison-Wesley) ARM ARM seems to say that LDMxx/STMxx
14083 ignore the bottom two bits of the address. However, Cortex-A8
14084 doesn't seem to care. Hence: */
14085 /* No .. don't force alignment .. */
14086 /* assign(anchorT, binop(Iop_And32, mkexpr(oldRnT), mkU32(~3U))); */
14087 /* Instead, use the potentially misaligned address directly. */
14088 assign(anchorT, mkexpr(oldRnT));
14090 IROp opADDorSUB = bINC ? Iop_Add32 : Iop_Sub32;
14091 // bINC == 1: xxMIA, xxMIB
14092 // bINC == 0: xxMDA, xxMDB
14094 // For xxMDA and xxMDB, update Rn first if necessary. We have
14095 // to do this first so that, for the common idiom of the transfers
14096 // faulting because we're pushing stuff onto a stack and the stack
14097 // is growing down onto allocate-on-fault pages (as Valgrind simulates),
14098 // we need to have the SP up-to-date "covering" (pointing below) the
14099 // transfer area. For the same reason, if we are doing xxMIA or xxMIB,
14100 // do the transfer first, and then update rN afterwards.
/* Count the registers named in regList; each transfer is 4 bytes wide,
   so any writeback moves Rn by exactly 4*nRegs. */
14101 nRegs = 0;
14102 for (i = 0; i < 16; i++) {
14103 if ((regList & (1 << i)) != 0)
14104 nRegs++;
/* Decrementing forms (xxMDA/xxMDB): write the new, lower Rn back
   BEFORE the transfers, per the rationale above. */
14106 if (bW == 1 && !bINC) {
14107 IRExpr* e = binop(opADDorSUB, mkexpr(oldRnT), mkU32(4*nRegs));
14108 if (arm)
14109 putIRegA( rN, e, IRTemp_INVALID, Ijk_Boring );
14110 else
14111 putIRegT( rN, e, IRTemp_INVALID );
14114 // Make up a list of the registers to transfer, and their offsets
14115 // in memory relative to the anchor. If the base reg (Rn) is part
14116 // of the transfer, then do it last for a load and first for a store.
/* xReg[k] is a guest register number; xOff[k] is its byte offset from
   anchorT, applied via opADDorSUB (so it is an offset in the inc or dec
   direction as appropriate). */
14117 UInt xReg[16], xOff[16];
14118 Int nX = 0;
14119 m = 0;
14120 for (i = 0; i < 16; i++) {
14121 r = bINC ? i : (15-i);
14122 if (0 == (regList & (1<<r)))
14123 continue;
14124 if (bBEFORE)
14125 m++;
14126 /* paranoia: check we aren't transferring the writeback
14127 register during a load. Should be assured by decode-point
14128 check above. */
14129 if (bW == 1 && bL == 1)
14130 vassert(r != rN);
14132 xOff[nX] = 4 * m;
14133 xReg[nX] = r;
14134 nX++;
14136 if (!bBEFORE)
14137 m++;
14139 vassert(m == nRegs);
14140 vassert(nX == nRegs);
14141 vassert(nX <= 16);
14143 if (bW == 0 && (regList & (1<<rN)) != 0) {
14144 /* Non-writeback, and basereg is to be transferred. Do its
14145 transfer last for a load and first for a store. Requires
14146 reordering xOff/xReg. */
14147 if (0) {
14148 vex_printf("\nREG_LIST_PRE: (rN=%u)\n", rN);
14149 for (i = 0; i < nX; i++)
14150 vex_printf("reg %u off %u\n", xReg[i], xOff[i]);
14151 vex_printf("\n");
14154 vassert(nX > 0);
/* Locate Rn's slot in the transfer list; guaranteed present by the
   regList test just above. */
14155 for (i = 0; i < nX; i++) {
14156 if (xReg[i] == rN)
14157 break;
14159 vassert(i < nX); /* else we didn't find it! */
14160 UInt tReg = xReg[i];
14161 UInt tOff = xOff[i];
14162 if (bL == 1) {
14163 /* load; make this transfer happen last */
14164 if (i < nX-1) {
14165 for (m = i+1; m < nX; m++) {
14166 xReg[m-1] = xReg[m];
14167 xOff[m-1] = xOff[m];
14169 vassert(m == nX);
14170 xReg[m-1] = tReg;
14171 xOff[m-1] = tOff;
14173 } else {
14174 /* store; make this transfer happen first */
14175 if (i > 0) {
14176 for (m = i-1; m >= 0; m--) {
14177 xReg[m+1] = xReg[m];
14178 xOff[m+1] = xOff[m];
14180 vassert(m == -1);
14181 xReg[0] = tReg;
14182 xOff[0] = tOff;
14186 if (0) {
14187 vex_printf("REG_LIST_POST:\n");
14188 for (i = 0; i < nX; i++)
14189 vex_printf("reg %u off %u\n", xReg[i], xOff[i]);
14190 vex_printf("\n");
14194 /* According to the Cortex A8 TRM Sec. 5.2.1, LDM(1) with r13 as the base
14195 register and PC in the register list is a return for purposes of branch
14196 prediction.
14197 The ARM ARM Sec. C9.10.1 further specifies that writeback must be enabled
14198 to be counted in event 0x0E (Procedure return).*/
/* i.e. only the LDMIA sp!, {...pc} ("pop") shape is tagged as a return. */
14199 if (rN == 13 && bL == 1 && bINC && !bBEFORE && bW == 1) {
14200 jk = Ijk_Ret;
14203 /* Actually generate the transfers */
14204 for (i = 0; i < nX; i++) {
14205 r = xReg[i];
14206 if (bL == 1) {
14207 IRExpr* e = loadLE(Ity_I32,
14208 binop(opADDorSUB, mkexpr(anchorT),
14209 mkU32(xOff[i])));
14210 if (arm) {
14211 putIRegA( r, e, IRTemp_INVALID, jk );
14212 } else {
14213 // no: putIRegT( r, e, IRTemp_INVALID );
14214 // putIRegT refuses to write to R15. But that might happen.
14215 // Since this is uncond, and we need to be able to
14216 // write the PC, just use the low level put:
14217 llPutIReg( r, e );
14219 } else {
14220 /* if we're storing Rn, make sure we use the correct
14221 value, as per extensive comments above */
14222 storeLE( binop(opADDorSUB, mkexpr(anchorT), mkU32(xOff[i])),
14223 r == rN ? mkexpr(oldRnT)
14224 : (arm ? getIRegA(r) : getIRegT(r) ) );
14228 // If we are doing xxMIA or xxMIB,
14229 // do the transfer first, and then update rN afterwards.
14230 if (bW == 1 && bINC) {
14231 IRExpr* e = binop(opADDorSUB, mkexpr(oldRnT), mkU32(4*nRegs));
14232 if (arm)
14233 putIRegA( rN, e, IRTemp_INVALID, Ijk_Boring );
14234 else
14235 putIRegT( rN, e, IRTemp_INVALID );
14240 /*------------------------------------------------------------*/
14241 /*--- VFP (CP 10 and 11) instructions ---*/
14242 /*------------------------------------------------------------*/
14244 /* Both ARM and Thumb */
14246 /* Translate a CP10 or CP11 instruction. If successful, returns
14247 True and *dres may or may not be updated. If failure, returns
14248 False and doesn't change *dres nor create any IR.
14250 The ARM and Thumb encodings are identical for the low 28 bits of
14251 the insn (yay!) and that's what the caller must supply, iow, imm28
14252 has the top 4 bits masked out. Caller is responsible for
14253 determining whether the masked-out bits are valid for a CP10/11
14254 insn. The rules for the top 4 bits are:
14256 ARM: 0000 to 1110 allowed, and this is the gating condition.
14257 1111 (NV) is not allowed.
14259 Thumb: must be 1110. The gating condition is taken from
14260 ITSTATE in the normal way.
14262 Conditionalisation:
14264 Caller must supply an IRTemp 'condT' holding the gating condition,
14265 or IRTemp_INVALID indicating the insn is always executed.
14267 Caller must also supply an ARMCondcode 'cond'. This is only used
14268 for debug printing, no other purpose. For ARM, this is simply the
14269 top 4 bits of the original instruction. For Thumb, the condition
14270 is not (really) known until run time, and so ARMCondAL should be
14271 passed, only so that printing of these instructions does not show
14272 any condition.
14274 Finally, the caller must indicate whether this occurs in ARM or
14275 Thumb code.
14277 static Bool decode_CP10_CP11_instruction (
14278 /*MOD*/DisResult* dres,
14279 UInt insn28,
14280 IRTemp condT,
14281 ARMCondcode conq,
14282 Bool isT
14285 # define INSN(_bMax,_bMin) SLICE_UInt(insn28, (_bMax), (_bMin))
14287 vassert(INSN(31,28) == BITS4(0,0,0,0)); // caller's obligation
14289 if (isT) {
14290 vassert(conq == ARMCondAL);
14291 } else {
14292 vassert(conq >= ARMCondEQ && conq <= ARMCondAL);
14295 /* ----------------------------------------------------------- */
14296 /* -- VFP instructions -- double precision (mostly) -- */
14297 /* ----------------------------------------------------------- */
14299 /* --------------------- fldmx, fstmx --------------------- */
14301 31 27 23 19 15 11 7 0
14302 P U WL
14303 C4-100, C5-26 1 FSTMX cond 1100 1000 Rn Dd 1011 offset
14304 C4-100, C5-28 2 FSTMIAX cond 1100 1010 Rn Dd 1011 offset
14305 C4-100, C5-30 3 FSTMDBX cond 1101 0010 Rn Dd 1011 offset
14307 C4-42, C5-26 1 FLDMX cond 1100 1001 Rn Dd 1011 offset
14308 C4-42, C5-28 2 FLDMIAX cond 1100 1011 Rn Dd 1011 offset
14309 C4-42, C5-30 3 FLDMDBX cond 1101 0011 Rn Dd 1011 offset
14311 Regs transferred: Dd .. D(d + (offset-3)/2)
14312 offset must be odd, must not imply a reg > 15
14313 IA/DB: Rn is changed by (4 + 8 x # regs transferred)
14315 case coding:
14316 1 at-Rn (access at Rn)
14317 2 ia-Rn (access at Rn, then Rn += 4+8n)
14318 3 db-Rn (Rn -= 4+8n, then access at Rn)
14320 if (BITS8(1,1,0,0,0,0,0,0) == (INSN(27,20) & BITS8(1,1,1,0,0,0,0,0))
14321 && INSN(11,8) == BITS4(1,0,1,1)) {
14322 UInt bP = (insn28 >> 24) & 1;
14323 UInt bU = (insn28 >> 23) & 1;
14324 UInt bW = (insn28 >> 21) & 1;
14325 UInt bL = (insn28 >> 20) & 1;
14326 UInt offset = (insn28 >> 0) & 0xFF;
14327 UInt rN = INSN(19,16);
14328 UInt dD = (INSN(22,22) << 4) | INSN(15,12);
14329 UInt nRegs = (offset - 1) / 2;
14330 UInt summary = 0;
14331 Int i;
14333 /**/ if (bP == 0 && bU == 1 && bW == 0) {
14334 summary = 1;
14336 else if (bP == 0 && bU == 1 && bW == 1) {
14337 summary = 2;
14339 else if (bP == 1 && bU == 0 && bW == 1) {
14340 summary = 3;
14342 else goto after_vfp_fldmx_fstmx;
14344 /* no writebacks to r15 allowed. No use of r15 in thumb mode. */
14345 if (rN == 15 && (summary == 2 || summary == 3 || isT))
14346 goto after_vfp_fldmx_fstmx;
14348 /* offset must be odd, and specify at least one register */
14349 if (0 == (offset & 1) || offset < 3)
14350 goto after_vfp_fldmx_fstmx;
14352 /* can't transfer regs after D15 */
14353 if (dD + nRegs - 1 >= 32)
14354 goto after_vfp_fldmx_fstmx;
14356 /* Now, we can't do a conditional load or store, since that very
14357 likely will generate an exception. So we have to take a side
14358 exit at this point if the condition is false. */
14359 if (condT != IRTemp_INVALID) {
14360 if (isT)
14361 mk_skip_over_T32_if_cond_is_false( condT );
14362 else
14363 mk_skip_over_A32_if_cond_is_false( condT );
14364 condT = IRTemp_INVALID;
14366 /* Ok, now we're unconditional. Do the load or store. */
14368 /* get the old Rn value */
14369 IRTemp rnT = newTemp(Ity_I32);
14370 assign(rnT, align4if(isT ? getIRegT(rN) : getIRegA(rN),
14371 rN == 15));
14373 /* make a new value for Rn, post-insn */
14374 IRTemp rnTnew = IRTemp_INVALID;
14375 if (summary == 2 || summary == 3) {
14376 rnTnew = newTemp(Ity_I32);
14377 assign(rnTnew, binop(summary == 2 ? Iop_Add32 : Iop_Sub32,
14378 mkexpr(rnT),
14379 mkU32(4 + 8 * nRegs)));
14382 /* decide on the base transfer address */
14383 IRTemp taT = newTemp(Ity_I32);
14384 assign(taT, summary == 3 ? mkexpr(rnTnew) : mkexpr(rnT));
14386 /* update Rn if necessary -- in case 3, we're moving it down, so
14387 update before any memory reference, in order to keep Memcheck
14388 and V's stack-extending logic (on linux) happy */
14389 if (summary == 3) {
14390 if (isT)
14391 putIRegT(rN, mkexpr(rnTnew), IRTemp_INVALID);
14392 else
14393 putIRegA(rN, mkexpr(rnTnew), IRTemp_INVALID, Ijk_Boring);
14396 /* generate the transfers */
14397 for (i = 0; i < nRegs; i++) {
14398 IRExpr* addr = binop(Iop_Add32, mkexpr(taT), mkU32(8*i));
14399 if (bL) {
14400 putDReg(dD + i, loadLE(Ity_F64, addr), IRTemp_INVALID);
14401 } else {
14402 storeLE(addr, getDReg(dD + i));
14406 /* update Rn if necessary -- in case 2, we're moving it up, so
14407 update after any memory reference, in order to keep Memcheck
14408 and V's stack-extending logic (on linux) happy */
14409 if (summary == 2) {
14410 if (isT)
14411 putIRegT(rN, mkexpr(rnTnew), IRTemp_INVALID);
14412 else
14413 putIRegA(rN, mkexpr(rnTnew), IRTemp_INVALID, Ijk_Boring);
14416 const HChar* nm = bL==1 ? "ld" : "st";
14417 switch (summary) {
14418 case 1: DIP("f%smx%s r%u, {d%u-d%u}\n",
14419 nm, nCC(conq), rN, dD, dD + nRegs - 1);
14420 break;
14421 case 2: DIP("f%smiax%s r%u!, {d%u-d%u}\n",
14422 nm, nCC(conq), rN, dD, dD + nRegs - 1);
14423 break;
14424 case 3: DIP("f%smdbx%s r%u!, {d%u-d%u}\n",
14425 nm, nCC(conq), rN, dD, dD + nRegs - 1);
14426 break;
14427 default: vassert(0);
14430 goto decode_success_vfp;
14431 /* FIXME alignment constraints? */
14434 after_vfp_fldmx_fstmx:
14436 /* --------------------- fldmd, fstmd --------------------- */
14438 31 27 23 19 15 11 7 0
14439 P U WL
14440 C4-96, C5-26 1 FSTMD cond 1100 1000 Rn Dd 1011 offset
14441 C4-96, C5-28 2 FSTMDIA cond 1100 1010 Rn Dd 1011 offset
14442 C4-96, C5-30 3 FSTMDDB cond 1101 0010 Rn Dd 1011 offset
14444 C4-38, C5-26 1 FLDMD cond 1100 1001 Rn Dd 1011 offset
14445 C4-38, C5-28 2 FLDMIAD cond 1100 1011 Rn Dd 1011 offset
14446 C4-38, C5-30 3 FLDMDBD cond 1101 0011 Rn Dd 1011 offset
14448 Regs transferred: Dd .. D(d + (offset-2)/2)
14449 offset must be even, must not imply a reg > 15
14450 IA/DB: Rn is changed by (8 x # regs transferred)
14452 case coding:
14453 1 at-Rn (access at Rn)
14454 2 ia-Rn (access at Rn, then Rn += 8n)
14455 3 db-Rn (Rn -= 8n, then access at Rn)
14457 if (BITS8(1,1,0,0,0,0,0,0) == (INSN(27,20) & BITS8(1,1,1,0,0,0,0,0))
14458 && INSN(11,8) == BITS4(1,0,1,1)) {
14459 UInt bP = (insn28 >> 24) & 1;
14460 UInt bU = (insn28 >> 23) & 1;
14461 UInt bW = (insn28 >> 21) & 1;
14462 UInt bL = (insn28 >> 20) & 1;
14463 UInt offset = (insn28 >> 0) & 0xFF;
14464 UInt rN = INSN(19,16);
14465 UInt dD = (INSN(22,22) << 4) | INSN(15,12);
14466 UInt nRegs = offset / 2;
14467 UInt summary = 0;
14468 Int i;
14470 /**/ if (bP == 0 && bU == 1 && bW == 0) {
14471 summary = 1;
14473 else if (bP == 0 && bU == 1 && bW == 1) {
14474 summary = 2;
14476 else if (bP == 1 && bU == 0 && bW == 1) {
14477 summary = 3;
14479 else goto after_vfp_fldmd_fstmd;
14481 /* no writebacks to r15 allowed. No use of r15 in thumb mode. */
14482 if (rN == 15 && (summary == 2 || summary == 3 || isT))
14483 goto after_vfp_fldmd_fstmd;
14485 /* offset must be even, and specify at least one register */
14486 if (1 == (offset & 1) || offset < 2)
14487 goto after_vfp_fldmd_fstmd;
14489 /* can't transfer regs after D15 */
14490 if (dD + nRegs - 1 >= 32)
14491 goto after_vfp_fldmd_fstmd;
14493 /* Now, we can't do a conditional load or store, since that very
14494 likely will generate an exception. So we have to take a side
14495 exit at this point if the condition is false. */
14496 if (condT != IRTemp_INVALID) {
14497 if (isT)
14498 mk_skip_over_T32_if_cond_is_false( condT );
14499 else
14500 mk_skip_over_A32_if_cond_is_false( condT );
14501 condT = IRTemp_INVALID;
14503 /* Ok, now we're unconditional. Do the load or store. */
14505 /* get the old Rn value */
14506 IRTemp rnT = newTemp(Ity_I32);
14507 assign(rnT, align4if(isT ? getIRegT(rN) : getIRegA(rN),
14508 rN == 15));
14510 /* make a new value for Rn, post-insn */
14511 IRTemp rnTnew = IRTemp_INVALID;
14512 if (summary == 2 || summary == 3) {
14513 rnTnew = newTemp(Ity_I32);
14514 assign(rnTnew, binop(summary == 2 ? Iop_Add32 : Iop_Sub32,
14515 mkexpr(rnT),
14516 mkU32(8 * nRegs)));
14519 /* decide on the base transfer address */
14520 IRTemp taT = newTemp(Ity_I32);
14521 assign(taT, summary == 3 ? mkexpr(rnTnew) : mkexpr(rnT));
14523 /* update Rn if necessary -- in case 3, we're moving it down, so
14524 update before any memory reference, in order to keep Memcheck
14525 and V's stack-extending logic (on linux) happy */
14526 if (summary == 3) {
14527 if (isT)
14528 putIRegT(rN, mkexpr(rnTnew), IRTemp_INVALID);
14529 else
14530 putIRegA(rN, mkexpr(rnTnew), IRTemp_INVALID, Ijk_Boring);
14533 /* generate the transfers */
14534 for (i = 0; i < nRegs; i++) {
14535 IRExpr* addr = binop(Iop_Add32, mkexpr(taT), mkU32(8*i));
14536 if (bL) {
14537 putDReg(dD + i, loadLE(Ity_F64, addr), IRTemp_INVALID);
14538 } else {
14539 storeLE(addr, getDReg(dD + i));
14543 /* update Rn if necessary -- in case 2, we're moving it up, so
14544 update after any memory reference, in order to keep Memcheck
14545 and V's stack-extending logic (on linux) happy */
14546 if (summary == 2) {
14547 if (isT)
14548 putIRegT(rN, mkexpr(rnTnew), IRTemp_INVALID);
14549 else
14550 putIRegA(rN, mkexpr(rnTnew), IRTemp_INVALID, Ijk_Boring);
14553 const HChar* nm = bL==1 ? "ld" : "st";
14554 switch (summary) {
14555 case 1: DIP("f%smd%s r%u, {d%u-d%u}\n",
14556 nm, nCC(conq), rN, dD, dD + nRegs - 1);
14557 break;
14558 case 2: DIP("f%smiad%s r%u!, {d%u-d%u}\n",
14559 nm, nCC(conq), rN, dD, dD + nRegs - 1);
14560 break;
14561 case 3: DIP("f%smdbd%s r%u!, {d%u-d%u}\n",
14562 nm, nCC(conq), rN, dD, dD + nRegs - 1);
14563 break;
14564 default: vassert(0);
14567 goto decode_success_vfp;
14568 /* FIXME alignment constraints? */
14571 after_vfp_fldmd_fstmd:
14573 /* ------------------- fmrx, fmxr ------------------- */
14574 if (BITS8(1,1,1,0,1,1,1,1) == INSN(27,20)
14575 && BITS4(1,0,1,0) == INSN(11,8)
14576 && BITS8(0,0,0,1,0,0,0,0) == (insn28 & 0xFF)) {
14577 UInt rD = INSN(15,12);
14578 UInt reg = INSN(19,16);
14579 if (reg == BITS4(0,0,0,1)) {
14580 if (rD == 15) {
14581 IRTemp nzcvT = newTemp(Ity_I32);
14582 /* When rD is 15, we are copying the top 4 bits of FPSCR
14583 into CPSR. That is, set the flags thunk to COPY and
14584 install FPSCR[31:28] as the value to copy. */
14585 assign(nzcvT, binop(Iop_And32,
14586 IRExpr_Get(OFFB_FPSCR, Ity_I32),
14587 mkU32(0xF0000000)));
14588 setFlags_D1(ARMG_CC_OP_COPY, nzcvT, condT);
14589 DIP("fmstat%s\n", nCC(conq));
14590 } else {
14591 /* Otherwise, merely transfer FPSCR to r0 .. r14. */
14592 IRExpr* e = IRExpr_Get(OFFB_FPSCR, Ity_I32);
14593 if (isT)
14594 putIRegT(rD, e, condT);
14595 else
14596 putIRegA(rD, e, condT, Ijk_Boring);
14597 DIP("fmrx%s r%u, fpscr\n", nCC(conq), rD);
14599 goto decode_success_vfp;
14601 /* fall through */
14604 if (BITS8(1,1,1,0,1,1,1,0) == INSN(27,20)
14605 && BITS4(1,0,1,0) == INSN(11,8)
14606 && BITS8(0,0,0,1,0,0,0,0) == (insn28 & 0xFF)) {
14607 UInt rD = INSN(15,12);
14608 UInt reg = INSN(19,16);
14609 if (reg == BITS4(0,0,0,1)) {
14610 putMiscReg32(OFFB_FPSCR,
14611 isT ? getIRegT(rD) : getIRegA(rD), condT);
14612 DIP("fmxr%s fpscr, r%u\n", nCC(conq), rD);
14613 goto decode_success_vfp;
14615 /* fall through */
14618 /* --------------------- vmov --------------------- */
14619 // VMOV dM, rD, rN
14620 if (0x0C400B10 == (insn28 & 0x0FF00FD0)) {
14621 UInt dM = INSN(3,0) | (INSN(5,5) << 4);
14622 UInt rD = INSN(15,12); /* lo32 */
14623 UInt rN = INSN(19,16); /* hi32 */
14624 if (rD == 15 || rN == 15 || (isT && (rD == 13 || rN == 13))) {
14625 /* fall through */
14626 } else {
14627 putDReg(dM,
14628 unop(Iop_ReinterpI64asF64,
14629 binop(Iop_32HLto64,
14630 isT ? getIRegT(rN) : getIRegA(rN),
14631 isT ? getIRegT(rD) : getIRegA(rD))),
14632 condT);
14633 DIP("vmov%s d%u, r%u, r%u\n", nCC(conq), dM, rD, rN);
14634 goto decode_success_vfp;
14636 /* fall through */
14639 // VMOV rD, rN, dM
14640 if (0x0C500B10 == (insn28 & 0x0FF00FD0)) {
14641 UInt dM = INSN(3,0) | (INSN(5,5) << 4);
14642 UInt rD = INSN(15,12); /* lo32 */
14643 UInt rN = INSN(19,16); /* hi32 */
14644 if (rD == 15 || rN == 15 || (isT && (rD == 13 || rN == 13))
14645 || rD == rN) {
14646 /* fall through */
14647 } else {
14648 IRTemp i64 = newTemp(Ity_I64);
14649 assign(i64, unop(Iop_ReinterpF64asI64, getDReg(dM)));
14650 IRExpr* hi32 = unop(Iop_64HIto32, mkexpr(i64));
14651 IRExpr* lo32 = unop(Iop_64to32, mkexpr(i64));
14652 if (isT) {
14653 putIRegT(rN, hi32, condT);
14654 putIRegT(rD, lo32, condT);
14655 } else {
14656 putIRegA(rN, hi32, condT, Ijk_Boring);
14657 putIRegA(rD, lo32, condT, Ijk_Boring);
14659 DIP("vmov%s r%u, r%u, d%u\n", nCC(conq), rD, rN, dM);
14660 goto decode_success_vfp;
14662 /* fall through */
14665 // VMOV sD, sD+1, rN, rM
14666 if (0x0C400A10 == (insn28 & 0x0FF00FD0)) {
14667 UInt sD = (INSN(3,0) << 1) | INSN(5,5);
14668 UInt rN = INSN(15,12);
14669 UInt rM = INSN(19,16);
14670 if (rM == 15 || rN == 15 || (isT && (rM == 13 || rN == 13))
14671 || sD == 31) {
14672 /* fall through */
14673 } else {
14674 putFReg(sD,
14675 unop(Iop_ReinterpI32asF32, isT ? getIRegT(rN) : getIRegA(rN)),
14676 condT);
14677 putFReg(sD+1,
14678 unop(Iop_ReinterpI32asF32, isT ? getIRegT(rM) : getIRegA(rM)),
14679 condT);
14680 DIP("vmov%s, s%u, s%u, r%u, r%u\n",
14681 nCC(conq), sD, sD + 1, rN, rM);
14682 goto decode_success_vfp;
14686 // VMOV rN, rM, sD, sD+1
14687 if (0x0C500A10 == (insn28 & 0x0FF00FD0)) {
14688 UInt sD = (INSN(3,0) << 1) | INSN(5,5);
14689 UInt rN = INSN(15,12);
14690 UInt rM = INSN(19,16);
14691 if (rM == 15 || rN == 15 || (isT && (rM == 13 || rN == 13))
14692 || sD == 31 || rN == rM) {
14693 /* fall through */
14694 } else {
14695 IRExpr* res0 = unop(Iop_ReinterpF32asI32, getFReg(sD));
14696 IRExpr* res1 = unop(Iop_ReinterpF32asI32, getFReg(sD+1));
14697 if (isT) {
14698 putIRegT(rN, res0, condT);
14699 putIRegT(rM, res1, condT);
14700 } else {
14701 putIRegA(rN, res0, condT, Ijk_Boring);
14702 putIRegA(rM, res1, condT, Ijk_Boring);
14704 DIP("vmov%s, r%u, r%u, s%u, s%u\n",
14705 nCC(conq), rN, rM, sD, sD + 1);
14706 goto decode_success_vfp;
14710 // VMOV rD[x], rT (ARM core register to scalar)
14711 if (0x0E000B10 == (insn28 & 0x0F900F1F)) {
14712 UInt rD = (INSN(7,7) << 4) | INSN(19,16);
14713 UInt rT = INSN(15,12);
14714 UInt opc = (INSN(22,21) << 2) | INSN(6,5);
14715 UInt index;
14716 if (rT == 15 || (isT && rT == 13)) {
14717 /* fall through */
14718 } else {
14719 if ((opc & BITS4(1,0,0,0)) == BITS4(1,0,0,0)) {
14720 index = opc & 7;
14721 putDRegI64(rD, triop(Iop_SetElem8x8,
14722 getDRegI64(rD),
14723 mkU8(index),
14724 unop(Iop_32to8,
14725 isT ? getIRegT(rT) : getIRegA(rT))),
14726 condT);
14727 DIP("vmov%s.8 d%u[%u], r%u\n", nCC(conq), rD, index, rT);
14728 goto decode_success_vfp;
14730 else if ((opc & BITS4(1,0,0,1)) == BITS4(0,0,0,1)) {
14731 index = (opc >> 1) & 3;
14732 putDRegI64(rD, triop(Iop_SetElem16x4,
14733 getDRegI64(rD),
14734 mkU8(index),
14735 unop(Iop_32to16,
14736 isT ? getIRegT(rT) : getIRegA(rT))),
14737 condT);
14738 DIP("vmov%s.16 d%u[%u], r%u\n", nCC(conq), rD, index, rT);
14739 goto decode_success_vfp;
14741 else if ((opc & BITS4(1,0,1,1)) == BITS4(0,0,0,0)) {
14742 index = (opc >> 2) & 1;
14743 putDRegI64(rD, triop(Iop_SetElem32x2,
14744 getDRegI64(rD),
14745 mkU8(index),
14746 isT ? getIRegT(rT) : getIRegA(rT)),
14747 condT);
14748 DIP("vmov%s.32 d%u[%u], r%u\n", nCC(conq), rD, index, rT);
14749 goto decode_success_vfp;
14750 } else {
14751 /* fall through */
14756 // VMOV (scalar to ARM core register)
14757 // VMOV rT, rD[x]
14758 if (0x0E100B10 == (insn28 & 0x0F100F1F)) {
14759 UInt rN = (INSN(7,7) << 4) | INSN(19,16);
14760 UInt rT = INSN(15,12);
14761 UInt U = INSN(23,23);
14762 UInt opc = (INSN(22,21) << 2) | INSN(6,5);
14763 UInt index;
14764 if (rT == 15 || (isT && rT == 13)) {
14765 /* fall through */
14766 } else {
14767 if ((opc & BITS4(1,0,0,0)) == BITS4(1,0,0,0)) {
14768 index = opc & 7;
14769 IRExpr* e = unop(U ? Iop_8Uto32 : Iop_8Sto32,
14770 binop(Iop_GetElem8x8,
14771 getDRegI64(rN),
14772 mkU8(index)));
14773 if (isT)
14774 putIRegT(rT, e, condT);
14775 else
14776 putIRegA(rT, e, condT, Ijk_Boring);
14777 DIP("vmov%s.%c8 r%u, d%u[%u]\n", nCC(conq), U ? 'u' : 's',
14778 rT, rN, index);
14779 goto decode_success_vfp;
14781 else if ((opc & BITS4(1,0,0,1)) == BITS4(0,0,0,1)) {
14782 index = (opc >> 1) & 3;
14783 IRExpr* e = unop(U ? Iop_16Uto32 : Iop_16Sto32,
14784 binop(Iop_GetElem16x4,
14785 getDRegI64(rN),
14786 mkU8(index)));
14787 if (isT)
14788 putIRegT(rT, e, condT);
14789 else
14790 putIRegA(rT, e, condT, Ijk_Boring);
14791 DIP("vmov%s.%c16 r%u, d%u[%u]\n", nCC(conq), U ? 'u' : 's',
14792 rT, rN, index);
14793 goto decode_success_vfp;
14795 else if ((opc & BITS4(1,0,1,1)) == BITS4(0,0,0,0) && U == 0) {
14796 index = (opc >> 2) & 1;
14797 IRExpr* e = binop(Iop_GetElem32x2, getDRegI64(rN), mkU8(index));
14798 if (isT)
14799 putIRegT(rT, e, condT);
14800 else
14801 putIRegA(rT, e, condT, Ijk_Boring);
14802 DIP("vmov%s.32 r%u, d%u[%u]\n", nCC(conq), rT, rN, index);
14803 goto decode_success_vfp;
14804 } else {
14805 /* fall through */
14810 // VMOV.F32 sD, #imm
14811 // FCONSTS sD, #imm
14812 if (BITS8(1,1,1,0,1,0,1,1) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,1))
14813 && BITS4(0,0,0,0) == INSN(7,4) && INSN(11,8) == BITS4(1,0,1,0)) {
14814 UInt rD = (INSN(15,12) << 1) | INSN(22,22);
14815 UInt imm8 = (INSN(19,16) << 4) | INSN(3,0);
14816 UInt b = (imm8 >> 6) & 1;
14817 UInt imm;
14818 imm = (BITS8((imm8 >> 7) & 1,(~b) & 1,b,b,b,b,b,(imm8 >> 5) & 1) << 8)
14819 | ((imm8 & 0x1f) << 3);
14820 imm <<= 16;
14821 putFReg(rD, unop(Iop_ReinterpI32asF32, mkU32(imm)), condT);
14822 DIP("fconsts%s s%u #%u", nCC(conq), rD, imm8);
14823 goto decode_success_vfp;
14826 // VMOV.F64 dD, #imm
14827 // FCONSTD dD, #imm
14828 if (BITS8(1,1,1,0,1,0,1,1) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,1))
14829 && BITS4(0,0,0,0) == INSN(7,4) && INSN(11,8) == BITS4(1,0,1,1)) {
14830 UInt rD = INSN(15,12) | (INSN(22,22) << 4);
14831 UInt imm8 = (INSN(19,16) << 4) | INSN(3,0);
14832 UInt b = (imm8 >> 6) & 1;
14833 ULong imm;
14834 imm = (BITS8((imm8 >> 7) & 1,(~b) & 1,b,b,b,b,b,b) << 8)
14835 | BITS8(b,b,0,0,0,0,0,0) | (imm8 & 0x3f);
14836 imm <<= 48;
14837 putDReg(rD, unop(Iop_ReinterpI64asF64, mkU64(imm)), condT);
14838 DIP("fconstd%s d%u #%u", nCC(conq), rD, imm8);
14839 goto decode_success_vfp;
14842 /* ---------------------- vdup ------------------------- */
14843 // VDUP dD, rT
14844 // VDUP qD, rT
14845 if (BITS8(1,1,1,0,1,0,0,0) == (INSN(27,20) & BITS8(1,1,1,1,1,0,0,1))
14846 && BITS4(1,0,1,1) == INSN(11,8) && INSN(6,6) == 0 && INSN(4,4) == 1) {
14847 UInt rD = (INSN(7,7) << 4) | INSN(19,16);
14848 UInt rT = INSN(15,12);
14849 UInt Q = INSN(21,21);
14850 UInt size = (INSN(22,22) << 1) | INSN(5,5);
14851 if (rT == 15 || (isT && rT == 13) || size == 3 || (Q && (rD & 1))) {
14852 /* fall through */
14853 } else {
14854 IRExpr* e = isT ? getIRegT(rT) : getIRegA(rT);
14855 if (Q) {
14856 rD >>= 1;
14857 switch (size) {
14858 case 0:
14859 putQReg(rD, unop(Iop_Dup32x4, e), condT);
14860 break;
14861 case 1:
14862 putQReg(rD, unop(Iop_Dup16x8, unop(Iop_32to16, e)),
14863 condT);
14864 break;
14865 case 2:
14866 putQReg(rD, unop(Iop_Dup8x16, unop(Iop_32to8, e)),
14867 condT);
14868 break;
14869 default:
14870 vassert(0);
14872 DIP("vdup.%d q%u, r%u\n", 32 / (1<<size), rD, rT);
14873 } else {
14874 switch (size) {
14875 case 0:
14876 putDRegI64(rD, unop(Iop_Dup32x2, e), condT);
14877 break;
14878 case 1:
14879 putDRegI64(rD, unop(Iop_Dup16x4, unop(Iop_32to16, e)),
14880 condT);
14881 break;
14882 case 2:
14883 putDRegI64(rD, unop(Iop_Dup8x8, unop(Iop_32to8, e)),
14884 condT);
14885 break;
14886 default:
14887 vassert(0);
14889 DIP("vdup.%d d%u, r%u\n", 32 / (1<<size), rD, rT);
14891 goto decode_success_vfp;
14895 /* --------------------- f{ld,st}d --------------------- */
14896 // FLDD, FSTD
14897 if (BITS8(1,1,0,1,0,0,0,0) == (INSN(27,20) & BITS8(1,1,1,1,0,0,1,0))
14898 && BITS4(1,0,1,1) == INSN(11,8)) {
14899 UInt dD = INSN(15,12) | (INSN(22,22) << 4);
14900 UInt rN = INSN(19,16);
14901 UInt offset = (insn28 & 0xFF) << 2;
14902 UInt bU = (insn28 >> 23) & 1; /* 1: +offset 0: -offset */
14903 UInt bL = (insn28 >> 20) & 1; /* 1: load 0: store */
14904 /* make unconditional */
14905 if (condT != IRTemp_INVALID) {
14906 if (isT)
14907 mk_skip_over_T32_if_cond_is_false( condT );
14908 else
14909 mk_skip_over_A32_if_cond_is_false( condT );
14910 condT = IRTemp_INVALID;
14912 IRTemp ea = newTemp(Ity_I32);
14913 assign(ea, binop(bU ? Iop_Add32 : Iop_Sub32,
14914 align4if(isT ? getIRegT(rN) : getIRegA(rN),
14915 rN == 15),
14916 mkU32(offset)));
14917 if (bL) {
14918 putDReg(dD, loadLE(Ity_F64,mkexpr(ea)), IRTemp_INVALID);
14919 } else {
14920 storeLE(mkexpr(ea), getDReg(dD));
14922 DIP("f%sd%s d%u, [r%u, %c#%u]\n",
14923 bL ? "ld" : "st", nCC(conq), dD, rN,
14924 bU ? '+' : '-', offset);
14925 goto decode_success_vfp;
14928 /* --------------------- dp insns (D) --------------------- */
14929 if (BITS8(1,1,1,0,0,0,0,0) == (INSN(27,20) & BITS8(1,1,1,1,0,0,0,0))
14930 && BITS4(1,0,1,1) == INSN(11,8)
14931 && BITS4(0,0,0,0) == (INSN(7,4) & BITS4(0,0,0,1))) {
14932 UInt dM = INSN(3,0) | (INSN(5,5) << 4); /* argR */
14933 UInt dD = INSN(15,12) | (INSN(22,22) << 4); /* dst/acc */
14934 UInt dN = INSN(19,16) | (INSN(7,7) << 4); /* argL */
14935 UInt bP = (insn28 >> 23) & 1;
14936 UInt bQ = (insn28 >> 21) & 1;
14937 UInt bR = (insn28 >> 20) & 1;
14938 UInt bS = (insn28 >> 6) & 1;
14939 UInt opc = (bP << 3) | (bQ << 2) | (bR << 1) | bS;
14940 IRExpr* rm = get_FAKE_roundingmode(); /* XXXROUNDINGFIXME */
14941 switch (opc) {
14942 case BITS4(0,0,0,0): /* MAC: d + n * m */
14943 putDReg(dD, triop(Iop_AddF64, rm,
14944 getDReg(dD),
14945 triop(Iop_MulF64, rm, getDReg(dN),
14946 getDReg(dM))),
14947 condT);
14948 DIP("fmacd%s d%u, d%u, d%u\n", nCC(conq), dD, dN, dM);
14949 goto decode_success_vfp;
14950 case BITS4(0,0,0,1): /* NMAC: d + -(n * m) */
14951 putDReg(dD, triop(Iop_AddF64, rm,
14952 getDReg(dD),
14953 unop(Iop_NegF64,
14954 triop(Iop_MulF64, rm, getDReg(dN),
14955 getDReg(dM)))),
14956 condT);
14957 DIP("fnmacd%s d%u, d%u, d%u\n", nCC(conq), dD, dN, dM);
14958 goto decode_success_vfp;
14959 case BITS4(0,0,1,0): /* MSC: - d + n * m */
14960 putDReg(dD, triop(Iop_AddF64, rm,
14961 unop(Iop_NegF64, getDReg(dD)),
14962 triop(Iop_MulF64, rm, getDReg(dN),
14963 getDReg(dM))),
14964 condT);
14965 DIP("fmscd%s d%u, d%u, d%u\n", nCC(conq), dD, dN, dM);
14966 goto decode_success_vfp;
14967 case BITS4(0,0,1,1): /* NMSC: - d + -(n * m) */
14968 putDReg(dD, triop(Iop_AddF64, rm,
14969 unop(Iop_NegF64, getDReg(dD)),
14970 unop(Iop_NegF64,
14971 triop(Iop_MulF64, rm, getDReg(dN),
14972 getDReg(dM)))),
14973 condT);
14974 DIP("fnmscd%s d%u, d%u, d%u\n", nCC(conq), dD, dN, dM);
14975 goto decode_success_vfp;
14976 case BITS4(0,1,0,0): /* MUL: n * m */
14977 putDReg(dD, triop(Iop_MulF64, rm, getDReg(dN), getDReg(dM)),
14978 condT);
14979 DIP("fmuld%s d%u, d%u, d%u\n", nCC(conq), dD, dN, dM);
14980 goto decode_success_vfp;
14981 case BITS4(0,1,0,1): /* NMUL: - n * m */
14982 putDReg(dD, unop(Iop_NegF64,
14983 triop(Iop_MulF64, rm, getDReg(dN),
14984 getDReg(dM))),
14985 condT);
14986 DIP("fnmuld%s d%u, d%u, d%u\n", nCC(conq), dD, dN, dM);
14987 goto decode_success_vfp;
14988 case BITS4(0,1,1,0): /* ADD: n + m */
14989 putDReg(dD, triop(Iop_AddF64, rm, getDReg(dN), getDReg(dM)),
14990 condT);
14991 DIP("faddd%s d%u, d%u, d%u\n", nCC(conq), dD, dN, dM);
14992 goto decode_success_vfp;
14993 case BITS4(0,1,1,1): /* SUB: n - m */
14994 putDReg(dD, triop(Iop_SubF64, rm, getDReg(dN), getDReg(dM)),
14995 condT);
14996 DIP("fsubd%s d%u, d%u, d%u\n", nCC(conq), dD, dN, dM);
14997 goto decode_success_vfp;
14998 case BITS4(1,0,0,0): /* DIV: n / m */
14999 putDReg(dD, triop(Iop_DivF64, rm, getDReg(dN), getDReg(dM)),
15000 condT);
15001 DIP("fdivd%s d%u, d%u, d%u\n", nCC(conq), dD, dN, dM);
15002 goto decode_success_vfp;
15003 case BITS4(1,0,1,0): /* VNFMS: -(d - n * m) (fused) */
15004 /* XXXROUNDINGFIXME look up ARM reference for fused
15005 multiply-add rounding */
15006 putDReg(dD, triop(Iop_AddF64, rm,
15007 unop(Iop_NegF64, getDReg(dD)),
15008 triop(Iop_MulF64, rm,
15009 getDReg(dN),
15010 getDReg(dM))),
15011 condT);
15012 DIP("vfnmsd%s d%u, d%u, d%u\n", nCC(conq), dD, dN, dM);
15013 goto decode_success_vfp;
15014 case BITS4(1,0,1,1): /* VNFMA: -(d + n * m) (fused) */
15015 /* XXXROUNDINGFIXME look up ARM reference for fused
15016 multiply-add rounding */
15017 putDReg(dD, triop(Iop_AddF64, rm,
15018 unop(Iop_NegF64, getDReg(dD)),
15019 triop(Iop_MulF64, rm,
15020 unop(Iop_NegF64, getDReg(dN)),
15021 getDReg(dM))),
15022 condT);
15023 DIP("vfnmad%s d%u, d%u, d%u\n", nCC(conq), dD, dN, dM);
15024 goto decode_success_vfp;
15025 case BITS4(1,1,0,0): /* VFMA: d + n * m (fused) */
15026 /* XXXROUNDINGFIXME look up ARM reference for fused
15027 multiply-add rounding */
15028 putDReg(dD, triop(Iop_AddF64, rm,
15029 getDReg(dD),
15030 triop(Iop_MulF64, rm, getDReg(dN),
15031 getDReg(dM))),
15032 condT);
15033 DIP("vfmad%s d%u, d%u, d%u\n", nCC(conq), dD, dN, dM);
15034 goto decode_success_vfp;
15035 case BITS4(1,1,0,1): /* VFMS: d + (-n * m) (fused) */
15036 /* XXXROUNDINGFIXME look up ARM reference for fused
15037 multiply-add rounding */
15038 putDReg(dD, triop(Iop_AddF64, rm,
15039 getDReg(dD),
15040 triop(Iop_MulF64, rm,
15041 unop(Iop_NegF64, getDReg(dN)),
15042 getDReg(dM))),
15043 condT);
15044 DIP("vfmsd%s d%u, d%u, d%u\n", nCC(conq), dD, dN, dM);
15045 goto decode_success_vfp;
15046 default:
15047 break;
15051 /* --------------------- compares (D) --------------------- */
15052 /* 31 27 23 19 15 11 7 3
15053 28 24 20 16 12 8 4 0
15054 FCMPD cond 1110 1D11 0100 Dd 1011 0100 Dm
15055 FCMPED cond 1110 1D11 0100 Dd 1011 1100 Dm
15056 FCMPZD cond 1110 1D11 0101 Dd 1011 0100 0000
15057 FCMPZED cond 1110 1D11 0101 Dd 1011 1100 0000
15060 Z=0 Compare Dd vs Dm and set FPSCR 31:28 accordingly
15061 Z=1 Compare Dd vs zero
15063 N=1 generates Invalid Operation exn if either arg is any kind of NaN
15064 N=0 generates Invalid Operation exn if either arg is a signalling NaN
15065 (Not that we pay any attention to N here)
15067 if (BITS8(1,1,1,0,1,0,1,1) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,1))
15068 && BITS4(0,1,0,0) == (INSN(19,16) & BITS4(1,1,1,0))
15069 && BITS4(1,0,1,1) == INSN(11,8)
15070 && BITS4(0,1,0,0) == (INSN(7,4) & BITS4(0,1,0,1))) {
15071 UInt bZ = (insn28 >> 16) & 1;
15072 UInt bN = (insn28 >> 7) & 1;
15073 UInt dD = INSN(15,12) | (INSN(22,22) << 4);
15074 UInt dM = INSN(3,0) | (INSN(5,5) << 4);
15075 if (bZ && INSN(3,0) != 0) {
15076 /* does not decode; fall through */
15077 } else {
15078 IRTemp argL = newTemp(Ity_F64);
15079 IRTemp argR = newTemp(Ity_F64);
15080 IRTemp irRes = newTemp(Ity_I32);
15081 assign(argL, getDReg(dD));
15082 assign(argR, bZ ? IRExpr_Const(IRConst_F64i(0)) : getDReg(dM));
15083 assign(irRes, binop(Iop_CmpF64, mkexpr(argL), mkexpr(argR)));
15085 IRTemp nzcv = IRTemp_INVALID;
15086 IRTemp oldFPSCR = newTemp(Ity_I32);
15087 IRTemp newFPSCR = newTemp(Ity_I32);
15089 /* This is where the fun starts. We have to convert 'irRes'
15090 from an IR-convention return result (IRCmpF64Result) to an
15091 ARM-encoded (N,Z,C,V) group. The final result is in the
15092 bottom 4 bits of 'nzcv'. */
15093 /* Map compare result from IR to ARM(nzcv) */
15095 FP cmp result | IR | ARM(nzcv)
15096 --------------------------------
15097 UN 0x45 0011
15098 LT 0x01 1000
15099 GT 0x00 0010
15100 EQ 0x40 0110
15102 nzcv = mk_convert_IRCmpF64Result_to_NZCV(irRes);
15104 /* And update FPSCR accordingly */
15105 assign(oldFPSCR, IRExpr_Get(OFFB_FPSCR, Ity_I32));
15106 assign(newFPSCR,
15107 binop(Iop_Or32,
15108 binop(Iop_And32, mkexpr(oldFPSCR), mkU32(0x0FFFFFFF)),
15109 binop(Iop_Shl32, mkexpr(nzcv), mkU8(28))));
15111 putMiscReg32(OFFB_FPSCR, mkexpr(newFPSCR), condT);
15113 if (bZ) {
15114 DIP("fcmpz%sd%s d%u\n", bN ? "e" : "", nCC(conq), dD);
15115 } else {
15116 DIP("fcmp%sd%s d%u, d%u\n", bN ? "e" : "", nCC(conq), dD, dM);
15118 goto decode_success_vfp;
15120 /* fall through */
15123 /* --------------------- unary (D) --------------------- */
15124 if (BITS8(1,1,1,0,1,0,1,1) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,1))
15125 && BITS4(0,0,0,0) == (INSN(19,16) & BITS4(1,1,1,0))
15126 && BITS4(1,0,1,1) == INSN(11,8)
15127 && BITS4(0,1,0,0) == (INSN(7,4) & BITS4(0,1,0,1))) {
15128 UInt dD = INSN(15,12) | (INSN(22,22) << 4);
15129 UInt dM = INSN(3,0) | (INSN(5,5) << 4);
15130 UInt b16 = (insn28 >> 16) & 1;
15131 UInt b7 = (insn28 >> 7) & 1;
15132 /**/ if (b16 == 0 && b7 == 0) {
15133 // FCPYD
15134 putDReg(dD, getDReg(dM), condT);
15135 DIP("fcpyd%s d%u, d%u\n", nCC(conq), dD, dM);
15136 goto decode_success_vfp;
15138 else if (b16 == 0 && b7 == 1) {
15139 // FABSD
15140 putDReg(dD, unop(Iop_AbsF64, getDReg(dM)), condT);
15141 DIP("fabsd%s d%u, d%u\n", nCC(conq), dD, dM);
15142 goto decode_success_vfp;
15144 else if (b16 == 1 && b7 == 0) {
15145 // FNEGD
15146 putDReg(dD, unop(Iop_NegF64, getDReg(dM)), condT);
15147 DIP("fnegd%s d%u, d%u\n", nCC(conq), dD, dM);
15148 goto decode_success_vfp;
15150 else if (b16 == 1 && b7 == 1) {
15151 // FSQRTD
15152 IRExpr* rm = get_FAKE_roundingmode(); /* XXXROUNDINGFIXME */
15153 putDReg(dD, binop(Iop_SqrtF64, rm, getDReg(dM)), condT);
15154 DIP("fsqrtd%s d%u, d%u\n", nCC(conq), dD, dM);
15155 goto decode_success_vfp;
15157 else
15158 vassert(0);
15160 /* fall through */
15163 /* ----------------- I <-> D conversions ----------------- */
15165 // F{S,U}ITOD dD, fM
15166 if (BITS8(1,1,1,0,1,0,1,1) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,1))
15167 && BITS4(1,0,0,0) == (INSN(19,16) & BITS4(1,1,1,1))
15168 && BITS4(1,0,1,1) == INSN(11,8)
15169 && BITS4(0,1,0,0) == (INSN(7,4) & BITS4(0,1,0,1))) {
15170 UInt bM = (insn28 >> 5) & 1;
15171 UInt fM = (INSN(3,0) << 1) | bM;
15172 UInt dD = INSN(15,12) | (INSN(22,22) << 4);
15173 UInt syned = (insn28 >> 7) & 1;
15174 if (syned) {
15175 // FSITOD
15176 putDReg(dD, unop(Iop_I32StoF64,
15177 unop(Iop_ReinterpF32asI32, getFReg(fM))),
15178 condT);
15179 DIP("fsitod%s d%u, s%u\n", nCC(conq), dD, fM);
15180 } else {
15181 // FUITOD
15182 putDReg(dD, unop(Iop_I32UtoF64,
15183 unop(Iop_ReinterpF32asI32, getFReg(fM))),
15184 condT);
15185 DIP("fuitod%s d%u, s%u\n", nCC(conq), dD, fM);
15187 goto decode_success_vfp;
15190 // FTO{S,U}ID fD, dM
15191 if (BITS8(1,1,1,0,1,0,1,1) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,1))
15192 && BITS4(1,1,0,0) == (INSN(19,16) & BITS4(1,1,1,0))
15193 && BITS4(1,0,1,1) == INSN(11,8)
15194 && BITS4(0,1,0,0) == (INSN(7,4) & BITS4(0,1,0,1))) {
15195 UInt bD = (insn28 >> 22) & 1;
15196 UInt fD = (INSN(15,12) << 1) | bD;
15197 UInt dM = INSN(3,0) | (INSN(5,5) << 4);
15198 UInt bZ = (insn28 >> 7) & 1;
15199 UInt syned = (insn28 >> 16) & 1;
15200 IRTemp rmode = newTemp(Ity_I32);
15201 assign(rmode, bZ ? mkU32(Irrm_ZERO)
15202 : mkexpr(mk_get_IR_rounding_mode()));
15203 if (syned) {
15204 // FTOSID
15205 putFReg(fD, unop(Iop_ReinterpI32asF32,
15206 binop(Iop_F64toI32S, mkexpr(rmode),
15207 getDReg(dM))),
15208 condT);
15209 DIP("ftosi%sd%s s%u, d%u\n", bZ ? "z" : "",
15210 nCC(conq), fD, dM);
15211 } else {
15212 // FTOUID
15213 putFReg(fD, unop(Iop_ReinterpI32asF32,
15214 binop(Iop_F64toI32U, mkexpr(rmode),
15215 getDReg(dM))),
15216 condT);
15217 DIP("ftoui%sd%s s%u, d%u\n", bZ ? "z" : "",
15218 nCC(conq), fD, dM);
15220 goto decode_success_vfp;
15223 /* ----------------------------------------------------------- */
15224 /* -- VFP instructions -- single precision -- */
15225 /* ----------------------------------------------------------- */
15227 /* --------------------- fldms, fstms --------------------- */
15229 31 27 23 19 15 11 7 0
15230 P UDWL
 14231 C4-98, C5-26 1 FSTMS cond 1100 1x00 Rn Fd 1010 offset
 14232 C4-98, C5-28 2 FSTMIAS cond 1100 1x10 Rn Fd 1010 offset
 14233 C4-98, C5-30 3 FSTMDBS cond 1101 0x10 Rn Fd 1010 offset
 14235 C4-40, C5-26 1 FLDMS cond 1100 1x01 Rn Fd 1010 offset
 14236 C4-40, C5-26 2 FLDMIAS cond 1100 1x11 Rn Fd 1010 offset
 14237 C4-40, C5-26 3 FLDMDBS cond 1101 0x11 Rn Fd 1010 offset
 14239 Regs transferred: F(Fd:D) .. F(Fd:D + offset - 1)
15240 offset must not imply a reg > 15
15241 IA/DB: Rn is changed by (4 x # regs transferred)
15243 case coding:
15244 1 at-Rn (access at Rn)
15245 2 ia-Rn (access at Rn, then Rn += 4n)
15246 3 db-Rn (Rn -= 4n, then access at Rn)
15248 if (BITS8(1,1,0,0,0,0,0,0) == (INSN(27,20) & BITS8(1,1,1,0,0,0,0,0))
15249 && INSN(11,8) == BITS4(1,0,1,0)) {
15250 UInt bP = (insn28 >> 24) & 1;
15251 UInt bU = (insn28 >> 23) & 1;
15252 UInt bW = (insn28 >> 21) & 1;
15253 UInt bL = (insn28 >> 20) & 1;
15254 UInt bD = (insn28 >> 22) & 1;
15255 UInt offset = (insn28 >> 0) & 0xFF;
15256 UInt rN = INSN(19,16);
15257 UInt fD = (INSN(15,12) << 1) | bD;
15258 UInt nRegs = offset;
15259 UInt summary = 0;
15260 Int i;
15262 /**/ if (bP == 0 && bU == 1 && bW == 0) {
15263 summary = 1;
15265 else if (bP == 0 && bU == 1 && bW == 1) {
15266 summary = 2;
15268 else if (bP == 1 && bU == 0 && bW == 1) {
15269 summary = 3;
15271 else goto after_vfp_fldms_fstms;
15273 /* no writebacks to r15 allowed. No use of r15 in thumb mode. */
15274 if (rN == 15 && (summary == 2 || summary == 3 || isT))
15275 goto after_vfp_fldms_fstms;
15277 /* offset must specify at least one register */
15278 if (offset < 1)
15279 goto after_vfp_fldms_fstms;
15281 /* can't transfer regs after S31 */
15282 if (fD + nRegs - 1 >= 32)
15283 goto after_vfp_fldms_fstms;
15285 /* Now, we can't do a conditional load or store, since that very
15286 likely will generate an exception. So we have to take a side
15287 exit at this point if the condition is false. */
15288 if (condT != IRTemp_INVALID) {
15289 if (isT)
15290 mk_skip_over_T32_if_cond_is_false( condT );
15291 else
15292 mk_skip_over_A32_if_cond_is_false( condT );
15293 condT = IRTemp_INVALID;
15295 /* Ok, now we're unconditional. Do the load or store. */
15297 /* get the old Rn value */
15298 IRTemp rnT = newTemp(Ity_I32);
15299 assign(rnT, align4if(isT ? getIRegT(rN) : getIRegA(rN),
15300 rN == 15));
15302 /* make a new value for Rn, post-insn */
15303 IRTemp rnTnew = IRTemp_INVALID;
15304 if (summary == 2 || summary == 3) {
15305 rnTnew = newTemp(Ity_I32);
15306 assign(rnTnew, binop(summary == 2 ? Iop_Add32 : Iop_Sub32,
15307 mkexpr(rnT),
15308 mkU32(4 * nRegs)));
15311 /* decide on the base transfer address */
15312 IRTemp taT = newTemp(Ity_I32);
15313 assign(taT, summary == 3 ? mkexpr(rnTnew) : mkexpr(rnT));
15315 /* update Rn if necessary -- in case 3, we're moving it down, so
15316 update before any memory reference, in order to keep Memcheck
15317 and V's stack-extending logic (on linux) happy */
15318 if (summary == 3) {
15319 if (isT)
15320 putIRegT(rN, mkexpr(rnTnew), IRTemp_INVALID);
15321 else
15322 putIRegA(rN, mkexpr(rnTnew), IRTemp_INVALID, Ijk_Boring);
15325 /* generate the transfers */
15326 for (i = 0; i < nRegs; i++) {
15327 IRExpr* addr = binop(Iop_Add32, mkexpr(taT), mkU32(4*i));
15328 if (bL) {
15329 putFReg(fD + i, loadLE(Ity_F32, addr), IRTemp_INVALID);
15330 } else {
15331 storeLE(addr, getFReg(fD + i));
15335 /* update Rn if necessary -- in case 2, we're moving it up, so
15336 update after any memory reference, in order to keep Memcheck
15337 and V's stack-extending logic (on linux) happy */
15338 if (summary == 2) {
15339 if (isT)
15340 putIRegT(rN, mkexpr(rnTnew), IRTemp_INVALID);
15341 else
15342 putIRegA(rN, mkexpr(rnTnew), IRTemp_INVALID, Ijk_Boring);
15345 const HChar* nm = bL==1 ? "ld" : "st";
15346 switch (summary) {
15347 case 1: DIP("f%sms%s r%u, {s%u-s%u}\n",
15348 nm, nCC(conq), rN, fD, fD + nRegs - 1);
15349 break;
15350 case 2: DIP("f%smias%s r%u!, {s%u-s%u}\n",
15351 nm, nCC(conq), rN, fD, fD + nRegs - 1);
15352 break;
15353 case 3: DIP("f%smdbs%s r%u!, {s%u-s%u}\n",
15354 nm, nCC(conq), rN, fD, fD + nRegs - 1);
15355 break;
15356 default: vassert(0);
15359 goto decode_success_vfp;
15360 /* FIXME alignment constraints? */
15363 after_vfp_fldms_fstms:
15365 /* --------------------- fmsr, fmrs --------------------- */
15366 if (BITS8(1,1,1,0,0,0,0,0) == (INSN(27,20) & BITS8(1,1,1,1,1,1,1,0))
15367 && BITS4(1,0,1,0) == INSN(11,8)
15368 && BITS4(0,0,0,0) == INSN(3,0)
15369 && BITS4(0,0,0,1) == (INSN(7,4) & BITS4(0,1,1,1))) {
15370 UInt rD = INSN(15,12);
15371 UInt b7 = (insn28 >> 7) & 1;
15372 UInt fN = (INSN(19,16) << 1) | b7;
15373 UInt b20 = (insn28 >> 20) & 1;
15374 if (rD == 15) {
15375 /* fall through */
15376 /* Let's assume that no sane person would want to do
15377 floating-point transfers to or from the program counter,
15378 and simply decline to decode the instruction. The ARM ARM
15379 doesn't seem to explicitly disallow this case, though. */
15380 } else {
15381 if (b20) {
15382 IRExpr* res = unop(Iop_ReinterpF32asI32, getFReg(fN));
15383 if (isT)
15384 putIRegT(rD, res, condT);
15385 else
15386 putIRegA(rD, res, condT, Ijk_Boring);
15387 DIP("fmrs%s r%u, s%u\n", nCC(conq), rD, fN);
15388 } else {
15389 putFReg(fN, unop(Iop_ReinterpI32asF32,
15390 isT ? getIRegT(rD) : getIRegA(rD)),
15391 condT);
15392 DIP("fmsr%s s%u, r%u\n", nCC(conq), fN, rD);
15394 goto decode_success_vfp;
15396 /* fall through */
15399 /* --------------------- f{ld,st}s --------------------- */
15400 // FLDS, FSTS
15401 if (BITS8(1,1,0,1,0,0,0,0) == (INSN(27,20) & BITS8(1,1,1,1,0,0,1,0))
15402 && BITS4(1,0,1,0) == INSN(11,8)) {
15403 UInt bD = (insn28 >> 22) & 1;
15404 UInt fD = (INSN(15,12) << 1) | bD;
15405 UInt rN = INSN(19,16);
15406 UInt offset = (insn28 & 0xFF) << 2;
15407 UInt bU = (insn28 >> 23) & 1; /* 1: +offset 0: -offset */
15408 UInt bL = (insn28 >> 20) & 1; /* 1: load 0: store */
15409 /* make unconditional */
15410 if (condT != IRTemp_INVALID) {
15411 if (isT)
15412 mk_skip_over_T32_if_cond_is_false( condT );
15413 else
15414 mk_skip_over_A32_if_cond_is_false( condT );
15415 condT = IRTemp_INVALID;
15417 IRTemp ea = newTemp(Ity_I32);
15418 assign(ea, binop(bU ? Iop_Add32 : Iop_Sub32,
15419 align4if(isT ? getIRegT(rN) : getIRegA(rN),
15420 rN == 15),
15421 mkU32(offset)));
15422 if (bL) {
15423 putFReg(fD, loadLE(Ity_F32,mkexpr(ea)), IRTemp_INVALID);
15424 } else {
15425 storeLE(mkexpr(ea), getFReg(fD));
15427 DIP("f%ss%s s%u, [r%u, %c#%u]\n",
15428 bL ? "ld" : "st", nCC(conq), fD, rN,
15429 bU ? '+' : '-', offset);
15430 goto decode_success_vfp;
15433 /* --------------------- dp insns (F) --------------------- */
15434 if (BITS8(1,1,1,0,0,0,0,0) == (INSN(27,20) & BITS8(1,1,1,1,0,0,0,0))
15435 && BITS4(1,0,1,0) == (INSN(11,8) & BITS4(1,1,1,0))
15436 && BITS4(0,0,0,0) == (INSN(7,4) & BITS4(0,0,0,1))) {
15437 UInt bM = (insn28 >> 5) & 1;
15438 UInt bD = (insn28 >> 22) & 1;
15439 UInt bN = (insn28 >> 7) & 1;
15440 UInt fM = (INSN(3,0) << 1) | bM; /* argR */
15441 UInt fD = (INSN(15,12) << 1) | bD; /* dst/acc */
15442 UInt fN = (INSN(19,16) << 1) | bN; /* argL */
15443 UInt bP = (insn28 >> 23) & 1;
15444 UInt bQ = (insn28 >> 21) & 1;
15445 UInt bR = (insn28 >> 20) & 1;
15446 UInt bS = (insn28 >> 6) & 1;
15447 UInt opc = (bP << 3) | (bQ << 2) | (bR << 1) | bS;
15448 IRExpr* rm = get_FAKE_roundingmode(); /* XXXROUNDINGFIXME */
15449 switch (opc) {
15450 case BITS4(0,0,0,0): /* MAC: d + n * m */
15451 putFReg(fD, triop(Iop_AddF32, rm,
15452 getFReg(fD),
15453 triop(Iop_MulF32, rm, getFReg(fN), getFReg(fM))),
15454 condT);
15455 DIP("fmacs%s s%u, s%u, s%u\n", nCC(conq), fD, fN, fM);
15456 goto decode_success_vfp;
15457 case BITS4(0,0,0,1): /* NMAC: d + -(n * m) */
15458 putFReg(fD, triop(Iop_AddF32, rm,
15459 getFReg(fD),
15460 unop(Iop_NegF32,
15461 triop(Iop_MulF32, rm, getFReg(fN),
15462 getFReg(fM)))),
15463 condT);
15464 DIP("fnmacs%s s%u, s%u, s%u\n", nCC(conq), fD, fN, fM);
15465 goto decode_success_vfp;
15466 case BITS4(0,0,1,0): /* MSC: - d + n * m */
15467 putFReg(fD, triop(Iop_AddF32, rm,
15468 unop(Iop_NegF32, getFReg(fD)),
15469 triop(Iop_MulF32, rm, getFReg(fN), getFReg(fM))),
15470 condT);
15471 DIP("fmscs%s s%u, s%u, s%u\n", nCC(conq), fD, fN, fM);
15472 goto decode_success_vfp;
15473 case BITS4(0,0,1,1): /* NMSC: - d + -(n * m) */
15474 putFReg(fD, triop(Iop_AddF32, rm,
15475 unop(Iop_NegF32, getFReg(fD)),
15476 unop(Iop_NegF32,
15477 triop(Iop_MulF32, rm,
15478 getFReg(fN),
15479 getFReg(fM)))),
15480 condT);
15481 DIP("fnmscs%s s%u, s%u, s%u\n", nCC(conq), fD, fN, fM);
15482 goto decode_success_vfp;
15483 case BITS4(0,1,0,0): /* MUL: n * m */
15484 putFReg(fD, triop(Iop_MulF32, rm, getFReg(fN), getFReg(fM)),
15485 condT);
15486 DIP("fmuls%s s%u, s%u, s%u\n", nCC(conq), fD, fN, fM);
15487 goto decode_success_vfp;
15488 case BITS4(0,1,0,1): /* NMUL: - n * m */
15489 putFReg(fD, unop(Iop_NegF32,
15490 triop(Iop_MulF32, rm, getFReg(fN),
15491 getFReg(fM))),
15492 condT);
15493 DIP("fnmuls%s s%u, s%u, s%u\n", nCC(conq), fD, fN, fM);
15494 goto decode_success_vfp;
15495 case BITS4(0,1,1,0): /* ADD: n + m */
15496 putFReg(fD, triop(Iop_AddF32, rm, getFReg(fN), getFReg(fM)),
15497 condT);
15498 DIP("fadds%s s%u, s%u, s%u\n", nCC(conq), fD, fN, fM);
15499 goto decode_success_vfp;
15500 case BITS4(0,1,1,1): /* SUB: n - m */
15501 putFReg(fD, triop(Iop_SubF32, rm, getFReg(fN), getFReg(fM)),
15502 condT);
15503 DIP("fsubs%s s%u, s%u, s%u\n", nCC(conq), fD, fN, fM);
15504 goto decode_success_vfp;
15505 case BITS4(1,0,0,0): /* DIV: n / m */
15506 putFReg(fD, triop(Iop_DivF32, rm, getFReg(fN), getFReg(fM)),
15507 condT);
15508 DIP("fdivs%s s%u, s%u, s%u\n", nCC(conq), fD, fN, fM);
15509 goto decode_success_vfp;
15510 case BITS4(1,0,1,0): /* VNFMS: -(d - n * m) (fused) */
15511 /* XXXROUNDINGFIXME look up ARM reference for fused
15512 multiply-add rounding */
15513 putFReg(fD, triop(Iop_AddF32, rm,
15514 unop(Iop_NegF32, getFReg(fD)),
15515 triop(Iop_MulF32, rm,
15516 getFReg(fN),
15517 getFReg(fM))),
15518 condT);
15519 DIP("vfnmss%s s%u, s%u, s%u\n", nCC(conq), fD, fN, fM);
15520 goto decode_success_vfp;
15521 case BITS4(1,0,1,1): /* VNFMA: -(d + n * m) (fused) */
15522 /* XXXROUNDINGFIXME look up ARM reference for fused
15523 multiply-add rounding */
15524 putFReg(fD, triop(Iop_AddF32, rm,
15525 unop(Iop_NegF32, getFReg(fD)),
15526 triop(Iop_MulF32, rm,
15527 unop(Iop_NegF32, getFReg(fN)),
15528 getFReg(fM))),
15529 condT);
15530 DIP("vfnmas%s s%u, s%u, s%u\n", nCC(conq), fD, fN, fM);
15531 goto decode_success_vfp;
15532 case BITS4(1,1,0,0): /* VFMA: d + n * m (fused) */
15533 /* XXXROUNDINGFIXME look up ARM reference for fused
15534 multiply-add rounding */
15535 putFReg(fD, triop(Iop_AddF32, rm,
15536 getFReg(fD),
15537 triop(Iop_MulF32, rm, getFReg(fN),
15538 getFReg(fM))),
15539 condT);
15540 DIP("vfmas%s s%u, s%u, s%u\n", nCC(conq), fD, fN, fM);
15541 goto decode_success_vfp;
15542 case BITS4(1,1,0,1): /* VFMS: d + (-n * m) (fused) */
15543 /* XXXROUNDINGFIXME look up ARM reference for fused
15544 multiply-add rounding */
15545 putFReg(fD, triop(Iop_AddF32, rm,
15546 getFReg(fD),
15547 triop(Iop_MulF32, rm,
15548 unop(Iop_NegF32, getFReg(fN)),
15549 getFReg(fM))),
15550 condT);
15551 DIP("vfmss%s s%u, s%u, s%u\n", nCC(conq), fD, fN, fM);
15552 goto decode_success_vfp;
15553 default:
15554 break;
15558 /* --------------------- compares (S) --------------------- */
15559 /* 31 27 23 19 15 11 7 3
15560 28 24 20 16 12 8 4 0
15561 FCMPS cond 1110 1D11 0100 Fd 1010 01M0 Fm
15562 FCMPES cond 1110 1D11 0100 Fd 1010 11M0 Fm
15563 FCMPZS cond 1110 1D11 0101 Fd 1010 0100 0000
15564 FCMPZED cond 1110 1D11 0101 Fd 1010 1100 0000
15567 Z=0 Compare Fd:D vs Fm:M and set FPSCR 31:28 accordingly
15568 Z=1 Compare Fd:D vs zero
15570 N=1 generates Invalid Operation exn if either arg is any kind of NaN
15571 N=0 generates Invalid Operation exn if either arg is a signalling NaN
15572 (Not that we pay any attention to N here)
15574 if (BITS8(1,1,1,0,1,0,1,1) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,1))
15575 && BITS4(0,1,0,0) == (INSN(19,16) & BITS4(1,1,1,0))
15576 && BITS4(1,0,1,0) == INSN(11,8)
15577 && BITS4(0,1,0,0) == (INSN(7,4) & BITS4(0,1,0,1))) {
15578 UInt bZ = (insn28 >> 16) & 1;
15579 UInt bN = (insn28 >> 7) & 1;
15580 UInt bD = (insn28 >> 22) & 1;
15581 UInt bM = (insn28 >> 5) & 1;
15582 UInt fD = (INSN(15,12) << 1) | bD;
15583 UInt fM = (INSN(3,0) << 1) | bM;
15584 if (bZ && (INSN(3,0) != 0 || (INSN(7,4) & 3) != 0)) {
15585 /* does not decode; fall through */
15586 } else {
15587 IRTemp argL = newTemp(Ity_F64);
15588 IRTemp argR = newTemp(Ity_F64);
15589 IRTemp irRes = newTemp(Ity_I32);
15591 assign(argL, unop(Iop_F32toF64, getFReg(fD)));
15592 assign(argR, bZ ? IRExpr_Const(IRConst_F64i(0))
15593 : unop(Iop_F32toF64, getFReg(fM)));
15594 assign(irRes, binop(Iop_CmpF64, mkexpr(argL), mkexpr(argR)));
15596 IRTemp nzcv = IRTemp_INVALID;
15597 IRTemp oldFPSCR = newTemp(Ity_I32);
15598 IRTemp newFPSCR = newTemp(Ity_I32);
15600 /* This is where the fun starts. We have to convert 'irRes'
15601 from an IR-convention return result (IRCmpF64Result) to an
15602 ARM-encoded (N,Z,C,V) group. The final result is in the
15603 bottom 4 bits of 'nzcv'. */
15604 /* Map compare result from IR to ARM(nzcv) */
15606 FP cmp result | IR | ARM(nzcv)
15607 --------------------------------
15608 UN 0x45 0011
15609 LT 0x01 1000
15610 GT 0x00 0010
15611 EQ 0x40 0110
15613 nzcv = mk_convert_IRCmpF64Result_to_NZCV(irRes);
15615 /* And update FPSCR accordingly */
15616 assign(oldFPSCR, IRExpr_Get(OFFB_FPSCR, Ity_I32));
15617 assign(newFPSCR,
15618 binop(Iop_Or32,
15619 binop(Iop_And32, mkexpr(oldFPSCR), mkU32(0x0FFFFFFF)),
15620 binop(Iop_Shl32, mkexpr(nzcv), mkU8(28))));
15622 putMiscReg32(OFFB_FPSCR, mkexpr(newFPSCR), condT);
15624 if (bZ) {
15625 DIP("fcmpz%ss%s s%u\n", bN ? "e" : "", nCC(conq), fD);
15626 } else {
15627 DIP("fcmp%ss%s s%u, s%u\n", bN ? "e" : "",
15628 nCC(conq), fD, fM);
15630 goto decode_success_vfp;
15632 /* fall through */
15635 /* --------------------- unary (S) --------------------- */
15636 if (BITS8(1,1,1,0,1,0,1,1) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,1))
15637 && BITS4(0,0,0,0) == (INSN(19,16) & BITS4(1,1,1,0))
15638 && BITS4(1,0,1,0) == INSN(11,8)
15639 && BITS4(0,1,0,0) == (INSN(7,4) & BITS4(0,1,0,1))) {
15640 UInt bD = (insn28 >> 22) & 1;
15641 UInt bM = (insn28 >> 5) & 1;
15642 UInt fD = (INSN(15,12) << 1) | bD;
15643 UInt fM = (INSN(3,0) << 1) | bM;
15644 UInt b16 = (insn28 >> 16) & 1;
15645 UInt b7 = (insn28 >> 7) & 1;
15646 /**/ if (b16 == 0 && b7 == 0) {
15647 // FCPYS
15648 putFReg(fD, getFReg(fM), condT);
15649 DIP("fcpys%s s%u, s%u\n", nCC(conq), fD, fM);
15650 goto decode_success_vfp;
15652 else if (b16 == 0 && b7 == 1) {
15653 // FABSS
15654 putFReg(fD, unop(Iop_AbsF32, getFReg(fM)), condT);
15655 DIP("fabss%s s%u, s%u\n", nCC(conq), fD, fM);
15656 goto decode_success_vfp;
15658 else if (b16 == 1 && b7 == 0) {
15659 // FNEGS
15660 putFReg(fD, unop(Iop_NegF32, getFReg(fM)), condT);
15661 DIP("fnegs%s s%u, s%u\n", nCC(conq), fD, fM);
15662 goto decode_success_vfp;
15664 else if (b16 == 1 && b7 == 1) {
15665 // FSQRTS
15666 IRExpr* rm = get_FAKE_roundingmode(); /* XXXROUNDINGFIXME */
15667 putFReg(fD, binop(Iop_SqrtF32, rm, getFReg(fM)), condT);
15668 DIP("fsqrts%s s%u, s%u\n", nCC(conq), fD, fM);
15669 goto decode_success_vfp;
15671 else
15672 vassert(0);
15674 /* fall through */
15677 /* ----------------- I <-> S conversions ----------------- */
15679 // F{S,U}ITOS fD, fM
15680 /* These are more complex than FSITOD/FUITOD. In the D cases, a 32
15681 bit int will always fit within the 53 bit mantissa, so there's
15682 no possibility of a loss of precision, but that's obviously not
15683 the case here. Hence this case possibly requires rounding, and
15684 so it drags in the current rounding mode. */
15685 if (BITS8(1,1,1,0,1,0,1,1) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,1))
15686 && BITS4(1,0,0,0) == INSN(19,16)
15687 && BITS4(1,0,1,0) == (INSN(11,8) & BITS4(1,1,1,0))
15688 && BITS4(0,1,0,0) == (INSN(7,4) & BITS4(0,1,0,1))) {
15689 UInt bM = (insn28 >> 5) & 1;
15690 UInt bD = (insn28 >> 22) & 1;
15691 UInt fM = (INSN(3,0) << 1) | bM;
15692 UInt fD = (INSN(15,12) << 1) | bD;
15693 UInt syned = (insn28 >> 7) & 1;
15694 IRTemp rmode = newTemp(Ity_I32);
15695 assign(rmode, mkexpr(mk_get_IR_rounding_mode()));
15696 if (syned) {
15697 // FSITOS
15698 putFReg(fD, binop(Iop_F64toF32,
15699 mkexpr(rmode),
15700 unop(Iop_I32StoF64,
15701 unop(Iop_ReinterpF32asI32, getFReg(fM)))),
15702 condT);
15703 DIP("fsitos%s s%u, s%u\n", nCC(conq), fD, fM);
15704 } else {
15705 // FUITOS
15706 putFReg(fD, binop(Iop_F64toF32,
15707 mkexpr(rmode),
15708 unop(Iop_I32UtoF64,
15709 unop(Iop_ReinterpF32asI32, getFReg(fM)))),
15710 condT);
15711 DIP("fuitos%s s%u, s%u\n", nCC(conq), fD, fM);
15713 goto decode_success_vfp;
15716 // FTO{S,U}IS fD, fM
15717 if (BITS8(1,1,1,0,1,0,1,1) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,1))
15718 && BITS4(1,1,0,0) == (INSN(19,16) & BITS4(1,1,1,0))
15719 && BITS4(1,0,1,0) == INSN(11,8)
15720 && BITS4(0,1,0,0) == (INSN(7,4) & BITS4(0,1,0,1))) {
15721 UInt bM = (insn28 >> 5) & 1;
15722 UInt bD = (insn28 >> 22) & 1;
15723 UInt fD = (INSN(15,12) << 1) | bD;
15724 UInt fM = (INSN(3,0) << 1) | bM;
15725 UInt bZ = (insn28 >> 7) & 1;
15726 UInt syned = (insn28 >> 16) & 1;
15727 IRTemp rmode = newTemp(Ity_I32);
15728 assign(rmode, bZ ? mkU32(Irrm_ZERO)
15729 : mkexpr(mk_get_IR_rounding_mode()));
15730 if (syned) {
15731 // FTOSIS
15732 putFReg(fD, unop(Iop_ReinterpI32asF32,
15733 binop(Iop_F64toI32S, mkexpr(rmode),
15734 unop(Iop_F32toF64, getFReg(fM)))),
15735 condT);
15736 DIP("ftosi%ss%s s%u, d%u\n", bZ ? "z" : "",
15737 nCC(conq), fD, fM);
15738 goto decode_success_vfp;
15739 } else {
15740 // FTOUIS
15741 putFReg(fD, unop(Iop_ReinterpI32asF32,
15742 binop(Iop_F64toI32U, mkexpr(rmode),
15743 unop(Iop_F32toF64, getFReg(fM)))),
15744 condT);
15745 DIP("ftoui%ss%s s%u, d%u\n", bZ ? "z" : "",
15746 nCC(conq), fD, fM);
15747 goto decode_success_vfp;
15751 /* ----------------- S <-> D conversions ----------------- */
15753 // FCVTDS
15754 if (BITS8(1,1,1,0,1,0,1,1) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,1))
15755 && BITS4(0,1,1,1) == INSN(19,16)
15756 && BITS4(1,0,1,0) == INSN(11,8)
15757 && BITS4(1,1,0,0) == (INSN(7,4) & BITS4(1,1,0,1))) {
15758 UInt dD = INSN(15,12) | (INSN(22,22) << 4);
15759 UInt bM = (insn28 >> 5) & 1;
15760 UInt fM = (INSN(3,0) << 1) | bM;
15761 putDReg(dD, unop(Iop_F32toF64, getFReg(fM)), condT);
15762 DIP("fcvtds%s d%u, s%u\n", nCC(conq), dD, fM);
15763 goto decode_success_vfp;
15766 // FCVTSD
15767 if (BITS8(1,1,1,0,1,0,1,1) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,1))
15768 && BITS4(0,1,1,1) == INSN(19,16)
15769 && BITS4(1,0,1,1) == INSN(11,8)
15770 && BITS4(1,1,0,0) == (INSN(7,4) & BITS4(1,1,0,1))) {
15771 UInt bD = (insn28 >> 22) & 1;
15772 UInt fD = (INSN(15,12) << 1) | bD;
15773 UInt dM = INSN(3,0) | (INSN(5,5) << 4);
15774 IRTemp rmode = newTemp(Ity_I32);
15775 assign(rmode, mkexpr(mk_get_IR_rounding_mode()));
15776 putFReg(fD, binop(Iop_F64toF32, mkexpr(rmode), getDReg(dM)),
15777 condT);
15778 DIP("fcvtsd%s s%u, d%u\n", nCC(conq), fD, dM);
15779 goto decode_success_vfp;
15782 /* --------------- VCVT fixed<->floating, VFP --------------- */
15783 /* 31 27 23 19 15 11 7 3
15784 28 24 20 16 12 8 4 0
15786 cond 1110 1D11 1p1U Vd 101f x1i0 imm4
15788 VCVT<c>.<Td>.F64 <Dd>, <Dd>, #fbits
15789 VCVT<c>.<Td>.F32 <Dd>, <Dd>, #fbits
15790 VCVT<c>.F64.<Td> <Dd>, <Dd>, #fbits
15791 VCVT<c>.F32.<Td> <Dd>, <Dd>, #fbits
15792 are of this form. We only handle a subset of the cases though.
15794 if (BITS8(1,1,1,0,1,0,1,1) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,1))
15795 && BITS4(1,0,1,0) == (INSN(19,16) & BITS4(1,0,1,0))
15796 && BITS3(1,0,1) == INSN(11,9)
15797 && BITS3(1,0,0) == (INSN(6,4) & BITS3(1,0,1))) {
15798 UInt bD = INSN(22,22);
15799 UInt bOP = INSN(18,18);
15800 UInt bU = INSN(16,16);
15801 UInt Vd = INSN(15,12);
15802 UInt bSF = INSN(8,8);
15803 UInt bSX = INSN(7,7);
15804 UInt bI = INSN(5,5);
15805 UInt imm4 = INSN(3,0);
15806 Bool to_fixed = bOP == 1;
15807 Bool dp_op = bSF == 1;
15808 Bool unsyned = bU == 1;
15809 UInt size = bSX == 0 ? 16 : 32;
15810 Int frac_bits = size - ((imm4 << 1) | bI);
15811 UInt d = dp_op ? ((bD << 4) | Vd) : ((Vd << 1) | bD);
15813 IRExpr* rm = mkU32(Irrm_NEAREST);
15814 IRTemp scale = newTemp(Ity_F64);
15815 assign(scale, unop(Iop_I32UtoF64, mkU32( ((UInt)1) << (frac_bits-1) )));
15817 if (frac_bits >= 1 && frac_bits <= 32 && !to_fixed && !dp_op
15818 && size == 32) {
15819 /* VCVT.F32.{S,U}32 S[d], S[d], #frac_bits */
15820 /* This generates really horrible code. We could potentially
15821 do much better. */
15822 IRTemp rmode = newTemp(Ity_I32);
15823 assign(rmode, mkU32(Irrm_NEAREST)); // per the spec
15824 IRTemp src32 = newTemp(Ity_I32);
15825 assign(src32, unop(Iop_ReinterpF32asI32, getFReg(d)));
15826 IRExpr* as_F64 = unop( unsyned ? Iop_I32UtoF64 : Iop_I32StoF64,
15827 mkexpr(src32 ) );
15828 IRExpr* resF64 = triop(Iop_DivF64,
15829 rm, as_F64,
15830 triop(Iop_AddF64, rm, mkexpr(scale),
15831 mkexpr(scale)));
15832 IRExpr* resF32 = binop(Iop_F64toF32, mkexpr(rmode), resF64);
15833 putFReg(d, resF32, condT);
15834 DIP("vcvt.f32.%c32, s%u, s%u, #%d\n",
15835 unsyned ? 'u' : 's', d, d, frac_bits);
15836 goto decode_success_vfp;
15838 if (frac_bits >= 1 && frac_bits <= 32 && !to_fixed && dp_op
15839 && size == 32) {
15840 /* VCVT.F64.{S,U}32 D[d], D[d], #frac_bits */
15841 /* This generates really horrible code. We could potentially
15842 do much better. */
15843 IRTemp src32 = newTemp(Ity_I32);
15844 assign(src32, unop(Iop_64to32, getDRegI64(d)));
15845 IRExpr* as_F64 = unop( unsyned ? Iop_I32UtoF64 : Iop_I32StoF64,
15846 mkexpr(src32 ) );
15847 IRExpr* resF64 = triop(Iop_DivF64,
15848 rm, as_F64,
15849 triop(Iop_AddF64, rm, mkexpr(scale),
15850 mkexpr(scale)));
15851 putDReg(d, resF64, condT);
15852 DIP("vcvt.f64.%c32, d%u, d%u, #%d\n",
15853 unsyned ? 'u' : 's', d, d, frac_bits);
15854 goto decode_success_vfp;
15856 if (frac_bits >= 1 && frac_bits <= 32 && to_fixed && dp_op
15857 && size == 32) {
15858 /* VCVT.{S,U}32.F64 D[d], D[d], #frac_bits */
15859 IRTemp srcF64 = newTemp(Ity_F64);
15860 assign(srcF64, getDReg(d));
15861 IRTemp scaledF64 = newTemp(Ity_F64);
15862 assign(scaledF64, triop(Iop_MulF64,
15863 rm, mkexpr(srcF64),
15864 triop(Iop_AddF64, rm, mkexpr(scale),
15865 mkexpr(scale))));
15866 IRTemp rmode = newTemp(Ity_I32);
15867 assign(rmode, mkU32(Irrm_ZERO)); // as per the spec
15868 IRTemp asI32 = newTemp(Ity_I32);
15869 assign(asI32, binop(unsyned ? Iop_F64toI32U : Iop_F64toI32S,
15870 mkexpr(rmode), mkexpr(scaledF64)));
15871 putDRegI64(d, unop(unsyned ? Iop_32Uto64 : Iop_32Sto64,
15872 mkexpr(asI32)), condT);
15874 DIP("vcvt.%c32.f64, d%u, d%u, #%d\n",
15875 unsyned ? 'u' : 's', d, d, frac_bits);
15876 goto decode_success_vfp;
15878 if (frac_bits >= 1 && frac_bits <= 32 && to_fixed && !dp_op
15879 && size == 32) {
15880 /* VCVT.{S,U}32.F32 S[d], S[d], #frac_bits */
15881 IRTemp srcF32 = newTemp(Ity_F32);
15882 assign(srcF32, getFReg(d));
15883 IRTemp scaledF64 = newTemp(Ity_F64);
15884 assign(scaledF64, triop(Iop_MulF64,
15885 rm, unop(Iop_F32toF64, mkexpr(srcF32)),
15886 triop(Iop_AddF64, rm, mkexpr(scale),
15887 mkexpr(scale))));
15888 IRTemp rmode = newTemp(Ity_I32);
15889 assign(rmode, mkU32(Irrm_ZERO)); // as per the spec
15890 IRTemp asI32 = newTemp(Ity_I32);
15891 assign(asI32, binop(unsyned ? Iop_F64toI32U : Iop_F64toI32S,
15892 mkexpr(rmode), mkexpr(scaledF64)));
15893 putFReg(d, unop(Iop_ReinterpI32asF32, mkexpr(asI32)), condT);
15894 DIP("vcvt.%c32.f32, d%u, d%u, #%d\n",
15895 unsyned ? 'u' : 's', d, d, frac_bits);
15896 goto decode_success_vfp;
15898 /* fall through */
15901 /* FAILURE */
15902 return False;
15904 decode_success_vfp:
15905 /* Check that any accepted insn really is a CP10 or CP11 insn, iow,
15906 assert that we aren't accepting, in this fn, insns that actually
15907 should be handled somewhere else. */
15908 vassert(INSN(11,9) == BITS3(1,0,1)); // 11:8 = 1010 or 1011
15909 return True;
15911 # undef INSN
15915 /*------------------------------------------------------------*/
15916 /*--- Instructions in NV (never) space ---*/
15917 /*------------------------------------------------------------*/
15919 /* ARM only */
15920 /* Translate a NV space instruction. If successful, returns True and
15921 *dres may or may not be updated. If failure, returns False and
15922 doesn't change *dres nor create any IR.
15924 Note that all NEON instructions (in ARM mode) up to and including
15925 ARMv7, but not later, are handled through here, since they are all
15926 in NV space.
15928 static Bool decode_NV_instruction_ARMv7_and_below
15929 ( /*MOD*/DisResult* dres,
15930 const VexArchInfo* archinfo,
15931 UInt insn )
15933 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
15934 # define INSN_COND SLICE_UInt(insn, 31, 28)
15936 HChar dis_buf[128];
15938 // Should only be called for NV instructions
15939 vassert(BITS4(1,1,1,1) == INSN_COND);
15941 /* ------------------------ pld{w} ------------------------ */
15942 if (BITS8(0,1,0,1, 0,0, 0,1) == (INSN(27,20) & BITS8(1,1,1,1, 0,0, 1,1))
15943 && BITS4(1,1,1,1) == INSN(15,12)) {
15944 UInt rN = INSN(19,16);
15945 UInt imm12 = INSN(11,0);
15946 UInt bU = INSN(23,23);
15947 UInt bR = INSN(22,22);
15948 DIP("pld%c [r%u, #%c%u]\n", bR ? ' ' : 'w', rN, bU ? '+' : '-', imm12);
15949 return True;
15952 if (BITS8(0,1,1,1, 0,0, 0,1) == (INSN(27,20) & BITS8(1,1,1,1, 0,0, 1,1))
15953 && BITS4(1,1,1,1) == INSN(15,12)
15954 && 0 == INSN(4,4)) {
15955 UInt rN = INSN(19,16);
15956 UInt rM = INSN(3,0);
15957 UInt imm5 = INSN(11,7);
15958 UInt sh2 = INSN(6,5);
15959 UInt bU = INSN(23,23);
15960 UInt bR = INSN(22,22);
15961 if (rM != 15 && (rN != 15 || bR)) {
15962 IRExpr* eaE = mk_EA_reg_plusminus_shifted_reg(rN, bU, rM,
15963 sh2, imm5, dis_buf);
15964 IRTemp eaT = newTemp(Ity_I32);
15965 /* Bind eaE to a temp merely for debugging-vex purposes, so we
15966 can check it's a plausible decoding. It will get removed
15967 by iropt a little later on. */
15968 vassert(eaE);
15969 assign(eaT, eaE);
15970 DIP("pld%c %s\n", bR ? ' ' : 'w', dis_buf);
15971 return True;
15973 /* fall through */
15976 /* ------------------------ pli ------------------------ */
15977 if (BITS8(0,1,0,0, 0, 1,0,1) == (INSN(27,20) & BITS8(1,1,1,1,0,1,1,1))
15978 && BITS4(1,1,1,1) == INSN(15,12)) {
15979 UInt rN = INSN(19,16);
15980 UInt imm12 = INSN(11,0);
15981 UInt bU = INSN(23,23);
15982 DIP("pli [r%u, #%c%u]\n", rN, bU ? '+' : '-', imm12);
15983 return True;
15986 /* --------------------- Interworking branches --------------------- */
15988 // BLX (1), viz, unconditional branch and link to R15+simm24
15989 // and set CPSR.T = 1, that is, switch to Thumb mode
15990 if (INSN(31,25) == BITS7(1,1,1,1,1,0,1)) {
15991 UInt bitH = INSN(24,24);
15992 UInt uimm24 = INSN(23,0); uimm24 <<= 8;
15993 Int simm24 = (Int)uimm24; simm24 >>= 8;
15994 simm24 = (((UInt)simm24) << 2) + (bitH << 1);
15995 /* Now this is a bit tricky. Since we're decoding an ARM insn,
15996 it is implies that CPSR.T == 0. Hence the current insn's
15997 address is guaranteed to be of the form X--(30)--X00. So, no
15998 need to mask any bits off it. But need to set the lowest bit
15999 to 1 to denote we're in Thumb mode after this, since
16000 guest_R15T has CPSR.T as the lowest bit. And we can't chase
16001 into the call, so end the block at this point. */
16002 UInt dst = guest_R15_curr_instr_notENC + 8 + (simm24 | 1);
16003 putIRegA( 14, mkU32(guest_R15_curr_instr_notENC + 4),
16004 IRTemp_INVALID/*because AL*/, Ijk_Boring );
16005 llPutIReg(15, mkU32(dst));
16006 dres->jk_StopHere = Ijk_Call;
16007 dres->whatNext = Dis_StopHere;
16008 DIP("blx 0x%x (and switch to Thumb mode)\n", dst - 1);
16009 return True;
16012 /* ------------------- v7 barrier insns ------------------- */
16013 switch (insn) {
16014 case 0xF57FF06F: /* ISB */
16015 stmt( IRStmt_MBE(Imbe_Fence) );
16016 DIP("ISB\n");
16017 return True;
16018 case 0xF57FF04F: /* DSB sy */
16019 case 0xF57FF04E: /* DSB st */
16020 case 0xF57FF04B: /* DSB ish */
16021 case 0xF57FF04A: /* DSB ishst */
16022 case 0xF57FF047: /* DSB nsh */
16023 case 0xF57FF046: /* DSB nshst */
16024 case 0xF57FF043: /* DSB osh */
16025 case 0xF57FF042: /* DSB oshst */
16026 stmt( IRStmt_MBE(Imbe_Fence) );
16027 DIP("DSB\n");
16028 return True;
16029 case 0xF57FF05F: /* DMB sy */
16030 case 0xF57FF05E: /* DMB st */
16031 case 0xF57FF05B: /* DMB ish */
16032 case 0xF57FF05A: /* DMB ishst */
16033 case 0xF57FF057: /* DMB nsh */
16034 case 0xF57FF056: /* DMB nshst */
16035 case 0xF57FF053: /* DMB osh */
16036 case 0xF57FF052: /* DMB oshst */
16037 stmt( IRStmt_MBE(Imbe_Fence) );
16038 DIP("DMB\n");
16039 return True;
16040 default:
16041 break;
16044 /* ------------------- CLREX ------------------ */
16045 if (insn == 0xF57FF01F) {
16046 /* AFAICS, this simply cancels a (all?) reservations made by a
16047 (any?) preceding LDREX(es). Arrange to hand it through to
16048 the back end. */
16049 stmt( IRStmt_MBE(Imbe_CancelReservation) );
16050 DIP("clrex\n");
16051 return True;
16054 /* ------------------- NEON ------------------- */
16055 if (archinfo->hwcaps & VEX_HWCAPS_ARM_NEON) {
16056 Bool ok_neon = decode_NEON_instruction_ARMv7_and_below(
16057 dres, insn, IRTemp_INVALID/*unconditional*/,
16058 False/*!isT*/
16060 if (ok_neon)
16061 return True;
16064 // unrecognised
16065 return False;
16067 # undef INSN_COND
16068 # undef INSN
16072 /*------------------------------------------------------------*/
16073 /*--- Disassemble a single ARM instruction ---*/
16074 /*------------------------------------------------------------*/
16076 /* Disassemble a single ARM instruction into IR. The instruction is
16077 located in host memory at guest_instr, and has (decoded) guest IP
16078 of guest_R15_curr_instr_notENC, which will have been set before the
16079 call here. */
16081 static
16082 DisResult disInstr_ARM_WRK (
16083 Bool (*resteerOkFn) ( /*opaque*/void*, Addr ),
16084 Bool resteerCisOk,
16085 void* callback_opaque,
16086 const UChar* guest_instr,
16087 const VexArchInfo* archinfo,
16088 const VexAbiInfo* abiinfo,
16089 Bool sigill_diag
16092 // A macro to fish bits out of 'insn'.
16093 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
16094 # define INSN_COND SLICE_UInt(insn, 31, 28)
16096 DisResult dres;
16097 UInt insn;
16098 IRTemp condT; /* :: Ity_I32 */
16099 UInt summary;
16100 HChar dis_buf[128]; // big enough to hold LDMIA etc text
16102 /* Set result defaults. */
16103 dres.whatNext = Dis_Continue;
16104 dres.len = 4;
16105 dres.continueAt = 0;
16106 dres.jk_StopHere = Ijk_INVALID;
16107 dres.hint = Dis_HintNone;
16109 /* Set default actions for post-insn handling of writes to r15, if
16110 required. */
16111 r15written = False;
16112 r15guard = IRTemp_INVALID; /* unconditional */
16113 r15kind = Ijk_Boring;
16115 /* At least this is simple on ARM: insns are all 4 bytes long, and
16116 4-aligned. So just fish the whole thing out of memory right now
16117 and have done. */
16118 insn = getUIntLittleEndianly( guest_instr );
16120 if (0) vex_printf("insn: 0x%x\n", insn);
16122 DIP("\t(arm) 0x%x: ", (UInt)guest_R15_curr_instr_notENC);
16124 vassert(0 == (guest_R15_curr_instr_notENC & 3));
16126 /* ----------------------------------------------------------- */
16128 /* Spot "Special" instructions (see comment at top of file). */
16130 const UChar* code = guest_instr;
16131 /* Spot the 16-byte preamble:
16133 e1a0c1ec mov r12, r12, ROR #3
16134 e1a0c6ec mov r12, r12, ROR #13
16135 e1a0ceec mov r12, r12, ROR #29
16136 e1a0c9ec mov r12, r12, ROR #19
16138 UInt word1 = 0xE1A0C1EC;
16139 UInt word2 = 0xE1A0C6EC;
16140 UInt word3 = 0xE1A0CEEC;
16141 UInt word4 = 0xE1A0C9EC;
16142 if (getUIntLittleEndianly(code+ 0) == word1 &&
16143 getUIntLittleEndianly(code+ 4) == word2 &&
16144 getUIntLittleEndianly(code+ 8) == word3 &&
16145 getUIntLittleEndianly(code+12) == word4) {
16146 /* Got a "Special" instruction preamble. Which one is it? */
16147 if (getUIntLittleEndianly(code+16) == 0xE18AA00A
16148 /* orr r10,r10,r10 */) {
16149 /* R3 = client_request ( R4 ) */
16150 DIP("r3 = client_request ( %%r4 )\n");
16151 llPutIReg(15, mkU32( guest_R15_curr_instr_notENC + 20 ));
16152 dres.jk_StopHere = Ijk_ClientReq;
16153 dres.whatNext = Dis_StopHere;
16154 goto decode_success;
16156 else
16157 if (getUIntLittleEndianly(code+16) == 0xE18BB00B
16158 /* orr r11,r11,r11 */) {
16159 /* R3 = guest_NRADDR */
16160 DIP("r3 = guest_NRADDR\n");
16161 dres.len = 20;
16162 llPutIReg(3, IRExpr_Get( OFFB_NRADDR, Ity_I32 ));
16163 goto decode_success;
16165 else
16166 if (getUIntLittleEndianly(code+16) == 0xE18CC00C
16167 /* orr r12,r12,r12 */) {
16168 /* branch-and-link-to-noredir R4 */
16169 DIP("branch-and-link-to-noredir r4\n");
16170 llPutIReg(14, mkU32( guest_R15_curr_instr_notENC + 20) );
16171 llPutIReg(15, llGetIReg(4));
16172 dres.jk_StopHere = Ijk_NoRedir;
16173 dres.whatNext = Dis_StopHere;
16174 goto decode_success;
16176 else
16177 if (getUIntLittleEndianly(code+16) == 0xE1899009
16178 /* orr r9,r9,r9 */) {
16179 /* IR injection */
16180 DIP("IR injection\n");
16181 vex_inject_ir(irsb, Iend_LE);
16182 // Invalidate the current insn. The reason is that the IRop we're
16183 // injecting here can change. In which case the translation has to
16184 // be redone. For ease of handling, we simply invalidate all the
16185 // time.
16186 stmt(IRStmt_Put(OFFB_CMSTART, mkU32(guest_R15_curr_instr_notENC)));
16187 stmt(IRStmt_Put(OFFB_CMLEN, mkU32(20)));
16188 llPutIReg(15, mkU32( guest_R15_curr_instr_notENC + 20 ));
16189 dres.whatNext = Dis_StopHere;
16190 dres.jk_StopHere = Ijk_InvalICache;
16191 goto decode_success;
16193 /* We don't know what it is. Set opc1/opc2 so decode_failure
16194 can print the insn following the Special-insn preamble. */
16195 insn = getUIntLittleEndianly(code+16);
16196 goto decode_failure;
16197 /*NOTREACHED*/
16202 /* ----------------------------------------------------------- */
16204 /* Main ARM instruction decoder starts here. */
16206 /* Deal with the condition. Strategy is to merely generate a
16207 condition temporary at this point (or IRTemp_INVALID, meaning
16208 unconditional). We leave it to lower-level instruction decoders
16209 to decide whether they can generate straight-line code, or
16210 whether they must generate a side exit before the instruction.
16211 condT :: Ity_I32 and is always either zero or one. */
16212 condT = IRTemp_INVALID;
16213 switch ( (ARMCondcode)INSN_COND ) {
16214 case ARMCondNV: {
16215 // Illegal instruction prior to v5 (see ARM ARM A3-5), but
16216 // some cases are acceptable
16217 Bool ok
16218 = decode_NV_instruction_ARMv7_and_below(&dres, archinfo, insn);
16219 if (ok)
16220 goto decode_success;
16221 else
16222 goto after_v7_decoder;
16224 case ARMCondAL: // Always executed
16225 break;
16226 case ARMCondEQ: case ARMCondNE: case ARMCondHS: case ARMCondLO:
16227 case ARMCondMI: case ARMCondPL: case ARMCondVS: case ARMCondVC:
16228 case ARMCondHI: case ARMCondLS: case ARMCondGE: case ARMCondLT:
16229 case ARMCondGT: case ARMCondLE:
16230 condT = newTemp(Ity_I32);
16231 assign( condT, mk_armg_calculate_condition( INSN_COND ));
16232 break;
16235 /* ----------------------------------------------------------- */
16236 /* -- ARMv5 integer instructions -- */
16237 /* ----------------------------------------------------------- */
16239 /* ---------------- Data processing ops ------------------- */
16241 if (0 == (INSN(27,20) & BITS8(1,1,0,0,0,0,0,0))
16242 && !(INSN(25,25) == 0 && INSN(7,7) == 1 && INSN(4,4) == 1)) {
16243 IRTemp shop = IRTemp_INVALID; /* shifter operand */
16244 IRTemp shco = IRTemp_INVALID; /* shifter carry out */
16245 UInt rD = (insn >> 12) & 0xF; /* 15:12 */
16246 UInt rN = (insn >> 16) & 0xF; /* 19:16 */
16247 UInt bitS = (insn >> 20) & 1; /* 20:20 */
16248 IRTemp rNt = IRTemp_INVALID;
16249 IRTemp res = IRTemp_INVALID;
16250 IRTemp oldV = IRTemp_INVALID;
16251 IRTemp oldC = IRTemp_INVALID;
16252 const HChar* name = NULL;
16253 IROp op = Iop_INVALID;
16254 Bool ok;
16256 switch (INSN(24,21)) {
16258 /* --------- ADD, SUB, AND, OR --------- */
16259 case BITS4(0,1,0,0): /* ADD: Rd = Rn + shifter_operand */
16260 name = "add"; op = Iop_Add32; goto rd_eq_rn_op_SO;
16261 case BITS4(0,0,1,0): /* SUB: Rd = Rn - shifter_operand */
16262 name = "sub"; op = Iop_Sub32; goto rd_eq_rn_op_SO;
16263 case BITS4(0,0,1,1): /* RSB: Rd = shifter_operand - Rn */
16264 name = "rsb"; op = Iop_Sub32; goto rd_eq_rn_op_SO;
16265 case BITS4(0,0,0,0): /* AND: Rd = Rn & shifter_operand */
16266 name = "and"; op = Iop_And32; goto rd_eq_rn_op_SO;
16267 case BITS4(1,1,0,0): /* OR: Rd = Rn | shifter_operand */
16268 name = "orr"; op = Iop_Or32; goto rd_eq_rn_op_SO;
16269 case BITS4(0,0,0,1): /* EOR: Rd = Rn ^ shifter_operand */
16270 name = "eor"; op = Iop_Xor32; goto rd_eq_rn_op_SO;
16271 case BITS4(1,1,1,0): /* BIC: Rd = Rn & ~shifter_operand */
16272 name = "bic"; op = Iop_And32; goto rd_eq_rn_op_SO;
16273 rd_eq_rn_op_SO: {
16274 Bool isRSB = False;
16275 Bool isBIC = False;
16276 switch (INSN(24,21)) {
16277 case BITS4(0,0,1,1):
16278 vassert(op == Iop_Sub32); isRSB = True; break;
16279 case BITS4(1,1,1,0):
16280 vassert(op == Iop_And32); isBIC = True; break;
16281 default:
16282 break;
16284 rNt = newTemp(Ity_I32);
16285 assign(rNt, getIRegA(rN));
16286 ok = mk_shifter_operand(
16287 INSN(25,25), INSN(11,0),
16288 &shop, bitS ? &shco : NULL, dis_buf
16290 if (!ok)
16291 break;
16292 res = newTemp(Ity_I32);
16293 // compute the main result
16294 if (isRSB) {
16295 // reverse-subtract: shifter_operand - Rn
16296 vassert(op == Iop_Sub32);
16297 assign(res, binop(op, mkexpr(shop), mkexpr(rNt)) );
16298 } else if (isBIC) {
16299 // andn: shifter_operand & ~Rn
16300 vassert(op == Iop_And32);
16301 assign(res, binop(op, mkexpr(rNt),
16302 unop(Iop_Not32, mkexpr(shop))) );
16303 } else {
16304 // normal: Rn op shifter_operand
16305 assign(res, binop(op, mkexpr(rNt), mkexpr(shop)) );
16307 // but don't commit it until after we've finished
16308 // all necessary reads from the guest state
16309 if (bitS
16310 && (op == Iop_And32 || op == Iop_Or32 || op == Iop_Xor32)) {
16311 oldV = newTemp(Ity_I32);
16312 assign( oldV, mk_armg_calculate_flag_v() );
16314 // can't safely read guest state after here
16315 // now safe to put the main result
16316 putIRegA( rD, mkexpr(res), condT, Ijk_Boring );
16317 // XXXX!! not safe to read any guest state after
16318 // this point (I think the code below doesn't do that).
16319 if (!bitS)
16320 vassert(shco == IRTemp_INVALID);
16321 /* Update the flags thunk if necessary */
16322 if (bitS) {
16323 vassert(shco != IRTemp_INVALID);
16324 switch (op) {
16325 case Iop_Add32:
16326 setFlags_D1_D2( ARMG_CC_OP_ADD, rNt, shop, condT );
16327 break;
16328 case Iop_Sub32:
16329 if (isRSB) {
16330 setFlags_D1_D2( ARMG_CC_OP_SUB, shop, rNt, condT );
16331 } else {
16332 setFlags_D1_D2( ARMG_CC_OP_SUB, rNt, shop, condT );
16334 break;
16335 case Iop_And32: /* BIC and AND set the flags the same */
16336 case Iop_Or32:
16337 case Iop_Xor32:
16338 // oldV has been read just above
16339 setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC,
16340 res, shco, oldV, condT );
16341 break;
16342 default:
16343 vassert(0);
16346 DIP("%s%s%s r%u, r%u, %s\n",
16347 name, nCC(INSN_COND), bitS ? "s" : "", rD, rN, dis_buf );
16348 goto decode_success;
16351 /* --------- MOV, MVN --------- */
16352 case BITS4(1,1,0,1): /* MOV: Rd = shifter_operand */
16353 case BITS4(1,1,1,1): { /* MVN: Rd = not(shifter_operand) */
16354 Bool isMVN = INSN(24,21) == BITS4(1,1,1,1);
16355 IRTemp jk = Ijk_Boring;
16356 if (rN != 0)
16357 break; /* rN must be zero */
16358 ok = mk_shifter_operand(
16359 INSN(25,25), INSN(11,0),
16360 &shop, bitS ? &shco : NULL, dis_buf
16362 if (!ok)
16363 break;
16364 res = newTemp(Ity_I32);
16365 assign( res, isMVN ? unop(Iop_Not32, mkexpr(shop))
16366 : mkexpr(shop) );
16367 if (bitS) {
16368 vassert(shco != IRTemp_INVALID);
16369 oldV = newTemp(Ity_I32);
16370 assign( oldV, mk_armg_calculate_flag_v() );
16371 } else {
16372 vassert(shco == IRTemp_INVALID);
16374 /* According to the Cortex A8 TRM Sec. 5.2.1, MOV PC, r14 is a
16375 return for purposes of branch prediction. */
16376 if (!isMVN && INSN(11,0) == 14) {
16377 jk = Ijk_Ret;
16379 // can't safely read guest state after here
16380 putIRegA( rD, mkexpr(res), condT, jk );
16381 /* Update the flags thunk if necessary */
16382 if (bitS) {
16383 setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC,
16384 res, shco, oldV, condT );
16386 DIP("%s%s%s r%u, %s\n",
16387 isMVN ? "mvn" : "mov",
16388 nCC(INSN_COND), bitS ? "s" : "", rD, dis_buf );
16389 goto decode_success;
16392 /* --------- CMP --------- */
16393 case BITS4(1,0,1,0): /* CMP: (void) Rn - shifter_operand */
16394 case BITS4(1,0,1,1): { /* CMN: (void) Rn + shifter_operand */
16395 Bool isCMN = INSN(24,21) == BITS4(1,0,1,1);
16396 if (rD != 0)
16397 break; /* rD must be zero */
16398 if (bitS == 0)
16399 break; /* if S (bit 20) is not set, it's not CMP/CMN */
16400 rNt = newTemp(Ity_I32);
16401 assign(rNt, getIRegA(rN));
16402 ok = mk_shifter_operand(
16403 INSN(25,25), INSN(11,0),
16404 &shop, NULL, dis_buf
16406 if (!ok)
16407 break;
16408 // can't safely read guest state after here
16409 /* Update the flags thunk. */
16410 setFlags_D1_D2( isCMN ? ARMG_CC_OP_ADD : ARMG_CC_OP_SUB,
16411 rNt, shop, condT );
16412 DIP("%s%s r%u, %s\n",
16413 isCMN ? "cmn" : "cmp",
16414 nCC(INSN_COND), rN, dis_buf );
16415 goto decode_success;
16418 /* --------- TST --------- */
16419 case BITS4(1,0,0,0): /* TST: (void) Rn & shifter_operand */
16420 case BITS4(1,0,0,1): { /* TEQ: (void) Rn ^ shifter_operand */
16421 Bool isTEQ = INSN(24,21) == BITS4(1,0,0,1);
16422 if (rD != 0)
16423 break; /* rD must be zero */
16424 if (bitS == 0)
16425 break; /* if S (bit 20) is not set, it's not TST/TEQ */
16426 rNt = newTemp(Ity_I32);
16427 assign(rNt, getIRegA(rN));
16428 ok = mk_shifter_operand(
16429 INSN(25,25), INSN(11,0),
16430 &shop, &shco, dis_buf
16432 if (!ok)
16433 break;
16434 /* Update the flags thunk. */
16435 res = newTemp(Ity_I32);
16436 assign( res, binop(isTEQ ? Iop_Xor32 : Iop_And32,
16437 mkexpr(rNt), mkexpr(shop)) );
16438 oldV = newTemp(Ity_I32);
16439 assign( oldV, mk_armg_calculate_flag_v() );
16440 // can't safely read guest state after here
16441 setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC,
16442 res, shco, oldV, condT );
16443 DIP("%s%s r%u, %s\n",
16444 isTEQ ? "teq" : "tst",
16445 nCC(INSN_COND), rN, dis_buf );
16446 goto decode_success;
16449 /* --------- ADC, SBC, RSC --------- */
16450 case BITS4(0,1,0,1): /* ADC: Rd = Rn + shifter_operand + oldC */
16451 name = "adc"; goto rd_eq_rn_op_SO_op_oldC;
16452 case BITS4(0,1,1,0): /* SBC: Rd = Rn - shifter_operand - (oldC ^ 1) */
16453 name = "sbc"; goto rd_eq_rn_op_SO_op_oldC;
16454 case BITS4(0,1,1,1): /* RSC: Rd = shifter_operand - Rn - (oldC ^ 1) */
16455 name = "rsc"; goto rd_eq_rn_op_SO_op_oldC;
16456 rd_eq_rn_op_SO_op_oldC: {
16457 // FIXME: shco isn't used for anything. Get rid of it.
16458 rNt = newTemp(Ity_I32);
16459 assign(rNt, getIRegA(rN));
16460 ok = mk_shifter_operand(
16461 INSN(25,25), INSN(11,0),
16462 &shop, bitS ? &shco : NULL, dis_buf
16464 if (!ok)
16465 break;
16466 oldC = newTemp(Ity_I32);
16467 assign( oldC, mk_armg_calculate_flag_c() );
16468 res = newTemp(Ity_I32);
16469 // compute the main result
16470 switch (INSN(24,21)) {
16471 case BITS4(0,1,0,1): /* ADC */
16472 assign(res,
16473 binop(Iop_Add32,
16474 binop(Iop_Add32, mkexpr(rNt), mkexpr(shop)),
16475 mkexpr(oldC) ));
16476 break;
16477 case BITS4(0,1,1,0): /* SBC */
16478 assign(res,
16479 binop(Iop_Sub32,
16480 binop(Iop_Sub32, mkexpr(rNt), mkexpr(shop)),
16481 binop(Iop_Xor32, mkexpr(oldC), mkU32(1)) ));
16482 break;
16483 case BITS4(0,1,1,1): /* RSC */
16484 assign(res,
16485 binop(Iop_Sub32,
16486 binop(Iop_Sub32, mkexpr(shop), mkexpr(rNt)),
16487 binop(Iop_Xor32, mkexpr(oldC), mkU32(1)) ));
16488 break;
16489 default:
16490 vassert(0);
16492 // but don't commit it until after we've finished
16493 // all necessary reads from the guest state
16494 // now safe to put the main result
16495 putIRegA( rD, mkexpr(res), condT, Ijk_Boring );
16496 // XXXX!! not safe to read any guest state after
16497 // this point (I think the code below doesn't do that).
16498 if (!bitS)
16499 vassert(shco == IRTemp_INVALID);
16500 /* Update the flags thunk if necessary */
16501 if (bitS) {
16502 vassert(shco != IRTemp_INVALID);
16503 switch (INSN(24,21)) {
16504 case BITS4(0,1,0,1): /* ADC */
16505 setFlags_D1_D2_ND( ARMG_CC_OP_ADC,
16506 rNt, shop, oldC, condT );
16507 break;
16508 case BITS4(0,1,1,0): /* SBC */
16509 setFlags_D1_D2_ND( ARMG_CC_OP_SBB,
16510 rNt, shop, oldC, condT );
16511 break;
16512 case BITS4(0,1,1,1): /* RSC */
16513 setFlags_D1_D2_ND( ARMG_CC_OP_SBB,
16514 shop, rNt, oldC, condT );
16515 break;
16516 default:
16517 vassert(0);
16520 DIP("%s%s%s r%u, r%u, %s\n",
16521 name, nCC(INSN_COND), bitS ? "s" : "", rD, rN, dis_buf );
16522 goto decode_success;
16525 default:
16526 vassert(0);
16528 } /* if (0 == (INSN(27,20) & BITS8(1,1,0,0,0,0,0,0)) */
16530 /* --------------------- Load/store (ubyte & word) -------- */
16531 // LDR STR LDRB STRB
16532 /* 31 27 23 19 15 11 6 4 3 # highest bit
16533 28 24 20 16 12
16534 A5-20 1 | 16 cond 0101 UB0L Rn Rd imm12
16535 A5-22 1 | 32 cond 0111 UBOL Rn Rd imm5 sh2 0 Rm
16536 A5-24 2 | 16 cond 0101 UB1L Rn Rd imm12
16537 A5-26 2 | 32 cond 0111 UB1L Rn Rd imm5 sh2 0 Rm
16538 A5-28 3 | 16 cond 0100 UB0L Rn Rd imm12
16539 A5-32 3 | 32 cond 0110 UB0L Rn Rd imm5 sh2 0 Rm
16541 /* case coding:
16542 1 at-ea (access at ea)
16543 2 at-ea-then-upd (access at ea, then Rn = ea)
16544 3 at-Rn-then-upd (access at Rn, then Rn = ea)
16545 ea coding
16546 16 Rn +/- imm12
16547 32 Rn +/- Rm sh2 imm5
16549 /* Quickly skip over all of this for hopefully most instructions */
16550 if ((INSN(27,24) & BITS4(1,1,0,0)) != BITS4(0,1,0,0))
16551 goto after_load_store_ubyte_or_word;
16553 summary = 0;
16555 /**/ if (INSN(27,24) == BITS4(0,1,0,1) && INSN(21,21) == 0) {
16556 summary = 1 | 16;
16558 else if (INSN(27,24) == BITS4(0,1,1,1) && INSN(21,21) == 0
16559 && INSN(4,4) == 0) {
16560 summary = 1 | 32;
16562 else if (INSN(27,24) == BITS4(0,1,0,1) && INSN(21,21) == 1) {
16563 summary = 2 | 16;
16565 else if (INSN(27,24) == BITS4(0,1,1,1) && INSN(21,21) == 1
16566 && INSN(4,4) == 0) {
16567 summary = 2 | 32;
16569 else if (INSN(27,24) == BITS4(0,1,0,0) && INSN(21,21) == 0) {
16570 summary = 3 | 16;
16572 else if (INSN(27,24) == BITS4(0,1,1,0) && INSN(21,21) == 0
16573 && INSN(4,4) == 0) {
16574 summary = 3 | 32;
16576 else goto after_load_store_ubyte_or_word;
16578 { UInt rN = (insn >> 16) & 0xF; /* 19:16 */
16579 UInt rD = (insn >> 12) & 0xF; /* 15:12 */
16580 UInt rM = (insn >> 0) & 0xF; /* 3:0 */
16581 UInt bU = (insn >> 23) & 1; /* 23 */
16582 UInt bB = (insn >> 22) & 1; /* 22 */
16583 UInt bL = (insn >> 20) & 1; /* 20 */
16584 UInt imm12 = (insn >> 0) & 0xFFF; /* 11:0 */
16585 UInt imm5 = (insn >> 7) & 0x1F; /* 11:7 */
16586 UInt sh2 = (insn >> 5) & 3; /* 6:5 */
16588 /* Skip some invalid cases, which would lead to two competing
16589 updates to the same register, or which are otherwise
16590 disallowed by the spec. */
16591 switch (summary) {
16592 case 1 | 16:
16593 break;
16594 case 1 | 32:
16595 if (rM == 15) goto after_load_store_ubyte_or_word;
16596 break;
16597 case 2 | 16: case 3 | 16:
16598 if (rN == 15) goto after_load_store_ubyte_or_word;
16599 if (bL == 1 && rN == rD) goto after_load_store_ubyte_or_word;
16600 break;
16601 case 2 | 32: case 3 | 32:
16602 if (rM == 15) goto after_load_store_ubyte_or_word;
16603 if (rN == 15) goto after_load_store_ubyte_or_word;
16604 if (rN == rM) goto after_load_store_ubyte_or_word;
16605 if (bL == 1 && rN == rD) goto after_load_store_ubyte_or_word;
16606 break;
16607 default:
16608 vassert(0);
16611 /* compute the effective address. Bind it to a tmp since we
16612 may need to use it twice. */
16613 IRExpr* eaE = NULL;
16614 switch (summary & 0xF0) {
16615 case 16:
16616 eaE = mk_EA_reg_plusminus_imm12( rN, bU, imm12, dis_buf );
16617 break;
16618 case 32:
16619 eaE = mk_EA_reg_plusminus_shifted_reg( rN, bU, rM, sh2, imm5,
16620 dis_buf );
16621 break;
16623 vassert(eaE);
16624 IRTemp eaT = newTemp(Ity_I32);
16625 assign(eaT, eaE);
16627 /* get the old Rn value */
16628 IRTemp rnT = newTemp(Ity_I32);
16629 assign(rnT, getIRegA(rN));
16631 /* decide on the transfer address */
16632 IRTemp taT = IRTemp_INVALID;
16633 switch (summary & 0x0F) {
16634 case 1: case 2: taT = eaT; break;
16635 case 3: taT = rnT; break;
16637 vassert(taT != IRTemp_INVALID);
16639 if (bL == 0) {
16640 /* Store. If necessary, update the base register before the
16641 store itself, so that the common idiom of "str rX, [sp,
16642 #-4]!" (store rX at sp-4, then do new sp = sp-4, a.k.a "push
16643 rX") doesn't cause Memcheck to complain that the access is
16644 below the stack pointer. Also, not updating sp before the
16645 store confuses Valgrind's dynamic stack-extending logic. So
16646 do it before the store. Hence we need to snarf the store
16647 data before doing the basereg update. */
16649 /* get hold of the data to be stored */
16650 IRTemp rDt = newTemp(Ity_I32);
16651 assign(rDt, getIRegA(rD));
16653 /* Update Rn if necessary. */
16654 switch (summary & 0x0F) {
16655 case 2: case 3:
16656 putIRegA( rN, mkexpr(eaT), condT, Ijk_Boring );
16657 break;
16660 /* generate the transfer */
16661 if (bB == 0) { // word store
16662 storeGuardedLE( mkexpr(taT), mkexpr(rDt), condT );
16663 } else { // byte store
16664 vassert(bB == 1);
16665 storeGuardedLE( mkexpr(taT), unop(Iop_32to8, mkexpr(rDt)), condT );
16668 } else {
16669 /* Load */
16670 vassert(bL == 1);
16672 /* generate the transfer */
16673 if (bB == 0) { // word load
16674 IRTemp jk = Ijk_Boring;
16675 /* According to the Cortex A8 TRM Sec. 5.2.1, LDR(1) with r13 as the
16676 base register and PC as the destination register is a return for
16677 purposes of branch prediction.
16678 The ARM ARM Sec. C9.10.1 further specifies that it must use a
16679 post-increment by immediate addressing mode to be counted in
16680 event 0x0E (Procedure return).*/
16681 if (rN == 13 && summary == (3 | 16) && bB == 0) {
16682 jk = Ijk_Ret;
16684 IRTemp tD = newTemp(Ity_I32);
16685 loadGuardedLE( tD, ILGop_Ident32,
16686 mkexpr(taT), llGetIReg(rD), condT );
16687 /* "rD == 15 ? condT : IRTemp_INVALID": simply
16688 IRTemp_INVALID would be correct in all cases here, and
16689 for the non-r15 case it generates better code, by
16690 avoiding two tests of the cond (since it is already
16691 tested by loadGuardedLE). However, the logic at the end
16692 of this function, that deals with writes to r15, has an
16693 optimisation which depends on seeing whether or not the
16694 write is conditional. Hence in this particular case we
16695 let it "see" the guard condition. */
16696 putIRegA( rD, mkexpr(tD),
16697 rD == 15 ? condT : IRTemp_INVALID, jk );
16698 } else { // byte load
16699 vassert(bB == 1);
16700 IRTemp tD = newTemp(Ity_I32);
16701 loadGuardedLE( tD, ILGop_8Uto32, mkexpr(taT), llGetIReg(rD), condT );
16702 /* No point in similar 3rd arg complexity here, since we
16703 can't sanely write anything to r15 like this. */
16704 putIRegA( rD, mkexpr(tD), IRTemp_INVALID, Ijk_Boring );
16707 /* Update Rn if necessary. */
16708 switch (summary & 0x0F) {
16709 case 2: case 3:
16710 // should be assured by logic above:
16711 if (bL == 1)
16712 vassert(rD != rN); /* since we just wrote rD */
16713 putIRegA( rN, mkexpr(eaT), condT, Ijk_Boring );
16714 break;
16718 switch (summary & 0x0F) {
16719 case 1: DIP("%sr%s%s r%u, %s\n",
16720 bL == 0 ? "st" : "ld",
16721 bB == 0 ? "" : "b", nCC(INSN_COND), rD, dis_buf);
16722 break;
16723 case 2: DIP("%sr%s%s r%u, %s! (at-EA-then-Rn=EA)\n",
16724 bL == 0 ? "st" : "ld",
16725 bB == 0 ? "" : "b", nCC(INSN_COND), rD, dis_buf);
16726 break;
16727 case 3: DIP("%sr%s%s r%u, %s! (at-Rn-then-Rn=EA)\n",
16728 bL == 0 ? "st" : "ld",
16729 bB == 0 ? "" : "b", nCC(INSN_COND), rD, dis_buf);
16730 break;
16731 default: vassert(0);
16734 /* XXX deal with alignment constraints */
16736 goto decode_success;
16738 /* Complications:
16740 For all loads: if the Amode specifies base register
16741 writeback, and the same register is specified for Rd and Rn,
16742 the results are UNPREDICTABLE.
16744 For all loads and stores: if R15 is written, branch to
16745 that address afterwards.
16747 STRB: straightforward
16748 LDRB: loaded data is zero extended
16749 STR: lowest 2 bits of address are ignored
16750 LDR: if the lowest 2 bits of the address are nonzero
16751 then the loaded value is rotated right by 8 * the lowest 2 bits
16755 after_load_store_ubyte_or_word:
16757 /* --------------------- Load/store (sbyte & hword) -------- */
16758 // LDRH LDRSH STRH LDRSB
16759 /* 31 27 23 19 15 11 7 3 # highest bit
16760 28 24 20 16 12 8 4 0
16761 A5-36 1 | 16 cond 0001 U10L Rn Rd im4h 1SH1 im4l
16762 A5-38 1 | 32 cond 0001 U00L Rn Rd 0000 1SH1 Rm
16763 A5-40 2 | 16 cond 0001 U11L Rn Rd im4h 1SH1 im4l
16764 A5-42 2 | 32 cond 0001 U01L Rn Rd 0000 1SH1 Rm
16765 A5-44 3 | 16 cond 0000 U10L Rn Rd im4h 1SH1 im4l
16766 A5-46 3 | 32 cond 0000 U00L Rn Rd 0000 1SH1 Rm
16768 /* case coding:
16769 1 at-ea (access at ea)
16770 2 at-ea-then-upd (access at ea, then Rn = ea)
16771 3 at-Rn-then-upd (access at Rn, then Rn = ea)
16772 ea coding
16773 16 Rn +/- imm8
16774 32 Rn +/- Rm
16776 /* Quickly skip over all of this for hopefully most instructions */
16777 if ((INSN(27,24) & BITS4(1,1,1,0)) != BITS4(0,0,0,0))
16778 goto after_load_store_sbyte_or_hword;
16780 /* Check the "1SH1" thing. */
16781 if ((INSN(7,4) & BITS4(1,0,0,1)) != BITS4(1,0,0,1))
16782 goto after_load_store_sbyte_or_hword;
16784 summary = 0;
16786 /**/ if (INSN(27,24) == BITS4(0,0,0,1) && INSN(22,21) == BITS2(1,0)) {
16787 summary = 1 | 16;
16789 else if (INSN(27,24) == BITS4(0,0,0,1) && INSN(22,21) == BITS2(0,0)) {
16790 summary = 1 | 32;
16792 else if (INSN(27,24) == BITS4(0,0,0,1) && INSN(22,21) == BITS2(1,1)) {
16793 summary = 2 | 16;
16795 else if (INSN(27,24) == BITS4(0,0,0,1) && INSN(22,21) == BITS2(0,1)) {
16796 summary = 2 | 32;
16798 else if (INSN(27,24) == BITS4(0,0,0,0) && INSN(22,21) == BITS2(1,0)) {
16799 summary = 3 | 16;
16801 else if (INSN(27,24) == BITS4(0,0,0,0) && INSN(22,21) == BITS2(0,0)) {
16802 summary = 3 | 32;
16804 else goto after_load_store_sbyte_or_hword;
16806 { UInt rN = (insn >> 16) & 0xF; /* 19:16 */
16807 UInt rD = (insn >> 12) & 0xF; /* 15:12 */
16808 UInt rM = (insn >> 0) & 0xF; /* 3:0 */
16809 UInt bU = (insn >> 23) & 1; /* 23 U=1 offset+, U=0 offset- */
16810 UInt bL = (insn >> 20) & 1; /* 20 L=1 load, L=0 store */
16811 UInt bH = (insn >> 5) & 1; /* H=1 halfword, H=0 byte */
16812 UInt bS = (insn >> 6) & 1; /* S=1 signed, S=0 unsigned */
16813 UInt imm8 = ((insn >> 4) & 0xF0) | (insn & 0xF); /* 11:8, 3:0 */
16815 /* Skip combinations that are either meaningless or already
16816 handled by main word-or-unsigned-byte load-store
16817 instructions. */
16818 if (bS == 0 && bH == 0) /* "unsigned byte" */
16819 goto after_load_store_sbyte_or_hword;
16820 if (bS == 1 && bL == 0) /* "signed store" */
16821 goto after_load_store_sbyte_or_hword;
16823 /* Require 11:8 == 0 for Rn +/- Rm cases */
16824 if ((summary & 32) != 0 && (imm8 & 0xF0) != 0)
16825 goto after_load_store_sbyte_or_hword;
16827 /* Skip some invalid cases, which would lead to two competing
16828 updates to the same register, or which are otherwise
16829 disallowed by the spec. */
16830 switch (summary) {
16831 case 1 | 16:
16832 break;
16833 case 1 | 32:
16834 if (rM == 15) goto after_load_store_sbyte_or_hword;
16835 break;
16836 case 2 | 16: case 3 | 16:
16837 if (rN == 15) goto after_load_store_sbyte_or_hword;
16838 if (bL == 1 && rN == rD) goto after_load_store_sbyte_or_hword;
16839 break;
16840 case 2 | 32: case 3 | 32:
16841 if (rM == 15) goto after_load_store_sbyte_or_hword;
16842 if (rN == 15) goto after_load_store_sbyte_or_hword;
16843 if (rN == rM) goto after_load_store_sbyte_or_hword;
16844 if (bL == 1 && rN == rD) goto after_load_store_sbyte_or_hword;
16845 break;
16846 default:
16847 vassert(0);
16850 /* If this is a branch, make it unconditional at this point.
16851 Doing conditional branches in-line is too complex (for now).
16852 Note that you'd have to be insane to use any of these loads to
16853 do a branch, since they only load 16 bits at most, but we
16854 handle it just in case. */
16855 if (bL == 1 && rD == 15 && condT != IRTemp_INVALID) {
16856 // go uncond
16857 mk_skip_over_A32_if_cond_is_false( condT );
16858 condT = IRTemp_INVALID;
16859 // now uncond
16862 /* compute the effective address. Bind it to a tmp since we
16863 may need to use it twice. */
16864 IRExpr* eaE = NULL;
16865 switch (summary & 0xF0) {
16866 case 16:
16867 eaE = mk_EA_reg_plusminus_imm8( rN, bU, imm8, dis_buf );
16868 break;
16869 case 32:
16870 eaE = mk_EA_reg_plusminus_reg( rN, bU, rM, dis_buf );
16871 break;
16873 vassert(eaE);
16874 IRTemp eaT = newTemp(Ity_I32);
16875 assign(eaT, eaE);
16877 /* get the old Rn value */
16878 IRTemp rnT = newTemp(Ity_I32);
16879 assign(rnT, getIRegA(rN));
16881 /* decide on the transfer address */
16882 IRTemp taT = IRTemp_INVALID;
16883 switch (summary & 0x0F) {
16884 case 1: case 2: taT = eaT; break;
16885 case 3: taT = rnT; break;
16887 vassert(taT != IRTemp_INVALID);
16889 /* ll previous value of rD, for dealing with conditional loads */
16890 IRTemp llOldRd = newTemp(Ity_I32);
16891 assign(llOldRd, llGetIReg(rD));
16893 /* halfword store H 1 L 0 S 0
16894 uhalf load H 1 L 1 S 0
16895 shalf load H 1 L 1 S 1
16896 sbyte load H 0 L 1 S 1
16898 const HChar* name = NULL;
16899 /* generate the transfer */
16900 /**/ if (bH == 1 && bL == 0 && bS == 0) { // halfword store
16901 storeGuardedLE( mkexpr(taT),
16902 unop(Iop_32to16, getIRegA(rD)), condT );
16903 name = "strh";
16905 else if (bH == 1 && bL == 1 && bS == 0) { // uhalf load
16906 IRTemp newRd = newTemp(Ity_I32);
16907 loadGuardedLE( newRd, ILGop_16Uto32,
16908 mkexpr(taT), mkexpr(llOldRd), condT );
16909 putIRegA( rD, mkexpr(newRd), IRTemp_INVALID, Ijk_Boring );
16910 name = "ldrh";
16912 else if (bH == 1 && bL == 1 && bS == 1) { // shalf load
16913 IRTemp newRd = newTemp(Ity_I32);
16914 loadGuardedLE( newRd, ILGop_16Sto32,
16915 mkexpr(taT), mkexpr(llOldRd), condT );
16916 putIRegA( rD, mkexpr(newRd), IRTemp_INVALID, Ijk_Boring );
16917 name = "ldrsh";
16919 else if (bH == 0 && bL == 1 && bS == 1) { // sbyte load
16920 IRTemp newRd = newTemp(Ity_I32);
16921 loadGuardedLE( newRd, ILGop_8Sto32,
16922 mkexpr(taT), mkexpr(llOldRd), condT );
16923 putIRegA( rD, mkexpr(newRd), IRTemp_INVALID, Ijk_Boring );
16924 name = "ldrsb";
16926 else
16927 vassert(0); // should be assured by logic above
16929 /* Update Rn if necessary. */
16930 switch (summary & 0x0F) {
16931 case 2: case 3:
16932 // should be assured by logic above:
16933 if (bL == 1)
16934 vassert(rD != rN); /* since we just wrote rD */
16935 putIRegA( rN, mkexpr(eaT), condT, Ijk_Boring );
16936 break;
16939 switch (summary & 0x0F) {
16940 case 1: DIP("%s%s r%u, %s\n", name, nCC(INSN_COND), rD, dis_buf);
16941 break;
16942 case 2: DIP("%s%s r%u, %s! (at-EA-then-Rn=EA)\n",
16943 name, nCC(INSN_COND), rD, dis_buf);
16944 break;
16945 case 3: DIP("%s%s r%u, %s! (at-Rn-then-Rn=EA)\n",
16946 name, nCC(INSN_COND), rD, dis_buf);
16947 break;
16948 default: vassert(0);
16951 /* XXX deal with alignment constraints */
16953 goto decode_success;
16955 /* Complications:
16957 For all loads: if the Amode specifies base register
16958 writeback, and the same register is specified for Rd and Rn,
16959 the results are UNPREDICTABLE.
16961 For all loads and stores: if R15 is written, branch to
16962 that address afterwards.
16964 Misaligned halfword stores => Unpredictable
16965 Misaligned halfword loads => Unpredictable
16969 after_load_store_sbyte_or_hword:
16971 /* --------------------- Load/store multiple -------------- */
16972 // LD/STMIA LD/STMIB LD/STMDA LD/STMDB
16973 // Remarkably complex and difficult to get right
16974 // match 27:20 as 100XX0WL
16975 if (BITS8(1,0,0,0,0,0,0,0) == (INSN(27,20) & BITS8(1,1,1,0,0,1,0,0))) {
16976 // A5-50 LD/STMIA cond 1000 10WL Rn RegList
16977 // A5-51 LD/STMIB cond 1001 10WL Rn RegList
16978 // A5-53 LD/STMDA cond 1000 00WL Rn RegList
16979 // A5-53 LD/STMDB cond 1001 00WL Rn RegList
16980 // 28 24 20 16 0
16982 UInt bINC = (insn >> 23) & 1;
16983 UInt bBEFORE = (insn >> 24) & 1;
16985 UInt bL = (insn >> 20) & 1; /* load=1, store=0 */
16986 UInt bW = (insn >> 21) & 1; /* Rn wback=1, no wback=0 */
16987 UInt rN = (insn >> 16) & 0xF;
16988 UInt regList = insn & 0xFFFF;
16989 /* Skip some invalid cases, which would lead to two competing
16990 updates to the same register, or which are otherwise
16991 disallowed by the spec. Note the test above has required
16992 that S == 0, since that looks like a kernel-mode only thing.
16993 Done by forcing the real pattern, viz 100XXSWL to actually be
16994 100XX0WL. */
16995 if (rN == 15) goto after_load_store_multiple;
16996 // reglist can't be empty
16997 if (regList == 0) goto after_load_store_multiple;
16998 // if requested to writeback Rn, and this is a load instruction,
16999 // then Rn can't appear in RegList, since we'd have two competing
17000 // new values for Rn. We do however accept this case for store
17001 // instructions.
17002 if (bW == 1 && bL == 1 && ((1 << rN) & regList) > 0)
17003 goto after_load_store_multiple;
17005 /* Now, we can't do a conditional load or store, since that very
17006 likely will generate an exception. So we have to take a side
17007 exit at this point if the condition is false. */
17008 if (condT != IRTemp_INVALID) {
17009 mk_skip_over_A32_if_cond_is_false( condT );
17010 condT = IRTemp_INVALID;
17013 /* Ok, now we're unconditional. Generate the IR. */
17014 mk_ldm_stm( True/*arm*/, rN, bINC, bBEFORE, bW, bL, regList );
17016 DIP("%sm%c%c%s r%u%s, {0x%04x}\n",
17017 bL == 1 ? "ld" : "st", bINC ? 'i' : 'd', bBEFORE ? 'b' : 'a',
17018 nCC(INSN_COND),
17019 rN, bW ? "!" : "", regList);
17021 goto decode_success;
17024 after_load_store_multiple:
17026 /* --------------------- Control flow --------------------- */
17027 // B, BL (Branch, or Branch-and-Link, to immediate offset)
17029 if (BITS8(1,0,1,0,0,0,0,0) == (INSN(27,20) & BITS8(1,1,1,0,0,0,0,0))) {
17030 UInt link = (insn >> 24) & 1;
17031 UInt uimm24 = insn & ((1<<24)-1); uimm24 <<= 8;
17032 Int simm24 = (Int)uimm24; simm24 >>= 8;
17033 UInt dst = guest_R15_curr_instr_notENC + 8 + (((UInt)simm24) << 2);
17034 IRJumpKind jk = link ? Ijk_Call : Ijk_Boring;
17035 if (link) {
17036 putIRegA(14, mkU32(guest_R15_curr_instr_notENC + 4),
17037 condT, Ijk_Boring);
17039 if (condT == IRTemp_INVALID) {
17040 /* unconditional transfer to 'dst'. See if we can simply
17041 continue tracing at the destination. */
17042 if (resteerOkFn( callback_opaque, dst )) {
17043 /* yes */
17044 dres.whatNext = Dis_ResteerU;
17045 dres.continueAt = dst;
17046 } else {
17047 /* no; terminate the SB at this point. */
17048 llPutIReg(15, mkU32(dst));
17049 dres.jk_StopHere = jk;
17050 dres.whatNext = Dis_StopHere;
17052 DIP("b%s 0x%x\n", link ? "l" : "", dst);
17053 } else {
17054 /* conditional transfer to 'dst' */
17055 const HChar* comment = "";
17057 /* First see if we can do some speculative chasing into one
17058 arm or the other. Be conservative and only chase if
17059 !link, that is, this is a normal conditional branch to a
17060 known destination. */
17061 if (!link
17062 && resteerCisOk
17063 && vex_control.guest_chase_cond
17064 && dst < guest_R15_curr_instr_notENC
17065 && resteerOkFn( callback_opaque, dst) ) {
17066 /* Speculation: assume this backward branch is taken. So
17067 we need to emit a side-exit to the insn following this
17068 one, on the negation of the condition, and continue at
17069 the branch target address (dst). */
17070 stmt( IRStmt_Exit( unop(Iop_Not1,
17071 unop(Iop_32to1, mkexpr(condT))),
17072 Ijk_Boring,
17073 IRConst_U32(guest_R15_curr_instr_notENC+4),
17074 OFFB_R15T ));
17075 dres.whatNext = Dis_ResteerC;
17076 dres.continueAt = (Addr32)dst;
17077 comment = "(assumed taken)";
17079 else
17080 if (!link
17081 && resteerCisOk
17082 && vex_control.guest_chase_cond
17083 && dst >= guest_R15_curr_instr_notENC
17084 && resteerOkFn( callback_opaque,
17085 guest_R15_curr_instr_notENC+4) ) {
17086 /* Speculation: assume this forward branch is not taken.
17087 So we need to emit a side-exit to dst (the dest) and
17088 continue disassembling at the insn immediately
17089 following this one. */
17090 stmt( IRStmt_Exit( unop(Iop_32to1, mkexpr(condT)),
17091 Ijk_Boring,
17092 IRConst_U32(dst),
17093 OFFB_R15T ));
17094 dres.whatNext = Dis_ResteerC;
17095 dres.continueAt = guest_R15_curr_instr_notENC+4;
17096 comment = "(assumed not taken)";
17098 else {
17099 /* Conservative default translation - end the block at
17100 this point. */
17101 stmt( IRStmt_Exit( unop(Iop_32to1, mkexpr(condT)),
17102 jk, IRConst_U32(dst), OFFB_R15T ));
17103 llPutIReg(15, mkU32(guest_R15_curr_instr_notENC + 4));
17104 dres.jk_StopHere = Ijk_Boring;
17105 dres.whatNext = Dis_StopHere;
17107 DIP("b%s%s 0x%x %s\n", link ? "l" : "", nCC(INSN_COND),
17108 dst, comment);
17110 goto decode_success;
17113 // B, BL (Branch, or Branch-and-Link, to a register)
17114 // NB: interworking branch
17115 if (INSN(27,20) == BITS8(0,0,0,1,0,0,1,0)
17116 && INSN(19,12) == BITS8(1,1,1,1,1,1,1,1)
17117 && (INSN(11,4) == BITS8(1,1,1,1,0,0,1,1)
17118 || INSN(11,4) == BITS8(1,1,1,1,0,0,0,1))) {
17119 IRTemp dst = newTemp(Ity_I32);
17120 UInt link = (INSN(11,4) >> 1) & 1;
17121 UInt rM = INSN(3,0);
17122 // we don't decode the case (link && rM == 15), as that's
17123 // Unpredictable.
17124 if (!(link && rM == 15)) {
17125 if (condT != IRTemp_INVALID) {
17126 mk_skip_over_A32_if_cond_is_false( condT );
17128 // rM contains an interworking address exactly as we require
17129 // (with continuation CPSR.T in bit 0), so we can use it
17130 // as-is, with no masking.
17131 assign( dst, getIRegA(rM) );
17132 if (link) {
17133 putIRegA( 14, mkU32(guest_R15_curr_instr_notENC + 4),
17134 IRTemp_INVALID/*because AL*/, Ijk_Boring );
17136 llPutIReg(15, mkexpr(dst));
17137 dres.jk_StopHere = link ? Ijk_Call
17138 : (rM == 14 ? Ijk_Ret : Ijk_Boring);
17139 dres.whatNext = Dis_StopHere;
17140 if (condT == IRTemp_INVALID) {
17141 DIP("b%sx r%u\n", link ? "l" : "", rM);
17142 } else {
17143 DIP("b%sx%s r%u\n", link ? "l" : "", nCC(INSN_COND), rM);
17145 goto decode_success;
17147 /* else: (link && rM == 15): just fall through */
17150 /* --- NB: ARM interworking branches are in NV space, hence
17151 are handled elsewhere by decode_NV_instruction_ARMv7_and_below.
17155 /* --------------------- Clz --------------------- */
17156 // CLZ
17157 if (INSN(27,20) == BITS8(0,0,0,1,0,1,1,0)
17158 && INSN(19,16) == BITS4(1,1,1,1)
17159 && INSN(11,4) == BITS8(1,1,1,1,0,0,0,1)) {
17160 UInt rD = INSN(15,12);
17161 UInt rM = INSN(3,0);
17162 IRTemp arg = newTemp(Ity_I32);
17163 IRTemp res = newTemp(Ity_I32);
17164 assign(arg, getIRegA(rM));
17165 assign(res, IRExpr_ITE(
17166 binop(Iop_CmpEQ32, mkexpr(arg), mkU32(0)),
17167 mkU32(32),
17168 unop(Iop_Clz32, mkexpr(arg))
17170 putIRegA(rD, mkexpr(res), condT, Ijk_Boring);
17171 DIP("clz%s r%u, r%u\n", nCC(INSN_COND), rD, rM);
17172 goto decode_success;
17175 /* --------------------- Mul etc --------------------- */
17176 // MUL
17177 if (BITS8(0,0,0,0,0,0,0,0) == (INSN(27,20) & BITS8(1,1,1,1,1,1,1,0))
17178 && INSN(15,12) == BITS4(0,0,0,0)
17179 && INSN(7,4) == BITS4(1,0,0,1)) {
17180 UInt bitS = (insn >> 20) & 1; /* 20:20 */
17181 UInt rD = INSN(19,16);
17182 UInt rS = INSN(11,8);
17183 UInt rM = INSN(3,0);
17184 if (rD == 15 || rM == 15 || rS == 15) {
17185 /* Unpredictable; don't decode; fall through */
17186 } else {
17187 IRTemp argL = newTemp(Ity_I32);
17188 IRTemp argR = newTemp(Ity_I32);
17189 IRTemp res = newTemp(Ity_I32);
17190 IRTemp oldC = IRTemp_INVALID;
17191 IRTemp oldV = IRTemp_INVALID;
17192 assign( argL, getIRegA(rM));
17193 assign( argR, getIRegA(rS));
17194 assign( res, binop(Iop_Mul32, mkexpr(argL), mkexpr(argR)) );
17195 if (bitS) {
17196 oldC = newTemp(Ity_I32);
17197 assign(oldC, mk_armg_calculate_flag_c());
17198 oldV = newTemp(Ity_I32);
17199 assign(oldV, mk_armg_calculate_flag_v());
17201 // now update guest state
17202 putIRegA( rD, mkexpr(res), condT, Ijk_Boring );
17203 if (bitS) {
17204 IRTemp pair = newTemp(Ity_I32);
17205 assign( pair, binop(Iop_Or32,
17206 binop(Iop_Shl32, mkexpr(oldC), mkU8(1)),
17207 mkexpr(oldV)) );
17208 setFlags_D1_ND( ARMG_CC_OP_MUL, res, pair, condT );
17210 DIP("mul%c%s r%u, r%u, r%u\n",
17211 bitS ? 's' : ' ', nCC(INSN_COND), rD, rM, rS);
17212 goto decode_success;
17214 /* fall through */
17217 /* --------------------- Integer Divides --------------------- */
17218 // SDIV
17219 if (BITS8(0,1,1,1,0,0,0,1) == INSN(27,20)
17220 && INSN(15,12) == BITS4(1,1,1,1)
17221 && INSN(7,4) == BITS4(0,0,0,1)) {
17222 UInt rD = INSN(19,16);
17223 UInt rM = INSN(11,8);
17224 UInt rN = INSN(3,0);
17225 if (rD == 15 || rM == 15 || rN == 15) {
17226 /* Unpredictable; don't decode; fall through */
17227 } else {
17228 IRTemp res = newTemp(Ity_I32);
17229 IRTemp argL = newTemp(Ity_I32);
17230 IRTemp argR = newTemp(Ity_I32);
17231 assign(argL, getIRegA(rN));
17232 assign(argR, getIRegA(rM));
17233 assign(res, binop(Iop_DivS32, mkexpr(argL), mkexpr(argR)));
17234 putIRegA(rD, mkexpr(res), condT, Ijk_Boring);
17235 DIP("sdiv r%u, r%u, r%u\n", rD, rN, rM);
17236 goto decode_success;
17240 // UDIV
17241 if (BITS8(0,1,1,1,0,0,1,1) == INSN(27,20)
17242 && INSN(15,12) == BITS4(1,1,1,1)
17243 && INSN(7,4) == BITS4(0,0,0,1)) {
17244 UInt rD = INSN(19,16);
17245 UInt rM = INSN(11,8);
17246 UInt rN = INSN(3,0);
17247 if (rD == 15 || rM == 15 || rN == 15) {
17248 /* Unpredictable; don't decode; fall through */
17249 } else {
17250 IRTemp res = newTemp(Ity_I32);
17251 IRTemp argL = newTemp(Ity_I32);
17252 IRTemp argR = newTemp(Ity_I32);
17253 assign(argL, getIRegA(rN));
17254 assign(argR, getIRegA(rM));
17255 assign(res, binop(Iop_DivU32, mkexpr(argL), mkexpr(argR)));
17256 putIRegA(rD, mkexpr(res), condT, Ijk_Boring);
17257 DIP("udiv r%u, r%u, r%u\n", rD, rN, rM);
17258 goto decode_success;
17262 // MLA, MLS
17263 if (BITS8(0,0,0,0,0,0,1,0) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,0))
17264 && INSN(7,4) == BITS4(1,0,0,1)) {
17265 UInt bitS = (insn >> 20) & 1; /* 20:20 */
17266 UInt isMLS = (insn >> 22) & 1; /* 22:22 */
17267 UInt rD = INSN(19,16);
17268 UInt rN = INSN(15,12);
17269 UInt rS = INSN(11,8);
17270 UInt rM = INSN(3,0);
17271 if (bitS == 1 && isMLS == 1) {
17272 /* This isn't allowed (MLS that sets flags). don't decode;
17273 fall through */
17275 else
17276 if (rD == 15 || rM == 15 || rS == 15 || rN == 15) {
17277 /* Unpredictable; don't decode; fall through */
17278 } else {
17279 IRTemp argL = newTemp(Ity_I32);
17280 IRTemp argR = newTemp(Ity_I32);
17281 IRTemp argP = newTemp(Ity_I32);
17282 IRTemp res = newTemp(Ity_I32);
17283 IRTemp oldC = IRTemp_INVALID;
17284 IRTemp oldV = IRTemp_INVALID;
17285 assign( argL, getIRegA(rM));
17286 assign( argR, getIRegA(rS));
17287 assign( argP, getIRegA(rN));
17288 assign( res, binop(isMLS ? Iop_Sub32 : Iop_Add32,
17289 mkexpr(argP),
17290 binop(Iop_Mul32, mkexpr(argL), mkexpr(argR)) ));
17291 if (bitS) {
17292 vassert(!isMLS); // guaranteed above
17293 oldC = newTemp(Ity_I32);
17294 assign(oldC, mk_armg_calculate_flag_c());
17295 oldV = newTemp(Ity_I32);
17296 assign(oldV, mk_armg_calculate_flag_v());
17298 // now update guest state
17299 putIRegA( rD, mkexpr(res), condT, Ijk_Boring );
17300 if (bitS) {
17301 IRTemp pair = newTemp(Ity_I32);
17302 assign( pair, binop(Iop_Or32,
17303 binop(Iop_Shl32, mkexpr(oldC), mkU8(1)),
17304 mkexpr(oldV)) );
17305 setFlags_D1_ND( ARMG_CC_OP_MUL, res, pair, condT );
17307 DIP("ml%c%c%s r%u, r%u, r%u, r%u\n",
17308 isMLS ? 's' : 'a', bitS ? 's' : ' ',
17309 nCC(INSN_COND), rD, rM, rS, rN);
17310 goto decode_success;
17312 /* fall through */
17315 // SMULL, UMULL
17316 if (BITS8(0,0,0,0,1,0,0,0) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,0))
17317 && INSN(7,4) == BITS4(1,0,0,1)) {
17318 UInt bitS = (insn >> 20) & 1; /* 20:20 */
17319 UInt rDhi = INSN(19,16);
17320 UInt rDlo = INSN(15,12);
17321 UInt rS = INSN(11,8);
17322 UInt rM = INSN(3,0);
17323 UInt isS = (INSN(27,20) >> 2) & 1; /* 22:22 */
17324 if (rDhi == 15 || rDlo == 15 || rM == 15 || rS == 15 || rDhi == rDlo) {
17325 /* Unpredictable; don't decode; fall through */
17326 } else {
17327 IRTemp argL = newTemp(Ity_I32);
17328 IRTemp argR = newTemp(Ity_I32);
17329 IRTemp res = newTemp(Ity_I64);
17330 IRTemp resHi = newTemp(Ity_I32);
17331 IRTemp resLo = newTemp(Ity_I32);
17332 IRTemp oldC = IRTemp_INVALID;
17333 IRTemp oldV = IRTemp_INVALID;
17334 IROp mulOp = isS ? Iop_MullS32 : Iop_MullU32;
17335 assign( argL, getIRegA(rM));
17336 assign( argR, getIRegA(rS));
17337 assign( res, binop(mulOp, mkexpr(argL), mkexpr(argR)) );
17338 assign( resHi, unop(Iop_64HIto32, mkexpr(res)) );
17339 assign( resLo, unop(Iop_64to32, mkexpr(res)) );
17340 if (bitS) {
17341 oldC = newTemp(Ity_I32);
17342 assign(oldC, mk_armg_calculate_flag_c());
17343 oldV = newTemp(Ity_I32);
17344 assign(oldV, mk_armg_calculate_flag_v());
17346 // now update guest state
17347 putIRegA( rDhi, mkexpr(resHi), condT, Ijk_Boring );
17348 putIRegA( rDlo, mkexpr(resLo), condT, Ijk_Boring );
17349 if (bitS) {
17350 IRTemp pair = newTemp(Ity_I32);
17351 assign( pair, binop(Iop_Or32,
17352 binop(Iop_Shl32, mkexpr(oldC), mkU8(1)),
17353 mkexpr(oldV)) );
17354 setFlags_D1_D2_ND( ARMG_CC_OP_MULL, resLo, resHi, pair, condT );
17356 DIP("%cmull%c%s r%u, r%u, r%u, r%u\n",
17357 isS ? 's' : 'u', bitS ? 's' : ' ',
17358 nCC(INSN_COND), rDlo, rDhi, rM, rS);
17359 goto decode_success;
17361 /* fall through */
17364 // SMLAL, UMLAL
17365 if (BITS8(0,0,0,0,1,0,1,0) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,0))
17366 && INSN(7,4) == BITS4(1,0,0,1)) {
17367 UInt bitS = (insn >> 20) & 1; /* 20:20 */
17368 UInt rDhi = INSN(19,16);
17369 UInt rDlo = INSN(15,12);
17370 UInt rS = INSN(11,8);
17371 UInt rM = INSN(3,0);
17372 UInt isS = (INSN(27,20) >> 2) & 1; /* 22:22 */
17373 if (rDhi == 15 || rDlo == 15 || rM == 15 || rS == 15 || rDhi == rDlo) {
17374 /* Unpredictable; don't decode; fall through */
17375 } else {
17376 IRTemp argL = newTemp(Ity_I32);
17377 IRTemp argR = newTemp(Ity_I32);
17378 IRTemp old = newTemp(Ity_I64);
17379 IRTemp res = newTemp(Ity_I64);
17380 IRTemp resHi = newTemp(Ity_I32);
17381 IRTemp resLo = newTemp(Ity_I32);
17382 IRTemp oldC = IRTemp_INVALID;
17383 IRTemp oldV = IRTemp_INVALID;
17384 IROp mulOp = isS ? Iop_MullS32 : Iop_MullU32;
17385 assign( argL, getIRegA(rM));
17386 assign( argR, getIRegA(rS));
17387 assign( old, binop(Iop_32HLto64, getIRegA(rDhi), getIRegA(rDlo)) );
17388 assign( res, binop(Iop_Add64,
17389 mkexpr(old),
17390 binop(mulOp, mkexpr(argL), mkexpr(argR))) );
17391 assign( resHi, unop(Iop_64HIto32, mkexpr(res)) );
17392 assign( resLo, unop(Iop_64to32, mkexpr(res)) );
17393 if (bitS) {
17394 oldC = newTemp(Ity_I32);
17395 assign(oldC, mk_armg_calculate_flag_c());
17396 oldV = newTemp(Ity_I32);
17397 assign(oldV, mk_armg_calculate_flag_v());
17399 // now update guest state
17400 putIRegA( rDhi, mkexpr(resHi), condT, Ijk_Boring );
17401 putIRegA( rDlo, mkexpr(resLo), condT, Ijk_Boring );
17402 if (bitS) {
17403 IRTemp pair = newTemp(Ity_I32);
17404 assign( pair, binop(Iop_Or32,
17405 binop(Iop_Shl32, mkexpr(oldC), mkU8(1)),
17406 mkexpr(oldV)) );
17407 setFlags_D1_D2_ND( ARMG_CC_OP_MULL, resLo, resHi, pair, condT );
17409 DIP("%cmlal%c%s r%u, r%u, r%u, r%u\n",
17410 isS ? 's' : 'u', bitS ? 's' : ' ', nCC(INSN_COND),
17411 rDlo, rDhi, rM, rS);
17412 goto decode_success;
17414 /* fall through */
17417 // UMAAL
17418 if (BITS8(0,0,0,0,0,1,0,0) == INSN(27,20) && INSN(7,4) == BITS4(1,0,0,1)) {
17419 UInt rDhi = INSN(19,16);
17420 UInt rDlo = INSN(15,12);
17421 UInt rM = INSN(11,8);
17422 UInt rN = INSN(3,0);
17423 if (rDlo == 15 || rDhi == 15 || rN == 15 || rM == 15 || rDhi == rDlo) {
17424 /* Unpredictable; don't decode; fall through */
17425 } else {
17426 IRTemp argN = newTemp(Ity_I32);
17427 IRTemp argM = newTemp(Ity_I32);
17428 IRTemp argDhi = newTemp(Ity_I32);
17429 IRTemp argDlo = newTemp(Ity_I32);
17430 IRTemp res = newTemp(Ity_I64);
17431 IRTemp resHi = newTemp(Ity_I32);
17432 IRTemp resLo = newTemp(Ity_I32);
17433 assign( argN, getIRegA(rN) );
17434 assign( argM, getIRegA(rM) );
17435 assign( argDhi, getIRegA(rDhi) );
17436 assign( argDlo, getIRegA(rDlo) );
17437 assign( res,
17438 binop(Iop_Add64,
17439 binop(Iop_Add64,
17440 binop(Iop_MullU32, mkexpr(argN), mkexpr(argM)),
17441 unop(Iop_32Uto64, mkexpr(argDhi))),
17442 unop(Iop_32Uto64, mkexpr(argDlo))) );
17443 assign( resHi, unop(Iop_64HIto32, mkexpr(res)) );
17444 assign( resLo, unop(Iop_64to32, mkexpr(res)) );
17445 // now update guest state
17446 putIRegA( rDhi, mkexpr(resHi), condT, Ijk_Boring );
17447 putIRegA( rDlo, mkexpr(resLo), condT, Ijk_Boring );
17448 DIP("umaal %s r%u, r%u, r%u, r%u\n",
17449 nCC(INSN_COND), rDlo, rDhi, rN, rM);
17450 goto decode_success;
17452 /* fall through */
17455 /* --------------------- Msr etc --------------------- */
17457 // MSR apsr, #imm
17458 if (INSN(27,20) == BITS8(0,0,1,1,0,0,1,0)
17459 && INSN(17,12) == BITS6(0,0,1,1,1,1)) {
17460 UInt write_ge = INSN(18,18);
17461 UInt write_nzcvq = INSN(19,19);
17462 if (write_nzcvq || write_ge) {
17463 UInt imm = (INSN(11,0) >> 0) & 0xFF;
17464 UInt rot = 2 * ((INSN(11,0) >> 8) & 0xF);
17465 IRTemp immT = newTemp(Ity_I32);
17466 vassert(rot <= 30);
17467 imm = ROR32(imm, rot);
17468 assign(immT, mkU32(imm));
17469 desynthesise_APSR( write_nzcvq, write_ge, immT, condT );
17470 DIP("msr%s cpsr%s%sf, #0x%08x\n", nCC(INSN_COND),
17471 write_nzcvq ? "f" : "", write_ge ? "g" : "", imm);
17472 goto decode_success;
17474 /* fall through */
17477 // MSR apsr, reg
17478 if (INSN(27,20) == BITS8(0,0,0,1,0,0,1,0)
17479 && INSN(17,12) == BITS6(0,0,1,1,1,1)
17480 && INSN(11,4) == BITS8(0,0,0,0,0,0,0,0)) {
17481 UInt rN = INSN(3,0);
17482 UInt write_ge = INSN(18,18);
17483 UInt write_nzcvq = INSN(19,19);
17484 if (rN != 15 && (write_nzcvq || write_ge)) {
17485 IRTemp rNt = newTemp(Ity_I32);
17486 assign(rNt, getIRegA(rN));
17487 desynthesise_APSR( write_nzcvq, write_ge, rNt, condT );
17488 DIP("msr%s cpsr_%s%s, r%u\n", nCC(INSN_COND),
17489 write_nzcvq ? "f" : "", write_ge ? "g" : "", rN);
17490 goto decode_success;
17492 /* fall through */
17495 // MRS rD, cpsr
17496 if ((insn & 0x0FFF0FFF) == 0x010F0000) {
17497 UInt rD = INSN(15,12);
17498 if (rD != 15) {
17499 IRTemp apsr = synthesise_APSR();
17500 putIRegA( rD, mkexpr(apsr), condT, Ijk_Boring );
17501 DIP("mrs%s r%u, cpsr\n", nCC(INSN_COND), rD);
17502 goto decode_success;
17504 /* fall through */
17507 /* --------------------- Svc --------------------- */
17508 if (BITS8(1,1,1,1,0,0,0,0) == (INSN(27,20) & BITS8(1,1,1,1,0,0,0,0))) {
17509 UInt imm24 = (insn >> 0) & 0xFFFFFF;
17510 if (imm24 == 0) {
17511 /* A syscall. We can't do this conditionally, hence: */
17512 if (condT != IRTemp_INVALID) {
17513 mk_skip_over_A32_if_cond_is_false( condT );
17515 // AL after here
17516 llPutIReg(15, mkU32( guest_R15_curr_instr_notENC + 4 ));
17517 dres.jk_StopHere = Ijk_Sys_syscall;
17518 dres.whatNext = Dis_StopHere;
17519 DIP("svc%s #0x%08x\n", nCC(INSN_COND), imm24);
17520 goto decode_success;
17522 /* fall through */
17525 /* ------------------------ swp ------------------------ */
17527 // SWP, SWPB
17528 if (BITS8(0,0,0,1,0,0,0,0) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,1))
17529 && BITS4(0,0,0,0) == INSN(11,8)
17530 && BITS4(1,0,0,1) == INSN(7,4)) {
17531 UInt rN = INSN(19,16);
17532 UInt rD = INSN(15,12);
17533 UInt rM = INSN(3,0);
17534 IRTemp tRn = newTemp(Ity_I32);
17535 IRTemp tNew = newTemp(Ity_I32);
17536 IRTemp tOld = IRTemp_INVALID;
17537 IRTemp tSC1 = newTemp(Ity_I1);
17538 UInt isB = (insn >> 22) & 1;
17540 if (rD == 15 || rN == 15 || rM == 15 || rN == rM || rN == rD) {
17541 /* undecodable; fall through */
17542 } else {
17543 /* make unconditional */
17544 if (condT != IRTemp_INVALID) {
17545 mk_skip_over_A32_if_cond_is_false( condT );
17546 condT = IRTemp_INVALID;
17548 /* Ok, now we're unconditional. Generate a LL-SC loop. */
17549 assign(tRn, getIRegA(rN));
17550 assign(tNew, getIRegA(rM));
17551 if (isB) {
17552 /* swpb */
17553 tOld = newTemp(Ity_I8);
17554 stmt( IRStmt_LLSC(Iend_LE, tOld, mkexpr(tRn),
17555 NULL/*=>isLL*/) );
17556 stmt( IRStmt_LLSC(Iend_LE, tSC1, mkexpr(tRn),
17557 unop(Iop_32to8, mkexpr(tNew))) );
17558 } else {
17559 /* swp */
17560 tOld = newTemp(Ity_I32);
17561 stmt( IRStmt_LLSC(Iend_LE, tOld, mkexpr(tRn),
17562 NULL/*=>isLL*/) );
17563 stmt( IRStmt_LLSC(Iend_LE, tSC1, mkexpr(tRn),
17564 mkexpr(tNew)) );
17566 stmt( IRStmt_Exit(unop(Iop_Not1, mkexpr(tSC1)),
17567 /*Ijk_NoRedir*/Ijk_Boring,
17568 IRConst_U32(guest_R15_curr_instr_notENC),
17569 OFFB_R15T ));
17570 putIRegA(rD, isB ? unop(Iop_8Uto32, mkexpr(tOld)) : mkexpr(tOld),
17571 IRTemp_INVALID, Ijk_Boring);
17572 DIP("swp%s%s r%u, r%u, [r%u]\n",
17573 isB ? "b" : "", nCC(INSN_COND), rD, rM, rN);
17574 goto decode_success;
17576 /* fall through */
17579 /* ----------------------------------------------------------- */
17580 /* -- ARMv6 instructions -- */
17581 /* ----------------------------------------------------------- */
17583 /* ------------------- {ldr,str}ex{,b,h,d} ------------------- */
17585 // LDREXD, LDREX, LDREXH, LDREXB
17586 if (0x01900F9F == (insn & 0x0F900FFF)) {
17587 UInt rT = INSN(15,12);
17588 UInt rN = INSN(19,16);
17589 IRType ty = Ity_INVALID;
17590 IROp widen = Iop_INVALID;
17591 const HChar* nm = NULL;
17592 Bool valid = True;
17593 switch (INSN(22,21)) {
17594 case 0: nm = ""; ty = Ity_I32; break;
17595 case 1: nm = "d"; ty = Ity_I64; break;
17596 case 2: nm = "b"; ty = Ity_I8; widen = Iop_8Uto32; break;
17597 case 3: nm = "h"; ty = Ity_I16; widen = Iop_16Uto32; break;
17598 default: vassert(0);
17600 if (ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8) {
17601 if (rT == 15 || rN == 15)
17602 valid = False;
17603 } else {
17604 vassert(ty == Ity_I64);
17605 if ((rT & 1) == 1 || rT == 14 || rN == 15)
17606 valid = False;
17608 if (valid) {
17609 IRTemp res;
17610 /* make unconditional */
17611 if (condT != IRTemp_INVALID) {
17612 mk_skip_over_A32_if_cond_is_false( condT );
17613 condT = IRTemp_INVALID;
17615 /* Ok, now we're unconditional. Do the load. */
17616 res = newTemp(ty);
17617 // FIXME: assumes little-endian guest
17618 stmt( IRStmt_LLSC(Iend_LE, res, getIRegA(rN),
17619 NULL/*this is a load*/) );
17620 if (ty == Ity_I64) {
17621 // FIXME: assumes little-endian guest
17622 putIRegA(rT+0, unop(Iop_64to32, mkexpr(res)),
17623 IRTemp_INVALID, Ijk_Boring);
17624 putIRegA(rT+1, unop(Iop_64HIto32, mkexpr(res)),
17625 IRTemp_INVALID, Ijk_Boring);
17626 DIP("ldrex%s%s r%u, r%u, [r%u]\n",
17627 nm, nCC(INSN_COND), rT+0, rT+1, rN);
17628 } else {
17629 putIRegA(rT, widen == Iop_INVALID
17630 ? mkexpr(res) : unop(widen, mkexpr(res)),
17631 IRTemp_INVALID, Ijk_Boring);
17632 DIP("ldrex%s%s r%u, [r%u]\n", nm, nCC(INSN_COND), rT, rN);
17634 goto decode_success;
17636 /* undecodable; fall through */
17639 // STREXD, STREX, STREXH, STREXB
17640 if (0x01800F90 == (insn & 0x0F900FF0)) {
17641 UInt rT = INSN(3,0);
17642 UInt rN = INSN(19,16);
17643 UInt rD = INSN(15,12);
17644 IRType ty = Ity_INVALID;
17645 IROp narrow = Iop_INVALID;
17646 const HChar* nm = NULL;
17647 Bool valid = True;
17648 switch (INSN(22,21)) {
17649 case 0: nm = ""; ty = Ity_I32; break;
17650 case 1: nm = "d"; ty = Ity_I64; break;
17651 case 2: nm = "b"; ty = Ity_I8; narrow = Iop_32to8; break;
17652 case 3: nm = "h"; ty = Ity_I16; narrow = Iop_32to16; break;
17653 default: vassert(0);
17655 if (ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8) {
17656 if (rD == 15 || rN == 15 || rT == 15
17657 || rD == rN || rD == rT)
17658 valid = False;
17659 } else {
17660 vassert(ty == Ity_I64);
17661 if (rD == 15 || (rT & 1) == 1 || rT == 14 || rN == 15
17662 || rD == rN || rD == rT || rD == rT+1)
17663 valid = False;
17665 if (valid) {
17666 IRTemp resSC1, resSC32, data;
17667 /* make unconditional */
17668 if (condT != IRTemp_INVALID) {
17669 mk_skip_over_A32_if_cond_is_false( condT );
17670 condT = IRTemp_INVALID;
17672 /* Ok, now we're unconditional. Do the store. */
17673 data = newTemp(ty);
17674 assign(data,
17675 ty == Ity_I64
17676 // FIXME: assumes little-endian guest
17677 ? binop(Iop_32HLto64, getIRegA(rT+1), getIRegA(rT+0))
17678 : narrow == Iop_INVALID
17679 ? getIRegA(rT)
17680 : unop(narrow, getIRegA(rT)));
17681 resSC1 = newTemp(Ity_I1);
17682 // FIXME: assumes little-endian guest
17683 stmt( IRStmt_LLSC(Iend_LE, resSC1, getIRegA(rN), mkexpr(data)) );
17685 /* Set rD to 1 on failure, 0 on success. Currently we have
17686 resSC1 == 0 on failure, 1 on success. */
17687 resSC32 = newTemp(Ity_I32);
17688 assign(resSC32,
17689 unop(Iop_1Uto32, unop(Iop_Not1, mkexpr(resSC1))));
17691 putIRegA(rD, mkexpr(resSC32),
17692 IRTemp_INVALID, Ijk_Boring);
17693 if (ty == Ity_I64) {
17694 DIP("strex%s%s r%u, r%u, r%u, [r%u]\n",
17695 nm, nCC(INSN_COND), rD, rT, rT+1, rN);
17696 } else {
17697 DIP("strex%s%s r%u, r%u, [r%u]\n",
17698 nm, nCC(INSN_COND), rD, rT, rN);
17700 goto decode_success;
17702 /* fall through */
17705 /* --------------------- movw, movt --------------------- */
17706 if (0x03000000 == (insn & 0x0FF00000)
17707 || 0x03400000 == (insn & 0x0FF00000)) /* pray for CSE */ {
17708 UInt rD = INSN(15,12);
17709 UInt imm16 = (insn & 0xFFF) | ((insn >> 4) & 0x0000F000);
17710 UInt isT = (insn >> 22) & 1;
17711 if (rD == 15) {
17712 /* forget it */
17713 } else {
17714 if (isT) {
17715 putIRegA(rD,
17716 binop(Iop_Or32,
17717 binop(Iop_And32, getIRegA(rD), mkU32(0xFFFF)),
17718 mkU32(imm16 << 16)),
17719 condT, Ijk_Boring);
17720 DIP("movt%s r%u, #0x%04x\n", nCC(INSN_COND), rD, imm16);
17721 goto decode_success;
17722 } else {
17723 putIRegA(rD, mkU32(imm16), condT, Ijk_Boring);
17724 DIP("movw%s r%u, #0x%04x\n", nCC(INSN_COND), rD, imm16);
17725 goto decode_success;
17728 /* fall through */
17731 /* ----------- uxtb, sxtb, uxth, sxth, uxtb16, sxtb16 ----------- */
17732 /* FIXME: this is an exact duplicate of the Thumb version. They
17733 should be commoned up. */
17734 if (BITS8(0,1,1,0,1, 0,0,0) == (INSN(27,20) & BITS8(1,1,1,1,1,0,0,0))
17735 && BITS4(1,1,1,1) == INSN(19,16)
17736 && BITS4(0,1,1,1) == INSN(7,4)
17737 && BITS4(0,0, 0,0) == (INSN(11,8) & BITS4(0,0,1,1))) {
17738 UInt subopc = INSN(27,20) & BITS8(0,0,0,0,0, 1,1,1);
17739 if (subopc != BITS4(0,0,0,1) && subopc != BITS4(0,1,0,1)) {
17740 Int rot = (INSN(11,8) >> 2) & 3;
17741 UInt rM = INSN(3,0);
17742 UInt rD = INSN(15,12);
17743 IRTemp srcT = newTemp(Ity_I32);
17744 IRTemp rotT = newTemp(Ity_I32);
17745 IRTemp dstT = newTemp(Ity_I32);
17746 const HChar* nm = "???";
17747 assign(srcT, getIRegA(rM));
17748 assign(rotT, genROR32(srcT, 8 * rot)); /* 0, 8, 16 or 24 only */
17749 switch (subopc) {
17750 case BITS4(0,1,1,0): // UXTB
17751 assign(dstT, unop(Iop_8Uto32, unop(Iop_32to8, mkexpr(rotT))));
17752 nm = "uxtb";
17753 break;
17754 case BITS4(0,0,1,0): // SXTB
17755 assign(dstT, unop(Iop_8Sto32, unop(Iop_32to8, mkexpr(rotT))));
17756 nm = "sxtb";
17757 break;
17758 case BITS4(0,1,1,1): // UXTH
17759 assign(dstT, unop(Iop_16Uto32, unop(Iop_32to16, mkexpr(rotT))));
17760 nm = "uxth";
17761 break;
17762 case BITS4(0,0,1,1): // SXTH
17763 assign(dstT, unop(Iop_16Sto32, unop(Iop_32to16, mkexpr(rotT))));
17764 nm = "sxth";
17765 break;
17766 case BITS4(0,1,0,0): // UXTB16
17767 assign(dstT, binop(Iop_And32, mkexpr(rotT), mkU32(0x00FF00FF)));
17768 nm = "uxtb16";
17769 break;
17770 case BITS4(0,0,0,0): { // SXTB16
17771 IRTemp lo32 = newTemp(Ity_I32);
17772 IRTemp hi32 = newTemp(Ity_I32);
17773 assign(lo32, binop(Iop_And32, mkexpr(rotT), mkU32(0xFF)));
17774 assign(hi32, binop(Iop_Shr32, mkexpr(rotT), mkU8(16)));
17775 assign(
17776 dstT,
17777 binop(Iop_Or32,
17778 binop(Iop_And32,
17779 unop(Iop_8Sto32,
17780 unop(Iop_32to8, mkexpr(lo32))),
17781 mkU32(0xFFFF)),
17782 binop(Iop_Shl32,
17783 unop(Iop_8Sto32,
17784 unop(Iop_32to8, mkexpr(hi32))),
17785 mkU8(16))
17787 nm = "sxtb16";
17788 break;
17790 default:
17791 vassert(0); // guarded by "if" above
17793 putIRegA(rD, mkexpr(dstT), condT, Ijk_Boring);
17794 DIP("%s%s r%u, r%u, ROR #%d\n", nm, nCC(INSN_COND), rD, rM, rot);
17795 goto decode_success;
17797 /* fall through */
17800 /* ------------------- bfi, bfc ------------------- */
17801 if (BITS8(0,1,1,1,1,1,0, 0) == (INSN(27,20) & BITS8(1,1,1,1,1,1,1,0))
17802 && BITS4(0, 0,0,1) == (INSN(7,4) & BITS4(0,1,1,1))) {
17803 UInt rD = INSN(15,12);
17804 UInt rN = INSN(3,0);
17805 UInt msb = (insn >> 16) & 0x1F; /* 20:16 */
17806 UInt lsb = (insn >> 7) & 0x1F; /* 11:7 */
17807 if (rD == 15 || msb < lsb) {
17808 /* undecodable; fall through */
17809 } else {
17810 IRTemp src = newTemp(Ity_I32);
17811 IRTemp olddst = newTemp(Ity_I32);
17812 IRTemp newdst = newTemp(Ity_I32);
17813 UInt mask = ((UInt)1) << (msb - lsb);
17814 mask = (mask - 1) + mask;
17815 vassert(mask != 0); // guaranteed by "msb < lsb" check above
17816 mask <<= lsb;
17818 assign(src, rN == 15 ? mkU32(0) : getIRegA(rN));
17819 assign(olddst, getIRegA(rD));
17820 assign(newdst,
17821 binop(Iop_Or32,
17822 binop(Iop_And32,
17823 binop(Iop_Shl32, mkexpr(src), mkU8(lsb)),
17824 mkU32(mask)),
17825 binop(Iop_And32,
17826 mkexpr(olddst),
17827 mkU32(~mask)))
17830 putIRegA(rD, mkexpr(newdst), condT, Ijk_Boring);
17832 if (rN == 15) {
17833 DIP("bfc%s r%u, #%u, #%u\n",
17834 nCC(INSN_COND), rD, lsb, msb-lsb+1);
17835 } else {
17836 DIP("bfi%s r%u, r%u, #%u, #%u\n",
17837 nCC(INSN_COND), rD, rN, lsb, msb-lsb+1);
17839 goto decode_success;
17841 /* fall through */
17844 /* ------------------- {u,s}bfx ------------------- */
17845 if (BITS8(0,1,1,1,1,0,1,0) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,0))
17846 && BITS4(0,1,0,1) == (INSN(7,4) & BITS4(0,1,1,1))) {
17847 UInt rD = INSN(15,12);
17848 UInt rN = INSN(3,0);
17849 UInt wm1 = (insn >> 16) & 0x1F; /* 20:16 */
17850 UInt lsb = (insn >> 7) & 0x1F; /* 11:7 */
17851 UInt msb = lsb + wm1;
17852 UInt isU = (insn >> 22) & 1; /* 22:22 */
17853 if (rD == 15 || rN == 15 || msb >= 32) {
17854 /* undecodable; fall through */
17855 } else {
17856 IRTemp src = newTemp(Ity_I32);
17857 IRTemp tmp = newTemp(Ity_I32);
17858 IRTemp res = newTemp(Ity_I32);
17859 UInt mask = ((1 << wm1) - 1) + (1 << wm1);
17860 vassert(msb >= 0 && msb <= 31);
17861 vassert(mask != 0); // guaranteed by msb being in 0 .. 31 inclusive
17863 assign(src, getIRegA(rN));
17864 assign(tmp, binop(Iop_And32,
17865 binop(Iop_Shr32, mkexpr(src), mkU8(lsb)),
17866 mkU32(mask)));
17867 assign(res, binop(isU ? Iop_Shr32 : Iop_Sar32,
17868 binop(Iop_Shl32, mkexpr(tmp), mkU8(31-wm1)),
17869 mkU8(31-wm1)));
17871 putIRegA(rD, mkexpr(res), condT, Ijk_Boring);
17873 DIP("%s%s r%u, r%u, #%u, #%u\n",
17874 isU ? "ubfx" : "sbfx",
17875 nCC(INSN_COND), rD, rN, lsb, wm1 + 1);
17876 goto decode_success;
17878 /* fall through */
17881 /* --------------------- Load/store doubleword ------------- */
17882 // LDRD STRD
17883 /* 31 27 23 19 15 11 7 3 # highest bit
17884 28 24 20 16 12 8 4 0
17885 A5-36 1 | 16 cond 0001 U100 Rn Rd im4h 11S1 im4l
17886 A5-38 1 | 32 cond 0001 U000 Rn Rd 0000 11S1 Rm
17887 A5-40 2 | 16 cond 0001 U110 Rn Rd im4h 11S1 im4l
17888 A5-42 2 | 32 cond 0001 U010 Rn Rd 0000 11S1 Rm
17889 A5-44 3 | 16 cond 0000 U100 Rn Rd im4h 11S1 im4l
17890 A5-46 3 | 32 cond 0000 U000 Rn Rd 0000 11S1 Rm
17892 /* case coding:
17893 1 at-ea (access at ea)
17894 2 at-ea-then-upd (access at ea, then Rn = ea)
17895 3 at-Rn-then-upd (access at Rn, then Rn = ea)
17896 ea coding
17897 16 Rn +/- imm8
17898 32 Rn +/- Rm
17900 /* Quickly skip over all of this for hopefully most instructions */
17901 if ((INSN(27,24) & BITS4(1,1,1,0)) != BITS4(0,0,0,0))
17902 goto after_load_store_doubleword;
17904 /* Check the "11S1" thing. */
17905 if ((INSN(7,4) & BITS4(1,1,0,1)) != BITS4(1,1,0,1))
17906 goto after_load_store_doubleword;
17908 summary = 0;
17910 /**/ if (INSN(27,24) == BITS4(0,0,0,1) && INSN(22,20) == BITS3(1,0,0)) {
17911 summary = 1 | 16;
17913 else if (INSN(27,24) == BITS4(0,0,0,1) && INSN(22,20) == BITS3(0,0,0)) {
17914 summary = 1 | 32;
17916 else if (INSN(27,24) == BITS4(0,0,0,1) && INSN(22,20) == BITS3(1,1,0)) {
17917 summary = 2 | 16;
17919 else if (INSN(27,24) == BITS4(0,0,0,1) && INSN(22,20) == BITS3(0,1,0)) {
17920 summary = 2 | 32;
17922 else if (INSN(27,24) == BITS4(0,0,0,0) && INSN(22,20) == BITS3(1,0,0)) {
17923 summary = 3 | 16;
17925 else if (INSN(27,24) == BITS4(0,0,0,0) && INSN(22,20) == BITS3(0,0,0)) {
17926 summary = 3 | 32;
17928 else goto after_load_store_doubleword;
17930 { UInt rN = (insn >> 16) & 0xF; /* 19:16 */
17931 UInt rD = (insn >> 12) & 0xF; /* 15:12 */
17932 UInt rM = (insn >> 0) & 0xF; /* 3:0 */
17933 UInt bU = (insn >> 23) & 1; /* 23 U=1 offset+, U=0 offset- */
17934 UInt bS = (insn >> 5) & 1; /* S=1 store, S=0 load */
17935 UInt imm8 = ((insn >> 4) & 0xF0) | (insn & 0xF); /* 11:8, 3:0 */
17937 /* Require rD to be an even numbered register */
17938 if ((rD & 1) != 0)
17939 goto after_load_store_doubleword;
17941 /* Require 11:8 == 0 for Rn +/- Rm cases */
17942 if ((summary & 32) != 0 && (imm8 & 0xF0) != 0)
17943 goto after_load_store_doubleword;
17945 /* Skip some invalid cases, which would lead to two competing
17946 updates to the same register, or which are otherwise
17947 disallowed by the spec. */
17948 switch (summary) {
17949 case 1 | 16:
17950 break;
17951 case 1 | 32:
17952 if (rM == 15) goto after_load_store_doubleword;
17953 break;
17954 case 2 | 16: case 3 | 16:
17955 if (rN == 15) goto after_load_store_doubleword;
17956 if (bS == 0 && (rN == rD || rN == rD+1))
17957 goto after_load_store_doubleword;
17958 break;
17959 case 2 | 32: case 3 | 32:
17960 if (rM == 15) goto after_load_store_doubleword;
17961 if (rN == 15) goto after_load_store_doubleword;
17962 if (rN == rM) goto after_load_store_doubleword;
17963 if (bS == 0 && (rN == rD || rN == rD+1))
17964 goto after_load_store_doubleword;
17965 break;
17966 default:
17967 vassert(0);
17970 /* If this is a branch, make it unconditional at this point.
17971 Doing conditional branches in-line is too complex (for
17972 now). */
17973 vassert((rD & 1) == 0); /* from tests above */
17974 if (bS == 0 && rD+1 == 15 && condT != IRTemp_INVALID) {
17975 // go uncond
17976 mk_skip_over_A32_if_cond_is_false( condT );
17977 condT = IRTemp_INVALID;
17978 // now uncond
17981 /* compute the effective address. Bind it to a tmp since we
17982 may need to use it twice. */
17983 IRExpr* eaE = NULL;
17984 switch (summary & 0xF0) {
17985 case 16:
17986 eaE = mk_EA_reg_plusminus_imm8( rN, bU, imm8, dis_buf );
17987 break;
17988 case 32:
17989 eaE = mk_EA_reg_plusminus_reg( rN, bU, rM, dis_buf );
17990 break;
17992 vassert(eaE);
17993 IRTemp eaT = newTemp(Ity_I32);
17994 assign(eaT, eaE);
17996 /* get the old Rn value */
17997 IRTemp rnT = newTemp(Ity_I32);
17998 assign(rnT, getIRegA(rN));
18000 /* decide on the transfer address */
18001 IRTemp taT = IRTemp_INVALID;
18002 switch (summary & 0x0F) {
18003 case 1: case 2: taT = eaT; break;
18004 case 3: taT = rnT; break;
18006 vassert(taT != IRTemp_INVALID);
18008 /* XXX deal with alignment constraints */
18009 /* XXX: but the A8 doesn't seem to trap for misaligned loads, so,
18010 ignore alignment issues for the time being. */
18012 /* For almost all cases, we do the writeback after the transfers.
18013 However, that leaves the stack "uncovered" in cases like:
18014 strd rD, [sp, #-8]
18015 strd rD, [sp, #-16]
18016 In which case, do the writeback to SP now, instead of later.
18017 This is bad in that it makes the insn non-restartable if the
18018 accesses fault, but at least keeps Memcheck happy. */
18019 Bool writeback_already_done = False;
18020 if (bS == 1 /*store*/ && summary == (2 | 16)
18021 && rN == 13 && rN != rD && rN != rD+1
18022 && bU == 0/*minus*/
18023 && (imm8 == 8 || imm8 == 16)) {
18024 putIRegA( rN, mkexpr(eaT), condT, Ijk_Boring );
18025 writeback_already_done = True;
18028 /* doubleword store S 1
18029 doubleword load S 0
18031 const HChar* name = NULL;
18032 /* generate the transfers */
18033 if (bS == 1) { // doubleword store
18034 storeGuardedLE( binop(Iop_Add32, mkexpr(taT), mkU32(0)),
18035 getIRegA(rD+0), condT );
18036 storeGuardedLE( binop(Iop_Add32, mkexpr(taT), mkU32(4)),
18037 getIRegA(rD+1), condT );
18038 name = "strd";
18039 } else { // doubleword load
18040 IRTemp oldRd0 = newTemp(Ity_I32);
18041 IRTemp oldRd1 = newTemp(Ity_I32);
18042 assign(oldRd0, llGetIReg(rD+0));
18043 assign(oldRd1, llGetIReg(rD+1));
18044 IRTemp newRd0 = newTemp(Ity_I32);
18045 IRTemp newRd1 = newTemp(Ity_I32);
18046 loadGuardedLE( newRd0, ILGop_Ident32,
18047 binop(Iop_Add32, mkexpr(taT), mkU32(0)),
18048 mkexpr(oldRd0), condT );
18049 putIRegA( rD+0, mkexpr(newRd0), IRTemp_INVALID, Ijk_Boring );
18050 loadGuardedLE( newRd1, ILGop_Ident32,
18051 binop(Iop_Add32, mkexpr(taT), mkU32(4)),
18052 mkexpr(oldRd1), condT );
18053 putIRegA( rD+1, mkexpr(newRd1), IRTemp_INVALID, Ijk_Boring );
18054 name = "ldrd";
18057 /* Update Rn if necessary. */
18058 switch (summary & 0x0F) {
18059 case 2: case 3:
18060 // should be assured by logic above:
18061 vassert(rN != 15); /* from checks above */
18062 if (bS == 0) {
18063 vassert(rD+0 != rN); /* since we just wrote rD+0 */
18064 vassert(rD+1 != rN); /* since we just wrote rD+1 */
18066 if (!writeback_already_done)
18067 putIRegA( rN, mkexpr(eaT), condT, Ijk_Boring );
18068 break;
18071 switch (summary & 0x0F) {
18072 case 1: DIP("%s%s r%u, %s\n", name, nCC(INSN_COND), rD, dis_buf);
18073 break;
18074 case 2: DIP("%s%s r%u, %s! (at-EA-then-Rn=EA)\n",
18075 name, nCC(INSN_COND), rD, dis_buf);
18076 break;
18077 case 3: DIP("%s%s r%u, %s! (at-Rn-then-Rn=EA)\n",
18078 name, nCC(INSN_COND), rD, dis_buf);
18079 break;
18080 default: vassert(0);
18083 goto decode_success;
18086 after_load_store_doubleword:
18088 /* ------------------- {s,u}xtab ------------- */
18089 if (BITS8(0,1,1,0,1,0,1,0) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,1))
18090 && BITS4(0,0,0,0) == (INSN(11,8) & BITS4(0,0,1,1))
18091 && BITS4(0,1,1,1) == INSN(7,4)) {
18092 UInt rN = INSN(19,16);
18093 UInt rD = INSN(15,12);
18094 UInt rM = INSN(3,0);
18095 UInt rot = (insn >> 10) & 3;
18096 UInt isU = INSN(22,22);
18097 if (rN == 15/*it's {S,U}XTB*/ || rD == 15 || rM == 15) {
18098 /* undecodable; fall through */
18099 } else {
18100 IRTemp srcL = newTemp(Ity_I32);
18101 IRTemp srcR = newTemp(Ity_I32);
18102 IRTemp res = newTemp(Ity_I32);
18103 assign(srcR, getIRegA(rM));
18104 assign(srcL, getIRegA(rN));
18105 assign(res, binop(Iop_Add32,
18106 mkexpr(srcL),
18107 unop(isU ? Iop_8Uto32 : Iop_8Sto32,
18108 unop(Iop_32to8,
18109 genROR32(srcR, 8 * rot)))));
18110 putIRegA(rD, mkexpr(res), condT, Ijk_Boring);
18111 DIP("%cxtab%s r%u, r%u, r%u, ror #%u\n",
18112 isU ? 'u' : 's', nCC(INSN_COND), rD, rN, rM, rot);
18113 goto decode_success;
18115 /* fall through */
18118 /* ------------------- {s,u}xtah ------------- */
18119 if (BITS8(0,1,1,0,1,0,1,1) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,1))
18120 && BITS4(0,0,0,0) == (INSN(11,8) & BITS4(0,0,1,1))
18121 && BITS4(0,1,1,1) == INSN(7,4)) {
18122 UInt rN = INSN(19,16);
18123 UInt rD = INSN(15,12);
18124 UInt rM = INSN(3,0);
18125 UInt rot = (insn >> 10) & 3;
18126 UInt isU = INSN(22,22);
18127 if (rN == 15/*it's {S,U}XTH*/ || rD == 15 || rM == 15) {
18128 /* undecodable; fall through */
18129 } else {
18130 IRTemp srcL = newTemp(Ity_I32);
18131 IRTemp srcR = newTemp(Ity_I32);
18132 IRTemp res = newTemp(Ity_I32);
18133 assign(srcR, getIRegA(rM));
18134 assign(srcL, getIRegA(rN));
18135 assign(res, binop(Iop_Add32,
18136 mkexpr(srcL),
18137 unop(isU ? Iop_16Uto32 : Iop_16Sto32,
18138 unop(Iop_32to16,
18139 genROR32(srcR, 8 * rot)))));
18140 putIRegA(rD, mkexpr(res), condT, Ijk_Boring);
18142 DIP("%cxtah%s r%u, r%u, r%u, ror #%u\n",
18143 isU ? 'u' : 's', nCC(INSN_COND), rD, rN, rM, rot);
18144 goto decode_success;
18146 /* fall through */
18149 /* ------------------- rev16, rev ------------------ */
18150 if (INSN(27,16) == 0x6BF
18151 && (INSN(11,4) == 0xFB/*rev16*/ || INSN(11,4) == 0xF3/*rev*/)) {
18152 Bool isREV = INSN(11,4) == 0xF3;
18153 UInt rM = INSN(3,0);
18154 UInt rD = INSN(15,12);
18155 if (rM != 15 && rD != 15) {
18156 IRTemp rMt = newTemp(Ity_I32);
18157 assign(rMt, getIRegA(rM));
18158 IRTemp res = isREV ? gen_REV(rMt) : gen_REV16(rMt);
18159 putIRegA(rD, mkexpr(res), condT, Ijk_Boring);
18160 DIP("rev%s%s r%u, r%u\n", isREV ? "" : "16",
18161 nCC(INSN_COND), rD, rM);
18162 goto decode_success;
18166 /* ------------------- revsh ----------------------- */
18167 if (INSN(27,16) == 0x6FF && INSN(11,4) == 0xFB) {
18168 UInt rM = INSN(3,0);
18169 UInt rD = INSN(15,12);
18170 if (rM != 15 && rD != 15) {
18171 IRTemp irt_rM = newTemp(Ity_I32);
18172 IRTemp irt_hi = newTemp(Ity_I32);
18173 IRTemp irt_low = newTemp(Ity_I32);
18174 IRTemp irt_res = newTemp(Ity_I32);
18175 assign(irt_rM, getIRegA(rM));
18176 assign(irt_hi,
18177 binop(Iop_Sar32,
18178 binop(Iop_Shl32, mkexpr(irt_rM), mkU8(24)),
18179 mkU8(16)
18182 assign(irt_low,
18183 binop(Iop_And32,
18184 binop(Iop_Shr32, mkexpr(irt_rM), mkU8(8)),
18185 mkU32(0xFF)
18188 assign(irt_res,
18189 binop(Iop_Or32, mkexpr(irt_hi), mkexpr(irt_low))
18191 putIRegA(rD, mkexpr(irt_res), condT, Ijk_Boring);
18192 DIP("revsh%s r%u, r%u\n", nCC(INSN_COND), rD, rM);
18193 goto decode_success;
18197 /* ------------------- rbit ------------------ */
18198 if (INSN(27,16) == 0x6FF && INSN(11,4) == 0xF3) {
18199 UInt rD = INSN(15,12);
18200 UInt rM = INSN(3,0);
18201 if (rD != 15 && rM != 15) {
18202 IRTemp arg = newTemp(Ity_I32);
18203 assign(arg, getIRegA(rM));
18204 IRTemp res = gen_BITREV(arg);
18205 putIRegA(rD, mkexpr(res), condT, Ijk_Boring);
18206 DIP("rbit r%u, r%u\n", rD, rM);
18207 goto decode_success;
18211 /* ------------------- smmul ------------------ */
18212 if (INSN(27,20) == BITS8(0,1,1,1,0,1,0,1)
18213 && INSN(15,12) == BITS4(1,1,1,1)
18214 && (INSN(7,4) & BITS4(1,1,0,1)) == BITS4(0,0,0,1)) {
18215 UInt bitR = INSN(5,5);
18216 UInt rD = INSN(19,16);
18217 UInt rM = INSN(11,8);
18218 UInt rN = INSN(3,0);
18219 if (rD != 15 && rM != 15 && rN != 15) {
18220 IRExpr* res
18221 = unop(Iop_64HIto32,
18222 binop(Iop_Add64,
18223 binop(Iop_MullS32, getIRegA(rN), getIRegA(rM)),
18224 mkU64(bitR ? 0x80000000ULL : 0ULL)));
18225 putIRegA(rD, res, condT, Ijk_Boring);
18226 DIP("smmul%s%s r%u, r%u, r%u\n",
18227 nCC(INSN_COND), bitR ? "r" : "", rD, rN, rM);
18228 goto decode_success;
18232 /* ------------------- smmla ------------------ */
18233 if (INSN(27,20) == BITS8(0,1,1,1,0,1,0,1)
18234 && INSN(15,12) != BITS4(1,1,1,1)
18235 && (INSN(7,4) & BITS4(1,1,0,1)) == BITS4(0,0,0,1)) {
18236 UInt bitR = INSN(5,5);
18237 UInt rD = INSN(19,16);
18238 UInt rA = INSN(15,12);
18239 UInt rM = INSN(11,8);
18240 UInt rN = INSN(3,0);
18241 if (rD != 15 && rM != 15 && rN != 15) {
18242 IRExpr* res
18243 = unop(Iop_64HIto32,
18244 binop(Iop_Add64,
18245 binop(Iop_Add64,
18246 binop(Iop_32HLto64, getIRegA(rA), mkU32(0)),
18247 binop(Iop_MullS32, getIRegA(rN), getIRegA(rM))),
18248 mkU64(bitR ? 0x80000000ULL : 0ULL)));
18249 putIRegA(rD, res, condT, Ijk_Boring);
18250 DIP("smmla%s%s r%u, r%u, r%u, r%u\n",
18251 nCC(INSN_COND), bitR ? "r" : "", rD, rN, rM, rA);
18252 goto decode_success;
18256 /* -------------- (A1) LDRT reg+/-#imm12 -------------- */
18257 /* Load Register Unprivileged:
18258 ldrt<c> Rt, [Rn] {, #+/-imm12}
18260 if ( (INSN(27,20) & BITS8(1,1,1,1,0,1,1,1)) == BITS8(0,1,0,0,0,0,1,1) ) {
18261 UInt rT = INSN(15,12);
18262 UInt rN = INSN(19,16);
18263 UInt imm12 = INSN(11,0);
18264 UInt bU = INSN(23,23);
18265 Bool valid = True;
18266 if (rT == 15 || rN == 15 || rN == rT) valid = False;
18267 if (valid) {
18268 IRTemp newRt = newTemp(Ity_I32);
18269 loadGuardedLE( newRt,
18270 ILGop_Ident32, getIRegA(rN), getIRegA(rT), condT );
18271 putIRegA(rT, mkexpr(newRt), IRTemp_INVALID, Ijk_Boring);
18272 IRExpr* erN = binop(bU ? Iop_Add32 : Iop_Sub32,
18273 getIRegA(rN), mkU32(imm12));
18274 putIRegA(rN, erN, condT, Ijk_Boring);
18275 DIP("ldrt%s r%u, [r%u], #%c%u\n",
18276 nCC(INSN_COND), rT, rN, bU ? '+' : '-', imm12);
18277 goto decode_success;
18281 /* -------------- (A2) LDRT reg+/-reg with shift -------------- */
18282 /* Load Register Unprivileged:
18283 ldrt<c> Rt, [Rn], +/-Rm{, shift}
18285 if ( (INSN(27,20) & BITS8(1,1,1,1,0,1,1,1)) == BITS8(0,1,1,0,0,0,1,1)
18286 && INSN(4,4) == 0 ) {
18287 UInt rT = INSN(15,12);
18288 UInt rN = INSN(19,16);
18289 UInt rM = INSN(3,0);
18290 UInt imm5 = INSN(11,7);
18291 UInt bU = INSN(23,23);
18292 UInt type = INSN(6,5);
18293 Bool valid = True;
18294 if (rT == 15 || rN == 15 || rN == rT || rM == 15
18295 /* || (ArchVersion() < 6 && rM == rN) */)
18296 valid = False;
18297 if (valid) {
18298 IRTemp newRt = newTemp(Ity_I32);
18299 loadGuardedLE( newRt,
18300 ILGop_Ident32, getIRegA(rN), getIRegA(rT), condT );
18301 putIRegA(rT, mkexpr(newRt), IRTemp_INVALID, Ijk_Boring);
18302 // dis_buf generated is slightly bogus, in fact.
18303 IRExpr* erN = mk_EA_reg_plusminus_shifted_reg(rN, bU, rM,
18304 type, imm5, dis_buf);
18305 putIRegA(rN, erN, condT, Ijk_Boring);
18306 DIP("ldrt%s r%u, %s\n", nCC(INSN_COND), rT, dis_buf);
18307 goto decode_success;
18311 /* -------------- (A1) LDRBT reg+/-#imm12 -------------- */
18312 /* Load Register Byte Unprivileged:
18313 ldrbt<c> Rt, [Rn], #+/-imm12
18315 if ( (INSN(27,20) & BITS8(1,1,1,1,0,1,1,1)) == BITS8(0,1,0,0,0,1,1,1) ) {
18316 UInt rT = INSN(15,12);
18317 UInt rN = INSN(19,16);
18318 UInt imm12 = INSN(11,0);
18319 UInt bU = INSN(23,23);
18320 Bool valid = True;
18321 if (rT == 15 || rN == 15 || rN == rT) valid = False;
18322 if (valid) {
18323 IRTemp newRt = newTemp(Ity_I32);
18324 loadGuardedLE( newRt,
18325 ILGop_8Uto32, getIRegA(rN), getIRegA(rT), condT );
18326 putIRegA(rT, mkexpr(newRt), IRTemp_INVALID, Ijk_Boring);
18327 IRExpr* erN = binop(bU ? Iop_Add32 : Iop_Sub32,
18328 getIRegA(rN), mkU32(imm12));
18329 putIRegA(rN, erN, condT, Ijk_Boring);
18330 DIP("ldrbt%s r%u, [r%u], #%c%u\n",
18331 nCC(INSN_COND), rT, rN, bU ? '+' : '-', imm12);
18332 goto decode_success;
18336 /* -------------- (A2) LDRBT reg+/-reg with shift -------------- */
18337 /* Load Register Byte Unprivileged:
18338 ldrbt<c> Rt, [Rn], +/-Rm{, shift}
18340 if ( (INSN(27,20) & BITS8(1,1,1,1,0,1,1,1)) == BITS8(0,1,1,0,0,1,1,1)
18341 && INSN(4,4) == 0 ) {
18342 UInt rT = INSN(15,12);
18343 UInt rN = INSN(19,16);
18344 UInt rM = INSN(3,0);
18345 UInt imm5 = INSN(11,7);
18346 UInt bU = INSN(23,23);
18347 UInt type = INSN(6,5);
18348 Bool valid = True;
18349 if (rT == 15 || rN == 15 || rN == rT || rM == 15
18350 /* || (ArchVersion() < 6 && rM == rN) */)
18351 valid = False;
18352 if (valid) {
18353 IRTemp newRt = newTemp(Ity_I32);
18354 loadGuardedLE( newRt,
18355 ILGop_8Uto32, getIRegA(rN), getIRegA(rT), condT );
18356 putIRegA(rT, mkexpr(newRt), IRTemp_INVALID, Ijk_Boring);
18357 // dis_buf generated is slightly bogus, in fact.
18358 IRExpr* erN = mk_EA_reg_plusminus_shifted_reg(rN, bU, rM,
18359 type, imm5, dis_buf);
18360 putIRegA(rN, erN, condT, Ijk_Boring);
18361 DIP("ldrbt%s r%u, %s\n", nCC(INSN_COND), rT, dis_buf);
18362 goto decode_success;
18366 /* -------------- (A1) LDRHT reg+#imm8 -------------- */
18367 /* Load Register Halfword Unprivileged:
18368 ldrht<c> Rt, [Rn] {, #+/-imm8}
18370 if ( (INSN(27,20) & BITS8(1,1,1,1,0,1,1,1)) == BITS8(0,0,0,0,0,1,1,1)
18371 && INSN(7,4) == BITS4(1,0,1,1) ) {
18372 UInt rT = INSN(15,12);
18373 UInt rN = INSN(19,16);
18374 UInt bU = INSN(23,23);
18375 UInt imm4H = INSN(11,8);
18376 UInt imm4L = INSN(3,0);
18377 UInt imm8 = (imm4H << 4) | imm4L;
18378 Bool valid = True;
18379 if (rT == 15 || rN == 15 || rN == rT)
18380 valid = False;
18381 if (valid) {
18382 IRTemp newRt = newTemp(Ity_I32);
18383 loadGuardedLE( newRt,
18384 ILGop_16Uto32, getIRegA(rN), getIRegA(rT), condT );
18385 putIRegA(rT, mkexpr(newRt), IRTemp_INVALID, Ijk_Boring);
18386 IRExpr* erN = binop(bU ? Iop_Add32 : Iop_Sub32,
18387 getIRegA(rN), mkU32(imm8));
18388 putIRegA(rN, erN, condT, Ijk_Boring);
18389 DIP("ldrht%s r%u, [r%u], #%c%u\n",
18390 nCC(INSN_COND), rT, rN, bU ? '+' : '-', imm8);
18391 goto decode_success;
18395 /* -------------- (A2) LDRHT reg+/-reg -------------- */
18396 /* Load Register Halfword Unprivileged:
18397 ldrht<c> Rt, [Rn], +/-Rm
18399 if ( (INSN(27,20) & BITS8(1,1,1,1,0,1,1,1)) == BITS8(0,0,0,0,0,0,1,1)
18400 && INSN(11,4) == BITS8(0,0,0,0,1,0,1,1) ) {
18401 UInt rT = INSN(15,12);
18402 UInt rN = INSN(19,16);
18403 UInt rM = INSN(3,0);
18404 UInt bU = INSN(23,23);
18405 Bool valid = True;
18406 if (rT == 15 || rN == 15 || rN == rT || rM == 15)
18407 valid = False;
18408 if (valid) {
18409 IRTemp newRt = newTemp(Ity_I32);
18410 loadGuardedLE( newRt,
18411 ILGop_16Uto32, getIRegA(rN), getIRegA(rT), condT );
18412 putIRegA(rT, mkexpr(newRt), IRTemp_INVALID, Ijk_Boring);
18413 IRExpr* erN = binop(bU ? Iop_Add32 : Iop_Sub32,
18414 getIRegA(rN), getIRegA(rM));
18415 putIRegA(rN, erN, condT, Ijk_Boring);
18416 DIP("ldrht%s r%u, [r%u], %cr%u\n",
18417 nCC(INSN_COND), rT, rN, bU ? '+' : '-', rM);
18418 goto decode_success;
18422 /* -------------- (A1) LDRSHT reg+#imm8 -------------- */
18423 /* Load Register Signed Halfword Unprivileged:
18424 ldrsht<c> Rt, [Rn] {, #+/-imm8}
18426 if ( (INSN(27,20) & BITS8(1,1,1,1,0,1,1,1)) == BITS8(0,0,0,0,0,1,1,1)
18427 && INSN(7,4) == BITS4(1,1,1,1)) {
18428 UInt rT = INSN(15,12);
18429 UInt rN = INSN(19,16);
18430 UInt bU = INSN(23,23);
18431 UInt imm4H = INSN(11,8);
18432 UInt imm4L = INSN(3,0);
18433 UInt imm8 = (imm4H << 4) | imm4L;
18434 Bool valid = True;
18435 if (rN == 15 || rT == 15 || rN == rT)
18436 valid = False;
18437 if (valid) {
18438 IRTemp newRt = newTemp(Ity_I32);
18439 loadGuardedLE( newRt,
18440 ILGop_16Sto32, getIRegA(rN), getIRegA(rT), condT );
18441 putIRegA(rT, mkexpr(newRt), IRTemp_INVALID, Ijk_Boring);
18442 IRExpr* erN = binop(bU ? Iop_Add32 : Iop_Sub32,
18443 getIRegA(rN), mkU32(imm8));
18444 putIRegA(rN, erN, condT, Ijk_Boring);
18445 DIP("ldrsht%s r%u, [r%u], #%c%u\n",
18446 nCC(INSN_COND), rT, rN, bU ? '+' : '-', imm8);
18447 goto decode_success;
18451 /* -------------- (A2) LDRSHT reg+/-reg -------------- */
18452 /* Load Register Signed Halfword Unprivileged:
18453 ldrsht<c> Rt, [Rn], +/-Rm
18455 if ( (INSN(27,20) & BITS8(1,1,1,1,0,1,1,1)) == BITS8(0,0,0,0,0,0,1,1)
18456 && INSN(11,4) == BITS8(0,0,0,0,1,1,1,1)) {
18457 UInt rT = INSN(15,12);
18458 UInt rN = INSN(19,16);
18459 UInt rM = INSN(3,0);
18460 UInt bU = INSN(23,23);
18461 Bool valid = True;
18462 if (rN == 15 || rT == 15 || rN == rT || rM == 15)
18463 valid = False;
18464 if (valid) {
18465 IRTemp newRt = newTemp(Ity_I32);
18466 loadGuardedLE( newRt,
18467 ILGop_16Sto32, getIRegA(rN), getIRegA(rT), condT );
18468 putIRegA(rT, mkexpr(newRt), IRTemp_INVALID, Ijk_Boring);
18469 IRExpr* erN = binop(bU ? Iop_Add32 : Iop_Sub32,
18470 getIRegA(rN), getIRegA(rM));
18471 putIRegA(rN, erN, condT, Ijk_Boring);
18472 DIP("ldrsht%s r%u, [r%u], %cr%u\n",
18473 nCC(INSN_COND), rT, rN, bU ? '+' : '-', rM);
18474 goto decode_success;
18478 /* -------------- (A1) LDRSBT reg+#imm8 -------------- */
18479 /* Load Register Signed Byte Unprivileged:
18480 ldrsbt<c> Rt, [Rn] {, #+/-imm8}
18482 if ( (INSN(27,20) & BITS8(1,1,1,1,0,1,1,1)) == BITS8(0,0,0,0,0,1,1,1)
18483 && INSN(7,4) == BITS4(1,1,0,1)) {
18484 UInt rT = INSN(15,12);
18485 UInt rN = INSN(19,16);
18486 UInt bU = INSN(23,23);
18487 UInt imm4H = INSN(11,8);
18488 UInt imm4L = INSN(3,0);
18489 UInt imm8 = (imm4H << 4) | imm4L;
18490 Bool valid = True;
18491 if (rT == 15 || rN == 15 || rN == rT)
18492 valid = False;
18493 if (valid) {
18494 IRTemp newRt = newTemp(Ity_I32);
18495 loadGuardedLE( newRt,
18496 ILGop_8Sto32, getIRegA(rN), getIRegA(rT), condT );
18497 putIRegA(rT, mkexpr(newRt), IRTemp_INVALID, Ijk_Boring);
18498 IRExpr* erN = binop(bU ? Iop_Add32 : Iop_Sub32,
18499 getIRegA(rN), mkU32(imm8));
18500 putIRegA(rN, erN, condT, Ijk_Boring);
18501 DIP("ldrsbt%s r%u, [r%u], #%c%u\n",
18502 nCC(INSN_COND), rT, rN, bU ? '+' : '-', imm8);
18503 goto decode_success;
18507 /* -------------- (A2) LDRSBT reg+/-reg -------------- */
18508 /* Load Register Signed Byte Unprivileged:
18509 ldrsbt<c> Rt, [Rn], +/-Rm
18511 if ( (INSN(27,20) & BITS8(1,1,1,1,0,1,1,1)) == BITS8(0,0,0,0,0,0,1,1)
18512 && INSN(11,4) == BITS8(0,0,0,0,1,1,0,1)) {
18513 UInt rT = INSN(15,12);
18514 UInt rN = INSN(19,16);
18515 UInt bU = INSN(23,23);
18516 UInt rM = INSN(3,0);
18517 Bool valid = True;
18518 if (rT == 15 || rN == 15 || rN == rT || rM == 15)
18519 valid = False;
18520 if (valid) {
18521 IRTemp newRt = newTemp(Ity_I32);
18522 loadGuardedLE( newRt,
18523 ILGop_8Sto32, getIRegA(rN), getIRegA(rT), condT );
18524 putIRegA(rT, mkexpr(newRt), IRTemp_INVALID, Ijk_Boring);
18525 IRExpr* erN = binop(bU ? Iop_Add32 : Iop_Sub32,
18526 getIRegA(rN), getIRegA(rM));
18527 putIRegA(rN, erN, condT, Ijk_Boring);
18528 DIP("ldrsbt%s r%u, [r%u], %cr%u\n",
18529 nCC(INSN_COND), rT, rN, bU ? '+' : '-', rM);
18530 goto decode_success;
18534 /* -------------- (A1) STRBT reg+#imm12 -------------- */
18535 /* Store Register Byte Unprivileged:
18536 strbt<c> Rt, [Rn], #+/-imm12
18538 if ( (INSN(27,20) & BITS8(1,1,1,1,0,1,1,1)) == BITS8(0,1,0,0,0,1,1,0) ) {
18539 UInt rT = INSN(15,12);
18540 UInt rN = INSN(19,16);
18541 UInt imm12 = INSN(11,0);
18542 UInt bU = INSN(23,23);
18543 Bool valid = True;
18544 if (rT == 15 || rN == 15 || rN == rT) valid = False;
18545 if (valid) {
18546 IRExpr* address = getIRegA(rN);
18547 IRExpr* data = unop(Iop_32to8, getIRegA(rT));
18548 storeGuardedLE( address, data, condT);
18549 IRExpr* newRn = binop(bU ? Iop_Add32 : Iop_Sub32,
18550 getIRegA(rN), mkU32(imm12));
18551 putIRegA(rN, newRn, condT, Ijk_Boring);
18552 DIP("strbt%s r%u, [r%u], #%c%u\n",
18553 nCC(INSN_COND), rT, rN, bU ? '+' : '-', imm12);
18554 goto decode_success;
18558 /* -------------- (A2) STRBT reg+/-reg -------------- */
18559 /* Store Register Byte Unprivileged:
18560 strbt<c> Rt, [Rn], +/-Rm{, shift}
18562 if ( (INSN(27,20) & BITS8(1,1,1,1,0,1,1,1)) == BITS8(0,1,1,0,0,1,1,0)
18563 && INSN(4,4) == 0) {
18564 UInt rT = INSN(15,12);
18565 UInt rN = INSN(19,16);
18566 UInt imm5 = INSN(11,7);
18567 UInt type = INSN(6,5);
18568 UInt rM = INSN(3,0);
18569 UInt bU = INSN(23,23);
18570 Bool valid = True;
18571 if (rT == 15 || rN == 15 || rN == rT || rM == 15) valid = False;
18572 if (valid) {
18573 IRExpr* address = getIRegA(rN);
18574 IRExpr* data = unop(Iop_32to8, getIRegA(rT));
18575 storeGuardedLE( address, data, condT);
18576 // dis_buf generated is slightly bogus, in fact.
18577 IRExpr* erN = mk_EA_reg_plusminus_shifted_reg(rN, bU, rM,
18578 type, imm5, dis_buf);
18579 putIRegA(rN, erN, condT, Ijk_Boring);
18580 DIP("strbt%s r%u, %s\n", nCC(INSN_COND), rT, dis_buf);
18581 goto decode_success;
18585 /* -------------- (A1) STRHT reg+#imm8 -------------- */
18586 /* Store Register Halfword Unprivileged:
18587 strht<c> Rt, [Rn], #+/-imm8
18589 if ( (INSN(27,20) & BITS8(1,1,1,1,0,1,1,1)) == BITS8(0,0,0,0,0,1,1,0)
18590 && INSN(7,4) == BITS4(1,0,1,1) ) {
18591 UInt rT = INSN(15,12);
18592 UInt rN = INSN(19,16);
18593 UInt imm4H = INSN(11,8);
18594 UInt imm4L = INSN(3,0);
18595 UInt imm8 = (imm4H << 4) | imm4L;
18596 UInt bU = INSN(23,23);
18597 Bool valid = True;
18598 if (rT == 15 || rN == 15 || rN == rT) valid = False;
18599 if (valid) {
18600 IRExpr* address = getIRegA(rN);
18601 IRExpr* data = unop(Iop_32to16, getIRegA(rT));
18602 storeGuardedLE( address, data, condT);
18603 IRExpr* newRn = binop(bU ? Iop_Add32 : Iop_Sub32,
18604 getIRegA(rN), mkU32(imm8));
18605 putIRegA(rN, newRn, condT, Ijk_Boring);
18606 DIP("strht%s r%u, [r%u], #%c%u\n",
18607 nCC(INSN_COND), rT, rN, bU ? '+' : '-', imm8);
18608 goto decode_success;
18612 /* -------------- (A2) STRHT reg+reg -------------- */
18613 /* Store Register Halfword Unprivileged:
18614 strht<c> Rt, [Rn], +/-Rm
18616 if ( (INSN(27,20) & BITS8(1,1,1,1,0,1,1,1)) == BITS8(0,0,0,0,0,0,1,0)
18617 && INSN(11,4) == BITS8(0,0,0,0,1,0,1,1) ) {
18618 UInt rT = INSN(15,12);
18619 UInt rN = INSN(19,16);
18620 UInt rM = INSN(3,0);
18621 UInt bU = INSN(23,23);
18622 Bool valid = True;
18623 if (rT == 15 || rN == 15 || rN == rT || rM == 15) valid = False;
18624 if (valid) {
18625 IRExpr* address = getIRegA(rN);
18626 IRExpr* data = unop(Iop_32to16, getIRegA(rT));
18627 storeGuardedLE( address, data, condT);
18628 IRExpr* newRn = binop(bU ? Iop_Add32 : Iop_Sub32,
18629 getIRegA(rN), getIRegA(rM));
18630 putIRegA(rN, newRn, condT, Ijk_Boring);
18631 DIP("strht%s r%u, [r%u], %cr%u\n",
18632 nCC(INSN_COND), rT, rN, bU ? '+' : '-', rM);
18633 goto decode_success;
18637 /* -------------- (A1) STRT reg+imm12 -------------- */
18638 /* Store Register Unprivileged:
18639 strt<c> Rt, [Rn], #+/-imm12
18641 if ( (INSN(27,20) & BITS8(1,1,1,1,0,1,1,1)) == BITS8(0,1,0,0,0,0,1,0) ) {
18642 UInt rT = INSN(15,12);
18643 UInt rN = INSN(19,16);
18644 UInt imm12 = INSN(11,0);
18645 UInt bU = INSN(23,23);
18646 Bool valid = True;
18647 if (rN == 15 || rN == rT) valid = False;
18648 if (valid) {
18649 IRExpr* address = getIRegA(rN);
18650 storeGuardedLE( address, getIRegA(rT), condT);
18651 IRExpr* newRn = binop(bU ? Iop_Add32 : Iop_Sub32,
18652 getIRegA(rN), mkU32(imm12));
18653 putIRegA(rN, newRn, condT, Ijk_Boring);
18654 DIP("strt%s r%u, [r%u], %c%u\n",
18655 nCC(INSN_COND), rT, rN, bU ? '+' : '-', imm12);
18656 goto decode_success;
18660 /* -------------- (A2) STRT reg+reg -------------- */
18661 /* Store Register Unprivileged:
18662 strt<c> Rt, [Rn], +/-Rm{, shift}
18664 if ( (INSN(27,20) & BITS8(1,1,1,1,0,1,1,1)) == BITS8(0,1,1,0,0,0,1,0)
18665 && INSN(4,4) == 0 ) {
18666 UInt rT = INSN(15,12);
18667 UInt rN = INSN(19,16);
18668 UInt rM = INSN(3,0);
18669 UInt type = INSN(6,5);
18670 UInt imm5 = INSN(11,7);
18671 UInt bU = INSN(23,23);
18672 Bool valid = True;
18673 if (rN == 15 || rN == rT || rM == 15) valid = False;
18674 /* FIXME We didn't do:
18675 if ArchVersion() < 6 && rM == rN then UNPREDICTABLE */
18676 if (valid) {
18677 storeGuardedLE( getIRegA(rN), getIRegA(rT), condT);
18678 // dis_buf generated is slightly bogus, in fact.
18679 IRExpr* erN = mk_EA_reg_plusminus_shifted_reg(rN, bU, rM,
18680 type, imm5, dis_buf);
18681 putIRegA(rN, erN, condT, Ijk_Boring);
18682 DIP("strt%s r%u, %s\n", nCC(INSN_COND), rT, dis_buf);
18683 goto decode_success;
18687 /* ----------------------------------------------------------- */
18688 /* -- ARMv7 instructions -- */
18689 /* ----------------------------------------------------------- */
18691 /* -------------- read CP15 TPIDRURO register ------------- */
18692 /* mrc p15, 0, r0, c13, c0, 3 up to
18693 mrc p15, 0, r14, c13, c0, 3
18695 /* I don't know whether this is really v7-only. But anyway, we
18696 have to support it since arm-linux uses TPIDRURO as a thread
18697 state register. */
18698 if (0x0E1D0F70 == (insn & 0x0FFF0FFF)) {
18699 UInt rD = INSN(15,12);
18700 if (rD <= 14) {
18701 /* skip r15, that's too stupid to handle */
18702 putIRegA(rD, IRExpr_Get(OFFB_TPIDRURO, Ity_I32),
18703 condT, Ijk_Boring);
18704 DIP("mrc%s p15,0, r%u, c13, c0, 3\n", nCC(INSN_COND), rD);
18705 goto decode_success;
18707 /* fall through */
18710 /* ------------ read/write CP15 TPIDRURW register ----------- */
18711 /* mcr p15, 0, r0, c13, c0, 2 (r->cr xfer) up to
18712 mcr p15, 0, r14, c13, c0, 2
18714 mrc p15, 0, r0, c13, c0, 2 (rc->r xfer) up to
18715 mrc p15, 0, r14, c13, c0, 2
18717 if (0x0E0D0F50 == (insn & 0x0FFF0FFF)) { // MCR
18718 UInt rS = INSN(15,12);
18719 if (rS <= 14) {
18720 /* skip r15, that's too stupid to handle */
18721 putMiscReg32(OFFB_TPIDRURW, getIRegA(rS), condT);
18722 DIP("mcr%s p15,0, r%u, c13, c0, 2\n", nCC(INSN_COND), rS);
18723 goto decode_success;
18725 /* fall through */
18727 if (0x0E1D0F50 == (insn & 0x0FFF0FFF)) { // MRC
18728 UInt rD = INSN(15,12);
18729 if (rD <= 14) {
18730 /* skip r15, that's too stupid to handle */
18731 putIRegA(rD, IRExpr_Get(OFFB_TPIDRURW, Ity_I32),
18732 condT, Ijk_Boring);
18733 DIP("mrc%s p15,0, r%u, c13, c0, 2\n", nCC(INSN_COND), rD);
18734 goto decode_success;
18736 /* fall through */
18739 /* -------------- read CP15 PMUSRENR register ------------- */
18740 /* mrc p15, 0, r0, c9, c14, 0 up to
18741 mrc p15, 0, r14, c9, c14, 0
18743 /* A program reading this register is really asking "which
18744 performance monitoring registers are available in user space?"
18745 The simple answer here is to return zero, meaning "none". See
18746 #345984. */
18747 if (0x0E190F1E == (insn & 0x0FFF0FFF)) {
18748 UInt rD = INSN(15,12);
18749 if (rD <= 14) {
18750 /* skip r15, that's too stupid to handle */
18751 putIRegA(rD, mkU32(0), condT, Ijk_Boring);
18752 DIP("mrc%s p15,0, r%u, c9, c14, 0\n", nCC(INSN_COND), rD);
18753 goto decode_success;
18755 /* fall through */
18758 /* Handle various kinds of barriers. This is rather indiscriminate
18760 means we don't know which they are, so the back end has to
18761 re-emit them all when it comes acrosss an IR Fence.
18763 re-emit them all when it comes across an IR Fence.
18764 if (0xEE070FBA == (insn & 0xFFFF0FFF)) {
18765 UInt rT = INSN(15,12);
18766 if (rT <= 14) {
18767 /* mcr 15, 0, rT, c7, c10, 5 (v6) equiv to DMB (v7). Data
18768 Memory Barrier -- ensures ordering of memory accesses. */
18769 stmt( IRStmt_MBE(Imbe_Fence) );
18770 DIP("mcr 15, 0, r%u, c7, c10, 5 (data memory barrier)\n", rT);
18771 goto decode_success;
18773 /* fall through */
18775 /* other flavours of barrier */
18776 switch (insn) {
18777 case 0xEE070F9A: /* v6 */
18778 /* mcr 15, 0, r0, c7, c10, 4 (v6) equiv to DSB (v7). Data
18779 Synch Barrier -- ensures completion of memory accesses. */
18780 stmt( IRStmt_MBE(Imbe_Fence) );
18781 DIP("mcr 15, 0, r0, c7, c10, 4 (data synch barrier)\n");
18782 goto decode_success;
18783 case 0xEE070F95: /* v6 */
18784 /* mcr 15, 0, r0, c7, c5, 4 (v6) equiv to ISB (v7).
18785 Instruction Synchronisation Barrier (or Flush Prefetch
18786 Buffer) -- a pipe flush, I think. I suspect we could
18787 ignore those, but to be on the safe side emit a fence
18788 anyway. */
18789 stmt( IRStmt_MBE(Imbe_Fence) );
18790 DIP("mcr 15, 0, r0, c7, c5, 4 (insn synch barrier)\n");
18791 goto decode_success;
18792 default:
18793 break;
18796 /* ----------------------------------------------------------- */
18797 /* -- Hints -- */
18798 /* ----------------------------------------------------------- */
18800 switch (insn & 0x0FFFFFFF) {
18801 /* ------------------- NOP ------------------ */
18802 case 0x0320F000:
18803 DIP("nop%s\n", nCC(INSN_COND));
18804 goto decode_success;
18805 /* ------------------- YIELD ------------------ */
18806 case 0x0320F001:
18807 /* Continue after conditionally yielding. */
18808 DIP("yield%s\n", nCC(INSN_COND));
18809 stmt( IRStmt_Exit( unop(Iop_32to1,
18810 condT == IRTemp_INVALID
18811 ? mkU32(1) : mkexpr(condT)),
18812 Ijk_Yield,
18813 IRConst_U32(guest_R15_curr_instr_notENC + 4),
18814 OFFB_R15T ));
18815 goto decode_success;
18816 default:
18817 break;
18820 /* ----------------------------------------------------------- */
18821 /* -- VFP (CP 10, CP 11) instructions (in ARM mode) -- */
18822 /* ----------------------------------------------------------- */
18824 if (INSN_COND != ARMCondNV) {
18825 Bool ok_vfp = decode_CP10_CP11_instruction (
18826 &dres, INSN(27,0), condT, INSN_COND,
18827 False/*!isT*/
18829 if (ok_vfp)
18830 goto decode_success;
18833 /* ----------------------------------------------------------- */
18834 /* -- NEON instructions (in ARM mode) -- */
18835 /* ----------------------------------------------------------- */
18837 /* These are all in NV space, and so are taken care of (far) above,
18838 by a call from this function to
18839 decode_NV_instruction_ARMv7_and_below(). */
18841 /* ----------------------------------------------------------- */
18842 /* -- v6 media instructions (in ARM mode) -- */
18843 /* ----------------------------------------------------------- */
18845 { Bool ok_v6m = decode_V6MEDIA_instruction(
18846 &dres, INSN(27,0), condT, INSN_COND,
18847 False/*!isT*/
18849 if (ok_v6m)
18850 goto decode_success;
18853 /* ----------------------------------------------------------- */
18854 /* -- v8 instructions (in ARM mode) -- */
18855 /* ----------------------------------------------------------- */
18857 after_v7_decoder:
18859 /* If we get here, it means that all attempts to decode the
18860 instruction as ARMv7 or earlier have failed. So, if we're doing
18861 ARMv8 or later, here is the point to try for it. */
18863 if (VEX_ARM_ARCHLEVEL(archinfo->hwcaps) >= 8) {
18864 Bool ok_v8
18865 = decode_V8_instruction( &dres, insn, condT, False/*!isT*/,
18866 IRTemp_INVALID, IRTemp_INVALID );
18867 if (ok_v8)
18868 goto decode_success;
18871 /* ----------------------------------------------------------- */
18872 /* -- Undecodable -- */
18873 /* ----------------------------------------------------------- */
18875 goto decode_failure;
18876 /*NOTREACHED*/
18878 decode_failure:
18879 /* All decode failures end up here. */
18880 if (sigill_diag) {
18881 vex_printf("disInstr(arm): unhandled instruction: "
18882 "0x%x\n", insn);
18883 vex_printf(" cond=%d(0x%x) 27:20=%d(0x%02x) "
18884 "4:4=%d "
18885 "3:0=%d(0x%x)\n",
18886 (Int)INSN_COND, (UInt)INSN_COND,
18887 (Int)INSN(27,20), (UInt)INSN(27,20),
18888 (Int)INSN(4,4),
18889 (Int)INSN(3,0), (UInt)INSN(3,0) );
18892 /* Tell the dispatcher that this insn cannot be decoded, and so has
18893 not been executed, and (is currently) the next to be executed.
18894 R15 should be up-to-date since it made so at the start of each
18895 insn, but nevertheless be paranoid and update it again right
18896 now. */
18897 vassert(0 == (guest_R15_curr_instr_notENC & 3));
18898 llPutIReg( 15, mkU32(guest_R15_curr_instr_notENC) );
18899 dres.len = 0;
18900 dres.whatNext = Dis_StopHere;
18901 dres.jk_StopHere = Ijk_NoDecode;
18902 dres.continueAt = 0;
18903 return dres;
18905 decode_success:
18906 /* All decode successes end up here. */
18907 DIP("\n");
18909 vassert(dres.len == 4 || dres.len == 20);
18911 /* Now then. Do we have an implicit jump to r15 to deal with? */
18912 if (r15written) {
18913 /* If we get jump to deal with, we assume that there's been no
18914 other competing branch stuff previously generated for this
18915 insn. That's reasonable, in the sense that the ARM insn set
18916 appears to declare as "Unpredictable" any instruction which
18917 generates more than one possible new value for r15. Hence
18918 just assert. The decoders themselves should check against
18919 all such instructions which are thusly Unpredictable, and
18920 decline to decode them. Hence we should never get here if we
18921 have competing new values for r15, and hence it is safe to
18922 assert here. */
18923 vassert(dres.whatNext == Dis_Continue);
18924 vassert(irsb->next == NULL);
18925 vassert(irsb->jumpkind == Ijk_Boring);
18926 /* If r15 is unconditionally written, terminate the block by
18927 jumping to it. If it's conditionally written, still
18928 terminate the block (a shame, but we can't do side exits to
18929 arbitrary destinations), but first jump to the next
18930 instruction if the condition doesn't hold. */
18931 /* We can't use getIReg(15) to get the destination, since that
18932 will produce r15+8, which isn't what we want. Must use
18933 llGetIReg(15) instead. */
18934 if (r15guard == IRTemp_INVALID) {
18935 /* unconditional */
18936 } else {
18937 /* conditional */
18938 stmt( IRStmt_Exit(
18939 unop(Iop_32to1,
18940 binop(Iop_Xor32,
18941 mkexpr(r15guard), mkU32(1))),
18942 r15kind,
18943 IRConst_U32(guest_R15_curr_instr_notENC + 4),
18944 OFFB_R15T
18947 /* This seems crazy, but we're required to finish the insn with
18948 a write to the guest PC. As usual we rely on ir_opt to tidy
18949 up later. */
18950 llPutIReg(15, llGetIReg(15));
18951 dres.whatNext = Dis_StopHere;
18952 dres.jk_StopHere = r15kind;
18953 } else {
18954 /* Set up the end-state in the normal way. */
18955 switch (dres.whatNext) {
18956 case Dis_Continue:
18957 llPutIReg(15, mkU32(dres.len + guest_R15_curr_instr_notENC));
18958 break;
18959 case Dis_ResteerU:
18960 case Dis_ResteerC:
18961 llPutIReg(15, mkU32(dres.continueAt));
18962 break;
18963 case Dis_StopHere:
18964 break;
18965 default:
18966 vassert(0);
18970 return dres;
18972 # undef INSN_COND
18973 # undef INSN
18977 /*------------------------------------------------------------*/
18978 /*--- Disassemble a single Thumb2 instruction ---*/
18979 /*------------------------------------------------------------*/
18981 static const UChar it_length_table[256]; /* fwds */
18983 /* NB: in Thumb mode we do fetches of regs with getIRegT, which
18984 automagically adds 4 to fetches of r15. However, writes to regs
18985 are done with putIRegT, which disallows writes to r15. Hence any
18986 r15 writes and associated jumps have to be done "by hand". */
18988 /* Disassemble a single Thumb instruction into IR. The instruction is
18989 located in host memory at guest_instr, and has (decoded) guest IP
18990 of guest_R15_curr_instr_notENC, which will have been set before the
18991 call here. */
18993 static
18994 DisResult disInstr_THUMB_WRK (
18995 Bool (*resteerOkFn) ( /*opaque*/void*, Addr ),
18996 Bool resteerCisOk,
18997 void* callback_opaque,
18998 const UChar* guest_instr,
18999 const VexArchInfo* archinfo,
19000 const VexAbiInfo* abiinfo,
19001 Bool sigill_diag
19004 /* A macro to fish bits out of insn0. There's also INSN1, to fish
19005 bits out of insn1, but that's defined only after the end of the
19006 16-bit insn decoder, so as to stop it mistakenly being used
19007 therein. */
19008 # define INSN0(_bMax,_bMin) SLICE_UInt(((UInt)insn0), (_bMax), (_bMin))
19010 DisResult dres;
19011 UShort insn0; /* first 16 bits of the insn */
19012 UShort insn1; /* second 16 bits of the insn */
19013 HChar dis_buf[128]; // big enough to hold LDMIA etc text
19015 /* Summary result of the ITxxx backwards analysis: False == safe
19016 but suboptimal. */
19017 Bool guaranteedUnconditional = False;
19019 /* Set result defaults. */
19020 dres.whatNext = Dis_Continue;
19021 dres.len = 2;
19022 dres.continueAt = 0;
19023 dres.jk_StopHere = Ijk_INVALID;
19024 dres.hint = Dis_HintNone;
19026 /* Set default actions for post-insn handling of writes to r15, if
19027 required. */
19028 r15written = False;
19029 r15guard = IRTemp_INVALID; /* unconditional */
19030 r15kind = Ijk_Boring;
19032 /* Insns could be 2 or 4 bytes long. Just get the first 16 bits at
19033 this point. If we need the second 16, get them later. We can't
19034 get them both out immediately because it risks a fault (very
19035 unlikely, but ..) if the second 16 bits aren't actually
19036 necessary. */
19037 insn0 = getUShortLittleEndianly( guest_instr );
19038 insn1 = 0; /* We'll get it later, once we know we need it. */
19040 /* Similarly, will set this later. */
19041 IRTemp old_itstate = IRTemp_INVALID;
19043 if (0) vex_printf("insn: 0x%x\n", insn0);
19045 DIP("\t(thumb) 0x%x: ", (UInt)guest_R15_curr_instr_notENC);
19047 vassert(0 == (guest_R15_curr_instr_notENC & 1));
19049 /* ----------------------------------------------------------- */
19050 /* Spot "Special" instructions (see comment at top of file). */
19052 const UChar* code = guest_instr;
19053 /* Spot the 16-byte preamble:
19055 ea4f 0cfc mov.w ip, ip, ror #3
19056 ea4f 3c7c mov.w ip, ip, ror #13
19057 ea4f 7c7c mov.w ip, ip, ror #29
19058 ea4f 4cfc mov.w ip, ip, ror #19
19060 UInt word1 = 0x0CFCEA4F;
19061 UInt word2 = 0x3C7CEA4F;
19062 UInt word3 = 0x7C7CEA4F;
19063 UInt word4 = 0x4CFCEA4F;
19064 if (getUIntLittleEndianly(code+ 0) == word1 &&
19065 getUIntLittleEndianly(code+ 4) == word2 &&
19066 getUIntLittleEndianly(code+ 8) == word3 &&
19067 getUIntLittleEndianly(code+12) == word4) {
19068 /* Got a "Special" instruction preamble. Which one is it? */
19069 // 0x 0A 0A EA 4A
19070 if (getUIntLittleEndianly(code+16) == 0x0A0AEA4A
19071 /* orr.w r10,r10,r10 */) {
19072 /* R3 = client_request ( R4 ) */
19073 DIP("r3 = client_request ( %%r4 )\n");
19074 llPutIReg(15, mkU32( (guest_R15_curr_instr_notENC + 20) | 1 ));
19075 dres.jk_StopHere = Ijk_ClientReq;
19076 dres.whatNext = Dis_StopHere;
19077 goto decode_success;
19079 else
19080 // 0x 0B 0B EA 4B
19081 if (getUIntLittleEndianly(code+16) == 0x0B0BEA4B
19082 /* orr r11,r11,r11 */) {
19083 /* R3 = guest_NRADDR */
19084 DIP("r3 = guest_NRADDR\n");
19085 dres.len = 20;
19086 llPutIReg(3, IRExpr_Get( OFFB_NRADDR, Ity_I32 ));
19087 goto decode_success;
19089 else
19090 // 0x 0C 0C EA 4C
19091 if (getUIntLittleEndianly(code+16) == 0x0C0CEA4C
19092 /* orr r12,r12,r12 */) {
19093 /* branch-and-link-to-noredir R4 */
19094 DIP("branch-and-link-to-noredir r4\n");
19095 llPutIReg(14, mkU32( (guest_R15_curr_instr_notENC + 20) | 1 ));
19096 llPutIReg(15, getIRegT(4));
19097 dres.jk_StopHere = Ijk_NoRedir;
19098 dres.whatNext = Dis_StopHere;
19099 goto decode_success;
19101 else
19102 // 0x 09 09 EA 49
19103 if (getUIntLittleEndianly(code+16) == 0x0909EA49
19104 /* orr r9,r9,r9 */) {
19105 /* IR injection */
19106 DIP("IR injection\n");
19107 vex_inject_ir(irsb, Iend_LE);
19108 // Invalidate the current insn. The reason is that the IRop we're
19109 // injecting here can change. In which case the translation has to
19110 // be redone. For ease of handling, we simply invalidate all the
19111 // time.
19112 stmt(IRStmt_Put(OFFB_CMSTART, mkU32(guest_R15_curr_instr_notENC)));
19113 stmt(IRStmt_Put(OFFB_CMLEN, mkU32(20)));
19114 llPutIReg(15, mkU32( (guest_R15_curr_instr_notENC + 20) | 1 ));
19115 dres.whatNext = Dis_StopHere;
19116 dres.jk_StopHere = Ijk_InvalICache;
19117 goto decode_success;
19119 /* We don't know what it is. Set insn0 so decode_failure
19120 can print the insn following the Special-insn preamble. */
19121 insn0 = getUShortLittleEndianly(code+16);
19122 goto decode_failure;
19123 /*NOTREACHED*/
19128 /* ----------------------------------------------------------- */
19130 /* Main Thumb instruction decoder starts here. It's a series of
19131 switches which examine ever longer bit sequences at the MSB of
19132 the instruction word, first for 16-bit insns, then for 32-bit
19133 insns. */
19135 /* --- BEGIN ITxxx optimisation analysis --- */
19136 /* This is a crucial optimisation for the ITState boilerplate that
19137 follows. Examine the 9 halfwords preceding this instruction,
19138 and if we are absolutely sure that none of them constitute an
19139 'it' instruction, then we can be sure that this instruction is
19140 not under the control of any 'it' instruction, and so
19141 guest_ITSTATE must be zero. So write zero into ITSTATE right
19142 now, so that iropt can fold out almost all of the resulting
19143 junk.
19145 If we aren't sure, we can always safely skip this step. So be a
19146 bit conservative about it: only poke around in the same page as
19147 this instruction, lest we get a fault from the previous page
19148 that would not otherwise have happened. The saving grace is
19149 that such skipping is pretty rare -- it only happens,
19150 statistically, 18/4096ths of the time, so is judged unlikely to
 19151        be a performance problem.
19153 FIXME: do better. Take into account the number of insns covered
19154 by any IT insns we find, to rule out cases where an IT clearly
19155 cannot cover this instruction. This would improve behaviour for
19156 branch targets immediately following an IT-guarded group that is
19157 not of full length. Eg, (and completely ignoring issues of 16-
19158 vs 32-bit insn length):
19160 ite cond
19161 insn1
19162 insn2
19163 label: insn3
19164 insn4
19166 The 'it' only conditionalises insn1 and insn2. However, the
19167 current analysis is conservative and considers insn3 and insn4
19168 also possibly guarded. Hence if 'label:' is the start of a hot
19169 loop we will get a big performance hit.
19172 /* Summary result of this analysis: False == safe but
19173 suboptimal. */
19174 vassert(guaranteedUnconditional == False);
19176 UInt pc = guest_R15_curr_instr_notENC;
19177 vassert(0 == (pc & 1));
19179 UInt pageoff = pc & 0xFFF;
19180 if (pageoff >= 18) {
19181 /* It's safe to poke about in the 9 halfwords preceding this
19182 insn. So, have a look at them. */
19183 guaranteedUnconditional = True; /* assume no 'it' insn found,
19184 till we do */
19185 UShort* hwp = (UShort*)(HWord)pc;
19186 Int i;
19187 for (i = -1; i >= -9; i--) {
19188 /* We're in the same page. (True, but commented out due
19189 to expense.) */
19191 vassert( ( ((UInt)(&hwp[i])) & 0xFFFFF000 )
19192 == ( pc & 0xFFFFF000 ) );
19194 /* All valid IT instructions must have the form 0xBFxy,
19195 where x can be anything, but y must be nonzero. Find
19196 the number of insns covered by it (1 .. 4) and check to
19197 see if it can possibly reach up to the instruction in
19198 question. Some (x,y) combinations mean UNPREDICTABLE,
19199 and the table is constructed to be conservative by
19200 returning 4 for those cases, so the analysis is safe
19201 even if the code uses unpredictable IT instructions (in
19202 which case its authors are nuts, but hey.) */
19203 UShort hwp_i = hwp[i];
19204 if (UNLIKELY((hwp_i & 0xFF00) == 0xBF00 && (hwp_i & 0xF) != 0)) {
19205 /* might be an 'it' insn. */
19206 /* # guarded insns */
19207 Int n_guarded = (Int)it_length_table[hwp_i & 0xFF];
19208 vassert(n_guarded >= 1 && n_guarded <= 4);
19209 if (n_guarded * 2 /* # guarded HWs, worst case */
19210 > (-(i+1))) /* -(i+1): # remaining HWs after the IT */
19211 /* -(i+0) also seems to work, even though I think
19212 it's wrong. I don't understand that. */
19213 guaranteedUnconditional = False;
19214 break;
19219 /* --- END ITxxx optimisation analysis --- */
19221 /* Generate the guarding condition for this insn, by examining
19222 ITSTATE. Assign it to condT. Also, generate new
19223 values for ITSTATE ready for stuffing back into the
19224 guest state, but don't actually do the Put yet, since it will
19225 need to stuffed back in only after the instruction gets to a
19226 point where it is sure to complete. Mostly we let the code at
19227 decode_success handle this, but in cases where the insn contains
19228 a side exit, we have to update them before the exit. */
19230 /* If the ITxxx optimisation analysis above could not prove that
19231 this instruction is guaranteed unconditional, we insert a
19232 lengthy IR preamble to compute the guarding condition at
19233 runtime. If it can prove it (which obviously we hope is the
19234 normal case) then we insert a minimal preamble, which is
19235 equivalent to setting guest_ITSTATE to zero and then folding
19236 that through the full preamble (which completely disappears). */
19238 IRTemp condT = IRTemp_INVALID;
19239 IRTemp cond_AND_notInIT_T = IRTemp_INVALID;
19241 IRTemp new_itstate = IRTemp_INVALID;
19242 vassert(old_itstate == IRTemp_INVALID);
19244 if (guaranteedUnconditional) {
19245 /* BEGIN "partial eval { ITSTATE = 0; STANDARD_PREAMBLE; }" */
19247 // ITSTATE = 0 :: I32
19248 IRTemp z32 = newTemp(Ity_I32);
19249 assign(z32, mkU32(0));
19250 put_ITSTATE(z32);
19252 // old_itstate = 0 :: I32
19254 // old_itstate = get_ITSTATE();
19255 old_itstate = z32; /* 0 :: I32 */
19257 // new_itstate = old_itstate >> 8
19258 // = 0 >> 8
19259 // = 0 :: I32
19261 // new_itstate = newTemp(Ity_I32);
19262 // assign(new_itstate,
19263 // binop(Iop_Shr32, mkexpr(old_itstate), mkU8(8)));
19264 new_itstate = z32;
19266 // ITSTATE = 0 :: I32(again)
19268 // put_ITSTATE(new_itstate);
19270 // condT1 = calc_cond_dyn( xor(and(old_istate,0xF0), 0xE0) )
19271 // = calc_cond_dyn( xor(0,0xE0) )
19272 // = calc_cond_dyn ( 0xE0 )
19273 // = 1 :: I32
19274 // Not that this matters, since the computed value is not used:
19275 // see condT folding below
19277 // IRTemp condT1 = newTemp(Ity_I32);
19278 // assign(condT1,
19279 // mk_armg_calculate_condition_dyn(
19280 // binop(Iop_Xor32,
19281 // binop(Iop_And32, mkexpr(old_itstate), mkU32(0xF0)),
19282 // mkU32(0xE0))
19283 // )
19284 // );
19286 // condT = 32to8(and32(old_itstate,0xF0)) == 0 ? 1 : condT1
19287 // = 32to8(and32(0,0xF0)) == 0 ? 1 : condT1
19288 // = 32to8(0) == 0 ? 1 : condT1
19289 // = 0 == 0 ? 1 : condT1
19290 // = 1
19292 // condT = newTemp(Ity_I32);
19293 // assign(condT, IRExpr_ITE(
19294 // unop(Iop_32to8, binop(Iop_And32,
19295 // mkexpr(old_itstate),
19296 // mkU32(0xF0))),
19297 // mkexpr(condT1),
19298 // mkU32(1))
19299 // ));
19300 condT = newTemp(Ity_I32);
19301 assign(condT, mkU32(1));
19303 // notInITt = xor32(and32(old_itstate, 1), 1)
19304 // = xor32(and32(0, 1), 1)
19305 // = xor32(0, 1)
19306 // = 1 :: I32
19308 // IRTemp notInITt = newTemp(Ity_I32);
19309 // assign(notInITt,
19310 // binop(Iop_Xor32,
19311 // binop(Iop_And32, mkexpr(old_itstate), mkU32(1)),
19312 // mkU32(1)));
19314 // cond_AND_notInIT_T = and32(notInITt, condT)
19315 // = and32(1, 1)
19316 // = 1
19318 // cond_AND_notInIT_T = newTemp(Ity_I32);
19319 // assign(cond_AND_notInIT_T,
19320 // binop(Iop_And32, mkexpr(notInITt), mkexpr(condT)));
19321 cond_AND_notInIT_T = condT; /* 1 :: I32 */
19323 /* END "partial eval { ITSTATE = 0; STANDARD_PREAMBLE; }" */
19324 } else {
19325 /* BEGIN { STANDARD PREAMBLE; } */
19327 old_itstate = get_ITSTATE();
19329 new_itstate = newTemp(Ity_I32);
19330 assign(new_itstate,
19331 binop(Iop_Shr32, mkexpr(old_itstate), mkU8(8)));
19333 put_ITSTATE(new_itstate);
19335 /* Same strategy as for ARM insns: generate a condition
19336 temporary at this point (or IRTemp_INVALID, meaning
19337 unconditional). We leave it to lower-level instruction
19338 decoders to decide whether they can generate straight-line
19339 code, or whether they must generate a side exit before the
19340 instruction. condT :: Ity_I32 and is always either zero or
19341 one. */
19342 IRTemp condT1 = newTemp(Ity_I32);
19343 assign(condT1,
19344 mk_armg_calculate_condition_dyn(
19345 binop(Iop_Xor32,
19346 binop(Iop_And32, mkexpr(old_itstate), mkU32(0xF0)),
19347 mkU32(0xE0))
19351 /* This is a bit complex, but needed to make Memcheck understand
19352 that, if the condition in old_itstate[7:4] denotes AL (that
19353 is, if this instruction is to be executed unconditionally),
19354 then condT does not depend on the results of calling the
19355 helper.
19357 We test explicitly for old_itstate[7:4] == AL ^ 0xE, and in
19358 that case set condT directly to 1. Else we use the results
19359 of the helper. Since old_itstate is always defined and
19360 because Memcheck does lazy V-bit propagation through ITE,
19361 this will cause condT to always be a defined 1 if the
19362 condition is 'AL'. From an execution semantics point of view
19363 this is irrelevant since we're merely duplicating part of the
19364 behaviour of the helper. But it makes it clear to Memcheck,
19365 in this case, that condT does not in fact depend on the
19366 contents of the condition code thunk. Without it, we get
19367 quite a lot of false errors.
19369 So, just to clarify: from a straight semantics point of view,
19370 we can simply do "assign(condT, mkexpr(condT1))", and the
19371 simulator still runs fine. It's just that we get loads of
19372 false errors from Memcheck. */
19373 condT = newTemp(Ity_I32);
19374 assign(condT, IRExpr_ITE(
19375 binop(Iop_CmpNE32, binop(Iop_And32,
19376 mkexpr(old_itstate),
19377 mkU32(0xF0)),
19378 mkU32(0)),
19379 mkexpr(condT1),
19380 mkU32(1)
19383 /* Something we don't have in ARM: generate a 0 or 1 value
19384 indicating whether or not we are in an IT block (NB: 0 = in
19385 IT block, 1 = not in IT block). This is used to gate
19386 condition code updates in 16-bit Thumb instructions. */
19387 IRTemp notInITt = newTemp(Ity_I32);
19388 assign(notInITt,
19389 binop(Iop_Xor32,
19390 binop(Iop_And32, mkexpr(old_itstate), mkU32(1)),
19391 mkU32(1)));
19393 /* Compute 'condT && notInITt' -- that is, the instruction is
19394 going to execute, and we're not in an IT block. This is the
19395 gating condition for updating condition codes in 16-bit Thumb
19396 instructions, except for CMP, CMN and TST. */
19397 cond_AND_notInIT_T = newTemp(Ity_I32);
19398 assign(cond_AND_notInIT_T,
19399 binop(Iop_And32, mkexpr(notInITt), mkexpr(condT)));
19400 /* END { STANDARD PREAMBLE; } */
19404 /* At this point:
19405 * ITSTATE has been updated
19406 * condT holds the guarding condition for this instruction (0 or 1),
19407 * notInITt is 1 if we're in "normal" code, 0 if in an IT block
19408 * cond_AND_notInIT_T is the AND of the above two.
19410 If the instruction proper can't trap, then there's nothing else
 19411        to do w.r.t. ITSTATE -- just go and generate IR for the
19412 insn, taking into account the guarding condition.
19414 If, however, the instruction might trap, then we must back up
19415 ITSTATE to the old value, and re-update it after the potentially
19416 trapping IR section. A trap can happen either via a memory
19417 reference or because we need to throw SIGILL.
19419 If an instruction has a side exit, we need to be sure that any
19420 ITSTATE backup is re-updated before the side exit.
19423 /* ----------------------------------------------------------- */
19424 /* -- -- */
19425 /* -- Thumb 16-bit integer instructions -- */
19426 /* -- -- */
19427 /* -- IMPORTANT: references to insn1 or INSN1 are -- */
19428 /* -- not allowed in this section -- */
19429 /* -- -- */
19430 /* ----------------------------------------------------------- */
19432 /* 16-bit instructions inside an IT block, apart from CMP, CMN and
19433 TST, do not set the condition codes. Hence we must dynamically
19434 test for this case for every condition code update. */
19436 IROp anOp = Iop_INVALID;
19437 const HChar* anOpNm = NULL;
19439 /* ================ 16-bit 15:6 cases ================ */
19441 switch (INSN0(15,6)) {
19443 case 0x10a: // CMP
19444 case 0x10b: { // CMN
19445 /* ---------------- CMP Rn, Rm ---------------- */
19446 Bool isCMN = INSN0(15,6) == 0x10b;
19447 UInt rN = INSN0(2,0);
19448 UInt rM = INSN0(5,3);
19449 IRTemp argL = newTemp(Ity_I32);
19450 IRTemp argR = newTemp(Ity_I32);
19451 assign( argL, getIRegT(rN) );
19452 assign( argR, getIRegT(rM) );
19453 /* Update flags regardless of whether in an IT block or not. */
19454 setFlags_D1_D2( isCMN ? ARMG_CC_OP_ADD : ARMG_CC_OP_SUB,
19455 argL, argR, condT );
19456 DIP("%s r%u, r%u\n", isCMN ? "cmn" : "cmp", rN, rM);
19457 goto decode_success;
19460 case 0x108: {
19461 /* ---------------- TST Rn, Rm ---------------- */
19462 UInt rN = INSN0(2,0);
19463 UInt rM = INSN0(5,3);
19464 IRTemp oldC = newTemp(Ity_I32);
19465 IRTemp oldV = newTemp(Ity_I32);
19466 IRTemp res = newTemp(Ity_I32);
19467 assign( oldC, mk_armg_calculate_flag_c() );
19468 assign( oldV, mk_armg_calculate_flag_v() );
19469 assign( res, binop(Iop_And32, getIRegT(rN), getIRegT(rM)) );
19470 /* Update flags regardless of whether in an IT block or not. */
19471 setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC, res, oldC, oldV, condT );
19472 DIP("tst r%u, r%u\n", rN, rM);
19473 goto decode_success;
19476 case 0x109: {
19477 /* ---------------- NEGS Rd, Rm ---------------- */
19478 /* Rd = -Rm */
19479 UInt rM = INSN0(5,3);
19480 UInt rD = INSN0(2,0);
19481 IRTemp arg = newTemp(Ity_I32);
19482 IRTemp zero = newTemp(Ity_I32);
19483 assign(arg, getIRegT(rM));
19484 assign(zero, mkU32(0));
19485 // rD can never be r15
19486 putIRegT(rD, binop(Iop_Sub32, mkexpr(zero), mkexpr(arg)), condT);
19487 setFlags_D1_D2( ARMG_CC_OP_SUB, zero, arg, cond_AND_notInIT_T);
19488 DIP("negs r%u, r%u\n", rD, rM);
19489 goto decode_success;
19492 case 0x10F: {
19493 /* ---------------- MVNS Rd, Rm ---------------- */
19494 /* Rd = ~Rm */
19495 UInt rM = INSN0(5,3);
19496 UInt rD = INSN0(2,0);
19497 IRTemp oldV = newTemp(Ity_I32);
19498 IRTemp oldC = newTemp(Ity_I32);
19499 IRTemp res = newTemp(Ity_I32);
19500 assign( oldV, mk_armg_calculate_flag_v() );
19501 assign( oldC, mk_armg_calculate_flag_c() );
19502 assign(res, unop(Iop_Not32, getIRegT(rM)));
19503 // rD can never be r15
19504 putIRegT(rD, mkexpr(res), condT);
19505 setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC, res, oldC, oldV,
19506 cond_AND_notInIT_T );
19507 DIP("mvns r%u, r%u\n", rD, rM);
19508 goto decode_success;
19511 case 0x10C:
19512 /* ---------------- ORRS Rd, Rm ---------------- */
19513 anOp = Iop_Or32; anOpNm = "orr"; goto and_orr_eor_mul;
19514 case 0x100:
19515 /* ---------------- ANDS Rd, Rm ---------------- */
19516 anOp = Iop_And32; anOpNm = "and"; goto and_orr_eor_mul;
19517 case 0x101:
19518 /* ---------------- EORS Rd, Rm ---------------- */
19519 anOp = Iop_Xor32; anOpNm = "eor"; goto and_orr_eor_mul;
19520 case 0x10d:
19521 /* ---------------- MULS Rd, Rm ---------------- */
19522 anOp = Iop_Mul32; anOpNm = "mul"; goto and_orr_eor_mul;
19523 and_orr_eor_mul: {
19524 /* Rd = Rd `op` Rm */
19525 UInt rM = INSN0(5,3);
19526 UInt rD = INSN0(2,0);
19527 IRTemp res = newTemp(Ity_I32);
19528 IRTemp oldV = newTemp(Ity_I32);
19529 IRTemp oldC = newTemp(Ity_I32);
19530 assign( oldV, mk_armg_calculate_flag_v() );
19531 assign( oldC, mk_armg_calculate_flag_c() );
19532 assign( res, binop(anOp, getIRegT(rD), getIRegT(rM) ));
19533 // not safe to read guest state after here
19534 // rD can never be r15
19535 putIRegT(rD, mkexpr(res), condT);
19536 setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC, res, oldC, oldV,
19537 cond_AND_notInIT_T );
19538 DIP("%s r%u, r%u\n", anOpNm, rD, rM);
19539 goto decode_success;
19542 case 0x10E: {
19543 /* ---------------- BICS Rd, Rm ---------------- */
19544 /* Rd = Rd & ~Rm */
19545 UInt rM = INSN0(5,3);
19546 UInt rD = INSN0(2,0);
19547 IRTemp res = newTemp(Ity_I32);
19548 IRTemp oldV = newTemp(Ity_I32);
19549 IRTemp oldC = newTemp(Ity_I32);
19550 assign( oldV, mk_armg_calculate_flag_v() );
19551 assign( oldC, mk_armg_calculate_flag_c() );
19552 assign( res, binop(Iop_And32, getIRegT(rD),
19553 unop(Iop_Not32, getIRegT(rM) )));
19554 // not safe to read guest state after here
19555 // rD can never be r15
19556 putIRegT(rD, mkexpr(res), condT);
19557 setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC, res, oldC, oldV,
19558 cond_AND_notInIT_T );
19559 DIP("bics r%u, r%u\n", rD, rM);
19560 goto decode_success;
19563 case 0x105: {
19564 /* ---------------- ADCS Rd, Rm ---------------- */
19565 /* Rd = Rd + Rm + oldC */
19566 UInt rM = INSN0(5,3);
19567 UInt rD = INSN0(2,0);
19568 IRTemp argL = newTemp(Ity_I32);
19569 IRTemp argR = newTemp(Ity_I32);
19570 IRTemp oldC = newTemp(Ity_I32);
19571 IRTemp res = newTemp(Ity_I32);
19572 assign(argL, getIRegT(rD));
19573 assign(argR, getIRegT(rM));
19574 assign(oldC, mk_armg_calculate_flag_c());
19575 assign(res, binop(Iop_Add32,
19576 binop(Iop_Add32, mkexpr(argL), mkexpr(argR)),
19577 mkexpr(oldC)));
19578 // rD can never be r15
19579 putIRegT(rD, mkexpr(res), condT);
19580 setFlags_D1_D2_ND( ARMG_CC_OP_ADC, argL, argR, oldC,
19581 cond_AND_notInIT_T );
19582 DIP("adcs r%u, r%u\n", rD, rM);
19583 goto decode_success;
19586 case 0x106: {
19587 /* ---------------- SBCS Rd, Rm ---------------- */
19588 /* Rd = Rd - Rm - (oldC ^ 1) */
19589 UInt rM = INSN0(5,3);
19590 UInt rD = INSN0(2,0);
19591 IRTemp argL = newTemp(Ity_I32);
19592 IRTemp argR = newTemp(Ity_I32);
19593 IRTemp oldC = newTemp(Ity_I32);
19594 IRTemp res = newTemp(Ity_I32);
19595 assign(argL, getIRegT(rD));
19596 assign(argR, getIRegT(rM));
19597 assign(oldC, mk_armg_calculate_flag_c());
19598 assign(res, binop(Iop_Sub32,
19599 binop(Iop_Sub32, mkexpr(argL), mkexpr(argR)),
19600 binop(Iop_Xor32, mkexpr(oldC), mkU32(1))));
19601 // rD can never be r15
19602 putIRegT(rD, mkexpr(res), condT);
19603 setFlags_D1_D2_ND( ARMG_CC_OP_SBB, argL, argR, oldC,
19604 cond_AND_notInIT_T );
19605 DIP("sbcs r%u, r%u\n", rD, rM);
19606 goto decode_success;
19609 case 0x2CB: {
19610 /* ---------------- UXTB Rd, Rm ---------------- */
19611 /* Rd = 8Uto32(Rm) */
19612 UInt rM = INSN0(5,3);
19613 UInt rD = INSN0(2,0);
19614 putIRegT(rD, binop(Iop_And32, getIRegT(rM), mkU32(0xFF)),
19615 condT);
19616 DIP("uxtb r%u, r%u\n", rD, rM);
19617 goto decode_success;
19620 case 0x2C9: {
19621 /* ---------------- SXTB Rd, Rm ---------------- */
19622 /* Rd = 8Sto32(Rm) */
19623 UInt rM = INSN0(5,3);
19624 UInt rD = INSN0(2,0);
19625 putIRegT(rD, binop(Iop_Sar32,
19626 binop(Iop_Shl32, getIRegT(rM), mkU8(24)),
19627 mkU8(24)),
19628 condT);
19629 DIP("sxtb r%u, r%u\n", rD, rM);
19630 goto decode_success;
19633 case 0x2CA: {
19634 /* ---------------- UXTH Rd, Rm ---------------- */
19635 /* Rd = 16Uto32(Rm) */
19636 UInt rM = INSN0(5,3);
19637 UInt rD = INSN0(2,0);
19638 putIRegT(rD, binop(Iop_And32, getIRegT(rM), mkU32(0xFFFF)),
19639 condT);
19640 DIP("uxth r%u, r%u\n", rD, rM);
19641 goto decode_success;
19644 case 0x2C8: {
19645 /* ---------------- SXTH Rd, Rm ---------------- */
19646 /* Rd = 16Sto32(Rm) */
19647 UInt rM = INSN0(5,3);
19648 UInt rD = INSN0(2,0);
19649 putIRegT(rD, binop(Iop_Sar32,
19650 binop(Iop_Shl32, getIRegT(rM), mkU8(16)),
19651 mkU8(16)),
19652 condT);
19653 DIP("sxth r%u, r%u\n", rD, rM);
19654 goto decode_success;
19657 case 0x102: // LSLS
19658 case 0x103: // LSRS
19659 case 0x104: // ASRS
19660 case 0x107: { // RORS
19661 /* ---------------- LSLS Rs, Rd ---------------- */
19662 /* ---------------- LSRS Rs, Rd ---------------- */
19663 /* ---------------- ASRS Rs, Rd ---------------- */
19664 /* ---------------- RORS Rs, Rd ---------------- */
19665 /* Rd = Rd `op` Rs, and set flags */
19666 UInt rS = INSN0(5,3);
19667 UInt rD = INSN0(2,0);
19668 IRTemp oldV = newTemp(Ity_I32);
19669 IRTemp rDt = newTemp(Ity_I32);
19670 IRTemp rSt = newTemp(Ity_I32);
19671 IRTemp res = newTemp(Ity_I32);
19672 IRTemp resC = newTemp(Ity_I32);
19673 const HChar* wot = "???";
19674 assign(rSt, getIRegT(rS));
19675 assign(rDt, getIRegT(rD));
19676 assign(oldV, mk_armg_calculate_flag_v());
19677 /* Does not appear to be the standard 'how' encoding. */
19678 switch (INSN0(15,6)) {
19679 case 0x102:
19680 compute_result_and_C_after_LSL_by_reg(
19681 dis_buf, &res, &resC, rDt, rSt, rD, rS
19683 wot = "lsl";
19684 break;
19685 case 0x103:
19686 compute_result_and_C_after_LSR_by_reg(
19687 dis_buf, &res, &resC, rDt, rSt, rD, rS
19689 wot = "lsr";
19690 break;
19691 case 0x104:
19692 compute_result_and_C_after_ASR_by_reg(
19693 dis_buf, &res, &resC, rDt, rSt, rD, rS
19695 wot = "asr";
19696 break;
19697 case 0x107:
19698 compute_result_and_C_after_ROR_by_reg(
19699 dis_buf, &res, &resC, rDt, rSt, rD, rS
19701 wot = "ror";
19702 break;
19703 default:
19704 /*NOTREACHED*/vassert(0);
19706 // not safe to read guest state after this point
19707 putIRegT(rD, mkexpr(res), condT);
19708 setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC, res, resC, oldV,
19709 cond_AND_notInIT_T );
19710 DIP("%ss r%u, r%u\n", wot, rS, rD);
19711 goto decode_success;
19714 case 0x2E8: // REV
19715 case 0x2E9: { // REV16
19716 /* ---------------- REV Rd, Rm ---------------- */
19717 /* ---------------- REV16 Rd, Rm ---------------- */
19718 UInt rM = INSN0(5,3);
19719 UInt rD = INSN0(2,0);
19720 Bool isREV = INSN0(15,6) == 0x2E8;
19721 IRTemp arg = newTemp(Ity_I32);
19722 assign(arg, getIRegT(rM));
19723 IRTemp res = isREV ? gen_REV(arg) : gen_REV16(arg);
19724 putIRegT(rD, mkexpr(res), condT);
19725 DIP("rev%s r%u, r%u\n", isREV ? "" : "16", rD, rM);
19726 goto decode_success;
19729 case 0x2EB: { // REVSH
19730 /* ---------------- REVSH Rd, Rn ---------------- */
19731 UInt rM = INSN0(5,3);
19732 UInt rD = INSN0(2,0);
19733 IRTemp irt_rM = newTemp(Ity_I32);
19734 IRTemp irt_hi = newTemp(Ity_I32);
19735 IRTemp irt_low = newTemp(Ity_I32);
19736 IRTemp irt_res = newTemp(Ity_I32);
19737 assign(irt_rM, getIRegT(rM));
19738 assign(irt_hi,
19739 binop(Iop_Sar32,
19740 binop(Iop_Shl32, mkexpr(irt_rM), mkU8(24)),
19741 mkU8(16)
19744 assign(irt_low,
19745 binop(Iop_And32,
19746 binop(Iop_Shr32, mkexpr(irt_rM), mkU8(8)),
19747 mkU32(0xFF)
19750 assign(irt_res,
19751 binop(Iop_Or32, mkexpr(irt_hi), mkexpr(irt_low))
19753 putIRegT(rD, mkexpr(irt_res), condT);
19754 DIP("revsh r%u, r%u\n", rD, rM);
19755 goto decode_success;
19758 default:
19759 break; /* examine the next shortest prefix */
19764 /* ================ 16-bit 15:7 cases ================ */
19766 switch (INSN0(15,7)) {
19768 case BITS9(1,0,1,1,0,0,0,0,0): {
19769 /* ------------ ADD SP, #imm7 * 4 ------------ */
19770 UInt uimm7 = INSN0(6,0);
19771 putIRegT(13, binop(Iop_Add32, getIRegT(13), mkU32(uimm7 * 4)),
19772 condT);
19773 DIP("add sp, #%u\n", uimm7 * 4);
19774 goto decode_success;
19777 case BITS9(1,0,1,1,0,0,0,0,1): {
19778 /* ------------ SUB SP, #imm7 * 4 ------------ */
19779 UInt uimm7 = INSN0(6,0);
19780 putIRegT(13, binop(Iop_Sub32, getIRegT(13), mkU32(uimm7 * 4)),
19781 condT);
19782 DIP("sub sp, #%u\n", uimm7 * 4);
19783 goto decode_success;
19786 case BITS9(0,1,0,0,0,1,1,1,0): {
19787 /* ---------------- BX rM ---------------- */
19788 /* Branch to reg, and optionally switch modes. Reg contains a
19789 suitably encoded address therefore (w CPSR.T at the bottom).
19790 Have to special-case r15, as usual. */
19791 UInt rM = (INSN0(6,6) << 3) | INSN0(5,3);
19792 if (BITS3(0,0,0) == INSN0(2,0)) {
19793 IRTemp dst = newTemp(Ity_I32);
19794 gen_SIGILL_T_if_in_but_NLI_ITBlock(old_itstate, new_itstate);
19795 mk_skip_over_T16_if_cond_is_false(condT);
19796 condT = IRTemp_INVALID;
19797 // now uncond
19798 if (rM <= 14) {
19799 assign( dst, getIRegT(rM) );
19800 } else {
19801 vassert(rM == 15);
19802 assign( dst, mkU32(guest_R15_curr_instr_notENC + 4) );
19804 llPutIReg(15, mkexpr(dst));
19805 dres.jk_StopHere = rM == 14 ? Ijk_Ret : Ijk_Boring;
19806 dres.whatNext = Dis_StopHere;
19807 DIP("bx r%u (possibly switch to ARM mode)\n", rM);
19808 goto decode_success;
19810 break;
19813 /* ---------------- BLX rM ---------------- */
19814 /* Branch and link to interworking address in rM. */
19815 case BITS9(0,1,0,0,0,1,1,1,1): {
19816 if (BITS3(0,0,0) == INSN0(2,0)) {
19817 UInt rM = (INSN0(6,6) << 3) | INSN0(5,3);
19818 IRTemp dst = newTemp(Ity_I32);
19819 if (rM <= 14) {
19820 gen_SIGILL_T_if_in_but_NLI_ITBlock(old_itstate, new_itstate);
19821 mk_skip_over_T16_if_cond_is_false(condT);
19822 condT = IRTemp_INVALID;
19823 // now uncond
19824 /* We're returning to Thumb code, hence "| 1" */
19825 assign( dst, getIRegT(rM) );
19826 putIRegT( 14, mkU32( (guest_R15_curr_instr_notENC + 2) | 1 ),
19827 IRTemp_INVALID );
19828 llPutIReg(15, mkexpr(dst));
19829 dres.jk_StopHere = Ijk_Call;
19830 dres.whatNext = Dis_StopHere;
19831 DIP("blx r%u (possibly switch to ARM mode)\n", rM);
19832 goto decode_success;
19834 /* else unpredictable, fall through */
19836 break;
19839 default:
19840 break; /* examine the next shortest prefix */
19845 /* ================ 16-bit 15:8 cases ================ */
19847 switch (INSN0(15,8)) {
19849 case BITS8(1,1,0,1,1,1,1,1): {
19850 /* ---------------- SVC ---------------- */
19851 UInt imm8 = INSN0(7,0);
19852 if (imm8 == 0) {
19853 /* A syscall. We can't do this conditionally, hence: */
19854 mk_skip_over_T16_if_cond_is_false( condT );
19855 // FIXME: what if we have to back up and restart this insn?
19856 // then ITSTATE will be wrong (we'll have it as "used")
19857 // when it isn't. Correct is to save ITSTATE in a
19858 // stash pseudo-reg, and back up from that if we have to
19859 // restart.
19860 // uncond after here
19861 llPutIReg(15, mkU32( (guest_R15_curr_instr_notENC + 2) | 1 ));
19862 dres.jk_StopHere = Ijk_Sys_syscall;
19863 dres.whatNext = Dis_StopHere;
19864 DIP("svc #0x%08x\n", imm8);
19865 goto decode_success;
19867 /* else fall through */
19868 break;
19871 case BITS8(0,1,0,0,0,1,0,0): {
19872 /* ---------------- ADD(HI) Rd, Rm ---------------- */
19873 UInt h1 = INSN0(7,7);
19874 UInt h2 = INSN0(6,6);
19875 UInt rM = (h2 << 3) | INSN0(5,3);
19876 UInt rD = (h1 << 3) | INSN0(2,0);
19877 //if (h1 == 0 && h2 == 0) { // Original T1 was more restrictive
19878 if (rD == 15 && rM == 15) {
19879 // then it's invalid
19880 } else {
19881 IRTemp res = newTemp(Ity_I32);
19882 assign( res, binop(Iop_Add32, getIRegT(rD), getIRegT(rM) ));
19883 if (rD != 15) {
19884 putIRegT( rD, mkexpr(res), condT );
19885 } else {
19886 /* Only allowed outside or last-in IT block; SIGILL if not so. */
19887 gen_SIGILL_T_if_in_but_NLI_ITBlock(old_itstate, new_itstate);
19888 /* jump over insn if not selected */
19889 mk_skip_over_T16_if_cond_is_false(condT);
19890 condT = IRTemp_INVALID;
19891 // now uncond
19892 /* non-interworking branch */
19893 llPutIReg(15, binop(Iop_Or32, mkexpr(res), mkU32(1)));
19894 dres.jk_StopHere = Ijk_Boring;
19895 dres.whatNext = Dis_StopHere;
19897 DIP("add(hi) r%u, r%u\n", rD, rM);
19898 goto decode_success;
19900 break;
19903 case BITS8(0,1,0,0,0,1,0,1): {
19904 /* ---------------- CMP(HI) Rd, Rm ---------------- */
19905 UInt h1 = INSN0(7,7);
19906 UInt h2 = INSN0(6,6);
19907 UInt rM = (h2 << 3) | INSN0(5,3);
19908 UInt rN = (h1 << 3) | INSN0(2,0);
19909 if (h1 != 0 || h2 != 0) {
19910 IRTemp argL = newTemp(Ity_I32);
19911 IRTemp argR = newTemp(Ity_I32);
19912 assign( argL, getIRegT(rN) );
19913 assign( argR, getIRegT(rM) );
19914 /* Update flags regardless of whether in an IT block or not. */
19915 setFlags_D1_D2( ARMG_CC_OP_SUB, argL, argR, condT );
19916 DIP("cmphi r%u, r%u\n", rN, rM);
19917 goto decode_success;
19919 break;
19922 case BITS8(0,1,0,0,0,1,1,0): {
19923 /* ---------------- MOV(HI) Rd, Rm ---------------- */
19924 UInt h1 = INSN0(7,7);
19925 UInt h2 = INSN0(6,6);
19926 UInt rM = (h2 << 3) | INSN0(5,3);
19927 UInt rD = (h1 << 3) | INSN0(2,0);
19928 /* The old ARM ARM seems to disallow the case where both Rd and
19929 Rm are "low" registers, but newer versions allow it. */
19930 if (1 /*h1 != 0 || h2 != 0*/) {
19931 IRTemp val = newTemp(Ity_I32);
19932 assign( val, getIRegT(rM) );
19933 if (rD != 15) {
19934 putIRegT( rD, mkexpr(val), condT );
19935 } else {
19936 /* Only allowed outside or last-in IT block; SIGILL if not so. */
19937 gen_SIGILL_T_if_in_but_NLI_ITBlock(old_itstate, new_itstate);
19938 /* jump over insn if not selected */
19939 mk_skip_over_T16_if_cond_is_false(condT);
19940 condT = IRTemp_INVALID;
19941 // now uncond
19942 /* non-interworking branch */
19943 llPutIReg(15, binop(Iop_Or32, mkexpr(val), mkU32(1)));
19944 dres.jk_StopHere = rM == 14 ? Ijk_Ret : Ijk_Boring;
19945 dres.whatNext = Dis_StopHere;
19947 DIP("mov r%u, r%u\n", rD, rM);
19948 goto decode_success;
19950 break;
19953 case BITS8(1,0,1,1,1,1,1,1): {
19954 /* ---------------- IT (if-then) ---------------- */
19955 UInt firstcond = INSN0(7,4);
19956 UInt mask = INSN0(3,0);
19957 UInt newITSTATE = 0;
19958 /* This is the ITSTATE represented as described in
19959 libvex_guest_arm.h. It is not the ARM ARM representation. */
19960 HChar c1 = '.';
19961 HChar c2 = '.';
19962 HChar c3 = '.';
19963 Bool valid = compute_ITSTATE( &newITSTATE, &c1, &c2, &c3,
19964 firstcond, mask );
19965 if (valid && firstcond != 0xF/*NV*/) {
19966 /* Not allowed in an IT block; SIGILL if so. */
19967 gen_SIGILL_T_if_in_ITBlock(old_itstate, new_itstate);
19969 IRTemp t = newTemp(Ity_I32);
19970 assign(t, mkU32(newITSTATE));
19971 put_ITSTATE(t);
19973 DIP("it%c%c%c %s\n", c1, c2, c3, nCC(firstcond));
19974 goto decode_success;
19976 break;
19979 case BITS8(1,0,1,1,0,0,0,1):
19980 case BITS8(1,0,1,1,0,0,1,1):
19981 case BITS8(1,0,1,1,1,0,0,1):
19982 case BITS8(1,0,1,1,1,0,1,1): {
19983 /* ---------------- CB{N}Z ---------------- */
19984 UInt rN = INSN0(2,0);
19985 UInt bOP = INSN0(11,11);
19986 UInt imm32 = (INSN0(9,9) << 6) | (INSN0(7,3) << 1);
19987 gen_SIGILL_T_if_in_ITBlock(old_itstate, new_itstate);
19988 /* It's a conditional branch forward. */
19989 IRTemp kond = newTemp(Ity_I1);
19990 assign( kond, binop(bOP ? Iop_CmpNE32 : Iop_CmpEQ32,
19991 getIRegT(rN), mkU32(0)) );
19993 vassert(0 == (guest_R15_curr_instr_notENC & 1));
19994 /* Looks like the nearest insn we can branch to is the one after
19995 next. That makes sense, as there's no point in being able to
19996 encode a conditional branch to the next instruction. */
19997 UInt dst = (guest_R15_curr_instr_notENC + 4 + imm32) | 1;
19998 stmt(IRStmt_Exit( mkexpr(kond),
19999 Ijk_Boring,
20000 IRConst_U32(toUInt(dst)),
20001 OFFB_R15T ));
20002 DIP("cb%s r%u, 0x%x\n", bOP ? "nz" : "z", rN, dst - 1);
20003 goto decode_success;
20006 default:
20007 break; /* examine the next shortest prefix */
20012 /* ================ 16-bit 15:9 cases ================ */
20014 switch (INSN0(15,9)) {
20016 case BITS7(1,0,1,1,0,1,0): {
20017 /* ---------------- PUSH ---------------- */
20018 /* This is a bit like STMxx, but way simpler. Complications we
20019 don't have to deal with:
20020 * SP being one of the transferred registers
20021 * direction (increment vs decrement)
20022 * before-vs-after-ness
20024 Int i, nRegs;
20025 UInt bitR = INSN0(8,8);
20026 UInt regList = INSN0(7,0);
20027 if (bitR) regList |= (1 << 14);
20029 /* At least one register must be transferred, else result is
20030 UNPREDICTABLE. */
20031 if (regList != 0) {
20032 /* Since we can't generate a guaranteed non-trapping IR
20033 sequence, (1) jump over the insn if it is gated false, and
20034 (2) back out the ITSTATE update. */
20035 mk_skip_over_T16_if_cond_is_false(condT);
20036 condT = IRTemp_INVALID;
20037 put_ITSTATE(old_itstate);
20038 // now uncond
20040 nRegs = 0;
20041 for (i = 0; i < 16; i++) {
20042 if ((regList & (1 << i)) != 0)
20043 nRegs++;
20045 vassert(nRegs >= 1 && nRegs <= 9);
20047 /* Move SP down first of all, so we're "covered". And don't
20048 mess with its alignment. */
20049 IRTemp newSP = newTemp(Ity_I32);
20050 assign(newSP, binop(Iop_Sub32, getIRegT(13), mkU32(4 * nRegs)));
20051 putIRegT(13, mkexpr(newSP), IRTemp_INVALID);
20053 /* Generate a transfer base address as a forced-aligned
20054 version of the final SP value. */
20055 IRTemp base = newTemp(Ity_I32);
20056 assign(base, binop(Iop_And32, mkexpr(newSP), mkU32(~3)));
20058 /* Now the transfers */
20059 nRegs = 0;
20060 for (i = 0; i < 16; i++) {
20061 if ((regList & (1 << i)) != 0) {
20062 storeLE( binop(Iop_Add32, mkexpr(base), mkU32(4 * nRegs)),
20063 getIRegT(i) );
20064 nRegs++;
20068 /* Reinstate the ITSTATE update. */
20069 put_ITSTATE(new_itstate);
20071 DIP("push {%s0x%04x}\n", bitR ? "lr," : "", regList & 0xFF);
20072 goto decode_success;
20074 break;
20077 case BITS7(1,0,1,1,1,1,0): {
20078 /* ---------------- POP ---------------- */
20079 Int i, nRegs;
20080 UInt bitR = INSN0(8,8);
20081 UInt regList = INSN0(7,0);
20083 /* At least one register must be transferred, else result is
20084 UNPREDICTABLE. */
20085 if (regList != 0 || bitR) {
20086 /* Since we can't generate a guaranteed non-trapping IR
20087 sequence, (1) jump over the insn if it is gated false, and
20088 (2) back out the ITSTATE update. */
20089 mk_skip_over_T16_if_cond_is_false(condT);
20090 condT = IRTemp_INVALID;
20091 put_ITSTATE(old_itstate);
20092 // now uncond
20094 nRegs = 0;
20095 for (i = 0; i < 8; i++) {
20096 if ((regList & (1 << i)) != 0)
20097 nRegs++;
20099 vassert(nRegs >= 0 && nRegs <= 8);
20100 vassert(bitR == 0 || bitR == 1);
20102 IRTemp oldSP = newTemp(Ity_I32);
20103 assign(oldSP, getIRegT(13));
20105 /* Generate a transfer base address as a forced-aligned
20106 version of the original SP value. */
20107 IRTemp base = newTemp(Ity_I32);
20108 assign(base, binop(Iop_And32, mkexpr(oldSP), mkU32(~3)));
20110 /* Compute a new value for SP, but don't install it yet, so
20111 that we're "covered" until all the transfers are done.
20112 And don't mess with its alignment. */
20113 IRTemp newSP = newTemp(Ity_I32);
20114 assign(newSP, binop(Iop_Add32, mkexpr(oldSP),
20115 mkU32(4 * (nRegs + bitR))));
20117 /* Now the transfers, not including PC */
20118 nRegs = 0;
20119 for (i = 0; i < 8; i++) {
20120 if ((regList & (1 << i)) != 0) {
20121 putIRegT(i, loadLE( Ity_I32,
20122 binop(Iop_Add32, mkexpr(base),
20123 mkU32(4 * nRegs))),
20124 IRTemp_INVALID );
20125 nRegs++;
20129 IRTemp newPC = IRTemp_INVALID;
20130 if (bitR) {
20131 newPC = newTemp(Ity_I32);
20132 assign( newPC, loadLE( Ity_I32,
20133 binop(Iop_Add32, mkexpr(base),
20134 mkU32(4 * nRegs))));
20137 /* Now we can safely install the new SP value */
20138 putIRegT(13, mkexpr(newSP), IRTemp_INVALID);
20140 /* Reinstate the ITSTATE update. */
20141 put_ITSTATE(new_itstate);
20143 /* now, do we also have to do a branch? If so, it turns out
20144 that the new PC value is encoded exactly as we need it to
20145 be -- with CPSR.T in the bottom bit. So we can simply use
20146 it as is, no need to mess with it. Note, therefore, this
20147 is an interworking return. */
20148 if (bitR) {
20149 llPutIReg(15, mkexpr(newPC));
20150 dres.jk_StopHere = Ijk_Ret;
20151 dres.whatNext = Dis_StopHere;
20154 DIP("pop {%s0x%04x}\n", bitR ? "pc," : "", regList & 0xFF);
20155 goto decode_success;
20157 break;
20160 case BITS7(0,0,0,1,1,1,0): /* ADDS */
20161 case BITS7(0,0,0,1,1,1,1): { /* SUBS */
20162 /* ---------------- ADDS Rd, Rn, #uimm3 ---------------- */
20163 /* ---------------- SUBS Rd, Rn, #uimm3 ---------------- */
20164 UInt uimm3 = INSN0(8,6);
20165 UInt rN = INSN0(5,3);
20166 UInt rD = INSN0(2,0);
20167 UInt isSub = INSN0(9,9);
20168 IRTemp argL = newTemp(Ity_I32);
20169 IRTemp argR = newTemp(Ity_I32);
20170 assign( argL, getIRegT(rN) );
20171 assign( argR, mkU32(uimm3) );
20172 putIRegT(rD, binop(isSub ? Iop_Sub32 : Iop_Add32,
20173 mkexpr(argL), mkexpr(argR)),
20174 condT);
20175 setFlags_D1_D2( isSub ? ARMG_CC_OP_SUB : ARMG_CC_OP_ADD,
20176 argL, argR, cond_AND_notInIT_T );
20177 DIP("%s r%u, r%u, #%u\n", isSub ? "subs" : "adds", rD, rN, uimm3);
20178 goto decode_success;
20181 case BITS7(0,0,0,1,1,0,0): /* ADDS */
20182 case BITS7(0,0,0,1,1,0,1): { /* SUBS */
20183 /* ---------------- ADDS Rd, Rn, Rm ---------------- */
20184 /* ---------------- SUBS Rd, Rn, Rm ---------------- */
20185 UInt rM = INSN0(8,6);
20186 UInt rN = INSN0(5,3);
20187 UInt rD = INSN0(2,0);
20188 UInt isSub = INSN0(9,9);
20189 IRTemp argL = newTemp(Ity_I32);
20190 IRTemp argR = newTemp(Ity_I32);
20191 assign( argL, getIRegT(rN) );
20192 assign( argR, getIRegT(rM) );
20193 putIRegT( rD, binop(isSub ? Iop_Sub32 : Iop_Add32,
20194 mkexpr(argL), mkexpr(argR)),
20195 condT );
20196 setFlags_D1_D2( isSub ? ARMG_CC_OP_SUB : ARMG_CC_OP_ADD,
20197 argL, argR, cond_AND_notInIT_T );
20198 DIP("%s r%u, r%u, r%u\n", isSub ? "subs" : "adds", rD, rN, rM);
20199 goto decode_success;
20202 case BITS7(0,1,0,1,0,0,0): /* STR */
20203 case BITS7(0,1,0,1,1,0,0): { /* LDR */
20204 /* ------------- LDR Rd, [Rn, Rm] ------------- */
20205 /* ------------- STR Rd, [Rn, Rm] ------------- */
20206 /* LDR/STR Rd, [Rn + Rm] */
20207 UInt rD = INSN0(2,0);
20208 UInt rN = INSN0(5,3);
20209 UInt rM = INSN0(8,6);
20210 UInt isLD = INSN0(11,11);
20212 IRExpr* ea = binop(Iop_Add32, getIRegT(rN), getIRegT(rM));
20213 put_ITSTATE(old_itstate); // backout
20214 if (isLD) {
20215 IRTemp tD = newTemp(Ity_I32);
20216 loadGuardedLE( tD, ILGop_Ident32, ea, llGetIReg(rD), condT );
20217 putIRegT(rD, mkexpr(tD), IRTemp_INVALID);
20218 } else {
20219 storeGuardedLE(ea, getIRegT(rD), condT);
20221 put_ITSTATE(new_itstate); // restore
20223 DIP("%s r%u, [r%u, r%u]\n", isLD ? "ldr" : "str", rD, rN, rM);
20224 goto decode_success;
20227 case BITS7(0,1,0,1,0,0,1):
20228 case BITS7(0,1,0,1,1,0,1): {
20229 /* ------------- LDRH Rd, [Rn, Rm] ------------- */
20230 /* ------------- STRH Rd, [Rn, Rm] ------------- */
20231 /* LDRH/STRH Rd, [Rn + Rm] */
20232 UInt rD = INSN0(2,0);
20233 UInt rN = INSN0(5,3);
20234 UInt rM = INSN0(8,6);
20235 UInt isLD = INSN0(11,11);
20237 IRExpr* ea = binop(Iop_Add32, getIRegT(rN), getIRegT(rM));
20238 put_ITSTATE(old_itstate); // backout
20239 if (isLD) {
20240 IRTemp tD = newTemp(Ity_I32);
20241 loadGuardedLE(tD, ILGop_16Uto32, ea, llGetIReg(rD), condT);
20242 putIRegT(rD, mkexpr(tD), IRTemp_INVALID);
20243 } else {
20244 storeGuardedLE( ea, unop(Iop_32to16, getIRegT(rD)), condT );
20246 put_ITSTATE(new_itstate); // restore
20248 DIP("%sh r%u, [r%u, r%u]\n", isLD ? "ldr" : "str", rD, rN, rM);
20249 goto decode_success;
20252 case BITS7(0,1,0,1,1,1,1): {
20253 /* ------------- LDRSH Rd, [Rn, Rm] ------------- */
20254 /* LDRSH Rd, [Rn + Rm] */
20255 UInt rD = INSN0(2,0);
20256 UInt rN = INSN0(5,3);
20257 UInt rM = INSN0(8,6);
20259 IRExpr* ea = binop(Iop_Add32, getIRegT(rN), getIRegT(rM));
20260 put_ITSTATE(old_itstate); // backout
20261 IRTemp tD = newTemp(Ity_I32);
20262 loadGuardedLE(tD, ILGop_16Sto32, ea, llGetIReg(rD), condT);
20263 putIRegT(rD, mkexpr(tD), IRTemp_INVALID);
20264 put_ITSTATE(new_itstate); // restore
20266 DIP("ldrsh r%u, [r%u, r%u]\n", rD, rN, rM);
20267 goto decode_success;
20270 case BITS7(0,1,0,1,0,1,1): {
20271 /* ------------- LDRSB Rd, [Rn, Rm] ------------- */
20272 /* LDRSB Rd, [Rn + Rm] */
20273 UInt rD = INSN0(2,0);
20274 UInt rN = INSN0(5,3);
20275 UInt rM = INSN0(8,6);
20277 IRExpr* ea = binop(Iop_Add32, getIRegT(rN), getIRegT(rM));
20278 put_ITSTATE(old_itstate); // backout
20279 IRTemp tD = newTemp(Ity_I32);
20280 loadGuardedLE(tD, ILGop_8Sto32, ea, llGetIReg(rD), condT);
20281 putIRegT(rD, mkexpr(tD), IRTemp_INVALID);
20282 put_ITSTATE(new_itstate); // restore
20284 DIP("ldrsb r%u, [r%u, r%u]\n", rD, rN, rM);
20285 goto decode_success;
20288 case BITS7(0,1,0,1,0,1,0):
20289 case BITS7(0,1,0,1,1,1,0): {
20290 /* ------------- LDRB Rd, [Rn, Rm] ------------- */
20291 /* ------------- STRB Rd, [Rn, Rm] ------------- */
20292 /* LDRB/STRB Rd, [Rn + Rm] */
20293 UInt rD = INSN0(2,0);
20294 UInt rN = INSN0(5,3);
20295 UInt rM = INSN0(8,6);
20296 UInt isLD = INSN0(11,11);
20298 IRExpr* ea = binop(Iop_Add32, getIRegT(rN), getIRegT(rM));
20299 put_ITSTATE(old_itstate); // backout
20300 if (isLD) {
20301 IRTemp tD = newTemp(Ity_I32);
20302 loadGuardedLE(tD, ILGop_8Uto32, ea, llGetIReg(rD), condT);
20303 putIRegT(rD, mkexpr(tD), IRTemp_INVALID);
20304 } else {
20305 storeGuardedLE( ea, unop(Iop_32to8, getIRegT(rD)), condT );
20307 put_ITSTATE(new_itstate); // restore
20309 DIP("%sb r%u, [r%u, r%u]\n", isLD ? "ldr" : "str", rD, rN, rM);
20310 goto decode_success;
20313 default:
20314 break; /* examine the next shortest prefix */
20319 /* ================ 16-bit 15:11 cases ================ */
20321 switch (INSN0(15,11)) {
20323 case BITS5(0,0,1,1,0):
20324 case BITS5(0,0,1,1,1): {
20325 /* ---------------- ADDS Rn, #uimm8 ---------------- */
20326 /* ---------------- SUBS Rn, #uimm8 ---------------- */
20327 UInt isSub = INSN0(11,11);
20328 UInt rN = INSN0(10,8);
20329 UInt uimm8 = INSN0(7,0);
20330 IRTemp argL = newTemp(Ity_I32);
20331 IRTemp argR = newTemp(Ity_I32);
20332 assign( argL, getIRegT(rN) );
20333 assign( argR, mkU32(uimm8) );
20334 putIRegT( rN, binop(isSub ? Iop_Sub32 : Iop_Add32,
20335 mkexpr(argL), mkexpr(argR)), condT );
20336 setFlags_D1_D2( isSub ? ARMG_CC_OP_SUB : ARMG_CC_OP_ADD,
20337 argL, argR, cond_AND_notInIT_T );
20338 DIP("%s r%u, #%u\n", isSub ? "subs" : "adds", rN, uimm8);
20339 goto decode_success;
20342 case BITS5(1,0,1,0,0): {
20343 /* ---------------- ADD rD, PC, #imm8 * 4 ---------------- */
20344 /* a.k.a. ADR */
20345 /* rD = align4(PC) + imm8 * 4 */
20346 UInt rD = INSN0(10,8);
20347 UInt imm8 = INSN0(7,0);
20348 putIRegT(rD, binop(Iop_Add32,
20349 binop(Iop_And32, getIRegT(15), mkU32(~3U)),
20350 mkU32(imm8 * 4)),
20351 condT);
20352 DIP("add r%u, pc, #%u\n", rD, imm8 * 4);
20353 goto decode_success;
20356 case BITS5(1,0,1,0,1): {
20357 /* ---------------- ADD rD, SP, #imm8 * 4 ---------------- */
20358 UInt rD = INSN0(10,8);
20359 UInt imm8 = INSN0(7,0);
20360 putIRegT(rD, binop(Iop_Add32, getIRegT(13), mkU32(imm8 * 4)),
20361 condT);
20362 DIP("add r%u, r13, #%u\n", rD, imm8 * 4);
20363 goto decode_success;
20366 case BITS5(0,0,1,0,1): {
20367 /* ---------------- CMP Rn, #uimm8 ---------------- */
20368 UInt rN = INSN0(10,8);
20369 UInt uimm8 = INSN0(7,0);
20370 IRTemp argL = newTemp(Ity_I32);
20371 IRTemp argR = newTemp(Ity_I32);
20372 assign( argL, getIRegT(rN) );
20373 assign( argR, mkU32(uimm8) );
20374 /* Update flags regardless of whether in an IT block or not. */
20375 setFlags_D1_D2( ARMG_CC_OP_SUB, argL, argR, condT );
20376 DIP("cmp r%u, #%u\n", rN, uimm8);
20377 goto decode_success;
20380 case BITS5(0,0,1,0,0): {
20381 /* -------------- (T1) MOVS Rn, #uimm8 -------------- */
20382 UInt rD = INSN0(10,8);
20383 UInt uimm8 = INSN0(7,0);
20384 IRTemp oldV = newTemp(Ity_I32);
20385 IRTemp oldC = newTemp(Ity_I32);
20386 IRTemp res = newTemp(Ity_I32);
20387 assign( oldV, mk_armg_calculate_flag_v() );
20388 assign( oldC, mk_armg_calculate_flag_c() );
20389 assign( res, mkU32(uimm8) );
20390 putIRegT(rD, mkexpr(res), condT);
20391 setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC, res, oldC, oldV,
20392 cond_AND_notInIT_T );
20393 DIP("movs r%u, #%u\n", rD, uimm8);
20394 goto decode_success;
20397 case BITS5(0,1,0,0,1): {
20398 /* ------------- LDR Rd, [PC, #imm8 * 4] ------------- */
20399 /* LDR Rd, [align4(PC) + imm8 * 4] */
20400 UInt rD = INSN0(10,8);
20401 UInt imm8 = INSN0(7,0);
20402 IRTemp ea = newTemp(Ity_I32);
20404 assign(ea, binop(Iop_Add32,
20405 binop(Iop_And32, getIRegT(15), mkU32(~3U)),
20406 mkU32(imm8 * 4)));
20407 put_ITSTATE(old_itstate); // backout
20408 IRTemp tD = newTemp(Ity_I32);
20409 loadGuardedLE( tD, ILGop_Ident32, mkexpr(ea), llGetIReg(rD), condT );
20410 putIRegT(rD, mkexpr(tD), IRTemp_INVALID);
20411 put_ITSTATE(new_itstate); // restore
20413 DIP("ldr r%u, [pc, #%u]\n", rD, imm8 * 4);
20414 goto decode_success;
20417 case BITS5(0,1,1,0,0): /* STR */
20418 case BITS5(0,1,1,0,1): { /* LDR */
20419 /* ------------- LDR Rd, [Rn, #imm5 * 4] ------------- */
20420 /* ------------- STR Rd, [Rn, #imm5 * 4] ------------- */
20421 /* LDR/STR Rd, [Rn + imm5 * 4] */
20422 UInt rD = INSN0(2,0);
20423 UInt rN = INSN0(5,3);
20424 UInt imm5 = INSN0(10,6);
20425 UInt isLD = INSN0(11,11);
20427 IRExpr* ea = binop(Iop_Add32, getIRegT(rN), mkU32(imm5 * 4));
20428 put_ITSTATE(old_itstate); // backout
20429 if (isLD) {
20430 IRTemp tD = newTemp(Ity_I32);
20431 loadGuardedLE( tD, ILGop_Ident32, ea, llGetIReg(rD), condT );
20432 putIRegT(rD, mkexpr(tD), IRTemp_INVALID);
20433 } else {
20434 storeGuardedLE( ea, getIRegT(rD), condT );
20436 put_ITSTATE(new_itstate); // restore
20438 DIP("%s r%u, [r%u, #%u]\n", isLD ? "ldr" : "str", rD, rN, imm5 * 4);
20439 goto decode_success;
20442 case BITS5(1,0,0,0,0): /* STRH */
20443 case BITS5(1,0,0,0,1): { /* LDRH */
20444 /* ------------- LDRH Rd, [Rn, #imm5 * 2] ------------- */
20445 /* ------------- STRH Rd, [Rn, #imm5 * 2] ------------- */
20446 /* LDRH/STRH Rd, [Rn + imm5 * 2] */
20447 UInt rD = INSN0(2,0);
20448 UInt rN = INSN0(5,3);
20449 UInt imm5 = INSN0(10,6);
20450 UInt isLD = INSN0(11,11);
20452 IRExpr* ea = binop(Iop_Add32, getIRegT(rN), mkU32(imm5 * 2));
20453 put_ITSTATE(old_itstate); // backout
20454 if (isLD) {
20455 IRTemp tD = newTemp(Ity_I32);
20456 loadGuardedLE( tD, ILGop_16Uto32, ea, llGetIReg(rD), condT );
20457 putIRegT(rD, mkexpr(tD), IRTemp_INVALID);
20458 } else {
20459 storeGuardedLE( ea, unop(Iop_32to16, getIRegT(rD)), condT );
20461 put_ITSTATE(new_itstate); // restore
20463 DIP("%sh r%u, [r%u, #%u]\n", isLD ? "ldr" : "str", rD, rN, imm5 * 2);
20464 goto decode_success;
20467 case BITS5(0,1,1,1,0): /* STRB */
20468 case BITS5(0,1,1,1,1): { /* LDRB */
20469 /* ------------- LDRB Rd, [Rn, #imm5] ------------- */
20470 /* ------------- STRB Rd, [Rn, #imm5] ------------- */
20471 /* LDRB/STRB Rd, [Rn + imm5] */
20472 UInt rD = INSN0(2,0);
20473 UInt rN = INSN0(5,3);
20474 UInt imm5 = INSN0(10,6);
20475 UInt isLD = INSN0(11,11);
20477 IRExpr* ea = binop(Iop_Add32, getIRegT(rN), mkU32(imm5));
20478 put_ITSTATE(old_itstate); // backout
20479 if (isLD) {
20480 IRTemp tD = newTemp(Ity_I32);
20481 loadGuardedLE( tD, ILGop_8Uto32, ea, llGetIReg(rD), condT );
20482 putIRegT(rD, mkexpr(tD), IRTemp_INVALID);
20483 } else {
20484 storeGuardedLE( ea, unop(Iop_32to8, getIRegT(rD)), condT );
20486 put_ITSTATE(new_itstate); // restore
20488 DIP("%sb r%u, [r%u, #%u]\n", isLD ? "ldr" : "str", rD, rN, imm5);
20489 goto decode_success;
20492 case BITS5(1,0,0,1,0): /* STR */
20493 case BITS5(1,0,0,1,1): { /* LDR */
20494 /* ------------- LDR Rd, [SP, #imm8 * 4] ------------- */
20495 /* ------------- STR Rd, [SP, #imm8 * 4] ------------- */
20496 /* LDR/STR Rd, [SP + imm8 * 4] */
20497 UInt rD = INSN0(10,8);
20498 UInt imm8 = INSN0(7,0);
20499 UInt isLD = INSN0(11,11);
20501 IRExpr* ea = binop(Iop_Add32, getIRegT(13), mkU32(imm8 * 4));
20502 put_ITSTATE(old_itstate); // backout
20503 if (isLD) {
20504 IRTemp tD = newTemp(Ity_I32);
20505 loadGuardedLE( tD, ILGop_Ident32, ea, llGetIReg(rD), condT );
20506 putIRegT(rD, mkexpr(tD), IRTemp_INVALID);
20507 } else {
20508 storeGuardedLE(ea, getIRegT(rD), condT);
20510 put_ITSTATE(new_itstate); // restore
20512 DIP("%s r%u, [sp, #%u]\n", isLD ? "ldr" : "str", rD, imm8 * 4);
20513 goto decode_success;
20516 case BITS5(1,1,0,0,1): {
20517 /* ------------- LDMIA Rn!, {reglist} ------------- */
20518 Int i, nRegs = 0;
20519 UInt rN = INSN0(10,8);
20520 UInt list = INSN0(7,0);
20521 /* Empty lists aren't allowed. */
20522 if (list != 0) {
20523 mk_skip_over_T16_if_cond_is_false(condT);
20524 condT = IRTemp_INVALID;
20525 put_ITSTATE(old_itstate);
20526 // now uncond
20528 IRTemp oldRn = newTemp(Ity_I32);
20529 IRTemp base = newTemp(Ity_I32);
20530 assign(oldRn, getIRegT(rN));
20531 assign(base, binop(Iop_And32, mkexpr(oldRn), mkU32(~3U)));
20532 for (i = 0; i < 8; i++) {
20533 if (0 == (list & (1 << i)))
20534 continue;
20535 nRegs++;
20536 putIRegT(
20537 i, loadLE(Ity_I32,
20538 binop(Iop_Add32, mkexpr(base),
20539 mkU32(nRegs * 4 - 4))),
20540 IRTemp_INVALID
20543 /* Only do the writeback for rN if it isn't in the list of
20544 registers to be transferred. */
20545 if (0 == (list & (1 << rN))) {
20546 putIRegT(rN,
20547 binop(Iop_Add32, mkexpr(oldRn),
20548 mkU32(nRegs * 4)),
20549 IRTemp_INVALID
20553 /* Reinstate the ITSTATE update. */
20554 put_ITSTATE(new_itstate);
20556 DIP("ldmia r%u!, {0x%04x}\n", rN, list);
20557 goto decode_success;
20559 break;
20562 case BITS5(1,1,0,0,0): {
20563 /* ------------- STMIA Rn!, {reglist} ------------- */
20564 Int i, nRegs = 0;
20565 UInt rN = INSN0(10,8);
20566 UInt list = INSN0(7,0);
20567 /* Empty lists aren't allowed. Also, if rN is in the list then
20568 it must be the lowest numbered register in the list. */
20569 Bool valid = list != 0;
20570 if (valid && 0 != (list & (1 << rN))) {
20571 for (i = 0; i < rN; i++) {
20572 if (0 != (list & (1 << i)))
20573 valid = False;
20576 if (valid) {
20577 mk_skip_over_T16_if_cond_is_false(condT);
20578 condT = IRTemp_INVALID;
20579 put_ITSTATE(old_itstate);
20580 // now uncond
20582 IRTemp oldRn = newTemp(Ity_I32);
20583 IRTemp base = newTemp(Ity_I32);
20584 assign(oldRn, getIRegT(rN));
20585 assign(base, binop(Iop_And32, mkexpr(oldRn), mkU32(~3U)));
20586 for (i = 0; i < 8; i++) {
20587 if (0 == (list & (1 << i)))
20588 continue;
20589 nRegs++;
20590 storeLE( binop(Iop_Add32, mkexpr(base), mkU32(nRegs * 4 - 4)),
20591 getIRegT(i) );
20593 /* Always do the writeback. */
20594 putIRegT(rN,
20595 binop(Iop_Add32, mkexpr(oldRn),
20596 mkU32(nRegs * 4)),
20597 IRTemp_INVALID);
20599 /* Reinstate the ITSTATE update. */
20600 put_ITSTATE(new_itstate);
20602 DIP("stmia r%u!, {0x%04x}\n", rN, list);
20603 goto decode_success;
20605 break;
20608 case BITS5(0,0,0,0,0): /* LSLS */
20609 case BITS5(0,0,0,0,1): /* LSRS */
20610 case BITS5(0,0,0,1,0): { /* ASRS */
20611 /* ---------------- LSLS Rd, Rm, #imm5 ---------------- */
20612 /* ---------------- LSRS Rd, Rm, #imm5 ---------------- */
20613 /* ---------------- ASRS Rd, Rm, #imm5 ---------------- */
20614 UInt rD = INSN0(2,0);
20615 UInt rM = INSN0(5,3);
20616 UInt imm5 = INSN0(10,6);
20617 IRTemp res = newTemp(Ity_I32);
20618 IRTemp resC = newTemp(Ity_I32);
20619 IRTemp rMt = newTemp(Ity_I32);
20620 IRTemp oldV = newTemp(Ity_I32);
20621 const HChar* wot = "???";
20622 assign(rMt, getIRegT(rM));
20623 assign(oldV, mk_armg_calculate_flag_v());
20624 /* Looks like INSN0(12,11) are the standard 'how' encoding.
20625 Could compactify if the ROR case later appears. */
20626 switch (INSN0(15,11)) {
20627 case BITS5(0,0,0,0,0):
20628 compute_result_and_C_after_LSL_by_imm5(
20629 dis_buf, &res, &resC, rMt, imm5, rM
20631 wot = "lsl";
20632 break;
20633 case BITS5(0,0,0,0,1):
20634 compute_result_and_C_after_LSR_by_imm5(
20635 dis_buf, &res, &resC, rMt, imm5, rM
20637 wot = "lsr";
20638 break;
20639 case BITS5(0,0,0,1,0):
20640 compute_result_and_C_after_ASR_by_imm5(
20641 dis_buf, &res, &resC, rMt, imm5, rM
20643 wot = "asr";
20644 break;
20645 default:
20646 /*NOTREACHED*/vassert(0);
20648 // not safe to read guest state after this point
20649 putIRegT(rD, mkexpr(res), condT);
20650 setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC, res, resC, oldV,
20651 cond_AND_notInIT_T );
20652 /* ignore buf and roll our own output */
20653 DIP("%ss r%u, r%u, #%u\n", wot, rD, rM, imm5);
20654 goto decode_success;
20657 case BITS5(1,1,1,0,0): {
20658 /* ---------------- B #simm11 ---------------- */
20659 UInt uimm11 = INSN0(10,0); uimm11 <<= 21;
20660 Int simm11 = (Int)uimm11; simm11 >>= 20;
20661 UInt dst = simm11 + guest_R15_curr_instr_notENC + 4;
20662 /* Only allowed outside or last-in IT block; SIGILL if not so. */
20663 gen_SIGILL_T_if_in_but_NLI_ITBlock(old_itstate, new_itstate);
20664 // and skip this insn if not selected; being cleverer is too
20665 // difficult
20666 mk_skip_over_T16_if_cond_is_false(condT);
20667 condT = IRTemp_INVALID;
20668 // now uncond
20669 llPutIReg(15, mkU32( dst | 1 /*CPSR.T*/ ));
20670 dres.jk_StopHere = Ijk_Boring;
20671 dres.whatNext = Dis_StopHere;
20672 DIP("b 0x%x\n", dst);
20673 goto decode_success;
20676 default:
20677 break; /* examine the next shortest prefix */
20682 /* ================ 16-bit 15:12 cases ================ */
20684 switch (INSN0(15,12)) {
20686 case BITS4(1,1,0,1): {
20687 /* ---------------- Bcond #simm8 ---------------- */
20688 UInt cond = INSN0(11,8);
20689 UInt uimm8 = INSN0(7,0); uimm8 <<= 24;
20690 Int simm8 = (Int)uimm8; simm8 >>= 23;
20691 UInt dst = simm8 + guest_R15_curr_instr_notENC + 4;
20692 if (cond != ARMCondAL && cond != ARMCondNV) {
20693 /* Not allowed in an IT block; SIGILL if so. */
20694 gen_SIGILL_T_if_in_ITBlock(old_itstate, new_itstate);
20696 IRTemp kondT = newTemp(Ity_I32);
20697 assign( kondT, mk_armg_calculate_condition(cond) );
20698 stmt( IRStmt_Exit( unop(Iop_32to1, mkexpr(kondT)),
20699 Ijk_Boring,
20700 IRConst_U32(dst | 1/*CPSR.T*/),
20701 OFFB_R15T ));
20702 llPutIReg(15, mkU32( (guest_R15_curr_instr_notENC + 2)
20703 | 1 /*CPSR.T*/ ));
20704 dres.jk_StopHere = Ijk_Boring;
20705 dres.whatNext = Dis_StopHere;
20706 DIP("b%s 0x%x\n", nCC(cond), dst);
20707 goto decode_success;
20709 break;
20712 default:
20713 break; /* hmm, nothing matched */
20717 /* ================ 16-bit misc cases ================ */
20719 switch (INSN0(15,0)) {
20720 case 0xBF00:
20721 /* ------ NOP ------ */
20722 DIP("nop\n");
20723 goto decode_success;
20724 case 0xBF10: // YIELD
20725 case 0xBF20: // WFE
20726 /* ------ WFE, YIELD ------ */
20727 /* Both appear to get used as a spin-loop hints. Do the usual thing,
20728 which is to continue after yielding. */
20729 stmt( IRStmt_Exit( unop(Iop_32to1, mkexpr(condT)),
20730 Ijk_Yield,
20731 IRConst_U32((guest_R15_curr_instr_notENC + 2)
20732 | 1 /*CPSR.T*/),
20733 OFFB_R15T ));
20734 Bool isWFE = INSN0(15,0) == 0xBF20;
20735 DIP(isWFE ? "wfe\n" : "yield\n");
20736 goto decode_success;
20737 case 0xBF40:
20738 /* ------ SEV ------ */
20739 /* Treat this as a no-op. Any matching WFEs won't really
20740 cause the host CPU to snooze; they just cause V to try to
20741 run some other thread for a while. So there's no point in
20742 really doing anything for SEV. */
20743 DIP("sev\n");
20744 goto decode_success;
20745 default:
20746 break; /* fall through */
20749 /* ----------------------------------------------------------- */
20750 /* -- -- */
20751 /* -- Thumb 32-bit integer instructions -- */
20752 /* -- -- */
20753 /* ----------------------------------------------------------- */
20755 # define INSN1(_bMax,_bMin) SLICE_UInt(((UInt)insn1), (_bMax), (_bMin))
20757 /* second 16 bits of the instruction, if any */
20758 vassert(insn1 == 0);
20759 insn1 = getUShortLittleEndianly( guest_instr+2 );
20761 anOp = Iop_INVALID; /* paranoia */
20762 anOpNm = NULL; /* paranoia */
20764 /* Change result defaults to suit 32-bit insns. */
20765 vassert(dres.whatNext == Dis_Continue);
20766 vassert(dres.len == 2);
20767 vassert(dres.continueAt == 0);
20768 dres.len = 4;
20770 /* ---------------- BL/BLX simm26 ---------------- */
20771 if (BITS5(1,1,1,1,0) == INSN0(15,11) && BITS2(1,1) == INSN1(15,14)) {
20772 UInt isBL = INSN1(12,12);
20773 UInt bS = INSN0(10,10);
20774 UInt bJ1 = INSN1(13,13);
20775 UInt bJ2 = INSN1(11,11);
20776 UInt bI1 = 1 ^ (bJ1 ^ bS);
20777 UInt bI2 = 1 ^ (bJ2 ^ bS);
20778 UInt uimm25
20779 = (bS << (1 + 1 + 10 + 11 + 1))
20780 | (bI1 << (1 + 10 + 11 + 1))
20781 | (bI2 << (10 + 11 + 1))
20782 | (INSN0(9,0) << (11 + 1))
20783 | (INSN1(10,0) << 1);
20784 uimm25 <<= 7;
20785 Int simm25 = (Int)uimm25;
20786 simm25 >>= 7;
20788 vassert(0 == (guest_R15_curr_instr_notENC & 1));
20789 UInt dst = simm25 + guest_R15_curr_instr_notENC + 4;
20791 /* One further validity case to check: in the case of BLX
20792 (not-BL), that insn1[0] must be zero. */
20793 Bool valid = True;
20794 if (isBL == 0 && INSN1(0,0) == 1) valid = False;
20795 if (valid) {
20796 /* Only allowed outside or last-in IT block; SIGILL if not so. */
20797 gen_SIGILL_T_if_in_but_NLI_ITBlock(old_itstate, new_itstate);
20798 // and skip this insn if not selected; being cleverer is too
20799 // difficult
20800 mk_skip_over_T32_if_cond_is_false(condT);
20801 condT = IRTemp_INVALID;
20802 // now uncond
20804 /* We're returning to Thumb code, hence "| 1" */
20805 putIRegT( 14, mkU32( (guest_R15_curr_instr_notENC + 4) | 1 ),
20806 IRTemp_INVALID);
20807 if (isBL) {
20808 /* BL: unconditional T -> T call */
20809 /* we're calling Thumb code, hence "| 1" */
20810 llPutIReg(15, mkU32( dst | 1 ));
20811 DIP("bl 0x%x (stay in Thumb mode)\n", dst);
20812 } else {
20813 /* BLX: unconditional T -> A call */
20814 /* we're calling ARM code, hence "& 3" to align to a
20815 valid ARM insn address */
20816 llPutIReg(15, mkU32( dst & ~3 ));
20817 DIP("blx 0x%x (switch to ARM mode)\n", dst & ~3);
20819 dres.whatNext = Dis_StopHere;
20820 dres.jk_StopHere = Ijk_Call;
20821 goto decode_success;
20825 /* ---------------- {LD,ST}M{IA,DB} ---------------- */
20826 if (0x3a2 == INSN0(15,6) // {LD,ST}MIA
20827 || 0x3a4 == INSN0(15,6)) { // {LD,ST}MDB
20828 UInt bW = INSN0(5,5); /* writeback Rn ? */
20829 UInt bL = INSN0(4,4);
20830 UInt rN = INSN0(3,0);
20831 UInt bP = INSN1(15,15); /* reglist entry for r15 */
20832 UInt bM = INSN1(14,14); /* reglist entry for r14 */
20833 UInt rLmost = INSN1(12,0); /* reglist entry for r0 .. 12 */
20834 UInt rL13 = INSN1(13,13); /* must be zero */
20835 UInt regList = 0;
20836 Bool valid = True;
20838 UInt bINC = 1;
20839 UInt bBEFORE = 0;
20840 if (INSN0(15,6) == 0x3a4) {
20841 bINC = 0;
20842 bBEFORE = 1;
20845 /* detect statically invalid cases, and construct the final
20846 reglist */
20847 if (rL13 == 1)
20848 valid = False;
20850 if (bL == 1) {
20851 regList = (bP << 15) | (bM << 14) | rLmost;
20852 if (rN == 15) valid = False;
20853 if (popcount32(regList) < 2) valid = False;
20854 if (bP == 1 && bM == 1) valid = False;
20855 if (bW == 1 && (regList & (1<<rN))) valid = False;
20856 } else {
20857 regList = (bM << 14) | rLmost;
20858 if (bP == 1) valid = False;
20859 if (rN == 15) valid = False;
20860 if (popcount32(regList) < 2) valid = False;
20861 if (bW == 1 && (regList & (1<<rN))) valid = False;
20864 if (valid) {
20865 if (bL == 1 && bP == 1) {
20866 // We'll be writing the PC. Hence:
20867 /* Only allowed outside or last-in IT block; SIGILL if not so. */
20868 gen_SIGILL_T_if_in_but_NLI_ITBlock(old_itstate, new_itstate);
20871 /* Go uncond: */
20872 mk_skip_over_T32_if_cond_is_false(condT);
20873 condT = IRTemp_INVALID;
20874 // now uncond
20876 /* Generate the IR. This might generate a write to R15. */
20877 mk_ldm_stm(False/*!arm*/, rN, bINC, bBEFORE, bW, bL, regList);
20879 if (bL == 1 && (regList & (1<<15))) {
20880 // If we wrote to R15, we have an interworking return to
20881 // deal with.
20882 llPutIReg(15, llGetIReg(15));
20883 dres.jk_StopHere = Ijk_Ret;
20884 dres.whatNext = Dis_StopHere;
20887 DIP("%sm%c%c r%u%s, {0x%04x}\n",
20888 bL == 1 ? "ld" : "st", bINC ? 'i' : 'd', bBEFORE ? 'b' : 'a',
20889 rN, bW ? "!" : "", regList);
20891 goto decode_success;
20895 /* -------------- (T3) ADD{S}.W Rd, Rn, #constT -------------- */
20896 if (INSN0(15,11) == BITS5(1,1,1,1,0)
20897 && INSN0(9,5) == BITS5(0,1,0,0,0)
20898 && INSN1(15,15) == 0) {
20899 UInt bS = INSN0(4,4);
20900 UInt rN = INSN0(3,0);
20901 UInt rD = INSN1(11,8);
20902 Bool valid = !isBadRegT(rN) && !isBadRegT(rD);
20903 /* but allow "add.w reg, sp, #constT" for reg != PC */
20904 if (!valid && rD <= 14 && rN == 13)
20905 valid = True;
20906 if (valid) {
20907 IRTemp argL = newTemp(Ity_I32);
20908 IRTemp argR = newTemp(Ity_I32);
20909 IRTemp res = newTemp(Ity_I32);
20910 UInt imm32 = thumbExpandImm_from_I0_I1(NULL, insn0, insn1);
20911 assign(argL, getIRegT(rN));
20912 assign(argR, mkU32(imm32));
20913 assign(res, binop(Iop_Add32, mkexpr(argL), mkexpr(argR)));
20914 putIRegT(rD, mkexpr(res), condT);
20915 if (bS == 1)
20916 setFlags_D1_D2( ARMG_CC_OP_ADD, argL, argR, condT );
20917 DIP("add%s.w r%u, r%u, #%u\n",
20918 bS == 1 ? "s" : "", rD, rN, imm32);
20919 goto decode_success;
20923 /* ---------------- (T4) ADDW Rd, Rn, #uimm12 -------------- */
20924 if (INSN0(15,11) == BITS5(1,1,1,1,0)
20925 && INSN0(9,4) == BITS6(1,0,0,0,0,0)
20926 && INSN1(15,15) == 0) {
20927 UInt rN = INSN0(3,0);
20928 UInt rD = INSN1(11,8);
20929 Bool valid = !isBadRegT(rN) && !isBadRegT(rD);
20930 /* but allow "addw reg, sp, #uimm12" for reg != PC */
20931 if (!valid && rD <= 14 && rN == 13)
20932 valid = True;
20933 if (valid) {
20934 IRTemp argL = newTemp(Ity_I32);
20935 IRTemp argR = newTemp(Ity_I32);
20936 IRTemp res = newTemp(Ity_I32);
20937 UInt imm12 = (INSN0(10,10) << 11) | (INSN1(14,12) << 8) | INSN1(7,0);
20938 assign(argL, getIRegT(rN));
20939 assign(argR, mkU32(imm12));
20940 assign(res, binop(Iop_Add32, mkexpr(argL), mkexpr(argR)));
20941 putIRegT(rD, mkexpr(res), condT);
20942 DIP("addw r%u, r%u, #%u\n", rD, rN, imm12);
20943 goto decode_success;
20947 /* ---------------- (T2) CMP.W Rn, #constT ---------------- */
20948 /* ---------------- (T2) CMN.W Rn, #constT ---------------- */
20949 if (INSN0(15,11) == BITS5(1,1,1,1,0)
20950 && ( INSN0(9,4) == BITS6(0,1,1,0,1,1) // CMP
20951 || INSN0(9,4) == BITS6(0,1,0,0,0,1)) // CMN
20952 && INSN1(15,15) == 0
20953 && INSN1(11,8) == BITS4(1,1,1,1)) {
20954 UInt rN = INSN0(3,0);
20955 if (rN != 15) {
20956 IRTemp argL = newTemp(Ity_I32);
20957 IRTemp argR = newTemp(Ity_I32);
20958 Bool isCMN = INSN0(9,4) == BITS6(0,1,0,0,0,1);
20959 UInt imm32 = thumbExpandImm_from_I0_I1(NULL, insn0, insn1);
20960 assign(argL, getIRegT(rN));
20961 assign(argR, mkU32(imm32));
20962 setFlags_D1_D2( isCMN ? ARMG_CC_OP_ADD : ARMG_CC_OP_SUB,
20963 argL, argR, condT );
20964 DIP("%s.w r%u, #%u\n", isCMN ? "cmn" : "cmp", rN, imm32);
20965 goto decode_success;
20969 /* -------------- (T1) TST.W Rn, #constT -------------- */
20970 /* -------------- (T1) TEQ.W Rn, #constT -------------- */
20971 if (INSN0(15,11) == BITS5(1,1,1,1,0)
20972 && ( INSN0(9,4) == BITS6(0,0,0,0,0,1) // TST
20973 || INSN0(9,4) == BITS6(0,0,1,0,0,1)) // TEQ
20974 && INSN1(15,15) == 0
20975 && INSN1(11,8) == BITS4(1,1,1,1)) {
20976 UInt rN = INSN0(3,0);
20977 if (!isBadRegT(rN)) { // yes, really, it's inconsistent with CMP.W
20978 Bool isTST = INSN0(9,4) == BITS6(0,0,0,0,0,1);
20979 IRTemp argL = newTemp(Ity_I32);
20980 IRTemp argR = newTemp(Ity_I32);
20981 IRTemp res = newTemp(Ity_I32);
20982 IRTemp oldV = newTemp(Ity_I32);
20983 IRTemp oldC = newTemp(Ity_I32);
20984 Bool updC = False;
20985 UInt imm32 = thumbExpandImm_from_I0_I1(&updC, insn0, insn1);
20986 assign(argL, getIRegT(rN));
20987 assign(argR, mkU32(imm32));
20988 assign(res, binop(isTST ? Iop_And32 : Iop_Xor32,
20989 mkexpr(argL), mkexpr(argR)));
20990 assign( oldV, mk_armg_calculate_flag_v() );
20991 assign( oldC, updC
20992 ? mkU32((imm32 >> 31) & 1)
20993 : mk_armg_calculate_flag_c() );
20994 setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC, res, oldC, oldV, condT );
20995 DIP("%s.w r%u, #%u\n", isTST ? "tst" : "teq", rN, imm32);
20996 goto decode_success;
21000 /* -------------- (T3) SUB{S}.W Rd, Rn, #constT -------------- */
21001 /* -------------- (T3) RSB{S}.W Rd, Rn, #constT -------------- */
21002 if (INSN0(15,11) == BITS5(1,1,1,1,0)
21003 && (INSN0(9,5) == BITS5(0,1,1,0,1) // SUB
21004 || INSN0(9,5) == BITS5(0,1,1,1,0)) // RSB
21005 && INSN1(15,15) == 0) {
21006 Bool isRSB = INSN0(9,5) == BITS5(0,1,1,1,0);
21007 UInt bS = INSN0(4,4);
21008 UInt rN = INSN0(3,0);
21009 UInt rD = INSN1(11,8);
21010 Bool valid = !isBadRegT(rN) && !isBadRegT(rD);
21011 /* but allow "sub{s}.w reg, sp, #constT
21012 this is (T2) of "SUB (SP minus immediate)" */
21013 if (!valid && !isRSB && rN == 13 && rD != 15)
21014 valid = True;
21015 if (valid) {
21016 IRTemp argL = newTemp(Ity_I32);
21017 IRTemp argR = newTemp(Ity_I32);
21018 IRTemp res = newTemp(Ity_I32);
21019 UInt imm32 = thumbExpandImm_from_I0_I1(NULL, insn0, insn1);
21020 assign(argL, getIRegT(rN));
21021 assign(argR, mkU32(imm32));
21022 assign(res, isRSB
21023 ? binop(Iop_Sub32, mkexpr(argR), mkexpr(argL))
21024 : binop(Iop_Sub32, mkexpr(argL), mkexpr(argR)));
21025 putIRegT(rD, mkexpr(res), condT);
21026 if (bS == 1) {
21027 if (isRSB)
21028 setFlags_D1_D2( ARMG_CC_OP_SUB, argR, argL, condT );
21029 else
21030 setFlags_D1_D2( ARMG_CC_OP_SUB, argL, argR, condT );
21032 DIP("%s%s.w r%u, r%u, #%u\n",
21033 isRSB ? "rsb" : "sub", bS == 1 ? "s" : "", rD, rN, imm32);
21034 goto decode_success;
21038 /* -------------- (T4) SUBW Rd, Rn, #uimm12 ------------------- */
21039 if (INSN0(15,11) == BITS5(1,1,1,1,0)
21040 && INSN0(9,4) == BITS6(1,0,1,0,1,0)
21041 && INSN1(15,15) == 0) {
21042 UInt rN = INSN0(3,0);
21043 UInt rD = INSN1(11,8);
21044 Bool valid = !isBadRegT(rN) && !isBadRegT(rD);
21045 /* but allow "subw sp, sp, #uimm12" */
21046 if (!valid && rD == 13 && rN == 13)
21047 valid = True;
21048 if (valid) {
21049 IRTemp argL = newTemp(Ity_I32);
21050 IRTemp argR = newTemp(Ity_I32);
21051 IRTemp res = newTemp(Ity_I32);
21052 UInt imm12 = (INSN0(10,10) << 11) | (INSN1(14,12) << 8) | INSN1(7,0);
21053 assign(argL, getIRegT(rN));
21054 assign(argR, mkU32(imm12));
21055 assign(res, binop(Iop_Sub32, mkexpr(argL), mkexpr(argR)));
21056 putIRegT(rD, mkexpr(res), condT);
21057 DIP("subw r%u, r%u, #%u\n", rD, rN, imm12);
21058 goto decode_success;
21062 /* -------------- (T1) ADC{S}.W Rd, Rn, #constT -------------- */
21063 /* -------------- (T1) SBC{S}.W Rd, Rn, #constT -------------- */
21064 if (INSN0(15,11) == BITS5(1,1,1,1,0)
21065 && ( INSN0(9,5) == BITS5(0,1,0,1,0) // ADC
21066 || INSN0(9,5) == BITS5(0,1,0,1,1)) // SBC
21067 && INSN1(15,15) == 0) {
21068 /* ADC: Rd = Rn + constT + oldC */
21069 /* SBC: Rd = Rn - constT - (oldC ^ 1) */
21070 UInt bS = INSN0(4,4);
21071 UInt rN = INSN0(3,0);
21072 UInt rD = INSN1(11,8);
21073 if (!isBadRegT(rN) && !isBadRegT(rD)) {
21074 IRTemp argL = newTemp(Ity_I32);
21075 IRTemp argR = newTemp(Ity_I32);
21076 IRTemp res = newTemp(Ity_I32);
21077 IRTemp oldC = newTemp(Ity_I32);
21078 UInt imm32 = thumbExpandImm_from_I0_I1(NULL, insn0, insn1);
21079 assign(argL, getIRegT(rN));
21080 assign(argR, mkU32(imm32));
21081 assign(oldC, mk_armg_calculate_flag_c() );
21082 const HChar* nm = "???";
21083 switch (INSN0(9,5)) {
21084 case BITS5(0,1,0,1,0): // ADC
21085 nm = "adc";
21086 assign(res,
21087 binop(Iop_Add32,
21088 binop(Iop_Add32, mkexpr(argL), mkexpr(argR)),
21089 mkexpr(oldC) ));
21090 putIRegT(rD, mkexpr(res), condT);
21091 if (bS)
21092 setFlags_D1_D2_ND( ARMG_CC_OP_ADC,
21093 argL, argR, oldC, condT );
21094 break;
21095 case BITS5(0,1,0,1,1): // SBC
21096 nm = "sbc";
21097 assign(res,
21098 binop(Iop_Sub32,
21099 binop(Iop_Sub32, mkexpr(argL), mkexpr(argR)),
21100 binop(Iop_Xor32, mkexpr(oldC), mkU32(1)) ));
21101 putIRegT(rD, mkexpr(res), condT);
21102 if (bS)
21103 setFlags_D1_D2_ND( ARMG_CC_OP_SBB,
21104 argL, argR, oldC, condT );
21105 break;
21106 default:
21107 vassert(0);
21109 DIP("%s%s.w r%u, r%u, #%u\n",
21110 nm, bS == 1 ? "s" : "", rD, rN, imm32);
21111 goto decode_success;
21115 /* -------------- (T1) ORR{S}.W Rd, Rn, #constT -------------- */
21116 /* -------------- (T1) AND{S}.W Rd, Rn, #constT -------------- */
21117 /* -------------- (T1) BIC{S}.W Rd, Rn, #constT -------------- */
21118 /* -------------- (T1) EOR{S}.W Rd, Rn, #constT -------------- */
21119 if (INSN0(15,11) == BITS5(1,1,1,1,0)
21120 && ( INSN0(9,5) == BITS5(0,0,0,1,0) // ORR
21121 || INSN0(9,5) == BITS5(0,0,0,0,0) // AND
21122 || INSN0(9,5) == BITS5(0,0,0,0,1) // BIC
21123 || INSN0(9,5) == BITS5(0,0,1,0,0) // EOR
21124 || INSN0(9,5) == BITS5(0,0,0,1,1)) // ORN
21125 && INSN1(15,15) == 0) {
21126 UInt bS = INSN0(4,4);
21127 UInt rN = INSN0(3,0);
21128 UInt rD = INSN1(11,8);
21129 if (!isBadRegT(rN) && !isBadRegT(rD)) {
21130 Bool notArgR = False;
21131 IROp op = Iop_INVALID;
21132 const HChar* nm = "???";
21133 switch (INSN0(9,5)) {
21134 case BITS5(0,0,0,1,0): op = Iop_Or32; nm = "orr"; break;
21135 case BITS5(0,0,0,0,0): op = Iop_And32; nm = "and"; break;
21136 case BITS5(0,0,0,0,1): op = Iop_And32; nm = "bic";
21137 notArgR = True; break;
21138 case BITS5(0,0,1,0,0): op = Iop_Xor32; nm = "eor"; break;
21139 case BITS5(0,0,0,1,1): op = Iop_Or32; nm = "orn";
21140 notArgR = True; break;
21141 default: vassert(0);
21143 IRTemp argL = newTemp(Ity_I32);
21144 IRTemp argR = newTemp(Ity_I32);
21145 IRTemp res = newTemp(Ity_I32);
21146 Bool updC = False;
21147 UInt imm32 = thumbExpandImm_from_I0_I1(&updC, insn0, insn1);
21148 assign(argL, getIRegT(rN));
21149 assign(argR, mkU32(notArgR ? ~imm32 : imm32));
21150 assign(res, binop(op, mkexpr(argL), mkexpr(argR)));
21151 putIRegT(rD, mkexpr(res), condT);
21152 if (bS) {
21153 IRTemp oldV = newTemp(Ity_I32);
21154 IRTemp oldC = newTemp(Ity_I32);
21155 assign( oldV, mk_armg_calculate_flag_v() );
21156 assign( oldC, updC
21157 ? mkU32((imm32 >> 31) & 1)
21158 : mk_armg_calculate_flag_c() );
21159 setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC, res, oldC, oldV,
21160 condT );
21162 DIP("%s%s.w r%u, r%u, #%u\n",
21163 nm, bS == 1 ? "s" : "", rD, rN, imm32);
21164 goto decode_success;
21168 /* ---------- (T3) ADD{S}.W Rd, Rn, Rm, {shift} ---------- */
21169 /* ---------- (T3) SUB{S}.W Rd, Rn, Rm, {shift} ---------- */
21170 /* ---------- (T3) RSB{S}.W Rd, Rn, Rm, {shift} ---------- */
21171 if (INSN0(15,9) == BITS7(1,1,1,0,1,0,1)
21172 && ( INSN0(8,5) == BITS4(1,0,0,0) // add subopc
21173 || INSN0(8,5) == BITS4(1,1,0,1) // sub subopc
21174 || INSN0(8,5) == BITS4(1,1,1,0)) // rsb subopc
21175 && INSN1(15,15) == 0) {
21176 UInt rN = INSN0(3,0);
21177 UInt rD = INSN1(11,8);
21178 UInt rM = INSN1(3,0);
21179 UInt bS = INSN0(4,4);
21180 UInt imm5 = (INSN1(14,12) << 2) | INSN1(7,6);
21181 UInt how = INSN1(5,4);
21183 Bool valid = !isBadRegT(rD) && !isBadRegT(rN) && !isBadRegT(rM);
21184 /* but allow "add.w reg, sp, reg, lsl #N for N=0..31
21185 (T3) "ADD (SP plus register) */
21186 if (!valid && INSN0(8,5) == BITS4(1,0,0,0) // add
21187 && rD != 15 && rN == 13 && imm5 <= 31 && how == 0) {
21188 valid = True;
21190 /* also allow "sub.w reg, sp, reg lsl #N for N=0 .. 5
21191 (T1) "SUB (SP minus register) */
21192 if (!valid && INSN0(8,5) == BITS4(1,1,0,1) // sub
21193 && rD != 15 && rN == 13 && imm5 <= 5 && how == 0) {
21194 valid = True;
21196 if (valid) {
21197 Bool swap = False;
21198 IROp op = Iop_INVALID;
21199 const HChar* nm = "???";
21200 switch (INSN0(8,5)) {
21201 case BITS4(1,0,0,0): op = Iop_Add32; nm = "add"; break;
21202 case BITS4(1,1,0,1): op = Iop_Sub32; nm = "sub"; break;
21203 case BITS4(1,1,1,0): op = Iop_Sub32; nm = "rsb";
21204 swap = True; break;
21205 default: vassert(0);
21208 IRTemp argL = newTemp(Ity_I32);
21209 assign(argL, getIRegT(rN));
21211 IRTemp rMt = newTemp(Ity_I32);
21212 assign(rMt, getIRegT(rM));
21214 IRTemp argR = newTemp(Ity_I32);
21215 compute_result_and_C_after_shift_by_imm5(
21216 dis_buf, &argR, NULL, rMt, how, imm5, rM
21219 IRTemp res = newTemp(Ity_I32);
21220 assign(res, swap
21221 ? binop(op, mkexpr(argR), mkexpr(argL))
21222 : binop(op, mkexpr(argL), mkexpr(argR)));
21224 putIRegT(rD, mkexpr(res), condT);
21225 if (bS) {
21226 switch (op) {
21227 case Iop_Add32:
21228 setFlags_D1_D2( ARMG_CC_OP_ADD, argL, argR, condT );
21229 break;
21230 case Iop_Sub32:
21231 if (swap)
21232 setFlags_D1_D2( ARMG_CC_OP_SUB, argR, argL, condT );
21233 else
21234 setFlags_D1_D2( ARMG_CC_OP_SUB, argL, argR, condT );
21235 break;
21236 default:
21237 vassert(0);
21241 DIP("%s%s.w r%u, r%u, %s\n",
21242 nm, bS ? "s" : "", rD, rN, dis_buf);
21243 goto decode_success;
21247 /* ---------- (T3) ADC{S}.W Rd, Rn, Rm, {shift} ---------- */
21248 /* ---------- (T2) SBC{S}.W Rd, Rn, Rm, {shift} ---------- */
21249 if (INSN0(15,9) == BITS7(1,1,1,0,1,0,1)
21250 && ( INSN0(8,5) == BITS4(1,0,1,0) // adc subopc
21251 || INSN0(8,5) == BITS4(1,0,1,1)) // sbc subopc
21252 && INSN1(15,15) == 0) {
21253 /* ADC: Rd = Rn + shifter_operand + oldC */
21254 /* SBC: Rd = Rn - shifter_operand - (oldC ^ 1) */
21255 UInt rN = INSN0(3,0);
21256 UInt rD = INSN1(11,8);
21257 UInt rM = INSN1(3,0);
21258 if (!isBadRegT(rD) && !isBadRegT(rN) && !isBadRegT(rM)) {
21259 UInt bS = INSN0(4,4);
21260 UInt imm5 = (INSN1(14,12) << 2) | INSN1(7,6);
21261 UInt how = INSN1(5,4);
21263 IRTemp argL = newTemp(Ity_I32);
21264 assign(argL, getIRegT(rN));
21266 IRTemp rMt = newTemp(Ity_I32);
21267 assign(rMt, getIRegT(rM));
21269 IRTemp oldC = newTemp(Ity_I32);
21270 assign(oldC, mk_armg_calculate_flag_c());
21272 IRTemp argR = newTemp(Ity_I32);
21273 compute_result_and_C_after_shift_by_imm5(
21274 dis_buf, &argR, NULL, rMt, how, imm5, rM
21277 const HChar* nm = "???";
21278 IRTemp res = newTemp(Ity_I32);
21279 switch (INSN0(8,5)) {
21280 case BITS4(1,0,1,0): // ADC
21281 nm = "adc";
21282 assign(res,
21283 binop(Iop_Add32,
21284 binop(Iop_Add32, mkexpr(argL), mkexpr(argR)),
21285 mkexpr(oldC) ));
21286 putIRegT(rD, mkexpr(res), condT);
21287 if (bS)
21288 setFlags_D1_D2_ND( ARMG_CC_OP_ADC,
21289 argL, argR, oldC, condT );
21290 break;
21291 case BITS4(1,0,1,1): // SBC
21292 nm = "sbc";
21293 assign(res,
21294 binop(Iop_Sub32,
21295 binop(Iop_Sub32, mkexpr(argL), mkexpr(argR)),
21296 binop(Iop_Xor32, mkexpr(oldC), mkU32(1)) ));
21297 putIRegT(rD, mkexpr(res), condT);
21298 if (bS)
21299 setFlags_D1_D2_ND( ARMG_CC_OP_SBB,
21300 argL, argR, oldC, condT );
21301 break;
21302 default:
21303 vassert(0);
21306 DIP("%s%s.w r%u, r%u, %s\n",
21307 nm, bS ? "s" : "", rD, rN, dis_buf);
21308 goto decode_success;
21312 /* ---------- (T3) AND{S}.W Rd, Rn, Rm, {shift} ---------- */
21313 /* ---------- (T3) ORR{S}.W Rd, Rn, Rm, {shift} ---------- */
21314 /* ---------- (T3) EOR{S}.W Rd, Rn, Rm, {shift} ---------- */
21315 /* ---------- (T3) BIC{S}.W Rd, Rn, Rm, {shift} ---------- */
21316 /* ---------- (T1) ORN{S}.W Rd, Rn, Rm, {shift} ---------- */
21317 if (INSN0(15,9) == BITS7(1,1,1,0,1,0,1)
21318 && ( INSN0(8,5) == BITS4(0,0,0,0) // and subopc
21319 || INSN0(8,5) == BITS4(0,0,1,0) // orr subopc
21320 || INSN0(8,5) == BITS4(0,1,0,0) // eor subopc
21321 || INSN0(8,5) == BITS4(0,0,0,1) // bic subopc
21322 || INSN0(8,5) == BITS4(0,0,1,1)) // orn subopc
21323 && INSN1(15,15) == 0) {
21324 UInt rN = INSN0(3,0);
21325 UInt rD = INSN1(11,8);
21326 UInt rM = INSN1(3,0);
21327 if (!isBadRegT(rD) && !isBadRegT(rN) && !isBadRegT(rM)) {
21328 Bool notArgR = False;
21329 IROp op = Iop_INVALID;
21330 const HChar* nm = "???";
21331 switch (INSN0(8,5)) {
21332 case BITS4(0,0,0,0): op = Iop_And32; nm = "and"; break;
21333 case BITS4(0,0,1,0): op = Iop_Or32; nm = "orr"; break;
21334 case BITS4(0,1,0,0): op = Iop_Xor32; nm = "eor"; break;
21335 case BITS4(0,0,0,1): op = Iop_And32; nm = "bic";
21336 notArgR = True; break;
21337 case BITS4(0,0,1,1): op = Iop_Or32; nm = "orn";
21338 notArgR = True; break;
21339 default: vassert(0);
21341 UInt bS = INSN0(4,4);
21342 UInt imm5 = (INSN1(14,12) << 2) | INSN1(7,6);
21343 UInt how = INSN1(5,4);
21345 IRTemp rNt = newTemp(Ity_I32);
21346 assign(rNt, getIRegT(rN));
21348 IRTemp rMt = newTemp(Ity_I32);
21349 assign(rMt, getIRegT(rM));
21351 IRTemp argR = newTemp(Ity_I32);
21352 IRTemp oldC = bS ? newTemp(Ity_I32) : IRTemp_INVALID;
21354 compute_result_and_C_after_shift_by_imm5(
21355 dis_buf, &argR, bS ? &oldC : NULL, rMt, how, imm5, rM
21358 IRTemp res = newTemp(Ity_I32);
21359 if (notArgR) {
21360 vassert(op == Iop_And32 || op == Iop_Or32);
21361 assign(res, binop(op, mkexpr(rNt),
21362 unop(Iop_Not32, mkexpr(argR))));
21363 } else {
21364 assign(res, binop(op, mkexpr(rNt), mkexpr(argR)));
21367 putIRegT(rD, mkexpr(res), condT);
21368 if (bS) {
21369 IRTemp oldV = newTemp(Ity_I32);
21370 assign( oldV, mk_armg_calculate_flag_v() );
21371 setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC, res, oldC, oldV,
21372 condT );
21375 DIP("%s%s.w r%u, r%u, %s\n",
21376 nm, bS ? "s" : "", rD, rN, dis_buf);
21377 goto decode_success;
21381 /* -------------- (T?) LSL{S}.W Rd, Rn, Rm -------------- */
21382 /* -------------- (T?) LSR{S}.W Rd, Rn, Rm -------------- */
21383 /* -------------- (T?) ASR{S}.W Rd, Rn, Rm -------------- */
21384 /* -------------- (T?) ROR{S}.W Rd, Rn, Rm -------------- */
21385 if (INSN0(15,7) == BITS9(1,1,1,1,1,0,1,0,0)
21386 && INSN1(15,12) == BITS4(1,1,1,1)
21387 && INSN1(7,4) == BITS4(0,0,0,0)) {
21388 UInt how = INSN0(6,5); // standard encoding
21389 UInt rN = INSN0(3,0);
21390 UInt rD = INSN1(11,8);
21391 UInt rM = INSN1(3,0);
21392 UInt bS = INSN0(4,4);
21393 Bool valid = !isBadRegT(rN) && !isBadRegT(rM) && !isBadRegT(rD);
21394 if (valid) {
21395 IRTemp rNt = newTemp(Ity_I32);
21396 IRTemp rMt = newTemp(Ity_I32);
21397 IRTemp res = newTemp(Ity_I32);
21398 IRTemp oldC = bS ? newTemp(Ity_I32) : IRTemp_INVALID;
21399 IRTemp oldV = bS ? newTemp(Ity_I32) : IRTemp_INVALID;
21400 const HChar* nms[4] = { "lsl", "lsr", "asr", "ror" };
21401 const HChar* nm = nms[how];
21402 assign(rNt, getIRegT(rN));
21403 assign(rMt, getIRegT(rM));
21404 compute_result_and_C_after_shift_by_reg(
21405 dis_buf, &res, bS ? &oldC : NULL,
21406 rNt, how, rMt, rN, rM
21408 if (bS)
21409 assign(oldV, mk_armg_calculate_flag_v());
21410 putIRegT(rD, mkexpr(res), condT);
21411 if (bS) {
21412 setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC, res, oldC, oldV,
21413 condT );
21415 DIP("%s%s.w r%u, r%u, r%u\n",
21416 nm, bS ? "s" : "", rD, rN, rM);
21417 goto decode_success;
21421 /* ------------ (T?) MOV{S}.W Rd, Rn, {shift} ------------ */
21422 /* ------------ (T?) MVN{S}.W Rd, Rn, {shift} ------------ */
21423 if ((INSN0(15,0) & 0xFFCF) == 0xEA4F
21424 && INSN1(15,15) == 0) {
21425 UInt rD = INSN1(11,8);
21426 UInt rN = INSN1(3,0);
21427 UInt bS = INSN0(4,4);
21428 UInt isMVN = INSN0(5,5);
21429 Bool regsOK = (bS || isMVN)
21430 ? (!isBadRegT(rD) && !isBadRegT(rN))
21431 : (rD != 15 && rN != 15 && (rD != 13 || rN != 13));
21432 if (regsOK) {
21433 UInt imm5 = (INSN1(14,12) << 2) | INSN1(7,6);
21434 UInt how = INSN1(5,4);
21436 IRTemp rNt = newTemp(Ity_I32);
21437 assign(rNt, getIRegT(rN));
21439 IRTemp oldRn = newTemp(Ity_I32);
21440 IRTemp oldC = bS ? newTemp(Ity_I32) : IRTemp_INVALID;
21441 compute_result_and_C_after_shift_by_imm5(
21442 dis_buf, &oldRn, bS ? &oldC : NULL, rNt, how, imm5, rN
21445 IRTemp res = newTemp(Ity_I32);
21446 assign(res, isMVN ? unop(Iop_Not32, mkexpr(oldRn))
21447 : mkexpr(oldRn));
21449 putIRegT(rD, mkexpr(res), condT);
21450 if (bS) {
21451 IRTemp oldV = newTemp(Ity_I32);
21452 assign( oldV, mk_armg_calculate_flag_v() );
21453 setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC, res, oldC, oldV, condT);
21455 DIP("%s%s.w r%u, %s\n",
21456 isMVN ? "mvn" : "mov", bS ? "s" : "", rD, dis_buf);
21457 goto decode_success;
21461 /* -------------- (T?) TST.W Rn, Rm, {shift} -------------- */
21462 /* -------------- (T?) TEQ.W Rn, Rm, {shift} -------------- */
21463 if (INSN0(15,9) == BITS7(1,1,1,0,1,0,1)
21464 && ( INSN0(8,4) == BITS5(0,0,0,0,1) // TST
21465 || INSN0(8,4) == BITS5(0,1,0,0,1)) // TEQ
21466 && INSN1(15,15) == 0
21467 && INSN1(11,8) == BITS4(1,1,1,1)) {
21468 UInt rN = INSN0(3,0);
21469 UInt rM = INSN1(3,0);
21470 if (!isBadRegT(rN) && !isBadRegT(rM)) {
21471 Bool isTST = INSN0(8,4) == BITS5(0,0,0,0,1);
21473 UInt how = INSN1(5,4);
21474 UInt imm5 = (INSN1(14,12) << 2) | INSN1(7,6);
21476 IRTemp argL = newTemp(Ity_I32);
21477 assign(argL, getIRegT(rN));
21479 IRTemp rMt = newTemp(Ity_I32);
21480 assign(rMt, getIRegT(rM));
21482 IRTemp argR = newTemp(Ity_I32);
21483 IRTemp oldC = newTemp(Ity_I32);
21484 compute_result_and_C_after_shift_by_imm5(
21485 dis_buf, &argR, &oldC, rMt, how, imm5, rM
21488 IRTemp oldV = newTemp(Ity_I32);
21489 assign( oldV, mk_armg_calculate_flag_v() );
21491 IRTemp res = newTemp(Ity_I32);
21492 assign(res, binop(isTST ? Iop_And32 : Iop_Xor32,
21493 mkexpr(argL), mkexpr(argR)));
21495 setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC, res, oldC, oldV,
21496 condT );
21497 DIP("%s.w r%u, %s\n", isTST ? "tst" : "teq", rN, dis_buf);
21498 goto decode_success;
21502 /* -------------- (T3) CMP.W Rn, Rm, {shift} -------------- */
21503 /* -------------- (T2) CMN.W Rn, Rm, {shift} -------------- */
21504 if (INSN0(15,9) == BITS7(1,1,1,0,1,0,1)
21505 && ( INSN0(8,4) == BITS5(1,1,0,1,1) // CMP
21506 || INSN0(8,4) == BITS5(1,0,0,0,1)) // CMN
21507 && INSN1(15,15) == 0
21508 && INSN1(11,8) == BITS4(1,1,1,1)) {
21509 UInt rN = INSN0(3,0);
21510 UInt rM = INSN1(3,0);
21511 if (!isBadRegT(rN) && !isBadRegT(rM)) {
21512 Bool isCMN = INSN0(8,4) == BITS5(1,0,0,0,1);
21513 UInt how = INSN1(5,4);
21514 UInt imm5 = (INSN1(14,12) << 2) | INSN1(7,6);
21516 IRTemp argL = newTemp(Ity_I32);
21517 assign(argL, getIRegT(rN));
21519 IRTemp rMt = newTemp(Ity_I32);
21520 assign(rMt, getIRegT(rM));
21522 IRTemp argR = newTemp(Ity_I32);
21523 compute_result_and_C_after_shift_by_imm5(
21524 dis_buf, &argR, NULL, rMt, how, imm5, rM
21527 setFlags_D1_D2( isCMN ? ARMG_CC_OP_ADD : ARMG_CC_OP_SUB,
21528 argL, argR, condT );
21530 DIP("%s.w r%u, %s\n", isCMN ? "cmn" : "cmp", rN, dis_buf);
21531 goto decode_success;
21535 /* -------------- (T2) MOV{S}.W Rd, #constT -------------- */
21536 /* -------------- (T2) MVN{S}.W Rd, #constT -------------- */
21537 if (INSN0(15,11) == BITS5(1,1,1,1,0)
21538 && ( INSN0(9,5) == BITS5(0,0,0,1,0) // MOV
21539 || INSN0(9,5) == BITS5(0,0,0,1,1)) // MVN
21540 && INSN0(3,0) == BITS4(1,1,1,1)
21541 && INSN1(15,15) == 0) {
21542 UInt rD = INSN1(11,8);
21543 if (!isBadRegT(rD)) {
21544 Bool updC = False;
21545 UInt bS = INSN0(4,4);
21546 Bool isMVN = INSN0(5,5) == 1;
21547 UInt imm32 = thumbExpandImm_from_I0_I1(&updC, insn0, insn1);
21548 IRTemp res = newTemp(Ity_I32);
21549 assign(res, mkU32(isMVN ? ~imm32 : imm32));
21550 putIRegT(rD, mkexpr(res), condT);
21551 if (bS) {
21552 IRTemp oldV = newTemp(Ity_I32);
21553 IRTemp oldC = newTemp(Ity_I32);
21554 assign( oldV, mk_armg_calculate_flag_v() );
21555 assign( oldC, updC
21556 ? mkU32((imm32 >> 31) & 1)
21557 : mk_armg_calculate_flag_c() );
21558 setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC, res, oldC, oldV,
21559 condT );
21561 DIP("%s%s.w r%u, #%u\n",
21562 isMVN ? "mvn" : "mov", bS ? "s" : "", rD, imm32);
21563 goto decode_success;
21567 /* -------------- (T3) MOVW Rd, #imm16 -------------- */
21568 if (INSN0(15,11) == BITS5(1,1,1,1,0)
21569 && INSN0(9,4) == BITS6(1,0,0,1,0,0)
21570 && INSN1(15,15) == 0) {
21571 UInt rD = INSN1(11,8);
21572 if (!isBadRegT(rD)) {
21573 UInt imm16 = (INSN0(3,0) << 12) | (INSN0(10,10) << 11)
21574 | (INSN1(14,12) << 8) | INSN1(7,0);
21575 putIRegT(rD, mkU32(imm16), condT);
21576 DIP("movw r%u, #%u\n", rD, imm16);
21577 goto decode_success;
21581 /* ---------------- MOVT Rd, #imm16 ---------------- */
21582 if (INSN0(15,11) == BITS5(1,1,1,1,0)
21583 && INSN0(9,4) == BITS6(1,0,1,1,0,0)
21584 && INSN1(15,15) == 0) {
21585 UInt rD = INSN1(11,8);
21586 if (!isBadRegT(rD)) {
21587 UInt imm16 = (INSN0(3,0) << 12) | (INSN0(10,10) << 11)
21588 | (INSN1(14,12) << 8) | INSN1(7,0);
21589 IRTemp res = newTemp(Ity_I32);
21590 assign(res,
21591 binop(Iop_Or32,
21592 binop(Iop_And32, getIRegT(rD), mkU32(0xFFFF)),
21593 mkU32(imm16 << 16)));
21594 putIRegT(rD, mkexpr(res), condT);
21595 DIP("movt r%u, #%u\n", rD, imm16);
21596 goto decode_success;
21600 /* ---------------- LD/ST reg+/-#imm8 ---------------- */
21601 /* Loads and stores of the form:
21602 op Rt, [Rn, #-imm8] or
21603 op Rt, [Rn], #+/-imm8 or
21604 op Rt, [Rn, #+/-imm8]!
21605 where op is one of
21606 ldrb ldrh ldr ldrsb ldrsh
21607 strb strh str
21609 if (INSN0(15,9) == BITS7(1,1,1,1,1,0,0) && INSN1(11,11) == 1) {
21610 Bool valid = True;
21611 Bool syned = False;
21612 Bool isST = False;
21613 IRType ty = Ity_I8;
21614 const HChar* nm = "???";
21616 switch (INSN0(8,4)) {
21617 case BITS5(0,0,0,0,0): // strb
21618 nm = "strb"; isST = True; break;
21619 case BITS5(0,0,0,0,1): // ldrb
21620 nm = "ldrb"; break;
21621 case BITS5(1,0,0,0,1): // ldrsb
21622 nm = "ldrsb"; syned = True; break;
21623 case BITS5(0,0,0,1,0): // strh
21624 nm = "strh"; ty = Ity_I16; isST = True; break;
21625 case BITS5(0,0,0,1,1): // ldrh
21626 nm = "ldrh"; ty = Ity_I16; break;
21627 case BITS5(1,0,0,1,1): // ldrsh
21628 nm = "ldrsh"; ty = Ity_I16; syned = True; break;
21629 case BITS5(0,0,1,0,0): // str
21630 nm = "str"; ty = Ity_I32; isST = True; break;
21631 case BITS5(0,0,1,0,1):
21632 nm = "ldr"; ty = Ity_I32; break; // ldr
21633 default:
21634 valid = False; break;
21637 UInt rN = INSN0(3,0);
21638 UInt rT = INSN1(15,12);
21639 UInt bP = INSN1(10,10);
21640 UInt bU = INSN1(9,9);
21641 UInt bW = INSN1(8,8);
21642 UInt imm8 = INSN1(7,0);
21643 Bool loadsPC = False;
21645 if (valid) {
21646 if (bP == 1 && bU == 1 && bW == 0)
21647 valid = False;
21648 if (bP == 0 && bW == 0)
21649 valid = False;
21650 if (rN == 15)
21651 valid = False;
21652 if (bW == 1 && rN == rT)
21653 valid = False;
21654 if (ty == Ity_I8 || ty == Ity_I16) {
21655 if (isBadRegT(rT))
21656 valid = False;
21657 } else {
21658 /* ty == Ity_I32 */
21659 if (isST && rT == 15)
21660 valid = False;
21661 if (!isST && rT == 15)
21662 loadsPC = True;
21666 if (valid) {
21667 // if it's a branch, it can't happen in the middle of an IT block
21668 // Also, if it is a branch, make it unconditional at this point.
21669 // Doing conditional branches in-line is too complex (for now)
21670 if (loadsPC) {
21671 gen_SIGILL_T_if_in_but_NLI_ITBlock(old_itstate, new_itstate);
21672 // go uncond
21673 mk_skip_over_T32_if_cond_is_false(condT);
21674 condT = IRTemp_INVALID;
21675 // now uncond
21678 IRTemp preAddr = newTemp(Ity_I32);
21679 assign(preAddr, getIRegT(rN));
21681 IRTemp postAddr = newTemp(Ity_I32);
21682 assign(postAddr, binop(bU == 1 ? Iop_Add32 : Iop_Sub32,
21683 mkexpr(preAddr), mkU32(imm8)));
21685 IRTemp transAddr = bP == 1 ? postAddr : preAddr;
21687 if (isST) {
21689 /* Store. If necessary, update the base register before
21690 the store itself, so that the common idiom of "str rX,
21691 [sp, #-4]!" (store rX at sp-4, then do new sp = sp-4,
21692 a.k.a "push rX") doesn't cause Memcheck to complain
21693 that the access is below the stack pointer. Also, not
21694 updating sp before the store confuses Valgrind's
21695 dynamic stack-extending logic. So do it before the
21696 store. Hence we need to snarf the store data before
21697 doing the basereg update. */
21699 /* get hold of the data to be stored */
21700 IRTemp oldRt = newTemp(Ity_I32);
21701 assign(oldRt, getIRegT(rT));
21703 /* Update Rn if necessary. */
21704 if (bW == 1) {
21705 vassert(rN != rT); // assured by validity check above
21706 putIRegT(rN, mkexpr(postAddr), condT);
21709 /* generate the transfer */
21710 IRExpr* data = NULL;
21711 switch (ty) {
21712 case Ity_I8:
21713 data = unop(Iop_32to8, mkexpr(oldRt));
21714 break;
21715 case Ity_I16:
21716 data = unop(Iop_32to16, mkexpr(oldRt));
21717 break;
21718 case Ity_I32:
21719 data = mkexpr(oldRt);
21720 break;
21721 default:
21722 vassert(0);
21724 storeGuardedLE(mkexpr(transAddr), data, condT);
21726 } else {
21728 /* Load. */
21729 IRTemp llOldRt = newTemp(Ity_I32);
21730 assign(llOldRt, llGetIReg(rT));
21732 /* generate the transfer */
21733 IRTemp newRt = newTemp(Ity_I32);
21734 IRLoadGOp widen = ILGop_INVALID;
21735 switch (ty) {
21736 case Ity_I8:
21737 widen = syned ? ILGop_8Sto32 : ILGop_8Uto32; break;
21738 case Ity_I16:
21739 widen = syned ? ILGop_16Sto32 : ILGop_16Uto32; break;
21740 case Ity_I32:
21741 widen = ILGop_Ident32; break;
21742 default:
21743 vassert(0);
21745 loadGuardedLE(newRt, widen,
21746 mkexpr(transAddr), mkexpr(llOldRt), condT);
21747 if (rT == 15) {
21748 vassert(loadsPC);
21749 /* We'll do the write to the PC just below */
21750 } else {
21751 vassert(!loadsPC);
21752 /* IRTemp_INVALID is OK here because in the case where
21753 condT is false at run time, we're just putting the
21754 old rT value back. */
21755 putIRegT(rT, mkexpr(newRt), IRTemp_INVALID);
21758 /* Update Rn if necessary. */
21759 if (bW == 1) {
21760 vassert(rN != rT); // assured by validity check above
21761 putIRegT(rN, mkexpr(postAddr), condT);
21764 if (loadsPC) {
21765 /* Presumably this is an interworking branch. */
21766 vassert(rN != 15); // assured by validity check above
21767 vassert(rT == 15);
21768 vassert(condT == IRTemp_INVALID); /* due to check above */
21769 llPutIReg(15, mkexpr(newRt));
21770 dres.jk_StopHere = Ijk_Boring; /* or _Ret ? */
21771 dres.whatNext = Dis_StopHere;
21775 if (bP == 1 && bW == 0) {
21776 DIP("%s.w r%u, [r%u, #%c%u]\n",
21777 nm, rT, rN, bU ? '+' : '-', imm8);
21779 else if (bP == 1 && bW == 1) {
21780 DIP("%s.w r%u, [r%u, #%c%u]!\n",
21781 nm, rT, rN, bU ? '+' : '-', imm8);
21783 else {
21784 vassert(bP == 0 && bW == 1);
21785 DIP("%s.w r%u, [r%u], #%c%u\n",
21786 nm, rT, rN, bU ? '+' : '-', imm8);
21789 goto decode_success;
21793 /* ------------- LD/ST reg+(reg<<imm2) ------------- */
21794 /* Loads and stores of the form:
21795 op Rt, [Rn, Rm, LSL #imm8]
21796 where op is one of
21797 ldrb ldrh ldr ldrsb ldrsh
21798 strb strh str
21800 if (INSN0(15,9) == BITS7(1,1,1,1,1,0,0)
21801 && INSN1(11,6) == BITS6(0,0,0,0,0,0)) {
21802 Bool valid = True;
21803 Bool syned = False;
21804 Bool isST = False;
21805 IRType ty = Ity_I8;
21806 const HChar* nm = "???";
21808 switch (INSN0(8,4)) {
21809 case BITS5(0,0,0,0,0): // strb
21810 nm = "strb"; isST = True; break;
21811 case BITS5(0,0,0,0,1): // ldrb
21812 nm = "ldrb"; break;
21813 case BITS5(1,0,0,0,1): // ldrsb
21814 nm = "ldrsb"; syned = True; break;
21815 case BITS5(0,0,0,1,0): // strh
21816 nm = "strh"; ty = Ity_I16; isST = True; break;
21817 case BITS5(0,0,0,1,1): // ldrh
21818 nm = "ldrh"; ty = Ity_I16; break;
21819 case BITS5(1,0,0,1,1): // ldrsh
21820 nm = "ldrsh"; ty = Ity_I16; syned = True; break;
21821 case BITS5(0,0,1,0,0): // str
21822 nm = "str"; ty = Ity_I32; isST = True; break;
21823 case BITS5(0,0,1,0,1):
21824 nm = "ldr"; ty = Ity_I32; break; // ldr
21825 default:
21826 valid = False; break;
21829 UInt rN = INSN0(3,0);
21830 UInt rM = INSN1(3,0);
21831 UInt rT = INSN1(15,12);
21832 UInt imm2 = INSN1(5,4);
21833 Bool loadsPC = False;
21835 if (ty == Ity_I8 || ty == Ity_I16) {
21836 /* all 8- and 16-bit load and store cases have the
21837 same exclusion set. */
21838 if (rN == 15 || isBadRegT(rT) || isBadRegT(rM))
21839 valid = False;
21840 } else {
21841 vassert(ty == Ity_I32);
21842 if (rN == 15 || isBadRegT(rM))
21843 valid = False;
21844 if (isST && rT == 15)
21845 valid = False;
21846 /* If it is a load and rT is 15, that's only allowable if we
21847 not in an IT block, or are the last in it. Need to insert
21848 a dynamic check for that. */
21849 if (!isST && rT == 15)
21850 loadsPC = True;
21853 if (valid) {
21854 // if it's a branch, it can't happen in the middle of an IT block
21855 // Also, if it is a branch, make it unconditional at this point.
21856 // Doing conditional branches in-line is too complex (for now)
21857 if (loadsPC) {
21858 gen_SIGILL_T_if_in_but_NLI_ITBlock(old_itstate, new_itstate);
21859 // go uncond
21860 mk_skip_over_T32_if_cond_is_false(condT);
21861 condT = IRTemp_INVALID;
21862 // now uncond
21865 IRTemp transAddr = newTemp(Ity_I32);
21866 assign(transAddr,
21867 binop( Iop_Add32,
21868 getIRegT(rN),
21869 binop(Iop_Shl32, getIRegT(rM), mkU8(imm2)) ));
21871 if (isST) {
21873 /* get hold of the data to be stored */
21874 IRTemp oldRt = newTemp(Ity_I32);
21875 assign(oldRt, getIRegT(rT));
21877 /* generate the transfer */
21878 IRExpr* data = NULL;
21879 switch (ty) {
21880 case Ity_I8:
21881 data = unop(Iop_32to8, mkexpr(oldRt));
21882 break;
21883 case Ity_I16:
21884 data = unop(Iop_32to16, mkexpr(oldRt));
21885 break;
21886 case Ity_I32:
21887 data = mkexpr(oldRt);
21888 break;
21889 default:
21890 vassert(0);
21892 storeGuardedLE(mkexpr(transAddr), data, condT);
21894 } else {
21896 /* Load. */
21897 IRTemp llOldRt = newTemp(Ity_I32);
21898 assign(llOldRt, llGetIReg(rT));
21900 /* generate the transfer */
21901 IRTemp newRt = newTemp(Ity_I32);
21902 IRLoadGOp widen = ILGop_INVALID;
21903 switch (ty) {
21904 case Ity_I8:
21905 widen = syned ? ILGop_8Sto32 : ILGop_8Uto32; break;
21906 case Ity_I16:
21907 widen = syned ? ILGop_16Sto32 : ILGop_16Uto32; break;
21908 case Ity_I32:
21909 widen = ILGop_Ident32; break;
21910 default:
21911 vassert(0);
21913 loadGuardedLE(newRt, widen,
21914 mkexpr(transAddr), mkexpr(llOldRt), condT);
21916 if (rT == 15) {
21917 vassert(loadsPC);
21918 /* We'll do the write to the PC just below */
21919 } else {
21920 vassert(!loadsPC);
21921 /* IRTemp_INVALID is OK here because in the case where
21922 condT is false at run time, we're just putting the
21923 old rT value back. */
21924 putIRegT(rT, mkexpr(newRt), IRTemp_INVALID);
21927 if (loadsPC) {
21928 /* Presumably this is an interworking branch. */
21929 vassert(rN != 15); // assured by validity check above
21930 vassert(rT == 15);
21931 vassert(condT == IRTemp_INVALID); /* due to check above */
21932 llPutIReg(15, mkexpr(newRt));
21933 dres.jk_StopHere = Ijk_Boring; /* or _Ret ? */
21934 dres.whatNext = Dis_StopHere;
21938 DIP("%s.w r%u, [r%u, r%u, LSL #%u]\n",
21939 nm, rT, rN, rM, imm2);
21941 goto decode_success;
21945 /* --------------- LD/ST reg+imm12 --------------- */
21946 /* Loads and stores of the form:
21947 op Rt, [Rn, #+-imm12]
21948 where op is one of
21949 ldrb ldrh ldr ldrsb ldrsh
21950 strb strh str
21952 if (INSN0(15,9) == BITS7(1,1,1,1,1,0,0)) {
21953 Bool valid = True;
21954 Bool syned = INSN0(8,8) == 1;
21955 Bool isST = False;
21956 IRType ty = Ity_I8;
21957 UInt bU = INSN0(7,7); // 1: +imm 0: -imm
21958 // -imm is only supported by literal versions
21959 const HChar* nm = "???";
21961 switch (INSN0(6,4)) {
21962 case BITS3(0,0,0): // strb
21963 nm = "strb"; isST = True; break;
21964 case BITS3(0,0,1): // ldrb
21965 nm = syned ? "ldrsb" : "ldrb"; break;
21966 case BITS3(0,1,0): // strh
21967 nm = "strh"; ty = Ity_I16; isST = True; break;
21968 case BITS3(0,1,1): // ldrh
21969 nm = syned ? "ldrsh" : "ldrh"; ty = Ity_I16; break;
21970 case BITS3(1,0,0): // str
21971 nm = "str"; ty = Ity_I32; isST = True; break;
21972 case BITS3(1,0,1):
21973 nm = "ldr"; ty = Ity_I32; break; // ldr
21974 default:
21975 valid = False; break;
21978 UInt rN = INSN0(3,0);
21979 UInt rT = INSN1(15,12);
21980 UInt imm12 = INSN1(11,0);
21981 Bool loadsPC = False;
21983 if (rN != 15 && bU == 0) {
21984 // only pc supports #-imm12
21985 valid = False;
21988 if (isST) {
21989 if (syned) valid = False;
21990 if (rN == 15 || rT == 15)
21991 valid = False;
21992 } else {
21993 /* For a 32-bit load, rT == 15 is only allowable if we are not
21994 in an IT block, or are the last in it. Need to insert
21995 a dynamic check for that. Also, in this particular
21996 case, rN == 15 is allowable. In this case however, the
21997 value obtained for rN is (apparently)
21998 "word-align(address of current insn + 4)". */
21999 if (rT == 15) {
22000 if (ty == Ity_I32)
22001 loadsPC = True;
22002 else // Can't do it for B/H loads
22003 valid = False;
22007 if (valid) {
22008 // if it's a branch, it can't happen in the middle of an IT block
22009 // Also, if it is a branch, make it unconditional at this point.
22010 // Doing conditional branches in-line is too complex (for now)
22011 if (loadsPC) {
22012 gen_SIGILL_T_if_in_but_NLI_ITBlock(old_itstate, new_itstate);
22013 // go uncond
22014 mk_skip_over_T32_if_cond_is_false(condT);
22015 condT = IRTemp_INVALID;
22016 // now uncond
22019 IRTemp rNt = newTemp(Ity_I32);
22020 if (rN == 15) {
22021 vassert(!isST);
22022 assign(rNt, binop(Iop_And32, getIRegT(15), mkU32(~3)));
22023 } else {
22024 assign(rNt, getIRegT(rN));
22027 IRTemp transAddr = newTemp(Ity_I32);
22028 assign(transAddr,
22029 binop(bU == 1 ? Iop_Add32 : Iop_Sub32,
22030 mkexpr(rNt), mkU32(imm12)));
22032 IRTemp oldRt = newTemp(Ity_I32);
22033 assign(oldRt, getIRegT(rT));
22035 IRTemp llOldRt = newTemp(Ity_I32);
22036 assign(llOldRt, llGetIReg(rT));
22038 if (isST) {
22039 IRExpr* data = NULL;
22040 switch (ty) {
22041 case Ity_I8:
22042 data = unop(Iop_32to8, mkexpr(oldRt));
22043 break;
22044 case Ity_I16:
22045 data = unop(Iop_32to16, mkexpr(oldRt));
22046 break;
22047 case Ity_I32:
22048 data = mkexpr(oldRt);
22049 break;
22050 default:
22051 vassert(0);
22053 storeGuardedLE(mkexpr(transAddr), data, condT);
22054 } else {
22055 IRTemp newRt = newTemp(Ity_I32);
22056 IRLoadGOp widen = ILGop_INVALID;
22057 switch (ty) {
22058 case Ity_I8:
22059 widen = syned ? ILGop_8Sto32 : ILGop_8Uto32; break;
22060 case Ity_I16:
22061 widen = syned ? ILGop_16Sto32 : ILGop_16Uto32; break;
22062 case Ity_I32:
22063 widen = ILGop_Ident32; break;
22064 default:
22065 vassert(0);
22067 loadGuardedLE(newRt, widen,
22068 mkexpr(transAddr), mkexpr(llOldRt), condT);
22069 if (rT == 15) {
22070 vassert(loadsPC);
22071 /* We'll do the write to the PC just below */
22072 } else {
22073 vassert(!loadsPC);
22074 /* IRTemp_INVALID is OK here because in the case where
22075 condT is false at run time, we're just putting the
22076 old rT value back. */
22077 putIRegT(rT, mkexpr(newRt), IRTemp_INVALID);
22080 if (loadsPC) {
22081 /* Presumably this is an interworking branch. */
22082 vassert(rT == 15);
22083 vassert(condT == IRTemp_INVALID); /* due to check above */
22084 llPutIReg(15, mkexpr(newRt));
22085 dres.jk_StopHere = Ijk_Boring;
22086 dres.whatNext = Dis_StopHere;
22090 DIP("%s.w r%u, [r%u, +#%u]\n", nm, rT, rN, imm12);
22092 goto decode_success;
22096 /* -------------- LDRD/STRD reg+/-#imm8 -------------- */
22097 /* Doubleword loads and stores of the form:
22098 ldrd/strd Rt, Rt2, [Rn, #+/-imm8] or
22099 ldrd/strd Rt, Rt2, [Rn], #+/-imm8 or
22100 ldrd/strd Rt, Rt2, [Rn, #+/-imm8]!
22102 if (INSN0(15,9) == BITS7(1,1,1,0,1,0,0) && INSN0(6,6) == 1) {
22103 UInt bP = INSN0(8,8);
22104 UInt bU = INSN0(7,7);
22105 UInt bW = INSN0(5,5);
22106 UInt bL = INSN0(4,4); // 1: load 0: store
22107 UInt rN = INSN0(3,0);
22108 UInt rT = INSN1(15,12);
22109 UInt rT2 = INSN1(11,8);
22110 UInt imm8 = INSN1(7,0);
22112 Bool valid = True;
22113 if (bP == 0 && bW == 0) valid = False;
22114 if (bW == 1 && (rN == rT || rN == rT2)) valid = False;
22115 if (isBadRegT(rT) || isBadRegT(rT2)) valid = False;
22116 if (bL == 1 && rT == rT2) valid = False;
22117 /* It's OK to use PC as the base register only in the
22118 following case: ldrd Rt, Rt2, [PC, #+/-imm8] */
22119 if (rN == 15 && (bL == 0/*store*/
22120 || bW == 1/*wb*/)) valid = False;
22122 if (valid) {
22123 IRTemp preAddr = newTemp(Ity_I32);
22124 assign(preAddr, 15 == rN
22125 ? binop(Iop_And32, getIRegT(15), mkU32(~3U))
22126 : getIRegT(rN));
22128 IRTemp postAddr = newTemp(Ity_I32);
22129 assign(postAddr, binop(bU == 1 ? Iop_Add32 : Iop_Sub32,
22130 mkexpr(preAddr), mkU32(imm8 << 2)));
22132 IRTemp transAddr = bP == 1 ? postAddr : preAddr;
22134 /* For almost all cases, we do the writeback after the transfers.
22135 However, that leaves the stack "uncovered" in cases like:
22136 strd rD, [sp, #-8]
22137 strd rD, [sp, #-16]
22138 In which case, do the writeback to SP now, instead of later.
22139 This is bad in that it makes the insn non-restartable if the
22140 accesses fault, but at least keeps Memcheck happy. */
22141 Bool writeback_already_done = False;
22142 if (bL == 0/*store*/ && bW == 1/*wb*/
22143 && rN == 13 && rN != rT && rN != rT2
22144 && bU == 0/*minus*/
22145 && ((imm8 << 2) == 8 || (imm8 << 2) == 16)) {
22146 putIRegT(rN, mkexpr(postAddr), condT);
22147 writeback_already_done = True;
22150 if (bL == 0) {
22151 IRTemp oldRt = newTemp(Ity_I32);
22152 IRTemp oldRt2 = newTemp(Ity_I32);
22153 assign(oldRt, getIRegT(rT));
22154 assign(oldRt2, getIRegT(rT2));
22155 storeGuardedLE( mkexpr(transAddr),
22156 mkexpr(oldRt), condT );
22157 storeGuardedLE( binop(Iop_Add32, mkexpr(transAddr), mkU32(4)),
22158 mkexpr(oldRt2), condT );
22159 } else {
22160 IRTemp oldRt = newTemp(Ity_I32);
22161 IRTemp oldRt2 = newTemp(Ity_I32);
22162 IRTemp newRt = newTemp(Ity_I32);
22163 IRTemp newRt2 = newTemp(Ity_I32);
22164 assign(oldRt, llGetIReg(rT));
22165 assign(oldRt2, llGetIReg(rT2));
22166 loadGuardedLE( newRt, ILGop_Ident32,
22167 mkexpr(transAddr),
22168 mkexpr(oldRt), condT );
22169 loadGuardedLE( newRt2, ILGop_Ident32,
22170 binop(Iop_Add32, mkexpr(transAddr), mkU32(4)),
22171 mkexpr(oldRt2), condT );
22172 /* Put unconditionally, since we already switched on the condT
22173 in the guarded loads. */
22174 putIRegT(rT, mkexpr(newRt), IRTemp_INVALID);
22175 putIRegT(rT2, mkexpr(newRt2), IRTemp_INVALID);
22178 if (bW == 1 && !writeback_already_done) {
22179 putIRegT(rN, mkexpr(postAddr), condT);
22182 const HChar* nm = bL ? "ldrd" : "strd";
22184 if (bP == 1 && bW == 0) {
22185 DIP("%s.w r%u, r%u, [r%u, #%c%u]\n",
22186 nm, rT, rT2, rN, bU ? '+' : '-', imm8 << 2);
22188 else if (bP == 1 && bW == 1) {
22189 DIP("%s.w r%u, r%u, [r%u, #%c%u]!\n",
22190 nm, rT, rT2, rN, bU ? '+' : '-', imm8 << 2);
22192 else {
22193 vassert(bP == 0 && bW == 1);
22194 DIP("%s.w r%u, r%u, [r%u], #%c%u\n",
22195 nm, rT, rT2, rN, bU ? '+' : '-', imm8 << 2);
22198 goto decode_success;
22202 /* -------------- (T3) Bcond.W label -------------- */
22203 /* This variant carries its own condition, so can't be part of an
22204 IT block ... */
22205 if (INSN0(15,11) == BITS5(1,1,1,1,0)
22206 && INSN1(15,14) == BITS2(1,0)
22207 && INSN1(12,12) == 0) {
22208 UInt cond = INSN0(9,6);
22209 if (cond != ARMCondAL && cond != ARMCondNV) {
22210 UInt uimm21
22211 = (INSN0(10,10) << (1 + 1 + 6 + 11 + 1))
22212 | (INSN1(11,11) << (1 + 6 + 11 + 1))
22213 | (INSN1(13,13) << (6 + 11 + 1))
22214 | (INSN0(5,0) << (11 + 1))
22215 | (INSN1(10,0) << 1);
22216 uimm21 <<= 11;
22217 Int simm21 = (Int)uimm21;
22218 simm21 >>= 11;
22220 vassert(0 == (guest_R15_curr_instr_notENC & 1));
22221 UInt dst = simm21 + guest_R15_curr_instr_notENC + 4;
22223 /* Not allowed in an IT block; SIGILL if so. */
22224 gen_SIGILL_T_if_in_ITBlock(old_itstate, new_itstate);
22226 IRTemp kondT = newTemp(Ity_I32);
22227 assign( kondT, mk_armg_calculate_condition(cond) );
22228 stmt( IRStmt_Exit( unop(Iop_32to1, mkexpr(kondT)),
22229 Ijk_Boring,
22230 IRConst_U32(dst | 1/*CPSR.T*/),
22231 OFFB_R15T ));
22232 llPutIReg(15, mkU32( (guest_R15_curr_instr_notENC + 4)
22233 | 1 /*CPSR.T*/ ));
22234 dres.jk_StopHere = Ijk_Boring;
22235 dres.whatNext = Dis_StopHere;
22236 DIP("b%s.w 0x%x\n", nCC(cond), dst);
22237 goto decode_success;
22241 /* ---------------- (T4) B.W label ---------------- */
22242 /* ... whereas this variant doesn't carry its own condition, so it
22243 has to be either unconditional or the conditional by virtue of
22244 being the last in an IT block. The upside is that there's 4
22245 more bits available for the jump offset, so it has a 16-times
22246 greater branch range than the T3 variant. */
22247 if (INSN0(15,11) == BITS5(1,1,1,1,0)
22248 && INSN1(15,14) == BITS2(1,0)
22249 && INSN1(12,12) == 1) {
22250 if (1) {
22251 UInt bS = INSN0(10,10);
22252 UInt bJ1 = INSN1(13,13);
22253 UInt bJ2 = INSN1(11,11);
22254 UInt bI1 = 1 ^ (bJ1 ^ bS);
22255 UInt bI2 = 1 ^ (bJ2 ^ bS);
22256 UInt uimm25
22257 = (bS << (1 + 1 + 10 + 11 + 1))
22258 | (bI1 << (1 + 10 + 11 + 1))
22259 | (bI2 << (10 + 11 + 1))
22260 | (INSN0(9,0) << (11 + 1))
22261 | (INSN1(10,0) << 1);
22262 uimm25 <<= 7;
22263 Int simm25 = (Int)uimm25;
22264 simm25 >>= 7;
22266 vassert(0 == (guest_R15_curr_instr_notENC & 1));
22267 UInt dst = simm25 + guest_R15_curr_instr_notENC + 4;
22269 /* If in an IT block, must be the last insn. */
22270 gen_SIGILL_T_if_in_but_NLI_ITBlock(old_itstate, new_itstate);
22272 // go uncond
22273 mk_skip_over_T32_if_cond_is_false(condT);
22274 condT = IRTemp_INVALID;
22275 // now uncond
22277 // branch to dst
22278 llPutIReg(15, mkU32( dst | 1 /*CPSR.T*/ ));
22279 dres.jk_StopHere = Ijk_Boring;
22280 dres.whatNext = Dis_StopHere;
22281 DIP("b.w 0x%x\n", dst);
22282 goto decode_success;
22286 /* ------------------ TBB, TBH ------------------ */
22287 if (INSN0(15,4) == 0xE8D && INSN1(15,5) == 0x780) {
22288 UInt rN = INSN0(3,0);
22289 UInt rM = INSN1(3,0);
22290 UInt bH = INSN1(4,4);
22291 if (bH/*ATC*/ || (rN != 13 && !isBadRegT(rM))) {
22292 /* Must be last or not-in IT block */
22293 gen_SIGILL_T_if_in_but_NLI_ITBlock(old_itstate, new_itstate);
22294 /* Go uncond */
22295 mk_skip_over_T32_if_cond_is_false(condT);
22296 condT = IRTemp_INVALID;
22298 IRExpr* ea
22299 = binop(Iop_Add32,
22300 getIRegT(rN),
22301 bH ? binop(Iop_Shl32, getIRegT(rM), mkU8(1))
22302 : getIRegT(rM));
22304 IRTemp delta = newTemp(Ity_I32);
22305 if (bH) {
22306 assign(delta, unop(Iop_16Uto32, loadLE(Ity_I16, ea)));
22307 } else {
22308 assign(delta, unop(Iop_8Uto32, loadLE(Ity_I8, ea)));
22311 llPutIReg(
22313 binop(Iop_Or32,
22314 binop(Iop_Add32,
22315 getIRegT(15),
22316 binop(Iop_Shl32, mkexpr(delta), mkU8(1))
22318 mkU32(1)
22320 dres.jk_StopHere = Ijk_Boring;
22321 dres.whatNext = Dis_StopHere;
22322 DIP("tb%c [r%u, r%u%s]\n",
22323 bH ? 'h' : 'b', rN, rM, bH ? ", LSL #1" : "");
22324 goto decode_success;
22328 /* ------------------ UBFX ------------------ */
22329 /* ------------------ SBFX ------------------ */
22330 /* There's also ARM versions of same, but it doesn't seem worth the
22331 hassle to common up the handling (it's only a couple of C
22332 statements). */
22333 if ((INSN0(15,4) == 0xF3C // UBFX
22334 || INSN0(15,4) == 0xF34) // SBFX
22335 && INSN1(15,15) == 0 && INSN1(5,5) == 0) {
22336 UInt rN = INSN0(3,0);
22337 UInt rD = INSN1(11,8);
22338 UInt lsb = (INSN1(14,12) << 2) | INSN1(7,6);
22339 UInt wm1 = INSN1(4,0);
22340 UInt msb = lsb + wm1;
22341 if (!isBadRegT(rD) && !isBadRegT(rN) && msb <= 31) {
22342 Bool isU = INSN0(15,4) == 0xF3C;
22343 IRTemp src = newTemp(Ity_I32);
22344 IRTemp tmp = newTemp(Ity_I32);
22345 IRTemp res = newTemp(Ity_I32);
22346 UInt mask = ((1 << wm1) - 1) + (1 << wm1);
22347 vassert(msb >= 0 && msb <= 31);
22348 vassert(mask != 0); // guaranteed by msb being in 0 .. 31 inclusive
22350 assign(src, getIRegT(rN));
22351 assign(tmp, binop(Iop_And32,
22352 binop(Iop_Shr32, mkexpr(src), mkU8(lsb)),
22353 mkU32(mask)));
22354 assign(res, binop(isU ? Iop_Shr32 : Iop_Sar32,
22355 binop(Iop_Shl32, mkexpr(tmp), mkU8(31-wm1)),
22356 mkU8(31-wm1)));
22358 putIRegT(rD, mkexpr(res), condT);
22360 DIP("%s r%u, r%u, #%u, #%u\n",
22361 isU ? "ubfx" : "sbfx", rD, rN, lsb, wm1 + 1);
22362 goto decode_success;
22366 /* ------------------ UXTB ------------------ */
22367 /* ------------------ UXTH ------------------ */
22368 /* ------------------ SXTB ------------------ */
22369 /* ------------------ SXTH ------------------ */
22370 /* ----------------- UXTB16 ----------------- */
22371 /* ----------------- SXTB16 ----------------- */
22372 /* FIXME: this is an exact duplicate of the ARM version. They
22373 should be commoned up. */
22374 if ((INSN0(15,0) == 0xFA5F // UXTB
22375 || INSN0(15,0) == 0xFA1F // UXTH
22376 || INSN0(15,0) == 0xFA4F // SXTB
22377 || INSN0(15,0) == 0xFA0F // SXTH
22378 || INSN0(15,0) == 0xFA3F // UXTB16
22379 || INSN0(15,0) == 0xFA2F) // SXTB16
22380 && INSN1(15,12) == BITS4(1,1,1,1)
22381 && INSN1(7,6) == BITS2(1,0)) {
22382 UInt rD = INSN1(11,8);
22383 UInt rM = INSN1(3,0);
22384 UInt rot = INSN1(5,4);
22385 if (!isBadRegT(rD) && !isBadRegT(rM)) {
22386 const HChar* nm = "???";
22387 IRTemp srcT = newTemp(Ity_I32);
22388 IRTemp rotT = newTemp(Ity_I32);
22389 IRTemp dstT = newTemp(Ity_I32);
22390 assign(srcT, getIRegT(rM));
22391 assign(rotT, genROR32(srcT, 8 * rot));
22392 switch (INSN0(15,0)) {
22393 case 0xFA5F: // UXTB
22394 nm = "uxtb";
22395 assign(dstT, unop(Iop_8Uto32,
22396 unop(Iop_32to8, mkexpr(rotT))));
22397 break;
22398 case 0xFA1F: // UXTH
22399 nm = "uxth";
22400 assign(dstT, unop(Iop_16Uto32,
22401 unop(Iop_32to16, mkexpr(rotT))));
22402 break;
22403 case 0xFA4F: // SXTB
22404 nm = "sxtb";
22405 assign(dstT, unop(Iop_8Sto32,
22406 unop(Iop_32to8, mkexpr(rotT))));
22407 break;
22408 case 0xFA0F: // SXTH
22409 nm = "sxth";
22410 assign(dstT, unop(Iop_16Sto32,
22411 unop(Iop_32to16, mkexpr(rotT))));
22412 break;
22413 case 0xFA3F: // UXTB16
22414 nm = "uxtb16";
22415 assign(dstT, binop(Iop_And32, mkexpr(rotT),
22416 mkU32(0x00FF00FF)));
22417 break;
22418 case 0xFA2F: { // SXTB16
22419 nm = "sxtb16";
22420 IRTemp lo32 = newTemp(Ity_I32);
22421 IRTemp hi32 = newTemp(Ity_I32);
22422 assign(lo32, binop(Iop_And32, mkexpr(rotT), mkU32(0xFF)));
22423 assign(hi32, binop(Iop_Shr32, mkexpr(rotT), mkU8(16)));
22424 assign(
22425 dstT,
22426 binop(Iop_Or32,
22427 binop(Iop_And32,
22428 unop(Iop_8Sto32,
22429 unop(Iop_32to8, mkexpr(lo32))),
22430 mkU32(0xFFFF)),
22431 binop(Iop_Shl32,
22432 unop(Iop_8Sto32,
22433 unop(Iop_32to8, mkexpr(hi32))),
22434 mkU8(16))
22436 break;
22438 default:
22439 vassert(0);
22441 putIRegT(rD, mkexpr(dstT), condT);
22442 DIP("%s r%u, r%u, ror #%u\n", nm, rD, rM, 8 * rot);
22443 goto decode_success;
22447 /* -------------- MUL.W Rd, Rn, Rm -------------- */
22448 if (INSN0(15,4) == 0xFB0
22449 && (INSN1(15,0) & 0xF0F0) == 0xF000) {
22450 UInt rN = INSN0(3,0);
22451 UInt rD = INSN1(11,8);
22452 UInt rM = INSN1(3,0);
22453 if (!isBadRegT(rD) && !isBadRegT(rN) && !isBadRegT(rM)) {
22454 IRTemp res = newTemp(Ity_I32);
22455 assign(res, binop(Iop_Mul32, getIRegT(rN), getIRegT(rM)));
22456 putIRegT(rD, mkexpr(res), condT);
22457 DIP("mul.w r%u, r%u, r%u\n", rD, rN, rM);
22458 goto decode_success;
22462 /* -------------- SDIV.W Rd, Rn, Rm -------------- */
22463 if (INSN0(15,4) == 0xFB9
22464 && (INSN1(15,0) & 0xF0F0) == 0xF0F0) {
22465 UInt rN = INSN0(3,0);
22466 UInt rD = INSN1(11,8);
22467 UInt rM = INSN1(3,0);
22468 if (!isBadRegT(rD) && !isBadRegT(rN) && !isBadRegT(rM)) {
22469 IRTemp res = newTemp(Ity_I32);
22470 IRTemp argL = newTemp(Ity_I32);
22471 IRTemp argR = newTemp(Ity_I32);
22472 assign(argL, getIRegT(rN));
22473 assign(argR, getIRegT(rM));
22474 assign(res, binop(Iop_DivS32, mkexpr(argL), mkexpr(argR)));
22475 putIRegT(rD, mkexpr(res), condT);
22476 DIP("sdiv.w r%u, r%u, r%u\n", rD, rN, rM);
22477 goto decode_success;
22481 /* -------------- UDIV.W Rd, Rn, Rm -------------- */
22482 if (INSN0(15,4) == 0xFBB
22483 && (INSN1(15,0) & 0xF0F0) == 0xF0F0) {
22484 UInt rN = INSN0(3,0);
22485 UInt rD = INSN1(11,8);
22486 UInt rM = INSN1(3,0);
22487 if (!isBadRegT(rD) && !isBadRegT(rN) && !isBadRegT(rM)) {
22488 IRTemp res = newTemp(Ity_I32);
22489 IRTemp argL = newTemp(Ity_I32);
22490 IRTemp argR = newTemp(Ity_I32);
22491 assign(argL, getIRegT(rN));
22492 assign(argR, getIRegT(rM));
22493 assign(res, binop(Iop_DivU32, mkexpr(argL), mkexpr(argR)));
22494 putIRegT(rD, mkexpr(res), condT);
22495 DIP("udiv.w r%u, r%u, r%u\n", rD, rN, rM);
22496 goto decode_success;
22500 /* ------------------ {U,S}MULL ------------------ */
22501 if ((INSN0(15,4) == 0xFB8 || INSN0(15,4) == 0xFBA)
22502 && INSN1(7,4) == BITS4(0,0,0,0)) {
22503 UInt isU = INSN0(5,5);
22504 UInt rN = INSN0(3,0);
22505 UInt rDlo = INSN1(15,12);
22506 UInt rDhi = INSN1(11,8);
22507 UInt rM = INSN1(3,0);
22508 if (!isBadRegT(rDhi) && !isBadRegT(rDlo)
22509 && !isBadRegT(rN) && !isBadRegT(rM) && rDlo != rDhi) {
22510 IRTemp res = newTemp(Ity_I64);
22511 assign(res, binop(isU ? Iop_MullU32 : Iop_MullS32,
22512 getIRegT(rN), getIRegT(rM)));
22513 putIRegT( rDhi, unop(Iop_64HIto32, mkexpr(res)), condT );
22514 putIRegT( rDlo, unop(Iop_64to32, mkexpr(res)), condT );
22515 DIP("%cmull r%u, r%u, r%u, r%u\n",
22516 isU ? 'u' : 's', rDlo, rDhi, rN, rM);
22517 goto decode_success;
22521 /* ------------------ ML{A,S} ------------------ */
22522 if (INSN0(15,4) == 0xFB0
22523 && ( INSN1(7,4) == BITS4(0,0,0,0) // MLA
22524 || INSN1(7,4) == BITS4(0,0,0,1))) { // MLS
22525 UInt rN = INSN0(3,0);
22526 UInt rA = INSN1(15,12);
22527 UInt rD = INSN1(11,8);
22528 UInt rM = INSN1(3,0);
22529 if (!isBadRegT(rD) && !isBadRegT(rN)
22530 && !isBadRegT(rM) && !isBadRegT(rA)) {
22531 Bool isMLA = INSN1(7,4) == BITS4(0,0,0,0);
22532 IRTemp res = newTemp(Ity_I32);
22533 assign(res,
22534 binop(isMLA ? Iop_Add32 : Iop_Sub32,
22535 getIRegT(rA),
22536 binop(Iop_Mul32, getIRegT(rN), getIRegT(rM))));
22537 putIRegT(rD, mkexpr(res), condT);
22538 DIP("%s r%u, r%u, r%u, r%u\n",
22539 isMLA ? "mla" : "mls", rD, rN, rM, rA);
22540 goto decode_success;
22544 /* ------------------ (T3) ADR ------------------ */
22545 if ((INSN0(15,0) == 0xF20F || INSN0(15,0) == 0xF60F)
22546 && INSN1(15,15) == 0) {
22547 /* rD = align4(PC) + imm32 */
22548 UInt rD = INSN1(11,8);
22549 if (!isBadRegT(rD)) {
22550 UInt imm32 = (INSN0(10,10) << 11)
22551 | (INSN1(14,12) << 8) | INSN1(7,0);
22552 putIRegT(rD, binop(Iop_Add32,
22553 binop(Iop_And32, getIRegT(15), mkU32(~3U)),
22554 mkU32(imm32)),
22555 condT);
22556 DIP("add r%u, pc, #%u\n", rD, imm32);
22557 goto decode_success;
22561 /* ----------------- (T1) UMLAL ----------------- */
22562 /* ----------------- (T1) SMLAL ----------------- */
22563 if ((INSN0(15,4) == 0xFBE // UMLAL
22564 || INSN0(15,4) == 0xFBC) // SMLAL
22565 && INSN1(7,4) == BITS4(0,0,0,0)) {
22566 UInt rN = INSN0(3,0);
22567 UInt rDlo = INSN1(15,12);
22568 UInt rDhi = INSN1(11,8);
22569 UInt rM = INSN1(3,0);
22570 if (!isBadRegT(rDlo) && !isBadRegT(rDhi) && !isBadRegT(rN)
22571 && !isBadRegT(rM) && rDhi != rDlo) {
22572 Bool isS = INSN0(15,4) == 0xFBC;
22573 IRTemp argL = newTemp(Ity_I32);
22574 IRTemp argR = newTemp(Ity_I32);
22575 IRTemp old = newTemp(Ity_I64);
22576 IRTemp res = newTemp(Ity_I64);
22577 IRTemp resHi = newTemp(Ity_I32);
22578 IRTemp resLo = newTemp(Ity_I32);
22579 IROp mulOp = isS ? Iop_MullS32 : Iop_MullU32;
22580 assign( argL, getIRegT(rM));
22581 assign( argR, getIRegT(rN));
22582 assign( old, binop(Iop_32HLto64, getIRegT(rDhi), getIRegT(rDlo)) );
22583 assign( res, binop(Iop_Add64,
22584 mkexpr(old),
22585 binop(mulOp, mkexpr(argL), mkexpr(argR))) );
22586 assign( resHi, unop(Iop_64HIto32, mkexpr(res)) );
22587 assign( resLo, unop(Iop_64to32, mkexpr(res)) );
22588 putIRegT( rDhi, mkexpr(resHi), condT );
22589 putIRegT( rDlo, mkexpr(resLo), condT );
22590 DIP("%cmlal r%u, r%u, r%u, r%u\n",
22591 isS ? 's' : 'u', rDlo, rDhi, rN, rM);
22592 goto decode_success;
22596 /* ------------------ (T1) UMAAL ------------------ */
22597 if (INSN0(15,4) == 0xFBE && INSN1(7,4) == BITS4(0,1,1,0)) {
22598 UInt rN = INSN0(3,0);
22599 UInt rDlo = INSN1(15,12);
22600 UInt rDhi = INSN1(11,8);
22601 UInt rM = INSN1(3,0);
22602 if (!isBadRegT(rDlo) && !isBadRegT(rDhi) && !isBadRegT(rN)
22603 && !isBadRegT(rM) && rDhi != rDlo) {
22604 IRTemp argN = newTemp(Ity_I32);
22605 IRTemp argM = newTemp(Ity_I32);
22606 IRTemp argDhi = newTemp(Ity_I32);
22607 IRTemp argDlo = newTemp(Ity_I32);
22608 IRTemp res = newTemp(Ity_I64);
22609 IRTemp resHi = newTemp(Ity_I32);
22610 IRTemp resLo = newTemp(Ity_I32);
22611 assign( argN, getIRegT(rN) );
22612 assign( argM, getIRegT(rM) );
22613 assign( argDhi, getIRegT(rDhi) );
22614 assign( argDlo, getIRegT(rDlo) );
22615 assign( res,
22616 binop(Iop_Add64,
22617 binop(Iop_Add64,
22618 binop(Iop_MullU32, mkexpr(argN), mkexpr(argM)),
22619 unop(Iop_32Uto64, mkexpr(argDhi))),
22620 unop(Iop_32Uto64, mkexpr(argDlo))) );
22621 assign( resHi, unop(Iop_64HIto32, mkexpr(res)) );
22622 assign( resLo, unop(Iop_64to32, mkexpr(res)) );
22623 putIRegT( rDhi, mkexpr(resHi), condT );
22624 putIRegT( rDlo, mkexpr(resLo), condT );
22625 DIP("umaal r%u, r%u, r%u, r%u\n", rDlo, rDhi, rN, rM);
22626 goto decode_success;
22630 /* ------------------- (T1) SMMUL{R} ------------------ */
22631 if (INSN0(15,7) == BITS9(1,1,1,1,1,0,1,1,0)
22632 && INSN0(6,4) == BITS3(1,0,1)
22633 && INSN1(15,12) == BITS4(1,1,1,1)
22634 && INSN1(7,5) == BITS3(0,0,0)) {
22635 UInt bitR = INSN1(4,4);
22636 UInt rD = INSN1(11,8);
22637 UInt rM = INSN1(3,0);
22638 UInt rN = INSN0(3,0);
22639 if (!isBadRegT(rD) && !isBadRegT(rN) && !isBadRegT(rM)) {
22640 IRExpr* res
22641 = unop(Iop_64HIto32,
22642 binop(Iop_Add64,
22643 binop(Iop_MullS32, getIRegT(rN), getIRegT(rM)),
22644 mkU64(bitR ? 0x80000000ULL : 0ULL)));
22645 putIRegT(rD, res, condT);
22646 DIP("smmul%s r%u, r%u, r%u\n",
22647 bitR ? "r" : "", rD, rN, rM);
22648 goto decode_success;
22652 /* ------------------- (T1) SMMLA{R} ------------------ */
22653 if (INSN0(15,7) == BITS9(1,1,1,1,1,0,1,1,0)
22654 && INSN0(6,4) == BITS3(1,0,1)
22655 && INSN1(7,5) == BITS3(0,0,0)) {
22656 UInt bitR = INSN1(4,4);
22657 UInt rA = INSN1(15,12);
22658 UInt rD = INSN1(11,8);
22659 UInt rM = INSN1(3,0);
22660 UInt rN = INSN0(3,0);
22661 if (!isBadRegT(rD) && !isBadRegT(rN) && !isBadRegT(rM) && (rA != 13)) {
22662 IRExpr* res
22663 = unop(Iop_64HIto32,
22664 binop(Iop_Add64,
22665 binop(Iop_Add64,
22666 binop(Iop_32HLto64, getIRegT(rA), mkU32(0)),
22667 binop(Iop_MullS32, getIRegT(rN), getIRegT(rM))),
22668 mkU64(bitR ? 0x80000000ULL : 0ULL)));
22669 putIRegT(rD, res, condT);
22670 DIP("smmla%s r%u, r%u, r%u, r%u\n",
22671 bitR ? "r" : "", rD, rN, rM, rA);
22672 goto decode_success;
22676 /* ------------------ (T2) ADR ------------------ */
22677 if ((INSN0(15,0) == 0xF2AF || INSN0(15,0) == 0xF6AF)
22678 && INSN1(15,15) == 0) {
22679 /* rD = align4(PC) - imm32 */
22680 UInt rD = INSN1(11,8);
22681 if (!isBadRegT(rD)) {
22682 UInt imm32 = (INSN0(10,10) << 11)
22683 | (INSN1(14,12) << 8) | INSN1(7,0);
22684 putIRegT(rD, binop(Iop_Sub32,
22685 binop(Iop_And32, getIRegT(15), mkU32(~3U)),
22686 mkU32(imm32)),
22687 condT);
22688 DIP("sub r%u, pc, #%u\n", rD, imm32);
22689 goto decode_success;
22693 /* ------------------- (T1) BFI ------------------- */
22694 /* ------------------- (T1) BFC ------------------- */
22695 if (INSN0(15,4) == 0xF36 && INSN1(15,15) == 0 && INSN1(5,5) == 0) {
22696 UInt rD = INSN1(11,8);
22697 UInt rN = INSN0(3,0);
22698 UInt msb = INSN1(4,0);
22699 UInt lsb = (INSN1(14,12) << 2) | INSN1(7,6);
22700 if (isBadRegT(rD) || rN == 13 || msb < lsb) {
22701 /* undecodable; fall through */
22702 } else {
22703 IRTemp src = newTemp(Ity_I32);
22704 IRTemp olddst = newTemp(Ity_I32);
22705 IRTemp newdst = newTemp(Ity_I32);
22706 UInt mask = ((UInt)1) << (msb - lsb);
22707 mask = (mask - 1) + mask;
22708 vassert(mask != 0); // guaranteed by "msb < lsb" check above
22709 mask <<= lsb;
22711 assign(src, rN == 15 ? mkU32(0) : getIRegT(rN));
22712 assign(olddst, getIRegT(rD));
22713 assign(newdst,
22714 binop(Iop_Or32,
22715 binop(Iop_And32,
22716 binop(Iop_Shl32, mkexpr(src), mkU8(lsb)),
22717 mkU32(mask)),
22718 binop(Iop_And32,
22719 mkexpr(olddst),
22720 mkU32(~mask)))
22723 putIRegT(rD, mkexpr(newdst), condT);
22725 if (rN == 15) {
22726 DIP("bfc r%u, #%u, #%u\n",
22727 rD, lsb, msb-lsb+1);
22728 } else {
22729 DIP("bfi r%u, r%u, #%u, #%u\n",
22730 rD, rN, lsb, msb-lsb+1);
22732 goto decode_success;
22736 /* ------------------- (T1) SXTAH ------------------- */
22737 /* ------------------- (T1) UXTAH ------------------- */
22738 if ((INSN0(15,4) == 0xFA1 // UXTAH
22739 || INSN0(15,4) == 0xFA0) // SXTAH
22740 && INSN1(15,12) == BITS4(1,1,1,1)
22741 && INSN1(7,6) == BITS2(1,0)) {
22742 Bool isU = INSN0(15,4) == 0xFA1;
22743 UInt rN = INSN0(3,0);
22744 UInt rD = INSN1(11,8);
22745 UInt rM = INSN1(3,0);
22746 UInt rot = INSN1(5,4);
22747 if (!isBadRegT(rD) && !isBadRegT(rN) && !isBadRegT(rM)) {
22748 IRTemp srcL = newTemp(Ity_I32);
22749 IRTemp srcR = newTemp(Ity_I32);
22750 IRTemp res = newTemp(Ity_I32);
22751 assign(srcR, getIRegT(rM));
22752 assign(srcL, getIRegT(rN));
22753 assign(res, binop(Iop_Add32,
22754 mkexpr(srcL),
22755 unop(isU ? Iop_16Uto32 : Iop_16Sto32,
22756 unop(Iop_32to16,
22757 genROR32(srcR, 8 * rot)))));
22758 putIRegT(rD, mkexpr(res), condT);
22759 DIP("%cxtah r%u, r%u, r%u, ror #%u\n",
22760 isU ? 'u' : 's', rD, rN, rM, rot);
22761 goto decode_success;
22765 /* ------------------- (T1) SXTAB ------------------- */
22766 /* ------------------- (T1) UXTAB ------------------- */
22767 if ((INSN0(15,4) == 0xFA5 // UXTAB
22768 || INSN0(15,4) == 0xFA4) // SXTAB
22769 && INSN1(15,12) == BITS4(1,1,1,1)
22770 && INSN1(7,6) == BITS2(1,0)) {
22771 Bool isU = INSN0(15,4) == 0xFA5;
22772 UInt rN = INSN0(3,0);
22773 UInt rD = INSN1(11,8);
22774 UInt rM = INSN1(3,0);
22775 UInt rot = INSN1(5,4);
22776 if (!isBadRegT(rD) && !isBadRegT(rN) && !isBadRegT(rM)) {
22777 IRTemp srcL = newTemp(Ity_I32);
22778 IRTemp srcR = newTemp(Ity_I32);
22779 IRTemp res = newTemp(Ity_I32);
22780 assign(srcR, getIRegT(rM));
22781 assign(srcL, getIRegT(rN));
22782 assign(res, binop(Iop_Add32,
22783 mkexpr(srcL),
22784 unop(isU ? Iop_8Uto32 : Iop_8Sto32,
22785 unop(Iop_32to8,
22786 genROR32(srcR, 8 * rot)))));
22787 putIRegT(rD, mkexpr(res), condT);
22788 DIP("%cxtab r%u, r%u, r%u, ror #%u\n",
22789 isU ? 'u' : 's', rD, rN, rM, rot);
22790 goto decode_success;
22794 /* ------------------- (T1) CLZ ------------------- */
22795 if (INSN0(15,4) == 0xFAB
22796 && INSN1(15,12) == BITS4(1,1,1,1)
22797 && INSN1(7,4) == BITS4(1,0,0,0)) {
22798 UInt rM1 = INSN0(3,0);
22799 UInt rD = INSN1(11,8);
22800 UInt rM2 = INSN1(3,0);
22801 if (!isBadRegT(rD) && !isBadRegT(rM1) && rM1 == rM2) {
22802 IRTemp arg = newTemp(Ity_I32);
22803 IRTemp res = newTemp(Ity_I32);
22804 assign(arg, getIRegT(rM1));
22805 assign(res, IRExpr_ITE(
22806 binop(Iop_CmpEQ32, mkexpr(arg), mkU32(0)),
22807 mkU32(32),
22808 unop(Iop_Clz32, mkexpr(arg))
22810 putIRegT(rD, mkexpr(res), condT);
22811 DIP("clz r%u, r%u\n", rD, rM1);
22812 goto decode_success;
22816 /* ------------------- (T1) RBIT ------------------- */
22817 if (INSN0(15,4) == 0xFA9
22818 && INSN1(15,12) == BITS4(1,1,1,1)
22819 && INSN1(7,4) == BITS4(1,0,1,0)) {
22820 UInt rM1 = INSN0(3,0);
22821 UInt rD = INSN1(11,8);
22822 UInt rM2 = INSN1(3,0);
22823 if (!isBadRegT(rD) && !isBadRegT(rM1) && rM1 == rM2) {
22824 IRTemp arg = newTemp(Ity_I32);
22825 assign(arg, getIRegT(rM1));
22826 IRTemp res = gen_BITREV(arg);
22827 putIRegT(rD, mkexpr(res), condT);
22828 DIP("rbit r%u, r%u\n", rD, rM1);
22829 goto decode_success;
22833 /* ------------------- (T2) REV ------------------- */
22834 /* ------------------- (T2) REV16 ------------------- */
22835 if (INSN0(15,4) == 0xFA9
22836 && INSN1(15,12) == BITS4(1,1,1,1)
22837 && ( INSN1(7,4) == BITS4(1,0,0,0) // REV
22838 || INSN1(7,4) == BITS4(1,0,0,1))) { // REV16
22839 UInt rM1 = INSN0(3,0);
22840 UInt rD = INSN1(11,8);
22841 UInt rM2 = INSN1(3,0);
22842 Bool isREV = INSN1(7,4) == BITS4(1,0,0,0);
22843 if (!isBadRegT(rD) && !isBadRegT(rM1) && rM1 == rM2) {
22844 IRTemp arg = newTemp(Ity_I32);
22845 assign(arg, getIRegT(rM1));
22846 IRTemp res = isREV ? gen_REV(arg) : gen_REV16(arg);
22847 putIRegT(rD, mkexpr(res), condT);
22848 DIP("rev%s r%u, r%u\n", isREV ? "" : "16", rD, rM1);
22849 goto decode_success;
22853 /* ------------------- (T2) REVSH ------------------ */
22854 if (INSN0(15,4) == 0xFA9
22855 && INSN1(15,12) == BITS4(1,1,1,1)
22856 && INSN1(7,4) == BITS4(1,0,1,1)) {
22857 UInt rM1 = INSN0(3,0);
22858 UInt rM2 = INSN1(3,0);
22859 UInt rD = INSN1(11,8);
22860 if (!isBadRegT(rD) && !isBadRegT(rM1) && rM1 == rM2) {
22861 IRTemp irt_rM = newTemp(Ity_I32);
22862 IRTemp irt_hi = newTemp(Ity_I32);
22863 IRTemp irt_low = newTemp(Ity_I32);
22864 IRTemp irt_res = newTemp(Ity_I32);
22865 assign(irt_rM, getIRegT(rM1));
22866 assign(irt_hi,
22867 binop(Iop_Sar32,
22868 binop(Iop_Shl32, mkexpr(irt_rM), mkU8(24)),
22869 mkU8(16)
22872 assign(irt_low,
22873 binop(Iop_And32,
22874 binop(Iop_Shr32, mkexpr(irt_rM), mkU8(8)),
22875 mkU32(0xFF)
22878 assign(irt_res,
22879 binop(Iop_Or32, mkexpr(irt_hi), mkexpr(irt_low))
22881 putIRegT(rD, mkexpr(irt_res), condT);
22882 DIP("revsh r%u, r%u\n", rD, rM1);
22883 goto decode_success;
22887 /* -------------- (T1) MSR apsr, reg -------------- */
22888 if (INSN0(15,4) == 0xF38
22889 && INSN1(15,12) == BITS4(1,0,0,0) && INSN1(9,0) == 0x000) {
22890 UInt rN = INSN0(3,0);
22891 UInt write_ge = INSN1(10,10);
22892 UInt write_nzcvq = INSN1(11,11);
22893 if (!isBadRegT(rN) && (write_nzcvq || write_ge)) {
22894 IRTemp rNt = newTemp(Ity_I32);
22895 assign(rNt, getIRegT(rN));
22896 desynthesise_APSR( write_nzcvq, write_ge, rNt, condT );
22897 DIP("msr cpsr_%s%s, r%u\n",
22898 write_nzcvq ? "f" : "", write_ge ? "g" : "", rN);
22899 goto decode_success;
22903 /* -------------- (T1) MRS reg, apsr -------------- */
22904 if (INSN0(15,0) == 0xF3EF
22905 && INSN1(15,12) == BITS4(1,0,0,0) && INSN1(7,0) == 0x00) {
22906 UInt rD = INSN1(11,8);
22907 if (!isBadRegT(rD)) {
22908 IRTemp apsr = synthesise_APSR();
22909 putIRegT( rD, mkexpr(apsr), condT );
22910 DIP("mrs r%u, cpsr\n", rD);
22911 goto decode_success;
22915 /* ----------------- (T1) LDREX ----------------- */
22916 if (INSN0(15,4) == 0xE85 && INSN1(11,8) == BITS4(1,1,1,1)) {
22917 UInt rN = INSN0(3,0);
22918 UInt rT = INSN1(15,12);
22919 UInt imm8 = INSN1(7,0);
22920 if (!isBadRegT(rT) && rN != 15) {
22921 IRTemp res;
22922 // go uncond
22923 mk_skip_over_T32_if_cond_is_false( condT );
22924 // now uncond
22925 res = newTemp(Ity_I32);
22926 stmt( IRStmt_LLSC(Iend_LE,
22927 res,
22928 binop(Iop_Add32, getIRegT(rN), mkU32(imm8 * 4)),
22929 NULL/*this is a load*/ ));
22930 putIRegT(rT, mkexpr(res), IRTemp_INVALID);
22931 DIP("ldrex r%u, [r%u, #+%u]\n", rT, rN, imm8 * 4);
22932 goto decode_success;
22936 /* --------------- (T1) LDREX{B,H} --------------- */
22937 if (INSN0(15,4) == 0xE8D
22938 && (INSN1(11,0) == 0xF4F || INSN1(11,0) == 0xF5F)) {
22939 UInt rN = INSN0(3,0);
22940 UInt rT = INSN1(15,12);
22941 Bool isH = INSN1(11,0) == 0xF5F;
22942 if (!isBadRegT(rT) && rN != 15) {
22943 IRTemp res;
22944 // go uncond
22945 mk_skip_over_T32_if_cond_is_false( condT );
22946 // now uncond
22947 res = newTemp(isH ? Ity_I16 : Ity_I8);
22948 stmt( IRStmt_LLSC(Iend_LE, res, getIRegT(rN),
22949 NULL/*this is a load*/ ));
22950 putIRegT(rT, unop(isH ? Iop_16Uto32 : Iop_8Uto32, mkexpr(res)),
22951 IRTemp_INVALID);
22952 DIP("ldrex%c r%u, [r%u]\n", isH ? 'h' : 'b', rT, rN);
22953 goto decode_success;
22957 /* --------------- (T1) LDREXD --------------- */
22958 if (INSN0(15,4) == 0xE8D && INSN1(7,0) == 0x7F) {
22959 UInt rN = INSN0(3,0);
22960 UInt rT = INSN1(15,12);
22961 UInt rT2 = INSN1(11,8);
22962 if (!isBadRegT(rT) && !isBadRegT(rT2) && rT != rT2 && rN != 15) {
22963 IRTemp res;
22964 // go uncond
22965 mk_skip_over_T32_if_cond_is_false( condT );
22966 // now uncond
22967 res = newTemp(Ity_I64);
22968 // FIXME: assumes little-endian guest
22969 stmt( IRStmt_LLSC(Iend_LE, res, getIRegT(rN),
22970 NULL/*this is a load*/ ));
22971 // FIXME: assumes little-endian guest
22972 putIRegT(rT, unop(Iop_64to32, mkexpr(res)), IRTemp_INVALID);
22973 putIRegT(rT2, unop(Iop_64HIto32, mkexpr(res)), IRTemp_INVALID);
22974 DIP("ldrexd r%u, r%u, [r%u]\n", rT, rT2, rN);
22975 goto decode_success;
22979 /* ----------------- (T1) STREX ----------------- */
22980 if (INSN0(15,4) == 0xE84) {
22981 UInt rN = INSN0(3,0);
22982 UInt rT = INSN1(15,12);
22983 UInt rD = INSN1(11,8);
22984 UInt imm8 = INSN1(7,0);
22985 if (!isBadRegT(rD) && !isBadRegT(rT) && rN != 15
22986 && rD != rN && rD != rT) {
22987 IRTemp resSC1, resSC32;
22988 // go uncond
22989 mk_skip_over_T32_if_cond_is_false( condT );
22990 // now uncond
22991 /* Ok, now we're unconditional. Do the store. */
22992 resSC1 = newTemp(Ity_I1);
22993 stmt( IRStmt_LLSC(Iend_LE,
22994 resSC1,
22995 binop(Iop_Add32, getIRegT(rN), mkU32(imm8 * 4)),
22996 getIRegT(rT)) );
22997 /* Set rD to 1 on failure, 0 on success. Currently we have
22998 resSC1 == 0 on failure, 1 on success. */
22999 resSC32 = newTemp(Ity_I32);
23000 assign(resSC32,
23001 unop(Iop_1Uto32, unop(Iop_Not1, mkexpr(resSC1))));
23002 putIRegT(rD, mkexpr(resSC32), IRTemp_INVALID);
23003 DIP("strex r%u, r%u, [r%u, #+%u]\n", rD, rT, rN, imm8 * 4);
23004 goto decode_success;
23008 /* --------------- (T1) STREX{B,H} --------------- */
23009 if (INSN0(15,4) == 0xE8C
23010 && (INSN1(11,4) == 0xF4 || INSN1(11,4) == 0xF5)) {
23011 UInt rN = INSN0(3,0);
23012 UInt rT = INSN1(15,12);
23013 UInt rD = INSN1(3,0);
23014 Bool isH = INSN1(11,4) == 0xF5;
23015 if (!isBadRegT(rD) && !isBadRegT(rT) && rN != 15
23016 && rD != rN && rD != rT) {
23017 IRTemp resSC1, resSC32;
23018 // go uncond
23019 mk_skip_over_T32_if_cond_is_false( condT );
23020 // now uncond
23021 /* Ok, now we're unconditional. Do the store. */
23022 resSC1 = newTemp(Ity_I1);
23023 stmt( IRStmt_LLSC(Iend_LE, resSC1, getIRegT(rN),
23024 unop(isH ? Iop_32to16 : Iop_32to8,
23025 getIRegT(rT))) );
23026 /* Set rD to 1 on failure, 0 on success. Currently we have
23027 resSC1 == 0 on failure, 1 on success. */
23028 resSC32 = newTemp(Ity_I32);
23029 assign(resSC32,
23030 unop(Iop_1Uto32, unop(Iop_Not1, mkexpr(resSC1))));
23031 putIRegT(rD, mkexpr(resSC32), IRTemp_INVALID);
23032 DIP("strex%c r%u, r%u, [r%u]\n", isH ? 'h' : 'b', rD, rT, rN);
23033 goto decode_success;
23037 /* ---------------- (T1) STREXD ---------------- */
23038 if (INSN0(15,4) == 0xE8C && INSN1(7,4) == BITS4(0,1,1,1)) {
23039 UInt rN = INSN0(3,0);
23040 UInt rT = INSN1(15,12);
23041 UInt rT2 = INSN1(11,8);
23042 UInt rD = INSN1(3,0);
23043 if (!isBadRegT(rD) && !isBadRegT(rT) && !isBadRegT(rT2)
23044 && rN != 15 && rD != rN && rD != rT && rD != rT2) {
23045 IRTemp resSC1, resSC32, data;
23046 // go uncond
23047 mk_skip_over_T32_if_cond_is_false( condT );
23048 // now uncond
23049 /* Ok, now we're unconditional. Do the store. */
23050 resSC1 = newTemp(Ity_I1);
23051 data = newTemp(Ity_I64);
23052 // FIXME: assumes little-endian guest
23053 assign(data, binop(Iop_32HLto64, getIRegT(rT2), getIRegT(rT)));
23054 // FIXME: assumes little-endian guest
23055 stmt( IRStmt_LLSC(Iend_LE, resSC1, getIRegT(rN), mkexpr(data)));
23056 /* Set rD to 1 on failure, 0 on success. Currently we have
23057 resSC1 == 0 on failure, 1 on success. */
23058 resSC32 = newTemp(Ity_I32);
23059 assign(resSC32,
23060 unop(Iop_1Uto32, unop(Iop_Not1, mkexpr(resSC1))));
23061 putIRegT(rD, mkexpr(resSC32), IRTemp_INVALID);
23062 DIP("strexd r%u, r%u, r%u, [r%u]\n", rD, rT, rT2, rN);
23063 goto decode_success;
23067 /* -------------- v7 barrier insns -------------- */
23068 if (INSN0(15,0) == 0xF3BF && (INSN1(15,0) & 0xFF00) == 0x8F00) {
23069 /* FIXME: should this be unconditional? */
23070 /* XXX this isn't really right, is it? The generated IR does
23071 them unconditionally. I guess it doesn't matter since it
23072 doesn't do any harm to do them even when the guarding
23073 condition is false -- it's just a performance loss. */
23074 switch (INSN1(7,0)) {
23075 case 0x4F: /* DSB sy */
23076 case 0x4E: /* DSB st */
23077 case 0x4B: /* DSB ish */
23078 case 0x4A: /* DSB ishst */
23079 case 0x47: /* DSB nsh */
23080 case 0x46: /* DSB nshst */
23081 case 0x43: /* DSB osh */
23082 case 0x42: /* DSB oshst */
23083 stmt( IRStmt_MBE(Imbe_Fence) );
23084 DIP("DSB\n");
23085 goto decode_success;
23086 case 0x5F: /* DMB sy */
23087 case 0x5E: /* DMB st */
23088 case 0x5B: /* DMB ish */
23089 case 0x5A: /* DMB ishst */
23090 case 0x57: /* DMB nsh */
23091 case 0x56: /* DMB nshst */
23092 case 0x53: /* DMB osh */
23093 case 0x52: /* DMB oshst */
23094 stmt( IRStmt_MBE(Imbe_Fence) );
23095 DIP("DMB\n");
23096 goto decode_success;
23097 case 0x6F: /* ISB */
23098 stmt( IRStmt_MBE(Imbe_Fence) );
23099 DIP("ISB\n");
23100 goto decode_success;
23101 default:
23102 break;
23106 /* ---------------------- PLD{,W} ---------------------- */
23107 if ((INSN0(15,4) & 0xFFD) == 0xF89 && INSN1(15,12) == 0xF) {
23108 /* FIXME: should this be unconditional? */
23109 /* PLD/PLDW immediate, encoding T1 */
23110 UInt rN = INSN0(3,0);
23111 UInt bW = INSN0(5,5);
23112 UInt imm12 = INSN1(11,0);
23113 DIP("pld%s [r%u, #%u]\n", bW ? "w" : "", rN, imm12);
23114 goto decode_success;
23117 if ((INSN0(15,4) & 0xFFD) == 0xF81 && INSN1(15,8) == 0xFC) {
23118 /* FIXME: should this be unconditional? */
23119 /* PLD/PLDW immediate, encoding T2 */
23120 UInt rN = INSN0(3,0);
23121 UInt bW = INSN0(5,5);
23122 UInt imm8 = INSN1(7,0);
23123 DIP("pld%s [r%u, #-%u]\n", bW ? "w" : "", rN, imm8);
23124 goto decode_success;
23127 if ((INSN0(15,4) & 0xFFD) == 0xF81 && INSN1(15,6) == 0x3C0) {
23128 /* FIXME: should this be unconditional? */
23129 /* PLD/PLDW register, encoding T1 */
23130 UInt rN = INSN0(3,0);
23131 UInt rM = INSN1(3,0);
23132 UInt bW = INSN0(5,5);
23133 UInt imm2 = INSN1(5,4);
23134 if (!isBadRegT(rM)) {
23135 DIP("pld%s [r%u, r%u, lsl %u]\n", bW ? "w" : "", rN, rM, imm2);
23136 goto decode_success;
23138 /* fall through */
23141 /* -------------- read CP15 TPIDRURO register ------------- */
23142 /* mrc p15, 0, r0, c13, c0, 3 up to
23143 mrc p15, 0, r14, c13, c0, 3
23145 /* I don't know whether this is really v7-only. But anyway, we
23146 have to support it since arm-linux uses TPIDRURO as a thread
23147 state register. */
23148 if ((INSN0(15,0) == 0xEE1D) && (INSN1(11,0) == 0x0F70)) {
23149 UInt rD = INSN1(15,12);
23150 if (!isBadRegT(rD)) {
23151 putIRegT(rD, IRExpr_Get(OFFB_TPIDRURO, Ity_I32), condT);
23152 DIP("mrc p15,0, r%u, c13, c0, 3\n", rD);
23153 goto decode_success;
23155 /* fall through */
23158 /* ------------ read/write CP15 TPIDRURW register ----------- */
23159 /* mcr p15, 0, r0, c13, c0, 2 (r->cr xfer) up to
23160 mcr p15, 0, r14, c13, c0, 2
23162 mrc p15, 0, r0, c13, c0, 2 (rc->r xfer) up to
23163 mrc p15, 0, r14, c13, c0, 2
23165 if ((INSN0(15,0) == 0xEE0D) && (INSN1(11,0) == 0x0F50)) {
23166 UInt rS = INSN1(15,12);
23167 if (!isBadRegT(rS)) {
23168 putMiscReg32(OFFB_TPIDRURW, getIRegT(rS), condT);
23169 DIP("mcr p15,0, r%u, c13, c0, 2\n", rS);
23170 goto decode_success;
23172 /* fall through */
23174 if ((INSN0(15,0) == 0xEE1D) && (INSN1(11,0) == 0x0F50)) {
23175 UInt rD = INSN1(15,12);
23176 if (!isBadRegT(rD)) {
23177 putIRegT(rD, IRExpr_Get(OFFB_TPIDRURW, Ity_I32), condT);
23178 DIP("mrc p15,0, r%u, c13, c0, 2\n", rD);
23179 goto decode_success;
23181 /* fall through */
23184 /* -------------- read CP15 PMUSRENR register ------------- */
23185 /* mrc p15, 0, r0, c9, c14, 0 up to
23186 mrc p15, 0, r14, c9, c14, 0
23187 See comment on the ARM equivalent of this (above) for details.
23189 if ((INSN0(15,0) == 0xEE19) && (INSN1(11,0) == 0x0F1E)) {
23190 UInt rD = INSN1(15,12);
23191 if (!isBadRegT(rD)) {
23192 putIRegT(rD, mkU32(0), condT);
23193 DIP("mrc p15,0, r%u, c9, c14, 0\n", rD);
23194 goto decode_success;
23196 /* fall through */
23199 /* ------------------- CLREX ------------------ */
23200 if (INSN0(15,0) == 0xF3BF && INSN1(15,0) == 0x8F2F) {
23201 /* AFAICS, this simply cancels a (all?) reservations made by a
23202 (any?) preceding LDREX(es). Arrange to hand it through to
23203 the back end. */
23204 mk_skip_over_T32_if_cond_is_false( condT );
23205 stmt( IRStmt_MBE(Imbe_CancelReservation) );
23206 DIP("clrex\n");
23207 goto decode_success;
23210 /* ------------------- NOP ------------------ */
23211 if (INSN0(15,0) == 0xF3AF && INSN1(15,0) == 0x8000) {
23212 DIP("nop\n");
23213 goto decode_success;
23216 /* -------------- (T1) LDRT reg+#imm8 -------------- */
23217 /* Load Register Unprivileged:
23218 ldrt Rt, [Rn, #imm8]
23220 if (INSN0(15,6) == BITS10(1,1,1,1,1,0,0,0,0,1) && INSN0(5,4) == BITS2(0,1)
23221 && INSN1(11,8) == BITS4(1,1,1,0)) {
23222 UInt rT = INSN1(15,12);
23223 UInt rN = INSN0(3,0);
23224 UInt imm8 = INSN1(7,0);
23225 Bool valid = True;
23226 if (rN == 15 || isBadRegT(rT)) valid = False;
23227 if (valid) {
23228 put_ITSTATE(old_itstate);
23229 IRExpr* ea = binop(Iop_Add32, getIRegT(rN), mkU32(imm8));
23230 IRTemp newRt = newTemp(Ity_I32);
23231 loadGuardedLE( newRt, ILGop_Ident32, ea, llGetIReg(rT), condT );
23232 putIRegT(rT, mkexpr(newRt), IRTemp_INVALID);
23233 put_ITSTATE(new_itstate);
23234 DIP("ldrt r%u, [r%u, #%u]\n", rT, rN, imm8);
23235 goto decode_success;
23239 /* -------------- (T1) STRT reg+#imm8 -------------- */
23240 /* Store Register Unprivileged:
23241 strt Rt, [Rn, #imm8]
23243 if (INSN0(15,6) == BITS10(1,1,1,1,1,0,0,0,0,1) && INSN0(5,4) == BITS2(0,0)
23244 && INSN1(11,8) == BITS4(1,1,1,0)) {
23245 UInt rT = INSN1(15,12);
23246 UInt rN = INSN0(3,0);
23247 UInt imm8 = INSN1(7,0);
23248 Bool valid = True;
23249 if (rN == 15 || isBadRegT(rT)) valid = False;
23250 if (valid) {
23251 put_ITSTATE(old_itstate);
23252 IRExpr* address = binop(Iop_Add32, getIRegT(rN), mkU32(imm8));
23253 storeGuardedLE( address, llGetIReg(rT), condT );
23254 put_ITSTATE(new_itstate);
23255 DIP("strt r%u, [r%u, #%u]\n", rT, rN, imm8);
23256 goto decode_success;
23260 /* -------------- (T1) STRBT reg+#imm8 -------------- */
23261 /* Store Register Byte Unprivileged:
23262 strbt Rt, [Rn, #imm8]
23264 if (INSN0(15,6) == BITS10(1,1,1,1,1,0,0,0,0,0) && INSN0(5,4) == BITS2(0,0)
23265 && INSN1(11,8) == BITS4(1,1,1,0)) {
23266 UInt rT = INSN1(15,12);
23267 UInt rN = INSN0(3,0);
23268 UInt imm8 = INSN1(7,0);
23269 Bool valid = True;
23270 if (rN == 15 || isBadRegT(rT)) valid = False;
23271 if (valid) {
23272 put_ITSTATE(old_itstate);
23273 IRExpr* address = binop(Iop_Add32, getIRegT(rN), mkU32(imm8));
23274 IRExpr* data = unop(Iop_32to8, llGetIReg(rT));
23275 storeGuardedLE( address, data, condT );
23276 put_ITSTATE(new_itstate);
23277 DIP("strbt r%u, [r%u, #%u]\n", rT, rN, imm8);
23278 goto decode_success;
23282 /* -------------- (T1) LDRHT reg+#imm8 -------------- */
23283 /* Load Register Halfword Unprivileged:
23284 ldrht Rt, [Rn, #imm8]
23286 if (INSN0(15,6) == BITS10(1,1,1,1,1,0,0,0,0,0) && INSN0(5,4) == BITS2(1,1)
23287 && INSN1(11,8) == BITS4(1,1,1,0)) {
23288 UInt rN = INSN0(3,0);
23289 Bool valid = True;
23290 if (rN == 15) {
23291 /* In this case our instruction is LDRH (literal), in fact:
23292 LDRH (literal) was realized earlier, so we don't want to
23293 make it twice. */
23294 valid = False;
23296 UInt rT = INSN1(15,12);
23297 UInt imm8 = INSN1(7,0);
23298 if (isBadRegT(rT)) valid = False;
23299 if (valid) {
23300 put_ITSTATE(old_itstate);
23301 IRExpr* ea = binop(Iop_Add32, getIRegT(rN), mkU32(imm8));
23302 IRTemp newRt = newTemp(Ity_I32);
23303 loadGuardedLE( newRt, ILGop_16Uto32, ea, llGetIReg(rT), condT );
23304 putIRegT(rT, mkexpr(newRt), IRTemp_INVALID);
23305 put_ITSTATE(new_itstate);
23306 DIP("ldrht r%u, [r%u, #%u]\n", rT, rN, imm8);
23307 goto decode_success;
23311 /* -------------- (T1) LDRSHT reg+#imm8 -------------- */
23312 /* Load Register Signed Halfword Unprivileged:
23313 ldrsht Rt, [Rn, #imm8]
23315 if (INSN0(15,6) == BITS10(1,1,1,1,1,0,0,1,0,0) && INSN0(5,4) == BITS2(1,1)
23316 && INSN1(11,8) == BITS4(1,1,1,0)) {
23317 UInt rN = INSN0(3,0);
23318 Bool valid = True;
23319 if (rN == 15) {
23320 /* In this case our instruction is LDRSH (literal), in fact:
23321 LDRSH (literal) was realized earlier, so we don't want to
23322 make it twice. */
23323 valid = False;
23325 UInt rT = INSN1(15,12);
23326 UInt imm8 = INSN1(7,0);
23327 if (isBadRegT(rT)) valid = False;
23328 if (valid) {
23329 put_ITSTATE(old_itstate);
23330 IRExpr* ea = binop(Iop_Add32, getIRegT(rN), mkU32(imm8));
23331 IRTemp newRt = newTemp(Ity_I32);
23332 loadGuardedLE( newRt, ILGop_16Sto32, ea, llGetIReg(rT), condT );
23333 putIRegT(rT, mkexpr(newRt), IRTemp_INVALID);
23334 put_ITSTATE(new_itstate);
23335 DIP("ldrsht r%u, [r%u, #%u]\n", rT, rN, imm8);
23336 goto decode_success;
23340 /* -------------- (T1) STRHT reg+#imm8 -------------- */
23341 /* Store Register Halfword Unprivileged:
23342 strht Rt, [Rn, #imm8]
23344 if (INSN0(15,6) == BITS10(1,1,1,1,1,0,0,0,0,0) && INSN0(5,4) == BITS2(1,0)
23345 && INSN1(11,8) == BITS4(1,1,1,0)) {
23346 UInt rT = INSN1(15,12);
23347 UInt rN = INSN0(3,0);
23348 UInt imm8 = INSN1(7,0);
23349 Bool valid = True;
23350 if (rN == 15 || isBadRegT(rT)) valid = False;
23351 if (valid) {
23352 put_ITSTATE(old_itstate);
23353 IRExpr* address = binop(Iop_Add32, getIRegT(rN), mkU32(imm8));
23354 IRExpr* data = unop(Iop_32to16, llGetIReg(rT));
23355 storeGuardedLE( address, data, condT );
23356 put_ITSTATE(new_itstate);
23357 DIP("strht r%u, [r%u, #%u]\n", rT, rN, imm8);
23358 goto decode_success;
23362 /* -------------- (T1) LDRBT reg+#imm8 -------------- */
23363 /* Load Register Byte Unprivileged:
23364 ldrbt Rt, [Rn, #imm8]
23366 if (INSN0(15,6) == BITS10(1,1,1,1,1,0,0,0,0,0) && INSN0(5,4) == BITS2(0,1)
23367 && INSN1(11,8) == BITS4(1,1,1,0)) {
23368 UInt rN = INSN0(3,0);
23369 UInt rT = INSN1(15,12);
23370 UInt imm8 = INSN1(7,0);
23371 Bool valid = True;
23372 if (rN == 15 /* insn is LDRB (literal) */) valid = False;
23373 if (isBadRegT(rT)) valid = False;
23374 if (valid) {
23375 put_ITSTATE(old_itstate);
23376 IRExpr* ea = binop(Iop_Add32, getIRegT(rN), mkU32(imm8));
23377 IRTemp newRt = newTemp(Ity_I32);
23378 loadGuardedLE( newRt, ILGop_8Uto32, ea, llGetIReg(rT), condT );
23379 putIRegT(rT, mkexpr(newRt), IRTemp_INVALID);
23380 put_ITSTATE(new_itstate);
23381 DIP("ldrbt r%u, [r%u, #%u]\n", rT, rN, imm8);
23382 goto decode_success;
23386 /* -------------- (T1) LDRSBT reg+#imm8 -------------- */
23387 /* Load Register Signed Byte Unprivileged:
23388 ldrsbt Rt, [Rn, #imm8]
23390 if (INSN0(15,6) == BITS10(1,1,1,1,1,0,0,1,0,0) && INSN0(5,4) == BITS2(0,1)
23391 && INSN1(11,8) == BITS4(1,1,1,0)) {
23392 UInt rN = INSN0(3,0);
23393 Bool valid = True;
23394 UInt rT = INSN1(15,12);
23395 UInt imm8 = INSN1(7,0);
23396 if (rN == 15 /* insn is LDRSB (literal) */) valid = False;
23397 if (isBadRegT(rT)) valid = False;
23398 if (valid) {
23399 put_ITSTATE(old_itstate);
23400 IRExpr* ea = binop(Iop_Add32, getIRegT(rN), mkU32(imm8));
23401 IRTemp newRt = newTemp(Ity_I32);
23402 loadGuardedLE( newRt, ILGop_8Sto32, ea, llGetIReg(rT), condT );
23403 putIRegT(rT, mkexpr(newRt), IRTemp_INVALID);
23404 put_ITSTATE(new_itstate);
23405 DIP("ldrsbt r%u, [r%u, #%u]\n", rT, rN, imm8);
23406 goto decode_success;
23410 /* -------------- (T1) PLI reg+#imm12 -------------- */
23411 /* Preload Instruction:
23412 pli [Rn, #imm12]
23414 if (INSN0(15,6) == BITS10(1,1,1,1,1,0,0,1,1,0) && INSN0(5,4) == BITS2(0,1)
23415 && INSN1(15,12) == BITS4(1,1,1,1)) {
23416 UInt rN = INSN0(3,0);
23417 UInt imm12 = INSN1(11,0);
23418 if (rN != 15) {
23419 DIP("pli [r%u, #%u]\n", rN, imm12);
23420 goto decode_success;
23424 /* -------------- (T2) PLI reg-#imm8 -------------- */
23425 /* Preload Instruction:
23426 pli [Rn, #-imm8]
23428 if (INSN0(15,6) == BITS10(1,1,1,1,1,0,0,1,0,0) && INSN0(5,4) == BITS2(0,1)
23429 && INSN1(15,8) == BITS8(1,1,1,1,1,1,0,0)) {
23430 UInt rN = INSN0(3,0);
23431 UInt imm8 = INSN1(7,0);
23432 if (rN != 15) {
23433 DIP("pli [r%u, #-%u]\n", rN, imm8);
23434 goto decode_success;
23438 /* -------------- (T3) PLI PC+/-#imm12 -------------- */
23439 /* Preload Instruction:
23440 pli [PC, #+/-imm12]
23442 if (INSN0(15,8) == BITS8(1,1,1,1,1,0,0,1)
23443 && INSN0(6,0) == BITS7(0,0,1,1,1,1,1)
23444 && INSN1(15,12) == BITS4(1,1,1,1)) {
23445 UInt imm12 = INSN1(11,0);
23446 UInt bU = INSN0(7,7);
23447 DIP("pli [pc, #%c%u]\n", bU == 1 ? '+' : '-', imm12);
23448 goto decode_success;
23451 /* ----------------------------------------------------------- */
23452 /* -- VFP (CP 10, CP 11) instructions (in Thumb mode) -- */
23453 /* ----------------------------------------------------------- */
23455 if (INSN0(15,12) == BITS4(1,1,1,0)) {
23456 UInt insn28 = (INSN0(11,0) << 16) | INSN1(15,0);
23457 Bool ok_vfp = decode_CP10_CP11_instruction (
23458 &dres, insn28, condT, ARMCondAL/*bogus*/,
23459 True/*isT*/
23461 if (ok_vfp)
23462 goto decode_success;
23465 /* ----------------------------------------------------------- */
23466 /* -- NEON instructions (only v7 and below, in Thumb mode) -- */
23467 /* ----------------------------------------------------------- */
23469 if (archinfo->hwcaps & VEX_HWCAPS_ARM_NEON) {
23470 UInt insn32 = (INSN0(15,0) << 16) | INSN1(15,0);
23471 Bool ok_neon = decode_NEON_instruction_ARMv7_and_below(
23472 &dres, insn32, condT, True/*isT*/
23474 if (ok_neon)
23475 goto decode_success;
23478 /* ----------------------------------------------------------- */
23479 /* -- v6 media instructions (in Thumb mode) -- */
23480 /* ----------------------------------------------------------- */
23482 { UInt insn32 = (INSN0(15,0) << 16) | INSN1(15,0);
23483 Bool ok_v6m = decode_V6MEDIA_instruction(
23484 &dres, insn32, condT, ARMCondAL/*bogus*/,
23485 True/*isT*/
23487 if (ok_v6m)
23488 goto decode_success;
23491 /* ----------------------------------------------------------- */
23492 /* -- v8 instructions (in Thumb mode) -- */
23493 /* ----------------------------------------------------------- */
23495 /* If we get here, it means that all attempts to decode the
23496 instruction as ARMv7 or earlier have failed. So, if we're doing
23497 ARMv8 or later, here is the point to try for it. */
23499 if (VEX_ARM_ARCHLEVEL(archinfo->hwcaps) >= 8) {
23500 UInt insn32 = (INSN0(15,0) << 16) | INSN1(15,0);
23501 Bool ok_v8
23502 = decode_V8_instruction( &dres, insn32, condT, True/*isT*/,
23503 old_itstate, new_itstate );
23504 if (ok_v8)
23505 goto decode_success;
23508 /* ----------------------------------------------------------- */
23509 /* -- Undecodable -- */
23510 /* ----------------------------------------------------------- */
23512 goto decode_failure;
23513 /*NOTREACHED*/
23515 decode_failure:
23516 /* All decode failures end up here. */
23517 if (sigill_diag)
23518 vex_printf("disInstr(thumb): unhandled instruction: "
23519 "0x%04x 0x%04x\n", (UInt)insn0, (UInt)insn1);
23521 /* Back up ITSTATE to the initial value for this instruction.
23522 If we don't do that, any subsequent restart of the instruction
23523 will restart with the wrong value. */
23524 if (old_itstate != IRTemp_INVALID)
23525 put_ITSTATE(old_itstate);
23527 /* Tell the dispatcher that this insn cannot be decoded, and so has
23528 not been executed, and (is currently) the next to be executed.
23529 R15 should be up-to-date since it made so at the start of each
23530 insn, but nevertheless be paranoid and update it again right
23531 now. */
23532 vassert(0 == (guest_R15_curr_instr_notENC & 1));
23533 llPutIReg( 15, mkU32(guest_R15_curr_instr_notENC | 1) );
23534 dres.len = 0;
23535 dres.whatNext = Dis_StopHere;
23536 dres.jk_StopHere = Ijk_NoDecode;
23537 dres.continueAt = 0;
23538 return dres;
23540 decode_success:
23541 /* All decode successes end up here. */
23542 vassert(dres.len == 4 || dres.len == 2 || dres.len == 20);
23543 switch (dres.whatNext) {
23544 case Dis_Continue:
23545 llPutIReg(15, mkU32(dres.len + (guest_R15_curr_instr_notENC | 1)));
23546 break;
23547 case Dis_ResteerU:
23548 case Dis_ResteerC:
23549 llPutIReg(15, mkU32(dres.continueAt));
23550 break;
23551 case Dis_StopHere:
23552 break;
23553 default:
23554 vassert(0);
23557 DIP("\n");
23559 return dres;
23561 # undef INSN0
23562 # undef INSN1
23565 #undef DIP
23566 #undef DIS
23569 /* Helper table for figuring out how many insns an IT insn
23570 conditionalises.
23572 An ITxyz instruction of the format "1011 1111 firstcond mask"
23573 conditionalises some number of instructions, as indicated by the
23574 following table. A value of zero indicates the instruction is
23575 invalid in some way.
23577 mask = 0 means this isn't an IT instruction
23578 fc = 15 (NV) means unpredictable
23580 The line fc = 14 (AL) is different from the others; there are
23581 additional constraints in this case.
23583 mask(0 .. 15)
23584 +--------------------------------
23585 fc(0 | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
23586 .. | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
23587 | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
23588 | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
23589 | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
23590 | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
23591 | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
23592 | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
23593 | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
23594 | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
23595 | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
23596 | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
23597 | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
23598 | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
23599 | 0 4 3 0 2 0 0 0 1 0 0 0 0 0 0 0
23600 15) | 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
23602 To be conservative with the analysis, let's rule out the mask = 0
23603 case, since that isn't an IT insn at all. But for all the other
23604 cases where the table contains zero, that means unpredictable, so
23605 let's say 4 to be conservative. Hence we have a safe value for any
23606 IT (mask,fc) pair that the CPU would actually identify as an IT
23607 instruction. The final table is
23609 mask(0 .. 15)
23610 +--------------------------------
23611 fc(0 | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
23612 .. | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
23613 | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
23614 | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
23615 | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
23616 | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
23617 | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
23618 | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
23619 | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
23620 | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
23621 | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
23622 | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
23623 | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
23624 | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
23625 | 0 4 3 4 2 4 4 4 1 4 4 4 4 4 4 4
23626 15) | 0 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4
23628 static const UChar it_length_table[256]
23629 = { 0, 4, 3, 4, 2, 4, 3, 4, 1, 4, 3, 4, 2, 4, 3, 4,
23630 0, 4, 3, 4, 2, 4, 3, 4, 1, 4, 3, 4, 2, 4, 3, 4,
23631 0, 4, 3, 4, 2, 4, 3, 4, 1, 4, 3, 4, 2, 4, 3, 4,
23632 0, 4, 3, 4, 2, 4, 3, 4, 1, 4, 3, 4, 2, 4, 3, 4,
23633 0, 4, 3, 4, 2, 4, 3, 4, 1, 4, 3, 4, 2, 4, 3, 4,
23634 0, 4, 3, 4, 2, 4, 3, 4, 1, 4, 3, 4, 2, 4, 3, 4,
23635 0, 4, 3, 4, 2, 4, 3, 4, 1, 4, 3, 4, 2, 4, 3, 4,
23636 0, 4, 3, 4, 2, 4, 3, 4, 1, 4, 3, 4, 2, 4, 3, 4,
23637 0, 4, 3, 4, 2, 4, 3, 4, 1, 4, 3, 4, 2, 4, 3, 4,
23638 0, 4, 3, 4, 2, 4, 3, 4, 1, 4, 3, 4, 2, 4, 3, 4,
23639 0, 4, 3, 4, 2, 4, 3, 4, 1, 4, 3, 4, 2, 4, 3, 4,
23640 0, 4, 3, 4, 2, 4, 3, 4, 1, 4, 3, 4, 2, 4, 3, 4,
23641 0, 4, 3, 4, 2, 4, 3, 4, 1, 4, 3, 4, 2, 4, 3, 4,
23642 0, 4, 3, 4, 2, 4, 3, 4, 1, 4, 3, 4, 2, 4, 3, 4,
23643 0, 4, 3, 4, 2, 4, 4, 4, 1, 4, 4, 4, 4, 4, 4, 4,
23644 0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4
23648 /*------------------------------------------------------------*/
23649 /*--- Top-level fn ---*/
23650 /*------------------------------------------------------------*/
23652 /* Disassemble a single instruction into IR. The instruction
23653 is located in host memory at &guest_code[delta]. */
23655 DisResult disInstr_ARM ( IRSB* irsb_IN,
23656 Bool (*resteerOkFn) ( void*, Addr ),
23657 Bool resteerCisOk,
23658 void* callback_opaque,
23659 const UChar* guest_code_IN,
23660 Long delta_ENCODED,
23661 Addr guest_IP_ENCODED,
23662 VexArch guest_arch,
23663 const VexArchInfo* archinfo,
23664 const VexAbiInfo* abiinfo,
23665 VexEndness host_endness_IN,
23666 Bool sigill_diag_IN )
23668 DisResult dres;
23669 Bool isThumb = (Bool)(guest_IP_ENCODED & 1);
23671 /* Set globals (see top of this file) */
23672 vassert(guest_arch == VexArchARM);
23674 irsb = irsb_IN;
23675 host_endness = host_endness_IN;
23676 __curr_is_Thumb = isThumb;
23678 if (isThumb) {
23679 guest_R15_curr_instr_notENC = (Addr32)guest_IP_ENCODED - 1;
23680 } else {
23681 guest_R15_curr_instr_notENC = (Addr32)guest_IP_ENCODED;
23684 if (isThumb) {
23685 dres = disInstr_THUMB_WRK ( resteerOkFn,
23686 resteerCisOk, callback_opaque,
23687 &guest_code_IN[delta_ENCODED - 1],
23688 archinfo, abiinfo, sigill_diag_IN );
23689 } else {
23690 dres = disInstr_ARM_WRK ( resteerOkFn,
23691 resteerCisOk, callback_opaque,
23692 &guest_code_IN[delta_ENCODED],
23693 archinfo, abiinfo, sigill_diag_IN );
23696 return dres;
23699 /* Test program for the conversion of IRCmpF64Result values to VFP
23700 nzcv values. See handling of FCMPD et al above. */
23702 UInt foo ( UInt x )
23704 UInt ix = ((x >> 5) & 3) | (x & 1);
23705 UInt termL = (((((ix ^ 1) << 30) - 1) >> 29) + 1);
23706 UInt termR = (ix & (ix >> 1) & 1);
23707 return termL - termR;
23710 void try ( char* s, UInt ir, UInt req )
23712 UInt act = foo(ir);
23713 printf("%s 0x%02x -> req %d%d%d%d act %d%d%d%d (0x%x)\n",
23714 s, ir, (req >> 3) & 1, (req >> 2) & 1,
23715 (req >> 1) & 1, (req >> 0) & 1,
23716 (act >> 3) & 1, (act >> 2) & 1,
23717 (act >> 1) & 1, (act >> 0) & 1, act);
/* Driver: check foo() against the four required NZCV encodings.
   Note: the expected values are written in hex rather than with 0b
   binary literals, which are a GCC extension (standard only from
   C23): UN=0011b, LT=1000b, GT=0010b, EQ=0110b. */
int main ( void )
{
   printf("\n");
   try("UN", 0x45, 0x3 /* 0011 */);
   try("LT", 0x01, 0x8 /* 1000 */);
   try("GT", 0x00, 0x2 /* 0010 */);
   try("EQ", 0x40, 0x6 /* 0110 */);
   printf("\n");
   return 0;
}
23733 /* Spare code for doing reference implementations of various 64-bit
23734 SIMD interleaves/deinterleaves/concatenation ops. */
23736 // Split a 64 bit value into 4 16 bit ones, in 32-bit IRTemps with
23737 // the top halves guaranteed to be zero.
23738 static void break64to16s ( IRTemp* out3, IRTemp* out2, IRTemp* out1,
23739 IRTemp* out0, IRTemp v64 )
23741 if (out3) *out3 = newTemp(Ity_I32);
23742 if (out2) *out2 = newTemp(Ity_I32);
23743 if (out1) *out1 = newTemp(Ity_I32);
23744 if (out0) *out0 = newTemp(Ity_I32);
23745 IRTemp hi32 = newTemp(Ity_I32);
23746 IRTemp lo32 = newTemp(Ity_I32);
23747 assign(hi32, unop(Iop_64HIto32, mkexpr(v64)) );
23748 assign(lo32, unop(Iop_64to32, mkexpr(v64)) );
23749 if (out3) assign(*out3, binop(Iop_Shr32, mkexpr(hi32), mkU8(16)));
23750 if (out2) assign(*out2, binop(Iop_And32, mkexpr(hi32), mkU32(0xFFFF)));
23751 if (out1) assign(*out1, binop(Iop_Shr32, mkexpr(lo32), mkU8(16)));
23752 if (out0) assign(*out0, binop(Iop_And32, mkexpr(lo32), mkU32(0xFFFF)));
23755 // Make a 64 bit value from 4 16 bit ones, each of which is in a 32 bit
23756 // IRTemp.
23757 static IRTemp mk64from16s ( IRTemp in3, IRTemp in2, IRTemp in1, IRTemp in0 )
23759 IRTemp hi32 = newTemp(Ity_I32);
23760 IRTemp lo32 = newTemp(Ity_I32);
23761 assign(hi32,
23762 binop(Iop_Or32,
23763 binop(Iop_Shl32, mkexpr(in3), mkU8(16)),
23764 binop(Iop_And32, mkexpr(in2), mkU32(0xFFFF))));
23765 assign(lo32,
23766 binop(Iop_Or32,
23767 binop(Iop_Shl32, mkexpr(in1), mkU8(16)),
23768 binop(Iop_And32, mkexpr(in0), mkU32(0xFFFF))));
23769 IRTemp res = newTemp(Ity_I64);
23770 assign(res, binop(Iop_32HLto64, mkexpr(hi32), mkexpr(lo32)));
23771 return res;
23774 static IRExpr* mk_InterleaveLO16x4 ( IRTemp a3210, IRTemp b3210 )
23776 // returns a1 b1 a0 b0
23777 IRTemp a1, a0, b1, b0;
23778 break64to16s(NULL, NULL, &a1, &a0, a3210);
23779 break64to16s(NULL, NULL, &b1, &b0, b3210);
23780 return mkexpr(mk64from16s(a1, b1, a0, b0));
23783 static IRExpr* mk_InterleaveHI16x4 ( IRTemp a3210, IRTemp b3210 )
23785 // returns a3 b3 a2 b2
23786 IRTemp a3, a2, b3, b2;
23787 break64to16s(&a3, &a2, NULL, NULL, a3210);
23788 break64to16s(&b3, &b2, NULL, NULL, b3210);
23789 return mkexpr(mk64from16s(a3, b3, a2, b2));
23792 static IRExpr* mk_CatEvenLanes16x4 ( IRTemp a3210, IRTemp b3210 )
23794 // returns a2 a0 b2 b0
23795 IRTemp a2, a0, b2, b0;
23796 break64to16s(NULL, &a2, NULL, &a0, a3210);
23797 break64to16s(NULL, &b2, NULL, &b0, b3210);
23798 return mkexpr(mk64from16s(a2, a0, b2, b0));
23801 static IRExpr* mk_CatOddLanes16x4 ( IRTemp a3210, IRTemp b3210 )
23803 // returns a3 a1 b3 b1
23804 IRTemp a3, a1, b3, b1;
23805 break64to16s(&a3, NULL, &a1, NULL, a3210);
23806 break64to16s(&b3, NULL, &b1, NULL, b3210);
23807 return mkexpr(mk64from16s(a3, a1, b3, b1));
23810 static IRExpr* mk_InterleaveOddLanes16x4 ( IRTemp a3210, IRTemp b3210 )
23812 // returns a3 b3 a1 b1
23813 IRTemp a3, b3, a1, b1;
23814 break64to16s(&a3, NULL, &a1, NULL, a3210);
23815 break64to16s(&b3, NULL, &b1, NULL, b3210);
23816 return mkexpr(mk64from16s(a3, b3, a1, b1));
23819 static IRExpr* mk_InterleaveEvenLanes16x4 ( IRTemp a3210, IRTemp b3210 )
23821 // returns a2 b2 a0 b0
23822 IRTemp a2, b2, a0, b0;
23823 break64to16s(NULL, &a2, NULL, &a0, a3210);
23824 break64to16s(NULL, &b2, NULL, &b0, b3210);
23825 return mkexpr(mk64from16s(a2, b2, a0, b0));
23828 static void break64to8s ( IRTemp* out7, IRTemp* out6, IRTemp* out5,
23829 IRTemp* out4, IRTemp* out3, IRTemp* out2,
23830 IRTemp* out1,IRTemp* out0, IRTemp v64 )
23832 if (out7) *out7 = newTemp(Ity_I32);
23833 if (out6) *out6 = newTemp(Ity_I32);
23834 if (out5) *out5 = newTemp(Ity_I32);
23835 if (out4) *out4 = newTemp(Ity_I32);
23836 if (out3) *out3 = newTemp(Ity_I32);
23837 if (out2) *out2 = newTemp(Ity_I32);
23838 if (out1) *out1 = newTemp(Ity_I32);
23839 if (out0) *out0 = newTemp(Ity_I32);
23840 IRTemp hi32 = newTemp(Ity_I32);
23841 IRTemp lo32 = newTemp(Ity_I32);
23842 assign(hi32, unop(Iop_64HIto32, mkexpr(v64)) );
23843 assign(lo32, unop(Iop_64to32, mkexpr(v64)) );
23844 if (out7)
23845 assign(*out7, binop(Iop_And32,
23846 binop(Iop_Shr32, mkexpr(hi32), mkU8(24)),
23847 mkU32(0xFF)));
23848 if (out6)
23849 assign(*out6, binop(Iop_And32,
23850 binop(Iop_Shr32, mkexpr(hi32), mkU8(16)),
23851 mkU32(0xFF)));
23852 if (out5)
23853 assign(*out5, binop(Iop_And32,
23854 binop(Iop_Shr32, mkexpr(hi32), mkU8(8)),
23855 mkU32(0xFF)));
23856 if (out4)
23857 assign(*out4, binop(Iop_And32, mkexpr(hi32), mkU32(0xFF)));
23858 if (out3)
23859 assign(*out3, binop(Iop_And32,
23860 binop(Iop_Shr32, mkexpr(lo32), mkU8(24)),
23861 mkU32(0xFF)));
23862 if (out2)
23863 assign(*out2, binop(Iop_And32,
23864 binop(Iop_Shr32, mkexpr(lo32), mkU8(16)),
23865 mkU32(0xFF)));
23866 if (out1)
23867 assign(*out1, binop(Iop_And32,
23868 binop(Iop_Shr32, mkexpr(lo32), mkU8(8)),
23869 mkU32(0xFF)));
23870 if (out0)
23871 assign(*out0, binop(Iop_And32, mkexpr(lo32), mkU32(0xFF)));
23874 static IRTemp mk64from8s ( IRTemp in7, IRTemp in6, IRTemp in5, IRTemp in4,
23875 IRTemp in3, IRTemp in2, IRTemp in1, IRTemp in0 )
23877 IRTemp hi32 = newTemp(Ity_I32);
23878 IRTemp lo32 = newTemp(Ity_I32);
23879 assign(hi32,
23880 binop(Iop_Or32,
23881 binop(Iop_Or32,
23882 binop(Iop_Shl32,
23883 binop(Iop_And32, mkexpr(in7), mkU32(0xFF)),
23884 mkU8(24)),
23885 binop(Iop_Shl32,
23886 binop(Iop_And32, mkexpr(in6), mkU32(0xFF)),
23887 mkU8(16))),
23888 binop(Iop_Or32,
23889 binop(Iop_Shl32,
23890 binop(Iop_And32, mkexpr(in5), mkU32(0xFF)), mkU8(8)),
23891 binop(Iop_And32,
23892 mkexpr(in4), mkU32(0xFF)))));
23893 assign(lo32,
23894 binop(Iop_Or32,
23895 binop(Iop_Or32,
23896 binop(Iop_Shl32,
23897 binop(Iop_And32, mkexpr(in3), mkU32(0xFF)),
23898 mkU8(24)),
23899 binop(Iop_Shl32,
23900 binop(Iop_And32, mkexpr(in2), mkU32(0xFF)),
23901 mkU8(16))),
23902 binop(Iop_Or32,
23903 binop(Iop_Shl32,
23904 binop(Iop_And32, mkexpr(in1), mkU32(0xFF)), mkU8(8)),
23905 binop(Iop_And32,
23906 mkexpr(in0), mkU32(0xFF)))));
23907 IRTemp res = newTemp(Ity_I64);
23908 assign(res, binop(Iop_32HLto64, mkexpr(hi32), mkexpr(lo32)));
23909 return res;
23912 static IRExpr* mk_InterleaveLO8x8 ( IRTemp a76543210, IRTemp b76543210 )
23914 // returns a3 b3 a2 b2 a1 b1 a0 b0
23915 IRTemp a3, b3, a2, b2, a1, a0, b1, b0;
23916 break64to8s(NULL, NULL, NULL, NULL, &a3, &a2, &a1, &a0, a76543210);
23917 break64to8s(NULL, NULL, NULL, NULL, &b3, &b2, &b1, &b0, b76543210);
23918 return mkexpr(mk64from8s(a3, b3, a2, b2, a1, b1, a0, b0));
23921 static IRExpr* mk_InterleaveHI8x8 ( IRTemp a76543210, IRTemp b76543210 )
23923 // returns a7 b7 a6 b6 a5 b5 a4 b4
23924 IRTemp a7, b7, a6, b6, a5, b5, a4, b4;
23925 break64to8s(&a7, &a6, &a5, &a4, NULL, NULL, NULL, NULL, a76543210);
23926 break64to8s(&b7, &b6, &b5, &b4, NULL, NULL, NULL, NULL, b76543210);
23927 return mkexpr(mk64from8s(a7, b7, a6, b6, a5, b5, a4, b4));
23930 static IRExpr* mk_CatEvenLanes8x8 ( IRTemp a76543210, IRTemp b76543210 )
23932 // returns a6 a4 a2 a0 b6 b4 b2 b0
23933 IRTemp a6, a4, a2, a0, b6, b4, b2, b0;
23934 break64to8s(NULL, &a6, NULL, &a4, NULL, &a2, NULL, &a0, a76543210);
23935 break64to8s(NULL, &b6, NULL, &b4, NULL, &b2, NULL, &b0, b76543210);
23936 return mkexpr(mk64from8s(a6, a4, a2, a0, b6, b4, b2, b0));
23939 static IRExpr* mk_CatOddLanes8x8 ( IRTemp a76543210, IRTemp b76543210 )
23941 // returns a7 a5 a3 a1 b7 b5 b3 b1
23942 IRTemp a7, a5, a3, a1, b7, b5, b3, b1;
23943 break64to8s(&a7, NULL, &a5, NULL, &a3, NULL, &a1, NULL, a76543210);
23944 break64to8s(&b7, NULL, &b5, NULL, &b3, NULL, &b1, NULL, b76543210);
23945 return mkexpr(mk64from8s(a7, a5, a3, a1, b7, b5, b3, b1));
23948 static IRExpr* mk_InterleaveEvenLanes8x8 ( IRTemp a76543210, IRTemp b76543210 )
23950 // returns a6 b6 a4 b4 a2 b2 a0 b0
23951 IRTemp a6, b6, a4, b4, a2, b2, a0, b0;
23952 break64to8s(NULL, &a6, NULL, &a4, NULL, &a2, NULL, &a0, a76543210);
23953 break64to8s(NULL, &b6, NULL, &b4, NULL, &b2, NULL, &b0, b76543210);
23954 return mkexpr(mk64from8s(a6, b6, a4, b4, a2, b2, a0, b0));
23957 static IRExpr* mk_InterleaveOddLanes8x8 ( IRTemp a76543210, IRTemp b76543210 )
23959 // returns a7 b7 a5 b5 a3 b3 a1 b1
23960 IRTemp a7, b7, a5, b5, a3, b3, a1, b1;
23961 break64to8s(&a7, NULL, &a5, NULL, &a3, NULL, &a1, NULL, a76543210);
23962 break64to8s(&b7, NULL, &b5, NULL, &b3, NULL, &b1, NULL, b76543210);
23963 return mkexpr(mk64from8s(a7, b7, a5, b5, a3, b3, a1, b1));
23966 static IRExpr* mk_InterleaveLO32x2 ( IRTemp a10, IRTemp b10 )
23968 // returns a0 b0
23969 return binop(Iop_32HLto64, unop(Iop_64to32, mkexpr(a10)),
23970 unop(Iop_64to32, mkexpr(b10)));
23973 static IRExpr* mk_InterleaveHI32x2 ( IRTemp a10, IRTemp b10 )
23975 // returns a1 b1
23976 return binop(Iop_32HLto64, unop(Iop_64HIto32, mkexpr(a10)),
23977 unop(Iop_64HIto32, mkexpr(b10)));
23981 /*--------------------------------------------------------------------*/
23982 /*--- end guest_arm_toIR.c ---*/
23983 /*--------------------------------------------------------------------*/