/* simulator.c -- Interface for the AArch64 simulator.

   Copyright (C) 2015-2024 Free Software Foundation, Inc.

   Contributed by Red Hat.

   This file is part of GDB.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
/* This must come before any other includes.  */
#include "defs.h"

#include <sys/types.h>

#include "aarch64-sim.h"
#include "simulator.h"

#include "sim-signal.h"
#define TST(_flag)   (aarch64_test_CPSR_bit (cpu, _flag))
#define IS_SET(_X)   (TST (( _X )) ? 1 : 0)
#define IS_CLEAR(_X) (TST (( _X )) ? 0 : 1)

/* Space saver macro.  */
#define INSTR(HIGH, LOW) uimm (aarch64_get_instr (cpu), (HIGH), (LOW))
#define HALT_UNALLOC							\
  do									\
    {									\
      TRACE_DISASM (cpu, aarch64_get_PC (cpu));				\
      TRACE_INSN (cpu,							\
		  "Unallocated instruction detected at sim line %d,"	\
		  " exe addr %" PRIx64,					\
		  __LINE__, aarch64_get_PC (cpu));			\
      sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),\
		       sim_stopped, SIM_SIGILL);			\
    }									\
  while (0)

#define HALT_NYI							\
  do									\
    {									\
      TRACE_DISASM (cpu, aarch64_get_PC (cpu));				\
      TRACE_INSN (cpu,							\
		  "Unimplemented instruction detected at sim line %d,"	\
		  " exe addr %" PRIx64,					\
		  __LINE__, aarch64_get_PC (cpu));			\
      if (! TRACE_ANY_P (cpu))						\
	sim_io_eprintf (CPU_STATE (cpu), "SIM Error: Unimplemented instruction: %#08x\n", \
			aarch64_get_instr (cpu));			\
      sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),\
		       sim_stopped, SIM_SIGABRT);			\
    }									\
  while (0)

#define NYI_assert(HI, LO, EXPECTED)					\
  do									\
    {									\
      if (INSTR ((HI), (LO)) != (EXPECTED))				\
	HALT_NYI;							\
    }									\
  while (0)
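/* Illustrative note (added for exposition, not part of the original source):
   the decode helpers below use these macros to police fixed opcode fields,
   e.g. the add/subtract-immediate decoder asserts its fixed bits with
       NYI_assert (28, 24, 0x11);
   and falls into HALT_UNALLOC for encodings it does not handle.  */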
static uint64_t
expand_logical_immediate (uint32_t s, uint32_t r, uint32_t n)
{
  uint64_t mask;
  uint64_t imm;
  unsigned simd_size;

  /* The immediate value is S+1 bits to 1, left rotated by SIMDsize - R
     (in other words, right rotated by R), then replicated.  */
  if (n != 0)
    {
      simd_size = 64;
      mask = 0xffffffffffffffffull;
    }
  else
    {
      switch (s >> 4)
	{
	case 0x00 ... 0x1f: /* 0xxxxx */ simd_size = 32;           break;
	case 0x20 ... 0x2f: /* 10xxxx */ simd_size = 16; s &= 0xf; break;
	case 0x30 ... 0x37: /* 110xxx */ simd_size =  8; s &= 0x7; break;
	case 0x38 ... 0x3b: /* 1110xx */ simd_size =  4; s &= 0x3; break;
	case 0x3c ... 0x3d: /* 11110x */ simd_size =  2; s &= 0x1; break;
	default: return 0;
	}
      mask = (1ull << simd_size) - 1;
      /* Top bits are IGNORED.  */
      r &= simd_size - 1;
    }

  /* NOTE: if S = simd_size - 1 we get 0xf..f which is rejected.  */
  if (s == simd_size - 1)
    return 0;

  /* S+1 consecutive bits to 1.  */
  /* NOTE: S can't be 63 due to detection above.  */
  imm = (1ull << (s + 1)) - 1;

  /* Rotate to the left by simd_size - R.  */
  if (r != 0)
    imm = ((imm << (simd_size - r)) & mask) | (imm >> r);

  /* Replicate the value according to SIMD size.  */
  switch (simd_size)
    {
    case  2: imm = (imm <<  2) | imm; ATTRIBUTE_FALLTHROUGH;
    case  4: imm = (imm <<  4) | imm; ATTRIBUTE_FALLTHROUGH;
    case  8: imm = (imm <<  8) | imm; ATTRIBUTE_FALLTHROUGH;
    case 16: imm = (imm << 16) | imm; ATTRIBUTE_FALLTHROUGH;
    case 32: imm = (imm << 32) | imm; ATTRIBUTE_FALLTHROUGH;
    case 64: break;
    default: return 0;
    }

  return imm;
}
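/* Worked example (added for exposition, not in the original source): with
   n == 0, s == 1 and r == 0 the element size is 32, the element is two
   consecutive ones (0x3), and replication yields the 64-bit pattern
   0x0000000300000003, i.e. expand_logical_immediate (1, 0, 0).  */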
/* Instr[22,10] encodes N immr and imms.  We want a lookup table
   for each possible combination i.e. 13 bits worth of int entries.  */
#define  LI_TABLE_SIZE  (1 << 13)
static uint64_t LITable[LI_TABLE_SIZE];

void
aarch64_init_LIT_table (void)
{
  unsigned index;

  for (index = 0; index < LI_TABLE_SIZE; index++)
    {
      uint32_t n    = uimm (index, 12, 12);
      uint32_t immr = uimm (index, 11, 6);
      uint32_t imms = uimm (index, 5, 0);

      LITable [index] = expand_logical_immediate (imms, immr, n);
    }
}
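/* Usage sketch (added for exposition; the caller shown here is hypothetical):
   a decoder for logical-immediate instructions can form the 13-bit
   N:immr:imms index straight from the instruction word and read back the
   expanded bitmask, e.g.
       uint64_t bimm = LITable [INSTR (22, 10)];
   a value of zero marks an unallocated immediate encoding.  */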
static void
dexNotify (sim_cpu *cpu)
{
  /* instr[14,0] == type : 0 ==> method entry, 1 ==> method reentry
                           2 ==> exit Java, 3 ==> start next bytecode.  */
  uint32_t type = INSTR (14, 0);

  TRACE_EVENTS (cpu, "Notify Insn encountered, type = 0x%x", type);

  switch (type)
    {
    case 0:
      /* aarch64_notifyMethodEntry (aarch64_get_reg_u64 (cpu, R23, 0),
	 aarch64_get_reg_u64 (cpu, R22, 0)); */
      break;
    case 1:
      /* aarch64_notifyMethodReentry (aarch64_get_reg_u64 (cpu, R23, 0),
	 aarch64_get_reg_u64 (cpu, R22, 0)); */
      break;
    case 2:
      /* aarch64_notifyMethodExit (); */
      break;
    case 3:
      /* aarch64_notifyBCStart (aarch64_get_reg_u64 (cpu, R23, 0),
	 aarch64_get_reg_u64 (cpu, R22, 0)); */
      break;
    }
}
/* secondary decode within top level groups  */

static void
dexPseudo (sim_cpu *cpu)
{
  /* assert instr[28,27] = 00

     We provide 2 pseudo instructions:

     HALT stops execution of the simulator causing an immediate
     return to the x86 code which entered it.

     CALLOUT initiates recursive entry into x86 code.  A register
     argument holds the address of the x86 routine.  Immediate
     values in the instruction identify the number of general
     purpose and floating point register arguments to be passed
     and the type of any value to be returned.  */

  uint32_t PSEUDO_HALT      =  0xE0000000U;
  uint32_t PSEUDO_CALLOUT   =  0x00018000U;
  uint32_t PSEUDO_CALLOUTR  =  0x00018001U;
  uint32_t PSEUDO_NOTIFY    =  0x00014000U;
  uint32_t dispatch;

  if (aarch64_get_instr (cpu) == PSEUDO_HALT)
    {
      TRACE_EVENTS (cpu, " Pseudo Halt Instruction");
      sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
		       sim_stopped, SIM_SIGTRAP);
    }

  dispatch = INSTR (31, 15);

  /* We do not handle callouts at the moment.  */
  if (dispatch == PSEUDO_CALLOUT || dispatch == PSEUDO_CALLOUTR)
    {
      TRACE_EVENTS (cpu, " Callout");
      sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
		       sim_stopped, SIM_SIGABRT);
    }

  else if (dispatch == PSEUDO_NOTIFY)
    dexNotify (cpu);

  else
    HALT_UNALLOC;
}
/* Load-store single register (unscaled offset)
   These instructions employ a base register plus an unscaled signed
   9 bit offset.

   N.B. the base register (source) can be Xn or SP.  All other
   registers may not be SP.  */

/* 32 bit load 32 bit unscaled signed 9 bit.  */
static void
ldur32 (sim_cpu *cpu, int32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u32
		       (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
			+ offset));
}

/* 64 bit load 64 bit unscaled signed 9 bit.  */
static void
ldur64 (sim_cpu *cpu, int32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u64
		       (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
			+ offset));
}

/* 32 bit load zero-extended byte unscaled signed 9 bit.  */
static void
ldurb32 (sim_cpu *cpu, int32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u8
		       (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
			+ offset));
}

/* 32 bit load sign-extended byte unscaled signed 9 bit.  */
static void
ldursb32 (sim_cpu *cpu, int32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rt, NO_SP, (uint32_t) aarch64_get_mem_s8
		       (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
			+ offset));
}

/* 64 bit load sign-extended byte unscaled signed 9 bit.  */
static void
ldursb64 (sim_cpu *cpu, int32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_s64 (cpu, rt, NO_SP, aarch64_get_mem_s8
		       (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
			+ offset));
}
/* 32 bit load zero-extended short unscaled signed 9 bit  */
static void
ldurh32 (sim_cpu *cpu, int32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, NO_SP, aarch64_get_mem_u16
		       (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
			+ offset));
}

/* 32 bit load sign-extended short unscaled signed 9 bit  */
static void
ldursh32 (sim_cpu *cpu, int32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, NO_SP, (uint32_t) aarch64_get_mem_s16
		       (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
			+ offset));
}

/* 64 bit load sign-extended short unscaled signed 9 bit  */
static void
ldursh64 (sim_cpu *cpu, int32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_s64 (cpu, rt, NO_SP, aarch64_get_mem_s16
		       (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
			+ offset));
}

/* 64 bit load sign-extended word unscaled signed 9 bit  */
static void
ldursw (sim_cpu *cpu, int32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, NO_SP, (uint32_t) aarch64_get_mem_s32
		       (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
			+ offset));
}
/* N.B. with stores the value in source is written to the address
   identified by source2 modified by offset.  */

/* 32 bit store 32 bit unscaled signed 9 bit.  */
static void
stur32 (sim_cpu *cpu, int32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_mem_u32 (cpu,
		       aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset,
		       aarch64_get_reg_u32 (cpu, rd, NO_SP));
}

/* 64 bit store 64 bit unscaled signed 9 bit  */
static void
stur64 (sim_cpu *cpu, int32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_mem_u64 (cpu,
		       aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset,
		       aarch64_get_reg_u64 (cpu, rd, NO_SP));
}

/* 32 bit store byte unscaled signed 9 bit  */
static void
sturb (sim_cpu *cpu, int32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_mem_u8 (cpu,
		      aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset,
		      aarch64_get_reg_u8 (cpu, rd, NO_SP));
}

/* 32 bit store short unscaled signed 9 bit  */
static void
sturh (sim_cpu *cpu, int32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_mem_u16 (cpu,
		       aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset,
		       aarch64_get_reg_u16 (cpu, rd, NO_SP));
}
/* Load single register pc-relative label
   Offset is a signed 19 bit immediate count in words
   rt may not be SP.  */

/* 32 bit pc-relative load  */
static void
ldr32_pcrel (sim_cpu *cpu, int32_t offset)
{
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, NO_SP,
		       aarch64_get_mem_u32
		       (cpu, aarch64_get_PC (cpu) + offset * 4));
}

/* 64 bit pc-relative load  */
static void
ldr_pcrel (sim_cpu *cpu, int32_t offset)
{
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, NO_SP,
		       aarch64_get_mem_u64
		       (cpu, aarch64_get_PC (cpu) + offset * 4));
}

/* sign extended 32 bit pc-relative load  */
static void
ldrsw_pcrel (sim_cpu *cpu, int32_t offset)
{
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, NO_SP,
		       aarch64_get_mem_s32
		       (cpu, aarch64_get_PC (cpu) + offset * 4));
}

/* float pc-relative load  */
static void
fldrs_pcrel (sim_cpu *cpu, int32_t offset)
{
  unsigned int rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_vec_u32 (cpu, rd, 0,
		       aarch64_get_mem_u32
		       (cpu, aarch64_get_PC (cpu) + offset * 4));
}

/* double pc-relative load  */
static void
fldrd_pcrel (sim_cpu *cpu, int32_t offset)
{
  unsigned int st = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_vec_u64 (cpu, st, 0,
		       aarch64_get_mem_u64
		       (cpu, aarch64_get_PC (cpu) + offset * 4));
}

/* long double pc-relative load.  */
static void
fldrq_pcrel (sim_cpu *cpu, int32_t offset)
{
  unsigned int st = INSTR (4, 0);
  uint64_t addr = aarch64_get_PC (cpu) + offset * 4;
  FRegister a;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_get_mem_long_double (cpu, addr, & a);
  aarch64_set_FP_long_double (cpu, st, a);
}
/* This can be used to scale an offset by applying
   the requisite shift.  The second argument is the element
   size: 16, 32, 64 or 128.  */

#define SCALE(_offset, _elementSize) \
    ((_offset) << ScaleShift ## _elementSize)

/* This can be used to optionally scale a register derived offset
   by applying the requisite shift as indicated by the Scaling
   argument.  The second argument is the element size (16, 32, 64
   or 128).  The third argument is either Scaled or Unscaled.
   N.B. when _Scaling is Scaled the shift gets ANDed with
   all 1s while when it is Unscaled it gets ANDed with 0.  */

#define OPT_SCALE(_offset, _elementType, _Scaling) \
  ((_offset) << (_Scaling ? ScaleShift ## _elementType : 0))
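/* Expansion example (added for exposition; ScaleShift32 and friends are the
   shift-count constants the macros paste together, assumed to be defined in
   the simulator headers):
       SCALE (imm12, 32)              ==> imm12 << ScaleShift32
       OPT_SCALE (disp, 64, Unscaled) ==> disp << 0
   so an "absolute" 12-bit offset is always scaled by the element size while
   a register offset is only scaled when the instruction selects Scaled.  */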
/* This can be used to zero or sign extend a 32 bit register derived
   value to a 64 bit value.  The first argument must be the value as
   a uint32_t and the second must be either UXTW or SXTW.  The result
   is returned as an int64_t.  */

static inline int64_t
extend (uint32_t value, Extension extension)
{
  union
  {
    uint32_t u;
    int32_t  n;
  } x;

  /* A branchless variant of this ought to be possible.  */
  if (extension == UXTW || extension == NoExtension)
    return value;

  x.u = value;
  return x.n;
}
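/* Behavioural example (added for exposition): extend (0xffffffff, UXTW)
   yields 0x00000000ffffffff, while extend (0xffffffff, SXTW) reinterprets
   the value as a signed 32-bit quantity and yields -1 sign extended to
   64 bits.  */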
/* Scalar Floating Point

   FP load/store single register (4 addressing modes)

   N.B. the base register (source) can be the stack pointer.
   The secondary source register (source2) can only be an Xn register.  */

/* Load 32 bit unscaled signed 9 bit with pre- or post-writeback.  */
static void
fldrs_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  unsigned rn = INSTR (9, 5);
  unsigned st = INSTR (4, 0);
  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);

  if (wb != Post)
    address += offset;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_vec_u32 (cpu, st, 0, aarch64_get_mem_u32 (cpu, address));

  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}

/* Load 8 bit with unsigned 12 bit offset.  */
static void
fldrb_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rd = INSTR (4, 0);
  unsigned rn = INSTR (9, 5);
  uint64_t addr = aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_vec_u8 (cpu, rd, 0, aarch64_get_mem_u32 (cpu, addr));
}

/* Load 16 bit scaled unsigned 12 bit.  */
static void
fldrh_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rd = INSTR (4, 0);
  unsigned rn = INSTR (9, 5);
  uint64_t addr = aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 16);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_vec_u16 (cpu, rd, 0, aarch64_get_mem_u16 (cpu, addr));
}

/* Load 32 bit scaled unsigned 12 bit.  */
static void
fldrs_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rd = INSTR (4, 0);
  unsigned rn = INSTR (9, 5);
  uint64_t addr = aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 32);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_vec_u32 (cpu, rd, 0, aarch64_get_mem_u32 (cpu, addr));
}

/* Load 64 bit scaled unsigned 12 bit.  */
static void
fldrd_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rd = INSTR (4, 0);
  unsigned rn = INSTR (9, 5);
  uint64_t addr = aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 64);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_vec_u64 (cpu, rd, 0, aarch64_get_mem_u64 (cpu, addr));
}

/* Load 128 bit scaled unsigned 12 bit.  */
static void
fldrq_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rd = INSTR (4, 0);
  unsigned rn = INSTR (9, 5);
  uint64_t addr = aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 128);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_vec_u64 (cpu, rd, 0, aarch64_get_mem_u64 (cpu, addr));
  aarch64_set_vec_u64 (cpu, rd, 1, aarch64_get_mem_u64 (cpu, addr + 8));
}

/* Load 32 bit scaled or unscaled zero- or sign-extended
   32-bit register offset.  */
static void
fldrs_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned st = INSTR (4, 0);
  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
  uint64_t displacement = OPT_SCALE (extended, 32, scaling);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_vec_u32 (cpu, st, 0, aarch64_get_mem_u32
		       (cpu, address + displacement));
}

/* Load 64 bit unscaled signed 9 bit with pre- or post-writeback.  */
static void
fldrd_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  unsigned rn = INSTR (9, 5);
  unsigned st = INSTR (4, 0);
  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);

  if (wb != Post)
    address += offset;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_vec_u64 (cpu, st, 0, aarch64_get_mem_u64 (cpu, address));

  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}

/* Load 64 bit scaled or unscaled zero- or sign-extended 32-bit register offset.  */
static void
fldrd_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
  uint64_t displacement = OPT_SCALE (extended, 64, scaling);

  fldrd_wb (cpu, displacement, NoWriteBack);
}

/* Load 128 bit unscaled signed 9 bit with pre- or post-writeback.  */
static void
fldrq_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  FRegister a;
  unsigned rn = INSTR (9, 5);
  unsigned st = INSTR (4, 0);
  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);

  if (wb != Post)
    address += offset;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_get_mem_long_double (cpu, address, & a);
  aarch64_set_FP_long_double (cpu, st, a);

  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}

/* Load 128 bit scaled or unscaled zero- or sign-extended 32-bit register offset  */
static void
fldrq_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
  uint64_t displacement = OPT_SCALE (extended, 128, scaling);

  fldrq_wb (cpu, displacement, NoWriteBack);
}
/* Memory Access

   load-store single register
   There are four addressing modes available here which all employ a
   64 bit source (base) register.

   N.B. the base register (source) can be the stack pointer.
   The secondary source register (source2) can only be an Xn register.

   Scaled, 12-bit, unsigned immediate offset, without pre- and
   post-index options.
   Unscaled, 9-bit, signed immediate offset with pre- or post-index
   writeback.
   Scaled or unscaled 64-bit register offset.
   Scaled or unscaled 32-bit extended register offset.

   All offsets are assumed to be raw from the decode i.e. the
   simulator is expected to adjust scaled offsets based on the
   accessed data size.  With register or extended register offset
   versions the same applies except that in the latter case the
   operation may also require a sign extend.

   A separate method is provided for each possible addressing mode.  */
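/* Naming note (added for exposition): the handlers below follow a fixed
   suffix convention -- *_abs for the scaled unsigned 12-bit immediate,
   *_wb for the signed 9-bit immediate with optional pre/post writeback,
   and *_scale_ext for the (optionally scaled) extended register offset --
   so each decoded addressing mode maps onto exactly one helper.  */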
/* 32 bit load 32 bit scaled unsigned 12 bit  */
static void
ldr32_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* The target register may not be SP but the source may be.  */
  aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u32
		       (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
			+ SCALE (offset, 32)));
}

/* 32 bit load 32 bit unscaled signed 9 bit with pre- or post-writeback.  */
static void
ldr32_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  uint64_t address;

  if (rn == rt && wb != NoWriteBack)
    HALT_UNALLOC;

  address = aarch64_get_reg_u64 (cpu, rn, SP_OK);

  if (wb != Post)
    address += offset;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u32 (cpu, address));

  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}

/* 32 bit load 32 bit scaled or unscaled
   zero- or sign-extended 32-bit register offset  */
static void
ldr32_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  /* rn may reference SP, rm and rt must reference ZR  */

  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
  uint64_t displacement = OPT_SCALE (extended, 32, scaling);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rt, NO_SP,
		       aarch64_get_mem_u32 (cpu, address + displacement));
}

/* 64 bit load 64 bit scaled unsigned 12 bit  */
static void
ldr_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* The target register may not be SP but the source may be.  */
  aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u64
		       (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
			+ SCALE (offset, 64)));
}

/* 64 bit load 64 bit unscaled signed 9 bit with pre- or post-writeback.  */
static void
ldr_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  uint64_t address;

  if (rn == rt && wb != NoWriteBack)
    HALT_UNALLOC;

  address = aarch64_get_reg_u64 (cpu, rn, SP_OK);

  if (wb != Post)
    address += offset;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u64 (cpu, address));

  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}

/* 64 bit load 64 bit scaled or unscaled zero-
   or sign-extended 32-bit register offset.  */
static void
ldr_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  /* rn may reference SP, rm and rt must reference ZR  */

  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
  uint64_t displacement = OPT_SCALE (extended, 64, scaling);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rt, NO_SP,
		       aarch64_get_mem_u64 (cpu, address + displacement));
}
/* 32 bit load zero-extended byte scaled unsigned 12 bit.  */
static void
ldrb32_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* The target register may not be SP but the source may be
     there is no scaling required for a byte load.  */
  aarch64_set_reg_u64 (cpu, rt, NO_SP,
		       aarch64_get_mem_u8
		       (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset));
}

/* 32 bit load zero-extended byte unscaled signed 9 bit with pre- or post-writeback.  */
static void
ldrb32_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  uint64_t address;

  if (rn == rt && wb != NoWriteBack)
    HALT_UNALLOC;

  address = aarch64_get_reg_u64 (cpu, rn, SP_OK);

  if (wb != Post)
    address += offset;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u8 (cpu, address));

  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}

/* 32 bit load zero-extended byte scaled or unscaled zero-
   or sign-extended 32-bit register offset.  */
static void
ldrb32_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  /* rn may reference SP, rm and rt must reference ZR  */

  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int64_t displacement = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
				 extension);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* There is no scaling required for a byte load.  */
  aarch64_set_reg_u64 (cpu, rt, NO_SP,
		       aarch64_get_mem_u8 (cpu, address + displacement));
}

/* 64 bit load sign-extended byte unscaled signed 9 bit
   with pre- or post-writeback.  */
static void
ldrsb_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  uint64_t address;
  int64_t val;

  if (rn == rt && wb != NoWriteBack)
    HALT_UNALLOC;

  address = aarch64_get_reg_u64 (cpu, rn, SP_OK);

  if (wb != Post)
    address += offset;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  val = aarch64_get_mem_s8 (cpu, address);
  aarch64_set_reg_s64 (cpu, rt, NO_SP, val);

  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}

/* 64 bit load sign-extended byte scaled unsigned 12 bit.  */
static void
ldrsb_abs (sim_cpu *cpu, uint32_t offset)
{
  ldrsb_wb (cpu, offset, NoWriteBack);
}

/* 64 bit load sign-extended byte scaled or unscaled zero-
   or sign-extended 32-bit register offset.  */
static void
ldrsb_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  /* rn may reference SP, rm and rt must reference ZR  */

  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int64_t displacement = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
				 extension);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* There is no scaling required for a byte load.  */
  aarch64_set_reg_s64 (cpu, rt, NO_SP,
		       aarch64_get_mem_s8 (cpu, address + displacement));
}
/* 32 bit load zero-extended short scaled unsigned 12 bit.  */
static void
ldrh32_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  uint32_t val;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* The target register may not be SP but the source may be.  */
  val = aarch64_get_mem_u16 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
			     + SCALE (offset, 16));
  aarch64_set_reg_u32 (cpu, rt, NO_SP, val);
}

/* 32 bit load zero-extended short unscaled signed 9 bit
   with pre- or post-writeback.  */
static void
ldrh32_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  uint64_t address;

  if (rn == rt && wb != NoWriteBack)
    HALT_UNALLOC;

  address = aarch64_get_reg_u64 (cpu, rn, SP_OK);

  if (wb != Post)
    address += offset;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u32 (cpu, rt, NO_SP, aarch64_get_mem_u16 (cpu, address));

  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}

/* 32 bit load zero-extended short scaled or unscaled zero-
   or sign-extended 32-bit register offset.  */
static void
ldrh32_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  /* rn may reference SP, rm and rt must reference ZR  */

  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
  uint64_t displacement = OPT_SCALE (extended, 16, scaling);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u32 (cpu, rt, NO_SP,
		       aarch64_get_mem_u16 (cpu, address + displacement));
}

/* 32 bit load sign-extended short scaled unsigned 12 bit.  */
static void
ldrsh32_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  int32_t val;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* The target register may not be SP but the source may be.  */
  val = aarch64_get_mem_s16 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
			     + SCALE (offset, 16));
  aarch64_set_reg_s32 (cpu, rt, NO_SP, val);
}

/* 32 bit load sign-extended short unscaled signed 9 bit
   with pre- or post-writeback.  */
static void
ldrsh32_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  uint64_t address;

  if (rn == rt && wb != NoWriteBack)
    HALT_UNALLOC;

  address = aarch64_get_reg_u64 (cpu, rn, SP_OK);

  if (wb != Post)
    address += offset;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_s32 (cpu, rt, NO_SP,
		       (int32_t) aarch64_get_mem_s16 (cpu, address));

  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}

/* 32 bit load sign-extended short scaled or unscaled zero-
   or sign-extended 32-bit register offset.  */
static void
ldrsh32_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  /* rn may reference SP, rm and rt must reference ZR  */

  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
  uint64_t displacement = OPT_SCALE (extended, 16, scaling);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_s32 (cpu, rt, NO_SP,
		       (int32_t) aarch64_get_mem_s16
		       (cpu, address + displacement));
}
/* 64 bit load sign-extended short scaled unsigned 12 bit.  */
static void
ldrsh_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  int64_t val;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* The target register may not be SP but the source may be.  */
  val = aarch64_get_mem_s16 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
			     + SCALE (offset, 16));
  aarch64_set_reg_s64 (cpu, rt, NO_SP, val);
}

/* 64 bit load sign-extended short unscaled signed 9 bit
   with pre- or post-writeback.  */
static void
ldrsh64_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  uint64_t address;
  int64_t val;

  if (rn == rt && wb != NoWriteBack)
    HALT_UNALLOC;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  address = aarch64_get_reg_u64 (cpu, rn, SP_OK);

  if (wb != Post)
    address += offset;

  val = aarch64_get_mem_s16 (cpu, address);
  aarch64_set_reg_s64 (cpu, rt, NO_SP, val);

  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}

/* 64 bit load sign-extended short scaled or unscaled zero-
   or sign-extended 32-bit register offset.  */
static void
ldrsh_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  /* rn may reference SP, rm and rt must reference ZR  */

  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
  uint64_t displacement = OPT_SCALE (extended, 16, scaling);
  int64_t val;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  val = aarch64_get_mem_s16 (cpu, address + displacement);
  aarch64_set_reg_s64 (cpu, rt, NO_SP, val);
}
/* 64 bit load sign-extended 32 bit scaled unsigned 12 bit.  */
static void
ldrsw_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  int64_t val;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  val = aarch64_get_mem_s32 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
			     + SCALE (offset, 32));
  /* The target register may not be SP but the source may be.  */
  return aarch64_set_reg_s64 (cpu, rt, NO_SP, val);
}

/* 64 bit load sign-extended 32 bit unscaled signed 9 bit
   with pre- or post-writeback.  */
static void
ldrsw_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  uint64_t address;

  if (rn == rt && wb != NoWriteBack)
    HALT_UNALLOC;

  address = aarch64_get_reg_u64 (cpu, rn, SP_OK);

  if (wb != Post)
    address += offset;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_s64 (cpu, rt, NO_SP, aarch64_get_mem_s32 (cpu, address));

  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}

/* 64 bit load sign-extended 32 bit scaled or unscaled zero-
   or sign-extended 32-bit register offset.  */
static void
ldrsw_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  /* rn may reference SP, rm and rt must reference ZR  */

  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
  uint64_t displacement = OPT_SCALE (extended, 32, scaling);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_s64 (cpu, rt, NO_SP,
		       aarch64_get_mem_s32 (cpu, address + displacement));
}
/* N.B. with stores the value in source is written to the
   address identified by source2 modified by source3/offset.  */

/* 32 bit store scaled unsigned 12 bit.  */
static void
str32_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* The target register may not be SP but the source may be.  */
  aarch64_set_mem_u32 (cpu, (aarch64_get_reg_u64 (cpu, rn, SP_OK)
			     + SCALE (offset, 32)),
		       aarch64_get_reg_u32 (cpu, rt, NO_SP));
}

/* 32 bit store unscaled signed 9 bit with pre- or post-writeback.  */
static void
str32_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  uint64_t address;

  if (rn == rt && wb != NoWriteBack)
    HALT_UNALLOC;

  address = aarch64_get_reg_u64 (cpu, rn, SP_OK);

  if (wb != Post)
    address += offset;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_mem_u32 (cpu, address, aarch64_get_reg_u32 (cpu, rt, NO_SP));

  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}

/* 32 bit store scaled or unscaled zero- or
   sign-extended 32-bit register offset.  */
static void
str32_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
  uint64_t displacement = OPT_SCALE (extended, 32, scaling);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_mem_u32 (cpu, address + displacement,
		       aarch64_get_reg_u64 (cpu, rt, NO_SP));
}

/* 64 bit store scaled unsigned 12 bit.  */
static void
str_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_mem_u64 (cpu,
		       aarch64_get_reg_u64 (cpu, rn, SP_OK)
		       + SCALE (offset, 64),
		       aarch64_get_reg_u64 (cpu, rt, NO_SP));
}
/* 64 bit store unscaled signed 9 bit with pre- or post-writeback.  */
static void
str_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  uint64_t address;

  if (rn == rt && wb != NoWriteBack)
    HALT_UNALLOC;

  address = aarch64_get_reg_u64 (cpu, rn, SP_OK);

  if (wb != Post)
    address += offset;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_mem_u64 (cpu, address, aarch64_get_reg_u64 (cpu, rt, NO_SP));

  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}

/* 64 bit store scaled or unscaled zero-
   or sign-extended 32-bit register offset.  */
static void
str_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  /* rn may reference SP, rm and rt must reference ZR  */

  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
			     extension);
  uint64_t displacement = OPT_SCALE (extended, 64, scaling);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_mem_u64 (cpu, address + displacement,
		       aarch64_get_reg_u64 (cpu, rt, NO_SP));
}

/* 32 bit store byte scaled unsigned 12 bit.  */
static void
strb_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* The target register may not be SP but the source may be.
     There is no scaling required for a byte load.  */
  aarch64_set_mem_u8 (cpu,
		      aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset,
		      aarch64_get_reg_u8 (cpu, rt, NO_SP));
}
/* 32 bit store byte unscaled signed 9 bit with pre- or post-writeback.  */
static void
strb_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  uint64_t address;

  if (rn == rt && wb != NoWriteBack)
    HALT_UNALLOC;

  address = aarch64_get_reg_u64 (cpu, rn, SP_OK);

  if (wb != Post)
    address += offset;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_mem_u8 (cpu, address, aarch64_get_reg_u8 (cpu, rt, NO_SP));

  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}

/* 32 bit store byte scaled or unscaled zero-
   or sign-extended 32-bit register offset.  */
static void
strb_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  /* rn may reference SP, rm and rt must reference ZR  */

  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int64_t displacement = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
				 extension);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* There is no scaling required for a byte load.  */
  aarch64_set_mem_u8 (cpu, address + displacement,
		      aarch64_get_reg_u8 (cpu, rt, NO_SP));
}

/* 32 bit store short scaled unsigned 12 bit.  */
static void
strh_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* The target register may not be SP but the source may be.  */
  aarch64_set_mem_u16 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
		       + SCALE (offset, 16),
		       aarch64_get_reg_u16 (cpu, rt, NO_SP));
}
/* 32 bit store short unscaled signed 9 bit with pre- or post-writeback.  */
static void
strh_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  uint64_t address;

  if (rn == rt && wb != NoWriteBack)
    HALT_UNALLOC;

  address = aarch64_get_reg_u64 (cpu, rn, SP_OK);

  if (wb != Post)
    address += offset;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_mem_u16 (cpu, address, aarch64_get_reg_u16 (cpu, rt, NO_SP));

  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}

/* 32 bit store short scaled or unscaled zero-
   or sign-extended 32-bit register offset.  */
static void
strh_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  /* rn may reference SP, rm and rt must reference ZR  */

  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
  uint64_t displacement = OPT_SCALE (extended, 16, scaling);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_mem_u16 (cpu, address + displacement,
		       aarch64_get_reg_u16 (cpu, rt, NO_SP));
}
/* Prefetch unsigned 12 bit.  */
static void
prfm_abs (sim_cpu *cpu, uint32_t offset)
{
  /* instr[4,0] = prfop : 00000 ==> PLDL1KEEP, 00001 ==> PLDL1STRM,
                          00010 ==> PLDL2KEEP, 00011 ==> PLDL2STRM,
                          00100 ==> PLDL3KEEP, 00101 ==> PLDL3STRM,
                          10000 ==> PSTL1KEEP, 10001 ==> PSTL1STRM,
                          10010 ==> PSTL2KEEP, 10011 ==> PSTL2STRM,
                          10100 ==> PSTL3KEEP, 10101 ==> PSTL3STRM,
                          ow ==> UNALLOC
     PrfOp prfop = prfop (instr, 4, 0);
     uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK)
     + SCALE (offset, 64).  */

  /* TODO : implement prefetch of address.  */
}

/* Prefetch scaled or unscaled zero- or sign-extended 32-bit register offset.  */
static void
prfm_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  /* instr[4,0] = prfop : 00000 ==> PLDL1KEEP, 00001 ==> PLDL1STRM,
                          00010 ==> PLDL2KEEP, 00011 ==> PLDL2STRM,
                          00100 ==> PLDL3KEEP, 00101 ==> PLDL3STRM,
                          10000 ==> PSTL1KEEP, 10001 ==> PSTL1STRM,
                          10010 ==> PSTL2KEEP, 10011 ==> PSTL2STRM,
                          10100 ==> PSTL3KEEP, 10101 ==> PSTL3STRM,
                          ow ==> UNALLOC
     rn may reference SP, rm may only reference ZR
     PrfOp prfop = prfop (instr, 4, 0);
     uint64_t base = aarch64_get_reg_u64 (cpu, rn, SP_OK);
     int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
                                extension);
     uint64_t displacement = OPT_SCALE (extended, 64, scaling);
     uint64_t address = base + displacement.  */

  /* TODO : implement prefetch of address  */
}

/* 64 bit pc-relative prefetch.  */
static void
prfm_pcrel (sim_cpu *cpu, int32_t offset)
{
  /* instr[4,0] = prfop : 00000 ==> PLDL1KEEP, 00001 ==> PLDL1STRM,
                          00010 ==> PLDL2KEEP, 00011 ==> PLDL2STRM,
                          00100 ==> PLDL3KEEP, 00101 ==> PLDL3STRM,
                          10000 ==> PSTL1KEEP, 10001 ==> PSTL1STRM,
                          10010 ==> PSTL2KEEP, 10011 ==> PSTL2STRM,
                          10100 ==> PSTL3KEEP, 10101 ==> PSTL3STRM,
                          ow ==> UNALLOC
     PrfOp prfop = prfop (instr, 4, 0);
     uint64_t address = aarch64_get_PC (cpu) + offset.  */

  /* TODO : implement this  */
}
/* Load-store exclusive.  */

static void
ldxr (sim_cpu *cpu)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int size = INSTR (31, 30);
  /* int ordered = INSTR (15, 15);  */
  /* int exclusive = ! INSTR (23, 23);  */

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  switch (size)
    {
    case 0:
      aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u8 (cpu, address));
      break;
    case 1:
      aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u16 (cpu, address));
      break;
    case 2:
      aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u32 (cpu, address));
      break;
    case 3:
      aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u64 (cpu, address));
      break;
    }
}

static void
stxr (sim_cpu *cpu)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  unsigned rs = INSTR (20, 16);
  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int size = INSTR (31, 30);
  uint64_t data = aarch64_get_reg_u64 (cpu, rt, NO_SP);

  switch (size)
    {
    case 0: aarch64_set_mem_u8 (cpu, address, data); break;
    case 1: aarch64_set_mem_u16 (cpu, address, data); break;
    case 2: aarch64_set_mem_u32 (cpu, address, data); break;
    case 3: aarch64_set_mem_u64 (cpu, address, data); break;
    }

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rs, NO_SP, 0); /* Always exclusive...  */
}
static void
dexLoadLiteral (sim_cpu *cpu)
{
  /* instr[29,27] == 011
     instr[31,30:26] = opc: 000 ==> LDRW,  001 ==> FLDRS
                            010 ==> LDRX,  011 ==> FLDRD
                            100 ==> LDRSW, 101 ==> FLDRQ
                            110 ==> PRFM,  111 ==> UNALLOC
     instr[26] ==> V : 0 ==> GReg, 1 ==> FReg
     instr[23, 5] == simm19  */

  /* unsigned rt = INSTR (4, 0);  */
  uint32_t dispatch = (INSTR (31, 30) << 1) | INSTR (26, 26);
  int32_t imm = simm32 (aarch64_get_instr (cpu), 23, 5);

  switch (dispatch)
    {
    case 0: ldr32_pcrel (cpu, imm); break;
    case 1: fldrs_pcrel (cpu, imm); break;
    case 2: ldr_pcrel   (cpu, imm); break;
    case 3: fldrd_pcrel (cpu, imm); break;
    case 4: ldrsw_pcrel (cpu, imm); break;
    case 5: fldrq_pcrel (cpu, imm); break;
    case 6: prfm_pcrel  (cpu, imm); break;
    default:
      HALT_UNALLOC;
    }
}
/* Immediate arithmetic
   The aimm argument is a 12 bit unsigned value or a 12 bit unsigned
   value left shifted by 12 bits (done at decode).

   N.B. the register args (dest, source) can normally be Xn or SP.
   The exception occurs for flag setting instructions which may
   only use Xn for the output (dest).  */
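/* Worked example (added for exposition): an instruction such as
   ADD W1, W2, #5, LSL #12 reaches add32 () below with aimm already equal
   to 5 << 12 == 0x5000, because the decoder applies the optional LSL #12
   before dispatching (see dexAddSubtractImmediate).  */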
/* 32 bit add immediate.  */
static void
add32 (sim_cpu *cpu, uint32_t aimm)
{
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, SP_OK,
		       aarch64_get_reg_u32 (cpu, rn, SP_OK) + aimm);
}

/* 64 bit add immediate.  */
static void
add64 (sim_cpu *cpu, uint32_t aimm)
{
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, SP_OK,
		       aarch64_get_reg_u64 (cpu, rn, SP_OK) + aimm);
}
static void
set_flags_for_add32 (sim_cpu *cpu, int32_t value1, int32_t value2)
{
  int32_t  result = value1 + value2;
  int64_t  sresult = (int64_t) value1 + (int64_t) value2;
  uint64_t uresult = (uint64_t)(uint32_t) value1
    + (uint64_t)(uint32_t) value2;
  uint32_t flags = 0;

  if (result == 0)
    flags |= Z;

  if (result & (1 << 31))
    flags |= N;

  if (uresult != (uint32_t)uresult)
    flags |= C;

  if (sresult != (int32_t)sresult)
    flags |= V;

  aarch64_set_CPSR (cpu, flags);
}

#define NEG(a) (((a) & signbit) == signbit)
#define POS(a) (((a) & signbit) == 0)

static void
set_flags_for_add64 (sim_cpu *cpu, uint64_t value1, uint64_t value2)
{
  uint64_t result = value1 + value2;
  uint32_t flags = 0;
  uint64_t signbit = 1ULL << 63;

  if (result == 0)
    flags |= Z;

  if (NEG (result))
    flags |= N;

  if (   (NEG (value1) && NEG (value2))
      || (NEG (value1) && POS (result))
      || (NEG (value2) && POS (result)))
    flags |= C;

  if (   (NEG (value1) && NEG (value2) && POS (result))
      || (POS (value1) && POS (value2) && NEG (result)))
    flags |= V;

  aarch64_set_CPSR (cpu, flags);
}

static void
set_flags_for_sub32 (sim_cpu *cpu, uint32_t value1, uint32_t value2)
{
  uint32_t result = value1 - value2;
  uint32_t flags = 0;
  uint32_t signbit = 1U << 31;

  if (result == 0)
    flags |= Z;

  if (NEG (result))
    flags |= N;

  if (   (NEG (value1) && POS (value2))
      || (NEG (value1) && POS (result))
      || (POS (value2) && POS (result)))
    flags |= C;

  if (   (NEG (value1) && POS (value2) && POS (result))
      || (POS (value1) && NEG (value2) && NEG (result)))
    flags |= V;

  aarch64_set_CPSR (cpu, flags);
}

static void
set_flags_for_sub64 (sim_cpu *cpu, uint64_t value1, uint64_t value2)
{
  uint64_t result = value1 - value2;
  uint32_t flags = 0;
  uint64_t signbit = 1ULL << 63;

  if (result == 0)
    flags |= Z;

  if (NEG (result))
    flags |= N;

  if (   (NEG (value1) && POS (value2))
      || (NEG (value1) && POS (result))
      || (POS (value2) && POS (result)))
    flags |= C;

  if (   (NEG (value1) && POS (value2) && POS (result))
      || (POS (value1) && NEG (value2) && NEG (result)))
    flags |= V;

  aarch64_set_CPSR (cpu, flags);
}

static void
set_flags_for_binop32 (sim_cpu *cpu, uint32_t result)
{
  uint32_t flags = 0;

  if (result == 0)
    flags |= Z;
  else
    flags &= ~ Z;

  if (result & (1 << 31))
    flags |= N;
  else
    flags &= ~ N;

  aarch64_set_CPSR (cpu, flags);
}

static void
set_flags_for_binop64 (sim_cpu *cpu, uint64_t result)
{
  uint32_t flags = 0;

  if (result == 0)
    flags |= Z;
  else
    flags &= ~ Z;

  if (result & (1ULL << 63))
    flags |= N;
  else
    flags &= ~ N;

  aarch64_set_CPSR (cpu, flags);
}
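/* Flag-setting example (added for exposition): for the 32-bit subtraction
   0 - 1 the helpers above compute result == 0xffffffff, so N is set, Z is
   clear, C is clear (a borrow occurred) and V is clear -- matching the
   NZCV result of SUBS W0, WZR, #1.  */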
/* 32 bit add immediate set flags.  */
static void
adds32 (sim_cpu *cpu, uint32_t aimm)
{
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);
  /* TODO : do we need to worry about signs here?  */
  int32_t value1 = aarch64_get_reg_s32 (cpu, rn, SP_OK);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + aimm);
  set_flags_for_add32 (cpu, value1, aimm);
}

/* 64 bit add immediate set flags.  */
static void
adds64 (sim_cpu *cpu, uint32_t aimm)
{
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);
  uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  uint64_t value2 = aimm;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + value2);
  set_flags_for_add64 (cpu, value1, value2);
}

/* 32 bit sub immediate.  */
static void
sub32 (sim_cpu *cpu, uint32_t aimm)
{
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, SP_OK,
		       aarch64_get_reg_u32 (cpu, rn, SP_OK) - aimm);
}

/* 64 bit sub immediate.  */
static void
sub64 (sim_cpu *cpu, uint32_t aimm)
{
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, SP_OK,
		       aarch64_get_reg_u64 (cpu, rn, SP_OK) - aimm);
}

/* 32 bit sub immediate set flags.  */
static void
subs32 (sim_cpu *cpu, uint32_t aimm)
{
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);
  uint32_t value1 = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  uint32_t value2 = aimm;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 - value2);
  set_flags_for_sub32 (cpu, value1, value2);
}

/* 64 bit sub immediate set flags.  */
static void
subs64 (sim_cpu *cpu, uint32_t aimm)
{
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);
  uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  uint32_t value2 = aimm;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 - value2);
  set_flags_for_sub64 (cpu, value1, value2);
}
/* Data Processing Register.  */

/* First two helpers to perform the shift operations.  */

static inline uint32_t
shifted32 (uint32_t value, Shift shift, uint32_t count)
{
  switch (shift)
    {
    default:
    case LSL:
      return (value << count);
    case LSR:
      return (value >> count);
    case ASR:
      {
	int32_t svalue = value;

	return (svalue >> count);
      }
    case ROR:
      {
	uint32_t top = value >> count;
	uint32_t bottom = value << (32 - count);

	return (bottom | top);
      }
    }
}

static inline uint64_t
shifted64 (uint64_t value, Shift shift, uint32_t count)
{
  switch (shift)
    {
    default:
    case LSL:
      return (value << count);
    case LSR:
      return (value >> count);
    case ASR:
      {
	int64_t svalue = value;

	return (svalue >> count);
      }
    case ROR:
      {
	uint64_t top = value >> count;
	uint64_t bottom = value << (64 - count);

	return (bottom | top);
      }
    }
}
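/* Illustrative check (added for exposition): shifted32 (0x80000001, ROR, 1)
   rotates the low bit into the sign position and the sign bit down one
   place, giving 0xC0000000; the 64-bit helper behaves the same way on the
   wider type.  */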
/* Arithmetic shifted register.
   These allow an optional LSL, ASR or LSR to the second source
   register with a count up to the register bit count.

   N.B register args may not be SP.  */

/* 32 bit ADD shifted register.  */
static void
add32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, NO_SP,
		       aarch64_get_reg_u32 (cpu, rn, NO_SP)
		       + shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP),
				    shift, count));
}

/* 64 bit ADD shifted register.  */
static void
add64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, NO_SP,
		       aarch64_get_reg_u64 (cpu, rn, NO_SP)
		       + shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP),
				    shift, count));
}

/* 32 bit ADD shifted register setting flags.  */
static void
adds32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, NO_SP);
  uint32_t value2 = shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP),
			       shift, count);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + value2);
  set_flags_for_add32 (cpu, value1, value2);
}

/* 64 bit ADD shifted register setting flags.  */
static void
adds64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP);
  uint64_t value2 = shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP),
			       shift, count);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + value2);
  set_flags_for_add64 (cpu, value1, value2);
}

/* 32 bit SUB shifted register.  */
static void
sub32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, NO_SP,
		       aarch64_get_reg_u32 (cpu, rn, NO_SP)
		       - shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP),
				    shift, count));
}

/* 64 bit SUB shifted register.  */
static void
sub64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, NO_SP,
		       aarch64_get_reg_u64 (cpu, rn, NO_SP)
		       - shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP),
				    shift, count));
}

/* 32 bit SUB shifted register setting flags.  */
static void
subs32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, NO_SP);
  uint32_t value2 = shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP),
			       shift, count);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 - value2);
  set_flags_for_sub32 (cpu, value1, value2);
}

/* 64 bit SUB shifted register setting flags.  */
static void
subs64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP);
  uint64_t value2 = shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP),
			       shift, count);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 - value2);
  set_flags_for_sub64 (cpu, value1, value2);
}
/* First a couple more helpers to fetch the
   relevant source register element either
   sign or zero extended as required by the
   extension value.  */

static uint32_t
extreg32 (sim_cpu *cpu, unsigned int lo, Extension extension)
{
  switch (extension)
    {
    case UXTB: return aarch64_get_reg_u8 (cpu, lo, NO_SP);
    case UXTH: return aarch64_get_reg_u16 (cpu, lo, NO_SP);
    case UXTW: ATTRIBUTE_FALLTHROUGH;
    case UXTX: return aarch64_get_reg_u32 (cpu, lo, NO_SP);
    case SXTB: return aarch64_get_reg_s8 (cpu, lo, NO_SP);
    case SXTH: return aarch64_get_reg_s16 (cpu, lo, NO_SP);
    case SXTW: ATTRIBUTE_FALLTHROUGH;
    case SXTX: ATTRIBUTE_FALLTHROUGH;
    default:   return aarch64_get_reg_s32 (cpu, lo, NO_SP);
    }
}

static uint64_t
extreg64 (sim_cpu *cpu, unsigned int lo, Extension extension)
{
  switch (extension)
    {
    case UXTB: return aarch64_get_reg_u8 (cpu, lo, NO_SP);
    case UXTH: return aarch64_get_reg_u16 (cpu, lo, NO_SP);
    case UXTW: return aarch64_get_reg_u32 (cpu, lo, NO_SP);
    case UXTX: return aarch64_get_reg_u64 (cpu, lo, NO_SP);
    case SXTB: return aarch64_get_reg_s8 (cpu, lo, NO_SP);
    case SXTH: return aarch64_get_reg_s16 (cpu, lo, NO_SP);
    case SXTW: return aarch64_get_reg_s32 (cpu, lo, NO_SP);
    case SXTX:
    default:   return aarch64_get_reg_s64 (cpu, lo, NO_SP);
    }
}
/* Arithmetic extending register
   These allow an optional sign extension of some portion of the
   second source register followed by an optional left shift of
   between 0 and 4 bits.

   N.B output (dest) and first input arg (source) may normally be Xn
   or SP.  However, for flag setting operations dest can only be
   Xn.  Second input registers are always Xn.  */
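/* Mapping example (added for exposition): an instruction such as
   ADD X0, SP, W5, SXTB #2 reaches add64_ext () below with
   extension == SXTB and shift == 2, i.e. the low byte of the second
   source is sign extended by extreg64 () and then shifted left twice
   before being added to the (possibly SP) first source.  */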
2079 /* 32 bit ADD extending register. */
2081 add32_ext (sim_cpu
*cpu
, Extension extension
, uint32_t shift
)
2083 unsigned rm
= INSTR (20, 16);
2084 unsigned rn
= INSTR (9, 5);
2085 unsigned rd
= INSTR (4, 0);
2087 TRACE_DECODE (cpu
, "emulated at line %d", __LINE__
);
2088 aarch64_set_reg_u64 (cpu
, rd
, SP_OK
,
2089 aarch64_get_reg_u32 (cpu
, rn
, SP_OK
)
2090 + (extreg32 (cpu
, rm
, extension
) << shift
));
2093 /* 64 bit ADD extending register.
2094 N.B. This subsumes the case with 64 bit source2 and UXTX #n or LSL #0. */
2096 add64_ext (sim_cpu
*cpu
, Extension extension
, uint32_t shift
)
2098 unsigned rm
= INSTR (20, 16);
2099 unsigned rn
= INSTR (9, 5);
2100 unsigned rd
= INSTR (4, 0);
2102 TRACE_DECODE (cpu
, "emulated at line %d", __LINE__
);
2103 aarch64_set_reg_u64 (cpu
, rd
, SP_OK
,
2104 aarch64_get_reg_u64 (cpu
, rn
, SP_OK
)
2105 + (extreg64 (cpu
, rm
, extension
) << shift
));
/* 32 bit ADD extending register setting flags.  */
static void
adds32_ext (sim_cpu *cpu, Extension extension, uint32_t shift)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, SP_OK);
  uint32_t value2 = extreg32 (cpu, rm, extension) << shift;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + value2);
  set_flags_for_add32 (cpu, value1, value2);
}

/* 64 bit ADD extending register setting flags.  */
/* N.B. this subsumes the case with 64 bit source2 and UXTX #n or LSL #0.  */
static void
adds64_ext (sim_cpu *cpu, Extension extension, uint32_t shift)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  uint64_t value2 = extreg64 (cpu, rm, extension) << shift;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + value2);
  set_flags_for_add64 (cpu, value1, value2);
}
/* 32 bit SUB extending register.  */
static void
sub32_ext (sim_cpu *cpu, Extension extension, uint32_t shift)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, SP_OK,
                       aarch64_get_reg_u32 (cpu, rn, SP_OK)
                       - (extreg32 (cpu, rm, extension) << shift));
}

/* 64 bit SUB extending register.  */
/* N.B. this subsumes the case with 64 bit source2 and UXTX #n or LSL #0.  */
static void
sub64_ext (sim_cpu *cpu, Extension extension, uint32_t shift)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, SP_OK,
                       aarch64_get_reg_u64 (cpu, rn, SP_OK)
                       - (extreg64 (cpu, rm, extension) << shift));
}
/* 32 bit SUB extending register setting flags.  */
static void
subs32_ext (sim_cpu *cpu, Extension extension, uint32_t shift)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, SP_OK);
  uint32_t value2 = extreg32 (cpu, rm, extension) << shift;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 - value2);
  set_flags_for_sub32 (cpu, value1, value2);
}

/* 64 bit SUB extending register setting flags.  */
/* N.B. this subsumes the case with 64 bit source2 and UXTX #n or LSL #0.  */
static void
subs64_ext (sim_cpu *cpu, Extension extension, uint32_t shift)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  uint64_t value2 = extreg64 (cpu, rm, extension) << shift;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 - value2);
  set_flags_for_sub64 (cpu, value1, value2);
}
static void
dexAddSubtractImmediate (sim_cpu *cpu)
{
  /* instr[31]    = size : 0 ==> 32 bit, 1 ==> 64 bit
     instr[30]    = op : 0 ==> ADD, 1 ==> SUB
     instr[29]    = set : 0 ==> no flags, 1 ==> set flags
     instr[28,24] = 10001
     instr[23,22] = shift : 00 ==> LSL#0, 01 ==> LSL#12, 1x ==> UNALLOC
     instr[21,10] = uimm12
     instr[9,5]   = Rn
     instr[4,0]   = Rd  */

  /* N.B. the shift is applied at decode before calling the add/sub routine.  */
  uint32_t shift = INSTR (23, 22);
  uint32_t imm = INSTR (21, 10);
  uint32_t dispatch = INSTR (31, 29);

  NYI_assert (28, 24, 0x11);

  if (shift > 1)
    HALT_UNALLOC;

  if (shift)
    imm <<= 12;

  switch (dispatch)
    {
    case 0: add32 (cpu, imm); break;
    case 1: adds32 (cpu, imm); break;
    case 2: sub32 (cpu, imm); break;
    case 3: subs32 (cpu, imm); break;
    case 4: add64 (cpu, imm); break;
    case 5: adds64 (cpu, imm); break;
    case 6: sub64 (cpu, imm); break;
    case 7: subs64 (cpu, imm); break;
    }
}
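/* For example, "ADDS W1, W2, #4" has size:op:set = 001 and dispatches to
   adds32 with imm = 4, while "SUB X3, SP, #1, LSL #12" has size:op:set
   = 110 and reaches sub64 with imm already shifted to 4096.  */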
static void
dexAddSubtractShiftedRegister (sim_cpu *cpu)
{
  /* instr[31]    = size : 0 ==> 32 bit, 1 ==> 64 bit
     instr[30,29] = op : 00 ==> ADD, 01 ==> ADDS, 10 ==> SUB, 11 ==> SUBS
     instr[28,24] = 01011
     instr[23,22] = shift : 0 ==> LSL, 1 ==> LSR, 2 ==> ASR, 3 ==> UNALLOC
     instr[21]    = 0
     instr[20,16] = Rm
     instr[15,10] = count : must be 0xxxxx for 32 bit
     instr[9,5]   = Rn
     instr[4,0]   = Rd  */

  uint32_t size = INSTR (31, 31);
  uint32_t count = INSTR (15, 10);
  Shift shiftType = INSTR (23, 22);

  NYI_assert (28, 24, 0x0B);
  NYI_assert (21, 21, 0);

  /* Shift encoded as ROR is unallocated.  */
  if (shiftType == ROR)
    HALT_UNALLOC;

  /* 32 bit operations must have count[5] = 0
     or else we have an UNALLOC.  */
  if (size == 0 && uimm (count, 5, 5))
    HALT_UNALLOC;

  /* Dispatch on size:op i.e instr [31,29].  */
  switch (INSTR (31, 29))
    {
    case 0: add32_shift  (cpu, shiftType, count); break;
    case 1: adds32_shift (cpu, shiftType, count); break;
    case 2: sub32_shift  (cpu, shiftType, count); break;
    case 3: subs32_shift (cpu, shiftType, count); break;
    case 4: add64_shift  (cpu, shiftType, count); break;
    case 5: adds64_shift (cpu, shiftType, count); break;
    case 6: sub64_shift  (cpu, shiftType, count); break;
    case 7: subs64_shift (cpu, shiftType, count); break;
    }
}
static void
dexAddSubtractExtendedRegister (sim_cpu *cpu)
{
  /* instr[31]    = size : 0 ==> 32 bit, 1 ==> 64 bit
     instr[30]    = op : 0 ==> ADD, 1 ==> SUB
     instr[29]    = set? : 0 ==> no flags, 1 ==> set flags
     instr[28,24] = 01011
     instr[23,22] = opt : 0 ==> ok, 1,2,3 ==> UNALLOC
     instr[21]    = 1
     instr[20,16] = Rm
     instr[15,13] = option : 000 ==> UXTB, 001 ==> UXTH,
                             010 ==> UXTW, 011 ==> UXTX|LSL,
                             100 ==> SXTB, 101 ==> SXTH,
                             110 ==> SXTW, 111 ==> SXTX,
     instr[12,10] = shift : 0,1,2,3,4 ==> ok, 5,6,7 ==> UNALLOC
     instr[9,5]   = Rn
     instr[4,0]   = Rd  */

  Extension extensionType = INSTR (15, 13);
  uint32_t shift = INSTR (12, 10);

  NYI_assert (28, 24, 0x0B);
  NYI_assert (21, 21, 1);

  /* Shift may not exceed 4.  */
  if (shift > 4)
    HALT_UNALLOC;

  /* Dispatch on size:op:set?.  */
  switch (INSTR (31, 29))
    {
    case 0: add32_ext  (cpu, extensionType, shift); break;
    case 1: adds32_ext (cpu, extensionType, shift); break;
    case 2: sub32_ext  (cpu, extensionType, shift); break;
    case 3: subs32_ext (cpu, extensionType, shift); break;
    case 4: add64_ext  (cpu, extensionType, shift); break;
    case 5: adds64_ext (cpu, extensionType, shift); break;
    case 6: sub64_ext  (cpu, extensionType, shift); break;
    case 7: subs64_ext (cpu, extensionType, shift); break;
    }
}
/* Conditional data processing
   Condition register is implicit 3rd source.  */

/* 32 bit add with carry.  */
/* N.B register args may not be SP.  */

static void
adc32 (sim_cpu *cpu)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, NO_SP,
                       aarch64_get_reg_u32 (cpu, rn, NO_SP)
                       + aarch64_get_reg_u32 (cpu, rm, NO_SP)
                       + IS_SET (C));
}

/* 64 bit add with carry.  */
static void
adc64 (sim_cpu *cpu)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, NO_SP,
                       aarch64_get_reg_u64 (cpu, rn, NO_SP)
                       + aarch64_get_reg_u64 (cpu, rm, NO_SP)
                       + IS_SET (C));
}
/* 32 bit add with carry setting flags.  */
static void
adcs32 (sim_cpu *cpu)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, NO_SP);
  uint32_t value2 = aarch64_get_reg_u32 (cpu, rm, NO_SP);
  uint32_t carry = IS_SET (C);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + value2 + carry);
  set_flags_for_add32 (cpu, value1, value2 + carry);
}

/* 64 bit add with carry setting flags.  */
static void
adcs64 (sim_cpu *cpu)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP);
  uint64_t value2 = aarch64_get_reg_u64 (cpu, rm, NO_SP);
  uint64_t carry = IS_SET (C);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + value2 + carry);
  set_flags_for_add64 (cpu, value1, value2 + carry);
}
/* 32 bit sub with carry.  */
/* SBC computes Rn - Rm - 1 + C.  */
static void
sbc32 (sim_cpu *cpu)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5); /* ngc iff rn == 31.  */
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, NO_SP,
                       aarch64_get_reg_u32 (cpu, rn, NO_SP)
                       - aarch64_get_reg_u32 (cpu, rm, NO_SP)
                       - 1 + IS_SET (C));
}

/* 64 bit sub with carry.  */
static void
sbc64 (sim_cpu *cpu)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, NO_SP,
                       aarch64_get_reg_u64 (cpu, rn, NO_SP)
                       - aarch64_get_reg_u64 (cpu, rm, NO_SP)
                       - 1 + IS_SET (C));
}
/* 32 bit sub with carry setting flags.  */
static void
sbcs32 (sim_cpu *cpu)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, NO_SP);
  uint32_t value2 = aarch64_get_reg_u32 (cpu, rm, NO_SP);
  uint32_t carry = IS_SET (C);
  /* SBC: result = Rn - Rm - 1 + C, i.e. Rn - (Rm + 1 - C).  */
  uint32_t result = value1 - value2 - 1 + carry;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
  set_flags_for_sub32 (cpu, value1, value2 + 1 - carry);
}

/* 64 bit sub with carry setting flags.  */
static void
sbcs64 (sim_cpu *cpu)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP);
  uint64_t value2 = aarch64_get_reg_u64 (cpu, rm, NO_SP);
  uint64_t carry = IS_SET (C);
  /* SBC: result = Rn - Rm - 1 + C, i.e. Rn - (Rm + 1 - C).  */
  uint64_t result = value1 - value2 - 1 + carry;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
  set_flags_for_sub64 (cpu, value1, value2 + 1 - carry);
}
static void
dexAddSubtractWithCarry (sim_cpu *cpu)
{
  /* instr[31]    = size : 0 ==> 32 bit, 1 ==> 64 bit
     instr[30]    = op : 0 ==> ADC, 1 ==> SBC
     instr[29]    = set? : 0 ==> no flags, 1 ==> set flags
     instr[28,21] = 1 1010 000
     instr[20,16] = Rm
     instr[15,10] = op2 : 000000 ==> ok, ow ==> UNALLOC
     instr[9,5]   = Rn
     instr[4,0]   = Rd  */

  uint32_t op2 = INSTR (15, 10);

  NYI_assert (28, 21, 0xD0);

  if (op2 != 0)
    HALT_UNALLOC;

  /* Dispatch on size:op:set?.  */
  switch (INSTR (31, 29))
    {
    case 0: adc32 (cpu); break;
    case 1: adcs32 (cpu); break;
    case 2: sbc32 (cpu); break;
    case 3: sbcs32 (cpu); break;
    case 4: adc64 (cpu); break;
    case 5: adcs64 (cpu); break;
    case 6: sbc64 (cpu); break;
    case 7: sbcs64 (cpu); break;
    }
}
static int
testConditionCode (sim_cpu *cpu, CondCode cc)
{
  /* This should be reducible to branchless logic
     by some careful testing of bits in CC followed
     by the requisite masking and combining of bits
     from the flag register.

     For now we do it with a switch.  */
  int res;

  switch (cc)
    {
    case EQ:  res = IS_SET (Z);    break;
    case NE:  res = IS_CLEAR (Z);  break;
    case CS:  res = IS_SET (C);    break;
    case CC:  res = IS_CLEAR (C);  break;
    case MI:  res = IS_SET (N);    break;
    case PL:  res = IS_CLEAR (N);  break;
    case VS:  res = IS_SET (V);    break;
    case VC:  res = IS_CLEAR (V);  break;
    case HI:  res = IS_SET (C) && IS_CLEAR (Z);  break;
    case LS:  res = IS_CLEAR (C) || IS_SET (Z);  break;
    case GE:  res = IS_SET (N) == IS_SET (V);  break;
    case LT:  res = IS_SET (N) != IS_SET (V);  break;
    case GT:  res = IS_CLEAR (Z) && (IS_SET (N) == IS_SET (V));  break;
    case LE:  res = IS_SET (Z) || (IS_SET (N) != IS_SET (V));  break;
    case AL:
    case NV:
    default:
      res = 1;
      break;
    }
  return res;
}
static void
CondCompare (sim_cpu *cpu) /* aka: ccmp and ccmn.  */
{
  /* instr[31]    = size : 0 ==> 32 bit, 1 ==> 64 bit
     instr[30]    = compare with positive (1) or negative value (0)
     instr[29,21] = 1 1101 0010
     instr[20,16] = Rm or const
     instr[15,12] = cond
     instr[11]    = compare reg (0) or const (1)
     instr[10]    = 0
     instr[9,5]   = Rn
     instr[4]     = 0
     instr[3,0]   = value for CPSR bits if the comparison does not take place.  */
  signed int negate;
  unsigned rm;
  unsigned rn;

  NYI_assert (29, 21, 0x1d2);
  NYI_assert (10, 10, 0);
  NYI_assert (4, 4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  if (! testConditionCode (cpu, INSTR (15, 12)))
    {
      aarch64_set_CPSR (cpu, INSTR (3, 0));
      return;
    }

  negate = INSTR (30, 30) ? 1 : -1;
  rm = INSTR (20, 16);
  rn = INSTR (9, 5);

  if (INSTR (31, 31))
    {
      if (INSTR (11, 11))  /* Compare with an immediate.  */
        set_flags_for_sub64 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK),
                             negate * (uint64_t) rm);
      else                 /* Compare with a register.  */
        set_flags_for_sub64 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK),
                             negate * aarch64_get_reg_u64 (cpu, rm, SP_OK));
    }
  else
    {
      if (INSTR (11, 11))  /* Compare with an immediate.  */
        set_flags_for_sub32 (cpu, aarch64_get_reg_u32 (cpu, rn, SP_OK),
                             negate * rm);
      else                 /* Compare with a register.  */
        set_flags_for_sub32 (cpu, aarch64_get_reg_u32 (cpu, rn, SP_OK),
                             negate * aarch64_get_reg_u32 (cpu, rm, SP_OK));
    }
}
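/* For example, a 64-bit "CCMP X1, #5, #0, EQ" either sets the flags as
   for X1 - 5 when Z is set (negate is +1 for the ccmp form), or loads
   the NZCV bits with the immediate nibble 0 from instr[3,0].  */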
static void
do_vec_MOV_whole_vector (sim_cpu *cpu)
{
  /* MOV Vd.T, Vs.T  (alias for ORR Vd.T, Vn.T, Vm.T where Vn == Vm)

     instr[31]    = 0
     instr[30]    = half(0)/full(1)
     instr[29,21] = 001110101
     instr[20,16] = Vs
     instr[15,10] = 000111
     instr[9,5]   = Vs
     instr[4,0]   = Vd  */

  unsigned vs = INSTR (9, 5);
  unsigned vd = INSTR (4, 0);

  NYI_assert (29, 21, 0x075);
  NYI_assert (15, 10, 0x07);

  if (INSTR (20, 16) != vs)
    HALT_NYI;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  if (INSTR (30, 30))
    aarch64_set_vec_u64 (cpu, vd, 1, aarch64_get_vec_u64 (cpu, vs, 1));

  aarch64_set_vec_u64 (cpu, vd, 0, aarch64_get_vec_u64 (cpu, vs, 0));
}
2612 do_vec_SMOV_into_scalar (sim_cpu
*cpu
)
2615 instr[30] = word(0)/long(1)
2616 instr[29,21] = 00 1110 000
2617 instr[20,16] = element size and index
2618 instr[15,10] = 00 0010 11
2619 instr[9,5] = V source
2620 instr[4,0] = R dest */
2622 unsigned vs
= INSTR (9, 5);
2623 unsigned rd
= INSTR (4, 0);
2624 unsigned imm5
= INSTR (20, 16);
2625 unsigned full
= INSTR (30, 30);
2628 NYI_assert (29, 21, 0x070);
2629 NYI_assert (15, 10, 0x0B);
2631 TRACE_DECODE (cpu
, "emulated at line %d", __LINE__
);
2636 index
= (imm5
>> 1) & 0xF;
2638 else if (imm5
& 0x2)
2641 index
= (imm5
>> 2) & 0x7;
2643 else if (full
&& (imm5
& 0x4))
2646 index
= (imm5
>> 3) & 0x3;
2655 aarch64_set_reg_s64 (cpu
, rd
, NO_SP
,
2656 aarch64_get_vec_s8 (cpu
, vs
, index
));
2658 aarch64_set_reg_s32 (cpu
, rd
, NO_SP
,
2659 aarch64_get_vec_s8 (cpu
, vs
, index
));
2664 aarch64_set_reg_s64 (cpu
, rd
, NO_SP
,
2665 aarch64_get_vec_s16 (cpu
, vs
, index
));
2667 aarch64_set_reg_s32 (cpu
, rd
, NO_SP
,
2668 aarch64_get_vec_s16 (cpu
, vs
, index
));
2672 aarch64_set_reg_s64 (cpu
, rd
, NO_SP
,
2673 aarch64_get_vec_s32 (cpu
, vs
, index
));
2682 do_vec_UMOV_into_scalar (sim_cpu
*cpu
)
2685 instr[30] = word(0)/long(1)
2686 instr[29,21] = 00 1110 000
2687 instr[20,16] = element size and index
2688 instr[15,10] = 00 0011 11
2689 instr[9,5] = V source
2690 instr[4,0] = R dest */
2692 unsigned vs
= INSTR (9, 5);
2693 unsigned rd
= INSTR (4, 0);
2694 unsigned imm5
= INSTR (20, 16);
2695 unsigned full
= INSTR (30, 30);
2698 NYI_assert (29, 21, 0x070);
2699 NYI_assert (15, 10, 0x0F);
2701 TRACE_DECODE (cpu
, "emulated at line %d", __LINE__
);
2708 index
= (imm5
>> 1) & 0xF;
2710 else if (imm5
& 0x2)
2713 index
= (imm5
>> 2) & 0x7;
2715 else if (imm5
& 0x4)
2718 index
= (imm5
>> 3) & 0x3;
2723 else if (imm5
& 0x8)
2726 index
= (imm5
>> 4) & 0x1;
2734 aarch64_set_reg_u32 (cpu
, rd
, NO_SP
,
2735 aarch64_get_vec_u8 (cpu
, vs
, index
));
2739 aarch64_set_reg_u32 (cpu
, rd
, NO_SP
,
2740 aarch64_get_vec_u16 (cpu
, vs
, index
));
2744 aarch64_set_reg_u32 (cpu
, rd
, NO_SP
,
2745 aarch64_get_vec_u32 (cpu
, vs
, index
));
2749 aarch64_set_reg_u64 (cpu
, rd
, NO_SP
,
2750 aarch64_get_vec_u64 (cpu
, vs
, index
));
2759 do_vec_INS (sim_cpu
*cpu
)
2761 /* instr[31,21] = 01001110000
2762 instr[20,16] = element size and index
2763 instr[15,10] = 000111
2764 instr[9,5] = W source
2765 instr[4,0] = V dest */
2768 unsigned rs
= INSTR (9, 5);
2769 unsigned vd
= INSTR (4, 0);
2771 NYI_assert (31, 21, 0x270);
2772 NYI_assert (15, 10, 0x07);
2774 TRACE_DECODE (cpu
, "emulated at line %d", __LINE__
);
2777 index
= INSTR (20, 17);
2778 aarch64_set_vec_u8 (cpu
, vd
, index
,
2779 aarch64_get_reg_u8 (cpu
, rs
, NO_SP
));
2781 else if (INSTR (17, 17))
2783 index
= INSTR (20, 18);
2784 aarch64_set_vec_u16 (cpu
, vd
, index
,
2785 aarch64_get_reg_u16 (cpu
, rs
, NO_SP
));
2787 else if (INSTR (18, 18))
2789 index
= INSTR (20, 19);
2790 aarch64_set_vec_u32 (cpu
, vd
, index
,
2791 aarch64_get_reg_u32 (cpu
, rs
, NO_SP
));
2793 else if (INSTR (19, 19))
2795 index
= INSTR (20, 20);
2796 aarch64_set_vec_u64 (cpu
, vd
, index
,
2797 aarch64_get_reg_u64 (cpu
, rs
, NO_SP
));
2804 do_vec_DUP_vector_into_vector (sim_cpu
*cpu
)
2807 instr[30] = half(0)/full(1)
2808 instr[29,21] = 00 1110 000
2809 instr[20,16] = element size and index
2810 instr[15,10] = 0000 01
2811 instr[9,5] = V source
2812 instr[4,0] = V dest. */
2814 unsigned full
= INSTR (30, 30);
2815 unsigned vs
= INSTR (9, 5);
2816 unsigned vd
= INSTR (4, 0);
2819 NYI_assert (29, 21, 0x070);
2820 NYI_assert (15, 10, 0x01);
2822 TRACE_DECODE (cpu
, "emulated at line %d", __LINE__
);
2825 index
= INSTR (20, 17);
2827 for (i
= 0; i
< (full
? 16 : 8); i
++)
2828 aarch64_set_vec_u8 (cpu
, vd
, i
, aarch64_get_vec_u8 (cpu
, vs
, index
));
2830 else if (INSTR (17, 17))
2832 index
= INSTR (20, 18);
2834 for (i
= 0; i
< (full
? 8 : 4); i
++)
2835 aarch64_set_vec_u16 (cpu
, vd
, i
, aarch64_get_vec_u16 (cpu
, vs
, index
));
2837 else if (INSTR (18, 18))
2839 index
= INSTR (20, 19);
2841 for (i
= 0; i
< (full
? 4 : 2); i
++)
2842 aarch64_set_vec_u32 (cpu
, vd
, i
, aarch64_get_vec_u32 (cpu
, vs
, index
));
2846 if (INSTR (19, 19) == 0)
2852 index
= INSTR (20, 20);
2854 for (i
= 0; i
< 2; i
++)
2855 aarch64_set_vec_u64 (cpu
, vd
, i
, aarch64_get_vec_u64 (cpu
, vs
, index
));
2860 do_vec_TBL (sim_cpu
*cpu
)
2863 instr[30] = half(0)/full(1)
2864 instr[29,21] = 00 1110 000
2867 instr[14,13] = vec length
2869 instr[9,5] = V start
2870 instr[4,0] = V dest */
2872 int full
= INSTR (30, 30);
2873 int len
= INSTR (14, 13) + 1;
2874 unsigned vm
= INSTR (20, 16);
2875 unsigned vn
= INSTR (9, 5);
2876 unsigned vd
= INSTR (4, 0);
2879 NYI_assert (29, 21, 0x070);
2880 NYI_assert (12, 10, 0);
2882 TRACE_DECODE (cpu
, "emulated at line %d", __LINE__
);
2883 for (i
= 0; i
< (full
? 16 : 8); i
++)
2885 unsigned int selector
= aarch64_get_vec_u8 (cpu
, vm
, i
);
2889 val
= aarch64_get_vec_u8 (cpu
, vn
, selector
);
2890 else if (selector
< 32)
2891 val
= len
< 2 ? 0 : aarch64_get_vec_u8 (cpu
, vn
+ 1, selector
- 16);
2892 else if (selector
< 48)
2893 val
= len
< 3 ? 0 : aarch64_get_vec_u8 (cpu
, vn
+ 2, selector
- 32);
2894 else if (selector
< 64)
2895 val
= len
< 4 ? 0 : aarch64_get_vec_u8 (cpu
, vn
+ 3, selector
- 48);
2899 aarch64_set_vec_u8 (cpu
, vd
, i
, val
);
2904 do_vec_TRN (sim_cpu
*cpu
)
2907 instr[30] = half(0)/full(1)
2908 instr[29,24] = 00 1110
2913 instr[14] = TRN1 (0) / TRN2 (1)
2915 instr[9,5] = V source
2916 instr[4,0] = V dest. */
2918 int full
= INSTR (30, 30);
2919 int second
= INSTR (14, 14);
2920 unsigned vm
= INSTR (20, 16);
2921 unsigned vn
= INSTR (9, 5);
2922 unsigned vd
= INSTR (4, 0);
2925 NYI_assert (29, 24, 0x0E);
2926 NYI_assert (13, 10, 0xA);
2928 TRACE_DECODE (cpu
, "emulated at line %d", __LINE__
);
2929 switch (INSTR (23, 22))
2932 for (i
= 0; i
< (full
? 8 : 4); i
++)
2936 aarch64_get_vec_u8 (cpu
, second
? vm
: vn
, i
* 2));
2938 (cpu
, vd
, 1 * 2 + 1,
2939 aarch64_get_vec_u8 (cpu
, second
? vn
: vm
, i
* 2 + 1));
2944 for (i
= 0; i
< (full
? 4 : 2); i
++)
2948 aarch64_get_vec_u16 (cpu
, second
? vm
: vn
, i
* 2));
2950 (cpu
, vd
, 1 * 2 + 1,
2951 aarch64_get_vec_u16 (cpu
, second
? vn
: vm
, i
* 2 + 1));
2957 (cpu
, vd
, 0, aarch64_get_vec_u32 (cpu
, second
? vm
: vn
, 0));
2959 (cpu
, vd
, 1, aarch64_get_vec_u32 (cpu
, second
? vn
: vm
, 1));
2961 (cpu
, vd
, 2, aarch64_get_vec_u32 (cpu
, second
? vm
: vn
, 2));
2963 (cpu
, vd
, 3, aarch64_get_vec_u32 (cpu
, second
? vn
: vm
, 3));
2970 aarch64_set_vec_u64 (cpu
, vd
, 0,
2971 aarch64_get_vec_u64 (cpu
, second
? vm
: vn
, 0));
2972 aarch64_set_vec_u64 (cpu
, vd
, 1,
2973 aarch64_get_vec_u64 (cpu
, second
? vn
: vm
, 1));
2979 do_vec_DUP_scalar_into_vector (sim_cpu
*cpu
)
2982 instr[30] = 0=> zero top 64-bits, 1=> duplicate into top 64-bits
2983 [must be 1 for 64-bit xfer]
2984 instr[29,20] = 00 1110 0000
2985 instr[19,16] = element size: 0001=> 8-bits, 0010=> 16-bits,
2986 0100=> 32-bits. 1000=>64-bits
2987 instr[15,10] = 0000 11
2988 instr[9,5] = W source
2989 instr[4,0] = V dest. */
2992 unsigned Vd
= INSTR (4, 0);
2993 unsigned Rs
= INSTR (9, 5);
2994 int both
= INSTR (30, 30);
2996 NYI_assert (29, 20, 0x0E0);
2997 NYI_assert (15, 10, 0x03);
2999 TRACE_DECODE (cpu
, "emulated at line %d", __LINE__
);
3000 switch (INSTR (19, 16))
3003 for (i
= 0; i
< (both
? 16 : 8); i
++)
3004 aarch64_set_vec_u8 (cpu
, Vd
, i
, aarch64_get_reg_u8 (cpu
, Rs
, NO_SP
));
3008 for (i
= 0; i
< (both
? 8 : 4); i
++)
3009 aarch64_set_vec_u16 (cpu
, Vd
, i
, aarch64_get_reg_u16 (cpu
, Rs
, NO_SP
));
3013 for (i
= 0; i
< (both
? 4 : 2); i
++)
3014 aarch64_set_vec_u32 (cpu
, Vd
, i
, aarch64_get_reg_u32 (cpu
, Rs
, NO_SP
));
3020 aarch64_set_vec_u64 (cpu
, Vd
, 0, aarch64_get_reg_u64 (cpu
, Rs
, NO_SP
));
3021 aarch64_set_vec_u64 (cpu
, Vd
, 1, aarch64_get_reg_u64 (cpu
, Rs
, NO_SP
));
3030 do_vec_UZP (sim_cpu
*cpu
)
3033 instr[30] = half(0)/full(1)
3034 instr[29,24] = 00 1110
3035 instr[23,22] = size: byte(00), half(01), word (10), long (11)
3039 instr[14] = lower (0) / upper (1)
3044 int full
= INSTR (30, 30);
3045 int upper
= INSTR (14, 14);
3047 unsigned vm
= INSTR (20, 16);
3048 unsigned vn
= INSTR (9, 5);
3049 unsigned vd
= INSTR (4, 0);
3051 uint64_t val_m1
= aarch64_get_vec_u64 (cpu
, vm
, 0);
3052 uint64_t val_m2
= aarch64_get_vec_u64 (cpu
, vm
, 1);
3053 uint64_t val_n1
= aarch64_get_vec_u64 (cpu
, vn
, 0);
3054 uint64_t val_n2
= aarch64_get_vec_u64 (cpu
, vn
, 1);
3059 uint64_t input2
= full
? val_n2
: val_m1
;
3061 NYI_assert (29, 24, 0x0E);
3062 NYI_assert (21, 21, 0);
3063 NYI_assert (15, 15, 0);
3064 NYI_assert (13, 10, 6);
3066 TRACE_DECODE (cpu
, "emulated at line %d", __LINE__
);
3067 switch (INSTR (23, 22))
3070 val1
= (val_n1
>> (upper
* 8)) & 0xFFULL
;
3071 val1
|= (val_n1
>> ((upper
* 8) + 8)) & 0xFF00ULL
;
3072 val1
|= (val_n1
>> ((upper
* 8) + 16)) & 0xFF0000ULL
;
3073 val1
|= (val_n1
>> ((upper
* 8) + 24)) & 0xFF000000ULL
;
3075 val1
|= (input2
<< (32 - (upper
* 8))) & 0xFF00000000ULL
;
3076 val1
|= (input2
<< (24 - (upper
* 8))) & 0xFF0000000000ULL
;
3077 val1
|= (input2
<< (16 - (upper
* 8))) & 0xFF000000000000ULL
;
3078 val1
|= (input2
<< (8 - (upper
* 8))) & 0xFF00000000000000ULL
;
3082 val2
= (val_m1
>> (upper
* 8)) & 0xFFULL
;
3083 val2
|= (val_m1
>> ((upper
* 8) + 8)) & 0xFF00ULL
;
3084 val2
|= (val_m1
>> ((upper
* 8) + 16)) & 0xFF0000ULL
;
3085 val2
|= (val_m1
>> ((upper
* 8) + 24)) & 0xFF000000ULL
;
3087 val2
|= (val_m2
<< (32 - (upper
* 8))) & 0xFF00000000ULL
;
3088 val2
|= (val_m2
<< (24 - (upper
* 8))) & 0xFF0000000000ULL
;
3089 val2
|= (val_m2
<< (16 - (upper
* 8))) & 0xFF000000000000ULL
;
3090 val2
|= (val_m2
<< (8 - (upper
* 8))) & 0xFF00000000000000ULL
;
3095 val1
= (val_n1
>> (upper
* 16)) & 0xFFFFULL
;
3096 val1
|= (val_n1
>> ((upper
* 16) + 16)) & 0xFFFF0000ULL
;
3098 val1
|= (input2
<< (32 - (upper
* 16))) & 0xFFFF00000000ULL
;;
3099 val1
|= (input2
<< (16 - (upper
* 16))) & 0xFFFF000000000000ULL
;
3103 val2
= (val_m1
>> (upper
* 16)) & 0xFFFFULL
;
3104 val2
|= (val_m1
>> ((upper
* 16) + 16)) & 0xFFFF0000ULL
;
3106 val2
|= (val_m2
<< (32 - (upper
* 16))) & 0xFFFF00000000ULL
;
3107 val2
|= (val_m2
<< (16 - (upper
* 16))) & 0xFFFF000000000000ULL
;
3112 val1
= (val_n1
>> (upper
* 32)) & 0xFFFFFFFF;
3113 val1
|= (input2
<< (32 - (upper
* 32))) & 0xFFFFFFFF00000000ULL
;
3117 val2
= (val_m1
>> (upper
* 32)) & 0xFFFFFFFF;
3118 val2
|= (val_m2
<< (32 - (upper
* 32))) & 0xFFFFFFFF00000000ULL
;
3126 val1
= upper
? val_n2
: val_n1
;
3127 val2
= upper
? val_m2
: val_m1
;
3131 aarch64_set_vec_u64 (cpu
, vd
, 0, val1
);
3133 aarch64_set_vec_u64 (cpu
, vd
, 1, val2
);
3137 do_vec_ZIP (sim_cpu
*cpu
)
3140 instr[30] = half(0)/full(1)
3141 instr[29,24] = 00 1110
3142 instr[23,22] = size: byte(00), hald(01), word (10), long (11)
3146 instr[14] = lower (0) / upper (1)
3151 int full
= INSTR (30, 30);
3152 int upper
= INSTR (14, 14);
3154 unsigned vm
= INSTR (20, 16);
3155 unsigned vn
= INSTR (9, 5);
3156 unsigned vd
= INSTR (4, 0);
3158 uint64_t val_m1
= aarch64_get_vec_u64 (cpu
, vm
, 0);
3159 uint64_t val_m2
= aarch64_get_vec_u64 (cpu
, vm
, 1);
3160 uint64_t val_n1
= aarch64_get_vec_u64 (cpu
, vn
, 0);
3161 uint64_t val_n2
= aarch64_get_vec_u64 (cpu
, vn
, 1);
3166 uint64_t input1
= upper
? val_n1
: val_m1
;
3167 uint64_t input2
= upper
? val_n2
: val_m2
;
3169 NYI_assert (29, 24, 0x0E);
3170 NYI_assert (21, 21, 0);
3171 NYI_assert (15, 15, 0);
3172 NYI_assert (13, 10, 0xE);
3174 TRACE_DECODE (cpu
, "emulated at line %d", __LINE__
);
3175 switch (INSTR (23, 23))
3179 ((input1
<< 0) & (0xFF << 0))
3180 | ((input2
<< 8) & (0xFF << 8))
3181 | ((input1
<< 8) & (0xFF << 16))
3182 | ((input2
<< 16) & (0xFF << 24))
3183 | ((input1
<< 16) & (0xFFULL
<< 32))
3184 | ((input2
<< 24) & (0xFFULL
<< 40))
3185 | ((input1
<< 24) & (0xFFULL
<< 48))
3186 | ((input2
<< 32) & (0xFFULL
<< 56));
3189 ((input1
>> 32) & (0xFF << 0))
3190 | ((input2
>> 24) & (0xFF << 8))
3191 | ((input1
>> 24) & (0xFF << 16))
3192 | ((input2
>> 16) & (0xFF << 24))
3193 | ((input1
>> 16) & (0xFFULL
<< 32))
3194 | ((input2
>> 8) & (0xFFULL
<< 40))
3195 | ((input1
>> 8) & (0xFFULL
<< 48))
3196 | ((input2
>> 0) & (0xFFULL
<< 56));
3201 ((input1
<< 0) & (0xFFFF << 0))
3202 | ((input2
<< 16) & (0xFFFF << 16))
3203 | ((input1
<< 16) & (0xFFFFULL
<< 32))
3204 | ((input2
<< 32) & (0xFFFFULL
<< 48));
3207 ((input1
>> 32) & (0xFFFF << 0))
3208 | ((input2
>> 16) & (0xFFFF << 16))
3209 | ((input1
>> 16) & (0xFFFFULL
<< 32))
3210 | ((input2
>> 0) & (0xFFFFULL
<< 48));
3214 val1
= (input1
& 0xFFFFFFFFULL
) | (input2
<< 32);
3215 val2
= (input2
& 0xFFFFFFFFULL
) | (input1
<< 32);
3224 aarch64_set_vec_u64 (cpu
, vd
, 0, val1
);
3226 aarch64_set_vec_u64 (cpu
, vd
, 1, val2
);
/* Floating point immediates are encoded in 8 bits.
   fpimm[7] = sign bit.
   fpimm[6:4] = signed exponent.
   fpimm[3:0] = fraction (assuming leading 1).
   i.e. F = s * 1.f * 2^(e - b).  */
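/* Worked example of the decode below: imm8 = 0x70 gives s = 0, e = 7,
   f = 0, so u starts as 16/16 = 1.0 and is left unchanged (eneg = 0),
   encoding +1.0.  imm8 = 0xBC gives s = 1, e = 3, f = 12, so
   u = 28/16 is doubled four times to 28.0 and then negated to -28.0.  */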
static float
fp_immediate_for_encoding_32 (uint32_t imm8)
{
  float u;
  uint32_t s, e, f, i;

  s = (imm8 >> 7) & 0x1;
  e = (imm8 >> 4) & 0x7;
  f = imm8 & 0xf;

  /* The fp value is s * n/16 * 2^r where n is 16+f.  */
  u = (16.0 + f) / 16.0;

  /* N.B. exponent is signed.  */
  if (e < 4)
    {
      int epos = e;

      for (i = 0; i <= epos; i++)
        u *= 2.0;
    }
  else
    {
      int eneg = 7 - e;

      for (i = 0; i < eneg; i++)
        u /= 2.0;
    }

  if (s)
    u = - u;

  return u;
}
static double
fp_immediate_for_encoding_64 (uint32_t imm8)
{
  double u;
  uint32_t s, e, f, i;

  s = (imm8 >> 7) & 0x1;
  e = (imm8 >> 4) & 0x7;
  f = imm8 & 0xf;

  /* The fp value is s * n/16 * 2^r where n is 16+f.  */
  u = (16.0 + f) / 16.0;

  /* N.B. exponent is signed.  */
  if (e < 4)
    {
      int epos = e;

      for (i = 0; i <= epos; i++)
        u *= 2.0;
    }
  else
    {
      int eneg = 7 - e;

      for (i = 0; i < eneg; i++)
        u /= 2.0;
    }

  if (s)
    u = - u;

  return u;
}
3306 do_vec_MOV_immediate (sim_cpu
*cpu
)
3309 instr[30] = full/half selector
3310 instr[29,19] = 00111100000
3311 instr[18,16] = high 3 bits of uimm8
3312 instr[15,12] = size & shift:
3314 0010 => 32-bit + LSL#8
3315 0100 => 32-bit + LSL#16
3316 0110 => 32-bit + LSL#24
3317 1010 => 16-bit + LSL#8
3319 1101 => 32-bit + MSL#16
3320 1100 => 32-bit + MSL#8
3324 instr[9,5] = low 5-bits of uimm8
3327 int full
= INSTR (30, 30);
3328 unsigned vd
= INSTR (4, 0);
3329 unsigned val
= (INSTR (18, 16) << 5) | INSTR (9, 5);
3332 NYI_assert (29, 19, 0x1E0);
3333 NYI_assert (11, 10, 1);
3335 TRACE_DECODE (cpu
, "emulated at line %d", __LINE__
);
3336 switch (INSTR (15, 12))
3338 case 0x0: /* 32-bit, no shift. */
3339 case 0x2: /* 32-bit, shift by 8. */
3340 case 0x4: /* 32-bit, shift by 16. */
3341 case 0x6: /* 32-bit, shift by 24. */
3342 val
<<= (8 * INSTR (14, 13));
3343 for (i
= 0; i
< (full
? 4 : 2); i
++)
3344 aarch64_set_vec_u32 (cpu
, vd
, i
, val
);
3347 case 0xa: /* 16-bit, shift by 8. */
3349 ATTRIBUTE_FALLTHROUGH
;
3350 case 0x8: /* 16-bit, no shift. */
3351 for (i
= 0; i
< (full
? 8 : 4); i
++)
3352 aarch64_set_vec_u16 (cpu
, vd
, i
, val
);
3355 case 0xd: /* 32-bit, mask shift by 16. */
3358 ATTRIBUTE_FALLTHROUGH
;
3359 case 0xc: /* 32-bit, mask shift by 8. */
3362 for (i
= 0; i
< (full
? 4 : 2); i
++)
3363 aarch64_set_vec_u32 (cpu
, vd
, i
, val
);
3366 case 0xe: /* 8-bit, no shift. */
3367 for (i
= 0; i
< (full
? 16 : 8); i
++)
3368 aarch64_set_vec_u8 (cpu
, vd
, i
, val
);
3371 case 0xf: /* FMOV Vs.{2|4}S, #fpimm. */
3373 float u
= fp_immediate_for_encoding_32 (val
);
3374 for (i
= 0; i
< (full
? 4 : 2); i
++)
3375 aarch64_set_vec_float (cpu
, vd
, i
, u
);
3385 do_vec_MVNI (sim_cpu
*cpu
)
3388 instr[30] = full/half selector
3389 instr[29,19] = 10111100000
3390 instr[18,16] = high 3 bits of uimm8
3391 instr[15,12] = selector
3393 instr[9,5] = low 5-bits of uimm8
3396 int full
= INSTR (30, 30);
3397 unsigned vd
= INSTR (4, 0);
3398 unsigned val
= (INSTR (18, 16) << 5) | INSTR (9, 5);
3401 NYI_assert (29, 19, 0x5E0);
3402 NYI_assert (11, 10, 1);
3404 TRACE_DECODE (cpu
, "emulated at line %d", __LINE__
);
3405 switch (INSTR (15, 12))
3407 case 0x0: /* 32-bit, no shift. */
3408 case 0x2: /* 32-bit, shift by 8. */
3409 case 0x4: /* 32-bit, shift by 16. */
3410 case 0x6: /* 32-bit, shift by 24. */
3411 val
<<= (8 * INSTR (14, 13));
3413 for (i
= 0; i
< (full
? 4 : 2); i
++)
3414 aarch64_set_vec_u32 (cpu
, vd
, i
, val
);
3417 case 0xa: /* 16-bit, 8 bit shift. */
3419 ATTRIBUTE_FALLTHROUGH
;
3420 case 0x8: /* 16-bit, no shift. */
3422 for (i
= 0; i
< (full
? 8 : 4); i
++)
3423 aarch64_set_vec_u16 (cpu
, vd
, i
, val
);
3426 case 0xd: /* 32-bit, mask shift by 16. */
3429 ATTRIBUTE_FALLTHROUGH
;
3430 case 0xc: /* 32-bit, mask shift by 8. */
3434 for (i
= 0; i
< (full
? 4 : 2); i
++)
3435 aarch64_set_vec_u32 (cpu
, vd
, i
, val
);
3438 case 0xE: /* MOVI Dn, #mask64 */
3442 for (i
= 0; i
< 8; i
++)
3444 mask
|= (0xFFUL
<< (i
* 8));
3445 aarch64_set_vec_u64 (cpu
, vd
, 0, mask
);
3446 aarch64_set_vec_u64 (cpu
, vd
, 1, mask
);
3450 case 0xf: /* FMOV Vd.2D, #fpimm. */
3452 double u
= fp_immediate_for_encoding_64 (val
);
3457 aarch64_set_vec_double (cpu
, vd
, 0, u
);
3458 aarch64_set_vec_double (cpu
, vd
, 1, u
);
3467 #define ABS(A) ((A) < 0 ? - (A) : (A))
3470 do_vec_ABS (sim_cpu
*cpu
)
3473 instr[30] = half(0)/full(1)
3474 instr[29,24] = 00 1110
3475 instr[23,22] = size: 00=> 8-bit, 01=> 16-bit, 10=> 32-bit, 11=> 64-bit
3476 instr[21,10] = 10 0000 1011 10
3480 unsigned vn
= INSTR (9, 5);
3481 unsigned vd
= INSTR (4, 0);
3482 unsigned full
= INSTR (30, 30);
3485 NYI_assert (29, 24, 0x0E);
3486 NYI_assert (21, 10, 0x82E);
3488 TRACE_DECODE (cpu
, "emulated at line %d", __LINE__
);
3489 switch (INSTR (23, 22))
3492 for (i
= 0; i
< (full
? 16 : 8); i
++)
3493 aarch64_set_vec_s8 (cpu
, vd
, i
,
3494 ABS (aarch64_get_vec_s8 (cpu
, vn
, i
)));
3498 for (i
= 0; i
< (full
? 8 : 4); i
++)
3499 aarch64_set_vec_s16 (cpu
, vd
, i
,
3500 ABS (aarch64_get_vec_s16 (cpu
, vn
, i
)));
3504 for (i
= 0; i
< (full
? 4 : 2); i
++)
3505 aarch64_set_vec_s32 (cpu
, vd
, i
,
3506 ABS (aarch64_get_vec_s32 (cpu
, vn
, i
)));
3512 for (i
= 0; i
< 2; i
++)
3513 aarch64_set_vec_s64 (cpu
, vd
, i
,
3514 ABS (aarch64_get_vec_s64 (cpu
, vn
, i
)));
3520 do_vec_ADDV (sim_cpu
*cpu
)
3523 instr[30] = full/half selector
3524 instr[29,24] = 00 1110
3525 instr[23,22] = size: 00=> 8-bit, 01=> 16-bit, 10=> 32-bit, 11=> 64-bit
3526 instr[21,10] = 11 0001 1011 10
3530 unsigned vm
= INSTR (9, 5);
3531 unsigned rd
= INSTR (4, 0);
3533 int full
= INSTR (30, 30);
3535 NYI_assert (29, 24, 0x0E);
3536 NYI_assert (21, 10, 0xC6E);
3538 TRACE_DECODE (cpu
, "emulated at line %d", __LINE__
);
3539 switch (INSTR (23, 22))
3544 for (i
= 0; i
< (full
? 16 : 8); i
++)
3545 val
+= aarch64_get_vec_u8 (cpu
, vm
, i
);
3546 aarch64_set_vec_u64 (cpu
, rd
, 0, val
);
3553 for (i
= 0; i
< (full
? 8 : 4); i
++)
3554 val
+= aarch64_get_vec_u16 (cpu
, vm
, i
);
3555 aarch64_set_vec_u64 (cpu
, rd
, 0, val
);
3564 for (i
= 0; i
< 4; i
++)
3565 val
+= aarch64_get_vec_u32 (cpu
, vm
, i
);
3566 aarch64_set_vec_u64 (cpu
, rd
, 0, val
);
3576 do_vec_ins_2 (sim_cpu
*cpu
)
3578 /* instr[31,21] = 01001110000
3579 instr[20,18] = size & element selector
3581 instr[13] = direction: to vec(0), from vec (1)
3587 unsigned vm
= INSTR (9, 5);
3588 unsigned vd
= INSTR (4, 0);
3590 NYI_assert (31, 21, 0x270);
3591 NYI_assert (17, 14, 0);
3592 NYI_assert (12, 10, 7);
3594 TRACE_DECODE (cpu
, "emulated at line %d", __LINE__
);
3595 if (INSTR (13, 13) == 1)
3597 if (INSTR (18, 18) == 1)
3600 elem
= INSTR (20, 19);
3601 aarch64_set_reg_u64 (cpu
, vd
, NO_SP
,
3602 aarch64_get_vec_u32 (cpu
, vm
, elem
));
3607 if (INSTR (19, 19) != 1)
3610 elem
= INSTR (20, 20);
3611 aarch64_set_reg_u64 (cpu
, vd
, NO_SP
,
3612 aarch64_get_vec_u64 (cpu
, vm
, elem
));
3617 if (INSTR (18, 18) == 1)
3620 elem
= INSTR (20, 19);
3621 aarch64_set_vec_u32 (cpu
, vd
, elem
,
3622 aarch64_get_reg_u32 (cpu
, vm
, NO_SP
));
3627 if (INSTR (19, 19) != 1)
3630 elem
= INSTR (20, 20);
3631 aarch64_set_vec_u64 (cpu
, vd
, elem
,
3632 aarch64_get_reg_u64 (cpu
, vm
, NO_SP
));
#define DO_VEC_WIDENING_MUL(N, DST_TYPE, READ_TYPE, WRITE_TYPE)   \
  {                                                               \
    DST_TYPE a[N], b[N];                                          \
    unsigned i;                                                   \
    for (i = 0; i < (N); i++)                                     \
      {                                                           \
        a[i] = aarch64_get_vec_##READ_TYPE (cpu, vn, i + bias);   \
        b[i] = aarch64_get_vec_##READ_TYPE (cpu, vm, i + bias);   \
      }                                                           \
    for (i = 0; i < (N); i++)                                     \
      aarch64_set_vec_##WRITE_TYPE (cpu, vd, i, a[i] * b[i]);     \
  }
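/* Note: the DO_VEC_WIDENING_MUL expansion relies on vn, vm, vd and bias
   already being in scope at the point of use; bias is an element offset
   into the source vectors (used by do_vec_mull below to read the upper
   half for the second-part forms).  */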
3653 do_vec_mull (sim_cpu
*cpu
)
3656 instr[30] = lower(0)/upper(1) selector
3657 instr[29] = signed(0)/unsigned(1)
3658 instr[28,24] = 0 1110
3659 instr[23,22] = size: 8-bit (00), 16-bit (01), 32-bit (10)
3662 instr[15,10] = 11 0000
3666 int unsign
= INSTR (29, 29);
3667 int bias
= INSTR (30, 30);
3668 unsigned vm
= INSTR (20, 16);
3669 unsigned vn
= INSTR ( 9, 5);
3670 unsigned vd
= INSTR ( 4, 0);
3673 NYI_assert (28, 24, 0x0E);
3674 NYI_assert (15, 10, 0x30);
3676 TRACE_DECODE (cpu
, "emulated at line %d", __LINE__
);
3677 /* NB: Read source values before writing results, in case
3678 the source and destination vectors are the same. */
3679 switch (INSTR (23, 22))
3685 DO_VEC_WIDENING_MUL (8, uint16_t, u8
, u16
);
3687 DO_VEC_WIDENING_MUL (8, int16_t, s8
, s16
);
3694 DO_VEC_WIDENING_MUL (4, uint32_t, u16
, u32
);
3696 DO_VEC_WIDENING_MUL (4, int32_t, s16
, s32
);
3703 DO_VEC_WIDENING_MUL (2, uint64_t, u32
, u64
);
3705 DO_VEC_WIDENING_MUL (2, int64_t, s32
, s64
);
3714 do_vec_fadd (sim_cpu
*cpu
)
3717 instr[30] = half(0)/full(1)
3718 instr[29,24] = 001110
3719 instr[23] = FADD(0)/FSUB(1)
3720 instr[22] = float (0)/double(1)
3723 instr[15,10] = 110101
3727 unsigned vm
= INSTR (20, 16);
3728 unsigned vn
= INSTR (9, 5);
3729 unsigned vd
= INSTR (4, 0);
3731 int full
= INSTR (30, 30);
3733 NYI_assert (29, 24, 0x0E);
3734 NYI_assert (21, 21, 1);
3735 NYI_assert (15, 10, 0x35);
3737 TRACE_DECODE (cpu
, "emulated at line %d", __LINE__
);
3745 for (i
= 0; i
< 2; i
++)
3746 aarch64_set_vec_double (cpu
, vd
, i
,
3747 aarch64_get_vec_double (cpu
, vn
, i
)
3748 - aarch64_get_vec_double (cpu
, vm
, i
));
3752 for (i
= 0; i
< (full
? 4 : 2); i
++)
3753 aarch64_set_vec_float (cpu
, vd
, i
,
3754 aarch64_get_vec_float (cpu
, vn
, i
)
3755 - aarch64_get_vec_float (cpu
, vm
, i
));
3765 for (i
= 0; i
< 2; i
++)
3766 aarch64_set_vec_double (cpu
, vd
, i
,
3767 aarch64_get_vec_double (cpu
, vm
, i
)
3768 + aarch64_get_vec_double (cpu
, vn
, i
));
3772 for (i
= 0; i
< (full
? 4 : 2); i
++)
3773 aarch64_set_vec_float (cpu
, vd
, i
,
3774 aarch64_get_vec_float (cpu
, vm
, i
)
3775 + aarch64_get_vec_float (cpu
, vn
, i
));
3781 do_vec_add (sim_cpu
*cpu
)
3784 instr[30] = full/half selector
3785 instr[29,24] = 001110
3786 instr[23,22] = size: 00=> 8-bit, 01=> 16-bit, 10=> 32-bit, 11=> 64-bit
3789 instr[15,10] = 100001
3793 unsigned vm
= INSTR (20, 16);
3794 unsigned vn
= INSTR (9, 5);
3795 unsigned vd
= INSTR (4, 0);
3797 int full
= INSTR (30, 30);
3799 NYI_assert (29, 24, 0x0E);
3800 NYI_assert (21, 21, 1);
3801 NYI_assert (15, 10, 0x21);
3803 TRACE_DECODE (cpu
, "emulated at line %d", __LINE__
);
3804 switch (INSTR (23, 22))
3807 for (i
= 0; i
< (full
? 16 : 8); i
++)
3808 aarch64_set_vec_u8 (cpu
, vd
, i
, aarch64_get_vec_u8 (cpu
, vn
, i
)
3809 + aarch64_get_vec_u8 (cpu
, vm
, i
));
3813 for (i
= 0; i
< (full
? 8 : 4); i
++)
3814 aarch64_set_vec_u16 (cpu
, vd
, i
, aarch64_get_vec_u16 (cpu
, vn
, i
)
3815 + aarch64_get_vec_u16 (cpu
, vm
, i
));
3819 for (i
= 0; i
< (full
? 4 : 2); i
++)
3820 aarch64_set_vec_u32 (cpu
, vd
, i
, aarch64_get_vec_u32 (cpu
, vn
, i
)
3821 + aarch64_get_vec_u32 (cpu
, vm
, i
));
3827 aarch64_set_vec_u64 (cpu
, vd
, 0, aarch64_get_vec_u64 (cpu
, vn
, 0)
3828 + aarch64_get_vec_u64 (cpu
, vm
, 0));
3829 aarch64_set_vec_u64 (cpu
, vd
, 1,
3830 aarch64_get_vec_u64 (cpu
, vn
, 1)
3831 + aarch64_get_vec_u64 (cpu
, vm
, 1));
3837 do_vec_mul (sim_cpu
*cpu
)
3840 instr[30] = full/half selector
3841 instr[29,24] = 00 1110
3842 instr[23,22] = size: 00=> 8-bit, 01=> 16-bit, 10=> 32-bit
3845 instr[15,10] = 10 0111
3849 unsigned vm
= INSTR (20, 16);
3850 unsigned vn
= INSTR (9, 5);
3851 unsigned vd
= INSTR (4, 0);
3853 int full
= INSTR (30, 30);
3856 NYI_assert (29, 24, 0x0E);
3857 NYI_assert (21, 21, 1);
3858 NYI_assert (15, 10, 0x27);
3860 TRACE_DECODE (cpu
, "emulated at line %d", __LINE__
);
3861 switch (INSTR (23, 22))
3864 DO_VEC_WIDENING_MUL (full
? 16 : 8, uint8_t, u8
, u8
);
3868 DO_VEC_WIDENING_MUL (full
? 8 : 4, uint16_t, u16
, u16
);
3872 DO_VEC_WIDENING_MUL (full
? 4 : 2, uint32_t, u32
, u32
);
3881 do_vec_MLA (sim_cpu
*cpu
)
3884 instr[30] = full/half selector
3885 instr[29,24] = 00 1110
3886 instr[23,22] = size: 00=> 8-bit, 01=> 16-bit, 10=> 32-bit
3889 instr[15,10] = 1001 01
3893 unsigned vm
= INSTR (20, 16);
3894 unsigned vn
= INSTR (9, 5);
3895 unsigned vd
= INSTR (4, 0);
3897 int full
= INSTR (30, 30);
3899 NYI_assert (29, 24, 0x0E);
3900 NYI_assert (21, 21, 1);
3901 NYI_assert (15, 10, 0x25);
3903 TRACE_DECODE (cpu
, "emulated at line %d", __LINE__
);
3904 switch (INSTR (23, 22))
3907 for (i
= 0; i
< (full
? 16 : 8); i
++)
3908 aarch64_set_vec_u8 (cpu
, vd
, i
,
3909 aarch64_get_vec_u8 (cpu
, vd
, i
)
3910 + (aarch64_get_vec_u8 (cpu
, vn
, i
)
3911 * aarch64_get_vec_u8 (cpu
, vm
, i
)));
3915 for (i
= 0; i
< (full
? 8 : 4); i
++)
3916 aarch64_set_vec_u16 (cpu
, vd
, i
,
3917 aarch64_get_vec_u16 (cpu
, vd
, i
)
3918 + (aarch64_get_vec_u16 (cpu
, vn
, i
)
3919 * aarch64_get_vec_u16 (cpu
, vm
, i
)));
3923 for (i
= 0; i
< (full
? 4 : 2); i
++)
3924 aarch64_set_vec_u32 (cpu
, vd
, i
,
3925 aarch64_get_vec_u32 (cpu
, vd
, i
)
3926 + (aarch64_get_vec_u32 (cpu
, vn
, i
)
3927 * aarch64_get_vec_u32 (cpu
, vm
, i
)));
static float
fmaxnm (float a, float b)
{
  if (! isnan (a))
    {
      if (! isnan (b))
        return a > b ? a : b;
      return a;
    }
  else if (! isnan (b))
    return b;
  return a;
}

static float
fminnm (float a, float b)
{
  if (! isnan (a))
    {
      if (! isnan (b))
        return a < b ? a : b;
      return a;
    }
  else if (! isnan (b))
    return b;
  return a;
}

static double
dmaxnm (double a, double b)
{
  if (! isnan (a))
    {
      if (! isnan (b))
        return a > b ? a : b;
      return a;
    }
  else if (! isnan (b))
    return b;
  return a;
}

static double
dminnm (double a, double b)
{
  if (! isnan (a))
    {
      if (! isnan (b))
        return a < b ? a : b;
      return a;
    }
  else if (! isnan (b))
    return b;
  return a;
}
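/* These helpers implement the FMAXNM/FMINNM "NaN-suppressing" rule:
   when exactly one operand is a NaN the numeric operand is returned,
   and a NaN results only if both operands are NaNs.  */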
3992 do_vec_FminmaxNMP (sim_cpu
*cpu
)
3995 instr [30] = half (0)/full (1)
3996 instr [29,24] = 10 1110
3997 instr [23] = max(0)/min(1)
3998 instr [22] = float (0)/double (1)
4001 instr [15,10] = 1100 01
4003 instr [4.0] = Vd. */
4005 unsigned vm
= INSTR (20, 16);
4006 unsigned vn
= INSTR (9, 5);
4007 unsigned vd
= INSTR (4, 0);
4008 int full
= INSTR (30, 30);
4010 NYI_assert (29, 24, 0x2E);
4011 NYI_assert (21, 21, 1);
4012 NYI_assert (15, 10, 0x31);
4014 TRACE_DECODE (cpu
, "emulated at line %d", __LINE__
);
4017 double (* fn
)(double, double) = INSTR (23, 23)
4022 aarch64_set_vec_double (cpu
, vd
, 0,
4023 fn (aarch64_get_vec_double (cpu
, vn
, 0),
4024 aarch64_get_vec_double (cpu
, vn
, 1)));
4025 aarch64_set_vec_double (cpu
, vd
, 0,
4026 fn (aarch64_get_vec_double (cpu
, vm
, 0),
4027 aarch64_get_vec_double (cpu
, vm
, 1)));
4031 float (* fn
)(float, float) = INSTR (23, 23)
4034 aarch64_set_vec_float (cpu
, vd
, 0,
4035 fn (aarch64_get_vec_float (cpu
, vn
, 0),
4036 aarch64_get_vec_float (cpu
, vn
, 1)));
4038 aarch64_set_vec_float (cpu
, vd
, 1,
4039 fn (aarch64_get_vec_float (cpu
, vn
, 2),
4040 aarch64_get_vec_float (cpu
, vn
, 3)));
4042 aarch64_set_vec_float (cpu
, vd
, (full
? 2 : 1),
4043 fn (aarch64_get_vec_float (cpu
, vm
, 0),
4044 aarch64_get_vec_float (cpu
, vm
, 1)));
4046 aarch64_set_vec_float (cpu
, vd
, 3,
4047 fn (aarch64_get_vec_float (cpu
, vm
, 2),
4048 aarch64_get_vec_float (cpu
, vm
, 3)));
4053 do_vec_AND (sim_cpu
*cpu
)
4056 instr[30] = half (0)/full (1)
4057 instr[29,21] = 001110001
4059 instr[15,10] = 000111
4063 unsigned vm
= INSTR (20, 16);
4064 unsigned vn
= INSTR (9, 5);
4065 unsigned vd
= INSTR (4, 0);
4067 int full
= INSTR (30, 30);
4069 NYI_assert (29, 21, 0x071);
4070 NYI_assert (15, 10, 0x07);
4072 TRACE_DECODE (cpu
, "emulated at line %d", __LINE__
);
4073 for (i
= 0; i
< (full
? 4 : 2); i
++)
4074 aarch64_set_vec_u32 (cpu
, vd
, i
,
4075 aarch64_get_vec_u32 (cpu
, vn
, i
)
4076 & aarch64_get_vec_u32 (cpu
, vm
, i
));
4080 do_vec_BSL (sim_cpu
*cpu
)
4083 instr[30] = half (0)/full (1)
4084 instr[29,21] = 101110011
4086 instr[15,10] = 000111
4090 unsigned vm
= INSTR (20, 16);
4091 unsigned vn
= INSTR (9, 5);
4092 unsigned vd
= INSTR (4, 0);
4094 int full
= INSTR (30, 30);
4096 NYI_assert (29, 21, 0x173);
4097 NYI_assert (15, 10, 0x07);
4099 TRACE_DECODE (cpu
, "emulated at line %d", __LINE__
);
4100 for (i
= 0; i
< (full
? 16 : 8); i
++)
4101 aarch64_set_vec_u8 (cpu
, vd
, i
,
4102 ( aarch64_get_vec_u8 (cpu
, vd
, i
)
4103 & aarch64_get_vec_u8 (cpu
, vn
, i
))
4104 | ((~ aarch64_get_vec_u8 (cpu
, vd
, i
))
4105 & aarch64_get_vec_u8 (cpu
, vm
, i
)));
4109 do_vec_EOR (sim_cpu
*cpu
)
4112 instr[30] = half (0)/full (1)
4113 instr[29,21] = 10 1110 001
4115 instr[15,10] = 000111
4119 unsigned vm
= INSTR (20, 16);
4120 unsigned vn
= INSTR (9, 5);
4121 unsigned vd
= INSTR (4, 0);
4123 int full
= INSTR (30, 30);
4125 NYI_assert (29, 21, 0x171);
4126 NYI_assert (15, 10, 0x07);
4128 TRACE_DECODE (cpu
, "emulated at line %d", __LINE__
);
4129 for (i
= 0; i
< (full
? 4 : 2); i
++)
4130 aarch64_set_vec_u32 (cpu
, vd
, i
,
4131 aarch64_get_vec_u32 (cpu
, vn
, i
)
4132 ^ aarch64_get_vec_u32 (cpu
, vm
, i
));
4136 do_vec_bit (sim_cpu
*cpu
)
4139 instr[30] = half (0)/full (1)
4140 instr[29,23] = 10 1110 1
4141 instr[22] = BIT (0) / BIF (1)
4144 instr[15,10] = 0001 11
4148 unsigned vm
= INSTR (20, 16);
4149 unsigned vn
= INSTR (9, 5);
4150 unsigned vd
= INSTR (4, 0);
4151 unsigned full
= INSTR (30, 30);
4152 unsigned test_false
= INSTR (22, 22);
4155 NYI_assert (29, 23, 0x5D);
4156 NYI_assert (21, 21, 1);
4157 NYI_assert (15, 10, 0x07);
4159 TRACE_DECODE (cpu
, "emulated at line %d", __LINE__
);
4160 for (i
= 0; i
< (full
? 4 : 2); i
++)
4162 uint32_t vd_val
= aarch64_get_vec_u32 (cpu
, vd
, i
);
4163 uint32_t vn_val
= aarch64_get_vec_u32 (cpu
, vn
, i
);
4164 uint32_t vm_val
= aarch64_get_vec_u32 (cpu
, vm
, i
);
4166 aarch64_set_vec_u32 (cpu
, vd
, i
,
4167 (vd_val
& vm_val
) | (vn_val
& ~vm_val
));
4169 aarch64_set_vec_u32 (cpu
, vd
, i
,
4170 (vd_val
& ~vm_val
) | (vn_val
& vm_val
));
4175 do_vec_ORN (sim_cpu
*cpu
)
4178 instr[30] = half (0)/full (1)
4179 instr[29,21] = 00 1110 111
4181 instr[15,10] = 00 0111
4185 unsigned vm
= INSTR (20, 16);
4186 unsigned vn
= INSTR (9, 5);
4187 unsigned vd
= INSTR (4, 0);
4189 int full
= INSTR (30, 30);
4191 NYI_assert (29, 21, 0x077);
4192 NYI_assert (15, 10, 0x07);
4194 TRACE_DECODE (cpu
, "emulated at line %d", __LINE__
);
4195 for (i
= 0; i
< (full
? 16 : 8); i
++)
4196 aarch64_set_vec_u8 (cpu
, vd
, i
,
4197 aarch64_get_vec_u8 (cpu
, vn
, i
)
4198 | ~ aarch64_get_vec_u8 (cpu
, vm
, i
));
4202 do_vec_ORR (sim_cpu
*cpu
)
4205 instr[30] = half (0)/full (1)
4206 instr[29,21] = 00 1110 101
4208 instr[15,10] = 0001 11
4212 unsigned vm
= INSTR (20, 16);
4213 unsigned vn
= INSTR (9, 5);
4214 unsigned vd
= INSTR (4, 0);
4216 int full
= INSTR (30, 30);
4218 NYI_assert (29, 21, 0x075);
4219 NYI_assert (15, 10, 0x07);
4221 TRACE_DECODE (cpu
, "emulated at line %d", __LINE__
);
4222 for (i
= 0; i
< (full
? 16 : 8); i
++)
4223 aarch64_set_vec_u8 (cpu
, vd
, i
,
4224 aarch64_get_vec_u8 (cpu
, vn
, i
)
4225 | aarch64_get_vec_u8 (cpu
, vm
, i
));
4229 do_vec_BIC (sim_cpu
*cpu
)
4232 instr[30] = half (0)/full (1)
4233 instr[29,21] = 00 1110 011
4235 instr[15,10] = 00 0111
4239 unsigned vm
= INSTR (20, 16);
4240 unsigned vn
= INSTR (9, 5);
4241 unsigned vd
= INSTR (4, 0);
4243 int full
= INSTR (30, 30);
4245 NYI_assert (29, 21, 0x073);
4246 NYI_assert (15, 10, 0x07);
4248 TRACE_DECODE (cpu
, "emulated at line %d", __LINE__
);
4249 for (i
= 0; i
< (full
? 16 : 8); i
++)
4250 aarch64_set_vec_u8 (cpu
, vd
, i
,
4251 aarch64_get_vec_u8 (cpu
, vn
, i
)
4252 & ~ aarch64_get_vec_u8 (cpu
, vm
, i
));
4256 do_vec_XTN (sim_cpu
*cpu
)
4259 instr[30] = first part (0)/ second part (1)
4260 instr[29,24] = 00 1110
4261 instr[23,22] = size: byte(00), half(01), word (10)
4262 instr[21,10] = 1000 0100 1010
4266 unsigned vs
= INSTR (9, 5);
4267 unsigned vd
= INSTR (4, 0);
4268 unsigned bias
= INSTR (30, 30);
4271 NYI_assert (29, 24, 0x0E);
4272 NYI_assert (21, 10, 0x84A);
4274 TRACE_DECODE (cpu
, "emulated at line %d", __LINE__
);
4275 switch (INSTR (23, 22))
4278 for (i
= 0; i
< 8; i
++)
4279 aarch64_set_vec_u8 (cpu
, vd
, i
+ (bias
* 8),
4280 aarch64_get_vec_u16 (cpu
, vs
, i
));
4284 for (i
= 0; i
< 4; i
++)
4285 aarch64_set_vec_u16 (cpu
, vd
, i
+ (bias
* 4),
4286 aarch64_get_vec_u32 (cpu
, vs
, i
));
4290 for (i
= 0; i
< 2; i
++)
4291 aarch64_set_vec_u32 (cpu
, vd
, i
+ (bias
* 2),
4292 aarch64_get_vec_u64 (cpu
, vs
, i
));
/* Return the number of bits set in the input value.  */
#if __GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4)
# define popcount __builtin_popcount
#else
static int
popcount (unsigned char x)
{
  static const unsigned char popcnt[16] =
    {
      0, 1, 1, 2,
      1, 2, 2, 3,
      1, 2, 2, 3,
      2, 3, 3, 4
    };

  /* Only counts the low 8 bits of the input as that is all we need.  */
  return popcnt[x % 16] + popcnt[x / 16];
}
#endif
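/* do_vec_CNT below uses popcount to emulate the vector CNT instruction,
   one byte element at a time.  */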
4318 do_vec_CNT (sim_cpu
*cpu
)
4321 instr[30] = half (0)/ full (1)
4322 instr[29,24] = 00 1110
4323 instr[23,22] = size: byte(00)
4324 instr[21,10] = 1000 0001 0110
4328 unsigned vs
= INSTR (9, 5);
4329 unsigned vd
= INSTR (4, 0);
4330 int full
= INSTR (30, 30);
4331 int size
= INSTR (23, 22);
4334 NYI_assert (29, 24, 0x0E);
4335 NYI_assert (21, 10, 0x816);
4340 TRACE_DECODE (cpu
, "emulated at line %d", __LINE__
);
4342 for (i
= 0; i
< (full
? 16 : 8); i
++)
4343 aarch64_set_vec_u8 (cpu
, vd
, i
,
4344 popcount (aarch64_get_vec_u8 (cpu
, vs
, i
)));
4348 do_vec_maxv (sim_cpu
*cpu
)
4351 instr[30] = half(0)/full(1)
4352 instr[29] = signed (0)/unsigned(1)
4353 instr[28,24] = 0 1110
4354 instr[23,22] = size: byte(00), half(01), word (10)
4356 instr[20,17] = 1 000
4357 instr[16] = max(0)/min(1)
4358 instr[15,10] = 1010 10
4359 instr[9,5] = V source
4360 instr[4.0] = R dest. */
4362 unsigned vs
= INSTR (9, 5);
4363 unsigned rd
= INSTR (4, 0);
4364 unsigned full
= INSTR (30, 30);
4367 NYI_assert (28, 24, 0x0E);
4368 NYI_assert (21, 21, 1);
4369 NYI_assert (20, 17, 8);
4370 NYI_assert (15, 10, 0x2A);
4372 TRACE_DECODE (cpu
, "emulated at line %d", __LINE__
);
4373 switch ((INSTR (29, 29) << 1) | INSTR (16, 16))
4375 case 0: /* SMAXV. */
4378 switch (INSTR (23, 22))
4381 smax
= aarch64_get_vec_s8 (cpu
, vs
, 0);
4382 for (i
= 1; i
< (full
? 16 : 8); i
++)
4383 smax
= max (smax
, aarch64_get_vec_s8 (cpu
, vs
, i
));
4386 smax
= aarch64_get_vec_s16 (cpu
, vs
, 0);
4387 for (i
= 1; i
< (full
? 8 : 4); i
++)
4388 smax
= max (smax
, aarch64_get_vec_s16 (cpu
, vs
, i
));
4391 smax
= aarch64_get_vec_s32 (cpu
, vs
, 0);
4392 for (i
= 1; i
< (full
? 4 : 2); i
++)
4393 smax
= max (smax
, aarch64_get_vec_s32 (cpu
, vs
, i
));
4398 aarch64_set_reg_s64 (cpu
, rd
, NO_SP
, smax
);
4402 case 1: /* SMINV. */
4405 switch (INSTR (23, 22))
4408 smin
= aarch64_get_vec_s8 (cpu
, vs
, 0);
4409 for (i
= 1; i
< (full
? 16 : 8); i
++)
4410 smin
= min (smin
, aarch64_get_vec_s8 (cpu
, vs
, i
));
4413 smin
= aarch64_get_vec_s16 (cpu
, vs
, 0);
4414 for (i
= 1; i
< (full
? 8 : 4); i
++)
4415 smin
= min (smin
, aarch64_get_vec_s16 (cpu
, vs
, i
));
4418 smin
= aarch64_get_vec_s32 (cpu
, vs
, 0);
4419 for (i
= 1; i
< (full
? 4 : 2); i
++)
4420 smin
= min (smin
, aarch64_get_vec_s32 (cpu
, vs
, i
));
4426 aarch64_set_reg_s64 (cpu
, rd
, NO_SP
, smin
);
4430 case 2: /* UMAXV. */
4433 switch (INSTR (23, 22))
4436 umax
= aarch64_get_vec_u8 (cpu
, vs
, 0);
4437 for (i
= 1; i
< (full
? 16 : 8); i
++)
4438 umax
= max (umax
, aarch64_get_vec_u8 (cpu
, vs
, i
));
4441 umax
= aarch64_get_vec_u16 (cpu
, vs
, 0);
4442 for (i
= 1; i
< (full
? 8 : 4); i
++)
4443 umax
= max (umax
, aarch64_get_vec_u16 (cpu
, vs
, i
));
4446 umax
= aarch64_get_vec_u32 (cpu
, vs
, 0);
4447 for (i
= 1; i
< (full
? 4 : 2); i
++)
4448 umax
= max (umax
, aarch64_get_vec_u32 (cpu
, vs
, i
));
4454 aarch64_set_reg_u64 (cpu
, rd
, NO_SP
, umax
);
4458 case 3: /* UMINV. */
4461 switch (INSTR (23, 22))
4464 umin
= aarch64_get_vec_u8 (cpu
, vs
, 0);
4465 for (i
= 1; i
< (full
? 16 : 8); i
++)
4466 umin
= min (umin
, aarch64_get_vec_u8 (cpu
, vs
, i
));
4469 umin
= aarch64_get_vec_u16 (cpu
, vs
, 0);
4470 for (i
= 1; i
< (full
? 8 : 4); i
++)
4471 umin
= min (umin
, aarch64_get_vec_u16 (cpu
, vs
, i
));
4474 umin
= aarch64_get_vec_u32 (cpu
, vs
, 0);
4475 for (i
= 1; i
< (full
? 4 : 2); i
++)
4476 umin
= min (umin
, aarch64_get_vec_u32 (cpu
, vs
, i
));
4482 aarch64_set_reg_u64 (cpu
, rd
, NO_SP
, umin
);
4489 do_vec_fminmaxV (sim_cpu
*cpu
)
4491 /* instr[31,24] = 0110 1110
4492 instr[23] = max(0)/min(1)
4493 instr[22,14] = 011 0000 11
4494 instr[13,12] = nm(00)/normal(11)
4496 instr[9,5] = V source
4497 instr[4.0] = R dest. */
4499 unsigned vs
= INSTR (9, 5);
4500 unsigned rd
= INSTR (4, 0);
4502 float res
= aarch64_get_vec_float (cpu
, vs
, 0);
4504 NYI_assert (31, 24, 0x6E);
4505 NYI_assert (22, 14, 0x0C3);
4506 NYI_assert (11, 10, 2);
4508 TRACE_DECODE (cpu
, "emulated at line %d", __LINE__
);
4511 switch (INSTR (13, 12))
4513 case 0: /* FMNINNMV. */
4514 for (i
= 1; i
< 4; i
++)
4515 res
= fminnm (res
, aarch64_get_vec_float (cpu
, vs
, i
));
4518 case 3: /* FMINV. */
4519 for (i
= 1; i
< 4; i
++)
4520 res
= min (res
, aarch64_get_vec_float (cpu
, vs
, i
));
4529 switch (INSTR (13, 12))
4531 case 0: /* FMNAXNMV. */
4532 for (i
= 1; i
< 4; i
++)
4533 res
= fmaxnm (res
, aarch64_get_vec_float (cpu
, vs
, i
));
4536 case 3: /* FMAXV. */
4537 for (i
= 1; i
< 4; i
++)
4538 res
= max (res
, aarch64_get_vec_float (cpu
, vs
, i
));
4546 aarch64_set_FP_float (cpu
, rd
, res
);
4550 do_vec_Fminmax (sim_cpu
*cpu
)
4553 instr[30] = half(0)/full(1)
4554 instr[29,24] = 00 1110
4555 instr[23] = max(0)/min(1)
4556 instr[22] = float(0)/double(1)
4560 instr[13,12] = nm(00)/normal(11)
4565 unsigned vm
= INSTR (20, 16);
4566 unsigned vn
= INSTR (9, 5);
4567 unsigned vd
= INSTR (4, 0);
4568 unsigned full
= INSTR (30, 30);
4569 unsigned min
= INSTR (23, 23);
4572 NYI_assert (29, 24, 0x0E);
4573 NYI_assert (21, 21, 1);
4574 NYI_assert (15, 14, 3);
4575 NYI_assert (11, 10, 1);
4577 TRACE_DECODE (cpu
, "emulated at line %d", __LINE__
);
4580 double (* func
)(double, double);
4585 if (INSTR (13, 12) == 0)
4586 func
= min
? dminnm
: dmaxnm
;
4587 else if (INSTR (13, 12) == 3)
4588 func
= min
? fmin
: fmax
;
4592 for (i
= 0; i
< 2; i
++)
4593 aarch64_set_vec_double (cpu
, vd
, i
,
4594 func (aarch64_get_vec_double (cpu
, vn
, i
),
4595 aarch64_get_vec_double (cpu
, vm
, i
)));
4599 float (* func
)(float, float);
4601 if (INSTR (13, 12) == 0)
4602 func
= min
? fminnm
: fmaxnm
;
4603 else if (INSTR (13, 12) == 3)
4604 func
= min
? fminf
: fmaxf
;
4608 for (i
= 0; i
< (full
? 4 : 2); i
++)
4609 aarch64_set_vec_float (cpu
, vd
, i
,
4610 func (aarch64_get_vec_float (cpu
, vn
, i
),
4611 aarch64_get_vec_float (cpu
, vm
, i
)));
4616 do_vec_SCVTF (sim_cpu
*cpu
)
4620 instr[29,23] = 00 1110 0
4621 instr[22] = float(0)/double(1)
4622 instr[21,10] = 10 0001 1101 10
4626 unsigned vn
= INSTR (9, 5);
4627 unsigned vd
= INSTR (4, 0);
4628 unsigned full
= INSTR (30, 30);
4629 unsigned size
= INSTR (22, 22);
4632 NYI_assert (29, 23, 0x1C);
4633 NYI_assert (21, 10, 0x876);
4635 TRACE_DECODE (cpu
, "emulated at line %d", __LINE__
);
4641 for (i
= 0; i
< 2; i
++)
4643 double val
= (double) aarch64_get_vec_u64 (cpu
, vn
, i
);
4644 aarch64_set_vec_double (cpu
, vd
, i
, val
);
4649 for (i
= 0; i
< (full
? 4 : 2); i
++)
4651 float val
= (float) aarch64_get_vec_u32 (cpu
, vn
, i
);
4652 aarch64_set_vec_float (cpu
, vd
, i
, val
);
4657 #define VEC_CMP(SOURCE, CMP) \
4663 for (i = 0; i < (full ? 16 : 8); i++) \
4664 aarch64_set_vec_u8 (cpu, vd, i, \
4665 aarch64_get_vec_##SOURCE##8 (cpu, vn, i) \
4667 aarch64_get_vec_##SOURCE##8 (cpu, vm, i) \
4671 for (i = 0; i < (full ? 8 : 4); i++) \
4672 aarch64_set_vec_u16 (cpu, vd, i, \
4673 aarch64_get_vec_##SOURCE##16 (cpu, vn, i) \
4675 aarch64_get_vec_##SOURCE##16 (cpu, vm, i) \
4679 for (i = 0; i < (full ? 4 : 2); i++) \
4680 aarch64_set_vec_u32 (cpu, vd, i, \
4681 aarch64_get_vec_##SOURCE##32 (cpu, vn, i) \
4683 aarch64_get_vec_##SOURCE##32 (cpu, vm, i) \
4689 for (i = 0; i < 2; i++) \
4690 aarch64_set_vec_u64 (cpu, vd, i, \
4691 aarch64_get_vec_##SOURCE##64 (cpu, vn, i) \
4693 aarch64_get_vec_##SOURCE##64 (cpu, vm, i) \
4702 #define VEC_CMP0(SOURCE, CMP) \
4708 for (i = 0; i < (full ? 16 : 8); i++) \
4709 aarch64_set_vec_u8 (cpu, vd, i, \
4710 aarch64_get_vec_##SOURCE##8 (cpu, vn, i) \
4714 for (i = 0; i < (full ? 8 : 4); i++) \
4715 aarch64_set_vec_u16 (cpu, vd, i, \
4716 aarch64_get_vec_##SOURCE##16 (cpu, vn, i) \
4720 for (i = 0; i < (full ? 4 : 2); i++) \
4721 aarch64_set_vec_u32 (cpu, vd, i, \
4722 aarch64_get_vec_##SOURCE##32 (cpu, vn, i) \
4728 for (i = 0; i < 2; i++) \
4729 aarch64_set_vec_u64 (cpu, vd, i, \
4730 aarch64_get_vec_##SOURCE##64 (cpu, vn, i) \
4731 CMP 0 ? -1ULL : 0); \
4739 #define VEC_FCMP0(CMP) \
4744 if (INSTR (22, 22)) \
4748 for (i = 0; i < 2; i++) \
4749 aarch64_set_vec_u64 (cpu, vd, i, \
4750 aarch64_get_vec_double (cpu, vn, i) \
4751 CMP 0.0 ? -1 : 0); \
4755 for (i = 0; i < (full ? 4 : 2); i++) \
4756 aarch64_set_vec_u32 (cpu, vd, i, \
4757 aarch64_get_vec_float (cpu, vn, i) \
4758 CMP 0.0 ? -1 : 0); \
4764 #define VEC_FCMP(CMP) \
4767 if (INSTR (22, 22)) \
4771 for (i = 0; i < 2; i++) \
4772 aarch64_set_vec_u64 (cpu, vd, i, \
4773 aarch64_get_vec_double (cpu, vn, i) \
4775 aarch64_get_vec_double (cpu, vm, i) \
4780 for (i = 0; i < (full ? 4 : 2); i++) \
4781 aarch64_set_vec_u32 (cpu, vd, i, \
4782 aarch64_get_vec_float (cpu, vn, i) \
4784 aarch64_get_vec_float (cpu, vm, i) \
4792 do_vec_compare (sim_cpu
*cpu
)
4795 instr[30] = half(0)/full(1)
4796 instr[29] = part-of-comparison-type
4797 instr[28,24] = 0 1110
4798 instr[23,22] = size of integer compares: byte(00), half(01), word (10), long (11)
4799 type of float compares: single (-0) / double (-1)
4801 instr[20,16] = Vm or 00000 (compare vs 0)
4802 instr[15,10] = part-of-comparison-type
4806 int full
= INSTR (30, 30);
4807 int size
= INSTR (23, 22);
4808 unsigned vm
= INSTR (20, 16);
4809 unsigned vn
= INSTR (9, 5);
4810 unsigned vd
= INSTR (4, 0);
4813 NYI_assert (28, 24, 0x0E);
4814 NYI_assert (21, 21, 1);
4816 TRACE_DECODE (cpu
, "emulated at line %d", __LINE__
);
4819 || ((INSTR (11, 11) == 0
4820 && INSTR (10, 10) == 0)))
4822 /* A compare vs 0. */
4825 if (INSTR (15, 10) == 0x2A)
4827 else if (INSTR (15, 10) == 0x32
4828 || INSTR (15, 10) == 0x3E)
4829 do_vec_fminmaxV (cpu
);
4830 else if (INSTR (29, 23) == 0x1C
4831 && INSTR (21, 10) == 0x876)
4841 /* A floating point compare. */
4842 unsigned decode
= (INSTR (29, 29) << 5) | (INSTR (23, 23) << 4)
4845 NYI_assert (15, 15, 1);
4849 case /* 0b010010: GT#0 */ 0x12: VEC_FCMP0 (>);
4850 case /* 0b110010: GE#0 */ 0x32: VEC_FCMP0 (>=);
4851 case /* 0b010110: EQ#0 */ 0x16: VEC_FCMP0 (==);
4852 case /* 0b110110: LE#0 */ 0x36: VEC_FCMP0 (<=);
4853 case /* 0b011010: LT#0 */ 0x1A: VEC_FCMP0 (<);
4854 case /* 0b111001: GT */ 0x39: VEC_FCMP (>);
4855 case /* 0b101001: GE */ 0x29: VEC_FCMP (>=);
4856 case /* 0b001001: EQ */ 0x09: VEC_FCMP (==);
4864 unsigned decode
= (INSTR (29, 29) << 6) | INSTR (15, 10);
4868 case 0x0D: /* 0001101 GT */ VEC_CMP (s
, > );
4869 case 0x0F: /* 0001111 GE */ VEC_CMP (s
, >= );
4870 case 0x22: /* 0100010 GT #0 */ VEC_CMP0 (s
, > );
4871 case 0x23: /* 0100011 TST */ VEC_CMP (u
, & );
4872 case 0x26: /* 0100110 EQ #0 */ VEC_CMP0 (s
, == );
4873 case 0x2A: /* 0101010 LT #0 */ VEC_CMP0 (s
, < );
4874 case 0x4D: /* 1001101 HI */ VEC_CMP (u
, > );
4875 case 0x4F: /* 1001111 HS */ VEC_CMP (u
, >= );
4876 case 0x62: /* 1100010 GE #0 */ VEC_CMP0 (s
, >= );
4877 case 0x63: /* 1100011 EQ */ VEC_CMP (u
, == );
4878 case 0x66: /* 1100110 LE #0 */ VEC_CMP0 (s
, <= );
do_vec_SSHL (sim_cpu *cpu)
  /* instr[30]    = first part (0)/ second part (1)
     instr[29,24] = 00 1110
     instr[23,22] = size: byte(00), half(01), word (10), long (11)
     instr[15,10] = 0100 01  */

  unsigned full = INSTR (30, 30);
  unsigned vm = INSTR (20, 16);
  unsigned vn = INSTR (9, 5);
  unsigned vd = INSTR (4, 0);

  NYI_assert (29, 24, 0x0E);
  NYI_assert (21, 21, 1);
  NYI_assert (15, 10, 0x11);

  /* FIXME: What is a signed shift left in this context?  */

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  switch (INSTR (23, 22))
      for (i = 0; i < (full ? 16 : 8); i++)
          shift = aarch64_get_vec_s8 (cpu, vm, i);
          if (shift >= 0)
            aarch64_set_vec_s8 (cpu, vd, i, aarch64_get_vec_s8 (cpu, vn, i) << shift);
          else
            aarch64_set_vec_s8 (cpu, vd, i, aarch64_get_vec_s8 (cpu, vn, i) >> - shift);

      for (i = 0; i < (full ? 8 : 4); i++)
          shift = aarch64_get_vec_s8 (cpu, vm, i * 2);
          if (shift >= 0)
            aarch64_set_vec_s16 (cpu, vd, i, aarch64_get_vec_s16 (cpu, vn, i) << shift);
          else
            aarch64_set_vec_s16 (cpu, vd, i, aarch64_get_vec_s16 (cpu, vn, i) >> - shift);

      for (i = 0; i < (full ? 4 : 2); i++)
          shift = aarch64_get_vec_s8 (cpu, vm, i * 4);
          if (shift >= 0)
            aarch64_set_vec_s32 (cpu, vd, i, aarch64_get_vec_s32 (cpu, vn, i) << shift);
          else
            aarch64_set_vec_s32 (cpu, vd, i, aarch64_get_vec_s32 (cpu, vn, i) >> - shift);

      for (i = 0; i < 2; i++)
          shift = aarch64_get_vec_s8 (cpu, vm, i * 8);
          if (shift >= 0)
            aarch64_set_vec_s64 (cpu, vd, i, aarch64_get_vec_s64 (cpu, vn, i) << shift);
          else
            aarch64_set_vec_s64 (cpu, vd, i, aarch64_get_vec_s64 (cpu, vn, i) >> - shift);
do_vec_USHL (sim_cpu *cpu)
  /* instr[30]    = first part (0)/ second part (1)
     instr[29,24] = 10 1110
     instr[23,22] = size: byte(00), half(01), word (10), long (11)
     instr[15,10] = 0100 01  */

  unsigned full = INSTR (30, 30);
  unsigned vm = INSTR (20, 16);
  unsigned vn = INSTR (9, 5);
  unsigned vd = INSTR (4, 0);

  NYI_assert (29, 24, 0x2E);
  NYI_assert (15, 10, 0x11);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  switch (INSTR (23, 22))
      for (i = 0; i < (full ? 16 : 8); i++)
          shift = aarch64_get_vec_s8 (cpu, vm, i);
          if (shift >= 0)
            aarch64_set_vec_u8 (cpu, vd, i, aarch64_get_vec_u8 (cpu, vn, i) << shift);
          else
            aarch64_set_vec_u8 (cpu, vd, i, aarch64_get_vec_u8 (cpu, vn, i) >> - shift);

      for (i = 0; i < (full ? 8 : 4); i++)
          shift = aarch64_get_vec_s8 (cpu, vm, i * 2);
          if (shift >= 0)
            aarch64_set_vec_u16 (cpu, vd, i, aarch64_get_vec_u16 (cpu, vn, i) << shift);
          else
            aarch64_set_vec_u16 (cpu, vd, i, aarch64_get_vec_u16 (cpu, vn, i) >> - shift);

      for (i = 0; i < (full ? 4 : 2); i++)
          shift = aarch64_get_vec_s8 (cpu, vm, i * 4);
          if (shift >= 0)
            aarch64_set_vec_u32 (cpu, vd, i, aarch64_get_vec_u32 (cpu, vn, i) << shift);
          else
            aarch64_set_vec_u32 (cpu, vd, i, aarch64_get_vec_u32 (cpu, vn, i) >> - shift);

      for (i = 0; i < 2; i++)
          shift = aarch64_get_vec_s8 (cpu, vm, i * 8);
          if (shift >= 0)
            aarch64_set_vec_u64 (cpu, vd, i, aarch64_get_vec_u64 (cpu, vn, i) << shift);
          else
            aarch64_set_vec_u64 (cpu, vd, i, aarch64_get_vec_u64 (cpu, vn, i) >> - shift);
do_vec_FMLA (sim_cpu *cpu)
  /* instr[30]    = full/half selector
     instr[29,23] = 0011100
     instr[22]    = size: 0=>float, 1=>double
     instr[15,10] = 1100 11  */

  unsigned vm = INSTR (20, 16);
  unsigned vn = INSTR (9, 5);
  unsigned vd = INSTR (4, 0);
  int full = INSTR (30, 30);

  NYI_assert (29, 23, 0x1C);
  NYI_assert (21, 21, 1);
  NYI_assert (15, 10, 0x33);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
      for (i = 0; i < 2; i++)
        aarch64_set_vec_double (cpu, vd, i,
                                aarch64_get_vec_double (cpu, vn, i) *
                                aarch64_get_vec_double (cpu, vm, i) +
                                aarch64_get_vec_double (cpu, vd, i));

      for (i = 0; i < (full ? 4 : 2); i++)
        aarch64_set_vec_float (cpu, vd, i,
                               aarch64_get_vec_float (cpu, vn, i) *
                               aarch64_get_vec_float (cpu, vm, i) +
                               aarch64_get_vec_float (cpu, vd, i));
do_vec_max (sim_cpu *cpu)
  /* instr[30]    = full/half selector
     instr[29]    = SMAX (0) / UMAX (1)
     instr[28,24] = 0 1110
     instr[23,22] = size: 00=> 8-bit, 01=> 16-bit, 10=> 32-bit
     instr[15,10] = 0110 01  */

  unsigned vm = INSTR (20, 16);
  unsigned vn = INSTR (9, 5);
  unsigned vd = INSTR (4, 0);
  int full = INSTR (30, 30);

  NYI_assert (28, 24, 0x0E);
  NYI_assert (21, 21, 1);
  NYI_assert (15, 10, 0x19);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
      switch (INSTR (23, 22))
          for (i = 0; i < (full ? 16 : 8); i++)
            aarch64_set_vec_u8 (cpu, vd, i,
                                aarch64_get_vec_u8 (cpu, vn, i) > aarch64_get_vec_u8 (cpu, vm, i)
                                ? aarch64_get_vec_u8 (cpu, vn, i) : aarch64_get_vec_u8 (cpu, vm, i));

          for (i = 0; i < (full ? 8 : 4); i++)
            aarch64_set_vec_u16 (cpu, vd, i,
                                 aarch64_get_vec_u16 (cpu, vn, i) > aarch64_get_vec_u16 (cpu, vm, i)
                                 ? aarch64_get_vec_u16 (cpu, vn, i) : aarch64_get_vec_u16 (cpu, vm, i));

          for (i = 0; i < (full ? 4 : 2); i++)
            aarch64_set_vec_u32 (cpu, vd, i,
                                 aarch64_get_vec_u32 (cpu, vn, i) > aarch64_get_vec_u32 (cpu, vm, i)
                                 ? aarch64_get_vec_u32 (cpu, vn, i) : aarch64_get_vec_u32 (cpu, vm, i));

      switch (INSTR (23, 22))
          for (i = 0; i < (full ? 16 : 8); i++)
            aarch64_set_vec_s8 (cpu, vd, i,
                                aarch64_get_vec_s8 (cpu, vn, i) > aarch64_get_vec_s8 (cpu, vm, i)
                                ? aarch64_get_vec_s8 (cpu, vn, i) : aarch64_get_vec_s8 (cpu, vm, i));

          for (i = 0; i < (full ? 8 : 4); i++)
            aarch64_set_vec_s16 (cpu, vd, i,
                                 aarch64_get_vec_s16 (cpu, vn, i) > aarch64_get_vec_s16 (cpu, vm, i)
                                 ? aarch64_get_vec_s16 (cpu, vn, i) : aarch64_get_vec_s16 (cpu, vm, i));

          for (i = 0; i < (full ? 4 : 2); i++)
            aarch64_set_vec_s32 (cpu, vd, i,
                                 aarch64_get_vec_s32 (cpu, vn, i) > aarch64_get_vec_s32 (cpu, vm, i)
                                 ? aarch64_get_vec_s32 (cpu, vn, i) : aarch64_get_vec_s32 (cpu, vm, i));
do_vec_min (sim_cpu *cpu)
  /* instr[30]    = full/half selector
     instr[29]    = SMIN (0) / UMIN (1)
     instr[28,24] = 0 1110
     instr[23,22] = size: 00=> 8-bit, 01=> 16-bit, 10=> 32-bit
     instr[15,10] = 0110 11  */

  unsigned vm = INSTR (20, 16);
  unsigned vn = INSTR (9, 5);
  unsigned vd = INSTR (4, 0);
  int full = INSTR (30, 30);

  NYI_assert (28, 24, 0x0E);
  NYI_assert (21, 21, 1);
  NYI_assert (15, 10, 0x1B);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
      switch (INSTR (23, 22))
          for (i = 0; i < (full ? 16 : 8); i++)
            aarch64_set_vec_u8 (cpu, vd, i,
                                aarch64_get_vec_u8 (cpu, vn, i) < aarch64_get_vec_u8 (cpu, vm, i)
                                ? aarch64_get_vec_u8 (cpu, vn, i) : aarch64_get_vec_u8 (cpu, vm, i));

          for (i = 0; i < (full ? 8 : 4); i++)
            aarch64_set_vec_u16 (cpu, vd, i,
                                 aarch64_get_vec_u16 (cpu, vn, i) < aarch64_get_vec_u16 (cpu, vm, i)
                                 ? aarch64_get_vec_u16 (cpu, vn, i) : aarch64_get_vec_u16 (cpu, vm, i));

          for (i = 0; i < (full ? 4 : 2); i++)
            aarch64_set_vec_u32 (cpu, vd, i,
                                 aarch64_get_vec_u32 (cpu, vn, i) < aarch64_get_vec_u32 (cpu, vm, i)
                                 ? aarch64_get_vec_u32 (cpu, vn, i) : aarch64_get_vec_u32 (cpu, vm, i));

      switch (INSTR (23, 22))
          for (i = 0; i < (full ? 16 : 8); i++)
            aarch64_set_vec_s8 (cpu, vd, i,
                                aarch64_get_vec_s8 (cpu, vn, i) < aarch64_get_vec_s8 (cpu, vm, i)
                                ? aarch64_get_vec_s8 (cpu, vn, i) : aarch64_get_vec_s8 (cpu, vm, i));

          for (i = 0; i < (full ? 8 : 4); i++)
            aarch64_set_vec_s16 (cpu, vd, i,
                                 aarch64_get_vec_s16 (cpu, vn, i) < aarch64_get_vec_s16 (cpu, vm, i)
                                 ? aarch64_get_vec_s16 (cpu, vn, i) : aarch64_get_vec_s16 (cpu, vm, i));

          for (i = 0; i < (full ? 4 : 2); i++)
            aarch64_set_vec_s32 (cpu, vd, i,
                                 aarch64_get_vec_s32 (cpu, vn, i) < aarch64_get_vec_s32 (cpu, vm, i)
                                 ? aarch64_get_vec_s32 (cpu, vn, i) : aarch64_get_vec_s32 (cpu, vm, i));
do_vec_sub_long (sim_cpu *cpu)
  /* instr[30]    = lower (0) / upper (1)
     instr[29]    = signed (0) / unsigned (1)
     instr[28,24] = 0 1110
     instr[23,22] = size: bytes (00), half (01), word (10)
     instr[15,10] = 0010 00
     instr[4,0]   = V dest.  */

  unsigned size = INSTR (23, 22);
  unsigned vm = INSTR (20, 16);
  unsigned vn = INSTR (9, 5);
  unsigned vd = INSTR (4, 0);

  NYI_assert (28, 24, 0x0E);
  NYI_assert (21, 21, 1);
  NYI_assert (15, 10, 0x08);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  switch (INSTR (30, 29))
    case 2: /* SSUBL2.  */
      ATTRIBUTE_FALLTHROUGH;
    case 0: /* SSUBL.  */
          for (i = 0; i < 8; i++)
            aarch64_set_vec_s16 (cpu, vd, i,
                                 aarch64_get_vec_s8 (cpu, vn, i + bias)
                                 - aarch64_get_vec_s8 (cpu, vm, i + bias));

          for (i = 0; i < 4; i++)
            aarch64_set_vec_s32 (cpu, vd, i,
                                 aarch64_get_vec_s16 (cpu, vn, i + bias)
                                 - aarch64_get_vec_s16 (cpu, vm, i + bias));

          for (i = 0; i < 2; i++)
            aarch64_set_vec_s64 (cpu, vd, i,
                                 aarch64_get_vec_s32 (cpu, vn, i + bias)
                                 - aarch64_get_vec_s32 (cpu, vm, i + bias));

    case 3: /* USUBL2.  */
      ATTRIBUTE_FALLTHROUGH;
    case 1: /* USUBL.  */
          for (i = 0; i < 8; i++)
            aarch64_set_vec_u16 (cpu, vd, i,
                                 aarch64_get_vec_u8 (cpu, vn, i + bias)
                                 - aarch64_get_vec_u8 (cpu, vm, i + bias));

          for (i = 0; i < 4; i++)
            aarch64_set_vec_u32 (cpu, vd, i,
                                 aarch64_get_vec_u16 (cpu, vn, i + bias)
                                 - aarch64_get_vec_u16 (cpu, vm, i + bias));

          for (i = 0; i < 2; i++)
            aarch64_set_vec_u64 (cpu, vd, i,
                                 aarch64_get_vec_u32 (cpu, vn, i + bias)
                                 - aarch64_get_vec_u32 (cpu, vm, i + bias));
do_vec_ADDP (sim_cpu *cpu)
  /* instr[30]    = half(0)/full(1)
     instr[29,24] = 00 1110
     instr[23,22] = size: bytes (00), half (01), word (10), long (11)
     instr[15,10] = 1011 11
     instr[4,0]   = V dest.  */

  struct aarch64_sim_cpu *aarch64_cpu = AARCH64_SIM_CPU (cpu);
  unsigned full = INSTR (30, 30);
  unsigned size = INSTR (23, 22);
  unsigned vm = INSTR (20, 16);
  unsigned vn = INSTR (9, 5);
  unsigned vd = INSTR (4, 0);

  NYI_assert (29, 24, 0x0E);
  NYI_assert (21, 21, 1);
  NYI_assert (15, 10, 0x2F);

  /* Make copies of the source registers in case vd == vn/vm.  */
  copy_vn = aarch64_cpu->fr[vn];
  copy_vm = aarch64_cpu->fr[vm];

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
      range = full ? 8 : 4;
      for (i = 0; i < range; i++)
          aarch64_set_vec_u8 (cpu, vd, i,
                              copy_vn.b[i * 2] + copy_vn.b[i * 2 + 1]);
          aarch64_set_vec_u8 (cpu, vd, i + range,
                              copy_vm.b[i * 2] + copy_vm.b[i * 2 + 1]);

      range = full ? 4 : 2;
      for (i = 0; i < range; i++)
          aarch64_set_vec_u16 (cpu, vd, i,
                               copy_vn.h[i * 2] + copy_vn.h[i * 2 + 1]);
          aarch64_set_vec_u16 (cpu, vd, i + range,
                               copy_vm.h[i * 2] + copy_vm.h[i * 2 + 1]);

      range = full ? 2 : 1;
      for (i = 0; i < range; i++)
          aarch64_set_vec_u32 (cpu, vd, i,
                               copy_vn.w[i * 2] + copy_vn.w[i * 2 + 1]);
          aarch64_set_vec_u32 (cpu, vd, i + range,
                               copy_vm.w[i * 2] + copy_vm.w[i * 2 + 1]);

      aarch64_set_vec_u64 (cpu, vd, 0, copy_vn.v[0] + copy_vn.v[1]);
      aarch64_set_vec_u64 (cpu, vd, 1, copy_vm.v[0] + copy_vm.v[1]);
/* Floating point vector convert to longer (precision).  */
do_vec_FCVTL (sim_cpu *cpu)
  /* instr[30]    = half (0) / all (1)
     instr[29,23] = 00 1110 0
     instr[22]    = single (0) / double (1)
     instr[21,10] = 10 0001 0111 10  */

  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);
  unsigned full = INSTR (30, 30);

  NYI_assert (31, 31, 0);
  NYI_assert (29, 23, 0x1C);
  NYI_assert (21, 10, 0x85E);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
      for (i = 0; i < 2; i++)
        aarch64_set_vec_double (cpu, rd, i,
                                aarch64_get_vec_float (cpu, rn, i + 2*full));

      /* TODO: Implement missing half-float support.  */
      for (i = 0; i < 4; i++)
        aarch64_set_vec_float (cpu, rd, i,
                               aarch64_get_vec_halffloat (cpu, rn, i + 4*full));
do_vec_FABS (sim_cpu *cpu)
  /* instr[30]    = half(0)/full(1)
     instr[29,23] = 00 1110 1
     instr[22]    = float(0)/double(1)
     instr[21,16] = 10 0000
     instr[15,10] = 1111 10  */

  unsigned vn = INSTR (9, 5);
  unsigned vd = INSTR (4, 0);
  unsigned full = INSTR (30, 30);

  NYI_assert (29, 23, 0x1D);
  NYI_assert (21, 10, 0x83E);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
      for (i = 0; i < 2; i++)
        aarch64_set_vec_double (cpu, vd, i,
                                fabs (aarch64_get_vec_double (cpu, vn, i)));

      for (i = 0; i < (full ? 4 : 2); i++)
        aarch64_set_vec_float (cpu, vd, i,
                               fabsf (aarch64_get_vec_float (cpu, vn, i)));
do_vec_FCVTZS (sim_cpu *cpu)
  /* instr[30]    = half (0) / all (1)
     instr[29,23] = 00 1110 1
     instr[22]    = single (0) / double (1)
     instr[21,10] = 10 0001 1011 10  */

  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);
  unsigned full = INSTR (30, 30);

  NYI_assert (31, 31, 0);
  NYI_assert (29, 23, 0x1D);
  NYI_assert (21, 10, 0x86E);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
      for (i = 0; i < 2; i++)
        aarch64_set_vec_s64 (cpu, rd, i,
                             (int64_t) aarch64_get_vec_double (cpu, rn, i));

    for (i = 0; i < (full ? 4 : 2); i++)
      aarch64_set_vec_s32 (cpu, rd, i,
                           (int32_t) aarch64_get_vec_float (cpu, rn, i));
do_vec_REV64 (sim_cpu *cpu)
  /* instr[30]    = full/half
     instr[29,24] = 00 1110
     instr[21,10] = 10 0000 0000 10  */

  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);
  unsigned size = INSTR (23, 22);
  unsigned full = INSTR (30, 30);

  NYI_assert (29, 24, 0x0E);
  NYI_assert (21, 10, 0x802);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
      for (i = 0; i < (full ? 16 : 8); i++)
        val.b[i ^ 0x7] = aarch64_get_vec_u8 (cpu, rn, i);

      for (i = 0; i < (full ? 8 : 4); i++)
        val.h[i ^ 0x3] = aarch64_get_vec_u16 (cpu, rn, i);

      for (i = 0; i < (full ? 4 : 2); i++)
        val.w[i ^ 0x1] = aarch64_get_vec_u32 (cpu, rn, i);

  aarch64_set_vec_u64 (cpu, rd, 0, val.v[0]);
    aarch64_set_vec_u64 (cpu, rd, 1, val.v[1]);
do_vec_REV16 (sim_cpu *cpu)
  /* instr[30]    = full/half
     instr[29,24] = 00 1110
     instr[21,10] = 10 0000 0001 10  */

  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);
  unsigned size = INSTR (23, 22);
  unsigned full = INSTR (30, 30);

  NYI_assert (29, 24, 0x0E);
  NYI_assert (21, 10, 0x806);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
      for (i = 0; i < (full ? 16 : 8); i++)
        val.b[i ^ 0x1] = aarch64_get_vec_u8 (cpu, rn, i);

  aarch64_set_vec_u64 (cpu, rd, 0, val.v[0]);
    aarch64_set_vec_u64 (cpu, rd, 1, val.v[1]);
do_vec_op1 (sim_cpu *cpu)
  /* instr[30]    = half/full
     instr[29,24] = 00 1110
     instr[15,10] = sub-opcode  */

  NYI_assert (29, 24, 0x0E);

  if (INSTR (21, 21) == 0)
      if (INSTR (23, 22) == 0)
          if (INSTR (30, 30) == 1
              && INSTR (17, 14) == 0
              && INSTR (12, 10) == 7)
            return do_vec_ins_2 (cpu);

          switch (INSTR (15, 10))
            case 0x01: do_vec_DUP_vector_into_vector (cpu); return;
            case 0x03: do_vec_DUP_scalar_into_vector (cpu); return;
            case 0x07: do_vec_INS (cpu); return;
            case 0x0B: do_vec_SMOV_into_scalar (cpu); return;
            case 0x0F: do_vec_UMOV_into_scalar (cpu); return;

              do_vec_TBL (cpu); return;

              do_vec_UZP (cpu); return;

            case 0x0A: do_vec_TRN (cpu); return;

              do_vec_ZIP (cpu); return;

      switch (INSTR (13, 10))
        case 0x6: do_vec_UZP (cpu); return;
        case 0xE: do_vec_ZIP (cpu); return;
        case 0xA: do_vec_TRN (cpu); return;

  switch (INSTR (15, 10))
    case 0x02: do_vec_REV64 (cpu); return;
    case 0x06: do_vec_REV16 (cpu); return;

      switch (INSTR (23, 21))
        case 1: do_vec_AND (cpu); return;
        case 3: do_vec_BIC (cpu); return;
        case 5: do_vec_ORR (cpu); return;
        case 7: do_vec_ORN (cpu); return;

    case 0x08: do_vec_sub_long (cpu); return;
    case 0x0a: do_vec_XTN (cpu); return;
    case 0x11: do_vec_SSHL (cpu); return;
    case 0x16: do_vec_CNT (cpu); return;
    case 0x19: do_vec_max (cpu); return;
    case 0x1B: do_vec_min (cpu); return;
    case 0x21: do_vec_add (cpu); return;
    case 0x25: do_vec_MLA (cpu); return;
    case 0x27: do_vec_mul (cpu); return;
    case 0x2F: do_vec_ADDP (cpu); return;
    case 0x30: do_vec_mull (cpu); return;
    case 0x33: do_vec_FMLA (cpu); return;
    case 0x35: do_vec_fadd (cpu); return;

      switch (INSTR (20, 16))
        case 0x01: do_vec_FCVTL (cpu); return;

      switch (INSTR (20, 16))
        case 0x00: do_vec_ABS (cpu); return;
        case 0x01: do_vec_FCVTZS (cpu); return;
        case 0x11: do_vec_ADDV (cpu); return;

      do_vec_Fminmax (cpu); return;

      do_vec_compare (cpu); return;

      do_vec_FABS (cpu); return;
do_vec_xtl (sim_cpu *cpu)
  /* instr[30,29] = SXTL (00), UXTL (01), SXTL2 (10), UXTL2 (11)
     instr[28,22] = 0 1111 00
     instr[21,16] = size & shift (USHLL, SSHLL, USHLL2, SSHLL2)
     instr[15,10] = 1010 01
     instr[9,5]   = V source
     instr[4,0]   = V dest.  */

  unsigned vs = INSTR (9, 5);
  unsigned vd = INSTR (4, 0);
  unsigned i, shift, bias = 0;

  NYI_assert (28, 22, 0x3C);
  NYI_assert (15, 10, 0x29);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  switch (INSTR (30, 29))
    case 2: /* SXTL2, SSHLL2.  */
      ATTRIBUTE_FALLTHROUGH;
    case 0: /* SXTL, SSHLL.  */
          shift = INSTR (20, 16);
          /* Get the source values before setting the destination values
             in case the source and destination are the same.  */
          val1 = aarch64_get_vec_s32 (cpu, vs, bias) << shift;
          val2 = aarch64_get_vec_s32 (cpu, vs, bias + 1) << shift;
          aarch64_set_vec_s64 (cpu, vd, 0, val1);
          aarch64_set_vec_s64 (cpu, vd, 1, val2);
      else if (INSTR (20, 20))
          shift = INSTR (19, 16);
          for (i = 0; i < 4; i++)
            v[i] = aarch64_get_vec_s16 (cpu, vs, bias + i) << shift;
          for (i = 0; i < 4; i++)
            aarch64_set_vec_s32 (cpu, vd, i, v[i]);

          NYI_assert (19, 19, 1);
          shift = INSTR (18, 16);
          for (i = 0; i < 8; i++)
            v[i] = aarch64_get_vec_s8 (cpu, vs, i + bias) << shift;
          for (i = 0; i < 8; i++)
            aarch64_set_vec_s16 (cpu, vd, i, v[i]);

    case 3: /* UXTL2, USHLL2.  */
      ATTRIBUTE_FALLTHROUGH;
    case 1: /* UXTL, USHLL.  */
          shift = INSTR (20, 16);
          v1 = aarch64_get_vec_u32 (cpu, vs, bias) << shift;
          v2 = aarch64_get_vec_u32 (cpu, vs, bias + 1) << shift;
          aarch64_set_vec_u64 (cpu, vd, 0, v1);
          aarch64_set_vec_u64 (cpu, vd, 1, v2);
      else if (INSTR (20, 20))
          shift = INSTR (19, 16);
          for (i = 0; i < 4; i++)
            v[i] = aarch64_get_vec_u16 (cpu, vs, i + bias) << shift;
          for (i = 0; i < 4; i++)
            aarch64_set_vec_u32 (cpu, vd, i, v[i]);

          NYI_assert (19, 19, 1);
          shift = INSTR (18, 16);
          for (i = 0; i < 8; i++)
            v[i] = aarch64_get_vec_u8 (cpu, vs, i + bias) << shift;
          for (i = 0; i < 8; i++)
            aarch64_set_vec_u16 (cpu, vd, i, v[i]);
do_vec_SHL (sim_cpu *cpu)
  /* instr [30]    = half(0)/full(1)
     instr [29,23] = 001 1110
     instr [22,16] = size and shift amount
     instr [15,10] = 01 0101
     instr [4, 0]  = Vd.  */

  int full = INSTR (30, 30);
  unsigned vs = INSTR (9, 5);
  unsigned vd = INSTR (4, 0);

  NYI_assert (29, 23, 0x1E);
  NYI_assert (15, 10, 0x15);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
      shift = INSTR (21, 16);
      for (i = 0; i < 2; i++)
          uint64_t val = aarch64_get_vec_u64 (cpu, vs, i);
          aarch64_set_vec_u64 (cpu, vd, i, val << shift);

      shift = INSTR (20, 16);
      for (i = 0; i < (full ? 4 : 2); i++)
          uint32_t val = aarch64_get_vec_u32 (cpu, vs, i);
          aarch64_set_vec_u32 (cpu, vd, i, val << shift);

      shift = INSTR (19, 16);
      for (i = 0; i < (full ? 8 : 4); i++)
          uint16_t val = aarch64_get_vec_u16 (cpu, vs, i);
          aarch64_set_vec_u16 (cpu, vd, i, val << shift);

  if (INSTR (19, 19) == 0)
      shift = INSTR (18, 16);
      for (i = 0; i < (full ? 16 : 8); i++)
          uint8_t val = aarch64_get_vec_u8 (cpu, vs, i);
          aarch64_set_vec_u8 (cpu, vd, i, val << shift);
do_vec_SSHR_USHR (sim_cpu *cpu)
  /* instr [30]    = half(0)/full(1)
     instr [29]    = signed(0)/unsigned(1)
     instr [28,23] = 0 1111 0
     instr [22,16] = size and shift amount
     instr [15,10] = 0000 01
     instr [4, 0]  = Vd.  */

  int full = INSTR (30, 30);
  int sign = ! INSTR (29, 29);
  unsigned shift = INSTR (22, 16);
  unsigned vs = INSTR (9, 5);
  unsigned vd = INSTR (4, 0);

  NYI_assert (28, 23, 0x1E);
  NYI_assert (15, 10, 0x01);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
      shift = 128 - shift;

        for (i = 0; i < 2; i++)
            int64_t val = aarch64_get_vec_s64 (cpu, vs, i);
            aarch64_set_vec_s64 (cpu, vd, i, val >> shift);
        for (i = 0; i < 2; i++)
            uint64_t val = aarch64_get_vec_u64 (cpu, vs, i);
            aarch64_set_vec_u64 (cpu, vd, i, val >> shift);

        for (i = 0; i < (full ? 4 : 2); i++)
            int32_t val = aarch64_get_vec_s32 (cpu, vs, i);
            aarch64_set_vec_s32 (cpu, vd, i, val >> shift);
        for (i = 0; i < (full ? 4 : 2); i++)
            uint32_t val = aarch64_get_vec_u32 (cpu, vs, i);
            aarch64_set_vec_u32 (cpu, vd, i, val >> shift);

        for (i = 0; i < (full ? 8 : 4); i++)
            int16_t val = aarch64_get_vec_s16 (cpu, vs, i);
            aarch64_set_vec_s16 (cpu, vd, i, val >> shift);
        for (i = 0; i < (full ? 8 : 4); i++)
            uint16_t val = aarch64_get_vec_u16 (cpu, vs, i);
            aarch64_set_vec_u16 (cpu, vd, i, val >> shift);

  if (INSTR (19, 19) == 0)
        for (i = 0; i < (full ? 16 : 8); i++)
            int8_t val = aarch64_get_vec_s8 (cpu, vs, i);
            aarch64_set_vec_s8 (cpu, vd, i, val >> shift);
        for (i = 0; i < (full ? 16 : 8); i++)
            uint8_t val = aarch64_get_vec_u8 (cpu, vs, i);
            aarch64_set_vec_u8 (cpu, vd, i, val >> shift);
do_vec_MUL_by_element (sim_cpu *cpu)
  /* instr[30]    = half/full
     instr[29,24] = 00 1111  */

  unsigned full = INSTR (30, 30);
  unsigned L = INSTR (21, 21);
  unsigned H = INSTR (11, 11);
  unsigned vn = INSTR (9, 5);
  unsigned vd = INSTR (4, 0);
  unsigned size = INSTR (23, 22);

  NYI_assert (29, 24, 0x0F);
  NYI_assert (15, 12, 0x8);
  NYI_assert (10, 10, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
      /* 16 bit products.  */
      index = (H << 2) | (L << 1) | INSTR (20, 20);
      vm = INSTR (19, 16);
      element2 = aarch64_get_vec_u16 (cpu, vm, index);

      for (e = 0; e < (full ? 8 : 4); e++)
          element1 = aarch64_get_vec_u16 (cpu, vn, e);
          product = element1 * element2;
          aarch64_set_vec_u16 (cpu, vd, e, product);

      /* 32 bit products.  */
      index = (H << 1) | L;
      vm = INSTR (20, 16);
      element2 = aarch64_get_vec_u32 (cpu, vm, index);

      for (e = 0; e < (full ? 4 : 2); e++)
          element1 = aarch64_get_vec_u32 (cpu, vn, e);
          product = element1 * element2;
          aarch64_set_vec_u32 (cpu, vd, e, product);
do_FMLA_by_element (sim_cpu *cpu)
  /* instr[30]    = half/full
     instr[29,23] = 00 1111 1  */

  unsigned full = INSTR (30, 30);
  unsigned size = INSTR (22, 22);
  unsigned L = INSTR (21, 21);
  unsigned vm = INSTR (20, 16);
  unsigned H = INSTR (11, 11);
  unsigned vn = INSTR (9, 5);
  unsigned vd = INSTR (4, 0);

  NYI_assert (29, 23, 0x1F);
  NYI_assert (15, 12, 0x1);
  NYI_assert (10, 10, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
      double element1, element2;

      element2 = aarch64_get_vec_double (cpu, vm, H);

      for (e = 0; e < 2; e++)
          element1 = aarch64_get_vec_double (cpu, vn, e);
          element1 *= element2;
          element1 += aarch64_get_vec_double (cpu, vd, e);
          aarch64_set_vec_double (cpu, vd, e, element1);

      float element2 = aarch64_get_vec_float (cpu, vm, (H << 1) | L);

      for (e = 0; e < (full ? 4 : 2); e++)
          element1 = aarch64_get_vec_float (cpu, vn, e);
          element1 *= element2;
          element1 += aarch64_get_vec_float (cpu, vd, e);
          aarch64_set_vec_float (cpu, vd, e, element1);
do_vec_op2 (sim_cpu *cpu)
  /* instr[30]    = half/full
     instr[29,24] = 00 1111
     instr[22,16] = element size & index
     instr[15,10] = sub-opcode  */

  NYI_assert (29, 24, 0x0F);

  if (INSTR (23, 23) != 0)
      switch (INSTR (15, 10))
          do_FMLA_by_element (cpu);
          do_vec_MUL_by_element (cpu);

      switch (INSTR (15, 10))
        case 0x01: do_vec_SSHR_USHR (cpu); return;
        case 0x15: do_vec_SHL (cpu); return;
        case 0x22: do_vec_MUL_by_element (cpu); return;
        case 0x29: do_vec_xtl (cpu); return;
do_vec_neg (sim_cpu *cpu)
  /* instr[30]    = full(1)/half(0)
     instr[29,24] = 10 1110
     instr[23,22] = size: byte(00), half (01), word (10), long (11)
     instr[21,10] = 1000 0010 1110  */

  int full = INSTR (30, 30);
  unsigned vs = INSTR (9, 5);
  unsigned vd = INSTR (4, 0);

  NYI_assert (29, 24, 0x2E);
  NYI_assert (21, 10, 0x82E);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  switch (INSTR (23, 22))
      for (i = 0; i < (full ? 16 : 8); i++)
        aarch64_set_vec_s8 (cpu, vd, i, - aarch64_get_vec_s8 (cpu, vs, i));

      for (i = 0; i < (full ? 8 : 4); i++)
        aarch64_set_vec_s16 (cpu, vd, i, - aarch64_get_vec_s16 (cpu, vs, i));

      for (i = 0; i < (full ? 4 : 2); i++)
        aarch64_set_vec_s32 (cpu, vd, i, - aarch64_get_vec_s32 (cpu, vs, i));

      for (i = 0; i < 2; i++)
        aarch64_set_vec_s64 (cpu, vd, i, - aarch64_get_vec_s64 (cpu, vs, i));
do_vec_sqrt (sim_cpu *cpu)
  /* instr[30]    = full(1)/half(0)
     instr[29,23] = 101 1101
     instr[22]    = single(0)/double(1)
     instr[21,10] = 1000 0111 1110  */

  int full = INSTR (30, 30);
  unsigned vs = INSTR (9, 5);
  unsigned vd = INSTR (4, 0);

  NYI_assert (29, 23, 0x5B);
  NYI_assert (21, 10, 0x87E);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  if (INSTR (22, 22) == 0)
    for (i = 0; i < (full ? 4 : 2); i++)
      aarch64_set_vec_float (cpu, vd, i,
                             sqrtf (aarch64_get_vec_float (cpu, vs, i)));

    for (i = 0; i < 2; i++)
      aarch64_set_vec_double (cpu, vd, i,
                              sqrt (aarch64_get_vec_double (cpu, vs, i)));
do_vec_mls_indexed (sim_cpu *cpu)
  /* instr[30]       = half(0)/full(1)
     instr[29,24]    = 10 1111
     instr[23,22]    = 16-bit(01)/32-bit(10)
     instr[21,20+11] = index (if 16-bit)
     instr[21+11]    = index (if 32-bit)
     instr[11]       = part of index  */

  int full = INSTR (30, 30);
  unsigned vs = INSTR (9, 5);
  unsigned vd = INSTR (4, 0);
  unsigned vm = INSTR (20, 16);

  NYI_assert (15, 12, 4);
  NYI_assert (10, 10, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  switch (INSTR (23, 22))
        elem = (INSTR (21, 20) << 1) | INSTR (11, 11);
        val = aarch64_get_vec_u16 (cpu, vm, elem);

        for (i = 0; i < (full ? 8 : 4); i++)
          aarch64_set_vec_u32 (cpu, vd, i,
                               aarch64_get_vec_u32 (cpu, vd, i) -
                               (aarch64_get_vec_u32 (cpu, vs, i) * val));

        unsigned elem = (INSTR (21, 21) << 1) | INSTR (11, 11);
        uint64_t val = aarch64_get_vec_u32 (cpu, vm, elem);

        for (i = 0; i < (full ? 4 : 2); i++)
          aarch64_set_vec_u64 (cpu, vd, i,
                               aarch64_get_vec_u64 (cpu, vd, i) -
                               (aarch64_get_vec_u64 (cpu, vs, i) * val));
do_vec_SUB (sim_cpu *cpu)
  /* instr [30]    = half(0)/full(1)
     instr [29,24] = 10 1110
     instr [23,22] = size: byte(00), half(01), word (10), long (11)
     instr [15,10] = 10 0001
     instr [4, 0]  = Vd.  */

  unsigned full = INSTR (30, 30);
  unsigned vm = INSTR (20, 16);
  unsigned vn = INSTR (9, 5);
  unsigned vd = INSTR (4, 0);

  NYI_assert (29, 24, 0x2E);
  NYI_assert (21, 21, 1);
  NYI_assert (15, 10, 0x21);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  switch (INSTR (23, 22))
      for (i = 0; i < (full ? 16 : 8); i++)
        aarch64_set_vec_s8 (cpu, vd, i,
                            aarch64_get_vec_s8 (cpu, vn, i)
                            - aarch64_get_vec_s8 (cpu, vm, i));

      for (i = 0; i < (full ? 8 : 4); i++)
        aarch64_set_vec_s16 (cpu, vd, i,
                             aarch64_get_vec_s16 (cpu, vn, i)
                             - aarch64_get_vec_s16 (cpu, vm, i));

      for (i = 0; i < (full ? 4 : 2); i++)
        aarch64_set_vec_s32 (cpu, vd, i,
                             aarch64_get_vec_s32 (cpu, vn, i)
                             - aarch64_get_vec_s32 (cpu, vm, i));

      for (i = 0; i < 2; i++)
        aarch64_set_vec_s64 (cpu, vd, i,
                             aarch64_get_vec_s64 (cpu, vn, i)
                             - aarch64_get_vec_s64 (cpu, vm, i));
do_vec_MLS (sim_cpu *cpu)
  /* instr [30]    = half(0)/full(1)
     instr [29,24] = 10 1110
     instr [23,22] = size: byte(00), half(01), word (10)
     instr [15,10] = 10 0101
     instr [4, 0]  = Vd.  */

  unsigned full = INSTR (30, 30);
  unsigned vm = INSTR (20, 16);
  unsigned vn = INSTR (9, 5);
  unsigned vd = INSTR (4, 0);

  NYI_assert (29, 24, 0x2E);
  NYI_assert (21, 21, 1);
  NYI_assert (15, 10, 0x25);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  switch (INSTR (23, 22))
      for (i = 0; i < (full ? 16 : 8); i++)
        aarch64_set_vec_u8 (cpu, vd, i,
                            aarch64_get_vec_u8 (cpu, vd, i)
                            - (aarch64_get_vec_u8 (cpu, vn, i)
                               * aarch64_get_vec_u8 (cpu, vm, i)));

      for (i = 0; i < (full ? 8 : 4); i++)
        aarch64_set_vec_u16 (cpu, vd, i,
                             aarch64_get_vec_u16 (cpu, vd, i)
                             - (aarch64_get_vec_u16 (cpu, vn, i)
                                * aarch64_get_vec_u16 (cpu, vm, i)));

      for (i = 0; i < (full ? 4 : 2); i++)
        aarch64_set_vec_u32 (cpu, vd, i,
                             aarch64_get_vec_u32 (cpu, vd, i)
                             - (aarch64_get_vec_u32 (cpu, vn, i)
                                * aarch64_get_vec_u32 (cpu, vm, i)));
do_vec_FDIV (sim_cpu *cpu)
  /* instr [30]    = half(0)/full(1)
     instr [29,23] = 10 1110 0
     instr [22]    = float(0)/double(1)
     instr [15,10] = 1111 11
     instr [4, 0]  = Vd.  */

  unsigned full = INSTR (30, 30);
  unsigned vm = INSTR (20, 16);
  unsigned vn = INSTR (9, 5);
  unsigned vd = INSTR (4, 0);

  NYI_assert (29, 23, 0x5C);
  NYI_assert (21, 21, 1);
  NYI_assert (15, 10, 0x3F);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
      for (i = 0; i < 2; i++)
        aarch64_set_vec_double (cpu, vd, i,
                                aarch64_get_vec_double (cpu, vn, i)
                                / aarch64_get_vec_double (cpu, vm, i));

      for (i = 0; i < (full ? 4 : 2); i++)
        aarch64_set_vec_float (cpu, vd, i,
                               aarch64_get_vec_float (cpu, vn, i)
                               / aarch64_get_vec_float (cpu, vm, i));
do_vec_FMUL (sim_cpu *cpu)
  /* instr [30]    = half(0)/full(1)
     instr [29,23] = 10 1110 0
     instr [22]    = float(0)/double(1)
     instr [15,10] = 1101 11
     instr [4, 0]  = Vd.  */

  unsigned full = INSTR (30, 30);
  unsigned vm = INSTR (20, 16);
  unsigned vn = INSTR (9, 5);
  unsigned vd = INSTR (4, 0);

  NYI_assert (29, 23, 0x5C);
  NYI_assert (21, 21, 1);
  NYI_assert (15, 10, 0x37);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
      for (i = 0; i < 2; i++)
        aarch64_set_vec_double (cpu, vd, i,
                                aarch64_get_vec_double (cpu, vn, i)
                                * aarch64_get_vec_double (cpu, vm, i));

      for (i = 0; i < (full ? 4 : 2); i++)
        aarch64_set_vec_float (cpu, vd, i,
                               aarch64_get_vec_float (cpu, vn, i)
                               * aarch64_get_vec_float (cpu, vm, i));
do_vec_FADDP (sim_cpu *cpu)
  /* instr [30]    = half(0)/full(1)
     instr [29,23] = 10 1110 0
     instr [22]    = float(0)/double(1)
     instr [15,10] = 1101 01
     instr [4, 0]  = Vd.  */

  unsigned full = INSTR (30, 30);
  unsigned vm = INSTR (20, 16);
  unsigned vn = INSTR (9, 5);
  unsigned vd = INSTR (4, 0);

  NYI_assert (29, 23, 0x5C);
  NYI_assert (21, 21, 1);
  NYI_assert (15, 10, 0x35);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
      /* Extract values before adding them in case vd == vn/vm.  */
      double tmp1 = aarch64_get_vec_double (cpu, vn, 0);
      double tmp2 = aarch64_get_vec_double (cpu, vn, 1);
      double tmp3 = aarch64_get_vec_double (cpu, vm, 0);
      double tmp4 = aarch64_get_vec_double (cpu, vm, 1);

      aarch64_set_vec_double (cpu, vd, 0, tmp1 + tmp2);
      aarch64_set_vec_double (cpu, vd, 1, tmp3 + tmp4);

      /* Extract values before adding them in case vd == vn/vm.  */
      float tmp1 = aarch64_get_vec_float (cpu, vn, 0);
      float tmp2 = aarch64_get_vec_float (cpu, vn, 1);
      float tmp5 = aarch64_get_vec_float (cpu, vm, 0);
      float tmp6 = aarch64_get_vec_float (cpu, vm, 1);

          float tmp3 = aarch64_get_vec_float (cpu, vn, 2);
          float tmp4 = aarch64_get_vec_float (cpu, vn, 3);
          float tmp7 = aarch64_get_vec_float (cpu, vm, 2);
          float tmp8 = aarch64_get_vec_float (cpu, vm, 3);

          aarch64_set_vec_float (cpu, vd, 0, tmp1 + tmp2);
          aarch64_set_vec_float (cpu, vd, 1, tmp3 + tmp4);
          aarch64_set_vec_float (cpu, vd, 2, tmp5 + tmp6);
          aarch64_set_vec_float (cpu, vd, 3, tmp7 + tmp8);

          aarch64_set_vec_float (cpu, vd, 0, tmp1 + tmp2);
          aarch64_set_vec_float (cpu, vd, 1, tmp5 + tmp6);
do_vec_FSQRT (sim_cpu *cpu)
  /* instr[30]    = half(0)/full(1)
     instr[29,23] = 10 1110 1
     instr[22]    = single(0)/double(1)
     instr[21,10] = 10 0001 1111 10
     instr[4,0]   = Vdest.  */

  unsigned vn = INSTR (9, 5);
  unsigned vd = INSTR (4, 0);
  unsigned full = INSTR (30, 30);

  NYI_assert (29, 23, 0x5D);
  NYI_assert (21, 10, 0x87E);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
      for (i = 0; i < 2; i++)
        aarch64_set_vec_double (cpu, vd, i,
                                sqrt (aarch64_get_vec_double (cpu, vn, i)));

      for (i = 0; i < (full ? 4 : 2); i++)
        aarch64_set_vec_float (cpu, vd, i,
                               sqrtf (aarch64_get_vec_float (cpu, vn, i)));
do_vec_FNEG (sim_cpu *cpu)
  /* instr[30]    = half (0)/full (1)
     instr[29,23] = 10 1110 1
     instr[22]    = single (0)/double (1)
     instr[21,10] = 10 0000 1111 10
     instr[4,0]   = Vdest.  */

  unsigned vn = INSTR (9, 5);
  unsigned vd = INSTR (4, 0);
  unsigned full = INSTR (30, 30);

  NYI_assert (29, 23, 0x5D);
  NYI_assert (21, 10, 0x83E);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
      for (i = 0; i < 2; i++)
        aarch64_set_vec_double (cpu, vd, i,
                                - aarch64_get_vec_double (cpu, vn, i));

      for (i = 0; i < (full ? 4 : 2); i++)
        aarch64_set_vec_float (cpu, vd, i,
                               - aarch64_get_vec_float (cpu, vn, i));
do_vec_NOT (sim_cpu *cpu)
  /* instr[30]    = half (0)/full (1)
     instr[29,10] = 10 1110 0010 0000 0101 10  */

  unsigned vn = INSTR (9, 5);
  unsigned vd = INSTR (4, 0);
  int full = INSTR (30, 30);

  NYI_assert (29, 10, 0xB8816);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  for (i = 0; i < (full ? 16 : 8); i++)
    aarch64_set_vec_u8 (cpu, vd, i, ~ aarch64_get_vec_u8 (cpu, vn, i));
clz (uint64_t val, unsigned size)
  mask <<= (size - 1);
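
/* Illustrative sketch (an assumption -- the body of clz is not shown
   above): a leading-zero count over a SIZE-bit value can be computed by
   walking a single-bit mask down from the top bit, which is what the
   "mask <<= (size - 1)" initialisation suggests.  The helper name is
   hypothetical.  */
static unsigned
example_clz (uint64_t val, unsigned size)
{
  uint64_t mask = 1;
  unsigned count = 0;

  mask <<= (size - 1);
  while (mask && !(val & mask))
    {
      mask >>= 1;
      count++;
    }
  return count;   /* Returns SIZE when val is zero.  */
}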
do_vec_CLZ (sim_cpu *cpu)
  /* instr[30]    = half (0)/full (1)
     instr[29,24] = 10 1110
     instr[21,10] = 10 0000 0100 10  */

  unsigned vn = INSTR (9, 5);
  unsigned vd = INSTR (4, 0);
  int full = INSTR (30,30);

  NYI_assert (29, 24, 0x2E);
  NYI_assert (21, 10, 0x812);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  switch (INSTR (23, 22))
      for (i = 0; i < (full ? 16 : 8); i++)
        aarch64_set_vec_u8 (cpu, vd, i, clz (aarch64_get_vec_u8 (cpu, vn, i), 8));

      for (i = 0; i < (full ? 8 : 4); i++)
        aarch64_set_vec_u16 (cpu, vd, i, clz (aarch64_get_vec_u16 (cpu, vn, i), 16));

      for (i = 0; i < (full ? 4 : 2); i++)
        aarch64_set_vec_u32 (cpu, vd, i, clz (aarch64_get_vec_u32 (cpu, vn, i), 32));

      aarch64_set_vec_u64 (cpu, vd, 0, clz (aarch64_get_vec_u64 (cpu, vn, 0), 64));
      aarch64_set_vec_u64 (cpu, vd, 1, clz (aarch64_get_vec_u64 (cpu, vn, 1), 64));
do_vec_MOV_element (sim_cpu *cpu)
  /* instr[31,21] = 0110 1110 000
     instr[20,16] = size & dest index
     instr[14,11] = source index  */

  unsigned vs = INSTR (9, 5);
  unsigned vd = INSTR (4, 0);

  NYI_assert (31, 21, 0x370);
  NYI_assert (15, 15, 0);
  NYI_assert (10, 10, 1);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
      src_index = INSTR (14, 11);
      dst_index = INSTR (20, 17);
      aarch64_set_vec_u8 (cpu, vd, dst_index,
                          aarch64_get_vec_u8 (cpu, vs, src_index));
  else if (INSTR (17, 17))
      NYI_assert (11, 11, 0);
      src_index = INSTR (14, 12);
      dst_index = INSTR (20, 18);
      aarch64_set_vec_u16 (cpu, vd, dst_index,
                           aarch64_get_vec_u16 (cpu, vs, src_index));
  else if (INSTR (18, 18))
      NYI_assert (12, 11, 0);
      src_index = INSTR (14, 13);
      dst_index = INSTR (20, 19);
      aarch64_set_vec_u32 (cpu, vd, dst_index,
                           aarch64_get_vec_u32 (cpu, vs, src_index));

      NYI_assert (19, 19, 1);
      NYI_assert (13, 11, 0);
      src_index = INSTR (14, 14);
      dst_index = INSTR (20, 20);
      aarch64_set_vec_u64 (cpu, vd, dst_index,
                           aarch64_get_vec_u64 (cpu, vs, src_index));
do_vec_REV32 (sim_cpu *cpu)
  /* instr[30]    = full/half
     instr[29,24] = 10 1110
     instr[21,10] = 10 0000 0000 10  */

  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);
  unsigned size = INSTR (23, 22);
  unsigned full = INSTR (30, 30);

  NYI_assert (29, 24, 0x2E);
  NYI_assert (21, 10, 0x802);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
      for (i = 0; i < (full ? 16 : 8); i++)
        val.b[i ^ 0x3] = aarch64_get_vec_u8 (cpu, rn, i);

      for (i = 0; i < (full ? 8 : 4); i++)
        val.h[i ^ 0x1] = aarch64_get_vec_u16 (cpu, rn, i);

  aarch64_set_vec_u64 (cpu, rd, 0, val.v[0]);
    aarch64_set_vec_u64 (cpu, rd, 1, val.v[1]);
do_vec_EXT (sim_cpu *cpu)
  /* instr[30]    = full/half
     instr[29,21] = 10 1110 000
     instr[14,11] = source index  */

  unsigned vm = INSTR (20, 16);
  unsigned vn = INSTR (9, 5);
  unsigned vd = INSTR (4, 0);
  unsigned src_index = INSTR (14, 11);
  unsigned full = INSTR (30, 30);

  NYI_assert (31, 21, 0x370);
  NYI_assert (15, 15, 0);
  NYI_assert (10, 10, 0);

  if (!full && (src_index & 0x8))

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  for (i = src_index; i < (full ? 16 : 8); i++)
    val.b[j++] = aarch64_get_vec_u8 (cpu, vn, i);
  for (i = 0; i < src_index; i++)
    val.b[j++] = aarch64_get_vec_u8 (cpu, vm, i);

  aarch64_set_vec_u64 (cpu, vd, 0, val.v[0]);
    aarch64_set_vec_u64 (cpu, vd, 1, val.v[1]);
dexAdvSIMD0 (sim_cpu *cpu)
  /* instr [28,25] = 0 111.  */
  if ( INSTR (15, 10) == 0x07
      if (INSTR (31, 21) == 0x075
          || INSTR (31, 21) == 0x275)
          do_vec_MOV_whole_vector (cpu);

  if (INSTR (29, 19) == 0x1E0)
      do_vec_MOV_immediate (cpu);

  if (INSTR (29, 19) == 0x5E0)

  if (INSTR (29, 19) == 0x1C0
      || INSTR (29, 19) == 0x1C1)
      if (INSTR (15, 10) == 0x03)
          do_vec_DUP_scalar_into_vector (cpu);

  switch (INSTR (29, 24))
    case 0x0E: do_vec_op1 (cpu); return;
    case 0x0F: do_vec_op2 (cpu); return;

      if (INSTR (21, 21) == 1)
          switch (INSTR (15, 10))
                switch (INSTR (23, 22))
                  case 0: do_vec_EOR (cpu); return;
                  case 1: do_vec_BSL (cpu); return;
                  case 3: do_vec_bit (cpu); return;

            case 0x08: do_vec_sub_long (cpu); return;
            case 0x11: do_vec_USHL (cpu); return;
            case 0x12: do_vec_CLZ (cpu); return;
            case 0x16: do_vec_NOT (cpu); return;
            case 0x19: do_vec_max (cpu); return;
            case 0x1B: do_vec_min (cpu); return;
            case 0x21: do_vec_SUB (cpu); return;
            case 0x25: do_vec_MLS (cpu); return;
            case 0x31: do_vec_FminmaxNMP (cpu); return;
            case 0x35: do_vec_FADDP (cpu); return;
            case 0x37: do_vec_FMUL (cpu); return;
            case 0x3F: do_vec_FDIV (cpu); return;

              switch (INSTR (20, 16))
                case 0x00: do_vec_FNEG (cpu); return;
                case 0x01: do_vec_FSQRT (cpu); return;

              do_vec_compare (cpu); return;

      if (INSTR (31, 21) == 0x370)
          do_vec_MOV_element (cpu);

      switch (INSTR (21, 10))
        case 0x82E: do_vec_neg (cpu); return;
        case 0x87E: do_vec_sqrt (cpu); return;
          if (INSTR (15, 10) == 0x30)

      switch (INSTR (15, 10))
        case 0x01: do_vec_SSHR_USHR (cpu); return;
        case 0x12: do_vec_mls_indexed (cpu); return;
        case 0x29: do_vec_xtl (cpu); return;
/* Float multiply add.  */
fmadds (sim_cpu *cpu)
  unsigned sa = INSTR (14, 10);
  unsigned sm = INSTR (20, 16);
  unsigned sn = INSTR ( 9, 5);
  unsigned sd = INSTR ( 4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_FP_float (cpu, sd, aarch64_get_FP_float (cpu, sa)
                        + aarch64_get_FP_float (cpu, sn)
                        * aarch64_get_FP_float (cpu, sm));

/* Double multiply add.  */
fmaddd (sim_cpu *cpu)
  unsigned sa = INSTR (14, 10);
  unsigned sm = INSTR (20, 16);
  unsigned sn = INSTR ( 9, 5);
  unsigned sd = INSTR ( 4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_FP_double (cpu, sd, aarch64_get_FP_double (cpu, sa)
                         + aarch64_get_FP_double (cpu, sn)
                         * aarch64_get_FP_double (cpu, sm));

/* Float multiply subtract.  */
fmsubs (sim_cpu *cpu)
  unsigned sa = INSTR (14, 10);
  unsigned sm = INSTR (20, 16);
  unsigned sn = INSTR ( 9, 5);
  unsigned sd = INSTR ( 4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_FP_float (cpu, sd, aarch64_get_FP_float (cpu, sa)
                        - aarch64_get_FP_float (cpu, sn)
                        * aarch64_get_FP_float (cpu, sm));

/* Double multiply subtract.  */
fmsubd (sim_cpu *cpu)
  unsigned sa = INSTR (14, 10);
  unsigned sm = INSTR (20, 16);
  unsigned sn = INSTR ( 9, 5);
  unsigned sd = INSTR ( 4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_FP_double (cpu, sd, aarch64_get_FP_double (cpu, sa)
                         - aarch64_get_FP_double (cpu, sn)
                         * aarch64_get_FP_double (cpu, sm));

/* Float negative multiply add.  */
fnmadds (sim_cpu *cpu)
  unsigned sa = INSTR (14, 10);
  unsigned sm = INSTR (20, 16);
  unsigned sn = INSTR ( 9, 5);
  unsigned sd = INSTR ( 4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_FP_float (cpu, sd, - aarch64_get_FP_float (cpu, sa)
                        + (- aarch64_get_FP_float (cpu, sn))
                        * aarch64_get_FP_float (cpu, sm));

/* Double negative multiply add.  */
fnmaddd (sim_cpu *cpu)
  unsigned sa = INSTR (14, 10);
  unsigned sm = INSTR (20, 16);
  unsigned sn = INSTR ( 9, 5);
  unsigned sd = INSTR ( 4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_FP_double (cpu, sd, - aarch64_get_FP_double (cpu, sa)
                         + (- aarch64_get_FP_double (cpu, sn))
                         * aarch64_get_FP_double (cpu, sm));

/* Float negative multiply subtract.  */
fnmsubs (sim_cpu *cpu)
  unsigned sa = INSTR (14, 10);
  unsigned sm = INSTR (20, 16);
  unsigned sn = INSTR ( 9, 5);
  unsigned sd = INSTR ( 4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_FP_float (cpu, sd, - aarch64_get_FP_float (cpu, sa)
                        + aarch64_get_FP_float (cpu, sn)
                        * aarch64_get_FP_float (cpu, sm));

/* Double negative multiply subtract.  */
fnmsubd (sim_cpu *cpu)
  unsigned sa = INSTR (14, 10);
  unsigned sm = INSTR (20, 16);
  unsigned sn = INSTR ( 9, 5);
  unsigned sd = INSTR ( 4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_FP_double (cpu, sd, - aarch64_get_FP_double (cpu, sa)
                         + aarch64_get_FP_double (cpu, sn)
                         * aarch64_get_FP_double (cpu, sm));
dexSimpleFPDataProc3Source (sim_cpu *cpu)
  /* instr[31]    ==> M : 0 ==> OK, 1 ==> UNALLOC
     instr[29]    ==> S : 0 ==> OK, 1 ==> UNALLOC
     instr[23,22] ==> type : 0 ==> single, 01 ==> double, 1x ==> UNALLOC
     instr[21]    ==> o1 : 0 ==> unnegated, 1 ==> negated
     instr[15]    ==> o2 : 0 ==> ADD, 1 ==> SUB  */

  uint32_t M_S = (INSTR (31, 31) << 1) | INSTR (29, 29);
  /* dispatch on combined type:o1:o2.  */
  uint32_t dispatch = (INSTR (23, 21) << 1) | INSTR (15, 15);

    case 0: fmadds (cpu); return;
    case 1: fmsubs (cpu); return;
    case 2: fnmadds (cpu); return;
    case 3: fnmsubs (cpu); return;
    case 4: fmaddd (cpu); return;
    case 5: fmsubd (cpu); return;
    case 6: fnmaddd (cpu); return;
    case 7: fnmsubd (cpu); return;

  /* type > 1 is currently unallocated.  */
dexSimpleFPFixedConvert (sim_cpu *cpu)

dexSimpleFPCondCompare (sim_cpu *cpu)
  /* instr [31,23] = 0001 1110 0
     instr [15,12] = condition
     instr [3,0]   = nzcv  */

  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);

  NYI_assert (31, 23, 0x3C);
  NYI_assert (11, 10, 0x1);
  NYI_assert (4, 4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  if (! testConditionCode (cpu, INSTR (15, 12)))
      aarch64_set_CPSR (cpu, INSTR (3, 0));

      /* Double precision.  */
      double val1 = aarch64_get_vec_double (cpu, rn, 0);
      double val2 = aarch64_get_vec_double (cpu, rm, 0);

      /* FIXME: Check for NaNs.  */
        aarch64_set_CPSR (cpu, (Z | C));
      else if (val1 < val2)
        aarch64_set_CPSR (cpu, N);
      else /* val1 > val2 */
        aarch64_set_CPSR (cpu, C);

      /* Single precision.  */
      float val1 = aarch64_get_vec_float (cpu, rn, 0);
      float val2 = aarch64_get_vec_float (cpu, rm, 0);

      /* FIXME: Check for NaNs.  */
        aarch64_set_CPSR (cpu, (Z | C));
      else if (val1 < val2)
        aarch64_set_CPSR (cpu, N);
      else /* val1 > val2 */
        aarch64_set_CPSR (cpu, C);
fadds (sim_cpu *cpu)
  unsigned sm = INSTR (20, 16);
  unsigned sn = INSTR ( 9, 5);
  unsigned sd = INSTR ( 4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_FP_float (cpu, sd, aarch64_get_FP_float (cpu, sn)
                        + aarch64_get_FP_float (cpu, sm));

faddd (sim_cpu *cpu)
  unsigned sm = INSTR (20, 16);
  unsigned sn = INSTR ( 9, 5);
  unsigned sd = INSTR ( 4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_FP_double (cpu, sd, aarch64_get_FP_double (cpu, sn)
                         + aarch64_get_FP_double (cpu, sm));

fdivs (sim_cpu *cpu)
  unsigned sm = INSTR (20, 16);
  unsigned sn = INSTR ( 9, 5);
  unsigned sd = INSTR ( 4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_FP_float (cpu, sd, aarch64_get_FP_float (cpu, sn)
                        / aarch64_get_FP_float (cpu, sm));

/* Double divide.  */
fdivd (sim_cpu *cpu)
  unsigned sm = INSTR (20, 16);
  unsigned sn = INSTR ( 9, 5);
  unsigned sd = INSTR ( 4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_FP_double (cpu, sd, aarch64_get_FP_double (cpu, sn)
                         / aarch64_get_FP_double (cpu, sm));

/* Float multiply.  */
fmuls (sim_cpu *cpu)
  unsigned sm = INSTR (20, 16);
  unsigned sn = INSTR ( 9, 5);
  unsigned sd = INSTR ( 4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_FP_float (cpu, sd, aarch64_get_FP_float (cpu, sn)
                        * aarch64_get_FP_float (cpu, sm));

/* Double multiply.  */
fmuld (sim_cpu *cpu)
  unsigned sm = INSTR (20, 16);
  unsigned sn = INSTR ( 9, 5);
  unsigned sd = INSTR ( 4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_FP_double (cpu, sd, aarch64_get_FP_double (cpu, sn)
                         * aarch64_get_FP_double (cpu, sm));

/* Float negate and multiply.  */
fnmuls (sim_cpu *cpu)
  unsigned sm = INSTR (20, 16);
  unsigned sn = INSTR ( 9, 5);
  unsigned sd = INSTR ( 4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_FP_float (cpu, sd, - (aarch64_get_FP_float (cpu, sn)
                                    * aarch64_get_FP_float (cpu, sm)));

/* Double negate and multiply.  */
fnmuld (sim_cpu *cpu)
  unsigned sm = INSTR (20, 16);
  unsigned sn = INSTR ( 9, 5);
  unsigned sd = INSTR ( 4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_FP_double (cpu, sd, - (aarch64_get_FP_double (cpu, sn)
                                     * aarch64_get_FP_double (cpu, sm)));

/* Float subtract.  */
fsubs (sim_cpu *cpu)
  unsigned sm = INSTR (20, 16);
  unsigned sn = INSTR ( 9, 5);
  unsigned sd = INSTR ( 4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_FP_float (cpu, sd, aarch64_get_FP_float (cpu, sn)
                        - aarch64_get_FP_float (cpu, sm));

/* Double subtract.  */
fsubd (sim_cpu *cpu)
  unsigned sm = INSTR (20, 16);
  unsigned sn = INSTR ( 9, 5);
  unsigned sd = INSTR ( 4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_FP_double (cpu, sd, aarch64_get_FP_double (cpu, sn)
                         - aarch64_get_FP_double (cpu, sm));
do_FMINNM (sim_cpu *cpu)
  /* instr[31,23] = 0 0011 1100
     instr[22]    = float(0)/double(1)
     instr[15,10] = 01 1110  */

  unsigned sm = INSTR (20, 16);
  unsigned sn = INSTR ( 9, 5);
  unsigned sd = INSTR ( 4, 0);

  NYI_assert (31, 23, 0x03C);
  NYI_assert (15, 10, 0x1E);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
    aarch64_set_FP_double (cpu, sd,
                           dminnm (aarch64_get_FP_double (cpu, sn),
                                   aarch64_get_FP_double (cpu, sm)));
    aarch64_set_FP_float (cpu, sd,
                          fminnm (aarch64_get_FP_float (cpu, sn),
                                  aarch64_get_FP_float (cpu, sm)));

do_FMAXNM (sim_cpu *cpu)
  /* instr[31,23] = 0 0011 1100
     instr[22]    = float(0)/double(1)
     instr[15,10] = 01 1010  */

  unsigned sm = INSTR (20, 16);
  unsigned sn = INSTR ( 9, 5);
  unsigned sd = INSTR ( 4, 0);

  NYI_assert (31, 23, 0x03C);
  NYI_assert (15, 10, 0x1A);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
    aarch64_set_FP_double (cpu, sd,
                           dmaxnm (aarch64_get_FP_double (cpu, sn),
                                   aarch64_get_FP_double (cpu, sm)));
    aarch64_set_FP_float (cpu, sd,
                          fmaxnm (aarch64_get_FP_float (cpu, sn),
                                  aarch64_get_FP_float (cpu, sm)));
dexSimpleFPDataProc2Source (sim_cpu *cpu)
  /* instr[31]    ==> M : 0 ==> OK, 1 ==> UNALLOC
     instr[29]    ==> S : 0 ==> OK, 1 ==> UNALLOC
     instr[23,22] ==> type : 0 ==> single, 01 ==> double, 1x ==> UNALLOC
     instr[15,12] ==> opcode : 0000 ==> FMUL, 0001 ==> FDIV
                               0010 ==> FADD, 0011 ==> FSUB,
                               0100 ==> FMAX, 0101 ==> FMIN
                               0110 ==> FMAXNM, 0111 ==> FMINNM
                               1000 ==> FNMUL, ow ==> UNALLOC  */

  uint32_t M_S = (INSTR (31, 31) << 1) | INSTR (29, 29);
  uint32_t type = INSTR (23, 22);
  /* Dispatch on opcode.  */
  uint32_t dispatch = INSTR (15, 12);

        case 0: fmuld (cpu); return;
        case 1: fdivd (cpu); return;
        case 2: faddd (cpu); return;
        case 3: fsubd (cpu); return;
        case 6: do_FMAXNM (cpu); return;
        case 7: do_FMINNM (cpu); return;
        case 8: fnmuld (cpu); return;

          /* Have not yet implemented fmax and fmin.  */

  else /* type == 0 => floats.  */
        case 0: fmuls (cpu); return;
        case 1: fdivs (cpu); return;
        case 2: fadds (cpu); return;
        case 3: fsubs (cpu); return;
        case 6: do_FMAXNM (cpu); return;
        case 7: do_FMINNM (cpu); return;
        case 8: fnmuls (cpu); return;
dexSimpleFPCondSelect (sim_cpu *cpu)
  /* instr[31,23] = 0 0011 1100
     instr[22]    = 0=>single 1=>double  */

  unsigned sm = INSTR (20, 16);
  unsigned sn = INSTR ( 9, 5);
  unsigned sd = INSTR ( 4, 0);
  uint32_t set = testConditionCode (cpu, INSTR (15, 12));

  NYI_assert (31, 23, 0x03C);
  NYI_assert (11, 10, 0x3);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
    aarch64_set_FP_double (cpu, sd, (set ? aarch64_get_FP_double (cpu, sn)
                                     : aarch64_get_FP_double (cpu, sm)));
    aarch64_set_FP_float (cpu, sd, (set ? aarch64_get_FP_float (cpu, sn)
                                    : aarch64_get_FP_float (cpu, sm)));
/* Store 32 bit unscaled signed 9 bit.  */
fsturs (sim_cpu *cpu, int32_t offset)
  unsigned int rn = INSTR (9, 5);
  unsigned int st = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_mem_u32 (cpu, aarch64_get_reg_u64 (cpu, rn, 1) + offset,
                       aarch64_get_vec_u32 (cpu, st, 0));

/* Store 64 bit unscaled signed 9 bit.  */
fsturd (sim_cpu *cpu, int32_t offset)
  unsigned int rn = INSTR (9, 5);
  unsigned int st = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_mem_u64 (cpu, aarch64_get_reg_u64 (cpu, rn, 1) + offset,
                       aarch64_get_vec_u64 (cpu, st, 0));

/* Store 128 bit unscaled signed 9 bit.  */
fsturq (sim_cpu *cpu, int32_t offset)
  unsigned int rn = INSTR (9, 5);
  unsigned int st = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_get_FP_long_double (cpu, st, & a);
  aarch64_set_mem_long_double (cpu,
                               aarch64_get_reg_u64 (cpu, rn, 1) + offset, a);
/* TODO FP move register.  */

/* 32 bit fp to fp move register.  */
ffmovs (sim_cpu *cpu)
  unsigned int rn = INSTR (9, 5);
  unsigned int st = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_FP_float (cpu, st, aarch64_get_FP_float (cpu, rn));

/* 64 bit fp to fp move register.  */
ffmovd (sim_cpu *cpu)
  unsigned int rn = INSTR (9, 5);
  unsigned int st = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_FP_double (cpu, st, aarch64_get_FP_double (cpu, rn));

/* 32 bit GReg to Vec move register.  */
fgmovs (sim_cpu *cpu)
  unsigned int rn = INSTR (9, 5);
  unsigned int st = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_vec_u32 (cpu, st, 0, aarch64_get_reg_u32 (cpu, rn, NO_SP));

/* 64 bit g to fp move register.  */
fgmovd (sim_cpu *cpu)
  unsigned int rn = INSTR (9, 5);
  unsigned int st = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_vec_u64 (cpu, st, 0, aarch64_get_reg_u64 (cpu, rn, NO_SP));

/* 32 bit fp to g move register.  */
gfmovs (sim_cpu *cpu)
  unsigned int rn = INSTR (9, 5);
  unsigned int st = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, st, NO_SP, aarch64_get_vec_u32 (cpu, rn, 0));

/* 64 bit fp to g move register.  */
gfmovd (sim_cpu *cpu)
  unsigned int rn = INSTR (9, 5);
  unsigned int st = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, st, NO_SP, aarch64_get_vec_u64 (cpu, rn, 0));
/* FP move immediate

   These install an immediate 8 bit value in the target register
   where the 8 bits comprise 1 sign bit, 4 bits of fraction and a 3
   bit exponent.  */

fmovs (sim_cpu *cpu)
  unsigned int sd = INSTR (4, 0);
  uint32_t imm = INSTR (20, 13);
  float f = fp_immediate_for_encoding_32 (imm);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_FP_float (cpu, sd, f);

fmovd (sim_cpu *cpu)
  unsigned int sd = INSTR (4, 0);
  uint32_t imm = INSTR (20, 13);
  double d = fp_immediate_for_encoding_64 (imm);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_FP_double (cpu, sd, d);

dexSimpleFPImmediate (sim_cpu *cpu)
  /* instr[31,23] == 00111100
     instr[22]    == type : single(0)/double(1)
     instr[20,13] == imm8
     instr[9,5]   == imm5 : 00000 ==> PK, ow ==> UNALLOC  */

  uint32_t imm5 = INSTR (9, 5);

  NYI_assert (31, 23, 0x3C);
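
/* Illustrative sketch (an assumption, not the simulator's own
   fp_immediate_for_encoding_32): one way the 8 bit FMOV immediate described
   above -- 1 sign bit, a 3 bit exponent field and 4 fraction bits -- can be
   expanded into an IEEE single, following the architectural VFPExpandImm
   layout.  The helper name is hypothetical.  */
static float
example_expand_fmov_imm8 (uint32_t imm8)
{
  uint32_t sign = (imm8 >> 7) & 1;
  uint32_t b6   = (imm8 >> 6) & 1;
  /* 8 bit exponent = NOT(b6) : b6 repeated five times : imm8<5:4>.  */
  uint32_t exp  = ((b6 ^ 1) << 7) | (b6 ? 0x7C : 0) | ((imm8 >> 4) & 3);
  /* The 4 fraction bits occupy the top of the 23 bit mantissa.  */
  uint32_t bits = (sign << 31) | (exp << 23) | ((imm8 & 0xF) << 19);
  union { uint32_t u; float f; } cvt;

  cvt.u = bits;
  return cvt.f;   /* E.g. imm8 == 0x70 yields 1.0f.  */
}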
/* TODO specific decode and execute for group Load Store.  */

/* TODO FP load/store single register (unscaled offset).  */

/* TODO load 8 bit unscaled signed 9 bit.  */
/* TODO load 16 bit unscaled signed 9 bit.  */

/* Load 32 bit unscaled signed 9 bit.  */
static void
fldurs (sim_cpu *cpu, int32_t offset)
{
  unsigned int rn = INSTR (9, 5);
  unsigned int st = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_vec_u32 (cpu, st, 0, aarch64_get_mem_u32
                       (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset));
}

/* Load 64 bit unscaled signed 9 bit.  */
static void
fldurd (sim_cpu *cpu, int32_t offset)
{
  unsigned int rn = INSTR (9, 5);
  unsigned int st = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_vec_u64 (cpu, st, 0, aarch64_get_mem_u64
                       (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset));
}

/* Load 128 bit unscaled signed 9 bit.  */
static void
fldurq (sim_cpu *cpu, int32_t offset)
{
  unsigned int rn = INSTR (9, 5);
  unsigned int st = INSTR (4, 0);
  FRegister a;
  uint64_t addr = aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_get_mem_long_double (cpu, addr, & a);
  aarch64_set_FP_long_double (cpu, st, a);
}

/* TODO store 8 bit unscaled signed 9 bit.  */
/* TODO store 16 bit unscaled signed 9 bit.  */
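/* Worked example (added for clarity; the values are hypothetical): the
   9 bit immediate reaches these routines already sign extended in OFFSET,
   so with the base register holding 0x7ffffff0 and offset = -8, fldurs
   reads the 32 bit word at 0x7fffffe8 and writes it to element 0 of the
   destination vector register.  */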
/* Float absolute value.  */
static void
fabss (sim_cpu *cpu)
{
  unsigned sn = INSTR (9, 5);
  unsigned sd = INSTR (4, 0);
  float value = aarch64_get_FP_float (cpu, sn);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_FP_float (cpu, sd, fabsf (value));
}

/* Double absolute value.  */
static void
fabcpu (sim_cpu *cpu)
{
  unsigned sn = INSTR (9, 5);
  unsigned sd = INSTR (4, 0);
  double value = aarch64_get_FP_double (cpu, sn);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_FP_double (cpu, sd, fabs (value));
}

/* Float negative value.  */
static void
fnegs (sim_cpu *cpu)
{
  unsigned sn = INSTR (9, 5);
  unsigned sd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_FP_float (cpu, sd, - aarch64_get_FP_float (cpu, sn));
}

/* Double negative value.  */
static void
fnegd (sim_cpu *cpu)
{
  unsigned sn = INSTR (9, 5);
  unsigned sd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_FP_double (cpu, sd, - aarch64_get_FP_double (cpu, sn));
}

/* Float square root.  */
static void
fsqrts (sim_cpu *cpu)
{
  unsigned sn = INSTR (9, 5);
  unsigned sd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_FP_float (cpu, sd, sqrtf (aarch64_get_FP_float (cpu, sn)));
}

/* Double square root.  */
static void
fsqrtd (sim_cpu *cpu)
{
  unsigned sn = INSTR (9, 5);
  unsigned sd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_FP_double (cpu, sd,
                         sqrt (aarch64_get_FP_double (cpu, sn)));
}

/* Convert double to float.  */
static void
fcvtds (sim_cpu *cpu)
{
  unsigned sn = INSTR (9, 5);
  unsigned sd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_FP_float (cpu, sd, (float) aarch64_get_FP_double (cpu, sn));
}

/* Convert float to double.  */
static void
fcvtcpu (sim_cpu *cpu)
{
  unsigned sn = INSTR (9, 5);
  unsigned sd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_FP_double (cpu, sd, (double) aarch64_get_FP_float (cpu, sn));
}
static void
do_FRINT (sim_cpu *cpu)
{
  /* instr[31,23] = 0001 1110 0
     instr[22]    = single(0)/double(1)
     instr[21,18] = 1001
     instr[17,15] = rounding mode
     instr[14,10] = 10000
     instr[9,5]   = source
     instr[4,0]   = dest  */

  float val;
  unsigned rs = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);
  unsigned int rmode = INSTR (17, 15);

  NYI_assert (31, 23, 0x03C);
  NYI_assert (21, 18, 0x9);
  NYI_assert (14, 10, 0x10);

  if (rmode == 6 || rmode == 7)
    /* FIXME: Add support for rmode == 6 exactness check.  */
    rmode = uimm (aarch64_get_FPSR (cpu), 23, 22);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  if (INSTR (22, 22))
    {
      double dval = aarch64_get_FP_double (cpu, rs);

      switch (rmode)
        {
        case 0: /* mode N: nearest or even.  */
          {
            double rval = round (dval);

            if (dval - rval == 0.5)
              {
                if (((rval / 2.0) * 2.0) != rval)
                  rval += 1.0;
              }

            aarch64_set_FP_double (cpu, rd, round (dval));
            return;
          }

        case 1: /* mode P: towards +inf.  */
          if (dval < 0.0)
            aarch64_set_FP_double (cpu, rd, trunc (dval));
          else
            aarch64_set_FP_double (cpu, rd, round (dval));
          return;

        case 2: /* mode M: towards -inf.  */
          if (dval < 0.0)
            aarch64_set_FP_double (cpu, rd, round (dval));
          else
            aarch64_set_FP_double (cpu, rd, trunc (dval));
          return;

        case 3: /* mode Z: towards 0.  */
          aarch64_set_FP_double (cpu, rd, trunc (dval));
          return;

        case 4: /* mode A: away from 0.  */
          aarch64_set_FP_double (cpu, rd, round (dval));
          return;

        case 6: /* mode X: use FPCR with exactness check.  */
        case 7: /* mode I: use FPCR mode.  */
          HALT_NYI;

        default:
          HALT_UNALLOC;
        }
    }

  val = aarch64_get_FP_float (cpu, rs);

  switch (rmode)
    {
    case 0: /* mode N: nearest or even.  */
      {
        float rval = roundf (val);

        if (val - rval == 0.5)
          {
            if (((rval / 2.0) * 2.0) != rval)
              rval += 1.0;
          }

        aarch64_set_FP_float (cpu, rd, rval);
        return;
      }

    case 1: /* mode P: towards +inf.  */
      if (val < 0.0)
        aarch64_set_FP_float (cpu, rd, truncf (val));
      else
        aarch64_set_FP_float (cpu, rd, roundf (val));
      return;

    case 2: /* mode M: towards -inf.  */
      if (val > 0.0)
        aarch64_set_FP_float (cpu, rd, truncf (val));
      else
        aarch64_set_FP_float (cpu, rd, roundf (val));
      return;

    case 3: /* mode Z: towards 0.  */
      aarch64_set_FP_float (cpu, rd, truncf (val));
      return;

    case 4: /* mode A: away from 0.  */
      aarch64_set_FP_float (cpu, rd, roundf (val));
      return;

    case 6: /* mode X: use FPCR with exactness check.  */
    case 7: /* mode I: use FPCR mode.  */
      HALT_NYI;

    default:
      HALT_UNALLOC;
    }
}
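/* Illustrative rounding-mode examples (added, not in the original): for an
   input of 2.5 the architectural modes give N (nearest, ties to even) 2.0,
   P (towards +inf) 3.0, M (towards -inf) 2.0, Z (towards zero) 2.0 and
   A (ties away from zero) 3.0.  The emulation above approximates P and M
   with round()/trunc() selected by the operand's sign, so results can
   differ from hardware for inputs that are neither ties nor integers.  */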
/* Convert half to float.  */
static void
do_FCVT_half_to_single (sim_cpu *cpu)
{
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  NYI_assert (31, 10, 0x7B890);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_FP_float (cpu, rd, (float) aarch64_get_FP_half (cpu, rn));
}

/* Convert half to double.  */
static void
do_FCVT_half_to_double (sim_cpu *cpu)
{
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  NYI_assert (31, 10, 0x7B8B0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_FP_double (cpu, rd, (double) aarch64_get_FP_half (cpu, rn));
}

/* Convert float to half.  */
static void
do_FCVT_single_to_half (sim_cpu *cpu)
{
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  NYI_assert (31, 10, 0x788F0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_FP_half (cpu, rd, aarch64_get_FP_float (cpu, rn));
}

/* Convert double to half.  */
static void
do_FCVT_double_to_half (sim_cpu *cpu)
{
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  NYI_assert (31, 10, 0x798F0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_FP_half (cpu, rd, (float) aarch64_get_FP_double (cpu, rn));
}

static void
dexSimpleFPDataProc1Source (sim_cpu *cpu)
{
  /* instr[31]    ==> M : 0 ==> OK, 1 ==> UNALLOC
     instr[29]    ==> S : 0 ==> OK, 1 ==> UNALLOC
     instr[23,22] ==> type : 00 ==> source is single,
                             01 ==> source is double
                             11 ==> UNALLOC or source is half
     instr[20,15] ==> opcode : with type 00 or 01
                     000000 ==> FMOV, 000001 ==> FABS,
                     000010 ==> FNEG, 000011 ==> FSQRT,
                     000100 ==> UNALLOC, 000101 ==> FCVT,(to single/double)
                     000110 ==> UNALLOC, 000111 ==> FCVT (to half)
                     001000 ==> FRINTN, 001001 ==> FRINTP,
                     001010 ==> FRINTM, 001011 ==> FRINTZ,
                     001100 ==> FRINTA, 001101 ==> UNALLOC
                     001110 ==> FRINTX, 001111 ==> FRINTI
                     with type 11
                     000100 ==> FCVT (half-to-single)
                     000101 ==> FCVT (half-to-double)
     instr[14,10] = 10000.  */

  uint32_t M_S = (INSTR (31, 31) << 1) | INSTR (29, 29);
  uint32_t type   = INSTR (23, 22);
  uint32_t opcode = INSTR (20, 15);

  if (M_S != 0)
    HALT_UNALLOC;

  if (type == 3)
    {
      if (opcode == 4)
        do_FCVT_half_to_single (cpu);
      else if (opcode == 5)
        do_FCVT_half_to_double (cpu);
      else
        HALT_UNALLOC;
      return;
    }

  if (type == 2)
    HALT_UNALLOC;

  switch (opcode)
    {
    case 0:
      if (type)
        ffmovd (cpu);
      else
        ffmovs (cpu);
      return;

    case 1:
      if (type)
        fabcpu (cpu);
      else
        fabss (cpu);
      return;

    case 2:
      if (type)
        fnegd (cpu);
      else
        fnegs (cpu);
      return;

    case 3:
      if (type)
        fsqrtd (cpu);
      else
        fsqrts (cpu);
      return;

    case 4:
      if (type)
        fcvtds (cpu);
      else
        HALT_UNALLOC;
      return;

    case 5:
      if (type)
        HALT_UNALLOC;
      fcvtcpu (cpu);
      return;

    case 8:   /* FRINTN etc.  */
    case 9:
    case 10:
    case 11:
    case 12:
    case 14:
    case 15:
      do_FRINT (cpu);
      return;

    case 7:   /* FCVT double/single to half precision.  */
      if (type)
        do_FCVT_double_to_half (cpu);
      else
        do_FCVT_single_to_half (cpu);
      return;

    default:
      HALT_UNALLOC;
    }
}
/* 32 bit signed int to float.  */
static void
scvtf32 (sim_cpu *cpu)
{
  unsigned rn = INSTR (9, 5);
  unsigned sd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_FP_float
    (cpu, sd, (float) aarch64_get_reg_s32 (cpu, rn, NO_SP));
}

/* signed int to float.  */
static void
scvtf (sim_cpu *cpu)
{
  unsigned rn = INSTR (9, 5);
  unsigned sd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_FP_float
    (cpu, sd, (float) aarch64_get_reg_s64 (cpu, rn, NO_SP));
}

/* 32 bit signed int to double.  */
static void
scvtd32 (sim_cpu *cpu)
{
  unsigned rn = INSTR (9, 5);
  unsigned sd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_FP_double
    (cpu, sd, (double) aarch64_get_reg_s32 (cpu, rn, NO_SP));
}

/* signed int to double.  */
static void
scvtd (sim_cpu *cpu)
{
  unsigned rn = INSTR (9, 5);
  unsigned sd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_FP_double
    (cpu, sd, (double) aarch64_get_reg_s64 (cpu, rn, NO_SP));
}
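/* Precision note with a worked example (added): scvtf32 converts a 32 bit
   signed integer to a float that carries only 24 significand bits, so the
   conversion can round: (float) 2147483647 == 2147483648.0f and
   (float) 16777217 == 16777216.0f.  The double-destination variants are
   exact for any 32 bit input.  */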
static const float  FLOAT_INT_MAX   = (float)  INT_MAX;
static const float  FLOAT_INT_MIN   = (float)  INT_MIN;
static const double DOUBLE_INT_MAX  = (double) INT_MAX;
static const double DOUBLE_INT_MIN  = (double) INT_MIN;
static const float  FLOAT_LONG_MAX  = (float)  LONG_MAX;
static const float  FLOAT_LONG_MIN  = (float)  LONG_MIN;
static const double DOUBLE_LONG_MAX = (double) LONG_MAX;
static const double DOUBLE_LONG_MIN = (double) LONG_MIN;

#define UINT_MIN 0
#define ULONG_MIN 0
static const float  FLOAT_UINT_MAX   = (float)  UINT_MAX;
static const float  FLOAT_UINT_MIN   = (float)  UINT_MIN;
static const double DOUBLE_UINT_MAX  = (double) UINT_MAX;
static const double DOUBLE_UINT_MIN  = (double) UINT_MIN;
static const float  FLOAT_ULONG_MAX  = (float)  ULONG_MAX;
static const float  FLOAT_ULONG_MIN  = (float)  ULONG_MIN;
static const double DOUBLE_ULONG_MAX = (double) ULONG_MAX;
static const double DOUBLE_ULONG_MIN = (double) ULONG_MIN;

/* Check for FP exception conditions:
     NaN raises IO
     Infinity raises IO
     Out of Range raises IO and IX and saturates value
     Denormal raises ID and IX and sets to zero.  */
#define RAISE_EXCEPTIONS(F, VALUE, FTYPE, ITYPE)                \
  do                                                            \
    {                                                           \
      switch (fpclassify (F))                                   \
        {                                                       \
        case FP_INFINITE:                                       \
        case FP_NAN:                                            \
          aarch64_set_FPSR (cpu, IO);                           \
          if (signbit (F))                                      \
            VALUE = ITYPE##_MAX;                                \
          else                                                  \
            VALUE = ITYPE##_MIN;                                \
          break;                                                \
                                                                \
        case FP_NORMAL:                                         \
          if (F >= FTYPE##_##ITYPE##_MAX)                       \
            {                                                   \
              aarch64_set_FPSR_bits (cpu, IO | IX, IO | IX);    \
              VALUE = ITYPE##_MAX;                              \
            }                                                   \
          else if (F <= FTYPE##_##ITYPE##_MIN)                  \
            {                                                   \
              aarch64_set_FPSR_bits (cpu, IO | IX, IO | IX);    \
              VALUE = ITYPE##_MIN;                              \
            }                                                   \
          break;                                                \
                                                                \
        case FP_SUBNORMAL:                                      \
          aarch64_set_FPSR_bits (cpu, IO | IX | ID, IX | ID);   \
          VALUE = 0;                                            \
          break;                                                \
                                                                \
        default:                                                \
          break;                                                \
        }                                                       \
    }                                                           \
  while (0)
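/* Usage example (added): in fcvtszs32 below, a source value such as 3.0e9f
   satisfies F >= FLOAT_INT_MAX, so the macro sets IO and IX in the FPSR and
   saturates VALUE to INT_MAX (0x7fffffff); a NaN input raises IO, and a
   denormal input is flushed to zero with ID and IX raised.  */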
/* 32 bit convert float to signed int truncate towards zero.  */
static void
fcvtszs32 (sim_cpu *cpu)
{
  unsigned sn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);
  /* TODO : check that this rounds toward zero.  */
  float f = aarch64_get_FP_float (cpu, sn);
  int32_t value = (int32_t) f;

  RAISE_EXCEPTIONS (f, value, FLOAT, INT);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* Avoid sign extension to 64 bit.  */
  aarch64_set_reg_u64 (cpu, rd, NO_SP, (uint32_t) value);
}

/* 64 bit convert float to signed int truncate towards zero.  */
static void
fcvtszs (sim_cpu *cpu)
{
  unsigned sn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);
  float f = aarch64_get_FP_float (cpu, sn);
  int64_t value = (int64_t) f;

  RAISE_EXCEPTIONS (f, value, FLOAT, LONG);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_s64 (cpu, rd, NO_SP, value);
}

/* 32 bit convert double to signed int truncate towards zero.  */
static void
fcvtszd32 (sim_cpu *cpu)
{
  unsigned sn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);
  /* TODO : check that this rounds toward zero.  */
  double d = aarch64_get_FP_double (cpu, sn);
  int32_t value = (int32_t) d;

  RAISE_EXCEPTIONS (d, value, DOUBLE, INT);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* Avoid sign extension to 64 bit.  */
  aarch64_set_reg_u64 (cpu, rd, NO_SP, (uint32_t) value);
}

/* 64 bit convert double to signed int truncate towards zero.  */
static void
fcvtszd (sim_cpu *cpu)
{
  unsigned sn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);
  /* TODO : check that this rounds toward zero.  */
  double d = aarch64_get_FP_double (cpu, sn);
  int64_t value;

  value = (int64_t) d;

  RAISE_EXCEPTIONS (d, value, DOUBLE, LONG);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_s64 (cpu, rd, NO_SP, value);
}
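/* Worked example (added): fcvtszs32 with f = -2.9f truncates to -2; the
   (uint32_t) cast yields 0xfffffffe and aarch64_set_reg_u64 zero extends it,
   so X<rd> ends up as 0x00000000fffffffe -- exactly the "avoid sign
   extension" behaviour the comment above asks for, leaving W<rd> = -2 while
   the upper half of the X register stays clear.  */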
8391 do_fcvtzu (sim_cpu
*cpu
)
8393 /* instr[31] = size: 32-bit (0), 64-bit (1)
8394 instr[30,23] = 00111100
8395 instr[22] = type: single (0)/ double (1)
8396 instr[21] = enable (0)/disable(1) precision
8397 instr[20,16] = 11001
8398 instr[15,10] = precision
8402 unsigned rs
= INSTR (9, 5);
8403 unsigned rd
= INSTR (4, 0);
8405 NYI_assert (30, 23, 0x3C);
8406 NYI_assert (20, 16, 0x19);
8408 if (INSTR (21, 21) != 1)
8409 /* Convert to fixed point. */
8412 TRACE_DECODE (cpu
, "emulated at line %d", __LINE__
);
8415 /* Convert to unsigned 64-bit integer. */
8418 double d
= aarch64_get_FP_double (cpu
, rs
);
8419 uint64_t value
= (uint64_t) d
;
8421 /* Do not raise an exception if we have reached ULONG_MAX. */
8422 if (value
!= (1ULL << 63))
8423 RAISE_EXCEPTIONS (d
, value
, DOUBLE
, ULONG
);
8425 aarch64_set_reg_u64 (cpu
, rd
, NO_SP
, value
);
8429 float f
= aarch64_get_FP_float (cpu
, rs
);
8430 uint64_t value
= (uint64_t) f
;
8432 /* Do not raise an exception if we have reached ULONG_MAX. */
8433 if (value
!= (1ULL << 63))
8434 RAISE_EXCEPTIONS (f
, value
, FLOAT
, ULONG
);
8436 aarch64_set_reg_u64 (cpu
, rd
, NO_SP
, value
);
8443 /* Convert to unsigned 32-bit integer. */
8446 double d
= aarch64_get_FP_double (cpu
, rs
);
8448 value
= (uint32_t) d
;
8449 /* Do not raise an exception if we have reached UINT_MAX. */
8450 if (value
!= (1UL << 31))
8451 RAISE_EXCEPTIONS (d
, value
, DOUBLE
, UINT
);
8455 float f
= aarch64_get_FP_float (cpu
, rs
);
8457 value
= (uint32_t) f
;
8458 /* Do not raise an exception if we have reached UINT_MAX. */
8459 if (value
!= (1UL << 31))
8460 RAISE_EXCEPTIONS (f
, value
, FLOAT
, UINT
);
8463 aarch64_set_reg_u64 (cpu
, rd
, NO_SP
, value
);
static void
do_UCVTF (sim_cpu *cpu)
{
  /* instr[31]    = size: 32-bit (0), 64-bit (1)
     instr[30,23] = 001 1110 0
     instr[22]    = type: single (0)/ double (1)
     instr[21]    = enable (0)/disable(1) precision
     instr[20,16] = 0 0011
     instr[15,10] = precision
     instr[9,5]   = Rs
     instr[4,0]   = Rd.  */

  unsigned rs = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  NYI_assert (30, 23, 0x3C);
  NYI_assert (20, 16, 0x03);

  if (INSTR (21, 21) != 1)
    /* Convert to fixed point.  */
    HALT_NYI;

  /* FIXME: Add exception raising.  */
  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  if (INSTR (31, 31))
    {
      uint64_t value = aarch64_get_reg_u64 (cpu, rs, NO_SP);

      if (INSTR (22, 22))
        aarch64_set_FP_double (cpu, rd, (double) value);
      else
        aarch64_set_FP_float (cpu, rd, (float) value);
    }
  else
    {
      uint32_t value = aarch64_get_reg_u32 (cpu, rs, NO_SP);

      if (INSTR (22, 22))
        aarch64_set_FP_double (cpu, rd, (double) value);
      else
        aarch64_set_FP_float (cpu, rd, (float) value);
    }
}

static void
float_vector_move (sim_cpu *cpu)
{
  /* instr[31,17] == 100 1111 0101 0111
     instr[16]    ==> direction 0=> to GR, 1=> from GR
     instr[9,5]   ==> source
     instr[4,0]   ==> dest.  */

  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  NYI_assert (31, 17, 0x4F57);

  if (INSTR (15, 10) != 0)
    HALT_UNALLOC;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  if (INSTR (16, 16))
    aarch64_set_vec_u64 (cpu, rd, 1, aarch64_get_reg_u64 (cpu, rn, NO_SP));
  else
    aarch64_set_reg_u64 (cpu, rd, NO_SP, aarch64_get_vec_u64 (cpu, rn, 1));
}
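/* Rounding example for the conversion above (added): UCVTF of the 32 bit
   value 4294967295 (0xffffffff) to single precision yields 4294967296.0f,
   because the nearest representable float is 2^32; converting the same
   value to double precision is exact.  */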
8535 dexSimpleFPIntegerConvert (sim_cpu
*cpu
)
8537 /* instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
8539 instr[29] = S : 0 ==> OK, 1 ==> UNALLOC
8542 instr[23,22] = type : 00 ==> single, 01 ==> double, 1x ==> UNALLOC
8544 instr[20,19] = rmode
8545 instr[18,16] = opcode
8546 instr[15,10] = 10 0000 */
8548 uint32_t rmode_opcode
;
8554 if (INSTR (31, 17) == 0x4F57)
8556 float_vector_move (cpu
);
8560 size
= INSTR (31, 31);
8565 type
= INSTR (23, 22);
8569 rmode_opcode
= INSTR (20, 16);
8570 size_type
= (size
<< 1) | type
; /* 0==32f, 1==32d, 2==64f, 3==64d. */
8572 switch (rmode_opcode
)
8574 case 2: /* SCVTF. */
8577 case 0: scvtf32 (cpu
); return;
8578 case 1: scvtd32 (cpu
); return;
8579 case 2: scvtf (cpu
); return;
8580 case 3: scvtd (cpu
); return;
8581 default: HALT_UNALLOC
;
8584 case 6: /* FMOV GR, Vec. */
8587 case 0: gfmovs (cpu
); return;
8588 case 3: gfmovd (cpu
); return;
8589 default: HALT_UNALLOC
;
8592 case 7: /* FMOV vec, GR. */
8595 case 0: fgmovs (cpu
); return;
8596 case 3: fgmovd (cpu
); return;
8597 default: HALT_UNALLOC
;
8600 case 24: /* FCVTZS. */
8603 case 0: fcvtszs32 (cpu
); return;
8604 case 1: fcvtszd32 (cpu
); return;
8605 case 2: fcvtszs (cpu
); return;
8606 case 3: fcvtszd (cpu
); return;
8607 default: HALT_UNALLOC
;
8610 case 25: do_fcvtzu (cpu
); return;
8611 case 3: do_UCVTF (cpu
); return;
8613 case 0: /* FCVTNS. */
8614 case 1: /* FCVTNU. */
8615 case 4: /* FCVTAS. */
8616 case 5: /* FCVTAU. */
8617 case 8: /* FCVPTS. */
8618 case 9: /* FCVTPU. */
8619 case 16: /* FCVTMS. */
8620 case 17: /* FCVTMU. */
static void
set_flags_for_float_compare (sim_cpu *cpu, float fvalue1, float fvalue2)
{
  uint32_t flags;

  /* FIXME: Add exception raising.  */
  if (isnan (fvalue1) || isnan (fvalue2))
    flags = C | V;
  else if (isinf (fvalue1) && isinf (fvalue2))
    {
      /* Subtracting two infinities may give a NaN.  We only need to compare
         the signs, which we can get from isinf.  */
      int result = isinf (fvalue1) - isinf (fvalue2);

      if (result == 0)
        flags = Z | C;
      else if (result < 0)
        flags = N;
      else /* (result > 0).  */
        flags = C;
    }
  else
    {
      float result = fvalue1 - fvalue2;

      if (result == 0.0)
        flags = Z | C;
      else if (result < 0)
        flags = N;
      else /* (result > 0).  */
        flags = C;
    }

  aarch64_set_CPSR (cpu, flags);
}

/* Float compare -- Invalid Operation exception
   only on signaling NaNs.  */
static void
fcmps (sim_cpu *cpu)
{
  unsigned sm = INSTR (20, 16);
  unsigned sn = INSTR ( 9,  5);

  float fvalue1 = aarch64_get_FP_float (cpu, sn);
  float fvalue2 = aarch64_get_FP_float (cpu, sm);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  set_flags_for_float_compare (cpu, fvalue1, fvalue2);
}

/* Float compare to zero -- Invalid Operation exception
   only on signaling NaNs.  */
static void
fcmpzs (sim_cpu *cpu)
{
  unsigned sn = INSTR ( 9,  5);
  float fvalue1 = aarch64_get_FP_float (cpu, sn);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  set_flags_for_float_compare (cpu, fvalue1, 0.0f);
}

/* Float compare -- Invalid Operation exception on all NaNs.  */
static void
fcmpes (sim_cpu *cpu)
{
  unsigned sm = INSTR (20, 16);
  unsigned sn = INSTR ( 9,  5);

  float fvalue1 = aarch64_get_FP_float (cpu, sn);
  float fvalue2 = aarch64_get_FP_float (cpu, sm);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  set_flags_for_float_compare (cpu, fvalue1, fvalue2);
}

/* Float compare to zero -- Invalid Operation exception on all NaNs.  */
static void
fcmpzes (sim_cpu *cpu)
{
  unsigned sn = INSTR ( 9,  5);
  float fvalue1 = aarch64_get_FP_float (cpu, sn);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  set_flags_for_float_compare (cpu, fvalue1, 0.0f);
}
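/* Flag summary for the comparisons above (added; per the AArch64 FCMP
   definition): equal operands set Z and C (NZCV = 0110), "less than" sets
   only N (1000), "greater than" sets only C (0010), and an unordered
   result -- either operand a NaN -- sets C and V (0011), so conditional
   branches that test GT or GE are not taken when a NaN is involved.  */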
8713 set_flags_for_double_compare (sim_cpu
*cpu
, double dval1
, double dval2
)
8717 /* FIXME: Add exception raising. */
8718 if (isnan (dval1
) || isnan (dval2
))
8720 else if (isinf (dval1
) && isinf (dval2
))
8722 /* Subtracting two infinities may give a NaN. We only need to compare
8723 the signs, which we can get from isinf. */
8724 int result
= isinf (dval1
) - isinf (dval2
);
8728 else if (result
< 0)
8730 else /* (result > 0). */
8735 double result
= dval1
- dval2
;
8739 else if (result
< 0)
8741 else /* (result > 0). */
8745 aarch64_set_CPSR (cpu
, flags
);
8748 /* Double compare -- Invalid Operation exception only on signaling NaNs. */
8750 fcmpd (sim_cpu
*cpu
)
8752 unsigned sm
= INSTR (20, 16);
8753 unsigned sn
= INSTR ( 9, 5);
8755 double dvalue1
= aarch64_get_FP_double (cpu
, sn
);
8756 double dvalue2
= aarch64_get_FP_double (cpu
, sm
);
8758 TRACE_DECODE (cpu
, "emulated at line %d", __LINE__
);
8759 set_flags_for_double_compare (cpu
, dvalue1
, dvalue2
);
8762 /* Double compare to zero -- Invalid Operation exception
8763 only on signaling NaNs. */
8765 fcmpzd (sim_cpu
*cpu
)
8767 unsigned sn
= INSTR ( 9, 5);
8768 double dvalue1
= aarch64_get_FP_double (cpu
, sn
);
8770 TRACE_DECODE (cpu
, "emulated at line %d", __LINE__
);
8771 set_flags_for_double_compare (cpu
, dvalue1
, 0.0);
8774 /* Double compare -- Invalid Operation exception on all NaNs. */
8776 fcmped (sim_cpu
*cpu
)
8778 unsigned sm
= INSTR (20, 16);
8779 unsigned sn
= INSTR ( 9, 5);
8781 double dvalue1
= aarch64_get_FP_double (cpu
, sn
);
8782 double dvalue2
= aarch64_get_FP_double (cpu
, sm
);
8784 TRACE_DECODE (cpu
, "emulated at line %d", __LINE__
);
8785 set_flags_for_double_compare (cpu
, dvalue1
, dvalue2
);
8788 /* Double compare to zero -- Invalid Operation exception on all NaNs. */
8790 fcmpzed (sim_cpu
*cpu
)
8792 unsigned sn
= INSTR ( 9, 5);
8793 double dvalue1
= aarch64_get_FP_double (cpu
, sn
);
8795 TRACE_DECODE (cpu
, "emulated at line %d", __LINE__
);
8796 set_flags_for_double_compare (cpu
, dvalue1
, 0.0);
8800 dexSimpleFPCompare (sim_cpu
*cpu
)
8802 /* assert instr[28,25] == 1111
8803 instr[30:24:21:13,10] = 0011000
8804 instr[31] = M : 0 ==> OK, 1 ==> UNALLOC
8805 instr[29] ==> S : 0 ==> OK, 1 ==> UNALLOC
8806 instr[23,22] ==> type : 0 ==> single, 01 ==> double, 1x ==> UNALLOC
8807 instr[15,14] ==> op : 00 ==> OK, ow ==> UNALLOC
8808 instr[4,0] ==> opcode2 : 00000 ==> FCMP, 10000 ==> FCMPE,
8809 01000 ==> FCMPZ, 11000 ==> FCMPEZ,
8812 uint32_t M_S
= (INSTR (31, 31) << 1) | INSTR (29, 29);
8813 uint32_t type
= INSTR (23, 22);
8814 uint32_t op
= INSTR (15, 14);
8815 uint32_t op2_2_0
= INSTR (2, 0);
8829 /* dispatch on type and top 2 bits of opcode. */
8830 dispatch
= (type
<< 2) | INSTR (4, 3);
8834 case 0: fcmps (cpu
); return;
8835 case 1: fcmpzs (cpu
); return;
8836 case 2: fcmpes (cpu
); return;
8837 case 3: fcmpzes (cpu
); return;
8838 case 4: fcmpd (cpu
); return;
8839 case 5: fcmpzd (cpu
); return;
8840 case 6: fcmped (cpu
); return;
8841 case 7: fcmpzed (cpu
); return;
8846 do_scalar_FADDP (sim_cpu
*cpu
)
8848 /* instr [31,23] = 0111 1110 0
8849 instr [22] = single(0)/double(1)
8850 instr [21,10] = 11 0000 1101 10
8852 instr [4,0] = Fd. */
8854 unsigned Fn
= INSTR (9, 5);
8855 unsigned Fd
= INSTR (4, 0);
8857 NYI_assert (31, 23, 0x0FC);
8858 NYI_assert (21, 10, 0xC36);
8860 TRACE_DECODE (cpu
, "emulated at line %d", __LINE__
);
8863 double val1
= aarch64_get_vec_double (cpu
, Fn
, 0);
8864 double val2
= aarch64_get_vec_double (cpu
, Fn
, 1);
8866 aarch64_set_FP_double (cpu
, Fd
, val1
+ val2
);
8870 float val1
= aarch64_get_vec_float (cpu
, Fn
, 0);
8871 float val2
= aarch64_get_vec_float (cpu
, Fn
, 1);
8873 aarch64_set_FP_float (cpu
, Fd
, val1
+ val2
);
8877 /* Floating point absolute difference. */
8880 do_scalar_FABD (sim_cpu
*cpu
)
8882 /* instr [31,23] = 0111 1110 1
8883 instr [22] = float(0)/double(1)
8886 instr [15,10] = 1101 01
8888 instr [4, 0] = Rd. */
8890 unsigned rm
= INSTR (20, 16);
8891 unsigned rn
= INSTR (9, 5);
8892 unsigned rd
= INSTR (4, 0);
8894 NYI_assert (31, 23, 0x0FD);
8895 NYI_assert (21, 21, 1);
8896 NYI_assert (15, 10, 0x35);
8898 TRACE_DECODE (cpu
, "emulated at line %d", __LINE__
);
8900 aarch64_set_FP_double (cpu
, rd
,
8901 fabs (aarch64_get_FP_double (cpu
, rn
)
8902 - aarch64_get_FP_double (cpu
, rm
)));
8904 aarch64_set_FP_float (cpu
, rd
,
8905 fabsf (aarch64_get_FP_float (cpu
, rn
)
8906 - aarch64_get_FP_float (cpu
, rm
)));
8910 do_scalar_CMGT (sim_cpu
*cpu
)
8912 /* instr [31,21] = 0101 1110 111
8914 instr [15,10] = 00 1101
8916 instr [4, 0] = Rd. */
8918 unsigned rm
= INSTR (20, 16);
8919 unsigned rn
= INSTR (9, 5);
8920 unsigned rd
= INSTR (4, 0);
8922 NYI_assert (31, 21, 0x2F7);
8923 NYI_assert (15, 10, 0x0D);
8925 TRACE_DECODE (cpu
, "emulated at line %d", __LINE__
);
8926 aarch64_set_vec_u64 (cpu
, rd
, 0,
8927 aarch64_get_vec_u64 (cpu
, rn
, 0) >
8928 aarch64_get_vec_u64 (cpu
, rm
, 0) ? -1L : 0L);
8932 do_scalar_USHR (sim_cpu
*cpu
)
8934 /* instr [31,23] = 0111 1111 0
8935 instr [22,16] = shift amount
8936 instr [15,10] = 0000 01
8938 instr [4, 0] = Rd. */
8940 unsigned amount
= 128 - INSTR (22, 16);
8941 unsigned rn
= INSTR (9, 5);
8942 unsigned rd
= INSTR (4, 0);
8944 NYI_assert (31, 23, 0x0FE);
8945 NYI_assert (15, 10, 0x01);
8947 TRACE_DECODE (cpu
, "emulated at line %d", __LINE__
);
8948 aarch64_set_vec_u64 (cpu
, rd
, 0,
8949 aarch64_get_vec_u64 (cpu
, rn
, 0) >> amount
);
8953 do_scalar_SSHL (sim_cpu
*cpu
)
8955 /* instr [31,21] = 0101 1110 111
8957 instr [15,10] = 0100 01
8959 instr [4, 0] = Rd. */
8961 unsigned rm
= INSTR (20, 16);
8962 unsigned rn
= INSTR (9, 5);
8963 unsigned rd
= INSTR (4, 0);
8964 signed int shift
= aarch64_get_vec_s8 (cpu
, rm
, 0);
8966 NYI_assert (31, 21, 0x2F7);
8967 NYI_assert (15, 10, 0x11);
8969 TRACE_DECODE (cpu
, "emulated at line %d", __LINE__
);
8971 aarch64_set_vec_s64 (cpu
, rd
, 0,
8972 aarch64_get_vec_s64 (cpu
, rn
, 0) << shift
);
8974 aarch64_set_vec_s64 (cpu
, rd
, 0,
8975 aarch64_get_vec_s64 (cpu
, rn
, 0) >> - shift
);
8978 /* Floating point scalar compare greater than or equal to 0. */
8980 do_scalar_FCMGE_zero (sim_cpu
*cpu
)
8982 /* instr [31,23] = 0111 1110 1
8983 instr [22,22] = size
8984 instr [21,16] = 1000 00
8985 instr [15,10] = 1100 10
8987 instr [4, 0] = Rd. */
8989 unsigned size
= INSTR (22, 22);
8990 unsigned rn
= INSTR (9, 5);
8991 unsigned rd
= INSTR (4, 0);
8993 NYI_assert (31, 23, 0x0FD);
8994 NYI_assert (21, 16, 0x20);
8995 NYI_assert (15, 10, 0x32);
8997 TRACE_DECODE (cpu
, "emulated at line %d", __LINE__
);
8999 aarch64_set_vec_u64 (cpu
, rd
, 0,
9000 aarch64_get_vec_double (cpu
, rn
, 0) >= 0.0 ? -1 : 0);
9002 aarch64_set_vec_u32 (cpu
, rd
, 0,
9003 aarch64_get_vec_float (cpu
, rn
, 0) >= 0.0 ? -1 : 0);
9006 /* Floating point scalar compare less than or equal to 0. */
9008 do_scalar_FCMLE_zero (sim_cpu
*cpu
)
9010 /* instr [31,23] = 0111 1110 1
9011 instr [22,22] = size
9012 instr [21,16] = 1000 00
9013 instr [15,10] = 1101 10
9015 instr [4, 0] = Rd. */
9017 unsigned size
= INSTR (22, 22);
9018 unsigned rn
= INSTR (9, 5);
9019 unsigned rd
= INSTR (4, 0);
9021 NYI_assert (31, 23, 0x0FD);
9022 NYI_assert (21, 16, 0x20);
9023 NYI_assert (15, 10, 0x36);
9025 TRACE_DECODE (cpu
, "emulated at line %d", __LINE__
);
9027 aarch64_set_vec_u64 (cpu
, rd
, 0,
9028 aarch64_get_vec_double (cpu
, rn
, 0) <= 0.0 ? -1 : 0);
9030 aarch64_set_vec_u32 (cpu
, rd
, 0,
9031 aarch64_get_vec_float (cpu
, rn
, 0) <= 0.0 ? -1 : 0);
9034 /* Floating point scalar compare greater than 0. */
9036 do_scalar_FCMGT_zero (sim_cpu
*cpu
)
9038 /* instr [31,23] = 0101 1110 1
9039 instr [22,22] = size
9040 instr [21,16] = 1000 00
9041 instr [15,10] = 1100 10
9043 instr [4, 0] = Rd. */
9045 unsigned size
= INSTR (22, 22);
9046 unsigned rn
= INSTR (9, 5);
9047 unsigned rd
= INSTR (4, 0);
9049 NYI_assert (31, 23, 0x0BD);
9050 NYI_assert (21, 16, 0x20);
9051 NYI_assert (15, 10, 0x32);
9053 TRACE_DECODE (cpu
, "emulated at line %d", __LINE__
);
9055 aarch64_set_vec_u64 (cpu
, rd
, 0,
9056 aarch64_get_vec_double (cpu
, rn
, 0) > 0.0 ? -1 : 0);
9058 aarch64_set_vec_u32 (cpu
, rd
, 0,
9059 aarch64_get_vec_float (cpu
, rn
, 0) > 0.0 ? -1 : 0);
9062 /* Floating point scalar compare equal to 0. */
9064 do_scalar_FCMEQ_zero (sim_cpu
*cpu
)
9066 /* instr [31,23] = 0101 1110 1
9067 instr [22,22] = size
9068 instr [21,16] = 1000 00
9069 instr [15,10] = 1101 10
9071 instr [4, 0] = Rd. */
9073 unsigned size
= INSTR (22, 22);
9074 unsigned rn
= INSTR (9, 5);
9075 unsigned rd
= INSTR (4, 0);
9077 NYI_assert (31, 23, 0x0BD);
9078 NYI_assert (21, 16, 0x20);
9079 NYI_assert (15, 10, 0x36);
9081 TRACE_DECODE (cpu
, "emulated at line %d", __LINE__
);
9083 aarch64_set_vec_u64 (cpu
, rd
, 0,
9084 aarch64_get_vec_double (cpu
, rn
, 0) == 0.0 ? -1 : 0);
9086 aarch64_set_vec_u32 (cpu
, rd
, 0,
9087 aarch64_get_vec_float (cpu
, rn
, 0) == 0.0 ? -1 : 0);
9090 /* Floating point scalar compare less than 0. */
9092 do_scalar_FCMLT_zero (sim_cpu
*cpu
)
9094 /* instr [31,23] = 0101 1110 1
9095 instr [22,22] = size
9096 instr [21,16] = 1000 00
9097 instr [15,10] = 1110 10
9099 instr [4, 0] = Rd. */
9101 unsigned size
= INSTR (22, 22);
9102 unsigned rn
= INSTR (9, 5);
9103 unsigned rd
= INSTR (4, 0);
9105 NYI_assert (31, 23, 0x0BD);
9106 NYI_assert (21, 16, 0x20);
9107 NYI_assert (15, 10, 0x3A);
9109 TRACE_DECODE (cpu
, "emulated at line %d", __LINE__
);
9111 aarch64_set_vec_u64 (cpu
, rd
, 0,
9112 aarch64_get_vec_double (cpu
, rn
, 0) < 0.0 ? -1 : 0);
9114 aarch64_set_vec_u32 (cpu
, rd
, 0,
9115 aarch64_get_vec_float (cpu
, rn
, 0) < 0.0 ? -1 : 0);
9119 do_scalar_shift (sim_cpu
*cpu
)
9121 /* instr [31,23] = 0101 1111 0
9122 instr [22,16] = shift amount
9123 instr [15,10] = 0101 01 [SHL]
9124 instr [15,10] = 0000 01 [SSHR]
9126 instr [4, 0] = Rd. */
9128 unsigned rn
= INSTR (9, 5);
9129 unsigned rd
= INSTR (4, 0);
9132 NYI_assert (31, 23, 0x0BE);
9134 if (INSTR (22, 22) == 0)
9137 TRACE_DECODE (cpu
, "emulated at line %d", __LINE__
);
9138 switch (INSTR (15, 10))
9140 case 0x01: /* SSHR */
9141 amount
= 128 - INSTR (22, 16);
9142 aarch64_set_vec_s64 (cpu
, rd
, 0,
9143 aarch64_get_vec_s64 (cpu
, rn
, 0) >> amount
);
9145 case 0x15: /* SHL */
9146 amount
= INSTR (22, 16) - 64;
9147 aarch64_set_vec_u64 (cpu
, rd
, 0,
9148 aarch64_get_vec_u64 (cpu
, rn
, 0) << amount
);
9155 /* FCMEQ FCMGT FCMGE. */
9157 do_scalar_FCM (sim_cpu
*cpu
)
9159 /* instr [31,30] = 01
9161 instr [28,24] = 1 1110
9166 instr [15,12] = 1110
9170 instr [4, 0] = Rd. */
9172 unsigned rm
= INSTR (20, 16);
9173 unsigned rn
= INSTR (9, 5);
9174 unsigned rd
= INSTR (4, 0);
9175 unsigned EUac
= (INSTR (23, 23) << 2) | (INSTR (29, 29) << 1) | INSTR (11, 11);
9180 NYI_assert (31, 30, 1);
9181 NYI_assert (28, 24, 0x1E);
9182 NYI_assert (21, 21, 1);
9183 NYI_assert (15, 12, 0xE);
9184 NYI_assert (10, 10, 1);
9186 TRACE_DECODE (cpu
, "emulated at line %d", __LINE__
);
9189 double dval1
= aarch64_get_FP_double (cpu
, rn
);
9190 double dval2
= aarch64_get_FP_double (cpu
, rm
);
9195 result
= dval1
== dval2
;
9199 dval1
= fabs (dval1
);
9200 dval2
= fabs (dval2
);
9201 ATTRIBUTE_FALLTHROUGH
;
9203 result
= dval1
>= dval2
;
9207 dval1
= fabs (dval1
);
9208 dval2
= fabs (dval2
);
9209 ATTRIBUTE_FALLTHROUGH
;
9211 result
= dval1
> dval2
;
9218 aarch64_set_vec_u32 (cpu
, rd
, 0, result
? -1 : 0);
9222 val1
= aarch64_get_FP_float (cpu
, rn
);
9223 val2
= aarch64_get_FP_float (cpu
, rm
);
9228 result
= val1
== val2
;
9232 val1
= fabsf (val1
);
9233 val2
= fabsf (val2
);
9234 ATTRIBUTE_FALLTHROUGH
;
9236 result
= val1
>= val2
;
9240 val1
= fabsf (val1
);
9241 val2
= fabsf (val2
);
9242 ATTRIBUTE_FALLTHROUGH
;
9244 result
= val1
> val2
;
9251 aarch64_set_vec_u32 (cpu
, rd
, 0, result
? -1 : 0);
9254 /* An alias of DUP. */
9256 do_scalar_MOV (sim_cpu
*cpu
)
9258 /* instr [31,21] = 0101 1110 000
9259 instr [20,16] = imm5
9260 instr [15,10] = 0000 01
9262 instr [4, 0] = Rd. */
9264 unsigned rn
= INSTR (9, 5);
9265 unsigned rd
= INSTR (4, 0);
9268 NYI_assert (31, 21, 0x2F0);
9269 NYI_assert (15, 10, 0x01);
9271 TRACE_DECODE (cpu
, "emulated at line %d", __LINE__
);
9275 index
= INSTR (20, 17);
9277 (cpu
, rd
, 0, aarch64_get_vec_u8 (cpu
, rn
, index
));
9279 else if (INSTR (17, 17))
9282 index
= INSTR (20, 18);
9284 (cpu
, rd
, 0, aarch64_get_vec_u16 (cpu
, rn
, index
));
9286 else if (INSTR (18, 18))
9289 index
= INSTR (20, 19);
9291 (cpu
, rd
, 0, aarch64_get_vec_u32 (cpu
, rn
, index
));
9293 else if (INSTR (19, 19))
9296 index
= INSTR (20, 20);
9298 (cpu
, rd
, 0, aarch64_get_vec_u64 (cpu
, rn
, index
));
9305 do_scalar_NEG (sim_cpu
*cpu
)
9307 /* instr [31,10] = 0111 1110 1110 0000 1011 10
9309 instr [4, 0] = Rd. */
9311 unsigned rn
= INSTR (9, 5);
9312 unsigned rd
= INSTR (4, 0);
9314 NYI_assert (31, 10, 0x1FB82E);
9316 TRACE_DECODE (cpu
, "emulated at line %d", __LINE__
);
9317 aarch64_set_vec_u64 (cpu
, rd
, 0, - aarch64_get_vec_u64 (cpu
, rn
, 0));
9321 do_scalar_USHL (sim_cpu
*cpu
)
9323 /* instr [31,21] = 0111 1110 111
9325 instr [15,10] = 0100 01
9327 instr [4, 0] = Rd. */
9329 unsigned rm
= INSTR (20, 16);
9330 unsigned rn
= INSTR (9, 5);
9331 unsigned rd
= INSTR (4, 0);
9332 signed int shift
= aarch64_get_vec_s8 (cpu
, rm
, 0);
9334 NYI_assert (31, 21, 0x3F7);
9335 NYI_assert (15, 10, 0x11);
9337 TRACE_DECODE (cpu
, "emulated at line %d", __LINE__
);
9339 aarch64_set_vec_u64 (cpu
, rd
, 0, aarch64_get_vec_u64 (cpu
, rn
, 0) << shift
);
9341 aarch64_set_vec_u64 (cpu
, rd
, 0, aarch64_get_vec_u64 (cpu
, rn
, 0) >> - shift
);
9345 do_double_add (sim_cpu
*cpu
)
9347 /* instr [31,21] = 0101 1110 111
9349 instr [15,10] = 1000 01
9351 instr [4,0] = Fd. */
9358 NYI_assert (31, 21, 0x2F7);
9359 NYI_assert (15, 10, 0x21);
9363 Fn
= INSTR (20, 16);
9365 TRACE_DECODE (cpu
, "emulated at line %d", __LINE__
);
9366 val1
= aarch64_get_FP_double (cpu
, Fm
);
9367 val2
= aarch64_get_FP_double (cpu
, Fn
);
9369 aarch64_set_FP_double (cpu
, Fd
, val1
+ val2
);
9373 do_scalar_UCVTF (sim_cpu
*cpu
)
9375 /* instr [31,23] = 0111 1110 0
9376 instr [22] = single(0)/double(1)
9377 instr [21,10] = 10 0001 1101 10
9379 instr [4,0] = rd. */
9381 unsigned rn
= INSTR (9, 5);
9382 unsigned rd
= INSTR (4, 0);
9384 NYI_assert (31, 23, 0x0FC);
9385 NYI_assert (21, 10, 0x876);
9387 TRACE_DECODE (cpu
, "emulated at line %d", __LINE__
);
9390 uint64_t val
= aarch64_get_vec_u64 (cpu
, rn
, 0);
9392 aarch64_set_vec_double (cpu
, rd
, 0, (double) val
);
9396 uint32_t val
= aarch64_get_vec_u32 (cpu
, rn
, 0);
9398 aarch64_set_vec_float (cpu
, rd
, 0, (float) val
);
9403 do_scalar_vec (sim_cpu
*cpu
)
9405 /* instr [30] = 1. */
9406 /* instr [28,25] = 1111. */
9407 switch (INSTR (31, 23))
9410 switch (INSTR (15, 10))
9412 case 0x01: do_scalar_MOV (cpu
); return;
9413 case 0x39: do_scalar_FCM (cpu
); return;
9414 case 0x3B: do_scalar_FCM (cpu
); return;
9418 case 0xBE: do_scalar_shift (cpu
); return;
9421 switch (INSTR (15, 10))
9424 switch (INSTR (21, 16))
9426 case 0x30: do_scalar_FADDP (cpu
); return;
9427 case 0x21: do_scalar_UCVTF (cpu
); return;
9430 case 0x39: do_scalar_FCM (cpu
); return;
9431 case 0x3B: do_scalar_FCM (cpu
); return;
9436 switch (INSTR (15, 10))
9438 case 0x0D: do_scalar_CMGT (cpu
); return;
9439 case 0x11: do_scalar_USHL (cpu
); return;
9440 case 0x2E: do_scalar_NEG (cpu
); return;
9441 case 0x32: do_scalar_FCMGE_zero (cpu
); return;
9442 case 0x35: do_scalar_FABD (cpu
); return;
9443 case 0x36: do_scalar_FCMLE_zero (cpu
); return;
9444 case 0x39: do_scalar_FCM (cpu
); return;
9445 case 0x3B: do_scalar_FCM (cpu
); return;
9450 case 0xFE: do_scalar_USHR (cpu
); return;
9453 switch (INSTR (15, 10))
9455 case 0x21: do_double_add (cpu
); return;
9456 case 0x11: do_scalar_SSHL (cpu
); return;
9457 case 0x32: do_scalar_FCMGT_zero (cpu
); return;
9458 case 0x36: do_scalar_FCMEQ_zero (cpu
); return;
9459 case 0x3A: do_scalar_FCMLT_zero (cpu
); return;
9470 dexAdvSIMD1 (sim_cpu
*cpu
)
9472 /* instr [28,25] = 1 111. */
9474 /* We are currently only interested in the basic
9475 scalar fp routines which all have bit 30 = 0. */
9477 do_scalar_vec (cpu
);
9479 /* instr[24] is set for FP data processing 3-source and clear for
9480 all other basic scalar fp instruction groups. */
9481 else if (INSTR (24, 24))
9482 dexSimpleFPDataProc3Source (cpu
);
9484 /* instr[21] is clear for floating <-> fixed conversions and set for
9485 all other basic scalar fp instruction groups. */
9486 else if (!INSTR (21, 21))
9487 dexSimpleFPFixedConvert (cpu
);
9489 /* instr[11,10] : 01 ==> cond compare, 10 ==> Data Proc 2 Source
9490 11 ==> cond select, 00 ==> other. */
9492 switch (INSTR (11, 10))
9494 case 1: dexSimpleFPCondCompare (cpu
); return;
9495 case 2: dexSimpleFPDataProc2Source (cpu
); return;
9496 case 3: dexSimpleFPCondSelect (cpu
); return;
9499 /* Now an ordered cascade of tests.
9500 FP immediate has instr [12] == 1.
9501 FP compare has instr [13] == 1.
9502 FP Data Proc 1 Source has instr [14] == 1.
9503 FP floating <--> integer conversions has instr [15] == 0. */
9505 dexSimpleFPImmediate (cpu
);
9507 else if (INSTR (13, 13))
9508 dexSimpleFPCompare (cpu
);
9510 else if (INSTR (14, 14))
9511 dexSimpleFPDataProc1Source (cpu
);
9513 else if (!INSTR (15, 15))
9514 dexSimpleFPIntegerConvert (cpu
);
9517 /* If we get here then instr[15] == 1 which means UNALLOC. */
/* PC relative addressing.  */

static void
pcadr (sim_cpu *cpu)
{
  /* instr[31] = op : 0 ==> ADR, 1 ==> ADRP
     instr[30,29] = immlo
     instr[23,5] = immhi.  */
  unsigned rd = INSTR (4, 0);
  uint32_t isPage = INSTR (31, 31);
  union { int64_t u64; uint64_t s64; } imm;
  uint64_t offset;
  uint64_t address;

  imm.s64 = simm64 (aarch64_get_instr (cpu), 23, 5);
  offset = imm.u64;
  offset = (offset << 2) | INSTR (30, 29);

  address = aarch64_get_PC (cpu);

  if (isPage)
    {
      offset <<= 12;
      address &= ~0xfff;
    }

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, NO_SP, address + offset);
}

/* Specific decode and execute for group Data Processing Immediate.  */

static void
dexPCRelAddressing (sim_cpu *cpu)
{
  /* assert instr[28,24] = 10000.  */
  pcadr (cpu);
}
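/* Worked example (added; the addresses are made up): for ADRP X0, <sym>
   encoded with immhi:immlo = 3 and executed at PC = 0x400614, the code
   above forms offset = 3 << 12 = 0x3000, clears the low 12 bits of the PC
   to get 0x400000 and writes 0x403000 to X0; plain ADR would instead add
   the byte offset 3 to the unmasked PC.  */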
9561 /* Immediate logical.
9562 The bimm32/64 argument is constructed by replicating a 2, 4, 8,
9563 16, 32 or 64 bit sequence pulled out at decode and possibly
9566 N.B. the output register (dest) can normally be Xn or SP
9567 the exception occurs for flag setting instructions which may
9568 only use Xn for the output (dest). The input register can
9571 /* 32 bit and immediate. */
9573 and32 (sim_cpu
*cpu
, uint32_t bimm
)
9575 unsigned rn
= INSTR (9, 5);
9576 unsigned rd
= INSTR (4, 0);
9578 TRACE_DECODE (cpu
, "emulated at line %d", __LINE__
);
9579 aarch64_set_reg_u64 (cpu
, rd
, SP_OK
,
9580 aarch64_get_reg_u32 (cpu
, rn
, NO_SP
) & bimm
);
9583 /* 64 bit and immediate. */
9585 and64 (sim_cpu
*cpu
, uint64_t bimm
)
9587 unsigned rn
= INSTR (9, 5);
9588 unsigned rd
= INSTR (4, 0);
9590 TRACE_DECODE (cpu
, "emulated at line %d", __LINE__
);
9591 aarch64_set_reg_u64 (cpu
, rd
, SP_OK
,
9592 aarch64_get_reg_u64 (cpu
, rn
, NO_SP
) & bimm
);
9595 /* 32 bit and immediate set flags. */
9597 ands32 (sim_cpu
*cpu
, uint32_t bimm
)
9599 unsigned rn
= INSTR (9, 5);
9600 unsigned rd
= INSTR (4, 0);
9602 uint32_t value1
= aarch64_get_reg_u32 (cpu
, rn
, NO_SP
);
9603 uint32_t value2
= bimm
;
9605 TRACE_DECODE (cpu
, "emulated at line %d", __LINE__
);
9606 aarch64_set_reg_u64 (cpu
, rd
, NO_SP
, value1
& value2
);
9607 set_flags_for_binop32 (cpu
, value1
& value2
);
9610 /* 64 bit and immediate set flags. */
9612 ands64 (sim_cpu
*cpu
, uint64_t bimm
)
9614 unsigned rn
= INSTR (9, 5);
9615 unsigned rd
= INSTR (4, 0);
9617 uint64_t value1
= aarch64_get_reg_u64 (cpu
, rn
, NO_SP
);
9618 uint64_t value2
= bimm
;
9620 TRACE_DECODE (cpu
, "emulated at line %d", __LINE__
);
9621 aarch64_set_reg_u64 (cpu
, rd
, NO_SP
, value1
& value2
);
9622 set_flags_for_binop64 (cpu
, value1
& value2
);
9625 /* 32 bit exclusive or immediate. */
9627 eor32 (sim_cpu
*cpu
, uint32_t bimm
)
9629 unsigned rn
= INSTR (9, 5);
9630 unsigned rd
= INSTR (4, 0);
9632 TRACE_DECODE (cpu
, "emulated at line %d", __LINE__
);
9633 aarch64_set_reg_u64 (cpu
, rd
, SP_OK
,
9634 aarch64_get_reg_u32 (cpu
, rn
, NO_SP
) ^ bimm
);
9637 /* 64 bit exclusive or immediate. */
9639 eor64 (sim_cpu
*cpu
, uint64_t bimm
)
9641 unsigned rn
= INSTR (9, 5);
9642 unsigned rd
= INSTR (4, 0);
9644 TRACE_DECODE (cpu
, "emulated at line %d", __LINE__
);
9645 aarch64_set_reg_u64 (cpu
, rd
, SP_OK
,
9646 aarch64_get_reg_u64 (cpu
, rn
, NO_SP
) ^ bimm
);
9649 /* 32 bit or immediate. */
9651 orr32 (sim_cpu
*cpu
, uint32_t bimm
)
9653 unsigned rn
= INSTR (9, 5);
9654 unsigned rd
= INSTR (4, 0);
9656 TRACE_DECODE (cpu
, "emulated at line %d", __LINE__
);
9657 aarch64_set_reg_u64 (cpu
, rd
, SP_OK
,
9658 aarch64_get_reg_u32 (cpu
, rn
, NO_SP
) | bimm
);
9661 /* 64 bit or immediate. */
9663 orr64 (sim_cpu
*cpu
, uint64_t bimm
)
9665 unsigned rn
= INSTR (9, 5);
9666 unsigned rd
= INSTR (4, 0);
9668 TRACE_DECODE (cpu
, "emulated at line %d", __LINE__
);
9669 aarch64_set_reg_u64 (cpu
, rd
, SP_OK
,
9670 aarch64_get_reg_u64 (cpu
, rn
, NO_SP
) | bimm
);
9673 /* Logical shifted register.
9674 These allow an optional LSL, ASR, LSR or ROR to the second source
9675 register with a count up to the register bit count.
9676 N.B register args may not be SP. */
9678 /* 32 bit AND shifted register. */
9680 and32_shift (sim_cpu
*cpu
, Shift shift
, uint32_t count
)
9682 unsigned rm
= INSTR (20, 16);
9683 unsigned rn
= INSTR (9, 5);
9684 unsigned rd
= INSTR (4, 0);
9686 TRACE_DECODE (cpu
, "emulated at line %d", __LINE__
);
9688 (cpu
, rd
, NO_SP
, aarch64_get_reg_u32 (cpu
, rn
, NO_SP
)
9689 & shifted32 (aarch64_get_reg_u32 (cpu
, rm
, NO_SP
), shift
, count
));
9692 /* 64 bit AND shifted register. */
9694 and64_shift (sim_cpu
*cpu
, Shift shift
, uint32_t count
)
9696 unsigned rm
= INSTR (20, 16);
9697 unsigned rn
= INSTR (9, 5);
9698 unsigned rd
= INSTR (4, 0);
9700 TRACE_DECODE (cpu
, "emulated at line %d", __LINE__
);
9702 (cpu
, rd
, NO_SP
, aarch64_get_reg_u64 (cpu
, rn
, NO_SP
)
9703 & shifted64 (aarch64_get_reg_u64 (cpu
, rm
, NO_SP
), shift
, count
));
9706 /* 32 bit AND shifted register setting flags. */
9708 ands32_shift (sim_cpu
*cpu
, Shift shift
, uint32_t count
)
9710 unsigned rm
= INSTR (20, 16);
9711 unsigned rn
= INSTR (9, 5);
9712 unsigned rd
= INSTR (4, 0);
9714 uint32_t value1
= aarch64_get_reg_u32 (cpu
, rn
, NO_SP
);
9715 uint32_t value2
= shifted32 (aarch64_get_reg_u32 (cpu
, rm
, NO_SP
),
9718 TRACE_DECODE (cpu
, "emulated at line %d", __LINE__
);
9719 aarch64_set_reg_u64 (cpu
, rd
, NO_SP
, value1
& value2
);
9720 set_flags_for_binop32 (cpu
, value1
& value2
);
9723 /* 64 bit AND shifted register setting flags. */
9725 ands64_shift (sim_cpu
*cpu
, Shift shift
, uint32_t count
)
9727 unsigned rm
= INSTR (20, 16);
9728 unsigned rn
= INSTR (9, 5);
9729 unsigned rd
= INSTR (4, 0);
9731 uint64_t value1
= aarch64_get_reg_u64 (cpu
, rn
, NO_SP
);
9732 uint64_t value2
= shifted64 (aarch64_get_reg_u64 (cpu
, rm
, NO_SP
),
9735 TRACE_DECODE (cpu
, "emulated at line %d", __LINE__
);
9736 aarch64_set_reg_u64 (cpu
, rd
, NO_SP
, value1
& value2
);
9737 set_flags_for_binop64 (cpu
, value1
& value2
);
9740 /* 32 bit BIC shifted register. */
9742 bic32_shift (sim_cpu
*cpu
, Shift shift
, uint32_t count
)
9744 unsigned rm
= INSTR (20, 16);
9745 unsigned rn
= INSTR (9, 5);
9746 unsigned rd
= INSTR (4, 0);
9748 TRACE_DECODE (cpu
, "emulated at line %d", __LINE__
);
9750 (cpu
, rd
, NO_SP
, aarch64_get_reg_u32 (cpu
, rn
, NO_SP
)
9751 & ~ shifted32 (aarch64_get_reg_u32 (cpu
, rm
, NO_SP
), shift
, count
));
9754 /* 64 bit BIC shifted register. */
9756 bic64_shift (sim_cpu
*cpu
, Shift shift
, uint32_t count
)
9758 unsigned rm
= INSTR (20, 16);
9759 unsigned rn
= INSTR (9, 5);
9760 unsigned rd
= INSTR (4, 0);
9762 TRACE_DECODE (cpu
, "emulated at line %d", __LINE__
);
9764 (cpu
, rd
, NO_SP
, aarch64_get_reg_u64 (cpu
, rn
, NO_SP
)
9765 & ~ shifted64 (aarch64_get_reg_u64 (cpu
, rm
, NO_SP
), shift
, count
));
9768 /* 32 bit BIC shifted register setting flags. */
9770 bics32_shift (sim_cpu
*cpu
, Shift shift
, uint32_t count
)
9772 unsigned rm
= INSTR (20, 16);
9773 unsigned rn
= INSTR (9, 5);
9774 unsigned rd
= INSTR (4, 0);
9776 uint32_t value1
= aarch64_get_reg_u32 (cpu
, rn
, NO_SP
);
9777 uint32_t value2
= ~ shifted32 (aarch64_get_reg_u32 (cpu
, rm
, NO_SP
),
9780 TRACE_DECODE (cpu
, "emulated at line %d", __LINE__
);
9781 aarch64_set_reg_u64 (cpu
, rd
, NO_SP
, value1
& value2
);
9782 set_flags_for_binop32 (cpu
, value1
& value2
);
9785 /* 64 bit BIC shifted register setting flags. */
9787 bics64_shift (sim_cpu
*cpu
, Shift shift
, uint32_t count
)
9789 unsigned rm
= INSTR (20, 16);
9790 unsigned rn
= INSTR (9, 5);
9791 unsigned rd
= INSTR (4, 0);
9793 uint64_t value1
= aarch64_get_reg_u64 (cpu
, rn
, NO_SP
);
9794 uint64_t value2
= ~ shifted64 (aarch64_get_reg_u64 (cpu
, rm
, NO_SP
),
9797 TRACE_DECODE (cpu
, "emulated at line %d", __LINE__
);
9798 aarch64_set_reg_u64 (cpu
, rd
, NO_SP
, value1
& value2
);
9799 set_flags_for_binop64 (cpu
, value1
& value2
);
9802 /* 32 bit EON shifted register. */
9804 eon32_shift (sim_cpu
*cpu
, Shift shift
, uint32_t count
)
9806 unsigned rm
= INSTR (20, 16);
9807 unsigned rn
= INSTR (9, 5);
9808 unsigned rd
= INSTR (4, 0);
9810 TRACE_DECODE (cpu
, "emulated at line %d", __LINE__
);
9812 (cpu
, rd
, NO_SP
, aarch64_get_reg_u32 (cpu
, rn
, NO_SP
)
9813 ^ ~ shifted32 (aarch64_get_reg_u32 (cpu
, rm
, NO_SP
), shift
, count
));
9816 /* 64 bit EON shifted register. */
9818 eon64_shift (sim_cpu
*cpu
, Shift shift
, uint32_t count
)
9820 unsigned rm
= INSTR (20, 16);
9821 unsigned rn
= INSTR (9, 5);
9822 unsigned rd
= INSTR (4, 0);
9824 TRACE_DECODE (cpu
, "emulated at line %d", __LINE__
);
9826 (cpu
, rd
, NO_SP
, aarch64_get_reg_u64 (cpu
, rn
, NO_SP
)
9827 ^ ~ shifted64 (aarch64_get_reg_u64 (cpu
, rm
, NO_SP
), shift
, count
));
9830 /* 32 bit EOR shifted register. */
9832 eor32_shift (sim_cpu
*cpu
, Shift shift
, uint32_t count
)
9834 unsigned rm
= INSTR (20, 16);
9835 unsigned rn
= INSTR (9, 5);
9836 unsigned rd
= INSTR (4, 0);
9838 TRACE_DECODE (cpu
, "emulated at line %d", __LINE__
);
9840 (cpu
, rd
, NO_SP
, aarch64_get_reg_u32 (cpu
, rn
, NO_SP
)
9841 ^ shifted32 (aarch64_get_reg_u32 (cpu
, rm
, NO_SP
), shift
, count
));
9844 /* 64 bit EOR shifted register. */
9846 eor64_shift (sim_cpu
*cpu
, Shift shift
, uint32_t count
)
9848 unsigned rm
= INSTR (20, 16);
9849 unsigned rn
= INSTR (9, 5);
9850 unsigned rd
= INSTR (4, 0);
9852 TRACE_DECODE (cpu
, "emulated at line %d", __LINE__
);
9854 (cpu
, rd
, NO_SP
, aarch64_get_reg_u64 (cpu
, rn
, NO_SP
)
9855 ^ shifted64 (aarch64_get_reg_u64 (cpu
, rm
, NO_SP
), shift
, count
));
9858 /* 32 bit ORR shifted register. */
9860 orr32_shift (sim_cpu
*cpu
, Shift shift
, uint32_t count
)
9862 unsigned rm
= INSTR (20, 16);
9863 unsigned rn
= INSTR (9, 5);
9864 unsigned rd
= INSTR (4, 0);
9866 TRACE_DECODE (cpu
, "emulated at line %d", __LINE__
);
9868 (cpu
, rd
, NO_SP
, aarch64_get_reg_u32 (cpu
, rn
, NO_SP
)
9869 | shifted32 (aarch64_get_reg_u32 (cpu
, rm
, NO_SP
), shift
, count
));
9872 /* 64 bit ORR shifted register. */
9874 orr64_shift (sim_cpu
*cpu
, Shift shift
, uint32_t count
)
9876 unsigned rm
= INSTR (20, 16);
9877 unsigned rn
= INSTR (9, 5);
9878 unsigned rd
= INSTR (4, 0);
9880 TRACE_DECODE (cpu
, "emulated at line %d", __LINE__
);
9882 (cpu
, rd
, NO_SP
, aarch64_get_reg_u64 (cpu
, rn
, NO_SP
)
9883 | shifted64 (aarch64_get_reg_u64 (cpu
, rm
, NO_SP
), shift
, count
));
9886 /* 32 bit ORN shifted register. */
9888 orn32_shift (sim_cpu
*cpu
, Shift shift
, uint32_t count
)
9890 unsigned rm
= INSTR (20, 16);
9891 unsigned rn
= INSTR (9, 5);
9892 unsigned rd
= INSTR (4, 0);
9894 TRACE_DECODE (cpu
, "emulated at line %d", __LINE__
);
9896 (cpu
, rd
, NO_SP
, aarch64_get_reg_u32 (cpu
, rn
, NO_SP
)
9897 | ~ shifted32 (aarch64_get_reg_u32 (cpu
, rm
, NO_SP
), shift
, count
));
9900 /* 64 bit ORN shifted register. */
9902 orn64_shift (sim_cpu
*cpu
, Shift shift
, uint32_t count
)
9904 unsigned rm
= INSTR (20, 16);
9905 unsigned rn
= INSTR (9, 5);
9906 unsigned rd
= INSTR (4, 0);
9908 TRACE_DECODE (cpu
, "emulated at line %d", __LINE__
);
9910 (cpu
, rd
, NO_SP
, aarch64_get_reg_u64 (cpu
, rn
, NO_SP
)
9911 | ~ shifted64 (aarch64_get_reg_u64 (cpu
, rm
, NO_SP
), shift
, count
));
9915 dexLogicalImmediate (sim_cpu
*cpu
)
9917 /* assert instr[28,23] = 1001000
9918 instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
9919 instr[30,29] = op : 0 ==> AND, 1 ==> ORR, 2 ==> EOR, 3 ==> ANDS
9920 instr[22] = N : used to construct immediate mask
9926 /* 32 bit operations must have N = 0 or else we have an UNALLOC. */
9927 uint32_t size
= INSTR (31, 31);
9928 uint32_t n
= INSTR (22, 22);
9929 /* uint32_t immr = INSTR (21, 16);. */
9930 /* uint32_t imms = INSTR (15, 10);. */
9931 uint32_t index
= INSTR (22, 10);
9932 uint64_t bimm64
= LITable
[index
];
9933 uint32_t dispatch
= INSTR (30, 29);
9943 uint32_t bimm
= (uint32_t) bimm64
;
9947 case 0: and32 (cpu
, bimm
); return;
9948 case 1: orr32 (cpu
, bimm
); return;
9949 case 2: eor32 (cpu
, bimm
); return;
9950 case 3: ands32 (cpu
, bimm
); return;
9957 case 0: and64 (cpu
, bimm64
); return;
9958 case 1: orr64 (cpu
, bimm64
); return;
9959 case 2: eor64 (cpu
, bimm64
); return;
9960 case 3: ands64 (cpu
, bimm64
); return;
9967 The uimm argument is a 16 bit value to be inserted into the
9968 target register the pos argument locates the 16 bit word in the
9969 dest register i.e. it is in {0, 1} for 32 bit and {0, 1, 2,
9971 N.B register arg may not be SP so it should be.
9972 accessed using the setGZRegisterXXX accessors. */
9974 /* 32 bit move 16 bit immediate zero remaining shorts. */
9976 movz32 (sim_cpu
*cpu
, uint32_t val
, uint32_t pos
)
9978 unsigned rd
= INSTR (4, 0);
9980 TRACE_DECODE (cpu
, "emulated at line %d", __LINE__
);
9981 aarch64_set_reg_u64 (cpu
, rd
, NO_SP
, val
<< (pos
* 16));
9984 /* 64 bit move 16 bit immediate zero remaining shorts. */
9986 movz64 (sim_cpu
*cpu
, uint32_t val
, uint32_t pos
)
9988 unsigned rd
= INSTR (4, 0);
9990 TRACE_DECODE (cpu
, "emulated at line %d", __LINE__
);
9991 aarch64_set_reg_u64 (cpu
, rd
, NO_SP
, ((uint64_t) val
) << (pos
* 16));
9994 /* 32 bit move 16 bit immediate negated. */
9996 movn32 (sim_cpu
*cpu
, uint32_t val
, uint32_t pos
)
9998 unsigned rd
= INSTR (4, 0);
10000 TRACE_DECODE (cpu
, "emulated at line %d", __LINE__
);
10001 aarch64_set_reg_u64 (cpu
, rd
, NO_SP
, ((val
<< (pos
* 16)) ^ 0xffffffffU
));
10004 /* 64 bit move 16 bit immediate negated. */
10006 movn64 (sim_cpu
*cpu
, uint32_t val
, uint32_t pos
)
10008 unsigned rd
= INSTR (4, 0);
10010 TRACE_DECODE (cpu
, "emulated at line %d", __LINE__
);
10011 aarch64_set_reg_u64
10012 (cpu
, rd
, NO_SP
, ((((uint64_t) val
) << (pos
* 16))
10013 ^ 0xffffffffffffffffULL
));
10016 /* 32 bit move 16 bit immediate keep remaining shorts. */
10018 movk32 (sim_cpu
*cpu
, uint32_t val
, uint32_t pos
)
10020 unsigned rd
= INSTR (4, 0);
10021 uint32_t current
= aarch64_get_reg_u32 (cpu
, rd
, NO_SP
);
10022 uint32_t value
= val
<< (pos
* 16);
10023 uint32_t mask
= ~(0xffffU
<< (pos
* 16));
10025 TRACE_DECODE (cpu
, "emulated at line %d", __LINE__
);
10026 aarch64_set_reg_u64 (cpu
, rd
, NO_SP
, (value
| (current
& mask
)));
10029 /* 64 bit move 16 it immediate keep remaining shorts. */
10031 movk64 (sim_cpu
*cpu
, uint32_t val
, uint32_t pos
)
10033 unsigned rd
= INSTR (4, 0);
10034 uint64_t current
= aarch64_get_reg_u64 (cpu
, rd
, NO_SP
);
10035 uint64_t value
= (uint64_t) val
<< (pos
* 16);
10036 uint64_t mask
= ~(0xffffULL
<< (pos
* 16));
10038 TRACE_DECODE (cpu
, "emulated at line %d", __LINE__
);
10039 aarch64_set_reg_u64 (cpu
, rd
, NO_SP
, (value
| (current
& mask
)));
10043 dexMoveWideImmediate (sim_cpu
*cpu
)
10045 /* assert instr[28:23] = 100101
10046 instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
10047 instr[30,29] = op : 0 ==> MOVN, 1 ==> UNALLOC, 2 ==> MOVZ, 3 ==> MOVK
10048 instr[22,21] = shift : 00 == LSL#0, 01 = LSL#16, 10 = LSL#32, 11 = LSL#48
10049 instr[20,5] = uimm16
10052 /* N.B. the (multiple of 16) shift is applied by the called routine,
10053 we just pass the multiplier. */
10056 uint32_t size
= INSTR (31, 31);
10057 uint32_t op
= INSTR (30, 29);
10058 uint32_t shift
= INSTR (22, 21);
10060 /* 32 bit can only shift 0 or 1 lot of 16.
10061 anything else is an unallocated instruction. */
10062 if (size
== 0 && (shift
> 1))
10068 imm
= INSTR (20, 5);
10073 movn32 (cpu
, imm
, shift
);
10075 movz32 (cpu
, imm
, shift
);
10077 movk32 (cpu
, imm
, shift
);
10082 movn64 (cpu
, imm
, shift
);
10084 movz64 (cpu
, imm
, shift
);
10086 movk64 (cpu
, imm
, shift
);
/* Bitfield operations.
   These take a pair of bit positions r and s which are in {0..31}
   or {0..63} depending on the instruction word size.
   N.B register args may not be SP.  */

/* OK, we start with ubfm which just needs to pick
   some bits out of source zero the rest and write
   the result to dest.  Just need two logical shifts.  */

/* 32 bit bitfield move, left and right of affected zeroed
   if r <= s Wd<s-r:0> = Wn<s:r> else Wd<32+s-r,32-r> = Wn<s:0>.  */
static void
ubfm32 (sim_cpu *cpu, uint32_t r, uint32_t s)
{
  unsigned rd;
  unsigned rn = INSTR (9, 5);
  uint32_t value = aarch64_get_reg_u32 (cpu, rn, NO_SP);

  /* Pick either s+1-r or s+1 consecutive bits out of the original word.  */
  if (r <= s)
    {
      /* 31:...:s:xxx:r:...:0 ==> 31:...:s-r:xxx:0.
         We want only bits s:xxx:r at the bottom of the word
         so we LSL bit s up to bit 31 i.e. by 31 - s
         and then we LSR to bring bit 31 down to bit s - r
         i.e. by 31 + r - s.  */
      value <<= 31 - s;
      value >>= 31 + r - s;
    }
  else
    {
      /* 31:...:s:xxx:0 ==> 31:...:31-(r-1)+s:xxx:31-(r-1):...:0
         We want only bits s:xxx:0 starting at it 31-(r-1)
         so we LSL bit s up to bit 31 i.e. by 31 - s
         and then we LSL to bring bit 31 down to 31-(r-1)+s
         i.e. by r - (s + 1).  */
      value <<= 31 - s;
      value >>= r - (s + 1);
    }

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  rd = INSTR (4, 0);
  aarch64_set_reg_u64 (cpu, rd, NO_SP, value);
}

/* 64 bit bitfield move, left and right of affected zeroed
   if r <= s Wd<s-r:0> = Wn<s:r> else Wd<64+s-r,64-r> = Wn<s:0>.  */
static void
ubfm (sim_cpu *cpu, uint32_t r, uint32_t s)
{
  unsigned rd;
  unsigned rn = INSTR (9, 5);
  uint64_t value = aarch64_get_reg_u64 (cpu, rn, NO_SP);

  if (r <= s)
    {
      /* 63:...:s:xxx:r:...:0 ==> 63:...:s-r:xxx:0.
         We want only bits s:xxx:r at the bottom of the word.
         So we LSL bit s up to bit 63 i.e. by 63 - s
         and then we LSR to bring bit 63 down to bit s - r
         i.e. by 63 + r - s.  */
      value <<= 63 - s;
      value >>= 63 + r - s;
    }
  else
    {
      /* 63:...:s:xxx:0 ==> 63:...:63-(r-1)+s:xxx:63-(r-1):...:0.
         We want only bits s:xxx:0 starting at it 63-(r-1).
         So we LSL bit s up to bit 63 i.e. by 63 - s
         and then we LSL to bring bit 63 down to 63-(r-1)+s
         i.e. by r - (s + 1).  */
      value <<= 63 - s;
      value >>= r - (s + 1);
    }

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  rd = INSTR (4, 0);
  aarch64_set_reg_u64 (cpu, rd, NO_SP, value);
}
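/* Worked example (added): UBFX W0, W1, #4, #4 is UBFM with r = 4, s = 7.
   With W1 = 0x000000f0 the 32 bit routine above computes
   value <<= 31 - 7 = 24, giving 0xf0000000, then value >>= 31 + 4 - 7 = 28,
   giving 0x0000000f, i.e. bits 7:4 land at the bottom of W0.
   LSR W0, W1, #n is the r = n, s = 31 case of the same routine.  */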
10170 /* The signed versions need to insert sign bits
10171 on the left of the inserted bit field. so we do
10172 much the same as the unsigned version except we
10173 use an arithmetic shift right -- this just means
10174 we need to operate on signed values. */
10176 /* 32 bit bitfield move, left of affected sign-extended, right zeroed. */
10177 /* If r <= s Wd<s-r:0> = Wn<s:r> else Wd<32+s-r,32-r> = Wn<s:0>. */
10179 sbfm32 (sim_cpu
*cpu
, uint32_t r
, uint32_t s
)
10182 unsigned rn
= INSTR (9, 5);
10183 /* as per ubfm32 but use an ASR instead of an LSR. */
10184 int32_t value
= aarch64_get_reg_s32 (cpu
, rn
, NO_SP
);
10189 value
>>= 31 + r
- s
;
10194 value
>>= r
- (s
+ 1);
10197 TRACE_DECODE (cpu
, "emulated at line %d", __LINE__
);
10199 aarch64_set_reg_u64 (cpu
, rd
, NO_SP
, (uint32_t) value
);
10202 /* 64 bit bitfield move, left of affected sign-extended, right zeroed. */
10203 /* If r <= s Wd<s-r:0> = Wn<s:r> else Wd<64+s-r,64-r> = Wn<s:0>. */
10205 sbfm (sim_cpu
*cpu
, uint32_t r
, uint32_t s
)
10208 unsigned rn
= INSTR (9, 5);
10209 /* acpu per ubfm but use an ASR instead of an LSR. */
10210 int64_t value
= aarch64_get_reg_s64 (cpu
, rn
, NO_SP
);
10215 value
>>= 63 + r
- s
;
10220 value
>>= r
- (s
+ 1);
10223 TRACE_DECODE (cpu
, "emulated at line %d", __LINE__
);
10225 aarch64_set_reg_s64 (cpu
, rd
, NO_SP
, value
);
/* Finally, these versions leave non-affected bits as is, so we need
   to generate the bits as per ubfm and also generate a mask to pick the
   bits from the original and computed values.  */

/* 32 bit bitfield move, non-affected bits left as is.
   If r <= s Wd<s-r:0> = Wn<s:r> else Wd<32+s-r,32-r> = Wn<s:0>.  */
static void
bfm32 (sim_cpu *cpu, uint32_t r, uint32_t s)
{
  unsigned rn = INSTR (9, 5);
  uint32_t value = aarch64_get_reg_u32 (cpu, rn, NO_SP);
  uint32_t mask = -1;
  unsigned rd;
  uint32_t value2;

  /* Pick either s+1-r or s+1 consecutive bits out of the original word.  */
  if (r <= s)
    {
      /* 31:...:s:xxx:r:...:0 ==> 31:...:s-r:xxx:0.
         We want only bits s:xxx:r at the bottom of the word
         so we LSL bit s up to bit 31 i.e. by 31 - s
         and then we LSR to bring bit 31 down to bit s - r
         i.e. by 31 + r - s.  */
      value <<= 31 - s;
      value >>= 31 + r - s;
      /* The mask must include the same bits.  */
      mask <<= 31 - s;
      mask >>= 31 + r - s;
    }
  else
    {
      /* 31:...:s:xxx:0 ==> 31:...:31-(r-1)+s:xxx:31-(r-1):...:0.
         We want only bits s:xxx:0 starting at bit 31-(r-1)
         so we LSL bit s up to bit 31 i.e. by 31 - s
         and then we LSR to bring bit 31 down to 31-(r-1)+s
         i.e. by r - (s + 1).  */
      value <<= 31 - s;
      value >>= r - (s + 1);
      /* The mask must include the same bits.  */
      mask <<= 31 - s;
      mask >>= r - (s + 1);
    }

  rd = INSTR (4, 0);
  value2 = aarch64_get_reg_u32 (cpu, rd, NO_SP);

  value2 &= ~mask;
  value2 |= value;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, NO_SP, value2);
}
/* 64 bit bitfield move, non-affected bits left as is.
   If r <= s Wd<s-r:0> = Wn<s:r> else Wd<64+s-r,64-r> = Wn<s:0>.  */
static void
bfm (sim_cpu *cpu, uint32_t r, uint32_t s)
{
  unsigned rd = INSTR (4, 0);
  unsigned rn = INSTR (9, 5);
  uint64_t value = aarch64_get_reg_u64 (cpu, rn, NO_SP);
  uint64_t mask = 0xffffffffffffffffULL;

  if (r <= s)
    {
      /* 63:...:s:xxx:r:...:0 ==> 63:...:s-r:xxx:0.
         We want only bits s:xxx:r at the bottom of the word
         so we LSL bit s up to bit 63 i.e. by 63 - s
         and then we LSR to bring bit 63 down to bit s - r
         i.e. by 63 + r - s.  */
      value <<= 63 - s;
      value >>= 63 + r - s;
      /* The mask must include the same bits.  */
      mask <<= 63 - s;
      mask >>= 63 + r - s;
    }
  else
    {
      /* 63:...:s:xxx:0 ==> 63:...:63-(r-1)+s:xxx:63-(r-1):...:0.
         We want only bits s:xxx:0 starting at bit 63-(r-1)
         so we LSL bit s up to bit 63 i.e. by 63 - s
         and then we LSR to bring bit 63 down to 63-(r-1)+s
         i.e. by r - (s + 1).  */
      value <<= 63 - s;
      value >>= r - (s + 1);
      /* The mask must include the same bits.  */
      mask <<= 63 - s;
      mask >>= r - (s + 1);
    }

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64
    (cpu, rd, NO_SP,
     (aarch64_get_reg_u64 (cpu, rd, NO_SP) & ~mask) | value);
}
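
/* How the mask arithmetic above realises the BFI alias; the helper below is
   an illustrative sketch only (the name and values are invented here) and
   simply replays the same shifts on host integers.  BFI X0, X1, #8, #4 is
   BFM with r = 56, s = 3, so only bits 11:8 of the destination change.  */
#if 0
static uint64_t
bfm_example (uint64_t xd, uint64_t xn)
{
  uint32_t r = 56, s = 3;                       /* BFI X0, X1, #8, #4.  */
  uint64_t value = xn;
  uint64_t mask = 0xffffffffffffffffULL;

  value <<= 63 - s;  value >>= r - (s + 1);     /* r > s branch.  */
  mask  <<= 63 - s;  mask  >>= r - (s + 1);     /* mask = 0xf00.  */
  return (xd & ~mask) | value;                  /* Untouched bits kept.  */
}
#endif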
static void
dexBitfieldImmediate (sim_cpu *cpu)
{
  /* assert instr[28:23] = 100110
     instr[31]    = size : 0 ==> 32 bit, 1 ==> 64 bit
     instr[30,29] = op : 0 ==> SBFM, 1 ==> BFM, 2 ==> UBFM, 3 ==> UNALLOC
     instr[22]    = N : must be 0 for 32 bit, 1 for 64 bit ow UNALLOC
     instr[21,16] = immr : 0xxxxx for 32 bit, xxxxxx for 64 bit
     instr[15,10] = imms : 0xxxxx for 32 bit, xxxxxx for 64 bit  */

  /* 32 bit operations must have N = 0 or else we have an UNALLOC.  */
  uint32_t dispatch;
  uint32_t imms;
  uint32_t size = INSTR (31, 31);
  uint32_t n = INSTR (22, 22);
  /* 32 bit operations must have immr[5] = 0 and imms[5] = 0.  */
  /* or else we have an UNALLOC.  */
  uint32_t immr = INSTR (21, 16);

  if (!size && n)
    HALT_UNALLOC;

  if (!size && uimm (immr, 5, 5))
    HALT_UNALLOC;

  imms = INSTR (15, 10);
  if (!size && uimm (imms, 5, 5))
    HALT_UNALLOC;

  /* Switch on combined size and op.  */
  dispatch = INSTR (31, 29);
  switch (dispatch)
    {
    case 0: sbfm32 (cpu, immr, imms); return;
    case 1: bfm32 (cpu, immr, imms); return;
    case 2: ubfm32 (cpu, immr, imms); return;
    case 4: sbfm (cpu, immr, imms); return;
    case 5: bfm (cpu, immr, imms); return;
    case 6: ubfm (cpu, immr, imms); return;
    default: HALT_UNALLOC;
    }
}
static void
do_EXTR_32 (sim_cpu *cpu)
{
  /* instr[31:21] = 00010011100
     instr[20,16] = Rm
     instr[15,10] = imms : 0xxxxx for 32 bit
     instr[9,5]   = Rn
     instr[4,0]   = Rd  */
  unsigned rm   = INSTR (20, 16);
  unsigned imms = INSTR (15, 10) & 31;
  unsigned rn   = INSTR ( 9, 5);
  unsigned rd   = INSTR ( 4, 0);
  uint64_t val1;
  uint64_t val2;

  val1 = aarch64_get_reg_u32 (cpu, rm, NO_SP);
  val1 >>= imms;
  val2 = aarch64_get_reg_u32 (cpu, rn, NO_SP);
  val2 <<= (32 - imms);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, NO_SP, val1 | val2);
}
static void
do_EXTR_64 (sim_cpu *cpu)
{
  /* instr[31:21] = 10010011100
     instr[20,16] = Rm
     instr[15,10] = imms
     instr[9,5]   = Rn
     instr[4,0]   = Rd  */
  unsigned rm   = INSTR (20, 16);
  unsigned imms = INSTR (15, 10) & 63;
  unsigned rn   = INSTR ( 9, 5);
  unsigned rd   = INSTR ( 4, 0);
  uint64_t val;

  val = aarch64_get_reg_u64 (cpu, rm, NO_SP);
  val >>= imms;
  val |= (aarch64_get_reg_u64 (cpu, rn, NO_SP) << (64 - imms));

  aarch64_set_reg_u64 (cpu, rd, NO_SP, val);
}
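
/* Illustrative note: EXTR with Rn == Rm is how ROR-by-immediate is encoded,
   and the expression above computes exactly that rotate.  For example, with
   X2 = 0x1111222233334444 (a made-up value), "extr x0, x2, x2, #16", i.e.
   "ror x0, x2, #16", gives
     val = (X2 >> 16) | (X2 << 48) = 0x4444111122223333.  */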
static void
dexExtractImmediate (sim_cpu *cpu)
{
  /* assert instr[28:23] = 100111
     instr[31]    = size : 0 ==> 32 bit, 1 ==> 64 bit
     instr[30,29] = op21 : 0 ==> EXTR, 1,2,3 ==> UNALLOC
     instr[22]    = N : must be 0 for 32 bit, 1 for 64 bit or UNALLOC
     instr[21]    = op0 : must be 0 or UNALLOC
     instr[15,10] = imms : 0xxxxx for 32 bit, xxxxxx for 64 bit  */

  /* 32 bit operations must have N = 0 or else we have an UNALLOC.  */
  /* 64 bit operations must have N = 1 or else we have an UNALLOC.  */
  uint32_t dispatch;
  uint32_t size = INSTR (31, 31);
  uint32_t n = INSTR (22, 22);
  /* 32 bit operations must have imms[5] = 0
     or else we have an UNALLOC.  */
  uint32_t imms = INSTR (15, 10);

  if (size != n)
    HALT_UNALLOC;

  if (!size && uimm (imms, 5, 5))
    HALT_UNALLOC;

  /* Switch on combined size and op.  */
  dispatch = INSTR (31, 29);

  if (dispatch == 0)
    do_EXTR_32 (cpu);
  else if (dispatch == 4)
    do_EXTR_64 (cpu);
  else if (dispatch == 1)
    HALT_NYI;
  else
    HALT_UNALLOC;
}
static void
dexDPImm (sim_cpu *cpu)
{
  /* uint32_t group = dispatchGroup (aarch64_get_instr (cpu));
     assert  group == GROUP_DPIMM_1000 || group == GROUP_DPIMM_1001
     bits [25,23] of a DPImm are the secondary dispatch vector.  */
  uint32_t group2 = dispatchDPImm (aarch64_get_instr (cpu));

  switch (group2)
    {
    case DPIMM_PCADR_000:
    case DPIMM_PCADR_001:
      dexPCRelAddressing (cpu);
      return;

    case DPIMM_ADDSUB_010:
    case DPIMM_ADDSUB_011:
      dexAddSubtractImmediate (cpu);
      return;

    case DPIMM_LOG_100:
      dexLogicalImmediate (cpu);
      return;

    case DPIMM_MOV_101:
      dexMoveWideImmediate (cpu);
      return;

    case DPIMM_BITF_110:
      dexBitfieldImmediate (cpu);
      return;

    case DPIMM_EXTR_111:
      dexExtractImmediate (cpu);
      return;

    default:
      /* Should never reach here.  */
      HALT_NYI;
    }
}
10501 dexLoadUnscaledImmediate (sim_cpu
*cpu
)
10503 /* instr[29,24] == 111_00
10506 instr[31,30] = size
10509 instr[20,12] = simm9
10510 instr[9,5] = rn may be SP. */
10511 /* unsigned rt = INSTR (4, 0); */
10512 uint32_t v
= INSTR (26, 26);
10513 uint32_t dispatch
= ((INSTR (31, 30) << 2) | INSTR (23, 22));
10514 int32_t imm
= simm32 (aarch64_get_instr (cpu
), 20, 12);
10518 /* GReg operations. */
10521 case 0: sturb (cpu
, imm
); return;
10522 case 1: ldurb32 (cpu
, imm
); return;
10523 case 2: ldursb64 (cpu
, imm
); return;
10524 case 3: ldursb32 (cpu
, imm
); return;
10525 case 4: sturh (cpu
, imm
); return;
10526 case 5: ldurh32 (cpu
, imm
); return;
10527 case 6: ldursh64 (cpu
, imm
); return;
10528 case 7: ldursh32 (cpu
, imm
); return;
10529 case 8: stur32 (cpu
, imm
); return;
10530 case 9: ldur32 (cpu
, imm
); return;
10531 case 10: ldursw (cpu
, imm
); return;
10532 case 12: stur64 (cpu
, imm
); return;
10533 case 13: ldur64 (cpu
, imm
); return;
10546 /* FReg operations. */
10549 case 2: fsturq (cpu
, imm
); return;
10550 case 3: fldurq (cpu
, imm
); return;
10551 case 8: fsturs (cpu
, imm
); return;
10552 case 9: fldurs (cpu
, imm
); return;
10553 case 12: fsturd (cpu
, imm
); return;
10554 case 13: fldurd (cpu
, imm
); return;
10556 case 0: /* STUR 8 bit FP. */
10557 case 1: /* LDUR 8 bit FP. */
10558 case 4: /* STUR 16 bit FP. */
10559 case 5: /* LDUR 16 bit FP. */
/* N.B. A preliminary note regarding all the ldrs<x>32 instructions below.

   The signed value loaded by these instructions is cast to unsigned
   before being assigned to aarch64_get_reg_u64 (cpu, N) i.e. to the
   64 bit element of the GReg union.  This performs a 32 bit sign extension
   (as required) but avoids 64 bit sign extension, thus ensuring that the
   top half of the register word is zero.  This is what the spec demands
   when a 32 bit load occurs.  */
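
/* The effect described above, sketched on host integers; the helper below is
   illustrative only (its name and argument are invented here).  Loading the
   byte 0x80 into a W register sign-extends to 32 bits, but the upper half of
   the X register stays zero.  */
#if 0
static uint64_t
ldrsb32_example (int8_t byte)           /* e.g. byte = 0x80 (-128).  */
{
  uint32_t w = (int32_t) byte;          /* 32 bit sign extension: 0xffffff80.  */
  return (uint64_t) w;                  /* X register view: 0x00000000ffffff80.  */
}
#endif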
/* 32 bit load sign-extended byte scaled unsigned 12 bit.  */
static void
ldrsb32_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned int rn = INSTR (9, 5);
  unsigned int rt = INSTR (4, 0);

  /* The target register may not be SP but the source may be.
     There is no scaling required for a byte load.  */
  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset;
  aarch64_set_reg_u64 (cpu, rt, NO_SP,
                       (int64_t) aarch64_get_mem_s8 (cpu, address));
}
10597 /* 32 bit load sign-extended byte scaled or unscaled zero-
10598 or sign-extended 32-bit register offset. */
10600 ldrsb32_scale_ext (sim_cpu
*cpu
, Scaling scaling
, Extension extension
)
10602 unsigned int rm
= INSTR (20, 16);
10603 unsigned int rn
= INSTR (9, 5);
10604 unsigned int rt
= INSTR (4, 0);
10606 /* rn may reference SP, rm and rt must reference ZR. */
10608 uint64_t address
= aarch64_get_reg_u64 (cpu
, rn
, SP_OK
);
10609 int64_t displacement
= extend (aarch64_get_reg_u32 (cpu
, rm
, NO_SP
),
10612 /* There is no scaling required for a byte load. */
10613 aarch64_set_reg_u64
10614 (cpu
, rt
, NO_SP
, (int64_t) aarch64_get_mem_s8 (cpu
, address
10618 /* 32 bit load sign-extended byte unscaled signed 9 bit with
10619 pre- or post-writeback. */
10621 ldrsb32_wb (sim_cpu
*cpu
, int32_t offset
, WriteBack wb
)
10624 unsigned int rn
= INSTR (9, 5);
10625 unsigned int rt
= INSTR (4, 0);
10627 if (rn
== rt
&& wb
!= NoWriteBack
)
10630 address
= aarch64_get_reg_u64 (cpu
, rn
, SP_OK
);
10635 aarch64_set_reg_u64 (cpu
, rt
, NO_SP
,
10636 (int64_t) aarch64_get_mem_s8 (cpu
, address
));
10641 if (wb
!= NoWriteBack
)
10642 aarch64_set_reg_u64 (cpu
, rn
, NO_SP
, address
);
10645 /* 8 bit store scaled. */
10647 fstrb_abs (sim_cpu
*cpu
, uint32_t offset
)
10649 unsigned st
= INSTR (4, 0);
10650 unsigned rn
= INSTR (9, 5);
10652 aarch64_set_mem_u8 (cpu
,
10653 aarch64_get_reg_u64 (cpu
, rn
, SP_OK
) + offset
,
10654 aarch64_get_vec_u8 (cpu
, st
, 0));
10657 /* 8 bit store scaled or unscaled zero- or
10658 sign-extended 8-bit register offset. */
10660 fstrb_scale_ext (sim_cpu
*cpu
, Scaling scaling
, Extension extension
)
10662 unsigned rm
= INSTR (20, 16);
10663 unsigned rn
= INSTR (9, 5);
10664 unsigned st
= INSTR (4, 0);
10666 uint64_t address
= aarch64_get_reg_u64 (cpu
, rn
, SP_OK
);
10667 int64_t extended
= extend (aarch64_get_reg_u32 (cpu
, rm
, NO_SP
),
10669 uint64_t displacement
= scaling
== Scaled
? extended
: 0;
10672 (cpu
, address
+ displacement
, aarch64_get_vec_u8 (cpu
, st
, 0));
10675 /* 16 bit store scaled. */
10677 fstrh_abs (sim_cpu
*cpu
, uint32_t offset
)
10679 unsigned st
= INSTR (4, 0);
10680 unsigned rn
= INSTR (9, 5);
10682 aarch64_set_mem_u16
10684 aarch64_get_reg_u64 (cpu
, rn
, SP_OK
) + SCALE (offset
, 16),
10685 aarch64_get_vec_u16 (cpu
, st
, 0));
10688 /* 16 bit store scaled or unscaled zero-
10689 or sign-extended 16-bit register offset. */
10691 fstrh_scale_ext (sim_cpu
*cpu
, Scaling scaling
, Extension extension
)
10693 unsigned rm
= INSTR (20, 16);
10694 unsigned rn
= INSTR (9, 5);
10695 unsigned st
= INSTR (4, 0);
10697 uint64_t address
= aarch64_get_reg_u64 (cpu
, rn
, SP_OK
);
10698 int64_t extended
= extend (aarch64_get_reg_u32 (cpu
, rm
, NO_SP
),
10700 uint64_t displacement
= OPT_SCALE (extended
, 16, scaling
);
10702 aarch64_set_mem_u16
10703 (cpu
, address
+ displacement
, aarch64_get_vec_u16 (cpu
, st
, 0));
10706 /* 32 bit store scaled unsigned 12 bit. */
10708 fstrs_abs (sim_cpu
*cpu
, uint32_t offset
)
10710 unsigned st
= INSTR (4, 0);
10711 unsigned rn
= INSTR (9, 5);
10713 aarch64_set_mem_u32
10715 aarch64_get_reg_u64 (cpu
, rn
, SP_OK
) + SCALE (offset
, 32),
10716 aarch64_get_vec_u32 (cpu
, st
, 0));
10719 /* 32 bit store unscaled signed 9 bit with pre- or post-writeback. */
10721 fstrs_wb (sim_cpu
*cpu
, int32_t offset
, WriteBack wb
)
10723 unsigned rn
= INSTR (9, 5);
10724 unsigned st
= INSTR (4, 0);
10726 uint64_t address
= aarch64_get_reg_u64 (cpu
, rn
, SP_OK
);
10731 aarch64_set_mem_u32 (cpu
, address
, aarch64_get_vec_u32 (cpu
, st
, 0));
10736 if (wb
!= NoWriteBack
)
10737 aarch64_set_reg_u64 (cpu
, rn
, SP_OK
, address
);
10740 /* 32 bit store scaled or unscaled zero-
10741 or sign-extended 32-bit register offset. */
10743 fstrs_scale_ext (sim_cpu
*cpu
, Scaling scaling
, Extension extension
)
10745 unsigned rm
= INSTR (20, 16);
10746 unsigned rn
= INSTR (9, 5);
10747 unsigned st
= INSTR (4, 0);
10749 uint64_t address
= aarch64_get_reg_u64 (cpu
, rn
, SP_OK
);
10750 int64_t extended
= extend (aarch64_get_reg_u32 (cpu
, rm
, NO_SP
),
10752 uint64_t displacement
= OPT_SCALE (extended
, 32, scaling
);
10754 aarch64_set_mem_u32
10755 (cpu
, address
+ displacement
, aarch64_get_vec_u32 (cpu
, st
, 0));
10758 /* 64 bit store scaled unsigned 12 bit. */
10760 fstrd_abs (sim_cpu
*cpu
, uint32_t offset
)
10762 unsigned st
= INSTR (4, 0);
10763 unsigned rn
= INSTR (9, 5);
10765 aarch64_set_mem_u64
10767 aarch64_get_reg_u64 (cpu
, rn
, SP_OK
) + SCALE (offset
, 64),
10768 aarch64_get_vec_u64 (cpu
, st
, 0));
10771 /* 64 bit store unscaled signed 9 bit with pre- or post-writeback. */
10773 fstrd_wb (sim_cpu
*cpu
, int32_t offset
, WriteBack wb
)
10775 unsigned rn
= INSTR (9, 5);
10776 unsigned st
= INSTR (4, 0);
10778 uint64_t address
= aarch64_get_reg_u64 (cpu
, rn
, SP_OK
);
10783 aarch64_set_mem_u64 (cpu
, address
, aarch64_get_vec_u64 (cpu
, st
, 0));
10788 if (wb
!= NoWriteBack
)
10789 aarch64_set_reg_u64 (cpu
, rn
, SP_OK
, address
);
10792 /* 64 bit store scaled or unscaled zero-
10793 or sign-extended 32-bit register offset. */
10795 fstrd_scale_ext (sim_cpu
*cpu
, Scaling scaling
, Extension extension
)
10797 unsigned rm
= INSTR (20, 16);
10798 unsigned rn
= INSTR (9, 5);
10799 unsigned st
= INSTR (4, 0);
10801 uint64_t address
= aarch64_get_reg_u64 (cpu
, rn
, SP_OK
);
10802 int64_t extended
= extend (aarch64_get_reg_u32 (cpu
, rm
, NO_SP
),
10804 uint64_t displacement
= OPT_SCALE (extended
, 64, scaling
);
10806 aarch64_set_mem_u64
10807 (cpu
, address
+ displacement
, aarch64_get_vec_u64 (cpu
, st
, 0));
10810 /* 128 bit store scaled unsigned 12 bit. */
10812 fstrq_abs (sim_cpu
*cpu
, uint32_t offset
)
10815 unsigned st
= INSTR (4, 0);
10816 unsigned rn
= INSTR (9, 5);
10819 aarch64_get_FP_long_double (cpu
, st
, & a
);
10821 addr
= aarch64_get_reg_u64 (cpu
, rn
, SP_OK
) + SCALE (offset
, 128);
10822 aarch64_set_mem_long_double (cpu
, addr
, a
);
10825 /* 128 bit store unscaled signed 9 bit with pre- or post-writeback. */
10827 fstrq_wb (sim_cpu
*cpu
, int32_t offset
, WriteBack wb
)
10830 unsigned rn
= INSTR (9, 5);
10831 unsigned st
= INSTR (4, 0);
10832 uint64_t address
= aarch64_get_reg_u64 (cpu
, rn
, SP_OK
);
10837 aarch64_get_FP_long_double (cpu
, st
, & a
);
10838 aarch64_set_mem_long_double (cpu
, address
, a
);
10843 if (wb
!= NoWriteBack
)
10844 aarch64_set_reg_u64 (cpu
, rn
, SP_OK
, address
);
10847 /* 128 bit store scaled or unscaled zero-
10848 or sign-extended 32-bit register offset. */
10850 fstrq_scale_ext (sim_cpu
*cpu
, Scaling scaling
, Extension extension
)
10852 unsigned rm
= INSTR (20, 16);
10853 unsigned rn
= INSTR (9, 5);
10854 unsigned st
= INSTR (4, 0);
10856 uint64_t address
= aarch64_get_reg_u64 (cpu
, rn
, SP_OK
);
10857 int64_t extended
= extend (aarch64_get_reg_u32 (cpu
, rm
, NO_SP
),
10859 uint64_t displacement
= OPT_SCALE (extended
, 128, scaling
);
10863 aarch64_get_FP_long_double (cpu
, st
, & a
);
10864 aarch64_set_mem_long_double (cpu
, address
+ displacement
, a
);
10868 dexLoadImmediatePrePost (sim_cpu
*cpu
)
10870 /* instr[31,30] = size
10876 instr[20,12] = simm9
10877 instr[11] = wb : 0 ==> Post, 1 ==> Pre
10879 instr[9,5] = Rn may be SP.
10882 uint32_t v
= INSTR (26, 26);
10883 uint32_t dispatch
= ((INSTR (31, 30) << 2) | INSTR (23, 22));
10884 int32_t imm
= simm32 (aarch64_get_instr (cpu
), 20, 12);
10885 WriteBack wb
= INSTR (11, 11);
10889 /* GReg operations. */
10892 case 0: strb_wb (cpu
, imm
, wb
); return;
10893 case 1: ldrb32_wb (cpu
, imm
, wb
); return;
10894 case 2: ldrsb_wb (cpu
, imm
, wb
); return;
10895 case 3: ldrsb32_wb (cpu
, imm
, wb
); return;
10896 case 4: strh_wb (cpu
, imm
, wb
); return;
10897 case 5: ldrh32_wb (cpu
, imm
, wb
); return;
10898 case 6: ldrsh64_wb (cpu
, imm
, wb
); return;
10899 case 7: ldrsh32_wb (cpu
, imm
, wb
); return;
10900 case 8: str32_wb (cpu
, imm
, wb
); return;
10901 case 9: ldr32_wb (cpu
, imm
, wb
); return;
10902 case 10: ldrsw_wb (cpu
, imm
, wb
); return;
10903 case 12: str_wb (cpu
, imm
, wb
); return;
10904 case 13: ldr_wb (cpu
, imm
, wb
); return;
10914 /* FReg operations. */
10917 case 2: fstrq_wb (cpu
, imm
, wb
); return;
10918 case 3: fldrq_wb (cpu
, imm
, wb
); return;
10919 case 8: fstrs_wb (cpu
, imm
, wb
); return;
10920 case 9: fldrs_wb (cpu
, imm
, wb
); return;
10921 case 12: fstrd_wb (cpu
, imm
, wb
); return;
10922 case 13: fldrd_wb (cpu
, imm
, wb
); return;
10924 case 0: /* STUR 8 bit FP. */
10925 case 1: /* LDUR 8 bit FP. */
10926 case 4: /* STUR 16 bit FP. */
10927 case 5: /* LDUR 16 bit FP. */
10942 dexLoadRegisterOffset (sim_cpu
*cpu
)
10944 /* instr[31,30] = size
10951 instr[15,13] = option : 010 ==> UXTW, 011 ==> UXTX/LSL,
10952 110 ==> SXTW, 111 ==> SXTX,
10957 instr[4,0] = rt. */
10959 uint32_t v
= INSTR (26, 26);
10960 uint32_t dispatch
= ((INSTR (31, 30) << 2) | INSTR (23, 22));
10961 Scaling scale
= INSTR (12, 12);
10962 Extension extensionType
= INSTR (15, 13);
10964 /* Check for illegal extension types. */
10965 if (uimm (extensionType
, 1, 1) == 0)
10968 if (extensionType
== UXTX
|| extensionType
== SXTX
)
10969 extensionType
= NoExtension
;
10973 /* GReg operations. */
10976 case 0: strb_scale_ext (cpu
, scale
, extensionType
); return;
10977 case 1: ldrb32_scale_ext (cpu
, scale
, extensionType
); return;
10978 case 2: ldrsb_scale_ext (cpu
, scale
, extensionType
); return;
10979 case 3: ldrsb32_scale_ext (cpu
, scale
, extensionType
); return;
10980 case 4: strh_scale_ext (cpu
, scale
, extensionType
); return;
10981 case 5: ldrh32_scale_ext (cpu
, scale
, extensionType
); return;
10982 case 6: ldrsh_scale_ext (cpu
, scale
, extensionType
); return;
10983 case 7: ldrsh32_scale_ext (cpu
, scale
, extensionType
); return;
10984 case 8: str32_scale_ext (cpu
, scale
, extensionType
); return;
10985 case 9: ldr32_scale_ext (cpu
, scale
, extensionType
); return;
10986 case 10: ldrsw_scale_ext (cpu
, scale
, extensionType
); return;
10987 case 12: str_scale_ext (cpu
, scale
, extensionType
); return;
10988 case 13: ldr_scale_ext (cpu
, scale
, extensionType
); return;
10989 case 14: prfm_scale_ext (cpu
, scale
, extensionType
); return;
10998 /* FReg operations. */
11001 case 1: /* LDUR 8 bit FP. */
11003 case 3: fldrq_scale_ext (cpu
, scale
, extensionType
); return;
11004 case 5: /* LDUR 16 bit FP. */
11006 case 9: fldrs_scale_ext (cpu
, scale
, extensionType
); return;
11007 case 13: fldrd_scale_ext (cpu
, scale
, extensionType
); return;
11009 case 0: fstrb_scale_ext (cpu
, scale
, extensionType
); return;
11010 case 2: fstrq_scale_ext (cpu
, scale
, extensionType
); return;
11011 case 4: fstrh_scale_ext (cpu
, scale
, extensionType
); return;
11012 case 8: fstrs_scale_ext (cpu
, scale
, extensionType
); return;
11013 case 12: fstrd_scale_ext (cpu
, scale
, extensionType
); return;
11027 dexLoadUnsignedImmediate (sim_cpu
*cpu
)
11029 /* instr[29,24] == 111_01
11030 instr[31,30] = size
11033 instr[21,10] = uimm12 : unsigned immediate offset
11034 instr[9,5] = rn may be SP.
11035 instr[4,0] = rt. */
11037 uint32_t v
= INSTR (26,26);
11038 uint32_t dispatch
= ((INSTR (31, 30) << 2) | INSTR (23, 22));
11039 uint32_t imm
= INSTR (21, 10);
11043 /* GReg operations. */
11046 case 0: strb_abs (cpu
, imm
); return;
11047 case 1: ldrb32_abs (cpu
, imm
); return;
11048 case 2: ldrsb_abs (cpu
, imm
); return;
11049 case 3: ldrsb32_abs (cpu
, imm
); return;
11050 case 4: strh_abs (cpu
, imm
); return;
11051 case 5: ldrh32_abs (cpu
, imm
); return;
11052 case 6: ldrsh_abs (cpu
, imm
); return;
11053 case 7: ldrsh32_abs (cpu
, imm
); return;
11054 case 8: str32_abs (cpu
, imm
); return;
11055 case 9: ldr32_abs (cpu
, imm
); return;
11056 case 10: ldrsw_abs (cpu
, imm
); return;
11057 case 12: str_abs (cpu
, imm
); return;
11058 case 13: ldr_abs (cpu
, imm
); return;
11059 case 14: prfm_abs (cpu
, imm
); return;
11068 /* FReg operations. */
11071 case 0: fstrb_abs (cpu
, imm
); return;
11072 case 4: fstrh_abs (cpu
, imm
); return;
11073 case 8: fstrs_abs (cpu
, imm
); return;
11074 case 12: fstrd_abs (cpu
, imm
); return;
11075 case 2: fstrq_abs (cpu
, imm
); return;
11077 case 1: fldrb_abs (cpu
, imm
); return;
11078 case 5: fldrh_abs (cpu
, imm
); return;
11079 case 9: fldrs_abs (cpu
, imm
); return;
11080 case 13: fldrd_abs (cpu
, imm
); return;
11081 case 3: fldrq_abs (cpu
, imm
); return;
11095 dexLoadExclusive (sim_cpu
*cpu
)
11097 /* assert instr[29:24] = 001000;
11098 instr[31,30] = size
11099 instr[23] = 0 if exclusive
11100 instr[22] = L : 1 if load, 0 if store
11101 instr[21] = 1 if pair
11103 instr[15] = o0 : 1 if ordered
11106 instr[4,0] = Rt. */
11108 switch (INSTR (22, 21))
11110 case 2: ldxr (cpu
); return;
11111 case 0: stxr (cpu
); return;
11117 dexLoadOther (sim_cpu
*cpu
)
11121 /* instr[29,25] = 111_0
11122 instr[24] == 0 ==> dispatch, 1 ==> ldst reg unsigned immediate
11123 instr[21:11,10] is the secondary dispatch. */
11124 if (INSTR (24, 24))
11126 dexLoadUnsignedImmediate (cpu
);
11130 dispatch
= ((INSTR (21, 21) << 2) | INSTR (11, 10));
11133 case 0: dexLoadUnscaledImmediate (cpu
); return;
11134 case 1: dexLoadImmediatePrePost (cpu
); return;
11135 case 3: dexLoadImmediatePrePost (cpu
); return;
11136 case 6: dexLoadRegisterOffset (cpu
); return;
11148 store_pair_u32 (sim_cpu
*cpu
, int32_t offset
, WriteBack wb
)
11150 unsigned rn
= INSTR (14, 10);
11151 unsigned rd
= INSTR (9, 5);
11152 unsigned rm
= INSTR (4, 0);
11153 uint64_t address
= aarch64_get_reg_u64 (cpu
, rd
, SP_OK
);
11155 if ((rn
== rd
|| rm
== rd
) && wb
!= NoWriteBack
)
11156 HALT_UNALLOC
; /* ??? */
11163 aarch64_set_mem_u32 (cpu
, address
,
11164 aarch64_get_reg_u32 (cpu
, rm
, NO_SP
));
11165 aarch64_set_mem_u32 (cpu
, address
+ 4,
11166 aarch64_get_reg_u32 (cpu
, rn
, NO_SP
));
11171 if (wb
!= NoWriteBack
)
11172 aarch64_set_reg_u64 (cpu
, rd
, SP_OK
, address
);
11176 store_pair_u64 (sim_cpu
*cpu
, int32_t offset
, WriteBack wb
)
11178 unsigned rn
= INSTR (14, 10);
11179 unsigned rd
= INSTR (9, 5);
11180 unsigned rm
= INSTR (4, 0);
11181 uint64_t address
= aarch64_get_reg_u64 (cpu
, rd
, SP_OK
);
11183 if ((rn
== rd
|| rm
== rd
) && wb
!= NoWriteBack
)
11184 HALT_UNALLOC
; /* ??? */
11191 aarch64_set_mem_u64 (cpu
, address
,
11192 aarch64_get_reg_u64 (cpu
, rm
, NO_SP
));
11193 aarch64_set_mem_u64 (cpu
, address
+ 8,
11194 aarch64_get_reg_u64 (cpu
, rn
, NO_SP
));
11199 if (wb
!= NoWriteBack
)
11200 aarch64_set_reg_u64 (cpu
, rd
, SP_OK
, address
);
11204 load_pair_u32 (sim_cpu
*cpu
, int32_t offset
, WriteBack wb
)
11206 unsigned rn
= INSTR (14, 10);
11207 unsigned rd
= INSTR (9, 5);
11208 unsigned rm
= INSTR (4, 0);
11209 uint64_t address
= aarch64_get_reg_u64 (cpu
, rd
, SP_OK
);
11211 /* Treat this as unalloc to make sure we don't do it. */
11220 aarch64_set_reg_u64 (cpu
, rm
, SP_OK
, aarch64_get_mem_u32 (cpu
, address
));
11221 aarch64_set_reg_u64 (cpu
, rn
, SP_OK
, aarch64_get_mem_u32 (cpu
, address
+ 4));
11226 if (wb
!= NoWriteBack
)
11227 aarch64_set_reg_u64 (cpu
, rd
, SP_OK
, address
);
11231 load_pair_s32 (sim_cpu
*cpu
, int32_t offset
, WriteBack wb
)
11233 unsigned rn
= INSTR (14, 10);
11234 unsigned rd
= INSTR (9, 5);
11235 unsigned rm
= INSTR (4, 0);
11236 uint64_t address
= aarch64_get_reg_u64 (cpu
, rd
, SP_OK
);
11238 /* Treat this as unalloc to make sure we don't do it. */
11247 aarch64_set_reg_s64 (cpu
, rm
, SP_OK
, aarch64_get_mem_s32 (cpu
, address
));
11248 aarch64_set_reg_s64 (cpu
, rn
, SP_OK
, aarch64_get_mem_s32 (cpu
, address
+ 4));
11253 if (wb
!= NoWriteBack
)
11254 aarch64_set_reg_u64 (cpu
, rd
, SP_OK
, address
);
11258 load_pair_u64 (sim_cpu
*cpu
, int32_t offset
, WriteBack wb
)
11260 unsigned rn
= INSTR (14, 10);
11261 unsigned rd
= INSTR (9, 5);
11262 unsigned rm
= INSTR (4, 0);
11263 uint64_t address
= aarch64_get_reg_u64 (cpu
, rd
, SP_OK
);
11265 /* Treat this as unalloc to make sure we don't do it. */
11274 aarch64_set_reg_u64 (cpu
, rm
, SP_OK
, aarch64_get_mem_u64 (cpu
, address
));
11275 aarch64_set_reg_u64 (cpu
, rn
, SP_OK
, aarch64_get_mem_u64 (cpu
, address
+ 8));
11280 if (wb
!= NoWriteBack
)
11281 aarch64_set_reg_u64 (cpu
, rd
, SP_OK
, address
);
11285 dex_load_store_pair_gr (sim_cpu
*cpu
)
11287 /* instr[31,30] = size (10=> 64-bit, 01=> signed 32-bit, 00=> 32-bit)
11288 instr[29,25] = instruction encoding: 101_0
11289 instr[26] = V : 1 if fp 0 if gp
11290 instr[24,23] = addressing mode (10=> offset, 01=> post, 11=> pre)
11291 instr[22] = load/store (1=> load)
11292 instr[21,15] = signed, scaled, offset
11295 instr[ 4, 0] = Rm. */
11297 uint32_t dispatch
= ((INSTR (31, 30) << 3) | INSTR (24, 22));
11298 int32_t offset
= simm32 (aarch64_get_instr (cpu
), 21, 15);
11302 case 2: store_pair_u32 (cpu
, offset
, Post
); return;
11303 case 3: load_pair_u32 (cpu
, offset
, Post
); return;
11304 case 4: store_pair_u32 (cpu
, offset
, NoWriteBack
); return;
11305 case 5: load_pair_u32 (cpu
, offset
, NoWriteBack
); return;
11306 case 6: store_pair_u32 (cpu
, offset
, Pre
); return;
11307 case 7: load_pair_u32 (cpu
, offset
, Pre
); return;
11309 case 11: load_pair_s32 (cpu
, offset
, Post
); return;
11310 case 13: load_pair_s32 (cpu
, offset
, NoWriteBack
); return;
11311 case 15: load_pair_s32 (cpu
, offset
, Pre
); return;
11313 case 18: store_pair_u64 (cpu
, offset
, Post
); return;
11314 case 19: load_pair_u64 (cpu
, offset
, Post
); return;
11315 case 20: store_pair_u64 (cpu
, offset
, NoWriteBack
); return;
11316 case 21: load_pair_u64 (cpu
, offset
, NoWriteBack
); return;
11317 case 22: store_pair_u64 (cpu
, offset
, Pre
); return;
11318 case 23: load_pair_u64 (cpu
, offset
, Pre
); return;
11326 store_pair_float (sim_cpu
*cpu
, int32_t offset
, WriteBack wb
)
11328 unsigned rn
= INSTR (14, 10);
11329 unsigned rd
= INSTR (9, 5);
11330 unsigned rm
= INSTR (4, 0);
11331 uint64_t address
= aarch64_get_reg_u64 (cpu
, rd
, SP_OK
);
11338 aarch64_set_mem_u32 (cpu
, address
, aarch64_get_vec_u32 (cpu
, rm
, 0));
11339 aarch64_set_mem_u32 (cpu
, address
+ 4, aarch64_get_vec_u32 (cpu
, rn
, 0));
11344 if (wb
!= NoWriteBack
)
11345 aarch64_set_reg_u64 (cpu
, rd
, SP_OK
, address
);
11349 store_pair_double (sim_cpu
*cpu
, int32_t offset
, WriteBack wb
)
11351 unsigned rn
= INSTR (14, 10);
11352 unsigned rd
= INSTR (9, 5);
11353 unsigned rm
= INSTR (4, 0);
11354 uint64_t address
= aarch64_get_reg_u64 (cpu
, rd
, SP_OK
);
11361 aarch64_set_mem_u64 (cpu
, address
, aarch64_get_vec_u64 (cpu
, rm
, 0));
11362 aarch64_set_mem_u64 (cpu
, address
+ 8, aarch64_get_vec_u64 (cpu
, rn
, 0));
11367 if (wb
!= NoWriteBack
)
11368 aarch64_set_reg_u64 (cpu
, rd
, SP_OK
, address
);
11372 store_pair_long_double (sim_cpu
*cpu
, int32_t offset
, WriteBack wb
)
11375 unsigned rn
= INSTR (14, 10);
11376 unsigned rd
= INSTR (9, 5);
11377 unsigned rm
= INSTR (4, 0);
11378 uint64_t address
= aarch64_get_reg_u64 (cpu
, rd
, SP_OK
);
11385 aarch64_get_FP_long_double (cpu
, rm
, & a
);
11386 aarch64_set_mem_long_double (cpu
, address
, a
);
11387 aarch64_get_FP_long_double (cpu
, rn
, & a
);
11388 aarch64_set_mem_long_double (cpu
, address
+ 16, a
);
11393 if (wb
!= NoWriteBack
)
11394 aarch64_set_reg_u64 (cpu
, rd
, SP_OK
, address
);
11398 load_pair_float (sim_cpu
*cpu
, int32_t offset
, WriteBack wb
)
11400 unsigned rn
= INSTR (14, 10);
11401 unsigned rd
= INSTR (9, 5);
11402 unsigned rm
= INSTR (4, 0);
11403 uint64_t address
= aarch64_get_reg_u64 (cpu
, rd
, SP_OK
);
11413 aarch64_set_vec_u32 (cpu
, rm
, 0, aarch64_get_mem_u32 (cpu
, address
));
11414 aarch64_set_vec_u32 (cpu
, rn
, 0, aarch64_get_mem_u32 (cpu
, address
+ 4));
11419 if (wb
!= NoWriteBack
)
11420 aarch64_set_reg_u64 (cpu
, rd
, SP_OK
, address
);
11424 load_pair_double (sim_cpu
*cpu
, int32_t offset
, WriteBack wb
)
11426 unsigned rn
= INSTR (14, 10);
11427 unsigned rd
= INSTR (9, 5);
11428 unsigned rm
= INSTR (4, 0);
11429 uint64_t address
= aarch64_get_reg_u64 (cpu
, rd
, SP_OK
);
11439 aarch64_set_vec_u64 (cpu
, rm
, 0, aarch64_get_mem_u64 (cpu
, address
));
11440 aarch64_set_vec_u64 (cpu
, rn
, 0, aarch64_get_mem_u64 (cpu
, address
+ 8));
11445 if (wb
!= NoWriteBack
)
11446 aarch64_set_reg_u64 (cpu
, rd
, SP_OK
, address
);
11450 load_pair_long_double (sim_cpu
*cpu
, int32_t offset
, WriteBack wb
)
11453 unsigned rn
= INSTR (14, 10);
11454 unsigned rd
= INSTR (9, 5);
11455 unsigned rm
= INSTR (4, 0);
11456 uint64_t address
= aarch64_get_reg_u64 (cpu
, rd
, SP_OK
);
11466 aarch64_get_mem_long_double (cpu
, address
, & a
);
11467 aarch64_set_FP_long_double (cpu
, rm
, a
);
11468 aarch64_get_mem_long_double (cpu
, address
+ 16, & a
);
11469 aarch64_set_FP_long_double (cpu
, rn
, a
);
11474 if (wb
!= NoWriteBack
)
11475 aarch64_set_reg_u64 (cpu
, rd
, SP_OK
, address
);
11479 dex_load_store_pair_fp (sim_cpu
*cpu
)
11481 /* instr[31,30] = size (10=> 128-bit, 01=> 64-bit, 00=> 32-bit)
11482 instr[29,25] = instruction encoding
11483 instr[24,23] = addressing mode (10=> offset, 01=> post, 11=> pre)
11484 instr[22] = load/store (1=> load)
11485 instr[21,15] = signed, scaled, offset
11488 instr[ 4, 0] = Rm */
11490 uint32_t dispatch
= ((INSTR (31, 30) << 3) | INSTR (24, 22));
11491 int32_t offset
= simm32 (aarch64_get_instr (cpu
), 21, 15);
11495 case 2: store_pair_float (cpu
, offset
, Post
); return;
11496 case 3: load_pair_float (cpu
, offset
, Post
); return;
11497 case 4: store_pair_float (cpu
, offset
, NoWriteBack
); return;
11498 case 5: load_pair_float (cpu
, offset
, NoWriteBack
); return;
11499 case 6: store_pair_float (cpu
, offset
, Pre
); return;
11500 case 7: load_pair_float (cpu
, offset
, Pre
); return;
11502 case 10: store_pair_double (cpu
, offset
, Post
); return;
11503 case 11: load_pair_double (cpu
, offset
, Post
); return;
11504 case 12: store_pair_double (cpu
, offset
, NoWriteBack
); return;
11505 case 13: load_pair_double (cpu
, offset
, NoWriteBack
); return;
11506 case 14: store_pair_double (cpu
, offset
, Pre
); return;
11507 case 15: load_pair_double (cpu
, offset
, Pre
); return;
11509 case 18: store_pair_long_double (cpu
, offset
, Post
); return;
11510 case 19: load_pair_long_double (cpu
, offset
, Post
); return;
11511 case 20: store_pair_long_double (cpu
, offset
, NoWriteBack
); return;
11512 case 21: load_pair_long_double (cpu
, offset
, NoWriteBack
); return;
11513 case 22: store_pair_long_double (cpu
, offset
, Pre
); return;
11514 case 23: load_pair_long_double (cpu
, offset
, Pre
); return;
static inline unsigned
vec_reg (unsigned v, unsigned o)
{
  return (v + o) & 0x3F;
}
11527 /* Load multiple N-element structures to M consecutive registers. */
11529 vec_load (sim_cpu
*cpu
, uint64_t address
, unsigned n
, unsigned m
)
11531 int all
= INSTR (30, 30);
11532 unsigned size
= INSTR (11, 10);
11533 unsigned vd
= INSTR (4, 0);
11534 unsigned rpt
= (n
== m
) ? 1 : m
;
11535 unsigned selem
= n
;
11540 case 0: /* 8-bit operations. */
11541 for (i
= 0; i
< rpt
; i
++)
11542 for (j
= 0; j
< (8 + (8 * all
)); j
++)
11543 for (k
= 0; k
< selem
; k
++)
11545 aarch64_set_vec_u8 (cpu
, vec_reg (vd
, i
+ k
), j
,
11546 aarch64_get_mem_u8 (cpu
, address
));
11551 case 1: /* 16-bit operations. */
11552 for (i
= 0; i
< rpt
; i
++)
11553 for (j
= 0; j
< (4 + (4 * all
)); j
++)
11554 for (k
= 0; k
< selem
; k
++)
11556 aarch64_set_vec_u16 (cpu
, vec_reg (vd
, i
+ k
), j
,
11557 aarch64_get_mem_u16 (cpu
, address
));
11562 case 2: /* 32-bit operations. */
11563 for (i
= 0; i
< rpt
; i
++)
11564 for (j
= 0; j
< (2 + (2 * all
)); j
++)
11565 for (k
= 0; k
< selem
; k
++)
11567 aarch64_set_vec_u32 (cpu
, vec_reg (vd
, i
+ k
), j
,
11568 aarch64_get_mem_u32 (cpu
, address
));
11573 case 3: /* 64-bit operations. */
11574 for (i
= 0; i
< rpt
; i
++)
11575 for (j
= 0; j
< (1 + all
); j
++)
11576 for (k
= 0; k
< selem
; k
++)
11578 aarch64_set_vec_u64 (cpu
, vec_reg (vd
, i
+ k
), j
,
11579 aarch64_get_mem_u64 (cpu
, address
));
11586 /* Load multiple 4-element structures into four consecutive registers. */
11588 LD4 (sim_cpu
*cpu
, uint64_t address
)
11590 vec_load (cpu
, address
, 4, 4);
11593 /* Load multiple 3-element structures into three consecutive registers. */
11595 LD3 (sim_cpu
*cpu
, uint64_t address
)
11597 vec_load (cpu
, address
, 3, 3);
11600 /* Load multiple 2-element structures into two consecutive registers. */
11602 LD2 (sim_cpu
*cpu
, uint64_t address
)
11604 vec_load (cpu
, address
, 2, 2);
11607 /* Load multiple 1-element structures into one register. */
11609 LD1_1 (sim_cpu
*cpu
, uint64_t address
)
11611 vec_load (cpu
, address
, 1, 1);
11614 /* Load multiple 1-element structures into two registers. */
11616 LD1_2 (sim_cpu
*cpu
, uint64_t address
)
11618 vec_load (cpu
, address
, 1, 2);
11621 /* Load multiple 1-element structures into three registers. */
11623 LD1_3 (sim_cpu
*cpu
, uint64_t address
)
11625 vec_load (cpu
, address
, 1, 3);
11628 /* Load multiple 1-element structures into four registers. */
11630 LD1_4 (sim_cpu
*cpu
, uint64_t address
)
11632 vec_load (cpu
, address
, 1, 4);
11635 /* Store multiple N-element structures from M consecutive registers. */
11637 vec_store (sim_cpu
*cpu
, uint64_t address
, unsigned n
, unsigned m
)
11639 int all
= INSTR (30, 30);
11640 unsigned size
= INSTR (11, 10);
11641 unsigned vd
= INSTR (4, 0);
11642 unsigned rpt
= (n
== m
) ? 1 : m
;
11643 unsigned selem
= n
;
11648 case 0: /* 8-bit operations. */
11649 for (i
= 0; i
< rpt
; i
++)
11650 for (j
= 0; j
< (8 + (8 * all
)); j
++)
11651 for (k
= 0; k
< selem
; k
++)
11655 aarch64_get_vec_u8 (cpu
, vec_reg (vd
, i
+ k
), j
));
11660 case 1: /* 16-bit operations. */
11661 for (i
= 0; i
< rpt
; i
++)
11662 for (j
= 0; j
< (4 + (4 * all
)); j
++)
11663 for (k
= 0; k
< selem
; k
++)
11665 aarch64_set_mem_u16
11667 aarch64_get_vec_u16 (cpu
, vec_reg (vd
, i
+ k
), j
));
11672 case 2: /* 32-bit operations. */
11673 for (i
= 0; i
< rpt
; i
++)
11674 for (j
= 0; j
< (2 + (2 * all
)); j
++)
11675 for (k
= 0; k
< selem
; k
++)
11677 aarch64_set_mem_u32
11679 aarch64_get_vec_u32 (cpu
, vec_reg (vd
, i
+ k
), j
));
11684 case 3: /* 64-bit operations. */
11685 for (i
= 0; i
< rpt
; i
++)
11686 for (j
= 0; j
< (1 + all
); j
++)
11687 for (k
= 0; k
< selem
; k
++)
11689 aarch64_set_mem_u64
11691 aarch64_get_vec_u64 (cpu
, vec_reg (vd
, i
+ k
), j
));
11698 /* Store multiple 4-element structure from four consecutive registers. */
11700 ST4 (sim_cpu
*cpu
, uint64_t address
)
11702 vec_store (cpu
, address
, 4, 4);
11705 /* Store multiple 3-element structures from three consecutive registers. */
11707 ST3 (sim_cpu
*cpu
, uint64_t address
)
11709 vec_store (cpu
, address
, 3, 3);
11712 /* Store multiple 2-element structures from two consecutive registers. */
11714 ST2 (sim_cpu
*cpu
, uint64_t address
)
11716 vec_store (cpu
, address
, 2, 2);
11719 /* Store multiple 1-element structures from one register. */
11721 ST1_1 (sim_cpu
*cpu
, uint64_t address
)
11723 vec_store (cpu
, address
, 1, 1);
11726 /* Store multiple 1-element structures from two registers. */
11728 ST1_2 (sim_cpu
*cpu
, uint64_t address
)
11730 vec_store (cpu
, address
, 1, 2);
11733 /* Store multiple 1-element structures from three registers. */
11735 ST1_3 (sim_cpu
*cpu
, uint64_t address
)
11737 vec_store (cpu
, address
, 1, 3);
11740 /* Store multiple 1-element structures from four registers. */
11742 ST1_4 (sim_cpu
*cpu
, uint64_t address
)
11744 vec_store (cpu
, address
, 1, 4);
11747 #define LDn_STn_SINGLE_LANE_AND_SIZE() \
11750 switch (INSTR (15, 14)) \
11753 lane = (full << 3) | (s << 2) | size; \
11758 if ((size & 1) == 1) \
11760 lane = (full << 2) | (s << 1) | (size >> 1); \
11765 if ((size & 2) == 2) \
11768 if ((size & 1) == 0) \
11770 lane = (full << 1) | s; \
11788 /* Load single structure into one lane of N registers. */
11790 do_vec_LDn_single (sim_cpu
*cpu
, uint64_t address
)
11793 instr[30] = element selector 0=>half, 1=>all elements
11794 instr[29,24] = 00 1101
11795 instr[23] = 0=>simple, 1=>post
11797 instr[21] = width: LD1-or-LD3 (0) / LD2-or-LD4 (1)
11798 instr[20,16] = 0 0000 (simple), Vinc (reg-post-inc, no SP),
11799 11111 (immediate post inc)
11800 instr[15,13] = opcode
11801 instr[12] = S, used for lane number
11802 instr[11,10] = size, also used for lane number
11803 instr[9,5] = address
11806 unsigned full
= INSTR (30, 30);
11807 unsigned vd
= INSTR (4, 0);
11808 unsigned size
= INSTR (11, 10);
11809 unsigned s
= INSTR (12, 12);
11810 int nregs
= ((INSTR (13, 13) << 1) | INSTR (21, 21)) + 1;
11814 NYI_assert (29, 24, 0x0D);
11815 NYI_assert (22, 22, 1);
11817 /* Compute the lane number first (using size), and then compute size. */
11818 LDn_STn_SINGLE_LANE_AND_SIZE ();
11820 for (i
= 0; i
< nregs
; i
++)
11825 uint8_t val
= aarch64_get_mem_u8 (cpu
, address
+ i
);
11826 aarch64_set_vec_u8 (cpu
, vd
+ i
, lane
, val
);
11832 uint16_t val
= aarch64_get_mem_u16 (cpu
, address
+ (i
* 2));
11833 aarch64_set_vec_u16 (cpu
, vd
+ i
, lane
, val
);
11839 uint32_t val
= aarch64_get_mem_u32 (cpu
, address
+ (i
* 4));
11840 aarch64_set_vec_u32 (cpu
, vd
+ i
, lane
, val
);
11846 uint64_t val
= aarch64_get_mem_u64 (cpu
, address
+ (i
* 8));
11847 aarch64_set_vec_u64 (cpu
, vd
+ i
, lane
, val
);
11853 /* Store single structure from one lane from N registers. */
11855 do_vec_STn_single (sim_cpu
*cpu
, uint64_t address
)
11858 instr[30] = element selector 0=>half, 1=>all elements
11859 instr[29,24] = 00 1101
11860 instr[23] = 0=>simple, 1=>post
11862 instr[21] = width: LD1-or-LD3 (0) / LD2-or-LD4 (1)
11863 instr[20,16] = 0 0000 (simple), Vinc (reg-post-inc, no SP),
11864 11111 (immediate post inc)
11865 instr[15,13] = opcode
11866 instr[12] = S, used for lane number
11867 instr[11,10] = size, also used for lane number
11868 instr[9,5] = address
11871 unsigned full
= INSTR (30, 30);
11872 unsigned vd
= INSTR (4, 0);
11873 unsigned size
= INSTR (11, 10);
11874 unsigned s
= INSTR (12, 12);
11875 int nregs
= ((INSTR (13, 13) << 1) | INSTR (21, 21)) + 1;
11879 NYI_assert (29, 24, 0x0D);
11880 NYI_assert (22, 22, 0);
11882 /* Compute the lane number first (using size), and then compute size. */
11883 LDn_STn_SINGLE_LANE_AND_SIZE ();
11885 for (i
= 0; i
< nregs
; i
++)
11890 uint8_t val
= aarch64_get_vec_u8 (cpu
, vd
+ i
, lane
);
11891 aarch64_set_mem_u8 (cpu
, address
+ i
, val
);
11897 uint16_t val
= aarch64_get_vec_u16 (cpu
, vd
+ i
, lane
);
11898 aarch64_set_mem_u16 (cpu
, address
+ (i
* 2), val
);
11904 uint32_t val
= aarch64_get_vec_u32 (cpu
, vd
+ i
, lane
);
11905 aarch64_set_mem_u32 (cpu
, address
+ (i
* 4), val
);
11911 uint64_t val
= aarch64_get_vec_u64 (cpu
, vd
+ i
, lane
);
11912 aarch64_set_mem_u64 (cpu
, address
+ (i
* 8), val
);
11918 /* Load single structure into all lanes of N registers. */
11920 do_vec_LDnR (sim_cpu
*cpu
, uint64_t address
)
11923 instr[30] = element selector 0=>half, 1=>all elements
11924 instr[29,24] = 00 1101
11925 instr[23] = 0=>simple, 1=>post
11927 instr[21] = width: LD1R-or-LD3R (0) / LD2R-or-LD4R (1)
11928 instr[20,16] = 0 0000 (simple), Vinc (reg-post-inc, no SP),
11929 11111 (immediate post inc)
11931 instr[13] = width: LD1R-or-LD2R (0) / LD3R-or-LD4R (1)
11933 instr[11,10] = element size 00=> byte(b), 01=> half(h),
11934 10=> word(s), 11=> double(d)
11935 instr[9,5] = address
11938 unsigned full
= INSTR (30, 30);
11939 unsigned vd
= INSTR (4, 0);
11940 unsigned size
= INSTR (11, 10);
11941 int nregs
= ((INSTR (13, 13) << 1) | INSTR (21, 21)) + 1;
11944 NYI_assert (29, 24, 0x0D);
11945 NYI_assert (22, 22, 1);
11946 NYI_assert (15, 14, 3);
11947 NYI_assert (12, 12, 0);
11949 for (n
= 0; n
< nregs
; n
++)
11954 uint8_t val
= aarch64_get_mem_u8 (cpu
, address
+ n
);
11955 for (i
= 0; i
< (full
? 16 : 8); i
++)
11956 aarch64_set_vec_u8 (cpu
, vd
+ n
, i
, val
);
11962 uint16_t val
= aarch64_get_mem_u16 (cpu
, address
+ (n
* 2));
11963 for (i
= 0; i
< (full
? 8 : 4); i
++)
11964 aarch64_set_vec_u16 (cpu
, vd
+ n
, i
, val
);
11970 uint32_t val
= aarch64_get_mem_u32 (cpu
, address
+ (n
* 4));
11971 for (i
= 0; i
< (full
? 4 : 2); i
++)
11972 aarch64_set_vec_u32 (cpu
, vd
+ n
, i
, val
);
11978 uint64_t val
= aarch64_get_mem_u64 (cpu
, address
+ (n
* 8));
11979 for (i
= 0; i
< (full
? 2 : 1); i
++)
11980 aarch64_set_vec_u64 (cpu
, vd
+ n
, i
, val
);
11990 do_vec_load_store (sim_cpu
*cpu
)
11992 /* {LD|ST}<N> {Vd..Vd+N}, vaddr
11995 instr[30] = element selector 0=>half, 1=>all elements
11996 instr[29,25] = 00110
11997 instr[24] = 0=>multiple struct, 1=>single struct
11998 instr[23] = 0=>simple, 1=>post
11999 instr[22] = 0=>store, 1=>load
12000 instr[21] = 0 (LDn) / small(0)-large(1) selector (LDnR)
12001 instr[20,16] = 00000 (simple), Vinc (reg-post-inc, no SP),
12002 11111 (immediate post inc)
12003 instr[15,12] = elements and destinations. eg for load:
12004 0000=>LD4 => load multiple 4-element to
12005 four consecutive registers
12006 0100=>LD3 => load multiple 3-element to
12007 three consecutive registers
12008 1000=>LD2 => load multiple 2-element to
12009 two consecutive registers
12010 0010=>LD1 => load multiple 1-element to
12011 four consecutive registers
12012 0110=>LD1 => load multiple 1-element to
12013 three consecutive registers
12014 1010=>LD1 => load multiple 1-element to
12015 two consecutive registers
12016 0111=>LD1 => load multiple 1-element to
12020 instr[11,10] = element size 00=> byte(b), 01=> half(h),
12021 10=> word(s), 11=> double(d)
12022 instr[9,5] = Vn, can be SP
12032 if (INSTR (31, 31) != 0 || INSTR (29, 25) != 0x06)
12035 single
= INSTR (24, 24);
12036 post
= INSTR (23, 23);
12037 load
= INSTR (22, 22);
12038 type
= INSTR (15, 12);
12040 address
= aarch64_get_reg_u64 (cpu
, vn
, SP_OK
);
12042 if (! single
&& INSTR (21, 21) != 0)
12047 unsigned vm
= INSTR (20, 16);
12051 unsigned sizeof_operation
;
12055 if ((type
>= 0) && (type
<= 11))
12057 int nregs
= ((INSTR (13, 13) << 1) | INSTR (21, 21)) + 1;
12058 switch (INSTR (15, 14))
12061 sizeof_operation
= nregs
* 1;
12064 sizeof_operation
= nregs
* 2;
12067 if (INSTR (10, 10) == 0)
12068 sizeof_operation
= nregs
* 4;
12070 sizeof_operation
= nregs
* 8;
12076 else if (type
== 0xC)
12078 sizeof_operation
= INSTR (21, 21) ? 2 : 1;
12079 sizeof_operation
<<= INSTR (11, 10);
12081 else if (type
== 0xE)
12083 sizeof_operation
= INSTR (21, 21) ? 4 : 3;
12084 sizeof_operation
<<= INSTR (11, 10);
12093 case 0: sizeof_operation
= 32; break;
12094 case 4: sizeof_operation
= 24; break;
12095 case 8: sizeof_operation
= 16; break;
12098 /* One register, immediate offset variant. */
12099 sizeof_operation
= 8;
12103 /* Two registers, immediate offset variant. */
12104 sizeof_operation
= 16;
12108 /* Three registers, immediate offset variant. */
12109 sizeof_operation
= 24;
12113 /* Four registers, immediate offset variant. */
12114 sizeof_operation
= 32;
12121 if (INSTR (30, 30))
12122 sizeof_operation
*= 2;
12125 aarch64_set_reg_u64 (cpu
, vn
, SP_OK
, address
+ sizeof_operation
);
12128 aarch64_set_reg_u64 (cpu
, vn
, SP_OK
,
12129 address
+ aarch64_get_reg_u64 (cpu
, vm
, NO_SP
));
12133 NYI_assert (20, 16, 0);
12140 if ((type
>= 0) && (type
<= 11))
12141 do_vec_LDn_single (cpu
, address
);
12142 else if ((type
== 0xC) || (type
== 0xE))
12143 do_vec_LDnR (cpu
, address
);
12150 if ((type
>= 0) && (type
<= 11))
12152 do_vec_STn_single (cpu
, address
);
12163 case 0: LD4 (cpu
, address
); return;
12164 case 4: LD3 (cpu
, address
); return;
12165 case 8: LD2 (cpu
, address
); return;
12166 case 2: LD1_4 (cpu
, address
); return;
12167 case 6: LD1_3 (cpu
, address
); return;
12168 case 10: LD1_2 (cpu
, address
); return;
12169 case 7: LD1_1 (cpu
, address
); return;
12179 case 0: ST4 (cpu
, address
); return;
12180 case 4: ST3 (cpu
, address
); return;
12181 case 8: ST2 (cpu
, address
); return;
12182 case 2: ST1_4 (cpu
, address
); return;
12183 case 6: ST1_3 (cpu
, address
); return;
12184 case 10: ST1_2 (cpu
, address
); return;
12185 case 7: ST1_1 (cpu
, address
); return;
12192 dexLdSt (sim_cpu
*cpu
)
12194 /* uint32_t group = dispatchGroup (aarch64_get_instr (cpu));
12195 assert group == GROUP_LDST_0100 || group == GROUP_LDST_0110 ||
12196 group == GROUP_LDST_1100 || group == GROUP_LDST_1110
12197 bits [29,28:26] of a LS are the secondary dispatch vector. */
12198 uint32_t group2
= dispatchLS (aarch64_get_instr (cpu
));
12203 dexLoadExclusive (cpu
); return;
12207 dexLoadLiteral (cpu
); return;
12211 dexLoadOther (cpu
); return;
12213 case LS_ADVSIMD_001
:
12214 do_vec_load_store (cpu
); return;
12217 dex_load_store_pair_gr (cpu
); return;
12220 dex_load_store_pair_fp (cpu
); return;
12223 /* Should never reach here. */
12228 /* Specific decode and execute for group Data Processing Register. */
12231 dexLogicalShiftedRegister (sim_cpu
*cpu
)
12233 /* instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
12235 instr[28:24] = 01010
12236 instr[23,22] = shift : 0 ==> LSL, 1 ==> LSR, 2 ==> ASR, 3 ==> ROR
12239 instr[15,10] = count : must be 0xxxxx for 32 bit
12243 uint32_t size
= INSTR (31, 31);
12244 Shift shiftType
= INSTR (23, 22);
12245 uint32_t count
= INSTR (15, 10);
12247 /* 32 bit operations must have count[5] = 0.
12248 or else we have an UNALLOC. */
12249 if (size
== 0 && uimm (count
, 5, 5))
12252 /* Dispatch on size:op:N. */
12253 switch ((INSTR (31, 29) << 1) | INSTR (21, 21))
12255 case 0: and32_shift (cpu
, shiftType
, count
); return;
12256 case 1: bic32_shift (cpu
, shiftType
, count
); return;
12257 case 2: orr32_shift (cpu
, shiftType
, count
); return;
12258 case 3: orn32_shift (cpu
, shiftType
, count
); return;
12259 case 4: eor32_shift (cpu
, shiftType
, count
); return;
12260 case 5: eon32_shift (cpu
, shiftType
, count
); return;
12261 case 6: ands32_shift (cpu
, shiftType
, count
); return;
12262 case 7: bics32_shift (cpu
, shiftType
, count
); return;
12263 case 8: and64_shift (cpu
, shiftType
, count
); return;
12264 case 9: bic64_shift (cpu
, shiftType
, count
); return;
12265 case 10:orr64_shift (cpu
, shiftType
, count
); return;
12266 case 11:orn64_shift (cpu
, shiftType
, count
); return;
12267 case 12:eor64_shift (cpu
, shiftType
, count
); return;
12268 case 13:eon64_shift (cpu
, shiftType
, count
); return;
12269 case 14:ands64_shift (cpu
, shiftType
, count
); return;
12270 case 15:bics64_shift (cpu
, shiftType
, count
); return;
12274 /* 32 bit conditional select. */
12276 csel32 (sim_cpu
*cpu
, CondCode cc
)
12278 unsigned rm
= INSTR (20, 16);
12279 unsigned rn
= INSTR (9, 5);
12280 unsigned rd
= INSTR (4, 0);
12282 aarch64_set_reg_u64 (cpu
, rd
, NO_SP
,
12283 testConditionCode (cpu
, cc
)
12284 ? aarch64_get_reg_u32 (cpu
, rn
, NO_SP
)
12285 : aarch64_get_reg_u32 (cpu
, rm
, NO_SP
));
12288 /* 64 bit conditional select. */
12290 csel64 (sim_cpu
*cpu
, CondCode cc
)
12292 unsigned rm
= INSTR (20, 16);
12293 unsigned rn
= INSTR (9, 5);
12294 unsigned rd
= INSTR (4, 0);
12296 aarch64_set_reg_u64 (cpu
, rd
, NO_SP
,
12297 testConditionCode (cpu
, cc
)
12298 ? aarch64_get_reg_u64 (cpu
, rn
, NO_SP
)
12299 : aarch64_get_reg_u64 (cpu
, rm
, NO_SP
));
12302 /* 32 bit conditional increment. */
12304 csinc32 (sim_cpu
*cpu
, CondCode cc
)
12306 unsigned rm
= INSTR (20, 16);
12307 unsigned rn
= INSTR (9, 5);
12308 unsigned rd
= INSTR (4, 0);
12310 aarch64_set_reg_u64 (cpu
, rd
, NO_SP
,
12311 testConditionCode (cpu
, cc
)
12312 ? aarch64_get_reg_u32 (cpu
, rn
, NO_SP
)
12313 : aarch64_get_reg_u32 (cpu
, rm
, NO_SP
) + 1);
12316 /* 64 bit conditional increment. */
12318 csinc64 (sim_cpu
*cpu
, CondCode cc
)
12320 unsigned rm
= INSTR (20, 16);
12321 unsigned rn
= INSTR (9, 5);
12322 unsigned rd
= INSTR (4, 0);
12324 aarch64_set_reg_u64 (cpu
, rd
, NO_SP
,
12325 testConditionCode (cpu
, cc
)
12326 ? aarch64_get_reg_u64 (cpu
, rn
, NO_SP
)
12327 : aarch64_get_reg_u64 (cpu
, rm
, NO_SP
) + 1);
12330 /* 32 bit conditional invert. */
12332 csinv32 (sim_cpu
*cpu
, CondCode cc
)
12334 unsigned rm
= INSTR (20, 16);
12335 unsigned rn
= INSTR (9, 5);
12336 unsigned rd
= INSTR (4, 0);
12338 aarch64_set_reg_u64 (cpu
, rd
, NO_SP
,
12339 testConditionCode (cpu
, cc
)
12340 ? aarch64_get_reg_u32 (cpu
, rn
, NO_SP
)
12341 : ~ aarch64_get_reg_u32 (cpu
, rm
, NO_SP
));
12344 /* 64 bit conditional invert. */
12346 csinv64 (sim_cpu
*cpu
, CondCode cc
)
12348 unsigned rm
= INSTR (20, 16);
12349 unsigned rn
= INSTR (9, 5);
12350 unsigned rd
= INSTR (4, 0);
12352 aarch64_set_reg_u64 (cpu
, rd
, NO_SP
,
12353 testConditionCode (cpu
, cc
)
12354 ? aarch64_get_reg_u64 (cpu
, rn
, NO_SP
)
12355 : ~ aarch64_get_reg_u64 (cpu
, rm
, NO_SP
));
12358 /* 32 bit conditional negate. */
12360 csneg32 (sim_cpu
*cpu
, CondCode cc
)
12362 unsigned rm
= INSTR (20, 16);
12363 unsigned rn
= INSTR (9, 5);
12364 unsigned rd
= INSTR (4, 0);
12366 aarch64_set_reg_u64 (cpu
, rd
, NO_SP
,
12367 testConditionCode (cpu
, cc
)
12368 ? aarch64_get_reg_u32 (cpu
, rn
, NO_SP
)
12369 : - aarch64_get_reg_u32 (cpu
, rm
, NO_SP
));
12372 /* 64 bit conditional negate. */
12374 csneg64 (sim_cpu
*cpu
, CondCode cc
)
12376 unsigned rm
= INSTR (20, 16);
12377 unsigned rn
= INSTR (9, 5);
12378 unsigned rd
= INSTR (4, 0);
12380 aarch64_set_reg_u64 (cpu
, rd
, NO_SP
,
12381 testConditionCode (cpu
, cc
)
12382 ? aarch64_get_reg_u64 (cpu
, rn
, NO_SP
)
12383 : - aarch64_get_reg_u64 (cpu
, rm
, NO_SP
));
static void
dexCondSelect (sim_cpu *cpu)
{
  /* instr[28,21] = 11011011
     instr[31]    = size : 0 ==> 32 bit, 1 ==> 64 bit
     instr[30:11,10] = op : 000 ==> CSEL, 001 ==> CSINC,
                           100 ==> CSINV, 101 ==> CSNEG,
     instr[29]    = S : 0 ==> ok, 1 ==> UNALLOC
     instr[15,12] = cond  */

  CondCode cc = INSTR (15, 12);
  uint32_t S = INSTR (29, 29);
  uint32_t op2 = INSTR (11, 10);

  if (S == 1)
    HALT_UNALLOC;

  if (op2 & 0x2)
    HALT_UNALLOC;

  switch ((INSTR (31, 30) << 1) | op2)
    {
    case 0: csel32 (cpu, cc); return;
    case 1: csinc32 (cpu, cc); return;
    case 2: csinv32 (cpu, cc); return;
    case 3: csneg32 (cpu, cc); return;
    case 4: csel64 (cpu, cc); return;
    case 5: csinc64 (cpu, cc); return;
    case 6: csinv64 (cpu, cc); return;
    case 7: csneg64 (cpu, cc); return;
    }
}
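
/* Illustrative note: the assembler aliases CSET and CINC are just CSINC
   encodings, so they land in csinc32/csinc64 above.  For example
   "cset w0, eq" is "csinc w0, wzr, wzr, ne": if NE holds the result is
   WZR = 0, otherwise it is WZR + 1 = 1, i.e. W0 becomes 1 exactly when
   the EQ condition passes.  */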
12421 /* Some helpers for counting leading 1 or 0 bits. */
12423 /* Counts the number of leading bits which are the same
12424 in a 32 bit value in the range 1 to 32. */
12426 leading32 (uint32_t value
)
12428 int32_t mask
= 0xffff0000;
12429 uint32_t count
= 16; /* Counts number of bits set in mask. */
12430 uint32_t lo
= 1; /* Lower bound for number of sign bits. */
12431 uint32_t hi
= 32; /* Upper bound for number of sign bits. */
12433 while (lo
+ 1 < hi
)
12435 int32_t test
= (value
& mask
);
12437 if (test
== 0 || test
== mask
)
12440 count
= (lo
+ hi
) / 2;
12441 mask
>>= (count
- lo
);
12446 count
= (lo
+ hi
) / 2;
12447 mask
<<= hi
- count
;
12456 test
= (value
& mask
);
12458 if (test
== 0 || test
== mask
)
12467 /* Counts the number of leading bits which are the same
12468 in a 64 bit value in the range 1 to 64. */
12470 leading64 (uint64_t value
)
12472 int64_t mask
= 0xffffffff00000000LL
;
12473 uint64_t count
= 32; /* Counts number of bits set in mask. */
12474 uint64_t lo
= 1; /* Lower bound for number of sign bits. */
12475 uint64_t hi
= 64; /* Upper bound for number of sign bits. */
12477 while (lo
+ 1 < hi
)
12479 int64_t test
= (value
& mask
);
12481 if (test
== 0 || test
== mask
)
12484 count
= (lo
+ hi
) / 2;
12485 mask
>>= (count
- lo
);
12490 count
= (lo
+ hi
) / 2;
12491 mask
<<= hi
- count
;
12500 test
= (value
& mask
);
12502 if (test
== 0 || test
== mask
)
12511 /* Bit operations. */
12512 /* N.B register args may not be SP. */
12514 /* 32 bit count leading sign bits. */
12516 cls32 (sim_cpu
*cpu
)
12518 unsigned rn
= INSTR (9, 5);
12519 unsigned rd
= INSTR (4, 0);
12521 /* N.B. the result needs to exclude the leading bit. */
12522 aarch64_set_reg_u64
12523 (cpu
, rd
, NO_SP
, leading32 (aarch64_get_reg_u32 (cpu
, rn
, NO_SP
)) - 1);
12526 /* 64 bit count leading sign bits. */
12528 cls64 (sim_cpu
*cpu
)
12530 unsigned rn
= INSTR (9, 5);
12531 unsigned rd
= INSTR (4, 0);
12533 /* N.B. the result needs to exclude the leading bit. */
12534 aarch64_set_reg_u64
12535 (cpu
, rd
, NO_SP
, leading64 (aarch64_get_reg_u64 (cpu
, rn
, NO_SP
)) - 1);
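
/* Illustrative check of the "- 1" above: leading32/leading64 count how many
   leading bits are identical, and CLS is that count minus one because the
   sign bit itself is not counted.  E.g. for W1 = 0xffff0001, leading32
   returns 16 and "cls w0, w1" writes 15; for W1 = 0x00000001 it returns 31
   and CLS writes 30.  */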
12538 /* 32 bit count leading zero bits. */
12540 clz32 (sim_cpu
*cpu
)
12542 unsigned rn
= INSTR (9, 5);
12543 unsigned rd
= INSTR (4, 0);
12544 uint32_t value
= aarch64_get_reg_u32 (cpu
, rn
, NO_SP
);
12546 /* if the sign (top) bit is set then the count is 0. */
12547 if (pick32 (value
, 31, 31))
12548 aarch64_set_reg_u64 (cpu
, rd
, NO_SP
, 0L);
12550 aarch64_set_reg_u64 (cpu
, rd
, NO_SP
, leading32 (value
));
12553 /* 64 bit count leading zero bits. */
12555 clz64 (sim_cpu
*cpu
)
12557 unsigned rn
= INSTR (9, 5);
12558 unsigned rd
= INSTR (4, 0);
12559 uint64_t value
= aarch64_get_reg_u64 (cpu
, rn
, NO_SP
);
12561 /* if the sign (top) bit is set then the count is 0. */
12562 if (pick64 (value
, 63, 63))
12563 aarch64_set_reg_u64 (cpu
, rd
, NO_SP
, 0L);
12565 aarch64_set_reg_u64 (cpu
, rd
, NO_SP
, leading64 (value
));
12568 /* 32 bit reverse bits. */
12570 rbit32 (sim_cpu
*cpu
)
12572 unsigned rn
= INSTR (9, 5);
12573 unsigned rd
= INSTR (4, 0);
12574 uint32_t value
= aarch64_get_reg_u32 (cpu
, rn
, NO_SP
);
12575 uint32_t result
= 0;
12578 for (i
= 0; i
< 32; i
++)
12581 result
|= (value
& 1);
12584 aarch64_set_reg_u64 (cpu
, rd
, NO_SP
, result
);
12587 /* 64 bit reverse bits. */
12589 rbit64 (sim_cpu
*cpu
)
12591 unsigned rn
= INSTR (9, 5);
12592 unsigned rd
= INSTR (4, 0);
12593 uint64_t value
= aarch64_get_reg_u64 (cpu
, rn
, NO_SP
);
12594 uint64_t result
= 0;
12597 for (i
= 0; i
< 64; i
++)
12600 result
|= (value
& 1UL);
12603 aarch64_set_reg_u64 (cpu
, rd
, NO_SP
, result
);
12606 /* 32 bit reverse bytes. */
12608 rev32 (sim_cpu
*cpu
)
12610 unsigned rn
= INSTR (9, 5);
12611 unsigned rd
= INSTR (4, 0);
12612 uint32_t value
= aarch64_get_reg_u32 (cpu
, rn
, NO_SP
);
12613 uint32_t result
= 0;
12616 for (i
= 0; i
< 4; i
++)
12619 result
|= (value
& 0xff);
12622 aarch64_set_reg_u64 (cpu
, rd
, NO_SP
, result
);
12625 /* 64 bit reverse bytes. */
12627 rev64 (sim_cpu
*cpu
)
12629 unsigned rn
= INSTR (9, 5);
12630 unsigned rd
= INSTR (4, 0);
12631 uint64_t value
= aarch64_get_reg_u64 (cpu
, rn
, NO_SP
);
12632 uint64_t result
= 0;
12635 for (i
= 0; i
< 8; i
++)
12638 result
|= (value
& 0xffULL
);
12641 aarch64_set_reg_u64 (cpu
, rd
, NO_SP
, result
);
12644 /* 32 bit reverse shorts. */
12645 /* N.B. this reverses the order of the bytes in each half word. */
12647 revh32 (sim_cpu
*cpu
)
12649 unsigned rn
= INSTR (9, 5);
12650 unsigned rd
= INSTR (4, 0);
12651 uint32_t value
= aarch64_get_reg_u32 (cpu
, rn
, NO_SP
);
12652 uint32_t result
= 0;
12655 for (i
= 0; i
< 2; i
++)
12658 result
|= (value
& 0x00ff00ff);
12661 aarch64_set_reg_u64 (cpu
, rd
, NO_SP
, result
);
12664 /* 64 bit reverse shorts. */
12665 /* N.B. this reverses the order of the bytes in each half word. */
12667 revh64 (sim_cpu
*cpu
)
12669 unsigned rn
= INSTR (9, 5);
12670 unsigned rd
= INSTR (4, 0);
12671 uint64_t value
= aarch64_get_reg_u64 (cpu
, rn
, NO_SP
);
12672 uint64_t result
= 0;
12675 for (i
= 0; i
< 2; i
++)
12678 result
|= (value
& 0x00ff00ff00ff00ffULL
);
12681 aarch64_set_reg_u64 (cpu
, rd
, NO_SP
, result
);
12685 dexDataProc1Source (sim_cpu
*cpu
)
12688 instr[28,21] = 111010110
12689 instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
12690 instr[29] = S : 0 ==> ok, 1 ==> UNALLOC
12691 instr[20,16] = opcode2 : 00000 ==> ok, ow ==> UNALLOC
12692 instr[15,10] = opcode : 000000 ==> RBIT, 000001 ==> REV16,
12693 000010 ==> REV, 000011 ==> UNALLOC
12694 000100 ==> CLZ, 000101 ==> CLS
12696 instr[9,5] = rn : may not be SP
12697 instr[4,0] = rd : may not be SP. */
12699 uint32_t S
= INSTR (29, 29);
12700 uint32_t opcode2
= INSTR (20, 16);
12701 uint32_t opcode
= INSTR (15, 10);
12702 uint32_t dispatch
= ((INSTR (31, 31) << 3) | opcode
);
12715 case 0: rbit32 (cpu
); return;
12716 case 1: revh32 (cpu
); return;
12717 case 2: rev32 (cpu
); return;
12718 case 4: clz32 (cpu
); return;
12719 case 5: cls32 (cpu
); return;
12720 case 8: rbit64 (cpu
); return;
12721 case 9: revh64 (cpu
); return;
12722 case 10:rev32 (cpu
); return;
12723 case 11:rev64 (cpu
); return;
12724 case 12:clz64 (cpu
); return;
12725 case 13:cls64 (cpu
); return;
12726 default: HALT_UNALLOC
;
12731 Shifts by count supplied in register.
12732 N.B register args may not be SP.
12733 These all use the shifted auxiliary function for
12734 simplicity and clarity. Writing the actual shift
12735 inline would avoid a branch and so be faster but
12736 would also necessitate getting signs right. */
/* 32 bit arithmetic shift right.  */
static void
asrv32 (sim_cpu *cpu)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  aarch64_set_reg_u64
    (cpu, rd, NO_SP,
     shifted32 (aarch64_get_reg_u32 (cpu, rn, NO_SP), ASR,
                (aarch64_get_reg_u32 (cpu, rm, NO_SP) & 0x1f)));
}

/* 64 bit arithmetic shift right.  */
static void
asrv64 (sim_cpu *cpu)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  aarch64_set_reg_u64
    (cpu, rd, NO_SP,
     shifted64 (aarch64_get_reg_u64 (cpu, rn, NO_SP), ASR,
                (aarch64_get_reg_u64 (cpu, rm, NO_SP) & 0x3f)));
}

/* 32 bit logical shift left.  */
static void
lslv32 (sim_cpu *cpu)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  aarch64_set_reg_u64
    (cpu, rd, NO_SP,
     shifted32 (aarch64_get_reg_u32 (cpu, rn, NO_SP), LSL,
                (aarch64_get_reg_u32 (cpu, rm, NO_SP) & 0x1f)));
}

/* 64 bit logical shift left.  */
static void
lslv64 (sim_cpu *cpu)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  aarch64_set_reg_u64
    (cpu, rd, NO_SP,
     shifted64 (aarch64_get_reg_u64 (cpu, rn, NO_SP), LSL,
                (aarch64_get_reg_u64 (cpu, rm, NO_SP) & 0x3f)));
}

/* 32 bit logical shift right.  */
static void
lsrv32 (sim_cpu *cpu)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  aarch64_set_reg_u64
    (cpu, rd, NO_SP,
     shifted32 (aarch64_get_reg_u32 (cpu, rn, NO_SP), LSR,
                (aarch64_get_reg_u32 (cpu, rm, NO_SP) & 0x1f)));
}

/* 64 bit logical shift right.  */
static void
lsrv64 (sim_cpu *cpu)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  aarch64_set_reg_u64
    (cpu, rd, NO_SP,
     shifted64 (aarch64_get_reg_u64 (cpu, rn, NO_SP), LSR,
                (aarch64_get_reg_u64 (cpu, rm, NO_SP) & 0x3f)));
}

/* 32 bit rotate right.  */
static void
rorv32 (sim_cpu *cpu)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  aarch64_set_reg_u64
    (cpu, rd, NO_SP,
     shifted32 (aarch64_get_reg_u32 (cpu, rn, NO_SP), ROR,
                (aarch64_get_reg_u32 (cpu, rm, NO_SP) & 0x1f)));
}

/* 64 bit rotate right.  */
static void
rorv64 (sim_cpu *cpu)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  aarch64_set_reg_u64
    (cpu, rd, NO_SP,
     shifted64 (aarch64_get_reg_u64 (cpu, rn, NO_SP), ROR,
                (aarch64_get_reg_u64 (cpu, rm, NO_SP) & 0x3f)));
}
/* 32 bit signed divide.  */
static void
sdiv32 (sim_cpu *cpu)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);
  /* N.B. the pseudo-code does the divide using 64 bit data.  */
  /* TODO : check that this rounds towards zero as required.  */
  int64_t dividend = aarch64_get_reg_s32 (cpu, rn, NO_SP);
  int64_t divisor = aarch64_get_reg_s32 (cpu, rm, NO_SP);

  aarch64_set_reg_s64 (cpu, rd, NO_SP,
                       divisor ? ((int32_t) (dividend / divisor)) : 0);
}
/* 64 bit signed divide.  */
static void
sdiv64 (sim_cpu *cpu)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  /* TODO : check that this rounds towards zero as required.  */
  int64_t divisor = aarch64_get_reg_s64 (cpu, rm, NO_SP);

  aarch64_set_reg_s64
    (cpu, rd, NO_SP,
     divisor ? (aarch64_get_reg_s64 (cpu, rn, NO_SP) / divisor) : 0);
}
/* 32 bit unsigned divide.  */
static void
udiv32 (sim_cpu *cpu)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  /* N.B. the pseudo-code does the divide using 64 bit data.  */
  uint64_t dividend = aarch64_get_reg_u32 (cpu, rn, NO_SP);
  uint64_t divisor = aarch64_get_reg_u32 (cpu, rm, NO_SP);

  aarch64_set_reg_u64 (cpu, rd, NO_SP,
                       divisor ? (uint32_t) (dividend / divisor) : 0);
}
/* 64 bit unsigned divide.  */
static void
udiv64 (sim_cpu *cpu)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  /* TODO : check that this rounds towards zero as required.  */
  uint64_t divisor = aarch64_get_reg_u64 (cpu, rm, NO_SP);

  aarch64_set_reg_u64
    (cpu, rd, NO_SP,
     divisor ? (aarch64_get_reg_u64 (cpu, rn, NO_SP) / divisor) : 0);
}
static void
dexDataProc2Source (sim_cpu *cpu)
{
  /* assert instr[30] == 0
     instr[28,21] == 11010110
     instr[31]    = size : 0 ==> 32 bit, 1 ==> 64 bit
     instr[29]    = S : 0 ==> ok, 1 ==> UNALLOC
     instr[15,10] = opcode : 000010 ==> UDIV, 000011 ==> SDIV,
                             001000 ==> LSLV, 001001 ==> LSRV
                             001010 ==> ASRV, 001011 ==> RORV
                             ow ==> UNALLOC.  */

  uint32_t dispatch;
  uint32_t S = INSTR (29, 29);
  uint32_t opcode = INSTR (15, 10);

  if (S == 1)
    HALT_UNALLOC;

  if (opcode & 0x34)
    HALT_UNALLOC;

  dispatch = (  (INSTR (31, 31) << 3)
              | (uimm (opcode, 3, 3) << 2)
              |  uimm (opcode, 1, 0));

  switch (dispatch)
    {
    case 2:  udiv32 (cpu); return;
    case 3:  sdiv32 (cpu); return;
    case 4:  lslv32 (cpu); return;
    case 5:  lsrv32 (cpu); return;
    case 6:  asrv32 (cpu); return;
    case 7:  rorv32 (cpu); return;
    case 10: udiv64 (cpu); return;
    case 11: sdiv64 (cpu); return;
    case 12: lslv64 (cpu); return;
    case 13: lsrv64 (cpu); return;
    case 14: asrv64 (cpu); return;
    case 15: rorv64 (cpu); return;
    default: HALT_UNALLOC;
    }
}
/* 32 bit multiply and add.  */
static void
madd32 (sim_cpu *cpu)
{
  unsigned rm = INSTR (20, 16);
  unsigned ra = INSTR (14, 10);
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, NO_SP,
                       aarch64_get_reg_u32 (cpu, ra, NO_SP)
                       + aarch64_get_reg_u32 (cpu, rn, NO_SP)
                       * aarch64_get_reg_u32 (cpu, rm, NO_SP));
}

/* 64 bit multiply and add.  */
static void
madd64 (sim_cpu *cpu)
{
  unsigned rm = INSTR (20, 16);
  unsigned ra = INSTR (14, 10);
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, NO_SP,
                       aarch64_get_reg_u64 (cpu, ra, NO_SP)
                       + (aarch64_get_reg_u64 (cpu, rn, NO_SP)
                          * aarch64_get_reg_u64 (cpu, rm, NO_SP)));
}

/* 32 bit multiply and sub.  */
static void
msub32 (sim_cpu *cpu)
{
  unsigned rm = INSTR (20, 16);
  unsigned ra = INSTR (14, 10);
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, NO_SP,
                       aarch64_get_reg_u32 (cpu, ra, NO_SP)
                       - aarch64_get_reg_u32 (cpu, rn, NO_SP)
                       * aarch64_get_reg_u32 (cpu, rm, NO_SP));
}

/* 64 bit multiply and sub.  */
static void
msub64 (sim_cpu *cpu)
{
  unsigned rm = INSTR (20, 16);
  unsigned ra = INSTR (14, 10);
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, NO_SP,
                       aarch64_get_reg_u64 (cpu, ra, NO_SP)
                       - aarch64_get_reg_u64 (cpu, rn, NO_SP)
                       * aarch64_get_reg_u64 (cpu, rm, NO_SP));
}
/* Signed multiply add long -- source, source2 : 32 bit, source3 : 64 bit.  */
static void
smaddl (sim_cpu *cpu)
{
  unsigned rm = INSTR (20, 16);
  unsigned ra = INSTR (14, 10);
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  /* N.B. we need to multiply the signed 32 bit values in rn, rm to
     obtain a 64 bit product.  */
  aarch64_set_reg_s64
    (cpu, rd, NO_SP,
     aarch64_get_reg_s64 (cpu, ra, NO_SP)
     + ((int64_t) aarch64_get_reg_s32 (cpu, rn, NO_SP))
     * ((int64_t) aarch64_get_reg_s32 (cpu, rm, NO_SP)));
}

/* Signed multiply sub long -- source, source2 : 32 bit, source3 : 64 bit.  */
static void
smsubl (sim_cpu *cpu)
{
  unsigned rm = INSTR (20, 16);
  unsigned ra = INSTR (14, 10);
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  /* N.B. we need to multiply the signed 32 bit values in rn, rm to
     obtain a 64 bit product.  */
  aarch64_set_reg_s64
    (cpu, rd, NO_SP,
     aarch64_get_reg_s64 (cpu, ra, NO_SP)
     - ((int64_t) aarch64_get_reg_s32 (cpu, rn, NO_SP))
     * ((int64_t) aarch64_get_reg_s32 (cpu, rm, NO_SP)));
}
/* Integer Multiply/Divide.  */

/* First some macros and a helper function.  */
/* Macros to test or access elements of 64 bit words.  */

/* Mask used to access lo 32 bits of 64 bit unsigned int.  */
#define LOW_WORD_MASK ((1ULL << 32) - 1)
/* Return the lo 32 bit word of a 64 bit unsigned int as a 64 bit unsigned int.  */
#define lowWordToU64(_value_u64) ((_value_u64) & LOW_WORD_MASK)
/* Return the hi 32 bit word of a 64 bit unsigned int as a 64 bit unsigned int.  */
#define highWordToU64(_value_u64) ((_value_u64) >> 32)

/* Offset of sign bit in 64 bit signed integer.  */
#define SIGN_SHIFT_U64 63
/* The sign bit itself -- also identifies the minimum negative int value.  */
#define SIGN_BIT_U64 (1ULL << SIGN_SHIFT_U64)
/* Return true if a 64 bit signed int presented as an unsigned int is the
   most negative value.  */
#define isMinimumU64(_value_u64) ((_value_u64) == SIGN_BIT_U64)
/* Return true (non-zero) if a 64 bit signed int presented as an unsigned
   int has its sign bit set.  */
#define isSignSetU64(_value_u64) ((_value_u64) & SIGN_BIT_U64)
/* Return 1L or -1L according to whether a 64 bit signed int presented as
   an unsigned int has its sign bit set or not.  */
#define signOfU64(_value_u64) (1L + (((_value_u64) >> SIGN_SHIFT_U64) * -2L))
/* Clear the sign bit of a 64 bit signed int presented as an unsigned int.  */
#define clearSignU64(_value_u64) ((_value_u64) &= ~SIGN_BIT_U64)
/* Multiply two 64 bit ints and return
   the hi 64 bits of the 128 bit product.  */

static uint64_t
mul64hi (uint64_t value1, uint64_t value2)
{
  uint64_t resultmid1;
  uint64_t result;
  uint64_t value1_lo = lowWordToU64 (value1);
  uint64_t value1_hi = highWordToU64 (value1);
  uint64_t value2_lo = lowWordToU64 (value2);
  uint64_t value2_hi = highWordToU64 (value2);

  /* Cross-multiply and collect results.  */
  uint64_t xproductlo = value1_lo * value2_lo;
  uint64_t xproductmid1 = value1_lo * value2_hi;
  uint64_t xproductmid2 = value1_hi * value2_lo;
  uint64_t xproducthi = value1_hi * value2_hi;
  uint64_t carry = 0;
  /* Start accumulating 64 bit results.  */
  /* Drop bottom half of lowest cross-product.  */
  uint64_t resultmid = xproductlo >> 32;

  /* Add in middle products.  */
  resultmid = resultmid + xproductmid1;

  /* Check for overflow.  */
  if (resultmid < xproductmid1)
    /* Carry over 1 into top cross-product.  */
    carry++;

  resultmid1 = resultmid + xproductmid2;

  /* Check for overflow.  */
  if (resultmid1 < xproductmid2)
    /* Carry over 1 into top cross-product.  */
    carry++;

  /* Drop lowest 32 bits of middle cross-product.  */
  result = resultmid1 >> 32;
  /* Move carry bit to just above middle cross-product highest bit.  */
  carry = carry << 32;

  /* Add top cross-product plus any carry.  */
  result += xproducthi + carry;

  return result;
}
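/* Worked example of the decomposition used above (illustrative only):
   writing value1 = 2^32*h1 + l1 and value2 = 2^32*h2 + l2,

     value1 * value2 = 2^64*(h1*h2) + 2^32*(h1*l2 + l1*h2) + l1*l2

   so the high 64 bits of the product are h1*h2 plus the top halves of the
   two middle cross-products plus any carry produced while summing them
   with the top half of l1*l2 -- exactly the carry/resultmid bookkeeping
   performed above.  */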
/* Signed multiply high, source, source2 :
   64 bit, dest <-- high 64-bit of result.  */
static void
smulh (sim_cpu *cpu)
{
  uint64_t uresult;
  int64_t result;
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);
  GReg     ra = INSTR (14, 10);
  int64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP);
  int64_t value2 = aarch64_get_reg_u64 (cpu, rm, NO_SP);
  uint64_t uvalue1, uvalue2;
  int negate = 0;

  if (ra != R31)
    HALT_UNALLOC;

  /* Convert to unsigned and use the unsigned mul64hi routine
     then fix the sign up afterwards.  */
  if (value1 < 0)
    {
      negate = !negate;
      uvalue1 = -value1;
    }
  else
    uvalue1 = value1;

  if (value2 < 0)
    {
      negate = !negate;
      uvalue2 = -value2;
    }
  else
    uvalue2 = value2;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);

  uresult = mul64hi (uvalue1, uvalue2);
  result = uresult;

  if (negate)
    {
      /* Multiply 128-bit result by -1, which means highpart gets inverted,
         and has carry in added only if low part is 0.  */
      result = ~result;
      if ((uvalue1 * uvalue2) == 0)
        result += 1;
    }

  aarch64_set_reg_s64 (cpu, rd, NO_SP, result);
}
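/* Illustrative example: for value1 = -2 and value2 = 3, smulh computes
   mul64hi (2, 3) == 0, then negates the 128 bit product; the low 64 bits
   of 2*3 are non-zero, so only the bitwise-not is applied and rd receives
   ~0 == -1, the correct high half of the 128 bit value -6.  */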
/* Unsigned multiply add long -- source, source2 :
   32 bit, source3 : 64 bit.  */
static void
umaddl (sim_cpu *cpu)
{
  unsigned rm = INSTR (20, 16);
  unsigned ra = INSTR (14, 10);
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* N.B. we need to multiply the unsigned 32 bit values in rn, rm to
     obtain a 64 bit product.  */
  aarch64_set_reg_u64
    (cpu, rd, NO_SP,
     aarch64_get_reg_u64 (cpu, ra, NO_SP)
     + ((uint64_t) aarch64_get_reg_u32 (cpu, rn, NO_SP))
     * ((uint64_t) aarch64_get_reg_u32 (cpu, rm, NO_SP)));
}
/* Unsigned multiply sub long -- source, source2 : 32 bit, source3 : 64 bit.  */
static void
umsubl (sim_cpu *cpu)
{
  unsigned rm = INSTR (20, 16);
  unsigned ra = INSTR (14, 10);
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* N.B. we need to multiply the unsigned 32 bit values in rn, rm to
     obtain a 64 bit product.  */
  aarch64_set_reg_u64
    (cpu, rd, NO_SP,
     aarch64_get_reg_u64 (cpu, ra, NO_SP)
     - ((uint64_t) aarch64_get_reg_u32 (cpu, rn, NO_SP))
     * ((uint64_t) aarch64_get_reg_u32 (cpu, rm, NO_SP)));
}
/* Unsigned multiply high, source, source2 :
   64 bit, dest <-- high 64-bit of result.  */
static void
umulh (sim_cpu *cpu)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);
  GReg     ra = INSTR (14, 10);

  if (ra != R31)
    HALT_UNALLOC;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, NO_SP,
                       mul64hi (aarch64_get_reg_u64 (cpu, rn, NO_SP),
                                aarch64_get_reg_u64 (cpu, rm, NO_SP)));
}
static void
dexDataProc3Source (sim_cpu *cpu)
{
  /* assert instr[28,24] == 11011.  */
  /* instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit (for rd at least)
     instr[30,29] = op54 : 00 ==> ok, ow ==> UNALLOC
     instr[23,21] = op31 : 111 ==> UNALLOC, ow ==> ok
     instr[15] = o0 : 0/1 ==> ok
     instr[23,21:15] ==> op : 0000 ==> MADD, 0001 ==> MSUB,     (32/64 bit)
                              0010 ==> SMADDL, 0011 ==> SMSUBL, (64 bit only)
                              0100 ==> SMULH,                   (64 bit only)
                              1010 ==> UMADDL, 1011 ==> UMSUBL, (64 bit only)
                              1100 ==> UMULH                    (64 bit only)
                              ow ==> UNALLOC.  */

  uint32_t dispatch;
  uint32_t size = INSTR (31, 31);
  uint32_t op54 = INSTR (30, 29);
  uint32_t op31 = INSTR (23, 21);
  uint32_t o0 = INSTR (15, 15);

  if (op54 != 0)
    HALT_UNALLOC;

  if (size == 0)
    {
      if (op31 != 0)
        HALT_UNALLOC;

      if (o0 == 0)
        madd32 (cpu);
      else
        msub32 (cpu);
      return;
    }

  dispatch = (op31 << 1) | o0;

  switch (dispatch)
    {
    case 0:  madd64 (cpu); return;
    case 1:  msub64 (cpu); return;
    case 2:  smaddl (cpu); return;
    case 3:  smsubl (cpu); return;
    case 4:  smulh (cpu); return;
    case 10: umaddl (cpu); return;
    case 11: umsubl (cpu); return;
    case 12: umulh (cpu); return;
    default: HALT_UNALLOC;
    }
}
static void
dexDPReg (sim_cpu *cpu)
{
  /* uint32_t group = dispatchGroup (aarch64_get_instr (cpu));
     assert  group == GROUP_DPREG_0101 || group == GROUP_DPREG_1101
     bits [28:24:21] of a DPReg are the secondary dispatch vector.  */
  uint32_t group2 = dispatchDPReg (aarch64_get_instr (cpu));

  switch (group2)
    {
    case DPREG_LOG_000:
    case DPREG_LOG_001:
      dexLogicalShiftedRegister (cpu); return;

    case DPREG_ADDSHF_010:
      dexAddSubtractShiftedRegister (cpu); return;

    case DPREG_ADDEXT_011:
      dexAddSubtractExtendedRegister (cpu); return;

    case DPREG_ADDCOND_100:
      {
        /* This set bundles a variety of different operations.  */
        /* 1) add/sub w carry.  */
        uint32_t mask1 = 0x1FE00000U;
        uint32_t val1  = 0x1A000000U;
        /* 2) cond compare register/immediate.  */
        uint32_t mask2 = 0x1FE00000U;
        uint32_t val2  = 0x1A400000U;
        /* 3) cond select.  */
        uint32_t mask3 = 0x1FE00000U;
        uint32_t val3  = 0x1A800000U;
        /* 4) data proc 1/2 source.  */
        uint32_t mask4 = 0x1FE00000U;
        uint32_t val4  = 0x1AC00000U;

        if ((aarch64_get_instr (cpu) & mask1) == val1)
          dexAddSubtractWithCarry (cpu);

        else if ((aarch64_get_instr (cpu) & mask2) == val2)
          CondCompare (cpu);

        else if ((aarch64_get_instr (cpu) & mask3) == val3)
          dexCondSelect (cpu);

        else if ((aarch64_get_instr (cpu) & mask4) == val4)
          {
            /* Bit 30 is clear for data proc 2 source
               and set for data proc 1 source.  */
            if (aarch64_get_instr (cpu) & (1U << 30))
              dexDataProc1Source (cpu);
            else
              dexDataProc2Source (cpu);
          }

        else
          /* Should not reach here.  */
          HALT_NYI;

        return;
      }

    case DPREG_3SRC_110:
      dexDataProc3Source (cpu); return;

    case DPREG_UNALLOC_101:
      HALT_UNALLOC;

    case DPREG_3SRC_111:
      dexDataProc3Source (cpu); return;

    default:
      /* Should never reach here.  */
      HALT_NYI;
    }
}
/* Unconditional Branch immediate.
   Offset is a PC-relative byte offset in the range +/- 128MiB.
   The offset is assumed to be raw from the decode i.e. the
   simulator is expected to scale them from word offsets to byte.  */

/* Unconditional branch.  */
static void
buc (sim_cpu *cpu, int32_t offset)
{
  aarch64_set_next_PC_by_offset (cpu, offset);
}

static unsigned stack_depth = 0;
/* Unconditional branch and link -- writes return PC to LR.  */
static void
bl (sim_cpu *cpu, int32_t offset)
{
  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_save_LR (cpu);
  aarch64_set_next_PC_by_offset (cpu, offset);

  if (TRACE_BRANCH_P (cpu))
    {
      ++ stack_depth;
      TRACE_BRANCH (cpu,
                    " %*scall %" PRIx64 " [%s]"
                    " [args: %" PRIx64 " %" PRIx64 " %" PRIx64 "]",
                    stack_depth, " ", aarch64_get_next_PC (cpu),
                    aarch64_get_func (CPU_STATE (cpu),
                                      aarch64_get_next_PC (cpu)),
                    aarch64_get_reg_u64 (cpu, 0, NO_SP),
                    aarch64_get_reg_u64 (cpu, 1, NO_SP),
                    aarch64_get_reg_u64 (cpu, 2, NO_SP)
                    );
    }
}
/* Unconditional Branch register.
   Branch/return address is in source register.  */

/* Unconditional branch.  */
static void
br (sim_cpu *cpu)
{
  unsigned rn = INSTR (9, 5);
  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_next_PC (cpu, aarch64_get_reg_u64 (cpu, rn, NO_SP));
}
/* Unconditional branch and link -- writes return PC to LR.  */
static void
blr (sim_cpu *cpu)
{
  /* Ensure we read the destination before we write LR.  */
  uint64_t target = aarch64_get_reg_u64 (cpu, INSTR (9, 5), NO_SP);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_save_LR (cpu);
  aarch64_set_next_PC (cpu, target);

  if (TRACE_BRANCH_P (cpu))
    {
      ++ stack_depth;
      TRACE_BRANCH (cpu,
                    " %*scall %" PRIx64 " [%s]"
                    " [args: %" PRIx64 " %" PRIx64 " %" PRIx64 "]",
                    stack_depth, " ", aarch64_get_next_PC (cpu),
                    aarch64_get_func (CPU_STATE (cpu),
                                      aarch64_get_next_PC (cpu)),
                    aarch64_get_reg_u64 (cpu, 0, NO_SP),
                    aarch64_get_reg_u64 (cpu, 1, NO_SP),
                    aarch64_get_reg_u64 (cpu, 2, NO_SP)
                    );
    }
}
/* Return -- assembler will default source to LR; this is functionally
   equivalent to br but, presumably, unlike br it side effects the
   branch predictor.  */
static void
ret (sim_cpu *cpu)
{
  unsigned rn = INSTR (9, 5);
  aarch64_set_next_PC (cpu, aarch64_get_reg_u64 (cpu, rn, NO_SP));

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  if (TRACE_BRANCH_P (cpu))
    {
      TRACE_BRANCH (cpu,
                    " %*sreturn [result: %" PRIx64 "]",
                    stack_depth, " ", aarch64_get_reg_u64 (cpu, 0, NO_SP));
      -- stack_depth;
    }
}
/* NOP -- we implement this and call it from the decode in case we
   want to intercept it later.  */
static void
nop (sim_cpu *cpu)
{
  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
}

/* Data synchronization barrier.  */
static void
dsb (sim_cpu *cpu)
{
  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
}

/* Data memory barrier.  */
static void
dmb (sim_cpu *cpu)
{
  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
}

/* Instruction synchronization barrier.  */
static void
isb (sim_cpu *cpu)
{
  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
}
static void
dexBranchImmediate (sim_cpu *cpu)
{
  /* assert instr[30,26] == 00101
     instr[31] ==> 0 == B, 1 == BL
     instr[25,0] == imm26 branch offset counted in words.  */

  uint32_t top = INSTR (31, 31);
  /* We have a 26 bit signed word offset which we need to pass to the
     execute routine as a signed byte offset.  */
  int32_t offset = simm32 (aarch64_get_instr (cpu), 25, 0) << 2;

  if (top)
    bl (cpu, offset);
  else
    buc (cpu, offset);
}
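/* Illustrative example of the scaling above: imm26 = 0x0000001 yields
   offset = 1 << 2 = +4 bytes (the next instruction), while the all-ones
   pattern 0x3FFFFFF sign-extends to -1 word and becomes -4 bytes.  */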
/* Control Flow.  */

/* Conditional branch

   Offset is a PC-relative byte offset in the range +/- 1MiB.

   Pos is a bit position in the range 0 .. 63.

   cc is a CondCode enum value as pulled out of the decode

   N.B. any offset register (source) can only be Xn or Wn.  */
static void
bcc (sim_cpu *cpu, int32_t offset, CondCode cc)
{
  /* The test returns TRUE if CC is met.  */
  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  if (testConditionCode (cpu, cc))
    aarch64_set_next_PC_by_offset (cpu, offset);
}
/* 32 bit branch on register non-zero.  */
static void
cbnz32 (sim_cpu *cpu, int32_t offset)
{
  unsigned rt = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  if (aarch64_get_reg_u32 (cpu, rt, NO_SP) != 0)
    aarch64_set_next_PC_by_offset (cpu, offset);
}

/* 64 bit branch on register non-zero.  */
static void
cbnz (sim_cpu *cpu, int32_t offset)
{
  unsigned rt = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  if (aarch64_get_reg_u64 (cpu, rt, NO_SP) != 0)
    aarch64_set_next_PC_by_offset (cpu, offset);
}

/* 32 bit branch on register zero.  */
static void
cbz32 (sim_cpu *cpu, int32_t offset)
{
  unsigned rt = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  if (aarch64_get_reg_u32 (cpu, rt, NO_SP) == 0)
    aarch64_set_next_PC_by_offset (cpu, offset);
}

/* 64 bit branch on register zero.  */
static void
cbz (sim_cpu *cpu, int32_t offset)
{
  unsigned rt = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  if (aarch64_get_reg_u64 (cpu, rt, NO_SP) == 0)
    aarch64_set_next_PC_by_offset (cpu, offset);
}
/* Branch on register bit test non-zero -- one size fits all.  */
static void
tbnz (sim_cpu *cpu, uint32_t pos, int32_t offset)
{
  unsigned rt = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  if (aarch64_get_reg_u64 (cpu, rt, NO_SP) & (((uint64_t) 1) << pos))
    aarch64_set_next_PC_by_offset (cpu, offset);
}

/* Branch on register bit test zero -- one size fits all.  */
static void
tbz (sim_cpu *cpu, uint32_t pos, int32_t offset)
{
  unsigned rt = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  if (!(aarch64_get_reg_u64 (cpu, rt, NO_SP) & (((uint64_t) 1) << pos)))
    aarch64_set_next_PC_by_offset (cpu, offset);
}
static void
dexCompareBranchImmediate (sim_cpu *cpu)
{
  /* instr[30,25] = 01 1010
     instr[31]    = size : 0 ==> 32, 1 ==> 64
     instr[24]    = op : 0 ==> CBZ, 1 ==> CBNZ
     instr[23,5]  = simm19 branch offset counted in words
     instr[4,0]   = rt  */

  uint32_t size = INSTR (31, 31);
  uint32_t op   = INSTR (24, 24);
  int32_t offset = simm32 (aarch64_get_instr (cpu), 23, 5) << 2;

  if (size == 0)
    {
      if (op == 0)
        cbz32 (cpu, offset);
      else
        cbnz32 (cpu, offset);
    }
  else
    {
      if (op == 0)
        cbz (cpu, offset);
      else
        cbnz (cpu, offset);
    }
}
static void
dexTestBranchImmediate (sim_cpu *cpu)
{
  /* instr[31]    = b5 : bit 5 of test bit idx
     instr[30,25] = 01 1011
     instr[24]    = op : 0 ==> TBZ, 1 == TBNZ
     instr[23,19] = b40 : bits 4 to 0 of test bit idx
     instr[18,5]  = simm14 : signed offset counted in words
     instr[4,0]   = uimm5  */

  uint32_t pos = ((INSTR (31, 31) << 5) | INSTR (23, 19));
  int32_t offset = simm32 (aarch64_get_instr (cpu), 18, 5) << 2;

  NYI_assert (30, 25, 0x1b);

  if (INSTR (24, 24) == 0)
    tbz (cpu, pos, offset);
  else
    tbnz (cpu, pos, offset);
}
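/* Illustrative example: "tbnz x3, #40, label" encodes b5 = 1 and
   b40 = 0b01000, so pos is reconstructed above as (1 << 5) | 8 = 40 and
   the branch is taken when bit 40 of X3 is set.  */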
static void
dexCondBranchImmediate (sim_cpu *cpu)
{
  /* instr[31,25] = 010 1010
     instr[24]    = op1; op => 00 ==> B.cond
     instr[23,5]  = simm19 : signed offset counted in words
     instr[4]     = op0
     instr[3,0]   = cond  */

  int32_t offset;
  uint32_t op = ((INSTR (24, 24) << 1) | INSTR (4, 4));

  NYI_assert (31, 25, 0x2a);

  if (op != 0)
    HALT_UNALLOC;

  offset = simm32 (aarch64_get_instr (cpu), 23, 5) << 2;

  bcc (cpu, offset, INSTR (3, 0));
}
static void
dexBranchRegister (sim_cpu *cpu)
{
  /* instr[31,25] = 110 1011
     instr[24,21] = op : 0 ==> BR, 1 => BLR, 2 => RET, 3 => ERET, 4 => DRPS
     instr[20,16] = op2 : must be 11111
     instr[15,10] = op3 : must be 000000
     instr[4,0]   = op4 : must be 00000.  */

  uint32_t op = INSTR (24, 21);
  uint32_t op2 = INSTR (20, 16);
  uint32_t op3 = INSTR (15, 10);
  uint32_t op4 = INSTR (4, 0);

  NYI_assert (31, 25, 0x6b);

  if (op2 != 0x1F || op3 != 0 || op4 != 0)
    HALT_UNALLOC;

  if (op == 0)
    br (cpu);

  else if (op == 1)
    blr (cpu);

  else if (op == 2)
    ret (cpu);

  else
    {
      /* ERET and DRPS accept 0b11111 for rn = instr [4,0].  */
      /* anything else is unallocated.  */
      uint32_t rn = INSTR (4, 0);

      if (rn != 0x1f)
        HALT_UNALLOC;

      if (op == 4 || op == 5)
        HALT_NYI;

      HALT_UNALLOC;
    }
}
/* FIXME: We should get the Angel SWI values from ../../libgloss/aarch64/svc.h
   but this may not be available.  So instead we define the values we
   need here.  */
#define AngelSVC_Reason_Open            0x01
#define AngelSVC_Reason_Close           0x02
#define AngelSVC_Reason_Write           0x05
#define AngelSVC_Reason_Read            0x06
#define AngelSVC_Reason_IsTTY           0x09
#define AngelSVC_Reason_Seek            0x0A
#define AngelSVC_Reason_FLen            0x0C
#define AngelSVC_Reason_Remove          0x0E
#define AngelSVC_Reason_Rename          0x0F
#define AngelSVC_Reason_Clock           0x10
#define AngelSVC_Reason_Time            0x11
#define AngelSVC_Reason_System          0x12
#define AngelSVC_Reason_Errno           0x13
#define AngelSVC_Reason_GetCmdLine      0x15
#define AngelSVC_Reason_HeapInfo        0x16
#define AngelSVC_Reason_ReportException 0x18
#define AngelSVC_Reason_Elapsed         0x30
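/* Illustrative sketch, assuming the usual AArch64 semihosting convention
   handled below: the guest places the reason code in W0 and a pointer to
   a parameter block in X1 before executing the halting instruction, e.g.

       mov  w0, #0x05          // AngelSVC_Reason_Write
       adr  x1, param_block    // { fd, buffer address, length }
       hlt  #0xf000

   handle_halt () then dispatches on W0 and leaves its result in X0.
   The label param_block is hypothetical and only for illustration.  */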
static void
handle_halt (sim_cpu *cpu, uint32_t val)
{
  uint64_t result = 0;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  if (val != 0xf000)
    {
      TRACE_SYSCALL (cpu, " HLT [0x%x]", val);
      sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
                       sim_stopped, SIM_SIGTRAP);
    }

  /* We have encountered an Angel SVC call.  See if we can process it.  */
  switch (aarch64_get_reg_u32 (cpu, 0, NO_SP))
    {
    case AngelSVC_Reason_HeapInfo:
      {
        /* Get the values.  */
        uint64_t stack_top = aarch64_get_stack_start (cpu);
        uint64_t heap_base = aarch64_get_heap_start (cpu);

        /* Get the pointer  */
        uint64_t ptr = aarch64_get_reg_u64 (cpu, 1, SP_OK);
        ptr = aarch64_get_mem_u64 (cpu, ptr);

        /* Fill in the memory block.  */
        /* Start addr of heap.  */
        aarch64_set_mem_u64 (cpu, ptr +  0, heap_base);
        /* End addr of heap.  */
        aarch64_set_mem_u64 (cpu, ptr +  8, stack_top);
        /* Lowest stack addr.  */
        aarch64_set_mem_u64 (cpu, ptr + 16, heap_base);
        /* Initial stack addr.  */
        aarch64_set_mem_u64 (cpu, ptr + 24, stack_top);

        TRACE_SYSCALL (cpu, " AngelSVC: Get Heap Info");
      }
      break;

    case AngelSVC_Reason_Open:
      {
        /* Get the pointer  */
        /* uint64_t ptr = aarch64_get_reg_u64 (cpu, 1, SP_OK);.  */
        /* FIXME: For now we just assume that we will only be asked
           to open the standard file descriptors.  */
        static int fd = 0;
        result = fd ++;

        TRACE_SYSCALL (cpu, " AngelSVC: Open file %d", fd - 1);
      }
      break;

    case AngelSVC_Reason_Close:
      {
        uint64_t fh = aarch64_get_reg_u64 (cpu, 1, SP_OK);
        TRACE_SYSCALL (cpu, " AngelSVC: Close file %d", (int) fh);
      }
      break;

    case AngelSVC_Reason_Errno:
      result = 0;
      TRACE_SYSCALL (cpu, " AngelSVC: Get Errno");
      break;

    case AngelSVC_Reason_Clock:
      result =
#ifdef CLOCKS_PER_SEC
        (CLOCKS_PER_SEC >= 100)
        ? (clock () / (CLOCKS_PER_SEC / 100))
        : ((clock () * 100) / CLOCKS_PER_SEC)
#else
        /* Presume unix... clock() returns microseconds.  */
        (clock () / 10000)
#endif
        ;
      TRACE_SYSCALL (cpu, " AngelSVC: Get Clock");
      break;

    case AngelSVC_Reason_GetCmdLine:
      {
        /* Get the pointer  */
        uint64_t ptr = aarch64_get_reg_u64 (cpu, 1, SP_OK);
        ptr = aarch64_get_mem_u64 (cpu, ptr);

        /* FIXME: No command line for now.  */
        aarch64_set_mem_u64 (cpu, ptr, 0);
        TRACE_SYSCALL (cpu, " AngelSVC: Get Command Line");
      }
      break;

    case AngelSVC_Reason_IsTTY:
      result = 1;
      TRACE_SYSCALL (cpu, " AngelSVC: IsTTY ?");
      break;

    case AngelSVC_Reason_Write:
      {
        /* Get the pointer  */
        uint64_t ptr = aarch64_get_reg_u64 (cpu, 1, SP_OK);
        /* Get the write control block.  */
        uint64_t fd  = aarch64_get_mem_u64 (cpu, ptr);
        uint64_t buf = aarch64_get_mem_u64 (cpu, ptr + 8);
        uint64_t len = aarch64_get_mem_u64 (cpu, ptr + 16);

        TRACE_SYSCALL (cpu, "write of %" PRIx64 " bytes from %"
                       PRIx64 " on descriptor %" PRIx64,
                       len, buf, fd);

        if (len > 1280)
          {
            TRACE_SYSCALL (cpu,
                           " AngelSVC: Write: Suspiciously long write: %ld",
                           (long) len);
            sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
                             sim_stopped, SIM_SIGBUS);
          }
        else if (fd == 1)
          {
            printf ("%.*s", (int) len, aarch64_get_mem_ptr (cpu, buf));
          }
        else if (fd == 2)
          {
            TRACE (cpu, 0, "\n");
            sim_io_eprintf (CPU_STATE (cpu), "%.*s",
                            (int) len, aarch64_get_mem_ptr (cpu, buf));
            TRACE (cpu, 0, "\n");
          }
        else
          {
            TRACE_SYSCALL (cpu,
                           " AngelSVC: Write: Unexpected file handle: %d",
                           (int) fd);
            sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
                             sim_stopped, SIM_SIGABRT);
          }
      }
      break;

    case AngelSVC_Reason_ReportException:
      {
        /* Get the pointer  */
        uint64_t ptr = aarch64_get_reg_u64 (cpu, 1, SP_OK);
        /*ptr = aarch64_get_mem_u64 (cpu, ptr);.  */
        uint64_t type = aarch64_get_mem_u64 (cpu, ptr);
        uint64_t state = aarch64_get_mem_u64 (cpu, ptr + 8);

        TRACE_SYSCALL (cpu,
                       "Angel Exception: type 0x%" PRIx64 " state %" PRIx64,
                       type, state);

        if (type == 0x20026)
          sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
                           sim_exited, state);
        else
          sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
                           sim_stopped, SIM_SIGINT);
      }
      break;

    case AngelSVC_Reason_Read:
    case AngelSVC_Reason_FLen:
    case AngelSVC_Reason_Seek:
    case AngelSVC_Reason_Remove:
    case AngelSVC_Reason_Time:
    case AngelSVC_Reason_System:
    case AngelSVC_Reason_Rename:
    case AngelSVC_Reason_Elapsed:
    default:
      TRACE_SYSCALL (cpu, " HLT [Unknown angel %x]",
                     aarch64_get_reg_u32 (cpu, 0, NO_SP));
      sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
                       sim_stopped, SIM_SIGTRAP);
    }

  aarch64_set_reg_u64 (cpu, 0, NO_SP, result);
}
static void
dexExcpnGen (sim_cpu *cpu)
{
  /* instr[31:24] = 11010100
     instr[23,21] = opc : 000 ==> GEN EXCPN, 001 ==> BRK
                          010 ==> HLT, 101 ==> DBG GEN EXCPN
     instr[20,5]  = imm16
     instr[4,2]   = opc2 000 ==> OK, ow ==> UNALLOC
     instr[1,0]   = LL : discriminates opc  */

  uint32_t opc = INSTR (23, 21);
  uint32_t imm16 = INSTR (20, 5);
  uint32_t opc2 = INSTR (4, 2);
  uint32_t LL;

  NYI_assert (31, 24, 0xd4);

  if (opc2 != 0)
    HALT_UNALLOC;

  LL = INSTR (1, 0);

  /* We only implement HLT and BRK for now.  */
  if (opc == 1 && LL == 0)
    {
      TRACE_EVENTS (cpu, " BRK [0x%x]", imm16);
      sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
                       sim_exited, aarch64_get_reg_s32 (cpu, R0, SP_OK));
    }

  if (opc == 2 && LL == 0)
    handle_halt (cpu, imm16);

  else if (opc == 0 || opc == 5)
    HALT_NYI;

  else
    HALT_UNALLOC;
}
/* Stub for accessing system registers.  */

static uint64_t
system_get (sim_cpu *cpu, unsigned op0, unsigned op1, unsigned crn,
            unsigned crm, unsigned op2)
{
  if (crn == 0 && op1 == 3 && crm == 0 && op2 == 7)
    /* DCZID_EL0 - the Data Cache Zero ID register.
       We do not support DC ZVA at the moment, so
       we return a value with the disable bit set.
       We implement support for the DCZID register since
       it is used by the C library's memset function.  */
    return ((uint64_t) 1) << 4;

  if (crn == 0 && op1 == 3 && crm == 0 && op2 == 1)
    /* Cache Type Register.  */
    return 0x80008000UL;

  if (crn == 13 && op1 == 3 && crm == 0 && op2 == 2)
    /* TPIDR_EL0 - thread pointer id.  */
    return aarch64_get_thread_id (cpu);

  if (op1 == 3 && crm == 4 && op2 == 0)
    return aarch64_get_FPCR (cpu);

  if (op1 == 3 && crm == 4 && op2 == 1)
    return aarch64_get_FPSR (cpu);

  else if (op1 == 3 && crm == 2 && op2 == 0)
    return aarch64_get_CPSR (cpu);

  HALT_NYI;
}
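/* Illustrative example: a read of DCZID_EL0 such as "mrs x0, dczid_el0"
   reaches system_get () with op1 == 3, crn == 0, crm == 0 and op2 == 7,
   so it returns 1 << 4, i.e. a value with the prohibit (DZP) bit set and
   a zero block-size field, telling the C library not to use DC ZVA.  */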
static void
system_set (sim_cpu *cpu, unsigned op0, unsigned op1, unsigned crn,
            unsigned crm, unsigned op2, uint64_t val)
{
  if (op1 == 3 && crm == 4 && op2 == 0)
    aarch64_set_FPCR (cpu, val);

  else if (op1 == 3 && crm == 4 && op2 == 1)
    aarch64_set_FPSR (cpu, val);

  else if (op1 == 3 && crm == 2 && op2 == 0)
    aarch64_set_CPSR (cpu, val);

  else
    HALT_NYI;
}
static void
do_mrs (sim_cpu *cpu)
{
  /* instr[31:20] = 1101 0101 0001 1
     instr[19]    = op0
     instr[18,16] = op1
     instr[15,12] = CRn
     instr[11,8]  = CRm
     instr[7,5]   = op2
     instr[4,0]   = Rt  */
  unsigned sys_op0 = INSTR (19, 19) + 2;
  unsigned sys_op1 = INSTR (18, 16);
  unsigned sys_crn = INSTR (15, 12);
  unsigned sys_crm = INSTR (11, 8);
  unsigned sys_op2 = INSTR (7, 5);
  unsigned rt = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rt, NO_SP,
                       system_get (cpu, sys_op0, sys_op1, sys_crn,
                                   sys_crm, sys_op2));
}
static void
do_MSR_immediate (sim_cpu *cpu)
{
  /* instr[31:19] = 1101 0101 0000 0
     instr[18,16] = op1
     instr[15,12] = 0100
     instr[11,8]  = CRm
     instr[7,5]   = op2
     instr[4,0]   = 1 1111  */

  unsigned op1 = INSTR (18, 16);
  /*unsigned crm = INSTR (11, 8);*/
  unsigned op2 = INSTR (7, 5);

  NYI_assert (31, 19, 0x1AA0);
  NYI_assert (15, 12, 0x4);
  NYI_assert (4, 0, 0x1F);

  if (op1 == 0)
    {
      if (op2 == 5)
        HALT_NYI; /* set SPSel.  */
      else
        HALT_UNALLOC;
    }
  else if (op1 == 3)
    {
      if (op2 == 6)
        HALT_NYI; /* set DAIFset.  */
      else if (op2 == 7)
        HALT_NYI; /* set DAIFclr.  */
      else
        HALT_UNALLOC;
    }
  else
    HALT_UNALLOC;
}
static void
do_MSR_reg (sim_cpu *cpu)
{
  /* instr[31:20] = 1101 0101 0001
     instr[19]    = op0
     instr[18,16] = op1
     instr[15,12] = CRn
     instr[11,8]  = CRm
     instr[7,5]   = op2
     instr[4,0]   = Rt  */

  unsigned sys_op0 = INSTR (19, 19) + 2;
  unsigned sys_op1 = INSTR (18, 16);
  unsigned sys_crn = INSTR (15, 12);
  unsigned sys_crm = INSTR (11, 8);
  unsigned sys_op2 = INSTR (7, 5);
  unsigned rt = INSTR (4, 0);

  NYI_assert (31, 20, 0xD51);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  system_set (cpu, sys_op0, sys_op1, sys_crn, sys_crm, sys_op2,
              aarch64_get_reg_u64 (cpu, rt, NO_SP));
}
static void
do_SYS (sim_cpu *cpu)
{
  /* instr[31,19] = 1101 0101 0000 1
     instr[18,16] = op1
     instr[15,12] = CRn
     instr[11,8]  = CRm
     instr[7,5]   = op2
     instr[4,0]   = Rt  */
  NYI_assert (31, 19, 0x1AA1);

  /* FIXME: For now we just silently accept system ops.  */
}
static void
dexSystem (sim_cpu *cpu)
{
  /* instr[31:22] = 1101 01010 0
     instr[21]    = L
     instr[20,19] = op0
     instr[18,16] = op1
     instr[15,12] = CRn
     instr[11,8]  = CRm
     instr[7,5]   = op2
     instr[4,0]   = uimm5  */

  /* We are interested in HINT, DSB, DMB and ISB

     Hint #0 encodes NOOP (this is the only hint we care about)
     L == 0, op0 == 0, op1 = 011, CRn = 0010, Rt = 11111,
     CRm op2 != 0000 000 OR CRm op2 == 0000 000 || CRm op > 0000 101

     DSB, DMB, ISB are data synchronization barrier, data memory barrier
     and instruction synchronization barrier, respectively, where

     L == 0, op0 == 0, op1 = 011, CRn = 0011, Rt = 11111,
     op2 : DSB ==> 100, DMB ==> 101, ISB ==> 110
     CRm<3:2> ==> domain, CRm<1:0> ==> types,
     domain : 00 ==> OuterShareable, 01 ==> Nonshareable,
              10 ==> InnerShareable, 11 ==> FullSystem
     types :  01 ==> Reads, 10 ==> Writes,
              11 ==> All, 00 ==> All (domain == FullSystem).  */

  unsigned rt = INSTR (4, 0);

  NYI_assert (31, 22, 0x354);

  switch (INSTR (21, 12))
    {
    case 0x032:
      if (rt == 0x1F)
        {
          /* NOP has CRm != 0000 OR.  */
          /*      (CRm == 0000 AND (op2 == 000 OR op2 > 101)).  */
          uint32_t crm = INSTR (11, 8);
          uint32_t op2 = INSTR (7, 5);

          if (crm != 0 || (op2 == 0 || op2 > 5))
            {
              /* Actually call nop method so we can reimplement it later.  */
              nop (cpu);
              return;
            }
        }
      HALT_NYI;

    case 0x033:
      {
        uint32_t op2 = INSTR (7, 5);

        switch (op2)
          {
          case 4: dsb (cpu); return;
          case 5: dmb (cpu); return;
          case 6: isb (cpu); return;
          default: HALT_UNALLOC;
          }
      }

    case 0x0B7:
      do_SYS (cpu); /* DC is an alias of SYS.  */
      return;

    default:
      if (INSTR (21, 20) == 0x1)
        do_MSR_reg (cpu);
      else if (INSTR (21, 19) == 0 && INSTR (15, 12) == 0x4)
        do_MSR_immediate (cpu);
      else
        do_mrs (cpu);
      return;
    }
}
static void
dexBr (sim_cpu *cpu)
{
  /* uint32_t group = dispatchGroup (aarch64_get_instr (cpu));
     assert  group == GROUP_BREXSYS_1010 || group == GROUP_BREXSYS_1011
     bits [31,29] of a BrExSys are the secondary dispatch vector.  */
  uint32_t group2 = dispatchBrExSys (aarch64_get_instr (cpu));

  switch (group2)
    {
    case BR_IMM_000:
      return dexBranchImmediate (cpu);

    case BR_IMMCMP_001:
      /* Compare has bit 25 clear while test has it set.  */
      if (!INSTR (25, 25))
        dexCompareBranchImmediate (cpu);
      else
        dexTestBranchImmediate (cpu);
      return;

    case BR_IMMCOND_010:
      /* This is a conditional branch if bit 25 is clear otherwise
         unallocated.  */
      if (!INSTR (25, 25))
        dexCondBranchImmediate (cpu);
      else
        HALT_UNALLOC;
      return;

    case BR_UNALLOC_011:
      HALT_UNALLOC;

    case BR_IMM_100:
      dexBranchImmediate (cpu);
      return;

    case BR_IMMCMP_101:
      /* Compare has bit 25 clear while test has it set.  */
      if (!INSTR (25, 25))
        dexCompareBranchImmediate (cpu);
      else
        dexTestBranchImmediate (cpu);
      return;

    case BR_REG_110:
      /* Unconditional branch reg has bit 25 set.  */
      if (INSTR (25, 25))
        dexBranchRegister (cpu);

      /* This includes both Excpn Gen, System and unalloc operations.
         We need to decode the Excpn Gen operation BRK so we can plant
         debugger entry points.
         Excpn Gen operations have instr [24] = 0.
         we need to decode at least one of the System operations NOP
         which is an alias for HINT #0.
         System operations have instr [24,22] = 100.  */
      else if (INSTR (24, 24) == 0)
        dexExcpnGen (cpu);

      else if (INSTR (24, 22) == 4)
        dexSystem (cpu);

      else
        HALT_NYI;

      return;

    case BR_UNALLOC_111:
      HALT_UNALLOC;

    default:
      /* Should never reach here.  */
      HALT_NYI;
    }
}
static void
aarch64_decode_and_execute (sim_cpu *cpu, uint64_t pc)
{
  /* We need to check if gdb wants in here.  */
  /* checkBreak (cpu);.  */

  uint64_t group = dispatchGroup (aarch64_get_instr (cpu));

  switch (group)
    {
    case GROUP_PSEUDO_0000:   dexPseudo (cpu); break;
    case GROUP_LDST_0100:     dexLdSt (cpu); break;
    case GROUP_DPREG_0101:    dexDPReg (cpu); break;
    case GROUP_LDST_0110:     dexLdSt (cpu); break;
    case GROUP_ADVSIMD_0111:  dexAdvSIMD0 (cpu); break;
    case GROUP_DPIMM_1000:    dexDPImm (cpu); break;
    case GROUP_DPIMM_1001:    dexDPImm (cpu); break;
    case GROUP_BREXSYS_1010:  dexBr (cpu); break;
    case GROUP_BREXSYS_1011:  dexBr (cpu); break;
    case GROUP_LDST_1100:     dexLdSt (cpu); break;
    case GROUP_DPREG_1101:    dexDPReg (cpu); break;
    case GROUP_LDST_1110:     dexLdSt (cpu); break;
    case GROUP_ADVSIMD_1111:  dexAdvSIMD1 (cpu); break;

    case GROUP_UNALLOC_0001:
    case GROUP_UNALLOC_0010:
    case GROUP_UNALLOC_0011:
      HALT_UNALLOC;

    default:
      /* Should never reach here.  */
      HALT_NYI;
    }
}
static bool
aarch64_step (sim_cpu *cpu)
{
  uint64_t pc = aarch64_get_PC (cpu);

  if (pc == TOP_LEVEL_RETURN_PC)
    return false;

  aarch64_set_next_PC (cpu, pc + 4);

  /* Code is always little-endian.  */
  sim_core_read_buffer (CPU_STATE (cpu), cpu, read_map,
                        & aarch64_get_instr (cpu), pc, 4);
  aarch64_get_instr (cpu) = endian_le2h_4 (aarch64_get_instr (cpu));

  TRACE_INSN (cpu, " pc = %" PRIx64 " instr = %08x", pc,
              aarch64_get_instr (cpu));
  TRACE_DISASM (cpu, pc);

  aarch64_decode_and_execute (cpu, pc);

  return true;
}
void
aarch64_run (SIM_DESC sd)
{
  sim_cpu *cpu = STATE_CPU (sd, 0);

  while (aarch64_step (cpu))
    {
      aarch64_update_PC (cpu);

      if (sim_events_tick (sd))
        sim_events_process (sd);
    }

  sim_engine_halt (sd, cpu, NULL, aarch64_get_PC (cpu),
                   sim_exited, aarch64_get_reg_s32 (cpu, R0, NO_SP));
}
void
aarch64_init (sim_cpu *cpu, uint64_t pc)
{
  uint64_t sp = aarch64_get_stack_start (cpu);

  /* Install SP, FP and PC and set LR to -20
     so we can detect a top-level return.  */
  aarch64_set_reg_u64 (cpu, SP, SP_OK, sp);
  aarch64_set_reg_u64 (cpu, FP, SP_OK, sp);
  aarch64_set_reg_u64 (cpu, LR, SP_OK, TOP_LEVEL_RETURN_PC);
  aarch64_set_next_PC (cpu, pc);
  aarch64_update_PC (cpu);
  aarch64_init_LIT_table ();
}