tcg/aarch64/tcg-target.inc.c

   1 /*
   2  * Initial TCG Implementation for aarch64
   3  *
   4  * Copyright (c) 2013 Huawei Technologies Duesseldorf GmbH
   5  * Written by Claudio Fontana
   6  *
   7  * This work is licensed under the terms of the GNU GPL, version 2 or
   8  * (at your option) any later version.
   9  *
  10  * See the COPYING file in the top-level directory for details.
  11  */
  12
  13 #include "../tcg-pool.inc.c"
  14 #include "qemu/bitops.h"
  15
/* We're going to re-use TCGType in setting of the SF bit, which controls
   the size of the operation performed.  If we know the values match, it
   makes things much cleaner.  The emitters below rely on this by shifting
   the TCGType argument ("ext") straight into bit 31 of the instruction.  */
QEMU_BUILD_BUG_ON(TCG_TYPE_I32 != 0 || TCG_TYPE_I64 != 1);
  20
  21 #ifdef CONFIG_DEBUG_TCG
  22 static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
  23     "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
  24     "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
  25     "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
  26     "x24", "x25", "x26", "x27", "x28", "fp", "x30", "sp",
  27
  28     "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
  29     "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15",
  30     "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23",
  31     "v24", "v25", "v26", "v27", "v28", "fp", "v30", "v31",
  32 };
  33 #endif /* CONFIG_DEBUG_TCG */
  34
/*
 * Register allocation preference list.  General registers come first:
 * X20-X28, then X8-X17, then X0-X7 (the registers also listed in
 * tcg_target_call_iarg_regs).  X18, X19, X29 and X30 are deliberately
 * absent for the reasons noted inline.  Vector registers follow, with
 * V8-V15 left out.
 */
static const int tcg_target_reg_alloc_order[] = {
    TCG_REG_X20, TCG_REG_X21, TCG_REG_X22, TCG_REG_X23,
    TCG_REG_X24, TCG_REG_X25, TCG_REG_X26, TCG_REG_X27,
    TCG_REG_X28, /* we will reserve this for guest_base if configured */

    TCG_REG_X8, TCG_REG_X9, TCG_REG_X10, TCG_REG_X11,
    TCG_REG_X12, TCG_REG_X13, TCG_REG_X14, TCG_REG_X15,
    TCG_REG_X16, TCG_REG_X17,

    TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3,
    TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7,

    /* X18 reserved by system */
    /* X19 reserved for AREG0 */
    /* X29 reserved as fp */
    /* X30 reserved as temporary */

    TCG_REG_V0, TCG_REG_V1, TCG_REG_V2, TCG_REG_V3,
    TCG_REG_V4, TCG_REG_V5, TCG_REG_V6, TCG_REG_V7,
    /* V8 - V15 are call-saved, and skipped.  */
    TCG_REG_V16, TCG_REG_V17, TCG_REG_V18, TCG_REG_V19,
    TCG_REG_V20, TCG_REG_V21, TCG_REG_V22, TCG_REG_V23,
    TCG_REG_V24, TCG_REG_V25, TCG_REG_V26, TCG_REG_V27,
    TCG_REG_V28, TCG_REG_V29, TCG_REG_V30, TCG_REG_V31,
};
  60
/* Registers carrying integer call arguments, X0 through X7 in order
   (presumably following the AArch64 procedure call standard).  */
static const int tcg_target_call_iarg_regs[8] = {
    TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3,
    TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7
};
/* Register carrying the call result: only X0 is listed.  */
static const int tcg_target_call_oarg_regs[1] = {
    TCG_REG_X0
};
  68
/* Scratch registers for the backend.  X30 is the register that
   tcg_target_reg_alloc_order notes as "reserved as temporary".  */
#define TCG_REG_TMP TCG_REG_X30
#define TCG_VEC_TMP TCG_REG_V31
  71
#ifndef CONFIG_SOFTMMU
/* Note that XZR cannot be encoded in the address base register slot,
   as that actually encodes SP.  So if we need to zero-extend the guest
   address, via the address index register slot, we need to load even
   a zero guest base into a register.  */
#define USE_GUEST_BASE     (guest_base != 0 || TARGET_LONG_BITS == 32)
#define TCG_REG_GUEST_BASE TCG_REG_X28
#endif
  80
  81 static inline bool reloc_pc26(tcg_insn_unit *code_ptr, tcg_insn_unit *target)
  82 {
  83     ptrdiff_t offset = target - code_ptr;
  84     if (offset == sextract64(offset, 0, 26)) {
  85         /* read instruction, mask away previous PC_REL26 parameter contents,
  86            set the proper offset, then write back the instruction. */
  87         *code_ptr = deposit32(*code_ptr, 0, 26, offset);
  88         return true;
  89     }
  90     return false;
  91 }
  92
  93 static inline bool reloc_pc19(tcg_insn_unit *code_ptr, tcg_insn_unit *target)
  94 {
  95     ptrdiff_t offset = target - code_ptr;
  96     if (offset == sextract64(offset, 0, 19)) {
  97         *code_ptr = deposit32(*code_ptr, 5, 19, offset);
  98         return true;
  99     }
 100     return false;
 101 }
 102
 103 static inline bool patch_reloc(tcg_insn_unit *code_ptr, int type,
 104                                intptr_t value, intptr_t addend)
 105 {
 106     tcg_debug_assert(addend == 0);
 107     switch (type) {
 108     case R_AARCH64_JUMP26:
 109     case R_AARCH64_CALL26:
 110         return reloc_pc26(code_ptr, (tcg_insn_unit *)value);
 111     case R_AARCH64_CONDBR19:
 112         return reloc_pc19(code_ptr, (tcg_insn_unit *)value);
 113     default:
 114         g_assert_not_reached();
 115     }
 116 }
 117
/* Target-specific constant constraint flags.  They are set by
   target_parse_constraint and tested by tcg_target_const_match.  */
#define TCG_CT_CONST_AIMM 0x100   /* arithmetic immediate */
#define TCG_CT_CONST_LIMM 0x200   /* logical immediate */
#define TCG_CT_CONST_ZERO 0x400   /* the value zero */
#define TCG_CT_CONST_MONE 0x800   /* the value minus one */
#define TCG_CT_CONST_ORRI 0x1000  /* vector orr/bic immediate */
#define TCG_CT_CONST_ANDI 0x2000  /* vector orr/bic immediate, inverted */
 124
 125 /* parse target specific constraints */
 126 static const char *target_parse_constraint(TCGArgConstraint *ct,
 127                                            const char *ct_str, TCGType type)
 128 {
 129     switch (*ct_str++) {
 130     case 'r': /* general registers */
 131         ct->ct |= TCG_CT_REG;
 132         ct->u.regs |= 0xffffffffu;
 133         break;
 134     case 'w': /* advsimd registers */
 135         ct->ct |= TCG_CT_REG;
 136         ct->u.regs |= 0xffffffff00000000ull;
 137         break;
 138     case 'l': /* qemu_ld / qemu_st address, data_reg */
 139         ct->ct |= TCG_CT_REG;
 140         ct->u.regs = 0xffffffffu;
 141 #ifdef CONFIG_SOFTMMU
 142         /* x0 and x1 will be overwritten when reading the tlb entry,
 143            and x2, and x3 for helper args, better to avoid using them. */
 144         tcg_regset_reset_reg(ct->u.regs, TCG_REG_X0);
 145         tcg_regset_reset_reg(ct->u.regs, TCG_REG_X1);
 146         tcg_regset_reset_reg(ct->u.regs, TCG_REG_X2);
 147         tcg_regset_reset_reg(ct->u.regs, TCG_REG_X3);
 148 #endif
 149         break;
 150     case 'A': /* Valid for arithmetic immediate (positive or negative).  */
 151         ct->ct |= TCG_CT_CONST_AIMM;
 152         break;
 153     case 'L': /* Valid for logical immediate.  */
 154         ct->ct |= TCG_CT_CONST_LIMM;
 155         break;
 156     case 'M': /* minus one */
 157         ct->ct |= TCG_CT_CONST_MONE;
 158         break;
 159     case 'O': /* vector orr/bic immediate */
 160         ct->ct |= TCG_CT_CONST_ORRI;
 161         break;
 162     case 'N': /* vector orr/bic immediate, inverted */
 163         ct->ct |= TCG_CT_CONST_ANDI;
 164         break;
 165     case 'Z': /* zero */
 166         ct->ct |= TCG_CT_CONST_ZERO;
 167         break;
 168     default:
 169         return NULL;
 170     }
 171     return ct_str;
 172 }
 173
 174 /* Match a constant valid for addition (12-bit, optionally shifted).  */
 175 static inline bool is_aimm(uint64_t val)
 176 {
 177     return (val & ~0xfff) == 0 || (val & ~0xfff000) == 0;
 178 }
 179
 180 /* Match a constant valid for logical operations.  */
 181 static inline bool is_limm(uint64_t val)
 182 {
 183     /* Taking a simplified view of the logical immediates for now, ignoring
 184        the replication that can happen across the field.  Match bit patterns
 185        of the forms
 186            0....01....1
 187            0..01..10..0
 188        and their inverses.  */
 189
 190     /* Make things easier below, by testing the form with msb clear. */
 191     if ((int64_t)val < 0) {
 192         val = ~val;
 193     }
 194     if (val == 0) {
 195         return false;
 196     }
 197     val += val & -val;
 198     return (val & (val - 1)) == 0;
 199 }
 200
 201 /* Return true if v16 is a valid 16-bit shifted immediate.  */
 202 static bool is_shimm16(uint16_t v16, int *cmode, int *imm8)
 203 {
 204     if (v16 == (v16 & 0xff)) {
 205         *cmode = 0x8;
 206         *imm8 = v16 & 0xff;
 207         return true;
 208     } else if (v16 == (v16 & 0xff00)) {
 209         *cmode = 0xa;
 210         *imm8 = v16 >> 8;
 211         return true;
 212     }
 213     return false;
 214 }
 215
 216 /* Return true if v32 is a valid 32-bit shifted immediate.  */
 217 static bool is_shimm32(uint32_t v32, int *cmode, int *imm8)
 218 {
 219     if (v32 == (v32 & 0xff)) {
 220         *cmode = 0x0;
 221         *imm8 = v32 & 0xff;
 222         return true;
 223     } else if (v32 == (v32 & 0xff00)) {
 224         *cmode = 0x2;
 225         *imm8 = (v32 >> 8) & 0xff;
 226         return true;
 227     } else if (v32 == (v32 & 0xff0000)) {
 228         *cmode = 0x4;
 229         *imm8 = (v32 >> 16) & 0xff;
 230         return true;
 231     } else if (v32 == (v32 & 0xff000000)) {
 232         *cmode = 0x6;
 233         *imm8 = v32 >> 24;
 234         return true;
 235     }
 236     return false;
 237 }
 238
 239 /* Return true if v32 is a valid 32-bit shifting ones immediate.  */
 240 static bool is_soimm32(uint32_t v32, int *cmode, int *imm8)
 241 {
 242     if ((v32 & 0xffff00ff) == 0xff) {
 243         *cmode = 0xc;
 244         *imm8 = (v32 >> 8) & 0xff;
 245         return true;
 246     } else if ((v32 & 0xff00ffff) == 0xffff) {
 247         *cmode = 0xd;
 248         *imm8 = (v32 >> 16) & 0xff;
 249         return true;
 250     }
 251     return false;
 252 }
 253
 254 /* Return true if v32 is a valid float32 immediate.  */
 255 static bool is_fimm32(uint32_t v32, int *cmode, int *imm8)
 256 {
 257     if (extract32(v32, 0, 19) == 0
 258         && (extract32(v32, 25, 6) == 0x20
 259             || extract32(v32, 25, 6) == 0x1f)) {
 260         *cmode = 0xf;
 261         *imm8 = (extract32(v32, 31, 1) << 7)
 262               | (extract32(v32, 25, 1) << 6)
 263               | extract32(v32, 19, 6);
 264         return true;
 265     }
 266     return false;
 267 }
 268
 269 /* Return true if v64 is a valid float64 immediate.  */
 270 static bool is_fimm64(uint64_t v64, int *cmode, int *imm8)
 271 {
 272     if (extract64(v64, 0, 48) == 0
 273         && (extract64(v64, 54, 9) == 0x100
 274             || extract64(v64, 54, 9) == 0x0ff)) {
 275         *cmode = 0xf;
 276         *imm8 = (extract64(v64, 63, 1) << 7)
 277               | (extract64(v64, 54, 1) << 6)
 278               | extract64(v64, 48, 6);
 279         return true;
 280     }
 281     return false;
 282 }
 283
 284 /*
 285  * Return non-zero if v32 can be formed by MOVI+ORR.
 286  * Place the parameters for MOVI in (cmode, imm8).
 287  * Return the cmode for ORR; the imm8 can be had via extraction from v32.
 288  */
 289 static int is_shimm32_pair(uint32_t v32, int *cmode, int *imm8)
 290 {
 291     int i;
 292
 293     for (i = 6; i > 0; i -= 2) {
 294         /* Mask out one byte we can add with ORR.  */
 295         uint32_t tmp = v32 & ~(0xffu << (i * 4));
 296         if (is_shimm32(tmp, cmode, imm8) ||
 297             is_soimm32(tmp, cmode, imm8)) {
 298             break;
 299         }
 300     }
 301     return i;
 302 }
 303
 304 /* Return true if V is a valid 16-bit or 32-bit shifted immediate.  */
 305 static bool is_shimm1632(uint32_t v32, int *cmode, int *imm8)
 306 {
 307     if (v32 == deposit32(v32, 16, 16, v32)) {
 308         return is_shimm16(v32, cmode, imm8);
 309     } else {
 310         return is_shimm32(v32, cmode, imm8);
 311     }
 312 }
 313
 314 static int tcg_target_const_match(tcg_target_long val, TCGType type,
 315                                   const TCGArgConstraint *arg_ct)
 316 {
 317     int ct = arg_ct->ct;
 318
 319     if (ct & TCG_CT_CONST) {
 320         return 1;
 321     }
 322     if (type == TCG_TYPE_I32) {
 323         val = (int32_t)val;
 324     }
 325     if ((ct & TCG_CT_CONST_AIMM) && (is_aimm(val) || is_aimm(-val))) {
 326         return 1;
 327     }
 328     if ((ct & TCG_CT_CONST_LIMM) && is_limm(val)) {
 329         return 1;
 330     }
 331     if ((ct & TCG_CT_CONST_ZERO) && val == 0) {
 332         return 1;
 333     }
 334     if ((ct & TCG_CT_CONST_MONE) && val == -1) {
 335         return 1;
 336     }
 337
 338     switch (ct & (TCG_CT_CONST_ORRI | TCG_CT_CONST_ANDI)) {
 339     case 0:
 340         break;
 341     case TCG_CT_CONST_ANDI:
 342         val = ~val;
 343         /* fallthru */
 344     case TCG_CT_CONST_ORRI:
 345         if (val == deposit64(val, 32, 32, val)) {
 346             int cmode, imm8;
 347             return is_shimm1632(val, &cmode, &imm8);
 348         }
 349         break;
 350     default:
 351         /* Both bits should not be set for the same insn.  */
 352         g_assert_not_reached();
 353     }
 354
 355     return 0;
 356 }
 357
/* AArch64 condition codes.  The emitters below place these values
   directly into the condition field of the instruction.  */
enum aarch64_cond_code {
    COND_EQ = 0x0,
    COND_NE = 0x1,
    COND_CS = 0x2,     /* Unsigned greater or equal */
    COND_HS = COND_CS, /* ALIAS greater or equal */
    COND_CC = 0x3,     /* Unsigned less than */
    COND_LO = COND_CC, /* ALIAS Lower */
    COND_MI = 0x4,     /* Negative */
    COND_PL = 0x5,     /* Zero or greater */
    COND_VS = 0x6,     /* Overflow */
    COND_VC = 0x7,     /* No overflow */
    COND_HI = 0x8,     /* Unsigned greater than */
    COND_LS = 0x9,     /* Unsigned less or equal */
    COND_GE = 0xa,
    COND_LT = 0xb,
    COND_GT = 0xc,
    COND_LE = 0xd,
    COND_AL = 0xe,
    COND_NV = 0xf, /* behaves like COND_AL here */
};
 378
/* Map a TCGCond comparison to the AArch64 condition code that tests it.
   Only the ten conditions listed here have an entry.  */
static const enum aarch64_cond_code tcg_cond_to_aarch64[] = {
    [TCG_COND_EQ] = COND_EQ,
    [TCG_COND_NE] = COND_NE,
    [TCG_COND_LT] = COND_LT,
    [TCG_COND_GE] = COND_GE,
    [TCG_COND_LE] = COND_LE,
    [TCG_COND_GT] = COND_GT,
    /* unsigned */
    [TCG_COND_LTU] = COND_LO,
    [TCG_COND_GTU] = COND_HI,
    [TCG_COND_GEU] = COND_HS,
    [TCG_COND_LEU] = COND_LS,
};
 392
/* Load/store kind.  The I3312_* opcodes in AArch64Insn shift this
   value left by 22 to form part of the instruction encoding.  */
typedef enum {
    LDST_ST = 0,    /* store */
    LDST_LD = 1,    /* load */
    LDST_LD_S_X = 2,  /* load and sign-extend into Xt */
    LDST_LD_S_W = 3,  /* load and sign-extend into Wt */
} AArch64LdstType;
 399
/* We encode the format of the insn into the beginning of the name, so that
   we can have the preprocessor help "typecheck" the insn vs the output
   function.  Arm didn't provide us with nice names for the formats, so we
   use the section number of the architecture reference manual in which the
   instruction group is described.

   Each constant holds the fixed opcode bits of an instruction; the
   tcg_out_insn_NNNN emitters OR the operand fields into it.  A name of
   the form I<NNNN>_<OP> is meant to be used via
   tcg_out_insn(s, NNNN, OP, ...).  */
typedef enum {
    /* Compare and branch (immediate).  */
    I3201_CBZ       = 0x34000000,
    I3201_CBNZ      = 0x35000000,

    /* Conditional branch (immediate).  */
    I3202_B_C       = 0x54000000,

    /* Unconditional branch (immediate).  */
    I3206_B         = 0x14000000,
    I3206_BL        = 0x94000000,

    /* Unconditional branch (register).  */
    I3207_BR        = 0xd61f0000,
    I3207_BLR       = 0xd63f0000,
    I3207_RET       = 0xd65f0000,

    /* AdvSIMD load/store single structure.  */
    I3303_LD1R      = 0x0d40c000,

    /* Load literal for loading the address at pc-relative offset */
    I3305_LDR       = 0x58000000,
    I3305_LDR_v64   = 0x5c000000,
    I3305_LDR_v128  = 0x9c000000,

    /* Load/store register.  Described here as 3.3.12, but the helper
       that emits them can transform to 3.3.10 or 3.3.13.  */
    I3312_STRB      = 0x38000000 | LDST_ST << 22 | MO_8 << 30,
    I3312_STRH      = 0x38000000 | LDST_ST << 22 | MO_16 << 30,
    I3312_STRW      = 0x38000000 | LDST_ST << 22 | MO_32 << 30,
    I3312_STRX      = 0x38000000 | LDST_ST << 22 | MO_64 << 30,

    I3312_LDRB      = 0x38000000 | LDST_LD << 22 | MO_8 << 30,
    I3312_LDRH      = 0x38000000 | LDST_LD << 22 | MO_16 << 30,
    I3312_LDRW      = 0x38000000 | LDST_LD << 22 | MO_32 << 30,
    I3312_LDRX      = 0x38000000 | LDST_LD << 22 | MO_64 << 30,

    I3312_LDRSBW    = 0x38000000 | LDST_LD_S_W << 22 | MO_8 << 30,
    I3312_LDRSHW    = 0x38000000 | LDST_LD_S_W << 22 | MO_16 << 30,

    I3312_LDRSBX    = 0x38000000 | LDST_LD_S_X << 22 | MO_8 << 30,
    I3312_LDRSHX    = 0x38000000 | LDST_LD_S_X << 22 | MO_16 << 30,
    I3312_LDRSWX    = 0x38000000 | LDST_LD_S_X << 22 | MO_32 << 30,

    I3312_LDRVS     = 0x3c000000 | LDST_LD << 22 | MO_32 << 30,
    I3312_STRVS     = 0x3c000000 | LDST_ST << 22 | MO_32 << 30,

    I3312_LDRVD     = 0x3c000000 | LDST_LD << 22 | MO_64 << 30,
    I3312_STRVD     = 0x3c000000 | LDST_ST << 22 | MO_64 << 30,

    I3312_LDRVQ     = 0x3c000000 | 3 << 22 | 0 << 30,
    I3312_STRVQ     = 0x3c000000 | 2 << 22 | 0 << 30,

    /* Bits OR-ed into an I3312_* opcode by tcg_out_insn_3310 and
       tcg_out_insn_3313 to switch to the other load/store formats.  */
    I3312_TO_I3310  = 0x00200800,
    I3312_TO_I3313  = 0x01000000,

    /* Load/store register pair instructions.  */
    I3314_LDP       = 0x28400000,
    I3314_STP       = 0x28000000,

    /* Add/subtract immediate instructions.  */
    I3401_ADDI      = 0x11000000,
    I3401_ADDSI     = 0x31000000,
    I3401_SUBI      = 0x51000000,
    I3401_SUBSI     = 0x71000000,

    /* Bitfield instructions.  */
    I3402_BFM       = 0x33000000,
    I3402_SBFM      = 0x13000000,
    I3402_UBFM      = 0x53000000,

    /* Extract instruction.  */
    I3403_EXTR      = 0x13800000,

    /* Logical immediate instructions.  */
    I3404_ANDI      = 0x12000000,
    I3404_ORRI      = 0x32000000,
    I3404_EORI      = 0x52000000,

    /* Move wide immediate instructions.  */
    I3405_MOVN      = 0x12800000,
    I3405_MOVZ      = 0x52800000,
    I3405_MOVK      = 0x72800000,

    /* PC relative addressing instructions.  */
    I3406_ADR       = 0x10000000,
    I3406_ADRP      = 0x90000000,

    /* Add/subtract shifted register instructions (without a shift).  */
    I3502_ADD       = 0x0b000000,
    I3502_ADDS      = 0x2b000000,
    I3502_SUB       = 0x4b000000,
    I3502_SUBS      = 0x6b000000,

    /* Add/subtract shifted register instructions (with a shift).  */
    I3502S_ADD_LSL  = I3502_ADD,

    /* Add/subtract with carry instructions.  */
    I3503_ADC       = 0x1a000000,
    I3503_SBC       = 0x5a000000,

    /* Conditional select instructions.  */
    I3506_CSEL      = 0x1a800000,
    I3506_CSINC     = 0x1a800400,
    I3506_CSINV     = 0x5a800000,
    I3506_CSNEG     = 0x5a800400,

    /* Data-processing (1 source) instructions.  */
    I3507_CLZ       = 0x5ac01000,
    I3507_RBIT      = 0x5ac00000,
    I3507_REV16     = 0x5ac00400,
    I3507_REV32     = 0x5ac00800,
    I3507_REV64     = 0x5ac00c00,

    /* Data-processing (2 source) instructions.  */
    I3508_LSLV      = 0x1ac02000,
    I3508_LSRV      = 0x1ac02400,
    I3508_ASRV      = 0x1ac02800,
    I3508_RORV      = 0x1ac02c00,
    I3508_SMULH     = 0x9b407c00,
    I3508_UMULH     = 0x9bc07c00,
    I3508_UDIV      = 0x1ac00800,
    I3508_SDIV      = 0x1ac00c00,

    /* Data-processing (3 source) instructions.  */
    I3509_MADD      = 0x1b000000,
    I3509_MSUB      = 0x1b008000,

    /* Logical shifted register instructions (without a shift).  */
    I3510_AND       = 0x0a000000,
    I3510_BIC       = 0x0a200000,
    I3510_ORR       = 0x2a000000,
    I3510_ORN       = 0x2a200000,
    I3510_EOR       = 0x4a000000,
    I3510_EON       = 0x4a200000,
    I3510_ANDS      = 0x6a000000,

    /* Logical shifted register instructions (with a shift).
       Named 3502S so that it is emitted through tcg_out_insn_3502S.  */
    I3502S_AND_LSR  = I3510_AND | (1 << 22),

    /* AdvSIMD copy */
    I3605_DUP      = 0x0e000400,
    I3605_INS      = 0x4e001c00,
    I3605_UMOV     = 0x0e003c00,

    /* AdvSIMD modified immediate */
    I3606_MOVI      = 0x0f000400,
    I3606_MVNI      = 0x2f000400,
    I3606_BIC       = 0x2f001400,
    I3606_ORR       = 0x0f001400,

    /* AdvSIMD shift by immediate */
    I3614_SSHR      = 0x0f000400,
    I3614_SSRA      = 0x0f001400,
    I3614_SHL       = 0x0f005400,
    I3614_SLI       = 0x2f005400,
    I3614_USHR      = 0x2f000400,
    I3614_USRA      = 0x2f001400,

    /* AdvSIMD three same.  */
    I3616_ADD       = 0x0e208400,
    I3616_AND       = 0x0e201c00,
    I3616_BIC       = 0x0e601c00,
    I3616_BIF       = 0x2ee01c00,
    I3616_BIT       = 0x2ea01c00,
    I3616_BSL       = 0x2e601c00,
    I3616_EOR       = 0x2e201c00,
    I3616_MUL       = 0x0e209c00,
    I3616_ORR       = 0x0ea01c00,
    I3616_ORN       = 0x0ee01c00,
    I3616_SUB       = 0x2e208400,
    I3616_CMGT      = 0x0e203400,
    I3616_CMGE      = 0x0e203c00,
    I3616_CMTST     = 0x0e208c00,
    I3616_CMHI      = 0x2e203400,
    I3616_CMHS      = 0x2e203c00,
    I3616_CMEQ      = 0x2e208c00,
    I3616_SMAX      = 0x0e206400,
    I3616_SMIN      = 0x0e206c00,
    I3616_SSHL      = 0x0e204400,
    I3616_SQADD     = 0x0e200c00,
    I3616_SQSUB     = 0x0e202c00,
    I3616_UMAX      = 0x2e206400,
    I3616_UMIN      = 0x2e206c00,
    I3616_UQADD     = 0x2e200c00,
    I3616_UQSUB     = 0x2e202c00,
    I3616_USHL      = 0x2e204400,

    /* AdvSIMD two-reg misc.  */
    I3617_CMGT0     = 0x0e208800,
    I3617_CMEQ0     = 0x0e209800,
    I3617_CMLT0     = 0x0e20a800,
    I3617_CMGE0     = 0x2e208800,
    I3617_CMLE0     = 0x2e20a800,
    I3617_NOT       = 0x2e205800,
    I3617_ABS       = 0x0e20b800,
    I3617_NEG       = 0x2e20b800,

    /* System instructions.  */
    NOP             = 0xd503201f,
    DMB_ISH         = 0xd50338bf,
    /* NOTE(review): DMB_LD and DMB_ST look like modifier bits to be OR-ed
       into DMB_ISH rather than complete opcodes -- confirm at the users.  */
    DMB_LD          = 0x00000100,
    DMB_ST          = 0x00000200,
} AArch64Insn;
 609
 610 static inline uint32_t tcg_in32(TCGContext *s)
 611 {
 612     uint32_t v = *(uint32_t *)s->code_ptr;
 613     return v;
 614 }
 615
/* Emit an opcode with "type-checking" of the format.
   tcg_out_insn(s, FMT, OP, args...) expands to
   tcg_out_insn_FMT(s, I<FMT>_<OP>, args...), so naming an opcode that
   does not exist for that format fails to compile.  */
#define tcg_out_insn(S, FMT, OP, ...) \
    glue(tcg_out_insn_,FMT)(S, glue(glue(glue(I,FMT),_),OP), ## __VA_ARGS__)
 619
 620 static void tcg_out_insn_3303(TCGContext *s, AArch64Insn insn, bool q,
 621                               TCGReg rt, TCGReg rn, unsigned size)
 622 {
 623     tcg_out32(s, insn | (rt & 0x1f) | (rn << 5) | (size << 10) | (q << 30));
 624 }
 625
 626 static void tcg_out_insn_3305(TCGContext *s, AArch64Insn insn,
 627                               int imm19, TCGReg rt)
 628 {
 629     tcg_out32(s, insn | (imm19 & 0x7ffff) << 5 | rt);
 630 }
 631
 632 static void tcg_out_insn_3201(TCGContext *s, AArch64Insn insn, TCGType ext,
 633                               TCGReg rt, int imm19)
 634 {
 635     tcg_out32(s, insn | ext << 31 | (imm19 & 0x7ffff) << 5 | rt);
 636 }
 637
 638 static void tcg_out_insn_3202(TCGContext *s, AArch64Insn insn,
 639                               TCGCond c, int imm19)
 640 {
 641     tcg_out32(s, insn | tcg_cond_to_aarch64[c] | (imm19 & 0x7ffff) << 5);
 642 }
 643
 644 static void tcg_out_insn_3206(TCGContext *s, AArch64Insn insn, int imm26)
 645 {
 646     tcg_out32(s, insn | (imm26 & 0x03ffffff));
 647 }
 648
 649 static void tcg_out_insn_3207(TCGContext *s, AArch64Insn insn, TCGReg rn)
 650 {
 651     tcg_out32(s, insn | rn << 5);
 652 }
 653
 654 static void tcg_out_insn_3314(TCGContext *s, AArch64Insn insn,
 655                               TCGReg r1, TCGReg r2, TCGReg rn,
 656                               tcg_target_long ofs, bool pre, bool w)
 657 {
 658     insn |= 1u << 31; /* ext */
 659     insn |= pre << 24;
 660     insn |= w << 23;
 661
 662     tcg_debug_assert(ofs >= -0x200 && ofs < 0x200 && (ofs & 7) == 0);
 663     insn |= (ofs & (0x7f << 3)) << (15 - 3);
 664
 665     tcg_out32(s, insn | r2 << 10 | rn << 5 | r1);
 666 }
 667
 668 static void tcg_out_insn_3401(TCGContext *s, AArch64Insn insn, TCGType ext,
 669                               TCGReg rd, TCGReg rn, uint64_t aimm)
 670 {
 671     if (aimm > 0xfff) {
 672         tcg_debug_assert((aimm & 0xfff) == 0);
 673         aimm >>= 12;
 674         tcg_debug_assert(aimm <= 0xfff);
 675         aimm |= 1 << 12;  /* apply LSL 12 */
 676     }
 677     tcg_out32(s, insn | ext << 31 | aimm << 10 | rn << 5 | rd);
 678 }
 679
 680 /* This function can be used for both 3.4.2 (Bitfield) and 3.4.4
 681    (Logical immediate).  Both insn groups have N, IMMR and IMMS fields
 682    that feed the DecodeBitMasks pseudo function.  */
 683 static void tcg_out_insn_3402(TCGContext *s, AArch64Insn insn, TCGType ext,
 684                               TCGReg rd, TCGReg rn, int n, int immr, int imms)
 685 {
 686     tcg_out32(s, insn | ext << 31 | n << 22 | immr << 16 | imms << 10
 687               | rn << 5 | rd);
 688 }
 689
/* Logical immediate insns share the field layout of the bitfield group.  */
#define tcg_out_insn_3404  tcg_out_insn_3402
 691
 692 static void tcg_out_insn_3403(TCGContext *s, AArch64Insn insn, TCGType ext,
 693                               TCGReg rd, TCGReg rn, TCGReg rm, int imms)
 694 {
 695     tcg_out32(s, insn | ext << 31 | ext << 22 | rm << 16 | imms << 10
 696               | rn << 5 | rd);
 697 }
 698
 699 /* This function is used for the Move (wide immediate) instruction group.
 700    Note that SHIFT is a full shift count, not the 2 bit HW field. */
 701 static void tcg_out_insn_3405(TCGContext *s, AArch64Insn insn, TCGType ext,
 702                               TCGReg rd, uint16_t half, unsigned shift)
 703 {
 704     tcg_debug_assert((shift & ~0x30) == 0);
 705     tcg_out32(s, insn | ext << 31 | shift << (21 - 4) | half << 5 | rd);
 706 }
 707
 708 static void tcg_out_insn_3406(TCGContext *s, AArch64Insn insn,
 709                               TCGReg rd, int64_t disp)
 710 {
 711     tcg_out32(s, insn | (disp & 3) << 29 | (disp & 0x1ffffc) << (5 - 2) | rd);
 712 }
 713
 714 /* This function is for both 3.5.2 (Add/Subtract shifted register), for
 715    the rare occasion when we actually want to supply a shift amount.  */
 716 static inline void tcg_out_insn_3502S(TCGContext *s, AArch64Insn insn,
 717                                       TCGType ext, TCGReg rd, TCGReg rn,
 718                                       TCGReg rm, int imm6)
 719 {
 720     tcg_out32(s, insn | ext << 31 | rm << 16 | imm6 << 10 | rn << 5 | rd);
 721 }
 722
 723 /* This function is for 3.5.2 (Add/subtract shifted register),
 724    and 3.5.10 (Logical shifted register), for the vast majorty of cases
 725    when we don't want to apply a shift.  Thus it can also be used for
 726    3.5.3 (Add/subtract with carry) and 3.5.8 (Data processing 2 source).  */
 727 static void tcg_out_insn_3502(TCGContext *s, AArch64Insn insn, TCGType ext,
 728                               TCGReg rd, TCGReg rn, TCGReg rm)
 729 {
 730     tcg_out32(s, insn | ext << 31 | rm << 16 | rn << 5 | rd);
 731 }
 732
/* These groups use the same sf/Rm/Rn/Rd operand placement as
   tcg_out_insn_3502, so they simply alias it.  */
#define tcg_out_insn_3503  tcg_out_insn_3502
#define tcg_out_insn_3508  tcg_out_insn_3502
#define tcg_out_insn_3510  tcg_out_insn_3502
 736
 737 static void tcg_out_insn_3506(TCGContext *s, AArch64Insn insn, TCGType ext,
 738                               TCGReg rd, TCGReg rn, TCGReg rm, TCGCond c)
 739 {
 740     tcg_out32(s, insn | ext << 31 | rm << 16 | rn << 5 | rd
 741               | tcg_cond_to_aarch64[c] << 12);
 742 }
 743
 744 static void tcg_out_insn_3507(TCGContext *s, AArch64Insn insn, TCGType ext,
 745                               TCGReg rd, TCGReg rn)
 746 {
 747     tcg_out32(s, insn | ext << 31 | rn << 5 | rd);
 748 }
 749
 750 static void tcg_out_insn_3509(TCGContext *s, AArch64Insn insn, TCGType ext,
 751                               TCGReg rd, TCGReg rn, TCGReg rm, TCGReg ra)
 752 {
 753     tcg_out32(s, insn | ext << 31 | rm << 16 | ra << 10 | rn << 5 | rd);
 754 }
 755
 756 static void tcg_out_insn_3605(TCGContext *s, AArch64Insn insn, bool q,
 757                               TCGReg rd, TCGReg rn, int dst_idx, int src_idx)
 758 {
 759     /* Note that bit 11 set means general register input.  Therefore
 760        we can handle both register sets with one function.  */
 761     tcg_out32(s, insn | q << 30 | (dst_idx << 16) | (src_idx << 11)
 762               | (rd & 0x1f) | (~rn & 0x20) << 6 | (rn & 0x1f) << 5);
 763 }
 764
 765 static void tcg_out_insn_3606(TCGContext *s, AArch64Insn insn, bool q,
 766                               TCGReg rd, bool op, int cmode, uint8_t imm8)
 767 {
 768     tcg_out32(s, insn | q << 30 | op << 29 | cmode << 12 | (rd & 0x1f)
 769               | (imm8 & 0xe0) << (16 - 5) | (imm8 & 0x1f) << 5);
 770 }
 771
 772 static void tcg_out_insn_3614(TCGContext *s, AArch64Insn insn, bool q,
 773                               TCGReg rd, TCGReg rn, unsigned immhb)
 774 {
 775     tcg_out32(s, insn | q << 30 | immhb << 16
 776               | (rn & 0x1f) << 5 | (rd & 0x1f));
 777 }
 778
 779 static void tcg_out_insn_3616(TCGContext *s, AArch64Insn insn, bool q,
 780                               unsigned size, TCGReg rd, TCGReg rn, TCGReg rm)
 781 {
 782     tcg_out32(s, insn | q << 30 | (size << 22) | (rm & 0x1f) << 16
 783               | (rn & 0x1f) << 5 | (rd & 0x1f));
 784 }
 785
 786 static void tcg_out_insn_3617(TCGContext *s, AArch64Insn insn, bool q,
 787                               unsigned size, TCGReg rd, TCGReg rn)
 788 {
 789     tcg_out32(s, insn | q << 30 | (size << 22)
 790               | (rn & 0x1f) << 5 | (rd & 0x1f));
 791 }
 792
 793 static void tcg_out_insn_3310(TCGContext *s, AArch64Insn insn,
 794                               TCGReg rd, TCGReg base, TCGType ext,
 795                               TCGReg regoff)
 796 {
 797     /* Note the AArch64Insn constants above are for C3.3.12.  Adjust.  */
 798     tcg_out32(s, insn | I3312_TO_I3310 | regoff << 16 |
 799               0x4000 | ext << 13 | base << 5 | (rd & 0x1f));
 800 }
 801
 802 static void tcg_out_insn_3312(TCGContext *s, AArch64Insn insn,
 803                               TCGReg rd, TCGReg rn, intptr_t offset)
 804 {
 805     tcg_out32(s, insn | (offset & 0x1ff) << 12 | rn << 5 | (rd & 0x1f));
 806 }
 807
 808 static void tcg_out_insn_3313(TCGContext *s, AArch64Insn insn,
 809                               TCGReg rd, TCGReg rn, uintptr_t scaled_uimm)
 810 {
 811     /* Note the AArch64Insn constants above are for C3.3.12.  Adjust.  */
 812     tcg_out32(s, insn | I3312_TO_I3313 | scaled_uimm << 10
 813               | rn << 5 | (rd & 0x1f));
 814 }
 815
/* Register to register move using ORR (shifted register with no shift).
   Emits "ORR rd, xzr, rm", sized by EXT.  */
static void tcg_out_movr(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rm)
{
    tcg_out_insn(s, 3510, ORR, ext, rd, TCG_REG_XZR, rm);
}
 821
/* Register to register move using ADDI (move to/from SP).
   Emits an add-immediate of zero from RN into RD, sized by EXT.  */
static void tcg_out_movr_sp(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rn)
{
    tcg_out_insn(s, 3401, ADDI, ext, rd, rn, 0);
}
 827
/* This function is used for the Logical (immediate) instruction group.
   The value of LIMM must satisfy IS_LIMM.  See the comment above about
   only supporting simplified logical immediates.

   The contiguous run of ones in LIMM is converted into a rotate count R
   and a length-minus-one count C, which are passed as the IMMR and IMMS
   fields; EXT doubles as the N field.  */
static void tcg_out_logicali(TCGContext *s, AArch64Insn insn, TCGType ext,
                             TCGReg rd, TCGReg rn, uint64_t limm)
{
    unsigned h, l, r, c;

    tcg_debug_assert(is_limm(limm));

    h = clz64(limm);            /* leading zero bits */
    l = ctz64(limm);            /* trailing zero bits */
    if (l == 0) {
        r = 0;                  /* form 0....01....1 */
        c = ctz64(~limm) - 1;   /* trailing ones, minus one */
        if (h == 0) {
            r = clz64(~limm);   /* form 1..10..01..1 */
            c += r;             /* the ones wrap around: count both ends */
        }
    } else {
        r = 64 - l;             /* form 1....10....0 or 0..01..10..0 */
        c = r - h - 1;          /* number of ones, minus one */
    }
    if (ext == TCG_TYPE_I32) {
        /* Reduce both counts to the 32-bit element size.  */
        r &= 31;
        c &= 31;
    }

    tcg_out_insn_3404(s, insn, ext, rd, rn, ext, r, c);
}
 858
/*
 * Load the 64-bit pattern V64 into every 64-bit lane of vector register RD.
 *
 * TYPE selects a 64-bit or 128-bit destination (the Q bit).  The function
 * tries progressively more general immediate encodings -- byte splat,
 * per-byte 0x00/0xff mask, 16-bit and 32-bit replicated forms (with their
 * inverted and two-insn variants), 64-bit float immediate -- and falls
 * back to a constant-pool load when none of them match.
 */
static void tcg_out_dupi_vec(TCGContext *s, TCGType type,
                             TCGReg rd, tcg_target_long v64)
{
    bool q = type == TCG_TYPE_V128;
    int cmode, imm8, i;

    /* Test all bytes equal first.  */
    if (v64 == dup_const(MO_8, v64)) {
        imm8 = (uint8_t)v64;
        tcg_out_insn(s, 3606, MOVI, q, rd, 0, 0xe, imm8);
        return;
    }

    /*
     * Test all bytes 0x00 or 0xff second.  This can match cases that
     * might otherwise take 2 or 3 insns for MO_16 or MO_32 below.
     */
    for (i = imm8 = 0; i < 8; i++) {
        uint8_t byte = v64 >> (i * 8);
        if (byte == 0xff) {
            /* Bit i of imm8 records that byte i is all-ones.  */
            imm8 |= 1 << i;
        } else if (byte != 0) {
            goto fail_bytes;
        }
    }
    tcg_out_insn(s, 3606, MOVI, q, rd, 1, 0xe, imm8);
    return;
 fail_bytes:

    /*
     * Tests for various replications.  For each element width, if we
     * cannot find an expansion there's no point checking a larger
     * width because we already know by replication it cannot match.
     */
    if (v64 == dup_const(MO_16, v64)) {
        uint16_t v16 = v64;

        if (is_shimm16(v16, &cmode, &imm8)) {
            tcg_out_insn(s, 3606, MOVI, q, rd, 0, cmode, imm8);
            return;
        }
        /* Try the bitwise complement with MVNI.  */
        if (is_shimm16(~v16, &cmode, &imm8)) {
            tcg_out_insn(s, 3606, MVNI, q, rd, 0, cmode, imm8);
            return;
        }

        /*
         * Otherwise, all remaining constants can be loaded in two insns:
         * rd = v16 & 0xff, rd |= v16 & 0xff00.
         */
        tcg_out_insn(s, 3606, MOVI, q, rd, 0, 0x8, v16 & 0xff);
        tcg_out_insn(s, 3606, ORR, q, rd, 0, 0xa, v16 >> 8);
        return;
    } else if (v64 == dup_const(MO_32, v64)) {
        uint32_t v32 = v64;
        uint32_t n32 = ~v32;

        if (is_shimm32(v32, &cmode, &imm8) ||
            is_soimm32(v32, &cmode, &imm8) ||
            is_fimm32(v32, &cmode, &imm8)) {
            tcg_out_insn(s, 3606, MOVI, q, rd, 0, cmode, imm8);
            return;
        }
        /* Same tests on the complement, emitted with MVNI.  */
        if (is_shimm32(n32, &cmode, &imm8) ||
            is_soimm32(n32, &cmode, &imm8)) {
            tcg_out_insn(s, 3606, MVNI, q, rd, 0, cmode, imm8);
            return;
        }

        /*
         * Restrict the set of constants to those we can load with
         * two instructions.  Others we load from the pool.
         */
        i = is_shimm32_pair(v32, &cmode, &imm8);
        if (i) {
            /* A nonzero i is used as the cmode of the second insn.  */
            tcg_out_insn(s, 3606, MOVI, q, rd, 0, cmode, imm8);
            tcg_out_insn(s, 3606, ORR, q, rd, 0, i, extract32(v32, i * 4, 8));
            return;
        }
        i = is_shimm32_pair(n32, &cmode, &imm8);
        if (i) {
            tcg_out_insn(s, 3606, MVNI, q, rd, 0, cmode, imm8);
            tcg_out_insn(s, 3606, BIC, q, rd, 0, i, extract32(n32, i * 4, 8));
            return;
        }
    } else if (is_fimm64(v64, &cmode, &imm8)) {
        tcg_out_insn(s, 3606, MOVI, q, rd, 1, cmode, imm8);
        return;
    }

    /*
     * As a last resort, load from the constant pool.  Sadly there
     * is no LD1R (literal), so store the full 16-byte vector.
     */
    if (type == TCG_TYPE_V128) {
        new_pool_l2(s, R_AARCH64_CONDBR19, s->code_ptr, 0, v64, v64);
        tcg_out_insn(s, 3305, LDR_v128, 0, rd);
    } else {
        new_pool_label(s, v64, R_AARCH64_CONDBR19, s->code_ptr, 0);
        tcg_out_insn(s, 3305, LDR_v64, 0, rd);
    }
}
 961
 962 static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
 963                             TCGReg rd, TCGReg rs)
 964 {
 965     int is_q = type - TCG_TYPE_V64;
 966     tcg_out_insn(s, 3605, DUP, is_q, rd, rs, 1 << vece, 0);
 967     return true;
 968 }
 969
 970 static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
 971                              TCGReg r, TCGReg base, intptr_t offset)
 972 {
 973     TCGReg temp = TCG_REG_TMP;
 974
 975     if (offset < -0xffffff || offset > 0xffffff) {
 976         tcg_out_movi(s, TCG_TYPE_PTR, temp, offset);
 977         tcg_out_insn(s, 3502, ADD, 1, temp, temp, base);
 978         base = temp;
 979     } else {
 980         AArch64Insn add_insn = I3401_ADDI;
 981
 982         if (offset < 0) {
 983             add_insn = I3401_SUBI;
 984             offset = -offset;
 985         }
 986         if (offset & 0xfff000) {
 987             tcg_out_insn_3401(s, add_insn, 1, temp, base, offset & 0xfff000);
 988             base = temp;
 989         }
 990         if (offset & 0xfff) {
 991             tcg_out_insn_3401(s, add_insn, 1, temp, base, offset & 0xfff);
 992             base = temp;
 993         }
 994     }
 995     tcg_out_insn(s, 3303, LD1R, type == TCG_TYPE_V128, r, base, vece);
 996     return true;
 997 }
 998
 999 static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg rd,
1000                          tcg_target_long value)
1001 {
1002     tcg_target_long svalue = value;
1003     tcg_target_long ivalue = ~value;
1004     tcg_target_long t0, t1, t2;
1005     int s0, s1;
1006     AArch64Insn opc;
1007
1008     switch (type) {
1009     case TCG_TYPE_I32:
1010     case TCG_TYPE_I64:
1011         tcg_debug_assert(rd < 32);
1012         break;
1013
1014     case TCG_TYPE_V64:
1015     case TCG_TYPE_V128:
1016         tcg_debug_assert(rd >= 32);
1017         tcg_out_dupi_vec(s, type, rd, value);
1018         return;
1019
1020     default:
1021         g_assert_not_reached();
1022     }
1023
1024     /* For 32-bit values, discard potential garbage in value.  For 64-bit
1025        values within [2**31, 2**32-1], we can create smaller sequences by
1026        interpreting this as a negative 32-bit number, while ensuring that
1027        the high 32 bits are cleared by setting SF=0.  */
1028     if (type == TCG_TYPE_I32 || (value & ~0xffffffffull) == 0) {
1029         svalue = (int32_t)value;
1030         value = (uint32_t)value;
1031         ivalue = (uint32_t)ivalue;
1032         type = TCG_TYPE_I32;
1033     }
1034
1035     /* Speed things up by handling the common case of small positive
1036        and negative values specially.  */
1037     if ((value & ~0xffffull) == 0) {
1038         tcg_out_insn(s, 3405, MOVZ, type, rd, value, 0);
1039         return;
1040     } else if ((ivalue & ~0xffffull) == 0) {
1041         tcg_out_insn(s, 3405, MOVN, type, rd, ivalue, 0);
1042         return;
1043     }
1044
1045     /* Check for bitfield immediates.  For the benefit of 32-bit quantities,
1046        use the sign-extended value.  That lets us match rotated values such
1047        as 0xff0000ff with the same 64-bit logic matching 0xffffffffff0000ff. */
1048     if (is_limm(svalue)) {
1049         tcg_out_logicali(s, I3404_ORRI, type, rd, TCG_REG_XZR, svalue);
1050         return;
1051     }
1052
1053     /* Look for host pointer values within 4G of the PC.  This happens
1054        often when loading pointers to QEMU's own data structures.  */
1055     if (type == TCG_TYPE_I64) {
1056         tcg_target_long disp = value - (intptr_t)s->code_ptr;
1057         if (disp == sextract64(disp, 0, 21)) {
1058             tcg_out_insn(s, 3406, ADR, rd, disp);
1059             return;
1060         }
1061         disp = (value >> 12) - ((intptr_t)s->code_ptr >> 12);
1062         if (disp == sextract64(disp, 0, 21)) {
1063             tcg_out_insn(s, 3406, ADRP, rd, disp);
1064             if (value & 0xfff) {
1065                 tcg_out_insn(s, 3401, ADDI, type, rd, rd, value & 0xfff);
1066             }
1067             return;
1068         }
1069     }
1070
1071     /* Would it take fewer insns to begin with MOVN?  */
1072     if (ctpop64(value) >= 32) {
1073         t0 = ivalue;
1074         opc = I3405_MOVN;
1075     } else {
1076         t0 = value;
1077         opc = I3405_MOVZ;
1078     }
1079     s0 = ctz64(t0) & (63 & -16);
1080     t1 = t0 & ~(0xffffUL << s0);
1081     s1 = ctz64(t1) & (63 & -16);
1082     t2 = t1 & ~(0xffffUL << s1);
1083     if (t2 == 0) {
1084         tcg_out_insn_3405(s, opc, type, rd, t0 >> s0, s0);
1085         if (t1 != 0) {
1086             tcg_out_insn(s, 3405, MOVK, type, rd, value >> s1, s1);
1087         }
1088         return;
1089     }
1090
1091     /* For more than 2 insns, dump it into the constant pool.  */
1092     new_pool_label(s, value, R_AARCH64_CONDBR19, s->code_ptr, 0);
1093     tcg_out_insn(s, 3305, LDR, 0, rd);
1094 }
1095
/* Define something more legible for general use: tcg_out_ldst_r is the
   register-offset load/store emitter, an alias of tcg_out_insn_3310.  */
#define tcg_out_ldst_r  tcg_out_insn_3310
1098
1099 static void tcg_out_ldst(TCGContext *s, AArch64Insn insn, TCGReg rd,
1100                          TCGReg rn, intptr_t offset, int lgsize)
1101 {
1102     /* If the offset is naturally aligned and in range, then we can
1103        use the scaled uimm12 encoding */
1104     if (offset >= 0 && !(offset & ((1 << lgsize) - 1))) {
1105         uintptr_t scaled_uimm = offset >> lgsize;
1106         if (scaled_uimm <= 0xfff) {
1107             tcg_out_insn_3313(s, insn, rd, rn, scaled_uimm);
1108             return;
1109         }
1110     }
1111
1112     /* Small signed offsets can use the unscaled encoding.  */
1113     if (offset >= -256 && offset < 256) {
1114         tcg_out_insn_3312(s, insn, rd, rn, offset);
1115         return;
1116     }
1117
1118     /* Worst-case scenario, move offset to temp register, use reg offset.  */
1119     tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, offset);
1120     tcg_out_ldst_r(s, insn, rd, rn, TCG_TYPE_I64, TCG_REG_TMP);
1121 }
1122
1123 static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg)
1124 {
1125     if (ret == arg) {
1126         return true;
1127     }
1128     switch (type) {
1129     case TCG_TYPE_I32:
1130     case TCG_TYPE_I64:
1131         if (ret < 32 && arg < 32) {
1132             tcg_out_movr(s, type, ret, arg);
1133             break;
1134         } else if (ret < 32) {
1135             tcg_out_insn(s, 3605, UMOV, type, ret, arg, 0, 0);
1136             break;
1137         } else if (arg < 32) {
1138             tcg_out_insn(s, 3605, INS, 0, ret, arg, 4 << type, 0);
1139             break;
1140         }
1141         /* FALLTHRU */
1142
1143     case TCG_TYPE_V64:
1144         tcg_debug_assert(ret >= 32 && arg >= 32);
1145         tcg_out_insn(s, 3616, ORR, 0, 0, ret, arg, arg);
1146         break;
1147     case TCG_TYPE_V128:
1148         tcg_debug_assert(ret >= 32 && arg >= 32);
1149         tcg_out_insn(s, 3616, ORR, 1, 0, ret, arg, arg);
1150         break;
1151
1152     default:
1153         g_assert_not_reached();
1154     }
1155     return true;
1156 }
1157
1158 static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret,
1159                        TCGReg base, intptr_t ofs)
1160 {
1161     AArch64Insn insn;
1162     int lgsz;
1163
1164     switch (type) {
1165     case TCG_TYPE_I32:
1166         insn = (ret < 32 ? I3312_LDRW : I3312_LDRVS);
1167         lgsz = 2;
1168         break;
1169     case TCG_TYPE_I64:
1170         insn = (ret < 32 ? I3312_LDRX : I3312_LDRVD);
1171         lgsz = 3;
1172         break;
1173     case TCG_TYPE_V64:
1174         insn = I3312_LDRVD;
1175         lgsz = 3;
1176         break;
1177     case TCG_TYPE_V128:
1178         insn = I3312_LDRVQ;
1179         lgsz = 4;
1180         break;
1181     default:
1182         g_assert_not_reached();
1183     }
1184     tcg_out_ldst(s, insn, ret, base, ofs, lgsz);
1185 }
1186
1187 static void tcg_out_st(TCGContext *s, TCGType type, TCGReg src,
1188                        TCGReg base, intptr_t ofs)
1189 {
1190     AArch64Insn insn;
1191     int lgsz;
1192
1193     switch (type) {
1194     case TCG_TYPE_I32:
1195         insn = (src < 32 ? I3312_STRW : I3312_STRVS);
1196         lgsz = 2;
1197         break;
1198     case TCG_TYPE_I64:
1199         insn = (src < 32 ? I3312_STRX : I3312_STRVD);
1200         lgsz = 3;
1201         break;
1202     case TCG_TYPE_V64:
1203         insn = I3312_STRVD;
1204         lgsz = 3;
1205         break;
1206     case TCG_TYPE_V128:
1207         insn = I3312_STRVQ;
1208         lgsz = 4;
1209         break;
1210     default:
1211         g_assert_not_reached();
1212     }
1213     tcg_out_ldst(s, insn, src, base, ofs, lgsz);
1214 }
1215
1216 static inline bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
1217                                TCGReg base, intptr_t ofs)
1218 {
1219     if (type <= TCG_TYPE_I64 && val == 0) {
1220         tcg_out_st(s, type, TCG_REG_XZR, base, ofs);
1221         return true;
1222     }
1223     return false;
1224 }
1225
1226 static inline void tcg_out_bfm(TCGContext *s, TCGType ext, TCGReg rd,
1227                                TCGReg rn, unsigned int a, unsigned int b)
1228 {
1229     tcg_out_insn(s, 3402, BFM, ext, rd, rn, ext, a, b);
1230 }
1231
1232 static inline void tcg_out_ubfm(TCGContext *s, TCGType ext, TCGReg rd,
1233                                 TCGReg rn, unsigned int a, unsigned int b)
1234 {
1235     tcg_out_insn(s, 3402, UBFM, ext, rd, rn, ext, a, b);
1236 }
1237
1238 static inline void tcg_out_sbfm(TCGContext *s, TCGType ext, TCGReg rd,
1239                                 TCGReg rn, unsigned int a, unsigned int b)
1240 {
1241     tcg_out_insn(s, 3402, SBFM, ext, rd, rn, ext, a, b);
1242 }
1243
1244 static inline void tcg_out_extr(TCGContext *s, TCGType ext, TCGReg rd,
1245                                 TCGReg rn, TCGReg rm, unsigned int a)
1246 {
1247     tcg_out_insn(s, 3403, EXTR, ext, rd, rn, rm, a);
1248 }
1249
1250 static inline void tcg_out_shl(TCGContext *s, TCGType ext,
1251                                TCGReg rd, TCGReg rn, unsigned int m)
1252 {
1253     int bits = ext ? 64 : 32;
1254     int max = bits - 1;
1255     tcg_out_ubfm(s, ext, rd, rn, bits - (m & max), max - (m & max));
1256 }
1257
1258 static inline void tcg_out_shr(TCGContext *s, TCGType ext,
1259                                TCGReg rd, TCGReg rn, unsigned int m)
1260 {
1261     int max = ext ? 63 : 31;
1262     tcg_out_ubfm(s, ext, rd, rn, m & max, max);
1263 }
1264
1265 static inline void tcg_out_sar(TCGContext *s, TCGType ext,
1266                                TCGReg rd, TCGReg rn, unsigned int m)
1267 {
1268     int max = ext ? 63 : 31;
1269     tcg_out_sbfm(s, ext, rd, rn, m & max, max);
1270 }
1271
1272 static inline void tcg_out_rotr(TCGContext *s, TCGType ext,
1273                                 TCGReg rd, TCGReg rn, unsigned int m)
1274 {
1275     int max = ext ? 63 : 31;
1276     tcg_out_extr(s, ext, rd, rn, rn, m & max);
1277 }
1278
1279 static inline void tcg_out_rotl(TCGContext *s, TCGType ext,
1280                                 TCGReg rd, TCGReg rn, unsigned int m)
1281 {
1282     int bits = ext ? 64 : 32;
1283     int max = bits - 1;
1284     tcg_out_extr(s, ext, rd, rn, rn, bits - (m & max));
1285 }
1286
1287 static inline void tcg_out_dep(TCGContext *s, TCGType ext, TCGReg rd,
1288                                TCGReg rn, unsigned lsb, unsigned width)
1289 {
1290     unsigned size = ext ? 64 : 32;
1291     unsigned a = (size - lsb) & (size - 1);
1292     unsigned b = width - 1;
1293     tcg_out_bfm(s, ext, rd, rn, a, b);
1294 }
1295
1296 static void tcg_out_cmp(TCGContext *s, TCGType ext, TCGReg a,
1297                         tcg_target_long b, bool const_b)
1298 {
1299     if (const_b) {
1300         /* Using CMP or CMN aliases.  */
1301         if (b >= 0) {
1302             tcg_out_insn(s, 3401, SUBSI, ext, TCG_REG_XZR, a, b);
1303         } else {
1304             tcg_out_insn(s, 3401, ADDSI, ext, TCG_REG_XZR, a, -b);
1305         }
1306     } else {
1307         /* Using CMP alias SUBS wzr, Wn, Wm */
1308         tcg_out_insn(s, 3502, SUBS, ext, TCG_REG_XZR, a, b);
1309     }
1310 }
1311
1312 static inline void tcg_out_goto(TCGContext *s, tcg_insn_unit *target)
1313 {
1314     ptrdiff_t offset = target - s->code_ptr;
1315     tcg_debug_assert(offset == sextract64(offset, 0, 26));
1316     tcg_out_insn(s, 3206, B, offset);
1317 }
1318
1319 static inline void tcg_out_goto_long(TCGContext *s, tcg_insn_unit *target)
1320 {
1321     ptrdiff_t offset = target - s->code_ptr;
1322     if (offset == sextract64(offset, 0, 26)) {
1323         tcg_out_insn(s, 3206, BL, offset);
1324     } else {
1325         tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, (intptr_t)target);
1326         tcg_out_insn(s, 3207, BR, TCG_REG_TMP);
1327     }
1328 }
1329
1330 static inline void tcg_out_callr(TCGContext *s, TCGReg reg)
1331 {
1332     tcg_out_insn(s, 3207, BLR, reg);
1333 }
1334
1335 static inline void tcg_out_call(TCGContext *s, tcg_insn_unit *target)
1336 {
1337     ptrdiff_t offset = target - s->code_ptr;
1338     if (offset == sextract64(offset, 0, 26)) {
1339         tcg_out_insn(s, 3206, BL, offset);
1340     } else {
1341         tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, (intptr_t)target);
1342         tcg_out_callr(s, TCG_REG_TMP);
1343     }
1344 }
1345
/*
 * Rewrite the two-instruction jump slot at JMP_ADDR so that it transfers
 * to ADDR.
 *
 * If the byte displacement passes the range check, the slot becomes
 * "B addr; NOP".  Otherwise it becomes an ADRP/ADD pair that forms ADDR
 * in TCG_REG_TMP.  Both instructions are written with a single 64-bit
 * atomic store, after which the instruction cache is flushed for the
 * 8-byte slot.  TC_PTR is not used here.
 */
void tb_target_set_jmp_target(uintptr_t tc_ptr, uintptr_t jmp_addr,
                              uintptr_t addr)
{
    tcg_insn_unit i1, i2;
    TCGType rt = TCG_TYPE_I64;
    TCGReg  rd = TCG_REG_TMP;
    uint64_t pair;

    ptrdiff_t offset = addr - jmp_addr;

    if (offset == sextract64(offset, 0, 26)) {
        /* The branch immediate is the byte offset divided by 4.  */
        i1 = I3206_B | ((offset >> 2) & 0x3ffffff);
        i2 = NOP;
    } else {
        /* Page (4K) displacement between the slot and the target.  */
        offset = (addr >> 12) - (jmp_addr >> 12);

        /* patch ADRP */
        i1 = I3406_ADRP | (offset & 3) << 29 | (offset & 0x1ffffc) << (5 - 2) | rd;
        /* patch ADDI */
        i2 = I3401_ADDI | rt << 31 | (addr & 0xfff) << 10 | rd << 5 | rd;
    }
    /* i1 occupies the low 32 bits of the pair, i2 the high 32 bits.  */
    pair = (uint64_t)i2 << 32 | i1;
    atomic_set((uint64_t *)jmp_addr, pair);
    flush_icache_range(jmp_addr, jmp_addr + 8);
}
1371
1372 static inline void tcg_out_goto_label(TCGContext *s, TCGLabel *l)
1373 {
1374     if (!l->has_value) {
1375         tcg_out_reloc(s, s->code_ptr, R_AARCH64_JUMP26, l, 0);
1376         tcg_out_insn(s, 3206, B, 0);
1377     } else {
1378         tcg_out_goto(s, l->u.value_ptr);
1379     }
1380 }
1381
1382 static void tcg_out_brcond(TCGContext *s, TCGType ext, TCGCond c, TCGArg a,
1383                            TCGArg b, bool b_const, TCGLabel *l)
1384 {
1385     intptr_t offset;
1386     bool need_cmp;
1387
1388     if (b_const && b == 0 && (c == TCG_COND_EQ || c == TCG_COND_NE)) {
1389         need_cmp = false;
1390     } else {
1391         need_cmp = true;
1392         tcg_out_cmp(s, ext, a, b, b_const);
1393     }
1394
1395     if (!l->has_value) {
1396         tcg_out_reloc(s, s->code_ptr, R_AARCH64_CONDBR19, l, 0);
1397         offset = tcg_in32(s) >> 5;
1398     } else {
1399         offset = l->u.value_ptr - s->code_ptr;
1400         tcg_debug_assert(offset == sextract64(offset, 0, 19));
1401     }
1402
1403     if (need_cmp) {
1404         tcg_out_insn(s, 3202, B_C, c, offset);
1405     } else if (c == TCG_COND_EQ) {
1406         tcg_out_insn(s, 3201, CBZ, ext, a, offset);
1407     } else {
1408         tcg_out_insn(s, 3201, CBNZ, ext, a, offset);
1409     }
1410 }
1411
1412 static inline void tcg_out_rev64(TCGContext *s, TCGReg rd, TCGReg rn)
1413 {
1414     tcg_out_insn(s, 3507, REV64, TCG_TYPE_I64, rd, rn);
1415 }
1416
1417 static inline void tcg_out_rev32(TCGContext *s, TCGReg rd, TCGReg rn)
1418 {
1419     tcg_out_insn(s, 3507, REV32, TCG_TYPE_I32, rd, rn);
1420 }
1421
1422 static inline void tcg_out_rev16(TCGContext *s, TCGReg rd, TCGReg rn)
1423 {
1424     tcg_out_insn(s, 3507, REV16, TCG_TYPE_I32, rd, rn);
1425 }
1426
1427 static inline void tcg_out_sxt(TCGContext *s, TCGType ext, MemOp s_bits,
1428                                TCGReg rd, TCGReg rn)
1429 {
1430     /* Using ALIASes SXTB, SXTH, SXTW, of SBFM Xd, Xn, #0, #7|15|31 */
1431     int bits = (8 << s_bits) - 1;
1432     tcg_out_sbfm(s, ext, rd, rn, 0, bits);
1433 }
1434
1435 static inline void tcg_out_uxt(TCGContext *s, MemOp s_bits,
1436                                TCGReg rd, TCGReg rn)
1437 {
1438     /* Using ALIASes UXTB, UXTH of UBFM Wd, Wn, #0, #7|15 */
1439     int bits = (8 << s_bits) - 1;
1440     tcg_out_ubfm(s, 0, rd, rn, 0, bits);
1441 }
1442
1443 static void tcg_out_addsubi(TCGContext *s, int ext, TCGReg rd,
1444                             TCGReg rn, int64_t aimm)
1445 {
1446     if (aimm >= 0) {
1447         tcg_out_insn(s, 3401, ADDI, ext, rd, rn, aimm);
1448     } else {
1449         tcg_out_insn(s, 3401, SUBI, ext, rd, rn, -aimm);
1450     }
1451 }
1452
1453 static inline void tcg_out_addsub2(TCGContext *s, TCGType ext, TCGReg rl,
1454                                    TCGReg rh, TCGReg al, TCGReg ah,
1455                                    tcg_target_long bl, tcg_target_long bh,
1456                                    bool const_bl, bool const_bh, bool sub)
1457 {
1458     TCGReg orig_rl = rl;
1459     AArch64Insn insn;
1460
1461     if (rl == ah || (!const_bh && rl == bh)) {
1462         rl = TCG_REG_TMP;
1463     }
1464
1465     if (const_bl) {
1466         insn = I3401_ADDSI;
1467         if ((bl < 0) ^ sub) {
1468             insn = I3401_SUBSI;
1469             bl = -bl;
1470         }
1471         if (unlikely(al == TCG_REG_XZR)) {
1472             /* ??? We want to allow al to be zero for the benefit of
1473                negation via subtraction.  However, that leaves open the
1474                possibility of adding 0+const in the low part, and the
1475                immediate add instructions encode XSP not XZR.  Don't try
1476                anything more elaborate here than loading another zero.  */
1477             al = TCG_REG_TMP;
1478             tcg_out_movi(s, ext, al, 0);
1479         }
1480         tcg_out_insn_3401(s, insn, ext, rl, al, bl);
1481     } else {
1482         tcg_out_insn_3502(s, sub ? I3502_SUBS : I3502_ADDS, ext, rl, al, bl);
1483     }
1484
1485     insn = I3503_ADC;
1486     if (const_bh) {
1487         /* Note that the only two constants we support are 0 and -1, and
1488            that SBC = rn + ~rm + c, so adc -1 is sbc 0, and vice-versa.  */
1489         if ((bh != 0) ^ sub) {
1490             insn = I3503_SBC;
1491         }
1492         bh = TCG_REG_XZR;
1493     } else if (sub) {
1494         insn = I3503_SBC;
1495     }
1496     tcg_out_insn_3503(s, insn, ext, rh, ah, bh);
1497
1498     tcg_out_mov(s, ext, orig_rl, rl);
1499 }
1500
1501 static inline void tcg_out_mb(TCGContext *s, TCGArg a0)
1502 {
1503     static const uint32_t sync[] = {
1504         [0 ... TCG_MO_ALL]            = DMB_ISH | DMB_LD | DMB_ST,
1505         [TCG_MO_ST_ST]                = DMB_ISH | DMB_ST,
1506         [TCG_MO_LD_LD]                = DMB_ISH | DMB_LD,
1507         [TCG_MO_LD_ST]                = DMB_ISH | DMB_LD,
1508         [TCG_MO_LD_ST | TCG_MO_LD_LD] = DMB_ISH | DMB_LD,
1509     };
1510     tcg_out32(s, sync[a0 & TCG_MO_ALL]);
1511 }
1512
1513 static void tcg_out_cltz(TCGContext *s, TCGType ext, TCGReg d,
1514                          TCGReg a0, TCGArg b, bool const_b, bool is_ctz)
1515 {
1516     TCGReg a1 = a0;
1517     if (is_ctz) {
1518         a1 = TCG_REG_TMP;
1519         tcg_out_insn(s, 3507, RBIT, ext, a1, a0);
1520     }
1521     if (const_b && b == (ext ? 64 : 32)) {
1522         tcg_out_insn(s, 3507, CLZ, ext, d, a1);
1523     } else {
1524         AArch64Insn sel = I3506_CSEL;
1525
1526         tcg_out_cmp(s, ext, a0, 0, 1);
1527         tcg_out_insn(s, 3507, CLZ, ext, TCG_REG_TMP, a1);
1528
1529         if (const_b) {
1530             if (b == -1) {
1531                 b = TCG_REG_XZR;
1532                 sel = I3506_CSINV;
1533             } else if (b == 0) {
1534                 b = TCG_REG_XZR;
1535             } else {
1536                 tcg_out_movi(s, ext, d, b);
1537                 b = d;
1538             }
1539         }
1540         tcg_out_insn_3506(s, sel, ext, d, TCG_REG_TMP, b, TCG_COND_NE);
1541     }
1542 }
1543
1544 #ifdef CONFIG_SOFTMMU
1545 #include "../tcg-ldst.inc.c"
1546
/* Slow-path load helpers, indexed by the MO_BSWAP | MO_SIZE bits of the
 * memory operation.  Entries not listed are left NULL.
 *
 * helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr,
 *                                     TCGMemOpIdx oi, uintptr_t ra)
 */
static void * const qemu_ld_helpers[16] = {
    [MO_UB]   = helper_ret_ldub_mmu,
    [MO_LEUW] = helper_le_lduw_mmu,
    [MO_LEUL] = helper_le_ldul_mmu,
    [MO_LEQ]  = helper_le_ldq_mmu,
    [MO_BEUW] = helper_be_lduw_mmu,
    [MO_BEUL] = helper_be_ldul_mmu,
    [MO_BEQ]  = helper_be_ldq_mmu,
};
1559
/* Slow-path store helpers, indexed by the MO_BSWAP | MO_SIZE bits of the
 * memory operation.  Entries not listed are left NULL.
 *
 * helper signature: helper_ret_st_mmu(CPUState *env, target_ulong addr,
 *                                     uintxx_t val, TCGMemOpIdx oi,
 *                                     uintptr_t ra)
 */
static void * const qemu_st_helpers[16] = {
    [MO_UB]   = helper_ret_stb_mmu,
    [MO_LEUW] = helper_le_stw_mmu,
    [MO_LEUL] = helper_le_stl_mmu,
    [MO_LEQ]  = helper_le_stq_mmu,
    [MO_BEUW] = helper_be_stw_mmu,
    [MO_BEUL] = helper_be_stl_mmu,
    [MO_BEQ]  = helper_be_stq_mmu,
};
1573
1574 static inline void tcg_out_adr(TCGContext *s, TCGReg rd, void *target)
1575 {
1576     ptrdiff_t offset = tcg_pcrel_diff(s, target);
1577     tcg_debug_assert(offset == sextract64(offset, 0, 21));
1578     tcg_out_insn(s, 3406, ADR, rd, offset);
1579 }
1580
1581 static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
1582 {
1583     TCGMemOpIdx oi = lb->oi;
1584     MemOp opc = get_memop(oi);
1585     MemOp size = opc & MO_SIZE;
1586
1587     if (!reloc_pc19(lb->label_ptr[0], s->code_ptr)) {
1588         return false;
1589     }
1590
1591     tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_X0, TCG_AREG0);
1592     tcg_out_mov(s, TARGET_LONG_BITS == 64, TCG_REG_X1, lb->addrlo_reg);
1593     tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_X2, oi);
1594     tcg_out_adr(s, TCG_REG_X3, lb->raddr);
1595     tcg_out_call(s, qemu_ld_helpers[opc & (MO_BSWAP | MO_SIZE)]);
1596     if (opc & MO_SIGN) {
1597         tcg_out_sxt(s, lb->type, size, lb->datalo_reg, TCG_REG_X0);
1598     } else {
1599         tcg_out_mov(s, size == MO_64, lb->datalo_reg, TCG_REG_X0);
1600     }
1601
1602     tcg_out_goto(s, lb->raddr);
1603     return true;
1604 }
1605
1606 static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
1607 {
1608     TCGMemOpIdx oi = lb->oi;
1609     MemOp opc = get_memop(oi);
1610     MemOp size = opc & MO_SIZE;
1611
1612     if (!reloc_pc19(lb->label_ptr[0], s->code_ptr)) {
1613         return false;
1614     }
1615
1616     tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_X0, TCG_AREG0);
1617     tcg_out_mov(s, TARGET_LONG_BITS == 64, TCG_REG_X1, lb->addrlo_reg);
1618     tcg_out_mov(s, size == MO_64, TCG_REG_X2, lb->datalo_reg);
1619     tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_X3, oi);
1620     tcg_out_adr(s, TCG_REG_X4, lb->raddr);
1621     tcg_out_call(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]);
1622     tcg_out_goto(s, lb->raddr);
1623     return true;
1624 }
1625
1626 static void add_qemu_ldst_label(TCGContext *s, bool is_ld, TCGMemOpIdx oi,
1627                                 TCGType ext, TCGReg data_reg, TCGReg addr_reg,
1628                                 tcg_insn_unit *raddr, tcg_insn_unit *label_ptr)
1629 {
1630     TCGLabelQemuLdst *label = new_ldst_label(s);
1631
1632     label->is_ld = is_ld;
1633     label->oi = oi;
1634     label->type = ext;
1635     label->datalo_reg = data_reg;
1636     label->addrlo_reg = addr_reg;
1637     label->raddr = raddr;
1638     label->label_ptr[0] = label_ptr;
1639 }
1640
/* We expect to use a 7-bit scaled negative offset from ENV, so the
   mask/table pair must lie within [-512, 0] bytes of it.  */
QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0);
QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -512);

/* These offsets are built into the LDP below: mask must be the first
   field and table must follow it immediately at byte 8.  */
QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, mask) != 0);
QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, table) != 8);
1648
/* Load and compare a TLB entry, emitting the conditional jump to the
   slow path for the failure case, which will be patched later when finalizing
   the slow path. Generated code returns the host addend in X1,
   clobbers X0,X2,X3,TMP.

   ADDR_REG holds the guest address, OPC describes the access size and
   alignment, MEM_INDEX selects the TLB, and IS_READ chooses between the
   addr_read and addr_write comparators.  On return *LABEL_PTR points at
   the emitted B.NE so that the caller can record it for patching.  */
static void tcg_out_tlb_read(TCGContext *s, TCGReg addr_reg, MemOp opc,
                             tcg_insn_unit **label_ptr, int mem_index,
                             bool is_read)
{
    unsigned a_bits = get_alignment_bits(opc);
    unsigned s_bits = opc & MO_SIZE;
    unsigned a_mask = (1u << a_bits) - 1;
    unsigned s_mask = (1u << s_bits) - 1;
    TCGReg x3;
    TCGType mask_type;
    uint64_t compare_mask;

    /* The index extraction needs 64-bit arithmetic only when the page
       bits plus the maximum dynamic TLB bits exceed 32.  */
    mask_type = (TARGET_PAGE_BITS + CPU_TLB_DYN_MAX_BITS > 32
                 ? TCG_TYPE_I64 : TCG_TYPE_I32);

    /* Load env_tlb(env)->f[mmu_idx].{mask,table} into {x0,x1}.  */
    tcg_out_insn(s, 3314, LDP, TCG_REG_X0, TCG_REG_X1, TCG_AREG0,
                 TLB_MASK_TABLE_OFS(mem_index), 1, 0);

    /* Extract the TLB index from the address into X0.  */
    tcg_out_insn(s, 3502S, AND_LSR, mask_type == TCG_TYPE_I64,
                 TCG_REG_X0, TCG_REG_X0, addr_reg,
                 TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);

    /* Add the tlb_table pointer, creating the CPUTLBEntry address into X1.  */
    tcg_out_insn(s, 3502, ADD, 1, TCG_REG_X1, TCG_REG_X1, TCG_REG_X0);

    /* Load the tlb comparator into X0, and the fast path addend into X1.  */
    tcg_out_ld(s, TCG_TYPE_TL, TCG_REG_X0, TCG_REG_X1, is_read
               ? offsetof(CPUTLBEntry, addr_read)
               : offsetof(CPUTLBEntry, addr_write));
    tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_X1, TCG_REG_X1,
               offsetof(CPUTLBEntry, addend));

    /* For aligned accesses, we check the first byte and include the alignment
       bits within the address.  For unaligned access, we check that we don't
       cross pages using the address of the last byte of the access.  */
    if (a_bits >= s_bits) {
        x3 = addr_reg;
    } else {
        tcg_out_insn(s, 3401, ADDI, TARGET_LONG_BITS == 64,
                     TCG_REG_X3, addr_reg, s_mask - a_mask);
        x3 = TCG_REG_X3;
    }
    /* Keep the page number together with the required alignment bits.  */
    compare_mask = (uint64_t)TARGET_PAGE_MASK | a_mask;

    /* Store the page mask part of the address into X3.  */
    tcg_out_logicali(s, I3404_ANDI, TARGET_LONG_BITS == 64,
                     TCG_REG_X3, x3, compare_mask);

    /* Perform the address comparison. */
    tcg_out_cmp(s, TARGET_LONG_BITS == 64, TCG_REG_X0, TCG_REG_X3, 0);

    /* If not equal, we jump to the slow path. */
    *label_ptr = s->code_ptr;
    tcg_out_insn(s, 3202, B_C, TCG_COND_NE, 0);
}
1710
1711 #endif /* CONFIG_SOFTMMU */
1712
1713 static void tcg_out_qemu_ld_direct(TCGContext *s, MemOp memop, TCGType ext,
1714                                    TCGReg data_r, TCGReg addr_r,
1715                                    TCGType otype, TCGReg off_r)
1716 {
1717     const MemOp bswap = memop & MO_BSWAP;
1718
1719     switch (memop & MO_SSIZE) {
1720     case MO_UB:
1721         tcg_out_ldst_r(s, I3312_LDRB, data_r, addr_r, otype, off_r);
1722         break;
1723     case MO_SB:
1724         tcg_out_ldst_r(s, ext ? I3312_LDRSBX : I3312_LDRSBW,
1725                        data_r, addr_r, otype, off_r);
1726         break;
1727     case MO_UW:
1728         tcg_out_ldst_r(s, I3312_LDRH, data_r, addr_r, otype, off_r);
1729         if (bswap) {
1730             tcg_out_rev16(s, data_r, data_r);
1731         }
1732         break;
1733     case MO_SW:
1734         if (bswap) {
1735             tcg_out_ldst_r(s, I3312_LDRH, data_r, addr_r, otype, off_r);
1736             tcg_out_rev16(s, data_r, data_r);
1737             tcg_out_sxt(s, ext, MO_16, data_r, data_r);
1738         } else {
1739             tcg_out_ldst_r(s, (ext ? I3312_LDRSHX : I3312_LDRSHW),
1740                            data_r, addr_r, otype, off_r);
1741         }
1742         break;
1743     case MO_UL:
1744         tcg_out_ldst_r(s, I3312_LDRW, data_r, addr_r, otype, off_r);
1745         if (bswap) {
1746             tcg_out_rev32(s, data_r, data_r);
1747         }
1748         break;
1749     case MO_SL:
1750         if (bswap) {
1751             tcg_out_ldst_r(s, I3312_LDRW, data_r, addr_r, otype, off_r);
1752             tcg_out_rev32(s, data_r, data_r);
1753             tcg_out_sxt(s, TCG_TYPE_I64, MO_32, data_r, data_r);
1754         } else {
1755             tcg_out_ldst_r(s, I3312_LDRSWX, data_r, addr_r, otype, off_r);
1756         }
1757         break;
1758     case MO_Q:
1759         tcg_out_ldst_r(s, I3312_LDRX, data_r, addr_r, otype, off_r);
1760         if (bswap) {
1761             tcg_out_rev64(s, data_r, data_r);
1762         }
1763         break;
1764     default:
1765         tcg_abort();
1766     }
1767 }
1768
1769 static void tcg_out_qemu_st_direct(TCGContext *s, MemOp memop,
1770                                    TCGReg data_r, TCGReg addr_r,
1771                                    TCGType otype, TCGReg off_r)
1772 {
1773     const MemOp bswap = memop & MO_BSWAP;
1774
1775     switch (memop & MO_SIZE) {
1776     case MO_8:
1777         tcg_out_ldst_r(s, I3312_STRB, data_r, addr_r, otype, off_r);
1778         break;
1779     case MO_16:
1780         if (bswap && data_r != TCG_REG_XZR) {
1781             tcg_out_rev16(s, TCG_REG_TMP, data_r);
1782             data_r = TCG_REG_TMP;
1783         }
1784         tcg_out_ldst_r(s, I3312_STRH, data_r, addr_r, otype, off_r);
1785         break;
1786     case MO_32:
1787         if (bswap && data_r != TCG_REG_XZR) {
1788             tcg_out_rev32(s, TCG_REG_TMP, data_r);
1789             data_r = TCG_REG_TMP;
1790         }
1791         tcg_out_ldst_r(s, I3312_STRW, data_r, addr_r, otype, off_r);
1792         break;
1793     case MO_64:
1794         if (bswap && data_r != TCG_REG_XZR) {
1795             tcg_out_rev64(s, TCG_REG_TMP, data_r);
1796             data_r = TCG_REG_TMP;
1797         }
1798         tcg_out_ldst_r(s, I3312_STRX, data_r, addr_r, otype, off_r);
1799         break;
1800     default:
1801         tcg_abort();
1802     }
1803 }
1804
1805 static void tcg_out_qemu_ld(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
1806                             TCGMemOpIdx oi, TCGType ext)
1807 {
1808     MemOp memop = get_memop(oi);
1809     const TCGType otype = TARGET_LONG_BITS == 64 ? TCG_TYPE_I64 : TCG_TYPE_I32;
1810 #ifdef CONFIG_SOFTMMU
1811     unsigned mem_index = get_mmuidx(oi);
1812     tcg_insn_unit *label_ptr;
1813
1814     tcg_out_tlb_read(s, addr_reg, memop, &label_ptr, mem_index, 1);
1815     tcg_out_qemu_ld_direct(s, memop, ext, data_reg,
1816                            TCG_REG_X1, otype, addr_reg);
1817     add_qemu_ldst_label(s, true, oi, ext, data_reg, addr_reg,
1818                         s->code_ptr, label_ptr);
1819 #else /* !CONFIG_SOFTMMU */
1820     if (USE_GUEST_BASE) {
1821         tcg_out_qemu_ld_direct(s, memop, ext, data_reg,
1822                                TCG_REG_GUEST_BASE, otype, addr_reg);
1823     } else {
1824         tcg_out_qemu_ld_direct(s, memop, ext, data_reg,
1825                                addr_reg, TCG_TYPE_I64, TCG_REG_XZR);
1826     }
1827 #endif /* CONFIG_SOFTMMU */
1828 }
1829
1830 static void tcg_out_qemu_st(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
1831                             TCGMemOpIdx oi)
1832 {
1833     MemOp memop = get_memop(oi);
1834     const TCGType otype = TARGET_LONG_BITS == 64 ? TCG_TYPE_I64 : TCG_TYPE_I32;
1835 #ifdef CONFIG_SOFTMMU
1836     unsigned mem_index = get_mmuidx(oi);
1837     tcg_insn_unit *label_ptr;
1838
1839     tcg_out_tlb_read(s, addr_reg, memop, &label_ptr, mem_index, 0);
1840     tcg_out_qemu_st_direct(s, memop, data_reg,
1841                            TCG_REG_X1, otype, addr_reg);
1842     add_qemu_ldst_label(s, false, oi, (memop & MO_SIZE)== MO_64,
1843                         data_reg, addr_reg, s->code_ptr, label_ptr);
1844 #else /* !CONFIG_SOFTMMU */
1845     if (USE_GUEST_BASE) {
1846         tcg_out_qemu_st_direct(s, memop, data_reg,
1847                                TCG_REG_GUEST_BASE, otype, addr_reg);
1848     } else {
1849         tcg_out_qemu_st_direct(s, memop, data_reg,
1850                                addr_reg, TCG_TYPE_I64, TCG_REG_XZR);
1851     }
1852 #endif /* CONFIG_SOFTMMU */
1853 }
1854
/*
 * Code address that INDEX_op_exit_tb branches to after loading a
 * non-zero return value into X0.  It is assigned outside this part of
 * the file (presumably when the prologue/epilogue is generated).
 */
static tcg_insn_unit *tb_ret_addr;
1856
1857 static void tcg_out_op(TCGContext *s, TCGOpcode opc,
1858                        const TCGArg args[TCG_MAX_OP_ARGS],
1859                        const int const_args[TCG_MAX_OP_ARGS])
1860 {
1861     /* 99% of the time, we can signal the use of extension registers
1862        by looking to see if the opcode handles 64-bit data.  */
1863     TCGType ext = (tcg_op_defs[opc].flags & TCG_OPF_64BIT) != 0;
1864
1865     /* Hoist the loads of the most common arguments.  */
1866     TCGArg a0 = args[0];
1867     TCGArg a1 = args[1];
1868     TCGArg a2 = args[2];
1869     int c2 = const_args[2];
1870
1871     /* Some operands are defined with "rZ" constraint, a register or
1872        the zero register.  These need not actually test args[I] == 0.  */
1873 #define REG0(I)  (const_args[I] ? TCG_REG_XZR : (TCGReg)args[I])
1874
1875     switch (opc) {
1876     case INDEX_op_exit_tb:
1877         /* Reuse the zeroing that exists for goto_ptr.  */
1878         if (a0 == 0) {
1879             tcg_out_goto_long(s, s->code_gen_epilogue);
1880         } else {
1881             tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_X0, a0);
1882             tcg_out_goto_long(s, tb_ret_addr);
1883         }
1884         break;
1885
1886     case INDEX_op_goto_tb:
1887         if (s->tb_jmp_insn_offset != NULL) {
1888             /* TCG_TARGET_HAS_direct_jump */
1889             /* Ensure that ADRP+ADD are 8-byte aligned so that an atomic
1890                write can be used to patch the target address. */
1891             if ((uintptr_t)s->code_ptr & 7) {
1892                 tcg_out32(s, NOP);
1893             }
1894             s->tb_jmp_insn_offset[a0] = tcg_current_code_size(s);
1895             /* actual branch destination will be patched by
1896                tb_target_set_jmp_target later. */
1897             tcg_out_insn(s, 3406, ADRP, TCG_REG_TMP, 0);
1898             tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_TMP, TCG_REG_TMP, 0);
1899         } else {
1900             /* !TCG_TARGET_HAS_direct_jump */
1901             tcg_debug_assert(s->tb_jmp_target_addr != NULL);
1902             intptr_t offset = tcg_pcrel_diff(s, (s->tb_jmp_target_addr + a0)) >> 2;
1903             tcg_out_insn(s, 3305, LDR, offset, TCG_REG_TMP);
1904         }
1905         tcg_out_insn(s, 3207, BR, TCG_REG_TMP);
1906         set_jmp_reset_offset(s, a0);
1907         break;
1908
1909     case INDEX_op_goto_ptr:
1910         tcg_out_insn(s, 3207, BR, a0);
1911         break;
1912
1913     case INDEX_op_br:
1914         tcg_out_goto_label(s, arg_label(a0));
1915         break;
1916
1917     case INDEX_op_ld8u_i32:
1918     case INDEX_op_ld8u_i64:
1919         tcg_out_ldst(s, I3312_LDRB, a0, a1, a2, 0);
1920         break;
1921     case INDEX_op_ld8s_i32:
1922         tcg_out_ldst(s, I3312_LDRSBW, a0, a1, a2, 0);
1923         break;
1924     case INDEX_op_ld8s_i64:
1925         tcg_out_ldst(s, I3312_LDRSBX, a0, a1, a2, 0);
1926         break;
1927     case INDEX_op_ld16u_i32:
1928     case INDEX_op_ld16u_i64:
1929         tcg_out_ldst(s, I3312_LDRH, a0, a1, a2, 1);
1930         break;
1931     case INDEX_op_ld16s_i32:
1932         tcg_out_ldst(s, I3312_LDRSHW, a0, a1, a2, 1);
1933         break;
1934     case INDEX_op_ld16s_i64:
1935         tcg_out_ldst(s, I3312_LDRSHX, a0, a1, a2, 1);
1936         break;
1937     case INDEX_op_ld_i32:
1938     case INDEX_op_ld32u_i64:
1939         tcg_out_ldst(s, I3312_LDRW, a0, a1, a2, 2);
1940         break;
1941     case INDEX_op_ld32s_i64:
1942         tcg_out_ldst(s, I3312_LDRSWX, a0, a1, a2, 2);
1943         break;
1944     case INDEX_op_ld_i64:
1945         tcg_out_ldst(s, I3312_LDRX, a0, a1, a2, 3);
1946         break;
1947
1948     case INDEX_op_st8_i32:
1949     case INDEX_op_st8_i64:
1950         tcg_out_ldst(s, I3312_STRB, REG0(0), a1, a2, 0);
1951         break;
1952     case INDEX_op_st16_i32:
1953     case INDEX_op_st16_i64:
1954         tcg_out_ldst(s, I3312_STRH, REG0(0), a1, a2, 1);
1955         break;
1956     case INDEX_op_st_i32:
1957     case INDEX_op_st32_i64:
1958         tcg_out_ldst(s, I3312_STRW, REG0(0), a1, a2, 2);
1959         break;
1960     case INDEX_op_st_i64:
1961         tcg_out_ldst(s, I3312_STRX, REG0(0), a1, a2, 3);
1962         break;
1963
1964     case INDEX_op_add_i32:
1965         a2 = (int32_t)a2;
1966         /* FALLTHRU */
1967     case INDEX_op_add_i64:
1968         if (c2) {
1969             tcg_out_addsubi(s, ext, a0, a1, a2);
1970         } else {
1971             tcg_out_insn(s, 3502, ADD, ext, a0, a1, a2);
1972         }
1973         break;
1974
1975     case INDEX_op_sub_i32:
1976         a2 = (int32_t)a2;
1977         /* FALLTHRU */
1978     case INDEX_op_sub_i64:
1979         if (c2) {
1980             tcg_out_addsubi(s, ext, a0, a1, -a2);
1981         } else {
1982             tcg_out_insn(s, 3502, SUB, ext, a0, a1, a2);
1983         }
1984         break;
1985
1986     case INDEX_op_neg_i64:
1987     case INDEX_op_neg_i32:
1988         tcg_out_insn(s, 3502, SUB, ext, a0, TCG_REG_XZR, a1);
1989         break;
1990
1991     case INDEX_op_and_i32:
1992         a2 = (int32_t)a2;
1993         /* FALLTHRU */
1994     case INDEX_op_and_i64:
1995         if (c2) {
1996             tcg_out_logicali(s, I3404_ANDI, ext, a0, a1, a2);
1997         } else {
1998             tcg_out_insn(s, 3510, AND, ext, a0, a1, a2);
1999         }
2000         break;
2001
2002     case INDEX_op_andc_i32:
2003         a2 = (int32_t)a2;
2004         /* FALLTHRU */
2005     case INDEX_op_andc_i64:
2006         if (c2) {
2007             tcg_out_logicali(s, I3404_ANDI, ext, a0, a1, ~a2);
2008         } else {
2009             tcg_out_insn(s, 3510, BIC, ext, a0, a1, a2);
2010         }
2011         break;
2012
2013     case INDEX_op_or_i32:
2014         a2 = (int32_t)a2;
2015         /* FALLTHRU */
2016     case INDEX_op_or_i64:
2017         if (c2) {
2018             tcg_out_logicali(s, I3404_ORRI, ext, a0, a1, a2);
2019         } else {
2020             tcg_out_insn(s, 3510, ORR, ext, a0, a1, a2);
2021         }
2022         break;
2023
2024     case INDEX_op_orc_i32:
2025         a2 = (int32_t)a2;
2026         /* FALLTHRU */
2027     case INDEX_op_orc_i64:
2028         if (c2) {
2029             tcg_out_logicali(s, I3404_ORRI, ext, a0, a1, ~a2);
2030         } else {
2031             tcg_out_insn(s, 3510, ORN, ext, a0, a1, a2);
2032         }
2033         break;
2034
2035     case INDEX_op_xor_i32:
2036         a2 = (int32_t)a2;
2037         /* FALLTHRU */
2038     case INDEX_op_xor_i64:
2039         if (c2) {
2040             tcg_out_logicali(s, I3404_EORI, ext, a0, a1, a2);
2041         } else {
2042             tcg_out_insn(s, 3510, EOR, ext, a0, a1, a2);
2043         }
2044         break;
2045
2046     case INDEX_op_eqv_i32:
2047         a2 = (int32_t)a2;
2048         /* FALLTHRU */
2049     case INDEX_op_eqv_i64:
2050         if (c2) {
2051             tcg_out_logicali(s, I3404_EORI, ext, a0, a1, ~a2);
2052         } else {
2053             tcg_out_insn(s, 3510, EON, ext, a0, a1, a2);
2054         }
2055         break;
2056
2057     case INDEX_op_not_i64:
2058     case INDEX_op_not_i32:
2059         tcg_out_insn(s, 3510, ORN, ext, a0, TCG_REG_XZR, a1);
2060         break;
2061
2062     case INDEX_op_mul_i64:
2063     case INDEX_op_mul_i32:
2064         tcg_out_insn(s, 3509, MADD, ext, a0, a1, a2, TCG_REG_XZR);
2065         break;
2066
2067     case INDEX_op_div_i64:
2068     case INDEX_op_div_i32:
2069         tcg_out_insn(s, 3508, SDIV, ext, a0, a1, a2);
2070         break;
2071     case INDEX_op_divu_i64:
2072     case INDEX_op_divu_i32:
2073         tcg_out_insn(s, 3508, UDIV, ext, a0, a1, a2);
2074         break;
2075
2076     case INDEX_op_rem_i64:
2077     case INDEX_op_rem_i32:
2078         tcg_out_insn(s, 3508, SDIV, ext, TCG_REG_TMP, a1, a2);
2079         tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP, a2, a1);
2080         break;
2081     case INDEX_op_remu_i64:
2082     case INDEX_op_remu_i32:
2083         tcg_out_insn(s, 3508, UDIV, ext, TCG_REG_TMP, a1, a2);
2084         tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP, a2, a1);
2085         break;
2086
2087     case INDEX_op_shl_i64:
2088     case INDEX_op_shl_i32:
2089         if (c2) {
2090             tcg_out_shl(s, ext, a0, a1, a2);
2091         } else {
2092             tcg_out_insn(s, 3508, LSLV, ext, a0, a1, a2);
2093         }
2094         break;
2095
2096     case INDEX_op_shr_i64:
2097     case INDEX_op_shr_i32:
2098         if (c2) {
2099             tcg_out_shr(s, ext, a0, a1, a2);
2100         } else {
2101             tcg_out_insn(s, 3508, LSRV, ext, a0, a1, a2);
2102         }
2103         break;
2104
2105     case INDEX_op_sar_i64:
2106     case INDEX_op_sar_i32:
2107         if (c2) {
2108             tcg_out_sar(s, ext, a0, a1, a2);
2109         } else {
2110             tcg_out_insn(s, 3508, ASRV, ext, a0, a1, a2);
2111         }
2112         break;
2113
2114     case INDEX_op_rotr_i64:
2115     case INDEX_op_rotr_i32:
2116         if (c2) {
2117             tcg_out_rotr(s, ext, a0, a1, a2);
2118         } else {
2119             tcg_out_insn(s, 3508, RORV, ext, a0, a1, a2);
2120         }
2121         break;
2122
2123     case INDEX_op_rotl_i64:
2124     case INDEX_op_rotl_i32:
2125         if (c2) {
2126             tcg_out_rotl(s, ext, a0, a1, a2);
2127         } else {
2128             tcg_out_insn(s, 3502, SUB, 0, TCG_REG_TMP, TCG_REG_XZR, a2);
2129             tcg_out_insn(s, 3508, RORV, ext, a0, a1, TCG_REG_TMP);
2130         }
2131         break;
2132
2133     case INDEX_op_clz_i64:
2134     case INDEX_op_clz_i32:
2135         tcg_out_cltz(s, ext, a0, a1, a2, c2, false);
2136         break;
2137     case INDEX_op_ctz_i64:
2138     case INDEX_op_ctz_i32:
2139         tcg_out_cltz(s, ext, a0, a1, a2, c2, true);
2140         break;
2141
2142     case INDEX_op_brcond_i32:
2143         a1 = (int32_t)a1;
2144         /* FALLTHRU */
2145     case INDEX_op_brcond_i64:
2146         tcg_out_brcond(s, ext, a2, a0, a1, const_args[1], arg_label(args[3]));
2147         break;
2148
2149     case INDEX_op_setcond_i32:
2150         a2 = (int32_t)a2;
2151         /* FALLTHRU */
2152     case INDEX_op_setcond_i64:
2153         tcg_out_cmp(s, ext, a1, a2, c2);
2154         /* Use CSET alias of CSINC Wd, WZR, WZR, invert(cond).  */
2155         tcg_out_insn(s, 3506, CSINC, TCG_TYPE_I32, a0, TCG_REG_XZR,
2156                      TCG_REG_XZR, tcg_invert_cond(args[3]));
2157         break;
2158
2159     case INDEX_op_movcond_i32:
2160         a2 = (int32_t)a2;
2161         /* FALLTHRU */
2162     case INDEX_op_movcond_i64:
2163         tcg_out_cmp(s, ext, a1, a2, c2);
2164         tcg_out_insn(s, 3506, CSEL, ext, a0, REG0(3), REG0(4), args[5]);
2165         break;
2166
2167     case INDEX_op_qemu_ld_i32:
2168     case INDEX_op_qemu_ld_i64:
2169         tcg_out_qemu_ld(s, a0, a1, a2, ext);
2170         break;
2171     case INDEX_op_qemu_st_i32:
2172     case INDEX_op_qemu_st_i64:
2173         tcg_out_qemu_st(s, REG0(0), a1, a2);
2174         break;
2175
2176     case INDEX_op_bswap64_i64:
2177         tcg_out_rev64(s, a0, a1);
2178         break;
2179     case INDEX_op_bswap32_i64:
2180     case INDEX_op_bswap32_i32:
2181         tcg_out_rev32(s, a0, a1);
2182         break;
2183     case INDEX_op_bswap16_i64:
2184     case INDEX_op_bswap16_i32:
2185         tcg_out_rev16(s, a0, a1);
2186         break;
2187
2188     case INDEX_op_ext8s_i64:
2189     case INDEX_op_ext8s_i32:
2190         tcg_out_sxt(s, ext, MO_8, a0, a1);
2191         break;
2192     case INDEX_op_ext16s_i64:
2193     case INDEX_op_ext16s_i32:
2194         tcg_out_sxt(s, ext, MO_16, a0, a1);
2195         break;
2196     case INDEX_op_ext_i32_i64:
2197     case INDEX_op_ext32s_i64:
2198         tcg_out_sxt(s, TCG_TYPE_I64, MO_32, a0, a1);
2199         break;
2200     case INDEX_op_ext8u_i64:
2201     case INDEX_op_ext8u_i32:
2202         tcg_out_uxt(s, MO_8, a0, a1);
2203         break;
2204     case INDEX_op_ext16u_i64:
2205     case INDEX_op_ext16u_i32:
2206         tcg_out_uxt(s, MO_16, a0, a1);
2207         break;
2208     case INDEX_op_extu_i32_i64:
2209     case INDEX_op_ext32u_i64:
2210         tcg_out_movr(s, TCG_TYPE_I32, a0, a1);
2211         break;
2212
2213     case INDEX_op_deposit_i64:
2214     case INDEX_op_deposit_i32:
2215         tcg_out_dep(s, ext, a0, REG0(2), args[3], args[4]);
2216         break;
2217
2218     case INDEX_op_extract_i64:
2219     case INDEX_op_extract_i32:
2220         tcg_out_ubfm(s, ext, a0, a1, a2, a2 + args[3] - 1);
2221         break;
2222
2223     case INDEX_op_sextract_i64:
2224     case INDEX_op_sextract_i32:
2225         tcg_out_sbfm(s, ext, a0, a1, a2, a2 + args[3] - 1);
2226         break;
2227
2228     case INDEX_op_extract2_i64:
2229     case INDEX_op_extract2_i32:
2230         tcg_out_extr(s, ext, a0, REG0(2), REG0(1), args[3]);
2231         break;
2232
2233     case INDEX_op_add2_i32:
2234         tcg_out_addsub2(s, TCG_TYPE_I32, a0, a1, REG0(2), REG0(3),
2235                         (int32_t)args[4], args[5], const_args[4],
2236                         const_args[5], false);
2237         break;
2238     case INDEX_op_add2_i64:
2239         tcg_out_addsub2(s, TCG_TYPE_I64, a0, a1, REG0(2), REG0(3), args[4],
2240                         args[5], const_args[4], const_args[5], false);
2241         break;
2242     case INDEX_op_sub2_i32:
2243         tcg_out_addsub2(s, TCG_TYPE_I32, a0, a1, REG0(2), REG0(3),
2244                         (int32_t)args[4], args[5], const_args[4],
2245                         const_args[5], true);
2246         break;
2247     case INDEX_op_sub2_i64:
2248         tcg_out_addsub2(s, TCG_TYPE_I64, a0, a1, REG0(2), REG0(3), args[4],
2249                         args[5], const_args[4], const_args[5], true);
2250         break;
2251
2252     case INDEX_op_muluh_i64:
2253         tcg_out_insn(s, 3508, UMULH, TCG_TYPE_I64, a0, a1, a2);
2254         break;
2255     case INDEX_op_mulsh_i64:
2256         tcg_out_insn(s, 3508, SMULH, TCG_TYPE_I64, a0, a1, a2);
2257         break;
2258
2259     case INDEX_op_mb:
2260         tcg_out_mb(s, a0);
2261         break;
2262
2263     case INDEX_op_mov_i32:  /* Always emitted via tcg_out_mov.  */
2264     case INDEX_op_mov_i64:
2265     case INDEX_op_movi_i32: /* Always emitted via tcg_out_movi.  */
2266     case INDEX_op_movi_i64:
2267     case INDEX_op_call:     /* Always emitted via tcg_out_call.  */
2268     default:
2269         g_assert_not_reached();
2270     }
2271
2272 #undef REG0
2273 }
2274
2275 static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
2276                            unsigned vecl, unsigned vece,
2277                            const TCGArg *args, const int *const_args)
2278 {
2279     static const AArch64Insn cmp_insn[16] = {
2280         [TCG_COND_EQ] = I3616_CMEQ,
2281         [TCG_COND_GT] = I3616_CMGT,
2282         [TCG_COND_GE] = I3616_CMGE,
2283         [TCG_COND_GTU] = I3616_CMHI,
2284         [TCG_COND_GEU] = I3616_CMHS,
2285     };
2286     static const AArch64Insn cmp0_insn[16] = {
2287         [TCG_COND_EQ] = I3617_CMEQ0,
2288         [TCG_COND_GT] = I3617_CMGT0,
2289         [TCG_COND_GE] = I3617_CMGE0,
2290         [TCG_COND_LT] = I3617_CMLT0,
2291         [TCG_COND_LE] = I3617_CMLE0,
2292     };
2293
2294     TCGType type = vecl + TCG_TYPE_V64;
2295     unsigned is_q = vecl;
2296     TCGArg a0, a1, a2, a3;
2297     int cmode, imm8;
2298
2299     a0 = args[0];
2300     a1 = args[1];
2301     a2 = args[2];
2302
2303     switch (opc) {
2304     case INDEX_op_ld_vec:
2305         tcg_out_ld(s, type, a0, a1, a2);
2306         break;
2307     case INDEX_op_st_vec:
2308         tcg_out_st(s, type, a0, a1, a2);
2309         break;
2310     case INDEX_op_dupm_vec:
2311         tcg_out_dupm_vec(s, type, vece, a0, a1, a2);
2312         break;
2313     case INDEX_op_add_vec:
2314         tcg_out_insn(s, 3616, ADD, is_q, vece, a0, a1, a2);
2315         break;
2316     case INDEX_op_sub_vec:
2317         tcg_out_insn(s, 3616, SUB, is_q, vece, a0, a1, a2);
2318         break;
2319     case INDEX_op_mul_vec:
2320         tcg_out_insn(s, 3616, MUL, is_q, vece, a0, a1, a2);
2321         break;
2322     case INDEX_op_neg_vec:
2323         tcg_out_insn(s, 3617, NEG, is_q, vece, a0, a1);
2324         break;
2325     case INDEX_op_abs_vec:
2326         tcg_out_insn(s, 3617, ABS, is_q, vece, a0, a1);
2327         break;
2328     case INDEX_op_and_vec:
2329         if (const_args[2]) {
2330             is_shimm1632(~a2, &cmode, &imm8);
2331             if (a0 == a1) {
2332                 tcg_out_insn(s, 3606, BIC, is_q, a0, 0, cmode, imm8);
2333                 return;
2334             }
2335             tcg_out_insn(s, 3606, MVNI, is_q, a0, 0, cmode, imm8);
2336             a2 = a0;
2337         }
2338         tcg_out_insn(s, 3616, AND, is_q, 0, a0, a1, a2);
2339         break;
2340     case INDEX_op_or_vec:
2341         if (const_args[2]) {
2342             is_shimm1632(a2, &cmode, &imm8);
2343             if (a0 == a1) {
2344                 tcg_out_insn(s, 3606, ORR, is_q, a0, 0, cmode, imm8);
2345                 return;
2346             }
2347             tcg_out_insn(s, 3606, MOVI, is_q, a0, 0, cmode, imm8);
2348             a2 = a0;
2349         }
2350         tcg_out_insn(s, 3616, ORR, is_q, 0, a0, a1, a2);
2351         break;
2352     case INDEX_op_andc_vec:
2353         if (const_args[2]) {
2354             is_shimm1632(a2, &cmode, &imm8);
2355             if (a0 == a1) {
2356                 tcg_out_insn(s, 3606, BIC, is_q, a0, 0, cmode, imm8);
2357                 return;
2358             }
2359             tcg_out_insn(s, 3606, MOVI, is_q, a0, 0, cmode, imm8);
2360             a2 = a0;
2361         }
2362         tcg_out_insn(s, 3616, BIC, is_q, 0, a0, a1, a2);
2363         break;
2364     case INDEX_op_orc_vec:
2365         if (const_args[2]) {
2366             is_shimm1632(~a2, &cmode, &imm8);
2367             if (a0 == a1) {
2368                 tcg_out_insn(s, 3606, ORR, is_q, a0, 0, cmode, imm8);
2369                 return;
2370             }
2371             tcg_out_insn(s, 3606, MVNI, is_q, a0, 0, cmode, imm8);
2372             a2 = a0;
2373         }
2374         tcg_out_insn(s, 3616, ORN, is_q, 0, a0, a1, a2);
2375         break;
2376     case INDEX_op_xor_vec:
2377         tcg_out_insn(s, 3616, EOR, is_q, 0, a0, a1, a2);
2378         break;
2379     case INDEX_op_ssadd_vec:
2380         tcg_out_insn(s, 3616, SQADD, is_q, vece, a0, a1, a2);
2381         break;
2382     case INDEX_op_sssub_vec:
2383         tcg_out_insn(s, 3616, SQSUB, is_q, vece, a0, a1, a2);
2384         break;
2385     case INDEX_op_usadd_vec:
2386         tcg_out_insn(s, 3616, UQADD, is_q, vece, a0, a1, a2);
2387         break;
2388     case INDEX_op_ussub_vec:
2389         tcg_out_insn(s, 3616, UQSUB, is_q, vece, a0, a1, a2);
2390         break;
2391     case INDEX_op_smax_vec:
2392         tcg_out_insn(s, 3616, SMAX, is_q, vece, a0, a1, a2);
2393         break;
2394     case INDEX_op_smin_vec:
2395         tcg_out_insn(s, 3616, SMIN, is_q, vece, a0, a1, a2);
2396         break;
2397     case INDEX_op_umax_vec:
2398         tcg_out_insn(s, 3616, UMAX, is_q, vece, a0, a1, a2);
2399         break;
2400     case INDEX_op_umin_vec:
2401         tcg_out_insn(s, 3616, UMIN, is_q, vece, a0, a1, a2);
2402         break;
2403     case INDEX_op_not_vec:
2404         tcg_out_insn(s, 3617, NOT, is_q, 0, a0, a1);
2405         break;
2406     case INDEX_op_shli_vec:
2407         tcg_out_insn(s, 3614, SHL, is_q, a0, a1, a2 + (8 << vece));
2408         break;
2409     case INDEX_op_shri_vec:
2410         tcg_out_insn(s, 3614, USHR, is_q, a0, a1, (16 << vece) - a2);
2411         break;
2412     case INDEX_op_sari_vec:
2413         tcg_out_insn(s, 3614, SSHR, is_q, a0, a1, (16 << vece) - a2);
2414         break;
2415     case INDEX_op_aa64_sli_vec:
2416         tcg_out_insn(s, 3614, SLI, is_q, a0, a2, args[3] + (8 << vece));
2417         break;
2418     case INDEX_op_shlv_vec:
2419         tcg_out_insn(s, 3616, USHL, is_q, vece, a0, a1, a2);
2420         break;
2421     case INDEX_op_aa64_sshl_vec:
2422         tcg_out_insn(s, 3616, SSHL, is_q, vece, a0, a1, a2);
2423         break;
2424     case INDEX_op_cmp_vec:
2425         {
2426             TCGCond cond = args[3];
2427             AArch64Insn insn;
2428
2429             if (cond == TCG_COND_NE) {
2430                 if (const_args[2]) {
2431                     tcg_out_insn(s, 3616, CMTST, is_q, vece, a0, a1, a1);
2432                 } else {
2433                     tcg_out_insn(s, 3616, CMEQ, is_q, vece, a0, a1, a2);
2434                     tcg_out_insn(s, 3617, NOT, is_q, 0, a0, a0);
2435                 }
2436             } else {
2437                 if (const_args[2]) {
2438                     insn = cmp0_insn[cond];
2439                     if (insn) {
2440                         tcg_out_insn_3617(s, insn, is_q, vece, a0, a1);
2441                         break;
2442                     }
2443                     tcg_out_dupi_vec(s, type, TCG_VEC_TMP, 0);
2444                     a2 = TCG_VEC_TMP;
2445                 }
2446                 insn = cmp_insn[cond];
2447                 if (insn == 0) {
2448                     TCGArg t;
2449                     t = a1, a1 = a2, a2 = t;
2450                     cond = tcg_swap_cond(cond);
2451                     insn = cmp_insn[cond];
2452                     tcg_debug_assert(insn != 0);
2453                 }
2454                 tcg_out_insn_3616(s, insn, is_q, vece, a0, a1, a2);
2455             }
2456         }
2457         break;
2458
2459     case INDEX_op_bitsel_vec:
2460         a3 = args[3];
2461         if (a0 == a3) {
2462             tcg_out_insn(s, 3616, BIT, is_q, 0, a0, a2, a1);
2463         } else if (a0 == a2) {
2464             tcg_out_insn(s, 3616, BIF, is_q, 0, a0, a3, a1);
2465         } else {
2466             if (a0 != a1) {
2467                 tcg_out_mov(s, type, a0, a1);
2468             }
2469             tcg_out_insn(s, 3616, BSL, is_q, 0, a0, a2, a3);
2470         }
2471         break;
2472
2473     case INDEX_op_mov_vec:  /* Always emitted via tcg_out_mov.  */
2474     case INDEX_op_dupi_vec: /* Always emitted via tcg_out_movi.  */
2475     case INDEX_op_dup_vec:  /* Always emitted via tcg_out_dup_vec.  */
2476     default:
2477         g_assert_not_reached();
2478     }
2479 }
2480
2481 int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
2482 {
2483     switch (opc) {
2484     case INDEX_op_add_vec:
2485     case INDEX_op_sub_vec:
2486     case INDEX_op_and_vec:
2487     case INDEX_op_or_vec:
2488     case INDEX_op_xor_vec:
2489     case INDEX_op_andc_vec:
2490     case INDEX_op_orc_vec:
2491     case INDEX_op_neg_vec:
2492     case INDEX_op_abs_vec:
2493     case INDEX_op_not_vec:
2494     case INDEX_op_cmp_vec:
2495     case INDEX_op_shli_vec:
2496     case INDEX_op_shri_vec:
2497     case INDEX_op_sari_vec:
2498     case INDEX_op_ssadd_vec:
2499     case INDEX_op_sssub_vec:
2500     case INDEX_op_usadd_vec:
2501     case INDEX_op_ussub_vec:
2502     case INDEX_op_shlv_vec:
2503     case INDEX_op_bitsel_vec:
2504         return 1;
2505     case INDEX_op_rotli_vec:
2506     case INDEX_op_shrv_vec:
2507     case INDEX_op_sarv_vec:
2508     case INDEX_op_rotlv_vec:
2509     case INDEX_op_rotrv_vec:
2510         return -1;
2511     case INDEX_op_mul_vec:
2512     case INDEX_op_smax_vec:
2513     case INDEX_op_smin_vec:
2514     case INDEX_op_umax_vec:
2515     case INDEX_op_umin_vec:
2516         return vece < MO_64;
2517
2518     default:
2519         return 0;
2520     }
2521 }
2522
2523 void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
2524                        TCGArg a0, ...)
2525 {
2526     va_list va;
2527     TCGv_vec v0, v1, v2, t1, t2;
2528     TCGArg a2;
2529
2530     va_start(va, a0);
2531     v0 = temp_tcgv_vec(arg_temp(a0));
2532     v1 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg)));
2533     a2 = va_arg(va, TCGArg);
2534     v2 = temp_tcgv_vec(arg_temp(a2));
2535
2536     switch (opc) {
2537     case INDEX_op_rotli_vec:
2538         t1 = tcg_temp_new_vec(type);
2539         tcg_gen_shri_vec(vece, t1, v1, -a2 & ((8 << vece) - 1));
2540         vec_gen_4(INDEX_op_aa64_sli_vec, type, vece,
2541                   tcgv_vec_arg(v0), tcgv_vec_arg(t1), tcgv_vec_arg(v1), a2);
2542         tcg_temp_free_vec(t1);
2543         break;
2544
2545     case INDEX_op_shrv_vec:
2546     case INDEX_op_sarv_vec:
2547         /* Right shifts are negative left shifts for AArch64.  */
2548         t1 = tcg_temp_new_vec(type);
2549         tcg_gen_neg_vec(vece, t1, v2);
2550         opc = (opc == INDEX_op_shrv_vec
2551                ? INDEX_op_shlv_vec : INDEX_op_aa64_sshl_vec);
2552         vec_gen_3(opc, type, vece, tcgv_vec_arg(v0),
2553                   tcgv_vec_arg(v1), tcgv_vec_arg(t1));
2554         tcg_temp_free_vec(t1);
2555         break;
2556
2557     case INDEX_op_rotlv_vec:
2558         t1 = tcg_temp_new_vec(type);
2559         tcg_gen_dupi_vec(vece, t1, 8 << vece);
2560         tcg_gen_sub_vec(vece, t1, v2, t1);
2561         /* Right shifts are negative left shifts for AArch64.  */
2562         vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(t1),
2563                   tcgv_vec_arg(v1), tcgv_vec_arg(t1));
2564         vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(v0),
2565                   tcgv_vec_arg(v1), tcgv_vec_arg(v2));
2566         tcg_gen_or_vec(vece, v0, v0, t1);
2567         tcg_temp_free_vec(t1);
2568         break;
2569
2570     case INDEX_op_rotrv_vec:
2571         t1 = tcg_temp_new_vec(type);
2572         t2 = tcg_temp_new_vec(type);
2573         tcg_gen_neg_vec(vece, t1, v2);
2574         tcg_gen_dupi_vec(vece, t2, 8 << vece);
2575         tcg_gen_add_vec(vece, t2, t1, t2);
2576         /* Right shifts are negative left shifts for AArch64.  */
2577         vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(t1),
2578                   tcgv_vec_arg(v1), tcgv_vec_arg(t1));
2579         vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(t2),
2580                   tcgv_vec_arg(v1), tcgv_vec_arg(t2));
2581         tcg_gen_or_vec(vece, v0, t1, t2);
2582         tcg_temp_free_vec(t1);
2583         tcg_temp_free_vec(t2);
2584         break;
2585
2586     default:
2587         g_assert_not_reached();
2588     }
2589
2590     va_end(va);
2591 }
2592
2593 static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
2594 {
2595     static const TCGTargetOpDef r = { .args_ct_str = { "r" } };
2596     static const TCGTargetOpDef r_r = { .args_ct_str = { "r", "r" } };
2597     static const TCGTargetOpDef w_w = { .args_ct_str = { "w", "w" } };
2598     static const TCGTargetOpDef w_r = { .args_ct_str = { "w", "r" } };
2599     static const TCGTargetOpDef w_wr = { .args_ct_str = { "w", "wr" } };
2600     static const TCGTargetOpDef r_l = { .args_ct_str = { "r", "l" } };
2601     static const TCGTargetOpDef r_rA = { .args_ct_str = { "r", "rA" } };
2602     static const TCGTargetOpDef rZ_r = { .args_ct_str = { "rZ", "r" } };
2603     static const TCGTargetOpDef lZ_l = { .args_ct_str = { "lZ", "l" } };
2604     static const TCGTargetOpDef r_r_r = { .args_ct_str = { "r", "r", "r" } };
2605     static const TCGTargetOpDef w_w_w = { .args_ct_str = { "w", "w", "w" } };
2606     static const TCGTargetOpDef w_0_w = { .args_ct_str = { "w", "0", "w" } };
2607     static const TCGTargetOpDef w_w_wO = { .args_ct_str = { "w", "w", "wO" } };
2608     static const TCGTargetOpDef w_w_wN = { .args_ct_str = { "w", "w", "wN" } };
2609     static const TCGTargetOpDef w_w_wZ = { .args_ct_str = { "w", "w", "wZ" } };
2610     static const TCGTargetOpDef r_r_ri = { .args_ct_str = { "r", "r", "ri" } };
2611     static const TCGTargetOpDef r_r_rA = { .args_ct_str = { "r", "r", "rA" } };
2612     static const TCGTargetOpDef r_r_rL = { .args_ct_str = { "r", "r", "rL" } };
2613     static const TCGTargetOpDef r_r_rAL
2614         = { .args_ct_str = { "r", "r", "rAL" } };
2615     static const TCGTargetOpDef dep
2616         = { .args_ct_str = { "r", "0", "rZ" } };
2617     static const TCGTargetOpDef ext2
2618         = { .args_ct_str = { "r", "rZ", "rZ" } };
2619     static const TCGTargetOpDef movc
2620         = { .args_ct_str = { "r", "r", "rA", "rZ", "rZ" } };
2621     static const TCGTargetOpDef add2
2622         = { .args_ct_str = { "r", "r", "rZ", "rZ", "rA", "rMZ" } };
2623     static const TCGTargetOpDef w_w_w_w
2624         = { .args_ct_str = { "w", "w", "w", "w" } };
2625
2626     switch (op) {
2627     case INDEX_op_goto_ptr:
2628         return &r;
2629
2630     case INDEX_op_ld8u_i32:
2631     case INDEX_op_ld8s_i32:
2632     case INDEX_op_ld16u_i32:
2633     case INDEX_op_ld16s_i32:
2634     case INDEX_op_ld_i32:
2635     case INDEX_op_ld8u_i64:
2636     case INDEX_op_ld8s_i64:
2637     case INDEX_op_ld16u_i64:
2638     case INDEX_op_ld16s_i64:
2639     case INDEX_op_ld32u_i64:
2640     case INDEX_op_ld32s_i64:
2641     case INDEX_op_ld_i64:
2642     case INDEX_op_neg_i32:
2643     case INDEX_op_neg_i64:
2644     case INDEX_op_not_i32:
2645     case INDEX_op_not_i64:
2646     case INDEX_op_bswap16_i32:
2647     case INDEX_op_bswap32_i32:
2648     case INDEX_op_bswap16_i64:
2649     case INDEX_op_bswap32_i64:
2650     case INDEX_op_bswap64_i64:
2651     case INDEX_op_ext8s_i32:
2652     case INDEX_op_ext16s_i32:
2653     case INDEX_op_ext8u_i32:
2654     case INDEX_op_ext16u_i32:
2655     case INDEX_op_ext8s_i64:
2656     case INDEX_op_ext16s_i64:
2657     case INDEX_op_ext32s_i64:
2658     case INDEX_op_ext8u_i64:
2659     case INDEX_op_ext16u_i64:
2660     case INDEX_op_ext32u_i64:
2661     case INDEX_op_ext_i32_i64:
2662     case INDEX_op_extu_i32_i64:
2663     case INDEX_op_extract_i32:
2664     case INDEX_op_extract_i64:
2665     case INDEX_op_sextract_i32:
2666     case INDEX_op_sextract_i64:
2667         return &r_r;
2668
2669     case INDEX_op_st8_i32:
2670     case INDEX_op_st16_i32:
2671     case INDEX_op_st_i32:
2672     case INDEX_op_st8_i64:
2673     case INDEX_op_st16_i64:
2674     case INDEX_op_st32_i64:
2675     case INDEX_op_st_i64:
2676         return &rZ_r;
2677
2678     case INDEX_op_add_i32:
2679     case INDEX_op_add_i64:
2680     case INDEX_op_sub_i32:
2681     case INDEX_op_sub_i64:
2682     case INDEX_op_setcond_i32:
2683     case INDEX_op_setcond_i64:
2684         return &r_r_rA;
2685
2686     case INDEX_op_mul_i32:
2687     case INDEX_op_mul_i64:
2688     case INDEX_op_div_i32:
2689     case INDEX_op_div_i64:
2690     case INDEX_op_divu_i32:
2691     case INDEX_op_divu_i64:
2692     case INDEX_op_rem_i32:
2693     case INDEX_op_rem_i64:
2694     case INDEX_op_remu_i32:
2695     case INDEX_op_remu_i64:
2696     case INDEX_op_muluh_i64:
2697     case INDEX_op_mulsh_i64:
2698         return &r_r_r;
2699
2700     case INDEX_op_and_i32:
2701     case INDEX_op_and_i64:
2702     case INDEX_op_or_i32:
2703     case INDEX_op_or_i64:
2704     case INDEX_op_xor_i32:
2705     case INDEX_op_xor_i64:
2706     case INDEX_op_andc_i32:
2707     case INDEX_op_andc_i64:
2708     case INDEX_op_orc_i32:
2709     case INDEX_op_orc_i64:
2710     case INDEX_op_eqv_i32:
2711     case INDEX_op_eqv_i64:
2712         return &r_r_rL;
2713
2714     case INDEX_op_shl_i32:
2715     case INDEX_op_shr_i32:
2716     case INDEX_op_sar_i32:
2717     case INDEX_op_rotl_i32:
2718     case INDEX_op_rotr_i32:
2719     case INDEX_op_shl_i64:
2720     case INDEX_op_shr_i64:
2721     case INDEX_op_sar_i64:
2722     case INDEX_op_rotl_i64:
2723     case INDEX_op_rotr_i64:
2724         return &r_r_ri;
2725
2726     case INDEX_op_clz_i32:
2727     case INDEX_op_ctz_i32:
2728     case INDEX_op_clz_i64:
2729     case INDEX_op_ctz_i64:
2730         return &r_r_rAL;
2731
2732     case INDEX_op_brcond_i32:
2733     case INDEX_op_brcond_i64:
2734         return &r_rA;
2735
2736     case INDEX_op_movcond_i32:
2737     case INDEX_op_movcond_i64:
2738         return &movc;
2739
2740     case INDEX_op_qemu_ld_i32:
2741     case INDEX_op_qemu_ld_i64:
2742         return &r_l;
2743     case INDEX_op_qemu_st_i32:
2744     case INDEX_op_qemu_st_i64:
2745         return &lZ_l;
2746
2747     case INDEX_op_deposit_i32:
2748     case INDEX_op_deposit_i64:
2749         return &dep;
2750
2751     case INDEX_op_extract2_i32:
2752     case INDEX_op_extract2_i64:
2753         return &ext2;
2754
2755     case INDEX_op_add2_i32:
2756     case INDEX_op_add2_i64:
2757     case INDEX_op_sub2_i32:
2758     case INDEX_op_sub2_i64:
2759         return &add2;
2760
2761     case INDEX_op_add_vec:
2762     case INDEX_op_sub_vec:
2763     case INDEX_op_mul_vec:
2764     case INDEX_op_xor_vec:
2765     case INDEX_op_ssadd_vec:
2766     case INDEX_op_sssub_vec:
2767     case INDEX_op_usadd_vec:
2768     case INDEX_op_ussub_vec:
2769     case INDEX_op_smax_vec:
2770     case INDEX_op_smin_vec:
2771     case INDEX_op_umax_vec:
2772     case INDEX_op_umin_vec:
2773     case INDEX_op_shlv_vec:
2774     case INDEX_op_shrv_vec:
2775     case INDEX_op_sarv_vec:
2776     case INDEX_op_aa64_sshl_vec:
2777         return &w_w_w;
2778     case INDEX_op_not_vec:
2779     case INDEX_op_neg_vec:
2780     case INDEX_op_abs_vec:
2781     case INDEX_op_shli_vec:
2782     case INDEX_op_shri_vec:
2783     case INDEX_op_sari_vec:
2784         return &w_w;
2785     case INDEX_op_ld_vec:
2786     case INDEX_op_st_vec:
2787     case INDEX_op_dupm_vec:
2788         return &w_r;
2789     case INDEX_op_dup_vec:
2790         return &w_wr;
2791     case INDEX_op_or_vec:
2792     case INDEX_op_andc_vec:
2793         return &w_w_wO;
2794     case INDEX_op_and_vec:
2795     case INDEX_op_orc_vec:
2796         return &w_w_wN;
2797     case INDEX_op_cmp_vec:
2798         return &w_w_wZ;
2799     case INDEX_op_bitsel_vec:
2800         return &w_w_w_w;
2801     case INDEX_op_aa64_sli_vec:
2802         return &w_0_w;
2803
2804     default:
2805         return NULL;
2806     }
2807 }
2808
2809 static void tcg_target_init(TCGContext *s)
2810 {
2811     tcg_target_available_regs[TCG_TYPE_I32] = 0xffffffffu;
2812     tcg_target_available_regs[TCG_TYPE_I64] = 0xffffffffu;
2813     tcg_target_available_regs[TCG_TYPE_V64] = 0xffffffff00000000ull;
2814     tcg_target_available_regs[TCG_TYPE_V128] = 0xffffffff00000000ull;
2815
2816     tcg_target_call_clobber_regs = -1ull;
2817     tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X19);
2818     tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X20);
2819     tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X21);
2820     tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X22);
2821     tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X23);
2822     tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X24);
2823     tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X25);
2824     tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X26);
2825     tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X27);
2826     tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X28);
2827     tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X29);
2828     tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V8);
2829     tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V9);
2830     tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V10);
2831     tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V11);
2832     tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V12);
2833     tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V13);
2834     tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V14);
2835     tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V15);
2836
2837     s->reserved_regs = 0;
2838     tcg_regset_set_reg(s->reserved_regs, TCG_REG_SP);
2839     tcg_regset_set_reg(s->reserved_regs, TCG_REG_FP);
2840     tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP);
2841     tcg_regset_set_reg(s->reserved_regs, TCG_REG_X18); /* platform register */
2842     tcg_regset_set_reg(s->reserved_regs, TCG_VEC_TMP);
2843 }
2844
2845 /* Saving pairs: (X19, X20) .. (X27, X28), (X29(fp), X30(lr)).  */
2846 #define PUSH_SIZE  ((30 - 19 + 1) * 8)
2847
2848 #define FRAME_SIZE \
2849     ((PUSH_SIZE \
2850       + TCG_STATIC_CALL_ARGS_SIZE \
2851       + CPU_TEMP_BUF_NLONGS * sizeof(long) \
2852       + TCG_TARGET_STACK_ALIGN - 1) \
2853      & ~(TCG_TARGET_STACK_ALIGN - 1))
2854
2855 /* We're expecting a 2 byte uleb128 encoded value.  */
2856 QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14));
2857
2858 /* We're expecting to use a single ADDI insn.  */
2859 QEMU_BUILD_BUG_ON(FRAME_SIZE - PUSH_SIZE > 0xfff);
2860
2861 static void tcg_target_qemu_prologue(TCGContext *s)
2862 {
2863     TCGReg r;
2864
2865     /* Push (FP, LR) and allocate space for all saved registers.  */
2866     tcg_out_insn(s, 3314, STP, TCG_REG_FP, TCG_REG_LR,
2867                  TCG_REG_SP, -PUSH_SIZE, 1, 1);
2868
2869     /* Set up frame pointer for canonical unwinding.  */
2870     tcg_out_movr_sp(s, TCG_TYPE_I64, TCG_REG_FP, TCG_REG_SP);
2871
2872     /* Store callee-preserved regs x19..x28.  */
2873     for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) {
2874         int ofs = (r - TCG_REG_X19 + 2) * 8;
2875         tcg_out_insn(s, 3314, STP, r, r + 1, TCG_REG_SP, ofs, 1, 0);
2876     }
2877
2878     /* Make stack space for TCG locals.  */
2879     tcg_out_insn(s, 3401, SUBI, TCG_TYPE_I64, TCG_REG_SP, TCG_REG_SP,
2880                  FRAME_SIZE - PUSH_SIZE);
2881
2882     /* Inform TCG about how to find TCG locals with register, offset, size.  */
2883     tcg_set_frame(s, TCG_REG_SP, TCG_STATIC_CALL_ARGS_SIZE,
2884                   CPU_TEMP_BUF_NLONGS * sizeof(long));
2885
2886 #if !defined(CONFIG_SOFTMMU)
2887     if (USE_GUEST_BASE) {
2888         tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_GUEST_BASE, guest_base);
2889         tcg_regset_set_reg(s->reserved_regs, TCG_REG_GUEST_BASE);
2890     }
2891 #endif
2892
2893     tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
2894     tcg_out_insn(s, 3207, BR, tcg_target_call_iarg_regs[1]);
2895
2896     /*
2897      * Return path for goto_ptr. Set return value to 0, a-la exit_tb,
2898      * and fall through to the rest of the epilogue.
2899      */
2900     s->code_gen_epilogue = s->code_ptr;
2901     tcg_out_movi(s, TCG_TYPE_REG, TCG_REG_X0, 0);
2902
2903     /* TB epilogue */
2904     tb_ret_addr = s->code_ptr;
2905
2906     /* Remove TCG locals stack space.  */
2907     tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_SP, TCG_REG_SP,
2908                  FRAME_SIZE - PUSH_SIZE);
2909
2910     /* Restore registers x19..x28.  */
2911     for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) {
2912         int ofs = (r - TCG_REG_X19 + 2) * 8;
2913         tcg_out_insn(s, 3314, LDP, r, r + 1, TCG_REG_SP, ofs, 1, 0);
2914     }
2915
2916     /* Pop (FP, LR), restore SP to previous frame.  */
2917     tcg_out_insn(s, 3314, LDP, TCG_REG_FP, TCG_REG_LR,
2918                  TCG_REG_SP, PUSH_SIZE, 0, 1);
2919     tcg_out_insn(s, 3207, RET, TCG_REG_LR);
2920 }
2921
2922 static void tcg_out_nop_fill(tcg_insn_unit *p, int count)
2923 {
2924     int i;
2925     for (i = 0; i < count; ++i) {
2926         p[i] = NOP;
2927     }
2928 }
2929
2930 typedef struct {
2931     DebugFrameHeader h;
2932     uint8_t fde_def_cfa[4];
2933     uint8_t fde_reg_ofs[24];
2934 } DebugFrame;
2935
2936 #define ELF_HOST_MACHINE EM_AARCH64
2937
2938 static const DebugFrame debug_frame = {
2939     .h.cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
2940     .h.cie.id = -1,
2941     .h.cie.version = 1,
2942     .h.cie.code_align = 1,
2943     .h.cie.data_align = 0x78,             /* sleb128 -8 */
2944     .h.cie.return_column = TCG_REG_LR,
2945
2946     /* Total FDE size does not include the "len" member.  */
2947     .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset),
2948
2949     .fde_def_cfa = {
2950         12, TCG_REG_SP,                 /* DW_CFA_def_cfa sp, ... */
2951         (FRAME_SIZE & 0x7f) | 0x80,     /* ... uleb128 FRAME_SIZE */
2952         (FRAME_SIZE >> 7)
2953     },
2954     .fde_reg_ofs = {
2955         0x80 + 28, 1,                   /* DW_CFA_offset, x28,  -8 */
2956         0x80 + 27, 2,                   /* DW_CFA_offset, x27, -16 */
2957         0x80 + 26, 3,                   /* DW_CFA_offset, x26, -24 */
2958         0x80 + 25, 4,                   /* DW_CFA_offset, x25, -32 */
2959         0x80 + 24, 5,                   /* DW_CFA_offset, x24, -40 */
2960         0x80 + 23, 6,                   /* DW_CFA_offset, x23, -48 */
2961         0x80 + 22, 7,                   /* DW_CFA_offset, x22, -56 */
2962         0x80 + 21, 8,                   /* DW_CFA_offset, x21, -64 */
2963         0x80 + 20, 9,                   /* DW_CFA_offset, x20, -72 */
2964         0x80 + 19, 10,                  /* DW_CFA_offset, x1p, -80 */
2965         0x80 + 30, 11,                  /* DW_CFA_offset,  lr, -88 */
2966         0x80 + 29, 12,                  /* DW_CFA_offset,  fp, -96 */
2967     }
2968 };
2969
2970 void tcg_register_jit(void *buf, size_t buf_size)
2971 {
2972     tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
2973 }