/*
 * ARM translation: AArch32 VFP instructions
 *
 * Copyright (c) 2003 Fabrice Bellard
 * Copyright (c) 2005-2007 CodeSourcery
 * Copyright (c) 2007 OpenedHand, Ltd.
 * Copyright (c) 2019 Linaro, Ltd.
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "tcg/tcg-op.h"
#include "tcg/tcg-op-gvec.h"
#include "exec/exec-all.h"
#include "exec/gen-icount.h"
#include "translate.h"
#include "translate-a32.h"

/* Include the generated VFP decoder */
#include "decode-vfp.c.inc"
#include "decode-vfp-uncond.c.inc"

static inline void vfp_load_reg64(TCGv_i64 var, int reg)
{
    tcg_gen_ld_i64(var, cpu_env, vfp_reg_offset(true, reg));
}

static inline void vfp_store_reg64(TCGv_i64 var, int reg)
{
    tcg_gen_st_i64(var, cpu_env, vfp_reg_offset(true, reg));
}

static inline void vfp_load_reg32(TCGv_i32 var, int reg)
{
    tcg_gen_ld_i32(var, cpu_env, vfp_reg_offset(false, reg));
}

static inline void vfp_store_reg32(TCGv_i32 var, int reg)
{
    tcg_gen_st_i32(var, cpu_env, vfp_reg_offset(false, reg));
}

/*
 * The imm8 encodes the sign bit, enough bits to represent an exponent in
 * the range 01....1xx to 10....0xx, and the most significant 4 bits of
 * the mantissa; see VFPExpandImm() in the v8 ARM ARM.
 */
uint64_t vfp_expand_imm(int size, uint8_t imm8)
{
    uint64_t imm;

    switch (size) {
    case MO_64:
        imm = (extract32(imm8, 7, 1) ? 0x8000 : 0) |
            (extract32(imm8, 6, 1) ? 0x3fc0 : 0x4000) |
            extract32(imm8, 0, 6);
        imm <<= 48;
        break;
    case MO_32:
        imm = (extract32(imm8, 7, 1) ? 0x8000 : 0) |
            (extract32(imm8, 6, 1) ? 0x3e00 : 0x4000) |
            (extract32(imm8, 0, 6) << 3);
        imm <<= 16;
        break;
    case MO_16:
        imm = (extract32(imm8, 7, 1) ? 0x8000 : 0) |
            (extract32(imm8, 6, 1) ? 0x3000 : 0x4000) |
            (extract32(imm8, 0, 6) << 6);
        break;
    default:
        g_assert_not_reached();
    }
    return imm;
}

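/*
 * Worked example (illustrative, not part of the original source): for
 * size == MO_32 and imm8 = 0x70 (sign 0, imm8[6] = 1, imm8[5:0] = 0x30),
 * the expansion is (0x3e00 | (0x30 << 3)) << 16 = 0x3f800000, which is
 * the single-precision encoding of 1.0.
 */
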
/*
 * Return the offset of a 16-bit half of the specified VFP single-precision
 * register. If top is true, returns the top 16 bits; otherwise the bottom
 * 16 bits.
 */
static inline long vfp_f16_offset(unsigned reg, bool top)
{
    long offs = vfp_reg_offset(false, reg);
#ifdef HOST_WORDS_BIGENDIAN
    if (!top) {
        offs += 2;
    }
#else
    if (top) {
        offs += 2;
    }
#endif
    return offs;
}

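/*
 * Illustrative example (assuming a little-endian host):
 * vfp_f16_offset(2, true) is vfp_reg_offset(false, 2) + 2, i.e. the high
 * 16 bits of S2, while vfp_f16_offset(2, false) addresses the low 16 bits.
 */
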
/*
 * Generate code for M-profile lazy FP state preservation if needed;
 * this corresponds to the pseudocode PreserveFPState() function.
 */
static void gen_preserve_fp_state(DisasContext *s, bool skip_context_update)
{
    if (s->v7m_lspact) {
        /*
         * Lazy state saving affects external memory and also the NVIC,
         * so we must mark it as an IO operation for icount (and cause
         * this to be the last insn in the TB).
         */
        if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
            s->base.is_jmp = DISAS_UPDATE_EXIT;
            gen_io_start();
        }
        gen_helper_v7m_preserve_fp_state(cpu_env);
        /*
         * If the preserve_fp_state helper doesn't throw an exception
         * then it will clear LSPACT; we don't need to repeat this for
         * any further FP insns in this TB.
         */
        s->v7m_lspact = false;
        /*
         * The helper might have zeroed VPR, so we do not know the
         * correct value for the MVE_NO_PRED TB flag any more.
         * If we're about to create a new fp context then that
         * will precisely determine the MVE_NO_PRED value (see
         * gen_update_fp_context()). Otherwise, we must:
         *  - set s->mve_no_pred to false, so this instruction
         *    is generated to use helper functions
         *  - end the TB now, without chaining to the next TB
         */
        if (skip_context_update || !s->v7m_new_fp_ctxt_needed) {
            s->mve_no_pred = false;
            s->base.is_jmp = DISAS_UPDATE_NOCHAIN;
        }
    }
}

/*
 * Generate code for M-profile FP context handling: update the
 * ownership of the FP context, and create a new context if
 * necessary. This corresponds to the parts of the pseudocode
 * ExecuteFPCheck() after the initial PreserveFPState() call.
 */
static void gen_update_fp_context(DisasContext *s)
{
    /* Update ownership of FP context: set FPCCR.S to match current state */
    if (s->v8m_fpccr_s_wrong) {
        TCGv_i32 tmp;

        tmp = load_cpu_field(v7m.fpccr[M_REG_S]);
        if (s->v8m_secure) {
            tcg_gen_ori_i32(tmp, tmp, R_V7M_FPCCR_S_MASK);
        } else {
            tcg_gen_andi_i32(tmp, tmp, ~R_V7M_FPCCR_S_MASK);
        }
        store_cpu_field(tmp, v7m.fpccr[M_REG_S]);
        /* Don't need to do this for any further FP insns in this TB */
        s->v8m_fpccr_s_wrong = false;
    }

    if (s->v7m_new_fp_ctxt_needed) {
        /*
         * Create new FP context by updating CONTROL.FPCA, CONTROL.SFPA,
         * the FPSCR, and VPR.
         */
        TCGv_i32 control, fpscr;
        uint32_t bits = R_V7M_CONTROL_FPCA_MASK;

        fpscr = load_cpu_field(v7m.fpdscr[s->v8m_secure]);
        gen_helper_vfp_set_fpscr(cpu_env, fpscr);
        tcg_temp_free_i32(fpscr);
        if (dc_isar_feature(aa32_mve, s)) {
            TCGv_i32 z32 = tcg_const_i32(0);
            store_cpu_field(z32, v7m.vpr);
        }
        /*
         * We just updated the FPSCR and VPR. Some of this state is cached
         * in the MVE_NO_PRED TB flag. We want to avoid having to end the
         * TB here, which means we need the new value of the MVE_NO_PRED
         * flag to be exactly known here and the same for all executions.
         * Luckily FPDSCR.LTPSIZE is always constant 4 and the VPR is
         * always set to 0, so the new MVE_NO_PRED flag is always 1
         * if and only if we have MVE.
         *
         * (The other FPSCR state cached in TB flags is VECLEN and VECSTRIDE,
         * but those do not exist for M-profile, so are not relevant here.)
         */
        s->mve_no_pred = dc_isar_feature(aa32_mve, s);

        if (s->v8m_secure) {
            bits |= R_V7M_CONTROL_SFPA_MASK;
        }
        control = load_cpu_field(v7m.control[M_REG_S]);
        tcg_gen_ori_i32(control, control, bits);
        store_cpu_field(control, v7m.control[M_REG_S]);
        /* Don't need to do this for any further FP insns in this TB */
        s->v7m_new_fp_ctxt_needed = false;
    }
}

/*
 * Check that VFP access is enabled, A-profile specific version.
 *
 * If VFP is enabled, return true. If not, emit code to generate an
 * appropriate exception and return false.
 * The ignore_vfp_enabled argument specifies that we should ignore
 * whether VFP is enabled via FPEXC.EN: this should be true for FMXR/FMRX
 * accesses to FPSID, FPEXC, MVFR0, MVFR1, MVFR2, and false for all other insns.
 */
static bool vfp_access_check_a(DisasContext *s, bool ignore_vfp_enabled)
{
    if (s->fp_excp_el) {
        gen_exception_insn(s, s->pc_curr, EXCP_UDEF,
                           syn_fp_access_trap(1, 0xe, false), s->fp_excp_el);
        return false;
    }

    if (!s->vfp_enabled && !ignore_vfp_enabled) {
        assert(!arm_dc_feature(s, ARM_FEATURE_M));
        unallocated_encoding(s);
        return false;
    }
    return true;
}

/*
 * Check that VFP access is enabled, M-profile specific version.
 *
 * If VFP is enabled, do the necessary M-profile lazy-FP handling and then
 * return true. If not, emit code to generate an appropriate exception and
 * return false.
 * skip_context_update is true to skip the "update FP context" part of this.
 */
bool vfp_access_check_m(DisasContext *s, bool skip_context_update)
{
    if (s->fp_excp_el) {
        /*
         * M-profile mostly catches the "FPU disabled" case early, in
         * disas_m_nocp(), but a few insns (eg LCTP, WLSTP, DLSTP)
         * which do coprocessor-checks are outside the large ranges of
         * the encoding space handled by the patterns in m-nocp.decode,
         * and for them we may need to raise NOCP here.
         */
        gen_exception_insn(s, s->pc_curr, EXCP_NOCP,
                           syn_uncategorized(), s->fp_excp_el);
        return false;
    }

    /* Handle M-profile lazy FP state mechanics */

    /* Trigger lazy-state preservation if necessary */
    gen_preserve_fp_state(s, skip_context_update);

    if (!skip_context_update) {
        /* Update ownership of FP context and create new FP context if needed */
        gen_update_fp_context(s);
    }

    return true;
}

/*
 * The most usual kind of VFP access check, for everything except
 * FMXR/FMRX to the always-available special registers.
 */
bool vfp_access_check(DisasContext *s)
{
    if (arm_dc_feature(s, ARM_FEATURE_M)) {
        return vfp_access_check_m(s, false);
    } else {
        return vfp_access_check_a(s, false);
    }
}

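/*
 * Sketch of the canonical use of these checks (illustrative only;
 * trans_VFOO and its argument struct are hypothetical names, but the real
 * trans_* functions below all follow this shape). Feature and UNDEF checks
 * return false so the insn falls through to other decoders, while a failed
 * vfp_access_check() returns true because the exception has already been
 * emitted:
 *
 *     static bool trans_VFOO(DisasContext *s, arg_VFOO *a)
 *     {
 *         if (!dc_isar_feature(aa32_fpsp_v2, s)) {
 *             return false;   (not implemented: try other decoders)
 *         }
 *         if (!vfp_access_check(s)) {
 *             return true;    (exception generated: insn is handled)
 *         }
 *         ...emit TCG ops...
 *         return true;
 *     }
 */
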
static bool trans_VSEL(DisasContext *s, arg_VSEL *a)
{
    uint32_t rd, rn, rm;
    int sz = a->sz;

    if (!dc_isar_feature(aa32_vsel, s)) {
        return false;
    }

    if (sz == 3 && !dc_isar_feature(aa32_fpdp_v2, s)) {
        return false;
    }

    if (sz == 1 && !dc_isar_feature(aa32_fp16_arith, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist */
    if (sz == 3 && !dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vm | a->vn | a->vd) & 0x10)) {
        return false;
    }

    rd = a->vd;
    rn = a->vn;
    rm = a->vm;

    if (!vfp_access_check(s)) {
        return true;
    }

    if (sz == 3) {
        TCGv_i64 frn, frm, dest;
        TCGv_i64 tmp, zero, zf, nf, vf;

        zero = tcg_const_i64(0);

        frn = tcg_temp_new_i64();
        frm = tcg_temp_new_i64();
        dest = tcg_temp_new_i64();

        zf = tcg_temp_new_i64();
        nf = tcg_temp_new_i64();
        vf = tcg_temp_new_i64();

        tcg_gen_extu_i32_i64(zf, cpu_ZF);
        tcg_gen_ext_i32_i64(nf, cpu_NF);
        tcg_gen_ext_i32_i64(vf, cpu_VF);

        vfp_load_reg64(frn, rn);
        vfp_load_reg64(frm, rm);
        switch (a->cc) {
        case 0: /* eq: Z */
            tcg_gen_movcond_i64(TCG_COND_EQ, dest, zf, zero,
                                frn, frm);
            break;
        case 1: /* vs: V */
            tcg_gen_movcond_i64(TCG_COND_LT, dest, vf, zero,
                                frn, frm);
            break;
        case 2: /* ge: N == V -> N ^ V == 0 */
            tmp = tcg_temp_new_i64();
            tcg_gen_xor_i64(tmp, vf, nf);
            tcg_gen_movcond_i64(TCG_COND_GE, dest, tmp, zero,
                                frn, frm);
            tcg_temp_free_i64(tmp);
            break;
        case 3: /* gt: !Z && N == V */
            tcg_gen_movcond_i64(TCG_COND_NE, dest, zf, zero,
                                frn, frm);
            tmp = tcg_temp_new_i64();
            tcg_gen_xor_i64(tmp, vf, nf);
            tcg_gen_movcond_i64(TCG_COND_GE, dest, tmp, zero,
                                dest, frm);
            tcg_temp_free_i64(tmp);
            break;
        }
        vfp_store_reg64(dest, rd);
        tcg_temp_free_i64(frn);
        tcg_temp_free_i64(frm);
        tcg_temp_free_i64(dest);

        tcg_temp_free_i64(zf);
        tcg_temp_free_i64(nf);
        tcg_temp_free_i64(vf);

        tcg_temp_free_i64(zero);
    } else {
        TCGv_i32 frn, frm, dest;
        TCGv_i32 tmp, zero;

        zero = tcg_const_i32(0);

        frn = tcg_temp_new_i32();
        frm = tcg_temp_new_i32();
        dest = tcg_temp_new_i32();
        vfp_load_reg32(frn, rn);
        vfp_load_reg32(frm, rm);
        switch (a->cc) {
        case 0: /* eq: Z */
            tcg_gen_movcond_i32(TCG_COND_EQ, dest, cpu_ZF, zero,
                                frn, frm);
            break;
        case 1: /* vs: V */
            tcg_gen_movcond_i32(TCG_COND_LT, dest, cpu_VF, zero,
                                frn, frm);
            break;
        case 2: /* ge: N == V -> N ^ V == 0 */
            tmp = tcg_temp_new_i32();
            tcg_gen_xor_i32(tmp, cpu_VF, cpu_NF);
            tcg_gen_movcond_i32(TCG_COND_GE, dest, tmp, zero,
                                frn, frm);
            tcg_temp_free_i32(tmp);
            break;
        case 3: /* gt: !Z && N == V */
            tcg_gen_movcond_i32(TCG_COND_NE, dest, cpu_ZF, zero,
                                frn, frm);
            tmp = tcg_temp_new_i32();
            tcg_gen_xor_i32(tmp, cpu_VF, cpu_NF);
            tcg_gen_movcond_i32(TCG_COND_GE, dest, tmp, zero,
                                dest, frm);
            tcg_temp_free_i32(tmp);
            break;
        }
        /* For fp16 the top half is always zeroes */
        if (sz == 1) {
            tcg_gen_andi_i32(dest, dest, 0xffff);
        }
        vfp_store_reg32(dest, rd);
        tcg_temp_free_i32(frn);
        tcg_temp_free_i32(frm);
        tcg_temp_free_i32(dest);

        tcg_temp_free_i32(zero);
    }

    return true;
}

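/*
 * Example (illustrative): VSELGE.F32 s0, s1, s2 has a->cc == 2, so the
 * movcond above copies s1 to s0 when N == V (the xor of the N and V flag
 * values is non-negative when viewed as a signed quantity) and s2 otherwise.
 */
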
/*
 * Table for converting the most common AArch32 encoding of
 * rounding mode to arm_fprounding order (which matches the
 * common AArch64 order); see ARM ARM pseudocode FPDecodeRM().
 */
static const uint8_t fp_decode_rm[] = {
    FPROUNDING_TIEAWAY,
    FPROUNDING_TIEEVEN,
    FPROUNDING_POSINF,
    FPROUNDING_NEGINF,
};

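/*
 * For example (illustrative): VRINTA/VCVTA encode RM = 0 and so pick
 * fp_decode_rm[0] == FPROUNDING_TIEAWAY, while VRINTN/VCVTN (RM = 1)
 * pick FPROUNDING_TIEEVEN.
 */
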
static bool trans_VRINT(DisasContext *s, arg_VRINT *a)
{
    uint32_t rd, rm;
    int sz = a->sz;
    TCGv_ptr fpst;
    TCGv_i32 tcg_rmode;
    int rounding = fp_decode_rm[a->rm];

    if (!dc_isar_feature(aa32_vrint, s)) {
        return false;
    }

    if (sz == 3 && !dc_isar_feature(aa32_fpdp_v2, s)) {
        return false;
    }

    if (sz == 1 && !dc_isar_feature(aa32_fp16_arith, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist */
    if (sz == 3 && !dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vm | a->vd) & 0x10)) {
        return false;
    }

    rd = a->vd;
    rm = a->vm;

    if (!vfp_access_check(s)) {
        return true;
    }

    if (sz == 1) {
        fpst = fpstatus_ptr(FPST_FPCR_F16);
    } else {
        fpst = fpstatus_ptr(FPST_FPCR);
    }

    tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rounding));
    gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);

    if (sz == 3) {
        TCGv_i64 tcg_op;
        TCGv_i64 tcg_res;
        tcg_op = tcg_temp_new_i64();
        tcg_res = tcg_temp_new_i64();
        vfp_load_reg64(tcg_op, rm);
        gen_helper_rintd(tcg_res, tcg_op, fpst);
        vfp_store_reg64(tcg_res, rd);
        tcg_temp_free_i64(tcg_op);
        tcg_temp_free_i64(tcg_res);
    } else {
        TCGv_i32 tcg_op;
        TCGv_i32 tcg_res;
        tcg_op = tcg_temp_new_i32();
        tcg_res = tcg_temp_new_i32();
        vfp_load_reg32(tcg_op, rm);
        if (sz == 1) {
            gen_helper_rinth(tcg_res, tcg_op, fpst);
        } else {
            gen_helper_rints(tcg_res, tcg_op, fpst);
        }
        vfp_store_reg32(tcg_res, rd);
        tcg_temp_free_i32(tcg_op);
        tcg_temp_free_i32(tcg_res);
    }

    gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
    tcg_temp_free_i32(tcg_rmode);

    tcg_temp_free_ptr(fpst);
    return true;
}

static bool trans_VCVT(DisasContext *s, arg_VCVT *a)
{
    uint32_t rd, rm;
    int sz = a->sz;
    TCGv_ptr fpst;
    TCGv_i32 tcg_rmode, tcg_shift;
    int rounding = fp_decode_rm[a->rm];
    bool is_signed = a->op;

    if (!dc_isar_feature(aa32_vcvt_dr, s)) {
        return false;
    }

    if (sz == 3 && !dc_isar_feature(aa32_fpdp_v2, s)) {
        return false;
    }

    if (sz == 1 && !dc_isar_feature(aa32_fp16_arith, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist */
    if (sz == 3 && !dc_isar_feature(aa32_simd_r32, s) && (a->vm & 0x10)) {
        return false;
    }

    rd = a->vd;
    rm = a->vm;

    if (!vfp_access_check(s)) {
        return true;
    }

    if (sz == 1) {
        fpst = fpstatus_ptr(FPST_FPCR_F16);
    } else {
        fpst = fpstatus_ptr(FPST_FPCR);
    }

    tcg_shift = tcg_const_i32(0);

    tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rounding));
    gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);

    if (sz == 3) {
        TCGv_i64 tcg_double, tcg_res;
        TCGv_i32 tcg_tmp;
        tcg_double = tcg_temp_new_i64();
        tcg_res = tcg_temp_new_i64();
        tcg_tmp = tcg_temp_new_i32();
        vfp_load_reg64(tcg_double, rm);
        if (is_signed) {
            gen_helper_vfp_tosld(tcg_res, tcg_double, tcg_shift, fpst);
        } else {
            gen_helper_vfp_tould(tcg_res, tcg_double, tcg_shift, fpst);
        }
        tcg_gen_extrl_i64_i32(tcg_tmp, tcg_res);
        vfp_store_reg32(tcg_tmp, rd);
        tcg_temp_free_i32(tcg_tmp);
        tcg_temp_free_i64(tcg_res);
        tcg_temp_free_i64(tcg_double);
    } else {
        TCGv_i32 tcg_single, tcg_res;
        tcg_single = tcg_temp_new_i32();
        tcg_res = tcg_temp_new_i32();
        vfp_load_reg32(tcg_single, rm);
        if (sz == 1) {
            if (is_signed) {
                gen_helper_vfp_toslh(tcg_res, tcg_single, tcg_shift, fpst);
            } else {
                gen_helper_vfp_toulh(tcg_res, tcg_single, tcg_shift, fpst);
            }
        } else {
            if (is_signed) {
                gen_helper_vfp_tosls(tcg_res, tcg_single, tcg_shift, fpst);
            } else {
                gen_helper_vfp_touls(tcg_res, tcg_single, tcg_shift, fpst);
            }
        }
        vfp_store_reg32(tcg_res, rd);
        tcg_temp_free_i32(tcg_res);
        tcg_temp_free_i32(tcg_single);
    }

    gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
    tcg_temp_free_i32(tcg_rmode);

    tcg_temp_free_i32(tcg_shift);

    tcg_temp_free_ptr(fpst);

    return true;
}

bool mve_skip_vmov(DisasContext *s, int vn, int index, int size)
{
    /*
     * In a CPU with MVE, the VMOV (vector lane to general-purpose register)
     * and VMOV (general-purpose register to vector lane) insns are not
     * predicated, but they are subject to beatwise execution if they are
     * not in an IT block.
     *
     * Since our implementation always executes all 4 beats in one tick,
     * this means only that if PSR.ECI says we should not be executing
     * the beat corresponding to the lane of the vector register being
     * accessed then we should skip performing the move, and that we need
     * to do the usual check for bad ECI state and advance of ECI state.
     *
     * Note that if PSR.ECI is non-zero then we cannot be in an IT block.
     *
     * Return true if this VMOV scalar <-> gpreg should be skipped because
     * the MVE PSR.ECI state says we skip the beat where the store happens.
     */

    /* Calculate the byte offset into Qn which we're going to access */
    int ofs = (index << size) + ((vn & 1) * 8);

    if (!dc_isar_feature(aa32_mve, s)) {
        return false;
    }

    switch (s->eci) {
    case ECI_NONE:
        return false;
    case ECI_A0:
        return ofs < 4;
    case ECI_A0A1:
        return ofs < 8;
    case ECI_A0A1A2:
    case ECI_A0A1A2B0:
        return ofs < 12;
    default:
        g_assert_not_reached();
    }
}

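/*
 * Worked example (illustrative): with vn = 3, index = 1 and size = MO_32,
 * ofs = (1 << 2) + (1 * 8) = 12, the last 4-byte beat of Q1, so the move
 * is never skipped (no ECI state records that beat as already executed).
 * An access with ofs = 0, by contrast, is skipped in any ECI state other
 * than ECI_NONE, because beat A0 has already been executed.
 */
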
static bool trans_VMOV_to_gp(DisasContext *s, arg_VMOV_to_gp *a)
{
    /* VMOV scalar to general purpose register */
    TCGv_i32 tmp;

    /*
     * SIZE == MO_32 is a VFP instruction; otherwise NEON. MVE has
     * all sizes, whether the CPU has fp or not.
     */
    if (!dc_isar_feature(aa32_mve, s)) {
        if (a->size == MO_32
            ? !dc_isar_feature(aa32_fpsp_v2, s)
            : !arm_dc_feature(s, ARM_FEATURE_NEON)) {
            return false;
        }
    }

    /* UNDEF accesses to D16-D31 if they don't exist */
    if (!dc_isar_feature(aa32_simd_r32, s) && (a->vn & 0x10)) {
        return false;
    }

    if (dc_isar_feature(aa32_mve, s)) {
        if (!mve_eci_check(s)) {
            return true;
        }
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    if (!mve_skip_vmov(s, a->vn, a->index, a->size)) {
        tmp = tcg_temp_new_i32();
        read_neon_element32(tmp, a->vn, a->index,
                            a->size | (a->u ? 0 : MO_SIGN));
        store_reg(s, a->rt, tmp);
    }

    if (dc_isar_feature(aa32_mve, s)) {
        mve_update_and_store_eci(s);
    }
    return true;
}

static bool trans_VMOV_from_gp(DisasContext *s, arg_VMOV_from_gp *a)
{
    /* VMOV general purpose register to scalar */
    TCGv_i32 tmp;

    /*
     * SIZE == MO_32 is a VFP instruction; otherwise NEON. MVE has
     * all sizes, whether the CPU has fp or not.
     */
    if (!dc_isar_feature(aa32_mve, s)) {
        if (a->size == MO_32
            ? !dc_isar_feature(aa32_fpsp_v2, s)
            : !arm_dc_feature(s, ARM_FEATURE_NEON)) {
            return false;
        }
    }

    /* UNDEF accesses to D16-D31 if they don't exist */
    if (!dc_isar_feature(aa32_simd_r32, s) && (a->vn & 0x10)) {
        return false;
    }

    if (dc_isar_feature(aa32_mve, s)) {
        if (!mve_eci_check(s)) {
            return true;
        }
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    if (!mve_skip_vmov(s, a->vn, a->index, a->size)) {
        tmp = load_reg(s, a->rt);
        write_neon_element32(tmp, a->vn, a->index, a->size);
        tcg_temp_free_i32(tmp);
    }

    if (dc_isar_feature(aa32_mve, s)) {
        mve_update_and_store_eci(s);
    }
    return true;
}

static bool trans_VDUP(DisasContext *s, arg_VDUP *a)
{
    /* VDUP (general purpose register) */
    TCGv_i32 tmp;
    int size, vec_size;

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist */
    if (!dc_isar_feature(aa32_simd_r32, s) && (a->vn & 0x10)) {
        return false;
    }

    if (a->b && a->e) {
        return false;
    }

    if (a->q && (a->vn & 1)) {
        return false;
    }

    vec_size = a->q ? 16 : 8;
    if (a->b) {
        size = 0;
    } else if (a->e) {
        size = 1;
    } else {
        size = 2;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    tmp = load_reg(s, a->rt);
    tcg_gen_gvec_dup_i32(size, neon_full_reg_offset(a->vn),
                         vec_size, vec_size, tmp);
    tcg_temp_free_i32(tmp);

    return true;
}

static bool trans_VMSR_VMRS(DisasContext *s, arg_VMSR_VMRS *a)
{
    TCGv_i32 tmp;
    bool ignore_vfp_enabled = false;

    if (arm_dc_feature(s, ARM_FEATURE_M)) {
        /* M profile version was already handled in m-nocp.decode */
        return false;
    }

    if (!dc_isar_feature(aa32_fpsp_v2, s)) {
        return false;
    }

    switch (a->reg) {
    case ARM_VFP_FPSID:
        /*
         * VFPv2 allows access to FPSID from userspace; VFPv3 restricts
         * all ID registers to privileged access only.
         */
        if (IS_USER(s) && dc_isar_feature(aa32_fpsp_v3, s)) {
            return false;
        }
        ignore_vfp_enabled = true;
        break;
    case ARM_VFP_MVFR0:
    case ARM_VFP_MVFR1:
        if (IS_USER(s) || !arm_dc_feature(s, ARM_FEATURE_MVFR)) {
            return false;
        }
        ignore_vfp_enabled = true;
        break;
    case ARM_VFP_MVFR2:
        if (IS_USER(s) || !arm_dc_feature(s, ARM_FEATURE_V8)) {
            return false;
        }
        ignore_vfp_enabled = true;
        break;
    case ARM_VFP_FPSCR:
        break;
    case ARM_VFP_FPEXC:
        if (IS_USER(s)) {
            return false;
        }
        ignore_vfp_enabled = true;
        break;
    case ARM_VFP_FPINST:
    case ARM_VFP_FPINST2:
        /* Not present in VFPv3 */
        if (IS_USER(s) || dc_isar_feature(aa32_fpsp_v3, s)) {
            return false;
        }
        break;
    default:
        return false;
    }

    /*
     * Call vfp_access_check_a() directly, because we need to tell
     * it to ignore FPEXC.EN for some register accesses.
     */
    if (!vfp_access_check_a(s, ignore_vfp_enabled)) {
        return true;
    }

    if (a->l) {
        /* VMRS, move VFP special register to gp register */
        switch (a->reg) {
        case ARM_VFP_MVFR0:
        case ARM_VFP_MVFR1:
        case ARM_VFP_MVFR2:
        case ARM_VFP_FPSID:
            if (s->current_el == 1) {
                TCGv_i32 tcg_reg, tcg_rt;

                gen_set_condexec(s);
                gen_set_pc_im(s, s->pc_curr);
                tcg_reg = tcg_const_i32(a->reg);
                tcg_rt = tcg_const_i32(a->rt);
                gen_helper_check_hcr_el2_trap(cpu_env, tcg_rt, tcg_reg);
                tcg_temp_free_i32(tcg_reg);
                tcg_temp_free_i32(tcg_rt);
            }
            /* fall through */
        case ARM_VFP_FPEXC:
        case ARM_VFP_FPINST:
        case ARM_VFP_FPINST2:
            tmp = load_cpu_field(vfp.xregs[a->reg]);
            break;
        case ARM_VFP_FPSCR:
            if (a->rt == 15) {
                tmp = load_cpu_field(vfp.xregs[ARM_VFP_FPSCR]);
                tcg_gen_andi_i32(tmp, tmp, FPCR_NZCV_MASK);
            } else {
                tmp = tcg_temp_new_i32();
                gen_helper_vfp_get_fpscr(tmp, cpu_env);
            }
            break;
        default:
            g_assert_not_reached();
        }

        if (a->rt == 15) {
            /* Set the 4 flag bits in the CPSR.  */
            gen_set_nzcv(tmp);
            tcg_temp_free_i32(tmp);
        } else {
            store_reg(s, a->rt, tmp);
        }
    } else {
        /* VMSR, move gp register to VFP special register */
        switch (a->reg) {
        case ARM_VFP_FPSID:
        case ARM_VFP_MVFR0:
        case ARM_VFP_MVFR1:
        case ARM_VFP_MVFR2:
            /* Writes are ignored.  */
            break;
        case ARM_VFP_FPSCR:
            tmp = load_reg(s, a->rt);
            gen_helper_vfp_set_fpscr(cpu_env, tmp);
            tcg_temp_free_i32(tmp);
            gen_lookup_tb(s);
            break;
        case ARM_VFP_FPEXC:
            /*
             * TODO: VFP subarchitecture support.
             * For now, keep the EN bit only
             */
            tmp = load_reg(s, a->rt);
            tcg_gen_andi_i32(tmp, tmp, 1 << 30);
            store_cpu_field(tmp, vfp.xregs[a->reg]);
            gen_lookup_tb(s);
            break;
        case ARM_VFP_FPINST:
        case ARM_VFP_FPINST2:
            tmp = load_reg(s, a->rt);
            store_cpu_field(tmp, vfp.xregs[a->reg]);
            break;
        default:
            g_assert_not_reached();
        }
    }

    return true;
}

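/*
 * Example (illustrative): "vmrs APSR_nzcv, fpscr" is the a->l && a->rt == 15
 * case above; only FPSCR.{N,Z,C,V} are read and they are copied into the
 * CPSR flags rather than into a general-purpose register.
 */
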
static bool trans_VMOV_half(DisasContext *s, arg_VMOV_single *a)
{
    TCGv_i32 tmp;

    if (!dc_isar_feature(aa32_fp16_arith, s)) {
        return false;
    }

    if (a->rt == 15) {
        /* UNPREDICTABLE; we choose to UNDEF */
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    if (a->l) {
        /* VFP to general purpose register */
        tmp = tcg_temp_new_i32();
        vfp_load_reg32(tmp, a->vn);
        tcg_gen_andi_i32(tmp, tmp, 0xffff);
        store_reg(s, a->rt, tmp);
    } else {
        /* general purpose register to VFP */
        tmp = load_reg(s, a->rt);
        tcg_gen_andi_i32(tmp, tmp, 0xffff);
        vfp_store_reg32(tmp, a->vn);
        tcg_temp_free_i32(tmp);
    }

    return true;
}

static bool trans_VMOV_single(DisasContext *s, arg_VMOV_single *a)
{
    TCGv_i32 tmp;

    if (!dc_isar_feature(aa32_fpsp_v2, s) && !dc_isar_feature(aa32_mve, s)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    if (a->l) {
        /* VFP to general purpose register */
        tmp = tcg_temp_new_i32();
        vfp_load_reg32(tmp, a->vn);
        if (a->rt == 15) {
            /* Set the 4 flag bits in the CPSR.  */
            gen_set_nzcv(tmp);
            tcg_temp_free_i32(tmp);
        } else {
            store_reg(s, a->rt, tmp);
        }
    } else {
        /* general purpose register to VFP */
        tmp = load_reg(s, a->rt);
        vfp_store_reg32(tmp, a->vn);
        tcg_temp_free_i32(tmp);
    }

    return true;
}

static bool trans_VMOV_64_sp(DisasContext *s, arg_VMOV_64_sp *a)
{
    TCGv_i32 tmp;

    if (!dc_isar_feature(aa32_fpsp_v2, s) && !dc_isar_feature(aa32_mve, s)) {
        return false;
    }

    /*
     * VMOV between two general-purpose registers and two single precision
     * floating point registers
     */
    if (!vfp_access_check(s)) {
        return true;
    }

    if (a->op) {
        /* fpreg to gpreg */
        tmp = tcg_temp_new_i32();
        vfp_load_reg32(tmp, a->vm);
        store_reg(s, a->rt, tmp);
        tmp = tcg_temp_new_i32();
        vfp_load_reg32(tmp, a->vm + 1);
        store_reg(s, a->rt2, tmp);
    } else {
        /* gpreg to fpreg */
        tmp = load_reg(s, a->rt);
        vfp_store_reg32(tmp, a->vm);
        tcg_temp_free_i32(tmp);
        tmp = load_reg(s, a->rt2);
        vfp_store_reg32(tmp, a->vm + 1);
        tcg_temp_free_i32(tmp);
    }

    return true;
}

static bool trans_VMOV_64_dp(DisasContext *s, arg_VMOV_64_dp *a)
{
    TCGv_i32 tmp;

    /*
     * VMOV between two general-purpose registers and one double precision
     * floating point register.  Note that this does not require support
     * for double precision arithmetic.
     */
    if (!dc_isar_feature(aa32_fpsp_v2, s) && !dc_isar_feature(aa32_mve, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist */
    if (!dc_isar_feature(aa32_simd_r32, s) && (a->vm & 0x10)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    if (a->op) {
        /* fpreg to gpreg */
        tmp = tcg_temp_new_i32();
        vfp_load_reg32(tmp, a->vm * 2);
        store_reg(s, a->rt, tmp);
        tmp = tcg_temp_new_i32();
        vfp_load_reg32(tmp, a->vm * 2 + 1);
        store_reg(s, a->rt2, tmp);
    } else {
        /* gpreg to fpreg */
        tmp = load_reg(s, a->rt);
        vfp_store_reg32(tmp, a->vm * 2);
        tcg_temp_free_i32(tmp);
        tmp = load_reg(s, a->rt2);
        vfp_store_reg32(tmp, a->vm * 2 + 1);
        tcg_temp_free_i32(tmp);
    }

    return true;
}

static bool trans_VLDR_VSTR_hp(DisasContext *s, arg_VLDR_VSTR_sp *a)
{
    uint32_t offset;
    TCGv_i32 addr, tmp;

    if (!dc_isar_feature(aa32_fpsp_v2, s) && !dc_isar_feature(aa32_mve, s)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    /* imm8 field is offset/2 for fp16, unlike fp32 and fp64 */
    offset = a->imm << 1;
    if (!a->u) {
        offset = -offset;
    }

    /* For thumb, use of PC is UNPREDICTABLE.  */
    addr = add_reg_for_lit(s, a->rn, offset);
    tmp = tcg_temp_new_i32();
    if (a->l) {
        gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s), MO_UW | MO_ALIGN);
        vfp_store_reg32(tmp, a->vd);
    } else {
        vfp_load_reg32(tmp, a->vd);
        gen_aa32_st_i32(s, tmp, addr, get_mem_index(s), MO_UW | MO_ALIGN);
    }
    tcg_temp_free_i32(tmp);
    tcg_temp_free_i32(addr);

    return true;
}

static bool trans_VLDR_VSTR_sp(DisasContext *s, arg_VLDR_VSTR_sp *a)
{
    uint32_t offset;
    TCGv_i32 addr, tmp;

    if (!dc_isar_feature(aa32_fpsp_v2, s) && !dc_isar_feature(aa32_mve, s)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    offset = a->imm << 2;
    if (!a->u) {
        offset = -offset;
    }

    /* For thumb, use of PC is UNPREDICTABLE.  */
    addr = add_reg_for_lit(s, a->rn, offset);
    tmp = tcg_temp_new_i32();
    if (a->l) {
        gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s), MO_UL | MO_ALIGN);
        vfp_store_reg32(tmp, a->vd);
    } else {
        vfp_load_reg32(tmp, a->vd);
        gen_aa32_st_i32(s, tmp, addr, get_mem_index(s), MO_UL | MO_ALIGN);
    }
    tcg_temp_free_i32(tmp);
    tcg_temp_free_i32(addr);

    return true;
}

static bool trans_VLDR_VSTR_dp(DisasContext *s, arg_VLDR_VSTR_dp *a)
{
    uint32_t offset;
    TCGv_i32 addr;
    TCGv_i64 tmp;

    /* Note that this does not require support for double arithmetic.  */
    if (!dc_isar_feature(aa32_fpsp_v2, s) && !dc_isar_feature(aa32_mve, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist */
    if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    offset = a->imm << 2;
    if (!a->u) {
        offset = -offset;
    }

    /* For thumb, use of PC is UNPREDICTABLE.  */
    addr = add_reg_for_lit(s, a->rn, offset);
    tmp = tcg_temp_new_i64();
    if (a->l) {
        gen_aa32_ld_i64(s, tmp, addr, get_mem_index(s), MO_Q | MO_ALIGN_4);
        vfp_store_reg64(tmp, a->vd);
    } else {
        vfp_load_reg64(tmp, a->vd);
        gen_aa32_st_i64(s, tmp, addr, get_mem_index(s), MO_Q | MO_ALIGN_4);
    }
    tcg_temp_free_i64(tmp);
    tcg_temp_free_i32(addr);

    return true;
}

static bool trans_VLDM_VSTM_sp(DisasContext *s, arg_VLDM_VSTM_sp *a)
{
    uint32_t offset;
    TCGv_i32 addr, tmp;
    int i, n;

    if (!dc_isar_feature(aa32_fpsp_v2, s) && !dc_isar_feature(aa32_mve, s)) {
        return false;
    }

    n = a->imm;

    if (n == 0 || (a->vd + n) > 32) {
        /*
         * UNPREDICTABLE cases for bad immediates: we choose to
         * UNDEF to avoid generating huge numbers of TCG ops
         */
        return false;
    }

    if (a->rn == 15 && a->w) {
        /* writeback to PC is UNPREDICTABLE, we choose to UNDEF */
        return false;
    }

    s->eci_handled = true;

    if (!vfp_access_check(s)) {
        return true;
    }

    /* For thumb, use of PC is UNPREDICTABLE.  */
    addr = add_reg_for_lit(s, a->rn, 0);
    if (a->p) {
        /* pre-decrement */
        tcg_gen_addi_i32(addr, addr, -(a->imm << 2));
    }

    if (s->v8m_stackcheck && a->rn == 13 && a->w) {
        /*
         * Here 'addr' is the lowest address we will store to,
         * and is either the old SP (if post-increment) or
         * the new SP (if pre-decrement). For post-increment
         * where the old value is below the limit and the new
         * value is above, it is UNKNOWN whether the limit check
         * triggers; we choose to trigger.
         */
        gen_helper_v8m_stackcheck(cpu_env, addr);
    }

    offset = 4;
    tmp = tcg_temp_new_i32();
    for (i = 0; i < n; i++) {
        if (a->l) {
            /* load */
            gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s), MO_UL | MO_ALIGN);
            vfp_store_reg32(tmp, a->vd + i);
        } else {
            /* store */
            vfp_load_reg32(tmp, a->vd + i);
            gen_aa32_st_i32(s, tmp, addr, get_mem_index(s), MO_UL | MO_ALIGN);
        }
        tcg_gen_addi_i32(addr, addr, offset);
    }
    tcg_temp_free_i32(tmp);
    if (a->w) {
        /* writeback */
        if (a->p) {
            offset = -offset * n;
            tcg_gen_addi_i32(addr, addr, offset);
        }
        store_reg(s, a->rn, addr);
    } else {
        tcg_temp_free_i32(addr);
    }

    clear_eci_state(s);
    return true;
}

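/*
 * Example (illustrative): "vldmia r2!, {s4-s7}" is decoded with a->vd = 4,
 * n = a->imm = 4, a->l = 1, a->w = 1 and a->p = 0, so the loop above emits
 * four 32-bit loads at r2 + 0/4/8/12 and then writes r2 + 16 back to r2.
 */
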
static bool trans_VLDM_VSTM_dp(DisasContext *s, arg_VLDM_VSTM_dp *a)
{
    uint32_t offset;
    TCGv_i32 addr;
    TCGv_i64 tmp;
    int i, n;

    /* Note that this does not require support for double arithmetic.  */
    if (!dc_isar_feature(aa32_fpsp_v2, s) && !dc_isar_feature(aa32_mve, s)) {
        return false;
    }

    n = a->imm >> 1;

    if (n == 0 || (a->vd + n) > 32 || n > 16) {
        /*
         * UNPREDICTABLE cases for bad immediates: we choose to
         * UNDEF to avoid generating huge numbers of TCG ops
         */
        return false;
    }

    if (a->rn == 15 && a->w) {
        /* writeback to PC is UNPREDICTABLE, we choose to UNDEF */
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist */
    if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd + n) > 16) {
        return false;
    }

    s->eci_handled = true;

    if (!vfp_access_check(s)) {
        return true;
    }

    /* For thumb, use of PC is UNPREDICTABLE.  */
    addr = add_reg_for_lit(s, a->rn, 0);
    if (a->p) {
        /* pre-decrement */
        tcg_gen_addi_i32(addr, addr, -(a->imm << 2));
    }

    if (s->v8m_stackcheck && a->rn == 13 && a->w) {
        /*
         * Here 'addr' is the lowest address we will store to,
         * and is either the old SP (if post-increment) or
         * the new SP (if pre-decrement). For post-increment
         * where the old value is below the limit and the new
         * value is above, it is UNKNOWN whether the limit check
         * triggers; we choose to trigger.
         */
        gen_helper_v8m_stackcheck(cpu_env, addr);
    }

    offset = 8;
    tmp = tcg_temp_new_i64();
    for (i = 0; i < n; i++) {
        if (a->l) {
            /* load */
            gen_aa32_ld_i64(s, tmp, addr, get_mem_index(s), MO_Q | MO_ALIGN_4);
            vfp_store_reg64(tmp, a->vd + i);
        } else {
            /* store */
            vfp_load_reg64(tmp, a->vd + i);
            gen_aa32_st_i64(s, tmp, addr, get_mem_index(s), MO_Q | MO_ALIGN_4);
        }
        tcg_gen_addi_i32(addr, addr, offset);
    }
    tcg_temp_free_i64(tmp);
    if (a->w) {
        /* writeback */
        if (a->p) {
            offset = -offset * n;
        } else if (a->imm & 1) {
            offset = 4;
        } else {
            offset = 0;
        }

        if (offset != 0) {
            tcg_gen_addi_i32(addr, addr, offset);
        }
        store_reg(s, a->rn, addr);
    } else {
        tcg_temp_free_i32(addr);
    }

    clear_eci_state(s);
    return true;
}

/*
 * Types for callbacks for do_vfp_3op_sp() and do_vfp_3op_dp().
 * The callback should emit code to write a value to vd. If
 * do_vfp_3op_{sp,dp}() was passed reads_vd then the TCGv vd
 * will contain the old value of the relevant VFP register;
 * otherwise it must be written to only.
 */
typedef void VFPGen3OpSPFn(TCGv_i32 vd,
                           TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst);
typedef void VFPGen3OpDPFn(TCGv_i64 vd,
                           TCGv_i64 vn, TCGv_i64 vm, TCGv_ptr fpst);

/*
 * Types for callbacks for do_vfp_2op_sp() and do_vfp_2op_dp().
 * The callback should emit code to write a value to vd (which
 * should be written to only).
 */
typedef void VFPGen2OpSPFn(TCGv_i32 vd, TCGv_i32 vm);
typedef void VFPGen2OpDPFn(TCGv_i64 vd, TCGv_i64 vm);

/*
 * Return true if the specified S reg is in a scalar bank
 * (ie if it is s0..s7)
 */
static inline bool vfp_sreg_is_scalar(int reg)
{
    return (reg & 0x18) == 0;
}

/*
 * Return true if the specified D reg is in a scalar bank
 * (ie if it is d0..d3 or d16..d19)
 */
static inline bool vfp_dreg_is_scalar(int reg)
{
    return (reg & 0xc) == 0;
}

/*
 * Advance the S reg number forwards by delta within its bank
 * (ie increment the low 3 bits but leave the rest the same)
 */
static inline int vfp_advance_sreg(int reg, int delta)
{
    return ((reg + delta) & 0x7) | (reg & ~0x7);
}

/*
 * Advance the D reg number forwards by delta within its bank
 * (ie increment the low 2 bits but leave the rest the same)
 */
static inline int vfp_advance_dreg(int reg, int delta)
{
    return ((reg + delta) & 0x3) | (reg & ~0x3);
}

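/*
 * Worked example (illustrative): vfp_advance_sreg(6, 3) is
 * ((6 + 3) & 7) | (6 & ~7) = 1, i.e. the register sequence wraps around
 * inside the s0..s7 bank instead of advancing into the next bank.
 */
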
/*
 * Perform a 3-operand VFP data processing instruction. fn is the
 * callback to do the actual operation; this function deals with the
 * code to handle looping around for VFP vector processing.
 */
static bool do_vfp_3op_sp(DisasContext *s, VFPGen3OpSPFn *fn,
                          int vd, int vn, int vm, bool reads_vd)
{
    uint32_t delta_m = 0;
    uint32_t delta_d = 0;
    int veclen = s->vec_len;
    TCGv_i32 f0, f1, fd;
    TCGv_ptr fpst;

    if (!dc_isar_feature(aa32_fpsp_v2, s)) {
        return false;
    }

    if (!dc_isar_feature(aa32_fpshvec, s) &&
        (veclen != 0 || s->vec_stride != 0)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    if (veclen > 0) {
        /* Figure out what type of vector operation this is.  */
        if (vfp_sreg_is_scalar(vd)) {
            /* scalar */
            veclen = 0;
        } else {
            delta_d = s->vec_stride + 1;

            if (vfp_sreg_is_scalar(vm)) {
                /* mixed scalar/vector */
                delta_m = 0;
            } else {
                /* vector */
                delta_m = delta_d;
            }
        }
    }

    f0 = tcg_temp_new_i32();
    f1 = tcg_temp_new_i32();
    fd = tcg_temp_new_i32();
    fpst = fpstatus_ptr(FPST_FPCR);

    vfp_load_reg32(f0, vn);
    vfp_load_reg32(f1, vm);

    for (;;) {
        if (reads_vd) {
            vfp_load_reg32(fd, vd);
        }
        fn(fd, f0, f1, fpst);
        vfp_store_reg32(fd, vd);

        if (veclen == 0) {
            break;
        }

        /* Set up the operands for the next iteration */
        veclen--;
        vd = vfp_advance_sreg(vd, delta_d);
        vn = vfp_advance_sreg(vn, delta_d);
        vfp_load_reg32(f0, vn);
        if (delta_m) {
            vm = vfp_advance_sreg(vm, delta_m);
            vfp_load_reg32(f1, vm);
        }
    }

    tcg_temp_free_i32(f0);
    tcg_temp_free_i32(f1);
    tcg_temp_free_i32(fd);
    tcg_temp_free_ptr(fpst);

    return true;
}

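/*
 * Example of the vector iteration above (illustrative): with
 * s->vec_len == 2 and s->vec_stride == 0 (so delta_d == delta_m == 1),
 * a single VADD.F32 s8, s16, s24 performs three additions:
 * s8 = s16 + s24, s9 = s17 + s25 and s10 = s18 + s26, all within the
 * registers' own banks.
 */
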
static bool do_vfp_3op_hp(DisasContext *s, VFPGen3OpSPFn *fn,
                          int vd, int vn, int vm, bool reads_vd)
{
    /*
     * Do a half-precision operation. Functionally this is
     * the same as do_vfp_3op_sp(), except:
     *  - it uses the FPST_FPCR_F16
     *  - it doesn't need the VFP vector handling (fp16 is a
     *    v8 feature, and in v8 VFP vectors don't exist)
     *  - it does the aa32_fp16_arith feature test
     */
    TCGv_i32 f0, f1, fd;
    TCGv_ptr fpst;

    if (!dc_isar_feature(aa32_fp16_arith, s)) {
        return false;
    }

    if (s->vec_len != 0 || s->vec_stride != 0) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    f0 = tcg_temp_new_i32();
    f1 = tcg_temp_new_i32();
    fd = tcg_temp_new_i32();
    fpst = fpstatus_ptr(FPST_FPCR_F16);

    vfp_load_reg32(f0, vn);
    vfp_load_reg32(f1, vm);

    if (reads_vd) {
        vfp_load_reg32(fd, vd);
    }
    fn(fd, f0, f1, fpst);
    vfp_store_reg32(fd, vd);

    tcg_temp_free_i32(f0);
    tcg_temp_free_i32(f1);
    tcg_temp_free_i32(fd);
    tcg_temp_free_ptr(fpst);

    return true;
}

static bool do_vfp_3op_dp(DisasContext *s, VFPGen3OpDPFn *fn,
                          int vd, int vn, int vm, bool reads_vd)
{
    uint32_t delta_m = 0;
    uint32_t delta_d = 0;
    int veclen = s->vec_len;
    TCGv_i64 f0, f1, fd;
    TCGv_ptr fpst;

    if (!dc_isar_feature(aa32_fpdp_v2, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist */
    if (!dc_isar_feature(aa32_simd_r32, s) && ((vd | vn | vm) & 0x10)) {
        return false;
    }

    if (!dc_isar_feature(aa32_fpshvec, s) &&
        (veclen != 0 || s->vec_stride != 0)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    if (veclen > 0) {
        /* Figure out what type of vector operation this is.  */
        if (vfp_dreg_is_scalar(vd)) {
            /* scalar */
            veclen = 0;
        } else {
            delta_d = (s->vec_stride >> 1) + 1;

            if (vfp_dreg_is_scalar(vm)) {
                /* mixed scalar/vector */
                delta_m = 0;
            } else {
                /* vector */
                delta_m = delta_d;
            }
        }
    }

    f0 = tcg_temp_new_i64();
    f1 = tcg_temp_new_i64();
    fd = tcg_temp_new_i64();
    fpst = fpstatus_ptr(FPST_FPCR);

    vfp_load_reg64(f0, vn);
    vfp_load_reg64(f1, vm);

    for (;;) {
        if (reads_vd) {
            vfp_load_reg64(fd, vd);
        }
        fn(fd, f0, f1, fpst);
        vfp_store_reg64(fd, vd);

        if (veclen == 0) {
            break;
        }
        /* Set up the operands for the next iteration */
        veclen--;
        vd = vfp_advance_dreg(vd, delta_d);
        vn = vfp_advance_dreg(vn, delta_d);
        vfp_load_reg64(f0, vn);
        if (delta_m) {
            vm = vfp_advance_dreg(vm, delta_m);
            vfp_load_reg64(f1, vm);
        }
    }

    tcg_temp_free_i64(f0);
    tcg_temp_free_i64(f1);
    tcg_temp_free_i64(fd);
    tcg_temp_free_ptr(fpst);

    return true;
}

static bool do_vfp_2op_sp(DisasContext *s, VFPGen2OpSPFn *fn, int vd, int vm)
{
    uint32_t delta_m = 0;
    uint32_t delta_d = 0;
    int veclen = s->vec_len;
    TCGv_i32 f0, fd;

    /* Note that the caller must check the aa32_fpsp_v2 feature. */

    if (!dc_isar_feature(aa32_fpshvec, s) &&
        (veclen != 0 || s->vec_stride != 0)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    if (veclen > 0) {
        /* Figure out what type of vector operation this is.  */
        if (vfp_sreg_is_scalar(vd)) {
            /* scalar */
            veclen = 0;
        } else {
            delta_d = s->vec_stride + 1;

            if (vfp_sreg_is_scalar(vm)) {
                /* mixed scalar/vector */
                delta_m = 0;
            } else {
                /* vector */
                delta_m = delta_d;
            }
        }
    }

    f0 = tcg_temp_new_i32();
    fd = tcg_temp_new_i32();

    vfp_load_reg32(f0, vm);

    for (;;) {
        fn(fd, f0);
        vfp_store_reg32(fd, vd);

        if (veclen == 0) {
            break;
        }

        if (delta_m == 0) {
            /* single source one-many */
            while (veclen--) {
                vd = vfp_advance_sreg(vd, delta_d);
                vfp_store_reg32(fd, vd);
            }
            break;
        }

        /* Set up the operands for the next iteration */
        veclen--;
        vd = vfp_advance_sreg(vd, delta_d);
        vm = vfp_advance_sreg(vm, delta_m);
        vfp_load_reg32(f0, vm);
    }

    tcg_temp_free_i32(f0);
    tcg_temp_free_i32(fd);

    return true;
}

static bool do_vfp_2op_hp(DisasContext *s, VFPGen2OpSPFn *fn, int vd, int vm)
{
    /*
     * Do a half-precision operation. Functionally this is
     * the same as do_vfp_2op_sp(), except:
     *  - it doesn't need the VFP vector handling (fp16 is a
     *    v8 feature, and in v8 VFP vectors don't exist)
     *  - it does the aa32_fp16_arith feature test
     */
    TCGv_i32 f0;

    /* Note that the caller must check the aa32_fp16_arith feature */

    if (!dc_isar_feature(aa32_fp16_arith, s)) {
        return false;
    }

    if (s->vec_len != 0 || s->vec_stride != 0) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    f0 = tcg_temp_new_i32();
    vfp_load_reg32(f0, vm);
    fn(f0, f0);
    vfp_store_reg32(f0, vd);
    tcg_temp_free_i32(f0);

    return true;
}

static bool do_vfp_2op_dp(DisasContext *s, VFPGen2OpDPFn *fn, int vd, int vm)
{
    uint32_t delta_m = 0;
    uint32_t delta_d = 0;
    int veclen = s->vec_len;
    TCGv_i64 f0, fd;

    /* Note that the caller must check the aa32_fpdp_v2 feature. */

    /* UNDEF accesses to D16-D31 if they don't exist */
    if (!dc_isar_feature(aa32_simd_r32, s) && ((vd | vm) & 0x10)) {
        return false;
    }

    if (!dc_isar_feature(aa32_fpshvec, s) &&
        (veclen != 0 || s->vec_stride != 0)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    if (veclen > 0) {
        /* Figure out what type of vector operation this is.  */
        if (vfp_dreg_is_scalar(vd)) {
            /* scalar */
            veclen = 0;
        } else {
            delta_d = (s->vec_stride >> 1) + 1;

            if (vfp_dreg_is_scalar(vm)) {
                /* mixed scalar/vector */
                delta_m = 0;
            } else {
                /* vector */
                delta_m = delta_d;
            }
        }
    }

    f0 = tcg_temp_new_i64();
    fd = tcg_temp_new_i64();

    vfp_load_reg64(f0, vm);

    for (;;) {
        fn(fd, f0);
        vfp_store_reg64(fd, vd);

        if (veclen == 0) {
            break;
        }

        if (delta_m == 0) {
            /* single source one-many */
            while (veclen--) {
                vd = vfp_advance_dreg(vd, delta_d);
                vfp_store_reg64(fd, vd);
            }
            break;
        }

        /* Set up the operands for the next iteration */
        veclen--;
        vd = vfp_advance_dreg(vd, delta_d);
        vm = vfp_advance_dreg(vm, delta_m);
        vfp_load_reg64(f0, vm);
    }

    tcg_temp_free_i64(f0);
    tcg_temp_free_i64(fd);

    return true;
}

static void gen_VMLA_hp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
{
    /* Note that order of inputs to the add matters for NaNs */
    TCGv_i32 tmp = tcg_temp_new_i32();

    gen_helper_vfp_mulh(tmp, vn, vm, fpst);
    gen_helper_vfp_addh(vd, vd, tmp, fpst);
    tcg_temp_free_i32(tmp);
}

static bool trans_VMLA_hp(DisasContext *s, arg_VMLA_sp *a)
{
    return do_vfp_3op_hp(s, gen_VMLA_hp, a->vd, a->vn, a->vm, true);
}

static void gen_VMLA_sp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
{
    /* Note that order of inputs to the add matters for NaNs */
    TCGv_i32 tmp = tcg_temp_new_i32();

    gen_helper_vfp_muls(tmp, vn, vm, fpst);
    gen_helper_vfp_adds(vd, vd, tmp, fpst);
    tcg_temp_free_i32(tmp);
}

static bool trans_VMLA_sp(DisasContext *s, arg_VMLA_sp *a)
{
    return do_vfp_3op_sp(s, gen_VMLA_sp, a->vd, a->vn, a->vm, true);
}

static void gen_VMLA_dp(TCGv_i64 vd, TCGv_i64 vn, TCGv_i64 vm, TCGv_ptr fpst)
{
    /* Note that order of inputs to the add matters for NaNs */
    TCGv_i64 tmp = tcg_temp_new_i64();

    gen_helper_vfp_muld(tmp, vn, vm, fpst);
    gen_helper_vfp_addd(vd, vd, tmp, fpst);
    tcg_temp_free_i64(tmp);
}

static bool trans_VMLA_dp(DisasContext *s, arg_VMLA_dp *a)
{
    return do_vfp_3op_dp(s, gen_VMLA_dp, a->vd, a->vn, a->vm, true);
}

static void gen_VMLS_hp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
{
    /*
     * VMLS: vd = vd + -(vn * vm)
     * Note that order of inputs to the add matters for NaNs.
     */
    TCGv_i32 tmp = tcg_temp_new_i32();

    gen_helper_vfp_mulh(tmp, vn, vm, fpst);
    gen_helper_vfp_negh(tmp, tmp);
    gen_helper_vfp_addh(vd, vd, tmp, fpst);
    tcg_temp_free_i32(tmp);
}

static bool trans_VMLS_hp(DisasContext *s, arg_VMLS_sp *a)
{
    return do_vfp_3op_hp(s, gen_VMLS_hp, a->vd, a->vn, a->vm, true);
}

static void gen_VMLS_sp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
{
    /*
     * VMLS: vd = vd + -(vn * vm)
     * Note that order of inputs to the add matters for NaNs.
     */
    TCGv_i32 tmp = tcg_temp_new_i32();

    gen_helper_vfp_muls(tmp, vn, vm, fpst);
    gen_helper_vfp_negs(tmp, tmp);
    gen_helper_vfp_adds(vd, vd, tmp, fpst);
    tcg_temp_free_i32(tmp);
}

static bool trans_VMLS_sp(DisasContext *s, arg_VMLS_sp *a)
{
    return do_vfp_3op_sp(s, gen_VMLS_sp, a->vd, a->vn, a->vm, true);
}

static void gen_VMLS_dp(TCGv_i64 vd, TCGv_i64 vn, TCGv_i64 vm, TCGv_ptr fpst)
{
    /*
     * VMLS: vd = vd + -(vn * vm)
     * Note that order of inputs to the add matters for NaNs.
     */
    TCGv_i64 tmp = tcg_temp_new_i64();

    gen_helper_vfp_muld(tmp, vn, vm, fpst);
    gen_helper_vfp_negd(tmp, tmp);
    gen_helper_vfp_addd(vd, vd, tmp, fpst);
    tcg_temp_free_i64(tmp);
}

static bool trans_VMLS_dp(DisasContext *s, arg_VMLS_dp *a)
{
    return do_vfp_3op_dp(s, gen_VMLS_dp, a->vd, a->vn, a->vm, true);
}

static void gen_VNMLS_hp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
{
    /*
     * VNMLS: -fd + (fn * fm)
     * Note that it isn't valid to replace (-A + B) with (B - A) or similar
     * plausible looking simplifications because this will give wrong results
     * for NaNs.
     */
    TCGv_i32 tmp = tcg_temp_new_i32();

    gen_helper_vfp_mulh(tmp, vn, vm, fpst);
    gen_helper_vfp_negh(vd, vd);
    gen_helper_vfp_addh(vd, vd, tmp, fpst);
    tcg_temp_free_i32(tmp);
}

static bool trans_VNMLS_hp(DisasContext *s, arg_VNMLS_sp *a)
{
    return do_vfp_3op_hp(s, gen_VNMLS_hp, a->vd, a->vn, a->vm, true);
}

static void gen_VNMLS_sp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
{
    /*
     * VNMLS: -fd + (fn * fm)
     * Note that it isn't valid to replace (-A + B) with (B - A) or similar
     * plausible looking simplifications because this will give wrong results
     * for NaNs.
     */
    TCGv_i32 tmp = tcg_temp_new_i32();

    gen_helper_vfp_muls(tmp, vn, vm, fpst);
    gen_helper_vfp_negs(vd, vd);
    gen_helper_vfp_adds(vd, vd, tmp, fpst);
    tcg_temp_free_i32(tmp);
}

static bool trans_VNMLS_sp(DisasContext *s, arg_VNMLS_sp *a)
{
    return do_vfp_3op_sp(s, gen_VNMLS_sp, a->vd, a->vn, a->vm, true);
}

static void gen_VNMLS_dp(TCGv_i64 vd, TCGv_i64 vn, TCGv_i64 vm, TCGv_ptr fpst)
{
    /*
     * VNMLS: -fd + (fn * fm)
     * Note that it isn't valid to replace (-A + B) with (B - A) or similar
     * plausible looking simplifications because this will give wrong results
     * for NaNs.
     */
    TCGv_i64 tmp = tcg_temp_new_i64();

    gen_helper_vfp_muld(tmp, vn, vm, fpst);
    gen_helper_vfp_negd(vd, vd);
    gen_helper_vfp_addd(vd, vd, tmp, fpst);
    tcg_temp_free_i64(tmp);
}

static bool trans_VNMLS_dp(DisasContext *s, arg_VNMLS_dp *a)
{
    return do_vfp_3op_dp(s, gen_VNMLS_dp, a->vd, a->vn, a->vm, true);
}

static void gen_VNMLA_hp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
{
    /* VNMLA: -fd + -(fn * fm) */
    TCGv_i32 tmp = tcg_temp_new_i32();

    gen_helper_vfp_mulh(tmp, vn, vm, fpst);
    gen_helper_vfp_negh(tmp, tmp);
    gen_helper_vfp_negh(vd, vd);
    gen_helper_vfp_addh(vd, vd, tmp, fpst);
    tcg_temp_free_i32(tmp);
}

static bool trans_VNMLA_hp(DisasContext *s, arg_VNMLA_sp *a)
{
    return do_vfp_3op_hp(s, gen_VNMLA_hp, a->vd, a->vn, a->vm, true);
}

static void gen_VNMLA_sp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
{
    /* VNMLA: -fd + -(fn * fm) */
    TCGv_i32 tmp = tcg_temp_new_i32();

    gen_helper_vfp_muls(tmp, vn, vm, fpst);
    gen_helper_vfp_negs(tmp, tmp);
    gen_helper_vfp_negs(vd, vd);
    gen_helper_vfp_adds(vd, vd, tmp, fpst);
    tcg_temp_free_i32(tmp);
}

static bool trans_VNMLA_sp(DisasContext *s, arg_VNMLA_sp *a)
{
    return do_vfp_3op_sp(s, gen_VNMLA_sp, a->vd, a->vn, a->vm, true);
}

static void gen_VNMLA_dp(TCGv_i64 vd, TCGv_i64 vn, TCGv_i64 vm, TCGv_ptr fpst)
{
    /* VNMLA: -fd + -(fn * fm) */
    TCGv_i64 tmp = tcg_temp_new_i64();

    gen_helper_vfp_muld(tmp, vn, vm, fpst);
    gen_helper_vfp_negd(tmp, tmp);
    gen_helper_vfp_negd(vd, vd);
    gen_helper_vfp_addd(vd, vd, tmp, fpst);
    tcg_temp_free_i64(tmp);
}

static bool trans_VNMLA_dp(DisasContext *s, arg_VNMLA_dp *a)
{
    return do_vfp_3op_dp(s, gen_VNMLA_dp, a->vd, a->vn, a->vm, true);
}

static bool trans_VMUL_hp(DisasContext *s, arg_VMUL_sp *a)
{
    return do_vfp_3op_hp(s, gen_helper_vfp_mulh, a->vd, a->vn, a->vm, false);
}

static bool trans_VMUL_sp(DisasContext *s, arg_VMUL_sp *a)
{
    return do_vfp_3op_sp(s, gen_helper_vfp_muls, a->vd, a->vn, a->vm, false);
}

static bool trans_VMUL_dp(DisasContext *s, arg_VMUL_dp *a)
{
    return do_vfp_3op_dp(s, gen_helper_vfp_muld, a->vd, a->vn, a->vm, false);
}

static void gen_VNMUL_hp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
{
    /* VNMUL: -(fn * fm) */
    gen_helper_vfp_mulh(vd, vn, vm, fpst);
    gen_helper_vfp_negh(vd, vd);
}

static bool trans_VNMUL_hp(DisasContext *s, arg_VNMUL_sp *a)
{
    return do_vfp_3op_hp(s, gen_VNMUL_hp, a->vd, a->vn, a->vm, false);
}

static void gen_VNMUL_sp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
{
    /* VNMUL: -(fn * fm) */
    gen_helper_vfp_muls(vd, vn, vm, fpst);
    gen_helper_vfp_negs(vd, vd);
}

static bool trans_VNMUL_sp(DisasContext *s, arg_VNMUL_sp *a)
{
    return do_vfp_3op_sp(s, gen_VNMUL_sp, a->vd, a->vn, a->vm, false);
}

static void gen_VNMUL_dp(TCGv_i64 vd, TCGv_i64 vn, TCGv_i64 vm, TCGv_ptr fpst)
{
    /* VNMUL: -(fn * fm) */
    gen_helper_vfp_muld(vd, vn, vm, fpst);
    gen_helper_vfp_negd(vd, vd);
}

static bool trans_VNMUL_dp(DisasContext *s, arg_VNMUL_dp *a)
{
    return do_vfp_3op_dp(s, gen_VNMUL_dp, a->vd, a->vn, a->vm, false);
}

static bool trans_VADD_hp(DisasContext *s, arg_VADD_sp *a)
{
    return do_vfp_3op_hp(s, gen_helper_vfp_addh, a->vd, a->vn, a->vm, false);
}

static bool trans_VADD_sp(DisasContext *s, arg_VADD_sp *a)
{
    return do_vfp_3op_sp(s, gen_helper_vfp_adds, a->vd, a->vn, a->vm, false);
}

static bool trans_VADD_dp(DisasContext *s, arg_VADD_dp *a)
{
    return do_vfp_3op_dp(s, gen_helper_vfp_addd, a->vd, a->vn, a->vm, false);
}

static bool trans_VSUB_hp(DisasContext *s, arg_VSUB_sp *a)
{
    return do_vfp_3op_hp(s, gen_helper_vfp_subh, a->vd, a->vn, a->vm, false);
}

static bool trans_VSUB_sp(DisasContext *s, arg_VSUB_sp *a)
{
    return do_vfp_3op_sp(s, gen_helper_vfp_subs, a->vd, a->vn, a->vm, false);
}

static bool trans_VSUB_dp(DisasContext *s, arg_VSUB_dp *a)
{
    return do_vfp_3op_dp(s, gen_helper_vfp_subd, a->vd, a->vn, a->vm, false);
}

static bool trans_VDIV_hp(DisasContext *s, arg_VDIV_sp *a)
{
    return do_vfp_3op_hp(s, gen_helper_vfp_divh, a->vd, a->vn, a->vm, false);
}

static bool trans_VDIV_sp(DisasContext *s, arg_VDIV_sp *a)
{
    return do_vfp_3op_sp(s, gen_helper_vfp_divs, a->vd, a->vn, a->vm, false);
}

static bool trans_VDIV_dp(DisasContext *s, arg_VDIV_dp *a)
{
    return do_vfp_3op_dp(s, gen_helper_vfp_divd, a->vd, a->vn, a->vm, false);
}

static bool trans_VMINNM_hp(DisasContext *s, arg_VMINNM_sp *a)
{
    if (!dc_isar_feature(aa32_vminmaxnm, s)) {
        return false;
    }
    return do_vfp_3op_hp(s, gen_helper_vfp_minnumh,
                         a->vd, a->vn, a->vm, false);
}

static bool trans_VMAXNM_hp(DisasContext *s, arg_VMAXNM_sp *a)
{
    if (!dc_isar_feature(aa32_vminmaxnm, s)) {
        return false;
    }
    return do_vfp_3op_hp(s, gen_helper_vfp_maxnumh,
                         a->vd, a->vn, a->vm, false);
}

static bool trans_VMINNM_sp(DisasContext *s, arg_VMINNM_sp *a)
{
    if (!dc_isar_feature(aa32_vminmaxnm, s)) {
        return false;
    }
    return do_vfp_3op_sp(s, gen_helper_vfp_minnums,
                         a->vd, a->vn, a->vm, false);
}

static bool trans_VMAXNM_sp(DisasContext *s, arg_VMAXNM_sp *a)
{
    if (!dc_isar_feature(aa32_vminmaxnm, s)) {
        return false;
    }
    return do_vfp_3op_sp(s, gen_helper_vfp_maxnums,
                         a->vd, a->vn, a->vm, false);
}

static bool trans_VMINNM_dp(DisasContext *s, arg_VMINNM_dp *a)
{
    if (!dc_isar_feature(aa32_vminmaxnm, s)) {
        return false;
    }
    return do_vfp_3op_dp(s, gen_helper_vfp_minnumd,
                         a->vd, a->vn, a->vm, false);
}

static bool trans_VMAXNM_dp(DisasContext *s, arg_VMAXNM_dp *a)
{
    if (!dc_isar_feature(aa32_vminmaxnm, s)) {
        return false;
    }
    return do_vfp_3op_dp(s, gen_helper_vfp_maxnumd,
                         a->vd, a->vn, a->vm, false);
}

static bool do_vfm_hp(DisasContext *s, arg_VFMA_sp *a, bool neg_n, bool neg_d)
{
    /*
     * VFNMA : fd = muladd(-fd,  fn, fm)
     * VFNMS : fd = muladd(-fd, -fn, fm)
     * VFMA  : fd = muladd( fd,  fn, fm)
     * VFMS  : fd = muladd( fd, -fn, fm)
     *
     * These are fused multiply-add, and must be done as one floating
     * point operation with no rounding between the multiplication and
     * addition steps.  NB that doing the negations here as separate
     * steps is correct : an input NaN should come out with its sign
     * bit flipped if it is a negated-input.
     */
    TCGv_ptr fpst;
    TCGv_i32 vn, vm, vd;

    /*
     * Present in VFPv4 only, and only with the FP16 extension.
     * Note that we can't rely on the SIMDFMAC check alone, because
     * in a Neon-no-VFP core that ID register field will be non-zero.
     */
    if (!dc_isar_feature(aa32_fp16_arith, s) ||
        !dc_isar_feature(aa32_simdfmac, s) ||
        !dc_isar_feature(aa32_fpsp_v2, s)) {
        return false;
    }

    if (s->vec_len != 0 || s->vec_stride != 0) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    vn = tcg_temp_new_i32();
    vm = tcg_temp_new_i32();
    vd = tcg_temp_new_i32();

    vfp_load_reg32(vn, a->vn);
    vfp_load_reg32(vm, a->vm);
    if (neg_n) {
        /* VFNMS, VFMS */
        gen_helper_vfp_negh(vn, vn);
    }
    vfp_load_reg32(vd, a->vd);
    if (neg_d) {
        /* VFNMA, VFNMS */
        gen_helper_vfp_negh(vd, vd);
    }
    fpst = fpstatus_ptr(FPST_FPCR_F16);
    gen_helper_vfp_muladdh(vd, vn, vm, vd, fpst);
    vfp_store_reg32(vd, a->vd);

    tcg_temp_free_ptr(fpst);
    tcg_temp_free_i32(vn);
    tcg_temp_free_i32(vm);
    tcg_temp_free_i32(vd);

    return true;
}

static bool do_vfm_sp(DisasContext *s, arg_VFMA_sp *a, bool neg_n, bool neg_d)
{
    /*
     * VFNMA : fd = muladd(-fd,  fn, fm)
     * VFNMS : fd = muladd(-fd, -fn, fm)
     * VFMA  : fd = muladd( fd,  fn, fm)
     * VFMS  : fd = muladd( fd, -fn, fm)
     *
     * These are fused multiply-add, and must be done as one floating
     * point operation with no rounding between the multiplication and
     * addition steps.  NB that doing the negations here as separate
     * steps is correct : an input NaN should come out with its sign
     * bit flipped if it is a negated-input.
     */
    TCGv_ptr fpst;
    TCGv_i32 vn, vm, vd;

    /*
     * Present in VFPv4 only.
     * Note that we can't rely on the SIMDFMAC check alone, because
     * in a Neon-no-VFP core that ID register field will be non-zero.
     */
    if (!dc_isar_feature(aa32_simdfmac, s) ||
        !dc_isar_feature(aa32_fpsp_v2, s)) {
        return false;
    }
    /*
     * In v7A, UNPREDICTABLE with non-zero vector length/stride; from
     * v8A, must UNDEF. We choose to UNDEF for both v7A and v8A.
     */
    if (s->vec_len != 0 || s->vec_stride != 0) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    vn = tcg_temp_new_i32();
    vm = tcg_temp_new_i32();
    vd = tcg_temp_new_i32();

    vfp_load_reg32(vn, a->vn);
    vfp_load_reg32(vm, a->vm);
    if (neg_n) {
        /* VFNMS, VFMS */
        gen_helper_vfp_negs(vn, vn);
    }
    vfp_load_reg32(vd, a->vd);
    if (neg_d) {
        /* VFNMA, VFNMS */
        gen_helper_vfp_negs(vd, vd);
    }
    fpst = fpstatus_ptr(FPST_FPCR);
    gen_helper_vfp_muladds(vd, vn, vm, vd, fpst);
    vfp_store_reg32(vd, a->vd);

    tcg_temp_free_ptr(fpst);
    tcg_temp_free_i32(vn);
    tcg_temp_free_i32(vm);
    tcg_temp_free_i32(vd);

    return true;
}

static bool do_vfm_dp(DisasContext *s, arg_VFMA_dp *a, bool neg_n, bool neg_d)
{
    /*
     * VFNMA : fd = muladd(-fd,  fn, fm)
     * VFNMS : fd = muladd(-fd, -fn, fm)
     * VFMA  : fd = muladd( fd,  fn, fm)
     * VFMS  : fd = muladd( fd, -fn, fm)
     *
     * These are fused multiply-add, and must be done as one floating
     * point operation with no rounding between the multiplication and
     * addition steps.  NB that doing the negations here as separate
     * steps is correct : an input NaN should come out with its sign
     * bit flipped if it is a negated-input.
     */
    TCGv_ptr fpst;
    TCGv_i64 vn, vm, vd;

    /*
     * Present in VFPv4 only.
     * Note that we can't rely on the SIMDFMAC check alone, because
     * in a Neon-no-VFP core that ID register field will be non-zero.
     */
    if (!dc_isar_feature(aa32_simdfmac, s) ||
        !dc_isar_feature(aa32_fpdp_v2, s)) {
        return false;
    }
    /*
     * In v7A, UNPREDICTABLE with non-zero vector length/stride; from
     * v8A, must UNDEF. We choose to UNDEF for both v7A and v8A.
     */
    if (s->vec_len != 0 || s->vec_stride != 0) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vn | a->vm) & 0x10)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    vn = tcg_temp_new_i64();
    vm = tcg_temp_new_i64();
    vd = tcg_temp_new_i64();

    vfp_load_reg64(vn, a->vn);
    vfp_load_reg64(vm, a->vm);
    if (neg_n) {
        /* VFNMS, VFMS */
        gen_helper_vfp_negd(vn, vn);
    }
    vfp_load_reg64(vd, a->vd);
    if (neg_d) {
        /* VFNMA, VFNMS */
        gen_helper_vfp_negd(vd, vd);
    }
    fpst = fpstatus_ptr(FPST_FPCR);
    gen_helper_vfp_muladdd(vd, vn, vm, vd, fpst);
    vfp_store_reg64(vd, a->vd);

    tcg_temp_free_ptr(fpst);
    tcg_temp_free_i64(vn);
    tcg_temp_free_i64(vm);
    tcg_temp_free_i64(vd);

    return true;
}

#define MAKE_ONE_VFM_TRANS_FN(INSN, PREC, NEGN, NEGD)                   \
    static bool trans_##INSN##_##PREC(DisasContext *s,                 \
                                      arg_##INSN##_##PREC *a)          \
    {                                                                   \
        return do_vfm_##PREC(s, a, NEGN, NEGD);                         \
    }

#define MAKE_VFM_TRANS_FNS(PREC) \
    MAKE_ONE_VFM_TRANS_FN(VFMA, PREC, false, false) \
    MAKE_ONE_VFM_TRANS_FN(VFMS, PREC, true, false) \
    MAKE_ONE_VFM_TRANS_FN(VFNMA, PREC, false, true) \
    MAKE_ONE_VFM_TRANS_FN(VFNMS, PREC, true, true)

MAKE_VFM_TRANS_FNS(hp)
MAKE_VFM_TRANS_FNS(sp)
MAKE_VFM_TRANS_FNS(dp)

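/*
 * For reference (illustrative expansion, not part of the source):
 * MAKE_ONE_VFM_TRANS_FN(VFMS, sp, true, false) expands to roughly
 *
 *     static bool trans_VFMS_sp(DisasContext *s, arg_VFMS_sp *a)
 *     {
 *         return do_vfm_sp(s, a, true, false);
 *     }
 */
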
static bool trans_VMOV_imm_hp(DisasContext *s, arg_VMOV_imm_sp *a)
{
    TCGv_i32 fd;

    if (!dc_isar_feature(aa32_fp16_arith, s)) {
        return false;
    }

    if (s->vec_len != 0 || s->vec_stride != 0) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    fd = tcg_const_i32(vfp_expand_imm(MO_16, a->imm));
    vfp_store_reg32(fd, a->vd);
    tcg_temp_free_i32(fd);
    return true;
}

static bool trans_VMOV_imm_sp(DisasContext *s, arg_VMOV_imm_sp *a)
{
    uint32_t delta_d = 0;
    int veclen = s->vec_len;
    TCGv_i32 fd;
    uint32_t vd;

    vd = a->vd;

    if (!dc_isar_feature(aa32_fpsp_v3, s)) {
        return false;
    }

    if (!dc_isar_feature(aa32_fpshvec, s) &&
        (veclen != 0 || s->vec_stride != 0)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    if (veclen > 0) {
        /* Figure out what type of vector operation this is.  */
        if (vfp_sreg_is_scalar(vd)) {
            /* scalar */
            veclen = 0;
        } else {
            /* vector */
            delta_d = s->vec_stride + 1;
        }
    }

    fd = tcg_const_i32(vfp_expand_imm(MO_32, a->imm));

    for (;;) {
        vfp_store_reg32(fd, vd);

        if (veclen == 0) {
            break;
        }

        /* Set up the operands for the next iteration */
        veclen--;
        vd = vfp_advance_sreg(vd, delta_d);
    }

    tcg_temp_free_i32(fd);
    return true;
}

static bool trans_VMOV_imm_dp(DisasContext *s, arg_VMOV_imm_dp *a)
{
    uint32_t delta_d = 0;
    int veclen = s->vec_len;
    TCGv_i64 fd;
    uint32_t vd;

    vd = a->vd;

    if (!dc_isar_feature(aa32_fpdp_v3, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) && (vd & 0x10)) {
        return false;
    }

    if (!dc_isar_feature(aa32_fpshvec, s) &&
        (veclen != 0 || s->vec_stride != 0)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    if (veclen > 0) {
        /* Figure out what type of vector operation this is.  */
        if (vfp_dreg_is_scalar(vd)) {
            /* scalar */
            veclen = 0;
        } else {
            /* vector */
            delta_d = (s->vec_stride >> 1) + 1;
        }
    }

    fd = tcg_const_i64(vfp_expand_imm(MO_64, a->imm));

    for (;;) {
        vfp_store_reg64(fd, vd);

        if (veclen == 0) {
            break;
        }

        /* Set up the operands for the next iteration */
        veclen--;
        vd = vfp_advance_dreg(vd, delta_d);
    }

    tcg_temp_free_i64(fd);
    return true;
}

#define DO_VFP_2OP(INSN, PREC, FN, CHECK)                       \
    static bool trans_##INSN##_##PREC(DisasContext *s,          \
                                      arg_##INSN##_##PREC *a)   \
    {                                                           \
        if (!dc_isar_feature(CHECK, s)) {                       \
            return false;                                       \
        }                                                       \
        return do_vfp_2op_##PREC(s, FN, a->vd, a->vm);          \
    }

#define DO_VFP_VMOV(INSN, PREC, FN)                             \
    static bool trans_##INSN##_##PREC(DisasContext *s,          \
                                      arg_##INSN##_##PREC *a)   \
    {                                                           \
        if (!dc_isar_feature(aa32_fp##PREC##_v2, s) &&          \
            !dc_isar_feature(aa32_mve, s)) {                    \
            return false;                                       \
        }                                                       \
        return do_vfp_2op_##PREC(s, FN, a->vd, a->vm);          \
    }

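/*
 * For reference (illustrative expansion): DO_VFP_2OP(VABS, sp,
 * gen_helper_vfp_abss, aa32_fpsp_v2) below expands to roughly
 *
 *     static bool trans_VABS_sp(DisasContext *s, arg_VABS_sp *a)
 *     {
 *         if (!dc_isar_feature(aa32_fpsp_v2, s)) {
 *             return false;
 *         }
 *         return do_vfp_2op_sp(s, gen_helper_vfp_abss, a->vd, a->vm);
 *     }
 */
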
DO_VFP_VMOV(VMOV_reg, sp, tcg_gen_mov_i32)
DO_VFP_VMOV(VMOV_reg, dp, tcg_gen_mov_i64)

DO_VFP_2OP(VABS, hp, gen_helper_vfp_absh, aa32_fp16_arith)
DO_VFP_2OP(VABS, sp, gen_helper_vfp_abss, aa32_fpsp_v2)
DO_VFP_2OP(VABS, dp, gen_helper_vfp_absd, aa32_fpdp_v2)

DO_VFP_2OP(VNEG, hp, gen_helper_vfp_negh, aa32_fp16_arith)
DO_VFP_2OP(VNEG, sp, gen_helper_vfp_negs, aa32_fpsp_v2)
DO_VFP_2OP(VNEG, dp, gen_helper_vfp_negd, aa32_fpdp_v2)

static void gen_VSQRT_hp(TCGv_i32 vd, TCGv_i32 vm)
{
    gen_helper_vfp_sqrth(vd, vm, cpu_env);
}

static void gen_VSQRT_sp(TCGv_i32 vd, TCGv_i32 vm)
{
    gen_helper_vfp_sqrts(vd, vm, cpu_env);
}

static void gen_VSQRT_dp(TCGv_i64 vd, TCGv_i64 vm)
{
    gen_helper_vfp_sqrtd(vd, vm, cpu_env);
}

DO_VFP_2OP(VSQRT, hp, gen_VSQRT_hp, aa32_fp16_arith)
DO_VFP_2OP(VSQRT, sp, gen_VSQRT_sp, aa32_fpsp_v2)
DO_VFP_2OP(VSQRT, dp, gen_VSQRT_dp, aa32_fpdp_v2)

static bool trans_VCMP_hp(DisasContext *s, arg_VCMP_sp *a)
{
    TCGv_i32 vd, vm;

    if (!dc_isar_feature(aa32_fp16_arith, s)) {
        return false;
    }

    /* Vm/M bits must be zero for the Z variant */
    if (a->z && a->vm != 0) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    vd = tcg_temp_new_i32();
    vm = tcg_temp_new_i32();

    vfp_load_reg32(vd, a->vd);
    if (a->z) {
        tcg_gen_movi_i32(vm, 0);
    } else {
        vfp_load_reg32(vm, a->vm);
    }

    if (a->e) {
        gen_helper_vfp_cmpeh(vd, vm, cpu_env);
    } else {
        gen_helper_vfp_cmph(vd, vm, cpu_env);
    }

    tcg_temp_free_i32(vd);
    tcg_temp_free_i32(vm);

    return true;
}

static bool trans_VCMP_sp(DisasContext *s, arg_VCMP_sp *a)
{
    TCGv_i32 vd, vm;

    if (!dc_isar_feature(aa32_fpsp_v2, s)) {
        return false;
    }

    /* Vm/M bits must be zero for the Z variant */
    if (a->z && a->vm != 0) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    vd = tcg_temp_new_i32();
    vm = tcg_temp_new_i32();

    vfp_load_reg32(vd, a->vd);
    if (a->z) {
        tcg_gen_movi_i32(vm, 0);
    } else {
        vfp_load_reg32(vm, a->vm);
    }

    if (a->e) {
        gen_helper_vfp_cmpes(vd, vm, cpu_env);
    } else {
        gen_helper_vfp_cmps(vd, vm, cpu_env);
    }

    tcg_temp_free_i32(vd);
    tcg_temp_free_i32(vm);

    return true;
}

static bool trans_VCMP_dp(DisasContext *s, arg_VCMP_dp *a)
{
    TCGv_i64 vd, vm;

    if (!dc_isar_feature(aa32_fpdp_v2, s)) {
        return false;
    }

    /* Vm/M bits must be zero for the Z variant */
    if (a->z && a->vm != 0) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) && ((a->vd | a->vm) & 0x10)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    vd = tcg_temp_new_i64();
    vm = tcg_temp_new_i64();

    vfp_load_reg64(vd, a->vd);
    if (a->z) {
        tcg_gen_movi_i64(vm, 0);
    } else {
        vfp_load_reg64(vm, a->vm);
    }

    if (a->e) {
        gen_helper_vfp_cmped(vd, vm, cpu_env);
    } else {
        gen_helper_vfp_cmpd(vd, vm, cpu_env);
    }

    tcg_temp_free_i64(vd);
    tcg_temp_free_i64(vm);

    return true;
}

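/*
 * The E bit selects the "raise exceptions" form: per the Arm ARM, VCMPE
 * (the cmpe* helpers) signals Invalid Operation for any NaN input,
 * whereas plain VCMP (the cmp* helpers) does so only for signalling
 * NaNs. Both leave their result in the FPSCR NZCV flags via cpu_env.
 */
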
static bool trans_VCVT_f32_f16(DisasContext *s, arg_VCVT_f32_f16 *a)
{
    TCGv_ptr fpst;
    TCGv_i32 ahp_mode;
    TCGv_i32 tmp;

    if (!dc_isar_feature(aa32_fp16_spconv, s)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    fpst = fpstatus_ptr(FPST_FPCR);
    ahp_mode = get_ahp_flag();
    tmp = tcg_temp_new_i32();
    /* The T bit tells us if we want the low or high 16 bits of Vm */
    tcg_gen_ld16u_i32(tmp, cpu_env, vfp_f16_offset(a->vm, a->t));
    gen_helper_vfp_fcvt_f16_to_f32(tmp, tmp, fpst, ahp_mode);
    vfp_store_reg32(tmp, a->vd);
    tcg_temp_free_i32(ahp_mode);
    tcg_temp_free_ptr(fpst);
    tcg_temp_free_i32(tmp);
    return true;
}

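/*
 * get_ahp_flag() reads FPSCR.AHP, which selects between the IEEE
 * half-precision format and the Arm alternative half-precision format
 * for these conversions.
 */
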
static bool trans_VCVT_f64_f16(DisasContext *s, arg_VCVT_f64_f16 *a)
{
    TCGv_ptr fpst;
    TCGv_i32 ahp_mode;
    TCGv_i32 tmp;
    TCGv_i64 vd;

    if (!dc_isar_feature(aa32_fpdp_v2, s)) {
        return false;
    }

    if (!dc_isar_feature(aa32_fp16_dpconv, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    fpst = fpstatus_ptr(FPST_FPCR);
    ahp_mode = get_ahp_flag();
    tmp = tcg_temp_new_i32();
    /* The T bit tells us if we want the low or high 16 bits of Vm */
    tcg_gen_ld16u_i32(tmp, cpu_env, vfp_f16_offset(a->vm, a->t));
    vd = tcg_temp_new_i64();
    gen_helper_vfp_fcvt_f16_to_f64(vd, tmp, fpst, ahp_mode);
    vfp_store_reg64(vd, a->vd);
    tcg_temp_free_i32(ahp_mode);
    tcg_temp_free_ptr(fpst);
    tcg_temp_free_i32(tmp);
    tcg_temp_free_i64(vd);
    return true;
}

static bool trans_VCVT_b16_f32(DisasContext *s, arg_VCVT_b16_f32 *a)
{
    TCGv_ptr fpst;
    TCGv_i32 tmp;

    if (!dc_isar_feature(aa32_bf16, s)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    fpst = fpstatus_ptr(FPST_FPCR);
    tmp = tcg_temp_new_i32();

    vfp_load_reg32(tmp, a->vm);
    gen_helper_bfcvt(tmp, tmp, fpst);
    tcg_gen_st16_i32(tmp, cpu_env, vfp_f16_offset(a->vd, a->t));
    tcg_temp_free_ptr(fpst);
    tcg_temp_free_i32(tmp);
    return true;
}

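/*
 * bfloat16 is the top half of an IEEE single: modulo rounding,
 * converting 1.0f (0x3f800000) yields 0x3f80, which the st16 above
 * writes into the half of Vd selected by the T bit.
 */
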
static bool trans_VCVT_f16_f32(DisasContext *s, arg_VCVT_f16_f32 *a)
{
    TCGv_ptr fpst;
    TCGv_i32 ahp_mode;
    TCGv_i32 tmp;

    if (!dc_isar_feature(aa32_fp16_spconv, s)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    fpst = fpstatus_ptr(FPST_FPCR);
    ahp_mode = get_ahp_flag();
    tmp = tcg_temp_new_i32();

    vfp_load_reg32(tmp, a->vm);
    gen_helper_vfp_fcvt_f32_to_f16(tmp, tmp, fpst, ahp_mode);
    tcg_gen_st16_i32(tmp, cpu_env, vfp_f16_offset(a->vd, a->t));
    tcg_temp_free_i32(ahp_mode);
    tcg_temp_free_ptr(fpst);
    tcg_temp_free_i32(tmp);
    return true;
}

static bool trans_VCVT_f16_f64(DisasContext *s, arg_VCVT_f16_f64 *a)
{
    TCGv_ptr fpst;
    TCGv_i32 ahp_mode;
    TCGv_i32 tmp;
    TCGv_i64 vm;

    if (!dc_isar_feature(aa32_fpdp_v2, s)) {
        return false;
    }

    if (!dc_isar_feature(aa32_fp16_dpconv, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) && (a->vm & 0x10)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    fpst = fpstatus_ptr(FPST_FPCR);
    ahp_mode = get_ahp_flag();
    tmp = tcg_temp_new_i32();
    vm = tcg_temp_new_i64();

    vfp_load_reg64(vm, a->vm);
    gen_helper_vfp_fcvt_f64_to_f16(tmp, vm, fpst, ahp_mode);
    tcg_temp_free_i64(vm);
    tcg_gen_st16_i32(tmp, cpu_env, vfp_f16_offset(a->vd, a->t));
    tcg_temp_free_i32(ahp_mode);
    tcg_temp_free_ptr(fpst);
    tcg_temp_free_i32(tmp);
    return true;
}

static bool trans_VRINTR_hp(DisasContext *s, arg_VRINTR_sp *a)
{
    TCGv_ptr fpst;
    TCGv_i32 tmp;

    if (!dc_isar_feature(aa32_fp16_arith, s)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    tmp = tcg_temp_new_i32();
    vfp_load_reg32(tmp, a->vm);
    fpst = fpstatus_ptr(FPST_FPCR_F16);
    gen_helper_rinth(tmp, tmp, fpst);
    vfp_store_reg32(tmp, a->vd);
    tcg_temp_free_ptr(fpst);
    tcg_temp_free_i32(tmp);
    return true;
}

static bool trans_VRINTR_sp(DisasContext *s, arg_VRINTR_sp *a)
{
    TCGv_ptr fpst;
    TCGv_i32 tmp;

    if (!dc_isar_feature(aa32_vrint, s)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    tmp = tcg_temp_new_i32();
    vfp_load_reg32(tmp, a->vm);
    fpst = fpstatus_ptr(FPST_FPCR);
    gen_helper_rints(tmp, tmp, fpst);
    vfp_store_reg32(tmp, a->vd);
    tcg_temp_free_ptr(fpst);
    tcg_temp_free_i32(tmp);
    return true;
}

static bool trans_VRINTR_dp(DisasContext *s, arg_VRINTR_dp *a)
{
    TCGv_ptr fpst;
    TCGv_i64 tmp;

    if (!dc_isar_feature(aa32_fpdp_v2, s)) {
        return false;
    }

    if (!dc_isar_feature(aa32_vrint, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) && ((a->vd | a->vm) & 0x10)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    tmp = tcg_temp_new_i64();
    vfp_load_reg64(tmp, a->vm);
    fpst = fpstatus_ptr(FPST_FPCR);
    gen_helper_rintd(tmp, tmp, fpst);
    vfp_store_reg64(tmp, a->vd);
    tcg_temp_free_ptr(fpst);
    tcg_temp_free_i64(tmp);
    return true;
}

static bool trans_VRINTZ_hp(DisasContext *s, arg_VRINTZ_sp *a)
{
    TCGv_ptr fpst;
    TCGv_i32 tmp;
    TCGv_i32 tcg_rmode;

    if (!dc_isar_feature(aa32_fp16_arith, s)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    tmp = tcg_temp_new_i32();
    vfp_load_reg32(tmp, a->vm);
    fpst = fpstatus_ptr(FPST_FPCR_F16);
    tcg_rmode = tcg_const_i32(float_round_to_zero);
    gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
    gen_helper_rinth(tmp, tmp, fpst);
    gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
    vfp_store_reg32(tmp, a->vd);
    tcg_temp_free_ptr(fpst);
    tcg_temp_free_i32(tcg_rmode);
    tcg_temp_free_i32(tmp);
    return true;
}

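/*
 * gen_helper_set_rmode() returns the previous rounding mode in its
 * destination, so the pair of calls above temporarily forces
 * round-to-zero for the VRINTZ and then restores whatever mode the
 * guest had configured; VRINTR (above) instead rounds using the
 * current FPSCR rounding mode directly.
 */
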
static bool trans_VRINTZ_sp(DisasContext *s, arg_VRINTZ_sp *a)
{
    TCGv_ptr fpst;
    TCGv_i32 tmp;
    TCGv_i32 tcg_rmode;

    if (!dc_isar_feature(aa32_vrint, s)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    tmp = tcg_temp_new_i32();
    vfp_load_reg32(tmp, a->vm);
    fpst = fpstatus_ptr(FPST_FPCR);
    tcg_rmode = tcg_const_i32(float_round_to_zero);
    gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
    gen_helper_rints(tmp, tmp, fpst);
    gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
    vfp_store_reg32(tmp, a->vd);
    tcg_temp_free_ptr(fpst);
    tcg_temp_free_i32(tcg_rmode);
    tcg_temp_free_i32(tmp);
    return true;
}

static bool trans_VRINTZ_dp(DisasContext *s, arg_VRINTZ_dp *a)
{
    TCGv_ptr fpst;
    TCGv_i64 tmp;
    TCGv_i32 tcg_rmode;

    if (!dc_isar_feature(aa32_fpdp_v2, s)) {
        return false;
    }

    if (!dc_isar_feature(aa32_vrint, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) && ((a->vd | a->vm) & 0x10)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    tmp = tcg_temp_new_i64();
    vfp_load_reg64(tmp, a->vm);
    fpst = fpstatus_ptr(FPST_FPCR);
    tcg_rmode = tcg_const_i32(float_round_to_zero);
    gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
    gen_helper_rintd(tmp, tmp, fpst);
    gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
    vfp_store_reg64(tmp, a->vd);
    tcg_temp_free_ptr(fpst);
    tcg_temp_free_i64(tmp);
    tcg_temp_free_i32(tcg_rmode);
    return true;
}

static bool trans_VRINTX_hp(DisasContext *s, arg_VRINTX_sp *a)
{
    TCGv_ptr fpst;
    TCGv_i32 tmp;

    if (!dc_isar_feature(aa32_fp16_arith, s)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    tmp = tcg_temp_new_i32();
    vfp_load_reg32(tmp, a->vm);
    fpst = fpstatus_ptr(FPST_FPCR_F16);
    gen_helper_rinth_exact(tmp, tmp, fpst);
    vfp_store_reg32(tmp, a->vd);
    tcg_temp_free_ptr(fpst);
    tcg_temp_free_i32(tmp);
    return true;
}

static bool trans_VRINTX_sp(DisasContext *s, arg_VRINTX_sp *a)
{
    TCGv_ptr fpst;
    TCGv_i32 tmp;

    if (!dc_isar_feature(aa32_vrint, s)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    tmp = tcg_temp_new_i32();
    vfp_load_reg32(tmp, a->vm);
    fpst = fpstatus_ptr(FPST_FPCR);
    gen_helper_rints_exact(tmp, tmp, fpst);
    vfp_store_reg32(tmp, a->vd);
    tcg_temp_free_ptr(fpst);
    tcg_temp_free_i32(tmp);
    return true;
}

static bool trans_VRINTX_dp(DisasContext *s, arg_VRINTX_dp *a)
{
    TCGv_ptr fpst;
    TCGv_i64 tmp;

    if (!dc_isar_feature(aa32_fpdp_v2, s)) {
        return false;
    }

    if (!dc_isar_feature(aa32_vrint, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) && ((a->vd | a->vm) & 0x10)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    tmp = tcg_temp_new_i64();
    vfp_load_reg64(tmp, a->vm);
    fpst = fpstatus_ptr(FPST_FPCR);
    gen_helper_rintd_exact(tmp, tmp, fpst);
    vfp_store_reg64(tmp, a->vd);
    tcg_temp_free_ptr(fpst);
    tcg_temp_free_i64(tmp);
    return true;
}

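/*
 * The "exact" variants used by VRINTX differ from the plain rint*
 * helpers only in that they raise the Inexact exception when the
 * result does not equal the input, as the Arm ARM requires for VRINTX.
 */
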
static bool trans_VCVT_sp(DisasContext *s, arg_VCVT_sp *a)
{
    TCGv_i64 vd;
    TCGv_i32 vm;

    if (!dc_isar_feature(aa32_fpdp_v2, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    vm = tcg_temp_new_i32();
    vd = tcg_temp_new_i64();
    vfp_load_reg32(vm, a->vm);
    gen_helper_vfp_fcvtds(vd, vm, cpu_env);
    vfp_store_reg64(vd, a->vd);
    tcg_temp_free_i32(vm);
    tcg_temp_free_i64(vd);
    return true;
}

static bool trans_VCVT_dp(DisasContext *s, arg_VCVT_dp *a)
{
    TCGv_i64 vm;
    TCGv_i32 vd;

    if (!dc_isar_feature(aa32_fpdp_v2, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) && (a->vm & 0x10)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    vd = tcg_temp_new_i32();
    vm = tcg_temp_new_i64();
    vfp_load_reg64(vm, a->vm);
    gen_helper_vfp_fcvtsd(vd, vm, cpu_env);
    vfp_store_reg32(vd, a->vd);
    tcg_temp_free_i32(vd);
    tcg_temp_free_i64(vm);
    return true;
}

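/*
 * Note that the naming follows the source precision: trans_VCVT_sp()
 * widens f32 to f64 (vfp_fcvtds), while trans_VCVT_dp() narrows f64
 * to f32 (vfp_fcvtsd).
 */
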
static bool trans_VCVT_int_hp(DisasContext *s, arg_VCVT_int_sp *a)
{
    TCGv_i32 vm;
    TCGv_ptr fpst;

    if (!dc_isar_feature(aa32_fp16_arith, s)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    vm = tcg_temp_new_i32();
    vfp_load_reg32(vm, a->vm);
    fpst = fpstatus_ptr(FPST_FPCR_F16);
    if (a->s) {
        /* i32 -> f16 */
        gen_helper_vfp_sitoh(vm, vm, fpst);
    } else {
        /* u32 -> f16 */
        gen_helper_vfp_uitoh(vm, vm, fpst);
    }
    vfp_store_reg32(vm, a->vd);
    tcg_temp_free_i32(vm);
    tcg_temp_free_ptr(fpst);
    return true;
}

static bool trans_VCVT_int_sp(DisasContext *s, arg_VCVT_int_sp *a)
{
    TCGv_i32 vm;
    TCGv_ptr fpst;

    if (!dc_isar_feature(aa32_fpsp_v2, s)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    vm = tcg_temp_new_i32();
    vfp_load_reg32(vm, a->vm);
    fpst = fpstatus_ptr(FPST_FPCR);
    if (a->s) {
        /* i32 -> f32 */
        gen_helper_vfp_sitos(vm, vm, fpst);
    } else {
        /* u32 -> f32 */
        gen_helper_vfp_uitos(vm, vm, fpst);
    }
    vfp_store_reg32(vm, a->vd);
    tcg_temp_free_i32(vm);
    tcg_temp_free_ptr(fpst);
    return true;
}

static bool trans_VCVT_int_dp(DisasContext *s, arg_VCVT_int_dp *a)
{
    TCGv_i32 vm;
    TCGv_i64 vd;
    TCGv_ptr fpst;

    if (!dc_isar_feature(aa32_fpdp_v2, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    vm = tcg_temp_new_i32();
    vd = tcg_temp_new_i64();
    vfp_load_reg32(vm, a->vm);
    fpst = fpstatus_ptr(FPST_FPCR);
    if (a->s) {
        /* i32 -> f64 */
        gen_helper_vfp_sitod(vd, vm, fpst);
    } else {
        /* u32 -> f64 */
        gen_helper_vfp_uitod(vd, vm, fpst);
    }
    vfp_store_reg64(vd, a->vd);
    tcg_temp_free_i32(vm);
    tcg_temp_free_i64(vd);
    tcg_temp_free_ptr(fpst);
    return true;
}

static bool trans_VJCVT(DisasContext *s, arg_VJCVT *a)
{
    TCGv_i32 vd;
    TCGv_i64 vm;

    if (!dc_isar_feature(aa32_fpdp_v2, s)) {
        return false;
    }

    if (!dc_isar_feature(aa32_jscvt, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) && (a->vm & 0x10)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    vm = tcg_temp_new_i64();
    vd = tcg_temp_new_i32();
    vfp_load_reg64(vm, a->vm);
    gen_helper_vjcvt(vd, vm, cpu_env);
    vfp_store_reg32(vd, a->vd);
    tcg_temp_free_i64(vm);
    tcg_temp_free_i32(vd);
    return true;
}

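/*
 * VJCVT is the "JavaScript convert": double to signed 32-bit with
 * round-towards-zero and modulo-2^32 wrapping of out-of-range values;
 * the helper also updates the FPSCR NZCV flags (Z set when the
 * conversion was exact).
 */
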
static bool trans_VCVT_fix_hp(DisasContext *s, arg_VCVT_fix_sp *a)
{
    TCGv_i32 vd, shift;
    TCGv_ptr fpst;
    int frac_bits;

    if (!dc_isar_feature(aa32_fp16_arith, s)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    frac_bits = (a->opc & 1) ? (32 - a->imm) : (16 - a->imm);

    vd = tcg_temp_new_i32();
    vfp_load_reg32(vd, a->vd);

    fpst = fpstatus_ptr(FPST_FPCR_F16);
    shift = tcg_const_i32(frac_bits);

    /* Switch on op:U:sx bits */
    switch (a->opc) {
    case 0:
        gen_helper_vfp_shtoh_round_to_nearest(vd, vd, shift, fpst);
        break;
    case 1:
        gen_helper_vfp_sltoh_round_to_nearest(vd, vd, shift, fpst);
        break;
    case 2:
        gen_helper_vfp_uhtoh_round_to_nearest(vd, vd, shift, fpst);
        break;
    case 3:
        gen_helper_vfp_ultoh_round_to_nearest(vd, vd, shift, fpst);
        break;
    case 4:
        gen_helper_vfp_toshh_round_to_zero(vd, vd, shift, fpst);
        break;
    case 5:
        gen_helper_vfp_toslh_round_to_zero(vd, vd, shift, fpst);
        break;
    case 6:
        gen_helper_vfp_touhh_round_to_zero(vd, vd, shift, fpst);
        break;
    case 7:
        gen_helper_vfp_toulh_round_to_zero(vd, vd, shift, fpst);
        break;
    default:
        g_assert_not_reached();
    }

    vfp_store_reg32(vd, a->vd);
    tcg_temp_free_i32(vd);
    tcg_temp_free_i32(shift);
    tcg_temp_free_ptr(fpst);
    return true;
}

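/*
 * In the op:U:sx switch, bit 0 (sx) selects 16- or 32-bit fixed-point
 * (hence the frac_bits calculation above), bit 1 (U) selects unsigned,
 * and bit 2 (op) selects the direction: cases 0-3 convert fixed-point
 * to float rounding to nearest, cases 4-7 convert float to fixed-point
 * rounding to zero.
 */
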
static bool trans_VCVT_fix_sp(DisasContext *s, arg_VCVT_fix_sp *a)
{
    TCGv_i32 vd, shift;
    TCGv_ptr fpst;
    int frac_bits;

    if (!dc_isar_feature(aa32_fpsp_v3, s)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    frac_bits = (a->opc & 1) ? (32 - a->imm) : (16 - a->imm);

    vd = tcg_temp_new_i32();
    vfp_load_reg32(vd, a->vd);

    fpst = fpstatus_ptr(FPST_FPCR);
    shift = tcg_const_i32(frac_bits);

    /* Switch on op:U:sx bits */
    switch (a->opc) {
    case 0:
        gen_helper_vfp_shtos_round_to_nearest(vd, vd, shift, fpst);
        break;
    case 1:
        gen_helper_vfp_sltos_round_to_nearest(vd, vd, shift, fpst);
        break;
    case 2:
        gen_helper_vfp_uhtos_round_to_nearest(vd, vd, shift, fpst);
        break;
    case 3:
        gen_helper_vfp_ultos_round_to_nearest(vd, vd, shift, fpst);
        break;
    case 4:
        gen_helper_vfp_toshs_round_to_zero(vd, vd, shift, fpst);
        break;
    case 5:
        gen_helper_vfp_tosls_round_to_zero(vd, vd, shift, fpst);
        break;
    case 6:
        gen_helper_vfp_touhs_round_to_zero(vd, vd, shift, fpst);
        break;
    case 7:
        gen_helper_vfp_touls_round_to_zero(vd, vd, shift, fpst);
        break;
    default:
        g_assert_not_reached();
    }

    vfp_store_reg32(vd, a->vd);
    tcg_temp_free_i32(vd);
    tcg_temp_free_i32(shift);
    tcg_temp_free_ptr(fpst);
    return true;
}

static bool trans_VCVT_fix_dp(DisasContext *s, arg_VCVT_fix_dp *a)
{
    TCGv_i64 vd;
    TCGv_i32 shift;
    TCGv_ptr fpst;
    int frac_bits;

    if (!dc_isar_feature(aa32_fpdp_v3, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    frac_bits = (a->opc & 1) ? (32 - a->imm) : (16 - a->imm);

    vd = tcg_temp_new_i64();
    vfp_load_reg64(vd, a->vd);

    fpst = fpstatus_ptr(FPST_FPCR);
    shift = tcg_const_i32(frac_bits);

    /* Switch on op:U:sx bits */
    switch (a->opc) {
    case 0:
        gen_helper_vfp_shtod_round_to_nearest(vd, vd, shift, fpst);
        break;
    case 1:
        gen_helper_vfp_sltod_round_to_nearest(vd, vd, shift, fpst);
        break;
    case 2:
        gen_helper_vfp_uhtod_round_to_nearest(vd, vd, shift, fpst);
        break;
    case 3:
        gen_helper_vfp_ultod_round_to_nearest(vd, vd, shift, fpst);
        break;
    case 4:
        gen_helper_vfp_toshd_round_to_zero(vd, vd, shift, fpst);
        break;
    case 5:
        gen_helper_vfp_tosld_round_to_zero(vd, vd, shift, fpst);
        break;
    case 6:
        gen_helper_vfp_touhd_round_to_zero(vd, vd, shift, fpst);
        break;
    case 7:
        gen_helper_vfp_tould_round_to_zero(vd, vd, shift, fpst);
        break;
    default:
        g_assert_not_reached();
    }

    vfp_store_reg64(vd, a->vd);
    tcg_temp_free_i64(vd);
    tcg_temp_free_i32(shift);
    tcg_temp_free_ptr(fpst);
    return true;
}

static bool trans_VCVT_hp_int(DisasContext *s, arg_VCVT_sp_int *a)
{
    TCGv_i32 vm;
    TCGv_ptr fpst;

    if (!dc_isar_feature(aa32_fp16_arith, s)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    fpst = fpstatus_ptr(FPST_FPCR_F16);
    vm = tcg_temp_new_i32();
    vfp_load_reg32(vm, a->vm);

    if (a->s) {
        if (a->rz) {
            gen_helper_vfp_tosizh(vm, vm, fpst);
        } else {
            gen_helper_vfp_tosih(vm, vm, fpst);
        }
    } else {
        if (a->rz) {
            gen_helper_vfp_touizh(vm, vm, fpst);
        } else {
            gen_helper_vfp_touih(vm, vm, fpst);
        }
    }
    vfp_store_reg32(vm, a->vd);
    tcg_temp_free_i32(vm);
    tcg_temp_free_ptr(fpst);
    return true;
}

static bool trans_VCVT_sp_int(DisasContext *s, arg_VCVT_sp_int *a)
{
    TCGv_i32 vm;
    TCGv_ptr fpst;

    if (!dc_isar_feature(aa32_fpsp_v2, s)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    fpst = fpstatus_ptr(FPST_FPCR);
    vm = tcg_temp_new_i32();
    vfp_load_reg32(vm, a->vm);

    if (a->s) {
        if (a->rz) {
            gen_helper_vfp_tosizs(vm, vm, fpst);
        } else {
            gen_helper_vfp_tosis(vm, vm, fpst);
        }
    } else {
        if (a->rz) {
            gen_helper_vfp_touizs(vm, vm, fpst);
        } else {
            gen_helper_vfp_touis(vm, vm, fpst);
        }
    }
    vfp_store_reg32(vm, a->vd);
    tcg_temp_free_i32(vm);
    tcg_temp_free_ptr(fpst);
    return true;
}

static bool trans_VCVT_dp_int(DisasContext *s, arg_VCVT_dp_int *a)
{
    TCGv_i32 vd;
    TCGv_i64 vm;
    TCGv_ptr fpst;

    if (!dc_isar_feature(aa32_fpdp_v2, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) && (a->vm & 0x10)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    fpst = fpstatus_ptr(FPST_FPCR);
    vm = tcg_temp_new_i64();
    vd = tcg_temp_new_i32();
    vfp_load_reg64(vm, a->vm);

    if (a->s) {
        if (a->rz) {
            gen_helper_vfp_tosizd(vd, vm, fpst);
        } else {
            gen_helper_vfp_tosid(vd, vm, fpst);
        }
    } else {
        if (a->rz) {
            gen_helper_vfp_touizd(vd, vm, fpst);
        } else {
            gen_helper_vfp_touid(vd, vm, fpst);
        }
    }
    vfp_store_reg32(vd, a->vd);
    tcg_temp_free_i32(vd);
    tcg_temp_free_i64(vm);
    tcg_temp_free_ptr(fpst);
    return true;
}

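/*
 * The rz flag distinguishes VCVT, which always rounds to zero (the
 * "tosiz"/"touiz" helpers), from VCVTR, which rounds according to the
 * current FPSCR rounding mode (the "tosi"/"toui" helpers).
 */
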
static bool trans_VINS(DisasContext *s, arg_VINS *a)
{
    TCGv_i32 rd, rm;

    if (!dc_isar_feature(aa32_fp16_arith, s)) {
        return false;
    }

    if (s->vec_len != 0 || s->vec_stride != 0) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    /* Insert low half of Vm into high half of Vd */
    rm = tcg_temp_new_i32();
    rd = tcg_temp_new_i32();
    vfp_load_reg32(rm, a->vm);
    vfp_load_reg32(rd, a->vd);
    tcg_gen_deposit_i32(rd, rd, rm, 16, 16);
    vfp_store_reg32(rd, a->vd);
    tcg_temp_free_i32(rm);
    tcg_temp_free_i32(rd);
    return true;
}

static bool trans_VMOVX(DisasContext *s, arg_VINS *a)
{
    TCGv_i32 rm;

    if (!dc_isar_feature(aa32_fp16_arith, s)) {
        return false;
    }

    if (s->vec_len != 0 || s->vec_stride != 0) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    /* Set Vd to high half of Vm */
    rm = tcg_temp_new_i32();
    vfp_load_reg32(rm, a->vm);
    tcg_gen_shri_i32(rm, rm, 16);
    vfp_store_reg32(rm, a->vd);
    tcg_temp_free_i32(rm);
    return true;
}

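/*
 * In other words: VINS sets Vd[31:16] = Vm[15:0] while preserving
 * Vd[15:0], and VMOVX sets Vd = Vm[31:16] zero-extended; both exist to
 * move fp16 values between the two halves of a single-precision
 * register.
 */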