target/arm/tcg/translate-vfp.c

   1 /*
   2  *  ARM translation: AArch32 VFP instructions
   3  *
   4  *  Copyright (c) 2003 Fabrice Bellard
   5  *  Copyright (c) 2005-2007 CodeSourcery
   6  *  Copyright (c) 2007 OpenedHand, Ltd.
   7  *  Copyright (c) 2019 Linaro, Ltd.
   8  *
   9  * This library is free software; you can redistribute it and/or
  10  * modify it under the terms of the GNU Lesser General Public
  11  * License as published by the Free Software Foundation; either
  12  * version 2.1 of the License, or (at your option) any later version.
  13  *
  14  * This library is distributed in the hope that it will be useful,
  15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  17  * Lesser General Public License for more details.
  18  *
  19  * You should have received a copy of the GNU Lesser General Public
  20  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
  21  */
  22
  23 #include "qemu/osdep.h"
  24 #include "translate.h"
  25 #include "translate-a32.h"
  26
  27 /* Include the generated VFP decoder */
  28 #include "decode-vfp.c.inc"
  29 #include "decode-vfp-uncond.c.inc"
  30
  31 static inline void vfp_load_reg64(TCGv_i64 var, int reg)
  32 {
  33     tcg_gen_ld_i64(var, tcg_env, vfp_reg_offset(true, reg));
  34 }
  35
  36 static inline void vfp_store_reg64(TCGv_i64 var, int reg)
  37 {
  38     tcg_gen_st_i64(var, tcg_env, vfp_reg_offset(true, reg));
  39 }
  40
  41 static inline void vfp_load_reg32(TCGv_i32 var, int reg)
  42 {
  43     tcg_gen_ld_i32(var, tcg_env, vfp_reg_offset(false, reg));
  44 }
  45
  46 static inline void vfp_store_reg32(TCGv_i32 var, int reg)
  47 {
  48     tcg_gen_st_i32(var, tcg_env, vfp_reg_offset(false, reg));
  49 }
  50
  51 static inline void vfp_load_reg16(TCGv_i32 var, int reg)
  52 {
  53     tcg_gen_ld16u_i32(var, tcg_env,
  54                       vfp_reg_offset(false, reg) + HOST_BIG_ENDIAN * 2);
  55 }
  56
  57 /*
  58  * The imm8 encodes the sign bit, enough bits to represent an exponent in
  59  * the range 01....1xx to 10....0xx, and the most significant 4 bits of
  60  * the mantissa; see VFPExpandImm() in the v8 ARM ARM.
  61  */
  62 uint64_t vfp_expand_imm(int size, uint8_t imm8)
  63 {
  64     uint64_t imm;
  65
  66     switch (size) {
  67     case MO_64:
  68         imm = (extract32(imm8, 7, 1) ? 0x8000 : 0) |
  69             (extract32(imm8, 6, 1) ? 0x3fc0 : 0x4000) |
  70             extract32(imm8, 0, 6);
  71         imm <<= 48;
  72         break;
  73     case MO_32:
  74         imm = (extract32(imm8, 7, 1) ? 0x8000 : 0) |
  75             (extract32(imm8, 6, 1) ? 0x3e00 : 0x4000) |
  76             (extract32(imm8, 0, 6) << 3);
  77         imm <<= 16;
  78         break;
  79     case MO_16:
  80         imm = (extract32(imm8, 7, 1) ? 0x8000 : 0) |
  81             (extract32(imm8, 6, 1) ? 0x3000 : 0x4000) |
  82             (extract32(imm8, 0, 6) << 6);
  83         break;
  84     default:
  85         g_assert_not_reached();
  86     }
  87     return imm;
  88 }
  89
  90 /*
  91  * Return the offset of a 16-bit half of the specified VFP single-precision
  92  * register. If top is true, returns the top 16 bits; otherwise the bottom
  93  * 16 bits.
  94  */
  95 static inline long vfp_f16_offset(unsigned reg, bool top)
  96 {
  97     long offs = vfp_reg_offset(false, reg);
  98 #if HOST_BIG_ENDIAN
  99     if (!top) {
 100         offs += 2;
 101     }
 102 #else
 103     if (top) {
 104         offs += 2;
 105     }
 106 #endif
 107     return offs;
 108 }
 109
 110 /*
 111  * Generate code for M-profile lazy FP state preservation if needed;
 112  * this corresponds to the pseudocode PreserveFPState() function.
 113  */
 114 static void gen_preserve_fp_state(DisasContext *s, bool skip_context_update)
 115 {
 116     if (s->v7m_lspact) {
 117         /*
 118          * Lazy state saving affects external memory and also the NVIC,
 119          * so we must mark it as an IO operation for icount (and cause
 120          * this to be the last insn in the TB).
 121          */
 122         if (translator_io_start(&s->base)) {
 123             s->base.is_jmp = DISAS_UPDATE_EXIT;
 124         }
 125         gen_helper_v7m_preserve_fp_state(tcg_env);
 126         /*
 127          * If the preserve_fp_state helper doesn't throw an exception
 128          * then it will clear LSPACT; we don't need to repeat this for
 129          * any further FP insns in this TB.
 130          */
 131         s->v7m_lspact = false;
 132         /*
 133          * The helper might have zeroed VPR, so we do not know the
 134          * correct value for the MVE_NO_PRED TB flag any more.
 135          * If we're about to create a new fp context then that
 136          * will precisely determine the MVE_NO_PRED value (see
 137          * gen_update_fp_context()). Otherwise, we must:
 138          *  - set s->mve_no_pred to false, so this instruction
 139          *    is generated to use helper functions
 140          *  - end the TB now, without chaining to the next TB
 141          */
 142         if (skip_context_update || !s->v7m_new_fp_ctxt_needed) {
 143             s->mve_no_pred = false;
 144             s->base.is_jmp = DISAS_UPDATE_NOCHAIN;
 145         }
 146     }
 147 }
 148
 149 /*
 150  * Generate code for M-profile FP context handling: update the
 151  * ownership of the FP context, and create a new context if
 152  * necessary. This corresponds to the parts of the pseudocode
 153  * ExecuteFPCheck() after the initial PreserveFPState() call.
 154  */
 155 static void gen_update_fp_context(DisasContext *s)
 156 {
 157     /* Update ownership of FP context: set FPCCR.S to match current state */
 158     if (s->v8m_fpccr_s_wrong) {
 159         TCGv_i32 tmp;
 160
 161         tmp = load_cpu_field(v7m.fpccr[M_REG_S]);
 162         if (s->v8m_secure) {
 163             tcg_gen_ori_i32(tmp, tmp, R_V7M_FPCCR_S_MASK);
 164         } else {
 165             tcg_gen_andi_i32(tmp, tmp, ~R_V7M_FPCCR_S_MASK);
 166         }
 167         store_cpu_field(tmp, v7m.fpccr[M_REG_S]);
 168         /* Don't need to do this for any further FP insns in this TB */
 169         s->v8m_fpccr_s_wrong = false;
 170     }
 171
 172     if (s->v7m_new_fp_ctxt_needed) {
 173         /*
 174          * Create new FP context by updating CONTROL.FPCA, CONTROL.SFPA,
 175          * the FPSCR, and VPR.
 176          */
 177         TCGv_i32 control, fpscr;
 178         uint32_t bits = R_V7M_CONTROL_FPCA_MASK;
 179
 180         fpscr = load_cpu_field(v7m.fpdscr[s->v8m_secure]);
 181         gen_helper_vfp_set_fpscr(tcg_env, fpscr);
 182         if (dc_isar_feature(aa32_mve, s)) {
 183             store_cpu_field(tcg_constant_i32(0), v7m.vpr);
 184         }
 185         /*
 186          * We just updated the FPSCR and VPR. Some of this state is cached
 187          * in the MVE_NO_PRED TB flag. We want to avoid having to end the
 188          * TB here, which means we need the new value of the MVE_NO_PRED
 189          * flag to be exactly known here and the same for all executions.
 190          * Luckily FPDSCR.LTPSIZE is always constant 4 and the VPR is
 191          * always set to 0, so the new MVE_NO_PRED flag is always 1
 192          * if and only if we have MVE.
 193          *
 194          * (The other FPSCR state cached in TB flags is VECLEN and VECSTRIDE,
 195          * but those do not exist for M-profile, so are not relevant here.)
 196          */
 197         s->mve_no_pred = dc_isar_feature(aa32_mve, s);
 198
 199         if (s->v8m_secure) {
 200             bits |= R_V7M_CONTROL_SFPA_MASK;
 201         }
 202         control = load_cpu_field(v7m.control[M_REG_S]);
 203         tcg_gen_ori_i32(control, control, bits);
 204         store_cpu_field(control, v7m.control[M_REG_S]);
 205         /* Don't need to do this for any further FP insns in this TB */
 206         s->v7m_new_fp_ctxt_needed = false;
 207     }
 208 }
 209
 210 /*
 211  * Check that VFP access is enabled, A-profile specific version.
 212  *
 213  * If VFP is enabled, return true. If not, emit code to generate an
 214  * appropriate exception and return false.
 215  * The ignore_vfp_enabled argument specifies that we should ignore
 216  * whether VFP is enabled via FPEXC.EN: this should be true for FMXR/FMRX
 217  * accesses to FPSID, FPEXC, MVFR0, MVFR1, MVFR2, and false for all other insns.
 218  */
 219 static bool vfp_access_check_a(DisasContext *s, bool ignore_vfp_enabled)
 220 {
 221     if (s->fp_excp_el) {
 222         /*
 223          * The full syndrome is only used for HSR when HCPTR traps:
 224          * For v8, when TA==0, coproc is RES0.
 225          * For v7, any use of a Floating-point instruction or access
 226          * to a Floating-point Extension register that is trapped to
 227          * Hyp mode because of a trap configured in the HCPTR sets
 228          * this field to 0xA.
 229          */
 230         int coproc = arm_dc_feature(s, ARM_FEATURE_V8) ? 0 : 0xa;
 231         uint32_t syn = syn_fp_access_trap(1, 0xe, false, coproc);
 232
 233         gen_exception_insn_el(s, 0, EXCP_UDEF, syn, s->fp_excp_el);
 234         return false;
 235     }
 236
 237     /*
 238      * Note that rebuild_hflags_a32 has already accounted for being in EL0
 239      * and the higher EL in A64 mode, etc.  Unlike A64 mode, there do not
 240      * appear to be any insns which touch VFP which are allowed.
 241      */
 242     if (s->sme_trap_nonstreaming) {
 243         gen_exception_insn(s, 0, EXCP_UDEF,
 244                            syn_smetrap(SME_ET_Streaming,
 245                                        curr_insn_len(s) == 2));
 246         return false;
 247     }
 248
 249     if (!s->vfp_enabled && !ignore_vfp_enabled) {
 250         assert(!arm_dc_feature(s, ARM_FEATURE_M));
 251         unallocated_encoding(s);
 252         return false;
 253     }
 254     return true;
 255 }
 256
 257 /*
 258  * Check that VFP access is enabled, M-profile specific version.
 259  *
 260  * If VFP is enabled, do the necessary M-profile lazy-FP handling and then
 261  * return true. If not, emit code to generate an appropriate exception and
 262  * return false.
 263  * skip_context_update is true to skip the "update FP context" part of this.
 264  */
 265 bool vfp_access_check_m(DisasContext *s, bool skip_context_update)
 266 {
 267     if (s->fp_excp_el) {
 268         /*
 269          * M-profile mostly catches the "FPU disabled" case early, in
 270          * disas_m_nocp(), but a few insns (eg LCTP, WLSTP, DLSTP)
 271          * which do coprocessor-checks are outside the large ranges of
 272          * the encoding space handled by the patterns in m-nocp.decode,
 273          * and for them we may need to raise NOCP here.
 274          */
 275         gen_exception_insn_el(s, 0, EXCP_NOCP,
 276                               syn_uncategorized(), s->fp_excp_el);
 277         return false;
 278     }
 279
 280     /* Handle M-profile lazy FP state mechanics */
 281
 282     /* Trigger lazy-state preservation if necessary */
 283     gen_preserve_fp_state(s, skip_context_update);
 284
 285     if (!skip_context_update) {
 286         /* Update ownership of FP context and create new FP context if needed */
 287         gen_update_fp_context(s);
 288     }
 289
 290     return true;
 291 }
 292
 293 /*
 294  * The most usual kind of VFP access check, for everything except
 295  * FMXR/FMRX to the always-available special registers.
 296  */
 297 bool vfp_access_check(DisasContext *s)
 298 {
 299     if (arm_dc_feature(s, ARM_FEATURE_M)) {
 300         return vfp_access_check_m(s, false);
 301     } else {
 302         return vfp_access_check_a(s, false);
 303     }
 304 }
 305
 306 static bool trans_VSEL(DisasContext *s, arg_VSEL *a)
 307 {
 308     uint32_t rd, rn, rm;
 309     int sz = a->sz;
 310
 311     if (!dc_isar_feature(aa32_vsel, s)) {
 312         return false;
 313     }
 314
 315     if (sz == 3 && !dc_isar_feature(aa32_fpdp_v2, s)) {
 316         return false;
 317     }
 318
 319     if (sz == 1 && !dc_isar_feature(aa32_fp16_arith, s)) {
 320         return false;
 321     }
 322
 323     /* UNDEF accesses to D16-D31 if they don't exist */
 324     if (sz == 3 && !dc_isar_feature(aa32_simd_r32, s) &&
 325         ((a->vm | a->vn | a->vd) & 0x10)) {
 326         return false;
 327     }
 328
 329     rd = a->vd;
 330     rn = a->vn;
 331     rm = a->vm;
 332
 333     if (!vfp_access_check(s)) {
 334         return true;
 335     }
 336
 337     if (sz == 3) {
 338         TCGv_i64 frn, frm, dest;
 339         TCGv_i64 tmp, zero, zf, nf, vf;
 340
 341         zero = tcg_constant_i64(0);
 342
 343         frn = tcg_temp_new_i64();
 344         frm = tcg_temp_new_i64();
 345         dest = tcg_temp_new_i64();
 346
 347         zf = tcg_temp_new_i64();
 348         nf = tcg_temp_new_i64();
 349         vf = tcg_temp_new_i64();
 350
 351         tcg_gen_extu_i32_i64(zf, cpu_ZF);
 352         tcg_gen_ext_i32_i64(nf, cpu_NF);
 353         tcg_gen_ext_i32_i64(vf, cpu_VF);
 354
 355         vfp_load_reg64(frn, rn);
 356         vfp_load_reg64(frm, rm);
 357         switch (a->cc) {
 358         case 0: /* eq: Z */
 359             tcg_gen_movcond_i64(TCG_COND_EQ, dest, zf, zero, frn, frm);
 360             break;
 361         case 1: /* vs: V */
 362             tcg_gen_movcond_i64(TCG_COND_LT, dest, vf, zero, frn, frm);
 363             break;
 364         case 2: /* ge: N == V -> N ^ V == 0 */
 365             tmp = tcg_temp_new_i64();
 366             tcg_gen_xor_i64(tmp, vf, nf);
 367             tcg_gen_movcond_i64(TCG_COND_GE, dest, tmp, zero, frn, frm);
 368             break;
 369         case 3: /* gt: !Z && N == V */
 370             tcg_gen_movcond_i64(TCG_COND_NE, dest, zf, zero, frn, frm);
 371             tmp = tcg_temp_new_i64();
 372             tcg_gen_xor_i64(tmp, vf, nf);
 373             tcg_gen_movcond_i64(TCG_COND_GE, dest, tmp, zero, dest, frm);
 374             break;
 375         }
 376         vfp_store_reg64(dest, rd);
 377     } else {
 378         TCGv_i32 frn, frm, dest;
 379         TCGv_i32 tmp, zero;
 380
 381         zero = tcg_constant_i32(0);
 382
 383         frn = tcg_temp_new_i32();
 384         frm = tcg_temp_new_i32();
 385         dest = tcg_temp_new_i32();
 386         vfp_load_reg32(frn, rn);
 387         vfp_load_reg32(frm, rm);
 388         switch (a->cc) {
 389         case 0: /* eq: Z */
 390             tcg_gen_movcond_i32(TCG_COND_EQ, dest, cpu_ZF, zero, frn, frm);
 391             break;
 392         case 1: /* vs: V */
 393             tcg_gen_movcond_i32(TCG_COND_LT, dest, cpu_VF, zero, frn, frm);
 394             break;
 395         case 2: /* ge: N == V -> N ^ V == 0 */
 396             tmp = tcg_temp_new_i32();
 397             tcg_gen_xor_i32(tmp, cpu_VF, cpu_NF);
 398             tcg_gen_movcond_i32(TCG_COND_GE, dest, tmp, zero, frn, frm);
 399             break;
 400         case 3: /* gt: !Z && N == V */
 401             tcg_gen_movcond_i32(TCG_COND_NE, dest, cpu_ZF, zero, frn, frm);
 402             tmp = tcg_temp_new_i32();
 403             tcg_gen_xor_i32(tmp, cpu_VF, cpu_NF);
 404             tcg_gen_movcond_i32(TCG_COND_GE, dest, tmp, zero, dest, frm);
 405             break;
 406         }
 407         /* For fp16 the top half is always zeroes */
 408         if (sz == 1) {
 409             tcg_gen_andi_i32(dest, dest, 0xffff);
 410         }
 411         vfp_store_reg32(dest, rd);
 412     }
 413
 414     return true;
 415 }
 416
 417 /*
 418  * Table for converting the most common AArch32 encoding of
 419  * rounding mode to arm_fprounding order (which matches the
 420  * common AArch64 order); see ARM ARM pseudocode FPDecodeRM().
 421  */
 422 static const uint8_t fp_decode_rm[] = {
 423     FPROUNDING_TIEAWAY,
 424     FPROUNDING_TIEEVEN,
 425     FPROUNDING_POSINF,
 426     FPROUNDING_NEGINF,
 427 };
 428
 429 static bool trans_VRINT(DisasContext *s, arg_VRINT *a)
 430 {
 431     uint32_t rd, rm;
 432     int sz = a->sz;
 433     TCGv_ptr fpst;
 434     TCGv_i32 tcg_rmode;
 435     int rounding = fp_decode_rm[a->rm];
 436
 437     if (!dc_isar_feature(aa32_vrint, s)) {
 438         return false;
 439     }
 440
 441     if (sz == 3 && !dc_isar_feature(aa32_fpdp_v2, s)) {
 442         return false;
 443     }
 444
 445     if (sz == 1 && !dc_isar_feature(aa32_fp16_arith, s)) {
 446         return false;
 447     }
 448
 449     /* UNDEF accesses to D16-D31 if they don't exist */
 450     if (sz == 3 && !dc_isar_feature(aa32_simd_r32, s) &&
 451         ((a->vm | a->vd) & 0x10)) {
 452         return false;
 453     }
 454
 455     rd = a->vd;
 456     rm = a->vm;
 457
 458     if (!vfp_access_check(s)) {
 459         return true;
 460     }
 461
 462     if (sz == 1) {
 463         fpst = fpstatus_ptr(FPST_FPCR_F16);
 464     } else {
 465         fpst = fpstatus_ptr(FPST_FPCR);
 466     }
 467
 468     tcg_rmode = gen_set_rmode(rounding, fpst);
 469
 470     if (sz == 3) {
 471         TCGv_i64 tcg_op;
 472         TCGv_i64 tcg_res;
 473         tcg_op = tcg_temp_new_i64();
 474         tcg_res = tcg_temp_new_i64();
 475         vfp_load_reg64(tcg_op, rm);
 476         gen_helper_rintd(tcg_res, tcg_op, fpst);
 477         vfp_store_reg64(tcg_res, rd);
 478     } else {
 479         TCGv_i32 tcg_op;
 480         TCGv_i32 tcg_res;
 481         tcg_op = tcg_temp_new_i32();
 482         tcg_res = tcg_temp_new_i32();
 483         vfp_load_reg32(tcg_op, rm);
 484         if (sz == 1) {
 485             gen_helper_rinth(tcg_res, tcg_op, fpst);
 486         } else {
 487             gen_helper_rints(tcg_res, tcg_op, fpst);
 488         }
 489         vfp_store_reg32(tcg_res, rd);
 490     }
 491
 492     gen_restore_rmode(tcg_rmode, fpst);
 493     return true;
 494 }
 495
 496 static bool trans_VCVT(DisasContext *s, arg_VCVT *a)
 497 {
 498     uint32_t rd, rm;
 499     int sz = a->sz;
 500     TCGv_ptr fpst;
 501     TCGv_i32 tcg_rmode, tcg_shift;
 502     int rounding = fp_decode_rm[a->rm];
 503     bool is_signed = a->op;
 504
 505     if (!dc_isar_feature(aa32_vcvt_dr, s)) {
 506         return false;
 507     }
 508
 509     if (sz == 3 && !dc_isar_feature(aa32_fpdp_v2, s)) {
 510         return false;
 511     }
 512
 513     if (sz == 1 && !dc_isar_feature(aa32_fp16_arith, s)) {
 514         return false;
 515     }
 516
 517     /* UNDEF accesses to D16-D31 if they don't exist */
 518     if (sz == 3 && !dc_isar_feature(aa32_simd_r32, s) && (a->vm & 0x10)) {
 519         return false;
 520     }
 521
 522     rd = a->vd;
 523     rm = a->vm;
 524
 525     if (!vfp_access_check(s)) {
 526         return true;
 527     }
 528
 529     if (sz == 1) {
 530         fpst = fpstatus_ptr(FPST_FPCR_F16);
 531     } else {
 532         fpst = fpstatus_ptr(FPST_FPCR);
 533     }
 534
 535     tcg_shift = tcg_constant_i32(0);
 536     tcg_rmode = gen_set_rmode(rounding, fpst);
 537
 538     if (sz == 3) {
 539         TCGv_i64 tcg_double, tcg_res;
 540         TCGv_i32 tcg_tmp;
 541         tcg_double = tcg_temp_new_i64();
 542         tcg_res = tcg_temp_new_i64();
 543         tcg_tmp = tcg_temp_new_i32();
 544         vfp_load_reg64(tcg_double, rm);
 545         if (is_signed) {
 546             gen_helper_vfp_tosld(tcg_res, tcg_double, tcg_shift, fpst);
 547         } else {
 548             gen_helper_vfp_tould(tcg_res, tcg_double, tcg_shift, fpst);
 549         }
 550         tcg_gen_extrl_i64_i32(tcg_tmp, tcg_res);
 551         vfp_store_reg32(tcg_tmp, rd);
 552     } else {
 553         TCGv_i32 tcg_single, tcg_res;
 554         tcg_single = tcg_temp_new_i32();
 555         tcg_res = tcg_temp_new_i32();
 556         vfp_load_reg32(tcg_single, rm);
 557         if (sz == 1) {
 558             if (is_signed) {
 559                 gen_helper_vfp_toslh(tcg_res, tcg_single, tcg_shift, fpst);
 560             } else {
 561                 gen_helper_vfp_toulh(tcg_res, tcg_single, tcg_shift, fpst);
 562             }
 563         } else {
 564             if (is_signed) {
 565                 gen_helper_vfp_tosls(tcg_res, tcg_single, tcg_shift, fpst);
 566             } else {
 567                 gen_helper_vfp_touls(tcg_res, tcg_single, tcg_shift, fpst);
 568             }
 569         }
 570         vfp_store_reg32(tcg_res, rd);
 571     }
 572
 573     gen_restore_rmode(tcg_rmode, fpst);
 574     return true;
 575 }
 576
 577 bool mve_skip_vmov(DisasContext *s, int vn, int index, int size)
 578 {
 579     /*
 580      * In a CPU with MVE, the VMOV (vector lane to general-purpose register)
 581      * and VMOV (general-purpose register to vector lane) insns are not
 582      * predicated, but they are subject to beatwise execution if they are
 583      * not in an IT block.
 584      *
 585      * Since our implementation always executes all 4 beats in one tick,
 586      * this means only that if PSR.ECI says we should not be executing
 587      * the beat corresponding to the lane of the vector register being
 588      * accessed then we should skip performing the move, and that we need
 589      * to do the usual check for bad ECI state and advance of ECI state.
 590      *
 591      * Note that if PSR.ECI is non-zero then we cannot be in an IT block.
 592      *
 593      * Return true if this VMOV scalar <-> gpreg should be skipped because
 594      * the MVE PSR.ECI state says we skip the beat where the store happens.
 595      */
 596
 597     /* Calculate the byte offset into Qn which we're going to access */
 598     int ofs = (index << size) + ((vn & 1) * 8);
 599
 600     if (!dc_isar_feature(aa32_mve, s)) {
 601         return false;
 602     }
 603
 604     switch (s->eci) {
 605     case ECI_NONE:
 606         return false;
 607     case ECI_A0:
 608         return ofs < 4;
 609     case ECI_A0A1:
 610         return ofs < 8;
 611     case ECI_A0A1A2:
 612     case ECI_A0A1A2B0:
 613         return ofs < 12;
 614     default:
 615         g_assert_not_reached();
 616     }
 617 }
 618
 619 static bool trans_VMOV_to_gp(DisasContext *s, arg_VMOV_to_gp *a)
 620 {
 621     /* VMOV scalar to general purpose register */
 622     TCGv_i32 tmp;
 623
 624     /*
 625      * SIZE == MO_32 is a VFP instruction; otherwise NEON. MVE has
 626      * all sizes, whether the CPU has fp or not.
 627      */
 628     if (!dc_isar_feature(aa32_mve, s)) {
 629         if (a->size == MO_32
 630             ? !dc_isar_feature(aa32_fpsp_v2, s)
 631             : !arm_dc_feature(s, ARM_FEATURE_NEON)) {
 632             return false;
 633         }
 634     }
 635
 636     /* UNDEF accesses to D16-D31 if they don't exist */
 637     if (!dc_isar_feature(aa32_simd_r32, s) && (a->vn & 0x10)) {
 638         return false;
 639     }
 640
 641     if (dc_isar_feature(aa32_mve, s)) {
 642         if (!mve_eci_check(s)) {
 643             return true;
 644         }
 645     }
 646
 647     if (!vfp_access_check(s)) {
 648         return true;
 649     }
 650
 651     if (!mve_skip_vmov(s, a->vn, a->index, a->size)) {
 652         tmp = tcg_temp_new_i32();
 653         read_neon_element32(tmp, a->vn, a->index,
 654                             a->size | (a->u ? 0 : MO_SIGN));
 655         store_reg(s, a->rt, tmp);
 656     }
 657
 658     if (dc_isar_feature(aa32_mve, s)) {
 659         mve_update_and_store_eci(s);
 660     }
 661     return true;
 662 }
 663
 664 static bool trans_VMOV_from_gp(DisasContext *s, arg_VMOV_from_gp *a)
 665 {
 666     /* VMOV general purpose register to scalar */
 667     TCGv_i32 tmp;
 668
 669     /*
 670      * SIZE == MO_32 is a VFP instruction; otherwise NEON. MVE has
 671      * all sizes, whether the CPU has fp or not.
 672      */
 673     if (!dc_isar_feature(aa32_mve, s)) {
 674         if (a->size == MO_32
 675             ? !dc_isar_feature(aa32_fpsp_v2, s)
 676             : !arm_dc_feature(s, ARM_FEATURE_NEON)) {
 677             return false;
 678         }
 679     }
 680
 681     /* UNDEF accesses to D16-D31 if they don't exist */
 682     if (!dc_isar_feature(aa32_simd_r32, s) && (a->vn & 0x10)) {
 683         return false;
 684     }
 685
 686     if (dc_isar_feature(aa32_mve, s)) {
 687         if (!mve_eci_check(s)) {
 688             return true;
 689         }
 690     }
 691
 692     if (!vfp_access_check(s)) {
 693         return true;
 694     }
 695
 696     if (!mve_skip_vmov(s, a->vn, a->index, a->size)) {
 697         tmp = load_reg(s, a->rt);
 698         write_neon_element32(tmp, a->vn, a->index, a->size);
 699     }
 700
 701     if (dc_isar_feature(aa32_mve, s)) {
 702         mve_update_and_store_eci(s);
 703     }
 704     return true;
 705 }
 706
 707 static bool trans_VDUP(DisasContext *s, arg_VDUP *a)
 708 {
 709     /* VDUP (general purpose register) */
 710     TCGv_i32 tmp;
 711     int size, vec_size;
 712
 713     if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
 714         return false;
 715     }
 716
 717     /* UNDEF accesses to D16-D31 if they don't exist */
 718     if (!dc_isar_feature(aa32_simd_r32, s) && (a->vn & 0x10)) {
 719         return false;
 720     }
 721
 722     if (a->b && a->e) {
 723         return false;
 724     }
 725
 726     if (a->q && (a->vn & 1)) {
 727         return false;
 728     }
 729
 730     vec_size = a->q ? 16 : 8;
 731     if (a->b) {
 732         size = 0;
 733     } else if (a->e) {
 734         size = 1;
 735     } else {
 736         size = 2;
 737     }
 738
 739     if (!vfp_access_check(s)) {
 740         return true;
 741     }
 742
 743     tmp = load_reg(s, a->rt);
 744     tcg_gen_gvec_dup_i32(size, neon_full_reg_offset(a->vn),
 745                          vec_size, vec_size, tmp);
 746     return true;
 747 }
 748
 749 static bool trans_VMSR_VMRS(DisasContext *s, arg_VMSR_VMRS *a)
 750 {
 751     TCGv_i32 tmp;
 752     bool ignore_vfp_enabled = false;
 753
 754     if (arm_dc_feature(s, ARM_FEATURE_M)) {
 755         /* M profile version was already handled in m-nocp.decode */
 756         return false;
 757     }
 758
 759     if (!dc_isar_feature(aa32_fpsp_v2, s)) {
 760         return false;
 761     }
 762
 763     switch (a->reg) {
 764     case ARM_VFP_FPSID:
 765         /*
 766          * VFPv2 allows access to FPSID from userspace; VFPv3 restricts
 767          * all ID registers to privileged access only.
 768          */
 769         if (IS_USER(s) && dc_isar_feature(aa32_fpsp_v3, s)) {
 770             return false;
 771         }
 772         ignore_vfp_enabled = true;
 773         break;
 774     case ARM_VFP_MVFR0:
 775     case ARM_VFP_MVFR1:
 776         if (IS_USER(s) || !arm_dc_feature(s, ARM_FEATURE_MVFR)) {
 777             return false;
 778         }
 779         ignore_vfp_enabled = true;
 780         break;
 781     case ARM_VFP_MVFR2:
 782         if (IS_USER(s) || !arm_dc_feature(s, ARM_FEATURE_V8)) {
 783             return false;
 784         }
 785         ignore_vfp_enabled = true;
 786         break;
 787     case ARM_VFP_FPSCR:
 788         break;
 789     case ARM_VFP_FPEXC:
 790         if (IS_USER(s)) {
 791             return false;
 792         }
 793         ignore_vfp_enabled = true;
 794         break;
 795     case ARM_VFP_FPINST:
 796     case ARM_VFP_FPINST2:
 797         /* Not present in VFPv3 */
 798         if (IS_USER(s) || dc_isar_feature(aa32_fpsp_v3, s)) {
 799             return false;
 800         }
 801         break;
 802     default:
 803         return false;
 804     }
 805
 806     /*
 807      * Call vfp_access_check_a() directly, because we need to tell
 808      * it to ignore FPEXC.EN for some register accesses.
 809      */
 810     if (!vfp_access_check_a(s, ignore_vfp_enabled)) {
 811         return true;
 812     }
 813
 814     if (a->l) {
 815         /* VMRS, move VFP special register to gp register */
 816         switch (a->reg) {
 817         case ARM_VFP_MVFR0:
 818         case ARM_VFP_MVFR1:
 819         case ARM_VFP_MVFR2:
 820         case ARM_VFP_FPSID:
 821             if (s->current_el == 1) {
 822                 gen_set_condexec(s);
 823                 gen_update_pc(s, 0);
 824                 gen_helper_check_hcr_el2_trap(tcg_env,
 825                                               tcg_constant_i32(a->rt),
 826                                               tcg_constant_i32(a->reg));
 827             }
 828             /* fall through */
 829         case ARM_VFP_FPEXC:
 830         case ARM_VFP_FPINST:
 831         case ARM_VFP_FPINST2:
 832             tmp = load_cpu_field(vfp.xregs[a->reg]);
 833             break;
 834         case ARM_VFP_FPSCR:
 835             if (a->rt == 15) {
 836                 tmp = load_cpu_field_low32(vfp.fpsr);
 837                 tcg_gen_andi_i32(tmp, tmp, FPSR_NZCV_MASK);
 838             } else {
 839                 tmp = tcg_temp_new_i32();
 840                 gen_helper_vfp_get_fpscr(tmp, tcg_env);
 841             }
 842             break;
 843         default:
 844             g_assert_not_reached();
 845         }
 846
 847         if (a->rt == 15) {
 848             /* Set the 4 flag bits in the CPSR.  */
 849             gen_set_nzcv(tmp);
 850         } else {
 851             store_reg(s, a->rt, tmp);
 852         }
 853     } else {
 854         /* VMSR, move gp register to VFP special register */
 855         switch (a->reg) {
 856         case ARM_VFP_FPSID:
 857         case ARM_VFP_MVFR0:
 858         case ARM_VFP_MVFR1:
 859         case ARM_VFP_MVFR2:
 860             /* Writes are ignored.  */
 861             break;
 862         case ARM_VFP_FPSCR:
 863             tmp = load_reg(s, a->rt);
 864             gen_helper_vfp_set_fpscr(tcg_env, tmp);
 865             gen_lookup_tb(s);
 866             break;
 867         case ARM_VFP_FPEXC:
 868             /*
 869              * TODO: VFP subarchitecture support.
 870              * For now, keep the EN bit only
 871              */
 872             tmp = load_reg(s, a->rt);
 873             tcg_gen_andi_i32(tmp, tmp, 1 << 30);
 874             store_cpu_field(tmp, vfp.xregs[a->reg]);
 875             gen_lookup_tb(s);
 876             break;
 877         case ARM_VFP_FPINST:
 878         case ARM_VFP_FPINST2:
 879             tmp = load_reg(s, a->rt);
 880             store_cpu_field(tmp, vfp.xregs[a->reg]);
 881             break;
 882         default:
 883             g_assert_not_reached();
 884         }
 885     }
 886
 887     return true;
 888 }
 889
 890
 891 static bool trans_VMOV_half(DisasContext *s, arg_VMOV_single *a)
 892 {
 893     TCGv_i32 tmp;
 894
 895     if (!dc_isar_feature(aa32_fp16_arith, s)) {
 896         return false;
 897     }
 898
 899     if (a->rt == 15) {
 900         /* UNPREDICTABLE; we choose to UNDEF */
 901         return false;
 902     }
 903
 904     if (!vfp_access_check(s)) {
 905         return true;
 906     }
 907
 908     if (a->l) {
 909         /* VFP to general purpose register */
 910         tmp = tcg_temp_new_i32();
 911         vfp_load_reg16(tmp, a->vn);
 912         store_reg(s, a->rt, tmp);
 913     } else {
 914         /* general purpose register to VFP */
 915         tmp = load_reg(s, a->rt);
 916         tcg_gen_andi_i32(tmp, tmp, 0xffff);
 917         vfp_store_reg32(tmp, a->vn);
 918     }
 919
 920     return true;
 921 }
 922
 923 static bool trans_VMOV_single(DisasContext *s, arg_VMOV_single *a)
 924 {
 925     TCGv_i32 tmp;
 926
 927     if (!dc_isar_feature(aa32_fpsp_v2, s) && !dc_isar_feature(aa32_mve, s)) {
 928         return false;
 929     }
 930
 931     if (!vfp_access_check(s)) {
 932         return true;
 933     }
 934
 935     if (a->l) {
 936         /* VFP to general purpose register */
 937         tmp = tcg_temp_new_i32();
 938         vfp_load_reg32(tmp, a->vn);
 939         if (a->rt == 15) {
 940             /* Set the 4 flag bits in the CPSR.  */
 941             gen_set_nzcv(tmp);
 942         } else {
 943             store_reg(s, a->rt, tmp);
 944         }
 945     } else {
 946         /* general purpose register to VFP */
 947         tmp = load_reg(s, a->rt);
 948         vfp_store_reg32(tmp, a->vn);
 949     }
 950
 951     return true;
 952 }
 953
 954 static bool trans_VMOV_64_sp(DisasContext *s, arg_VMOV_64_sp *a)
 955 {
 956     TCGv_i32 tmp;
 957
 958     if (!dc_isar_feature(aa32_fpsp_v2, s) && !dc_isar_feature(aa32_mve, s)) {
 959         return false;
 960     }
 961
 962     /*
 963      * VMOV between two general-purpose registers and two single precision
 964      * floating point registers
 965      */
 966     if (!vfp_access_check(s)) {
 967         return true;
 968     }
 969
 970     if (a->op) {
 971         /* fpreg to gpreg */
 972         tmp = tcg_temp_new_i32();
 973         vfp_load_reg32(tmp, a->vm);
 974         store_reg(s, a->rt, tmp);
 975         tmp = tcg_temp_new_i32();
 976         vfp_load_reg32(tmp, a->vm + 1);
 977         store_reg(s, a->rt2, tmp);
 978     } else {
 979         /* gpreg to fpreg */
 980         tmp = load_reg(s, a->rt);
 981         vfp_store_reg32(tmp, a->vm);
 982         tmp = load_reg(s, a->rt2);
 983         vfp_store_reg32(tmp, a->vm + 1);
 984     }
 985
 986     return true;
 987 }
 988
 989 static bool trans_VMOV_64_dp(DisasContext *s, arg_VMOV_64_dp *a)
 990 {
 991     TCGv_i32 tmp;
 992
 993     /*
 994      * VMOV between two general-purpose registers and one double precision
 995      * floating point register.  Note that this does not require support
 996      * for double precision arithmetic.
 997      */
 998     if (!dc_isar_feature(aa32_fpsp_v2, s) && !dc_isar_feature(aa32_mve, s)) {
 999         return false;
1000     }
1001
1002     /* UNDEF accesses to D16-D31 if they don't exist */
1003     if (!dc_isar_feature(aa32_simd_r32, s) && (a->vm & 0x10)) {
1004         return false;
1005     }
1006
1007     if (!vfp_access_check(s)) {
1008         return true;
1009     }
1010
1011     if (a->op) {
1012         /* fpreg to gpreg */
1013         tmp = tcg_temp_new_i32();
1014         vfp_load_reg32(tmp, a->vm * 2);
1015         store_reg(s, a->rt, tmp);
1016         tmp = tcg_temp_new_i32();
1017         vfp_load_reg32(tmp, a->vm * 2 + 1);
1018         store_reg(s, a->rt2, tmp);
1019     } else {
1020         /* gpreg to fpreg */
1021         tmp = load_reg(s, a->rt);
1022         vfp_store_reg32(tmp, a->vm * 2);
1023         tmp = load_reg(s, a->rt2);
1024         vfp_store_reg32(tmp, a->vm * 2 + 1);
1025     }
1026
1027     return true;
1028 }
1029
1030 static bool trans_VLDR_VSTR_hp(DisasContext *s, arg_VLDR_VSTR_sp *a)
1031 {
1032     uint32_t offset;
1033     TCGv_i32 addr, tmp;
1034
1035     if (!dc_isar_feature(aa32_fpsp_v2, s) && !dc_isar_feature(aa32_mve, s)) {
1036         return false;
1037     }
1038
1039     if (!vfp_access_check(s)) {
1040         return true;
1041     }
1042
1043     /* imm8 field is offset/2 for fp16, unlike fp32 and fp64 */
1044     offset = a->imm << 1;
1045     if (!a->u) {
1046         offset = -offset;
1047     }
1048
1049     /* For thumb, use of PC is UNPREDICTABLE.  */
1050     addr = add_reg_for_lit(s, a->rn, offset);
1051     tmp = tcg_temp_new_i32();
1052     if (a->l) {
1053         gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s), MO_UW | MO_ALIGN);
1054         vfp_store_reg32(tmp, a->vd);
1055     } else {
1056         vfp_load_reg32(tmp, a->vd);
1057         gen_aa32_st_i32(s, tmp, addr, get_mem_index(s), MO_UW | MO_ALIGN);
1058     }
1059     return true;
1060 }
1061
1062 static bool trans_VLDR_VSTR_sp(DisasContext *s, arg_VLDR_VSTR_sp *a)
1063 {
1064     uint32_t offset;
1065     TCGv_i32 addr, tmp;
1066
1067     if (!dc_isar_feature(aa32_fpsp_v2, s) && !dc_isar_feature(aa32_mve, s)) {
1068         return false;
1069     }
1070
1071     if (!vfp_access_check(s)) {
1072         return true;
1073     }
1074
1075     offset = a->imm << 2;
1076     if (!a->u) {
1077         offset = -offset;
1078     }
1079
1080     /* For thumb, use of PC is UNPREDICTABLE.  */
1081     addr = add_reg_for_lit(s, a->rn, offset);
1082     tmp = tcg_temp_new_i32();
1083     if (a->l) {
1084         gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s), MO_UL | MO_ALIGN);
1085         vfp_store_reg32(tmp, a->vd);
1086     } else {
1087         vfp_load_reg32(tmp, a->vd);
1088         gen_aa32_st_i32(s, tmp, addr, get_mem_index(s), MO_UL | MO_ALIGN);
1089     }
1090     return true;
1091 }
1092
1093 static bool trans_VLDR_VSTR_dp(DisasContext *s, arg_VLDR_VSTR_dp *a)
1094 {
1095     uint32_t offset;
1096     TCGv_i32 addr;
1097     TCGv_i64 tmp;
1098
1099     /* Note that this does not require support for double arithmetic.  */
1100     if (!dc_isar_feature(aa32_fpsp_v2, s) && !dc_isar_feature(aa32_mve, s)) {
1101         return false;
1102     }
1103
1104     /* UNDEF accesses to D16-D31 if they don't exist */
1105     if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) {
1106         return false;
1107     }
1108
1109     if (!vfp_access_check(s)) {
1110         return true;
1111     }
1112
1113     offset = a->imm << 2;
1114     if (!a->u) {
1115         offset = -offset;
1116     }
1117
1118     /* For thumb, use of PC is UNPREDICTABLE.  */
1119     addr = add_reg_for_lit(s, a->rn, offset);
1120     tmp = tcg_temp_new_i64();
1121     if (a->l) {
1122         gen_aa32_ld_i64(s, tmp, addr, get_mem_index(s), MO_UQ | MO_ALIGN_4);
1123         vfp_store_reg64(tmp, a->vd);
1124     } else {
1125         vfp_load_reg64(tmp, a->vd);
1126         gen_aa32_st_i64(s, tmp, addr, get_mem_index(s), MO_UQ | MO_ALIGN_4);
1127     }
1128     return true;
1129 }
1130
1131 static bool trans_VLDM_VSTM_sp(DisasContext *s, arg_VLDM_VSTM_sp *a)
1132 {
1133     uint32_t offset;
1134     TCGv_i32 addr, tmp;
1135     int i, n;
1136
1137     if (!dc_isar_feature(aa32_fpsp_v2, s) && !dc_isar_feature(aa32_mve, s)) {
1138         return false;
1139     }
1140
1141     n = a->imm;
1142
1143     if (n == 0 || (a->vd + n) > 32) {
1144         /*
1145          * UNPREDICTABLE cases for bad immediates: we choose to
1146          * UNDEF to avoid generating huge numbers of TCG ops
1147          */
1148         return false;
1149     }
1150     if (a->rn == 15 && a->w) {
1151         /* writeback to PC is UNPREDICTABLE, we choose to UNDEF */
1152         return false;
1153     }
1154
1155     s->eci_handled = true;
1156
1157     if (!vfp_access_check(s)) {
1158         return true;
1159     }
1160
1161     /* For thumb, use of PC is UNPREDICTABLE.  */
1162     addr = add_reg_for_lit(s, a->rn, 0);
1163     if (a->p) {
1164         /* pre-decrement */
1165         tcg_gen_addi_i32(addr, addr, -(a->imm << 2));
1166     }
1167
1168     if (s->v8m_stackcheck && a->rn == 13 && a->w) {
1169         /*
1170          * Here 'addr' is the lowest address we will store to,
1171          * and is either the old SP (if post-increment) or
1172          * the new SP (if pre-decrement). For post-increment
1173          * where the old value is below the limit and the new
1174          * value is above, it is UNKNOWN whether the limit check
1175          * triggers; we choose to trigger.
1176          */
1177         gen_helper_v8m_stackcheck(tcg_env, addr);
1178     }
1179
1180     offset = 4;
1181     tmp = tcg_temp_new_i32();
1182     for (i = 0; i < n; i++) {
1183         if (a->l) {
1184             /* load */
1185             gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s), MO_UL | MO_ALIGN);
1186             vfp_store_reg32(tmp, a->vd + i);
1187         } else {
1188             /* store */
1189             vfp_load_reg32(tmp, a->vd + i);
1190             gen_aa32_st_i32(s, tmp, addr, get_mem_index(s), MO_UL | MO_ALIGN);
1191         }
1192         tcg_gen_addi_i32(addr, addr, offset);
1193     }
1194     if (a->w) {
1195         /* writeback */
1196         if (a->p) {
1197             offset = -offset * n;
1198             tcg_gen_addi_i32(addr, addr, offset);
1199         }
1200         store_reg(s, a->rn, addr);
1201     }
1202
1203     clear_eci_state(s);
1204     return true;
1205 }
1206
1207 static bool trans_VLDM_VSTM_dp(DisasContext *s, arg_VLDM_VSTM_dp *a)
1208 {
1209     uint32_t offset;
1210     TCGv_i32 addr;
1211     TCGv_i64 tmp;
1212     int i, n;
1213
1214     /* Note that this does not require support for double arithmetic.  */
1215     if (!dc_isar_feature(aa32_fpsp_v2, s) && !dc_isar_feature(aa32_mve, s)) {
1216         return false;
1217     }
1218
1219     n = a->imm >> 1;
1220
1221     if (n == 0 || (a->vd + n) > 32 || n > 16) {
1222         /*
1223          * UNPREDICTABLE cases for bad immediates: we choose to
1224          * UNDEF to avoid generating huge numbers of TCG ops
1225          */
1226         return false;
1227     }
1228     if (a->rn == 15 && a->w) {
1229         /* writeback to PC is UNPREDICTABLE, we choose to UNDEF */
1230         return false;
1231     }
1232
1233     /* UNDEF accesses to D16-D31 if they don't exist */
1234     if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd + n) > 16) {
1235         return false;
1236     }
1237
1238     s->eci_handled = true;
1239
1240     if (!vfp_access_check(s)) {
1241         return true;
1242     }
1243
1244     /* For thumb, use of PC is UNPREDICTABLE.  */
1245     addr = add_reg_for_lit(s, a->rn, 0);
1246     if (a->p) {
1247         /* pre-decrement */
1248         tcg_gen_addi_i32(addr, addr, -(a->imm << 2));
1249     }
1250
1251     if (s->v8m_stackcheck && a->rn == 13 && a->w) {
1252         /*
1253          * Here 'addr' is the lowest address we will store to,
1254          * and is either the old SP (if post-increment) or
1255          * the new SP (if pre-decrement). For post-increment
1256          * where the old value is below the limit and the new
1257          * value is above, it is UNKNOWN whether the limit check
1258          * triggers; we choose to trigger.
1259          */
1260         gen_helper_v8m_stackcheck(tcg_env, addr);
1261     }
1262
1263     offset = 8;
1264     tmp = tcg_temp_new_i64();
1265     for (i = 0; i < n; i++) {
1266         if (a->l) {
1267             /* load */
1268             gen_aa32_ld_i64(s, tmp, addr, get_mem_index(s), MO_UQ | MO_ALIGN_4);
1269             vfp_store_reg64(tmp, a->vd + i);
1270         } else {
1271             /* store */
1272             vfp_load_reg64(tmp, a->vd + i);
1273             gen_aa32_st_i64(s, tmp, addr, get_mem_index(s), MO_UQ | MO_ALIGN_4);
1274         }
1275         tcg_gen_addi_i32(addr, addr, offset);
1276     }
1277     if (a->w) {
1278         /* writeback */
1279         if (a->p) {
1280             offset = -offset * n;
1281         } else if (a->imm & 1) {
1282             offset = 4;
1283         } else {
1284             offset = 0;
1285         }
1286
1287         if (offset != 0) {
1288             tcg_gen_addi_i32(addr, addr, offset);
1289         }
1290         store_reg(s, a->rn, addr);
1291     }
1292
1293     clear_eci_state(s);
1294     return true;
1295 }
1296
1297 /*
1298  * Types for callbacks for do_vfp_3op_sp() and do_vfp_3op_dp().
1299  * The callback should emit code to write a value to vd. If
1300  * do_vfp_3op_{sp,dp}() was passed reads_vd then the TCGv vd
1301  * will contain the old value of the relevant VFP register;
1302  * otherwise it must be written to only.
1303  */
1304 typedef void VFPGen3OpSPFn(TCGv_i32 vd,
1305                            TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst);
1306 typedef void VFPGen3OpDPFn(TCGv_i64 vd,
1307                            TCGv_i64 vn, TCGv_i64 vm, TCGv_ptr fpst);
1308
1309 /*
1310  * Types for callbacks for do_vfp_2op_sp() and do_vfp_2op_dp().
1311  * The callback should emit code to write a value to vd (which
1312  * should be written to only).
1313  */
1314 typedef void VFPGen2OpSPFn(TCGv_i32 vd, TCGv_i32 vm);
1315 typedef void VFPGen2OpDPFn(TCGv_i64 vd, TCGv_i64 vm);
1316
/*
 * Report whether S register number 'reg' lies in a scalar bank,
 * i.e. is one of s0..s7 (bits [4:3] of the number are both clear).
 */
static inline bool vfp_sreg_is_scalar(int reg)
{
    const int bank_bits = 0x18;

    return !(reg & bank_bits);
}
1325
/*
 * Report whether D register number 'reg' lies in a scalar bank,
 * i.e. is one of d0..d3 or d16..d19 (bits [3:2] of the number are
 * both clear).
 */
static inline bool vfp_dreg_is_scalar(int reg)
{
    const int bank_bits = 0xc;

    return !(reg & bank_bits);
}
1334
/*
 * Step S register number 'reg' forwards by 'delta', wrapping inside
 * its bank: only the low 3 bits change, the bank bits are kept.
 */
static inline int vfp_advance_sreg(int reg, int delta)
{
    const int bank = reg & ~0x7;
    const int index = (reg + delta) & 0x7;

    return index | bank;
}
1343
/*
 * Step D register number 'reg' forwards by 'delta', wrapping inside
 * its bank: only the low 2 bits change, the bank bits are kept.
 */
static inline int vfp_advance_dreg(int reg, int delta)
{
    const int bank = reg & ~0x3;
    const int index = (reg + delta) & 0x3;

    return index | bank;
}
1352
1353 /*
1354  * Perform a 3-operand VFP data processing instruction. fn is the
1355  * callback to do the actual operation; this function deals with the
1356  * code to handle looping around for VFP vector processing.
1357  */
1358 static bool do_vfp_3op_sp(DisasContext *s, VFPGen3OpSPFn *fn,
1359                           int vd, int vn, int vm, bool reads_vd)
1360 {
1361     uint32_t delta_m = 0;
1362     uint32_t delta_d = 0;
1363     int veclen = s->vec_len;
1364     TCGv_i32 f0, f1, fd;
1365     TCGv_ptr fpst;
1366
1367     if (!dc_isar_feature(aa32_fpsp_v2, s)) {
1368         return false;
1369     }
1370
1371     if (!dc_isar_feature(aa32_fpshvec, s) &&
1372         (veclen != 0 || s->vec_stride != 0)) {
1373         return false;
1374     }
1375
1376     if (!vfp_access_check(s)) {
1377         return true;
1378     }
1379
1380     if (veclen > 0) {
1381         /* Figure out what type of vector operation this is.  */
1382         if (vfp_sreg_is_scalar(vd)) {
1383             /* scalar */
1384             veclen = 0;
1385         } else {
1386             delta_d = s->vec_stride + 1;
1387
1388             if (vfp_sreg_is_scalar(vm)) {
1389                 /* mixed scalar/vector */
1390                 delta_m = 0;
1391             } else {
1392                 /* vector */
1393                 delta_m = delta_d;
1394             }
1395         }
1396     }
1397
1398     f0 = tcg_temp_new_i32();
1399     f1 = tcg_temp_new_i32();
1400     fd = tcg_temp_new_i32();
1401     fpst = fpstatus_ptr(FPST_FPCR);
1402
1403     vfp_load_reg32(f0, vn);
1404     vfp_load_reg32(f1, vm);
1405
1406     for (;;) {
1407         if (reads_vd) {
1408             vfp_load_reg32(fd, vd);
1409         }
1410         fn(fd, f0, f1, fpst);
1411         vfp_store_reg32(fd, vd);
1412
1413         if (veclen == 0) {
1414             break;
1415         }
1416
1417         /* Set up the operands for the next iteration */
1418         veclen--;
1419         vd = vfp_advance_sreg(vd, delta_d);
1420         vn = vfp_advance_sreg(vn, delta_d);
1421         vfp_load_reg32(f0, vn);
1422         if (delta_m) {
1423             vm = vfp_advance_sreg(vm, delta_m);
1424             vfp_load_reg32(f1, vm);
1425         }
1426     }
1427     return true;
1428 }
1429
1430 static bool do_vfp_3op_hp(DisasContext *s, VFPGen3OpSPFn *fn,
1431                           int vd, int vn, int vm, bool reads_vd)
1432 {
1433     /*
1434      * Do a half-precision operation. Functionally this is
1435      * the same as do_vfp_3op_sp(), except:
1436      *  - it uses the FPST_FPCR_F16
1437      *  - it doesn't need the VFP vector handling (fp16 is a
1438      *    v8 feature, and in v8 VFP vectors don't exist)
1439      *  - it does the aa32_fp16_arith feature test
1440      */
1441     TCGv_i32 f0, f1, fd;
1442     TCGv_ptr fpst;
1443
1444     if (!dc_isar_feature(aa32_fp16_arith, s)) {
1445         return false;
1446     }
1447
1448     if (s->vec_len != 0 || s->vec_stride != 0) {
1449         return false;
1450     }
1451
1452     if (!vfp_access_check(s)) {
1453         return true;
1454     }
1455
1456     f0 = tcg_temp_new_i32();
1457     f1 = tcg_temp_new_i32();
1458     fd = tcg_temp_new_i32();
1459     fpst = fpstatus_ptr(FPST_FPCR_F16);
1460
1461     vfp_load_reg16(f0, vn);
1462     vfp_load_reg16(f1, vm);
1463
1464     if (reads_vd) {
1465         vfp_load_reg16(fd, vd);
1466     }
1467     fn(fd, f0, f1, fpst);
1468     vfp_store_reg32(fd, vd);
1469     return true;
1470 }
1471
1472 static bool do_vfp_3op_dp(DisasContext *s, VFPGen3OpDPFn *fn,
1473                           int vd, int vn, int vm, bool reads_vd)
1474 {
1475     uint32_t delta_m = 0;
1476     uint32_t delta_d = 0;
1477     int veclen = s->vec_len;
1478     TCGv_i64 f0, f1, fd;
1479     TCGv_ptr fpst;
1480
1481     if (!dc_isar_feature(aa32_fpdp_v2, s)) {
1482         return false;
1483     }
1484
1485     /* UNDEF accesses to D16-D31 if they don't exist */
1486     if (!dc_isar_feature(aa32_simd_r32, s) && ((vd | vn | vm) & 0x10)) {
1487         return false;
1488     }
1489
1490     if (!dc_isar_feature(aa32_fpshvec, s) &&
1491         (veclen != 0 || s->vec_stride != 0)) {
1492         return false;
1493     }
1494
1495     if (!vfp_access_check(s)) {
1496         return true;
1497     }
1498
1499     if (veclen > 0) {
1500         /* Figure out what type of vector operation this is.  */
1501         if (vfp_dreg_is_scalar(vd)) {
1502             /* scalar */
1503             veclen = 0;
1504         } else {
1505             delta_d = (s->vec_stride >> 1) + 1;
1506
1507             if (vfp_dreg_is_scalar(vm)) {
1508                 /* mixed scalar/vector */
1509                 delta_m = 0;
1510             } else {
1511                 /* vector */
1512                 delta_m = delta_d;
1513             }
1514         }
1515     }
1516
1517     f0 = tcg_temp_new_i64();
1518     f1 = tcg_temp_new_i64();
1519     fd = tcg_temp_new_i64();
1520     fpst = fpstatus_ptr(FPST_FPCR);
1521
1522     vfp_load_reg64(f0, vn);
1523     vfp_load_reg64(f1, vm);
1524
1525     for (;;) {
1526         if (reads_vd) {
1527             vfp_load_reg64(fd, vd);
1528         }
1529         fn(fd, f0, f1, fpst);
1530         vfp_store_reg64(fd, vd);
1531
1532         if (veclen == 0) {
1533             break;
1534         }
1535         /* Set up the operands for the next iteration */
1536         veclen--;
1537         vd = vfp_advance_dreg(vd, delta_d);
1538         vn = vfp_advance_dreg(vn, delta_d);
1539         vfp_load_reg64(f0, vn);
1540         if (delta_m) {
1541             vm = vfp_advance_dreg(vm, delta_m);
1542             vfp_load_reg64(f1, vm);
1543         }
1544     }
1545     return true;
1546 }
1547
1548 static bool do_vfp_2op_sp(DisasContext *s, VFPGen2OpSPFn *fn, int vd, int vm)
1549 {
1550     uint32_t delta_m = 0;
1551     uint32_t delta_d = 0;
1552     int veclen = s->vec_len;
1553     TCGv_i32 f0, fd;
1554
1555     /* Note that the caller must check the aa32_fpsp_v2 feature. */
1556
1557     if (!dc_isar_feature(aa32_fpshvec, s) &&
1558         (veclen != 0 || s->vec_stride != 0)) {
1559         return false;
1560     }
1561
1562     if (!vfp_access_check(s)) {
1563         return true;
1564     }
1565
1566     if (veclen > 0) {
1567         /* Figure out what type of vector operation this is.  */
1568         if (vfp_sreg_is_scalar(vd)) {
1569             /* scalar */
1570             veclen = 0;
1571         } else {
1572             delta_d = s->vec_stride + 1;
1573
1574             if (vfp_sreg_is_scalar(vm)) {
1575                 /* mixed scalar/vector */
1576                 delta_m = 0;
1577             } else {
1578                 /* vector */
1579                 delta_m = delta_d;
1580             }
1581         }
1582     }
1583
1584     f0 = tcg_temp_new_i32();
1585     fd = tcg_temp_new_i32();
1586
1587     vfp_load_reg32(f0, vm);
1588
1589     for (;;) {
1590         fn(fd, f0);
1591         vfp_store_reg32(fd, vd);
1592
1593         if (veclen == 0) {
1594             break;
1595         }
1596
1597         if (delta_m == 0) {
1598             /* single source one-many */
1599             while (veclen--) {
1600                 vd = vfp_advance_sreg(vd, delta_d);
1601                 vfp_store_reg32(fd, vd);
1602             }
1603             break;
1604         }
1605
1606         /* Set up the operands for the next iteration */
1607         veclen--;
1608         vd = vfp_advance_sreg(vd, delta_d);
1609         vm = vfp_advance_sreg(vm, delta_m);
1610         vfp_load_reg32(f0, vm);
1611     }
1612     return true;
1613 }
1614
1615 static bool do_vfp_2op_hp(DisasContext *s, VFPGen2OpSPFn *fn, int vd, int vm)
1616 {
1617     /*
1618      * Do a half-precision operation. Functionally this is
1619      * the same as do_vfp_2op_sp(), except:
1620      *  - it doesn't need the VFP vector handling (fp16 is a
1621      *    v8 feature, and in v8 VFP vectors don't exist)
1622      *  - it does the aa32_fp16_arith feature test
1623      */
1624     TCGv_i32 f0;
1625
1626     /* Note that the caller must check the aa32_fp16_arith feature */
1627
1628     if (!dc_isar_feature(aa32_fp16_arith, s)) {
1629         return false;
1630     }
1631
1632     if (s->vec_len != 0 || s->vec_stride != 0) {
1633         return false;
1634     }
1635
1636     if (!vfp_access_check(s)) {
1637         return true;
1638     }
1639
1640     f0 = tcg_temp_new_i32();
1641     vfp_load_reg16(f0, vm);
1642     fn(f0, f0);
1643     vfp_store_reg32(f0, vd);
1644
1645     return true;
1646 }
1647
1648 static bool do_vfp_2op_dp(DisasContext *s, VFPGen2OpDPFn *fn, int vd, int vm)
1649 {
1650     uint32_t delta_m = 0;
1651     uint32_t delta_d = 0;
1652     int veclen = s->vec_len;
1653     TCGv_i64 f0, fd;
1654
1655     /* Note that the caller must check the aa32_fpdp_v2 feature. */
1656
1657     /* UNDEF accesses to D16-D31 if they don't exist */
1658     if (!dc_isar_feature(aa32_simd_r32, s) && ((vd | vm) & 0x10)) {
1659         return false;
1660     }
1661
1662     if (!dc_isar_feature(aa32_fpshvec, s) &&
1663         (veclen != 0 || s->vec_stride != 0)) {
1664         return false;
1665     }
1666
1667     if (!vfp_access_check(s)) {
1668         return true;
1669     }
1670
1671     if (veclen > 0) {
1672         /* Figure out what type of vector operation this is.  */
1673         if (vfp_dreg_is_scalar(vd)) {
1674             /* scalar */
1675             veclen = 0;
1676         } else {
1677             delta_d = (s->vec_stride >> 1) + 1;
1678
1679             if (vfp_dreg_is_scalar(vm)) {
1680                 /* mixed scalar/vector */
1681                 delta_m = 0;
1682             } else {
1683                 /* vector */
1684                 delta_m = delta_d;
1685             }
1686         }
1687     }
1688
1689     f0 = tcg_temp_new_i64();
1690     fd = tcg_temp_new_i64();
1691
1692     vfp_load_reg64(f0, vm);
1693
1694     for (;;) {
1695         fn(fd, f0);
1696         vfp_store_reg64(fd, vd);
1697
1698         if (veclen == 0) {
1699             break;
1700         }
1701
1702         if (delta_m == 0) {
1703             /* single source one-many */
1704             while (veclen--) {
1705                 vd = vfp_advance_dreg(vd, delta_d);
1706                 vfp_store_reg64(fd, vd);
1707             }
1708             break;
1709         }
1710
1711         /* Set up the operands for the next iteration */
1712         veclen--;
1713         vd = vfp_advance_dreg(vd, delta_d);
1714         vd = vfp_advance_dreg(vm, delta_m);
1715         vfp_load_reg64(f0, vm);
1716     }
1717     return true;
1718 }
1719
1720 static void gen_VMLA_hp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
1721 {
1722     /* Note that order of inputs to the add matters for NaNs */
1723     TCGv_i32 tmp = tcg_temp_new_i32();
1724
1725     gen_helper_vfp_mulh(tmp, vn, vm, fpst);
1726     gen_helper_vfp_addh(vd, vd, tmp, fpst);
1727 }
1728
1729 static bool trans_VMLA_hp(DisasContext *s, arg_VMLA_sp *a)
1730 {
1731     return do_vfp_3op_hp(s, gen_VMLA_hp, a->vd, a->vn, a->vm, true);
1732 }
1733
1734 static void gen_VMLA_sp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
1735 {
1736     /* Note that order of inputs to the add matters for NaNs */
1737     TCGv_i32 tmp = tcg_temp_new_i32();
1738
1739     gen_helper_vfp_muls(tmp, vn, vm, fpst);
1740     gen_helper_vfp_adds(vd, vd, tmp, fpst);
1741 }
1742
1743 static bool trans_VMLA_sp(DisasContext *s, arg_VMLA_sp *a)
1744 {
1745     return do_vfp_3op_sp(s, gen_VMLA_sp, a->vd, a->vn, a->vm, true);
1746 }
1747
1748 static void gen_VMLA_dp(TCGv_i64 vd, TCGv_i64 vn, TCGv_i64 vm, TCGv_ptr fpst)
1749 {
1750     /* Note that order of inputs to the add matters for NaNs */
1751     TCGv_i64 tmp = tcg_temp_new_i64();
1752
1753     gen_helper_vfp_muld(tmp, vn, vm, fpst);
1754     gen_helper_vfp_addd(vd, vd, tmp, fpst);
1755 }
1756
1757 static bool trans_VMLA_dp(DisasContext *s, arg_VMLA_dp *a)
1758 {
1759     return do_vfp_3op_dp(s, gen_VMLA_dp, a->vd, a->vn, a->vm, true);
1760 }
1761
1762 static void gen_VMLS_hp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
1763 {
1764     /*
1765      * VMLS: vd = vd + -(vn * vm)
1766      * Note that order of inputs to the add matters for NaNs.
1767      */
1768     TCGv_i32 tmp = tcg_temp_new_i32();
1769
1770     gen_helper_vfp_mulh(tmp, vn, vm, fpst);
1771     gen_vfp_negh(tmp, tmp);
1772     gen_helper_vfp_addh(vd, vd, tmp, fpst);
1773 }
1774
1775 static bool trans_VMLS_hp(DisasContext *s, arg_VMLS_sp *a)
1776 {
1777     return do_vfp_3op_hp(s, gen_VMLS_hp, a->vd, a->vn, a->vm, true);
1778 }
1779
1780 static void gen_VMLS_sp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
1781 {
1782     /*
1783      * VMLS: vd = vd + -(vn * vm)
1784      * Note that order of inputs to the add matters for NaNs.
1785      */
1786     TCGv_i32 tmp = tcg_temp_new_i32();
1787
1788     gen_helper_vfp_muls(tmp, vn, vm, fpst);
1789     gen_vfp_negs(tmp, tmp);
1790     gen_helper_vfp_adds(vd, vd, tmp, fpst);
1791 }
1792
1793 static bool trans_VMLS_sp(DisasContext *s, arg_VMLS_sp *a)
1794 {
1795     return do_vfp_3op_sp(s, gen_VMLS_sp, a->vd, a->vn, a->vm, true);
1796 }
1797
1798 static void gen_VMLS_dp(TCGv_i64 vd, TCGv_i64 vn, TCGv_i64 vm, TCGv_ptr fpst)
1799 {
1800     /*
1801      * VMLS: vd = vd + -(vn * vm)
1802      * Note that order of inputs to the add matters for NaNs.
1803      */
1804     TCGv_i64 tmp = tcg_temp_new_i64();
1805
1806     gen_helper_vfp_muld(tmp, vn, vm, fpst);
1807     gen_vfp_negd(tmp, tmp);
1808     gen_helper_vfp_addd(vd, vd, tmp, fpst);
1809 }
1810
1811 static bool trans_VMLS_dp(DisasContext *s, arg_VMLS_dp *a)
1812 {
1813     return do_vfp_3op_dp(s, gen_VMLS_dp, a->vd, a->vn, a->vm, true);
1814 }
1815
1816 static void gen_VNMLS_hp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
1817 {
1818     /*
1819      * VNMLS: -fd + (fn * fm)
1820      * Note that it isn't valid to replace (-A + B) with (B - A) or similar
1821      * plausible looking simplifications because this will give wrong results
1822      * for NaNs.
1823      */
1824     TCGv_i32 tmp = tcg_temp_new_i32();
1825
1826     gen_helper_vfp_mulh(tmp, vn, vm, fpst);
1827     gen_vfp_negh(vd, vd);
1828     gen_helper_vfp_addh(vd, vd, tmp, fpst);
1829 }
1830
1831 static bool trans_VNMLS_hp(DisasContext *s, arg_VNMLS_sp *a)
1832 {
1833     return do_vfp_3op_hp(s, gen_VNMLS_hp, a->vd, a->vn, a->vm, true);
1834 }
1835
1836 static void gen_VNMLS_sp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
1837 {
1838     /*
1839      * VNMLS: -fd + (fn * fm)
1840      * Note that it isn't valid to replace (-A + B) with (B - A) or similar
1841      * plausible looking simplifications because this will give wrong results
1842      * for NaNs.
1843      */
1844     TCGv_i32 tmp = tcg_temp_new_i32();
1845
1846     gen_helper_vfp_muls(tmp, vn, vm, fpst);
1847     gen_vfp_negs(vd, vd);
1848     gen_helper_vfp_adds(vd, vd, tmp, fpst);
1849 }
1850
1851 static bool trans_VNMLS_sp(DisasContext *s, arg_VNMLS_sp *a)
1852 {
1853     return do_vfp_3op_sp(s, gen_VNMLS_sp, a->vd, a->vn, a->vm, true);
1854 }
1855
1856 static void gen_VNMLS_dp(TCGv_i64 vd, TCGv_i64 vn, TCGv_i64 vm, TCGv_ptr fpst)
1857 {
1858     /*
1859      * VNMLS: -fd + (fn * fm)
1860      * Note that it isn't valid to replace (-A + B) with (B - A) or similar
1861      * plausible looking simplifications because this will give wrong results
1862      * for NaNs.
1863      */
1864     TCGv_i64 tmp = tcg_temp_new_i64();
1865
1866     gen_helper_vfp_muld(tmp, vn, vm, fpst);
1867     gen_vfp_negd(vd, vd);
1868     gen_helper_vfp_addd(vd, vd, tmp, fpst);
1869 }
1870
1871 static bool trans_VNMLS_dp(DisasContext *s, arg_VNMLS_dp *a)
1872 {
1873     return do_vfp_3op_dp(s, gen_VNMLS_dp, a->vd, a->vn, a->vm, true);
1874 }
1875
1876 static void gen_VNMLA_hp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
1877 {
1878     /* VNMLA: -fd + -(fn * fm) */
1879     TCGv_i32 tmp = tcg_temp_new_i32();
1880
1881     gen_helper_vfp_mulh(tmp, vn, vm, fpst);
1882     gen_vfp_negh(tmp, tmp);
1883     gen_vfp_negh(vd, vd);
1884     gen_helper_vfp_addh(vd, vd, tmp, fpst);
1885 }
1886
1887 static bool trans_VNMLA_hp(DisasContext *s, arg_VNMLA_sp *a)
1888 {
1889     return do_vfp_3op_hp(s, gen_VNMLA_hp, a->vd, a->vn, a->vm, true);
1890 }
1891
1892 static void gen_VNMLA_sp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
1893 {
1894     /* VNMLA: -fd + -(fn * fm) */
1895     TCGv_i32 tmp = tcg_temp_new_i32();
1896
1897     gen_helper_vfp_muls(tmp, vn, vm, fpst);
1898     gen_vfp_negs(tmp, tmp);
1899     gen_vfp_negs(vd, vd);
1900     gen_helper_vfp_adds(vd, vd, tmp, fpst);
1901 }
1902
1903 static bool trans_VNMLA_sp(DisasContext *s, arg_VNMLA_sp *a)
1904 {
1905     return do_vfp_3op_sp(s, gen_VNMLA_sp, a->vd, a->vn, a->vm, true);
1906 }
1907
1908 static void gen_VNMLA_dp(TCGv_i64 vd, TCGv_i64 vn, TCGv_i64 vm, TCGv_ptr fpst)
1909 {
1910     /* VNMLA: -fd + (fn * fm) */
1911     TCGv_i64 tmp = tcg_temp_new_i64();
1912
1913     gen_helper_vfp_muld(tmp, vn, vm, fpst);
1914     gen_vfp_negd(tmp, tmp);
1915     gen_vfp_negd(vd, vd);
1916     gen_helper_vfp_addd(vd, vd, tmp, fpst);
1917 }
1918
1919 static bool trans_VNMLA_dp(DisasContext *s, arg_VNMLA_dp *a)
1920 {
1921     return do_vfp_3op_dp(s, gen_VNMLA_dp, a->vd, a->vn, a->vm, true);
1922 }
1923
1924 static bool trans_VMUL_hp(DisasContext *s, arg_VMUL_sp *a)
1925 {
1926     return do_vfp_3op_hp(s, gen_helper_vfp_mulh, a->vd, a->vn, a->vm, false);
1927 }
1928
1929 static bool trans_VMUL_sp(DisasContext *s, arg_VMUL_sp *a)
1930 {
1931     return do_vfp_3op_sp(s, gen_helper_vfp_muls, a->vd, a->vn, a->vm, false);
1932 }
1933
1934 static bool trans_VMUL_dp(DisasContext *s, arg_VMUL_dp *a)
1935 {
1936     return do_vfp_3op_dp(s, gen_helper_vfp_muld, a->vd, a->vn, a->vm, false);
1937 }
1938
1939 static void gen_VNMUL_hp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
1940 {
1941     /* VNMUL: -(fn * fm) */
1942     gen_helper_vfp_mulh(vd, vn, vm, fpst);
1943     gen_vfp_negh(vd, vd);
1944 }
1945
1946 static bool trans_VNMUL_hp(DisasContext *s, arg_VNMUL_sp *a)
1947 {
1948     return do_vfp_3op_hp(s, gen_VNMUL_hp, a->vd, a->vn, a->vm, false);
1949 }
1950
1951 static void gen_VNMUL_sp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
1952 {
1953     /* VNMUL: -(fn * fm) */
1954     gen_helper_vfp_muls(vd, vn, vm, fpst);
1955     gen_vfp_negs(vd, vd);
1956 }
1957
1958 static bool trans_VNMUL_sp(DisasContext *s, arg_VNMUL_sp *a)
1959 {
1960     return do_vfp_3op_sp(s, gen_VNMUL_sp, a->vd, a->vn, a->vm, false);
1961 }
1962
1963 static void gen_VNMUL_dp(TCGv_i64 vd, TCGv_i64 vn, TCGv_i64 vm, TCGv_ptr fpst)
1964 {
1965     /* VNMUL: -(fn * fm) */
1966     gen_helper_vfp_muld(vd, vn, vm, fpst);
1967     gen_vfp_negd(vd, vd);
1968 }
1969
1970 static bool trans_VNMUL_dp(DisasContext *s, arg_VNMUL_dp *a)
1971 {
1972     return do_vfp_3op_dp(s, gen_VNMUL_dp, a->vd, a->vn, a->vm, false);
1973 }
1974
1975 static bool trans_VADD_hp(DisasContext *s, arg_VADD_sp *a)
1976 {
1977     return do_vfp_3op_hp(s, gen_helper_vfp_addh, a->vd, a->vn, a->vm, false);
1978 }
1979
1980 static bool trans_VADD_sp(DisasContext *s, arg_VADD_sp *a)
1981 {
1982     return do_vfp_3op_sp(s, gen_helper_vfp_adds, a->vd, a->vn, a->vm, false);
1983 }
1984
1985 static bool trans_VADD_dp(DisasContext *s, arg_VADD_dp *a)
1986 {
1987     return do_vfp_3op_dp(s, gen_helper_vfp_addd, a->vd, a->vn, a->vm, false);
1988 }
1989
1990 static bool trans_VSUB_hp(DisasContext *s, arg_VSUB_sp *a)
1991 {
1992     return do_vfp_3op_hp(s, gen_helper_vfp_subh, a->vd, a->vn, a->vm, false);
1993 }
1994
1995 static bool trans_VSUB_sp(DisasContext *s, arg_VSUB_sp *a)
1996 {
1997     return do_vfp_3op_sp(s, gen_helper_vfp_subs, a->vd, a->vn, a->vm, false);
1998 }
1999
2000 static bool trans_VSUB_dp(DisasContext *s, arg_VSUB_dp *a)
2001 {
2002     return do_vfp_3op_dp(s, gen_helper_vfp_subd, a->vd, a->vn, a->vm, false);
2003 }
2004
2005 static bool trans_VDIV_hp(DisasContext *s, arg_VDIV_sp *a)
2006 {
2007     return do_vfp_3op_hp(s, gen_helper_vfp_divh, a->vd, a->vn, a->vm, false);
2008 }
2009
2010 static bool trans_VDIV_sp(DisasContext *s, arg_VDIV_sp *a)
2011 {
2012     return do_vfp_3op_sp(s, gen_helper_vfp_divs, a->vd, a->vn, a->vm, false);
2013 }
2014
2015 static bool trans_VDIV_dp(DisasContext *s, arg_VDIV_dp *a)
2016 {
2017     return do_vfp_3op_dp(s, gen_helper_vfp_divd, a->vd, a->vn, a->vm, false);
2018 }
2019
2020 static bool trans_VMINNM_hp(DisasContext *s, arg_VMINNM_sp *a)
2021 {
2022     if (!dc_isar_feature(aa32_vminmaxnm, s)) {
2023         return false;
2024     }
2025     return do_vfp_3op_hp(s, gen_helper_vfp_minnumh,
2026                          a->vd, a->vn, a->vm, false);
2027 }
2028
2029 static bool trans_VMAXNM_hp(DisasContext *s, arg_VMAXNM_sp *a)
2030 {
2031     if (!dc_isar_feature(aa32_vminmaxnm, s)) {
2032         return false;
2033     }
2034     return do_vfp_3op_hp(s, gen_helper_vfp_maxnumh,
2035                          a->vd, a->vn, a->vm, false);
2036 }
2037
2038 static bool trans_VMINNM_sp(DisasContext *s, arg_VMINNM_sp *a)
2039 {
2040     if (!dc_isar_feature(aa32_vminmaxnm, s)) {
2041         return false;
2042     }
2043     return do_vfp_3op_sp(s, gen_helper_vfp_minnums,
2044                          a->vd, a->vn, a->vm, false);
2045 }
2046
2047 static bool trans_VMAXNM_sp(DisasContext *s, arg_VMAXNM_sp *a)
2048 {
2049     if (!dc_isar_feature(aa32_vminmaxnm, s)) {
2050         return false;
2051     }
2052     return do_vfp_3op_sp(s, gen_helper_vfp_maxnums,
2053                          a->vd, a->vn, a->vm, false);
2054 }
2055
2056 static bool trans_VMINNM_dp(DisasContext *s, arg_VMINNM_dp *a)
2057 {
2058     if (!dc_isar_feature(aa32_vminmaxnm, s)) {
2059         return false;
2060     }
2061     return do_vfp_3op_dp(s, gen_helper_vfp_minnumd,
2062                          a->vd, a->vn, a->vm, false);
2063 }
2064
2065 static bool trans_VMAXNM_dp(DisasContext *s, arg_VMAXNM_dp *a)
2066 {
2067     if (!dc_isar_feature(aa32_vminmaxnm, s)) {
2068         return false;
2069     }
2070     return do_vfp_3op_dp(s, gen_helper_vfp_maxnumd,
2071                          a->vd, a->vn, a->vm, false);
2072 }
2073
2074 static bool do_vfm_hp(DisasContext *s, arg_VFMA_sp *a, bool neg_n, bool neg_d)
2075 {
2076     /*
2077      * VFNMA : fd = muladd(-fd,  fn, fm)
2078      * VFNMS : fd = muladd(-fd, -fn, fm)
2079      * VFMA  : fd = muladd( fd,  fn, fm)
2080      * VFMS  : fd = muladd( fd, -fn, fm)
2081      *
2082      * These are fused multiply-add, and must be done as one floating
2083      * point operation with no rounding between the multiplication and
2084      * addition steps.  NB that doing the negations here as separate
2085      * steps is correct : an input NaN should come out with its sign
2086      * bit flipped if it is a negated-input.
2087      */
2088     TCGv_ptr fpst;
2089     TCGv_i32 vn, vm, vd;
2090
2091     /*
2092      * Present in VFPv4 only, and only with the FP16 extension.
2093      * Note that we can't rely on the SIMDFMAC check alone, because
2094      * in a Neon-no-VFP core that ID register field will be non-zero.
2095      */
2096     if (!dc_isar_feature(aa32_fp16_arith, s) ||
2097         !dc_isar_feature(aa32_simdfmac, s) ||
2098         !dc_isar_feature(aa32_fpsp_v2, s)) {
2099         return false;
2100     }
2101
2102     if (s->vec_len != 0 || s->vec_stride != 0) {
2103         return false;
2104     }
2105
2106     if (!vfp_access_check(s)) {
2107         return true;
2108     }
2109
2110     vn = tcg_temp_new_i32();
2111     vm = tcg_temp_new_i32();
2112     vd = tcg_temp_new_i32();
2113
2114     vfp_load_reg16(vn, a->vn);
2115     vfp_load_reg16(vm, a->vm);
2116     if (neg_n) {
2117         /* VFNMS, VFMS */
2118         gen_vfp_negh(vn, vn);
2119     }
2120     vfp_load_reg16(vd, a->vd);
2121     if (neg_d) {
2122         /* VFNMA, VFNMS */
2123         gen_vfp_negh(vd, vd);
2124     }
2125     fpst = fpstatus_ptr(FPST_FPCR_F16);
2126     gen_helper_vfp_muladdh(vd, vn, vm, vd, fpst);
2127     vfp_store_reg32(vd, a->vd);
2128     return true;
2129 }
2130
2131 static bool do_vfm_sp(DisasContext *s, arg_VFMA_sp *a, bool neg_n, bool neg_d)
2132 {
2133     /*
2134      * VFNMA : fd = muladd(-fd,  fn, fm)
2135      * VFNMS : fd = muladd(-fd, -fn, fm)
2136      * VFMA  : fd = muladd( fd,  fn, fm)
2137      * VFMS  : fd = muladd( fd, -fn, fm)
2138      *
2139      * These are fused multiply-add, and must be done as one floating
2140      * point operation with no rounding between the multiplication and
2141      * addition steps.  NB that doing the negations here as separate
2142      * steps is correct : an input NaN should come out with its sign
2143      * bit flipped if it is a negated-input.
2144      */
2145     TCGv_ptr fpst;
2146     TCGv_i32 vn, vm, vd;
2147
2148     /*
2149      * Present in VFPv4 only.
2150      * Note that we can't rely on the SIMDFMAC check alone, because
2151      * in a Neon-no-VFP core that ID register field will be non-zero.
2152      */
2153     if (!dc_isar_feature(aa32_simdfmac, s) ||
2154         !dc_isar_feature(aa32_fpsp_v2, s)) {
2155         return false;
2156     }
2157     /*
2158      * In v7A, UNPREDICTABLE with non-zero vector length/stride; from
2159      * v8A, must UNDEF. We choose to UNDEF for both v7A and v8A.
2160      */
2161     if (s->vec_len != 0 || s->vec_stride != 0) {
2162         return false;
2163     }
2164
2165     if (!vfp_access_check(s)) {
2166         return true;
2167     }
2168
2169     vn = tcg_temp_new_i32();
2170     vm = tcg_temp_new_i32();
2171     vd = tcg_temp_new_i32();
2172
2173     vfp_load_reg32(vn, a->vn);
2174     vfp_load_reg32(vm, a->vm);
2175     if (neg_n) {
2176         /* VFNMS, VFMS */
2177         gen_vfp_negs(vn, vn);
2178     }
2179     vfp_load_reg32(vd, a->vd);
2180     if (neg_d) {
2181         /* VFNMA, VFNMS */
2182         gen_vfp_negs(vd, vd);
2183     }
2184     fpst = fpstatus_ptr(FPST_FPCR);
2185     gen_helper_vfp_muladds(vd, vn, vm, vd, fpst);
2186     vfp_store_reg32(vd, a->vd);
2187     return true;
2188 }
2189
2190 static bool do_vfm_dp(DisasContext *s, arg_VFMA_dp *a, bool neg_n, bool neg_d)
2191 {
2192     /*
2193      * VFNMA : fd = muladd(-fd, -fn, fm)
2194      * VFNMS : fd = muladd(-fd,  fn, fm)
2195      * VFMA  : fd = muladd( fd,  fn, fm)
2196      * VFMS  : fd = muladd( fd, -fn, fm)
2197      *
2198      * These are fused multiply-add, and must be done as one floating
2199      * point operation with no rounding between the multiplication and
2200      * addition steps.  NB that doing the negations here as separate
2201      * steps is correct : an input NaN should come out with its sign
2202      * bit flipped if it is a negated-input.
2203      */
2204     TCGv_ptr fpst;
2205     TCGv_i64 vn, vm, vd;
2206
2207     /*
2208      * Present in VFPv4 only.
2209      * Note that we can't rely on the SIMDFMAC check alone, because
2210      * in a Neon-no-VFP core that ID register field will be non-zero.
2211      */
2212     if (!dc_isar_feature(aa32_simdfmac, s) ||
2213         !dc_isar_feature(aa32_fpdp_v2, s)) {
2214         return false;
2215     }
2216     /*
2217      * In v7A, UNPREDICTABLE with non-zero vector length/stride; from
2218      * v8A, must UNDEF. We choose to UNDEF for both v7A and v8A.
2219      */
2220     if (s->vec_len != 0 || s->vec_stride != 0) {
2221         return false;
2222     }
2223
2224     /* UNDEF accesses to D16-D31 if they don't exist. */
2225     if (!dc_isar_feature(aa32_simd_r32, s) &&
2226         ((a->vd | a->vn | a->vm) & 0x10)) {
2227         return false;
2228     }
2229
2230     if (!vfp_access_check(s)) {
2231         return true;
2232     }
2233
2234     vn = tcg_temp_new_i64();
2235     vm = tcg_temp_new_i64();
2236     vd = tcg_temp_new_i64();
2237
2238     vfp_load_reg64(vn, a->vn);
2239     vfp_load_reg64(vm, a->vm);
2240     if (neg_n) {
2241         /* VFNMS, VFMS */
2242         gen_vfp_negd(vn, vn);
2243     }
2244     vfp_load_reg64(vd, a->vd);
2245     if (neg_d) {
2246         /* VFNMA, VFNMS */
2247         gen_vfp_negd(vd, vd);
2248     }
2249     fpst = fpstatus_ptr(FPST_FPCR);
2250     gen_helper_vfp_muladdd(vd, vn, vm, vd, fpst);
2251     vfp_store_reg64(vd, a->vd);
2252     return true;
2253 }
2254
2255 #define MAKE_ONE_VFM_TRANS_FN(INSN, PREC, NEGN, NEGD)                   \
2256     static bool trans_##INSN##_##PREC(DisasContext *s,                  \
2257                                       arg_##INSN##_##PREC *a)           \
2258     {                                                                   \
2259         return do_vfm_##PREC(s, a, NEGN, NEGD);                         \
2260     }
2261
2262 #define MAKE_VFM_TRANS_FNS(PREC) \
2263     MAKE_ONE_VFM_TRANS_FN(VFMA, PREC, false, false) \
2264     MAKE_ONE_VFM_TRANS_FN(VFMS, PREC, true, false) \
2265     MAKE_ONE_VFM_TRANS_FN(VFNMS, PREC, false, true) \
2266     MAKE_ONE_VFM_TRANS_FN(VFNMA, PREC, true, true)
2267
2268 MAKE_VFM_TRANS_FNS(hp)
2269 MAKE_VFM_TRANS_FNS(sp)
2270 MAKE_VFM_TRANS_FNS(dp)
2271
2272 static bool trans_VMOV_imm_hp(DisasContext *s, arg_VMOV_imm_sp *a)
2273 {
2274     if (!dc_isar_feature(aa32_fp16_arith, s)) {
2275         return false;
2276     }
2277
2278     if (s->vec_len != 0 || s->vec_stride != 0) {
2279         return false;
2280     }
2281
2282     if (!vfp_access_check(s)) {
2283         return true;
2284     }
2285
2286     vfp_store_reg32(tcg_constant_i32(vfp_expand_imm(MO_16, a->imm)), a->vd);
2287     return true;
2288 }
2289
2290 static bool trans_VMOV_imm_sp(DisasContext *s, arg_VMOV_imm_sp *a)
2291 {
2292     uint32_t delta_d = 0;
2293     int veclen = s->vec_len;
2294     TCGv_i32 fd;
2295     uint32_t vd;
2296
2297     vd = a->vd;
2298
2299     if (!dc_isar_feature(aa32_fpsp_v3, s)) {
2300         return false;
2301     }
2302
2303     if (!dc_isar_feature(aa32_fpshvec, s) &&
2304         (veclen != 0 || s->vec_stride != 0)) {
2305         return false;
2306     }
2307
2308     if (!vfp_access_check(s)) {
2309         return true;
2310     }
2311
2312     if (veclen > 0) {
2313         /* Figure out what type of vector operation this is.  */
2314         if (vfp_sreg_is_scalar(vd)) {
2315             /* scalar */
2316             veclen = 0;
2317         } else {
2318             delta_d = s->vec_stride + 1;
2319         }
2320     }
2321
2322     fd = tcg_constant_i32(vfp_expand_imm(MO_32, a->imm));
2323
2324     for (;;) {
2325         vfp_store_reg32(fd, vd);
2326
2327         if (veclen == 0) {
2328             break;
2329         }
2330
2331         /* Set up the operands for the next iteration */
2332         veclen--;
2333         vd = vfp_advance_sreg(vd, delta_d);
2334     }
2335
2336     return true;
2337 }
2338
2339 static bool trans_VMOV_imm_dp(DisasContext *s, arg_VMOV_imm_dp *a)
2340 {
2341     uint32_t delta_d = 0;
2342     int veclen = s->vec_len;
2343     TCGv_i64 fd;
2344     uint32_t vd;
2345
2346     vd = a->vd;
2347
2348     if (!dc_isar_feature(aa32_fpdp_v3, s)) {
2349         return false;
2350     }
2351
2352     /* UNDEF accesses to D16-D31 if they don't exist. */
2353     if (!dc_isar_feature(aa32_simd_r32, s) && (vd & 0x10)) {
2354         return false;
2355     }
2356
2357     if (!dc_isar_feature(aa32_fpshvec, s) &&
2358         (veclen != 0 || s->vec_stride != 0)) {
2359         return false;
2360     }
2361
2362     if (!vfp_access_check(s)) {
2363         return true;
2364     }
2365
2366     if (veclen > 0) {
2367         /* Figure out what type of vector operation this is.  */
2368         if (vfp_dreg_is_scalar(vd)) {
2369             /* scalar */
2370             veclen = 0;
2371         } else {
2372             delta_d = (s->vec_stride >> 1) + 1;
2373         }
2374     }
2375
2376     fd = tcg_constant_i64(vfp_expand_imm(MO_64, a->imm));
2377
2378     for (;;) {
2379         vfp_store_reg64(fd, vd);
2380
2381         if (veclen == 0) {
2382             break;
2383         }
2384
2385         /* Set up the operands for the next iteration */
2386         veclen--;
2387         vd = vfp_advance_dreg(vd, delta_d);
2388     }
2389
2390     return true;
2391 }
2392
/*
 * Define trans_<INSN>_<PREC>() for a two-operand insn: it returns false
 * when the ISA feature CHECK is absent, and otherwise the result of
 * do_vfp_2op_<PREC>() called with generator FN.
 */
#define DO_VFP_2OP(INSN, PREC, FN, CHECK)                       \
    static bool trans_##INSN##_##PREC(DisasContext *s,          \
                                      arg_##INSN##_##PREC *a)   \
    {                                                           \
        return dc_isar_feature(CHECK, s) &&                     \
               do_vfp_2op_##PREC(s, FN, a->vd, a->vm);          \
    }
2402
2403 #define DO_VFP_VMOV(INSN, PREC, FN)                             \
2404     static bool trans_##INSN##_##PREC(DisasContext *s,          \
2405                                       arg_##INSN##_##PREC *a)   \
2406     {                                                           \
2407         if (!dc_isar_feature(aa32_fp##PREC##_v2, s) &&          \
2408             !dc_isar_feature(aa32_mve, s)) {                    \
2409             return false;                                       \
2410         }                                                       \
2411         return do_vfp_2op_##PREC(s, FN, a->vd, a->vm);          \
2412     }
2413
2414 DO_VFP_VMOV(VMOV_reg, sp, tcg_gen_mov_i32)
2415 DO_VFP_VMOV(VMOV_reg, dp, tcg_gen_mov_i64)
2416
2417 DO_VFP_2OP(VABS, hp, gen_vfp_absh, aa32_fp16_arith)
2418 DO_VFP_2OP(VABS, sp, gen_vfp_abss, aa32_fpsp_v2)
2419 DO_VFP_2OP(VABS, dp, gen_vfp_absd, aa32_fpdp_v2)
2420
2421 DO_VFP_2OP(VNEG, hp, gen_vfp_negh, aa32_fp16_arith)
2422 DO_VFP_2OP(VNEG, sp, gen_vfp_negs, aa32_fpsp_v2)
2423 DO_VFP_2OP(VNEG, dp, gen_vfp_negd, aa32_fpdp_v2)
2424
2425 static void gen_VSQRT_hp(TCGv_i32 vd, TCGv_i32 vm)
2426 {
2427     gen_helper_vfp_sqrth(vd, vm, tcg_env);
2428 }
2429
2430 static void gen_VSQRT_sp(TCGv_i32 vd, TCGv_i32 vm)
2431 {
2432     gen_helper_vfp_sqrts(vd, vm, tcg_env);
2433 }
2434
2435 static void gen_VSQRT_dp(TCGv_i64 vd, TCGv_i64 vm)
2436 {
2437     gen_helper_vfp_sqrtd(vd, vm, tcg_env);
2438 }
2439
2440 DO_VFP_2OP(VSQRT, hp, gen_VSQRT_hp, aa32_fp16_arith)
2441 DO_VFP_2OP(VSQRT, sp, gen_VSQRT_sp, aa32_fpsp_v2)
2442 DO_VFP_2OP(VSQRT, dp, gen_VSQRT_dp, aa32_fpdp_v2)
2443
2444 static bool trans_VCMP_hp(DisasContext *s, arg_VCMP_sp *a)
2445 {
2446     TCGv_i32 vd, vm;
2447
2448     if (!dc_isar_feature(aa32_fp16_arith, s)) {
2449         return false;
2450     }
2451
2452     /* Vm/M bits must be zero for the Z variant */
2453     if (a->z && a->vm != 0) {
2454         return false;
2455     }
2456
2457     if (!vfp_access_check(s)) {
2458         return true;
2459     }
2460
2461     vd = tcg_temp_new_i32();
2462     vm = tcg_temp_new_i32();
2463
2464     vfp_load_reg16(vd, a->vd);
2465     if (a->z) {
2466         tcg_gen_movi_i32(vm, 0);
2467     } else {
2468         vfp_load_reg16(vm, a->vm);
2469     }
2470
2471     if (a->e) {
2472         gen_helper_vfp_cmpeh(vd, vm, tcg_env);
2473     } else {
2474         gen_helper_vfp_cmph(vd, vm, tcg_env);
2475     }
2476     return true;
2477 }
2478
2479 static bool trans_VCMP_sp(DisasContext *s, arg_VCMP_sp *a)
2480 {
2481     TCGv_i32 vd, vm;
2482
2483     if (!dc_isar_feature(aa32_fpsp_v2, s)) {
2484         return false;
2485     }
2486
2487     /* Vm/M bits must be zero for the Z variant */
2488     if (a->z && a->vm != 0) {
2489         return false;
2490     }
2491
2492     if (!vfp_access_check(s)) {
2493         return true;
2494     }
2495
2496     vd = tcg_temp_new_i32();
2497     vm = tcg_temp_new_i32();
2498
2499     vfp_load_reg32(vd, a->vd);
2500     if (a->z) {
2501         tcg_gen_movi_i32(vm, 0);
2502     } else {
2503         vfp_load_reg32(vm, a->vm);
2504     }
2505
2506     if (a->e) {
2507         gen_helper_vfp_cmpes(vd, vm, tcg_env);
2508     } else {
2509         gen_helper_vfp_cmps(vd, vm, tcg_env);
2510     }
2511     return true;
2512 }
2513
2514 static bool trans_VCMP_dp(DisasContext *s, arg_VCMP_dp *a)
2515 {
2516     TCGv_i64 vd, vm;
2517
2518     if (!dc_isar_feature(aa32_fpdp_v2, s)) {
2519         return false;
2520     }
2521
2522     /* Vm/M bits must be zero for the Z variant */
2523     if (a->z && a->vm != 0) {
2524         return false;
2525     }
2526
2527     /* UNDEF accesses to D16-D31 if they don't exist. */
2528     if (!dc_isar_feature(aa32_simd_r32, s) && ((a->vd | a->vm) & 0x10)) {
2529         return false;
2530     }
2531
2532     if (!vfp_access_check(s)) {
2533         return true;
2534     }
2535
2536     vd = tcg_temp_new_i64();
2537     vm = tcg_temp_new_i64();
2538
2539     vfp_load_reg64(vd, a->vd);
2540     if (a->z) {
2541         tcg_gen_movi_i64(vm, 0);
2542     } else {
2543         vfp_load_reg64(vm, a->vm);
2544     }
2545
2546     if (a->e) {
2547         gen_helper_vfp_cmped(vd, vm, tcg_env);
2548     } else {
2549         gen_helper_vfp_cmpd(vd, vm, tcg_env);
2550     }
2551     return true;
2552 }
2553
2554 static bool trans_VCVT_f32_f16(DisasContext *s, arg_VCVT_f32_f16 *a)
2555 {
2556     TCGv_ptr fpst;
2557     TCGv_i32 ahp_mode;
2558     TCGv_i32 tmp;
2559
2560     if (!dc_isar_feature(aa32_fp16_spconv, s)) {
2561         return false;
2562     }
2563
2564     if (!vfp_access_check(s)) {
2565         return true;
2566     }
2567
2568     fpst = fpstatus_ptr(FPST_FPCR);
2569     ahp_mode = get_ahp_flag();
2570     tmp = tcg_temp_new_i32();
2571     /* The T bit tells us if we want the low or high 16 bits of Vm */
2572     tcg_gen_ld16u_i32(tmp, tcg_env, vfp_f16_offset(a->vm, a->t));
2573     gen_helper_vfp_fcvt_f16_to_f32(tmp, tmp, fpst, ahp_mode);
2574     vfp_store_reg32(tmp, a->vd);
2575     return true;
2576 }
2577
2578 static bool trans_VCVT_f64_f16(DisasContext *s, arg_VCVT_f64_f16 *a)
2579 {
2580     TCGv_ptr fpst;
2581     TCGv_i32 ahp_mode;
2582     TCGv_i32 tmp;
2583     TCGv_i64 vd;
2584
2585     if (!dc_isar_feature(aa32_fpdp_v2, s)) {
2586         return false;
2587     }
2588
2589     if (!dc_isar_feature(aa32_fp16_dpconv, s)) {
2590         return false;
2591     }
2592
2593     /* UNDEF accesses to D16-D31 if they don't exist. */
2594     if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd  & 0x10)) {
2595         return false;
2596     }
2597
2598     if (!vfp_access_check(s)) {
2599         return true;
2600     }
2601
2602     fpst = fpstatus_ptr(FPST_FPCR);
2603     ahp_mode = get_ahp_flag();
2604     tmp = tcg_temp_new_i32();
2605     /* The T bit tells us if we want the low or high 16 bits of Vm */
2606     tcg_gen_ld16u_i32(tmp, tcg_env, vfp_f16_offset(a->vm, a->t));
2607     vd = tcg_temp_new_i64();
2608     gen_helper_vfp_fcvt_f16_to_f64(vd, tmp, fpst, ahp_mode);
2609     vfp_store_reg64(vd, a->vd);
2610     return true;
2611 }
2612
2613 static bool trans_VCVT_b16_f32(DisasContext *s, arg_VCVT_b16_f32 *a)
2614 {
2615     TCGv_ptr fpst;
2616     TCGv_i32 tmp;
2617
2618     if (!dc_isar_feature(aa32_bf16, s)) {
2619         return false;
2620     }
2621
2622     if (!vfp_access_check(s)) {
2623         return true;
2624     }
2625
2626     fpst = fpstatus_ptr(FPST_FPCR);
2627     tmp = tcg_temp_new_i32();
2628
2629     vfp_load_reg32(tmp, a->vm);
2630     gen_helper_bfcvt(tmp, tmp, fpst);
2631     tcg_gen_st16_i32(tmp, tcg_env, vfp_f16_offset(a->vd, a->t));
2632     return true;
2633 }
2634
2635 static bool trans_VCVT_f16_f32(DisasContext *s, arg_VCVT_f16_f32 *a)
2636 {
2637     TCGv_ptr fpst;
2638     TCGv_i32 ahp_mode;
2639     TCGv_i32 tmp;
2640
2641     if (!dc_isar_feature(aa32_fp16_spconv, s)) {
2642         return false;
2643     }
2644
2645     if (!vfp_access_check(s)) {
2646         return true;
2647     }
2648
2649     fpst = fpstatus_ptr(FPST_FPCR);
2650     ahp_mode = get_ahp_flag();
2651     tmp = tcg_temp_new_i32();
2652
2653     vfp_load_reg32(tmp, a->vm);
2654     gen_helper_vfp_fcvt_f32_to_f16(tmp, tmp, fpst, ahp_mode);
2655     tcg_gen_st16_i32(tmp, tcg_env, vfp_f16_offset(a->vd, a->t));
2656     return true;
2657 }
2658
2659 static bool trans_VCVT_f16_f64(DisasContext *s, arg_VCVT_f16_f64 *a)
2660 {
2661     TCGv_ptr fpst;
2662     TCGv_i32 ahp_mode;
2663     TCGv_i32 tmp;
2664     TCGv_i64 vm;
2665
2666     if (!dc_isar_feature(aa32_fpdp_v2, s)) {
2667         return false;
2668     }
2669
2670     if (!dc_isar_feature(aa32_fp16_dpconv, s)) {
2671         return false;
2672     }
2673
2674     /* UNDEF accesses to D16-D31 if they don't exist. */
2675     if (!dc_isar_feature(aa32_simd_r32, s) && (a->vm  & 0x10)) {
2676         return false;
2677     }
2678
2679     if (!vfp_access_check(s)) {
2680         return true;
2681     }
2682
2683     fpst = fpstatus_ptr(FPST_FPCR);
2684     ahp_mode = get_ahp_flag();
2685     tmp = tcg_temp_new_i32();
2686     vm = tcg_temp_new_i64();
2687
2688     vfp_load_reg64(vm, a->vm);
2689     gen_helper_vfp_fcvt_f64_to_f16(tmp, vm, fpst, ahp_mode);
2690     tcg_gen_st16_i32(tmp, tcg_env, vfp_f16_offset(a->vd, a->t));
2691     return true;
2692 }
2693
2694 static bool trans_VRINTR_hp(DisasContext *s, arg_VRINTR_sp *a)
2695 {
2696     TCGv_ptr fpst;
2697     TCGv_i32 tmp;
2698
2699     if (!dc_isar_feature(aa32_fp16_arith, s)) {
2700         return false;
2701     }
2702
2703     if (!vfp_access_check(s)) {
2704         return true;
2705     }
2706
2707     tmp = tcg_temp_new_i32();
2708     vfp_load_reg16(tmp, a->vm);
2709     fpst = fpstatus_ptr(FPST_FPCR_F16);
2710     gen_helper_rinth(tmp, tmp, fpst);
2711     vfp_store_reg32(tmp, a->vd);
2712     return true;
2713 }
2714
2715 static bool trans_VRINTR_sp(DisasContext *s, arg_VRINTR_sp *a)
2716 {
2717     TCGv_ptr fpst;
2718     TCGv_i32 tmp;
2719
2720     if (!dc_isar_feature(aa32_vrint, s)) {
2721         return false;
2722     }
2723
2724     if (!vfp_access_check(s)) {
2725         return true;
2726     }
2727
2728     tmp = tcg_temp_new_i32();
2729     vfp_load_reg32(tmp, a->vm);
2730     fpst = fpstatus_ptr(FPST_FPCR);
2731     gen_helper_rints(tmp, tmp, fpst);
2732     vfp_store_reg32(tmp, a->vd);
2733     return true;
2734 }
2735
2736 static bool trans_VRINTR_dp(DisasContext *s, arg_VRINTR_dp *a)
2737 {
2738     TCGv_ptr fpst;
2739     TCGv_i64 tmp;
2740
2741     if (!dc_isar_feature(aa32_fpdp_v2, s)) {
2742         return false;
2743     }
2744
2745     if (!dc_isar_feature(aa32_vrint, s)) {
2746         return false;
2747     }
2748
2749     /* UNDEF accesses to D16-D31 if they don't exist. */
2750     if (!dc_isar_feature(aa32_simd_r32, s) && ((a->vd | a->vm) & 0x10)) {
2751         return false;
2752     }
2753
2754     if (!vfp_access_check(s)) {
2755         return true;
2756     }
2757
2758     tmp = tcg_temp_new_i64();
2759     vfp_load_reg64(tmp, a->vm);
2760     fpst = fpstatus_ptr(FPST_FPCR);
2761     gen_helper_rintd(tmp, tmp, fpst);
2762     vfp_store_reg64(tmp, a->vd);
2763     return true;
2764 }
2765
2766 static bool trans_VRINTZ_hp(DisasContext *s, arg_VRINTZ_sp *a)
2767 {
2768     TCGv_ptr fpst;
2769     TCGv_i32 tmp;
2770     TCGv_i32 tcg_rmode;
2771
2772     if (!dc_isar_feature(aa32_fp16_arith, s)) {
2773         return false;
2774     }
2775
2776     if (!vfp_access_check(s)) {
2777         return true;
2778     }
2779
2780     tmp = tcg_temp_new_i32();
2781     vfp_load_reg16(tmp, a->vm);
2782     fpst = fpstatus_ptr(FPST_FPCR_F16);
2783     tcg_rmode = gen_set_rmode(FPROUNDING_ZERO, fpst);
2784     gen_helper_rinth(tmp, tmp, fpst);
2785     gen_restore_rmode(tcg_rmode, fpst);
2786     vfp_store_reg32(tmp, a->vd);
2787     return true;
2788 }
2789
2790 static bool trans_VRINTZ_sp(DisasContext *s, arg_VRINTZ_sp *a)
2791 {
2792     TCGv_ptr fpst;
2793     TCGv_i32 tmp;
2794     TCGv_i32 tcg_rmode;
2795
2796     if (!dc_isar_feature(aa32_vrint, s)) {
2797         return false;
2798     }
2799
2800     if (!vfp_access_check(s)) {
2801         return true;
2802     }
2803
2804     tmp = tcg_temp_new_i32();
2805     vfp_load_reg32(tmp, a->vm);
2806     fpst = fpstatus_ptr(FPST_FPCR);
2807     tcg_rmode = gen_set_rmode(FPROUNDING_ZERO, fpst);
2808     gen_helper_rints(tmp, tmp, fpst);
2809     gen_restore_rmode(tcg_rmode, fpst);
2810     vfp_store_reg32(tmp, a->vd);
2811     return true;
2812 }
2813
2814 static bool trans_VRINTZ_dp(DisasContext *s, arg_VRINTZ_dp *a)
2815 {
2816     TCGv_ptr fpst;
2817     TCGv_i64 tmp;
2818     TCGv_i32 tcg_rmode;
2819
2820     if (!dc_isar_feature(aa32_fpdp_v2, s)) {
2821         return false;
2822     }
2823
2824     if (!dc_isar_feature(aa32_vrint, s)) {
2825         return false;
2826     }
2827
2828     /* UNDEF accesses to D16-D31 if they don't exist. */
2829     if (!dc_isar_feature(aa32_simd_r32, s) && ((a->vd | a->vm) & 0x10)) {
2830         return false;
2831     }
2832
2833     if (!vfp_access_check(s)) {
2834         return true;
2835     }
2836
2837     tmp = tcg_temp_new_i64();
2838     vfp_load_reg64(tmp, a->vm);
2839     fpst = fpstatus_ptr(FPST_FPCR);
2840     tcg_rmode = gen_set_rmode(FPROUNDING_ZERO, fpst);
2841     gen_helper_rintd(tmp, tmp, fpst);
2842     gen_restore_rmode(tcg_rmode, fpst);
2843     vfp_store_reg64(tmp, a->vd);
2844     return true;
2845 }
2846
2847 static bool trans_VRINTX_hp(DisasContext *s, arg_VRINTX_sp *a)
2848 {
2849     TCGv_ptr fpst;
2850     TCGv_i32 tmp;
2851
2852     if (!dc_isar_feature(aa32_fp16_arith, s)) {
2853         return false;
2854     }
2855
2856     if (!vfp_access_check(s)) {
2857         return true;
2858     }
2859
2860     tmp = tcg_temp_new_i32();
2861     vfp_load_reg16(tmp, a->vm);
2862     fpst = fpstatus_ptr(FPST_FPCR_F16);
2863     gen_helper_rinth_exact(tmp, tmp, fpst);
2864     vfp_store_reg32(tmp, a->vd);
2865     return true;
2866 }
2867
2868 static bool trans_VRINTX_sp(DisasContext *s, arg_VRINTX_sp *a)
2869 {
2870     TCGv_ptr fpst;
2871     TCGv_i32 tmp;
2872
2873     if (!dc_isar_feature(aa32_vrint, s)) {
2874         return false;
2875     }
2876
2877     if (!vfp_access_check(s)) {
2878         return true;
2879     }
2880
2881     tmp = tcg_temp_new_i32();
2882     vfp_load_reg32(tmp, a->vm);
2883     fpst = fpstatus_ptr(FPST_FPCR);
2884     gen_helper_rints_exact(tmp, tmp, fpst);
2885     vfp_store_reg32(tmp, a->vd);
2886     return true;
2887 }
2888
2889 static bool trans_VRINTX_dp(DisasContext *s, arg_VRINTX_dp *a)
2890 {
2891     TCGv_ptr fpst;
2892     TCGv_i64 tmp;
2893
2894     if (!dc_isar_feature(aa32_fpdp_v2, s)) {
2895         return false;
2896     }
2897
2898     if (!dc_isar_feature(aa32_vrint, s)) {
2899         return false;
2900     }
2901
2902     /* UNDEF accesses to D16-D31 if they don't exist. */
2903     if (!dc_isar_feature(aa32_simd_r32, s) && ((a->vd | a->vm) & 0x10)) {
2904         return false;
2905     }
2906
2907     if (!vfp_access_check(s)) {
2908         return true;
2909     }
2910
2911     tmp = tcg_temp_new_i64();
2912     vfp_load_reg64(tmp, a->vm);
2913     fpst = fpstatus_ptr(FPST_FPCR);
2914     gen_helper_rintd_exact(tmp, tmp, fpst);
2915     vfp_store_reg64(tmp, a->vd);
2916     return true;
2917 }
2918
2919 static bool trans_VCVT_sp(DisasContext *s, arg_VCVT_sp *a)
2920 {
2921     TCGv_i64 vd;
2922     TCGv_i32 vm;
2923
2924     if (!dc_isar_feature(aa32_fpdp_v2, s)) {
2925         return false;
2926     }
2927
2928     /* UNDEF accesses to D16-D31 if they don't exist. */
2929     if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) {
2930         return false;
2931     }
2932
2933     if (!vfp_access_check(s)) {
2934         return true;
2935     }
2936
2937     vm = tcg_temp_new_i32();
2938     vd = tcg_temp_new_i64();
2939     vfp_load_reg32(vm, a->vm);
2940     gen_helper_vfp_fcvtds(vd, vm, tcg_env);
2941     vfp_store_reg64(vd, a->vd);
2942     return true;
2943 }
2944
2945 static bool trans_VCVT_dp(DisasContext *s, arg_VCVT_dp *a)
2946 {
2947     TCGv_i64 vm;
2948     TCGv_i32 vd;
2949
2950     if (!dc_isar_feature(aa32_fpdp_v2, s)) {
2951         return false;
2952     }
2953
2954     /* UNDEF accesses to D16-D31 if they don't exist. */
2955     if (!dc_isar_feature(aa32_simd_r32, s) && (a->vm & 0x10)) {
2956         return false;
2957     }
2958
2959     if (!vfp_access_check(s)) {
2960         return true;
2961     }
2962
2963     vd = tcg_temp_new_i32();
2964     vm = tcg_temp_new_i64();
2965     vfp_load_reg64(vm, a->vm);
2966     gen_helper_vfp_fcvtsd(vd, vm, tcg_env);
2967     vfp_store_reg32(vd, a->vd);
2968     return true;
2969 }
2970
2971 static bool trans_VCVT_int_hp(DisasContext *s, arg_VCVT_int_sp *a)
2972 {
2973     TCGv_i32 vm;
2974     TCGv_ptr fpst;
2975
2976     if (!dc_isar_feature(aa32_fp16_arith, s)) {
2977         return false;
2978     }
2979
2980     if (!vfp_access_check(s)) {
2981         return true;
2982     }
2983
2984     vm = tcg_temp_new_i32();
2985     vfp_load_reg32(vm, a->vm);
2986     fpst = fpstatus_ptr(FPST_FPCR_F16);
2987     if (a->s) {
2988         /* i32 -> f16 */
2989         gen_helper_vfp_sitoh(vm, vm, fpst);
2990     } else {
2991         /* u32 -> f16 */
2992         gen_helper_vfp_uitoh(vm, vm, fpst);
2993     }
2994     vfp_store_reg32(vm, a->vd);
2995     return true;
2996 }
2997
2998 static bool trans_VCVT_int_sp(DisasContext *s, arg_VCVT_int_sp *a)
2999 {
3000     TCGv_i32 vm;
3001     TCGv_ptr fpst;
3002
3003     if (!dc_isar_feature(aa32_fpsp_v2, s)) {
3004         return false;
3005     }
3006
3007     if (!vfp_access_check(s)) {
3008         return true;
3009     }
3010
3011     vm = tcg_temp_new_i32();
3012     vfp_load_reg32(vm, a->vm);
3013     fpst = fpstatus_ptr(FPST_FPCR);
3014     if (a->s) {
3015         /* i32 -> f32 */
3016         gen_helper_vfp_sitos(vm, vm, fpst);
3017     } else {
3018         /* u32 -> f32 */
3019         gen_helper_vfp_uitos(vm, vm, fpst);
3020     }
3021     vfp_store_reg32(vm, a->vd);
3022     return true;
3023 }
3024
3025 static bool trans_VCVT_int_dp(DisasContext *s, arg_VCVT_int_dp *a)
3026 {
3027     TCGv_i32 vm;
3028     TCGv_i64 vd;
3029     TCGv_ptr fpst;
3030
3031     if (!dc_isar_feature(aa32_fpdp_v2, s)) {
3032         return false;
3033     }
3034
3035     /* UNDEF accesses to D16-D31 if they don't exist. */
3036     if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) {
3037         return false;
3038     }
3039
3040     if (!vfp_access_check(s)) {
3041         return true;
3042     }
3043
3044     vm = tcg_temp_new_i32();
3045     vd = tcg_temp_new_i64();
3046     vfp_load_reg32(vm, a->vm);
3047     fpst = fpstatus_ptr(FPST_FPCR);
3048     if (a->s) {
3049         /* i32 -> f64 */
3050         gen_helper_vfp_sitod(vd, vm, fpst);
3051     } else {
3052         /* u32 -> f64 */
3053         gen_helper_vfp_uitod(vd, vm, fpst);
3054     }
3055     vfp_store_reg64(vd, a->vd);
3056     return true;
3057 }
3058
3059 static bool trans_VJCVT(DisasContext *s, arg_VJCVT *a)
3060 {
3061     TCGv_i32 vd;
3062     TCGv_i64 vm;
3063
3064     if (!dc_isar_feature(aa32_fpdp_v2, s)) {
3065         return false;
3066     }
3067
3068     if (!dc_isar_feature(aa32_jscvt, s)) {
3069         return false;
3070     }
3071
3072     /* UNDEF accesses to D16-D31 if they don't exist. */
3073     if (!dc_isar_feature(aa32_simd_r32, s) && (a->vm & 0x10)) {
3074         return false;
3075     }
3076
3077     if (!vfp_access_check(s)) {
3078         return true;
3079     }
3080
3081     vm = tcg_temp_new_i64();
3082     vd = tcg_temp_new_i32();
3083     vfp_load_reg64(vm, a->vm);
3084     gen_helper_vjcvt(vd, vm, tcg_env);
3085     vfp_store_reg32(vd, a->vd);
3086     return true;
3087 }
3088
3089 static bool trans_VCVT_fix_hp(DisasContext *s, arg_VCVT_fix_sp *a)
3090 {
3091     TCGv_i32 vd, shift;
3092     TCGv_ptr fpst;
3093     int frac_bits;
3094
3095     if (!dc_isar_feature(aa32_fp16_arith, s)) {
3096         return false;
3097     }
3098
3099     if (!vfp_access_check(s)) {
3100         return true;
3101     }
3102
3103     frac_bits = (a->opc & 1) ? (32 - a->imm) : (16 - a->imm);
3104
3105     vd = tcg_temp_new_i32();
3106     vfp_load_reg32(vd, a->vd);
3107
3108     fpst = fpstatus_ptr(FPST_FPCR_F16);
3109     shift = tcg_constant_i32(frac_bits);
3110
3111     /* Switch on op:U:sx bits */
3112     switch (a->opc) {
3113     case 0:
3114         gen_helper_vfp_shtoh_round_to_nearest(vd, vd, shift, fpst);
3115         break;
3116     case 1:
3117         gen_helper_vfp_sltoh_round_to_nearest(vd, vd, shift, fpst);
3118         break;
3119     case 2:
3120         gen_helper_vfp_uhtoh_round_to_nearest(vd, vd, shift, fpst);
3121         break;
3122     case 3:
3123         gen_helper_vfp_ultoh_round_to_nearest(vd, vd, shift, fpst);
3124         break;
3125     case 4:
3126         gen_helper_vfp_toshh_round_to_zero(vd, vd, shift, fpst);
3127         break;
3128     case 5:
3129         gen_helper_vfp_toslh_round_to_zero(vd, vd, shift, fpst);
3130         break;
3131     case 6:
3132         gen_helper_vfp_touhh_round_to_zero(vd, vd, shift, fpst);
3133         break;
3134     case 7:
3135         gen_helper_vfp_toulh_round_to_zero(vd, vd, shift, fpst);
3136         break;
3137     default:
3138         g_assert_not_reached();
3139     }
3140
3141     vfp_store_reg32(vd, a->vd);
3142     return true;
3143 }
3144
3145 static bool trans_VCVT_fix_sp(DisasContext *s, arg_VCVT_fix_sp *a)
3146 {
3147     TCGv_i32 vd, shift;
3148     TCGv_ptr fpst;
3149     int frac_bits;
3150
3151     if (!dc_isar_feature(aa32_fpsp_v3, s)) {
3152         return false;
3153     }
3154
3155     if (!vfp_access_check(s)) {
3156         return true;
3157     }
3158
3159     frac_bits = (a->opc & 1) ? (32 - a->imm) : (16 - a->imm);
3160
3161     vd = tcg_temp_new_i32();
3162     vfp_load_reg32(vd, a->vd);
3163
3164     fpst = fpstatus_ptr(FPST_FPCR);
3165     shift = tcg_constant_i32(frac_bits);
3166
3167     /* Switch on op:U:sx bits */
3168     switch (a->opc) {
3169     case 0:
3170         gen_helper_vfp_shtos_round_to_nearest(vd, vd, shift, fpst);
3171         break;
3172     case 1:
3173         gen_helper_vfp_sltos_round_to_nearest(vd, vd, shift, fpst);
3174         break;
3175     case 2:
3176         gen_helper_vfp_uhtos_round_to_nearest(vd, vd, shift, fpst);
3177         break;
3178     case 3:
3179         gen_helper_vfp_ultos_round_to_nearest(vd, vd, shift, fpst);
3180         break;
3181     case 4:
3182         gen_helper_vfp_toshs_round_to_zero(vd, vd, shift, fpst);
3183         break;
3184     case 5:
3185         gen_helper_vfp_tosls_round_to_zero(vd, vd, shift, fpst);
3186         break;
3187     case 6:
3188         gen_helper_vfp_touhs_round_to_zero(vd, vd, shift, fpst);
3189         break;
3190     case 7:
3191         gen_helper_vfp_touls_round_to_zero(vd, vd, shift, fpst);
3192         break;
3193     default:
3194         g_assert_not_reached();
3195     }
3196
3197     vfp_store_reg32(vd, a->vd);
3198     return true;
3199 }
3200
3201 static bool trans_VCVT_fix_dp(DisasContext *s, arg_VCVT_fix_dp *a)
3202 {
3203     TCGv_i64 vd;
3204     TCGv_i32 shift;
3205     TCGv_ptr fpst;
3206     int frac_bits;
3207
3208     if (!dc_isar_feature(aa32_fpdp_v3, s)) {
3209         return false;
3210     }
3211
3212     /* UNDEF accesses to D16-D31 if they don't exist. */
3213     if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) {
3214         return false;
3215     }
3216
3217     if (!vfp_access_check(s)) {
3218         return true;
3219     }
3220
3221     frac_bits = (a->opc & 1) ? (32 - a->imm) : (16 - a->imm);
3222
3223     vd = tcg_temp_new_i64();
3224     vfp_load_reg64(vd, a->vd);
3225
3226     fpst = fpstatus_ptr(FPST_FPCR);
3227     shift = tcg_constant_i32(frac_bits);
3228
3229     /* Switch on op:U:sx bits */
3230     switch (a->opc) {
3231     case 0:
3232         gen_helper_vfp_shtod_round_to_nearest(vd, vd, shift, fpst);
3233         break;
3234     case 1:
3235         gen_helper_vfp_sltod_round_to_nearest(vd, vd, shift, fpst);
3236         break;
3237     case 2:
3238         gen_helper_vfp_uhtod_round_to_nearest(vd, vd, shift, fpst);
3239         break;
3240     case 3:
3241         gen_helper_vfp_ultod_round_to_nearest(vd, vd, shift, fpst);
3242         break;
3243     case 4:
3244         gen_helper_vfp_toshd_round_to_zero(vd, vd, shift, fpst);
3245         break;
3246     case 5:
3247         gen_helper_vfp_tosld_round_to_zero(vd, vd, shift, fpst);
3248         break;
3249     case 6:
3250         gen_helper_vfp_touhd_round_to_zero(vd, vd, shift, fpst);
3251         break;
3252     case 7:
3253         gen_helper_vfp_tould_round_to_zero(vd, vd, shift, fpst);
3254         break;
3255     default:
3256         g_assert_not_reached();
3257     }
3258
3259     vfp_store_reg64(vd, a->vd);
3260     return true;
3261 }
3262
3263 static bool trans_VCVT_hp_int(DisasContext *s, arg_VCVT_sp_int *a)
3264 {
3265     TCGv_i32 vm;
3266     TCGv_ptr fpst;
3267
3268     if (!dc_isar_feature(aa32_fp16_arith, s)) {
3269         return false;
3270     }
3271
3272     if (!vfp_access_check(s)) {
3273         return true;
3274     }
3275
3276     fpst = fpstatus_ptr(FPST_FPCR_F16);
3277     vm = tcg_temp_new_i32();
3278     vfp_load_reg16(vm, a->vm);
3279
3280     if (a->s) {
3281         if (a->rz) {
3282             gen_helper_vfp_tosizh(vm, vm, fpst);
3283         } else {
3284             gen_helper_vfp_tosih(vm, vm, fpst);
3285         }
3286     } else {
3287         if (a->rz) {
3288             gen_helper_vfp_touizh(vm, vm, fpst);
3289         } else {
3290             gen_helper_vfp_touih(vm, vm, fpst);
3291         }
3292     }
3293     vfp_store_reg32(vm, a->vd);
3294     return true;
3295 }
3296
3297 static bool trans_VCVT_sp_int(DisasContext *s, arg_VCVT_sp_int *a)
3298 {
3299     TCGv_i32 vm;
3300     TCGv_ptr fpst;
3301
3302     if (!dc_isar_feature(aa32_fpsp_v2, s)) {
3303         return false;
3304     }
3305
3306     if (!vfp_access_check(s)) {
3307         return true;
3308     }
3309
3310     fpst = fpstatus_ptr(FPST_FPCR);
3311     vm = tcg_temp_new_i32();
3312     vfp_load_reg32(vm, a->vm);
3313
3314     if (a->s) {
3315         if (a->rz) {
3316             gen_helper_vfp_tosizs(vm, vm, fpst);
3317         } else {
3318             gen_helper_vfp_tosis(vm, vm, fpst);
3319         }
3320     } else {
3321         if (a->rz) {
3322             gen_helper_vfp_touizs(vm, vm, fpst);
3323         } else {
3324             gen_helper_vfp_touis(vm, vm, fpst);
3325         }
3326     }
3327     vfp_store_reg32(vm, a->vd);
3328     return true;
3329 }
3330
3331 static bool trans_VCVT_dp_int(DisasContext *s, arg_VCVT_dp_int *a)
3332 {
3333     TCGv_i32 vd;
3334     TCGv_i64 vm;
3335     TCGv_ptr fpst;
3336
3337     if (!dc_isar_feature(aa32_fpdp_v2, s)) {
3338         return false;
3339     }
3340
3341     /* UNDEF accesses to D16-D31 if they don't exist. */
3342     if (!dc_isar_feature(aa32_simd_r32, s) && (a->vm & 0x10)) {
3343         return false;
3344     }
3345
3346     if (!vfp_access_check(s)) {
3347         return true;
3348     }
3349
3350     fpst = fpstatus_ptr(FPST_FPCR);
3351     vm = tcg_temp_new_i64();
3352     vd = tcg_temp_new_i32();
3353     vfp_load_reg64(vm, a->vm);
3354
3355     if (a->s) {
3356         if (a->rz) {
3357             gen_helper_vfp_tosizd(vd, vm, fpst);
3358         } else {
3359             gen_helper_vfp_tosid(vd, vm, fpst);
3360         }
3361     } else {
3362         if (a->rz) {
3363             gen_helper_vfp_touizd(vd, vm, fpst);
3364         } else {
3365             gen_helper_vfp_touid(vd, vm, fpst);
3366         }
3367     }
3368     vfp_store_reg32(vd, a->vd);
3369     return true;
3370 }
3371
3372 static bool trans_VINS(DisasContext *s, arg_VINS *a)
3373 {
3374     TCGv_i32 rd, rm;
3375
3376     if (!dc_isar_feature(aa32_fp16_arith, s)) {
3377         return false;
3378     }
3379
3380     if (s->vec_len != 0 || s->vec_stride != 0) {
3381         return false;
3382     }
3383
3384     if (!vfp_access_check(s)) {
3385         return true;
3386     }
3387
3388     /* Insert low half of Vm into high half of Vd */
3389     rm = tcg_temp_new_i32();
3390     rd = tcg_temp_new_i32();
3391     vfp_load_reg16(rm, a->vm);
3392     vfp_load_reg16(rd, a->vd);
3393     tcg_gen_deposit_i32(rd, rd, rm, 16, 16);
3394     vfp_store_reg32(rd, a->vd);
3395     return true;
3396 }
3397
3398 static bool trans_VMOVX(DisasContext *s, arg_VINS *a)
3399 {
3400     TCGv_i32 rm;
3401
3402     if (!dc_isar_feature(aa32_fp16_arith, s)) {
3403         return false;
3404     }
3405
3406     if (s->vec_len != 0 || s->vec_stride != 0) {
3407         return false;
3408     }
3409
3410     if (!vfp_access_check(s)) {
3411         return true;
3412     }
3413
3414     /* Set Vd to high half of Vm */
3415     rm = tcg_temp_new_i32();
3416     vfp_load_reg32(rm, a->vm);
3417     tcg_gen_shri_i32(rm, rm, 16);
3418     vfp_store_reg32(rm, a->vd);
3419     return true;
3420 }