/*
 * ARM translation: M-profile MVE instructions
 *
 * Copyright (c) 2021 Linaro, Ltd.
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */
#include "qemu/osdep.h"
#include "translate.h"
#include "translate-a32.h"
static inline int vidup_imm(DisasContext *s, int x)
{
    return 1 << x;
}
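
/*
 * The 2-bit immediate field decoded by vidup_imm() encodes log2 of the
 * step, i.e. VIDUP/VDDUP/VIWDUP/VDWDUP step by 1, 2, 4 or 8 elements.
 */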
/* Include the generated decoder */
#include "decode-mve.c.inc"

typedef void MVEGenLdStFn(TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenLdStSGFn(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenLdStIlFn(TCGv_ptr, TCGv_i32, TCGv_i32);
typedef void MVEGenOneOpFn(TCGv_ptr, TCGv_ptr, TCGv_ptr);
typedef void MVEGenTwoOpFn(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_ptr);
typedef void MVEGenTwoOpScalarFn(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenTwoOpShiftFn(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenLongDualAccOpFn(TCGv_i64, TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i64);
typedef void MVEGenVADDVFn(TCGv_i32, TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenOneOpImmFn(TCGv_ptr, TCGv_ptr, TCGv_i64);
typedef void MVEGenVIDUPFn(TCGv_i32, TCGv_ptr, TCGv_ptr, TCGv_i32, TCGv_i32);
typedef void MVEGenVIWDUPFn(TCGv_i32, TCGv_ptr, TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32);
typedef void MVEGenCmpFn(TCGv_ptr, TCGv_ptr, TCGv_ptr);
typedef void MVEGenScalarCmpFn(TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenVABAVFn(TCGv_i32, TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenDualAccOpFn(TCGv_i32, TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenVCVTRmodeFn(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32);
/* Return the offset of a Qn register (same semantics as aa32_vfp_qreg()) */
static inline long mve_qreg_offset(unsigned reg)
{
    return offsetof(CPUARMState, vfp.zregs[reg].d[0]);
}

static TCGv_ptr mve_qreg_ptr(unsigned reg)
{
    TCGv_ptr ret = tcg_temp_new_ptr();
    tcg_gen_addi_ptr(ret, tcg_env, mve_qreg_offset(reg));
    return ret;
}

static bool mve_no_predication(DisasContext *s)
{
    /*
     * Return true if we are executing the entire MVE instruction
     * with no predication or partial-execution, and so we can safely
     * use an inline TCG vector implementation.
     */
    return s->eci == 0 && s->mve_no_pred;
}
static bool mve_check_qreg_bank(DisasContext *s, int qmask)
{
    /*
     * Check whether Qregs are in range. For v8.1M only Q0..Q7
     * are supported, see VFPSmallRegisterBank().
     */
    return qmask < 8;
}

bool mve_eci_check(DisasContext *s)
{
    /*
     * This is a beatwise insn: check that ECI is valid (not a
     * reserved value) and note that we are handling it.
     * Return true if OK, false if we generated an exception.
     */
    s->eci_handled = true;
    switch (s->eci) {
    case ECI_NONE:
    case ECI_A0:
    case ECI_A0A1:
    case ECI_A0A1A2:
    case ECI_A0A1A2B0:
        return true;
    default:
        /* Reserved value: INVSTATE UsageFault */
        gen_exception_insn(s, 0, EXCP_INVSTATE, syn_uncategorized());
        return false;
    }
}

void mve_update_eci(DisasContext *s)
{
    /*
     * The helper function will always update the CPUState field,
     * so we only need to update the DisasContext field.
     */
    if (s->eci) {
        s->eci = (s->eci == ECI_A0A1A2B0) ? ECI_A0 : ECI_NONE;
    }
}

void mve_update_and_store_eci(DisasContext *s)
{
    /*
     * For insns which don't call a helper function that will call
     * mve_advance_vpt(), this version updates s->eci and also stores
     * it out to the CPUState field.
     */
    mve_update_eci(s);
    store_cpu_field(tcg_constant_i32(s->eci << 4), condexec_bits);
}

static bool mve_skip_first_beat(DisasContext *s)
{
    /* Return true if PSR.ECI says we must skip the first beat of this insn */
    switch (s->eci) {
    case ECI_NONE:
        return false;
    case ECI_A0:
    case ECI_A0A1:
    case ECI_A0A1A2:
    case ECI_A0A1A2B0:
        return true;
    default:
        g_assert_not_reached();
    }
}
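
/*
 * Reminder: the ECI values name the beats of the current instruction
 * pair that were already completed before an exception was taken, so any
 * non-zero ECI value implies that at least beat A0 must be skipped on
 * re-execution; that is all mve_skip_first_beat() needs to know.
 */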
static bool do_ldst(DisasContext *s, arg_VLDR_VSTR *a, MVEGenLdStFn *fn,
                    unsigned msize)
{
    TCGv_i32 addr;
    uint32_t offset;
    TCGv_ptr qreg;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd) ||
        !fn) {
        return false;
    }

    /* CONSTRAINED UNPREDICTABLE: we choose to UNDEF */
    if (a->rn == 15 || (a->rn == 13 && a->w)) {
        return false;
    }

    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    offset = a->imm << msize;
    if (!a->a) {
        offset = -offset;
    }
    addr = load_reg(s, a->rn);
    if (a->p) {
        tcg_gen_addi_i32(addr, addr, offset);
    }

    qreg = mve_qreg_ptr(a->qd);
    fn(tcg_env, qreg, addr);

    /*
     * Writeback always happens after the last beat of the insn,
     * regardless of predication
     */
    if (a->w) {
        if (!a->p) {
            tcg_gen_addi_i32(addr, addr, offset);
        }
        store_reg(s, a->rn, addr);
    }
    mve_update_eci(s);
    return true;
}
static bool trans_VLDR_VSTR(DisasContext *s, arg_VLDR_VSTR *a)
{
    static MVEGenLdStFn * const ldstfns[4][2] = {
        { gen_helper_mve_vstrb, gen_helper_mve_vldrb },
        { gen_helper_mve_vstrh, gen_helper_mve_vldrh },
        { gen_helper_mve_vstrw, gen_helper_mve_vldrw },
        { NULL, NULL }
    };
    return do_ldst(s, a, ldstfns[a->size][a->l], a->size);
}
#define DO_VLDST_WIDE_NARROW(OP, SLD, ULD, ST, MSIZE)           \
    static bool trans_##OP(DisasContext *s, arg_VLDR_VSTR *a)   \
    {                                                           \
        static MVEGenLdStFn * const ldstfns[2][2] = {           \
            { gen_helper_mve_##ST, gen_helper_mve_##SLD },      \
            { NULL, gen_helper_mve_##ULD },                     \
        };                                                      \
        return do_ldst(s, a, ldstfns[a->u][a->l], MSIZE);       \
    }
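
/*
 * For example, DO_VLDST_WIDE_NARROW(VLDSTB_H, vldrb_sh, vldrb_uh, vstrb_h,
 * MO_8) below produces trans_VLDSTB_H(), which dispatches on a->u and a->l:
 * vstrb_h for the narrowing store, vldrb_sh/vldrb_uh for the sign- and
 * zero-extending widening loads, all with an in-memory element size of MO_8.
 */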
DO_VLDST_WIDE_NARROW(VLDSTB_H, vldrb_sh, vldrb_uh, vstrb_h, MO_8)
DO_VLDST_WIDE_NARROW(VLDSTB_W, vldrb_sw, vldrb_uw, vstrb_w, MO_8)
DO_VLDST_WIDE_NARROW(VLDSTH_W, vldrh_sw, vldrh_uw, vstrh_w, MO_16)
static bool do_ldst_sg(DisasContext *s, arg_vldst_sg *a, MVEGenLdStSGFn fn)
{
    TCGv_i32 addr;
    TCGv_ptr qd, qm;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd | a->qm) ||
        !fn || a->rn == 15) {
        /* Rn case is UNPREDICTABLE */
        return false;
    }

    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    addr = load_reg(s, a->rn);

    qd = mve_qreg_ptr(a->qd);
    qm = mve_qreg_ptr(a->qm);
    fn(tcg_env, qd, qm, addr);
    mve_update_eci(s);
    return true;
}

/*
 * The naming scheme here is "vldrb_sg_sh == in-memory byte loads
 * signextended to halfword elements in register". _os_ indicates that
 * the offsets in Qm should be scaled by the element size.
 */
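/*
 * For example, vldrh_sg_os_uw loads halfwords from memory, zero-extends
 * them to word elements in the register, and scales the offsets in Qm
 * by 2 (the in-memory element size).
 */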
/* This macro is just to make the arrays more compact in these functions */
#define F(N) gen_helper_mve_##N

/* VLDRB/VSTRB (ie msize 1) with OS=1 is UNPREDICTABLE; we UNDEF */
static bool trans_VLDR_S_sg(DisasContext *s, arg_vldst_sg *a)
{
    static MVEGenLdStSGFn * const fns[2][4][4] = { {
            { NULL, F(vldrb_sg_sh), F(vldrb_sg_sw), NULL },
            { NULL, NULL,           F(vldrh_sg_sw), NULL },
            { NULL, NULL,           NULL,           NULL },
            { NULL, NULL,           NULL,           NULL }
        }, {
            { NULL, NULL,           NULL,              NULL },
            { NULL, NULL,           F(vldrh_sg_os_sw), NULL },
            { NULL, NULL,           NULL,              NULL },
            { NULL, NULL,           NULL,              NULL }
        }
    };
    if (a->qd == a->qm) {
        return false; /* UNPREDICTABLE */
    }
    return do_ldst_sg(s, a, fns[a->os][a->msize][a->size]);
}
static bool trans_VLDR_U_sg(DisasContext *s, arg_vldst_sg *a)
{
    static MVEGenLdStSGFn * const fns[2][4][4] = { {
            { F(vldrb_sg_ub), F(vldrb_sg_uh), F(vldrb_sg_uw), NULL },
            { NULL,           F(vldrh_sg_uh), F(vldrh_sg_uw), NULL },
            { NULL,           NULL,           F(vldrw_sg_uw), NULL },
            { NULL,           NULL,           NULL,           F(vldrd_sg_ud) }
        }, {
            { NULL, NULL,              NULL,              NULL },
            { NULL, F(vldrh_sg_os_uh), F(vldrh_sg_os_uw), NULL },
            { NULL, NULL,              F(vldrw_sg_os_uw), NULL },
            { NULL, NULL,              NULL,              F(vldrd_sg_os_ud) }
        }
    };
    if (a->qd == a->qm) {
        return false; /* UNPREDICTABLE */
    }
    return do_ldst_sg(s, a, fns[a->os][a->msize][a->size]);
}
static bool trans_VSTR_sg(DisasContext *s, arg_vldst_sg *a)
{
    static MVEGenLdStSGFn * const fns[2][4][4] = { {
            { F(vstrb_sg_ub), F(vstrb_sg_uh), F(vstrb_sg_uw), NULL },
            { NULL,           F(vstrh_sg_uh), F(vstrh_sg_uw), NULL },
            { NULL,           NULL,           F(vstrw_sg_uw), NULL },
            { NULL,           NULL,           NULL,           F(vstrd_sg_ud) }
        }, {
            { NULL, NULL,              NULL,              NULL },
            { NULL, F(vstrh_sg_os_uh), F(vstrh_sg_os_uw), NULL },
            { NULL, NULL,              F(vstrw_sg_os_uw), NULL },
            { NULL, NULL,              NULL,              F(vstrd_sg_os_ud) }
        }
    };
    return do_ldst_sg(s, a, fns[a->os][a->msize][a->size]);
}

#undef F
static bool do_ldst_sg_imm(DisasContext *s, arg_vldst_sg_imm *a,
                           MVEGenLdStSGFn *fn, unsigned msize)
{
    uint32_t offset;
    TCGv_ptr qd, qm;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd | a->qm) ||
        !fn) {
        return false;
    }

    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    offset = a->imm << msize;
    if (!a->a) {
        offset = -offset;
    }

    qd = mve_qreg_ptr(a->qd);
    qm = mve_qreg_ptr(a->qm);
    fn(tcg_env, qd, qm, tcg_constant_i32(offset));
    mve_update_eci(s);
    return true;
}
static bool trans_VLDRW_sg_imm(DisasContext *s, arg_vldst_sg_imm *a)
{
    static MVEGenLdStSGFn * const fns[] = {
        gen_helper_mve_vldrw_sg_uw,
        gen_helper_mve_vldrw_sg_wb_uw,
    };
    if (a->qd == a->qm) {
        return false; /* UNPREDICTABLE */
    }
    return do_ldst_sg_imm(s, a, fns[a->w], MO_32);
}

static bool trans_VLDRD_sg_imm(DisasContext *s, arg_vldst_sg_imm *a)
{
    static MVEGenLdStSGFn * const fns[] = {
        gen_helper_mve_vldrd_sg_ud,
        gen_helper_mve_vldrd_sg_wb_ud,
    };
    if (a->qd == a->qm) {
        return false; /* UNPREDICTABLE */
    }
    return do_ldst_sg_imm(s, a, fns[a->w], MO_64);
}

static bool trans_VSTRW_sg_imm(DisasContext *s, arg_vldst_sg_imm *a)
{
    static MVEGenLdStSGFn * const fns[] = {
        gen_helper_mve_vstrw_sg_uw,
        gen_helper_mve_vstrw_sg_wb_uw,
    };
    return do_ldst_sg_imm(s, a, fns[a->w], MO_32);
}

static bool trans_VSTRD_sg_imm(DisasContext *s, arg_vldst_sg_imm *a)
{
    static MVEGenLdStSGFn * const fns[] = {
        gen_helper_mve_vstrd_sg_ud,
        gen_helper_mve_vstrd_sg_wb_ud,
    };
    return do_ldst_sg_imm(s, a, fns[a->w], MO_64);
}
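
/*
 * In the four trans functions above, fns[a->w] picks the _wb_ helper
 * variant when W == 1; as the helper names suggest, that form also
 * writes the updated addresses back into the Qm offset vector.
 */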
static bool do_vldst_il(DisasContext *s, arg_vldst_il *a, MVEGenLdStIlFn *fn,
                        int addrinc)
{
    TCGv_i32 rn;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd) ||
        !fn || (a->rn == 13 && a->w) || a->rn == 15) {
        /* Variously UNPREDICTABLE or UNDEF or related-encoding */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    rn = load_reg(s, a->rn);
    /*
     * We pass the index of Qd, not a pointer, because the helper must
     * access multiple Q registers starting at Qd and working up.
     */
    fn(tcg_env, tcg_constant_i32(a->qd), rn);

    if (a->w) {
        tcg_gen_addi_i32(rn, rn, addrinc);
        store_reg(s, a->rn, rn);
    }
    mve_update_and_store_eci(s);
    return true;
}
/* This macro is just to make the arrays more compact in these functions */
#define F(N) gen_helper_mve_##N
static bool trans_VLD2(DisasContext *s, arg_vldst_il *a)
{
    static MVEGenLdStIlFn * const fns[4][4] = {
        { F(vld20b), F(vld20h), F(vld20w), NULL, },
        { F(vld21b), F(vld21h), F(vld21w), NULL, },
        { NULL, NULL, NULL, NULL },
        { NULL, NULL, NULL, NULL },
    };
    if (a->qd > 6) {
        return false;
    }
    return do_vldst_il(s, a, fns[a->pat][a->size], 32);
}

static bool trans_VLD4(DisasContext *s, arg_vldst_il *a)
{
    static MVEGenLdStIlFn * const fns[4][4] = {
        { F(vld40b), F(vld40h), F(vld40w), NULL, },
        { F(vld41b), F(vld41h), F(vld41w), NULL, },
        { F(vld42b), F(vld42h), F(vld42w), NULL, },
        { F(vld43b), F(vld43h), F(vld43w), NULL, },
    };
    if (a->qd > 4) {
        return false;
    }
    return do_vldst_il(s, a, fns[a->pat][a->size], 64);
}

static bool trans_VST2(DisasContext *s, arg_vldst_il *a)
{
    static MVEGenLdStIlFn * const fns[4][4] = {
        { F(vst20b), F(vst20h), F(vst20w), NULL, },
        { F(vst21b), F(vst21h), F(vst21w), NULL, },
        { NULL, NULL, NULL, NULL },
        { NULL, NULL, NULL, NULL },
    };
    if (a->qd > 6) {
        return false;
    }
    return do_vldst_il(s, a, fns[a->pat][a->size], 32);
}

static bool trans_VST4(DisasContext *s, arg_vldst_il *a)
{
    static MVEGenLdStIlFn * const fns[4][4] = {
        { F(vst40b), F(vst40h), F(vst40w), NULL, },
        { F(vst41b), F(vst41h), F(vst41w), NULL, },
        { F(vst42b), F(vst42h), F(vst42w), NULL, },
        { F(vst43b), F(vst43h), F(vst43w), NULL, },
    };
    if (a->qd > 4) {
        return false;
    }
    return do_vldst_il(s, a, fns[a->pat][a->size], 64);
}

#undef F
static bool trans_VDUP(DisasContext *s, arg_VDUP *a)
{
    TCGv_ptr qd;
    TCGv_i32 rt;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd)) {
        return false;
    }
    if (a->rt == 13 || a->rt == 15) {
        /* UNPREDICTABLE; we choose to UNDEF */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    rt = load_reg(s, a->rt);
    if (mve_no_predication(s)) {
        tcg_gen_gvec_dup_i32(a->size, mve_qreg_offset(a->qd), 16, 16, rt);
    } else {
        qd = mve_qreg_ptr(a->qd);
        tcg_gen_dup_i32(a->size, rt, rt);
        gen_helper_mve_vdup(tcg_env, qd, rt);
    }
    mve_update_eci(s);
    return true;
}
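
/*
 * In the unpredicated case tcg_gen_gvec_dup_i32() above replicates Rt
 * across the whole 16-byte Q register inline; otherwise we first
 * replicate Rt within a 32-bit word with tcg_gen_dup_i32() and let the
 * helper store it beat by beat under the predicate mask.
 */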
static bool do_1op_vec(DisasContext *s, arg_1op *a, MVEGenOneOpFn fn,
                       GVecGen2Fn vecfn)
{
    TCGv_ptr qd, qm;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd | a->qm) ||
        !fn) {
        return false;
    }

    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    if (vecfn && mve_no_predication(s)) {
        vecfn(a->size, mve_qreg_offset(a->qd), mve_qreg_offset(a->qm), 16, 16);
    } else {
        qd = mve_qreg_ptr(a->qd);
        qm = mve_qreg_ptr(a->qm);
        fn(tcg_env, qd, qm);
    }
    mve_update_eci(s);
    return true;
}

static bool do_1op(DisasContext *s, arg_1op *a, MVEGenOneOpFn fn)
{
    return do_1op_vec(s, a, fn, NULL);
}
#define DO_1OP_VEC(INSN, FN, VECFN)                             \
    static bool trans_##INSN(DisasContext *s, arg_1op *a)       \
    {                                                           \
        static MVEGenOneOpFn * const fns[] = {                  \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##w,                             \
            NULL,                                               \
        };                                                      \
        return do_1op_vec(s, a, fns[a->size], VECFN);           \
    }

#define DO_1OP(INSN, FN) DO_1OP_VEC(INSN, FN, NULL)
DO_1OP_VEC(VABS, vabs, tcg_gen_gvec_abs)
DO_1OP_VEC(VNEG, vneg, tcg_gen_gvec_neg)
/*
 * For simple float/int conversions we use the fixed-point
 * conversion helpers with a zero shift count
 */
#define DO_VCVT(INSN, HFN, SFN)                                         \
    static void gen_##INSN##h(TCGv_ptr env, TCGv_ptr qd, TCGv_ptr qm)   \
    {                                                                   \
        gen_helper_mve_##HFN(env, qd, qm, tcg_constant_i32(0));         \
    }                                                                   \
    static void gen_##INSN##s(TCGv_ptr env, TCGv_ptr qd, TCGv_ptr qm)   \
    {                                                                   \
        gen_helper_mve_##SFN(env, qd, qm, tcg_constant_i32(0));         \
    }                                                                   \
    static bool trans_##INSN(DisasContext *s, arg_1op *a)               \
    {                                                                   \
        static MVEGenOneOpFn * const fns[] = {                          \
            NULL,                                                       \
            gen_##INSN##h,                                              \
            gen_##INSN##s,                                              \
            NULL,                                                       \
        };                                                              \
        if (!dc_isar_feature(aa32_mve_fp, s)) {                         \
            return false;                                               \
        }                                                               \
        return do_1op(s, a, fns[a->size]);                              \
    }

DO_VCVT(VCVT_SF, vcvt_sh, vcvt_sf)
DO_VCVT(VCVT_UF, vcvt_uh, vcvt_uf)
DO_VCVT(VCVT_FS, vcvt_hs, vcvt_fs)
DO_VCVT(VCVT_FU, vcvt_hu, vcvt_fu)
static bool do_vcvt_rmode(DisasContext *s, arg_1op *a,
                          ARMFPRounding rmode, bool u)
{
    /*
     * Handle VCVT fp to int with specified rounding mode.
     * This is a 1op fn but we must pass the rounding mode as
     * an immediate to the helper.
     */
    TCGv_ptr qd, qm;
    static MVEGenVCVTRmodeFn * const fns[4][2] = {
        { NULL, NULL },
        { gen_helper_mve_vcvt_rm_sh, gen_helper_mve_vcvt_rm_uh },
        { gen_helper_mve_vcvt_rm_ss, gen_helper_mve_vcvt_rm_us },
        { NULL, NULL },
    };
    MVEGenVCVTRmodeFn *fn = fns[a->size][u];

    if (!dc_isar_feature(aa32_mve_fp, s) ||
        !mve_check_qreg_bank(s, a->qd | a->qm) ||
        !fn) {
        return false;
    }

    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qd = mve_qreg_ptr(a->qd);
    qm = mve_qreg_ptr(a->qm);
    fn(tcg_env, qd, qm, tcg_constant_i32(arm_rmode_to_sf(rmode)));
    mve_update_eci(s);
    return true;
}
#define DO_VCVT_RMODE(INSN, RMODE, U)                           \
    static bool trans_##INSN(DisasContext *s, arg_1op *a)       \
    {                                                           \
        return do_vcvt_rmode(s, a, RMODE, U);                   \
    }

DO_VCVT_RMODE(VCVTAS, FPROUNDING_TIEAWAY, false)
DO_VCVT_RMODE(VCVTAU, FPROUNDING_TIEAWAY, true)
DO_VCVT_RMODE(VCVTNS, FPROUNDING_TIEEVEN, false)
DO_VCVT_RMODE(VCVTNU, FPROUNDING_TIEEVEN, true)
DO_VCVT_RMODE(VCVTPS, FPROUNDING_POSINF, false)
DO_VCVT_RMODE(VCVTPU, FPROUNDING_POSINF, true)
DO_VCVT_RMODE(VCVTMS, FPROUNDING_NEGINF, false)
DO_VCVT_RMODE(VCVTMU, FPROUNDING_NEGINF, true)
#define DO_VCVT_SH(INSN, FN)                                    \
    static bool trans_##INSN(DisasContext *s, arg_1op *a)       \
    {                                                           \
        if (!dc_isar_feature(aa32_mve_fp, s)) {                 \
            return false;                                       \
        }                                                       \
        return do_1op(s, a, gen_helper_mve_##FN);               \
    }

DO_VCVT_SH(VCVTB_SH, vcvtb_sh)
DO_VCVT_SH(VCVTT_SH, vcvtt_sh)
DO_VCVT_SH(VCVTB_HS, vcvtb_hs)
DO_VCVT_SH(VCVTT_HS, vcvtt_hs)
#define DO_VRINT(INSN, RMODE)                                           \
    static void gen_##INSN##h(TCGv_ptr env, TCGv_ptr qd, TCGv_ptr qm)   \
    {                                                                   \
        gen_helper_mve_vrint_rm_h(env, qd, qm,                          \
                                  tcg_constant_i32(arm_rmode_to_sf(RMODE))); \
    }                                                                   \
    static void gen_##INSN##s(TCGv_ptr env, TCGv_ptr qd, TCGv_ptr qm)   \
    {                                                                   \
        gen_helper_mve_vrint_rm_s(env, qd, qm,                          \
                                  tcg_constant_i32(arm_rmode_to_sf(RMODE))); \
    }                                                                   \
    static bool trans_##INSN(DisasContext *s, arg_1op *a)               \
    {                                                                   \
        static MVEGenOneOpFn * const fns[] = {                          \
            NULL,                                                       \
            gen_##INSN##h,                                              \
            gen_##INSN##s,                                              \
            NULL,                                                       \
        };                                                              \
        if (!dc_isar_feature(aa32_mve_fp, s)) {                         \
            return false;                                               \
        }                                                               \
        return do_1op(s, a, fns[a->size]);                              \
    }

DO_VRINT(VRINTN, FPROUNDING_TIEEVEN)
DO_VRINT(VRINTA, FPROUNDING_TIEAWAY)
DO_VRINT(VRINTZ, FPROUNDING_ZERO)
DO_VRINT(VRINTM, FPROUNDING_NEGINF)
DO_VRINT(VRINTP, FPROUNDING_POSINF)
static bool trans_VRINTX(DisasContext *s, arg_1op *a)
{
    static MVEGenOneOpFn * const fns[] = {
        NULL,
        gen_helper_mve_vrintx_h,
        gen_helper_mve_vrintx_s,
        NULL,
    };
    if (!dc_isar_feature(aa32_mve_fp, s)) {
        return false;
    }
    return do_1op(s, a, fns[a->size]);
}
/* Narrowing moves: only size 0 and 1 are valid */
#define DO_VMOVN(INSN, FN)                                      \
    static bool trans_##INSN(DisasContext *s, arg_1op *a)       \
    {                                                           \
        static MVEGenOneOpFn * const fns[] = {                  \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
            NULL,                                               \
            NULL,                                               \
        };                                                      \
        return do_1op(s, a, fns[a->size]);                      \
    }

DO_VMOVN(VMOVNB, vmovnb)
DO_VMOVN(VMOVNT, vmovnt)
DO_VMOVN(VQMOVUNB, vqmovunb)
DO_VMOVN(VQMOVUNT, vqmovunt)
DO_VMOVN(VQMOVN_BS, vqmovnbs)
DO_VMOVN(VQMOVN_TS, vqmovnts)
DO_VMOVN(VQMOVN_BU, vqmovnbu)
DO_VMOVN(VQMOVN_TU, vqmovntu)
static bool trans_VREV16(DisasContext *s, arg_1op *a)
{
    static MVEGenOneOpFn * const fns[] = {
        gen_helper_mve_vrev16b,
        NULL,
        NULL,
        NULL,
    };
    return do_1op(s, a, fns[a->size]);
}

static bool trans_VREV32(DisasContext *s, arg_1op *a)
{
    static MVEGenOneOpFn * const fns[] = {
        gen_helper_mve_vrev32b,
        gen_helper_mve_vrev32h,
        NULL,
        NULL,
    };
    return do_1op(s, a, fns[a->size]);
}

static bool trans_VREV64(DisasContext *s, arg_1op *a)
{
    static MVEGenOneOpFn * const fns[] = {
        gen_helper_mve_vrev64b,
        gen_helper_mve_vrev64h,
        gen_helper_mve_vrev64w,
        NULL,
    };
    return do_1op(s, a, fns[a->size]);
}
static bool trans_VMVN(DisasContext *s, arg_1op *a)
{
    return do_1op_vec(s, a, gen_helper_mve_vmvn, tcg_gen_gvec_not);
}
static bool trans_VABS_fp(DisasContext *s, arg_1op *a)
{
    static MVEGenOneOpFn * const fns[] = {
        NULL,
        gen_helper_mve_vfabsh,
        gen_helper_mve_vfabss,
        NULL,
    };
    if (!dc_isar_feature(aa32_mve_fp, s)) {
        return false;
    }
    return do_1op(s, a, fns[a->size]);
}

static bool trans_VNEG_fp(DisasContext *s, arg_1op *a)
{
    static MVEGenOneOpFn * const fns[] = {
        NULL,
        gen_helper_mve_vfnegh,
        gen_helper_mve_vfnegs,
        NULL,
    };
    if (!dc_isar_feature(aa32_mve_fp, s)) {
        return false;
    }
    return do_1op(s, a, fns[a->size]);
}
static bool do_2op_vec(DisasContext *s, arg_2op *a, MVEGenTwoOpFn fn,
                       GVecGen3Fn *vecfn)
{
    TCGv_ptr qd, qn, qm;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd | a->qn | a->qm) ||
        !fn) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    if (vecfn && mve_no_predication(s)) {
        vecfn(a->size, mve_qreg_offset(a->qd), mve_qreg_offset(a->qn),
              mve_qreg_offset(a->qm), 16, 16);
    } else {
        qd = mve_qreg_ptr(a->qd);
        qn = mve_qreg_ptr(a->qn);
        qm = mve_qreg_ptr(a->qm);
        fn(tcg_env, qd, qn, qm);
    }
    mve_update_eci(s);
    return true;
}

static bool do_2op(DisasContext *s, arg_2op *a, MVEGenTwoOpFn *fn)
{
    return do_2op_vec(s, a, fn, NULL);
}
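
/*
 * The pattern above is used throughout this file: when the whole insn
 * executes with no predication (mve_no_predication()) we can emit an
 * ordinary TCG vector operation on the 16-byte Q register directly;
 * otherwise we fall back to an out-of-line helper which applies the
 * VPR predicate and ECI state beat by beat.
 */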
#define DO_LOGIC(INSN, HELPER, VECFN)                           \
    static bool trans_##INSN(DisasContext *s, arg_2op *a)       \
    {                                                           \
        return do_2op_vec(s, a, HELPER, VECFN);                 \
    }

DO_LOGIC(VAND, gen_helper_mve_vand, tcg_gen_gvec_and)
DO_LOGIC(VBIC, gen_helper_mve_vbic, tcg_gen_gvec_andc)
DO_LOGIC(VORR, gen_helper_mve_vorr, tcg_gen_gvec_or)
DO_LOGIC(VORN, gen_helper_mve_vorn, tcg_gen_gvec_orc)
DO_LOGIC(VEOR, gen_helper_mve_veor, tcg_gen_gvec_xor)
static bool trans_VPSEL(DisasContext *s, arg_2op *a)
{
    /* This insn updates predication bits */
    s->base.is_jmp = DISAS_UPDATE_NOCHAIN;
    return do_2op(s, a, gen_helper_mve_vpsel);
}
#define DO_2OP_VEC(INSN, FN, VECFN)                             \
    static bool trans_##INSN(DisasContext *s, arg_2op *a)       \
    {                                                           \
        static MVEGenTwoOpFn * const fns[] = {                  \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##w,                             \
            NULL,                                               \
        };                                                      \
        return do_2op_vec(s, a, fns[a->size], VECFN);           \
    }

#define DO_2OP(INSN, FN) DO_2OP_VEC(INSN, FN, NULL)
DO_2OP_VEC(VADD, vadd, tcg_gen_gvec_add)
DO_2OP_VEC(VSUB, vsub, tcg_gen_gvec_sub)
DO_2OP_VEC(VMUL, vmul, tcg_gen_gvec_mul)
DO_2OP(VMULH_S, vmulhs)
DO_2OP(VMULH_U, vmulhu)
DO_2OP(VRMULH_S, vrmulhs)
DO_2OP(VRMULH_U, vrmulhu)
DO_2OP_VEC(VMAX_S, vmaxs, tcg_gen_gvec_smax)
DO_2OP_VEC(VMAX_U, vmaxu, tcg_gen_gvec_umax)
DO_2OP_VEC(VMIN_S, vmins, tcg_gen_gvec_smin)
DO_2OP_VEC(VMIN_U, vminu, tcg_gen_gvec_umin)
DO_2OP(VABD_S, vabds)
DO_2OP(VABD_U, vabdu)
DO_2OP(VHADD_S, vhadds)
DO_2OP(VHADD_U, vhaddu)
DO_2OP(VHSUB_S, vhsubs)
DO_2OP(VHSUB_U, vhsubu)
DO_2OP(VMULL_BS, vmullbs)
DO_2OP(VMULL_BU, vmullbu)
DO_2OP(VMULL_TS, vmullts)
DO_2OP(VMULL_TU, vmulltu)
DO_2OP(VQDMULH, vqdmulh)
DO_2OP(VQRDMULH, vqrdmulh)
DO_2OP(VQADD_S, vqadds)
DO_2OP(VQADD_U, vqaddu)
DO_2OP(VQSUB_S, vqsubs)
DO_2OP(VQSUB_U, vqsubu)
DO_2OP(VSHL_S, vshls)
DO_2OP(VSHL_U, vshlu)
DO_2OP(VRSHL_S, vrshls)
DO_2OP(VRSHL_U, vrshlu)
DO_2OP(VQSHL_S, vqshls)
DO_2OP(VQSHL_U, vqshlu)
DO_2OP(VQRSHL_S, vqrshls)
DO_2OP(VQRSHL_U, vqrshlu)
DO_2OP(VQDMLADH, vqdmladh)
DO_2OP(VQDMLADHX, vqdmladhx)
DO_2OP(VQRDMLADH, vqrdmladh)
DO_2OP(VQRDMLADHX, vqrdmladhx)
DO_2OP(VQDMLSDH, vqdmlsdh)
DO_2OP(VQDMLSDHX, vqdmlsdhx)
DO_2OP(VQRDMLSDH, vqrdmlsdh)
DO_2OP(VQRDMLSDHX, vqrdmlsdhx)
DO_2OP(VRHADD_S, vrhadds)
DO_2OP(VRHADD_U, vrhaddu)
/*
 * VCADD Qd == Qm at size MO_32 is UNPREDICTABLE; we choose not to diagnose
 * so we can reuse the DO_2OP macro. (Our implementation calculates the
 * "expected" results in this case.) Similarly for VHCADD.
 */
DO_2OP(VCADD90, vcadd90)
DO_2OP(VCADD270, vcadd270)
DO_2OP(VHCADD90, vhcadd90)
DO_2OP(VHCADD270, vhcadd270)
static bool trans_VQDMULLB(DisasContext *s, arg_2op *a)
{
    static MVEGenTwoOpFn * const fns[] = {
        NULL,
        gen_helper_mve_vqdmullbh,
        gen_helper_mve_vqdmullbw,
        NULL,
    };
    if (a->size == MO_32 && (a->qd == a->qm || a->qd == a->qn)) {
        /* UNPREDICTABLE; we choose to undef */
        return false;
    }
    return do_2op(s, a, fns[a->size]);
}

static bool trans_VQDMULLT(DisasContext *s, arg_2op *a)
{
    static MVEGenTwoOpFn * const fns[] = {
        NULL,
        gen_helper_mve_vqdmullth,
        gen_helper_mve_vqdmulltw,
        NULL,
    };
    if (a->size == MO_32 && (a->qd == a->qm || a->qd == a->qn)) {
        /* UNPREDICTABLE; we choose to undef */
        return false;
    }
    return do_2op(s, a, fns[a->size]);
}
static bool trans_VMULLP_B(DisasContext *s, arg_2op *a)
{
    /*
     * Note that a->size indicates the output size, ie VMULL.P8
     * is the 8x8->16 operation and a->size is MO_16; VMULL.P16
     * is the 16x16->32 operation and a->size is MO_32.
     */
    static MVEGenTwoOpFn * const fns[] = {
        NULL,
        gen_helper_mve_vmullpbh,
        gen_helper_mve_vmullpbw,
        NULL,
    };
    return do_2op(s, a, fns[a->size]);
}

static bool trans_VMULLP_T(DisasContext *s, arg_2op *a)
{
    /* a->size is as for trans_VMULLP_B */
    static MVEGenTwoOpFn * const fns[] = {
        NULL,
        gen_helper_mve_vmullpth,
        gen_helper_mve_vmullptw,
        NULL,
    };
    return do_2op(s, a, fns[a->size]);
}
/*
 * VADC and VSBC: these perform an add-with-carry or subtract-with-carry
 * of the 32-bit elements in each lane of the input vectors, where the
 * carry-out of each add is the carry-in of the next. The initial carry
 * input is either fixed (0 for VADCI, 1 for VSBCI) or is from FPSCR.C
 * (for VADC and VSBC); the carry out at the end is written back to FPSCR.C.
 * These insns are subject to beat-wise execution. Partial execution
 * of an I=1 (initial carry input fixed) insn which does not
 * execute the first beat must start with the current FPSCR.NZCV
 * value, not the fixed constant input.
 */
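/*
 * That is why trans_VADCI() and trans_VSBCI() below fall back to the
 * plain VADC/VSBC generation when mve_skip_first_beat() is true: the
 * fixed initial carry only applies if beat 0 actually executes.
 */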
static bool trans_VADC(DisasContext *s, arg_2op *a)
{
    return do_2op(s, a, gen_helper_mve_vadc);
}

static bool trans_VADCI(DisasContext *s, arg_2op *a)
{
    if (mve_skip_first_beat(s)) {
        return trans_VADC(s, a);
    }
    return do_2op(s, a, gen_helper_mve_vadci);
}

static bool trans_VSBC(DisasContext *s, arg_2op *a)
{
    return do_2op(s, a, gen_helper_mve_vsbc);
}

static bool trans_VSBCI(DisasContext *s, arg_2op *a)
{
    if (mve_skip_first_beat(s)) {
        return trans_VSBC(s, a);
    }
    return do_2op(s, a, gen_helper_mve_vsbci);
}
#define DO_2OP_FP(INSN, FN)                                     \
    static bool trans_##INSN(DisasContext *s, arg_2op *a)       \
    {                                                           \
        static MVEGenTwoOpFn * const fns[] = {                  \
            NULL,                                               \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##s,                             \
            NULL,                                               \
        };                                                      \
        if (!dc_isar_feature(aa32_mve_fp, s)) {                 \
            return false;                                       \
        }                                                       \
        return do_2op(s, a, fns[a->size]);                      \
    }

DO_2OP_FP(VADD_fp, vfadd)
DO_2OP_FP(VSUB_fp, vfsub)
DO_2OP_FP(VMUL_fp, vfmul)
DO_2OP_FP(VABD_fp, vfabd)
DO_2OP_FP(VMAXNM, vmaxnm)
DO_2OP_FP(VMINNM, vminnm)
DO_2OP_FP(VCADD90_fp, vfcadd90)
DO_2OP_FP(VCADD270_fp, vfcadd270)
DO_2OP_FP(VFMA, vfma)
DO_2OP_FP(VFMS, vfms)
DO_2OP_FP(VCMUL0, vcmul0)
DO_2OP_FP(VCMUL90, vcmul90)
DO_2OP_FP(VCMUL180, vcmul180)
DO_2OP_FP(VCMUL270, vcmul270)
DO_2OP_FP(VCMLA0, vcmla0)
DO_2OP_FP(VCMLA90, vcmla90)
DO_2OP_FP(VCMLA180, vcmla180)
DO_2OP_FP(VCMLA270, vcmla270)
DO_2OP_FP(VMAXNMA, vmaxnma)
DO_2OP_FP(VMINNMA, vminnma)
static bool do_2op_scalar(DisasContext *s, arg_2scalar *a,
                          MVEGenTwoOpScalarFn fn)
{
    TCGv_ptr qd, qn;
    TCGv_i32 rm;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd | a->qn) ||
        !fn) {
        return false;
    }
    if (a->rm == 13 || a->rm == 15) {
        /* UNPREDICTABLE */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qd = mve_qreg_ptr(a->qd);
    qn = mve_qreg_ptr(a->qn);
    rm = load_reg(s, a->rm);
    fn(tcg_env, qd, qn, rm);
    mve_update_eci(s);
    return true;
}
#define DO_2OP_SCALAR(INSN, FN)                                 \
    static bool trans_##INSN(DisasContext *s, arg_2scalar *a)   \
    {                                                           \
        static MVEGenTwoOpScalarFn * const fns[] = {            \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##w,                             \
            NULL,                                               \
        };                                                      \
        return do_2op_scalar(s, a, fns[a->size]);               \
    }

DO_2OP_SCALAR(VADD_scalar, vadd_scalar)
DO_2OP_SCALAR(VSUB_scalar, vsub_scalar)
DO_2OP_SCALAR(VMUL_scalar, vmul_scalar)
DO_2OP_SCALAR(VHADD_S_scalar, vhadds_scalar)
DO_2OP_SCALAR(VHADD_U_scalar, vhaddu_scalar)
DO_2OP_SCALAR(VHSUB_S_scalar, vhsubs_scalar)
DO_2OP_SCALAR(VHSUB_U_scalar, vhsubu_scalar)
DO_2OP_SCALAR(VQADD_S_scalar, vqadds_scalar)
DO_2OP_SCALAR(VQADD_U_scalar, vqaddu_scalar)
DO_2OP_SCALAR(VQSUB_S_scalar, vqsubs_scalar)
DO_2OP_SCALAR(VQSUB_U_scalar, vqsubu_scalar)
DO_2OP_SCALAR(VQDMULH_scalar, vqdmulh_scalar)
DO_2OP_SCALAR(VQRDMULH_scalar, vqrdmulh_scalar)
DO_2OP_SCALAR(VBRSR, vbrsr)
DO_2OP_SCALAR(VMLA, vmla)
DO_2OP_SCALAR(VMLAS, vmlas)
DO_2OP_SCALAR(VQDMLAH, vqdmlah)
DO_2OP_SCALAR(VQRDMLAH, vqrdmlah)
DO_2OP_SCALAR(VQDMLASH, vqdmlash)
DO_2OP_SCALAR(VQRDMLASH, vqrdmlash)
static bool trans_VQDMULLB_scalar(DisasContext *s, arg_2scalar *a)
{
    static MVEGenTwoOpScalarFn * const fns[] = {
        NULL,
        gen_helper_mve_vqdmullb_scalarh,
        gen_helper_mve_vqdmullb_scalarw,
        NULL,
    };
    if (a->qd == a->qn && a->size == MO_32) {
        /* UNPREDICTABLE; we choose to undef */
        return false;
    }
    return do_2op_scalar(s, a, fns[a->size]);
}

static bool trans_VQDMULLT_scalar(DisasContext *s, arg_2scalar *a)
{
    static MVEGenTwoOpScalarFn * const fns[] = {
        NULL,
        gen_helper_mve_vqdmullt_scalarh,
        gen_helper_mve_vqdmullt_scalarw,
        NULL,
    };
    if (a->qd == a->qn && a->size == MO_32) {
        /* UNPREDICTABLE; we choose to undef */
        return false;
    }
    return do_2op_scalar(s, a, fns[a->size]);
}
#define DO_2OP_FP_SCALAR(INSN, FN)                              \
    static bool trans_##INSN(DisasContext *s, arg_2scalar *a)   \
    {                                                           \
        static MVEGenTwoOpScalarFn * const fns[] = {            \
            NULL,                                               \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##s,                             \
            NULL,                                               \
        };                                                      \
        if (!dc_isar_feature(aa32_mve_fp, s)) {                 \
            return false;                                       \
        }                                                       \
        return do_2op_scalar(s, a, fns[a->size]);               \
    }

DO_2OP_FP_SCALAR(VADD_fp_scalar, vfadd_scalar)
DO_2OP_FP_SCALAR(VSUB_fp_scalar, vfsub_scalar)
DO_2OP_FP_SCALAR(VMUL_fp_scalar, vfmul_scalar)
DO_2OP_FP_SCALAR(VFMA_scalar, vfma_scalar)
DO_2OP_FP_SCALAR(VFMAS_scalar, vfmas_scalar)
static bool do_long_dual_acc(DisasContext *s, arg_vmlaldav *a,
                             MVEGenLongDualAccOpFn *fn)
{
    TCGv_ptr qn, qm;
    TCGv_i64 rda_i, rda_o;
    TCGv_i32 rdalo, rdahi;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qn | a->qm) ||
        !fn) {
        return false;
    }
    /*
     * rdahi == 13 is UNPREDICTABLE; rdahi == 15 is a related
     * encoding; rdalo always has bit 0 clear so cannot be 13 or 15.
     */
    if (a->rdahi == 13 || a->rdahi == 15) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qn = mve_qreg_ptr(a->qn);
    qm = mve_qreg_ptr(a->qm);

    /*
     * This insn is subject to beat-wise execution. Partial execution
     * of an A=0 (no-accumulate) insn which does not execute the first
     * beat must start with the current rda value, not 0.
     */
    rda_o = tcg_temp_new_i64();
    if (a->a || mve_skip_first_beat(s)) {
        rda_i = rda_o;
        rdalo = load_reg(s, a->rdalo);
        rdahi = load_reg(s, a->rdahi);
        tcg_gen_concat_i32_i64(rda_i, rdalo, rdahi);
    } else {
        rda_i = tcg_constant_i64(0);
    }

    fn(rda_o, tcg_env, qn, qm, rda_i);

    rdalo = tcg_temp_new_i32();
    rdahi = tcg_temp_new_i32();
    tcg_gen_extrl_i64_i32(rdalo, rda_o);
    tcg_gen_extrh_i64_i32(rdahi, rda_o);
    store_reg(s, a->rdalo, rdalo);
    store_reg(s, a->rdahi, rdahi);
    mve_update_eci(s);
    return true;
}
static bool trans_VMLALDAV_S(DisasContext *s, arg_vmlaldav *a)
{
    static MVEGenLongDualAccOpFn * const fns[4][2] = {
        { NULL, NULL },
        { gen_helper_mve_vmlaldavsh, gen_helper_mve_vmlaldavxsh },
        { gen_helper_mve_vmlaldavsw, gen_helper_mve_vmlaldavxsw },
        { NULL, NULL },
    };
    return do_long_dual_acc(s, a, fns[a->size][a->x]);
}

static bool trans_VMLALDAV_U(DisasContext *s, arg_vmlaldav *a)
{
    static MVEGenLongDualAccOpFn * const fns[4][2] = {
        { NULL, NULL },
        { gen_helper_mve_vmlaldavuh, NULL },
        { gen_helper_mve_vmlaldavuw, NULL },
        { NULL, NULL },
    };
    return do_long_dual_acc(s, a, fns[a->size][a->x]);
}

static bool trans_VMLSLDAV(DisasContext *s, arg_vmlaldav *a)
{
    static MVEGenLongDualAccOpFn * const fns[4][2] = {
        { NULL, NULL },
        { gen_helper_mve_vmlsldavsh, gen_helper_mve_vmlsldavxsh },
        { gen_helper_mve_vmlsldavsw, gen_helper_mve_vmlsldavxsw },
        { NULL, NULL },
    };
    return do_long_dual_acc(s, a, fns[a->size][a->x]);
}

static bool trans_VRMLALDAVH_S(DisasContext *s, arg_vmlaldav *a)
{
    static MVEGenLongDualAccOpFn * const fns[] = {
        gen_helper_mve_vrmlaldavhsw, gen_helper_mve_vrmlaldavhxsw,
    };
    return do_long_dual_acc(s, a, fns[a->x]);
}

static bool trans_VRMLALDAVH_U(DisasContext *s, arg_vmlaldav *a)
{
    static MVEGenLongDualAccOpFn * const fns[] = {
        gen_helper_mve_vrmlaldavhuw, NULL,
    };
    return do_long_dual_acc(s, a, fns[a->x]);
}

static bool trans_VRMLSLDAVH(DisasContext *s, arg_vmlaldav *a)
{
    static MVEGenLongDualAccOpFn * const fns[] = {
        gen_helper_mve_vrmlsldavhsw, gen_helper_mve_vrmlsldavhxsw,
    };
    return do_long_dual_acc(s, a, fns[a->x]);
}
static bool do_dual_acc(DisasContext *s, arg_vmladav *a, MVEGenDualAccOpFn *fn)
{
    TCGv_ptr qn, qm;
    TCGv_i32 rda_i, rda_o;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qn) ||
        !fn) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qn = mve_qreg_ptr(a->qn);
    qm = mve_qreg_ptr(a->qm);

    /*
     * This insn is subject to beat-wise execution. Partial execution
     * of an A=0 (no-accumulate) insn which does not execute the first
     * beat must start with the current rda value, not 0.
     */
    if (a->a || mve_skip_first_beat(s)) {
        rda_o = rda_i = load_reg(s, a->rda);
    } else {
        rda_i = tcg_constant_i32(0);
        rda_o = tcg_temp_new_i32();
    }

    fn(rda_o, tcg_env, qn, qm, rda_i);
    store_reg(s, a->rda, rda_o);
    mve_update_eci(s);
    return true;
}
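
/*
 * Note the accumulator handling above: when we accumulate, the input and
 * output can share one TCG value loaded from Rda, but in the
 * non-accumulating case the input is a constant zero so the output needs
 * its own temporary.
 */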
#define DO_DUAL_ACC(INSN, FN)                                           \
    static bool trans_##INSN(DisasContext *s, arg_vmladav *a)           \
    {                                                                    \
        static MVEGenDualAccOpFn * const fns[4][2] = {                   \
            { gen_helper_mve_##FN##b, gen_helper_mve_##FN##xb },         \
            { gen_helper_mve_##FN##h, gen_helper_mve_##FN##xh },         \
            { gen_helper_mve_##FN##w, gen_helper_mve_##FN##xw },         \
            { NULL, NULL },                                              \
        };                                                               \
        return do_dual_acc(s, a, fns[a->size][a->x]);                    \
    }

DO_DUAL_ACC(VMLADAV_S, vmladavs)
DO_DUAL_ACC(VMLSDAV, vmlsdav)

static bool trans_VMLADAV_U(DisasContext *s, arg_vmladav *a)
{
    static MVEGenDualAccOpFn * const fns[4][2] = {
        { gen_helper_mve_vmladavub, NULL },
        { gen_helper_mve_vmladavuh, NULL },
        { gen_helper_mve_vmladavuw, NULL },
        { NULL, NULL },
    };
    return do_dual_acc(s, a, fns[a->size][a->x]);
}
static void gen_vpst(DisasContext *s, uint32_t mask)
{
    /*
     * Set the VPR mask fields. We take advantage of MASK01 and MASK23
     * being adjacent fields in the register.
     *
     * Updating the masks is not predicated, but it is subject to beat-wise
     * execution, and the mask is updated on the odd-numbered beats.
     * So if PSR.ECI says we should skip beat 1, we mustn't update the
     * 01 mask field.
     */
    TCGv_i32 vpr = load_cpu_field(v7m.vpr);
    switch (s->eci) {
    case ECI_NONE:
    case ECI_A0:
        /* Update both 01 and 23 fields */
        tcg_gen_deposit_i32(vpr, vpr,
                            tcg_constant_i32(mask | (mask << 4)),
                            R_V7M_VPR_MASK01_SHIFT,
                            R_V7M_VPR_MASK01_LENGTH + R_V7M_VPR_MASK23_LENGTH);
        break;
    case ECI_A0A1:
    case ECI_A0A1A2:
    case ECI_A0A1A2B0:
        /* Update only the 23 mask field */
        tcg_gen_deposit_i32(vpr, vpr,
                            tcg_constant_i32(mask),
                            R_V7M_VPR_MASK23_SHIFT, R_V7M_VPR_MASK23_LENGTH);
        break;
    default:
        g_assert_not_reached();
    }
    store_cpu_field(vpr, v7m.vpr);
}
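
/*
 * gen_vpst() is used both for VPST itself and for the VPT forms of the
 * vector compare instructions (see do_vcmp() and do_vcmp_scalar() below),
 * which pass a non-zero mask.
 */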
static bool trans_VPST(DisasContext *s, arg_VPST *a)
{
    /* mask == 0 is a "related encoding" */
    if (!dc_isar_feature(aa32_mve, s) || !a->mask) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }
    gen_vpst(s, a->mask);
    mve_update_and_store_eci(s);
    return true;
}
static bool trans_VPNOT(DisasContext *s, arg_VPNOT *a)
{
    /*
     * Invert the predicate in VPR.P0. We have call out to
     * a helper because this insn itself is beatwise and can
     * be predicated.
     */
    if (!dc_isar_feature(aa32_mve, s)) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    gen_helper_mve_vpnot(tcg_env);
    /* This insn updates predication bits */
    s->base.is_jmp = DISAS_UPDATE_NOCHAIN;
    mve_update_eci(s);
    return true;
}
static bool trans_VADDV(DisasContext *s, arg_VADDV *a)
{
    /* VADDV: vector add across vector */
    static MVEGenVADDVFn * const fns[4][2] = {
        { gen_helper_mve_vaddvsb, gen_helper_mve_vaddvub },
        { gen_helper_mve_vaddvsh, gen_helper_mve_vaddvuh },
        { gen_helper_mve_vaddvsw, gen_helper_mve_vaddvuw },
        { NULL, NULL }
    };
    TCGv_ptr qm;
    TCGv_i32 rda_i, rda_o;

    if (!dc_isar_feature(aa32_mve, s) ||
        a->size == 3) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    /*
     * This insn is subject to beat-wise execution. Partial execution
     * of an A=0 (no-accumulate) insn which does not execute the first
     * beat must start with the current value of Rda, not zero.
     */
    if (a->a || mve_skip_first_beat(s)) {
        /* Accumulate input from Rda */
        rda_o = rda_i = load_reg(s, a->rda);
    } else {
        /* Accumulate starting at zero */
        rda_i = tcg_constant_i32(0);
        rda_o = tcg_temp_new_i32();
    }

    qm = mve_qreg_ptr(a->qm);
    fns[a->size][a->u](rda_o, tcg_env, qm, rda_i);
    store_reg(s, a->rda, rda_o);
    mve_update_eci(s);
    return true;
}
static bool trans_VADDLV(DisasContext *s, arg_VADDLV *a)
{
    /*
     * Vector Add Long Across Vector: accumulate the 32-bit
     * elements of the vector into a 64-bit result stored in
     * a pair of general-purpose registers.
     * No need to check Qm's bank: it is only 3 bits in decode.
     */
    TCGv_ptr qm;
    TCGv_i64 rda_i, rda_o;
    TCGv_i32 rdalo, rdahi;

    if (!dc_isar_feature(aa32_mve, s)) {
        return false;
    }
    /*
     * rdahi == 13 is UNPREDICTABLE; rdahi == 15 is a related
     * encoding; rdalo always has bit 0 clear so cannot be 13 or 15.
     */
    if (a->rdahi == 13 || a->rdahi == 15) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    /*
     * This insn is subject to beat-wise execution. Partial execution
     * of an A=0 (no-accumulate) insn which does not execute the first
     * beat must start with the current value of RdaHi:RdaLo, not zero.
     */
    rda_o = tcg_temp_new_i64();
    if (a->a || mve_skip_first_beat(s)) {
        /* Accumulate input from RdaHi:RdaLo */
        rda_i = rda_o;
        rdalo = load_reg(s, a->rdalo);
        rdahi = load_reg(s, a->rdahi);
        tcg_gen_concat_i32_i64(rda_i, rdalo, rdahi);
    } else {
        /* Accumulate starting at zero */
        rda_i = tcg_constant_i64(0);
    }

    qm = mve_qreg_ptr(a->qm);
    if (a->u) {
        gen_helper_mve_vaddlv_u(rda_o, tcg_env, qm, rda_i);
    } else {
        gen_helper_mve_vaddlv_s(rda_o, tcg_env, qm, rda_i);
    }

    rdalo = tcg_temp_new_i32();
    rdahi = tcg_temp_new_i32();
    tcg_gen_extrl_i64_i32(rdalo, rda_o);
    tcg_gen_extrh_i64_i32(rdahi, rda_o);
    store_reg(s, a->rdalo, rdalo);
    store_reg(s, a->rdahi, rdahi);
    mve_update_eci(s);
    return true;
}
static bool do_1imm(DisasContext *s, arg_1imm *a, MVEGenOneOpImmFn *fn,
                    GVecGen2iFn *vecfn)
{
    TCGv_ptr qd;
    uint64_t imm;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd) ||
        !fn) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    imm = asimd_imm_const(a->imm, a->cmode, a->op);

    if (vecfn && mve_no_predication(s)) {
        vecfn(MO_64, mve_qreg_offset(a->qd), mve_qreg_offset(a->qd),
              imm, 16, 16);
    } else {
        qd = mve_qreg_ptr(a->qd);
        fn(tcg_env, qd, tcg_constant_i64(imm));
    }
    mve_update_eci(s);
    return true;
}
static void gen_gvec_vmovi(unsigned vece, uint32_t dofs, uint32_t aofs,
                           int64_t c, uint32_t oprsz, uint32_t maxsz)
{
    tcg_gen_gvec_dup_imm(vece, dofs, oprsz, maxsz, c);
}
static bool trans_Vimm_1r(DisasContext *s, arg_1imm *a)
{
    /* Handle decode of cmode/op here between VORR/VBIC/VMOV */
    MVEGenOneOpImmFn *fn;
    GVecGen2iFn *vecfn;

    if ((a->cmode & 1) && a->cmode < 12) {
        if (a->op) {
            /*
             * For op=1, the immediate will be inverted by asimd_imm_const(),
             * so the VBIC becomes a logical AND operation.
             */
            fn = gen_helper_mve_vandi;
            vecfn = tcg_gen_gvec_andi;
        } else {
            fn = gen_helper_mve_vorri;
            vecfn = tcg_gen_gvec_ori;
        }
    } else {
        /* There is one unallocated cmode/op combination in this space */
        if (a->cmode == 15 && a->op == 1) {
            return false;
        }
        /* asimd_imm_const() sorts out VMVNI vs VMOVI for us */
        fn = gen_helper_mve_vmovi;
        vecfn = gen_gvec_vmovi;
    }
    return do_1imm(s, a, fn, vecfn);
}
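
/*
 * In other words: odd cmode values below 12 are the VORR/VBIC immediate
 * forms (with op selecting VBIC), cmode == 15 with op == 1 is unallocated,
 * and everything else is a VMOV/VMVN immediate.
 */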
static bool do_2shift_vec(DisasContext *s, arg_2shift *a, MVEGenTwoOpShiftFn fn,
                          bool negateshift, GVecGen2iFn vecfn)
{
    TCGv_ptr qd, qm;
    int shift = a->shift;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd | a->qm) ||
        !fn) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    /*
     * When we handle a right shift insn using a left-shift helper
     * which permits a negative shift count to indicate a right-shift,
     * we must negate the shift count.
     */
    if (negateshift) {
        shift = -shift;
    }

    if (vecfn && mve_no_predication(s)) {
        vecfn(a->size, mve_qreg_offset(a->qd), mve_qreg_offset(a->qm),
              shift, 16, 16);
    } else {
        qd = mve_qreg_ptr(a->qd);
        qm = mve_qreg_ptr(a->qm);
        fn(tcg_env, qd, qm, tcg_constant_i32(shift));
    }
    mve_update_eci(s);
    return true;
}

static bool do_2shift(DisasContext *s, arg_2shift *a, MVEGenTwoOpShiftFn fn,
                      bool negateshift)
{
    return do_2shift_vec(s, a, fn, negateshift, NULL);
}
#define DO_2SHIFT_VEC(INSN, FN, NEGATESHIFT, VECFN)                     \
    static bool trans_##INSN(DisasContext *s, arg_2shift *a)            \
    {                                                                    \
        static MVEGenTwoOpShiftFn * const fns[] = {                      \
            gen_helper_mve_##FN##b,                                      \
            gen_helper_mve_##FN##h,                                      \
            gen_helper_mve_##FN##w,                                      \
            NULL,                                                        \
        };                                                               \
        return do_2shift_vec(s, a, fns[a->size], NEGATESHIFT, VECFN);    \
    }

#define DO_2SHIFT(INSN, FN, NEGATESHIFT)        \
    DO_2SHIFT_VEC(INSN, FN, NEGATESHIFT, NULL)
static void do_gvec_shri_s(unsigned vece, uint32_t dofs, uint32_t aofs,
                           int64_t shift, uint32_t oprsz, uint32_t maxsz)
{
    /*
     * We get here with a negated shift count, and we must handle
     * shifts by the element size, which tcg_gen_gvec_sari() does not do.
     */
    shift = -shift;
    if (shift == (8 << vece)) {
        shift--;
    }
    tcg_gen_gvec_sari(vece, dofs, aofs, shift, oprsz, maxsz);
}

static void do_gvec_shri_u(unsigned vece, uint32_t dofs, uint32_t aofs,
                           int64_t shift, uint32_t oprsz, uint32_t maxsz)
{
    /*
     * We get here with a negated shift count, and we must handle
     * shifts by the element size, which tcg_gen_gvec_shri() does not do.
     */
    shift = -shift;
    if (shift == (8 << vece)) {
        tcg_gen_gvec_dup_imm(vece, dofs, oprsz, maxsz, 0);
    } else {
        tcg_gen_gvec_shri(vece, dofs, aofs, shift, oprsz, maxsz);
    }
}
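
/*
 * The special cases above are safe because an arithmetic shift right by
 * the element size gives the same result as a shift by (element size - 1)
 * (every bit becomes a copy of the sign bit), while a logical shift right
 * by the element size simply produces zeroes.
 */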
DO_2SHIFT_VEC(VSHLI, vshli_u, false, tcg_gen_gvec_shli)
DO_2SHIFT(VQSHLI_S, vqshli_s, false)
DO_2SHIFT(VQSHLI_U, vqshli_u, false)
DO_2SHIFT(VQSHLUI, vqshlui_s, false)
/* These right shifts use a left-shift helper with negated shift count */
DO_2SHIFT_VEC(VSHRI_S, vshli_s, true, do_gvec_shri_s)
DO_2SHIFT_VEC(VSHRI_U, vshli_u, true, do_gvec_shri_u)
DO_2SHIFT(VRSHRI_S, vrshli_s, true)
DO_2SHIFT(VRSHRI_U, vrshli_u, true)

DO_2SHIFT_VEC(VSRI, vsri, false, gen_gvec_sri)
DO_2SHIFT_VEC(VSLI, vsli, false, gen_gvec_sli)
#define DO_2SHIFT_FP(INSN, FN)                                  \
    static bool trans_##INSN(DisasContext *s, arg_2shift *a)    \
    {                                                           \
        if (!dc_isar_feature(aa32_mve_fp, s)) {                 \
            return false;                                       \
        }                                                       \
        return do_2shift(s, a, gen_helper_mve_##FN, false);     \
    }

DO_2SHIFT_FP(VCVT_SH_fixed, vcvt_sh)
DO_2SHIFT_FP(VCVT_UH_fixed, vcvt_uh)
DO_2SHIFT_FP(VCVT_HS_fixed, vcvt_hs)
DO_2SHIFT_FP(VCVT_HU_fixed, vcvt_hu)
DO_2SHIFT_FP(VCVT_SF_fixed, vcvt_sf)
DO_2SHIFT_FP(VCVT_UF_fixed, vcvt_uf)
DO_2SHIFT_FP(VCVT_FS_fixed, vcvt_fs)
DO_2SHIFT_FP(VCVT_FU_fixed, vcvt_fu)
static bool do_2shift_scalar(DisasContext *s, arg_shl_scalar *a,
                             MVEGenTwoOpShiftFn *fn)
{
    TCGv_ptr qda;
    TCGv_i32 rm;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qda) ||
        a->rm == 13 || a->rm == 15 || !fn) {
        /* Rm cases are UNPREDICTABLE */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qda = mve_qreg_ptr(a->qda);
    rm = load_reg(s, a->rm);
    fn(tcg_env, qda, qda, rm);
    mve_update_eci(s);
    return true;
}
#define DO_2SHIFT_SCALAR(INSN, FN)                                      \
    static bool trans_##INSN(DisasContext *s, arg_shl_scalar *a)        \
    {                                                                    \
        static MVEGenTwoOpShiftFn * const fns[] = {                      \
            gen_helper_mve_##FN##b,                                      \
            gen_helper_mve_##FN##h,                                      \
            gen_helper_mve_##FN##w,                                      \
            NULL,                                                        \
        };                                                               \
        return do_2shift_scalar(s, a, fns[a->size]);                     \
    }

DO_2SHIFT_SCALAR(VSHL_S_scalar, vshli_s)
DO_2SHIFT_SCALAR(VSHL_U_scalar, vshli_u)
DO_2SHIFT_SCALAR(VRSHL_S_scalar, vrshli_s)
DO_2SHIFT_SCALAR(VRSHL_U_scalar, vrshli_u)
DO_2SHIFT_SCALAR(VQSHL_S_scalar, vqshli_s)
DO_2SHIFT_SCALAR(VQSHL_U_scalar, vqshli_u)
DO_2SHIFT_SCALAR(VQRSHL_S_scalar, vqrshli_s)
DO_2SHIFT_SCALAR(VQRSHL_U_scalar, vqrshli_u)
#define DO_VSHLL(INSN, FN)                                              \
    static bool trans_##INSN(DisasContext *s, arg_2shift *a)            \
    {                                                                    \
        static MVEGenTwoOpShiftFn * const fns[] = {                      \
            gen_helper_mve_##FN##b,                                      \
            gen_helper_mve_##FN##h,                                      \
        };                                                               \
        return do_2shift_vec(s, a, fns[a->size], false, do_gvec_##FN);   \
    }

/*
 * For the VSHLL vector helpers, the vece is the size of the input
 * (ie MO_8 or MO_16); the helpers want to work in the output size.
 * The shift count can be 0..<input size>, inclusive. (0 is VMOVL.)
 */
static void do_gvec_vshllbs(unsigned vece, uint32_t dofs, uint32_t aofs,
                            int64_t shift, uint32_t oprsz, uint32_t maxsz)
{
    unsigned ovece = vece + 1;
    unsigned ibits = vece == MO_8 ? 8 : 16;
    tcg_gen_gvec_shli(ovece, dofs, aofs, ibits, oprsz, maxsz);
    tcg_gen_gvec_sari(ovece, dofs, dofs, ibits - shift, oprsz, maxsz);
}

static void do_gvec_vshllbu(unsigned vece, uint32_t dofs, uint32_t aofs,
                            int64_t shift, uint32_t oprsz, uint32_t maxsz)
{
    unsigned ovece = vece + 1;
    tcg_gen_gvec_andi(ovece, dofs, aofs,
                      ovece == MO_16 ? 0xff : 0xffff, oprsz, maxsz);
    tcg_gen_gvec_shli(ovece, dofs, dofs, shift, oprsz, maxsz);
}

static void do_gvec_vshllts(unsigned vece, uint32_t dofs, uint32_t aofs,
                            int64_t shift, uint32_t oprsz, uint32_t maxsz)
{
    unsigned ovece = vece + 1;
    unsigned ibits = vece == MO_8 ? 8 : 16;
    if (shift == 0) {
        tcg_gen_gvec_sari(ovece, dofs, aofs, ibits, oprsz, maxsz);
    } else {
        tcg_gen_gvec_andi(ovece, dofs, aofs,
                          ovece == MO_16 ? 0xff00 : 0xffff0000, oprsz, maxsz);
        tcg_gen_gvec_sari(ovece, dofs, dofs, ibits - shift, oprsz, maxsz);
    }
}

static void do_gvec_vshlltu(unsigned vece, uint32_t dofs, uint32_t aofs,
                            int64_t shift, uint32_t oprsz, uint32_t maxsz)
{
    unsigned ovece = vece + 1;
    unsigned ibits = vece == MO_8 ? 8 : 16;
    if (shift == 0) {
        tcg_gen_gvec_shri(ovece, dofs, aofs, ibits, oprsz, maxsz);
    } else {
        tcg_gen_gvec_andi(ovece, dofs, aofs,
                          ovece == MO_16 ? 0xff00 : 0xffff0000, oprsz, maxsz);
        tcg_gen_gvec_shri(ovece, dofs, dofs, ibits - shift, oprsz, maxsz);
    }
}
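
/*
 * Worked example: for VSHLL.S8 of the bottom halves (do_gvec_vshllbs with
 * vece == MO_8) and a shift of 2, each input byte is first moved to the
 * top of its 16-bit output lane (shli by 8) and then arithmetic-shifted
 * right by 8 - 2 = 6, which sign-extends the byte and applies the
 * left shift by 2 in one go.
 */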
DO_VSHLL(VSHLL_BS, vshllbs)
DO_VSHLL(VSHLL_BU, vshllbu)
DO_VSHLL(VSHLL_TS, vshllts)
DO_VSHLL(VSHLL_TU, vshlltu)
#define DO_2SHIFT_N(INSN, FN)                                           \
    static bool trans_##INSN(DisasContext *s, arg_2shift *a)            \
    {                                                                    \
        static MVEGenTwoOpShiftFn * const fns[] = {                      \
            gen_helper_mve_##FN##b,                                      \
            gen_helper_mve_##FN##h,                                      \
        };                                                               \
        return do_2shift(s, a, fns[a->size], false);                     \
    }

DO_2SHIFT_N(VSHRNB, vshrnb)
DO_2SHIFT_N(VSHRNT, vshrnt)
DO_2SHIFT_N(VRSHRNB, vrshrnb)
DO_2SHIFT_N(VRSHRNT, vrshrnt)
DO_2SHIFT_N(VQSHRNB_S, vqshrnb_s)
DO_2SHIFT_N(VQSHRNT_S, vqshrnt_s)
DO_2SHIFT_N(VQSHRNB_U, vqshrnb_u)
DO_2SHIFT_N(VQSHRNT_U, vqshrnt_u)
DO_2SHIFT_N(VQSHRUNB, vqshrunb)
DO_2SHIFT_N(VQSHRUNT, vqshrunt)
DO_2SHIFT_N(VQRSHRNB_S, vqrshrnb_s)
DO_2SHIFT_N(VQRSHRNT_S, vqrshrnt_s)
DO_2SHIFT_N(VQRSHRNB_U, vqrshrnb_u)
DO_2SHIFT_N(VQRSHRNT_U, vqrshrnt_u)
DO_2SHIFT_N(VQRSHRUNB, vqrshrunb)
DO_2SHIFT_N(VQRSHRUNT, vqrshrunt)
static bool trans_VSHLC(DisasContext *s, arg_VSHLC *a)
{
    /*
     * Whole Vector Left Shift with Carry. The carry is taken
     * from a general purpose register and written back there.
     * An imm of 0 means "shift by 32".
     */
    TCGv_ptr qd;
    TCGv_i32 rdm;

    if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qd)) {
        return false;
    }
    if (a->rdm == 13 || a->rdm == 15) {
        /* CONSTRAINED UNPREDICTABLE: we UNDEF */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qd = mve_qreg_ptr(a->qd);
    rdm = load_reg(s, a->rdm);
    gen_helper_mve_vshlc(rdm, tcg_env, qd, rdm, tcg_constant_i32(a->imm));
    store_reg(s, a->rdm, rdm);
    mve_update_eci(s);
    return true;
}
static bool do_vidup(DisasContext *s, arg_vidup *a, MVEGenVIDUPFn *fn)
{
    TCGv_ptr qd;
    TCGv_i32 rn;

    /*
     * Vector increment/decrement with wrap and duplicate (VIDUP, VDDUP).
     * This fills the vector with elements of successively increasing
     * or decreasing values, starting from Rn.
     */
    if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qd)) {
        return false;
    }
    if (a->size == MO_64) {
        /* size 0b11 is another encoding */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qd = mve_qreg_ptr(a->qd);
    rn = load_reg(s, a->rn);
    fn(rn, tcg_env, qd, rn, tcg_constant_i32(a->imm));
    store_reg(s, a->rn, rn);
    mve_update_eci(s);
    return true;
}
static bool do_viwdup(DisasContext *s, arg_viwdup *a, MVEGenVIWDUPFn *fn)
{
    TCGv_ptr qd;
    TCGv_i32 rn, rm;

    /*
     * Vector increment/decrement with wrap and duplicate (VIWDUP, VDWDUP)
     * This fills the vector with elements of successively increasing
     * or decreasing values, starting from Rn. Rm specifies a point where
     * the count wraps back around to 0. The updated offset is written back
     * to Rn.
     */
    if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qd)) {
        return false;
    }
    if (!fn || a->rm == 13 || a->rm == 15) {
        /*
         * size 0b11 is another encoding; Rm == 13 is UNPREDICTABLE;
         * Rm == 15 is the VIDUP, VDDUP encoding.
         */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qd = mve_qreg_ptr(a->qd);
    rn = load_reg(s, a->rn);
    rm = load_reg(s, a->rm);
    fn(rn, tcg_env, qd, rn, rm, tcg_constant_i32(a->imm));
    store_reg(s, a->rn, rn);
    mve_update_eci(s);
    return true;
}
static bool trans_VIDUP(DisasContext *s, arg_vidup *a)
{
    static MVEGenVIDUPFn * const fns[] = {
        gen_helper_mve_vidupb,
        gen_helper_mve_viduph,
        gen_helper_mve_vidupw,
        NULL,
    };
    return do_vidup(s, a, fns[a->size]);
}

static bool trans_VDDUP(DisasContext *s, arg_vidup *a)
{
    static MVEGenVIDUPFn * const fns[] = {
        gen_helper_mve_vidupb,
        gen_helper_mve_viduph,
        gen_helper_mve_vidupw,
        NULL,
    };
    /* VDDUP is just like VIDUP but with a negative immediate */
    a->imm = -a->imm;
    return do_vidup(s, a, fns[a->size]);
}

static bool trans_VIWDUP(DisasContext *s, arg_viwdup *a)
{
    static MVEGenVIWDUPFn * const fns[] = {
        gen_helper_mve_viwdupb,
        gen_helper_mve_viwduph,
        gen_helper_mve_viwdupw,
        NULL,
    };
    return do_viwdup(s, a, fns[a->size]);
}

static bool trans_VDWDUP(DisasContext *s, arg_viwdup *a)
{
    static MVEGenVIWDUPFn * const fns[] = {
        gen_helper_mve_vdwdupb,
        gen_helper_mve_vdwduph,
        gen_helper_mve_vdwdupw,
        NULL,
    };
    return do_viwdup(s, a, fns[a->size]);
}
static bool do_vcmp(DisasContext *s, arg_vcmp *a, MVEGenCmpFn *fn)
{
    TCGv_ptr qn, qm;

    if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qm) ||
        !fn) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qn = mve_qreg_ptr(a->qn);
    qm = mve_qreg_ptr(a->qm);
    fn(tcg_env, qn, qm);
    if (a->mask) {
        /* VPT */
        gen_vpst(s, a->mask);
    }
    /* This insn updates predication bits */
    s->base.is_jmp = DISAS_UPDATE_NOCHAIN;
    mve_update_eci(s);
    return true;
}
static bool do_vcmp_scalar(DisasContext *s, arg_vcmp_scalar *a,
                           MVEGenScalarCmpFn *fn)
{
    TCGv_ptr qn;
    TCGv_i32 rm;

    if (!dc_isar_feature(aa32_mve, s) || !fn || a->rm == 13) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qn = mve_qreg_ptr(a->qn);
    if (a->rm == 15) {
        /* Encoding Rm=0b1111 means "constant zero" */
        rm = tcg_constant_i32(0);
    } else {
        rm = load_reg(s, a->rm);
    }
    fn(tcg_env, qn, rm);
    if (a->mask) {
        /* VPT */
        gen_vpst(s, a->mask);
    }
    /* This insn updates predication bits */
    s->base.is_jmp = DISAS_UPDATE_NOCHAIN;
    mve_update_eci(s);
    return true;
}
#define DO_VCMP(INSN, FN)                                       \
    static bool trans_##INSN(DisasContext *s, arg_vcmp *a)      \
    {                                                           \
        static MVEGenCmpFn * const fns[] = {                    \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##w,                             \
            NULL,                                               \
        };                                                      \
        return do_vcmp(s, a, fns[a->size]);                     \
    }                                                           \
    static bool trans_##INSN##_scalar(DisasContext *s,          \
                                      arg_vcmp_scalar *a)       \
    {                                                           \
        static MVEGenScalarCmpFn * const fns[] = {              \
            gen_helper_mve_##FN##_scalarb,                      \
            gen_helper_mve_##FN##_scalarh,                      \
            gen_helper_mve_##FN##_scalarw,                      \
            NULL,                                               \
        };                                                      \
        return do_vcmp_scalar(s, a, fns[a->size]);              \
    }

DO_VCMP(VCMPEQ, vcmpeq)
DO_VCMP(VCMPNE, vcmpne)
DO_VCMP(VCMPCS, vcmpcs)
DO_VCMP(VCMPHI, vcmphi)
DO_VCMP(VCMPGE, vcmpge)
DO_VCMP(VCMPLT, vcmplt)
DO_VCMP(VCMPGT, vcmpgt)
DO_VCMP(VCMPLE, vcmple)
#define DO_VCMP_FP(INSN, FN)                                    \
    static bool trans_##INSN(DisasContext *s, arg_vcmp *a)      \
    {                                                           \
        static MVEGenCmpFn * const fns[] = {                    \
            NULL,                                               \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##s,                             \
            NULL,                                               \
        };                                                      \
        if (!dc_isar_feature(aa32_mve_fp, s)) {                 \
            return false;                                       \
        }                                                       \
        return do_vcmp(s, a, fns[a->size]);                     \
    }                                                           \
    static bool trans_##INSN##_scalar(DisasContext *s,          \
                                      arg_vcmp_scalar *a)       \
    {                                                           \
        static MVEGenScalarCmpFn * const fns[] = {              \
            NULL,                                               \
            gen_helper_mve_##FN##_scalarh,                      \
            gen_helper_mve_##FN##_scalars,                      \
            NULL,                                               \
        };                                                      \
        if (!dc_isar_feature(aa32_mve_fp, s)) {                 \
            return false;                                       \
        }                                                       \
        return do_vcmp_scalar(s, a, fns[a->size]);              \
    }

DO_VCMP_FP(VCMPEQ_fp, vfcmpeq)
DO_VCMP_FP(VCMPNE_fp, vfcmpne)
DO_VCMP_FP(VCMPGE_fp, vfcmpge)
DO_VCMP_FP(VCMPLT_fp, vfcmplt)
DO_VCMP_FP(VCMPGT_fp, vfcmpgt)
DO_VCMP_FP(VCMPLE_fp, vfcmple)
static bool do_vmaxv(DisasContext *s, arg_vmaxv *a, MVEGenVADDVFn fn)
{
    /*
     * MIN/MAX operations across a vector: compute the min or
     * max of the initial value in a general purpose register
     * and all the elements in the vector, and store it back
     * into the general purpose register.
     */
    TCGv_ptr qm;
    TCGv_i32 rda;

    if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qm) ||
        !fn || a->rda == 13 || a->rda == 15) {
        /* Rda cases are UNPREDICTABLE */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qm = mve_qreg_ptr(a->qm);
    rda = load_reg(s, a->rda);
    fn(rda, tcg_env, qm, rda);
    store_reg(s, a->rda, rda);
    mve_update_eci(s);
    return true;
}
#define DO_VMAXV(INSN, FN)                                      \
    static bool trans_##INSN(DisasContext *s, arg_vmaxv *a)     \
    {                                                           \
        static MVEGenVADDVFn * const fns[] = {                  \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##w,                             \
            NULL,                                               \
        };                                                      \
        return do_vmaxv(s, a, fns[a->size]);                    \
    }

DO_VMAXV(VMAXV_S, vmaxvs)
DO_VMAXV(VMAXV_U, vmaxvu)
DO_VMAXV(VMAXAV, vmaxav)
DO_VMAXV(VMINV_S, vminvs)
DO_VMAXV(VMINV_U, vminvu)
DO_VMAXV(VMINAV, vminav)

#define DO_VMAXV_FP(INSN, FN)                                   \
    static bool trans_##INSN(DisasContext *s, arg_vmaxv *a)     \
    {                                                           \
        static MVEGenVADDVFn * const fns[] = {                  \
            NULL,                                               \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##s,                             \
            NULL,                                               \
        };                                                      \
        if (!dc_isar_feature(aa32_mve_fp, s)) {                 \
            return false;                                       \
        }                                                       \
        return do_vmaxv(s, a, fns[a->size]);                    \
    }

DO_VMAXV_FP(VMAXNMV, vmaxnmv)
DO_VMAXV_FP(VMINNMV, vminnmv)
DO_VMAXV_FP(VMAXNMAV, vmaxnmav)
DO_VMAXV_FP(VMINNMAV, vminnmav)
static bool do_vabav(DisasContext *s, arg_vabav *a, MVEGenVABAVFn *fn)
{
    /* Absolute difference accumulated across vector */
    TCGv_ptr qn, qm;
    TCGv_i32 rda;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qm | a->qn) ||
        !fn || a->rda == 13 || a->rda == 15) {
        /* Rda cases are UNPREDICTABLE */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qm = mve_qreg_ptr(a->qm);
    qn = mve_qreg_ptr(a->qn);
    rda = load_reg(s, a->rda);
    fn(rda, tcg_env, qn, qm, rda);
    store_reg(s, a->rda, rda);
    mve_update_eci(s);
    return true;
}
#define DO_VABAV(INSN, FN)                                      \
    static bool trans_##INSN(DisasContext *s, arg_vabav *a)     \
    {                                                           \
        static MVEGenVABAVFn * const fns[] = {                  \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##w,                             \
            NULL,                                               \
        };                                                      \
        return do_vabav(s, a, fns[a->size]);                    \
    }

DO_VABAV(VABAV_S, vabavs)
DO_VABAV(VABAV_U, vabavu)
static bool trans_VMOV_to_2gp(DisasContext *s, arg_VMOV_to_2gp *a)
{
    /*
     * VMOV two 32-bit vector lanes to two general-purpose registers.
     * This insn is not predicated but it is subject to beat-wise
     * execution if it is not in an IT block. For us this means
     * only that if PSR.ECI says we should not be executing the beat
     * corresponding to the lane of the vector register being accessed
     * then we should skip performing the move, and that we need to do
     * the usual check for bad ECI state and advance of ECI state.
     * (If PSR.ECI is non-zero then we cannot be in an IT block.)
     */
    TCGv_i32 tmp;
    int vd;

    if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qd) ||
        a->rt == 13 || a->rt == 15 || a->rt2 == 13 || a->rt2 == 15 ||
        a->rt == a->rt2) {
        /* Rt/Rt2 cases are UNPREDICTABLE */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    /* Convert Qreg index to Dreg for read_neon_element32() etc */
    vd = a->qd * 2;

    if (!mve_skip_vmov(s, vd, a->idx, MO_32)) {
        tmp = tcg_temp_new_i32();
        read_neon_element32(tmp, vd, a->idx, MO_32);
        store_reg(s, a->rt, tmp);
    }
    if (!mve_skip_vmov(s, vd + 1, a->idx, MO_32)) {
        tmp = tcg_temp_new_i32();
        read_neon_element32(tmp, vd + 1, a->idx, MO_32);
        store_reg(s, a->rt2, tmp);
    }

    mve_update_and_store_eci(s);
    return true;
}
static bool trans_VMOV_from_2gp(DisasContext *s, arg_VMOV_to_2gp *a)
{
    /*
     * VMOV two general-purpose registers to two 32-bit vector lanes.
     * This insn is not predicated but it is subject to beat-wise
     * execution if it is not in an IT block. For us this means
     * only that if PSR.ECI says we should not be executing the beat
     * corresponding to the lane of the vector register being accessed
     * then we should skip performing the move, and that we need to do
     * the usual check for bad ECI state and advance of ECI state.
     * (If PSR.ECI is non-zero then we cannot be in an IT block.)
     */
    TCGv_i32 tmp;
    int vd;

    if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qd) ||
        a->rt == 13 || a->rt == 15 || a->rt2 == 13 || a->rt2 == 15) {
        /* Rt/Rt2 cases are UNPREDICTABLE */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    /* Convert Qreg idx to Dreg for read_neon_element32() etc */
    vd = a->qd * 2;

    if (!mve_skip_vmov(s, vd, a->idx, MO_32)) {
        tmp = load_reg(s, a->rt);
        write_neon_element32(tmp, vd, a->idx, MO_32);
    }
    if (!mve_skip_vmov(s, vd + 1, a->idx, MO_32)) {
        tmp = load_reg(s, a->rt2);
        write_neon_element32(tmp, vd + 1, a->idx, MO_32);
    }

    mve_update_and_store_eci(s);
    return true;
}