/*
 * AArch64 SME translation
 *
 * Copyright (c) 2022 Linaro, Ltd
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */
20 #include "qemu/osdep.h"
21 #include "translate.h"
22 #include "translate-a64.h"
/*
 * Include the generated decoder.
 */
28 #include "decode-sme.c.inc"
32 * Resolve tile.size[index] to a host pointer, where tile and index
33 * are always decoded together, dependent on the element size.
35 static TCGv_ptr
get_tile_rowcol(DisasContext
*s
, int esz
, int rs
,
36 int tile_index
, bool vertical
)
38 int tile
= tile_index
>> (4 - esz
);
39 int index
= esz
== MO_128
? 0 : extract32(tile_index
, 0, 4 - esz
);
44 /* Compute the final index, which is Rs+imm. */
45 tmp
= tcg_temp_new_i32();
46 tcg_gen_trunc_tl_i32(tmp
, cpu_reg(s
, rs
));
47 tcg_gen_addi_i32(tmp
, tmp
, index
);
49 /* Prepare a power-of-two modulo via extraction of @len bits. */
50 len
= ctz32(streaming_vec_reg_size(s
)) - esz
;
54 * SVL is 128 and the element size is 128. There is exactly
55 * one 128x128 tile in the ZA storage, and so we calculate
56 * (Rs + imm) MOD 1, which is always 0. We need to special case
57 * this because TCG doesn't allow deposit ops with len 0.
59 tcg_gen_movi_i32(tmp
, 0);
60 } else if (vertical
) {
62 * Compute the byte offset of the index within the tile:
63 * (index % (svl / size)) * size
64 * = (index % (svl >> esz)) << esz
65 * Perform the power-of-two modulo via extraction of the low @len bits.
66 * Perform the multiply by shifting left by @pos bits.
67 * Perform these operations simultaneously via deposit into zero.
70 tcg_gen_deposit_z_i32(tmp
, tmp
, pos
, len
);
73 * For big-endian, adjust the indexed column byte offset within
74 * the uint64_t host words that make up env->zarray[].
76 if (HOST_BIG_ENDIAN
&& esz
< MO_64
) {
77 tcg_gen_xori_i32(tmp
, tmp
, 8 - (1 << esz
));
81 * Compute the byte offset of the index within the tile:
82 * (index % (svl / size)) * (size * sizeof(row))
83 * = (index % (svl >> esz)) << (esz + log2(sizeof(row)))
85 pos
= esz
+ ctz32(sizeof(ARMVectorReg
));
86 tcg_gen_deposit_z_i32(tmp
, tmp
, pos
, len
);
88 /* Row slices are always aligned and need no endian adjustment. */
91 /* The tile byte offset within env->zarray is the row. */
92 offset
= tile
* sizeof(ARMVectorReg
);
94 /* Include the byte offset of zarray to make this relative to env. */
95 offset
+= offsetof(CPUARMState
, zarray
);
96 tcg_gen_addi_i32(tmp
, tmp
, offset
);
98 /* Add the byte offset to env to produce the final pointer. */
99 addr
= tcg_temp_new_ptr();
100 tcg_gen_ext_i32_ptr(addr
, tmp
);
101 tcg_gen_add_ptr(addr
, addr
, tcg_env
);
107 * Resolve tile.size[0] to a host pointer.
108 * Used by e.g. outer product insns where we require the entire tile.
110 static TCGv_ptr
get_tile(DisasContext
*s
, int esz
, int tile
)
112 TCGv_ptr addr
= tcg_temp_new_ptr();
115 offset
= tile
* sizeof(ARMVectorReg
) + offsetof(CPUARMState
, zarray
);
117 tcg_gen_addi_ptr(addr
, tcg_env
, offset
);
121 static bool trans_ZERO(DisasContext
*s
, arg_ZERO
*a
)
123 if (!dc_isar_feature(aa64_sme
, s
)) {
126 if (sme_za_enabled_check(s
)) {
127 gen_helper_sme_zero(tcg_env
, tcg_constant_i32(a
->imm
),
128 tcg_constant_i32(streaming_vec_reg_size(s
)));
133 static bool trans_MOVA(DisasContext
*s
, arg_MOVA
*a
)
135 static gen_helper_gvec_4
* const h_fns
[5] = {
136 gen_helper_sve_sel_zpzz_b
, gen_helper_sve_sel_zpzz_h
,
137 gen_helper_sve_sel_zpzz_s
, gen_helper_sve_sel_zpzz_d
,
138 gen_helper_sve_sel_zpzz_q
140 static gen_helper_gvec_3
* const cz_fns
[5] = {
141 gen_helper_sme_mova_cz_b
, gen_helper_sme_mova_cz_h
,
142 gen_helper_sme_mova_cz_s
, gen_helper_sme_mova_cz_d
,
143 gen_helper_sme_mova_cz_q
,
145 static gen_helper_gvec_3
* const zc_fns
[5] = {
146 gen_helper_sme_mova_zc_b
, gen_helper_sme_mova_zc_h
,
147 gen_helper_sme_mova_zc_s
, gen_helper_sme_mova_zc_d
,
148 gen_helper_sme_mova_zc_q
,
151 TCGv_ptr t_za
, t_zr
, t_pg
;
155 if (!dc_isar_feature(aa64_sme
, s
)) {
158 if (!sme_smza_enabled_check(s
)) {
162 t_za
= get_tile_rowcol(s
, a
->esz
, a
->rs
, a
->za_imm
, a
->v
);
163 t_zr
= vec_full_reg_ptr(s
, a
->zr
);
164 t_pg
= pred_full_reg_ptr(s
, a
->pg
);
166 svl
= streaming_vec_reg_size(s
);
167 t_desc
= tcg_constant_i32(simd_desc(svl
, svl
, 0));
170 /* Vertical slice -- use sme mova helpers. */
172 zc_fns
[a
->esz
](t_zr
, t_za
, t_pg
, t_desc
);
174 cz_fns
[a
->esz
](t_za
, t_zr
, t_pg
, t_desc
);
177 /* Horizontal slice -- reuse sve sel helpers. */
179 h_fns
[a
->esz
](t_zr
, t_za
, t_zr
, t_pg
, t_desc
);
181 h_fns
[a
->esz
](t_za
, t_zr
, t_za
, t_pg
, t_desc
);
187 static bool trans_LDST1(DisasContext
*s
, arg_LDST1
*a
)
189 typedef void GenLdSt1(TCGv_env
, TCGv_ptr
, TCGv_ptr
, TCGv
, TCGv_i32
);
192 * Indexed by [esz][be][v][mte][st], which is (except for load/store)
193 * also the order in which the elements appear in the function names,
194 * and so how we must concatenate the pieces.
197 #define FN_LS(F) { gen_helper_sme_ld1##F, gen_helper_sme_st1##F }
198 #define FN_MTE(F) { FN_LS(F), FN_LS(F##_mte) }
199 #define FN_HV(F) { FN_MTE(F##_h), FN_MTE(F##_v) }
200 #define FN_END(L, B) { FN_HV(L), FN_HV(B) }
202 static GenLdSt1
* const fns
[5][2][2][2][2] = {
218 bool be
= s
->be_data
== MO_BE
;
219 bool mte
= s
->mte_active
[0];
221 if (!dc_isar_feature(aa64_sme
, s
)) {
224 if (!sme_smza_enabled_check(s
)) {
228 t_za
= get_tile_rowcol(s
, a
->esz
, a
->rs
, a
->za_imm
, a
->v
);
229 t_pg
= pred_full_reg_ptr(s
, a
->pg
);
230 addr
= tcg_temp_new_i64();
232 tcg_gen_shli_i64(addr
, cpu_reg(s
, a
->rm
), a
->esz
);
233 tcg_gen_add_i64(addr
, addr
, cpu_reg_sp(s
, a
->rn
));
236 addr
= clean_data_tbi(s
, addr
);
239 desc
= make_svemte_desc(s
, streaming_vec_reg_size(s
), 1, a
->esz
, a
->st
, 0);
241 fns
[a
->esz
][be
][a
->v
][mte
][a
->st
](tcg_env
, t_za
, t_pg
, addr
,
242 tcg_constant_i32(desc
));
246 typedef void GenLdStR(DisasContext
*, TCGv_ptr
, int, int, int, int);
248 static bool do_ldst_r(DisasContext
*s
, arg_ldstr
*a
, GenLdStR
*fn
)
250 int svl
= streaming_vec_reg_size(s
);
254 if (!sme_za_enabled_check(s
)) {
258 /* ZA[n] equates to ZA0H.B[n]. */
259 base
= get_tile_rowcol(s
, MO_8
, a
->rv
, imm
, false);
261 fn(s
, base
, 0, svl
, a
->rn
, imm
* svl
);
265 TRANS_FEAT(LDR
, aa64_sme
, do_ldst_r
, a
, gen_sve_ldr
)
266 TRANS_FEAT(STR
, aa64_sme
, do_ldst_r
, a
, gen_sve_str
)
268 static bool do_adda(DisasContext
*s
, arg_adda
*a
, MemOp esz
,
269 gen_helper_gvec_4
*fn
)
271 int svl
= streaming_vec_reg_size(s
);
272 uint32_t desc
= simd_desc(svl
, svl
, 0);
273 TCGv_ptr za
, zn
, pn
, pm
;
275 if (!sme_smza_enabled_check(s
)) {
279 za
= get_tile(s
, esz
, a
->zad
);
280 zn
= vec_full_reg_ptr(s
, a
->zn
);
281 pn
= pred_full_reg_ptr(s
, a
->pn
);
282 pm
= pred_full_reg_ptr(s
, a
->pm
);
284 fn(za
, zn
, pn
, pm
, tcg_constant_i32(desc
));
288 TRANS_FEAT(ADDHA_s
, aa64_sme
, do_adda
, a
, MO_32
, gen_helper_sme_addha_s
)
289 TRANS_FEAT(ADDVA_s
, aa64_sme
, do_adda
, a
, MO_32
, gen_helper_sme_addva_s
)
290 TRANS_FEAT(ADDHA_d
, aa64_sme_i16i64
, do_adda
, a
, MO_64
, gen_helper_sme_addha_d
)
291 TRANS_FEAT(ADDVA_d
, aa64_sme_i16i64
, do_adda
, a
, MO_64
, gen_helper_sme_addva_d
)
293 static bool do_outprod(DisasContext
*s
, arg_op
*a
, MemOp esz
,
294 gen_helper_gvec_5
*fn
)
296 int svl
= streaming_vec_reg_size(s
);
297 uint32_t desc
= simd_desc(svl
, svl
, a
->sub
);
298 TCGv_ptr za
, zn
, zm
, pn
, pm
;
300 if (!sme_smza_enabled_check(s
)) {
304 za
= get_tile(s
, esz
, a
->zad
);
305 zn
= vec_full_reg_ptr(s
, a
->zn
);
306 zm
= vec_full_reg_ptr(s
, a
->zm
);
307 pn
= pred_full_reg_ptr(s
, a
->pn
);
308 pm
= pred_full_reg_ptr(s
, a
->pm
);
310 fn(za
, zn
, zm
, pn
, pm
, tcg_constant_i32(desc
));
314 static bool do_outprod_fpst(DisasContext
*s
, arg_op
*a
, MemOp esz
,
315 ARMFPStatusFlavour e_fpst
,
316 gen_helper_gvec_5_ptr
*fn
)
318 int svl
= streaming_vec_reg_size(s
);
319 uint32_t desc
= simd_desc(svl
, svl
, a
->sub
);
320 TCGv_ptr za
, zn
, zm
, pn
, pm
, fpst
;
322 if (!sme_smza_enabled_check(s
)) {
326 za
= get_tile(s
, esz
, a
->zad
);
327 zn
= vec_full_reg_ptr(s
, a
->zn
);
328 zm
= vec_full_reg_ptr(s
, a
->zm
);
329 pn
= pred_full_reg_ptr(s
, a
->pn
);
330 pm
= pred_full_reg_ptr(s
, a
->pm
);
331 fpst
= fpstatus_ptr(e_fpst
);
333 fn(za
, zn
, zm
, pn
, pm
, fpst
, tcg_constant_i32(desc
));
337 static bool do_outprod_env(DisasContext
*s
, arg_op
*a
, MemOp esz
,
338 gen_helper_gvec_5_ptr
*fn
)
340 int svl
= streaming_vec_reg_size(s
);
341 uint32_t desc
= simd_desc(svl
, svl
, a
->sub
);
342 TCGv_ptr za
, zn
, zm
, pn
, pm
;
344 if (!sme_smza_enabled_check(s
)) {
348 za
= get_tile(s
, esz
, a
->zad
);
349 zn
= vec_full_reg_ptr(s
, a
->zn
);
350 zm
= vec_full_reg_ptr(s
, a
->zm
);
351 pn
= pred_full_reg_ptr(s
, a
->pn
);
352 pm
= pred_full_reg_ptr(s
, a
->pm
);
354 fn(za
, zn
, zm
, pn
, pm
, tcg_env
, tcg_constant_i32(desc
));
358 TRANS_FEAT(FMOPA_h
, aa64_sme
, do_outprod_env
, a
,
359 MO_32
, gen_helper_sme_fmopa_h
)
360 TRANS_FEAT(FMOPA_s
, aa64_sme
, do_outprod_fpst
, a
,
361 MO_32
, FPST_FPCR
, gen_helper_sme_fmopa_s
)
362 TRANS_FEAT(FMOPA_d
, aa64_sme_f64f64
, do_outprod_fpst
, a
,
363 MO_64
, FPST_FPCR
, gen_helper_sme_fmopa_d
)
365 TRANS_FEAT(BFMOPA
, aa64_sme
, do_outprod_env
, a
, MO_32
, gen_helper_sme_bfmopa
)
367 TRANS_FEAT(SMOPA_s
, aa64_sme
, do_outprod
, a
, MO_32
, gen_helper_sme_smopa_s
)
368 TRANS_FEAT(UMOPA_s
, aa64_sme
, do_outprod
, a
, MO_32
, gen_helper_sme_umopa_s
)
369 TRANS_FEAT(SUMOPA_s
, aa64_sme
, do_outprod
, a
, MO_32
, gen_helper_sme_sumopa_s
)
370 TRANS_FEAT(USMOPA_s
, aa64_sme
, do_outprod
, a
, MO_32
, gen_helper_sme_usmopa_s
)
372 TRANS_FEAT(SMOPA_d
, aa64_sme_i16i64
, do_outprod
, a
, MO_64
, gen_helper_sme_smopa_d
)
373 TRANS_FEAT(UMOPA_d
, aa64_sme_i16i64
, do_outprod
, a
, MO_64
, gen_helper_sme_umopa_d
)
374 TRANS_FEAT(SUMOPA_d
, aa64_sme_i16i64
, do_outprod
, a
, MO_64
, gen_helper_sme_sumopa_d
)
375 TRANS_FEAT(USMOPA_d
, aa64_sme_i16i64
, do_outprod
, a
, MO_64
, gen_helper_sme_usmopa_d
)