/*
 *  AArch64 generic vector expansion
 *
 *  Copyright (c) 2013 Alexander Graf <agraf@suse.de>
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "translate.h"
#include "translate-a64.h"
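
/*
 * RAX1 (SHA3): compute n ^ rol64(m, 1) in each 64-bit element.
 * An inline 64-bit expansion, an inline vector expansion, and an
 * out-of-line helper fallback are provided for the gvec expander.
 */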
static void gen_rax1_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
{
    tcg_gen_rotli_i64(d, m, 1);
    tcg_gen_xor_i64(d, d, n);
}

static void gen_rax1_vec(unsigned vece, TCGv_vec d, TCGv_vec n, TCGv_vec m)
{
    tcg_gen_rotli_vec(vece, d, m, 1);
    tcg_gen_xor_vec(vece, d, d, n);
}

void gen_gvec_rax1(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                   uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = { INDEX_op_rotli_vec, 0 };
    static const GVecGen3 op = {
        .fni8 = gen_rax1_i64,
        .fniv = gen_rax1_vec,
        .opt_opc = vecop_list,
        .fno = gen_helper_crypto_rax1,
        .vece = MO_64,
    };
    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &op);
}
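
/*
 * XAR (SHA3/SVE2): rotate each element of n ^ m right by an immediate.
 * For 8- and 16-bit elements there is no per-lane rotate on a 64-bit
 * host value, so the rotation is built from shifts and lane masks.
 */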
static void gen_xar8_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, int64_t sh)
{
    TCGv_i64 t = tcg_temp_new_i64();
    uint64_t mask = dup_const(MO_8, 0xff >> sh);

    tcg_gen_xor_i64(t, n, m);
    tcg_gen_shri_i64(d, t, sh);
    tcg_gen_shli_i64(t, t, 8 - sh);
    tcg_gen_andi_i64(d, d, mask);
    tcg_gen_andi_i64(t, t, ~mask);
    tcg_gen_or_i64(d, d, t);
}

static void gen_xar16_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, int64_t sh)
{
    TCGv_i64 t = tcg_temp_new_i64();
    uint64_t mask = dup_const(MO_16, 0xffff >> sh);

    tcg_gen_xor_i64(t, n, m);
    tcg_gen_shri_i64(d, t, sh);
    tcg_gen_shli_i64(t, t, 16 - sh);
    tcg_gen_andi_i64(d, d, mask);
    tcg_gen_andi_i64(t, t, ~mask);
    tcg_gen_or_i64(d, d, t);
}
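
/*
 * For 32- and 64-bit elements, and for the vector form, the rotation
 * can use the rotate-right primitive directly.
 */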
static void gen_xar_i32(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, int32_t sh)
{
    tcg_gen_xor_i32(d, n, m);
    tcg_gen_rotri_i32(d, d, sh);
}

static void gen_xar_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, int64_t sh)
{
    tcg_gen_xor_i64(d, n, m);
    tcg_gen_rotri_i64(d, d, sh);
}

static void gen_xar_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
                        TCGv_vec m, int64_t sh)
{
    tcg_gen_xor_vec(vece, d, n, m);
    tcg_gen_rotri_vec(vece, d, d, sh);
}
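
/*
 * Expand XAR for vectors: dispatch on the element size, with the
 * out-of-line helpers as fallback, and reduce a zero rotation to XOR.
 */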
void gen_gvec_xar(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                  uint32_t rm_ofs, int64_t shift,
                  uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop[] = { INDEX_op_rotli_vec, 0 };
    static const GVecGen3i ops[4] = {
        { .fni8 = gen_xar8_i64,
          .fniv = gen_xar_vec,
          .fno = gen_helper_sve2_xar_b,
          .opt_opc = vecop,
          .vece = MO_8 },
        { .fni8 = gen_xar16_i64,
          .fniv = gen_xar_vec,
          .fno = gen_helper_sve2_xar_h,
          .opt_opc = vecop,
          .vece = MO_16 },
        { .fni4 = gen_xar_i32,
          .fniv = gen_xar_vec,
          .fno = gen_helper_sve2_xar_s,
          .opt_opc = vecop,
          .vece = MO_32 },
        { .fni8 = gen_xar_i64,
          .fniv = gen_xar_vec,
          .fno = gen_helper_gvec_xar_d,
          .opt_opc = vecop,
          .vece = MO_64 }
    };
    int esize = 8 << vece;

    /* The SVE2 range is 1 .. esize; the AdvSIMD range is 0 .. esize-1. */
    tcg_debug_assert(shift >= 0);
    tcg_debug_assert(shift <= esize);
    shift &= esize - 1;

    if (shift == 0) {
        /* xar with no rotate devolves to xor. */
        tcg_gen_gvec_xor(vece, rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz);
    } else {
        tcg_gen_gvec_3i(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz,
                        shift, &ops[vece]);
    }
}
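
/* EOR3 (SHA3/SVE2): three-way exclusive or, d = n ^ m ^ k. */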
static void gen_eor3_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k)
{
    tcg_gen_xor_i64(d, n, m);
    tcg_gen_xor_i64(d, d, k);
}

static void gen_eor3_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
                         TCGv_vec m, TCGv_vec k)
{
    tcg_gen_xor_vec(vece, d, n, m);
    tcg_gen_xor_vec(vece, d, d, k);
}

void gen_gvec_eor3(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
                   uint32_t a, uint32_t oprsz, uint32_t maxsz)
{
    static const GVecGen4 op = {
        .fni8 = gen_eor3_i64,
        .fniv = gen_eor3_vec,
        .fno = gen_helper_sve2_eor3,
        .vece = MO_64,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op);
}
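
/* BCAX (SHA3/SVE2): bit clear and exclusive or, d = n ^ (m & ~k). */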
static void gen_bcax_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k)
{
    tcg_gen_andc_i64(d, m, k);
    tcg_gen_xor_i64(d, d, n);
}

static void gen_bcax_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
                         TCGv_vec m, TCGv_vec k)
{
    tcg_gen_andc_vec(vece, d, m, k);
    tcg_gen_xor_vec(vece, d, d, n);
}

void gen_gvec_bcax(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
                   uint32_t a, uint32_t oprsz, uint32_t maxsz)
{
    static const GVecGen4 op = {
        .fni8 = gen_bcax_i64,
        .fniv = gen_bcax_vec,
        .fno = gen_helper_sve2_bcax,
        .vece = MO_64,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op);
}

/*
 * Set @res to the correctly saturated result.
 * Set @qc non-zero if saturation occurred.
 */
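/*
 * SUQADD: add the unsigned value @b to the signed accumulator @a with
 * signed saturation.  For 8-, 16- and 32-bit elements the sum is formed
 * in 64 bits and clamped to the signed maximum of the element size;
 * @qc accumulates the difference between the clamped and raw sums.
 */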
void gen_suqadd_bhs(TCGv_i64 res, TCGv_i64 qc,
                    TCGv_i64 a, TCGv_i64 b, MemOp esz)
{
    TCGv_i64 max = tcg_constant_i64((1ull << ((8 << esz) - 1)) - 1);
    TCGv_i64 t = tcg_temp_new_i64();

    tcg_gen_add_i64(t, a, b);
    tcg_gen_smin_i64(res, t, max);
    tcg_gen_xor_i64(t, t, res);
    tcg_gen_or_i64(qc, qc, t);
}
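
/*
 * For 64-bit elements there is no wider type to add in, so instead the
 * addend is clamped to the remaining headroom (INT64_MAX - a) before
 * the addition; saturation occurred iff the addend was reduced.
 */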
void gen_suqadd_d(TCGv_i64 res, TCGv_i64 qc, TCGv_i64 a, TCGv_i64 b)
{
    TCGv_i64 max = tcg_constant_i64(INT64_MAX);
    TCGv_i64 t = tcg_temp_new_i64();

    /* Maximum value that can be added to @a without overflow. */
    tcg_gen_sub_i64(t, max, a);

    /* Constrain addend so that the next addition never overflows. */
    tcg_gen_umin_i64(t, t, b);
    tcg_gen_add_i64(res, a, t);

    tcg_gen_xor_i64(t, t, b);
    tcg_gen_or_i64(qc, qc, t);
}

static void gen_suqadd_vec(unsigned vece, TCGv_vec t, TCGv_vec qc,
                           TCGv_vec a, TCGv_vec b)
{
    TCGv_vec max =
        tcg_constant_vec_matching(t, vece, (1ull << ((8 << vece) - 1)) - 1);
    TCGv_vec u = tcg_temp_new_vec_matching(t);

    /* Maximum value that can be added to @a without overflow. */
    tcg_gen_sub_vec(vece, u, max, a);

    /* Constrain addend so that the next addition never overflows. */
    tcg_gen_umin_vec(vece, u, u, b);
    tcg_gen_add_vec(vece, t, u, a);

    /* Compute QC by comparing the adjusted @b. */
    tcg_gen_xor_vec(vece, u, u, b);
    tcg_gen_or_vec(vece, qc, qc, u);
}
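
/*
 * The offset of vfp.qc is passed as one of the vector operands, so the
 * per-element saturation bits computed above are OR-ed directly into
 * the cumulative FPSR.QC flag.
 */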
void gen_gvec_suqadd_qc(unsigned vece, uint32_t rd_ofs,
                        uint32_t rn_ofs, uint32_t rm_ofs,
                        uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_add_vec, INDEX_op_sub_vec, INDEX_op_umin_vec, 0
    };
    static const GVecGen4 ops[4] = {
        { .fniv = gen_suqadd_vec,
          .fno = gen_helper_gvec_suqadd_b,
          .opt_opc = vecop_list,
          .write_aofs = true,
          .vece = MO_8 },
        { .fniv = gen_suqadd_vec,
          .fno = gen_helper_gvec_suqadd_h,
          .opt_opc = vecop_list,
          .write_aofs = true,
          .vece = MO_16 },
        { .fniv = gen_suqadd_vec,
          .fno = gen_helper_gvec_suqadd_s,
          .opt_opc = vecop_list,
          .write_aofs = true,
          .vece = MO_32 },
        { .fniv = gen_suqadd_vec,
          .fni8 = gen_suqadd_d,
          .fno = gen_helper_gvec_suqadd_d,
          .opt_opc = vecop_list,
          .write_aofs = true,
          .vece = MO_64 },
    };

    tcg_debug_assert(opr_sz <= sizeof_field(CPUARMState, vfp.qc));
    tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
                   rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
}
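
/*
 * USQADD: add the signed value @b to the unsigned accumulator @a with
 * unsigned saturation.  For 8-, 16- and 32-bit elements the sum is
 * formed in 64 bits and clamped to [0, 2^esize - 1]; @qc accumulates
 * the difference between the clamped and raw sums.
 */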
void gen_usqadd_bhs(TCGv_i64 res, TCGv_i64 qc,
                    TCGv_i64 a, TCGv_i64 b, MemOp esz)
{
    TCGv_i64 max = tcg_constant_i64(MAKE_64BIT_MASK(0, 8 << esz));
    TCGv_i64 zero = tcg_constant_i64(0);
    TCGv_i64 tmp = tcg_temp_new_i64();

    tcg_gen_add_i64(tmp, a, b);
    tcg_gen_smin_i64(res, tmp, max);
    tcg_gen_smax_i64(res, res, zero);
    tcg_gen_xor_i64(tmp, tmp, res);
    tcg_gen_or_i64(qc, qc, tmp);
}
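
/*
 * For 64-bit elements, compute the saturated result for both a positive
 * and a negative @b, then select the correct one from the sign of @b.
 */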
void gen_usqadd_d(TCGv_i64 res, TCGv_i64 qc, TCGv_i64 a, TCGv_i64 b)
{
    TCGv_i64 tmp = tcg_temp_new_i64();
    TCGv_i64 tneg = tcg_temp_new_i64();
    TCGv_i64 tpos = tcg_temp_new_i64();
    TCGv_i64 max = tcg_constant_i64(UINT64_MAX);
    TCGv_i64 zero = tcg_constant_i64(0);

    tcg_gen_add_i64(tmp, a, b);

    /* If @b is positive, saturate if (a + b) < a, aka unsigned overflow. */
    tcg_gen_movcond_i64(TCG_COND_LTU, tpos, tmp, a, max, tmp);

    /* If @b is negative, saturate if a < -b, ie subtraction is negative. */
    tcg_gen_neg_i64(tneg, b);
    tcg_gen_movcond_i64(TCG_COND_LTU, tneg, a, tneg, zero, tmp);

    /* Select correct result from sign of @b. */
    tcg_gen_movcond_i64(TCG_COND_LT, res, b, zero, tneg, tpos);
    tcg_gen_xor_i64(tmp, tmp, res);
    tcg_gen_or_i64(qc, qc, tmp);
}

static void gen_usqadd_vec(unsigned vece, TCGv_vec t, TCGv_vec qc,
                           TCGv_vec a, TCGv_vec b)
{
    TCGv_vec u = tcg_temp_new_vec_matching(t);
    TCGv_vec z = tcg_constant_vec_matching(t, vece, 0);

    /* Compute unsigned saturation of add for +b and sub for -b. */
    tcg_gen_neg_vec(vece, t, b);
    tcg_gen_usadd_vec(vece, u, a, b);
    tcg_gen_ussub_vec(vece, t, a, t);

    /* Select the correct result depending on the sign of b. */
    tcg_gen_cmpsel_vec(TCG_COND_LT, vece, t, b, z, t, u);

    /* Compute QC by comparing against the non-saturated result. */
    tcg_gen_add_vec(vece, u, a, b);
    tcg_gen_xor_vec(vece, u, u, t);
    tcg_gen_or_vec(vece, qc, qc, u);
}

void gen_gvec_usqadd_qc(unsigned vece, uint32_t rd_ofs,
                        uint32_t rn_ofs, uint32_t rm_ofs,
                        uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_neg_vec, INDEX_op_add_vec,
        INDEX_op_usadd_vec, INDEX_op_ussub_vec,
        INDEX_op_cmpsel_vec, 0
    };
    static const GVecGen4 ops[4] = {
        { .fniv = gen_usqadd_vec,
          .fno = gen_helper_gvec_usqadd_b,
          .opt_opc = vecop_list,
          .write_aofs = true,
          .vece = MO_8 },
        { .fniv = gen_usqadd_vec,
          .fno = gen_helper_gvec_usqadd_h,
          .opt_opc = vecop_list,
          .write_aofs = true,
          .vece = MO_16 },
        { .fniv = gen_usqadd_vec,
          .fno = gen_helper_gvec_usqadd_s,
          .opt_opc = vecop_list,
          .write_aofs = true,
          .vece = MO_32 },
        { .fniv = gen_usqadd_vec,
          .fni8 = gen_usqadd_d,
          .fno = gen_helper_gvec_usqadd_d,
          .opt_opc = vecop_list,
          .write_aofs = true,
          .vece = MO_64 },
    };

    tcg_debug_assert(opr_sz <= sizeof_field(CPUARMState, vfp.qc));
    tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
                   rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
}