/*
 * ARM generic vector expansion
 *
 * Copyright (c) 2003 Fabrice Bellard
 * Copyright (c) 2005-2007 CodeSourcery
 * Copyright (c) 2007 OpenedHand, Ltd.
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "translate.h"
static void gen_gvec_fn3_qc(uint32_t rd_ofs, uint32_t rn_ofs, uint32_t rm_ofs,
                            uint32_t opr_sz, uint32_t max_sz,
                            gen_helper_gvec_3_ptr *fn)
{
    TCGv_ptr qc_ptr = tcg_temp_new_ptr();

    tcg_debug_assert(opr_sz <= sizeof_field(CPUARMState, vfp.qc));
    tcg_gen_addi_ptr(qc_ptr, tcg_env, offsetof(CPUARMState, vfp.qc));
    tcg_gen_gvec_3_ptr(rd_ofs, rn_ofs, rm_ofs, qc_ptr,
                       opr_sz, max_sz, 0, fn);
}
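/*
 * Note: the pointer handed to the helper above is &env->vfp.qc, so the
 * out-of-line helpers can update the sticky QC (saturation) flag as they
 * process each element.
 */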
void gen_gvec_sqdmulh_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                         uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static gen_helper_gvec_3_ptr * const fns[2] = {
        gen_helper_neon_sqdmulh_h, gen_helper_neon_sqdmulh_s
    };
    tcg_debug_assert(vece >= 1 && vece <= 2);
    gen_gvec_fn3_qc(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, fns[vece - 1]);
}

void gen_gvec_sqrdmulh_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                          uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static gen_helper_gvec_3_ptr * const fns[2] = {
        gen_helper_neon_sqrdmulh_h, gen_helper_neon_sqrdmulh_s
    };
    tcg_debug_assert(vece >= 1 && vece <= 2);
    gen_gvec_fn3_qc(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, fns[vece - 1]);
}

void gen_gvec_sqrdmlah_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                          uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static gen_helper_gvec_3_ptr * const fns[2] = {
        gen_helper_gvec_qrdmlah_s16, gen_helper_gvec_qrdmlah_s32
    };
    tcg_debug_assert(vece >= 1 && vece <= 2);
    gen_gvec_fn3_qc(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, fns[vece - 1]);
}

void gen_gvec_sqrdmlsh_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                          uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static gen_helper_gvec_3_ptr * const fns[2] = {
        gen_helper_gvec_qrdmlsh_s16, gen_helper_gvec_qrdmlsh_s32
    };
    tcg_debug_assert(vece >= 1 && vece <= 2);
    gen_gvec_fn3_qc(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, fns[vece - 1]);
}

#define GEN_CMP0(NAME, COND)                                            \
    void NAME(unsigned vece, uint32_t d, uint32_t m,                    \
              uint32_t opr_sz, uint32_t max_sz)                         \
    { tcg_gen_gvec_cmpi(COND, vece, d, m, 0, opr_sz, max_sz); }

GEN_CMP0(gen_gvec_ceq0, TCG_COND_EQ)
GEN_CMP0(gen_gvec_cle0, TCG_COND_LE)
GEN_CMP0(gen_gvec_cge0, TCG_COND_GE)
GEN_CMP0(gen_gvec_clt0, TCG_COND_LT)
GEN_CMP0(gen_gvec_cgt0, TCG_COND_GT)

#undef GEN_CMP0
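/*
 * Each GEN_CMP0 expander compares every element against zero and writes
 * all-ones (true) or all-zeros (false) per element, matching the NEON
 * compare-with-zero semantics.
 */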
void gen_gvec_sshr(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
                   int64_t shift, uint32_t opr_sz, uint32_t max_sz)
{
    /* Signed shift out of range results in all-sign-bits */
    shift = MIN(shift, (8 << vece) - 1);
    tcg_gen_gvec_sari(vece, rd_ofs, rm_ofs, shift, opr_sz, max_sz);
}

void gen_gvec_ushr(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
                   int64_t shift, uint32_t opr_sz, uint32_t max_sz)
{
    /* Unsigned shift out of range results in all-zero-bits */
    if (shift >= (8 << vece)) {
        tcg_gen_gvec_dup_imm(vece, rd_ofs, opr_sz, max_sz, 0);
    } else {
        tcg_gen_gvec_shri(vece, rd_ofs, rm_ofs, shift, opr_sz, max_sz);
    }
}
static void gen_ssra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
{
    tcg_gen_vec_sar8i_i64(a, a, shift);
    tcg_gen_vec_add8_i64(d, d, a);
}

static void gen_ssra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
{
    tcg_gen_vec_sar16i_i64(a, a, shift);
    tcg_gen_vec_add16_i64(d, d, a);
}

static void gen_ssra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
{
    tcg_gen_sari_i32(a, a, shift);
    tcg_gen_add_i32(d, d, a);
}

static void gen_ssra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
{
    tcg_gen_sari_i64(a, a, shift);
    tcg_gen_add_i64(d, d, a);
}

static void gen_ssra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
{
    tcg_gen_sari_vec(vece, a, a, sh);
    tcg_gen_add_vec(vece, d, d, a);
}

void gen_gvec_ssra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
                   int64_t shift, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_sari_vec, INDEX_op_add_vec, 0
    };
    static const GVecGen2i ops[4] = {
        { .fni8 = gen_ssra8_i64,
          .fniv = gen_ssra_vec,
          .fno = gen_helper_gvec_ssra_b,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_8 },
        { .fni8 = gen_ssra16_i64,
          .fniv = gen_ssra_vec,
          .fno = gen_helper_gvec_ssra_h,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_16 },
        { .fni4 = gen_ssra32_i32,
          .fniv = gen_ssra_vec,
          .fno = gen_helper_gvec_ssra_s,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_32 },
        { .fni8 = gen_ssra64_i64,
          .fniv = gen_ssra_vec,
          .fno = gen_helper_gvec_ssra_d,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .opt_opc = vecop_list,
          .load_dest = true,
          .vece = MO_64 },
    };

    /* tszimm encoding produces immediates in the range [1..esize]. */
    tcg_debug_assert(shift > 0);
    tcg_debug_assert(shift <= (8 << vece));

    /*
     * Shifts larger than the element size are architecturally valid.
     * Signed results in all sign bits.
     */
    shift = MIN(shift, (8 << vece) - 1);
    tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
}
static void gen_usra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
{
    tcg_gen_vec_shr8i_i64(a, a, shift);
    tcg_gen_vec_add8_i64(d, d, a);
}

static void gen_usra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
{
    tcg_gen_vec_shr16i_i64(a, a, shift);
    tcg_gen_vec_add16_i64(d, d, a);
}

static void gen_usra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
{
    tcg_gen_shri_i32(a, a, shift);
    tcg_gen_add_i32(d, d, a);
}

static void gen_usra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
{
    tcg_gen_shri_i64(a, a, shift);
    tcg_gen_add_i64(d, d, a);
}

static void gen_usra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
{
    tcg_gen_shri_vec(vece, a, a, sh);
    tcg_gen_add_vec(vece, d, d, a);
}

void gen_gvec_usra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
                   int64_t shift, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_shri_vec, INDEX_op_add_vec, 0
    };
    static const GVecGen2i ops[4] = {
        { .fni8 = gen_usra8_i64,
          .fniv = gen_usra_vec,
          .fno = gen_helper_gvec_usra_b,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_8 },
        { .fni8 = gen_usra16_i64,
          .fniv = gen_usra_vec,
          .fno = gen_helper_gvec_usra_h,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_16 },
        { .fni4 = gen_usra32_i32,
          .fniv = gen_usra_vec,
          .fno = gen_helper_gvec_usra_s,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_32 },
        { .fni8 = gen_usra64_i64,
          .fniv = gen_usra_vec,
          .fno = gen_helper_gvec_usra_d,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_64 },
    };

    /* tszimm encoding produces immediates in the range [1..esize]. */
    tcg_debug_assert(shift > 0);
    tcg_debug_assert(shift <= (8 << vece));

    /*
     * Shifts larger than the element size are architecturally valid.
     * Unsigned results in all zeros as input to accumulate: nop.
     */
    if (shift < (8 << vece)) {
        tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
    } else {
        /* Nop, but we do need to clear the tail. */
        tcg_gen_gvec_mov(vece, rd_ofs, rd_ofs, opr_sz, max_sz);
    }
}
/*
 * Shift one less than the requested amount, and the low bit is
 * the rounding bit.  For the 8 and 16-bit operations, because we
 * mask the low bit, we can perform a normal integer shift instead
 * of a vector shift.
 */
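/*
 * In other words, for element x and shift count sh:
 *     round(x >> sh) == (x >> sh) + ((x >> (sh - 1)) & 1)
 * e.g. x = 7, sh = 2: (7 >> 2) + ((7 >> 1) & 1) == 1 + 1 == (7 + 2) >> 2.
 * This form avoids the wider intermediate that adding the rounding
 * constant before the shift would need.
 */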
static void gen_srshr8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
{
    TCGv_i64 t = tcg_temp_new_i64();

    tcg_gen_shri_i64(t, a, sh - 1);
    tcg_gen_andi_i64(t, t, dup_const(MO_8, 1));
    tcg_gen_vec_sar8i_i64(d, a, sh);
    tcg_gen_vec_add8_i64(d, d, t);
}

static void gen_srshr16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
{
    TCGv_i64 t = tcg_temp_new_i64();

    tcg_gen_shri_i64(t, a, sh - 1);
    tcg_gen_andi_i64(t, t, dup_const(MO_16, 1));
    tcg_gen_vec_sar16i_i64(d, a, sh);
    tcg_gen_vec_add16_i64(d, d, t);
}

void gen_srshr32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
{
    TCGv_i32 t;

    /* Handle shift by the input size for the benefit of trans_SRSHR_ri */
    if (sh == 32) {
        tcg_gen_movi_i32(d, 0);
        return;
    }
    t = tcg_temp_new_i32();
    tcg_gen_extract_i32(t, a, sh - 1, 1);
    tcg_gen_sari_i32(d, a, sh);
    tcg_gen_add_i32(d, d, t);
}

void gen_srshr64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
{
    TCGv_i64 t = tcg_temp_new_i64();

    tcg_gen_extract_i64(t, a, sh - 1, 1);
    tcg_gen_sari_i64(d, a, sh);
    tcg_gen_add_i64(d, d, t);
}

static void gen_srshr_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    TCGv_vec ones = tcg_constant_vec_matching(d, vece, 1);

    tcg_gen_shri_vec(vece, t, a, sh - 1);
    tcg_gen_and_vec(vece, t, t, ones);
    tcg_gen_sari_vec(vece, d, a, sh);
    tcg_gen_add_vec(vece, d, d, t);
}

void gen_gvec_srshr(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
                    int64_t shift, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_shri_vec, INDEX_op_sari_vec, INDEX_op_add_vec, 0
    };
    static const GVecGen2i ops[4] = {
        { .fni8 = gen_srshr8_i64,
          .fniv = gen_srshr_vec,
          .fno = gen_helper_gvec_srshr_b,
          .opt_opc = vecop_list,
          .vece = MO_8 },
        { .fni8 = gen_srshr16_i64,
          .fniv = gen_srshr_vec,
          .fno = gen_helper_gvec_srshr_h,
          .opt_opc = vecop_list,
          .vece = MO_16 },
        { .fni4 = gen_srshr32_i32,
          .fniv = gen_srshr_vec,
          .fno = gen_helper_gvec_srshr_s,
          .opt_opc = vecop_list,
          .vece = MO_32 },
        { .fni8 = gen_srshr64_i64,
          .fniv = gen_srshr_vec,
          .fno = gen_helper_gvec_srshr_d,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .opt_opc = vecop_list,
          .vece = MO_64 },
    };

    /* tszimm encoding produces immediates in the range [1..esize] */
    tcg_debug_assert(shift > 0);
    tcg_debug_assert(shift <= (8 << vece));

    if (shift == (8 << vece)) {
        /*
         * Shifts larger than the element size are architecturally valid.
         * Signed results in all sign bits.  With rounding, this produces
         *   (-1 + 1) >> 1 == 0, or (0 + 1) >> 1 == 0.
         * I.e. always zero.
         */
        tcg_gen_gvec_dup_imm(vece, rd_ofs, opr_sz, max_sz, 0);
    } else {
        tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
    }
}
static void gen_srsra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
{
    TCGv_i64 t = tcg_temp_new_i64();

    gen_srshr8_i64(t, a, sh);
    tcg_gen_vec_add8_i64(d, d, t);
}

static void gen_srsra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
{
    TCGv_i64 t = tcg_temp_new_i64();

    gen_srshr16_i64(t, a, sh);
    tcg_gen_vec_add16_i64(d, d, t);
}

static void gen_srsra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
{
    TCGv_i32 t = tcg_temp_new_i32();

    gen_srshr32_i32(t, a, sh);
    tcg_gen_add_i32(d, d, t);
}

static void gen_srsra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
{
    TCGv_i64 t = tcg_temp_new_i64();

    gen_srshr64_i64(t, a, sh);
    tcg_gen_add_i64(d, d, t);
}

static void gen_srsra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);

    gen_srshr_vec(vece, t, a, sh);
    tcg_gen_add_vec(vece, d, d, t);
}

void gen_gvec_srsra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
                    int64_t shift, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_shri_vec, INDEX_op_sari_vec, INDEX_op_add_vec, 0
    };
    static const GVecGen2i ops[4] = {
        { .fni8 = gen_srsra8_i64,
          .fniv = gen_srsra_vec,
          .fno = gen_helper_gvec_srsra_b,
          .opt_opc = vecop_list,
          .load_dest = true,
          .vece = MO_8 },
        { .fni8 = gen_srsra16_i64,
          .fniv = gen_srsra_vec,
          .fno = gen_helper_gvec_srsra_h,
          .opt_opc = vecop_list,
          .load_dest = true,
          .vece = MO_16 },
        { .fni4 = gen_srsra32_i32,
          .fniv = gen_srsra_vec,
          .fno = gen_helper_gvec_srsra_s,
          .opt_opc = vecop_list,
          .load_dest = true,
          .vece = MO_32 },
        { .fni8 = gen_srsra64_i64,
          .fniv = gen_srsra_vec,
          .fno = gen_helper_gvec_srsra_d,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .opt_opc = vecop_list,
          .load_dest = true,
          .vece = MO_64 },
    };

    /* tszimm encoding produces immediates in the range [1..esize] */
    tcg_debug_assert(shift > 0);
    tcg_debug_assert(shift <= (8 << vece));

    /*
     * Shifts larger than the element size are architecturally valid.
     * Signed results in all sign bits.  With rounding, this produces
     *   (-1 + 1) >> 1 == 0, or (0 + 1) >> 1 == 0.
     * I.e. always zero.  With accumulation, this leaves D unchanged.
     */
    if (shift == (8 << vece)) {
        /* Nop, but we do need to clear the tail. */
        tcg_gen_gvec_mov(vece, rd_ofs, rd_ofs, opr_sz, max_sz);
    } else {
        tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
    }
}
static void gen_urshr8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
{
    TCGv_i64 t = tcg_temp_new_i64();

    tcg_gen_shri_i64(t, a, sh - 1);
    tcg_gen_andi_i64(t, t, dup_const(MO_8, 1));
    tcg_gen_vec_shr8i_i64(d, a, sh);
    tcg_gen_vec_add8_i64(d, d, t);
}

static void gen_urshr16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
{
    TCGv_i64 t = tcg_temp_new_i64();

    tcg_gen_shri_i64(t, a, sh - 1);
    tcg_gen_andi_i64(t, t, dup_const(MO_16, 1));
    tcg_gen_vec_shr16i_i64(d, a, sh);
    tcg_gen_vec_add16_i64(d, d, t);
}

void gen_urshr32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
{
    TCGv_i32 t;

    /* Handle shift by the input size for the benefit of trans_URSHR_ri */
    if (sh == 32) {
        tcg_gen_extract_i32(d, a, sh - 1, 1);
        return;
    }
    t = tcg_temp_new_i32();
    tcg_gen_extract_i32(t, a, sh - 1, 1);
    tcg_gen_shri_i32(d, a, sh);
    tcg_gen_add_i32(d, d, t);
}

void gen_urshr64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
{
    TCGv_i64 t = tcg_temp_new_i64();

    tcg_gen_extract_i64(t, a, sh - 1, 1);
    tcg_gen_shri_i64(d, a, sh);
    tcg_gen_add_i64(d, d, t);
}

static void gen_urshr_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t shift)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    TCGv_vec ones = tcg_constant_vec_matching(d, vece, 1);

    tcg_gen_shri_vec(vece, t, a, shift - 1);
    tcg_gen_and_vec(vece, t, t, ones);
    tcg_gen_shri_vec(vece, d, a, shift);
    tcg_gen_add_vec(vece, d, d, t);
}

void gen_gvec_urshr(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
                    int64_t shift, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_shri_vec, INDEX_op_add_vec, 0
    };
    static const GVecGen2i ops[4] = {
        { .fni8 = gen_urshr8_i64,
          .fniv = gen_urshr_vec,
          .fno = gen_helper_gvec_urshr_b,
          .opt_opc = vecop_list,
          .vece = MO_8 },
        { .fni8 = gen_urshr16_i64,
          .fniv = gen_urshr_vec,
          .fno = gen_helper_gvec_urshr_h,
          .opt_opc = vecop_list,
          .vece = MO_16 },
        { .fni4 = gen_urshr32_i32,
          .fniv = gen_urshr_vec,
          .fno = gen_helper_gvec_urshr_s,
          .opt_opc = vecop_list,
          .vece = MO_32 },
        { .fni8 = gen_urshr64_i64,
          .fniv = gen_urshr_vec,
          .fno = gen_helper_gvec_urshr_d,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .opt_opc = vecop_list,
          .vece = MO_64 },
    };

    /* tszimm encoding produces immediates in the range [1..esize] */
    tcg_debug_assert(shift > 0);
    tcg_debug_assert(shift <= (8 << vece));

    if (shift == (8 << vece)) {
        /*
         * Shifts larger than the element size are architecturally valid.
         * Unsigned results in zero.  With rounding, this produces a
         * copy of the most significant bit.
         */
        tcg_gen_gvec_shri(vece, rd_ofs, rm_ofs, shift - 1, opr_sz, max_sz);
    } else {
        tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
    }
}
static void gen_ursra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
{
    TCGv_i64 t = tcg_temp_new_i64();

    if (sh == 8) {
        tcg_gen_vec_shr8i_i64(t, a, 7);
    } else {
        gen_urshr8_i64(t, a, sh);
    }
    tcg_gen_vec_add8_i64(d, d, t);
}

static void gen_ursra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
{
    TCGv_i64 t = tcg_temp_new_i64();

    if (sh == 16) {
        tcg_gen_vec_shr16i_i64(t, a, 15);
    } else {
        gen_urshr16_i64(t, a, sh);
    }
    tcg_gen_vec_add16_i64(d, d, t);
}

static void gen_ursra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
{
    TCGv_i32 t = tcg_temp_new_i32();

    if (sh == 32) {
        tcg_gen_shri_i32(t, a, 31);
    } else {
        gen_urshr32_i32(t, a, sh);
    }
    tcg_gen_add_i32(d, d, t);
}

static void gen_ursra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
{
    TCGv_i64 t = tcg_temp_new_i64();

    if (sh == 64) {
        tcg_gen_shri_i64(t, a, 63);
    } else {
        gen_urshr64_i64(t, a, sh);
    }
    tcg_gen_add_i64(d, d, t);
}

static void gen_ursra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);

    if (sh == (8 << vece)) {
        tcg_gen_shri_vec(vece, t, a, sh - 1);
    } else {
        gen_urshr_vec(vece, t, a, sh);
    }
    tcg_gen_add_vec(vece, d, d, t);
}

void gen_gvec_ursra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
                    int64_t shift, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_shri_vec, INDEX_op_add_vec, 0
    };
    static const GVecGen2i ops[4] = {
        { .fni8 = gen_ursra8_i64,
          .fniv = gen_ursra_vec,
          .fno = gen_helper_gvec_ursra_b,
          .opt_opc = vecop_list,
          .load_dest = true,
          .vece = MO_8 },
        { .fni8 = gen_ursra16_i64,
          .fniv = gen_ursra_vec,
          .fno = gen_helper_gvec_ursra_h,
          .opt_opc = vecop_list,
          .load_dest = true,
          .vece = MO_16 },
        { .fni4 = gen_ursra32_i32,
          .fniv = gen_ursra_vec,
          .fno = gen_helper_gvec_ursra_s,
          .opt_opc = vecop_list,
          .load_dest = true,
          .vece = MO_32 },
        { .fni8 = gen_ursra64_i64,
          .fniv = gen_ursra_vec,
          .fno = gen_helper_gvec_ursra_d,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .opt_opc = vecop_list,
          .load_dest = true,
          .vece = MO_64 },
    };

    /* tszimm encoding produces immediates in the range [1..esize] */
    tcg_debug_assert(shift > 0);
    tcg_debug_assert(shift <= (8 << vece));

    tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
}
static void gen_shr8_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
{
    uint64_t mask = dup_const(MO_8, 0xff >> shift);
    TCGv_i64 t = tcg_temp_new_i64();

    tcg_gen_shri_i64(t, a, shift);
    tcg_gen_andi_i64(t, t, mask);
    tcg_gen_andi_i64(d, d, ~mask);
    tcg_gen_or_i64(d, d, t);
}

static void gen_shr16_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
{
    uint64_t mask = dup_const(MO_16, 0xffff >> shift);
    TCGv_i64 t = tcg_temp_new_i64();

    tcg_gen_shri_i64(t, a, shift);
    tcg_gen_andi_i64(t, t, mask);
    tcg_gen_andi_i64(d, d, ~mask);
    tcg_gen_or_i64(d, d, t);
}

static void gen_shr32_ins_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
{
    tcg_gen_shri_i32(a, a, shift);
    tcg_gen_deposit_i32(d, d, a, 0, 32 - shift);
}

static void gen_shr64_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
{
    tcg_gen_shri_i64(a, a, shift);
    tcg_gen_deposit_i64(d, d, a, 0, 64 - shift);
}

static void gen_shr_ins_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int64_t mi = MAKE_64BIT_MASK((8 << vece) - sh, sh);
    TCGv_vec m = tcg_constant_vec_matching(d, vece, mi);

    tcg_gen_shri_vec(vece, t, a, sh);
    tcg_gen_and_vec(vece, d, d, m);
    tcg_gen_or_vec(vece, d, d, t);
}
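/*
 * For SRI (shift right and insert) only the low (esize - shift) bits of
 * each destination element are replaced by the shifted source; the high
 * bits vacated by the shift keep their previous destination value.
 */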
void gen_gvec_sri(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
                  int64_t shift, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = { INDEX_op_shri_vec, 0 };
    const GVecGen2i ops[4] = {
        { .fni8 = gen_shr8_ins_i64,
          .fniv = gen_shr_ins_vec,
          .fno = gen_helper_gvec_sri_b,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_8 },
        { .fni8 = gen_shr16_ins_i64,
          .fniv = gen_shr_ins_vec,
          .fno = gen_helper_gvec_sri_h,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_16 },
        { .fni4 = gen_shr32_ins_i32,
          .fniv = gen_shr_ins_vec,
          .fno = gen_helper_gvec_sri_s,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_32 },
        { .fni8 = gen_shr64_ins_i64,
          .fniv = gen_shr_ins_vec,
          .fno = gen_helper_gvec_sri_d,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_64 },
    };

    /* tszimm encoding produces immediates in the range [1..esize]. */
    tcg_debug_assert(shift > 0);
    tcg_debug_assert(shift <= (8 << vece));

    /* Shift of esize leaves destination unchanged. */
    if (shift < (8 << vece)) {
        tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
    } else {
        /* Nop, but we do need to clear the tail. */
        tcg_gen_gvec_mov(vece, rd_ofs, rd_ofs, opr_sz, max_sz);
    }
}
static void gen_shl8_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
{
    uint64_t mask = dup_const(MO_8, 0xff << shift);
    TCGv_i64 t = tcg_temp_new_i64();

    tcg_gen_shli_i64(t, a, shift);
    tcg_gen_andi_i64(t, t, mask);
    tcg_gen_andi_i64(d, d, ~mask);
    tcg_gen_or_i64(d, d, t);
}

static void gen_shl16_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
{
    uint64_t mask = dup_const(MO_16, 0xffff << shift);
    TCGv_i64 t = tcg_temp_new_i64();

    tcg_gen_shli_i64(t, a, shift);
    tcg_gen_andi_i64(t, t, mask);
    tcg_gen_andi_i64(d, d, ~mask);
    tcg_gen_or_i64(d, d, t);
}

static void gen_shl32_ins_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
{
    tcg_gen_deposit_i32(d, d, a, shift, 32 - shift);
}

static void gen_shl64_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
{
    tcg_gen_deposit_i64(d, d, a, shift, 64 - shift);
}

static void gen_shl_ins_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    TCGv_vec m = tcg_constant_vec_matching(d, vece, MAKE_64BIT_MASK(0, sh));

    tcg_gen_shli_vec(vece, t, a, sh);
    tcg_gen_and_vec(vece, d, d, m);
    tcg_gen_or_vec(vece, d, d, t);
}

void gen_gvec_sli(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
                  int64_t shift, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = { INDEX_op_shli_vec, 0 };
    const GVecGen2i ops[4] = {
        { .fni8 = gen_shl8_ins_i64,
          .fniv = gen_shl_ins_vec,
          .fno = gen_helper_gvec_sli_b,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_8 },
        { .fni8 = gen_shl16_ins_i64,
          .fniv = gen_shl_ins_vec,
          .fno = gen_helper_gvec_sli_h,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_16 },
        { .fni4 = gen_shl32_ins_i32,
          .fniv = gen_shl_ins_vec,
          .fno = gen_helper_gvec_sli_s,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_32 },
        { .fni8 = gen_shl64_ins_i64,
          .fniv = gen_shl_ins_vec,
          .fno = gen_helper_gvec_sli_d,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_64 },
    };

    /* tszimm encoding produces immediates in the range [0..esize-1]. */
    tcg_debug_assert(shift >= 0);
    tcg_debug_assert(shift < (8 << vece));

    if (shift == 0) {
        tcg_gen_gvec_mov(vece, rd_ofs, rm_ofs, opr_sz, max_sz);
    } else {
        tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
    }
}
static void gen_mla8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
{
    gen_helper_neon_mul_u8(a, a, b);
    gen_helper_neon_add_u8(d, d, a);
}

static void gen_mls8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
{
    gen_helper_neon_mul_u8(a, a, b);
    gen_helper_neon_sub_u8(d, d, a);
}

static void gen_mla16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
{
    gen_helper_neon_mul_u16(a, a, b);
    gen_helper_neon_add_u16(d, d, a);
}

static void gen_mls16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
{
    gen_helper_neon_mul_u16(a, a, b);
    gen_helper_neon_sub_u16(d, d, a);
}

static void gen_mla32_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
{
    tcg_gen_mul_i32(a, a, b);
    tcg_gen_add_i32(d, d, a);
}

static void gen_mls32_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
{
    tcg_gen_mul_i32(a, a, b);
    tcg_gen_sub_i32(d, d, a);
}

static void gen_mla64_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
{
    tcg_gen_mul_i64(a, a, b);
    tcg_gen_add_i64(d, d, a);
}

static void gen_mls64_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
{
    tcg_gen_mul_i64(a, a, b);
    tcg_gen_sub_i64(d, d, a);
}

static void gen_mla_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
{
    tcg_gen_mul_vec(vece, a, a, b);
    tcg_gen_add_vec(vece, d, d, a);
}

static void gen_mls_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
{
    tcg_gen_mul_vec(vece, a, a, b);
    tcg_gen_sub_vec(vece, d, d, a);
}

/* Note that while NEON does not support VMLA and VMLS as 64-bit ops,
 * these tables are shared with AArch64 which does support them.
 */
void gen_gvec_mla(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                  uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_mul_vec, INDEX_op_add_vec, 0
    };
    static const GVecGen3 ops[4] = {
        { .fni4 = gen_mla8_i32,
          .fniv = gen_mla_vec,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_8 },
        { .fni4 = gen_mla16_i32,
          .fniv = gen_mla_vec,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_16 },
        { .fni4 = gen_mla32_i32,
          .fniv = gen_mla_vec,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_32 },
        { .fni8 = gen_mla64_i64,
          .fniv = gen_mla_vec,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_64 },
    };
    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
}

void gen_gvec_mls(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                  uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_mul_vec, INDEX_op_sub_vec, 0
    };
    static const GVecGen3 ops[4] = {
        { .fni4 = gen_mls8_i32,
          .fniv = gen_mls_vec,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_8 },
        { .fni4 = gen_mls16_i32,
          .fniv = gen_mls_vec,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_16 },
        { .fni4 = gen_mls32_i32,
          .fniv = gen_mls_vec,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_32 },
        { .fni8 = gen_mls64_i64,
          .fniv = gen_mls_vec,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_64 },
    };
    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
}
/* CMTST : test is "if (X & Y != 0)". */
static void gen_cmtst_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
{
    tcg_gen_negsetcond_i32(TCG_COND_TSTNE, d, a, b);
}

void gen_cmtst_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
{
    tcg_gen_negsetcond_i64(TCG_COND_TSTNE, d, a, b);
}

static void gen_cmtst_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
{
    tcg_gen_cmp_vec(TCG_COND_TSTNE, vece, d, a, b);
}
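/*
 * negsetcond yields -1 (all ones) when the TSTNE condition holds and 0
 * otherwise, which is exactly the per-element result CMTST requires.
 */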
void gen_gvec_cmtst(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = { INDEX_op_cmp_vec, 0 };
    static const GVecGen3 ops[4] = {
        { .fni4 = gen_helper_neon_tst_u8,
          .fniv = gen_cmtst_vec,
          .opt_opc = vecop_list,
          .vece = MO_8 },
        { .fni4 = gen_helper_neon_tst_u16,
          .fniv = gen_cmtst_vec,
          .opt_opc = vecop_list,
          .vece = MO_16 },
        { .fni4 = gen_cmtst_i32,
          .fniv = gen_cmtst_vec,
          .opt_opc = vecop_list,
          .vece = MO_32 },
        { .fni8 = gen_cmtst_i64,
          .fniv = gen_cmtst_vec,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .opt_opc = vecop_list,
          .vece = MO_64 },
    };
    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
}
void gen_ushl_i32(TCGv_i32 dst, TCGv_i32 src, TCGv_i32 shift)
{
    TCGv_i32 lval = tcg_temp_new_i32();
    TCGv_i32 rval = tcg_temp_new_i32();
    TCGv_i32 lsh = tcg_temp_new_i32();
    TCGv_i32 rsh = tcg_temp_new_i32();
    TCGv_i32 zero = tcg_constant_i32(0);
    TCGv_i32 max = tcg_constant_i32(32);

    /*
     * Rely on the TCG guarantee that out of range shifts produce
     * unspecified results, not undefined behaviour (i.e. no trap).
     * Discard out-of-range results after the fact.
     */
    tcg_gen_ext8s_i32(lsh, shift);
    tcg_gen_neg_i32(rsh, lsh);
    tcg_gen_shl_i32(lval, src, lsh);
    tcg_gen_shr_i32(rval, src, rsh);
    tcg_gen_movcond_i32(TCG_COND_LTU, dst, lsh, max, lval, zero);
    tcg_gen_movcond_i32(TCG_COND_LTU, dst, rsh, max, rval, dst);
}
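/*
 * The shift operand is interpreted as a signed byte: a positive count
 * shifts left, a negative count shifts right by the negated amount, and
 * any magnitude of 32 (or 64 below) or more yields zero via the final
 * pair of movcond selects.
 */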
void gen_ushl_i64(TCGv_i64 dst, TCGv_i64 src, TCGv_i64 shift)
{
    TCGv_i64 lval = tcg_temp_new_i64();
    TCGv_i64 rval = tcg_temp_new_i64();
    TCGv_i64 lsh = tcg_temp_new_i64();
    TCGv_i64 rsh = tcg_temp_new_i64();
    TCGv_i64 zero = tcg_constant_i64(0);
    TCGv_i64 max = tcg_constant_i64(64);

    /*
     * Rely on the TCG guarantee that out of range shifts produce
     * unspecified results, not undefined behaviour (i.e. no trap).
     * Discard out-of-range results after the fact.
     */
    tcg_gen_ext8s_i64(lsh, shift);
    tcg_gen_neg_i64(rsh, lsh);
    tcg_gen_shl_i64(lval, src, lsh);
    tcg_gen_shr_i64(rval, src, rsh);
    tcg_gen_movcond_i64(TCG_COND_LTU, dst, lsh, max, lval, zero);
    tcg_gen_movcond_i64(TCG_COND_LTU, dst, rsh, max, rval, dst);
}

static void gen_ushl_vec(unsigned vece, TCGv_vec dst,
                         TCGv_vec src, TCGv_vec shift)
{
    TCGv_vec lval = tcg_temp_new_vec_matching(dst);
    TCGv_vec rval = tcg_temp_new_vec_matching(dst);
    TCGv_vec lsh = tcg_temp_new_vec_matching(dst);
    TCGv_vec rsh = tcg_temp_new_vec_matching(dst);
    TCGv_vec max, zero;

    tcg_gen_neg_vec(vece, rsh, shift);
    if (vece == MO_8) {
        tcg_gen_mov_vec(lsh, shift);
    } else {
        TCGv_vec msk = tcg_constant_vec_matching(dst, vece, 0xff);
        tcg_gen_and_vec(vece, lsh, shift, msk);
        tcg_gen_and_vec(vece, rsh, rsh, msk);
    }

    /*
     * Rely on the TCG guarantee that out of range shifts produce
     * unspecified results, not undefined behaviour (i.e. no trap).
     * Discard out-of-range results after the fact.
     */
    tcg_gen_shlv_vec(vece, lval, src, lsh);
    tcg_gen_shrv_vec(vece, rval, src, rsh);

    /*
     * The choice of GE (signed) and GEU (unsigned) are biased toward
     * the instructions of the x86_64 host.  For MO_8, the whole byte
     * is significant so we must use an unsigned compare; otherwise we
     * have already masked to a byte and so a signed compare works.
     * Other tcg hosts have a full set of comparisons and do not care.
     */
    zero = tcg_constant_vec_matching(dst, vece, 0);
    max = tcg_constant_vec_matching(dst, vece, 8 << vece);
    if (vece == MO_8) {
        tcg_gen_cmpsel_vec(TCG_COND_GEU, vece, lval, lsh, max, zero, lval);
        tcg_gen_cmpsel_vec(TCG_COND_GEU, vece, rval, rsh, max, zero, rval);
    } else {
        tcg_gen_cmpsel_vec(TCG_COND_GE, vece, lval, lsh, max, zero, lval);
        tcg_gen_cmpsel_vec(TCG_COND_GE, vece, rval, rsh, max, zero, rval);
    }

    tcg_gen_or_vec(vece, dst, lval, rval);
}

void gen_gvec_ushl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                   uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_neg_vec, INDEX_op_shlv_vec,
        INDEX_op_shrv_vec, INDEX_op_cmpsel_vec, 0
    };
    static const GVecGen3 ops[4] = {
        { .fniv = gen_ushl_vec,
          .fno = gen_helper_gvec_ushl_b,
          .opt_opc = vecop_list,
          .vece = MO_8 },
        { .fniv = gen_ushl_vec,
          .fno = gen_helper_gvec_ushl_h,
          .opt_opc = vecop_list,
          .vece = MO_16 },
        { .fni4 = gen_ushl_i32,
          .fniv = gen_ushl_vec,
          .opt_opc = vecop_list,
          .vece = MO_32 },
        { .fni8 = gen_ushl_i64,
          .fniv = gen_ushl_vec,
          .opt_opc = vecop_list,
          .vece = MO_64 },
    };
    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
}
void gen_sshl_i32(TCGv_i32 dst, TCGv_i32 src, TCGv_i32 shift)
{
    TCGv_i32 lval = tcg_temp_new_i32();
    TCGv_i32 rval = tcg_temp_new_i32();
    TCGv_i32 lsh = tcg_temp_new_i32();
    TCGv_i32 rsh = tcg_temp_new_i32();
    TCGv_i32 zero = tcg_constant_i32(0);
    TCGv_i32 max = tcg_constant_i32(31);

    /*
     * Rely on the TCG guarantee that out of range shifts produce
     * unspecified results, not undefined behaviour (i.e. no trap).
     * Discard out-of-range results after the fact.
     */
    tcg_gen_ext8s_i32(lsh, shift);
    tcg_gen_neg_i32(rsh, lsh);
    tcg_gen_shl_i32(lval, src, lsh);
    tcg_gen_umin_i32(rsh, rsh, max);
    tcg_gen_sar_i32(rval, src, rsh);
    tcg_gen_movcond_i32(TCG_COND_LEU, lval, lsh, max, lval, zero);
    tcg_gen_movcond_i32(TCG_COND_LT, dst, lsh, zero, rval, lval);
}

void gen_sshl_i64(TCGv_i64 dst, TCGv_i64 src, TCGv_i64 shift)
{
    TCGv_i64 lval = tcg_temp_new_i64();
    TCGv_i64 rval = tcg_temp_new_i64();
    TCGv_i64 lsh = tcg_temp_new_i64();
    TCGv_i64 rsh = tcg_temp_new_i64();
    TCGv_i64 zero = tcg_constant_i64(0);
    TCGv_i64 max = tcg_constant_i64(63);

    /*
     * Rely on the TCG guarantee that out of range shifts produce
     * unspecified results, not undefined behaviour (i.e. no trap).
     * Discard out-of-range results after the fact.
     */
    tcg_gen_ext8s_i64(lsh, shift);
    tcg_gen_neg_i64(rsh, lsh);
    tcg_gen_shl_i64(lval, src, lsh);
    tcg_gen_umin_i64(rsh, rsh, max);
    tcg_gen_sar_i64(rval, src, rsh);
    tcg_gen_movcond_i64(TCG_COND_LEU, lval, lsh, max, lval, zero);
    tcg_gen_movcond_i64(TCG_COND_LT, dst, lsh, zero, rval, lval);
}

static void gen_sshl_vec(unsigned vece, TCGv_vec dst,
                         TCGv_vec src, TCGv_vec shift)
{
    TCGv_vec lval = tcg_temp_new_vec_matching(dst);
    TCGv_vec rval = tcg_temp_new_vec_matching(dst);
    TCGv_vec lsh = tcg_temp_new_vec_matching(dst);
    TCGv_vec rsh = tcg_temp_new_vec_matching(dst);
    TCGv_vec max, zero;

    /*
     * Rely on the TCG guarantee that out of range shifts produce
     * unspecified results, not undefined behaviour (i.e. no trap).
     * Discard out-of-range results after the fact.
     */
    tcg_gen_neg_vec(vece, rsh, shift);
    if (vece == MO_8) {
        tcg_gen_mov_vec(lsh, shift);
    } else {
        TCGv_vec msk = tcg_constant_vec_matching(dst, vece, 0xff);
        tcg_gen_and_vec(vece, lsh, shift, msk);
        tcg_gen_and_vec(vece, rsh, rsh, msk);
    }

    /* Bound rsh so out of bound right shift gets -1. */
    max = tcg_constant_vec_matching(dst, vece, (8 << vece) - 1);
    tcg_gen_umin_vec(vece, rsh, rsh, max);

    tcg_gen_shlv_vec(vece, lval, src, lsh);
    tcg_gen_sarv_vec(vece, rval, src, rsh);

    /* Select in-bound left shift.  */
    zero = tcg_constant_vec_matching(dst, vece, 0);
    tcg_gen_cmpsel_vec(TCG_COND_GT, vece, lval, lsh, max, zero, lval);

    /* Select between left and right shift.  */
    if (vece == MO_8) {
        tcg_gen_cmpsel_vec(TCG_COND_LT, vece, dst, lsh, zero, rval, lval);
    } else {
        TCGv_vec sgn = tcg_constant_vec_matching(dst, vece, 0x80);
        tcg_gen_cmpsel_vec(TCG_COND_LT, vece, dst, lsh, sgn, lval, rval);
    }
}

void gen_gvec_sshl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                   uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_neg_vec, INDEX_op_umin_vec, INDEX_op_shlv_vec,
        INDEX_op_sarv_vec, INDEX_op_cmpsel_vec, 0
    };
    static const GVecGen3 ops[4] = {
        { .fniv = gen_sshl_vec,
          .fno = gen_helper_gvec_sshl_b,
          .opt_opc = vecop_list,
          .vece = MO_8 },
        { .fniv = gen_sshl_vec,
          .fno = gen_helper_gvec_sshl_h,
          .opt_opc = vecop_list,
          .vece = MO_16 },
        { .fni4 = gen_sshl_i32,
          .fniv = gen_sshl_vec,
          .opt_opc = vecop_list,
          .vece = MO_32 },
        { .fni8 = gen_sshl_i64,
          .fniv = gen_sshl_vec,
          .opt_opc = vecop_list,
          .vece = MO_64 },
    };
    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
}
void gen_gvec_srshl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static gen_helper_gvec_3 * const fns[] = {
        gen_helper_gvec_srshl_b, gen_helper_gvec_srshl_h,
        gen_helper_gvec_srshl_s, gen_helper_gvec_srshl_d,
    };
    tcg_debug_assert(vece <= MO_64);
    tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, 0, fns[vece]);
}

void gen_gvec_urshl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static gen_helper_gvec_3 * const fns[] = {
        gen_helper_gvec_urshl_b, gen_helper_gvec_urshl_h,
        gen_helper_gvec_urshl_s, gen_helper_gvec_urshl_d,
    };
    tcg_debug_assert(vece <= MO_64);
    tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, 0, fns[vece]);
}

void gen_neon_sqshl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static gen_helper_gvec_3_ptr * const fns[] = {
        gen_helper_neon_sqshl_b, gen_helper_neon_sqshl_h,
        gen_helper_neon_sqshl_s, gen_helper_neon_sqshl_d,
    };
    tcg_debug_assert(vece <= MO_64);
    tcg_gen_gvec_3_ptr(rd_ofs, rn_ofs, rm_ofs, tcg_env,
                       opr_sz, max_sz, 0, fns[vece]);
}

void gen_neon_uqshl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static gen_helper_gvec_3_ptr * const fns[] = {
        gen_helper_neon_uqshl_b, gen_helper_neon_uqshl_h,
        gen_helper_neon_uqshl_s, gen_helper_neon_uqshl_d,
    };
    tcg_debug_assert(vece <= MO_64);
    tcg_gen_gvec_3_ptr(rd_ofs, rn_ofs, rm_ofs, tcg_env,
                       opr_sz, max_sz, 0, fns[vece]);
}

void gen_neon_sqrshl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                     uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static gen_helper_gvec_3_ptr * const fns[] = {
        gen_helper_neon_sqrshl_b, gen_helper_neon_sqrshl_h,
        gen_helper_neon_sqrshl_s, gen_helper_neon_sqrshl_d,
    };
    tcg_debug_assert(vece <= MO_64);
    tcg_gen_gvec_3_ptr(rd_ofs, rn_ofs, rm_ofs, tcg_env,
                       opr_sz, max_sz, 0, fns[vece]);
}

void gen_neon_uqrshl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                     uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static gen_helper_gvec_3_ptr * const fns[] = {
        gen_helper_neon_uqrshl_b, gen_helper_neon_uqrshl_h,
        gen_helper_neon_uqrshl_s, gen_helper_neon_uqrshl_d,
    };
    tcg_debug_assert(vece <= MO_64);
    tcg_gen_gvec_3_ptr(rd_ofs, rn_ofs, rm_ofs, tcg_env,
                       opr_sz, max_sz, 0, fns[vece]);
}

void gen_neon_sqshli(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                     int64_t c, uint32_t opr_sz, uint32_t max_sz)
{
    static gen_helper_gvec_2_ptr * const fns[] = {
        gen_helper_neon_sqshli_b, gen_helper_neon_sqshli_h,
        gen_helper_neon_sqshli_s, gen_helper_neon_sqshli_d,
    };
    tcg_debug_assert(vece <= MO_64);
    tcg_debug_assert(c >= 0 && c <= (8 << vece));
    tcg_gen_gvec_2_ptr(rd_ofs, rn_ofs, tcg_env, opr_sz, max_sz, c, fns[vece]);
}

void gen_neon_uqshli(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                     int64_t c, uint32_t opr_sz, uint32_t max_sz)
{
    static gen_helper_gvec_2_ptr * const fns[] = {
        gen_helper_neon_uqshli_b, gen_helper_neon_uqshli_h,
        gen_helper_neon_uqshli_s, gen_helper_neon_uqshli_d,
    };
    tcg_debug_assert(vece <= MO_64);
    tcg_debug_assert(c >= 0 && c <= (8 << vece));
    tcg_gen_gvec_2_ptr(rd_ofs, rn_ofs, tcg_env, opr_sz, max_sz, c, fns[vece]);
}

void gen_neon_sqshlui(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                      int64_t c, uint32_t opr_sz, uint32_t max_sz)
{
    static gen_helper_gvec_2_ptr * const fns[] = {
        gen_helper_neon_sqshlui_b, gen_helper_neon_sqshlui_h,
        gen_helper_neon_sqshlui_s, gen_helper_neon_sqshlui_d,
    };
    tcg_debug_assert(vece <= MO_64);
    tcg_debug_assert(c >= 0 && c <= (8 << vece));
    tcg_gen_gvec_2_ptr(rd_ofs, rn_ofs, tcg_env, opr_sz, max_sz, c, fns[vece]);
}
void gen_uqadd_bhs(TCGv_i64 res, TCGv_i64 qc, TCGv_i64 a, TCGv_i64 b, MemOp esz)
{
    uint64_t max = MAKE_64BIT_MASK(0, 8 << esz);
    TCGv_i64 tmp = tcg_temp_new_i64();

    tcg_gen_add_i64(tmp, a, b);
    tcg_gen_umin_i64(res, tmp, tcg_constant_i64(max));
    tcg_gen_xor_i64(tmp, tmp, res);
    tcg_gen_or_i64(qc, qc, tmp);
}
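/*
 * In these saturating helpers the unsaturated and saturated results are
 * XORed together; any non-zero difference means saturation occurred, and
 * ORing that into QC keeps the flag sticky across all elements.
 */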
void gen_uqadd_d(TCGv_i64 res, TCGv_i64 qc, TCGv_i64 a, TCGv_i64 b)
{
    TCGv_i64 t = tcg_temp_new_i64();

    tcg_gen_add_i64(t, a, b);
    tcg_gen_movcond_i64(TCG_COND_LTU, res, t, a,
                        tcg_constant_i64(UINT64_MAX), t);
    tcg_gen_xor_i64(t, t, res);
    tcg_gen_or_i64(qc, qc, t);
}

static void gen_uqadd_vec(unsigned vece, TCGv_vec t, TCGv_vec qc,
                          TCGv_vec a, TCGv_vec b)
{
    TCGv_vec x = tcg_temp_new_vec_matching(t);
    tcg_gen_add_vec(vece, x, a, b);
    tcg_gen_usadd_vec(vece, t, a, b);
    tcg_gen_xor_vec(vece, x, x, t);
    tcg_gen_or_vec(vece, qc, qc, x);
}

void gen_gvec_uqadd_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                       uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_usadd_vec, INDEX_op_add_vec, 0
    };
    static const GVecGen4 ops[4] = {
        { .fniv = gen_uqadd_vec,
          .fno = gen_helper_gvec_uqadd_b,
          .write_aofs = true,
          .opt_opc = vecop_list,
          .vece = MO_8 },
        { .fniv = gen_uqadd_vec,
          .fno = gen_helper_gvec_uqadd_h,
          .write_aofs = true,
          .opt_opc = vecop_list,
          .vece = MO_16 },
        { .fniv = gen_uqadd_vec,
          .fno = gen_helper_gvec_uqadd_s,
          .write_aofs = true,
          .opt_opc = vecop_list,
          .vece = MO_32 },
        { .fniv = gen_uqadd_vec,
          .fni8 = gen_uqadd_d,
          .fno = gen_helper_gvec_uqadd_d,
          .write_aofs = true,
          .opt_opc = vecop_list,
          .vece = MO_64 },
    };

    tcg_debug_assert(opr_sz <= sizeof_field(CPUARMState, vfp.qc));
    tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
                   rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
}
void gen_sqadd_bhs(TCGv_i64 res, TCGv_i64 qc, TCGv_i64 a, TCGv_i64 b, MemOp esz)
{
    int64_t max = MAKE_64BIT_MASK(0, (8 << esz) - 1);
    int64_t min = -1ll - max;
    TCGv_i64 tmp = tcg_temp_new_i64();

    tcg_gen_add_i64(tmp, a, b);
    tcg_gen_smin_i64(res, tmp, tcg_constant_i64(max));
    tcg_gen_smax_i64(res, res, tcg_constant_i64(min));
    tcg_gen_xor_i64(tmp, tmp, res);
    tcg_gen_or_i64(qc, qc, tmp);
}

void gen_sqadd_d(TCGv_i64 res, TCGv_i64 qc, TCGv_i64 a, TCGv_i64 b)
{
    TCGv_i64 t0 = tcg_temp_new_i64();
    TCGv_i64 t1 = tcg_temp_new_i64();
    TCGv_i64 t2 = tcg_temp_new_i64();

    tcg_gen_add_i64(t0, a, b);

    /* Compute signed overflow indication into T1 */
    tcg_gen_xor_i64(t1, a, b);
    tcg_gen_xor_i64(t2, t0, a);
    tcg_gen_andc_i64(t1, t2, t1);

    /* Compute saturated value into T2 */
    tcg_gen_sari_i64(t2, a, 63);
    tcg_gen_xori_i64(t2, t2, INT64_MAX);

    tcg_gen_movcond_i64(TCG_COND_LT, res, t1, tcg_constant_i64(0), t2, t0);
    tcg_gen_xor_i64(t0, t0, res);
    tcg_gen_or_i64(qc, qc, t0);
}
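/*
 * Signed overflow occurred iff the operands had the same sign and the
 * result's sign differs, i.e. the sign bit of (t0 ^ a) & ~(a ^ b) is set;
 * on overflow the result saturates to INT64_MAX or INT64_MIN according to
 * the sign of the first operand.
 */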
static void gen_sqadd_vec(unsigned vece, TCGv_vec t, TCGv_vec qc,
                          TCGv_vec a, TCGv_vec b)
{
    TCGv_vec x = tcg_temp_new_vec_matching(t);
    tcg_gen_add_vec(vece, x, a, b);
    tcg_gen_ssadd_vec(vece, t, a, b);
    tcg_gen_xor_vec(vece, x, x, t);
    tcg_gen_or_vec(vece, qc, qc, x);
}

void gen_gvec_sqadd_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                       uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_ssadd_vec, INDEX_op_add_vec, 0
    };
    static const GVecGen4 ops[4] = {
        { .fniv = gen_sqadd_vec,
          .fno = gen_helper_gvec_sqadd_b,
          .opt_opc = vecop_list,
          .write_aofs = true,
          .vece = MO_8 },
        { .fniv = gen_sqadd_vec,
          .fno = gen_helper_gvec_sqadd_h,
          .opt_opc = vecop_list,
          .write_aofs = true,
          .vece = MO_16 },
        { .fniv = gen_sqadd_vec,
          .fno = gen_helper_gvec_sqadd_s,
          .opt_opc = vecop_list,
          .write_aofs = true,
          .vece = MO_32 },
        { .fniv = gen_sqadd_vec,
          .fni8 = gen_sqadd_d,
          .fno = gen_helper_gvec_sqadd_d,
          .opt_opc = vecop_list,
          .write_aofs = true,
          .vece = MO_64 },
    };

    tcg_debug_assert(opr_sz <= sizeof_field(CPUARMState, vfp.qc));
    tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
                   rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
}
void gen_uqsub_bhs(TCGv_i64 res, TCGv_i64 qc, TCGv_i64 a, TCGv_i64 b, MemOp esz)
{
    TCGv_i64 tmp = tcg_temp_new_i64();

    tcg_gen_sub_i64(tmp, a, b);
    tcg_gen_smax_i64(res, tmp, tcg_constant_i64(0));
    tcg_gen_xor_i64(tmp, tmp, res);
    tcg_gen_or_i64(qc, qc, tmp);
}

void gen_uqsub_d(TCGv_i64 res, TCGv_i64 qc, TCGv_i64 a, TCGv_i64 b)
{
    TCGv_i64 t = tcg_temp_new_i64();

    tcg_gen_sub_i64(t, a, b);
    tcg_gen_movcond_i64(TCG_COND_LTU, res, a, b, tcg_constant_i64(0), t);
    tcg_gen_xor_i64(t, t, res);
    tcg_gen_or_i64(qc, qc, t);
}

static void gen_uqsub_vec(unsigned vece, TCGv_vec t, TCGv_vec qc,
                          TCGv_vec a, TCGv_vec b)
{
    TCGv_vec x = tcg_temp_new_vec_matching(t);
    tcg_gen_sub_vec(vece, x, a, b);
    tcg_gen_ussub_vec(vece, t, a, b);
    tcg_gen_xor_vec(vece, x, x, t);
    tcg_gen_or_vec(vece, qc, qc, x);
}

void gen_gvec_uqsub_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                       uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_ussub_vec, INDEX_op_sub_vec, 0
    };
    static const GVecGen4 ops[4] = {
        { .fniv = gen_uqsub_vec,
          .fno = gen_helper_gvec_uqsub_b,
          .opt_opc = vecop_list,
          .write_aofs = true,
          .vece = MO_8 },
        { .fniv = gen_uqsub_vec,
          .fno = gen_helper_gvec_uqsub_h,
          .opt_opc = vecop_list,
          .write_aofs = true,
          .vece = MO_16 },
        { .fniv = gen_uqsub_vec,
          .fno = gen_helper_gvec_uqsub_s,
          .opt_opc = vecop_list,
          .write_aofs = true,
          .vece = MO_32 },
        { .fniv = gen_uqsub_vec,
          .fni8 = gen_uqsub_d,
          .fno = gen_helper_gvec_uqsub_d,
          .opt_opc = vecop_list,
          .write_aofs = true,
          .vece = MO_64 },
    };

    tcg_debug_assert(opr_sz <= sizeof_field(CPUARMState, vfp.qc));
    tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
                   rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
}
void gen_sqsub_bhs(TCGv_i64 res, TCGv_i64 qc, TCGv_i64 a, TCGv_i64 b, MemOp esz)
{
    int64_t max = MAKE_64BIT_MASK(0, (8 << esz) - 1);
    int64_t min = -1ll - max;
    TCGv_i64 tmp = tcg_temp_new_i64();

    tcg_gen_sub_i64(tmp, a, b);
    tcg_gen_smin_i64(res, tmp, tcg_constant_i64(max));
    tcg_gen_smax_i64(res, res, tcg_constant_i64(min));
    tcg_gen_xor_i64(tmp, tmp, res);
    tcg_gen_or_i64(qc, qc, tmp);
}

void gen_sqsub_d(TCGv_i64 res, TCGv_i64 qc, TCGv_i64 a, TCGv_i64 b)
{
    TCGv_i64 t0 = tcg_temp_new_i64();
    TCGv_i64 t1 = tcg_temp_new_i64();
    TCGv_i64 t2 = tcg_temp_new_i64();

    tcg_gen_sub_i64(t0, a, b);

    /* Compute signed overflow indication into T1 */
    tcg_gen_xor_i64(t1, a, b);
    tcg_gen_xor_i64(t2, t0, a);
    tcg_gen_and_i64(t1, t1, t2);

    /* Compute saturated value into T2 */
    tcg_gen_sari_i64(t2, a, 63);
    tcg_gen_xori_i64(t2, t2, INT64_MAX);

    tcg_gen_movcond_i64(TCG_COND_LT, res, t1, tcg_constant_i64(0), t2, t0);
    tcg_gen_xor_i64(t0, t0, res);
    tcg_gen_or_i64(qc, qc, t0);
}

static void gen_sqsub_vec(unsigned vece, TCGv_vec t, TCGv_vec qc,
                          TCGv_vec a, TCGv_vec b)
{
    TCGv_vec x = tcg_temp_new_vec_matching(t);
    tcg_gen_sub_vec(vece, x, a, b);
    tcg_gen_sssub_vec(vece, t, a, b);
    tcg_gen_xor_vec(vece, x, x, t);
    tcg_gen_or_vec(vece, qc, qc, x);
}

void gen_gvec_sqsub_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                       uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_sssub_vec, INDEX_op_sub_vec, 0
    };
    static const GVecGen4 ops[4] = {
        { .fniv = gen_sqsub_vec,
          .fno = gen_helper_gvec_sqsub_b,
          .opt_opc = vecop_list,
          .write_aofs = true,
          .vece = MO_8 },
        { .fniv = gen_sqsub_vec,
          .fno = gen_helper_gvec_sqsub_h,
          .opt_opc = vecop_list,
          .write_aofs = true,
          .vece = MO_16 },
        { .fniv = gen_sqsub_vec,
          .fno = gen_helper_gvec_sqsub_s,
          .opt_opc = vecop_list,
          .write_aofs = true,
          .vece = MO_32 },
        { .fniv = gen_sqsub_vec,
          .fni8 = gen_sqsub_d,
          .fno = gen_helper_gvec_sqsub_d,
          .opt_opc = vecop_list,
          .write_aofs = true,
          .vece = MO_64 },
    };

    tcg_debug_assert(opr_sz <= sizeof_field(CPUARMState, vfp.qc));
    tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
                   rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
}
static void gen_sabd_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
{
    TCGv_i32 t = tcg_temp_new_i32();

    tcg_gen_sub_i32(t, a, b);
    tcg_gen_sub_i32(d, b, a);
    tcg_gen_movcond_i32(TCG_COND_LT, d, a, b, d, t);
}

static void gen_sabd_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
{
    TCGv_i64 t = tcg_temp_new_i64();

    tcg_gen_sub_i64(t, a, b);
    tcg_gen_sub_i64(d, b, a);
    tcg_gen_movcond_i64(TCG_COND_LT, d, a, b, d, t);
}

static void gen_sabd_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);

    tcg_gen_smin_vec(vece, t, a, b);
    tcg_gen_smax_vec(vece, d, a, b);
    tcg_gen_sub_vec(vece, d, d, t);
}
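/*
 * The vector forms compute the absolute difference as max(a, b) - min(a, b),
 * which avoids a per-element select on the sign of a - b.
 */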
void gen_gvec_sabd(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                   uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_sub_vec, INDEX_op_smin_vec, INDEX_op_smax_vec, 0
    };
    static const GVecGen3 ops[4] = {
        { .fniv = gen_sabd_vec,
          .fno = gen_helper_gvec_sabd_b,
          .opt_opc = vecop_list,
          .vece = MO_8 },
        { .fniv = gen_sabd_vec,
          .fno = gen_helper_gvec_sabd_h,
          .opt_opc = vecop_list,
          .vece = MO_16 },
        { .fni4 = gen_sabd_i32,
          .fniv = gen_sabd_vec,
          .fno = gen_helper_gvec_sabd_s,
          .opt_opc = vecop_list,
          .vece = MO_32 },
        { .fni8 = gen_sabd_i64,
          .fniv = gen_sabd_vec,
          .fno = gen_helper_gvec_sabd_d,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .opt_opc = vecop_list,
          .vece = MO_64 },
    };
    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
}

static void gen_uabd_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
{
    TCGv_i32 t = tcg_temp_new_i32();

    tcg_gen_sub_i32(t, a, b);
    tcg_gen_sub_i32(d, b, a);
    tcg_gen_movcond_i32(TCG_COND_LTU, d, a, b, d, t);
}

static void gen_uabd_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
{
    TCGv_i64 t = tcg_temp_new_i64();

    tcg_gen_sub_i64(t, a, b);
    tcg_gen_sub_i64(d, b, a);
    tcg_gen_movcond_i64(TCG_COND_LTU, d, a, b, d, t);
}

static void gen_uabd_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);

    tcg_gen_umin_vec(vece, t, a, b);
    tcg_gen_umax_vec(vece, d, a, b);
    tcg_gen_sub_vec(vece, d, d, t);
}

void gen_gvec_uabd(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                   uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_sub_vec, INDEX_op_umin_vec, INDEX_op_umax_vec, 0
    };
    static const GVecGen3 ops[4] = {
        { .fniv = gen_uabd_vec,
          .fno = gen_helper_gvec_uabd_b,
          .opt_opc = vecop_list,
          .vece = MO_8 },
        { .fniv = gen_uabd_vec,
          .fno = gen_helper_gvec_uabd_h,
          .opt_opc = vecop_list,
          .vece = MO_16 },
        { .fni4 = gen_uabd_i32,
          .fniv = gen_uabd_vec,
          .fno = gen_helper_gvec_uabd_s,
          .opt_opc = vecop_list,
          .vece = MO_32 },
        { .fni8 = gen_uabd_i64,
          .fniv = gen_uabd_vec,
          .fno = gen_helper_gvec_uabd_d,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .opt_opc = vecop_list,
          .vece = MO_64 },
    };
    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
}
static void gen_saba_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
{
    TCGv_i32 t = tcg_temp_new_i32();
    gen_sabd_i32(t, a, b);
    tcg_gen_add_i32(d, d, t);
}

static void gen_saba_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
{
    TCGv_i64 t = tcg_temp_new_i64();
    gen_sabd_i64(t, a, b);
    tcg_gen_add_i64(d, d, t);
}

static void gen_saba_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    gen_sabd_vec(vece, t, a, b);
    tcg_gen_add_vec(vece, d, d, t);
}

void gen_gvec_saba(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                   uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_sub_vec, INDEX_op_add_vec,
        INDEX_op_smin_vec, INDEX_op_smax_vec, 0
    };
    static const GVecGen3 ops[4] = {
        { .fniv = gen_saba_vec,
          .fno = gen_helper_gvec_saba_b,
          .opt_opc = vecop_list,
          .load_dest = true,
          .vece = MO_8 },
        { .fniv = gen_saba_vec,
          .fno = gen_helper_gvec_saba_h,
          .opt_opc = vecop_list,
          .load_dest = true,
          .vece = MO_16 },
        { .fni4 = gen_saba_i32,
          .fniv = gen_saba_vec,
          .fno = gen_helper_gvec_saba_s,
          .opt_opc = vecop_list,
          .load_dest = true,
          .vece = MO_32 },
        { .fni8 = gen_saba_i64,
          .fniv = gen_saba_vec,
          .fno = gen_helper_gvec_saba_d,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .opt_opc = vecop_list,
          .load_dest = true,
          .vece = MO_64 },
    };
    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
}

static void gen_uaba_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
{
    TCGv_i32 t = tcg_temp_new_i32();
    gen_uabd_i32(t, a, b);
    tcg_gen_add_i32(d, d, t);
}

static void gen_uaba_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
{
    TCGv_i64 t = tcg_temp_new_i64();
    gen_uabd_i64(t, a, b);
    tcg_gen_add_i64(d, d, t);
}

static void gen_uaba_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    gen_uabd_vec(vece, t, a, b);
    tcg_gen_add_vec(vece, d, d, t);
}

void gen_gvec_uaba(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                   uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_sub_vec, INDEX_op_add_vec,
        INDEX_op_umin_vec, INDEX_op_umax_vec, 0
    };
    static const GVecGen3 ops[4] = {
        { .fniv = gen_uaba_vec,
          .fno = gen_helper_gvec_uaba_b,
          .opt_opc = vecop_list,
          .load_dest = true,
          .vece = MO_8 },
        { .fniv = gen_uaba_vec,
          .fno = gen_helper_gvec_uaba_h,
          .opt_opc = vecop_list,
          .load_dest = true,
          .vece = MO_16 },
        { .fni4 = gen_uaba_i32,
          .fniv = gen_uaba_vec,
          .fno = gen_helper_gvec_uaba_s,
          .opt_opc = vecop_list,
          .load_dest = true,
          .vece = MO_32 },
        { .fni8 = gen_uaba_i64,
          .fniv = gen_uaba_vec,
          .fno = gen_helper_gvec_uaba_d,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .opt_opc = vecop_list,
          .load_dest = true,
          .vece = MO_64 },
    };
    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
}
void gen_gvec_addp(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                   uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_gvec_addp_b,
        gen_helper_gvec_addp_h,
        gen_helper_gvec_addp_s,
        gen_helper_gvec_addp_d,
    };
    tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, 0, fns[vece]);
}

void gen_gvec_smaxp(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_gvec_smaxp_b,
        gen_helper_gvec_smaxp_h,
        gen_helper_gvec_smaxp_s,
    };
    tcg_debug_assert(vece <= MO_32);
    tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, 0, fns[vece]);
}

void gen_gvec_sminp(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_gvec_sminp_b,
        gen_helper_gvec_sminp_h,
        gen_helper_gvec_sminp_s,
    };
    tcg_debug_assert(vece <= MO_32);
    tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, 0, fns[vece]);
}

void gen_gvec_umaxp(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_gvec_umaxp_b,
        gen_helper_gvec_umaxp_h,
        gen_helper_gvec_umaxp_s,
    };
    tcg_debug_assert(vece <= MO_32);
    tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, 0, fns[vece]);
}

void gen_gvec_uminp(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_gvec_uminp_b,
        gen_helper_gvec_uminp_h,
        gen_helper_gvec_uminp_s,
    };
    tcg_debug_assert(vece <= MO_32);
    tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, 0, fns[vece]);
}
static void gen_shadd8_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
{
    TCGv_i64 t = tcg_temp_new_i64();

    tcg_gen_and_i64(t, a, b);
    tcg_gen_vec_sar8i_i64(a, a, 1);
    tcg_gen_vec_sar8i_i64(b, b, 1);
    tcg_gen_andi_i64(t, t, dup_const(MO_8, 1));
    tcg_gen_vec_add8_i64(d, a, b);
    tcg_gen_vec_add8_i64(d, d, t);
}
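/*
 * These halving adds use the identity
 *     (a + b) >> 1 == (a >> 1) + (b >> 1) + (a & b & 1),
 * so the sum never needs more bits than the element itself.
 */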
1942 static void gen_shadd16_i64(TCGv_i64 d
, TCGv_i64 a
, TCGv_i64 b
)
1944 TCGv_i64 t
= tcg_temp_new_i64();
1946 tcg_gen_and_i64(t
, a
, b
);
1947 tcg_gen_vec_sar16i_i64(a
, a
, 1);
1948 tcg_gen_vec_sar16i_i64(b
, b
, 1);
1949 tcg_gen_andi_i64(t
, t
, dup_const(MO_16
, 1));
1950 tcg_gen_vec_add16_i64(d
, a
, b
);
1951 tcg_gen_vec_add16_i64(d
, d
, t
);
1954 static void gen_shadd_i32(TCGv_i32 d
, TCGv_i32 a
, TCGv_i32 b
)
1956 TCGv_i32 t
= tcg_temp_new_i32();
1958 tcg_gen_and_i32(t
, a
, b
);
1959 tcg_gen_sari_i32(a
, a
, 1);
1960 tcg_gen_sari_i32(b
, b
, 1);
1961 tcg_gen_andi_i32(t
, t
, 1);
1962 tcg_gen_add_i32(d
, a
, b
);
1963 tcg_gen_add_i32(d
, d
, t
);
1966 static void gen_shadd_vec(unsigned vece
, TCGv_vec d
, TCGv_vec a
, TCGv_vec b
)
1968 TCGv_vec t
= tcg_temp_new_vec_matching(d
);
1970 tcg_gen_and_vec(vece
, t
, a
, b
);
1971 tcg_gen_sari_vec(vece
, a
, a
, 1);
1972 tcg_gen_sari_vec(vece
, b
, b
, 1);
1973 tcg_gen_and_vec(vece
, t
, t
, tcg_constant_vec_matching(d
, vece
, 1));
1974 tcg_gen_add_vec(vece
, d
, a
, b
);
1975 tcg_gen_add_vec(vece
, d
, d
, t
);
void gen_gvec_shadd(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_sari_vec, INDEX_op_add_vec, 0
    };
    static const GVecGen3 g[] = {
        { .fni8 = gen_shadd8_i64,
          .fniv = gen_shadd_vec,
          .opt_opc = vecop_list,
          .vece = MO_8 },
        { .fni8 = gen_shadd16_i64,
          .fniv = gen_shadd_vec,
          .opt_opc = vecop_list,
          .vece = MO_16 },
        { .fni4 = gen_shadd_i32,
          .fniv = gen_shadd_vec,
          .opt_opc = vecop_list,
          .vece = MO_32 },
    };
    tcg_debug_assert(vece <= MO_32);
    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &g[vece]);
}

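/*
 * UHADD: unsigned halving add, same (a >> 1) + (b >> 1) + (a & b & 1)
 * identity as SHADD but using logical rather than arithmetic shifts.
 */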
static void gen_uhadd8_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
{
    TCGv_i64 t = tcg_temp_new_i64();

    tcg_gen_and_i64(t, a, b);
    tcg_gen_vec_shr8i_i64(a, a, 1);
    tcg_gen_vec_shr8i_i64(b, b, 1);
    tcg_gen_andi_i64(t, t, dup_const(MO_8, 1));
    tcg_gen_vec_add8_i64(d, a, b);
    tcg_gen_vec_add8_i64(d, d, t);
}

static void gen_uhadd16_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
{
    TCGv_i64 t = tcg_temp_new_i64();

    tcg_gen_and_i64(t, a, b);
    tcg_gen_vec_shr16i_i64(a, a, 1);
    tcg_gen_vec_shr16i_i64(b, b, 1);
    tcg_gen_andi_i64(t, t, dup_const(MO_16, 1));
    tcg_gen_vec_add16_i64(d, a, b);
    tcg_gen_vec_add16_i64(d, d, t);
}

static void gen_uhadd_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
{
    TCGv_i32 t = tcg_temp_new_i32();

    tcg_gen_and_i32(t, a, b);
    tcg_gen_shri_i32(a, a, 1);
    tcg_gen_shri_i32(b, b, 1);
    tcg_gen_andi_i32(t, t, 1);
    tcg_gen_add_i32(d, a, b);
    tcg_gen_add_i32(d, d, t);
}

static void gen_uhadd_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);

    tcg_gen_and_vec(vece, t, a, b);
    tcg_gen_shri_vec(vece, a, a, 1);
    tcg_gen_shri_vec(vece, b, b, 1);
    tcg_gen_and_vec(vece, t, t, tcg_constant_vec_matching(d, vece, 1));
    tcg_gen_add_vec(vece, d, a, b);
    tcg_gen_add_vec(vece, d, d, t);
}

void gen_gvec_uhadd(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_shri_vec, INDEX_op_add_vec, 0
    };
    static const GVecGen3 g[] = {
        { .fni8 = gen_uhadd8_i64,
          .fniv = gen_uhadd_vec,
          .opt_opc = vecop_list,
          .vece = MO_8 },
        { .fni8 = gen_uhadd16_i64,
          .fniv = gen_uhadd_vec,
          .opt_opc = vecop_list,
          .vece = MO_16 },
        { .fni4 = gen_uhadd_i32,
          .fniv = gen_uhadd_vec,
          .opt_opc = vecop_list,
          .vece = MO_32 },
    };
    tcg_debug_assert(vece <= MO_32);
    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &g[vece]);
}

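/*
 * SHSUB: signed halving subtract.  Per element, d = (a - b) >> 1 is
 * computed as (a >> 1) - (b >> 1) - (~a & b & 1); the final term is the
 * borrow produced by the two low bits.
 */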
static void gen_shsub8_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
{
    TCGv_i64 t = tcg_temp_new_i64();

    tcg_gen_andc_i64(t, b, a);
    tcg_gen_vec_sar8i_i64(a, a, 1);
    tcg_gen_vec_sar8i_i64(b, b, 1);
    tcg_gen_andi_i64(t, t, dup_const(MO_8, 1));
    tcg_gen_vec_sub8_i64(d, a, b);
    tcg_gen_vec_sub8_i64(d, d, t);
}

static void gen_shsub16_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
{
    TCGv_i64 t = tcg_temp_new_i64();

    tcg_gen_andc_i64(t, b, a);
    tcg_gen_vec_sar16i_i64(a, a, 1);
    tcg_gen_vec_sar16i_i64(b, b, 1);
    tcg_gen_andi_i64(t, t, dup_const(MO_16, 1));
    tcg_gen_vec_sub16_i64(d, a, b);
    tcg_gen_vec_sub16_i64(d, d, t);
}

static void gen_shsub_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
{
    TCGv_i32 t = tcg_temp_new_i32();

    tcg_gen_andc_i32(t, b, a);
    tcg_gen_sari_i32(a, a, 1);
    tcg_gen_sari_i32(b, b, 1);
    tcg_gen_andi_i32(t, t, 1);
    tcg_gen_sub_i32(d, a, b);
    tcg_gen_sub_i32(d, d, t);
}

static void gen_shsub_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);

    tcg_gen_andc_vec(vece, t, b, a);
    tcg_gen_sari_vec(vece, a, a, 1);
    tcg_gen_sari_vec(vece, b, b, 1);
    tcg_gen_and_vec(vece, t, t, tcg_constant_vec_matching(d, vece, 1));
    tcg_gen_sub_vec(vece, d, a, b);
    tcg_gen_sub_vec(vece, d, d, t);
}

void gen_gvec_shsub(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_sari_vec, INDEX_op_sub_vec, 0
    };
    static const GVecGen3 g[4] = {
        { .fni8 = gen_shsub8_i64,
          .fniv = gen_shsub_vec,
          .opt_opc = vecop_list,
          .vece = MO_8 },
        { .fni8 = gen_shsub16_i64,
          .fniv = gen_shsub_vec,
          .opt_opc = vecop_list,
          .vece = MO_16 },
        { .fni4 = gen_shsub_i32,
          .fniv = gen_shsub_vec,
          .opt_opc = vecop_list,
          .vece = MO_32 },
    };
    assert(vece <= MO_32);
    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &g[vece]);
}

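/*
 * UHSUB: unsigned halving subtract, same (~a & b & 1) borrow correction
 * as SHSUB but using logical shifts.
 */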
static void gen_uhsub8_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
{
    TCGv_i64 t = tcg_temp_new_i64();

    tcg_gen_andc_i64(t, b, a);
    tcg_gen_vec_shr8i_i64(a, a, 1);
    tcg_gen_vec_shr8i_i64(b, b, 1);
    tcg_gen_andi_i64(t, t, dup_const(MO_8, 1));
    tcg_gen_vec_sub8_i64(d, a, b);
    tcg_gen_vec_sub8_i64(d, d, t);
}

static void gen_uhsub16_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
{
    TCGv_i64 t = tcg_temp_new_i64();

    tcg_gen_andc_i64(t, b, a);
    tcg_gen_vec_shr16i_i64(a, a, 1);
    tcg_gen_vec_shr16i_i64(b, b, 1);
    tcg_gen_andi_i64(t, t, dup_const(MO_16, 1));
    tcg_gen_vec_sub16_i64(d, a, b);
    tcg_gen_vec_sub16_i64(d, d, t);
}

static void gen_uhsub_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
{
    TCGv_i32 t = tcg_temp_new_i32();

    tcg_gen_andc_i32(t, b, a);
    tcg_gen_shri_i32(a, a, 1);
    tcg_gen_shri_i32(b, b, 1);
    tcg_gen_andi_i32(t, t, 1);
    tcg_gen_sub_i32(d, a, b);
    tcg_gen_sub_i32(d, d, t);
}

static void gen_uhsub_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);

    tcg_gen_andc_vec(vece, t, b, a);
    tcg_gen_shri_vec(vece, a, a, 1);
    tcg_gen_shri_vec(vece, b, b, 1);
    tcg_gen_and_vec(vece, t, t, tcg_constant_vec_matching(d, vece, 1));
    tcg_gen_sub_vec(vece, d, a, b);
    tcg_gen_sub_vec(vece, d, d, t);
}

void gen_gvec_uhsub(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_shri_vec, INDEX_op_sub_vec, 0
    };
    static const GVecGen3 g[4] = {
        { .fni8 = gen_uhsub8_i64,
          .fniv = gen_uhsub_vec,
          .opt_opc = vecop_list,
          .vece = MO_8 },
        { .fni8 = gen_uhsub16_i64,
          .fniv = gen_uhsub_vec,
          .opt_opc = vecop_list,
          .vece = MO_16 },
        { .fni4 = gen_uhsub_i32,
          .fniv = gen_uhsub_vec,
          .opt_opc = vecop_list,
          .vece = MO_32 },
    };
    assert(vece <= MO_32);
    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &g[vece]);
}

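/*
 * SRHADD: signed rounding halving add.  Per element, d = (a + b + 1) >> 1,
 * computed as (a >> 1) + (b >> 1) + ((a | b) & 1); the rounding increment
 * is folded into the low-bit correction term.
 */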
static void gen_srhadd8_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
{
    TCGv_i64 t = tcg_temp_new_i64();

    tcg_gen_or_i64(t, a, b);
    tcg_gen_vec_sar8i_i64(a, a, 1);
    tcg_gen_vec_sar8i_i64(b, b, 1);
    tcg_gen_andi_i64(t, t, dup_const(MO_8, 1));
    tcg_gen_vec_add8_i64(d, a, b);
    tcg_gen_vec_add8_i64(d, d, t);
}

static void gen_srhadd16_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
{
    TCGv_i64 t = tcg_temp_new_i64();

    tcg_gen_or_i64(t, a, b);
    tcg_gen_vec_sar16i_i64(a, a, 1);
    tcg_gen_vec_sar16i_i64(b, b, 1);
    tcg_gen_andi_i64(t, t, dup_const(MO_16, 1));
    tcg_gen_vec_add16_i64(d, a, b);
    tcg_gen_vec_add16_i64(d, d, t);
}

static void gen_srhadd_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
{
    TCGv_i32 t = tcg_temp_new_i32();

    tcg_gen_or_i32(t, a, b);
    tcg_gen_sari_i32(a, a, 1);
    tcg_gen_sari_i32(b, b, 1);
    tcg_gen_andi_i32(t, t, 1);
    tcg_gen_add_i32(d, a, b);
    tcg_gen_add_i32(d, d, t);
}

static void gen_srhadd_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);

    tcg_gen_or_vec(vece, t, a, b);
    tcg_gen_sari_vec(vece, a, a, 1);
    tcg_gen_sari_vec(vece, b, b, 1);
    tcg_gen_and_vec(vece, t, t, tcg_constant_vec_matching(d, vece, 1));
    tcg_gen_add_vec(vece, d, a, b);
    tcg_gen_add_vec(vece, d, d, t);
}

void gen_gvec_srhadd(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                     uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_sari_vec, INDEX_op_add_vec, 0
    };
    static const GVecGen3 g[] = {
        { .fni8 = gen_srhadd8_i64,
          .fniv = gen_srhadd_vec,
          .opt_opc = vecop_list,
          .vece = MO_8 },
        { .fni8 = gen_srhadd16_i64,
          .fniv = gen_srhadd_vec,
          .opt_opc = vecop_list,
          .vece = MO_16 },
        { .fni4 = gen_srhadd_i32,
          .fniv = gen_srhadd_vec,
          .opt_opc = vecop_list,
          .vece = MO_32 },
    };
    assert(vece <= MO_32);
    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &g[vece]);
}

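/*
 * URHADD: unsigned rounding halving add, same ((a | b) & 1) rounding
 * correction as SRHADD but using logical shifts.
 */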
static void gen_urhadd8_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
{
    TCGv_i64 t = tcg_temp_new_i64();

    tcg_gen_or_i64(t, a, b);
    tcg_gen_vec_shr8i_i64(a, a, 1);
    tcg_gen_vec_shr8i_i64(b, b, 1);
    tcg_gen_andi_i64(t, t, dup_const(MO_8, 1));
    tcg_gen_vec_add8_i64(d, a, b);
    tcg_gen_vec_add8_i64(d, d, t);
}

static void gen_urhadd16_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
{
    TCGv_i64 t = tcg_temp_new_i64();

    tcg_gen_or_i64(t, a, b);
    tcg_gen_vec_shr16i_i64(a, a, 1);
    tcg_gen_vec_shr16i_i64(b, b, 1);
    tcg_gen_andi_i64(t, t, dup_const(MO_16, 1));
    tcg_gen_vec_add16_i64(d, a, b);
    tcg_gen_vec_add16_i64(d, d, t);
}

static void gen_urhadd_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
{
    TCGv_i32 t = tcg_temp_new_i32();

    tcg_gen_or_i32(t, a, b);
    tcg_gen_shri_i32(a, a, 1);
    tcg_gen_shri_i32(b, b, 1);
    tcg_gen_andi_i32(t, t, 1);
    tcg_gen_add_i32(d, a, b);
    tcg_gen_add_i32(d, d, t);
}

static void gen_urhadd_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);

    tcg_gen_or_vec(vece, t, a, b);
    tcg_gen_shri_vec(vece, a, a, 1);
    tcg_gen_shri_vec(vece, b, b, 1);
    tcg_gen_and_vec(vece, t, t, tcg_constant_vec_matching(d, vece, 1));
    tcg_gen_add_vec(vece, d, a, b);
    tcg_gen_add_vec(vece, d, d, t);
}

void gen_gvec_urhadd(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                     uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_shri_vec, INDEX_op_add_vec, 0
    };
    static const GVecGen3 g[] = {
        { .fni8 = gen_urhadd8_i64,
          .fniv = gen_urhadd_vec,
          .opt_opc = vecop_list,
          .vece = MO_8 },
        { .fni8 = gen_urhadd16_i64,
          .fniv = gen_urhadd_vec,
          .opt_opc = vecop_list,
          .vece = MO_16 },
        { .fni4 = gen_urhadd_i32,
          .fniv = gen_urhadd_vec,
          .opt_opc = vecop_list,
          .vece = MO_32 },
    };
    assert(vece <= MO_32);
    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &g[vece]);
}

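/*
 * Illustrative sketch only (not part of this file's callers): a decode
 * function would pass CPUARMState vector register offsets plus the
 * operation and maximum sizes in bytes, e.g.
 *
 *     gen_gvec_urhadd(MO_8, dofs, nofs, mofs, 16, 16);
 *
 * for a hypothetical full 128-bit operation, where dofs/nofs/mofs stand
 * in for the actual destination/source offset computations.
 */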