qapi: add missing colon-ending for section name
[qemu/armbru.git] / target-ppc / int_helper.c
blob2d57c9a1c22d613bf877f38283834d8e779c3a2f
1 /*
2 * PowerPC integer and vector emulation helpers for QEMU.
4 * Copyright (c) 2003-2007 Jocelyn Mayer
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
19 #include "qemu/osdep.h"
20 #include "cpu.h"
21 #include "internal.h"
22 #include "exec/exec-all.h"
23 #include "qemu/host-utils.h"
24 #include "exec/helper-proto.h"
25 #include "crypto/aes.h"
27 #include "helper_regs.h"
28 /*****************************************************************************/
29 /* Fixed point operations helpers */
31 target_ulong helper_divweu(CPUPPCState *env, target_ulong ra, target_ulong rb,
32 uint32_t oe)
34 uint64_t rt = 0;
35 int overflow = 0;
37 uint64_t dividend = (uint64_t)ra << 32;
38 uint64_t divisor = (uint32_t)rb;
40 if (unlikely(divisor == 0)) {
41 overflow = 1;
42 } else {
43 rt = dividend / divisor;
44 overflow = rt > UINT32_MAX;
47 if (unlikely(overflow)) {
48 rt = 0; /* Undefined */
51 if (oe) {
52 if (unlikely(overflow)) {
53 env->so = env->ov = 1;
54 } else {
55 env->ov = 0;
59 return (target_ulong)rt;
62 target_ulong helper_divwe(CPUPPCState *env, target_ulong ra, target_ulong rb,
63 uint32_t oe)
65 int64_t rt = 0;
66 int overflow = 0;
68 int64_t dividend = (int64_t)ra << 32;
69 int64_t divisor = (int64_t)((int32_t)rb);
71 if (unlikely((divisor == 0) ||
72 ((divisor == -1ull) && (dividend == INT64_MIN)))) {
73 overflow = 1;
74 } else {
75 rt = dividend / divisor;
76 overflow = rt != (int32_t)rt;
79 if (unlikely(overflow)) {
80 rt = 0; /* Undefined */
83 if (oe) {
84 if (unlikely(overflow)) {
85 env->so = env->ov = 1;
86 } else {
87 env->ov = 0;
91 return (target_ulong)rt;
94 #if defined(TARGET_PPC64)
96 uint64_t helper_divdeu(CPUPPCState *env, uint64_t ra, uint64_t rb, uint32_t oe)
98 uint64_t rt = 0;
99 int overflow = 0;
101 overflow = divu128(&rt, &ra, rb);
103 if (unlikely(overflow)) {
104 rt = 0; /* Undefined */
107 if (oe) {
108 if (unlikely(overflow)) {
109 env->so = env->ov = 1;
110 } else {
111 env->ov = 0;
115 return rt;
118 uint64_t helper_divde(CPUPPCState *env, uint64_t rau, uint64_t rbu, uint32_t oe)
120 int64_t rt = 0;
121 int64_t ra = (int64_t)rau;
122 int64_t rb = (int64_t)rbu;
123 int overflow = divs128(&rt, &ra, rb);
125 if (unlikely(overflow)) {
126 rt = 0; /* Undefined */
129 if (oe) {
131 if (unlikely(overflow)) {
132 env->so = env->ov = 1;
133 } else {
134 env->ov = 0;
138 return rt;
141 #endif
144 target_ulong helper_cntlzw(target_ulong t)
146 return clz32(t);
149 target_ulong helper_cnttzw(target_ulong t)
151 return ctz32(t);
154 #if defined(TARGET_PPC64)
155 /* if x = 0xab, returns 0xababababababababa */
156 #define pattern(x) (((x) & 0xff) * (~(target_ulong)0 / 0xff))
158 /* substract 1 from each byte, and with inverse, check if MSB is set at each
159 * byte.
160 * i.e. ((0x00 - 0x01) & ~(0x00)) & 0x80
161 * (0xFF & 0xFF) & 0x80 = 0x80 (zero found)
163 #define haszero(v) (((v) - pattern(0x01)) & ~(v) & pattern(0x80))
165 /* When you XOR the pattern and there is a match, that byte will be zero */
166 #define hasvalue(x, n) (haszero((x) ^ pattern(n)))
168 uint32_t helper_cmpeqb(target_ulong ra, target_ulong rb)
170 return hasvalue(rb, ra) ? 1 << CRF_GT : 0;
173 #undef pattern
174 #undef haszero
175 #undef hasvalue
177 target_ulong helper_cntlzd(target_ulong t)
179 return clz64(t);
182 target_ulong helper_cnttzd(target_ulong t)
184 return ctz64(t);
187 /* Return invalid random number.
189 * FIXME: Add rng backend or other mechanism to get cryptographically suitable
190 * random number
192 target_ulong helper_darn32(void)
194 return -1;
197 target_ulong helper_darn64(void)
199 return -1;
202 #endif
204 #if defined(TARGET_PPC64)
/*
 * Bit permute doubleword.
 *
 * Each of the eight bytes of @rs (byte i = bits 8*i..8*i+7) is a bit index.
 * An index below 64 selects bit (63 - index) of @rb, and that bit becomes
 * bit i of the result.  Indices of 64 or more contribute 0.
 */
uint64_t helper_bpermd(uint64_t rs, uint64_t rb)
{
    uint64_t ra = 0;
    unsigned int i;

    for (i = 0; i < 8; i++) {
        const unsigned int index = (rs >> (i * 8)) & 0xFF;

        /* Use a 64-bit one so the accumulator arithmetic stays unsigned. */
        if (index < 64 && ((rb >> (63 - index)) & 1)) {
            ra |= (uint64_t)1 << i;
        }
    }
    return ra;
}
222 #endif
224 target_ulong helper_cmpb(target_ulong rs, target_ulong rb)
226 target_ulong mask = 0xff;
227 target_ulong ra = 0;
228 int i;
230 for (i = 0; i < sizeof(target_ulong); i++) {
231 if ((rs & mask) == (rb & mask)) {
232 ra |= mask;
234 mask <<= 8;
236 return ra;
239 /* shift right arithmetic helper */
240 target_ulong helper_sraw(CPUPPCState *env, target_ulong value,
241 target_ulong shift)
243 int32_t ret;
245 if (likely(!(shift & 0x20))) {
246 if (likely((uint32_t)shift != 0)) {
247 shift &= 0x1f;
248 ret = (int32_t)value >> shift;
249 if (likely(ret >= 0 || (value & ((1 << shift) - 1)) == 0)) {
250 env->ca = 0;
251 } else {
252 env->ca = 1;
254 } else {
255 ret = (int32_t)value;
256 env->ca = 0;
258 } else {
259 ret = (int32_t)value >> 31;
260 env->ca = (ret != 0);
262 return (target_long)ret;
265 #if defined(TARGET_PPC64)
266 target_ulong helper_srad(CPUPPCState *env, target_ulong value,
267 target_ulong shift)
269 int64_t ret;
271 if (likely(!(shift & 0x40))) {
272 if (likely((uint64_t)shift != 0)) {
273 shift &= 0x3f;
274 ret = (int64_t)value >> shift;
275 if (likely(ret >= 0 || (value & ((1ULL << shift) - 1)) == 0)) {
276 env->ca = 0;
277 } else {
278 env->ca = 1;
280 } else {
281 ret = (int64_t)value;
282 env->ca = 0;
284 } else {
285 ret = (int64_t)value >> 63;
286 env->ca = (ret != 0);
288 return ret;
290 #endif
292 #if defined(TARGET_PPC64)
293 target_ulong helper_popcntb(target_ulong val)
295 val = (val & 0x5555555555555555ULL) + ((val >> 1) &
296 0x5555555555555555ULL);
297 val = (val & 0x3333333333333333ULL) + ((val >> 2) &
298 0x3333333333333333ULL);
299 val = (val & 0x0f0f0f0f0f0f0f0fULL) + ((val >> 4) &
300 0x0f0f0f0f0f0f0f0fULL);
301 return val;
304 target_ulong helper_popcntw(target_ulong val)
306 val = (val & 0x5555555555555555ULL) + ((val >> 1) &
307 0x5555555555555555ULL);
308 val = (val & 0x3333333333333333ULL) + ((val >> 2) &
309 0x3333333333333333ULL);
310 val = (val & 0x0f0f0f0f0f0f0f0fULL) + ((val >> 4) &
311 0x0f0f0f0f0f0f0f0fULL);
312 val = (val & 0x00ff00ff00ff00ffULL) + ((val >> 8) &
313 0x00ff00ff00ff00ffULL);
314 val = (val & 0x0000ffff0000ffffULL) + ((val >> 16) &
315 0x0000ffff0000ffffULL);
316 return val;
319 target_ulong helper_popcntd(target_ulong val)
321 return ctpop64(val);
323 #else
324 target_ulong helper_popcntb(target_ulong val)
326 val = (val & 0x55555555) + ((val >> 1) & 0x55555555);
327 val = (val & 0x33333333) + ((val >> 2) & 0x33333333);
328 val = (val & 0x0f0f0f0f) + ((val >> 4) & 0x0f0f0f0f);
329 return val;
332 target_ulong helper_popcntw(target_ulong val)
334 val = (val & 0x55555555) + ((val >> 1) & 0x55555555);
335 val = (val & 0x33333333) + ((val >> 2) & 0x33333333);
336 val = (val & 0x0f0f0f0f) + ((val >> 4) & 0x0f0f0f0f);
337 val = (val & 0x00ff00ff) + ((val >> 8) & 0x00ff00ff);
338 val = (val & 0x0000ffff) + ((val >> 16) & 0x0000ffff);
339 return val;
341 #endif
343 /*****************************************************************************/
344 /* PowerPC 601 specific instructions (POWER bridge) */
345 target_ulong helper_div(CPUPPCState *env, target_ulong arg1, target_ulong arg2)
347 uint64_t tmp = (uint64_t)arg1 << 32 | env->spr[SPR_MQ];
349 if (((int32_t)tmp == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
350 (int32_t)arg2 == 0) {
351 env->spr[SPR_MQ] = 0;
352 return INT32_MIN;
353 } else {
354 env->spr[SPR_MQ] = tmp % arg2;
355 return tmp / (int32_t)arg2;
359 target_ulong helper_divo(CPUPPCState *env, target_ulong arg1,
360 target_ulong arg2)
362 uint64_t tmp = (uint64_t)arg1 << 32 | env->spr[SPR_MQ];
364 if (((int32_t)tmp == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
365 (int32_t)arg2 == 0) {
366 env->so = env->ov = 1;
367 env->spr[SPR_MQ] = 0;
368 return INT32_MIN;
369 } else {
370 env->spr[SPR_MQ] = tmp % arg2;
371 tmp /= (int32_t)arg2;
372 if ((int32_t)tmp != tmp) {
373 env->so = env->ov = 1;
374 } else {
375 env->ov = 0;
377 return tmp;
381 target_ulong helper_divs(CPUPPCState *env, target_ulong arg1,
382 target_ulong arg2)
384 if (((int32_t)arg1 == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
385 (int32_t)arg2 == 0) {
386 env->spr[SPR_MQ] = 0;
387 return INT32_MIN;
388 } else {
389 env->spr[SPR_MQ] = (int32_t)arg1 % (int32_t)arg2;
390 return (int32_t)arg1 / (int32_t)arg2;
394 target_ulong helper_divso(CPUPPCState *env, target_ulong arg1,
395 target_ulong arg2)
397 if (((int32_t)arg1 == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
398 (int32_t)arg2 == 0) {
399 env->so = env->ov = 1;
400 env->spr[SPR_MQ] = 0;
401 return INT32_MIN;
402 } else {
403 env->ov = 0;
404 env->spr[SPR_MQ] = (int32_t)arg1 % (int32_t)arg2;
405 return (int32_t)arg1 / (int32_t)arg2;
409 /*****************************************************************************/
410 /* 602 specific instructions */
411 /* mfrom is the most crazy instruction ever seen, imho ! */
412 /* Real implementation uses a ROM table. Do the same */
/* Extremely decomposed:
 * return 256 * log10(10^(-arg / 256) + 1.0) + 0.5
 */
417 #if !defined(CONFIG_USER_ONLY)
418 target_ulong helper_602_mfrom(target_ulong arg)
420 if (likely(arg < 602)) {
421 #include "mfrom_table.c"
422 return mfrom_ROM_table[arg];
423 } else {
424 return 0;
427 #endif
429 /*****************************************************************************/
430 /* Altivec extension helpers */
/*
 * Host-endianness dependent accessors for vector registers.
 * HI_IDX/LO_IDX index the two halves of a pair, AVRB/AVRW pick the i-th
 * byte/word, and VECTOR_FOR_INORDER_I iterates over r->element either
 * upwards (big-endian host) or downwards (little-endian host).
 */
#if defined(HOST_WORDS_BIGENDIAN)
#define HI_IDX 0
#define LO_IDX 1
#define AVRB(i) u8[i]
#define AVRW(i) u32[i]
#define VECTOR_FOR_INORDER_I(index, element)                    \
    for (index = 0; index < ARRAY_SIZE(r->element); index++)
#else
#define HI_IDX 1
#define LO_IDX 0
#define AVRB(i) u8[15-(i)]
#define AVRW(i) u32[3-(i)]
#define VECTOR_FOR_INORDER_I(index, element)                    \
    for (index = ARRAY_SIZE(r->element)-1; index >= 0; index--)
#endif
/*
 * Saturating arithmetic helpers.
 *
 * SATCVT(from, to, from_type, to_type, min, max) defines
 *     to_type cvt<from><to>(from_type x, int *sat)
 * which clamps x to [min, max] and stores 1 in *sat when clamping happened
 * (*sat is left untouched otherwise).  SATCVTU is the variant for unsigned
 * sources and only checks the upper bound; its min argument is unused.
 * The bounds are parenthesized so that expression arguments expand safely.
 */
#define SATCVT(from, to, from_type, to_type, min, max) \
    static inline to_type cvt##from##to(from_type x, int *sat) \
    { \
        to_type r; \
 \
        if (x < (from_type)(min)) { \
            r = (min); \
            *sat = 1; \
        } else if (x > (from_type)(max)) { \
            r = (max); \
            *sat = 1; \
        } else { \
            r = x; \
        } \
        return r; \
    }
#define SATCVTU(from, to, from_type, to_type, min, max) \
    static inline to_type cvt##from##to(from_type x, int *sat) \
    { \
        to_type r; \
 \
        if (x > (from_type)(max)) { \
            r = (max); \
            *sat = 1; \
        } else { \
            r = x; \
        } \
        return r; \
    }
SATCVT(sh, sb, int16_t, int8_t, INT8_MIN, INT8_MAX)
SATCVT(sw, sh, int32_t, int16_t, INT16_MIN, INT16_MAX)
SATCVT(sd, sw, int64_t, int32_t, INT32_MIN, INT32_MAX)

SATCVTU(uh, ub, uint16_t, uint8_t, 0, UINT8_MAX)
SATCVTU(uw, uh, uint32_t, uint16_t, 0, UINT16_MAX)
SATCVTU(ud, uw, uint64_t, uint32_t, 0, UINT32_MAX)
SATCVT(sh, ub, int16_t, uint8_t, 0, UINT8_MAX)
SATCVT(sw, uh, int32_t, uint16_t, 0, UINT16_MAX)
SATCVT(sd, uw, int64_t, uint32_t, 0, UINT32_MAX)
#undef SATCVT
#undef SATCVTU
494 void helper_lvsl(ppc_avr_t *r, target_ulong sh)
496 int i, j = (sh & 0xf);
498 VECTOR_FOR_INORDER_I(i, u8) {
499 r->u8[i] = j++;
503 void helper_lvsr(ppc_avr_t *r, target_ulong sh)
505 int i, j = 0x10 - (sh & 0xf);
507 VECTOR_FOR_INORDER_I(i, u8) {
508 r->u8[i] = j++;
512 void helper_mtvscr(CPUPPCState *env, ppc_avr_t *r)
514 #if defined(HOST_WORDS_BIGENDIAN)
515 env->vscr = r->u32[3];
516 #else
517 env->vscr = r->u32[0];
518 #endif
519 set_flush_to_zero(vscr_nj, &env->vec_status);
522 void helper_vaddcuw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
524 int i;
526 for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
527 r->u32[i] = ~a->u32[i] < b->u32[i];
531 /* vprtybw */
532 void helper_vprtybw(ppc_avr_t *r, ppc_avr_t *b)
534 int i;
535 for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
536 uint64_t res = b->u32[i] ^ (b->u32[i] >> 16);
537 res ^= res >> 8;
538 r->u32[i] = res & 1;
542 /* vprtybd */
543 void helper_vprtybd(ppc_avr_t *r, ppc_avr_t *b)
545 int i;
546 for (i = 0; i < ARRAY_SIZE(r->u64); i++) {
547 uint64_t res = b->u64[i] ^ (b->u64[i] >> 32);
548 res ^= res >> 16;
549 res ^= res >> 8;
550 r->u64[i] = res & 1;
554 /* vprtybq */
555 void helper_vprtybq(ppc_avr_t *r, ppc_avr_t *b)
557 uint64_t res = b->u64[0] ^ b->u64[1];
558 res ^= res >> 32;
559 res ^= res >> 16;
560 res ^= res >> 8;
561 r->u64[LO_IDX] = res & 1;
562 r->u64[HI_IDX] = 0;
565 #define VARITH_DO(name, op, element) \
566 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
568 int i; \
570 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
571 r->element[i] = a->element[i] op b->element[i]; \
574 #define VARITH(suffix, element) \
575 VARITH_DO(add##suffix, +, element) \
576 VARITH_DO(sub##suffix, -, element)
577 VARITH(ubm, u8)
578 VARITH(uhm, u16)
579 VARITH(uwm, u32)
580 VARITH(udm, u64)
581 VARITH_DO(muluwm, *, u32)
582 #undef VARITH_DO
583 #undef VARITH
585 #define VARITHFP(suffix, func) \
586 void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \
587 ppc_avr_t *b) \
589 int i; \
591 for (i = 0; i < ARRAY_SIZE(r->f); i++) { \
592 r->f[i] = func(a->f[i], b->f[i], &env->vec_status); \
595 VARITHFP(addfp, float32_add)
596 VARITHFP(subfp, float32_sub)
597 VARITHFP(minfp, float32_min)
598 VARITHFP(maxfp, float32_max)
599 #undef VARITHFP
601 #define VARITHFPFMA(suffix, type) \
602 void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \
603 ppc_avr_t *b, ppc_avr_t *c) \
605 int i; \
606 for (i = 0; i < ARRAY_SIZE(r->f); i++) { \
607 r->f[i] = float32_muladd(a->f[i], c->f[i], b->f[i], \
608 type, &env->vec_status); \
611 VARITHFPFMA(maddfp, 0);
612 VARITHFPFMA(nmsubfp, float_muladd_negate_result | float_muladd_negate_c);
613 #undef VARITHFPFMA
615 #define VARITHSAT_CASE(type, op, cvt, element) \
617 type result = (type)a->element[i] op (type)b->element[i]; \
618 r->element[i] = cvt(result, &sat); \
621 #define VARITHSAT_DO(name, op, optype, cvt, element) \
622 void helper_v##name(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \
623 ppc_avr_t *b) \
625 int sat = 0; \
626 int i; \
628 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
629 switch (sizeof(r->element[0])) { \
630 case 1: \
631 VARITHSAT_CASE(optype, op, cvt, element); \
632 break; \
633 case 2: \
634 VARITHSAT_CASE(optype, op, cvt, element); \
635 break; \
636 case 4: \
637 VARITHSAT_CASE(optype, op, cvt, element); \
638 break; \
641 if (sat) { \
642 env->vscr |= (1 << VSCR_SAT); \
645 #define VARITHSAT_SIGNED(suffix, element, optype, cvt) \
646 VARITHSAT_DO(adds##suffix##s, +, optype, cvt, element) \
647 VARITHSAT_DO(subs##suffix##s, -, optype, cvt, element)
648 #define VARITHSAT_UNSIGNED(suffix, element, optype, cvt) \
649 VARITHSAT_DO(addu##suffix##s, +, optype, cvt, element) \
650 VARITHSAT_DO(subu##suffix##s, -, optype, cvt, element)
651 VARITHSAT_SIGNED(b, s8, int16_t, cvtshsb)
652 VARITHSAT_SIGNED(h, s16, int32_t, cvtswsh)
653 VARITHSAT_SIGNED(w, s32, int64_t, cvtsdsw)
654 VARITHSAT_UNSIGNED(b, u8, uint16_t, cvtshub)
655 VARITHSAT_UNSIGNED(h, u16, uint32_t, cvtswuh)
656 VARITHSAT_UNSIGNED(w, u32, uint64_t, cvtsduw)
657 #undef VARITHSAT_CASE
658 #undef VARITHSAT_DO
659 #undef VARITHSAT_SIGNED
660 #undef VARITHSAT_UNSIGNED
662 #define VAVG_DO(name, element, etype) \
663 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
665 int i; \
667 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
668 etype x = (etype)a->element[i] + (etype)b->element[i] + 1; \
669 r->element[i] = x >> 1; \
673 #define VAVG(type, signed_element, signed_type, unsigned_element, \
674 unsigned_type) \
675 VAVG_DO(avgs##type, signed_element, signed_type) \
676 VAVG_DO(avgu##type, unsigned_element, unsigned_type)
677 VAVG(b, s8, int16_t, u8, uint16_t)
678 VAVG(h, s16, int32_t, u16, uint32_t)
679 VAVG(w, s32, int64_t, u32, uint64_t)
680 #undef VAVG_DO
681 #undef VAVG
683 #define VABSDU_DO(name, element) \
684 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
686 int i; \
688 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
689 r->element[i] = (a->element[i] > b->element[i]) ? \
690 (a->element[i] - b->element[i]) : \
691 (b->element[i] - a->element[i]); \
695 /* VABSDU - Vector absolute difference unsigned
696 * name - instruction mnemonic suffix (b: byte, h: halfword, w: word)
697 * element - element type to access from vector
699 #define VABSDU(type, element) \
700 VABSDU_DO(absdu##type, element)
701 VABSDU(b, u8)
702 VABSDU(h, u16)
703 VABSDU(w, u32)
704 #undef VABSDU_DO
705 #undef VABSDU
707 #define VCF(suffix, cvt, element) \
708 void helper_vcf##suffix(CPUPPCState *env, ppc_avr_t *r, \
709 ppc_avr_t *b, uint32_t uim) \
711 int i; \
713 for (i = 0; i < ARRAY_SIZE(r->f); i++) { \
714 float32 t = cvt(b->element[i], &env->vec_status); \
715 r->f[i] = float32_scalbn(t, -uim, &env->vec_status); \
718 VCF(ux, uint32_to_float32, u32)
719 VCF(sx, int32_to_float32, s32)
720 #undef VCF
722 #define VCMP_DO(suffix, compare, element, record) \
723 void helper_vcmp##suffix(CPUPPCState *env, ppc_avr_t *r, \
724 ppc_avr_t *a, ppc_avr_t *b) \
726 uint64_t ones = (uint64_t)-1; \
727 uint64_t all = ones; \
728 uint64_t none = 0; \
729 int i; \
731 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
732 uint64_t result = (a->element[i] compare b->element[i] ? \
733 ones : 0x0); \
734 switch (sizeof(a->element[0])) { \
735 case 8: \
736 r->u64[i] = result; \
737 break; \
738 case 4: \
739 r->u32[i] = result; \
740 break; \
741 case 2: \
742 r->u16[i] = result; \
743 break; \
744 case 1: \
745 r->u8[i] = result; \
746 break; \
748 all &= result; \
749 none |= result; \
751 if (record) { \
752 env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1); \
755 #define VCMP(suffix, compare, element) \
756 VCMP_DO(suffix, compare, element, 0) \
757 VCMP_DO(suffix##_dot, compare, element, 1)
758 VCMP(equb, ==, u8)
759 VCMP(equh, ==, u16)
760 VCMP(equw, ==, u32)
761 VCMP(equd, ==, u64)
762 VCMP(gtub, >, u8)
763 VCMP(gtuh, >, u16)
764 VCMP(gtuw, >, u32)
765 VCMP(gtud, >, u64)
766 VCMP(gtsb, >, s8)
767 VCMP(gtsh, >, s16)
768 VCMP(gtsw, >, s32)
769 VCMP(gtsd, >, s64)
770 #undef VCMP_DO
771 #undef VCMP
773 #define VCMPNE_DO(suffix, element, etype, cmpzero, record) \
774 void helper_vcmpne##suffix(CPUPPCState *env, ppc_avr_t *r, \
775 ppc_avr_t *a, ppc_avr_t *b) \
777 etype ones = (etype)-1; \
778 etype all = ones; \
779 etype result, none = 0; \
780 int i; \
782 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
783 if (cmpzero) { \
784 result = ((a->element[i] == 0) \
785 || (b->element[i] == 0) \
786 || (a->element[i] != b->element[i]) ? \
787 ones : 0x0); \
788 } else { \
789 result = (a->element[i] != b->element[i]) ? ones : 0x0; \
791 r->element[i] = result; \
792 all &= result; \
793 none |= result; \
795 if (record) { \
796 env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1); \
800 /* VCMPNEZ - Vector compare not equal to zero
801 * suffix - instruction mnemonic suffix (b: byte, h: halfword, w: word)
802 * element - element type to access from vector
804 #define VCMPNE(suffix, element, etype, cmpzero) \
805 VCMPNE_DO(suffix, element, etype, cmpzero, 0) \
806 VCMPNE_DO(suffix##_dot, element, etype, cmpzero, 1)
807 VCMPNE(zb, u8, uint8_t, 1)
808 VCMPNE(zh, u16, uint16_t, 1)
809 VCMPNE(zw, u32, uint32_t, 1)
810 VCMPNE(b, u8, uint8_t, 0)
811 VCMPNE(h, u16, uint16_t, 0)
812 VCMPNE(w, u32, uint32_t, 0)
813 #undef VCMPNE_DO
814 #undef VCMPNE
816 #define VCMPFP_DO(suffix, compare, order, record) \
817 void helper_vcmp##suffix(CPUPPCState *env, ppc_avr_t *r, \
818 ppc_avr_t *a, ppc_avr_t *b) \
820 uint32_t ones = (uint32_t)-1; \
821 uint32_t all = ones; \
822 uint32_t none = 0; \
823 int i; \
825 for (i = 0; i < ARRAY_SIZE(r->f); i++) { \
826 uint32_t result; \
827 int rel = float32_compare_quiet(a->f[i], b->f[i], \
828 &env->vec_status); \
829 if (rel == float_relation_unordered) { \
830 result = 0; \
831 } else if (rel compare order) { \
832 result = ones; \
833 } else { \
834 result = 0; \
836 r->u32[i] = result; \
837 all &= result; \
838 none |= result; \
840 if (record) { \
841 env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1); \
844 #define VCMPFP(suffix, compare, order) \
845 VCMPFP_DO(suffix, compare, order, 0) \
846 VCMPFP_DO(suffix##_dot, compare, order, 1)
847 VCMPFP(eqfp, ==, float_relation_equal)
848 VCMPFP(gefp, !=, float_relation_less)
849 VCMPFP(gtfp, ==, float_relation_greater)
850 #undef VCMPFP_DO
851 #undef VCMPFP
853 static inline void vcmpbfp_internal(CPUPPCState *env, ppc_avr_t *r,
854 ppc_avr_t *a, ppc_avr_t *b, int record)
856 int i;
857 int all_in = 0;
859 for (i = 0; i < ARRAY_SIZE(r->f); i++) {
860 int le_rel = float32_compare_quiet(a->f[i], b->f[i], &env->vec_status);
861 if (le_rel == float_relation_unordered) {
862 r->u32[i] = 0xc0000000;
863 all_in = 1;
864 } else {
865 float32 bneg = float32_chs(b->f[i]);
866 int ge_rel = float32_compare_quiet(a->f[i], bneg, &env->vec_status);
867 int le = le_rel != float_relation_greater;
868 int ge = ge_rel != float_relation_less;
870 r->u32[i] = ((!le) << 31) | ((!ge) << 30);
871 all_in |= (!le | !ge);
874 if (record) {
875 env->crf[6] = (all_in == 0) << 1;
879 void helper_vcmpbfp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
881 vcmpbfp_internal(env, r, a, b, 0);
884 void helper_vcmpbfp_dot(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
885 ppc_avr_t *b)
887 vcmpbfp_internal(env, r, a, b, 1);
890 #define VCT(suffix, satcvt, element) \
891 void helper_vct##suffix(CPUPPCState *env, ppc_avr_t *r, \
892 ppc_avr_t *b, uint32_t uim) \
894 int i; \
895 int sat = 0; \
896 float_status s = env->vec_status; \
898 set_float_rounding_mode(float_round_to_zero, &s); \
899 for (i = 0; i < ARRAY_SIZE(r->f); i++) { \
900 if (float32_is_any_nan(b->f[i])) { \
901 r->element[i] = 0; \
902 } else { \
903 float64 t = float32_to_float64(b->f[i], &s); \
904 int64_t j; \
906 t = float64_scalbn(t, uim, &s); \
907 j = float64_to_int64(t, &s); \
908 r->element[i] = satcvt(j, &sat); \
911 if (sat) { \
912 env->vscr |= (1 << VSCR_SAT); \
915 VCT(uxs, cvtsduw, u32)
916 VCT(sxs, cvtsdsw, s32)
917 #undef VCT
919 target_ulong helper_vclzlsbb(ppc_avr_t *r)
921 target_ulong count = 0;
922 int i;
923 VECTOR_FOR_INORDER_I(i, u8) {
924 if (r->u8[i] & 0x01) {
925 break;
927 count++;
929 return count;
932 target_ulong helper_vctzlsbb(ppc_avr_t *r)
934 target_ulong count = 0;
935 int i;
936 #if defined(HOST_WORDS_BIGENDIAN)
937 for (i = ARRAY_SIZE(r->u8) - 1; i >= 0; i--) {
938 #else
939 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
940 #endif
941 if (r->u8[i] & 0x01) {
942 break;
944 count++;
946 return count;
949 void helper_vmhaddshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
950 ppc_avr_t *b, ppc_avr_t *c)
952 int sat = 0;
953 int i;
955 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
956 int32_t prod = a->s16[i] * b->s16[i];
957 int32_t t = (int32_t)c->s16[i] + (prod >> 15);
959 r->s16[i] = cvtswsh(t, &sat);
962 if (sat) {
963 env->vscr |= (1 << VSCR_SAT);
967 void helper_vmhraddshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
968 ppc_avr_t *b, ppc_avr_t *c)
970 int sat = 0;
971 int i;
973 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
974 int32_t prod = a->s16[i] * b->s16[i] + 0x00004000;
975 int32_t t = (int32_t)c->s16[i] + (prod >> 15);
976 r->s16[i] = cvtswsh(t, &sat);
979 if (sat) {
980 env->vscr |= (1 << VSCR_SAT);
984 #define VMINMAX_DO(name, compare, element) \
985 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
987 int i; \
989 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
990 if (a->element[i] compare b->element[i]) { \
991 r->element[i] = b->element[i]; \
992 } else { \
993 r->element[i] = a->element[i]; \
997 #define VMINMAX(suffix, element) \
998 VMINMAX_DO(min##suffix, >, element) \
999 VMINMAX_DO(max##suffix, <, element)
1000 VMINMAX(sb, s8)
1001 VMINMAX(sh, s16)
1002 VMINMAX(sw, s32)
1003 VMINMAX(sd, s64)
1004 VMINMAX(ub, u8)
1005 VMINMAX(uh, u16)
1006 VMINMAX(uw, u32)
1007 VMINMAX(ud, u64)
1008 #undef VMINMAX_DO
1009 #undef VMINMAX
1011 void helper_vmladduhm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
1013 int i;
1015 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
1016 int32_t prod = a->s16[i] * b->s16[i];
1017 r->s16[i] = (int16_t) (prod + c->s16[i]);
1021 #define VMRG_DO(name, element, highp) \
1022 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
1024 ppc_avr_t result; \
1025 int i; \
1026 size_t n_elems = ARRAY_SIZE(r->element); \
1028 for (i = 0; i < n_elems / 2; i++) { \
1029 if (highp) { \
1030 result.element[i*2+HI_IDX] = a->element[i]; \
1031 result.element[i*2+LO_IDX] = b->element[i]; \
1032 } else { \
1033 result.element[n_elems - i * 2 - (1 + HI_IDX)] = \
1034 b->element[n_elems - i - 1]; \
1035 result.element[n_elems - i * 2 - (1 + LO_IDX)] = \
1036 a->element[n_elems - i - 1]; \
1039 *r = result; \
1041 #if defined(HOST_WORDS_BIGENDIAN)
1042 #define MRGHI 0
1043 #define MRGLO 1
1044 #else
1045 #define MRGHI 1
1046 #define MRGLO 0
1047 #endif
1048 #define VMRG(suffix, element) \
1049 VMRG_DO(mrgl##suffix, element, MRGHI) \
1050 VMRG_DO(mrgh##suffix, element, MRGLO)
1051 VMRG(b, u8)
1052 VMRG(h, u16)
1053 VMRG(w, u32)
1054 #undef VMRG_DO
1055 #undef VMRG
1056 #undef MRGHI
1057 #undef MRGLO
1059 void helper_vmsummbm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
1060 ppc_avr_t *b, ppc_avr_t *c)
1062 int32_t prod[16];
1063 int i;
1065 for (i = 0; i < ARRAY_SIZE(r->s8); i++) {
1066 prod[i] = (int32_t)a->s8[i] * b->u8[i];
1069 VECTOR_FOR_INORDER_I(i, s32) {
1070 r->s32[i] = c->s32[i] + prod[4 * i] + prod[4 * i + 1] +
1071 prod[4 * i + 2] + prod[4 * i + 3];
1075 void helper_vmsumshm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
1076 ppc_avr_t *b, ppc_avr_t *c)
1078 int32_t prod[8];
1079 int i;
1081 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
1082 prod[i] = a->s16[i] * b->s16[i];
1085 VECTOR_FOR_INORDER_I(i, s32) {
1086 r->s32[i] = c->s32[i] + prod[2 * i] + prod[2 * i + 1];
1090 void helper_vmsumshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
1091 ppc_avr_t *b, ppc_avr_t *c)
1093 int32_t prod[8];
1094 int i;
1095 int sat = 0;
1097 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
1098 prod[i] = (int32_t)a->s16[i] * b->s16[i];
1101 VECTOR_FOR_INORDER_I(i, s32) {
1102 int64_t t = (int64_t)c->s32[i] + prod[2 * i] + prod[2 * i + 1];
1104 r->u32[i] = cvtsdsw(t, &sat);
1107 if (sat) {
1108 env->vscr |= (1 << VSCR_SAT);
1112 void helper_vmsumubm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
1113 ppc_avr_t *b, ppc_avr_t *c)
1115 uint16_t prod[16];
1116 int i;
1118 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
1119 prod[i] = a->u8[i] * b->u8[i];
1122 VECTOR_FOR_INORDER_I(i, u32) {
1123 r->u32[i] = c->u32[i] + prod[4 * i] + prod[4 * i + 1] +
1124 prod[4 * i + 2] + prod[4 * i + 3];
1128 void helper_vmsumuhm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
1129 ppc_avr_t *b, ppc_avr_t *c)
1131 uint32_t prod[8];
1132 int i;
1134 for (i = 0; i < ARRAY_SIZE(r->u16); i++) {
1135 prod[i] = a->u16[i] * b->u16[i];
1138 VECTOR_FOR_INORDER_I(i, u32) {
1139 r->u32[i] = c->u32[i] + prod[2 * i] + prod[2 * i + 1];
1143 void helper_vmsumuhs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
1144 ppc_avr_t *b, ppc_avr_t *c)
1146 uint32_t prod[8];
1147 int i;
1148 int sat = 0;
1150 for (i = 0; i < ARRAY_SIZE(r->u16); i++) {
1151 prod[i] = a->u16[i] * b->u16[i];
1154 VECTOR_FOR_INORDER_I(i, s32) {
1155 uint64_t t = (uint64_t)c->u32[i] + prod[2 * i] + prod[2 * i + 1];
1157 r->u32[i] = cvtuduw(t, &sat);
1160 if (sat) {
1161 env->vscr |= (1 << VSCR_SAT);
1165 #define VMUL_DO(name, mul_element, prod_element, cast, evenp) \
1166 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
1168 int i; \
1170 VECTOR_FOR_INORDER_I(i, prod_element) { \
1171 if (evenp) { \
1172 r->prod_element[i] = \
1173 (cast)a->mul_element[i * 2 + HI_IDX] * \
1174 (cast)b->mul_element[i * 2 + HI_IDX]; \
1175 } else { \
1176 r->prod_element[i] = \
1177 (cast)a->mul_element[i * 2 + LO_IDX] * \
1178 (cast)b->mul_element[i * 2 + LO_IDX]; \
1182 #define VMUL(suffix, mul_element, prod_element, cast) \
1183 VMUL_DO(mule##suffix, mul_element, prod_element, cast, 1) \
1184 VMUL_DO(mulo##suffix, mul_element, prod_element, cast, 0)
1185 VMUL(sb, s8, s16, int16_t)
1186 VMUL(sh, s16, s32, int32_t)
1187 VMUL(sw, s32, s64, int64_t)
1188 VMUL(ub, u8, u16, uint16_t)
1189 VMUL(uh, u16, u32, uint32_t)
1190 VMUL(uw, u32, u64, uint64_t)
1191 #undef VMUL_DO
1192 #undef VMUL
1194 void helper_vperm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b,
1195 ppc_avr_t *c)
1197 ppc_avr_t result;
1198 int i;
1200 VECTOR_FOR_INORDER_I(i, u8) {
1201 int s = c->u8[i] & 0x1f;
1202 #if defined(HOST_WORDS_BIGENDIAN)
1203 int index = s & 0xf;
1204 #else
1205 int index = 15 - (s & 0xf);
1206 #endif
1208 if (s & 0x10) {
1209 result.u8[i] = b->u8[index];
1210 } else {
1211 result.u8[i] = a->u8[index];
1214 *r = result;
1217 void helper_vpermr(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b,
1218 ppc_avr_t *c)
1220 ppc_avr_t result;
1221 int i;
1223 VECTOR_FOR_INORDER_I(i, u8) {
1224 int s = c->u8[i] & 0x1f;
1225 #if defined(HOST_WORDS_BIGENDIAN)
1226 int index = 15 - (s & 0xf);
1227 #else
1228 int index = s & 0xf;
1229 #endif
1231 if (s & 0x10) {
1232 result.u8[i] = a->u8[index];
1233 } else {
1234 result.u8[i] = b->u8[index];
1237 *r = result;
/*
 * Host-endianness dependent accessors used by the bit-permute helpers.
 * Every macro argument is parenthesized so that expression arguments
 * (e.g. "x + 1") expand with the intended precedence.
 */
#if defined(HOST_WORDS_BIGENDIAN)
#define VBPERMQ_INDEX(avr, i) ((avr)->u8[(i)])
#define VBPERMD_INDEX(i) (i)
#define VBPERMQ_DW(index) (((index) & 0x40) != 0)
#define EXTRACT_BIT(avr, i, index) (extract64((avr)->u64[(i)], (index), 1))
#else
#define VBPERMQ_INDEX(avr, i) ((avr)->u8[15 - (i)])
#define VBPERMD_INDEX(i) (1 - (i))
#define VBPERMQ_DW(index) (((index) & 0x40) == 0)
#define EXTRACT_BIT(avr, i, index) \
        (extract64((avr)->u64[1 - (i)], 63 - (index), 1))
#endif
1253 void helper_vbpermd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1255 int i, j;
1256 ppc_avr_t result = { .u64 = { 0, 0 } };
1257 VECTOR_FOR_INORDER_I(i, u64) {
1258 for (j = 0; j < 8; j++) {
1259 int index = VBPERMQ_INDEX(b, (i * 8) + j);
1260 if (index < 64 && EXTRACT_BIT(a, i, index)) {
1261 result.u64[VBPERMD_INDEX(i)] |= (0x80 >> j);
1265 *r = result;
1268 void helper_vbpermq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1270 int i;
1271 uint64_t perm = 0;
1273 VECTOR_FOR_INORDER_I(i, u8) {
1274 int index = VBPERMQ_INDEX(b, i);
1276 if (index < 128) {
1277 uint64_t mask = (1ull << (63-(index & 0x3F)));
1278 if (a->u64[VBPERMQ_DW(index)] & mask) {
1279 perm |= (0x8000 >> i);
1284 r->u64[HI_IDX] = perm;
1285 r->u64[LO_IDX] = 0;
1288 #undef VBPERMQ_INDEX
1289 #undef VBPERMQ_DW
/*
 * VGBBD_MASKS[v] spreads the eight bits of the byte value v over a 64-bit
 * word: bit n of v (n = 0 being the least significant bit) is placed in bit
 * 8 * n + 7 of the entry, i.e. in the top bit of byte n.  For example
 * VGBBD_MASKS[0x01] == 0x80 and VGBBD_MASKS[0x80] == 0x8000000000000000.
 *
 * The 256 entries are generated at compile time from that rule instead of
 * being spelled out one by one.
 */
#define VGBBD_BIT(v, n)  ((uint64_t)(((v) >> (n)) & 1) << (8 * (n) + 7))
#define VGBBD_ROW1(v)                                                   \
    (VGBBD_BIT(v, 0) | VGBBD_BIT(v, 1) | VGBBD_BIT(v, 2) |              \
     VGBBD_BIT(v, 3) | VGBBD_BIT(v, 4) | VGBBD_BIT(v, 5) |              \
     VGBBD_BIT(v, 6) | VGBBD_BIT(v, 7))
#define VGBBD_ROW4(v)                                                   \
    VGBBD_ROW1((v) + 0), VGBBD_ROW1((v) + 1),                           \
    VGBBD_ROW1((v) + 2), VGBBD_ROW1((v) + 3)
#define VGBBD_ROW16(v)                                                  \
    VGBBD_ROW4((v) + 0), VGBBD_ROW4((v) + 4),                           \
    VGBBD_ROW4((v) + 8), VGBBD_ROW4((v) + 12)
#define VGBBD_ROW64(v)                                                  \
    VGBBD_ROW16((v) + 0),  VGBBD_ROW16((v) + 16),                       \
    VGBBD_ROW16((v) + 32), VGBBD_ROW16((v) + 48)

static const uint64_t VGBBD_MASKS[256] = {
    VGBBD_ROW64(0x00),   /* 00 .. 3F */
    VGBBD_ROW64(0x40),   /* 40 .. 7F */
    VGBBD_ROW64(0x80),   /* 80 .. BF */
    VGBBD_ROW64(0xC0),   /* C0 .. FF */
};

#undef VGBBD_ROW64
#undef VGBBD_ROW16
#undef VGBBD_ROW4
#undef VGBBD_ROW1
#undef VGBBD_BIT
1550 void helper_vgbbd(ppc_avr_t *r, ppc_avr_t *b)
1552 int i;
1553 uint64_t t[2] = { 0, 0 };
1555 VECTOR_FOR_INORDER_I(i, u8) {
1556 #if defined(HOST_WORDS_BIGENDIAN)
1557 t[i>>3] |= VGBBD_MASKS[b->u8[i]] >> (i & 7);
1558 #else
1559 t[i>>3] |= VGBBD_MASKS[b->u8[i]] >> (7-(i & 7));
1560 #endif
1563 r->u64[0] = t[0];
1564 r->u64[1] = t[1];
1567 #define PMSUM(name, srcfld, trgfld, trgtyp) \
1568 void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
1570 int i, j; \
1571 trgtyp prod[sizeof(ppc_avr_t)/sizeof(a->srcfld[0])]; \
1573 VECTOR_FOR_INORDER_I(i, srcfld) { \
1574 prod[i] = 0; \
1575 for (j = 0; j < sizeof(a->srcfld[0]) * 8; j++) { \
1576 if (a->srcfld[i] & (1ull<<j)) { \
1577 prod[i] ^= ((trgtyp)b->srcfld[i] << j); \
1582 VECTOR_FOR_INORDER_I(i, trgfld) { \
1583 r->trgfld[i] = prod[2*i] ^ prod[2*i+1]; \
1587 PMSUM(vpmsumb, u8, u16, uint16_t)
1588 PMSUM(vpmsumh, u16, u32, uint32_t)
1589 PMSUM(vpmsumw, u32, u64, uint64_t)
1591 void helper_vpmsumd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1594 #ifdef CONFIG_INT128
1595 int i, j;
1596 __uint128_t prod[2];
1598 VECTOR_FOR_INORDER_I(i, u64) {
1599 prod[i] = 0;
1600 for (j = 0; j < 64; j++) {
1601 if (a->u64[i] & (1ull<<j)) {
1602 prod[i] ^= (((__uint128_t)b->u64[i]) << j);
1607 r->u128 = prod[0] ^ prod[1];
1609 #else
1610 int i, j;
1611 ppc_avr_t prod[2];
1613 VECTOR_FOR_INORDER_I(i, u64) {
1614 prod[i].u64[LO_IDX] = prod[i].u64[HI_IDX] = 0;
1615 for (j = 0; j < 64; j++) {
1616 if (a->u64[i] & (1ull<<j)) {
1617 ppc_avr_t bshift;
1618 if (j == 0) {
1619 bshift.u64[HI_IDX] = 0;
1620 bshift.u64[LO_IDX] = b->u64[i];
1621 } else {
1622 bshift.u64[HI_IDX] = b->u64[i] >> (64-j);
1623 bshift.u64[LO_IDX] = b->u64[i] << j;
1625 prod[i].u64[LO_IDX] ^= bshift.u64[LO_IDX];
1626 prod[i].u64[HI_IDX] ^= bshift.u64[HI_IDX];
1631 r->u64[LO_IDX] = prod[0].u64[LO_IDX] ^ prod[1].u64[LO_IDX];
1632 r->u64[HI_IDX] = prod[0].u64[HI_IDX] ^ prod[1].u64[HI_IDX];
1633 #endif
/* PKBIG is 1 when building for a big-endian host and 0 otherwise. */
#ifdef HOST_WORDS_BIGENDIAN
#define PKBIG 1
#else
#define PKBIG 0
#endif
1642 void helper_vpkpx(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1644 int i, j;
1645 ppc_avr_t result;
1646 #if defined(HOST_WORDS_BIGENDIAN)
1647 const ppc_avr_t *x[2] = { a, b };
1648 #else
1649 const ppc_avr_t *x[2] = { b, a };
1650 #endif
1652 VECTOR_FOR_INORDER_I(i, u64) {
1653 VECTOR_FOR_INORDER_I(j, u32) {
1654 uint32_t e = x[i]->u32[j];
1656 result.u16[4*i+j] = (((e >> 9) & 0xfc00) |
1657 ((e >> 6) & 0x3e0) |
1658 ((e >> 3) & 0x1f));
1661 *r = result;
1664 #define VPK(suffix, from, to, cvt, dosat) \
1665 void helper_vpk##suffix(CPUPPCState *env, ppc_avr_t *r, \
1666 ppc_avr_t *a, ppc_avr_t *b) \
1668 int i; \
1669 int sat = 0; \
1670 ppc_avr_t result; \
1671 ppc_avr_t *a0 = PKBIG ? a : b; \
1672 ppc_avr_t *a1 = PKBIG ? b : a; \
1674 VECTOR_FOR_INORDER_I(i, from) { \
1675 result.to[i] = cvt(a0->from[i], &sat); \
1676 result.to[i+ARRAY_SIZE(r->from)] = cvt(a1->from[i], &sat); \
1678 *r = result; \
1679 if (dosat && sat) { \
1680 env->vscr |= (1 << VSCR_SAT); \
1683 #define I(x, y) (x)
1684 VPK(shss, s16, s8, cvtshsb, 1)
1685 VPK(shus, s16, u8, cvtshub, 1)
1686 VPK(swss, s32, s16, cvtswsh, 1)
1687 VPK(swus, s32, u16, cvtswuh, 1)
1688 VPK(sdss, s64, s32, cvtsdsw, 1)
1689 VPK(sdus, s64, u32, cvtsduw, 1)
1690 VPK(uhus, u16, u8, cvtuhub, 1)
1691 VPK(uwus, u32, u16, cvtuwuh, 1)
1692 VPK(udus, u64, u32, cvtuduw, 1)
1693 VPK(uhum, u16, u8, I, 0)
1694 VPK(uwum, u32, u16, I, 0)
1695 VPK(udum, u64, u32, I, 0)
1696 #undef I
1697 #undef VPK
1698 #undef PKBIG
1700 void helper_vrefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
1702 int i;
1704 for (i = 0; i < ARRAY_SIZE(r->f); i++) {
1705 r->f[i] = float32_div(float32_one, b->f[i], &env->vec_status);
1709 #define VRFI(suffix, rounding) \
1710 void helper_vrfi##suffix(CPUPPCState *env, ppc_avr_t *r, \
1711 ppc_avr_t *b) \
1713 int i; \
1714 float_status s = env->vec_status; \
1716 set_float_rounding_mode(rounding, &s); \
1717 for (i = 0; i < ARRAY_SIZE(r->f); i++) { \
1718 r->f[i] = float32_round_to_int (b->f[i], &s); \
1721 VRFI(n, float_round_nearest_even)
1722 VRFI(m, float_round_down)
1723 VRFI(p, float_round_up)
1724 VRFI(z, float_round_to_zero)
1725 #undef VRFI
1727 #define VROTATE(suffix, element, mask) \
1728 void helper_vrl##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
1730 int i; \
1732 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
1733 unsigned int shift = b->element[i] & mask; \
1734 r->element[i] = (a->element[i] << shift) | \
1735 (a->element[i] >> (sizeof(a->element[0]) * 8 - shift)); \
1738 VROTATE(b, u8, 0x7)
1739 VROTATE(h, u16, 0xF)
1740 VROTATE(w, u32, 0x1F)
1741 VROTATE(d, u64, 0x3F)
1742 #undef VROTATE
1744 void helper_vrsqrtefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
1746 int i;
1748 for (i = 0; i < ARRAY_SIZE(r->f); i++) {
1749 float32 t = float32_sqrt(b->f[i], &env->vec_status);
1751 r->f[i] = float32_div(float32_one, t, &env->vec_status);
1755 #define VRLMI(name, size, element, insert) \
1756 void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
1758 int i; \
1759 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
1760 uint##size##_t src1 = a->element[i]; \
1761 uint##size##_t src2 = b->element[i]; \
1762 uint##size##_t src3 = r->element[i]; \
1763 uint##size##_t begin, end, shift, mask, rot_val; \
1765 shift = extract##size(src2, 0, 6); \
1766 end = extract##size(src2, 8, 6); \
1767 begin = extract##size(src2, 16, 6); \
1768 rot_val = rol##size(src1, shift); \
1769 mask = mask_u##size(begin, end); \
1770 if (insert) { \
1771 r->element[i] = (rot_val & mask) | (src3 & ~mask); \
1772 } else { \
1773 r->element[i] = (rot_val & mask); \
1778 VRLMI(vrldmi, 64, u64, 1);
1779 VRLMI(vrlwmi, 32, u32, 1);
1780 VRLMI(vrldnm, 64, u64, 0);
1781 VRLMI(vrlwnm, 32, u32, 0);
1783 void helper_vsel(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b,
1784 ppc_avr_t *c)
1786 r->u64[0] = (a->u64[0] & ~c->u64[0]) | (b->u64[0] & c->u64[0]);
1787 r->u64[1] = (a->u64[1] & ~c->u64[1]) | (b->u64[1] & c->u64[1]);
1790 void helper_vexptefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
1792 int i;
1794 for (i = 0; i < ARRAY_SIZE(r->f); i++) {
1795 r->f[i] = float32_exp2(b->f[i], &env->vec_status);
1799 void helper_vlogefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
1801 int i;
1803 for (i = 0; i < ARRAY_SIZE(r->f); i++) {
1804 r->f[i] = float32_log2(b->f[i], &env->vec_status);
1808 /* The specification says that the results are undefined if all of the
1809 * shift counts are not identical. We check to make sure that they are
1810 * to conform to what real hardware appears to do. */
1811 #define VSHIFT(suffix, leftp) \
1812 void helper_vs##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
1814 int shift = b->u8[LO_IDX*15] & 0x7; \
1815 int doit = 1; \
1816 int i; \
1818 for (i = 0; i < ARRAY_SIZE(r->u8); i++) { \
1819 doit = doit && ((b->u8[i] & 0x7) == shift); \
1821 if (doit) { \
1822 if (shift == 0) { \
1823 *r = *a; \
1824 } else if (leftp) { \
1825 uint64_t carry = a->u64[LO_IDX] >> (64 - shift); \
1827 r->u64[HI_IDX] = (a->u64[HI_IDX] << shift) | carry; \
1828 r->u64[LO_IDX] = a->u64[LO_IDX] << shift; \
1829 } else { \
1830 uint64_t carry = a->u64[HI_IDX] << (64 - shift); \
1832 r->u64[LO_IDX] = (a->u64[LO_IDX] >> shift) | carry; \
1833 r->u64[HI_IDX] = a->u64[HI_IDX] >> shift; \
1837 VSHIFT(l, 1)
1838 VSHIFT(r, 0)
1839 #undef VSHIFT
1841 #define VSL(suffix, element, mask) \
1842 void helper_vsl##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
1844 int i; \
1846 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
1847 unsigned int shift = b->element[i] & mask; \
1849 r->element[i] = a->element[i] << shift; \
1852 VSL(b, u8, 0x7)
1853 VSL(h, u16, 0x0F)
1854 VSL(w, u32, 0x1F)
1855 VSL(d, u64, 0x3F)
1856 #undef VSL
1858 void helper_vslv(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1860 int i;
1861 unsigned int shift, bytes, size;
1863 size = ARRAY_SIZE(r->u8);
1864 for (i = 0; i < size; i++) {
1865 shift = b->u8[i] & 0x7; /* extract shift value */
1866 bytes = (a->u8[i] << 8) + /* extract adjacent bytes */
1867 (((i + 1) < size) ? a->u8[i + 1] : 0);
1868 r->u8[i] = (bytes << shift) >> 8; /* shift and store result */
1872 void helper_vsrv(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1874 int i;
1875 unsigned int shift, bytes;
1877 /* Use reverse order, as destination and source register can be same. Its
1878 * being modified in place saving temporary, reverse order will guarantee
1879 * that computed result is not fed back.
1881 for (i = ARRAY_SIZE(r->u8) - 1; i >= 0; i--) {
1882 shift = b->u8[i] & 0x7; /* extract shift value */
1883 bytes = ((i ? a->u8[i - 1] : 0) << 8) + a->u8[i];
1884 /* extract adjacent bytes */
1885 r->u8[i] = (bytes >> shift) & 0xFF; /* shift and store result */
1889 void helper_vsldoi(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t shift)
1891 int sh = shift & 0xf;
1892 int i;
1893 ppc_avr_t result;
1895 #if defined(HOST_WORDS_BIGENDIAN)
1896 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
1897 int index = sh + i;
1898 if (index > 0xf) {
1899 result.u8[i] = b->u8[index - 0x10];
1900 } else {
1901 result.u8[i] = a->u8[index];
1904 #else
1905 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
1906 int index = (16 - sh) + i;
1907 if (index > 0xf) {
1908 result.u8[i] = a->u8[index - 0x10];
1909 } else {
1910 result.u8[i] = b->u8[index];
1913 #endif
1914 *r = result;
1917 void helper_vslo(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1919 int sh = (b->u8[LO_IDX*0xf] >> 3) & 0xf;
1921 #if defined(HOST_WORDS_BIGENDIAN)
1922 memmove(&r->u8[0], &a->u8[sh], 16 - sh);
1923 memset(&r->u8[16-sh], 0, sh);
1924 #else
1925 memmove(&r->u8[sh], &a->u8[0], 16 - sh);
1926 memset(&r->u8[0], 0, sh);
1927 #endif
1930 /* Experimental testing shows that hardware masks the immediate. */
1931 #define _SPLAT_MASKED(element) (splat & (ARRAY_SIZE(r->element) - 1))
1932 #if defined(HOST_WORDS_BIGENDIAN)
1933 #define SPLAT_ELEMENT(element) _SPLAT_MASKED(element)
1934 #else
1935 #define SPLAT_ELEMENT(element) \
1936 (ARRAY_SIZE(r->element) - 1 - _SPLAT_MASKED(element))
1937 #endif
1938 #define VSPLT(suffix, element) \
1939 void helper_vsplt##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t splat) \
1941 uint32_t s = b->element[SPLAT_ELEMENT(element)]; \
1942 int i; \
1944 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
1945 r->element[i] = s; \
1948 VSPLT(b, u8)
1949 VSPLT(h, u16)
1950 VSPLT(w, u32)
1951 #undef VSPLT
1952 #undef SPLAT_ELEMENT
1953 #undef _SPLAT_MASKED
1954 #if defined(HOST_WORDS_BIGENDIAN)
1955 #define VINSERT(suffix, element) \
1956 void helper_vinsert##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \
1958 memmove(&r->u8[index], &b->u8[8 - sizeof(r->element)], \
1959 sizeof(r->element[0])); \
1961 #else
1962 #define VINSERT(suffix, element) \
1963 void helper_vinsert##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \
1965 uint32_t d = (16 - index) - sizeof(r->element[0]); \
1966 memmove(&r->u8[d], &b->u8[8], sizeof(r->element[0])); \
1968 #endif
1969 VINSERT(b, u8)
1970 VINSERT(h, u16)
1971 VINSERT(w, u32)
1972 VINSERT(d, u64)
1973 #undef VINSERT
1974 #if defined(HOST_WORDS_BIGENDIAN)
1975 #define VEXTRACT(suffix, element) \
1976 void helper_vextract##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \
1978 uint32_t es = sizeof(r->element[0]); \
1979 memmove(&r->u8[8 - es], &b->u8[index], es); \
1980 memset(&r->u8[8], 0, 8); \
1981 memset(&r->u8[0], 0, 8 - es); \
1983 #else
1984 #define VEXTRACT(suffix, element) \
1985 void helper_vextract##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \
1987 uint32_t es = sizeof(r->element[0]); \
1988 uint32_t s = (16 - index) - es; \
1989 memmove(&r->u8[8], &b->u8[s], es); \
1990 memset(&r->u8[0], 0, 8); \
1991 memset(&r->u8[8 + es], 0, 8 - es); \
1993 #endif
1994 VEXTRACT(ub, u8)
1995 VEXTRACT(uh, u16)
1996 VEXTRACT(uw, u32)
1997 VEXTRACT(d, u64)
1998 #undef VEXTRACT
2000 #define VEXT_SIGNED(name, element, mask, cast, recast) \
2001 void helper_##name(ppc_avr_t *r, ppc_avr_t *b) \
2003 int i; \
2004 VECTOR_FOR_INORDER_I(i, element) { \
2005 r->element[i] = (recast)((cast)(b->element[i] & mask)); \
2008 VEXT_SIGNED(vextsb2w, s32, UINT8_MAX, int8_t, int32_t)
2009 VEXT_SIGNED(vextsb2d, s64, UINT8_MAX, int8_t, int64_t)
2010 VEXT_SIGNED(vextsh2w, s32, UINT16_MAX, int16_t, int32_t)
2011 VEXT_SIGNED(vextsh2d, s64, UINT16_MAX, int16_t, int64_t)
2012 VEXT_SIGNED(vextsw2d, s64, UINT32_MAX, int32_t, int64_t)
2013 #undef VEXT_SIGNED
2015 #define VNEG(name, element) \
2016 void helper_##name(ppc_avr_t *r, ppc_avr_t *b) \
2018 int i; \
2019 VECTOR_FOR_INORDER_I(i, element) { \
2020 r->element[i] = -b->element[i]; \
2023 VNEG(vnegw, s32)
2024 VNEG(vnegd, s64)
2025 #undef VNEG
2027 #define VSPLTI(suffix, element, splat_type) \
2028 void helper_vspltis##suffix(ppc_avr_t *r, uint32_t splat) \
2030 splat_type x = (int8_t)(splat << 3) >> 3; \
2031 int i; \
2033 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
2034 r->element[i] = x; \
2037 VSPLTI(b, s8, int8_t)
2038 VSPLTI(h, s16, int16_t)
2039 VSPLTI(w, s32, int32_t)
2040 #undef VSPLTI
2042 #define VSR(suffix, element, mask) \
2043 void helper_vsr##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
2045 int i; \
2047 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
2048 unsigned int shift = b->element[i] & mask; \
2049 r->element[i] = a->element[i] >> shift; \
2052 VSR(ab, s8, 0x7)
2053 VSR(ah, s16, 0xF)
2054 VSR(aw, s32, 0x1F)
2055 VSR(ad, s64, 0x3F)
2056 VSR(b, u8, 0x7)
2057 VSR(h, u16, 0xF)
2058 VSR(w, u32, 0x1F)
2059 VSR(d, u64, 0x3F)
2060 #undef VSR
2062 void helper_vsro(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2064 int sh = (b->u8[LO_IDX * 0xf] >> 3) & 0xf;
2066 #if defined(HOST_WORDS_BIGENDIAN)
2067 memmove(&r->u8[sh], &a->u8[0], 16 - sh);
2068 memset(&r->u8[0], 0, sh);
2069 #else
2070 memmove(&r->u8[0], &a->u8[sh], 16 - sh);
2071 memset(&r->u8[16 - sh], 0, sh);
2072 #endif
2075 void helper_vsubcuw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2077 int i;
2079 for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
2080 r->u32[i] = a->u32[i] >= b->u32[i];
2084 void helper_vsumsws(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2086 int64_t t;
2087 int i, upper;
2088 ppc_avr_t result;
2089 int sat = 0;
2091 #if defined(HOST_WORDS_BIGENDIAN)
2092 upper = ARRAY_SIZE(r->s32)-1;
2093 #else
2094 upper = 0;
2095 #endif
2096 t = (int64_t)b->s32[upper];
2097 for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
2098 t += a->s32[i];
2099 result.s32[i] = 0;
2101 result.s32[upper] = cvtsdsw(t, &sat);
2102 *r = result;
2104 if (sat) {
2105 env->vscr |= (1 << VSCR_SAT);
2109 void helper_vsum2sws(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2111 int i, j, upper;
2112 ppc_avr_t result;
2113 int sat = 0;
2115 #if defined(HOST_WORDS_BIGENDIAN)
2116 upper = 1;
2117 #else
2118 upper = 0;
2119 #endif
2120 for (i = 0; i < ARRAY_SIZE(r->u64); i++) {
2121 int64_t t = (int64_t)b->s32[upper + i * 2];
2123 result.u64[i] = 0;
2124 for (j = 0; j < ARRAY_SIZE(r->u64); j++) {
2125 t += a->s32[2 * i + j];
2127 result.s32[upper + i * 2] = cvtsdsw(t, &sat);
2130 *r = result;
2131 if (sat) {
2132 env->vscr |= (1 << VSCR_SAT);
2136 void helper_vsum4sbs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2138 int i, j;
2139 int sat = 0;
2141 for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
2142 int64_t t = (int64_t)b->s32[i];
2144 for (j = 0; j < ARRAY_SIZE(r->s32); j++) {
2145 t += a->s8[4 * i + j];
2147 r->s32[i] = cvtsdsw(t, &sat);
2150 if (sat) {
2151 env->vscr |= (1 << VSCR_SAT);
2155 void helper_vsum4shs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2157 int sat = 0;
2158 int i;
2160 for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
2161 int64_t t = (int64_t)b->s32[i];
2163 t += a->s16[2 * i] + a->s16[2 * i + 1];
2164 r->s32[i] = cvtsdsw(t, &sat);
2167 if (sat) {
2168 env->vscr |= (1 << VSCR_SAT);
2172 void helper_vsum4ubs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2174 int i, j;
2175 int sat = 0;
2177 for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
2178 uint64_t t = (uint64_t)b->u32[i];
2180 for (j = 0; j < ARRAY_SIZE(r->u32); j++) {
2181 t += a->u8[4 * i + j];
2183 r->u32[i] = cvtuduw(t, &sat);
2186 if (sat) {
2187 env->vscr |= (1 << VSCR_SAT);
/*
 * Host-endian selectors for the unpack helpers: UPKHI is 1 and UPKLO is 0
 * on big-endian hosts, and the other way round on little-endian hosts.
 */
#ifdef HOST_WORDS_BIGENDIAN
#define UPKHI 1
#define UPKLO 0
#else
#define UPKHI 0
#define UPKLO 1
#endif
2198 #define VUPKPX(suffix, hi) \
2199 void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b) \
2201 int i; \
2202 ppc_avr_t result; \
2204 for (i = 0; i < ARRAY_SIZE(r->u32); i++) { \
2205 uint16_t e = b->u16[hi ? i : i+4]; \
2206 uint8_t a = (e >> 15) ? 0xff : 0; \
2207 uint8_t r = (e >> 10) & 0x1f; \
2208 uint8_t g = (e >> 5) & 0x1f; \
2209 uint8_t b = e & 0x1f; \
2211 result.u32[i] = (a << 24) | (r << 16) | (g << 8) | b; \
2213 *r = result; \
2215 VUPKPX(lpx, UPKLO)
2216 VUPKPX(hpx, UPKHI)
2217 #undef VUPKPX
2219 #define VUPK(suffix, unpacked, packee, hi) \
2220 void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b) \
2222 int i; \
2223 ppc_avr_t result; \
2225 if (hi) { \
2226 for (i = 0; i < ARRAY_SIZE(r->unpacked); i++) { \
2227 result.unpacked[i] = b->packee[i]; \
2229 } else { \
2230 for (i = ARRAY_SIZE(r->unpacked); i < ARRAY_SIZE(r->packee); \
2231 i++) { \
2232 result.unpacked[i - ARRAY_SIZE(r->unpacked)] = b->packee[i]; \
2235 *r = result; \
2237 VUPK(hsb, s16, s8, UPKHI)
2238 VUPK(hsh, s32, s16, UPKHI)
2239 VUPK(hsw, s64, s32, UPKHI)
2240 VUPK(lsb, s16, s8, UPKLO)
2241 VUPK(lsh, s32, s16, UPKLO)
2242 VUPK(lsw, s64, s32, UPKLO)
2243 #undef VUPK
2244 #undef UPKHI
2245 #undef UPKLO
2247 #define VGENERIC_DO(name, element) \
2248 void helper_v##name(ppc_avr_t *r, ppc_avr_t *b) \
2250 int i; \
2252 VECTOR_FOR_INORDER_I(i, element) { \
2253 r->element[i] = name(b->element[i]); \
2257 #define clzb(v) ((v) ? clz32((uint32_t)(v) << 24) : 8)
2258 #define clzh(v) ((v) ? clz32((uint32_t)(v) << 16) : 16)
2259 #define clzw(v) clz32((v))
2260 #define clzd(v) clz64((v))
2262 VGENERIC_DO(clzb, u8)
2263 VGENERIC_DO(clzh, u16)
2264 VGENERIC_DO(clzw, u32)
2265 VGENERIC_DO(clzd, u64)
2267 #undef clzb
2268 #undef clzh
2269 #undef clzw
2270 #undef clzd
2272 #define ctzb(v) ((v) ? ctz32(v) : 8)
2273 #define ctzh(v) ((v) ? ctz32(v) : 16)
2274 #define ctzw(v) ctz32((v))
2275 #define ctzd(v) ctz64((v))
2277 VGENERIC_DO(ctzb, u8)
2278 VGENERIC_DO(ctzh, u16)
2279 VGENERIC_DO(ctzw, u32)
2280 VGENERIC_DO(ctzd, u64)
2282 #undef ctzb
2283 #undef ctzh
2284 #undef ctzw
2285 #undef ctzd
2287 #define popcntb(v) ctpop8(v)
2288 #define popcnth(v) ctpop16(v)
2289 #define popcntw(v) ctpop32(v)
2290 #define popcntd(v) ctpop64(v)
2292 VGENERIC_DO(popcntb, u8)
2293 VGENERIC_DO(popcnth, u16)
2294 VGENERIC_DO(popcntw, u32)
2295 VGENERIC_DO(popcntd, u64)
2297 #undef popcntb
2298 #undef popcnth
2299 #undef popcntw
2300 #undef popcntd
2302 #undef VGENERIC_DO
/*
 * Initializer for a quadword holding the value 1: the doubleword that holds
 * the least significant half (index 1 on big-endian hosts, index 0 on
 * little-endian hosts) is 1 and the other one is 0.
 */
#ifdef HOST_WORDS_BIGENDIAN
#define QW_ONE { .u64 = { 0, 1 } }
#else
#define QW_ONE { .u64 = { 1, 0 } }
#endif
2310 #ifndef CONFIG_INT128
2312 static inline void avr_qw_not(ppc_avr_t *t, ppc_avr_t a)
2314 t->u64[0] = ~a.u64[0];
2315 t->u64[1] = ~a.u64[1];
2318 static int avr_qw_cmpu(ppc_avr_t a, ppc_avr_t b)
2320 if (a.u64[HI_IDX] < b.u64[HI_IDX]) {
2321 return -1;
2322 } else if (a.u64[HI_IDX] > b.u64[HI_IDX]) {
2323 return 1;
2324 } else if (a.u64[LO_IDX] < b.u64[LO_IDX]) {
2325 return -1;
2326 } else if (a.u64[LO_IDX] > b.u64[LO_IDX]) {
2327 return 1;
2328 } else {
2329 return 0;
2333 static void avr_qw_add(ppc_avr_t *t, ppc_avr_t a, ppc_avr_t b)
2335 t->u64[LO_IDX] = a.u64[LO_IDX] + b.u64[LO_IDX];
2336 t->u64[HI_IDX] = a.u64[HI_IDX] + b.u64[HI_IDX] +
2337 (~a.u64[LO_IDX] < b.u64[LO_IDX]);
2340 static int avr_qw_addc(ppc_avr_t *t, ppc_avr_t a, ppc_avr_t b)
2342 ppc_avr_t not_a;
2343 t->u64[LO_IDX] = a.u64[LO_IDX] + b.u64[LO_IDX];
2344 t->u64[HI_IDX] = a.u64[HI_IDX] + b.u64[HI_IDX] +
2345 (~a.u64[LO_IDX] < b.u64[LO_IDX]);
2346 avr_qw_not(&not_a, a);
2347 return avr_qw_cmpu(not_a, b) < 0;
2350 #endif
2352 void helper_vadduqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2354 #ifdef CONFIG_INT128
2355 r->u128 = a->u128 + b->u128;
2356 #else
2357 avr_qw_add(r, *a, *b);
2358 #endif
2361 void helper_vaddeuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
2363 #ifdef CONFIG_INT128
2364 r->u128 = a->u128 + b->u128 + (c->u128 & 1);
2365 #else
2367 if (c->u64[LO_IDX] & 1) {
2368 ppc_avr_t tmp;
2370 tmp.u64[HI_IDX] = 0;
2371 tmp.u64[LO_IDX] = c->u64[LO_IDX] & 1;
2372 avr_qw_add(&tmp, *a, tmp);
2373 avr_qw_add(r, tmp, *b);
2374 } else {
2375 avr_qw_add(r, *a, *b);
2377 #endif
2380 void helper_vaddcuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2382 #ifdef CONFIG_INT128
2383 r->u128 = (~a->u128 < b->u128);
2384 #else
2385 ppc_avr_t not_a;
2387 avr_qw_not(&not_a, *a);
2389 r->u64[HI_IDX] = 0;
2390 r->u64[LO_IDX] = (avr_qw_cmpu(not_a, *b) < 0);
2391 #endif
2394 void helper_vaddecuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
2396 #ifdef CONFIG_INT128
2397 int carry_out = (~a->u128 < b->u128);
2398 if (!carry_out && (c->u128 & 1)) {
2399 carry_out = ((a->u128 + b->u128 + 1) == 0) &&
2400 ((a->u128 != 0) || (b->u128 != 0));
2402 r->u128 = carry_out;
2403 #else
2405 int carry_in = c->u64[LO_IDX] & 1;
2406 int carry_out = 0;
2407 ppc_avr_t tmp;
2409 carry_out = avr_qw_addc(&tmp, *a, *b);
2411 if (!carry_out && carry_in) {
2412 ppc_avr_t one = QW_ONE;
2413 carry_out = avr_qw_addc(&tmp, tmp, one);
2415 r->u64[HI_IDX] = 0;
2416 r->u64[LO_IDX] = carry_out;
2417 #endif
2420 void helper_vsubuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2422 #ifdef CONFIG_INT128
2423 r->u128 = a->u128 - b->u128;
2424 #else
2425 ppc_avr_t tmp;
2426 ppc_avr_t one = QW_ONE;
2428 avr_qw_not(&tmp, *b);
2429 avr_qw_add(&tmp, *a, tmp);
2430 avr_qw_add(r, tmp, one);
2431 #endif
2434 void helper_vsubeuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
2436 #ifdef CONFIG_INT128
2437 r->u128 = a->u128 + ~b->u128 + (c->u128 & 1);
2438 #else
2439 ppc_avr_t tmp, sum;
2441 avr_qw_not(&tmp, *b);
2442 avr_qw_add(&sum, *a, tmp);
2444 tmp.u64[HI_IDX] = 0;
2445 tmp.u64[LO_IDX] = c->u64[LO_IDX] & 1;
2446 avr_qw_add(r, sum, tmp);
2447 #endif
2450 void helper_vsubcuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2452 #ifdef CONFIG_INT128
2453 r->u128 = (~a->u128 < ~b->u128) ||
2454 (a->u128 + ~b->u128 == (__uint128_t)-1);
2455 #else
2456 int carry = (avr_qw_cmpu(*a, *b) > 0);
2457 if (!carry) {
2458 ppc_avr_t tmp;
2459 avr_qw_not(&tmp, *b);
2460 avr_qw_add(&tmp, *a, tmp);
2461 carry = ((tmp.s64[HI_IDX] == -1ull) && (tmp.s64[LO_IDX] == -1ull));
2463 r->u64[HI_IDX] = 0;
2464 r->u64[LO_IDX] = carry;
2465 #endif
2468 void helper_vsubecuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
2470 #ifdef CONFIG_INT128
2471 r->u128 =
2472 (~a->u128 < ~b->u128) ||
2473 ((c->u128 & 1) && (a->u128 + ~b->u128 == (__uint128_t)-1));
2474 #else
2475 int carry_in = c->u64[LO_IDX] & 1;
2476 int carry_out = (avr_qw_cmpu(*a, *b) > 0);
2477 if (!carry_out && carry_in) {
2478 ppc_avr_t tmp;
2479 avr_qw_not(&tmp, *b);
2480 avr_qw_add(&tmp, *a, tmp);
2481 carry_out = ((tmp.u64[HI_IDX] == -1ull) && (tmp.u64[LO_IDX] == -1ull));
2484 r->u64[HI_IDX] = 0;
2485 r->u64[LO_IDX] = carry_out;
2486 #endif
/*
 * Sign codes compared against the low nibble of digit byte 0 of a BCD
 * operand (see bcd_get_sgn): four codes denote plus, two denote minus.
 */
#define BCD_PLUS_PREF_1 0xC
#define BCD_PLUS_PREF_2 0xF
#define BCD_PLUS_ALT_1  0xA
#define BCD_NEG_PREF    0xD
#define BCD_NEG_ALT     0xB
#define BCD_PLUS_ALT_2  0xE
/* 0x2B and 0x2D are the ASCII codes of '+' and '-'. */
#define NATIONAL_PLUS   0x2B
#define NATIONAL_NEG    0x2D

/*
 * Index into ppc_avr_t.u8 of the byte holding 4-bit digit n; two digits
 * share a byte, and the byte order is mirrored on big-endian hosts.  The
 * argument is parenthesized so that expressions such as
 * BCD_DIG_BYTE(i + 1) expand correctly.
 */
#if defined(HOST_WORDS_BIGENDIAN)
#define BCD_DIG_BYTE(n) (15 - ((n) / 2))
#else
#define BCD_DIG_BYTE(n) ((n) / 2)
#endif
2504 static int bcd_get_sgn(ppc_avr_t *bcd)
2506 switch (bcd->u8[BCD_DIG_BYTE(0)] & 0xF) {
2507 case BCD_PLUS_PREF_1:
2508 case BCD_PLUS_PREF_2:
2509 case BCD_PLUS_ALT_1:
2510 case BCD_PLUS_ALT_2:
2512 return 1;
2515 case BCD_NEG_PREF:
2516 case BCD_NEG_ALT:
2518 return -1;
2521 default:
2523 return 0;
2528 static int bcd_preferred_sgn(int sgn, int ps)
2530 if (sgn >= 0) {
2531 return (ps == 0) ? BCD_PLUS_PREF_1 : BCD_PLUS_PREF_2;
2532 } else {
2533 return BCD_NEG_PREF;
2537 static uint8_t bcd_get_digit(ppc_avr_t *bcd, int n, int *invalid)
2539 uint8_t result;
2540 if (n & 1) {
2541 result = bcd->u8[BCD_DIG_BYTE(n)] >> 4;
2542 } else {
2543 result = bcd->u8[BCD_DIG_BYTE(n)] & 0xF;
2546 if (unlikely(result > 9)) {
2547 *invalid = true;
2549 return result;
2552 static void bcd_put_digit(ppc_avr_t *bcd, uint8_t digit, int n)
2554 if (n & 1) {
2555 bcd->u8[BCD_DIG_BYTE(n)] &= 0x0F;
2556 bcd->u8[BCD_DIG_BYTE(n)] |= (digit<<4);
2557 } else {
2558 bcd->u8[BCD_DIG_BYTE(n)] &= 0xF0;
2559 bcd->u8[BCD_DIG_BYTE(n)] |= digit;
2563 static int bcd_cmp_zero(ppc_avr_t *bcd)
2565 if (bcd->u64[HI_IDX] == 0 && (bcd->u64[LO_IDX] >> 4) == 0) {
2566 return 1 << CRF_EQ;
2567 } else {
2568 return (bcd_get_sgn(bcd) == 1) ? 1 << CRF_GT : 1 << CRF_LT;
2572 static uint16_t get_national_digit(ppc_avr_t *reg, int n)
2574 #if defined(HOST_WORDS_BIGENDIAN)
2575 return reg->u16[7 - n];
2576 #else
2577 return reg->u16[n];
2578 #endif
2581 static void set_national_digit(ppc_avr_t *reg, uint8_t val, int n)
2583 #if defined(HOST_WORDS_BIGENDIAN)
2584 reg->u16[7 - n] = val;
2585 #else
2586 reg->u16[n] = val;
2587 #endif
2590 static int bcd_cmp_mag(ppc_avr_t *a, ppc_avr_t *b)
2592 int i;
2593 int invalid = 0;
2594 for (i = 31; i > 0; i--) {
2595 uint8_t dig_a = bcd_get_digit(a, i, &invalid);
2596 uint8_t dig_b = bcd_get_digit(b, i, &invalid);
2597 if (unlikely(invalid)) {
2598 return 0; /* doesn't matter */
2599 } else if (dig_a > dig_b) {
2600 return 1;
2601 } else if (dig_a < dig_b) {
2602 return -1;
2606 return 0;
2609 static int bcd_add_mag(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, int *invalid,
2610 int *overflow)
2612 int carry = 0;
2613 int i;
2614 int is_zero = 1;
2615 for (i = 1; i <= 31; i++) {
2616 uint8_t digit = bcd_get_digit(a, i, invalid) +
2617 bcd_get_digit(b, i, invalid) + carry;
2618 is_zero &= (digit == 0);
2619 if (digit > 9) {
2620 carry = 1;
2621 digit -= 10;
2622 } else {
2623 carry = 0;
2626 bcd_put_digit(t, digit, i);
2628 if (unlikely(*invalid)) {
2629 return -1;
2633 *overflow = carry;
2634 return is_zero;
2637 static int bcd_sub_mag(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, int *invalid,
2638 int *overflow)
2640 int carry = 0;
2641 int i;
2642 int is_zero = 1;
2643 for (i = 1; i <= 31; i++) {
2644 uint8_t digit = bcd_get_digit(a, i, invalid) -
2645 bcd_get_digit(b, i, invalid) + carry;
2646 is_zero &= (digit == 0);
2647 if (digit & 0x80) {
2648 carry = -1;
2649 digit += 10;
2650 } else {
2651 carry = 0;
2654 bcd_put_digit(t, digit, i);
2656 if (unlikely(*invalid)) {
2657 return -1;
2661 *overflow = carry;
2662 return is_zero;
2665 uint32_t helper_bcdadd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2668 int sgna = bcd_get_sgn(a);
2669 int sgnb = bcd_get_sgn(b);
2670 int invalid = (sgna == 0) || (sgnb == 0);
2671 int overflow = 0;
2672 int zero = 0;
2673 uint32_t cr = 0;
2674 ppc_avr_t result = { .u64 = { 0, 0 } };
2676 if (!invalid) {
2677 if (sgna == sgnb) {
2678 result.u8[BCD_DIG_BYTE(0)] = bcd_preferred_sgn(sgna, ps);
2679 zero = bcd_add_mag(&result, a, b, &invalid, &overflow);
2680 cr = (sgna > 0) ? 1 << CRF_GT : 1 << CRF_LT;
2681 } else if (bcd_cmp_mag(a, b) > 0) {
2682 result.u8[BCD_DIG_BYTE(0)] = bcd_preferred_sgn(sgna, ps);
2683 zero = bcd_sub_mag(&result, a, b, &invalid, &overflow);
2684 cr = (sgna > 0) ? 1 << CRF_GT : 1 << CRF_LT;
2685 } else {
2686 result.u8[BCD_DIG_BYTE(0)] = bcd_preferred_sgn(sgnb, ps);
2687 zero = bcd_sub_mag(&result, b, a, &invalid, &overflow);
2688 cr = (sgnb > 0) ? 1 << CRF_GT : 1 << CRF_LT;
2692 if (unlikely(invalid)) {
2693 result.u64[HI_IDX] = result.u64[LO_IDX] = -1;
2694 cr = 1 << CRF_SO;
2695 } else if (overflow) {
2696 cr |= 1 << CRF_SO;
2697 } else if (zero) {
2698 cr = 1 << CRF_EQ;
2701 *r = result;
2703 return cr;
2706 uint32_t helper_bcdsub(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2708 ppc_avr_t bcopy = *b;
2709 int sgnb = bcd_get_sgn(b);
2710 if (sgnb < 0) {
2711 bcd_put_digit(&bcopy, BCD_PLUS_PREF_1, 0);
2712 } else if (sgnb > 0) {
2713 bcd_put_digit(&bcopy, BCD_NEG_PREF, 0);
2715 /* else invalid ... defer to bcdadd code for proper handling */
2717 return helper_bcdadd(r, a, &bcopy, ps);
2720 uint32_t helper_bcdcfn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2722 int i;
2723 int cr = 0;
2724 uint16_t national = 0;
2725 uint16_t sgnb = get_national_digit(b, 0);
2726 ppc_avr_t ret = { .u64 = { 0, 0 } };
2727 int invalid = (sgnb != NATIONAL_PLUS && sgnb != NATIONAL_NEG);
2729 for (i = 1; i < 8; i++) {
2730 national = get_national_digit(b, i);
2731 if (unlikely(national < 0x30 || national > 0x39)) {
2732 invalid = 1;
2733 break;
2736 bcd_put_digit(&ret, national & 0xf, i);
2739 if (sgnb == NATIONAL_PLUS) {
2740 bcd_put_digit(&ret, (ps == 0) ? BCD_PLUS_PREF_1 : BCD_PLUS_PREF_2, 0);
2741 } else {
2742 bcd_put_digit(&ret, BCD_NEG_PREF, 0);
2745 cr = bcd_cmp_zero(&ret);
2747 if (unlikely(invalid)) {
2748 cr = 1 << CRF_SO;
2751 *r = ret;
2753 return cr;
2756 uint32_t helper_bcdctn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2758 int i;
2759 int cr = 0;
2760 int sgnb = bcd_get_sgn(b);
2761 int invalid = (sgnb == 0);
2762 ppc_avr_t ret = { .u64 = { 0, 0 } };
2764 int ox_flag = (b->u64[HI_IDX] != 0) || ((b->u64[LO_IDX] >> 32) != 0);
2766 for (i = 1; i < 8; i++) {
2767 set_national_digit(&ret, 0x30 + bcd_get_digit(b, i, &invalid), i);
2769 if (unlikely(invalid)) {
2770 break;
2773 set_national_digit(&ret, (sgnb == -1) ? NATIONAL_NEG : NATIONAL_PLUS, 0);
2775 cr = bcd_cmp_zero(b);
2777 if (ox_flag) {
2778 cr |= 1 << CRF_SO;
2781 if (unlikely(invalid)) {
2782 cr = 1 << CRF_SO;
2785 *r = ret;
2787 return cr;
2790 uint32_t helper_bcdcfz(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2792 int i;
2793 int cr = 0;
2794 int invalid = 0;
2795 int zone_digit = 0;
2796 int zone_lead = ps ? 0xF : 0x3;
2797 int digit = 0;
2798 ppc_avr_t ret = { .u64 = { 0, 0 } };
2799 int sgnb = b->u8[BCD_DIG_BYTE(0)] >> 4;
2801 if (unlikely((sgnb < 0xA) && ps)) {
2802 invalid = 1;
2805 for (i = 0; i < 16; i++) {
2806 zone_digit = (i * 2) ? b->u8[BCD_DIG_BYTE(i * 2)] >> 4 : zone_lead;
2807 digit = b->u8[BCD_DIG_BYTE(i * 2)] & 0xF;
2808 if (unlikely(zone_digit != zone_lead || digit > 0x9)) {
2809 invalid = 1;
2810 break;
2813 bcd_put_digit(&ret, digit, i + 1);
2816 if ((ps && (sgnb == 0xB || sgnb == 0xD)) ||
2817 (!ps && (sgnb & 0x4))) {
2818 bcd_put_digit(&ret, BCD_NEG_PREF, 0);
2819 } else {
2820 bcd_put_digit(&ret, BCD_PLUS_PREF_1, 0);
2823 cr = bcd_cmp_zero(&ret);
2825 if (unlikely(invalid)) {
2826 cr = 1 << CRF_SO;
2829 *r = ret;
2831 return cr;
2834 uint32_t helper_bcdctz(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2836 int i;
2837 int cr = 0;
2838 uint8_t digit = 0;
2839 int sgnb = bcd_get_sgn(b);
2840 int zone_lead = (ps) ? 0xF0 : 0x30;
2841 int invalid = (sgnb == 0);
2842 ppc_avr_t ret = { .u64 = { 0, 0 } };
2844 int ox_flag = ((b->u64[HI_IDX] >> 4) != 0);
2846 for (i = 0; i < 16; i++) {
2847 digit = bcd_get_digit(b, i + 1, &invalid);
2849 if (unlikely(invalid)) {
2850 break;
2853 ret.u8[BCD_DIG_BYTE(i * 2)] = zone_lead + digit;
2856 if (ps) {
2857 bcd_put_digit(&ret, (sgnb == 1) ? 0xC : 0xD, 1);
2858 } else {
2859 bcd_put_digit(&ret, (sgnb == 1) ? 0x3 : 0x7, 1);
2862 cr = bcd_cmp_zero(b);
2864 if (ox_flag) {
2865 cr |= 1 << CRF_SO;
2868 if (unlikely(invalid)) {
2869 cr = 1 << CRF_SO;
2872 *r = ret;
2874 return cr;
2877 void helper_vsbox(ppc_avr_t *r, ppc_avr_t *a)
2879 int i;
2880 VECTOR_FOR_INORDER_I(i, u8) {
2881 r->u8[i] = AES_sbox[a->u8[i]];
2885 void helper_vcipher(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2887 ppc_avr_t result;
2888 int i;
2890 VECTOR_FOR_INORDER_I(i, u32) {
2891 result.AVRW(i) = b->AVRW(i) ^
2892 (AES_Te0[a->AVRB(AES_shifts[4*i + 0])] ^
2893 AES_Te1[a->AVRB(AES_shifts[4*i + 1])] ^
2894 AES_Te2[a->AVRB(AES_shifts[4*i + 2])] ^
2895 AES_Te3[a->AVRB(AES_shifts[4*i + 3])]);
2897 *r = result;
2900 void helper_vcipherlast(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2902 ppc_avr_t result;
2903 int i;
2905 VECTOR_FOR_INORDER_I(i, u8) {
2906 result.AVRB(i) = b->AVRB(i) ^ (AES_sbox[a->AVRB(AES_shifts[i])]);
2908 *r = result;
2911 void helper_vncipher(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2913 /* This differs from what is written in ISA V2.07. The RTL is */
2914 /* incorrect and will be fixed in V2.07B. */
2915 int i;
2916 ppc_avr_t tmp;
2918 VECTOR_FOR_INORDER_I(i, u8) {
2919 tmp.AVRB(i) = b->AVRB(i) ^ AES_isbox[a->AVRB(AES_ishifts[i])];
2922 VECTOR_FOR_INORDER_I(i, u32) {
2923 r->AVRW(i) =
2924 AES_imc[tmp.AVRB(4*i + 0)][0] ^
2925 AES_imc[tmp.AVRB(4*i + 1)][1] ^
2926 AES_imc[tmp.AVRB(4*i + 2)][2] ^
2927 AES_imc[tmp.AVRB(4*i + 3)][3];
2931 void helper_vncipherlast(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2933 ppc_avr_t result;
2934 int i;
2936 VECTOR_FOR_INORDER_I(i, u8) {
2937 result.AVRB(i) = b->AVRB(i) ^ (AES_isbox[a->AVRB(AES_ishifts[i])]);
2939 *r = result;
2942 #define ROTRu32(v, n) (((v) >> (n)) | ((v) << (32-n)))
2943 #if defined(HOST_WORDS_BIGENDIAN)
2944 #define EL_IDX(i) (i)
2945 #else
2946 #define EL_IDX(i) (3 - (i))
2947 #endif
2949 void helper_vshasigmaw(ppc_avr_t *r, ppc_avr_t *a, uint32_t st_six)
2951 int st = (st_six & 0x10) != 0;
2952 int six = st_six & 0xF;
2953 int i;
2955 VECTOR_FOR_INORDER_I(i, u32) {
2956 if (st == 0) {
2957 if ((six & (0x8 >> i)) == 0) {
2958 r->u32[EL_IDX(i)] = ROTRu32(a->u32[EL_IDX(i)], 7) ^
2959 ROTRu32(a->u32[EL_IDX(i)], 18) ^
2960 (a->u32[EL_IDX(i)] >> 3);
2961 } else { /* six.bit[i] == 1 */
2962 r->u32[EL_IDX(i)] = ROTRu32(a->u32[EL_IDX(i)], 17) ^
2963 ROTRu32(a->u32[EL_IDX(i)], 19) ^
2964 (a->u32[EL_IDX(i)] >> 10);
2966 } else { /* st == 1 */
2967 if ((six & (0x8 >> i)) == 0) {
2968 r->u32[EL_IDX(i)] = ROTRu32(a->u32[EL_IDX(i)], 2) ^
2969 ROTRu32(a->u32[EL_IDX(i)], 13) ^
2970 ROTRu32(a->u32[EL_IDX(i)], 22);
2971 } else { /* six.bit[i] == 1 */
2972 r->u32[EL_IDX(i)] = ROTRu32(a->u32[EL_IDX(i)], 6) ^
2973 ROTRu32(a->u32[EL_IDX(i)], 11) ^
2974 ROTRu32(a->u32[EL_IDX(i)], 25);
2980 #undef ROTRu32
2981 #undef EL_IDX
2983 #define ROTRu64(v, n) (((v) >> (n)) | ((v) << (64-n)))
2984 #if defined(HOST_WORDS_BIGENDIAN)
2985 #define EL_IDX(i) (i)
2986 #else
2987 #define EL_IDX(i) (1 - (i))
2988 #endif
2990 void helper_vshasigmad(ppc_avr_t *r, ppc_avr_t *a, uint32_t st_six)
2992 int st = (st_six & 0x10) != 0;
2993 int six = st_six & 0xF;
2994 int i;
2996 VECTOR_FOR_INORDER_I(i, u64) {
2997 if (st == 0) {
2998 if ((six & (0x8 >> (2*i))) == 0) {
2999 r->u64[EL_IDX(i)] = ROTRu64(a->u64[EL_IDX(i)], 1) ^
3000 ROTRu64(a->u64[EL_IDX(i)], 8) ^
3001 (a->u64[EL_IDX(i)] >> 7);
3002 } else { /* six.bit[2*i] == 1 */
3003 r->u64[EL_IDX(i)] = ROTRu64(a->u64[EL_IDX(i)], 19) ^
3004 ROTRu64(a->u64[EL_IDX(i)], 61) ^
3005 (a->u64[EL_IDX(i)] >> 6);
3007 } else { /* st == 1 */
3008 if ((six & (0x8 >> (2*i))) == 0) {
3009 r->u64[EL_IDX(i)] = ROTRu64(a->u64[EL_IDX(i)], 28) ^
3010 ROTRu64(a->u64[EL_IDX(i)], 34) ^
3011 ROTRu64(a->u64[EL_IDX(i)], 39);
3012 } else { /* six.bit[2*i] == 1 */
3013 r->u64[EL_IDX(i)] = ROTRu64(a->u64[EL_IDX(i)], 14) ^
3014 ROTRu64(a->u64[EL_IDX(i)], 18) ^
3015 ROTRu64(a->u64[EL_IDX(i)], 41);
3021 #undef ROTRu64
3022 #undef EL_IDX
3024 void helper_vpermxor(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
3026 ppc_avr_t result;
3027 int i;
3029 VECTOR_FOR_INORDER_I(i, u8) {
3030 int indexA = c->u8[i] >> 4;
3031 int indexB = c->u8[i] & 0xF;
3032 #if defined(HOST_WORDS_BIGENDIAN)
3033 result.u8[i] = a->u8[indexA] ^ b->u8[indexB];
3034 #else
3035 result.u8[i] = a->u8[15-indexA] ^ b->u8[15-indexB];
3036 #endif
3038 *r = result;
3041 #undef VECTOR_FOR_INORDER_I
3042 #undef HI_IDX
3043 #undef LO_IDX
3045 /*****************************************************************************/
3046 /* SPE extension helpers */
3047 /* Use a table to make this quicker */
/* hbrev[x] is the 4-bit value x with its bit order reversed. */
static const uint8_t hbrev[16] = {
    0x0, 0x8, 0x4, 0xC, 0x2, 0xA, 0x6, 0xE,
    0x1, 0x9, 0x5, 0xD, 0x3, 0xB, 0x7, 0xF,
};

/* Reverse the bit order of one byte by swapping its reversed nibbles. */
static inline uint8_t byte_reverse(uint8_t val)
{
    return hbrev[val >> 4] | (hbrev[val & 0xF] << 4);
}

/*
 * Reverse the bit order of a 32-bit word: every byte is bit-reversed and
 * the byte order is swapped.
 */
static inline uint32_t word_reverse(uint32_t val)
{
    /*
     * Widen to uint32_t before shifting: a uint8_t operand is promoted
     * to int, and shifting a value >= 0x80 left by 24 would overflow it.
     */
    const uint32_t b0 = byte_reverse(val >> 24);
    const uint32_t b1 = byte_reverse(val >> 16);
    const uint32_t b2 = byte_reverse(val >> 8);
    const uint32_t b3 = byte_reverse(val);

    return b0 | (b1 << 8) | (b2 << 16) | (b3 << 24);
}
3064 #define MASKBITS 16 /* Random value - to be fixed (implementation dependent) */
3065 target_ulong helper_brinc(target_ulong arg1, target_ulong arg2)
3067 uint32_t a, b, d, mask;
3069 mask = UINT32_MAX >> (32 - MASKBITS);
3070 a = arg1 & mask;
3071 b = arg2 & mask;
3072 d = word_reverse(1 + word_reverse(a | ~b));
3073 return (arg1 & ~mask) | (d & b);
/*
 * Count the leading bits of val that equal its top bit: when bit 31 is
 * set the value is inverted first, so clz32() counts leading ones.
 */
uint32_t helper_cntlsw32(uint32_t val)
{
    const uint32_t probe = (val & 0x80000000) ? ~val : val;

    return clz32(probe);
}
/* Thin wrapper returning clz32() of val. */
uint32_t helper_cntlzw32(uint32_t val)
{
    const uint32_t leading_zeros = clz32(val);

    return leading_zeros;
}
3090 /* 440 specific */
3091 target_ulong helper_dlmzb(CPUPPCState *env, target_ulong high,
3092 target_ulong low, uint32_t update_Rc)
3094 target_ulong mask;
3095 int i;
3097 i = 1;
3098 for (mask = 0xFF000000; mask != 0; mask = mask >> 8) {
3099 if ((high & mask) == 0) {
3100 if (update_Rc) {
3101 env->crf[0] = 0x4;
3103 goto done;
3105 i++;
3107 for (mask = 0xFF000000; mask != 0; mask = mask >> 8) {
3108 if ((low & mask) == 0) {
3109 if (update_Rc) {
3110 env->crf[0] = 0x8;
3112 goto done;
3114 i++;
3116 i = 8;
3117 if (update_Rc) {
3118 env->crf[0] = 0x2;
3120 done:
3121 env->xer = (env->xer & ~0x7F) | i;
3122 if (update_Rc) {
3123 env->crf[0] |= xer_so;
3125 return i;