1 /* Simulation code for the MIPS MDMX ASE.
2 Copyright (C) 2002 Free Software Foundation, Inc.
3 Contributed by Ed Satterthwaite and Chris Demetriou, of Broadcom
6 This file is part of GDB, the GNU debugger.
8 This program is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 2, or (at your option)
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License along
19 with this program; if not, write to the Free Software Foundation, Inc.,
20 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
26 /* Within mdmx.c we refer to the sim_cpu directly. */
28 #define SD (CPU_STATE(CPU))
30 /* XXX FIXME: temporary hack while the impact of making unpredictable()
31 a "normal" (non-igen) function is evaluated. */
33 #define Unpredictable() unpredictable_action (cpu, cia)
35 /* MDMX Representations
37 An 8-bit packed byte element (OB) is always unsigned.
38 The 24-bit accumulators are signed and are represented as 32-bit
39 signed values, which are reduced to 24-bit signed values prior to
40 Round and Clamp operations.
42 A 16-bit packed halfword element (QH) is always signed.
43 The 48-bit accumulators are signed and are represented as 64-bit
44 signed values, which are reduced to 48-bit signed values prior to
45 Round and Clamp operations.
47 The code below assumes a 2's-complement representation of signed
48 quantities. Care is required to clear extended sign bits when
51 The code (and the code for arithmetic shifts in mips.igen) also makes
52 the (not guaranteed portable) assumption that right shifts of signed
53 quantities in C do sign extension. */
55 typedef unsigned64 unsigned48
;
56 #define MASK48 (UNSIGNED64 (0xffffffffffff))
58 typedef unsigned32 unsigned24
;
59 #define MASK24 (UNSIGNED32 (0xffffff))
62 mdmx_ob
, /* OB (octal byte) */
63 mdmx_qh
/* QH (quad half-word) */
67 sel_elem
, /* element select */
68 sel_vect
, /* vector select */
69 sel_imm
/* immediate select */
/* Saturation limits: OB elements are unsigned 8-bit values, QH elements
   are signed 16-bit values.  */
#define OB_MAX ((unsigned8)0xFF)
#define QH_MIN ((signed16)0x8000)
#define QH_MAX ((signed16)0x7FFF)

/* Saturate X to the element range.  OB_CLAMP only limits the upper bound;
   QH_CLAMP limits both bounds.  Both macros may evaluate X more than
   once, so X must be free of side effects.  */
#define OB_CLAMP(x) ((unsigned8)((x) > OB_MAX ? OB_MAX : (x)))
#define QH_CLAMP(x) ((signed16)((x) < QH_MIN ? QH_MIN : \
				((x) > QH_MAX ? QH_MAX : (x))))

/* Decode the format/select field.  Bit 0 picks the element format (0 = OB,
   1 = QH).  Bit 4 clear means element select; with bit 4 set, bit 3 clear
   means vector select and bit 3 set means immediate select.  */
#define MX_FMT(fmtsel) (((fmtsel) & 0x1) == 0 ? mdmx_ob : mdmx_qh)
#define MX_VT(fmtsel) (((fmtsel) & 0x10) == 0 ? sel_elem : \
		       (((fmtsel) & 0x18) == 0x10 ? sel_vect : sel_imm))

/* Extract the selected element from the packed value V.  For QH, bits 3:2
   of FMTSEL give a shift of 0, 16, 32 or 48; for OB, bits 3:1 give a shift
   that is a multiple of 8 in the range 0..56.  */
#define QH_ELEM(v,fmtsel) \
	((signed16)(((v) >> (((fmtsel) & 0xC) << 2)) & 0xFFFF))
#define OB_ELEM(v,fmtsel) \
	((unsigned8)(((v) >> (((fmtsel) & 0xE) << 2)) & 0xFF))
90 typedef signed16 (*QH_FUNC
)(signed16
, signed16
);
91 typedef unsigned8 (*OB_FUNC
)(unsigned8
, unsigned8
);
93 /* vectorized logical operators */
96 AndQH(signed16 ts
, signed16 tt
)
98 return (signed16
)((unsigned16
)ts
& (unsigned16
)tt
);
102 AndOB(unsigned8 ts
, unsigned8 tt
)
108 NorQH(signed16 ts
, signed16 tt
)
110 return (signed16
)(((unsigned16
)ts
| (unsigned16
)tt
) ^ 0xFFFF);
114 NorOB(unsigned8 ts
, unsigned8 tt
)
116 return (ts
| tt
) ^ 0xFF;
120 OrQH(signed16 ts
, signed16 tt
)
122 return (signed16
)((unsigned16
)ts
| (unsigned16
)tt
);
126 OrOB(unsigned8 ts
, unsigned8 tt
)
132 XorQH(signed16 ts
, signed16 tt
)
134 return (signed16
)((unsigned16
)ts
^ (unsigned16
)tt
);
138 XorOB(unsigned8 ts
, unsigned8 tt
)
144 SLLQH(signed16 ts
, signed16 tt
)
146 unsigned32 s
= (unsigned32
)tt
& 0xF;
147 return (signed16
)(((unsigned32
)ts
<< s
) & 0xFFFF);
151 SLLOB(unsigned8 ts
, unsigned8 tt
)
153 unsigned32 s
= tt
& 0x7;
154 return (ts
<< s
) & 0xFF;
158 SRLQH(signed16 ts
, signed16 tt
)
160 unsigned32 s
= (unsigned32
)tt
& 0xF;
161 return (signed16
)((unsigned16
)ts
>> s
);
165 SRLOB(unsigned8 ts
, unsigned8 tt
)
167 unsigned32 s
= tt
& 0x7;
172 /* Vectorized arithmetic operators. */
175 AddQH(signed16 ts
, signed16 tt
)
177 signed32 t
= (signed32
)ts
+ (signed32
)tt
;
182 AddOB(unsigned8 ts
, unsigned8 tt
)
184 unsigned32 t
= (unsigned32
)ts
+ (unsigned32
)tt
;
189 SubQH(signed16 ts
, signed16 tt
)
191 signed32 t
= (signed32
)ts
- (signed32
)tt
;
196 SubOB(unsigned8 ts
, unsigned8 tt
)
199 t
= (signed32
)ts
- (signed32
)tt
;
206 MinQH(signed16 ts
, signed16 tt
)
208 return (ts
< tt
? ts
: tt
);
212 MinOB(unsigned8 ts
, unsigned8 tt
)
214 return (ts
< tt
? ts
: tt
);
218 MaxQH(signed16 ts
, signed16 tt
)
220 return (ts
> tt
? ts
: tt
);
224 MaxOB(unsigned8 ts
, unsigned8 tt
)
226 return (ts
> tt
? ts
: tt
);
230 MulQH(signed16 ts
, signed16 tt
)
232 signed32 t
= (signed32
)ts
* (signed32
)tt
;
237 MulOB(unsigned8 ts
, unsigned8 tt
)
239 unsigned32 t
= (unsigned32
)ts
* (unsigned32
)tt
;
243 /* "msgn" and "sra" are defined only for QH format. */
246 MsgnQH(signed16 ts
, signed16 tt
)
250 t
= (tt
== QH_MIN
? QH_MAX
: -tt
);
259 SRAQH(signed16 ts
, signed16 tt
)
261 unsigned32 s
= (unsigned32
)tt
& 0xF;
262 return (signed16
)((signed32
)ts
>> s
);
266 /* "pabsdiff" and "pavg" are defined only for OB format. */
269 AbsDiffOB(unsigned8 ts
, unsigned8 tt
)
271 return (ts
>= tt
? ts
- tt
: tt
- ts
);
275 AvgOB(unsigned8 ts
, unsigned8 tt
)
277 return ((unsigned32
)ts
+ (unsigned32
)tt
+ 1) >> 1;
281 /* Dispatch tables for operations that update a CPR. */
283 static const QH_FUNC qh_func
[] = {
284 AndQH
, NorQH
, OrQH
, XorQH
, SLLQH
, SRLQH
,
285 AddQH
, SubQH
, MinQH
, MaxQH
,
286 MulQH
, MsgnQH
, SRAQH
, NULL
, NULL
289 static const OB_FUNC ob_func
[] = {
290 AndOB
, NorOB
, OrOB
, XorOB
, SLLOB
, SRLOB
,
291 AddOB
, SubOB
, MinOB
, MaxOB
,
292 MulOB
, NULL
, NULL
, AbsDiffOB
, AvgOB
295 /* Auxiliary functions for CPR updates. */
297 /* Vector mapping for QH format. */
299 qh_vector_op(unsigned64 v1
, unsigned64 v2
, QH_FUNC func
)
301 unsigned64 result
= 0;
305 for (i
= 0; i
< 64; i
+= 16)
307 h1
= (signed16
)(v1
& 0xFFFF); v1
>>= 16;
308 h2
= (signed16
)(v2
& 0xFFFF); v2
>>= 16;
310 result
|= ((unsigned64
)((unsigned16
)h
) << i
);
316 qh_map_op(unsigned64 v1
, signed16 h2
, QH_FUNC func
)
318 unsigned64 result
= 0;
322 for (i
= 0; i
< 64; i
+= 16)
324 h1
= (signed16
)(v1
& 0xFFFF); v1
>>= 16;
326 result
|= ((unsigned64
)((unsigned16
)h
) << i
);
332 /* Vector operations for OB format. */
335 ob_vector_op(unsigned64 v1
, unsigned64 v2
, OB_FUNC func
)
337 unsigned64 result
= 0;
341 for (i
= 0; i
< 64; i
+= 8)
343 b1
= v1
& 0xFF; v1
>>= 8;
344 b2
= v2
& 0xFF; v2
>>= 8;
346 result
|= ((unsigned64
)b
<< i
);
352 ob_map_op(unsigned64 v1
, unsigned8 b2
, OB_FUNC func
)
354 unsigned64 result
= 0;
358 for (i
= 0; i
< 64; i
+= 8)
360 b1
= v1
& 0xFF; v1
>>= 8;
362 result
|= ((unsigned64
)b
<< i
);
368 /* Primary entry for operations that update CPRs. */
370 mdmx_cpr_op(sim_cpu
*cpu
,
378 unsigned64 result
= 0;
380 switch (MX_FMT (fmtsel
))
383 switch (MX_VT (fmtsel
))
386 op2
= ValueFPR(vt
, fmt_mdmx
);
387 result
= qh_map_op(op1
, QH_ELEM(op2
, fmtsel
), qh_func
[op
]);
390 result
= qh_vector_op(op1
, ValueFPR(vt
, fmt_mdmx
), qh_func
[op
]);
393 result
= qh_map_op(op1
, vt
, qh_func
[op
]);
398 switch (MX_VT (fmtsel
))
401 op2
= ValueFPR(vt
, fmt_mdmx
);
402 result
= ob_map_op(op1
, OB_ELEM(op2
, fmtsel
), ob_func
[op
]);
405 result
= ob_vector_op(op1
, ValueFPR(vt
, fmt_mdmx
), ob_func
[op
]);
408 result
= ob_map_op(op1
, vt
, ob_func
[op
]);
420 /* Operations that update CCs */
423 qh_vector_test(sim_cpu
*cpu
, unsigned64 v1
, unsigned64 v2
, int cond
)
429 for (i
= 0; i
< 4; i
++)
431 h1
= (signed16
)(v1
& 0xFFFF); v1
>>= 16;
432 h2
= (signed16
)(v2
& 0xFFFF); v2
>>= 16;
433 boolean
= ((cond
& MX_C_EQ
) && (h1
== h2
)) ||
434 ((cond
& MX_C_LT
) && (h1
< h2
));
440 qh_map_test(sim_cpu
*cpu
, unsigned64 v1
, signed16 h2
, int cond
)
446 for (i
= 0; i
< 4; i
++)
448 h1
= (signed16
)(v1
& 0xFFFF); v1
>>= 16;
449 boolean
= ((cond
& MX_C_EQ
) && (h1
== h2
)) ||
450 ((cond
& MX_C_LT
) && (h1
< h2
));
456 ob_vector_test(sim_cpu
*cpu
, unsigned64 v1
, unsigned64 v2
, int cond
)
462 for (i
= 0; i
< 8; i
++)
464 b1
= v1
& 0xFF; v1
>>= 8;
465 b2
= v2
& 0xFF; v2
>>= 8;
466 boolean
= ((cond
& MX_C_EQ
) && (b1
== b2
)) ||
467 ((cond
& MX_C_LT
) && (b1
< b2
));
473 ob_map_test(sim_cpu
*cpu
, unsigned64 v1
, unsigned8 b2
, int cond
)
479 for (i
= 0; i
< 8; i
++)
481 b1
= (unsigned8
)(v1
& 0xFF); v1
>>= 8;
482 boolean
= ((cond
& MX_C_EQ
) && (b1
== b2
)) ||
483 ((cond
& MX_C_LT
) && (b1
< b2
));
490 mdmx_cc_op(sim_cpu
*cpu
,
499 switch (MX_FMT (fmtsel
))
502 switch (MX_VT (fmtsel
))
505 op2
= ValueFPR(vt
, fmt_mdmx
);
506 qh_map_test(cpu
, v1
, QH_ELEM(op2
, fmtsel
), cond
);
509 qh_vector_test(cpu
, v1
, ValueFPR(vt
, fmt_mdmx
), cond
);
512 qh_map_test(cpu
, v1
, vt
, cond
);
517 switch (MX_VT (fmtsel
))
520 op2
= ValueFPR(vt
, fmt_mdmx
);
521 ob_map_test(cpu
, v1
, OB_ELEM(op2
, fmtsel
), cond
);
524 ob_vector_test(cpu
, v1
, ValueFPR(vt
, fmt_mdmx
), cond
);
527 ob_map_test(cpu
, v1
, vt
, cond
);
537 /* Pick operations. */
540 qh_vector_pick(sim_cpu
*cpu
, unsigned64 v1
, unsigned64 v2
, int tf
)
542 unsigned64 result
= 0;
547 for (i
= 0; i
< 4; i
++)
549 h
= ((GETFCC(i
) == tf
) ? (v1
& 0xFFFF) : (v2
& 0xFFFF));
550 v1
>>= 16; v2
>>= 16;
551 result
|= ((unsigned64
)h
<< s
);
558 qh_map_pick(sim_cpu
*cpu
, unsigned64 v1
, signed16 h2
, int tf
)
560 unsigned64 result
= 0;
565 for (i
= 0; i
< 4; i
++)
567 h
= (GETFCC(i
) == tf
) ? (v1
& 0xFFFF) : (unsigned16
)h2
;
569 result
|= ((unsigned64
)h
<< s
);
576 ob_vector_pick(sim_cpu
*cpu
, unsigned64 v1
, unsigned64 v2
, int tf
)
578 unsigned64 result
= 0;
583 for (i
= 0; i
< 8; i
++)
585 b
= (GETFCC(i
) == tf
) ? (v1
& 0xFF) : (v2
& 0xFF);
587 result
|= ((unsigned64
)b
<< s
);
594 ob_map_pick(sim_cpu
*cpu
, unsigned64 v1
, unsigned8 b2
, int tf
)
596 unsigned64 result
= 0;
601 for (i
= 0; i
< 8; i
++)
603 b
= (GETFCC(i
) == tf
) ? (v1
& 0xFF) : b2
;
605 result
|= ((unsigned64
)b
<< s
);
613 mdmx_pick_op(sim_cpu
*cpu
,
620 unsigned64 result
= 0;
623 switch (MX_FMT (fmtsel
))
626 switch (MX_VT (fmtsel
))
629 op2
= ValueFPR(vt
, fmt_mdmx
);
630 result
= qh_map_pick(cpu
, v1
, QH_ELEM(op2
, fmtsel
), tf
);
633 result
= qh_vector_pick(cpu
, v1
, ValueFPR(vt
, fmt_mdmx
), tf
);
636 result
= qh_map_pick(cpu
, v1
, vt
, tf
);
641 switch (MX_VT (fmtsel
))
644 op2
= ValueFPR(vt
, fmt_mdmx
);
645 result
= ob_map_pick(cpu
, v1
, OB_ELEM(op2
, fmtsel
), tf
);
648 result
= ob_vector_pick(cpu
, v1
, ValueFPR(vt
, fmt_mdmx
), tf
);
651 result
= ob_map_pick(cpu
, v1
, vt
, tf
);
664 typedef void (*QH_ACC
)(signed48
*a
, signed16 ts
, signed16 tt
);
667 AccAddAQH(signed48
*a
, signed16 ts
, signed16 tt
)
669 *a
+= (signed48
)ts
+ (signed48
)tt
;
673 AccAddLQH(signed48
*a
, signed16 ts
, signed16 tt
)
675 *a
= (signed48
)ts
+ (signed48
)tt
;
679 AccMulAQH(signed48
*a
, signed16 ts
, signed16 tt
)
681 *a
+= (signed48
)ts
* (signed48
)tt
;
685 AccMulLQH(signed48
*a
, signed16 ts
, signed16 tt
)
687 *a
= (signed48
)ts
* (signed48
)tt
;
691 SubMulAQH(signed48
*a
, signed16 ts
, signed16 tt
)
693 *a
-= (signed48
)ts
* (signed48
)tt
;
697 SubMulLQH(signed48
*a
, signed16 ts
, signed16 tt
)
699 *a
= -((signed48
)ts
* (signed48
)tt
);
703 AccSubAQH(signed48
*a
, signed16 ts
, signed16 tt
)
705 *a
+= (signed48
)ts
- (signed48
)tt
;
709 AccSubLQH(signed48
*a
, signed16 ts
, signed16 tt
)
711 *a
= (signed48
)ts
- (signed48
)tt
;
715 typedef void (*OB_ACC
)(signed24
*acc
, unsigned8 ts
, unsigned8 tt
);
718 AccAddAOB(signed24
*a
, unsigned8 ts
, unsigned8 tt
)
720 *a
+= (signed24
)ts
+ (signed24
)tt
;
724 AccAddLOB(signed24
*a
, unsigned8 ts
, unsigned8 tt
)
726 *a
= (signed24
)ts
+ (signed24
)tt
;
730 AccMulAOB(signed24
*a
, unsigned8 ts
, unsigned8 tt
)
732 *a
+= (signed24
)ts
* (signed24
)tt
;
736 AccMulLOB(signed24
*a
, unsigned8 ts
, unsigned8 tt
)
738 *a
= (signed24
)ts
* (signed24
)tt
;
742 SubMulAOB(signed24
*a
, unsigned8 ts
, unsigned8 tt
)
744 *a
-= (signed24
)ts
* (signed24
)tt
;
748 SubMulLOB(signed24
*a
, unsigned8 ts
, unsigned8 tt
)
750 *a
= -((signed24
)ts
* (signed24
)tt
);
754 AccSubAOB(signed24
*a
, unsigned8 ts
, unsigned8 tt
)
756 *a
+= (signed24
)ts
- (signed24
)tt
;
760 AccSubLOB(signed24
*a
, unsigned8 ts
, unsigned8 tt
)
762 *a
= (signed24
)ts
- (signed24
)tt
;
766 AccAbsDiffOB(signed24
*a
, unsigned8 ts
, unsigned8 tt
)
768 unsigned8 t
= (ts
>= tt
? ts
- tt
: tt
- ts
);
/* Dispatch tables for operations that update the accumulator. */
775 static const QH_ACC qh_acc
[] = {
776 AccAddAQH
, AccAddAQH
, AccMulAQH
, AccMulLQH
,
777 SubMulAQH
, SubMulLQH
, AccSubAQH
, AccSubLQH
,
781 static const OB_ACC ob_acc
[] = {
782 AccAddAOB
, AccAddLOB
, AccMulAOB
, AccMulLOB
,
783 SubMulAOB
, SubMulLOB
, AccSubAOB
, AccSubLOB
,
789 qh_vector_acc(signed48 a
[], unsigned64 v1
, unsigned64 v2
, QH_ACC acc
)
794 for (i
= 0; i
< 4; i
++)
796 h1
= (signed16
)(v1
& 0xFFFF); v1
>>= 16;
797 h2
= (signed16
)(v2
& 0xFFFF); v2
>>= 16;
798 (*acc
)(&a
[i
], h1
, h2
);
803 qh_map_acc(signed48 a
[], unsigned64 v1
, signed16 h2
, QH_ACC acc
)
808 for (i
= 0; i
< 4; i
++)
810 h1
= (signed16
)(v1
& 0xFFFF); v1
>>= 16;
811 (*acc
)(&a
[i
], h1
, h2
);
816 ob_vector_acc(signed24 a
[], unsigned64 v1
, unsigned64 v2
, OB_ACC acc
)
821 for (i
= 0; i
< 8; i
++)
823 b1
= v1
& 0xFF; v1
>>= 8;
824 b2
= v2
& 0xFF; v2
>>= 8;
825 (*acc
)(&a
[i
], b1
, b2
);
830 ob_map_acc(signed24 a
[], unsigned64 v1
, unsigned8 b2
, OB_ACC acc
)
835 for (i
= 0; i
< 8; i
++)
837 b1
= v1
& 0xFF; v1
>>= 8;
838 (*acc
)(&a
[i
], b1
, b2
);
843 /* Primary entry for operations that accumulate */
845 mdmx_acc_op(sim_cpu
*cpu
,
854 switch (MX_FMT (fmtsel
))
857 switch (MX_VT (fmtsel
))
860 op2
= ValueFPR(vt
, fmt_mdmx
);
861 qh_map_acc(ACC
.qh
, op1
, QH_ELEM(op2
, fmtsel
), qh_acc
[op
]);
864 qh_vector_acc(ACC
.qh
, op1
, ValueFPR(vt
, fmt_mdmx
), qh_acc
[op
]);
867 qh_map_acc(ACC
.qh
, op1
, vt
, qh_acc
[op
]);
872 switch (MX_VT (fmtsel
))
875 op2
= ValueFPR(vt
, fmt_mdmx
);
876 ob_map_acc(ACC
.ob
, op1
, OB_ELEM(op2
, fmtsel
), ob_acc
[op
]);
879 ob_vector_acc(ACC
.ob
, op1
, ValueFPR(vt
, fmt_mdmx
), ob_acc
[op
]);
882 ob_map_acc(ACC
.ob
, op1
, vt
, ob_acc
[op
]);
892 /* Reading and writing accumulator (no conversion). */
895 mdmx_rac_op(sim_cpu
*cpu
,
904 shift
= op
; /* L = 00, M = 01, H = 10. */
910 shift
<<= 4; /* 16 bits per element. */
911 for (i
= 3; i
>= 0; --i
)
914 result
|= ((ACC
.qh
[i
] >> shift
) & 0xFFFF);
918 shift
<<= 3; /* 8 bits per element. */
919 for (i
= 7; i
>= 0; --i
)
922 result
|= ((ACC
.ob
[i
] >> shift
) & 0xFF);
932 mdmx_wacl(sim_cpu
*cpu
,
943 for (i
= 0; i
< 4; i
++)
945 signed32 s
= (signed16
)(vs
& 0xFFFF);
946 ACC
.qh
[i
] = ((signed48
)s
<< 16) | (vt
& 0xFFFF);
947 vs
>>= 16; vt
>>= 16;
951 for (i
= 0; i
< 8; i
++)
953 signed16 s
= (signed8
)(vs
& 0xFF);
954 ACC
.ob
[i
] = ((signed24
)s
<< 8) | (vt
& 0xFF);
964 mdmx_wach(sim_cpu
*cpu
,
974 for (i
= 0; i
< 4; i
++)
976 signed32 s
= (signed16
)(vs
& 0xFFFF);
977 ACC
.qh
[i
] &= ~((signed48
)0xFFFF << 32);
978 ACC
.qh
[i
] |= ((signed48
)s
<< 32);
983 for (i
= 0; i
< 8; i
++)
985 ACC
.ob
[i
] &= ~((signed24
)0xFF << 16);
986 ACC
.ob
[i
] |= ((signed24
)(vs
& 0xFF) << 16);
996 /* Reading and writing accumulator (rounding conversions).
997 Enumerating function guarantees s >= 0 for QH ops. */
999 typedef signed16 (*QH_ROUND
)(signed48 a
, signed16 s
);
/* QH_BIT(n) is an unsigned48 value with only bit N set; QH_ONES(n) is an
   unsigned48 value with the low N bits set.  */
#define QH_BIT(n)  ((unsigned48)1 << (n))
#define QH_ONES(n) (((unsigned48)1 << (n))-1)
1005 RNASQH(signed48 a
, signed16 s
)
1008 signed16 result
= 0;
1015 if ((a
& QH_BIT(47)) == 0)
1017 if (s
> 0 && ((a
>> (s
-1)) & 1) == 1)
1024 if (s
> 0 && ((a
>> (s
-1)) & 1) == 1)
1026 if (s
> 1 && ((unsigned48
)a
& QH_ONES(s
-1)) != 0)
1032 result
= (signed16
)t
;
1038 RNAUQH(signed48 a
, signed16 s
)
1046 result
= ((unsigned48
)a
& MASK48
) >> 47;
1049 t
= ((unsigned48
)a
& MASK48
) >> s
;
1050 if (s
> 0 && ((a
>> (s
-1)) & 1) == 1)
1054 result
= (signed16
)t
;
1060 RNESQH(signed48 a
, signed16 s
)
1063 signed16 result
= 0;
1070 if (s
> 0 && ((a
>> (s
-1)) & 1) == 1)
1072 if (s
== 1 || (a
& QH_ONES(s
-1)) == 0)
1077 if ((a
& QH_BIT(47)) == 0)
1087 result
= (signed16
)t
;
1093 RNEUQH(signed48 a
, signed16 s
)
1101 result
= ((unsigned48
)a
> QH_BIT(47) ? 1 : 0);
1104 t
= ((unsigned48
)a
& MASK48
) >> s
;
1105 if (s
> 0 && ((a
>> (s
-1)) & 1) == 1)
1107 if (s
> 1 && (a
& QH_ONES(s
-1)) != 0)
1114 result
= (signed16
)t
;
1120 RZSQH(signed48 a
, signed16 s
)
1123 signed16 result
= 0;
1130 if ((a
& QH_BIT(47)) == 0)
1140 result
= (signed16
)t
;
1146 RZUQH(signed48 a
, signed16 s
)
1149 signed16 result
= 0;
1154 result
= ((unsigned48
)a
> QH_BIT(47) ? 1 : 0);
1157 t
= ((unsigned48
)a
& MASK48
) >> s
;
1160 result
= (signed16
)t
;
1166 typedef unsigned8 (*OB_ROUND
)(signed24 a
, unsigned8 s
);
/* OB_BIT(n) is an unsigned24 value with only bit N set; OB_ONES(n) is an
   unsigned24 value with the low N bits set.  */
#define OB_BIT(n)  ((unsigned24)1 << (n))
#define OB_ONES(n) (((unsigned24)1 << (n))-1)
1172 RNAUOB(signed24 a
, unsigned8 s
)
1180 result
= ((unsigned24
)a
& MASK24
) >> 23;
1183 t
= ((unsigned24
)a
& MASK24
) >> s
;
1184 if (s
> 0 && ((a
>> (s
-1)) & 1) == 1)
1186 result
= OB_CLAMP(t
);
1192 RNEUOB(signed24 a
, unsigned8 s
)
1200 result
= (((unsigned24
)a
& MASK24
) > OB_BIT(23) ? 1 : 0);
1203 t
= ((unsigned24
)a
& MASK24
) >> s
;
1204 if (s
> 0 && ((a
>> (s
-1)) & 1) == 1)
1206 if (s
> 1 && (a
& OB_ONES(s
-1)) != 0)
1211 result
= OB_CLAMP(t
);
1217 RZUOB(signed24 a
, unsigned8 s
)
1226 t
= ((unsigned24
)a
& MASK24
) >> s
;
1227 result
= OB_CLAMP(t
);
1233 static const QH_ROUND qh_round
[] = {
1234 RNASQH
, RNAUQH
, RNESQH
, RNEUQH
, RZSQH
, RZUQH
1237 static const OB_ROUND ob_round
[] = {
1238 NULL
, RNAUOB
, NULL
, RNEUOB
, NULL
, RZUOB
1243 qh_vector_round(sim_cpu
*cpu
, address_word cia
, unsigned64 v2
, QH_ROUND round
)
1245 unsigned64 result
= 0;
1250 for (i
= 0; i
< 4; i
++)
1252 h2
= (signed16
)(v2
& 0xFFFF);
1254 h
= (*round
)(ACC
.qh
[i
], h2
);
1257 UnpredictableResult ();
1261 result
|= ((unsigned64
)((unsigned16
)h
) << s
);
1268 qh_map_round(sim_cpu
*cpu
, address_word cia
, signed16 h2
, QH_ROUND round
)
1270 unsigned64 result
= 0;
1275 for (i
= 0; i
< 4; i
++)
1278 h
= (*round
)(ACC
.qh
[i
], h2
);
1281 UnpredictableResult ();
1284 result
|= ((unsigned64
)((unsigned16
)h
) << s
);
1291 ob_vector_round(sim_cpu
*cpu
, address_word cia
, unsigned64 v2
, OB_ROUND round
)
1293 unsigned64 result
= 0;
1298 for (i
= 0; i
< 8; i
++)
1300 b2
= v2
& 0xFF; v2
>>= 8;
1301 b
= (*round
)(ACC
.ob
[i
], b2
);
1302 result
|= ((unsigned64
)b
<< s
);
1309 ob_map_round(sim_cpu
*cpu
, address_word cia
, unsigned8 b2
, OB_ROUND round
)
1311 unsigned64 result
= 0;
1316 for (i
= 0; i
< 8; i
++)
1318 b
= (*round
)(ACC
.ob
[i
], b2
);
1319 result
|= ((unsigned64
)b
<< s
);
1327 mdmx_round_op(sim_cpu
*cpu
,
1334 unsigned64 result
= 0;
1336 switch (MX_FMT (fmtsel
))
1339 switch (MX_VT (fmtsel
))
1342 op2
= ValueFPR(vt
, fmt_mdmx
);
1343 result
= qh_map_round(cpu
, cia
, QH_ELEM(op2
, fmtsel
), qh_round
[rm
]);
1346 op2
= ValueFPR(vt
, fmt_mdmx
);
1347 result
= qh_vector_round(cpu
, cia
, op2
, qh_round
[rm
]);
1350 result
= qh_map_round(cpu
, cia
, vt
, qh_round
[rm
]);
1355 switch (MX_VT (fmtsel
))
1358 op2
= ValueFPR(vt
, fmt_mdmx
);
1359 result
= ob_map_round(cpu
, cia
, OB_ELEM(op2
, fmtsel
), ob_round
[rm
]);
1362 op2
= ValueFPR(vt
, fmt_mdmx
);
1363 result
= ob_vector_round(cpu
, cia
, op2
, ob_round
[rm
]);
1366 result
= ob_map_round(cpu
, cia
, vt
, ob_round
[rm
]);
1378 /* Shuffle operation. */
1381 enum {vs
, ss
, vt
} source
;
1385 static const sh_map ob_shuffle
[][8] = {
1386 /* MDMX 2.0 encodings (3-4, 6-7). */
1387 /* vr5400 encoding (5), otherwise. */
1389 {{vt
,4}, {vs
,4}, {vt
,5}, {vs
,5}, {vt
,6}, {vs
,6}, {vt
,7}, {vs
,7}}, /* RSVD */
1390 {{vt
,0}, {vs
,0}, {vt
,1}, {vs
,1}, {vt
,2}, {vs
,2}, {vt
,3}, {vs
,3}}, /* RSVD */
1391 {{vs
,0}, {ss
,0}, {vs
,1}, {ss
,1}, {vs
,2}, {ss
,2}, {vs
,3}, {ss
,3}}, /* upsl */
1392 {{vt
,1}, {vt
,3}, {vt
,5}, {vt
,7}, {vs
,1}, {vs
,3}, {vs
,5}, {vs
,7}}, /* pach */
1393 {{vt
,0}, {vt
,2}, {vt
,4}, {vt
,6}, {vs
,0}, {vs
,2}, {vs
,4}, {vs
,6}}, /* pacl */
1394 {{vt
,4}, {vs
,4}, {vt
,5}, {vs
,5}, {vt
,6}, {vs
,6}, {vt
,7}, {vs
,7}}, /* mixh */
1395 {{vt
,0}, {vs
,0}, {vt
,1}, {vs
,1}, {vt
,2}, {vs
,2}, {vt
,3}, {vs
,3}} /* mixl */
1398 static const sh_map qh_shuffle
[][4] = {
1399 {{vt
,2}, {vs
,2}, {vt
,3}, {vs
,3}}, /* mixh */
1400 {{vt
,0}, {vs
,0}, {vt
,1}, {vs
,1}}, /* mixl */
1401 {{vt
,1}, {vt
,3}, {vs
,1}, {vs
,3}}, /* pach */
1403 {{vt
,1}, {vs
,0}, {vt
,3}, {vs
,2}}, /* bfla */
1405 {{vt
,2}, {vt
,3}, {vs
,2}, {vs
,3}}, /* repa */
1406 {{vt
,0}, {vt
,1}, {vs
,0}, {vs
,1}} /* repb */
1411 mdmx_shuffle(sim_cpu
*cpu
,
1417 unsigned64 result
= 0;
1421 if ((shop
& 0x3) == 0x1) /* QH format. */
1425 for (i
= 0; i
< 4; i
++)
1429 switch (qh_shuffle
[op
][i
].source
)
1441 result
|= (((v
>> 16*qh_shuffle
[op
][i
].index
) & 0xFFFF) << s
);
1445 else if ((shop
& 0x1) == 0x0) /* OB format. */
1449 for (i
= 0; i
< 8; i
++)
1452 unsigned int ishift
= 8*ob_shuffle
[op
][i
].index
;
1454 switch (ob_shuffle
[op
][i
].source
)
1457 b
= (op1
>> ishift
) & 0xFF;
1460 b
= ((op1
>> ishift
) & 0x80) ? 0xFF : 0;
1463 b
= (op2
>> ishift
) & 0xFF;
1469 result
|= ((unsigned64
)b
<< s
);