1 /* Simulation code for the MIPS MDMX ASE.
2 Copyright (C) 2002-2019 Free Software Foundation, Inc.
3 Contributed by Ed Satterthwaite and Chris Demetriou, of Broadcom
6 This file is part of GDB, the GNU debugger.
8 This program is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3 of the License, or
11 (at your option) any later version.
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with this program. If not, see <http://www.gnu.org/licenses/>. */
25 /* Within mdmx.c we refer to the sim_cpu directly. */
27 #define SD (CPU_STATE(CPU))
29 /* XXX FIXME: temporary hack while the impact of making unpredictable()
30 a "normal" (non-igen) function is evaluated. */
32 #define Unpredictable() unpredictable_action (cpu, cia)
34 /* MDMX Representations
36 An 8-bit packed byte element (OB) is always unsigned.
37 The 24-bit accumulators are signed and are represented as 32-bit
38 signed values, which are reduced to 24-bit signed values prior to
39 Round and Clamp operations.
41 A 16-bit packed halfword element (QH) is always signed.
42 The 48-bit accumulators are signed and are represented as 64-bit
43 signed values, which are reduced to 48-bit signed values prior to
44 Round and Clamp operations.
46 The code below assumes a 2's-complement representation of signed
47 quantities. Care is required to clear extended sign bits when repacking fields.
50 The code (and the code for arithmetic shifts in mips.igen) also makes
51 the (not guaranteed portable) assumption that right shifts of signed
52 quantities in C do sign extension. */
/* Unsigned view of a 48-bit QH accumulator value, carried in a 64-bit
   container.  */
54 typedef unsigned64 unsigned48
;
/* Mask that keeps only the low 48 bits of an unsigned48.  */
55 #define MASK48 (UNSIGNED64 (0xffffffffffff))
/* Unsigned view of a 24-bit OB accumulator value, carried in a 32-bit
   container.  */
57 typedef unsigned32 unsigned24
;
/* Mask that keeps only the low 24 bits of an unsigned24.  */
58 #define MASK24 (UNSIGNED32 (0xffffff))
61 mdmx_ob
, /* OB (octal byte) */
62 mdmx_qh
/* QH (quad half-word) */
66 sel_elem
, /* element select */
67 sel_vect
, /* vector select */
68 sel_imm
/* immediate select */
/* Saturation bounds: the largest OB (unsigned byte) value and the
   limits used for QH (signed halfword) values.  */
71 #define OB_MAX ((unsigned8)0xFF)
72 #define QH_MIN ((signed16)0x8000)
73 #define QH_MAX ((signed16)0x7FFF)
/* Saturate X to the element range.  OB_CLAMP only bounds from above;
   QH_CLAMP bounds on both sides.  Both macros evaluate X more than
   once, so X must not have side effects.  */
75 #define OB_CLAMP(x) ((unsigned8)((x) > OB_MAX ? OB_MAX : (x)))
76 #define QH_CLAMP(x) ((signed16)((x) < QH_MIN ? QH_MIN : \
77 ((x) > QH_MAX ? QH_MAX : (x))))
/* Decode the format/select field FMTSEL.
   MX_FMT: bit 0 clear selects mdmx_ob, bit 0 set selects mdmx_qh.
   MX_VT:  bit 4 clear selects sel_elem; bits 4:3 equal to binary 10
   select sel_vect; anything else (bits 4:3 equal to binary 11)
   selects sel_imm.  */
79 #define MX_FMT(fmtsel) (((fmtsel) & 0x1) == 0 ? mdmx_ob : mdmx_qh)
80 #define MX_VT(fmtsel) (((fmtsel) & 0x10) == 0 ? sel_elem : \
81 (((fmtsel) & 0x18) == 0x10 ? sel_vect : sel_imm))
/* Extract the element of vector V selected by FMTSEL.
   QH_ELEM: bits 3:2 of FMTSEL give the halfword index; the shift is
   (fmtsel & 0xC) << 2, i.e. index * 16, and the low 16 bits are kept.
   OB_ELEM: bits 3:1 of FMTSEL give the byte index; the shift is
   (fmtsel & 0xE) << 2, i.e. index * 8, and the low 8 bits are kept.  */
83 #define QH_ELEM(v,fmtsel) \
84 ((signed16)(((v) >> (((fmtsel) & 0xC) << 2)) & 0xFFFF))
85 #define OB_ELEM(v,fmtsel) \
86 ((unsigned8)(((v) >> (((fmtsel) & 0xE) << 2)) & 0xFF))
/* Element-wise binary operation on two QH (signed 16-bit) elements,
   returning one QH element.  This is the entry type of qh_func.  */
89 typedef signed16 (*QH_FUNC
)(signed16
, signed16
);
/* Element-wise binary operation on two OB (unsigned 8-bit) elements,
   returning one OB element.  This is the entry type of ob_func.  */
90 typedef unsigned8 (*OB_FUNC
)(unsigned8
, unsigned8
);
92 /* vectorized logical operators */
95 AndQH(signed16 ts
, signed16 tt
)
97 return (signed16
)((unsigned16
)ts
& (unsigned16
)tt
);
101 AndOB(unsigned8 ts
, unsigned8 tt
)
107 NorQH(signed16 ts
, signed16 tt
)
109 return (signed16
)(((unsigned16
)ts
| (unsigned16
)tt
) ^ 0xFFFF);
113 NorOB(unsigned8 ts
, unsigned8 tt
)
115 return (ts
| tt
) ^ 0xFF;
119 OrQH(signed16 ts
, signed16 tt
)
121 return (signed16
)((unsigned16
)ts
| (unsigned16
)tt
);
125 OrOB(unsigned8 ts
, unsigned8 tt
)
131 XorQH(signed16 ts
, signed16 tt
)
133 return (signed16
)((unsigned16
)ts
^ (unsigned16
)tt
);
137 XorOB(unsigned8 ts
, unsigned8 tt
)
143 SLLQH(signed16 ts
, signed16 tt
)
145 unsigned32 s
= (unsigned32
)tt
& 0xF;
146 return (signed16
)(((unsigned32
)ts
<< s
) & 0xFFFF);
150 SLLOB(unsigned8 ts
, unsigned8 tt
)
152 unsigned32 s
= tt
& 0x7;
153 return (ts
<< s
) & 0xFF;
157 SRLQH(signed16 ts
, signed16 tt
)
159 unsigned32 s
= (unsigned32
)tt
& 0xF;
160 return (signed16
)((unsigned16
)ts
>> s
);
164 SRLOB(unsigned8 ts
, unsigned8 tt
)
166 unsigned32 s
= tt
& 0x7;
171 /* Vectorized arithmetic operators. */
174 AddQH(signed16 ts
, signed16 tt
)
176 signed32 t
= (signed32
)ts
+ (signed32
)tt
;
181 AddOB(unsigned8 ts
, unsigned8 tt
)
183 unsigned32 t
= (unsigned32
)ts
+ (unsigned32
)tt
;
188 SubQH(signed16 ts
, signed16 tt
)
190 signed32 t
= (signed32
)ts
- (signed32
)tt
;
195 SubOB(unsigned8 ts
, unsigned8 tt
)
198 t
= (signed32
)ts
- (signed32
)tt
;
205 MinQH(signed16 ts
, signed16 tt
)
207 return (ts
< tt
? ts
: tt
);
211 MinOB(unsigned8 ts
, unsigned8 tt
)
213 return (ts
< tt
? ts
: tt
);
217 MaxQH(signed16 ts
, signed16 tt
)
219 return (ts
> tt
? ts
: tt
);
223 MaxOB(unsigned8 ts
, unsigned8 tt
)
225 return (ts
> tt
? ts
: tt
);
229 MulQH(signed16 ts
, signed16 tt
)
231 signed32 t
= (signed32
)ts
* (signed32
)tt
;
236 MulOB(unsigned8 ts
, unsigned8 tt
)
238 unsigned32 t
= (unsigned32
)ts
* (unsigned32
)tt
;
242 /* "msgn" and "sra" are defined only for QH format. */
245 MsgnQH(signed16 ts
, signed16 tt
)
249 t
= (tt
== QH_MIN
? QH_MAX
: -tt
);
258 SRAQH(signed16 ts
, signed16 tt
)
260 unsigned32 s
= (unsigned32
)tt
& 0xF;
261 return (signed16
)((signed32
)ts
>> s
);
265 /* "pabsdiff" and "pavg" are defined only for OB format. */
268 AbsDiffOB(unsigned8 ts
, unsigned8 tt
)
270 return (ts
>= tt
? ts
- tt
: tt
- ts
);
274 AvgOB(unsigned8 ts
, unsigned8 tt
)
276 return ((unsigned32
)ts
+ (unsigned32
)tt
+ 1) >> 1;
280 /* Dispatch tables for operations that update a CPR. */
282 static const QH_FUNC qh_func
[] = {
283 AndQH
, NorQH
, OrQH
, XorQH
, SLLQH
, SRLQH
,
284 AddQH
, SubQH
, MinQH
, MaxQH
,
285 MulQH
, MsgnQH
, SRAQH
, NULL
, NULL
288 static const OB_FUNC ob_func
[] = {
289 AndOB
, NorOB
, OrOB
, XorOB
, SLLOB
, SRLOB
,
290 AddOB
, SubOB
, MinOB
, MaxOB
,
291 MulOB
, NULL
, NULL
, AbsDiffOB
, AvgOB
294 /* Auxiliary functions for CPR updates. */
296 /* Vector mapping for QH format. */
298 qh_vector_op(unsigned64 v1
, unsigned64 v2
, QH_FUNC func
)
300 unsigned64 result
= 0;
304 for (i
= 0; i
< 64; i
+= 16)
306 h1
= (signed16
)(v1
& 0xFFFF); v1
>>= 16;
307 h2
= (signed16
)(v2
& 0xFFFF); v2
>>= 16;
309 result
|= ((unsigned64
)((unsigned16
)h
) << i
);
315 qh_map_op(unsigned64 v1
, signed16 h2
, QH_FUNC func
)
317 unsigned64 result
= 0;
321 for (i
= 0; i
< 64; i
+= 16)
323 h1
= (signed16
)(v1
& 0xFFFF); v1
>>= 16;
325 result
|= ((unsigned64
)((unsigned16
)h
) << i
);
331 /* Vector operations for OB format. */
334 ob_vector_op(unsigned64 v1
, unsigned64 v2
, OB_FUNC func
)
336 unsigned64 result
= 0;
340 for (i
= 0; i
< 64; i
+= 8)
342 b1
= v1
& 0xFF; v1
>>= 8;
343 b2
= v2
& 0xFF; v2
>>= 8;
345 result
|= ((unsigned64
)b
<< i
);
351 ob_map_op(unsigned64 v1
, unsigned8 b2
, OB_FUNC func
)
353 unsigned64 result
= 0;
357 for (i
= 0; i
< 64; i
+= 8)
359 b1
= v1
& 0xFF; v1
>>= 8;
361 result
|= ((unsigned64
)b
<< i
);
367 /* Primary entry for operations that update CPRs. */
369 mdmx_cpr_op(sim_cpu
*cpu
,
377 unsigned64 result
= 0;
379 switch (MX_FMT (fmtsel
))
382 switch (MX_VT (fmtsel
))
385 op2
= ValueFPR(vt
, fmt_mdmx
);
386 result
= qh_map_op(op1
, QH_ELEM(op2
, fmtsel
), qh_func
[op
]);
389 result
= qh_vector_op(op1
, ValueFPR(vt
, fmt_mdmx
), qh_func
[op
]);
392 result
= qh_map_op(op1
, vt
, qh_func
[op
]);
397 switch (MX_VT (fmtsel
))
400 op2
= ValueFPR(vt
, fmt_mdmx
);
401 result
= ob_map_op(op1
, OB_ELEM(op2
, fmtsel
), ob_func
[op
]);
404 result
= ob_vector_op(op1
, ValueFPR(vt
, fmt_mdmx
), ob_func
[op
]);
407 result
= ob_map_op(op1
, vt
, ob_func
[op
]);
419 /* Operations that update CCs */
422 qh_vector_test(sim_cpu
*cpu
, unsigned64 v1
, unsigned64 v2
, int cond
)
428 for (i
= 0; i
< 4; i
++)
430 h1
= (signed16
)(v1
& 0xFFFF); v1
>>= 16;
431 h2
= (signed16
)(v2
& 0xFFFF); v2
>>= 16;
432 boolean
= ((cond
& MX_C_EQ
) && (h1
== h2
)) ||
433 ((cond
& MX_C_LT
) && (h1
< h2
));
439 qh_map_test(sim_cpu
*cpu
, unsigned64 v1
, signed16 h2
, int cond
)
445 for (i
= 0; i
< 4; i
++)
447 h1
= (signed16
)(v1
& 0xFFFF); v1
>>= 16;
448 boolean
= ((cond
& MX_C_EQ
) && (h1
== h2
)) ||
449 ((cond
& MX_C_LT
) && (h1
< h2
));
455 ob_vector_test(sim_cpu
*cpu
, unsigned64 v1
, unsigned64 v2
, int cond
)
461 for (i
= 0; i
< 8; i
++)
463 b1
= v1
& 0xFF; v1
>>= 8;
464 b2
= v2
& 0xFF; v2
>>= 8;
465 boolean
= ((cond
& MX_C_EQ
) && (b1
== b2
)) ||
466 ((cond
& MX_C_LT
) && (b1
< b2
));
472 ob_map_test(sim_cpu
*cpu
, unsigned64 v1
, unsigned8 b2
, int cond
)
478 for (i
= 0; i
< 8; i
++)
480 b1
= (unsigned8
)(v1
& 0xFF); v1
>>= 8;
481 boolean
= ((cond
& MX_C_EQ
) && (b1
== b2
)) ||
482 ((cond
& MX_C_LT
) && (b1
< b2
));
489 mdmx_cc_op(sim_cpu
*cpu
,
498 switch (MX_FMT (fmtsel
))
501 switch (MX_VT (fmtsel
))
504 op2
= ValueFPR(vt
, fmt_mdmx
);
505 qh_map_test(cpu
, v1
, QH_ELEM(op2
, fmtsel
), cond
);
508 qh_vector_test(cpu
, v1
, ValueFPR(vt
, fmt_mdmx
), cond
);
511 qh_map_test(cpu
, v1
, vt
, cond
);
516 switch (MX_VT (fmtsel
))
519 op2
= ValueFPR(vt
, fmt_mdmx
);
520 ob_map_test(cpu
, v1
, OB_ELEM(op2
, fmtsel
), cond
);
523 ob_vector_test(cpu
, v1
, ValueFPR(vt
, fmt_mdmx
), cond
);
526 ob_map_test(cpu
, v1
, vt
, cond
);
536 /* Pick operations. */
539 qh_vector_pick(sim_cpu
*cpu
, unsigned64 v1
, unsigned64 v2
, int tf
)
541 unsigned64 result
= 0;
546 for (i
= 0; i
< 4; i
++)
548 h
= ((GETFCC(i
) == tf
) ? (v1
& 0xFFFF) : (v2
& 0xFFFF));
549 v1
>>= 16; v2
>>= 16;
550 result
|= ((unsigned64
)h
<< s
);
557 qh_map_pick(sim_cpu
*cpu
, unsigned64 v1
, signed16 h2
, int tf
)
559 unsigned64 result
= 0;
564 for (i
= 0; i
< 4; i
++)
566 h
= (GETFCC(i
) == tf
) ? (v1
& 0xFFFF) : (unsigned16
)h2
;
568 result
|= ((unsigned64
)h
<< s
);
575 ob_vector_pick(sim_cpu
*cpu
, unsigned64 v1
, unsigned64 v2
, int tf
)
577 unsigned64 result
= 0;
582 for (i
= 0; i
< 8; i
++)
584 b
= (GETFCC(i
) == tf
) ? (v1
& 0xFF) : (v2
& 0xFF);
586 result
|= ((unsigned64
)b
<< s
);
593 ob_map_pick(sim_cpu
*cpu
, unsigned64 v1
, unsigned8 b2
, int tf
)
595 unsigned64 result
= 0;
600 for (i
= 0; i
< 8; i
++)
602 b
= (GETFCC(i
) == tf
) ? (v1
& 0xFF) : b2
;
604 result
|= ((unsigned64
)b
<< s
);
612 mdmx_pick_op(sim_cpu
*cpu
,
619 unsigned64 result
= 0;
622 switch (MX_FMT (fmtsel
))
625 switch (MX_VT (fmtsel
))
628 op2
= ValueFPR(vt
, fmt_mdmx
);
629 result
= qh_map_pick(cpu
, v1
, QH_ELEM(op2
, fmtsel
), tf
);
632 result
= qh_vector_pick(cpu
, v1
, ValueFPR(vt
, fmt_mdmx
), tf
);
635 result
= qh_map_pick(cpu
, v1
, vt
, tf
);
640 switch (MX_VT (fmtsel
))
643 op2
= ValueFPR(vt
, fmt_mdmx
);
644 result
= ob_map_pick(cpu
, v1
, OB_ELEM(op2
, fmtsel
), tf
);
647 result
= ob_vector_pick(cpu
, v1
, ValueFPR(vt
, fmt_mdmx
), tf
);
650 result
= ob_map_pick(cpu
, v1
, vt
, tf
);
/* Accumulate operation for QH format: updates the accumulator element
   *A in place from the two signed 16-bit operands TS and TT.  This is
   the entry type of qh_acc.  */
663 typedef void (*QH_ACC
)(signed48
*a
, signed16 ts
, signed16 tt
);
666 AccAddAQH(signed48
*a
, signed16 ts
, signed16 tt
)
668 *a
+= (signed48
)ts
+ (signed48
)tt
;
672 AccAddLQH(signed48
*a
, signed16 ts
, signed16 tt
)
674 *a
= (signed48
)ts
+ (signed48
)tt
;
678 AccMulAQH(signed48
*a
, signed16 ts
, signed16 tt
)
680 *a
+= (signed48
)ts
* (signed48
)tt
;
684 AccMulLQH(signed48
*a
, signed16 ts
, signed16 tt
)
686 *a
= (signed48
)ts
* (signed48
)tt
;
690 SubMulAQH(signed48
*a
, signed16 ts
, signed16 tt
)
692 *a
-= (signed48
)ts
* (signed48
)tt
;
696 SubMulLQH(signed48
*a
, signed16 ts
, signed16 tt
)
698 *a
= -((signed48
)ts
* (signed48
)tt
);
702 AccSubAQH(signed48
*a
, signed16 ts
, signed16 tt
)
704 *a
+= (signed48
)ts
- (signed48
)tt
;
708 AccSubLQH(signed48
*a
, signed16 ts
, signed16 tt
)
710 *a
= (signed48
)ts
- (signed48
)tt
;
/* Accumulate operation for OB format: updates the accumulator element
   *ACC in place from the two unsigned 8-bit operands TS and TT.  This
   is the entry type of ob_acc.  */
714 typedef void (*OB_ACC
)(signed24
*acc
, unsigned8 ts
, unsigned8 tt
);
717 AccAddAOB(signed24
*a
, unsigned8 ts
, unsigned8 tt
)
719 *a
+= (signed24
)ts
+ (signed24
)tt
;
723 AccAddLOB(signed24
*a
, unsigned8 ts
, unsigned8 tt
)
725 *a
= (signed24
)ts
+ (signed24
)tt
;
729 AccMulAOB(signed24
*a
, unsigned8 ts
, unsigned8 tt
)
731 *a
+= (signed24
)ts
* (signed24
)tt
;
735 AccMulLOB(signed24
*a
, unsigned8 ts
, unsigned8 tt
)
737 *a
= (signed24
)ts
* (signed24
)tt
;
741 SubMulAOB(signed24
*a
, unsigned8 ts
, unsigned8 tt
)
743 *a
-= (signed24
)ts
* (signed24
)tt
;
747 SubMulLOB(signed24
*a
, unsigned8 ts
, unsigned8 tt
)
749 *a
= -((signed24
)ts
* (signed24
)tt
);
753 AccSubAOB(signed24
*a
, unsigned8 ts
, unsigned8 tt
)
755 *a
+= (signed24
)ts
- (signed24
)tt
;
759 AccSubLOB(signed24
*a
, unsigned8 ts
, unsigned8 tt
)
761 *a
= (signed24
)ts
- (signed24
)tt
;
765 AccAbsDiffOB(signed24
*a
, unsigned8 ts
, unsigned8 tt
)
767 unsigned8 t
= (ts
>= tt
? ts
- tt
: tt
- ts
);
772 /* Dispatch tables for operations that update the accumulator. */
/* QH accumulate dispatch table, indexed by OP in mdmx_acc_op
   (qh_acc[op]).  Each routine that updates the accumulator in place
   (the "A" forms) is followed by the routine that overwrites it (the
   "L" forms), in the same order as ob_acc.  Slot 1 must therefore be
   AccAddLQH; it previously repeated AccAddAQH, which left AccAddLQH
   unreachable and made that op add to the old accumulator contents
   instead of replacing them.  */
774 static const QH_ACC qh_acc
[] = {
775 AccAddAQH
, AccAddLQH
, AccMulAQH
, AccMulLQH
,
776 SubMulAQH
, SubMulLQH
, AccSubAQH
, AccSubLQH
,
780 static const OB_ACC ob_acc
[] = {
781 AccAddAOB
, AccAddLOB
, AccMulAOB
, AccMulLOB
,
782 SubMulAOB
, SubMulLOB
, AccSubAOB
, AccSubLOB
,
788 qh_vector_acc(signed48 a
[], unsigned64 v1
, unsigned64 v2
, QH_ACC acc
)
793 for (i
= 0; i
< 4; i
++)
795 h1
= (signed16
)(v1
& 0xFFFF); v1
>>= 16;
796 h2
= (signed16
)(v2
& 0xFFFF); v2
>>= 16;
797 (*acc
)(&a
[i
], h1
, h2
);
802 qh_map_acc(signed48 a
[], unsigned64 v1
, signed16 h2
, QH_ACC acc
)
807 for (i
= 0; i
< 4; i
++)
809 h1
= (signed16
)(v1
& 0xFFFF); v1
>>= 16;
810 (*acc
)(&a
[i
], h1
, h2
);
815 ob_vector_acc(signed24 a
[], unsigned64 v1
, unsigned64 v2
, OB_ACC acc
)
820 for (i
= 0; i
< 8; i
++)
822 b1
= v1
& 0xFF; v1
>>= 8;
823 b2
= v2
& 0xFF; v2
>>= 8;
824 (*acc
)(&a
[i
], b1
, b2
);
829 ob_map_acc(signed24 a
[], unsigned64 v1
, unsigned8 b2
, OB_ACC acc
)
834 for (i
= 0; i
< 8; i
++)
836 b1
= v1
& 0xFF; v1
>>= 8;
837 (*acc
)(&a
[i
], b1
, b2
);
842 /* Primary entry for operations that accumulate */
844 mdmx_acc_op(sim_cpu
*cpu
,
853 switch (MX_FMT (fmtsel
))
856 switch (MX_VT (fmtsel
))
859 op2
= ValueFPR(vt
, fmt_mdmx
);
860 qh_map_acc(ACC
.qh
, op1
, QH_ELEM(op2
, fmtsel
), qh_acc
[op
]);
863 qh_vector_acc(ACC
.qh
, op1
, ValueFPR(vt
, fmt_mdmx
), qh_acc
[op
]);
866 qh_map_acc(ACC
.qh
, op1
, vt
, qh_acc
[op
]);
871 switch (MX_VT (fmtsel
))
874 op2
= ValueFPR(vt
, fmt_mdmx
);
875 ob_map_acc(ACC
.ob
, op1
, OB_ELEM(op2
, fmtsel
), ob_acc
[op
]);
878 ob_vector_acc(ACC
.ob
, op1
, ValueFPR(vt
, fmt_mdmx
), ob_acc
[op
]);
881 ob_map_acc(ACC
.ob
, op1
, vt
, ob_acc
[op
]);
891 /* Reading and writing accumulator (no conversion). */
894 mdmx_rac_op(sim_cpu
*cpu
,
903 shift
= op
; /* L = 00, M = 01, H = 10. */
909 shift
<<= 4; /* 16 bits per element. */
910 for (i
= 3; i
>= 0; --i
)
913 result
|= ((ACC
.qh
[i
] >> shift
) & 0xFFFF);
917 shift
<<= 3; /* 8 bits per element. */
918 for (i
= 7; i
>= 0; --i
)
921 result
|= ((ACC
.ob
[i
] >> shift
) & 0xFF);
931 mdmx_wacl(sim_cpu
*cpu
,
942 for (i
= 0; i
< 4; i
++)
944 signed32 s
= (signed16
)(vs
& 0xFFFF);
945 ACC
.qh
[i
] = ((signed48
)s
<< 16) | (vt
& 0xFFFF);
946 vs
>>= 16; vt
>>= 16;
950 for (i
= 0; i
< 8; i
++)
952 signed16 s
= (signed8
)(vs
& 0xFF);
953 ACC
.ob
[i
] = ((signed24
)s
<< 8) | (vt
& 0xFF);
963 mdmx_wach(sim_cpu
*cpu
,
973 for (i
= 0; i
< 4; i
++)
975 signed32 s
= (signed16
)(vs
& 0xFFFF);
976 ACC
.qh
[i
] &= ~((signed48
)0xFFFF << 32);
977 ACC
.qh
[i
] |= ((signed48
)s
<< 32);
982 for (i
= 0; i
< 8; i
++)
984 ACC
.ob
[i
] &= ~((signed24
)0xFF << 16);
985 ACC
.ob
[i
] |= ((signed24
)(vs
& 0xFF) << 16);
995 /* Reading and writing accumulator (rounding conversions).
996 Enumerating function guarantees s >= 0 for QH ops. */
/* Rounding conversion for QH format: turns the accumulator value A
   into a 16-bit result using the shift amount S.  This is the entry
   type of qh_round.  */
998 typedef signed16 (*QH_ROUND
)(signed48 a
, signed16 s
);
/* QH_BIT(n) is an unsigned48 with only bit N set; the rounding
   routines use QH_BIT(47) to test the sign bit of a 48-bit
   accumulator value.  QH_ONES(n) is an unsigned48 with the low N bits
   set.  N must be smaller than the bit width of unsigned48 for the
   shift to be defined.  */
1000 #define QH_BIT(n) ((unsigned48)1 << (n))
1001 #define QH_ONES(n) (((unsigned48)1 << (n))-1)
1004 RNASQH(signed48 a
, signed16 s
)
1007 signed16 result
= 0;
1014 if ((a
& QH_BIT(47)) == 0)
1016 if (s
> 0 && ((a
>> (s
-1)) & 1) == 1)
1023 if (s
> 0 && ((a
>> (s
-1)) & 1) == 1)
1025 if (s
> 1 && ((unsigned48
)a
& QH_ONES(s
-1)) != 0)
1031 result
= (signed16
)t
;
1037 RNAUQH(signed48 a
, signed16 s
)
1045 result
= ((unsigned48
)a
& MASK48
) >> 47;
1048 t
= ((unsigned48
)a
& MASK48
) >> s
;
1049 if (s
> 0 && ((a
>> (s
-1)) & 1) == 1)
1053 result
= (signed16
)t
;
1059 RNESQH(signed48 a
, signed16 s
)
1062 signed16 result
= 0;
1069 if (s
> 0 && ((a
>> (s
-1)) & 1) == 1)
1071 if (s
== 1 || (a
& QH_ONES(s
-1)) == 0)
1076 if ((a
& QH_BIT(47)) == 0)
1086 result
= (signed16
)t
;
1092 RNEUQH(signed48 a
, signed16 s
)
1100 result
= ((unsigned48
)a
> QH_BIT(47) ? 1 : 0);
1103 t
= ((unsigned48
)a
& MASK48
) >> s
;
1104 if (s
> 0 && ((a
>> (s
-1)) & 1) == 1)
1106 if (s
> 1 && (a
& QH_ONES(s
-1)) != 0)
1113 result
= (signed16
)t
;
1119 RZSQH(signed48 a
, signed16 s
)
1122 signed16 result
= 0;
1129 if ((a
& QH_BIT(47)) == 0)
1139 result
= (signed16
)t
;
1145 RZUQH(signed48 a
, signed16 s
)
1148 signed16 result
= 0;
1153 result
= ((unsigned48
)a
> QH_BIT(47) ? 1 : 0);
1156 t
= ((unsigned48
)a
& MASK48
) >> s
;
1159 result
= (signed16
)t
;
/* Rounding conversion for OB format: turns the accumulator value A
   into an 8-bit result using the shift amount S.  This is the entry
   type of ob_round.  */
1165 typedef unsigned8 (*OB_ROUND
)(signed24 a
, unsigned8 s
);
/* OB_BIT(n) is an unsigned24 with only bit N set; RNEUOB compares
   against OB_BIT(23), the top bit of a 24-bit accumulator value.
   OB_ONES(n) is an unsigned24 with the low N bits set.  N must be
   smaller than the bit width of unsigned24 for the shift to be
   defined.  */
1167 #define OB_BIT(n) ((unsigned24)1 << (n))
1168 #define OB_ONES(n) (((unsigned24)1 << (n))-1)
1171 RNAUOB(signed24 a
, unsigned8 s
)
1179 result
= ((unsigned24
)a
& MASK24
) >> 23;
1182 t
= ((unsigned24
)a
& MASK24
) >> s
;
1183 if (s
> 0 && ((a
>> (s
-1)) & 1) == 1)
1185 result
= OB_CLAMP(t
);
1191 RNEUOB(signed24 a
, unsigned8 s
)
1199 result
= (((unsigned24
)a
& MASK24
) > OB_BIT(23) ? 1 : 0);
1202 t
= ((unsigned24
)a
& MASK24
) >> s
;
1203 if (s
> 0 && ((a
>> (s
-1)) & 1) == 1)
1205 if (s
> 1 && (a
& OB_ONES(s
-1)) != 0)
1210 result
= OB_CLAMP(t
);
1216 RZUOB(signed24 a
, unsigned8 s
)
1225 t
= ((unsigned24
)a
& MASK24
) >> s
;
1226 result
= OB_CLAMP(t
);
1232 static const QH_ROUND qh_round
[] = {
1233 RNASQH
, RNAUQH
, RNESQH
, RNEUQH
, RZSQH
, RZUQH
1236 static const OB_ROUND ob_round
[] = {
1237 NULL
, RNAUOB
, NULL
, RNEUOB
, NULL
, RZUOB
1242 qh_vector_round(sim_cpu
*cpu
, address_word cia
, unsigned64 v2
, QH_ROUND round
)
1244 unsigned64 result
= 0;
1249 for (i
= 0; i
< 4; i
++)
1251 h2
= (signed16
)(v2
& 0xFFFF);
1253 h
= (*round
)(ACC
.qh
[i
], h2
);
1256 UnpredictableResult ();
1260 result
|= ((unsigned64
)((unsigned16
)h
) << s
);
1267 qh_map_round(sim_cpu
*cpu
, address_word cia
, signed16 h2
, QH_ROUND round
)
1269 unsigned64 result
= 0;
1274 for (i
= 0; i
< 4; i
++)
1277 h
= (*round
)(ACC
.qh
[i
], h2
);
1280 UnpredictableResult ();
1283 result
|= ((unsigned64
)((unsigned16
)h
) << s
);
1290 ob_vector_round(sim_cpu
*cpu
, address_word cia
, unsigned64 v2
, OB_ROUND round
)
1292 unsigned64 result
= 0;
1297 for (i
= 0; i
< 8; i
++)
1299 b2
= v2
& 0xFF; v2
>>= 8;
1300 b
= (*round
)(ACC
.ob
[i
], b2
);
1301 result
|= ((unsigned64
)b
<< s
);
1308 ob_map_round(sim_cpu
*cpu
, address_word cia
, unsigned8 b2
, OB_ROUND round
)
1310 unsigned64 result
= 0;
1315 for (i
= 0; i
< 8; i
++)
1317 b
= (*round
)(ACC
.ob
[i
], b2
);
1318 result
|= ((unsigned64
)b
<< s
);
1326 mdmx_round_op(sim_cpu
*cpu
,
1333 unsigned64 result
= 0;
1335 switch (MX_FMT (fmtsel
))
1338 switch (MX_VT (fmtsel
))
1341 op2
= ValueFPR(vt
, fmt_mdmx
);
1342 result
= qh_map_round(cpu
, cia
, QH_ELEM(op2
, fmtsel
), qh_round
[rm
]);
1345 op2
= ValueFPR(vt
, fmt_mdmx
);
1346 result
= qh_vector_round(cpu
, cia
, op2
, qh_round
[rm
]);
1349 result
= qh_map_round(cpu
, cia
, vt
, qh_round
[rm
]);
1354 switch (MX_VT (fmtsel
))
1357 op2
= ValueFPR(vt
, fmt_mdmx
);
1358 result
= ob_map_round(cpu
, cia
, OB_ELEM(op2
, fmtsel
), ob_round
[rm
]);
1361 op2
= ValueFPR(vt
, fmt_mdmx
);
1362 result
= ob_vector_round(cpu
, cia
, op2
, ob_round
[rm
]);
1365 result
= ob_map_round(cpu
, cia
, vt
, ob_round
[rm
]);
1377 /* Shuffle operation. */
1380 enum {vs
, ss
, vt
} source
;
1384 static const sh_map ob_shuffle
[][8] = {
1385 /* MDMX 2.0 encodings (3-4, 6-7). */
1386 /* vr5400 encoding (5), otherwise. */
1388 {{vt
,4}, {vs
,4}, {vt
,5}, {vs
,5}, {vt
,6}, {vs
,6}, {vt
,7}, {vs
,7}}, /* RSVD */
1389 {{vt
,0}, {vs
,0}, {vt
,1}, {vs
,1}, {vt
,2}, {vs
,2}, {vt
,3}, {vs
,3}}, /* RSVD */
1390 {{vs
,0}, {ss
,0}, {vs
,1}, {ss
,1}, {vs
,2}, {ss
,2}, {vs
,3}, {ss
,3}}, /* upsl */
1391 {{vt
,1}, {vt
,3}, {vt
,5}, {vt
,7}, {vs
,1}, {vs
,3}, {vs
,5}, {vs
,7}}, /* pach */
1392 {{vt
,0}, {vt
,2}, {vt
,4}, {vt
,6}, {vs
,0}, {vs
,2}, {vs
,4}, {vs
,6}}, /* pacl */
1393 {{vt
,4}, {vs
,4}, {vt
,5}, {vs
,5}, {vt
,6}, {vs
,6}, {vt
,7}, {vs
,7}}, /* mixh */
1394 {{vt
,0}, {vs
,0}, {vt
,1}, {vs
,1}, {vt
,2}, {vs
,2}, {vt
,3}, {vs
,3}} /* mixl */
1397 static const sh_map qh_shuffle
[][4] = {
1398 {{vt
,2}, {vs
,2}, {vt
,3}, {vs
,3}}, /* mixh */
1399 {{vt
,0}, {vs
,0}, {vt
,1}, {vs
,1}}, /* mixl */
1400 {{vt
,1}, {vt
,3}, {vs
,1}, {vs
,3}}, /* pach */
1402 {{vt
,1}, {vs
,0}, {vt
,3}, {vs
,2}}, /* bfla */
1404 {{vt
,2}, {vt
,3}, {vs
,2}, {vs
,3}}, /* repa */
1405 {{vt
,0}, {vt
,1}, {vs
,0}, {vs
,1}} /* repb */
1410 mdmx_shuffle(sim_cpu
*cpu
,
1416 unsigned64 result
= 0;
1420 if ((shop
& 0x3) == 0x1) /* QH format. */
1424 for (i
= 0; i
< 4; i
++)
1428 switch (qh_shuffle
[op
][i
].source
)
1440 result
|= (((v
>> 16*qh_shuffle
[op
][i
].index
) & 0xFFFF) << s
);
1444 else if ((shop
& 0x1) == 0x0) /* OB format. */
1448 for (i
= 0; i
< 8; i
++)
1451 unsigned int ishift
= 8*ob_shuffle
[op
][i
].index
;
1453 switch (ob_shuffle
[op
][i
].source
)
1456 b
= (op1
>> ishift
) & 0xFF;
1459 b
= ((op1
>> ishift
) & 0x80) ? 0xFF : 0;
1462 b
= (op2
>> ishift
) & 0xFF;
1468 result
|= ((unsigned64
)b
<< s
);