/* Simulation code for the MIPS MDMX ASE.
   Copyright (C) 2002-2024 Free Software Foundation, Inc.
   Contributed by Ed Satterthwaite and Chris Demetriou, of Broadcom
   Corporation (SiByte).

This file is part of GDB, the GNU debugger.

This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
/* This must come before any other includes.  */
#include "defs.h"

#include "sim-main.h"

/* Within mdmx.c we refer to the sim_cpu directly.  */
#define CPU cpu
#define SD  (CPU_STATE(CPU))

/* XXX FIXME: temporary hack while the impact of making unpredictable()
   a "normal" (non-igen) function is evaluated.  */
#define Unpredictable() unpredictable_action (cpu, cia)
/* MDMX Representations

   An 8-bit packed byte element (OB) is always unsigned.
   The 24-bit accumulators are signed and are represented as 32-bit
   signed values, which are reduced to 24-bit signed values prior to
   Round and Clamp operations.

   A 16-bit packed halfword element (QH) is always signed.
   The 48-bit accumulators are signed and are represented as 64-bit
   signed values, which are reduced to 48-bit signed values prior to
   Round and Clamp operations.

   The code below assumes a 2's-complement representation of signed
   quantities.  Care is required to clear extended sign bits when
   narrower signed values are repacked into wider unsigned fields.

   The code (and the code for arithmetic shifts in mips.igen) also makes
   the (not guaranteed portable) assumption that right shifts of signed
   quantities in C do sign extension.  */
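/* Illustration: an OB accumulator element holding 0x12345678 in its 32-bit
   container is reduced to the 24-bit value 0x345678 (see MASK24 below)
   before any Round and Clamp step is applied.  */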
typedef uint64_t unsigned48;
#define MASK48 (UNSIGNED64 (0xffffffffffff))

typedef uint32_t unsigned24;
#define MASK24 (UNSIGNED32 (0xffffff))
typedef enum {
  mdmx_ob,      /* OB (octal byte) */
  mdmx_qh       /* QH (quad half-word) */
} MX_fmt;

typedef enum {
  sel_elem,     /* element select */
  sel_vect,     /* vector select */
  sel_imm       /* immediate select */
} MX_select;
#define OB_MAX  ((uint8_t)0xFF)
#define QH_MIN  ((int16_t)0x8000)
#define QH_MAX  ((int16_t)0x7FFF)

#define OB_CLAMP(x)  ((uint8_t)((x) > OB_MAX ? OB_MAX : (x)))
#define QH_CLAMP(x)  ((int16_t)((x) < QH_MIN ? QH_MIN : \
                                ((x) > QH_MAX ? QH_MAX : (x))))

#define MX_FMT(fmtsel)  (((fmtsel) & 0x1) == 0 ? mdmx_ob : mdmx_qh)
#define MX_VT(fmtsel)   (((fmtsel) & 0x10) == 0 ? sel_elem : \
                         (((fmtsel) & 0x18) == 0x10 ? sel_vect : sel_imm))

#define QH_ELEM(v,fmtsel)  \
        ((int16_t)(((v) >> (((fmtsel) & 0xC) << 2)) & 0xFFFF))
#define OB_ELEM(v,fmtsel)  \
        ((uint8_t)(((v) >> (((fmtsel) & 0xE) << 2)) & 0xFF))
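/* Worked example of the fmtsel decoding above: for fmtsel == 0x0B, bit 0 is
   set so MX_FMT yields mdmx_qh, bit 4 is clear so MX_VT yields sel_elem, and
   QH_ELEM shifts the vector right by (0x0B & 0xC) << 2 == 32 bits, i.e. it
   extracts halfword element 2 of the second operand.  */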
typedef int16_t (*QH_FUNC)(int16_t, int16_t);
typedef uint8_t (*OB_FUNC)(uint8_t, uint8_t);
/* vectorized logical operators */

static int16_t
AndQH(int16_t ts, int16_t tt)
{
  return (int16_t)((uint16_t)ts & (uint16_t)tt);
}

static uint8_t
AndOB(uint8_t ts, uint8_t tt)
{
  return ts & tt;
}

static int16_t
NorQH(int16_t ts, int16_t tt)
{
  return (int16_t)(((uint16_t)ts | (uint16_t)tt) ^ 0xFFFF);
}

static uint8_t
NorOB(uint8_t ts, uint8_t tt)
{
  return (ts | tt) ^ 0xFF;
}

static int16_t
OrQH(int16_t ts, int16_t tt)
{
  return (int16_t)((uint16_t)ts | (uint16_t)tt);
}

static uint8_t
OrOB(uint8_t ts, uint8_t tt)
{
  return ts | tt;
}

static int16_t
XorQH(int16_t ts, int16_t tt)
{
  return (int16_t)((uint16_t)ts ^ (uint16_t)tt);
}

static uint8_t
XorOB(uint8_t ts, uint8_t tt)
{
  return ts ^ tt;
}

static int16_t
SLLQH(int16_t ts, int16_t tt)
{
  uint32_t s = (uint32_t)tt & 0xF;
  return (int16_t)(((uint32_t)ts << s) & 0xFFFF);
}

static uint8_t
SLLOB(uint8_t ts, uint8_t tt)
{
  uint32_t s = tt & 0x7;
  return (ts << s) & 0xFF;
}

static int16_t
SRLQH(int16_t ts, int16_t tt)
{
  uint32_t s = (uint32_t)tt & 0xF;
  return (int16_t)((uint16_t)ts >> s);
}

static uint8_t
SRLOB(uint8_t ts, uint8_t tt)
{
  uint32_t s = tt & 0x7;
  return ts >> s;
}
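/* Note that the shift helpers above take the shift count modulo the element
   width (low 4 bits for QH, low 3 bits for OB), so out-of-range counts wrap
   rather than clearing the element.  */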
/* Vectorized arithmetic operators.  */

static int16_t
AddQH(int16_t ts, int16_t tt)
{
  int32_t t = (int32_t)ts + (int32_t)tt;
  return QH_CLAMP(t);
}

static uint8_t
AddOB(uint8_t ts, uint8_t tt)
{
  uint32_t t = (uint32_t)ts + (uint32_t)tt;
  return OB_CLAMP(t);
}

static int16_t
SubQH(int16_t ts, int16_t tt)
{
  int32_t t = (int32_t)ts - (int32_t)tt;
  return QH_CLAMP(t);
}

static uint8_t
SubOB(uint8_t ts, uint8_t tt)
{
  int32_t t;
  t = (int32_t)ts - (int32_t)tt;
  if (t < 0)
    t = 0;
  return (uint8_t)t;
}

static int16_t
MinQH(int16_t ts, int16_t tt)
{
  return (ts < tt ? ts : tt);
}

static uint8_t
MinOB(uint8_t ts, uint8_t tt)
{
  return (ts < tt ? ts : tt);
}

static int16_t
MaxQH(int16_t ts, int16_t tt)
{
  return (ts > tt ? ts : tt);
}

static uint8_t
MaxOB(uint8_t ts, uint8_t tt)
{
  return (ts > tt ? ts : tt);
}

static int16_t
MulQH(int16_t ts, int16_t tt)
{
  int32_t t = (int32_t)ts * (int32_t)tt;
  return QH_CLAMP(t);
}

static uint8_t
MulOB(uint8_t ts, uint8_t tt)
{
  uint32_t t = (uint32_t)ts * (uint32_t)tt;
  return OB_CLAMP(t);
}

/* "msgn" and "sra" are defined only for QH format.  */

static int16_t
MsgnQH(int16_t ts, int16_t tt)
{
  int16_t t;
  if (ts < 0)
    t = (tt == QH_MIN ? QH_MAX : -tt);
  else if (ts == 0)
    t = 0;
  else
    t = tt;
  return t;
}

static int16_t
SRAQH(int16_t ts, int16_t tt)
{
  uint32_t s = (uint32_t)tt & 0xF;
  return (int16_t)((int32_t)ts >> s);
}

/* "pabsdiff" and "pavg" are defined only for OB format.  */

static uint8_t
AbsDiffOB(uint8_t ts, uint8_t tt)
{
  return (ts >= tt ? ts - tt : tt - ts);
}

static uint8_t
AvgOB(uint8_t ts, uint8_t tt)
{
  return ((uint32_t)ts + (uint32_t)tt + 1) >> 1;
}
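/* AvgOB rounds halves up: for example AvgOB(1, 2) == (1 + 2 + 1) >> 1 == 2,
   while AvgOB(2, 2) == 2.  */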
/* Dispatch tables for operations that update a CPR.  */

static const QH_FUNC qh_func[] = {
  AndQH,  NorQH,  OrQH,   XorQH,  SLLQH,  SRLQH,
  AddQH,  SubQH,  MinQH,  MaxQH,
  MulQH,  MsgnQH, SRAQH,  NULL,   NULL
};

static const OB_FUNC ob_func[] = {
  AndOB,  NorOB,  OrOB,   XorOB,  SLLOB,  SRLOB,
  AddOB,  SubOB,  MinOB,  MaxOB,
  MulOB,  NULL,   NULL,   AbsDiffOB, AvgOB
};
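/* The NULL slots mark operations that exist only in the other format
   ("msgn" and "sra" are QH-only, "pabsdiff" and "pavg" are OB-only); the
   decoder is expected never to dispatch an op index through the wrong
   table.  */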
/* Auxiliary functions for CPR updates.  */

/* Vector mapping for QH format.  */
static uint64_t
qh_vector_op(uint64_t v1, uint64_t v2, QH_FUNC func)
{
  uint64_t result = 0;
  int i;
  int16_t h, h1, h2;

  for (i = 0; i < 64; i += 16)
    {
      h1 = (int16_t)(v1 & 0xFFFF);  v1 >>= 16;
      h2 = (int16_t)(v2 & 0xFFFF);  v2 >>= 16;
      h = (*func)(h1, h2);
      result |= ((uint64_t)((uint16_t)h) << i);
    }
  return result;
}

static uint64_t
qh_map_op(uint64_t v1, int16_t h2, QH_FUNC func)
{
  uint64_t result = 0;
  int i;
  int16_t h, h1;

  for (i = 0; i < 64; i += 16)
    {
      h1 = (int16_t)(v1 & 0xFFFF);  v1 >>= 16;
      h = (*func)(h1, h2);
      result |= ((uint64_t)((uint16_t)h) << i);
    }
  return result;
}

/* Vector operations for OB format.  */

static uint64_t
ob_vector_op(uint64_t v1, uint64_t v2, OB_FUNC func)
{
  uint64_t result = 0;
  int i;
  uint8_t b, b1, b2;

  for (i = 0; i < 64; i += 8)
    {
      b1 = v1 & 0xFF;  v1 >>= 8;
      b2 = v2 & 0xFF;  v2 >>= 8;
      b = (*func)(b1, b2);
      result |= ((uint64_t)b << i);
    }
  return result;
}

static uint64_t
ob_map_op(uint64_t v1, uint8_t b2, OB_FUNC func)
{
  uint64_t result = 0;
  int i;
  uint8_t b, b1;

  for (i = 0; i < 64; i += 8)
    {
      b1 = v1 & 0xFF;  v1 >>= 8;
      b = (*func)(b1, b2);
      result |= ((uint64_t)b << i);
    }
  return result;
}
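/* The *_map_op helpers above apply FUNC with one fixed scalar as the second
   operand for every element (the element-select and immediate forms), while
   the *_vector_op helpers pair corresponding elements of V1 and V2.  */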
/* Primary entry for operations that update CPRs.  */
uint64_t
mdmx_cpr_op(sim_cpu *cpu,
            address_word cia,
            int op,
            uint64_t op1,
            int vt,
            MX_fmtsel fmtsel)
{
  uint64_t op2;
  uint64_t result = 0;

  switch (MX_FMT (fmtsel))
    {
    case mdmx_qh:
      switch (MX_VT (fmtsel))
        {
        case sel_elem:
          op2 = ValueFPR(vt, fmt_mdmx);
          result = qh_map_op(op1, QH_ELEM(op2, fmtsel), qh_func[op]);
          break;
        case sel_vect:
          result = qh_vector_op(op1, ValueFPR(vt, fmt_mdmx), qh_func[op]);
          break;
        case sel_imm:
          result = qh_map_op(op1, vt, qh_func[op]);
          break;
        }
      break;
    case mdmx_ob:
      switch (MX_VT (fmtsel))
        {
        case sel_elem:
          op2 = ValueFPR(vt, fmt_mdmx);
          result = ob_map_op(op1, OB_ELEM(op2, fmtsel), ob_func[op]);
          break;
        case sel_vect:
          result = ob_vector_op(op1, ValueFPR(vt, fmt_mdmx), ob_func[op]);
          break;
        case sel_imm:
          result = ob_map_op(op1, vt, ob_func[op]);
          break;
        }
      break;
    }

  return result;
}
/* Operations that update CCs.  */

static void
qh_vector_test(sim_cpu *cpu, uint64_t v1, uint64_t v2, int cond)
{
  int i;
  int16_t h1, h2;
  int boolean;

  for (i = 0; i < 4; i++)
    {
      h1 = (int16_t)(v1 & 0xFFFF);  v1 >>= 16;
      h2 = (int16_t)(v2 & 0xFFFF);  v2 >>= 16;
      boolean = ((cond & MX_C_EQ) && (h1 == h2)) ||
        ((cond & MX_C_LT) && (h1 < h2));
      SETFCC(i, boolean);
    }
}

static void
qh_map_test(sim_cpu *cpu, uint64_t v1, int16_t h2, int cond)
{
  int i;
  int16_t h1;
  int boolean;

  for (i = 0; i < 4; i++)
    {
      h1 = (int16_t)(v1 & 0xFFFF);  v1 >>= 16;
      boolean = ((cond & MX_C_EQ) && (h1 == h2)) ||
        ((cond & MX_C_LT) && (h1 < h2));
      SETFCC(i, boolean);
    }
}

static void
ob_vector_test(sim_cpu *cpu, uint64_t v1, uint64_t v2, int cond)
{
  int i;
  uint8_t b1, b2;
  int boolean;

  for (i = 0; i < 8; i++)
    {
      b1 = v1 & 0xFF;  v1 >>= 8;
      b2 = v2 & 0xFF;  v2 >>= 8;
      boolean = ((cond & MX_C_EQ) && (b1 == b2)) ||
        ((cond & MX_C_LT) && (b1 < b2));
      SETFCC(i, boolean);
    }
}

static void
ob_map_test(sim_cpu *cpu, uint64_t v1, uint8_t b2, int cond)
{
  int i;
  uint8_t b1;
  int boolean;

  for (i = 0; i < 8; i++)
    {
      b1 = (uint8_t)(v1 & 0xFF);  v1 >>= 8;
      boolean = ((cond & MX_C_EQ) && (b1 == b2)) ||
        ((cond & MX_C_LT) && (b1 < b2));
      SETFCC(i, boolean);
    }
}
void
mdmx_cc_op(sim_cpu *cpu,
           address_word cia,
           int cond,
           uint64_t v1,
           int vt,
           MX_fmtsel fmtsel)
{
  uint64_t op2;

  switch (MX_FMT (fmtsel))
    {
    case mdmx_qh:
      switch (MX_VT (fmtsel))
        {
        case sel_elem:
          op2 = ValueFPR(vt, fmt_mdmx);
          qh_map_test(cpu, v1, QH_ELEM(op2, fmtsel), cond);
          break;
        case sel_vect:
          qh_vector_test(cpu, v1, ValueFPR(vt, fmt_mdmx), cond);
          break;
        case sel_imm:
          qh_map_test(cpu, v1, vt, cond);
          break;
        }
      break;
    case mdmx_ob:
      switch (MX_VT (fmtsel))
        {
        case sel_elem:
          op2 = ValueFPR(vt, fmt_mdmx);
          ob_map_test(cpu, v1, OB_ELEM(op2, fmtsel), cond);
          break;
        case sel_vect:
          ob_vector_test(cpu, v1, ValueFPR(vt, fmt_mdmx), cond);
          break;
        case sel_imm:
          ob_map_test(cpu, v1, vt, cond);
          break;
        }
      break;
    }
}
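/* COND is a bit mask: MX_C_EQ requests an equality test, MX_C_LT a signed
   (QH) or unsigned (OB) less-than test, and setting both bits gives a
   less-than-or-equal test for each element.  */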
/* Pick operations.  */

static uint64_t
qh_vector_pick(sim_cpu *cpu, uint64_t v1, uint64_t v2, int tf)
{
  uint64_t result = 0;
  int i, s;
  uint16_t h;

  s = 0;
  for (i = 0; i < 4; i++)
    {
      h = ((GETFCC(i) == tf) ? (v1 & 0xFFFF) : (v2 & 0xFFFF));
      v1 >>= 16;  v2 >>= 16;
      result |= ((uint64_t)h << s);
      s += 16;
    }
  return result;
}

static uint64_t
qh_map_pick(sim_cpu *cpu, uint64_t v1, int16_t h2, int tf)
{
  uint64_t result = 0;
  int i, s;
  uint16_t h;

  s = 0;
  for (i = 0; i < 4; i++)
    {
      h = (GETFCC(i) == tf) ? (v1 & 0xFFFF) : (uint16_t)h2;
      v1 >>= 16;
      result |= ((uint64_t)h << s);
      s += 16;
    }
  return result;
}

static uint64_t
ob_vector_pick(sim_cpu *cpu, uint64_t v1, uint64_t v2, int tf)
{
  uint64_t result = 0;
  int i, s;
  uint8_t b;

  s = 0;
  for (i = 0; i < 8; i++)
    {
      b = (GETFCC(i) == tf) ? (v1 & 0xFF) : (v2 & 0xFF);
      v1 >>= 8;  v2 >>= 8;
      result |= ((uint64_t)b << s);
      s += 8;
    }
  return result;
}

static uint64_t
ob_map_pick(sim_cpu *cpu, uint64_t v1, uint8_t b2, int tf)
{
  uint64_t result = 0;
  int i, s;
  uint8_t b;

  s = 0;
  for (i = 0; i < 8; i++)
    {
      b = (GETFCC(i) == tf) ? (v1 & 0xFF) : b2;
      v1 >>= 8;
      result |= ((uint64_t)b << s);
      s += 8;
    }
  return result;
}
uint64_t
mdmx_pick_op(sim_cpu *cpu,
             address_word cia,
             int tf,
             uint64_t v1,
             int vt,
             MX_fmtsel fmtsel)
{
  uint64_t result = 0;
  uint64_t op2;

  switch (MX_FMT (fmtsel))
    {
    case mdmx_qh:
      switch (MX_VT (fmtsel))
        {
        case sel_elem:
          op2 = ValueFPR(vt, fmt_mdmx);
          result = qh_map_pick(cpu, v1, QH_ELEM(op2, fmtsel), tf);
          break;
        case sel_vect:
          result = qh_vector_pick(cpu, v1, ValueFPR(vt, fmt_mdmx), tf);
          break;
        case sel_imm:
          result = qh_map_pick(cpu, v1, vt, tf);
          break;
        }
      break;
    case mdmx_ob:
      switch (MX_VT (fmtsel))
        {
        case sel_elem:
          op2 = ValueFPR(vt, fmt_mdmx);
          result = ob_map_pick(cpu, v1, OB_ELEM(op2, fmtsel), tf);
          break;
        case sel_vect:
          result = ob_vector_pick(cpu, v1, ValueFPR(vt, fmt_mdmx), tf);
          break;
        case sel_imm:
          result = ob_map_pick(cpu, v1, vt, tf);
          break;
        }
      break;
    }
  return result;
}
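/* Each pick helper chooses, element by element, the field from V1 when that
   element's condition code equals TF and the field from the second operand
   otherwise, so the "pick true" and "pick false" forms differ only in the
   TF argument.  */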
typedef void (*QH_ACC)(signed48 *a, int16_t ts, int16_t tt);

static void
AccAddAQH(signed48 *a, int16_t ts, int16_t tt)
{
  *a += (signed48)ts + (signed48)tt;
}

static void
AccAddLQH(signed48 *a, int16_t ts, int16_t tt)
{
  *a = (signed48)ts + (signed48)tt;
}

static void
AccMulAQH(signed48 *a, int16_t ts, int16_t tt)
{
  *a += (signed48)ts * (signed48)tt;
}

static void
AccMulLQH(signed48 *a, int16_t ts, int16_t tt)
{
  *a = (signed48)ts * (signed48)tt;
}

static void
SubMulAQH(signed48 *a, int16_t ts, int16_t tt)
{
  *a -= (signed48)ts * (signed48)tt;
}

static void
SubMulLQH(signed48 *a, int16_t ts, int16_t tt)
{
  *a = -((signed48)ts * (signed48)tt);
}

static void
AccSubAQH(signed48 *a, int16_t ts, int16_t tt)
{
  *a += (signed48)ts - (signed48)tt;
}

static void
AccSubLQH(signed48 *a, int16_t ts, int16_t tt)
{
  *a = (signed48)ts - (signed48)tt;
}
typedef void (*OB_ACC)(signed24 *acc, uint8_t ts, uint8_t tt);

static void
AccAddAOB(signed24 *a, uint8_t ts, uint8_t tt)
{
  *a += (signed24)ts + (signed24)tt;
}

static void
AccAddLOB(signed24 *a, uint8_t ts, uint8_t tt)
{
  *a = (signed24)ts + (signed24)tt;
}

static void
AccMulAOB(signed24 *a, uint8_t ts, uint8_t tt)
{
  *a += (signed24)ts * (signed24)tt;
}

static void
AccMulLOB(signed24 *a, uint8_t ts, uint8_t tt)
{
  *a = (signed24)ts * (signed24)tt;
}

static void
SubMulAOB(signed24 *a, uint8_t ts, uint8_t tt)
{
  *a -= (signed24)ts * (signed24)tt;
}

static void
SubMulLOB(signed24 *a, uint8_t ts, uint8_t tt)
{
  *a = -((signed24)ts * (signed24)tt);
}

static void
AccSubAOB(signed24 *a, uint8_t ts, uint8_t tt)
{
  *a += (signed24)ts - (signed24)tt;
}

static void
AccSubLOB(signed24 *a, uint8_t ts, uint8_t tt)
{
  *a = (signed24)ts - (signed24)tt;
}

static void
AccAbsDiffOB(signed24 *a, uint8_t ts, uint8_t tt)
{
  uint8_t t = (ts >= tt ? ts - tt : tt - ts);
  *a += t;
}
/* Dispatch tables for operations that update the accumulator.  */

static const QH_ACC qh_acc[] = {
  AccAddAQH, AccAddLQH, AccMulAQH, AccMulLQH,
  SubMulAQH, SubMulLQH, AccSubAQH, AccSubLQH,
  NULL
};

static const OB_ACC ob_acc[] = {
  AccAddAOB, AccAddLOB, AccMulAOB, AccMulLOB,
  SubMulAOB, SubMulLOB, AccSubAOB, AccSubLOB,
  AccAbsDiffOB
};
static void
qh_vector_acc(signed48 a[], uint64_t v1, uint64_t v2, QH_ACC acc)
{
  int i;
  int16_t h1, h2;

  for (i = 0; i < 4; i++)
    {
      h1 = (int16_t)(v1 & 0xFFFF);  v1 >>= 16;
      h2 = (int16_t)(v2 & 0xFFFF);  v2 >>= 16;
      (*acc)(&a[i], h1, h2);
    }
}

static void
qh_map_acc(signed48 a[], uint64_t v1, int16_t h2, QH_ACC acc)
{
  int i;
  int16_t h1;

  for (i = 0; i < 4; i++)
    {
      h1 = (int16_t)(v1 & 0xFFFF);  v1 >>= 16;
      (*acc)(&a[i], h1, h2);
    }
}

static void
ob_vector_acc(signed24 a[], uint64_t v1, uint64_t v2, OB_ACC acc)
{
  int i;
  uint8_t b1, b2;

  for (i = 0; i < 8; i++)
    {
      b1 = v1 & 0xFF;  v1 >>= 8;
      b2 = v2 & 0xFF;  v2 >>= 8;
      (*acc)(&a[i], b1, b2);
    }
}

static void
ob_map_acc(signed24 a[], uint64_t v1, uint8_t b2, OB_ACC acc)
{
  int i;
  uint8_t b1;

  for (i = 0; i < 8; i++)
    {
      b1 = v1 & 0xFF;  v1 >>= 8;
      (*acc)(&a[i], b1, b2);
    }
}
/* Primary entry for operations that accumulate.  */
void
mdmx_acc_op(sim_cpu *cpu,
            address_word cia,
            int op,
            uint64_t op1,
            int vt,
            MX_fmtsel fmtsel)
{
  uint64_t op2;

  switch (MX_FMT (fmtsel))
    {
    case mdmx_qh:
      switch (MX_VT (fmtsel))
        {
        case sel_elem:
          op2 = ValueFPR(vt, fmt_mdmx);
          qh_map_acc(ACC.qh, op1, QH_ELEM(op2, fmtsel), qh_acc[op]);
          break;
        case sel_vect:
          qh_vector_acc(ACC.qh, op1, ValueFPR(vt, fmt_mdmx), qh_acc[op]);
          break;
        case sel_imm:
          qh_map_acc(ACC.qh, op1, vt, qh_acc[op]);
          break;
        }
      break;
    case mdmx_ob:
      switch (MX_VT (fmtsel))
        {
        case sel_elem:
          op2 = ValueFPR(vt, fmt_mdmx);
          ob_map_acc(ACC.ob, op1, OB_ELEM(op2, fmtsel), ob_acc[op]);
          break;
        case sel_vect:
          ob_vector_acc(ACC.ob, op1, ValueFPR(vt, fmt_mdmx), ob_acc[op]);
          break;
        case sel_imm:
          ob_map_acc(ACC.ob, op1, vt, ob_acc[op]);
          break;
        }
      break;
    }
}
/* Reading and writing accumulator (no conversion).  */

uint64_t
mdmx_rac_op(sim_cpu *cpu,
            address_word cia,
            int op,
            int fmt)
{
  uint64_t result;
  unsigned int shift;
  int i;

  shift = op;                   /* L = 00, M = 01, H = 10.  */
  result = 0;

  switch (fmt)
    {
    case mdmx_qh:
      shift <<= 4;              /* 16 bits per element.  */
      for (i = 3; i >= 0; --i)
        {
          result <<= 16;
          result |= ((ACC.qh[i] >> shift) & 0xFFFF);
        }
      break;
    case mdmx_ob:
      shift <<= 3;              /*  8 bits per element.  */
      for (i = 7; i >= 0; --i)
        {
          result <<= 8;
          result |= ((ACC.ob[i] >> shift) & 0xFF);
        }
      break;
    }
  return result;
}
void
mdmx_wacl(sim_cpu *cpu,
          address_word cia,
          int fmt,
          uint64_t vs,
          uint64_t vt)
{
  int i;

  switch (fmt)
    {
    case mdmx_qh:
      for (i = 0; i < 4; i++)
        {
          int32_t s = (int16_t)(vs & 0xFFFF);
          ACC.qh[i] = ((signed48)s << 16) | (vt & 0xFFFF);
          vs >>= 16;  vt >>= 16;
        }
      break;
    case mdmx_ob:
      for (i = 0; i < 8; i++)
        {
          int16_t s = (int8_t)(vs & 0xFF);
          ACC.ob[i] = ((signed24)s << 8) | (vt & 0xFF);
          vs >>= 8;  vt >>= 8;
        }
      break;
    }
}
void
mdmx_wach(sim_cpu *cpu,
          address_word cia,
          int fmt,
          uint64_t vs)
{
  int i;

  switch (fmt)
    {
    case mdmx_qh:
      for (i = 0; i < 4; i++)
        {
          int32_t s = (int16_t)(vs & 0xFFFF);
          ACC.qh[i] &= ~((signed48)0xFFFF << 32);
          ACC.qh[i] |= ((signed48)s << 32);
          vs >>= 16;
        }
      break;
    case mdmx_ob:
      for (i = 0; i < 8; i++)
        {
          ACC.ob[i] &= ~((signed24)0xFF << 16);
          ACC.ob[i] |= ((signed24)(vs & 0xFF) << 16);
          vs >>= 8;
        }
      break;
    }
}
/* Reading and writing accumulator (rounding conversions).
   Enumerating function guarantees s >= 0 for QH ops.  */

typedef int16_t (*QH_ROUND)(signed48 a, int16_t s);

#define QH_BIT(n)   ((unsigned48)1 << (n))
#define QH_ONES(n)  (((unsigned48)1 << (n))-1)
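/* The rounding helpers below follow the MDMX rounding-mode suffixes: RNA*
   rounds to nearest with ties away from zero, RNE* rounds to nearest even,
   and RZ* truncates; the trailing S or U selects clamping to the signed or
   unsigned element range.  */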
static int16_t
RNASQH(signed48 a, int16_t s)
{
  signed48 t;
  int16_t result;

  if (s > 48)
    result = 0;
  else
    {
      t = a >> s;
      if ((a & QH_BIT(47)) == 0)
        {
          if (s > 0 && ((a >> (s-1)) & 1) == 1)
            t++;
          if (t > QH_MAX)
            t = QH_MAX;
        }
      else
        {
          if (s > 0 && ((a >> (s-1)) & 1) == 1)
            {
              if (s > 1 && ((unsigned48)a & QH_ONES(s-1)) != 0)
                t++;
            }
          if (t < QH_MIN)
            t = QH_MIN;
        }
      result = (int16_t)t;
    }
  return result;
}
static int16_t
RNAUQH(signed48 a, int16_t s)
{
  unsigned48 t;
  int16_t result;

  if (s > 48)
    result = 0;
  else if (s == 48)
    result = ((unsigned48)a & MASK48) >> 47;
  else
    {
      t = ((unsigned48)a & MASK48) >> s;
      if (s > 0 && ((a >> (s-1)) & 1) == 1)
        t++;
      if (t > 0xFFFF)
        t = 0xFFFF;
      result = (int16_t)t;
    }
  return result;
}
static int16_t
RNESQH(signed48 a, int16_t s)
{
  signed48 t;
  int16_t result;

  if (s > 48)
    result = 0;
  else
    {
      t = a >> s;
      if (s > 0 && ((a >> (s-1)) & 1) == 1)
        {
          if (s == 1 || (a & QH_ONES(s-1)) == 0)
            t += t & 1;         /* Tie: round to even.  */
          else
            t += 1;
        }
      if ((a & QH_BIT(47)) == 0)
        {
          if (t > QH_MAX)
            t = QH_MAX;
        }
      else
        {
          if (t < QH_MIN)
            t = QH_MIN;
        }
      result = (int16_t)t;
    }
  return result;
}
static int16_t
RNEUQH(signed48 a, int16_t s)
{
  unsigned48 t;
  int16_t result;

  if (s > 48)
    result = 0;
  else if (s == 48)
    result = ((unsigned48)a > QH_BIT(47) ? 1 : 0);
  else
    {
      t = ((unsigned48)a & MASK48) >> s;
      if (s > 0 && ((a >> (s-1)) & 1) == 1)
        {
          if (s > 1 && (a & QH_ONES(s-1)) != 0)
            t++;
          else
            t += t & 1;         /* Tie: round to even.  */
        }
      if (t > 0xFFFF)
        t = 0xFFFF;
      result = (int16_t)t;
    }
  return result;
}
static int16_t
RZSQH(signed48 a, int16_t s)
{
  signed48 t;
  int16_t result;

  if (s > 48)
    result = 0;
  else
    {
      t = a >> s;
      if ((a & QH_BIT(47)) == 0)
        {
          if (t > QH_MAX)
            t = QH_MAX;
        }
      else
        {
          if (t < QH_MIN)
            t = QH_MIN;
        }
      result = (int16_t)t;
    }
  return result;
}
static int16_t
RZUQH(signed48 a, int16_t s)
{
  unsigned48 t;
  int16_t result;

  if (s > 48)
    result = 0;
  else if (s == 48)
    result = ((unsigned48)a > QH_BIT(47) ? 1 : 0);
  else
    {
      t = ((unsigned48)a & MASK48) >> s;
      if (t > 0xFFFF)
        t = 0xFFFF;
      result = (int16_t)t;
    }
  return result;
}
typedef uint8_t (*OB_ROUND)(signed24 a, uint8_t s);

#define OB_BIT(n)   ((unsigned24)1 << (n))
#define OB_ONES(n)  (((unsigned24)1 << (n))-1)
static uint8_t
RNAUOB(signed24 a, uint8_t s)
{
  unsigned24 t;
  uint8_t result;

  if (s > 24)
    result = 0;
  else if (s == 24)
    result = ((unsigned24)a & MASK24) >> 23;
  else
    {
      t = ((unsigned24)a & MASK24) >> s;
      if (s > 0 && ((a >> (s-1)) & 1) == 1)
        t++;
      result = OB_CLAMP(t);
    }
  return result;
}
static uint8_t
RNEUOB(signed24 a, uint8_t s)
{
  unsigned24 t;
  uint8_t result;

  if (s > 24)
    result = 0;
  else if (s == 24)
    result = (((unsigned24)a & MASK24) > OB_BIT(23) ? 1 : 0);
  else
    {
      t = ((unsigned24)a & MASK24) >> s;
      if (s > 0 && ((a >> (s-1)) & 1) == 1)
        {
          if (s > 1 && (a & OB_ONES(s-1)) != 0)
            t++;
          else
            t += t & 1;         /* Tie: round to even.  */
        }
      result = OB_CLAMP(t);
    }
  return result;
}
static uint8_t
RZUOB(signed24 a, uint8_t s)
{
  unsigned24 t;
  uint8_t result;

  if (s >= 24)
    result = 0;
  else
    {
      t = ((unsigned24)a & MASK24) >> s;
      result = OB_CLAMP(t);
    }
  return result;
}
static const QH_ROUND qh_round[] = {
  RNASQH, RNAUQH, RNESQH, RNEUQH, RZSQH, RZUQH
};

static const OB_ROUND ob_round[] = {
  NULL,   RNAUOB, NULL,   RNEUOB, NULL,  RZUOB
};
static uint64_t
qh_vector_round(sim_cpu *cpu, address_word cia, uint64_t v2, QH_ROUND round)
{
  uint64_t result = 0;
  int i, s;
  int16_t h, h2;

  s = 0;
  for (i = 0; i < 4; i++)
    {
      h2 = (int16_t)(v2 & 0xFFFF);
      if (h2 >= 0)
        h = (*round)(ACC.qh[i], h2);
      else
        {
          UnpredictableResult ();
          h = 0;
        }
      v2 >>= 16;
      result |= ((uint64_t)((uint16_t)h) << s);
      s += 16;
    }
  return result;
}
static uint64_t
qh_map_round(sim_cpu *cpu, address_word cia, int16_t h2, QH_ROUND round)
{
  uint64_t result = 0;
  int i, s;
  int16_t h;

  s = 0;
  for (i = 0; i < 4; i++)
    {
      if (h2 >= 0)
        h = (*round)(ACC.qh[i], h2);
      else
        {
          UnpredictableResult ();
          h = 0;
        }
      result |= ((uint64_t)((uint16_t)h) << s);
      s += 16;
    }
  return result;
}
static uint64_t
ob_vector_round(sim_cpu *cpu, address_word cia, uint64_t v2, OB_ROUND round)
{
  uint64_t result = 0;
  int i, s;
  uint8_t b, b2;

  s = 0;
  for (i = 0; i < 8; i++)
    {
      b2 = v2 & 0xFF;  v2 >>= 8;
      b = (*round)(ACC.ob[i], b2);
      result |= ((uint64_t)b << s);
      s += 8;
    }
  return result;
}
static uint64_t
ob_map_round(sim_cpu *cpu, address_word cia, uint8_t b2, OB_ROUND round)
{
  uint64_t result = 0;
  int i, s;
  uint8_t b;

  s = 0;
  for (i = 0; i < 8; i++)
    {
      b = (*round)(ACC.ob[i], b2);
      result |= ((uint64_t)b << s);
      s += 8;
    }
  return result;
}
uint64_t
mdmx_round_op(sim_cpu *cpu,
              address_word cia,
              int rm,
              int vt,
              MX_fmtsel fmtsel)
{
  uint64_t op2;
  uint64_t result = 0;

  switch (MX_FMT (fmtsel))
    {
    case mdmx_qh:
      switch (MX_VT (fmtsel))
        {
        case sel_elem:
          op2 = ValueFPR(vt, fmt_mdmx);
          result = qh_map_round(cpu, cia, QH_ELEM(op2, fmtsel), qh_round[rm]);
          break;
        case sel_vect:
          op2 = ValueFPR(vt, fmt_mdmx);
          result = qh_vector_round(cpu, cia, op2, qh_round[rm]);
          break;
        case sel_imm:
          result = qh_map_round(cpu, cia, vt, qh_round[rm]);
          break;
        }
      break;
    case mdmx_ob:
      switch (MX_VT (fmtsel))
        {
        case sel_elem:
          op2 = ValueFPR(vt, fmt_mdmx);
          result = ob_map_round(cpu, cia, OB_ELEM(op2, fmtsel), ob_round[rm]);
          break;
        case sel_vect:
          op2 = ValueFPR(vt, fmt_mdmx);
          result = ob_vector_round(cpu, cia, op2, ob_round[rm]);
          break;
        case sel_imm:
          result = ob_map_round(cpu, cia, vt, ob_round[rm]);
          break;
        }
      break;
    }

  return result;
}
/* Shuffle operation.  */

typedef struct {
  enum {vs, ss, vt} source;
  unsigned int index;
} sh_map;
static const sh_map ob_shuffle[][8] = {
  /* MDMX 2.0 encodings (3-4, 6-7).  */
  /* vr5400 encoding (5), otherwise.  */
  {{vt,4}, {vs,4}, {vt,5}, {vs,5}, {vt,6}, {vs,6}, {vt,7}, {vs,7}},   /* RSVD */
  {{vt,0}, {vs,0}, {vt,1}, {vs,1}, {vt,2}, {vs,2}, {vt,3}, {vs,3}},   /* RSVD */
  {{vs,0}, {ss,0}, {vs,1}, {ss,1}, {vs,2}, {ss,2}, {vs,3}, {ss,3}},   /* upsl */
  {{vt,1}, {vt,3}, {vt,5}, {vt,7}, {vs,1}, {vs,3}, {vs,5}, {vs,7}},   /* pach */
  {{vt,0}, {vt,2}, {vt,4}, {vt,6}, {vs,0}, {vs,2}, {vs,4}, {vs,6}},   /* pacl */
  {{vt,4}, {vs,4}, {vt,5}, {vs,5}, {vt,6}, {vs,6}, {vt,7}, {vs,7}},   /* mixh */
  {{vt,0}, {vs,0}, {vt,1}, {vs,1}, {vt,2}, {vs,2}, {vt,3}, {vs,3}}    /* mixl */
};
static const sh_map qh_shuffle[][4] = {
  {{vt,2}, {vs,2}, {vt,3}, {vs,3}},   /* mixh */
  {{vt,0}, {vs,0}, {vt,1}, {vs,1}},   /* mixl */
  {{vt,1}, {vt,3}, {vs,1}, {vs,3}},   /* pach */
  {{vt,0}, {vt,2}, {vs,0}, {vs,2}},   /* pacl */
  {{vt,1}, {vs,0}, {vt,3}, {vs,2}},   /* bfla */
  {{vt,0}, {vs,1}, {vt,2}, {vs,3}},   /* bflb */
  {{vt,2}, {vt,3}, {vs,2}, {vs,3}},   /* repa */
  {{vt,0}, {vt,1}, {vs,0}, {vs,1}}    /* repb */
};
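/* Each sh_map entry names the element to copy: {vt,4} takes element 4 of the
   vt operand, {vs,n} element n of vs, and {ss,n} (OB only) replicates the
   sign bit of vs element n across the whole byte.  */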
uint64_t
mdmx_shuffle(sim_cpu *cpu,
             address_word cia,
             int shop,
             uint64_t op1,
             uint64_t op2)
{
  uint64_t result = 0;
  int i, s;
  int op;

  if ((shop & 0x3) == 0x1)        /* QH format.  */
    {
      op = shop >> 2;
      s = 0;
      for (i = 0; i < 4; i++)
        {
          uint64_t v;

          switch (qh_shuffle[op][i].source)
            {
            case vs:
              v = op1;
              break;
            case vt:
              v = op2;
              break;
            default:
              Unpredictable ();
              v = 0;
            }
          result |= (((v >> 16*qh_shuffle[op][i].index) & 0xFFFF) << s);
          s += 16;
        }
    }
  else if ((shop & 0x1) == 0x0)   /* OB format.  */
    {
      op = shop >> 2;
      s = 0;
      for (i = 0; i < 8; i++)
        {
          uint8_t b;
          unsigned int ishift = 8*ob_shuffle[op][i].index;

          switch (ob_shuffle[op][i].source)
            {
            case vs:
              b = (op1 >> ishift) & 0xFF;
              break;
            case ss:
              b = ((op1 >> ishift) & 0x80) ? 0xFF : 0;
              break;
            case vt:
              b = (op2 >> ishift) & 0xFF;
              break;
            default:
              Unpredictable ();
              b = 0;
            }
          result |= ((uint64_t)b << s);
          s += 8;
        }
    }
  else
    Unpredictable ();

  return result;
}