[PATCH 22/57][Arm][GAS] Add support for MVE instructions: vmlaldav, vmlalv, vmlsldav...
[binutils-gdb.git] / sim / mips / mdmx.c
blobf18dd919c43b06aaeae50df9291ba52d1bb0c26a
1 /* Simulation code for the MIPS MDMX ASE.
2 Copyright (C) 2002-2019 Free Software Foundation, Inc.
3 Contributed by Ed Satterthwaite and Chris Demetriou, of Broadcom
4 Corporation (SiByte).
6 This file is part of GDB, the GNU debugger.
8 This program is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3 of the License, or
11 (at your option) any later version.
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with this program. If not, see <http://www.gnu.org/licenses/>. */
21 #include <stdio.h>
23 #include "sim-main.h"
25 /* Within mdmx.c we refer to the sim_cpu directly: CPU expands to the
   identifier `cpu', so code using CPU or SD needs a `cpu' in scope. */
26 #define CPU cpu
27 #define SD (CPU_STATE(CPU))
29 /* XXX FIXME: temporary hack while the impact of making unpredictable()
30 a "normal" (non-igen) function is evaluated. Any previous definition
   of Unpredictable is dropped and replaced by a call that passes the
   `cpu' and `cia' names visible at the point of use. */
31 #undef Unpredictable
32 #define Unpredictable() unpredictable_action (cpu, cia)
34 /* MDMX Representations
36 An 8-bit packed byte element (OB) is always unsigned.
37 The 24-bit accumulators are signed and are represented as 32-bit
38 signed values, which are reduced to 24-bit signed values prior to
39 Round and Clamp operations.
41 A 16-bit packed halfword element (QH) is always signed.
42 The 48-bit accumulators are signed and are represented as 64-bit
43 signed values, which are reduced to 48-bit signed values prior to
44 Round and Clamp operations.
46 The code below assumes a 2's-complement representation of signed
47 quantities. Care is required to clear extended sign bits when
48 repacking fields.
50 The code (and the code for arithmetic shifts in mips.igen) also makes
51 the (not guaranteed portable) assumption that right shifts of signed
52 quantities in C do sign extension. */
54 typedef unsigned64 unsigned48;
55 #define MASK48 (UNSIGNED64 (0xffffffffffff))
57 typedef unsigned32 unsigned24;
58 #define MASK24 (UNSIGNED32 (0xffffff))
/* Element format of an MDMX operand.  */
typedef enum
{
  mdmx_ob = 0,		/* OB (octal byte) */
  mdmx_qh = 1		/* QH (quad half-word) */
} MX_fmt;
/* How the second (vt) operand of an MDMX operation is obtained.  */
typedef enum
{
  sel_elem = 0,		/* element select */
  sel_vect = 1,		/* vector select */
  sel_imm = 2		/* immediate select */
} VT_select;
/* Saturation limits for packed elements.  */
#define OB_MAX ((unsigned8)0xFF)
#define QH_MIN ((signed16)0x8000)
#define QH_MAX ((signed16)0x7FFF)

/* Saturate X to the range of an OB element; only the upper bound is
   checked.  X is evaluated more than once.  */
#define OB_CLAMP(x) ((unsigned8)((x) > OB_MAX ? OB_MAX : (x)))

/* Saturate X to the range of a QH element.  X is evaluated more than
   once.  */
#define QH_CLAMP(x) \
  ((signed16)((x) > QH_MAX ? QH_MAX : ((x) < QH_MIN ? QH_MIN : (x))))
/* Decode the element format from a format-select field: bit 0 clear
   selects OB, bit 0 set selects QH.  */
#define MX_FMT(fmtsel) (((fmtsel) & 0x1) != 0 ? mdmx_qh : mdmx_ob)

/* Decode the vt selection mode from a format-select field: bit 4 clear
   means element select; with bit 4 set, bit 3 distinguishes vector
   select (clear) from immediate select (set).  */
#define MX_VT(fmtsel) \
  (((fmtsel) & 0x10) == 0 ? sel_elem \
   : (((fmtsel) & 0x08) == 0 ? sel_vect : sel_imm))
/* Extract the element of V selected by FMTSEL.  For QH the shift count
   is (fmtsel & 0xC) * 4; for OB it is (fmtsel & 0xE) * 4.  */
#define QH_ELEM(v,fmtsel) \
  ((signed16)(((v) >> (((fmtsel) & 0xC) * 4)) & 0xFFFF))
#define OB_ELEM(v,fmtsel) \
  ((unsigned8)(((v) >> (((fmtsel) & 0xE) * 4)) & 0xFF))
89 typedef signed16 (*QH_FUNC)(signed16, signed16);
90 typedef unsigned8 (*OB_FUNC)(unsigned8, unsigned8);
92 /* vectorized logical operators */
94 static signed16
95 AndQH(signed16 ts, signed16 tt)
97 return (signed16)((unsigned16)ts & (unsigned16)tt);
100 static unsigned8
101 AndOB(unsigned8 ts, unsigned8 tt)
103 return ts & tt;
106 static signed16
107 NorQH(signed16 ts, signed16 tt)
109 return (signed16)(((unsigned16)ts | (unsigned16)tt) ^ 0xFFFF);
112 static unsigned8
113 NorOB(unsigned8 ts, unsigned8 tt)
115 return (ts | tt) ^ 0xFF;
118 static signed16
119 OrQH(signed16 ts, signed16 tt)
121 return (signed16)((unsigned16)ts | (unsigned16)tt);
124 static unsigned8
125 OrOB(unsigned8 ts, unsigned8 tt)
127 return ts | tt;
130 static signed16
131 XorQH(signed16 ts, signed16 tt)
133 return (signed16)((unsigned16)ts ^ (unsigned16)tt);
136 static unsigned8
137 XorOB(unsigned8 ts, unsigned8 tt)
139 return ts ^ tt;
142 static signed16
143 SLLQH(signed16 ts, signed16 tt)
145 unsigned32 s = (unsigned32)tt & 0xF;
146 return (signed16)(((unsigned32)ts << s) & 0xFFFF);
149 static unsigned8
150 SLLOB(unsigned8 ts, unsigned8 tt)
152 unsigned32 s = tt & 0x7;
153 return (ts << s) & 0xFF;
156 static signed16
157 SRLQH(signed16 ts, signed16 tt)
159 unsigned32 s = (unsigned32)tt & 0xF;
160 return (signed16)((unsigned16)ts >> s);
163 static unsigned8
164 SRLOB(unsigned8 ts, unsigned8 tt)
166 unsigned32 s = tt & 0x7;
167 return ts >> s;
171 /* Vectorized arithmetic operators. */
173 static signed16
174 AddQH(signed16 ts, signed16 tt)
176 signed32 t = (signed32)ts + (signed32)tt;
177 return QH_CLAMP(t);
180 static unsigned8
181 AddOB(unsigned8 ts, unsigned8 tt)
183 unsigned32 t = (unsigned32)ts + (unsigned32)tt;
184 return OB_CLAMP(t);
187 static signed16
188 SubQH(signed16 ts, signed16 tt)
190 signed32 t = (signed32)ts - (signed32)tt;
191 return QH_CLAMP(t);
194 static unsigned8
195 SubOB(unsigned8 ts, unsigned8 tt)
197 signed32 t;
198 t = (signed32)ts - (signed32)tt;
199 if (t < 0)
200 t = 0;
201 return (unsigned8)t;
204 static signed16
205 MinQH(signed16 ts, signed16 tt)
207 return (ts < tt ? ts : tt);
210 static unsigned8
211 MinOB(unsigned8 ts, unsigned8 tt)
213 return (ts < tt ? ts : tt);
216 static signed16
217 MaxQH(signed16 ts, signed16 tt)
219 return (ts > tt ? ts : tt);
222 static unsigned8
223 MaxOB(unsigned8 ts, unsigned8 tt)
225 return (ts > tt ? ts : tt);
228 static signed16
229 MulQH(signed16 ts, signed16 tt)
231 signed32 t = (signed32)ts * (signed32)tt;
232 return QH_CLAMP(t);
235 static unsigned8
236 MulOB(unsigned8 ts, unsigned8 tt)
238 unsigned32 t = (unsigned32)ts * (unsigned32)tt;
239 return OB_CLAMP(t);
242 /* "msgn" and "sra" are defined only for QH format. */
244 static signed16
245 MsgnQH(signed16 ts, signed16 tt)
247 signed16 t;
248 if (ts < 0)
249 t = (tt == QH_MIN ? QH_MAX : -tt);
250 else if (ts == 0)
251 t = 0;
252 else
253 t = tt;
254 return t;
257 static signed16
258 SRAQH(signed16 ts, signed16 tt)
260 unsigned32 s = (unsigned32)tt & 0xF;
261 return (signed16)((signed32)ts >> s);
265 /* "pabsdiff" and "pavg" are defined only for OB format. */
267 static unsigned8
268 AbsDiffOB(unsigned8 ts, unsigned8 tt)
270 return (ts >= tt ? ts - tt : tt - ts);
273 static unsigned8
274 AvgOB(unsigned8 ts, unsigned8 tt)
276 return ((unsigned32)ts + (unsigned32)tt + 1) >> 1;
280 /* Dispatch tables for operations that update a CPR. */
282 static const QH_FUNC qh_func[] = {
283 AndQH, NorQH, OrQH, XorQH, SLLQH, SRLQH,
284 AddQH, SubQH, MinQH, MaxQH,
285 MulQH, MsgnQH, SRAQH, NULL, NULL
288 static const OB_FUNC ob_func[] = {
289 AndOB, NorOB, OrOB, XorOB, SLLOB, SRLOB,
290 AddOB, SubOB, MinOB, MaxOB,
291 MulOB, NULL, NULL, AbsDiffOB, AvgOB
294 /* Auxiliary functions for CPR updates. */
296 /* Vector mapping for QH format. */
297 static unsigned64
298 qh_vector_op(unsigned64 v1, unsigned64 v2, QH_FUNC func)
300 unsigned64 result = 0;
301 int i;
302 signed16 h, h1, h2;
304 for (i = 0; i < 64; i += 16)
306 h1 = (signed16)(v1 & 0xFFFF); v1 >>= 16;
307 h2 = (signed16)(v2 & 0xFFFF); v2 >>= 16;
308 h = (*func)(h1, h2);
309 result |= ((unsigned64)((unsigned16)h) << i);
311 return result;
314 static unsigned64
315 qh_map_op(unsigned64 v1, signed16 h2, QH_FUNC func)
317 unsigned64 result = 0;
318 int i;
319 signed16 h, h1;
321 for (i = 0; i < 64; i += 16)
323 h1 = (signed16)(v1 & 0xFFFF); v1 >>= 16;
324 h = (*func)(h1, h2);
325 result |= ((unsigned64)((unsigned16)h) << i);
327 return result;
331 /* Vector operations for OB format. */
333 static unsigned64
334 ob_vector_op(unsigned64 v1, unsigned64 v2, OB_FUNC func)
336 unsigned64 result = 0;
337 int i;
338 unsigned8 b, b1, b2;
340 for (i = 0; i < 64; i += 8)
342 b1 = v1 & 0xFF; v1 >>= 8;
343 b2 = v2 & 0xFF; v2 >>= 8;
344 b = (*func)(b1, b2);
345 result |= ((unsigned64)b << i);
347 return result;
350 static unsigned64
351 ob_map_op(unsigned64 v1, unsigned8 b2, OB_FUNC func)
353 unsigned64 result = 0;
354 int i;
355 unsigned8 b, b1;
357 for (i = 0; i < 64; i += 8)
359 b1 = v1 & 0xFF; v1 >>= 8;
360 b = (*func)(b1, b2);
361 result |= ((unsigned64)b << i);
363 return result;
367 /* Primary entry for operations that update CPRs. */
368 unsigned64
369 mdmx_cpr_op(sim_cpu *cpu,
370 address_word cia,
371 int op,
372 unsigned64 op1,
373 int vt,
374 MX_fmtsel fmtsel)
376 unsigned64 op2;
377 unsigned64 result = 0;
379 switch (MX_FMT (fmtsel))
381 case mdmx_qh:
382 switch (MX_VT (fmtsel))
384 case sel_elem:
385 op2 = ValueFPR(vt, fmt_mdmx);
386 result = qh_map_op(op1, QH_ELEM(op2, fmtsel), qh_func[op]);
387 break;
388 case sel_vect:
389 result = qh_vector_op(op1, ValueFPR(vt, fmt_mdmx), qh_func[op]);
390 break;
391 case sel_imm:
392 result = qh_map_op(op1, vt, qh_func[op]);
393 break;
395 break;
396 case mdmx_ob:
397 switch (MX_VT (fmtsel))
399 case sel_elem:
400 op2 = ValueFPR(vt, fmt_mdmx);
401 result = ob_map_op(op1, OB_ELEM(op2, fmtsel), ob_func[op]);
402 break;
403 case sel_vect:
404 result = ob_vector_op(op1, ValueFPR(vt, fmt_mdmx), ob_func[op]);
405 break;
406 case sel_imm:
407 result = ob_map_op(op1, vt, ob_func[op]);
408 break;
410 break;
411 default:
412 Unpredictable ();
415 return result;
419 /* Operations that update CCs */
421 static void
422 qh_vector_test(sim_cpu *cpu, unsigned64 v1, unsigned64 v2, int cond)
424 int i;
425 signed16 h1, h2;
426 int boolean;
428 for (i = 0; i < 4; i++)
430 h1 = (signed16)(v1 & 0xFFFF); v1 >>= 16;
431 h2 = (signed16)(v2 & 0xFFFF); v2 >>= 16;
432 boolean = ((cond & MX_C_EQ) && (h1 == h2)) ||
433 ((cond & MX_C_LT) && (h1 < h2));
434 SETFCC(i, boolean);
438 static void
439 qh_map_test(sim_cpu *cpu, unsigned64 v1, signed16 h2, int cond)
441 int i;
442 signed16 h1;
443 int boolean;
445 for (i = 0; i < 4; i++)
447 h1 = (signed16)(v1 & 0xFFFF); v1 >>= 16;
448 boolean = ((cond & MX_C_EQ) && (h1 == h2)) ||
449 ((cond & MX_C_LT) && (h1 < h2));
450 SETFCC(i, boolean);
454 static void
455 ob_vector_test(sim_cpu *cpu, unsigned64 v1, unsigned64 v2, int cond)
457 int i;
458 unsigned8 b1, b2;
459 int boolean;
461 for (i = 0; i < 8; i++)
463 b1 = v1 & 0xFF; v1 >>= 8;
464 b2 = v2 & 0xFF; v2 >>= 8;
465 boolean = ((cond & MX_C_EQ) && (b1 == b2)) ||
466 ((cond & MX_C_LT) && (b1 < b2));
467 SETFCC(i, boolean);
471 static void
472 ob_map_test(sim_cpu *cpu, unsigned64 v1, unsigned8 b2, int cond)
474 int i;
475 unsigned8 b1;
476 int boolean;
478 for (i = 0; i < 8; i++)
480 b1 = (unsigned8)(v1 & 0xFF); v1 >>= 8;
481 boolean = ((cond & MX_C_EQ) && (b1 == b2)) ||
482 ((cond & MX_C_LT) && (b1 < b2));
483 SETFCC(i, boolean);
488 void
489 mdmx_cc_op(sim_cpu *cpu,
490 address_word cia,
491 int cond,
492 unsigned64 v1,
493 int vt,
494 MX_fmtsel fmtsel)
496 unsigned64 op2;
498 switch (MX_FMT (fmtsel))
500 case mdmx_qh:
501 switch (MX_VT (fmtsel))
503 case sel_elem:
504 op2 = ValueFPR(vt, fmt_mdmx);
505 qh_map_test(cpu, v1, QH_ELEM(op2, fmtsel), cond);
506 break;
507 case sel_vect:
508 qh_vector_test(cpu, v1, ValueFPR(vt, fmt_mdmx), cond);
509 break;
510 case sel_imm:
511 qh_map_test(cpu, v1, vt, cond);
512 break;
514 break;
515 case mdmx_ob:
516 switch (MX_VT (fmtsel))
518 case sel_elem:
519 op2 = ValueFPR(vt, fmt_mdmx);
520 ob_map_test(cpu, v1, OB_ELEM(op2, fmtsel), cond);
521 break;
522 case sel_vect:
523 ob_vector_test(cpu, v1, ValueFPR(vt, fmt_mdmx), cond);
524 break;
525 case sel_imm:
526 ob_map_test(cpu, v1, vt, cond);
527 break;
529 break;
530 default:
531 Unpredictable ();
536 /* Pick operations. */
538 static unsigned64
539 qh_vector_pick(sim_cpu *cpu, unsigned64 v1, unsigned64 v2, int tf)
541 unsigned64 result = 0;
542 int i, s;
543 unsigned16 h;
545 s = 0;
546 for (i = 0; i < 4; i++)
548 h = ((GETFCC(i) == tf) ? (v1 & 0xFFFF) : (v2 & 0xFFFF));
549 v1 >>= 16; v2 >>= 16;
550 result |= ((unsigned64)h << s);
551 s += 16;
553 return result;
556 static unsigned64
557 qh_map_pick(sim_cpu *cpu, unsigned64 v1, signed16 h2, int tf)
559 unsigned64 result = 0;
560 int i, s;
561 unsigned16 h;
563 s = 0;
564 for (i = 0; i < 4; i++)
566 h = (GETFCC(i) == tf) ? (v1 & 0xFFFF) : (unsigned16)h2;
567 v1 >>= 16;
568 result |= ((unsigned64)h << s);
569 s += 16;
571 return result;
574 static unsigned64
575 ob_vector_pick(sim_cpu *cpu, unsigned64 v1, unsigned64 v2, int tf)
577 unsigned64 result = 0;
578 int i, s;
579 unsigned8 b;
581 s = 0;
582 for (i = 0; i < 8; i++)
584 b = (GETFCC(i) == tf) ? (v1 & 0xFF) : (v2 & 0xFF);
585 v1 >>= 8; v2 >>= 8;
586 result |= ((unsigned64)b << s);
587 s += 8;
589 return result;
592 static unsigned64
593 ob_map_pick(sim_cpu *cpu, unsigned64 v1, unsigned8 b2, int tf)
595 unsigned64 result = 0;
596 int i, s;
597 unsigned8 b;
599 s = 0;
600 for (i = 0; i < 8; i++)
602 b = (GETFCC(i) == tf) ? (v1 & 0xFF) : b2;
603 v1 >>= 8;
604 result |= ((unsigned64)b << s);
605 s += 8;
607 return result;
611 unsigned64
612 mdmx_pick_op(sim_cpu *cpu,
613 address_word cia,
614 int tf,
615 unsigned64 v1,
616 int vt,
617 MX_fmtsel fmtsel)
619 unsigned64 result = 0;
620 unsigned64 op2;
622 switch (MX_FMT (fmtsel))
624 case mdmx_qh:
625 switch (MX_VT (fmtsel))
627 case sel_elem:
628 op2 = ValueFPR(vt, fmt_mdmx);
629 result = qh_map_pick(cpu, v1, QH_ELEM(op2, fmtsel), tf);
630 break;
631 case sel_vect:
632 result = qh_vector_pick(cpu, v1, ValueFPR(vt, fmt_mdmx), tf);
633 break;
634 case sel_imm:
635 result = qh_map_pick(cpu, v1, vt, tf);
636 break;
638 break;
639 case mdmx_ob:
640 switch (MX_VT (fmtsel))
642 case sel_elem:
643 op2 = ValueFPR(vt, fmt_mdmx);
644 result = ob_map_pick(cpu, v1, OB_ELEM(op2, fmtsel), tf);
645 break;
646 case sel_vect:
647 result = ob_vector_pick(cpu, v1, ValueFPR(vt, fmt_mdmx), tf);
648 break;
649 case sel_imm:
650 result = ob_map_pick(cpu, v1, vt, tf);
651 break;
653 break;
654 default:
655 Unpredictable ();
657 return result;
661 /* Accumulators. */
663 typedef void (*QH_ACC)(signed48 *a, signed16 ts, signed16 tt);
665 static void
666 AccAddAQH(signed48 *a, signed16 ts, signed16 tt)
668 *a += (signed48)ts + (signed48)tt;
671 static void
672 AccAddLQH(signed48 *a, signed16 ts, signed16 tt)
674 *a = (signed48)ts + (signed48)tt;
677 static void
678 AccMulAQH(signed48 *a, signed16 ts, signed16 tt)
680 *a += (signed48)ts * (signed48)tt;
683 static void
684 AccMulLQH(signed48 *a, signed16 ts, signed16 tt)
686 *a = (signed48)ts * (signed48)tt;
689 static void
690 SubMulAQH(signed48 *a, signed16 ts, signed16 tt)
692 *a -= (signed48)ts * (signed48)tt;
695 static void
696 SubMulLQH(signed48 *a, signed16 ts, signed16 tt)
698 *a = -((signed48)ts * (signed48)tt);
701 static void
702 AccSubAQH(signed48 *a, signed16 ts, signed16 tt)
704 *a += (signed48)ts - (signed48)tt;
707 static void
708 AccSubLQH(signed48 *a, signed16 ts, signed16 tt)
710 *a = (signed48)ts - (signed48)tt;
714 typedef void (*OB_ACC)(signed24 *acc, unsigned8 ts, unsigned8 tt);
716 static void
717 AccAddAOB(signed24 *a, unsigned8 ts, unsigned8 tt)
719 *a += (signed24)ts + (signed24)tt;
722 static void
723 AccAddLOB(signed24 *a, unsigned8 ts, unsigned8 tt)
725 *a = (signed24)ts + (signed24)tt;
728 static void
729 AccMulAOB(signed24 *a, unsigned8 ts, unsigned8 tt)
731 *a += (signed24)ts * (signed24)tt;
734 static void
735 AccMulLOB(signed24 *a, unsigned8 ts, unsigned8 tt)
737 *a = (signed24)ts * (signed24)tt;
740 static void
741 SubMulAOB(signed24 *a, unsigned8 ts, unsigned8 tt)
743 *a -= (signed24)ts * (signed24)tt;
746 static void
747 SubMulLOB(signed24 *a, unsigned8 ts, unsigned8 tt)
749 *a = -((signed24)ts * (signed24)tt);
752 static void
753 AccSubAOB(signed24 *a, unsigned8 ts, unsigned8 tt)
755 *a += (signed24)ts - (signed24)tt;
758 static void
759 AccSubLOB(signed24 *a, unsigned8 ts, unsigned8 tt)
761 *a = (signed24)ts - (signed24)tt;
764 static void
765 AccAbsDiffOB(signed24 *a, unsigned8 ts, unsigned8 tt)
767 unsigned8 t = (ts >= tt ? ts - tt : tt - ts);
768 *a += (signed24)t;
772 /* Dispatch tables for operations that update the accumulator. */
774 static const QH_ACC qh_acc[] = {
775 AccAddAQH, AccAddAQH, AccMulAQH, AccMulLQH,
776 SubMulAQH, SubMulLQH, AccSubAQH, AccSubLQH,
777 NULL
780 static const OB_ACC ob_acc[] = {
781 AccAddAOB, AccAddLOB, AccMulAOB, AccMulLOB,
782 SubMulAOB, SubMulLOB, AccSubAOB, AccSubLOB,
783 AccAbsDiffOB
787 static void
788 qh_vector_acc(signed48 a[], unsigned64 v1, unsigned64 v2, QH_ACC acc)
790 int i;
791 signed16 h1, h2;
793 for (i = 0; i < 4; i++)
795 h1 = (signed16)(v1 & 0xFFFF); v1 >>= 16;
796 h2 = (signed16)(v2 & 0xFFFF); v2 >>= 16;
797 (*acc)(&a[i], h1, h2);
801 static void
802 qh_map_acc(signed48 a[], unsigned64 v1, signed16 h2, QH_ACC acc)
804 int i;
805 signed16 h1;
807 for (i = 0; i < 4; i++)
809 h1 = (signed16)(v1 & 0xFFFF); v1 >>= 16;
810 (*acc)(&a[i], h1, h2);
814 static void
815 ob_vector_acc(signed24 a[], unsigned64 v1, unsigned64 v2, OB_ACC acc)
817 int i;
818 unsigned8 b1, b2;
820 for (i = 0; i < 8; i++)
822 b1 = v1 & 0xFF; v1 >>= 8;
823 b2 = v2 & 0xFF; v2 >>= 8;
824 (*acc)(&a[i], b1, b2);
828 static void
829 ob_map_acc(signed24 a[], unsigned64 v1, unsigned8 b2, OB_ACC acc)
831 int i;
832 unsigned8 b1;
834 for (i = 0; i < 8; i++)
836 b1 = v1 & 0xFF; v1 >>= 8;
837 (*acc)(&a[i], b1, b2);
842 /* Primary entry for operations that accumulate */
843 void
844 mdmx_acc_op(sim_cpu *cpu,
845 address_word cia,
846 int op,
847 unsigned64 op1,
848 int vt,
849 MX_fmtsel fmtsel)
851 unsigned64 op2;
853 switch (MX_FMT (fmtsel))
855 case mdmx_qh:
856 switch (MX_VT (fmtsel))
858 case sel_elem:
859 op2 = ValueFPR(vt, fmt_mdmx);
860 qh_map_acc(ACC.qh, op1, QH_ELEM(op2, fmtsel), qh_acc[op]);
861 break;
862 case sel_vect:
863 qh_vector_acc(ACC.qh, op1, ValueFPR(vt, fmt_mdmx), qh_acc[op]);
864 break;
865 case sel_imm:
866 qh_map_acc(ACC.qh, op1, vt, qh_acc[op]);
867 break;
869 break;
870 case mdmx_ob:
871 switch (MX_VT (fmtsel))
873 case sel_elem:
874 op2 = ValueFPR(vt, fmt_mdmx);
875 ob_map_acc(ACC.ob, op1, OB_ELEM(op2, fmtsel), ob_acc[op]);
876 break;
877 case sel_vect:
878 ob_vector_acc(ACC.ob, op1, ValueFPR(vt, fmt_mdmx), ob_acc[op]);
879 break;
880 case sel_imm:
881 ob_map_acc(ACC.ob, op1, vt, ob_acc[op]);
882 break;
884 break;
885 default:
886 Unpredictable ();
891 /* Reading and writing accumulator (no conversion). */
893 unsigned64
894 mdmx_rac_op(sim_cpu *cpu,
895 address_word cia,
896 int op,
897 int fmt)
899 unsigned64 result;
900 unsigned int shift;
901 int i;
903 shift = op; /* L = 00, M = 01, H = 10. */
904 result = 0;
906 switch (fmt)
908 case MX_FMT_QH:
909 shift <<= 4; /* 16 bits per element. */
910 for (i = 3; i >= 0; --i)
912 result <<= 16;
913 result |= ((ACC.qh[i] >> shift) & 0xFFFF);
915 break;
916 case MX_FMT_OB:
917 shift <<= 3; /* 8 bits per element. */
918 for (i = 7; i >= 0; --i)
920 result <<= 8;
921 result |= ((ACC.ob[i] >> shift) & 0xFF);
923 break;
924 default:
925 Unpredictable ();
927 return result;
930 void
931 mdmx_wacl(sim_cpu *cpu,
932 address_word cia,
933 int fmt,
934 unsigned64 vs,
935 unsigned64 vt)
937 int i;
939 switch (fmt)
941 case MX_FMT_QH:
942 for (i = 0; i < 4; i++)
944 signed32 s = (signed16)(vs & 0xFFFF);
945 ACC.qh[i] = ((signed48)s << 16) | (vt & 0xFFFF);
946 vs >>= 16; vt >>= 16;
948 break;
949 case MX_FMT_OB:
950 for (i = 0; i < 8; i++)
952 signed16 s = (signed8)(vs & 0xFF);
953 ACC.ob[i] = ((signed24)s << 8) | (vt & 0xFF);
954 vs >>= 8; vt >>= 8;
956 break;
957 default:
958 Unpredictable ();
962 void
963 mdmx_wach(sim_cpu *cpu,
964 address_word cia,
965 int fmt,
966 unsigned64 vs)
968 int i;
970 switch (fmt)
972 case MX_FMT_QH:
973 for (i = 0; i < 4; i++)
975 signed32 s = (signed16)(vs & 0xFFFF);
976 ACC.qh[i] &= ~((signed48)0xFFFF << 32);
977 ACC.qh[i] |= ((signed48)s << 32);
978 vs >>= 16;
980 break;
981 case MX_FMT_OB:
982 for (i = 0; i < 8; i++)
984 ACC.ob[i] &= ~((signed24)0xFF << 16);
985 ACC.ob[i] |= ((signed24)(vs & 0xFF) << 16);
986 vs >>= 8;
988 break;
989 default:
990 Unpredictable ();
995 /* Reading and writing accumulator (rounding conversions).
996 Enumerating function guarantees s >= 0 for QH ops. */
998 typedef signed16 (*QH_ROUND)(signed48 a, signed16 s);
1000 #define QH_BIT(n) ((unsigned48)1 << (n))
1001 #define QH_ONES(n) (((unsigned48)1 << (n))-1)
1003 static signed16
1004 RNASQH(signed48 a, signed16 s)
1006 signed48 t;
1007 signed16 result = 0;
1009 if (s > 48)
1010 result = 0;
1011 else
1013 t = (a >> s);
1014 if ((a & QH_BIT(47)) == 0)
1016 if (s > 0 && ((a >> (s-1)) & 1) == 1)
1017 t++;
1018 if (t > QH_MAX)
1019 t = QH_MAX;
1021 else
1023 if (s > 0 && ((a >> (s-1)) & 1) == 1)
1025 if (s > 1 && ((unsigned48)a & QH_ONES(s-1)) != 0)
1026 t++;
1028 if (t < QH_MIN)
1029 t = QH_MIN;
1031 result = (signed16)t;
1033 return result;
1036 static signed16
1037 RNAUQH(signed48 a, signed16 s)
1039 unsigned48 t;
1040 signed16 result;
1042 if (s > 48)
1043 result = 0;
1044 else if (s == 48)
1045 result = ((unsigned48)a & MASK48) >> 47;
1046 else
1048 t = ((unsigned48)a & MASK48) >> s;
1049 if (s > 0 && ((a >> (s-1)) & 1) == 1)
1050 t++;
1051 if (t > 0xFFFF)
1052 t = 0xFFFF;
1053 result = (signed16)t;
1055 return result;
1058 static signed16
1059 RNESQH(signed48 a, signed16 s)
1061 signed48 t;
1062 signed16 result = 0;
1064 if (s > 47)
1065 result = 0;
1066 else
1068 t = (a >> s);
1069 if (s > 0 && ((a >> (s-1)) & 1) == 1)
1071 if (s == 1 || (a & QH_ONES(s-1)) == 0)
1072 t += t & 1;
1073 else
1074 t += 1;
1076 if ((a & QH_BIT(47)) == 0)
1078 if (t > QH_MAX)
1079 t = QH_MAX;
1081 else
1083 if (t < QH_MIN)
1084 t = QH_MIN;
1086 result = (signed16)t;
1088 return result;
1091 static signed16
1092 RNEUQH(signed48 a, signed16 s)
1094 unsigned48 t;
1095 signed16 result;
1097 if (s > 48)
1098 result = 0;
1099 else if (s == 48)
1100 result = ((unsigned48)a > QH_BIT(47) ? 1 : 0);
1101 else
1103 t = ((unsigned48)a & MASK48) >> s;
1104 if (s > 0 && ((a >> (s-1)) & 1) == 1)
1106 if (s > 1 && (a & QH_ONES(s-1)) != 0)
1107 t++;
1108 else
1109 t += t & 1;
1111 if (t > 0xFFFF)
1112 t = 0xFFFF;
1113 result = (signed16)t;
1115 return result;
1118 static signed16
1119 RZSQH(signed48 a, signed16 s)
1121 signed48 t;
1122 signed16 result = 0;
1124 if (s > 47)
1125 result = 0;
1126 else
1128 t = (a >> s);
1129 if ((a & QH_BIT(47)) == 0)
1131 if (t > QH_MAX)
1132 t = QH_MAX;
1134 else
1136 if (t < QH_MIN)
1137 t = QH_MIN;
1139 result = (signed16)t;
1141 return result;
1144 static signed16
1145 RZUQH(signed48 a, signed16 s)
1147 unsigned48 t;
1148 signed16 result = 0;
1150 if (s > 48)
1151 result = 0;
1152 else if (s == 48)
1153 result = ((unsigned48)a > QH_BIT(47) ? 1 : 0);
1154 else
1156 t = ((unsigned48)a & MASK48) >> s;
1157 if (t > 0xFFFF)
1158 t = 0xFFFF;
1159 result = (signed16)t;
1161 return result;
1165 typedef unsigned8 (*OB_ROUND)(signed24 a, unsigned8 s);
1167 #define OB_BIT(n) ((unsigned24)1 << (n))
1168 #define OB_ONES(n) (((unsigned24)1 << (n))-1)
1170 static unsigned8
1171 RNAUOB(signed24 a, unsigned8 s)
1173 unsigned8 result;
1174 unsigned24 t;
1176 if (s > 24)
1177 result = 0;
1178 else if (s == 24)
1179 result = ((unsigned24)a & MASK24) >> 23;
1180 else
1182 t = ((unsigned24)a & MASK24) >> s;
1183 if (s > 0 && ((a >> (s-1)) & 1) == 1)
1184 t ++;
1185 result = OB_CLAMP(t);
1187 return result;
1190 static unsigned8
1191 RNEUOB(signed24 a, unsigned8 s)
1193 unsigned8 result;
1194 unsigned24 t;
1196 if (s > 24)
1197 result = 0;
1198 else if (s == 24)
1199 result = (((unsigned24)a & MASK24) > OB_BIT(23) ? 1 : 0);
1200 else
1202 t = ((unsigned24)a & MASK24) >> s;
1203 if (s > 0 && ((a >> (s-1)) & 1) == 1)
1205 if (s > 1 && (a & OB_ONES(s-1)) != 0)
1206 t++;
1207 else
1208 t += t & 1;
1210 result = OB_CLAMP(t);
1212 return result;
1215 static unsigned8
1216 RZUOB(signed24 a, unsigned8 s)
1218 unsigned8 result;
1219 unsigned24 t;
1221 if (s >= 24)
1222 result = 0;
1223 else
1225 t = ((unsigned24)a & MASK24) >> s;
1226 result = OB_CLAMP(t);
1228 return result;
1232 static const QH_ROUND qh_round[] = {
1233 RNASQH, RNAUQH, RNESQH, RNEUQH, RZSQH, RZUQH
1236 static const OB_ROUND ob_round[] = {
1237 NULL, RNAUOB, NULL, RNEUOB, NULL, RZUOB
1241 static unsigned64
1242 qh_vector_round(sim_cpu *cpu, address_word cia, unsigned64 v2, QH_ROUND round)
1244 unsigned64 result = 0;
1245 int i, s;
1246 signed16 h, h2;
1248 s = 0;
1249 for (i = 0; i < 4; i++)
1251 h2 = (signed16)(v2 & 0xFFFF);
1252 if (h2 >= 0)
1253 h = (*round)(ACC.qh[i], h2);
1254 else
1256 UnpredictableResult ();
1257 h = 0xdead;
1259 v2 >>= 16;
1260 result |= ((unsigned64)((unsigned16)h) << s);
1261 s += 16;
1263 return result;
1266 static unsigned64
1267 qh_map_round(sim_cpu *cpu, address_word cia, signed16 h2, QH_ROUND round)
1269 unsigned64 result = 0;
1270 int i, s;
1271 signed16 h;
1273 s = 0;
1274 for (i = 0; i < 4; i++)
1276 if (h2 >= 0)
1277 h = (*round)(ACC.qh[i], h2);
1278 else
1280 UnpredictableResult ();
1281 h = 0xdead;
1283 result |= ((unsigned64)((unsigned16)h) << s);
1284 s += 16;
1286 return result;
1289 static unsigned64
1290 ob_vector_round(sim_cpu *cpu, address_word cia, unsigned64 v2, OB_ROUND round)
1292 unsigned64 result = 0;
1293 int i, s;
1294 unsigned8 b, b2;
1296 s = 0;
1297 for (i = 0; i < 8; i++)
1299 b2 = v2 & 0xFF; v2 >>= 8;
1300 b = (*round)(ACC.ob[i], b2);
1301 result |= ((unsigned64)b << s);
1302 s += 8;
1304 return result;
1307 static unsigned64
1308 ob_map_round(sim_cpu *cpu, address_word cia, unsigned8 b2, OB_ROUND round)
1310 unsigned64 result = 0;
1311 int i, s;
1312 unsigned8 b;
1314 s = 0;
1315 for (i = 0; i < 8; i++)
1317 b = (*round)(ACC.ob[i], b2);
1318 result |= ((unsigned64)b << s);
1319 s += 8;
1321 return result;
1325 unsigned64
1326 mdmx_round_op(sim_cpu *cpu,
1327 address_word cia,
1328 int rm,
1329 int vt,
1330 MX_fmtsel fmtsel)
1332 unsigned64 op2;
1333 unsigned64 result = 0;
1335 switch (MX_FMT (fmtsel))
1337 case mdmx_qh:
1338 switch (MX_VT (fmtsel))
1340 case sel_elem:
1341 op2 = ValueFPR(vt, fmt_mdmx);
1342 result = qh_map_round(cpu, cia, QH_ELEM(op2, fmtsel), qh_round[rm]);
1343 break;
1344 case sel_vect:
1345 op2 = ValueFPR(vt, fmt_mdmx);
1346 result = qh_vector_round(cpu, cia, op2, qh_round[rm]);
1347 break;
1348 case sel_imm:
1349 result = qh_map_round(cpu, cia, vt, qh_round[rm]);
1350 break;
1352 break;
1353 case mdmx_ob:
1354 switch (MX_VT (fmtsel))
1356 case sel_elem:
1357 op2 = ValueFPR(vt, fmt_mdmx);
1358 result = ob_map_round(cpu, cia, OB_ELEM(op2, fmtsel), ob_round[rm]);
1359 break;
1360 case sel_vect:
1361 op2 = ValueFPR(vt, fmt_mdmx);
1362 result = ob_vector_round(cpu, cia, op2, ob_round[rm]);
1363 break;
1364 case sel_imm:
1365 result = ob_map_round(cpu, cia, vt, ob_round[rm]);
1366 break;
1368 break;
1369 default:
1370 Unpredictable ();
1373 return result;
1377 /* Shuffle operation. */
/* One output element of a shuffle: which input supplies it and which
   element of that input is used.  In mdmx_shuffle, `vs' selects the
   first operand, `vt' the second, and `ss' the sign fill of an element
   of the first.  */
typedef struct
{
  enum { vs = 0, ss = 1, vt = 2 } source;
  unsigned int index;
} sh_map;
1384 static const sh_map ob_shuffle[][8] = {
1385 /* MDMX 2.0 encodings (3-4, 6-7). */
1386 /* vr5400 encoding (5), otherwise. */
1387 { }, /* RSVD */
1388 {{vt,4}, {vs,4}, {vt,5}, {vs,5}, {vt,6}, {vs,6}, {vt,7}, {vs,7}}, /* RSVD */
1389 {{vt,0}, {vs,0}, {vt,1}, {vs,1}, {vt,2}, {vs,2}, {vt,3}, {vs,3}}, /* RSVD */
1390 {{vs,0}, {ss,0}, {vs,1}, {ss,1}, {vs,2}, {ss,2}, {vs,3}, {ss,3}}, /* upsl */
1391 {{vt,1}, {vt,3}, {vt,5}, {vt,7}, {vs,1}, {vs,3}, {vs,5}, {vs,7}}, /* pach */
1392 {{vt,0}, {vt,2}, {vt,4}, {vt,6}, {vs,0}, {vs,2}, {vs,4}, {vs,6}}, /* pacl */
1393 {{vt,4}, {vs,4}, {vt,5}, {vs,5}, {vt,6}, {vs,6}, {vt,7}, {vs,7}}, /* mixh */
1394 {{vt,0}, {vs,0}, {vt,1}, {vs,1}, {vt,2}, {vs,2}, {vt,3}, {vs,3}} /* mixl */
1397 static const sh_map qh_shuffle[][4] = {
1398 {{vt,2}, {vs,2}, {vt,3}, {vs,3}}, /* mixh */
1399 {{vt,0}, {vs,0}, {vt,1}, {vs,1}}, /* mixl */
1400 {{vt,1}, {vt,3}, {vs,1}, {vs,3}}, /* pach */
1401 { }, /* RSVD */
1402 {{vt,1}, {vs,0}, {vt,3}, {vs,2}}, /* bfla */
1403 { }, /* RSVD */
1404 {{vt,2}, {vt,3}, {vs,2}, {vs,3}}, /* repa */
1405 {{vt,0}, {vt,1}, {vs,0}, {vs,1}} /* repb */
1409 unsigned64
1410 mdmx_shuffle(sim_cpu *cpu,
1411 address_word cia,
1412 int shop,
1413 unsigned64 op1,
1414 unsigned64 op2)
1416 unsigned64 result = 0;
1417 int i, s;
1418 int op;
1420 if ((shop & 0x3) == 0x1) /* QH format. */
1422 op = shop >> 2;
1423 s = 0;
1424 for (i = 0; i < 4; i++)
1426 unsigned64 v;
1428 switch (qh_shuffle[op][i].source)
1430 case vs:
1431 v = op1;
1432 break;
1433 case vt:
1434 v = op2;
1435 break;
1436 default:
1437 Unpredictable ();
1438 v = 0;
1440 result |= (((v >> 16*qh_shuffle[op][i].index) & 0xFFFF) << s);
1441 s += 16;
1444 else if ((shop & 0x1) == 0x0) /* OB format. */
1446 op = shop >> 1;
1447 s = 0;
1448 for (i = 0; i < 8; i++)
1450 unsigned8 b;
1451 unsigned int ishift = 8*ob_shuffle[op][i].index;
1453 switch (ob_shuffle[op][i].source)
1455 case vs:
1456 b = (op1 >> ishift) & 0xFF;
1457 break;
1458 case ss:
1459 b = ((op1 >> ishift) & 0x80) ? 0xFF : 0;
1460 break;
1461 case vt:
1462 b = (op2 >> ishift) & 0xFF;
1463 break;
1464 default:
1465 Unpredictable ();
1466 b = 0;
1468 result |= ((unsigned64)b << s);
1469 s += 8;
1472 else
1473 Unpredictable ();
1475 return result;