/* Simulation code for the MIPS MDMX ASE.
   Copyright (C) 2002-2024 Free Software Foundation, Inc.
   Contributed by Ed Satterthwaite and Chris Demetriou, of Broadcom
   Corporation (SiByte).

This file is part of GDB, the GNU debugger.

This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
/* This must come before any other includes.  */
#include "defs.h"

#include "sim-main.h"

/* Within mdmx.c we refer to the sim_cpu directly.  */
#define CPU cpu
#define SD  (CPU_STATE(CPU))

/* XXX FIXME: temporary hack while the impact of making unpredictable()
   a "normal" (non-igen) function is evaluated.  */
#define Unpredictable() unpredictable_action (cpu, cia)
/* MDMX Representations

   An 8-bit packed byte element (OB) is always unsigned.
   The 24-bit accumulators are signed and are represented as 32-bit
   signed values, which are reduced to 24-bit signed values prior to
   Round and Clamp operations.

   A 16-bit packed halfword element (QH) is always signed.
   The 48-bit accumulators are signed and are represented as 64-bit
   signed values, which are reduced to 48-bit signed values prior to
   Round and Clamp operations.

   The code below assumes a 2's-complement representation of signed
   quantities.  Care is required to clear extended sign bits when
   narrower signed values are repacked into wider unsigned fields.

   The code (and the code for arithmetic shifts in mips.igen) also makes
   the (not guaranteed portable) assumption that right shifts of signed
   quantities in C do sign extension.  */
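/* Illustration: an OB accumulator element holding 0x12345678 in its 32-bit
   container is reduced to the 24-bit value 0x345678 (see MASK24 below)
   before any Round and Clamp step is applied.  */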
typedef uint64_t unsigned48;
#define MASK48 (UNSIGNED64 (0xffffffffffff))

typedef uint32_t unsigned24;
#define MASK24 (UNSIGNED32 (0xffffff))
typedef enum {
  mdmx_ob,      /* OB (octal byte) */
  mdmx_qh       /* QH (quad half-word) */
} MX_fmt;

typedef enum {
  sel_elem,     /* element select */
  sel_vect,     /* vector select */
  sel_imm       /* immediate select */
} MX_select;
#define OB_MAX  ((uint8_t)0xFF)
#define QH_MIN  ((int16_t)0x8000)
#define QH_MAX  ((int16_t)0x7FFF)

#define OB_CLAMP(x)  ((uint8_t)((x) > OB_MAX ? OB_MAX : (x)))
#define QH_CLAMP(x)  ((int16_t)((x) < QH_MIN ? QH_MIN : \
                                ((x) > QH_MAX ? QH_MAX : (x))))

#define MX_FMT(fmtsel)  (((fmtsel) & 0x1) == 0 ? mdmx_ob : mdmx_qh)
#define MX_VT(fmtsel)   (((fmtsel) & 0x10) == 0 ? sel_elem : \
                         (((fmtsel) & 0x18) == 0x10 ? sel_vect : sel_imm))

#define QH_ELEM(v,fmtsel)  \
        ((int16_t)(((v) >> (((fmtsel) & 0xC) << 2)) & 0xFFFF))
#define OB_ELEM(v,fmtsel)  \
        ((uint8_t)(((v) >> (((fmtsel) & 0xE) << 2)) & 0xFF))
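/* Worked example of the fmtsel decoding above: for fmtsel == 0x0B, bit 0 is
   set so MX_FMT yields mdmx_qh, bit 4 is clear so MX_VT yields sel_elem, and
   QH_ELEM shifts the vector right by (0x0B & 0xC) << 2 == 32 bits, i.e. it
   extracts halfword element 2 of the second operand.  */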
typedef int16_t (*QH_FUNC)(int16_t, int16_t);
typedef uint8_t (*OB_FUNC)(uint8_t, uint8_t);
/* vectorized logical operators */

static int16_t
AndQH(int16_t ts, int16_t tt)
{
  return (int16_t)((uint16_t)ts & (uint16_t)tt);
}

static uint8_t
AndOB(uint8_t ts, uint8_t tt)
{
  return ts & tt;
}

static int16_t
NorQH(int16_t ts, int16_t tt)
{
  return (int16_t)(((uint16_t)ts | (uint16_t)tt) ^ 0xFFFF);
}

static uint8_t
NorOB(uint8_t ts, uint8_t tt)
{
  return (ts | tt) ^ 0xFF;
}

static int16_t
OrQH(int16_t ts, int16_t tt)
{
  return (int16_t)((uint16_t)ts | (uint16_t)tt);
}

static uint8_t
OrOB(uint8_t ts, uint8_t tt)
{
  return ts | tt;
}

static int16_t
XorQH(int16_t ts, int16_t tt)
{
  return (int16_t)((uint16_t)ts ^ (uint16_t)tt);
}

static uint8_t
XorOB(uint8_t ts, uint8_t tt)
{
  return ts ^ tt;
}

static int16_t
SLLQH(int16_t ts, int16_t tt)
{
  uint32_t s = (uint32_t)tt & 0xF;
  return (int16_t)(((uint32_t)ts << s) & 0xFFFF);
}

static uint8_t
SLLOB(uint8_t ts, uint8_t tt)
{
  uint32_t s = tt & 0x7;
  return (ts << s) & 0xFF;
}

static int16_t
SRLQH(int16_t ts, int16_t tt)
{
  uint32_t s = (uint32_t)tt & 0xF;
  return (int16_t)((uint16_t)ts >> s);
}

static uint8_t
SRLOB(uint8_t ts, uint8_t tt)
{
  uint32_t s = tt & 0x7;
  return ts >> s;
}
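/* Note that the shift helpers above take the shift count modulo the element
   width (low 4 bits for QH, low 3 bits for OB), so out-of-range counts wrap
   rather than clearing the element.  */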
/* Vectorized arithmetic operators.  */

static int16_t
AddQH(int16_t ts, int16_t tt)
{
  int32_t t = (int32_t)ts + (int32_t)tt;
  return QH_CLAMP(t);
}

static uint8_t
AddOB(uint8_t ts, uint8_t tt)
{
  uint32_t t = (uint32_t)ts + (uint32_t)tt;
  return OB_CLAMP(t);
}

static int16_t
SubQH(int16_t ts, int16_t tt)
{
  int32_t t = (int32_t)ts - (int32_t)tt;
  return QH_CLAMP(t);
}

static uint8_t
SubOB(uint8_t ts, uint8_t tt)
{
  int32_t t;
  t = (int32_t)ts - (int32_t)tt;
  if (t < 0)
    t = 0;
  return (uint8_t)t;
}

static int16_t
MinQH(int16_t ts, int16_t tt)
{
  return (ts < tt ? ts : tt);
}

static uint8_t
MinOB(uint8_t ts, uint8_t tt)
{
  return (ts < tt ? ts : tt);
}

static int16_t
MaxQH(int16_t ts, int16_t tt)
{
  return (ts > tt ? ts : tt);
}

static uint8_t
MaxOB(uint8_t ts, uint8_t tt)
{
  return (ts > tt ? ts : tt);
}

static int16_t
MulQH(int16_t ts, int16_t tt)
{
  int32_t t = (int32_t)ts * (int32_t)tt;
  return QH_CLAMP(t);
}

static uint8_t
MulOB(uint8_t ts, uint8_t tt)
{
  uint32_t t = (uint32_t)ts * (uint32_t)tt;
  return OB_CLAMP(t);
}

/* "msgn" and "sra" are defined only for QH format.  */

static int16_t
MsgnQH(int16_t ts, int16_t tt)
{
  int16_t t;
  if (ts < 0)
    t = (tt == QH_MIN ? QH_MAX : -tt);
  else if (ts == 0)
    t = 0;
  else
    t = tt;
  return t;
}

static int16_t
SRAQH(int16_t ts, int16_t tt)
{
  uint32_t s = (uint32_t)tt & 0xF;
  return (int16_t)((int32_t)ts >> s);
}

/* "pabsdiff" and "pavg" are defined only for OB format.  */

static uint8_t
AbsDiffOB(uint8_t ts, uint8_t tt)
{
  return (ts >= tt ? ts - tt : tt - ts);
}

static uint8_t
AvgOB(uint8_t ts, uint8_t tt)
{
  return ((uint32_t)ts + (uint32_t)tt + 1) >> 1;
}
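/* AvgOB rounds halves up: for example AvgOB(1, 2) == (1 + 2 + 1) >> 1 == 2,
   while AvgOB(2, 2) == 2.  */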
/* Dispatch tables for operations that update a CPR.  */

static const QH_FUNC qh_func[] = {
  AndQH,  NorQH,  OrQH,   XorQH,  SLLQH,  SRLQH,
  AddQH,  SubQH,  MinQH,  MaxQH,
  MulQH,  MsgnQH, SRAQH,  NULL,   NULL
};

static const OB_FUNC ob_func[] = {
  AndOB,  NorOB,  OrOB,   XorOB,  SLLOB,  SRLOB,
  AddOB,  SubOB,  MinOB,  MaxOB,
  MulOB,  NULL,   NULL,   AbsDiffOB, AvgOB
};
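/* The NULL slots mark operations that exist only in the other format
   ("msgn" and "sra" are QH-only, "pabsdiff" and "pavg" are OB-only); the
   decoder is expected never to dispatch an op index through the wrong
   table.  */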
/* Auxiliary functions for CPR updates.  */

/* Vector mapping for QH format.  */
static uint64_t
qh_vector_op(uint64_t v1, uint64_t v2, QH_FUNC func)
{
  uint64_t result = 0;
  int i;
  int16_t h, h1, h2;

  for (i = 0; i < 64; i += 16)
    {
      h1 = (int16_t)(v1 & 0xFFFF);  v1 >>= 16;
      h2 = (int16_t)(v2 & 0xFFFF);  v2 >>= 16;
      h = (*func)(h1, h2);
      result |= ((uint64_t)((uint16_t)h) << i);
    }
  return result;
}

static uint64_t
qh_map_op(uint64_t v1, int16_t h2, QH_FUNC func)
{
  uint64_t result = 0;
  int i;
  int16_t h, h1;

  for (i = 0; i < 64; i += 16)
    {
      h1 = (int16_t)(v1 & 0xFFFF);  v1 >>= 16;
      h = (*func)(h1, h2);
      result |= ((uint64_t)((uint16_t)h) << i);
    }
  return result;
}

/* Vector operations for OB format.  */

static uint64_t
ob_vector_op(uint64_t v1, uint64_t v2, OB_FUNC func)
{
  uint64_t result = 0;
  int i;
  uint8_t b, b1, b2;

  for (i = 0; i < 64; i += 8)
    {
      b1 = v1 & 0xFF;  v1 >>= 8;
      b2 = v2 & 0xFF;  v2 >>= 8;
      b = (*func)(b1, b2);
      result |= ((uint64_t)b << i);
    }
  return result;
}

static uint64_t
ob_map_op(uint64_t v1, uint8_t b2, OB_FUNC func)
{
  uint64_t result = 0;
  int i;
  uint8_t b, b1;

  for (i = 0; i < 64; i += 8)
    {
      b1 = v1 & 0xFF;  v1 >>= 8;
      b = (*func)(b1, b2);
      result |= ((uint64_t)b << i);
    }
  return result;
}
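/* The *_map_op helpers above apply FUNC with one fixed scalar as the second
   operand for every element (the element-select and immediate forms), while
   the *_vector_op helpers pair corresponding elements of V1 and V2.  */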
/* Primary entry for operations that update CPRs.  */
uint64_t
mdmx_cpr_op(sim_cpu *cpu,
            address_word cia,
            int op,
            uint64_t op1,
            int vt,
            MX_fmtsel fmtsel)
{
  uint64_t op2;
  uint64_t result = 0;

  switch (MX_FMT (fmtsel))
    {
    case mdmx_qh:
      switch (MX_VT (fmtsel))
        {
        case sel_elem:
          op2 = ValueFPR(vt, fmt_mdmx);
          result = qh_map_op(op1, QH_ELEM(op2, fmtsel), qh_func[op]);
          break;
        case sel_vect:
          result = qh_vector_op(op1, ValueFPR(vt, fmt_mdmx), qh_func[op]);
          break;
        case sel_imm:
          result = qh_map_op(op1, vt, qh_func[op]);
          break;
        }
      break;
    case mdmx_ob:
      switch (MX_VT (fmtsel))
        {
        case sel_elem:
          op2 = ValueFPR(vt, fmt_mdmx);
          result = ob_map_op(op1, OB_ELEM(op2, fmtsel), ob_func[op]);
          break;
        case sel_vect:
          result = ob_vector_op(op1, ValueFPR(vt, fmt_mdmx), ob_func[op]);
          break;
        case sel_imm:
          result = ob_map_op(op1, vt, ob_func[op]);
          break;
        }
      break;
    }

  return result;
}
/* Operations that update CCs.  */

static void
qh_vector_test(sim_cpu *cpu, uint64_t v1, uint64_t v2, int cond)
{
  int i;
  int16_t h1, h2;
  int boolean;

  for (i = 0; i < 4; i++)
    {
      h1 = (int16_t)(v1 & 0xFFFF);  v1 >>= 16;
      h2 = (int16_t)(v2 & 0xFFFF);  v2 >>= 16;
      boolean = ((cond & MX_C_EQ) && (h1 == h2)) ||
        ((cond & MX_C_LT) && (h1 < h2));
      SETFCC(i, boolean);
    }
}

static void
qh_map_test(sim_cpu *cpu, uint64_t v1, int16_t h2, int cond)
{
  int i;
  int16_t h1;
  int boolean;

  for (i = 0; i < 4; i++)
    {
      h1 = (int16_t)(v1 & 0xFFFF);  v1 >>= 16;
      boolean = ((cond & MX_C_EQ) && (h1 == h2)) ||
        ((cond & MX_C_LT) && (h1 < h2));
      SETFCC(i, boolean);
    }
}

static void
ob_vector_test(sim_cpu *cpu, uint64_t v1, uint64_t v2, int cond)
{
  int i;
  uint8_t b1, b2;
  int boolean;

  for (i = 0; i < 8; i++)
    {
      b1 = v1 & 0xFF;  v1 >>= 8;
      b2 = v2 & 0xFF;  v2 >>= 8;
      boolean = ((cond & MX_C_EQ) && (b1 == b2)) ||
        ((cond & MX_C_LT) && (b1 < b2));
      SETFCC(i, boolean);
    }
}

static void
ob_map_test(sim_cpu *cpu, uint64_t v1, uint8_t b2, int cond)
{
  int i;
  uint8_t b1;
  int boolean;

  for (i = 0; i < 8; i++)
    {
      b1 = (uint8_t)(v1 & 0xFF);  v1 >>= 8;
      boolean = ((cond & MX_C_EQ) && (b1 == b2)) ||
        ((cond & MX_C_LT) && (b1 < b2));
      SETFCC(i, boolean);
    }
}
void
mdmx_cc_op(sim_cpu *cpu,
           address_word cia,
           int cond,
           uint64_t v1,
           int vt,
           MX_fmtsel fmtsel)
{
  uint64_t op2;

  switch (MX_FMT (fmtsel))
    {
    case mdmx_qh:
      switch (MX_VT (fmtsel))
        {
        case sel_elem:
          op2 = ValueFPR(vt, fmt_mdmx);
          qh_map_test(cpu, v1, QH_ELEM(op2, fmtsel), cond);
          break;
        case sel_vect:
          qh_vector_test(cpu, v1, ValueFPR(vt, fmt_mdmx), cond);
          break;
        case sel_imm:
          qh_map_test(cpu, v1, vt, cond);
          break;
        }
      break;
    case mdmx_ob:
      switch (MX_VT (fmtsel))
        {
        case sel_elem:
          op2 = ValueFPR(vt, fmt_mdmx);
          ob_map_test(cpu, v1, OB_ELEM(op2, fmtsel), cond);
          break;
        case sel_vect:
          ob_vector_test(cpu, v1, ValueFPR(vt, fmt_mdmx), cond);
          break;
        case sel_imm:
          ob_map_test(cpu, v1, vt, cond);
          break;
        }
      break;
    }
}
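/* COND is a bit mask: MX_C_EQ requests an equality test, MX_C_LT a signed
   (QH) or unsigned (OB) less-than test, and setting both bits gives a
   less-than-or-equal test for each element.  */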
/* Pick operations.  */

static uint64_t
qh_vector_pick(sim_cpu *cpu, uint64_t v1, uint64_t v2, int tf)
{
  uint64_t result = 0;
  int i, s;
  uint16_t h;

  s = 0;
  for (i = 0; i < 4; i++)
    {
      h = ((GETFCC(i) == tf) ? (v1 & 0xFFFF) : (v2 & 0xFFFF));
      v1 >>= 16;  v2 >>= 16;
      result |= ((uint64_t)h << s);
      s += 16;
    }
  return result;
}

static uint64_t
qh_map_pick(sim_cpu *cpu, uint64_t v1, int16_t h2, int tf)
{
  uint64_t result = 0;
  int i, s;
  uint16_t h;

  s = 0;
  for (i = 0; i < 4; i++)
    {
      h = (GETFCC(i) == tf) ? (v1 & 0xFFFF) : (uint16_t)h2;
      v1 >>= 16;
      result |= ((uint64_t)h << s);
      s += 16;
    }
  return result;
}

static uint64_t
ob_vector_pick(sim_cpu *cpu, uint64_t v1, uint64_t v2, int tf)
{
  uint64_t result = 0;
  int i, s;
  uint8_t b;

  s = 0;
  for (i = 0; i < 8; i++)
    {
      b = (GETFCC(i) == tf) ? (v1 & 0xFF) : (v2 & 0xFF);
      v1 >>= 8;  v2 >>= 8;
      result |= ((uint64_t)b << s);
      s += 8;
    }
  return result;
}

static uint64_t
ob_map_pick(sim_cpu *cpu, uint64_t v1, uint8_t b2, int tf)
{
  uint64_t result = 0;
  int i, s;
  uint8_t b;

  s = 0;
  for (i = 0; i < 8; i++)
    {
      b = (GETFCC(i) == tf) ? (v1 & 0xFF) : b2;
      v1 >>= 8;
      result |= ((uint64_t)b << s);
      s += 8;
    }
  return result;
}
uint64_t
mdmx_pick_op(sim_cpu *cpu,
             address_word cia,
             int tf,
             uint64_t v1,
             int vt,
             MX_fmtsel fmtsel)
{
  uint64_t result = 0;
  uint64_t op2;

  switch (MX_FMT (fmtsel))
    {
    case mdmx_qh:
      switch (MX_VT (fmtsel))
        {
        case sel_elem:
          op2 = ValueFPR(vt, fmt_mdmx);
          result = qh_map_pick(cpu, v1, QH_ELEM(op2, fmtsel), tf);
          break;
        case sel_vect:
          result = qh_vector_pick(cpu, v1, ValueFPR(vt, fmt_mdmx), tf);
          break;
        case sel_imm:
          result = qh_map_pick(cpu, v1, vt, tf);
          break;
        }
      break;
    case mdmx_ob:
      switch (MX_VT (fmtsel))
        {
        case sel_elem:
          op2 = ValueFPR(vt, fmt_mdmx);
          result = ob_map_pick(cpu, v1, OB_ELEM(op2, fmtsel), tf);
          break;
        case sel_vect:
          result = ob_vector_pick(cpu, v1, ValueFPR(vt, fmt_mdmx), tf);
          break;
        case sel_imm:
          result = ob_map_pick(cpu, v1, vt, tf);
          break;
        }
      break;
    }
  return result;
}
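/* Each pick helper chooses, element by element, the field from V1 when that
   element's condition code equals TF and the field from the second operand
   otherwise, so the "pick true" and "pick false" forms differ only in the
   TF argument.  */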
typedef void (*QH_ACC)(signed48 *a, int16_t ts, int16_t tt);

static void
AccAddAQH(signed48 *a, int16_t ts, int16_t tt)
{
  *a += (signed48)ts + (signed48)tt;
}

static void
AccAddLQH(signed48 *a, int16_t ts, int16_t tt)
{
  *a = (signed48)ts + (signed48)tt;
}

static void
AccMulAQH(signed48 *a, int16_t ts, int16_t tt)
{
  *a += (signed48)ts * (signed48)tt;
}

static void
AccMulLQH(signed48 *a, int16_t ts, int16_t tt)
{
  *a = (signed48)ts * (signed48)tt;
}

static void
SubMulAQH(signed48 *a, int16_t ts, int16_t tt)
{
  *a -= (signed48)ts * (signed48)tt;
}

static void
SubMulLQH(signed48 *a, int16_t ts, int16_t tt)
{
  *a = -((signed48)ts * (signed48)tt);
}

static void
AccSubAQH(signed48 *a, int16_t ts, int16_t tt)
{
  *a += (signed48)ts - (signed48)tt;
}

static void
AccSubLQH(signed48 *a, int16_t ts, int16_t tt)
{
  *a = (signed48)ts - (signed48)tt;
}
typedef void (*OB_ACC)(signed24 *acc, uint8_t ts, uint8_t tt);

static void
AccAddAOB(signed24 *a, uint8_t ts, uint8_t tt)
{
  *a += (signed24)ts + (signed24)tt;
}

static void
AccAddLOB(signed24 *a, uint8_t ts, uint8_t tt)
{
  *a = (signed24)ts + (signed24)tt;
}

static void
AccMulAOB(signed24 *a, uint8_t ts, uint8_t tt)
{
  *a += (signed24)ts * (signed24)tt;
}

static void
AccMulLOB(signed24 *a, uint8_t ts, uint8_t tt)
{
  *a = (signed24)ts * (signed24)tt;
}

static void
SubMulAOB(signed24 *a, uint8_t ts, uint8_t tt)
{
  *a -= (signed24)ts * (signed24)tt;
}

static void
SubMulLOB(signed24 *a, uint8_t ts, uint8_t tt)
{
  *a = -((signed24)ts * (signed24)tt);
}

static void
AccSubAOB(signed24 *a, uint8_t ts, uint8_t tt)
{
  *a += (signed24)ts - (signed24)tt;
}

static void
AccSubLOB(signed24 *a, uint8_t ts, uint8_t tt)
{
  *a = (signed24)ts - (signed24)tt;
}

static void
AccAbsDiffOB(signed24 *a, uint8_t ts, uint8_t tt)
{
  uint8_t t = (ts >= tt ? ts - tt : tt - ts);
  *a += t;
}
/* Dispatch tables for operations that update the accumulator.  */

static const QH_ACC qh_acc[] = {
  AccAddAQH, AccAddLQH, AccMulAQH, AccMulLQH,
  SubMulAQH, SubMulLQH, AccSubAQH, AccSubLQH,
  NULL
};

static const OB_ACC ob_acc[] = {
  AccAddAOB, AccAddLOB, AccMulAOB, AccMulLOB,
  SubMulAOB, SubMulLOB, AccSubAOB, AccSubLOB,
  AccAbsDiffOB
};
static void
qh_vector_acc(signed48 a[], uint64_t v1, uint64_t v2, QH_ACC acc)
{
  int i;
  int16_t h1, h2;

  for (i = 0; i < 4; i++)
    {
      h1 = (int16_t)(v1 & 0xFFFF);  v1 >>= 16;
      h2 = (int16_t)(v2 & 0xFFFF);  v2 >>= 16;
      (*acc)(&a[i], h1, h2);
    }
}

static void
qh_map_acc(signed48 a[], uint64_t v1, int16_t h2, QH_ACC acc)
{
  int i;
  int16_t h1;

  for (i = 0; i < 4; i++)
    {
      h1 = (int16_t)(v1 & 0xFFFF);  v1 >>= 16;
      (*acc)(&a[i], h1, h2);
    }
}

static void
ob_vector_acc(signed24 a[], uint64_t v1, uint64_t v2, OB_ACC acc)
{
  int i;
  uint8_t b1, b2;

  for (i = 0; i < 8; i++)
    {
      b1 = v1 & 0xFF;  v1 >>= 8;
      b2 = v2 & 0xFF;  v2 >>= 8;
      (*acc)(&a[i], b1, b2);
    }
}

static void
ob_map_acc(signed24 a[], uint64_t v1, uint8_t b2, OB_ACC acc)
{
  int i;
  uint8_t b1;

  for (i = 0; i < 8; i++)
    {
      b1 = v1 & 0xFF;  v1 >>= 8;
      (*acc)(&a[i], b1, b2);
    }
}
/* Primary entry for operations that accumulate.  */
void
mdmx_acc_op(sim_cpu *cpu,
            address_word cia,
            int op,
            uint64_t op1,
            int vt,
            MX_fmtsel fmtsel)
{
  uint64_t op2;

  switch (MX_FMT (fmtsel))
    {
    case mdmx_qh:
      switch (MX_VT (fmtsel))
        {
        case sel_elem:
          op2 = ValueFPR(vt, fmt_mdmx);
          qh_map_acc(ACC.qh, op1, QH_ELEM(op2, fmtsel), qh_acc[op]);
          break;
        case sel_vect:
          qh_vector_acc(ACC.qh, op1, ValueFPR(vt, fmt_mdmx), qh_acc[op]);
          break;
        case sel_imm:
          qh_map_acc(ACC.qh, op1, vt, qh_acc[op]);
          break;
        }
      break;
    case mdmx_ob:
      switch (MX_VT (fmtsel))
        {
        case sel_elem:
          op2 = ValueFPR(vt, fmt_mdmx);
          ob_map_acc(ACC.ob, op1, OB_ELEM(op2, fmtsel), ob_acc[op]);
          break;
        case sel_vect:
          ob_vector_acc(ACC.ob, op1, ValueFPR(vt, fmt_mdmx), ob_acc[op]);
          break;
        case sel_imm:
          ob_map_acc(ACC.ob, op1, vt, ob_acc[op]);
          break;
        }
      break;
    }
}
/* Reading and writing accumulator (no conversion).  */

uint64_t
mdmx_rac_op(sim_cpu *cpu,
            address_word cia,
            int op,
            int fmt)
{
  uint64_t result;
  unsigned int shift;
  int i;

  shift = op;                   /* L = 00, M = 01, H = 10.  */
  result = 0;

  switch (fmt)
    {
    case mdmx_qh:
      shift <<= 4;              /* 16 bits per element.  */
      for (i = 3; i >= 0; --i)
        {
          result <<= 16;
          result |= ((ACC.qh[i] >> shift) & 0xFFFF);
        }
      break;
    case mdmx_ob:
      shift <<= 3;              /*  8 bits per element.  */
      for (i = 7; i >= 0; --i)
        {
          result <<= 8;
          result |= ((ACC.ob[i] >> shift) & 0xFF);
        }
      break;
    }
  return result;
}
void
mdmx_wacl(sim_cpu *cpu,
          address_word cia,
          int fmt,
          uint64_t vs,
          uint64_t vt)
{
  int i;

  switch (fmt)
    {
    case mdmx_qh:
      for (i = 0; i < 4; i++)
        {
          int32_t s = (int16_t)(vs & 0xFFFF);
          ACC.qh[i] = ((signed48)s << 16) | (vt & 0xFFFF);
          vs >>= 16;  vt >>= 16;
        }
      break;
    case mdmx_ob:
      for (i = 0; i < 8; i++)
        {
          int16_t s = (int8_t)(vs & 0xFF);
          ACC.ob[i] = ((signed24)s << 8) | (vt & 0xFF);
          vs >>= 8;  vt >>= 8;
        }
      break;
    }
}
void
mdmx_wach(sim_cpu *cpu,
          address_word cia,
          int fmt,
          uint64_t vs)
{
  int i;

  switch (fmt)
    {
    case mdmx_qh:
      for (i = 0; i < 4; i++)
        {
          int32_t s = (int16_t)(vs & 0xFFFF);
          ACC.qh[i] &= ~((signed48)0xFFFF << 32);
          ACC.qh[i] |= ((signed48)s << 32);
          vs >>= 16;
        }
      break;
    case mdmx_ob:
      for (i = 0; i < 8; i++)
        {
          ACC.ob[i] &= ~((signed24)0xFF << 16);
          ACC.ob[i] |= ((signed24)(vs & 0xFF) << 16);
          vs >>= 8;
        }
      break;
    }
}
/* Reading and writing accumulator (rounding conversions).
   Enumerating function guarantees s >= 0 for QH ops.  */

typedef int16_t (*QH_ROUND)(signed48 a, int16_t s);

#define QH_BIT(n)   ((unsigned48)1 << (n))
#define QH_ONES(n)  (((unsigned48)1 << (n))-1)
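/* The rounding helpers below follow the MDMX rounding-mode suffixes: RNA*
   rounds to nearest with ties away from zero, RNE* rounds to nearest even,
   and RZ* truncates; the trailing S or U selects clamping to the signed or
   unsigned element range.  */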
static int16_t
RNASQH(signed48 a, int16_t s)
{
  signed48 t;
  int16_t result;

  if (s > 48)
    result = 0;
  else
    {
      t = a >> s;
      if ((a & QH_BIT(47)) == 0)
        {
          if (s > 0 && ((a >> (s-1)) & 1) == 1)
            t++;
          if (t > QH_MAX)
            t = QH_MAX;
        }
      else
        {
          if (s > 0 && ((a >> (s-1)) & 1) == 1)
            {
              if (s > 1 && ((unsigned48)a & QH_ONES(s-1)) != 0)
                t++;
            }
          if (t < QH_MIN)
            t = QH_MIN;
        }
      result = (int16_t)t;
    }
  return result;
}
static int16_t
RNAUQH(signed48 a, int16_t s)
{
  unsigned48 t;
  int16_t result;

  if (s > 48)
    result = 0;
  else if (s == 48)
    result = ((unsigned48)a & MASK48) >> 47;
  else
    {
      t = ((unsigned48)a & MASK48) >> s;
      if (s > 0 && ((a >> (s-1)) & 1) == 1)
        t++;
      if (t > 0xFFFF)
        t = 0xFFFF;
      result = (int16_t)t;
    }
  return result;
}
static int16_t
RNESQH(signed48 a, int16_t s)
{
  signed48 t;
  int16_t result;

  if (s > 48)
    result = 0;
  else
    {
      t = a >> s;
      if (s > 0 && ((a >> (s-1)) & 1) == 1)
        {
          if (s == 1 || (a & QH_ONES(s-1)) == 0)
            t += t & 1;         /* Tie: round to even.  */
          else
            t += 1;
        }
      if ((a & QH_BIT(47)) == 0)
        {
          if (t > QH_MAX)
            t = QH_MAX;
        }
      else
        {
          if (t < QH_MIN)
            t = QH_MIN;
        }
      result = (int16_t)t;
    }
  return result;
}
static int16_t
RNEUQH(signed48 a, int16_t s)
{
  unsigned48 t;
  int16_t result;

  if (s > 48)
    result = 0;
  else if (s == 48)
    result = ((unsigned48)a > QH_BIT(47) ? 1 : 0);
  else
    {
      t = ((unsigned48)a & MASK48) >> s;
      if (s > 0 && ((a >> (s-1)) & 1) == 1)
        {
          if (s > 1 && (a & QH_ONES(s-1)) != 0)
            t++;
          else
            t += t & 1;         /* Tie: round to even.  */
        }
      if (t > 0xFFFF)
        t = 0xFFFF;
      result = (int16_t)t;
    }
  return result;
}
static int16_t
RZSQH(signed48 a, int16_t s)
{
  signed48 t;
  int16_t result;

  if (s > 48)
    result = 0;
  else
    {
      t = a >> s;
      if ((a & QH_BIT(47)) == 0)
        {
          if (t > QH_MAX)
            t = QH_MAX;
        }
      else
        {
          if (t < QH_MIN)
            t = QH_MIN;
        }
      result = (int16_t)t;
    }
  return result;
}
static int16_t
RZUQH(signed48 a, int16_t s)
{
  unsigned48 t;
  int16_t result;

  if (s > 48)
    result = 0;
  else if (s == 48)
    result = ((unsigned48)a > QH_BIT(47) ? 1 : 0);
  else
    {
      t = ((unsigned48)a & MASK48) >> s;
      if (t > 0xFFFF)
        t = 0xFFFF;
      result = (int16_t)t;
    }
  return result;
}
typedef uint8_t (*OB_ROUND)(signed24 a, uint8_t s);

#define OB_BIT(n)   ((unsigned24)1 << (n))
#define OB_ONES(n)  (((unsigned24)1 << (n))-1)
static uint8_t
RNAUOB(signed24 a, uint8_t s)
{
  unsigned24 t;
  uint8_t result;

  if (s > 24)
    result = 0;
  else if (s == 24)
    result = ((unsigned24)a & MASK24) >> 23;
  else
    {
      t = ((unsigned24)a & MASK24) >> s;
      if (s > 0 && ((a >> (s-1)) & 1) == 1)
        t++;
      result = OB_CLAMP(t);
    }
  return result;
}
static uint8_t
RNEUOB(signed24 a, uint8_t s)
{
  unsigned24 t;
  uint8_t result;

  if (s > 24)
    result = 0;
  else if (s == 24)
    result = (((unsigned24)a & MASK24) > OB_BIT(23) ? 1 : 0);
  else
    {
      t = ((unsigned24)a & MASK24) >> s;
      if (s > 0 && ((a >> (s-1)) & 1) == 1)
        {
          if (s > 1 && (a & OB_ONES(s-1)) != 0)
            t++;
          else
            t += t & 1;         /* Tie: round to even.  */
        }
      result = OB_CLAMP(t);
    }
  return result;
}
static uint8_t
RZUOB(signed24 a, uint8_t s)
{
  unsigned24 t;
  uint8_t result;

  if (s >= 24)
    result = 0;
  else
    {
      t = ((unsigned24)a & MASK24) >> s;
      result = OB_CLAMP(t);
    }
  return result;
}
static const QH_ROUND qh_round[] = {
  RNASQH, RNAUQH, RNESQH, RNEUQH, RZSQH, RZUQH
};

static const OB_ROUND ob_round[] = {
  NULL,   RNAUOB, NULL,   RNEUOB, NULL,  RZUOB
};
static uint64_t
qh_vector_round(sim_cpu *cpu, address_word cia, uint64_t v2, QH_ROUND round)
{
  uint64_t result = 0;
  int i, s;
  int16_t h, h2;

  s = 0;
  for (i = 0; i < 4; i++)
    {
      h2 = (int16_t)(v2 & 0xFFFF);
      if (h2 >= 0)
        h = (*round)(ACC.qh[i], h2);
      else
        {
          UnpredictableResult ();
          h = 0;
        }
      v2 >>= 16;
      result |= ((uint64_t)((uint16_t)h) << s);
      s += 16;
    }
  return result;
}
static uint64_t
qh_map_round(sim_cpu *cpu, address_word cia, int16_t h2, QH_ROUND round)
{
  uint64_t result = 0;
  int i, s;
  int16_t h;

  s = 0;
  for (i = 0; i < 4; i++)
    {
      if (h2 >= 0)
        h = (*round)(ACC.qh[i], h2);
      else
        {
          UnpredictableResult ();
          h = 0;
        }
      result |= ((uint64_t)((uint16_t)h) << s);
      s += 16;
    }
  return result;
}
static uint64_t
ob_vector_round(sim_cpu *cpu, address_word cia, uint64_t v2, OB_ROUND round)
{
  uint64_t result = 0;
  int i, s;
  uint8_t b, b2;

  s = 0;
  for (i = 0; i < 8; i++)
    {
      b2 = v2 & 0xFF;  v2 >>= 8;
      b = (*round)(ACC.ob[i], b2);
      result |= ((uint64_t)b << s);
      s += 8;
    }
  return result;
}
static uint64_t
ob_map_round(sim_cpu *cpu, address_word cia, uint8_t b2, OB_ROUND round)
{
  uint64_t result = 0;
  int i, s;
  uint8_t b;

  s = 0;
  for (i = 0; i < 8; i++)
    {
      b = (*round)(ACC.ob[i], b2);
      result |= ((uint64_t)b << s);
      s += 8;
    }
  return result;
}
uint64_t
mdmx_round_op(sim_cpu *cpu,
              address_word cia,
              int rm,
              int vt,
              MX_fmtsel fmtsel)
{
  uint64_t op2;
  uint64_t result = 0;

  switch (MX_FMT (fmtsel))
    {
    case mdmx_qh:
      switch (MX_VT (fmtsel))
        {
        case sel_elem:
          op2 = ValueFPR(vt, fmt_mdmx);
          result = qh_map_round(cpu, cia, QH_ELEM(op2, fmtsel), qh_round[rm]);
          break;
        case sel_vect:
          op2 = ValueFPR(vt, fmt_mdmx);
          result = qh_vector_round(cpu, cia, op2, qh_round[rm]);
          break;
        case sel_imm:
          result = qh_map_round(cpu, cia, vt, qh_round[rm]);
          break;
        }
      break;
    case mdmx_ob:
      switch (MX_VT (fmtsel))
        {
        case sel_elem:
          op2 = ValueFPR(vt, fmt_mdmx);
          result = ob_map_round(cpu, cia, OB_ELEM(op2, fmtsel), ob_round[rm]);
          break;
        case sel_vect:
          op2 = ValueFPR(vt, fmt_mdmx);
          result = ob_vector_round(cpu, cia, op2, ob_round[rm]);
          break;
        case sel_imm:
          result = ob_map_round(cpu, cia, vt, ob_round[rm]);
          break;
        }
      break;
    }

  return result;
}
/* Shuffle operation.  */

typedef struct {
  enum {vs, ss, vt} source;
  unsigned int index;
} sh_map;
static const sh_map ob_shuffle[][8] = {
  /* MDMX 2.0 encodings (3-4, 6-7).  */
  /* vr5400 encoding (5), otherwise.  */
  {{vt,4}, {vs,4}, {vt,5}, {vs,5}, {vt,6}, {vs,6}, {vt,7}, {vs,7}},   /* RSVD */
  {{vt,0}, {vs,0}, {vt,1}, {vs,1}, {vt,2}, {vs,2}, {vt,3}, {vs,3}},   /* RSVD */
  {{vs,0}, {ss,0}, {vs,1}, {ss,1}, {vs,2}, {ss,2}, {vs,3}, {ss,3}},   /* upsl */
  {{vt,1}, {vt,3}, {vt,5}, {vt,7}, {vs,1}, {vs,3}, {vs,5}, {vs,7}},   /* pach */
  {{vt,0}, {vt,2}, {vt,4}, {vt,6}, {vs,0}, {vs,2}, {vs,4}, {vs,6}},   /* pacl */
  {{vt,4}, {vs,4}, {vt,5}, {vs,5}, {vt,6}, {vs,6}, {vt,7}, {vs,7}},   /* mixh */
  {{vt,0}, {vs,0}, {vt,1}, {vs,1}, {vt,2}, {vs,2}, {vt,3}, {vs,3}}    /* mixl */
};
static const sh_map qh_shuffle[][4] = {
  {{vt,2}, {vs,2}, {vt,3}, {vs,3}},   /* mixh */
  {{vt,0}, {vs,0}, {vt,1}, {vs,1}},   /* mixl */
  {{vt,1}, {vt,3}, {vs,1}, {vs,3}},   /* pach */
  {{vt,0}, {vt,2}, {vs,0}, {vs,2}},   /* pacl */
  {{vt,1}, {vs,0}, {vt,3}, {vs,2}},   /* bfla */
  {{vt,0}, {vs,1}, {vt,2}, {vs,3}},   /* bflb */
  {{vt,2}, {vt,3}, {vs,2}, {vs,3}},   /* repa */
  {{vt,0}, {vt,1}, {vs,0}, {vs,1}}    /* repb */
};
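/* Each sh_map entry names the element to copy: {vt,4} takes element 4 of the
   vt operand, {vs,n} element n of vs, and {ss,n} (OB only) replicates the
   sign bit of vs element n across the whole byte.  */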
uint64_t
mdmx_shuffle(sim_cpu *cpu,
             address_word cia,
             int shop,
             uint64_t op1,
             uint64_t op2)
{
  uint64_t result = 0;
  int i, s;
  int op;

  if ((shop & 0x3) == 0x1)        /* QH format.  */
    {
      op = shop >> 2;
      s = 0;
      for (i = 0; i < 4; i++)
        {
          uint64_t v;

          switch (qh_shuffle[op][i].source)
            {
            case vs:
              v = op1;
              break;
            case vt:
              v = op2;
              break;
            default:
              Unpredictable ();
              v = 0;
            }
          result |= (((v >> 16*qh_shuffle[op][i].index) & 0xFFFF) << s);
          s += 16;
        }
    }
  else if ((shop & 0x1) == 0x0)   /* OB format.  */
    {
      op = shop >> 2;
      s = 0;
      for (i = 0; i < 8; i++)
        {
          uint8_t b;
          unsigned int ishift = 8*ob_shuffle[op][i].index;

          switch (ob_shuffle[op][i].source)
            {
            case vs:
              b = (op1 >> ishift) & 0xFF;
              break;
            case ss:
              b = ((op1 >> ishift) & 0x80) ? 0xFF : 0;
              break;
            case vt:
              b = (op2 >> ishift) & 0xFF;
              break;
            default:
              Unpredictable ();
              b = 0;
            }
          result |= ((uint64_t)b << s);
          s += 8;
        }
    }
  else
    Unpredictable ();

  return result;
}