hw/milkymist-pfpu.c

   1 /*
   2  *  QEMU model of the Milkymist programmable FPU.
   3  *
   4  *  Copyright (c) 2010 Michael Walle <michael@walle.cc>
   5  *
   6  * This library is free software; you can redistribute it and/or
   7  * modify it under the terms of the GNU Lesser General Public
   8  * License as published by the Free Software Foundation; either
   9  * version 2 of the License, or (at your option) any later version.
  10  *
  11  * This library is distributed in the hope that it will be useful,
  12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14  * Lesser General Public License for more details.
  15  *
  16  * You should have received a copy of the GNU Lesser General Public
  17  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
  18  *
  19  *
  20  * Specification available at:
  21  *   http://www.milkymist.org/socdoc/pfpu.pdf
  22  *
  23  */
  24
  25 #include "hw.h"
  26 #include "sysbus.h"
  27 #include "trace.h"
  28 #include "qemu-log.h"
  29 #include "qemu-error.h"
  30 #include <math.h>
  31
  32 /* #define TRACE_EXEC */
  33
  34 #ifdef TRACE_EXEC
  35 #    define D_EXEC(x) x
  36 #else
  37 #    define D_EXEC(x)
  38 #endif
  39
  40 enum {
  41     R_CTL = 0,
  42     R_MESHBASE,
  43     R_HMESHLAST,
  44     R_VMESHLAST,
  45     R_CODEPAGE,
  46     R_VERTICES,
  47     R_COLLISIONS,
  48     R_STRAYWRITES,
  49     R_LASTDMA,
  50     R_PC,
  51     R_DREGBASE,
  52     R_CODEBASE,
  53     R_MAX
  54 };
  55
  56 enum {
  57     CTL_START_BUSY = (1<<0),
  58 };
  59
  60 enum {
  61     OP_NOP = 0,
  62     OP_FADD,
  63     OP_FSUB,
  64     OP_FMUL,
  65     OP_FABS,
  66     OP_F2I,
  67     OP_I2F,
  68     OP_VECTOUT,
  69     OP_SIN,
  70     OP_COS,
  71     OP_ABOVE,
  72     OP_EQUAL,
  73     OP_COPY,
  74     OP_IF,
  75     OP_TSIGN,
  76     OP_QUAKE,
  77 };
  78
  79 enum {
  80     GPR_X = 0,
  81     GPR_Y = 1,
  82     GPR_FLAGS = 2,
  83 };
  84
  85 enum {
  86     LATENCY_FADD = 5,
  87     LATENCY_FSUB = 5,
  88     LATENCY_FMUL = 7,
  89     LATENCY_FABS = 2,
  90     LATENCY_F2I = 2,
  91     LATENCY_I2F = 3,
  92     LATENCY_VECTOUT = 0,
  93     LATENCY_SIN = 4,
  94     LATENCY_COS = 4,
  95     LATENCY_ABOVE = 2,
  96     LATENCY_EQUAL = 2,
  97     LATENCY_COPY = 2,
  98     LATENCY_IF = 2,
  99     LATENCY_TSIGN = 2,
 100     LATENCY_QUAKE = 2,
 101     MAX_LATENCY = 7
 102 };
 103
 104 #define GPR_BEGIN       0x100
 105 #define GPR_END         0x17f
 106 #define MICROCODE_BEGIN 0x200
 107 #define MICROCODE_END   0x3ff
 108 #define MICROCODE_WORDS 2048
 109
 110 #define REINTERPRET_CAST(type, val) (*((type *)&(val)))
 111
 112 #ifdef TRACE_EXEC
 113 static const char *opcode_to_str[] = {
 114     "NOP", "FADD", "FSUB", "FMUL", "FABS", "F2I", "I2F", "VECTOUT",
 115     "SIN", "COS", "ABOVE", "EQUAL", "COPY", "IF", "TSIGN", "QUAKE",
 116 };
 117 #endif
 118
 119 struct MilkymistPFPUState {
 120     SysBusDevice busdev;
 121     MemoryRegion regs_region;
 122     CharDriverState *chr;
 123     qemu_irq irq;
 124
 125     uint32_t regs[R_MAX];
 126     uint32_t gp_regs[128];
 127     uint32_t microcode[MICROCODE_WORDS];
 128
 129     int output_queue_pos;
 130     uint32_t output_queue[MAX_LATENCY];
 131 };
 132 typedef struct MilkymistPFPUState MilkymistPFPUState;
 133
 134 static inline target_phys_addr_t
 135 get_dma_address(uint32_t base, uint32_t x, uint32_t y)
 136 {
 137     return base + 8 * (128 * y + x);
 138 }
 139
 140 static inline void
 141 output_queue_insert(MilkymistPFPUState *s, uint32_t val, int pos)
 142 {
 143     s->output_queue[(s->output_queue_pos + pos) % MAX_LATENCY] = val;
 144 }
 145
 146 static inline uint32_t
 147 output_queue_remove(MilkymistPFPUState *s)
 148 {
 149     return s->output_queue[s->output_queue_pos];
 150 }
 151
 152 static inline void
 153 output_queue_advance(MilkymistPFPUState *s)
 154 {
 155     s->output_queue[s->output_queue_pos] = 0;
 156     s->output_queue_pos = (s->output_queue_pos + 1) % MAX_LATENCY;
 157 }
 158
 159 static int pfpu_decode_insn(MilkymistPFPUState *s)
 160 {
 161     uint32_t pc = s->regs[R_PC];
 162     uint32_t insn = s->microcode[pc];
 163     uint32_t reg_a = (insn >> 18) & 0x7f;
 164     uint32_t reg_b = (insn >> 11) & 0x7f;
 165     uint32_t op = (insn >> 7) & 0xf;
 166     uint32_t reg_d = insn & 0x7f;
 167     uint32_t r = 0;
 168     int latency = 0;
 169
 170     switch (op) {
 171     case OP_NOP:
 172         break;
 173     case OP_FADD:
 174     {
 175         float a = REINTERPRET_CAST(float, s->gp_regs[reg_a]);
 176         float b = REINTERPRET_CAST(float, s->gp_regs[reg_b]);
 177         float t = a + b;
 178         r = REINTERPRET_CAST(uint32_t, t);
 179         latency = LATENCY_FADD;
 180         D_EXEC(qemu_log("ADD a=%f b=%f t=%f, r=%08x\n", a, b, t, r));
 181     } break;
 182     case OP_FSUB:
 183     {
 184         float a = REINTERPRET_CAST(float, s->gp_regs[reg_a]);
 185         float b = REINTERPRET_CAST(float, s->gp_regs[reg_b]);
 186         float t = a - b;
 187         r = REINTERPRET_CAST(uint32_t, t);
 188         latency = LATENCY_FSUB;
 189         D_EXEC(qemu_log("SUB a=%f b=%f t=%f, r=%08x\n", a, b, t, r));
 190     } break;
 191     case OP_FMUL:
 192     {
 193         float a = REINTERPRET_CAST(float, s->gp_regs[reg_a]);
 194         float b = REINTERPRET_CAST(float, s->gp_regs[reg_b]);
 195         float t = a * b;
 196         r = REINTERPRET_CAST(uint32_t, t);
 197         latency = LATENCY_FMUL;
 198         D_EXEC(qemu_log("MUL a=%f b=%f t=%f, r=%08x\n", a, b, t, r));
 199     } break;
 200     case OP_FABS:
 201     {
 202         float a = REINTERPRET_CAST(float, s->gp_regs[reg_a]);
 203         float t = fabsf(a);
 204         r = REINTERPRET_CAST(uint32_t, t);
 205         latency = LATENCY_FABS;
 206         D_EXEC(qemu_log("ABS a=%f t=%f, r=%08x\n", a, t, r));
 207     } break;
 208     case OP_F2I:
 209     {
 210         float a = REINTERPRET_CAST(float, s->gp_regs[reg_a]);
 211         int32_t t = a;
 212         r = REINTERPRET_CAST(uint32_t, t);
 213         latency = LATENCY_F2I;
 214         D_EXEC(qemu_log("F2I a=%f t=%d, r=%08x\n", a, t, r));
 215     } break;
 216     case OP_I2F:
 217     {
 218         int32_t a = REINTERPRET_CAST(int32_t, s->gp_regs[reg_a]);
 219         float t = a;
 220         r = REINTERPRET_CAST(uint32_t, t);
 221         latency = LATENCY_I2F;
 222         D_EXEC(qemu_log("I2F a=%08x t=%f, r=%08x\n", a, t, r));
 223     } break;
 224     case OP_VECTOUT:
 225     {
 226         uint32_t a = cpu_to_be32(s->gp_regs[reg_a]);
 227         uint32_t b = cpu_to_be32(s->gp_regs[reg_b]);
 228         target_phys_addr_t dma_ptr =
 229             get_dma_address(s->regs[R_MESHBASE],
 230                     s->gp_regs[GPR_X], s->gp_regs[GPR_Y]);
 231         cpu_physical_memory_write(dma_ptr, (uint8_t *)&a, 4);
 232         cpu_physical_memory_write(dma_ptr + 4, (uint8_t *)&b, 4);
 233         s->regs[R_LASTDMA] = dma_ptr + 4;
 234         D_EXEC(qemu_log("VECTOUT a=%08x b=%08x dma=%08x\n", a, b, dma_ptr));
 235         trace_milkymist_pfpu_vectout(a, b, dma_ptr);
 236     } break;
 237     case OP_SIN:
 238     {
 239         int32_t a = REINTERPRET_CAST(int32_t, s->gp_regs[reg_a]);
 240         float t = sinf(a * (1.0f / (M_PI * 4096.0f)));
 241         r = REINTERPRET_CAST(uint32_t, t);
 242         latency = LATENCY_SIN;
 243         D_EXEC(qemu_log("SIN a=%d t=%f, r=%08x\n", a, t, r));
 244     } break;
 245     case OP_COS:
 246     {
 247         int32_t a = REINTERPRET_CAST(int32_t, s->gp_regs[reg_a]);
 248         float t = cosf(a * (1.0f / (M_PI * 4096.0f)));
 249         r = REINTERPRET_CAST(uint32_t, t);
 250         latency = LATENCY_COS;
 251         D_EXEC(qemu_log("COS a=%d t=%f, r=%08x\n", a, t, r));
 252     } break;
 253     case OP_ABOVE:
 254     {
 255         float a = REINTERPRET_CAST(float, s->gp_regs[reg_a]);
 256         float b = REINTERPRET_CAST(float, s->gp_regs[reg_b]);
 257         float t = (a > b) ? 1.0f : 0.0f;
 258         r = REINTERPRET_CAST(uint32_t, t);
 259         latency = LATENCY_ABOVE;
 260         D_EXEC(qemu_log("ABOVE a=%f b=%f t=%f, r=%08x\n", a, b, t, r));
 261     } break;
 262     case OP_EQUAL:
 263     {
 264         float a = REINTERPRET_CAST(float, s->gp_regs[reg_a]);
 265         float b = REINTERPRET_CAST(float, s->gp_regs[reg_b]);
 266         float t = (a == b) ? 1.0f : 0.0f;
 267         r = REINTERPRET_CAST(uint32_t, t);
 268         latency = LATENCY_EQUAL;
 269         D_EXEC(qemu_log("EQUAL a=%f b=%f t=%f, r=%08x\n", a, b, t, r));
 270     } break;
 271     case OP_COPY:
 272     {
 273         r = s->gp_regs[reg_a];
 274         latency = LATENCY_COPY;
 275         D_EXEC(qemu_log("COPY"));
 276     } break;
 277     case OP_IF:
 278     {
 279         float a = REINTERPRET_CAST(float, s->gp_regs[reg_a]);
 280         float b = REINTERPRET_CAST(float, s->gp_regs[reg_b]);
 281         uint32_t f = s->gp_regs[GPR_FLAGS];
 282         float t = (f != 0) ? a : b;
 283         r = REINTERPRET_CAST(uint32_t, t);
 284         latency = LATENCY_IF;
 285         D_EXEC(qemu_log("IF f=%u a=%f b=%f t=%f, r=%08x\n", f, a, b, t, r));
 286     } break;
 287     case OP_TSIGN:
 288     {
 289         float a = REINTERPRET_CAST(float, s->gp_regs[reg_a]);
 290         float b = REINTERPRET_CAST(float, s->gp_regs[reg_b]);
 291         float t = (b < 0) ? -a : a;
 292         r = REINTERPRET_CAST(uint32_t, t);
 293         latency = LATENCY_TSIGN;
 294         D_EXEC(qemu_log("TSIGN a=%f b=%f t=%f, r=%08x\n", a, b, t, r));
 295     } break;
 296     case OP_QUAKE:
 297     {
 298         uint32_t a = s->gp_regs[reg_a];
 299         r = 0x5f3759df - (a >> 1);
 300         latency = LATENCY_QUAKE;
 301         D_EXEC(qemu_log("QUAKE a=%d r=%08x\n", a, r));
 302     } break;
 303
 304     default:
 305         error_report("milkymist_pfpu: unknown opcode %d", op);
 306         break;
 307     }
 308
 309     if (!reg_d) {
 310         D_EXEC(qemu_log("%04d %8s R%03d, R%03d <L=%d, E=%04d>\n",
 311                     s->regs[R_PC], opcode_to_str[op], reg_a, reg_b, latency,
 312                     s->regs[R_PC] + latency));
 313     } else {
 314         D_EXEC(qemu_log("%04d %8s R%03d, R%03d <L=%d, E=%04d> -> R%03d\n",
 315                     s->regs[R_PC], opcode_to_str[op], reg_a, reg_b, latency,
 316                     s->regs[R_PC] + latency, reg_d));
 317     }
 318
 319     if (op == OP_VECTOUT) {
 320         return 0;
 321     }
 322
 323     /* store output for this cycle */
 324     if (reg_d) {
 325         uint32_t val = output_queue_remove(s);
 326         D_EXEC(qemu_log("R%03d <- 0x%08x\n", reg_d, val));
 327         s->gp_regs[reg_d] = val;
 328     }
 329
 330     output_queue_advance(s);
 331
 332     /* store op output */
 333     if (op != OP_NOP) {
 334         output_queue_insert(s, r, latency-1);
 335     }
 336
 337     /* advance PC */
 338     s->regs[R_PC]++;
 339
 340     return 1;
 341 };
 342
 343 static void pfpu_start(MilkymistPFPUState *s)
 344 {
 345     int x, y;
 346     int i;
 347
 348     for (y = 0; y <= s->regs[R_VMESHLAST]; y++) {
 349         for (x = 0; x <= s->regs[R_HMESHLAST]; x++) {
 350             D_EXEC(qemu_log("\nprocessing x=%d y=%d\n", x, y));
 351
 352             /* set current position */
 353             s->gp_regs[GPR_X] = x;
 354             s->gp_regs[GPR_Y] = y;
 355
 356             /* run microcode on this position */
 357             i = 0;
 358             while (pfpu_decode_insn(s)) {
 359                 /* decode at most MICROCODE_WORDS instructions */
 360                 if (i++ >= MICROCODE_WORDS) {
 361                     error_report("milkymist_pfpu: too many instructions "
 362                             "executed in microcode. No VECTOUT?");
 363                     break;
 364                 }
 365             }
 366
 367             /* reset pc for next run */
 368             s->regs[R_PC] = 0;
 369         }
 370     }
 371
 372     s->regs[R_VERTICES] = x * y;
 373
 374     trace_milkymist_pfpu_pulse_irq();
 375     qemu_irq_pulse(s->irq);
 376 }
 377
 378 static inline int get_microcode_address(MilkymistPFPUState *s, uint32_t addr)
 379 {
 380     return (512 * s->regs[R_CODEPAGE]) + addr - MICROCODE_BEGIN;
 381 }
 382
 383 static uint64_t pfpu_read(void *opaque, target_phys_addr_t addr,
 384                           unsigned size)
 385 {
 386     MilkymistPFPUState *s = opaque;
 387     uint32_t r = 0;
 388
 389     addr >>= 2;
 390     switch (addr) {
 391     case R_CTL:
 392     case R_MESHBASE:
 393     case R_HMESHLAST:
 394     case R_VMESHLAST:
 395     case R_CODEPAGE:
 396     case R_VERTICES:
 397     case R_COLLISIONS:
 398     case R_STRAYWRITES:
 399     case R_LASTDMA:
 400     case R_PC:
 401     case R_DREGBASE:
 402     case R_CODEBASE:
 403         r = s->regs[addr];
 404         break;
 405     case GPR_BEGIN ... GPR_END:
 406         r = s->gp_regs[addr - GPR_BEGIN];
 407         break;
 408     case MICROCODE_BEGIN ...  MICROCODE_END:
 409         r = s->microcode[get_microcode_address(s, addr)];
 410         break;
 411
 412     default:
 413         error_report("milkymist_pfpu: read access to unknown register 0x"
 414                 TARGET_FMT_plx, addr << 2);
 415         break;
 416     }
 417
 418     trace_milkymist_pfpu_memory_read(addr << 2, r);
 419
 420     return r;
 421 }
 422
 423 static void pfpu_write(void *opaque, target_phys_addr_t addr, uint64_t value,
 424                        unsigned size)
 425 {
 426     MilkymistPFPUState *s = opaque;
 427
 428     trace_milkymist_pfpu_memory_write(addr, value);
 429
 430     addr >>= 2;
 431     switch (addr) {
 432     case R_CTL:
 433         if (value & CTL_START_BUSY) {
 434             pfpu_start(s);
 435         }
 436         break;
 437     case R_MESHBASE:
 438     case R_HMESHLAST:
 439     case R_VMESHLAST:
 440     case R_CODEPAGE:
 441     case R_VERTICES:
 442     case R_COLLISIONS:
 443     case R_STRAYWRITES:
 444     case R_LASTDMA:
 445     case R_PC:
 446     case R_DREGBASE:
 447     case R_CODEBASE:
 448         s->regs[addr] = value;
 449         break;
 450     case GPR_BEGIN ...  GPR_END:
 451         s->gp_regs[addr - GPR_BEGIN] = value;
 452         break;
 453     case MICROCODE_BEGIN ...  MICROCODE_END:
 454         s->microcode[get_microcode_address(s, addr)] = value;
 455         break;
 456
 457     default:
 458         error_report("milkymist_pfpu: write access to unknown register 0x"
 459                 TARGET_FMT_plx, addr << 2);
 460         break;
 461     }
 462 }
 463
 464 static const MemoryRegionOps pfpu_mmio_ops = {
 465     .read = pfpu_read,
 466     .write = pfpu_write,
 467     .valid = {
 468         .min_access_size = 4,
 469         .max_access_size = 4,
 470     },
 471     .endianness = DEVICE_NATIVE_ENDIAN,
 472 };
 473
 474 static void milkymist_pfpu_reset(DeviceState *d)
 475 {
 476     MilkymistPFPUState *s = container_of(d, MilkymistPFPUState, busdev.qdev);
 477     int i;
 478
 479     for (i = 0; i < R_MAX; i++) {
 480         s->regs[i] = 0;
 481     }
 482     for (i = 0; i < 128; i++) {
 483         s->gp_regs[i] = 0;
 484     }
 485     for (i = 0; i < MICROCODE_WORDS; i++) {
 486         s->microcode[i] = 0;
 487     }
 488     s->output_queue_pos = 0;
 489     for (i = 0; i < MAX_LATENCY; i++) {
 490         s->output_queue[i] = 0;
 491     }
 492 }
 493
 494 static int milkymist_pfpu_init(SysBusDevice *dev)
 495 {
 496     MilkymistPFPUState *s = FROM_SYSBUS(typeof(*s), dev);
 497
 498     sysbus_init_irq(dev, &s->irq);
 499
 500     memory_region_init_io(&s->regs_region, &pfpu_mmio_ops, s,
 501             "milkymist-pfpu", MICROCODE_END * 4);
 502     sysbus_init_mmio(dev, &s->regs_region);
 503
 504     return 0;
 505 }
 506
 507 static const VMStateDescription vmstate_milkymist_pfpu = {
 508     .name = "milkymist-pfpu",
 509     .version_id = 1,
 510     .minimum_version_id = 1,
 511     .minimum_version_id_old = 1,
 512     .fields      = (VMStateField[]) {
 513         VMSTATE_UINT32_ARRAY(regs, MilkymistPFPUState, R_MAX),
 514         VMSTATE_UINT32_ARRAY(gp_regs, MilkymistPFPUState, 128),
 515         VMSTATE_UINT32_ARRAY(microcode, MilkymistPFPUState, MICROCODE_WORDS),
 516         VMSTATE_INT32(output_queue_pos, MilkymistPFPUState),
 517         VMSTATE_UINT32_ARRAY(output_queue, MilkymistPFPUState, MAX_LATENCY),
 518         VMSTATE_END_OF_LIST()
 519     }
 520 };
 521
 522 static SysBusDeviceInfo milkymist_pfpu_info = {
 523     .init = milkymist_pfpu_init,
 524     .qdev.name  = "milkymist-pfpu",
 525     .qdev.size  = sizeof(MilkymistPFPUState),
 526     .qdev.vmsd  = &vmstate_milkymist_pfpu,
 527     .qdev.reset = milkymist_pfpu_reset,
 528 };
 529
 530 static void milkymist_pfpu_register(void)
 531 {
 532     sysbus_register_withprop(&milkymist_pfpu_info);
 533 }
 534
 535 device_init(milkymist_pfpu_register)