1 /* rx.c --- opcode semantics for stand-alone RX simulator.
3 Copyright (C) 2008-2023 Free Software Foundation, Inc.
4 Contributed by Red Hat, Inc.
6 This file is part of the GNU simulators.
8 This program is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3 of the License, or
11 (at your option) any later version.
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with this program. If not, see <http://www.gnu.org/licenses/>. */
21 /* This must come before any other includes. */
28 #include "libiberty.h"
30 #include "opcode/rx.h"
39 static const char * id_names
[] = {
41 "RXO_mov", /* d = s (signed) */
42 "RXO_movbi", /* d = [s,s2] (signed) */
43 "RXO_movbir", /* [s,s2] = d (signed) */
44 "RXO_pushm", /* s..s2 */
45 "RXO_popm", /* s..s2 */
46 "RXO_xchg", /* s <-> d */
47 "RXO_stcc", /* d = s if cond(s2) */
48 "RXO_rtsd", /* rtsd, 1=imm, 2-0 = reg if reg type */
50 /* These are all either d OP= s or, if s2 is set, d = s OP s2. Note
51 that d may be "None". */
64 "RXO_adc", /* d = d + s + carry */
65 "RXO_sbb", /* d = d - s - ~carry */
66 "RXO_abs", /* d = |s| */
67 "RXO_max", /* d = max(d,s) */
68 "RXO_min", /* d = min(d,s) */
69 "RXO_emul", /* d:64 = d:32 * s */
70 "RXO_emulu", /* d:64 = d:32 * s (unsigned) */
72 "RXO_rolc", /* d <<= 1 through carry */
73 "RXO_rorc", /* d >>= 1 through carry*/
74 "RXO_rotl", /* d <<= #s without carry */
75 "RXO_rotr", /* d >>= #s without carry*/
76 "RXO_revw", /* d = revw(s) */
77 "RXO_revl", /* d = revl(s) */
78 "RXO_branch", /* pc = d if cond(s) */
79 "RXO_branchrel",/* pc += d if cond(s) */
80 "RXO_jsr", /* pc = d */
81 "RXO_jsrrel", /* pc += d */
111 "RXO_sat", /* sat(d) */
114 "RXO_fadd", /* d op= s */
123 "RXO_bset", /* d |= (1<<s) */
124 "RXO_bclr", /* d &= ~(1<<s) */
125 "RXO_btst", /* s & (1<<s2) */
126 "RXO_bnot", /* d ^= (1<<s) */
127 "RXO_bmcc", /* d<s> = cond(s2) */
129 "RXO_clrpsw", /* flag index in d */
130 "RXO_setpsw", /* flag index in d */
131 "RXO_mvtipl", /* new IPL in s */
135 "RXO_rtd", /* undocumented */
137 "RXO_dbt", /* undocumented */
138 "RXO_int", /* vector id in s */
142 "RXO_sccnd", /* d = cond(s) ? 1 : 0 */
145 static const char * optype_names
[] = {
147 "#Imm", /* #addend */
149 "[Rn]", /* [Rn + addend] */
152 " cc ", /* eq, gtu, etc */
153 "Flag", /* [UIOSZC] */
154 "RbRi" /* [Rb + scale * Ri] */
/* Element counts of the id_names and optype_names tables, computed with
   ARRAY_SIZE.  They are used as array dimensions for the operand-type
   cache and the per-opcode profiling counters declared below.  */
157 #define N_RXO ARRAY_SIZE (id_names)
158 #define N_RXT ARRAY_SIZE (optype_names)
161 static unsigned long long benchmark_start_cycle
;
162 static unsigned long long benchmark_end_cycle
;
164 static int op_cache
[N_RXT
][N_RXT
][N_RXT
];
165 static int op_cache_rev
[N_MAP
];
166 static int op_cache_idx
= 0;
/* Map an operand-type triple (A, B, C) to a small index kept in
   op_cache[A][B][C].

   A non-zero cache entry is returned directly, so a stored value of 0
   means "not cached yet".  On a miss, a diagnostic is printed when
   op_cache_idx has reached N_MAP; the current op_cache_idx is then stored
   in the cache and the triple is packed as (A << 8) | (B << 4) | C into
   op_cache_rev[op_cache_idx] so the index can be mapped back to a triple.

   NOTE(review): the lines that advance op_cache_idx, handle the overflow
   case, and produce the return value on a miss are not visible in this
   view -- confirm against the full file.  */
169 op_lookup (int a
, int b
, int c
)
/* Fast path: already cached.  */
171 if (op_cache
[a
][b
][c
])
172 return op_cache
[a
][b
][c
];
/* Report when the reverse map has no room left.  */
174 if (op_cache_idx
>= N_MAP
)
176 printf("op_cache_idx exceeds %d\n", N_MAP
);
/* Record the forward (triple -> index) and reverse (index -> packed
   triple) mappings.  */
179 op_cache
[a
][b
][c
] = op_cache_idx
;
180 op_cache_rev
[op_cache_idx
] = (a
<<8) | (b
<<4) | c
;
185 op_cache_string (int map
)
188 static char cb
[5][20];
191 map
= op_cache_rev
[map
];
196 sprintf(cb
[ci
], "%s %s %s", optype_names
[a
], optype_names
[b
], optype_names
[c
]);
200 static unsigned long long cycles_per_id
[N_RXO
][N_MAP
];
201 static unsigned long long times_per_id
[N_RXO
][N_MAP
];
202 static unsigned long long memory_stalls
;
203 static unsigned long long register_stalls
;
204 static unsigned long long branch_stalls
;
205 static unsigned long long branch_alignment_stalls
;
206 static unsigned long long fast_returns
;
208 static unsigned long times_per_pair
[N_RXO
][N_MAP
][N_RXO
][N_MAP
];
209 static int prev_opcode_id
= RXO_unknown
;
216 #endif /* WITH_PROFILE */
219 #ifdef CYCLE_ACCURATE
221 static int new_rt
= -1;
223 /* Number of cycles to add if an insn spans an 8-byte boundary. */
224 static int branch_alignment_penalty
= 0;
228 static int running_benchmark
= 1;
/* Trace output: behaves like printf, but only prints when tracing is
   enabled (trace) and a benchmark run is active (running_benchmark).

   The expansion is wrapped in do { } while (0) so that an invocation is
   exactly one statement.  The previous form expanded to a bare
   `if (...) printf', which silently captured the `else' of any enclosing
   `if (x) tprintf (...); else ...'.  */
#define tprintf(...) \
  do { if (trace && running_benchmark) printf (__VA_ARGS__); } while (0)
232 jmp_buf decode_jmp_buf
;
233 unsigned int rx_cycles
= 0;
235 #ifdef CYCLE_ACCURATE
236 /* If nonzero, memory was read at some point and cycle latency might
238 static int memory_source
= 0;
239 /* If nonzero, memory was written and extra cycles might be
241 static int memory_dest
= 0;
244 cycles (int throughput
)
246 tprintf("%d cycles\n", throughput
);
247 regs
.cycle_count
+= throughput
;
250 /* Number of execution (E) cycles the op uses. For memory sources, we
251 include the load micro-op stall as two extra E cycles. */
/* E(c): charge C execution cycles via cycles (), plus two extra cycles
   when memory_source is nonzero (the load micro-op stall).  The argument
   is parenthesized so that compound arguments such as `a ? b : c' or
   `x | y' are not re-associated with the `+ 2'.  */
#define E(c) cycles (memory_source ? (c) + 2 : (c))
/* Fixed one- and two-cycle charges, independent of memory_source.  */
#define E1 cycles (1)
#define E2 cycles (2)
/* Bit operations: two cycles with a memory source, otherwise one.  */
#define EBIT cycles (memory_source ? 2 : 1)
257 /* Check to see if a read latency must be applied for a given register. */
261 tprintf("register %d load stall\n", r); \
262 regs.cycle_count ++; \
263 STATS(register_stalls ++); \
270 tprintf ("Rt now %d\n", r); \
275 lsb_count (unsigned long v
, int is_signed
)
278 if (is_signed
&& (v
& 0x80000000U
))
279 v
= (unsigned long)(long)(-v
);
280 for (i
=31; i
>=0; i
--)
283 /* v is 0..31, we want 1=1-2, 2=3-4, 3=5-6, etc. */
291 divu_cycles(unsigned long num
, unsigned long den
)
293 int nb
= lsb_count (num
, 0);
294 int db
= lsb_count (den
, 0);
306 div_cycles(long num
, long den
)
308 int nb
= lsb_count ((unsigned long)num
, 1);
309 int db
= lsb_count ((unsigned long)den
, 1);
320 #else /* !CYCLE_ACCURATE */
330 #define divu_cycles(n,d)
331 #define div_cycles(n,d)
333 #endif /* else CYCLE_ACCURATE */
335 static int size2bytes
[] = {
336 4, 1, 1, 1, 2, 2, 2, 3, 4
/* rx_abort () forwards the call site's __FILE__ and __LINE__ to
   _rx_abort.  */
343 #define rx_abort() _rx_abort(__FILE__, __LINE__)
/* Report an internal abort on stderr as "abort at FILE:LINE".  Any
   directory prefix is stripped from FILE by skipping past the last '/'.
   NOTE(review): whatever follows the message (presumably the actual
   abort) is not visible in this view.  */
345 _rx_abort (const char *file
, int line
)
/* Keep only the basename when the path contains a '/'.  */
347 if (strrchr (file
, '/'))
348 file
= strrchr (file
, '/') + 1;
349 fprintf(stderr
, "abort at %s:%d\n", file
, line
);
353 static unsigned char *get_byte_base
;
354 static RX_Opcode_Decoded
**decode_cache_base
;
355 static SI get_byte_page
;
361 decode_cache_base
= 0;
/* Refresh the cached page pointers used for instruction fetch when TPC
   is not covered by the currently cached page.

   The refresh happens when TPC and get_byte_page differ in any bit
   selected by NONPAGE_MASK, or whenever enable_counting is nonzero.
   get_byte_base and decode_cache_base are stored with get_byte_page
   subtracted, so callers index them with the full address
   (get_byte_base[tpc], decode_cache_base[opcode_pc]).  */
366 maybe_get_mem_page (SI tpc
)
368 if (((tpc
^ get_byte_page
) & NONPAGE_MASK
) || enable_counting
)
370 get_byte_page
= tpc
& NONPAGE_MASK
;
/* Bias both bases by -get_byte_page so they accept absolute addresses.  */
371 get_byte_base
= rx_mem_ptr (get_byte_page
, MPA_READING
) - get_byte_page
;
372 decode_cache_base
= rx_mem_decode_cache (get_byte_page
) - get_byte_page
;
376 /* This gets called a *lot* so optimize it. */
378 rx_get_byte (void *vdata
)
380 RX_Data
*rx_data
= (RX_Data
*)vdata
;
381 SI tpc
= rx_data
->dpc
;
383 /* See load.c for an explanation of this. */
387 maybe_get_mem_page (tpc
);
390 return get_byte_base
[tpc
];
394 get_op (const RX_Opcode_Decoded
*rd
, int i
)
396 const RX_Opcode_Operand
*o
= rd
->op
+ i
;
401 case RX_Operand_None
:
404 case RX_Operand_Immediate
: /* #addend */
407 case RX_Operand_Register
: /* Rn */
409 rv
= get_reg (o
->reg
);
412 case RX_Operand_Predec
: /* [-Rn] */
413 put_reg (o
->reg
, get_reg (o
->reg
) - size2bytes
[o
->size
]);
415 case RX_Operand_Postinc
: /* [Rn+] */
416 case RX_Operand_Zero_Indirect
: /* [Rn + 0] */
417 case RX_Operand_Indirect
: /* [Rn + addend] */
418 case RX_Operand_TwoReg
: /* [Rn + scale * R2] */
419 #ifdef CYCLE_ACCURATE
421 if (o
->type
== RX_Operand_TwoReg
)
424 if (regs
.m2m
== M2M_BOTH
)
426 tprintf("src memory stall\n");
437 if (o
->type
== RX_Operand_TwoReg
)
438 addr
= get_reg (o
->reg
) * size2bytes
[rd
->size
] + get_reg (rd
->op
[2].reg
);
440 addr
= get_reg (o
->reg
) + o
->addend
;
448 case RX_Byte
: /* undefined extension */
451 rv
= mem_get_qi (addr
);
454 case RX_Word
: /* undefined extension */
457 rv
= mem_get_hi (addr
);
461 rv
= mem_get_psi (addr
);
465 rv
= mem_get_si (addr
);
469 if (o
->type
== RX_Operand_Postinc
)
470 put_reg (o
->reg
, get_reg (o
->reg
) + size2bytes
[o
->size
]);
474 case RX_Operand_Condition
: /* eq, gtu, etc */
475 return condition_true (o
->reg
);
477 case RX_Operand_Flag
: /* [UIOSZC] */
478 return (regs
.r_psw
& (1 << o
->reg
)) ? 1 : 0;
481 /* if we've gotten here, we need to clip/extend the value according
489 case RX_Byte
: /* undefined extension */
490 rv
|= 0xdeadbe00; /* keep them honest */
498 rv
= sign_ext (rv
, 8);
501 case RX_Word
: /* undefined extension */
502 rv
|= 0xdead0000; /* keep them honest */
510 rv
= sign_ext (rv
, 16);
524 put_op (const RX_Opcode_Decoded
*rd
, int i
, int v
)
526 const RX_Opcode_Operand
*o
= rd
->op
+ i
;
533 if (o
->type
!= RX_Operand_Register
)
537 case RX_Byte
: /* undefined extension */
538 v
|= 0xdeadbe00; /* keep them honest */
549 case RX_Word
: /* undefined extension */
550 v
|= 0xdead0000; /* keep them honest */
558 v
= sign_ext (v
, 16);
571 case RX_Operand_None
:
572 /* Opcodes like TST and CMP use this. */
575 case RX_Operand_Immediate
: /* #addend */
576 case RX_Operand_Condition
: /* eq, gtu, etc */
579 case RX_Operand_Register
: /* Rn */
584 case RX_Operand_Predec
: /* [-Rn] */
585 put_reg (o
->reg
, get_reg (o
->reg
) - size2bytes
[o
->size
]);
587 case RX_Operand_Postinc
: /* [Rn+] */
588 case RX_Operand_Zero_Indirect
: /* [Rn + 0] */
589 case RX_Operand_Indirect
: /* [Rn + addend] */
590 case RX_Operand_TwoReg
: /* [Rn + scale * R2] */
592 #ifdef CYCLE_ACCURATE
593 if (regs
.m2m
== M2M_BOTH
)
595 tprintf("dst memory stall\n");
605 if (o
->type
== RX_Operand_TwoReg
)
606 addr
= get_reg (o
->reg
) * size2bytes
[rd
->size
] + get_reg (rd
->op
[2].reg
);
608 addr
= get_reg (o
->reg
) + o
->addend
;
616 case RX_Byte
: /* undefined extension */
619 mem_put_qi (addr
, v
);
622 case RX_Word
: /* undefined extension */
625 mem_put_hi (addr
, v
);
629 mem_put_psi (addr
, v
);
633 mem_put_si (addr
, v
);
637 if (o
->type
== RX_Operand_Postinc
)
638 put_reg (o
->reg
, get_reg (o
->reg
) + size2bytes
[o
->size
]);
642 case RX_Operand_Flag
: /* [UIOSZC] */
644 regs
.r_psw
|= (1 << o
->reg
);
646 regs
.r_psw
&= ~(1 << o
->reg
);
/* Operand accessors.  All of them expand to code that refers to a
   variable named `opcode', which must be in scope at the point of use.

   PD/PS/PS2 store X into operand 0/1/2 through put_op;
   GD/GS/GS2 fetch operand 0/1/2 through get_op.  */
651 #define PD(x) put_op (opcode, 0, x)
652 #define PS(x) put_op (opcode, 1, x)
653 #define PS2(x) put_op (opcode, 2, x)
654 #define GD() get_op (opcode, 0)
655 #define GS() get_op (opcode, 1)
656 #define GS2() get_op (opcode, 2)
/* Operand sizes in bytes, looked up in size2bytes.
   NOTE(review): SSZ and S2SZ index op[0] exactly like DSZ rather than
   op[1] / op[2] -- confirm this is intentional before relying on them.  */
657 #define DSZ() size2bytes[opcode->op[0].size]
658 #define SSZ() size2bytes[opcode->op[0].size]
659 #define S2SZ() size2bytes[opcode->op[0].size]
/* US1/US2 select the two source values for both instruction forms: when
   operand 2 is RX_Operand_None the sources are operands 0 and 1,
   otherwise they are operands 1 and 2.  */
661 /* "Universal" sources. */
662 #define US1() ((opcode->op[2].type == RX_Operand_None) ? GD() : GS())
663 #define US2() ((opcode->op[2].type == RX_Operand_None) ? GS() : GS2())
668 int rsp
= get_reg (sp
);
671 mem_put_si (rsp
, val
);
674 /* Just like the above, but tag the memory as "pushed pc" so if anyone
675 tries to write to it, it will cause an error. */
679 int rsp
= get_reg (sp
);
682 mem_put_si (rsp
, val
);
683 mem_set_content_range (rsp
, rsp
+3, MC_PUSHED_PC
);
690 int rsp
= get_reg (sp
);
691 rv
= mem_get_si (rsp
);
701 int rsp
= get_reg (sp
);
702 if (mem_get_content_type (rsp
) != MC_PUSHED_PC
)
703 execution_error (SIM_ERR_CORRUPT_STACK
, rsp
);
704 rv
= mem_get_si (rsp
);
705 mem_set_content_range (rsp
, rsp
+3, MC_UNINIT
);
711 #define MATH_OP(vop,c) \
715 ll = (unsigned long long) uma vop (unsigned long long) umb vop c; \
716 tprintf ("0x%x " #vop " 0x%x " #vop " 0x%x = 0x%llx\n", uma, umb, c, ll); \
717 ma = sign_ext (uma, DSZ() * 8); \
718 mb = sign_ext (umb, DSZ() * 8); \
719 sll = (long long) ma vop (long long) mb vop c; \
720 tprintf ("%d " #vop " %d " #vop " %d = %lld\n", ma, mb, c, sll); \
721 set_oszc (sll, DSZ(), (long long) ll > ((1 vop 1) ? (long long) b2mask[DSZ()] : (long long) -1)); \
726 #define LOGIC_OP(vop) \
731 tprintf("0x%x " #vop " 0x%x = 0x%x\n", ma, mb, v); \
737 #define SHIFT_OP(val, type, count, OP, carry_mask) \
742 tprintf("%lld " #OP " %d\n", val, count); \
743 for (i = 0; i < count; i ++) \
745 c = val & carry_mask; \
748 set_oszc (val, 4, c); \
/* Floating-point add helper for FLOAT_OP: stores rxfp_add (S1, S2) in *D.
   FLOAT_OP assigns this function's result to do_store; the return
   statement itself is not visible in this view.  */
774 fop_fadd (fp_t s1
, fp_t s2
, fp_t
*d
)
776 *d
= rxfp_add (s1
, s2
);
/* Floating-point multiply helper for FLOAT_OP: stores rxfp_mul (S1, S2)
   in *D.  FLOAT_OP assigns this function's result to do_store; the return
   statement itself is not visible in this view.  */
781 fop_fmul (fp_t s1
, fp_t s2
, fp_t
*d
)
783 *d
= rxfp_mul (s1
, s2
);
/* Floating-point divide helper for FLOAT_OP: stores rxfp_div (S1, S2)
   in *D.  FLOAT_OP assigns this function's result to do_store; the return
   statement itself is not visible in this view.  */
788 fop_fdiv (fp_t s1
, fp_t s2
, fp_t
*d
)
790 *d
= rxfp_div (s1
, s2
);
/* Floating-point subtract helper for FLOAT_OP: stores rxfp_sub (S1, S2)
   in *D.  FLOAT_OP assigns this function's result to do_store; the return
   statement itself is not visible in this view.  */
795 fop_fsub (fp_t s1
, fp_t s2
, fp_t
*d
)
797 *d
= rxfp_sub (s1
, s2
);
/* FPPENDING () yields the bits of regs.r_fpsw that are set both in the
   register itself and in the mask built from FPSWBITS_CE plus the
   FPSWBITS_FMASK bits of (r_fpsw << FPSW_EFSH).  generate_exception uses
   the result as the set of pending FP exceptions.
   NOTE(review): presumably the shift lines the enable bits up with the
   corresponding flag bits -- confirm against the FPSW layout.  */
801 #define FPPENDING() (regs.r_fpsw & (FPSWBITS_CE | (FPSWBITS_FMASK & (regs.r_fpsw << FPSW_EFSH))))
/* FPCLEAR () keeps only the FPSWBITS_CLEAR bits of regs.r_fpsw.  The
   expansion is a bare assignment expression, so use it as a statement.  */
802 #define FPCLEAR() regs.r_fpsw &= FPSWBITS_CLEAR
805 return do_fp_exception (opcode_pc)
807 #define FLOAT_OP(func) \
814 do_store = fop_##func (fa, fb, &fc); \
815 tprintf("%g " #func " %g = %g %08x\n", int2float(fa), int2float(fb), int2float(fc), fc); \
820 if ((fc & 0x80000000UL) != 0) \
822 if ((fc & 0x7fffffffUL) == 0) \
824 set_flags (FLAGBIT_S | FLAGBIT_Z, mb); \
/* Current carry flag normalised to exactly 0 or 1, for use in arithmetic
   (e.g. as the extra operand passed to MATH_OP).  */
827 #define carry (FLAG_C ? 1 : 0)
833 } exception_info
[] = {
834 { 0xFFFFFFD0UL
, "priviledged opcode", SIGILL
},
835 { 0xFFFFFFD4UL
, "access violation", SIGSEGV
},
836 { 0xFFFFFFDCUL
, "undefined opcode", SIGILL
},
837 { 0xFFFFFFE4UL
, "floating point", SIGFPE
}
839 #define EX_PRIVILEDGED 0
841 #define EX_UNDEFINED 2
842 #define EX_FLOATING 3
843 #define EXCEPTION(n) \
844 return generate_exception (n, opcode_pc)
846 #define PRIVILEDGED() \
848 EXCEPTION (EX_PRIVILEDGED)
851 generate_exception (unsigned long type
, SI opcode_pc
)
853 SI old_psw
, old_pc
, new_pc
;
855 new_pc
= mem_get_si (exception_info
[type
].vaddr
);
856 /* 0x00020000 is the value used to initialise the known
857 exception vectors (see rx.ld), but it is a reserved
858 area of memory so do not try to access it, and if the
859 value has not been changed by the program then the
860 vector has not been installed. */
861 if (new_pc
== 0 || new_pc
== 0x00020000)
864 return RX_MAKE_STOPPED (exception_info
[type
].signal
);
866 fprintf(stderr
, "Unhandled %s exception at pc = %#lx\n",
867 exception_info
[type
].str
, (unsigned long) opcode_pc
);
868 if (type
== EX_FLOATING
)
870 int mask
= FPPENDING ();
871 fprintf (stderr
, "Pending FP exceptions:");
872 if (mask
& FPSWBITS_FV
)
873 fprintf(stderr
, " Invalid");
874 if (mask
& FPSWBITS_FO
)
875 fprintf(stderr
, " Overflow");
876 if (mask
& FPSWBITS_FZ
)
877 fprintf(stderr
, " Division-by-zero");
878 if (mask
& FPSWBITS_FU
)
879 fprintf(stderr
, " Underflow");
880 if (mask
& FPSWBITS_FX
)
881 fprintf(stderr
, " Inexact");
882 if (mask
& FPSWBITS_CE
)
883 fprintf(stderr
, " Unimplemented");
884 fprintf(stderr
, "\n");
886 return RX_MAKE_EXITED (1);
889 tprintf ("Triggering %s exception\n", exception_info
[type
].str
);
891 old_psw
= regs
.r_psw
;
892 regs
.r_psw
&= ~ (FLAGBIT_I
| FLAGBIT_U
| FLAGBIT_PM
);
897 return RX_MAKE_STEPPED ();
901 generate_access_exception (void)
905 rv
= generate_exception (EX_ACCESS
, regs
.r_pc
);
907 longjmp (decode_jmp_buf
, rv
);
911 do_fp_exception (unsigned long opcode_pc
)
914 EXCEPTION (EX_FLOATING
);
915 return RX_MAKE_STEPPED ();
/* Classify operand I of the decoded opcode RD by its operand type.  The
   pre-decrement, post-increment and indirect addressing types share one
   case group.
   NOTE(review): the remaining case labels and the values returned are
   not visible in this view; the name suggests a memory/non-memory test.  */
919 op_is_memory (const RX_Opcode_Decoded
*rd
, int i
)
921 switch (rd
->op
[i
].type
)
923 case RX_Operand_Predec
:
924 case RX_Operand_Postinc
:
925 case RX_Operand_Indirect
:
/* OM (i): op_is_memory applied to operand I of the variable `opcode',
   which must be in scope at the point of use.  */
931 #define OM(i) op_is_memory (opcode, i)
/* DO_RETURN (x): leave the current instruction by longjmp-ing through
   decode_jmp_buf with value X.  The expansion is a braced block, so a
   trailing `;' at the call site adds an empty statement.  */
933 #define DO_RETURN(x) { longjmp (decode_jmp_buf, x); }
938 unsigned int uma
=0, umb
=0;
941 unsigned long long ll
;
943 unsigned long opcode_pc
;
945 const RX_Opcode_Decoded
*opcode
;
947 unsigned long long prev_cycle_count
;
949 #ifdef CYCLE_ACCURATE
954 prev_cycle_count
= regs
.cycle_count
;
957 #ifdef CYCLE_ACCURATE
964 maybe_get_mem_page (regs
.r_pc
);
966 opcode_pc
= regs
.r_pc
;
968 /* Note that we don't word-swap this point, there's no point. */
969 if (decode_cache_base
[opcode_pc
] == NULL
)
971 RX_Opcode_Decoded
*opcode_w
;
972 rx_data
.dpc
= opcode_pc
;
973 opcode_w
= decode_cache_base
[opcode_pc
] = calloc (1, sizeof (RX_Opcode_Decoded
));
974 opcode_size
= rx_decode_opcode (opcode_pc
, opcode_w
,
975 rx_get_byte
, &rx_data
);
980 opcode
= decode_cache_base
[opcode_pc
];
981 opcode_size
= opcode
->n_bytes
;
984 #ifdef CYCLE_ACCURATE
985 if (branch_alignment_penalty
)
987 if ((regs
.r_pc
^ (regs
.r_pc
+ opcode_size
- 1)) & ~7)
989 tprintf("1 cycle branch alignment penalty\n");
990 cycles (branch_alignment_penalty
);
992 branch_alignment_stalls
++;
995 branch_alignment_penalty
= 0;
999 regs
.r_pc
+= opcode_size
;
1001 rx_flagmask
= opcode
->flags_s
;
1002 rx_flagand
= ~(int)opcode
->flags_0
;
1003 rx_flagor
= opcode
->flags_1
;
1009 tprintf("|%lld| = ", sll
);
1012 tprintf("%lld\n", sll
);
1033 if (opcode
->op
[0].type
== RX_Operand_Register
)
1045 if (opcode
->op
[0].type
== RX_Operand_Register
)
1060 if (opcode
->op
[0].type
== RX_Operand_Register
)
1070 if (opcode
->op
[1].type
== RX_Operand_None
|| GS())
1072 #ifdef CYCLE_ACCURATE
1073 SI old_pc
= regs
.r_pc
;
1077 #ifdef CYCLE_ACCURATE
1078 delta
= regs
.r_pc
- old_pc
;
1079 if (delta
>= 0 && delta
< 16
1082 tprintf("near forward branch bonus\n");
1088 branch_alignment_penalty
= 1;
1095 #ifdef CYCLE_ACCURATE
1102 if (opcode
->op
[1].type
== RX_Operand_None
|| GS())
1105 regs
.r_pc
= opcode_pc
+ delta
;
1106 #ifdef CYCLE_ACCURATE
1107 /* Note: specs say 3, chip says 2. */
1108 if (delta
>= 0 && delta
< 16
1111 tprintf("near forward branch bonus\n");
1117 branch_alignment_penalty
= 1;
1124 #ifdef CYCLE_ACCURATE
1132 int old_psw
= regs
.r_psw
;
1134 DO_RETURN (RX_MAKE_HIT_BREAK ());
1135 if (regs
.r_intb
== 0)
1137 tprintf("BREAK hit, no vector table.\n");
1138 DO_RETURN (RX_MAKE_EXITED(1));
1140 regs
.r_psw
&= ~(FLAGBIT_I
| FLAGBIT_U
| FLAGBIT_PM
);
1143 regs
.r_pc
= mem_get_si (regs
.r_intb
);
1151 if (opcode
->op
[0].type
== RX_Operand_Register
)
1163 if (opcode
->op
[1].type
== RX_Operand_Register
)
1167 umb
= ma
& (1 << mb
);
1168 set_zc (! umb
, umb
);
1173 v
= 1 << opcode
->op
[0].reg
;
1182 case RXO_div
: /* d = d / s */
1185 tprintf("%d / %d = ", mb
, ma
);
1186 if (ma
== 0 || (ma
== -1 && (unsigned int) mb
== 0x80000000))
1189 set_flags (FLAGBIT_O
, FLAGBIT_O
);
1196 set_flags (FLAGBIT_O
, 0);
1198 div_cycles (mb
, ma
);
1202 case RXO_divu
: /* d = d / s */
1205 tprintf("%u / %u = ", umb
, uma
);
1209 set_flags (FLAGBIT_O
, FLAGBIT_O
);
1216 set_flags (FLAGBIT_O
, 0);
1218 divu_cycles (umb
, uma
);
1225 sll
= (long long)ma
* (long long)mb
;
1226 tprintf("%d * %d = %lld\n", ma
, mb
, sll
);
1227 put_reg (opcode
->op
[0].reg
, sll
);
1228 put_reg (opcode
->op
[0].reg
+ 1, sll
>> 32);
1235 ll
= (long long)uma
* (long long)umb
;
1236 tprintf("%#x * %#x = %#llx\n", uma
, umb
, ll
);
1237 put_reg (opcode
->op
[0].reg
, ll
);
1238 put_reg (opcode
->op
[0].reg
+ 1, ll
>> 32);
1268 regs
.r_psw
= regs
.r_bpsw
;
1269 regs
.r_pc
= regs
.r_bpc
;
1270 #ifdef CYCLE_ACCURATE
1271 regs
.fast_return
= 0;
1284 mb
= rxfp_ftoi (ma
, FPRM_ZERO
);
1287 tprintf("(int) %g = %d\n", int2float(ma
), mb
);
1296 int rc
= rx_syscall (regs
.r
[5]);
1297 if (! RX_STEPPED (rc
))
1302 int old_psw
= regs
.r_psw
;
1303 regs
.r_psw
&= ~(FLAGBIT_I
| FLAGBIT_U
| FLAGBIT_PM
);
1306 regs
.r_pc
= mem_get_si (regs
.r_intb
+ 4 * v
);
1314 mb
= rxfp_itof (ma
, regs
.r_fpsw
);
1316 tprintf("(float) %d = %x\n", ma
, mb
);
1325 #ifdef CYCLE_ACCURATE
1330 #ifdef CYCLE_ACCURATE
1331 regs
.link_register
= regs
.r_pc
;
1333 pushpc (get_reg (pc
));
1334 if (opcode
->id
== RXO_jsrrel
)
1336 #ifdef CYCLE_ACCURATE
1337 delta
= v
- regs
.r_pc
;
1340 #ifdef CYCLE_ACCURATE
1341 /* Note: docs say 3, chip says 2 */
1342 if (delta
>= 0 && delta
< 16)
1344 tprintf ("near forward jsr bonus\n");
1349 branch_alignment_penalty
= 1;
1352 regs
.fast_return
= 1;
1358 ll
= (long long)(signed short)(GS() >> 16) * (long long)(signed short)(GS2 () >> 16);
1360 put_reg64 (acc64
, ll
+ regs
.r_acc
);
1365 ll
= (long long)(signed short)(GS()) * (long long)(signed short)(GS2 ());
1367 put_reg64 (acc64
, ll
+ regs
.r_acc
);
1394 if (opcode
->op
[1].type
== RX_Operand_Register
1395 && opcode
->op
[1].reg
== 17 /* PC */)
1397 /* Special case. We want the address of the insn, not the
1398 address of the next insn. */
1402 if (opcode
->op
[0].type
== RX_Operand_Register
1403 && opcode
->op
[0].reg
== 16 /* PSW */)
1405 /* Special case, LDC and POPC can't ever modify PM. */
1406 int pm
= regs
.r_psw
& FLAGBIT_PM
;
1411 v
&= ~ (FLAGBIT_I
| FLAGBIT_U
| FLAGBITS_IPL
);
1417 /* various things can't be changed in user mode. */
1418 if (opcode
->op
[0].type
== RX_Operand_Register
)
1419 if (opcode
->op
[0].reg
== 32)
1421 v
&= ~ (FLAGBIT_I
| FLAGBIT_U
| FLAGBITS_IPL
);
1422 v
|= regs
.r_psw
& (FLAGBIT_I
| FLAGBIT_U
| FLAGBITS_IPL
);
1424 if (opcode
->op
[0].reg
== 34 /* ISP */
1425 || opcode
->op
[0].reg
== 37 /* BPSW */
1426 || opcode
->op
[0].reg
== 39 /* INTB */
1427 || opcode
->op
[0].reg
== 38 /* VCT */)
1428 /* These are ignored. */
1438 #ifdef CYCLE_ACCURATE
1439 if ((opcode
->op
[0].type
== RX_Operand_Predec
1440 && opcode
->op
[1].type
== RX_Operand_Register
)
1441 || (opcode
->op
[0].type
== RX_Operand_Postinc
1442 && opcode
->op
[1].type
== RX_Operand_Register
))
1444 /* Special case: push reg doesn't cause a memory stall. */
1446 tprintf("push special case\n");
1465 ll
= (unsigned long long) US1() * (unsigned long long) v
;
1472 ll
= (long long)(signed short)(GS() >> 16) * (long long)(signed short)(v
>> 16);
1474 put_reg64 (acc64
, ll
);
1480 ll
= (long long)(signed short)(GS()) * (long long)(signed short)(v
);
1482 put_reg64 (acc64
, ll
);
1487 PD (get_reg (acchi
));
1492 PD (get_reg (acclo
));
1497 PD (get_reg (accmi
));
1502 put_reg (acchi
, GS ());
1507 put_reg (acclo
, GS ());
1512 regs
.r_psw
&= ~ FLAGBITS_IPL
;
1513 regs
.r_psw
|= (GS () << FLAGSHIFT_IPL
) & FLAGBITS_IPL
;
1532 /* POPM cannot pop R0 (sp). */
1533 if (opcode
->op
[1].reg
== 0 || opcode
->op
[2].reg
== 0)
1534 EXCEPTION (EX_UNDEFINED
);
1535 if (opcode
->op
[1].reg
>= opcode
->op
[2].reg
)
1537 regs
.r_pc
= opcode_pc
;
1538 DO_RETURN (RX_MAKE_STOPPED (SIGILL
));
1540 for (v
= opcode
->op
[1].reg
; v
<= opcode
->op
[2].reg
; v
++)
1544 put_reg (v
, pop ());
1549 /* PUSHM cannot push R0 (sp). */
1550 if (opcode
->op
[1].reg
== 0 || opcode
->op
[2].reg
== 0)
1551 EXCEPTION (EX_UNDEFINED
);
1552 if (opcode
->op
[1].reg
>= opcode
->op
[2].reg
)
1554 regs
.r_pc
= opcode_pc
;
1555 return RX_MAKE_STOPPED (SIGILL
);
1557 for (v
= opcode
->op
[2].reg
; v
>= opcode
->op
[1].reg
; v
--)
1562 cycles (opcode
->op
[2].reg
- opcode
->op
[1].reg
+ 1);
1566 ll
= get_reg64 (acc64
) << GS ();
1567 ll
+= 0x80000000ULL
;
1568 if ((signed long long)ll
> (signed long long)0x00007fff00000000ULL
)
1569 ll
= 0x00007fff00000000ULL
;
1570 else if ((signed long long)ll
< (signed long long)0xffff800000000000ULL
)
1571 ll
= 0xffff800000000000ULL
;
1573 ll
&= 0xffffffff00000000ULL
;
1574 put_reg64 (acc64
, ll
);
1580 regs
.r_pc
= poppc ();
1581 regs
.r_psw
= poppc ();
1583 regs
.r_psw
|= FLAGBIT_U
;
1584 #ifdef CYCLE_ACCURATE
1585 regs
.fast_return
= 0;
1592 umb
= (((uma
>> 24) & 0xff)
1593 | ((uma
>> 8) & 0xff00)
1594 | ((uma
<< 8) & 0xff0000)
1595 | ((uma
<< 24) & 0xff000000UL
));
1602 umb
= (((uma
>> 8) & 0x00ff00ff)
1603 | ((uma
<< 8) & 0xff00ff00UL
));
1611 #ifdef CYCLE_ACCURATE
1615 while (regs
.r
[3] != 0)
1619 switch (opcode
->size
)
1622 ma
= mem_get_si (regs
.r
[1]);
1623 mb
= mem_get_si (regs
.r
[2]);
1628 ma
= sign_ext (mem_get_hi (regs
.r
[1]), 16);
1629 mb
= sign_ext (mem_get_hi (regs
.r
[2]), 16);
1634 ma
= sign_ext (mem_get_qi (regs
.r
[1]), 8);
1635 mb
= sign_ext (mem_get_qi (regs
.r
[2]), 8);
1642 /* We do the multiply as a signed value. */
1643 sll
= (long long)ma
* (long long)mb
;
1644 tprintf(" %016llx = %d * %d\n", sll
, ma
, mb
);
1645 /* but we do the sum as unsigned, while sign extending the operands. */
1646 tmp
= regs
.r
[4] + (sll
& 0xffffffffUL
);
1647 regs
.r
[4] = tmp
& 0xffffffffUL
;
1650 tmp
+= regs
.r
[5] + (sll
& 0xffffffffUL
);
1651 regs
.r
[5] = tmp
& 0xffffffffUL
;
1654 tmp
+= regs
.r
[6] + (sll
& 0xffffffffUL
);
1655 regs
.r
[6] = tmp
& 0xffffffffUL
;
1656 tprintf("%08lx\033[36m%08lx\033[0m%08lx\n",
1657 (unsigned long) regs
.r
[6],
1658 (unsigned long) regs
.r
[5],
1659 (unsigned long) regs
.r
[4]);
1663 if (regs
.r
[6] & 0x00008000)
1664 regs
.r
[6] |= 0xffff0000UL
;
1666 regs
.r
[6] &= 0x0000ffff;
1667 ma
= (regs
.r
[6] & 0x80000000UL
) ? FLAGBIT_S
: 0;
1668 if (regs
.r
[6] != 0 && regs
.r
[6] != 0xffffffffUL
)
1669 set_flags (FLAGBIT_O
|FLAGBIT_S
, ma
| FLAGBIT_O
);
1671 set_flags (FLAGBIT_O
|FLAGBIT_S
, ma
);
1672 #ifdef CYCLE_ACCURATE
1673 switch (opcode
->size
)
1676 cycles (6 + 4 * tx
);
1679 cycles (6 + 5 * (tx
/ 2) + 4 * (tx
% 2));
1682 cycles (6 + 7 * (tx
/ 4) + 4 * (tx
% 4));
1692 ma
= v
& 0x80000000UL
;
1704 uma
|= (carry
? 0x80000000UL
: 0);
1705 set_szc (uma
, 4, mb
);
1715 uma
= (uma
<< mb
) | (uma
>> (32-mb
));
1718 set_szc (uma
, 4, mb
);
1728 uma
= (uma
>> mb
) | (uma
<< (32-mb
));
1729 mb
= uma
& 0x80000000;
1731 set_szc (uma
, 4, mb
);
1739 mb
= rxfp_ftoi (ma
, regs
.r_fpsw
);
1742 tprintf("(int) %g = %d\n", int2float(ma
), mb
);
1749 #ifdef CYCLE_ACCURATE
1752 regs
.r_pc
= poppc ();
1753 #ifdef CYCLE_ACCURATE
1754 /* Note: specs say 5, chip says 3. */
1755 if (regs
.fast_return
&& regs
.link_register
== regs
.r_pc
)
1760 tprintf("fast return bonus\n");
1764 regs
.fast_return
= 0;
1765 branch_alignment_penalty
= 1;
1771 if (opcode
->op
[2].type
== RX_Operand_Register
)
1774 /* RTSD cannot pop R0 (sp). */
1775 put_reg (0, get_reg (0) + GS() - (opcode
->op
[0].reg
-opcode
->op
[2].reg
+1)*4);
1776 if (opcode
->op
[2].reg
== 0)
1777 EXCEPTION (EX_UNDEFINED
);
1778 #ifdef CYCLE_ACCURATE
1779 tx
= opcode
->op
[0].reg
- opcode
->op
[2].reg
+ 1;
1781 for (i
= opcode
->op
[2].reg
; i
<= opcode
->op
[0].reg
; i
++)
1784 put_reg (i
, pop ());
1789 #ifdef CYCLE_ACCURATE
1792 put_reg (0, get_reg (0) + GS());
1794 put_reg (pc
, poppc());
1795 #ifdef CYCLE_ACCURATE
1796 if (regs
.fast_return
&& regs
.link_register
== regs
.r_pc
)
1798 tprintf("fast return bonus\n");
1802 cycles (tx
< 3 ? 3 : tx
+ 1);
1806 cycles (tx
< 5 ? 5 : tx
+ 1);
1808 regs
.fast_return
= 0;
1809 branch_alignment_penalty
= 1;
1814 if (FLAG_O
&& FLAG_S
)
1816 else if (FLAG_O
&& ! FLAG_S
)
1822 if (FLAG_O
&& ! FLAG_S
)
1825 put_reg (5, 0x7fffffff);
1826 put_reg (4, 0xffffffff);
1828 else if (FLAG_O
&& FLAG_S
)
1830 put_reg (6, 0xffffffff);
1831 put_reg (5, 0x80000000);
1838 MATH_OP (-, ! carry
);
1850 #ifdef CYCLE_ACCURATE
1853 while (regs
.r
[3] != 0)
1855 uma
= mem_get_qi (regs
.r
[1] ++);
1856 umb
= mem_get_qi (regs
.r
[2] ++);
1858 if (uma
!= umb
|| uma
== 0)
1864 set_zc (0, ((int)uma
- (int)umb
) >= 0);
1865 cycles (2 + 4 * (tx
/ 4) + 4 * (tx
% 4));
1869 v
= 1 << opcode
->op
[0].reg
;
1880 #ifdef CYCLE_ACCURATE
1885 uma
= mem_get_qi (regs
.r
[2] --);
1886 mem_put_qi (regs
.r
[1]--, uma
);
1889 #ifdef CYCLE_ACCURATE
1891 cycles (6 + 3 * (tx
/ 4) + 3 * (tx
% 4));
1893 cycles (2 + 3 * (tx
% 4));
1899 #ifdef CYCLE_ACCURATE
1904 uma
= mem_get_qi (regs
.r
[2] ++);
1905 mem_put_qi (regs
.r
[1]++, uma
);
1908 cycles (2 + 3 * (int)(tx
/ 4) + 3 * (tx
% 4));
1912 #ifdef CYCLE_ACCURATE
1915 while (regs
.r
[3] != 0)
1917 uma
= mem_get_qi (regs
.r
[2] ++);
1918 mem_put_qi (regs
.r
[1]++, uma
);
1923 cycles (2 + 3 * (int)(tx
/ 4) + 3 * (tx
% 4));
1926 case RXO_shar
: /* d = ma >> mb */
1927 SHIFT_OP (sll
, int, mb
, >>=, 1);
1931 case RXO_shll
: /* d = ma << mb */
1932 SHIFT_OP (ll
, int, mb
, <<=, 0x80000000UL
);
1936 case RXO_shlr
: /* d = ma >> mb */
1937 SHIFT_OP (ll
, unsigned int, mb
, >>=, 1);
1943 #ifdef CYCLE_ACCURATE
1946 switch (opcode
->size
)
1949 while (regs
.r
[3] != 0)
1951 mem_put_si (regs
.r
[1], regs
.r
[2]);
1958 while (regs
.r
[3] != 0)
1960 mem_put_hi (regs
.r
[1], regs
.r
[2]);
1964 cycles (2 + (int)(tx
/ 2) + tx
% 2);
1967 while (regs
.r
[3] != 0)
1969 mem_put_qi (regs
.r
[1], regs
.r
[2]);
1973 cycles (2 + (int)(tx
/ 4) + tx
% 4);
1988 regs
.r_psw
|= FLAGBIT_I
;
1989 DO_RETURN (RX_MAKE_STOPPED(0));
1997 #ifdef CYCLE_ACCURATE
2005 switch (opcode
->size
)
2009 while (regs
.r
[3] != 0)
2012 umb
= mem_get_si (get_reg (1));
2014 #ifdef CYCLE_ACCURATE
2020 #ifdef CYCLE_ACCURATE
2021 cycles (3 + 3 * tx
);
2025 uma
= get_reg (2) & 0xffff;
2026 while (regs
.r
[3] != 0)
2029 umb
= mem_get_hi (get_reg (1));
2031 #ifdef CYCLE_ACCURATE
2037 #ifdef CYCLE_ACCURATE
2038 cycles (3 + 3 * (tx
/ 2) + 3 * (tx
% 2));
2042 uma
= get_reg (2) & 0xff;
2043 while (regs
.r
[3] != 0)
2046 umb
= mem_get_qi (regs
.r
[1]);
2048 #ifdef CYCLE_ACCURATE
2054 #ifdef CYCLE_ACCURATE
2055 cycles (3 + 3 * (tx
/ 4) + 3 * (tx
% 4));
2064 set_zc (0, ((int)uma
- (int)umb
) >= 0);
2069 #ifdef CYCLE_ACCURATE
2074 switch (opcode
->size
)
2078 while (regs
.r
[3] != 0)
2081 umb
= mem_get_si (get_reg (1));
2083 #ifdef CYCLE_ACCURATE
2089 #ifdef CYCLE_ACCURATE
2090 cycles (3 + 3 * tx
);
2094 uma
= get_reg (2) & 0xffff;
2095 while (regs
.r
[3] != 0)
2098 umb
= mem_get_hi (get_reg (1));
2100 #ifdef CYCLE_ACCURATE
2106 #ifdef CYCLE_ACCURATE
2107 cycles (3 + 3 * (tx
/ 2) + 3 * (tx
% 2));
2111 uma
= get_reg (2) & 0xff;
2112 while (regs
.r
[3] != 0)
2115 umb
= mem_get_qi (regs
.r
[1]);
2117 #ifdef CYCLE_ACCURATE
2123 #ifdef CYCLE_ACCURATE
2124 cycles (3 + 3 * (tx
/ 4) + 3 * (tx
% 4));
2133 set_zc (0, ((int)uma
- (int)umb
) >= 0);
2138 regs
.r_psw
|= FLAGBIT_I
;
2139 DO_RETURN (RX_MAKE_STOPPED(0));
2142 #ifdef CYCLE_ACCURATE
2145 v
= GS (); /* This is the memory operand, if any. */
2146 PS (GD ()); /* and this may change the address register. */
2149 #ifdef CYCLE_ACCURATE
2150 /* all M cycles happen during xchg's cycles. */
2161 EXCEPTION (EX_UNDEFINED
);
2164 #ifdef CYCLE_ACCURATE
2167 regs
.m2m
|= M2M_SRC
;
2169 regs
.m2m
|= M2M_DST
;
2176 if (prev_cycle_count
== regs
.cycle_count
)
2178 printf("Cycle count not updated! id %s\n", id_names
[opcode
->id
]);
2184 if (running_benchmark
)
2186 int omap
= op_lookup (opcode
->op
[0].type
, opcode
->op
[1].type
, opcode
->op
[2].type
);
2189 cycles_per_id
[opcode
->id
][omap
] += regs
.cycle_count
- prev_cycle_count
;
2190 times_per_id
[opcode
->id
][omap
] ++;
2192 times_per_pair
[prev_opcode_id
][po0
][opcode
->id
][omap
] ++;
2194 prev_opcode_id
= opcode
->id
;
2199 return RX_MAKE_STEPPED ();
2204 reset_pipeline_stats (void)
2206 memset (cycles_per_id
, 0, sizeof(cycles_per_id
));
2207 memset (times_per_id
, 0, sizeof(times_per_id
));
2209 register_stalls
= 0;
2211 branch_alignment_stalls
= 0;
2213 memset (times_per_pair
, 0, sizeof(times_per_pair
));
2214 running_benchmark
= 1;
2216 benchmark_start_cycle
= regs
.cycle_count
;
/* Stop the benchmark window: clear running_benchmark (which also silences
   tprintf and stops the per-opcode statistics from accumulating) and
   record the current cycle count as benchmark_end_cycle.  */
2220 halt_pipeline_stats (void)
2222 running_benchmark
= 0;
2223 benchmark_end_cycle
= regs
.cycle_count
;
2228 pipeline_stats (void)
2235 #ifdef CYCLE_ACCURATE
2238 printf ("cycles: %llu\n", regs
.cycle_count
);
2242 printf ("cycles: %13s\n", comma (regs
.cycle_count
));
2246 if (benchmark_start_cycle
)
2247 printf ("bmark: %13s\n", comma (benchmark_end_cycle
- benchmark_start_cycle
));
2250 for (i
= 0; i
< N_RXO
; i
++)
2251 for (o1
= 0; o1
< N_MAP
; o1
++)
2252 if (times_per_id
[i
][o1
])
2253 printf("%13s %13s %7.2f %s %s\n",
2254 comma (cycles_per_id
[i
][o1
]),
2255 comma (times_per_id
[i
][o1
]),
2256 (double)cycles_per_id
[i
][o1
] / times_per_id
[i
][o1
],
2257 op_cache_string(o1
),
2261 for (p
= 0; p
< N_RXO
; p
++)
2262 for (p1
= 0; p1
< N_MAP
; p1
++)
2263 for (i
= 0; i
< N_RXO
; i
++)
2264 for (o1
= 0; o1
< N_MAP
; o1
++)
2265 if (times_per_pair
[p
][p1
][i
][o1
])
2267 printf("%13s %s %-9s -> %s %s\n",
2268 comma (times_per_pair
[p
][p1
][i
][o1
]),
2269 op_cache_string(p1
),
2271 op_cache_string(o1
),
2276 printf("%13s memory stalls\n", comma (memory_stalls
));
2277 printf("%13s register stalls\n", comma (register_stalls
));
2278 printf("%13s branches taken (non-return)\n", comma (branch_stalls
));
2279 printf("%13s branch alignment stalls\n", comma (branch_alignment_stalls
));
2280 printf("%13s fast returns\n", comma (fast_returns
));