/* rx.c --- opcode semantics for stand-alone RX simulator.

   Copyright (C) 2008-2024 Free Software Foundation, Inc.
   Contributed by Red Hat, Inc.

   This file is part of the GNU simulators.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */

/* This must come before any other includes.  */

#include "libiberty.h"

#include "opcode/rx.h"
static const char * const id_names[] = {
  "RXO_mov",	/* d = s (signed) */
  "RXO_movbi",	/* d = [s,s2] (signed) */
  "RXO_movbir",	/* [s,s2] = d (signed) */
  "RXO_pushm",	/* s..s2 */
  "RXO_popm",	/* s..s2 */
  "RXO_xchg",	/* s <-> d */
  "RXO_stcc",	/* d = s if cond(s2) */
  "RXO_rtsd",	/* rtsd, 1=imm, 2-0 = reg if reg type */

  /* These are all either d OP= s or, if s2 is set, d = s OP s2.  Note
     that d may be "None".  */
  "RXO_adc",	/* d = d + s + carry */
  "RXO_sbb",	/* d = d - s - ~carry */
  "RXO_abs",	/* d = |s| */
  "RXO_max",	/* d = max(d,s) */
  "RXO_min",	/* d = min(d,s) */
  "RXO_emul",	/* d:64 = d:32 * s */
  "RXO_emulu",	/* d:64 = d:32 * s (unsigned) */

  "RXO_rolc",	/* d <<= 1 through carry */
  "RXO_rorc",	/* d >>= 1 through carry */
  "RXO_rotl",	/* d <<= #s without carry */
  "RXO_rotr",	/* d >>= #s without carry */
  "RXO_revw",	/* d = revw(s) */
  "RXO_revl",	/* d = revl(s) */
  "RXO_branch",	/* pc = d if cond(s) */
  "RXO_branchrel", /* pc += d if cond(s) */
  "RXO_jsr",	/* pc = d */
  "RXO_jsrrel",	/* pc += d */

  "RXO_sat",	/* sat(d) */

  "RXO_fadd",	/* d op= s */

  "RXO_bset",	/* d |= (1<<s) */
  "RXO_bclr",	/* d &= ~(1<<s) */
  "RXO_btst",	/* s & (1<<s2) */
  "RXO_bnot",	/* d ^= (1<<s) */
  "RXO_bmcc",	/* d<s> = cond(s2) */

  "RXO_clrpsw",	/* flag index in d */
  "RXO_setpsw",	/* flag index in d */
  "RXO_mvtipl",	/* new IPL in s */

  "RXO_rtd",	/* undocumented */

  "RXO_dbt",	/* undocumented */
  "RXO_int",	/* vector id in s */

  "RXO_sccnd",	/* d = cond(s) ? 1 : 0 */
static const char * const optype_names[] = {
  "#Imm",	/* #addend */
  "[Rn]",	/* [Rn + addend] */
  " cc ",	/* eq, gtu, etc */
  "Flag",	/* [UIOSZC] */
  "RbRi"	/* [Rb + scale * Ri] */

#define N_RXO ARRAY_SIZE (id_names)
#define N_RXT ARRAY_SIZE (optype_names)
static unsigned long long benchmark_start_cycle;
static unsigned long long benchmark_end_cycle;

static int op_cache[N_RXT][N_RXT][N_RXT];
static int op_cache_rev[N_MAP];
static int op_cache_idx = 0;

op_lookup (int a, int b, int c)
  if (op_cache[a][b][c])
    return op_cache[a][b][c];
  if (op_cache_idx >= N_MAP)
      printf("op_cache_idx exceeds %d\n", N_MAP);
  op_cache[a][b][c] = op_cache_idx;
  op_cache_rev[op_cache_idx] = (a<<8) | (b<<4) | c;
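  /* Editor's note: a small worked example of the packing above, assuming
     (as the shifts imply) that each operand type code fits in four bits.
     op_lookup (RX_Operand_Register, RX_Operand_Immediate, RX_Operand_None)
     records the three type codes as (a<<8) | (b<<4) | c, so a later call
     to op_cache_string() can presumably recover them with
       a = (map >> 8) & 15;  b = (map >> 4) & 15;  c = map & 15;
     before indexing optype_names[].  The exact decode lines are not part
     of this excerpt.  */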
op_cache_string (int map)
  static char cb[5][20];
  map = op_cache_rev[map];
  sprintf(cb[ci], "%s %s %s", optype_names[a], optype_names[b], optype_names[c]);
static unsigned long long cycles_per_id[N_RXO][N_MAP];
static unsigned long long times_per_id[N_RXO][N_MAP];
static unsigned long long memory_stalls;
static unsigned long long register_stalls;
static unsigned long long branch_stalls;
static unsigned long long branch_alignment_stalls;
static unsigned long long fast_returns;

static unsigned long times_per_pair[N_RXO][N_MAP][N_RXO][N_MAP];
static int prev_opcode_id = RXO_unknown;

#endif /* WITH_PROFILE */
#ifdef CYCLE_ACCURATE

static int new_rt = -1;

/* Number of cycles to add if an insn spans an 8-byte boundary.  */
static int branch_alignment_penalty = 0;

static int running_benchmark = 1;

#define tprintf if (trace && running_benchmark) printf

jmp_buf decode_jmp_buf;
unsigned int rx_cycles = 0;

#ifdef CYCLE_ACCURATE
/* If nonzero, memory was read at some point and cycle latency might
   apply.  */
static int memory_source = 0;
/* If nonzero, memory was written and extra cycles might be
   needed.  */
static int memory_dest = 0;
cycles (int throughput)
  tprintf("%d cycles\n", throughput);
  regs.cycle_count += throughput;

/* Number of execution (E) cycles the op uses.  For memory sources, we
   include the load micro-op stall as two extra E cycles.  */
#define E(c) cycles (memory_source ? c + 2 : c)
#define E1 cycles (1)
#define E2 cycles (2)
#define EBIT cycles (memory_source ? 2 : 1)
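
/* Editor's note: a quick illustration of the macros above.  With
   memory_source set (the insn loaded one of its operands from memory),
   E(2) expands to cycles (memory_source ? 2 + 2 : 2), so 4 cycles are
   charged; with a register-only source it charges 2.  EBIT likewise
   charges 2 instead of 1 for the bit-manipulation ops.  */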
/* Check to see if a read latency must be applied for a given register.  */
      tprintf("register %d load stall\n", r); \
      regs.cycle_count ++; \
      STATS(register_stalls ++); \
    tprintf ("Rt now %d\n", r); \

lsb_count (unsigned long v, int is_signed)
  if (is_signed && (v & 0x80000000U))
    v = (unsigned long)(long)(-v);
  for (i=31; i>=0; i--)
      /* v is 0..31, we want 1=1-2, 2=3-4, 3=5-6, etc. */
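      /* Editor's note: the mapping described above is presumably applied
	 to the bit index found by the loop: an index of 1 or 2 maps to 1,
	 3 or 4 to 2, 5 or 6 to 3, and so on, i.e. (i + 1) / 2 in integer
	 arithmetic.  The exact expression is not part of this excerpt, so
	 treat this as an illustration of the comment rather than of the
	 implementation.  */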
divu_cycles(unsigned long num, unsigned long den)
  int nb = lsb_count (num, 0);
  int db = lsb_count (den, 0);

div_cycles(long num, long den)
  int nb = lsb_count ((unsigned long)num, 1);
  int db = lsb_count ((unsigned long)den, 1);

#else /* !CYCLE_ACCURATE */

#define divu_cycles(n,d)
#define div_cycles(n,d)

#endif /* else CYCLE_ACCURATE */

static const int size2bytes[] = {
  4, 1, 1, 1, 2, 2, 2, 3, 4
#define rx_abort() _rx_abort(__FILE__, __LINE__)
static void ATTRIBUTE_NORETURN
_rx_abort (const char *file, int line)
  if (strrchr (file, '/'))
    file = strrchr (file, '/') + 1;
  fprintf(stderr, "abort at %s:%d\n", file, line);

static unsigned char *get_byte_base;
static RX_Opcode_Decoded **decode_cache_base;
static SI get_byte_page;

  decode_cache_base = 0;
maybe_get_mem_page (SI tpc)
  if (((tpc ^ get_byte_page) & NONPAGE_MASK) || enable_counting)
      get_byte_page = tpc & NONPAGE_MASK;
      get_byte_base = rx_mem_ptr (get_byte_page, MPA_READING) - get_byte_page;
      decode_cache_base = rx_mem_decode_cache (get_byte_page) - get_byte_page;

/* This gets called a *lot* so optimize it.  */
rx_get_byte (void *vdata)
  RX_Data *rx_data = (RX_Data *)vdata;
  SI tpc = rx_data->dpc;

  /* See load.c for an explanation of this.  */
    maybe_get_mem_page (tpc);

  return get_byte_base[tpc];
get_op (const RX_Opcode_Decoded *rd, int i)
  const RX_Opcode_Operand *o = rd->op + i;

    case RX_Operand_None:

    case RX_Operand_Immediate:	/* #addend */

    case RX_Operand_Register:	/* Rn */
      rv = get_reg (o->reg);

    case RX_Operand_Predec:	/* [-Rn] */
      put_reg (o->reg, get_reg (o->reg) - size2bytes[o->size]);
      ATTRIBUTE_FALLTHROUGH;
    case RX_Operand_Postinc:	/* [Rn+] */
    case RX_Operand_Zero_Indirect:	/* [Rn + 0] */
    case RX_Operand_Indirect:	/* [Rn + addend] */
    case RX_Operand_TwoReg:	/* [Rn + scale * R2] */
#ifdef CYCLE_ACCURATE
      if (o->type == RX_Operand_TwoReg)
      if (regs.m2m == M2M_BOTH)
          tprintf("src memory stall\n");

      if (o->type == RX_Operand_TwoReg)
        addr = get_reg (o->reg) * size2bytes[rd->size] + get_reg (rd->op[2].reg);
        addr = get_reg (o->reg) + o->addend;

        case RX_Byte: /* undefined extension */
          rv = mem_get_qi (addr);
        case RX_Word: /* undefined extension */
          rv = mem_get_hi (addr);
          rv = mem_get_psi (addr);
          rv = mem_get_si (addr);

      if (o->type == RX_Operand_Postinc)
        put_reg (o->reg, get_reg (o->reg) + size2bytes[o->size]);

    case RX_Operand_Condition:	/* eq, gtu, etc */
      return condition_true (o->reg);

    case RX_Operand_Flag:	/* [UIOSZC] */
      return (regs.r_psw & (1 << o->reg)) ? 1 : 0;
  /* if we've gotten here, we need to clip/extend the value according
     to the operand size.  */
    case RX_Byte: /* undefined extension */
      rv |= 0xdeadbe00;	/* keep them honest */
      rv = sign_ext (rv, 8);

    case RX_Word: /* undefined extension */
      rv |= 0xdead0000;	/* keep them honest */
      rv = sign_ext (rv, 16);
put_op (const RX_Opcode_Decoded *rd, int i, int v)
  const RX_Opcode_Operand *o = rd->op + i;

  if (o->type != RX_Operand_Register)
      case RX_Byte: /* undefined extension */
        v |= 0xdeadbe00;	/* keep them honest */
      case RX_Word: /* undefined extension */
        v |= 0xdead0000;	/* keep them honest */
        v = sign_ext (v, 16);

    case RX_Operand_None:
      /* Opcodes like TST and CMP use this.  */

    case RX_Operand_Immediate:	/* #addend */
    case RX_Operand_Condition:	/* eq, gtu, etc */

    case RX_Operand_Register:	/* Rn */

    case RX_Operand_Predec:	/* [-Rn] */
      put_reg (o->reg, get_reg (o->reg) - size2bytes[o->size]);
      ATTRIBUTE_FALLTHROUGH;
    case RX_Operand_Postinc:	/* [Rn+] */
    case RX_Operand_Zero_Indirect:	/* [Rn + 0] */
    case RX_Operand_Indirect:	/* [Rn + addend] */
    case RX_Operand_TwoReg:	/* [Rn + scale * R2] */

#ifdef CYCLE_ACCURATE
      if (regs.m2m == M2M_BOTH)
          tprintf("dst memory stall\n");

      if (o->type == RX_Operand_TwoReg)
        addr = get_reg (o->reg) * size2bytes[rd->size] + get_reg (rd->op[2].reg);
        addr = get_reg (o->reg) + o->addend;

        case RX_Byte: /* undefined extension */
          mem_put_qi (addr, v);
        case RX_Word: /* undefined extension */
          mem_put_hi (addr, v);
          mem_put_psi (addr, v);
          mem_put_si (addr, v);

      if (o->type == RX_Operand_Postinc)
        put_reg (o->reg, get_reg (o->reg) + size2bytes[o->size]);

    case RX_Operand_Flag:	/* [UIOSZC] */
        regs.r_psw |= (1 << o->reg);
        regs.r_psw &= ~(1 << o->reg);
#define PD(x) put_op (opcode, 0, x)
#define PS(x) put_op (opcode, 1, x)
#define PS2(x) put_op (opcode, 2, x)
#define GD() get_op (opcode, 0)
#define GS() get_op (opcode, 1)
#define GS2() get_op (opcode, 2)
#define DSZ() size2bytes[opcode->op[0].size]
#define SSZ() size2bytes[opcode->op[1].size]
#define S2SZ() size2bytes[opcode->op[2].size]

/* "Universal" sources.  */
#define US1() ((opcode->op[2].type == RX_Operand_None) ? GD() : GS())
#define US2() ((opcode->op[2].type == RX_Operand_None) ? GS() : GS2())
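
/* Editor's note: a sketch of how the "universal source" macros read,
   based on the two-operand/three-operand convention noted above for
   id_names[].  For a two-operand form such as ADD R1,R2 (d OP= s),
   op[2] is RX_Operand_None, so US1() is the destination R2 and US2()
   is the source R1.  For a three-operand form such as ADD R1,R2,R3
   (d = s OP s2), US1() is R1 and US2() is R2, leaving op[0] (R3) as
   the destination only.  The ADD spellings are illustrative, not taken
   from this excerpt.  */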
  int rsp = get_reg (sp);
  mem_put_si (rsp, val);

/* Just like the above, but tag the memory as "pushed pc" so if anyone
   tries to write to it, it will cause an error.  */
  int rsp = get_reg (sp);
  mem_put_si (rsp, val);
  mem_set_content_range (rsp, rsp+3, MC_PUSHED_PC);

  int rsp = get_reg (sp);
  rv = mem_get_si (rsp);

  int rsp = get_reg (sp);
  if (mem_get_content_type (rsp) != MC_PUSHED_PC)
    execution_error (SIM_ERR_CORRUPT_STACK, rsp);
  rv = mem_get_si (rsp);
  mem_set_content_range (rsp, rsp+3, MC_UNINIT);
#define MATH_OP(vop,c) \
  ll = (unsigned long long) uma vop (unsigned long long) umb vop c; \
  tprintf ("0x%x " #vop " 0x%x " #vop " 0x%x = 0x%llx\n", uma, umb, c, ll); \
  ma = sign_ext (uma, DSZ() * 8); \
  mb = sign_ext (umb, DSZ() * 8); \
  sll = (long long) ma vop (long long) mb vop c; \
  tprintf ("%d " #vop " %d " #vop " %d = %lld\n", ma, mb, c, sll); \
  set_oszc (sll, DSZ(), (long long) ll > ((1 vop 1) ? (long long) b2mask[DSZ()] : (long long) -1)); \

#define LOGIC_OP(vop) \
  tprintf("0x%x " #vop " 0x%x = 0x%x\n", ma, mb, v); \

#define SHIFT_OP(val, type, count, OP, carry_mask) \
  tprintf("%lld " #OP " %d\n", val, count); \
  for (i = 0; i < count; i ++) \
      c = val & carry_mask; \
  set_oszc (val, 4, c); \
fop_fadd (fp_t s1, fp_t s2, fp_t *d)
  *d = rxfp_add (s1, s2);

fop_fmul (fp_t s1, fp_t s2, fp_t *d)
  *d = rxfp_mul (s1, s2);

fop_fdiv (fp_t s1, fp_t s2, fp_t *d)
  *d = rxfp_div (s1, s2);

fop_fsub (fp_t s1, fp_t s2, fp_t *d)
  *d = rxfp_sub (s1, s2);

#define FPPENDING() (regs.r_fpsw & (FPSWBITS_CE | (FPSWBITS_FMASK & (regs.r_fpsw << FPSW_EFSH))))
#define FPCLEAR() regs.r_fpsw &= FPSWBITS_CLEAR
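
/* Editor's note: FPPENDING() appears to work by shifting the exception
   enable bits of FPSW up by FPSW_EFSH so they line up with the
   corresponding flag bits, then masking with FPSWBITS_FMASK.  An
   exception is therefore "pending" when both its flag and its enable bit
   are set, or unconditionally for the unimplemented-operation bit
   FPSWBITS_CE.  This reading is inferred from the macro itself, not from
   the surrounding (elided) code.  */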
    return do_fp_exception (opcode_pc)

#define FLOAT_OP(func) \
    do_store = fop_##func (fa, fb, &fc); \
    tprintf("%g " #func " %g = %g %08x\n", int2float(fa), int2float(fb), int2float(fc), fc); \
    if ((fc & 0x80000000UL) != 0) \
    if ((fc & 0x7fffffffUL) == 0) \
    set_flags (FLAGBIT_S | FLAGBIT_Z, mb); \

#define carry (FLAG_C ? 1 : 0)
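
/* Editor's note on MATH_OP and the carry macro above: the visible caller
   passes the current carry flag straight into the arithmetic, e.g.
   MATH_OP (-, ! carry) for SBB (the RX subtract-with-borrow uses an
   inverted carry, per the "d = d - s - ~carry" note in id_names[]); an
   ADC handler would presumably use MATH_OP (+, carry), though that line
   is not part of this excerpt.  Inside MATH_OP the odd-looking
   (1 vop 1) test simply distinguishes addition from subtraction: 1 + 1
   is nonzero, so the carry test compares ll against b2mask[DSZ()], while
   1 - 1 is zero, so the comparison is against -1 instead.  */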
} exception_info[] = {
  { 0xFFFFFFD0UL, "privileged opcode", SIGILL },
  { 0xFFFFFFD4UL, "access violation", SIGSEGV },
  { 0xFFFFFFDCUL, "undefined opcode", SIGILL },
  { 0xFFFFFFE4UL, "floating point", SIGFPE }

#define EX_PRIVILEDGED	0
#define EX_UNDEFINED	2
#define EX_FLOATING	3
#define EXCEPTION(n) \
  return generate_exception (n, opcode_pc)

#define PRIVILEDGED() \
    EXCEPTION (EX_PRIVILEDGED)
generate_exception (unsigned long type, SI opcode_pc)
  SI old_psw, old_pc, new_pc;

  new_pc = mem_get_si (exception_info[type].vaddr);
  /* 0x00020000 is the value used to initialise the known
     exception vectors (see rx.ld), but it is a reserved
     area of memory so do not try to access it, and if the
     value has not been changed by the program then the
     vector has not been installed.  */
  if (new_pc == 0 || new_pc == 0x00020000)
      return RX_MAKE_STOPPED (exception_info[type].signal);

      fprintf(stderr, "Unhandled %s exception at pc = %#lx\n",
              exception_info[type].str, (unsigned long) opcode_pc);
      if (type == EX_FLOATING)
          int mask = FPPENDING ();
          fprintf (stderr, "Pending FP exceptions:");
          if (mask & FPSWBITS_FV)
            fprintf(stderr, " Invalid");
          if (mask & FPSWBITS_FO)
            fprintf(stderr, " Overflow");
          if (mask & FPSWBITS_FZ)
            fprintf(stderr, " Division-by-zero");
          if (mask & FPSWBITS_FU)
            fprintf(stderr, " Underflow");
          if (mask & FPSWBITS_FX)
            fprintf(stderr, " Inexact");
          if (mask & FPSWBITS_CE)
            fprintf(stderr, " Unimplemented");
          fprintf(stderr, "\n");
      return RX_MAKE_EXITED (1);

  tprintf ("Triggering %s exception\n", exception_info[type].str);

  old_psw = regs.r_psw;
  regs.r_psw &= ~ (FLAGBIT_I | FLAGBIT_U | FLAGBIT_PM);

  return RX_MAKE_STEPPED ();
generate_access_exception (void)
  rv = generate_exception (EX_ACCESS, regs.r_pc);
  longjmp (decode_jmp_buf, rv);

do_fp_exception (unsigned long opcode_pc)
    EXCEPTION (EX_FLOATING);
  return RX_MAKE_STEPPED ();
op_is_memory (const RX_Opcode_Decoded *rd, int i)
  switch (rd->op[i].type)
    case RX_Operand_Predec:
    case RX_Operand_Postinc:
    case RX_Operand_Indirect:

#define OM(i) op_is_memory (opcode, i)

#define DO_RETURN(x) { longjmp (decode_jmp_buf, x); }
  unsigned int uma=0, umb=0;
  unsigned long long ll;
  unsigned long opcode_pc;
  const RX_Opcode_Decoded *opcode;
  unsigned long long prev_cycle_count;

#ifdef CYCLE_ACCURATE
  prev_cycle_count = regs.cycle_count;

#ifdef CYCLE_ACCURATE
  maybe_get_mem_page (regs.r_pc);

  opcode_pc = regs.r_pc;
  /* Note that we don't word-swap at this point; there's no point.  */
  if (decode_cache_base[opcode_pc] == NULL)
      RX_Opcode_Decoded *opcode_w;
      rx_data.dpc = opcode_pc;
      opcode_w = decode_cache_base[opcode_pc] = calloc (1, sizeof (RX_Opcode_Decoded));
      opcode_size = rx_decode_opcode (opcode_pc, opcode_w,
                                      rx_get_byte, &rx_data);
      opcode = decode_cache_base[opcode_pc];
      opcode_size = opcode->n_bytes;

#ifdef CYCLE_ACCURATE
  if (branch_alignment_penalty)
      if ((regs.r_pc ^ (regs.r_pc + opcode_size - 1)) & ~7)
          tprintf("1 cycle branch alignment penalty\n");
          cycles (branch_alignment_penalty);
          branch_alignment_stalls ++;
      branch_alignment_penalty = 0;
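      /* Editor's note: the XOR test above flags an insn whose first and
	 last bytes live in different 8-byte fetch lines.  For example,
	 with r_pc = 0x1006 and opcode_size = 4 the last byte is at
	 0x1009; 0x1006 ^ 0x1009 = 0xf, and 0xf & ~7 is nonzero, so the
	 extra branch-target cycle is charged.  At r_pc = 0x1000 the same
	 insn stays inside one 8-byte line and no penalty applies.  */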
  regs.r_pc += opcode_size;

  rx_flagmask = opcode->flags_s;
  rx_flagand = ~(int)opcode->flags_0;
  rx_flagor = opcode->flags_1;

      tprintf("|%lld| = ", sll);
      tprintf("%lld\n", sll);

      if (opcode->op[0].type == RX_Operand_Register)

      if (opcode->op[0].type == RX_Operand_Register)

      if (opcode->op[0].type == RX_Operand_Register)

      if (opcode->op[1].type == RX_Operand_None || GS())
#ifdef CYCLE_ACCURATE
          SI old_pc = regs.r_pc;
#ifdef CYCLE_ACCURATE
          delta = regs.r_pc - old_pc;
          if (delta >= 0 && delta < 16
              tprintf("near forward branch bonus\n");
            branch_alignment_penalty = 1;

#ifdef CYCLE_ACCURATE
      if (opcode->op[1].type == RX_Operand_None || GS())
          regs.r_pc = opcode_pc + delta;
#ifdef CYCLE_ACCURATE
          /* Note: specs say 3, chip says 2.  */
          if (delta >= 0 && delta < 16
              tprintf("near forward branch bonus\n");
            branch_alignment_penalty = 1;

#ifdef CYCLE_ACCURATE
        int old_psw = regs.r_psw;
        DO_RETURN (RX_MAKE_HIT_BREAK ());
        if (regs.r_intb == 0)
            tprintf("BREAK hit, no vector table.\n");
            DO_RETURN (RX_MAKE_EXITED(1));
        regs.r_psw &= ~(FLAGBIT_I | FLAGBIT_U | FLAGBIT_PM);
        regs.r_pc = mem_get_si (regs.r_intb);

      if (opcode->op[0].type == RX_Operand_Register)
      if (opcode->op[1].type == RX_Operand_Register)
      umb = ma & (1 << mb);
      set_zc (! umb, umb);

      v = 1 << opcode->op[0].reg;

    case RXO_div:	/* d = d / s */
      tprintf("%d / %d = ", mb, ma);
      if (ma == 0 || (ma == -1 && (unsigned int) mb == 0x80000000))
          set_flags (FLAGBIT_O, FLAGBIT_O);
          set_flags (FLAGBIT_O, 0);
      div_cycles (mb, ma);

    case RXO_divu:	/* d = d / s */
      tprintf("%u / %u = ", umb, uma);
          set_flags (FLAGBIT_O, FLAGBIT_O);
          set_flags (FLAGBIT_O, 0);
      divu_cycles (umb, uma);

      sll = (long long)ma * (long long)mb;
      tprintf("%d * %d = %lld\n", ma, mb, sll);
      put_reg (opcode->op[0].reg, sll);
      put_reg (opcode->op[0].reg + 1, sll >> 32);

      ll = (long long)uma * (long long)umb;
      tprintf("%#x * %#x = %#llx\n", uma, umb, ll);
      put_reg (opcode->op[0].reg, ll);
      put_reg (opcode->op[0].reg + 1, ll >> 32);
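      /* Editor's note: the two 64-bit multiply handlers above write the
	 product across a register pair, as the put_reg calls show.  For
	 example, in the unsigned case 0x80000000 * 2 = 0x100000000, so
	 the named destination register receives 0x00000000 and the
	 next-numbered register receives 0x00000001 (the high 32 bits),
	 matching the "d:64 = d:32 * s" note in id_names[].  */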
      regs.r_psw = regs.r_bpsw;
      regs.r_pc = regs.r_bpc;
#ifdef CYCLE_ACCURATE
      regs.fast_return = 0;

      mb = rxfp_ftoi (ma, FPRM_ZERO);
      tprintf("(int) %g = %d\n", int2float(ma), mb);

        int rc = rx_syscall (regs.r[5]);
        if (! RX_STEPPED (rc))

        int old_psw = regs.r_psw;
        regs.r_psw &= ~(FLAGBIT_I | FLAGBIT_U | FLAGBIT_PM);
        regs.r_pc = mem_get_si (regs.r_intb + 4 * v);

      mb = rxfp_itof (ma, regs.r_fpsw);
      tprintf("(float) %d = %x\n", ma, mb);

#ifdef CYCLE_ACCURATE
#ifdef CYCLE_ACCURATE
      regs.link_register = regs.r_pc;
      pushpc (get_reg (pc));
      if (opcode->id == RXO_jsrrel)
#ifdef CYCLE_ACCURATE
        delta = v - regs.r_pc;
#ifdef CYCLE_ACCURATE
      /* Note: docs say 3, chip says 2 */
      if (delta >= 0 && delta < 16)
          tprintf ("near forward jsr bonus\n");
        branch_alignment_penalty = 1;
      regs.fast_return = 1;

      ll = (long long)(signed short)(GS() >> 16) * (long long)(signed short)(GS2 () >> 16);
      put_reg64 (acc64, ll + regs.r_acc);

      ll = (long long)(signed short)(GS()) * (long long)(signed short)(GS2 ());
      put_reg64 (acc64, ll + regs.r_acc);

      if (opcode->op[1].type == RX_Operand_Register
          && opcode->op[1].reg == 17 /* PC */)
          /* Special case.  We want the address of the insn, not the
             address of the next insn.  */
      if (opcode->op[0].type == RX_Operand_Register
          && opcode->op[0].reg == 16 /* PSW */)
          /* Special case, LDC and POPC can't ever modify PM.  */
          int pm = regs.r_psw & FLAGBIT_PM;
          v &= ~ (FLAGBIT_I | FLAGBIT_U | FLAGBITS_IPL);

      /* various things can't be changed in user mode.  */
      if (opcode->op[0].type == RX_Operand_Register)
        if (opcode->op[0].reg == 32)
            v &= ~ (FLAGBIT_I | FLAGBIT_U | FLAGBITS_IPL);
            v |= regs.r_psw & (FLAGBIT_I | FLAGBIT_U | FLAGBITS_IPL);
        if (opcode->op[0].reg == 34 /* ISP */
            || opcode->op[0].reg == 37 /* BPSW */
            || opcode->op[0].reg == 39 /* INTB */
            || opcode->op[0].reg == 38 /* VCT */)
          /* These are ignored.  */

#ifdef CYCLE_ACCURATE
      if ((opcode->op[0].type == RX_Operand_Predec
           && opcode->op[1].type == RX_Operand_Register)
          || (opcode->op[0].type == RX_Operand_Postinc
              && opcode->op[1].type == RX_Operand_Register))
          /* Special case: push reg doesn't cause a memory stall.  */
          tprintf("push special case\n");

      ll = (unsigned long long) US1() * (unsigned long long) v;

      ll = (long long)(signed short)(GS() >> 16) * (long long)(signed short)(v >> 16);
      put_reg64 (acc64, ll);

      ll = (long long)(signed short)(GS()) * (long long)(signed short)(v);
      put_reg64 (acc64, ll);

      PD (get_reg (acchi));
      PD (get_reg (acclo));
      PD (get_reg (accmi));

      put_reg (acchi, GS ());
      put_reg (acclo, GS ());

      regs.r_psw &= ~ FLAGBITS_IPL;
      regs.r_psw |= (GS () << FLAGSHIFT_IPL) & FLAGBITS_IPL;

      /* POPM cannot pop R0 (sp).  */
      if (opcode->op[1].reg == 0 || opcode->op[2].reg == 0)
        EXCEPTION (EX_UNDEFINED);
      if (opcode->op[1].reg >= opcode->op[2].reg)
          regs.r_pc = opcode_pc;
          DO_RETURN (RX_MAKE_STOPPED (SIGILL));
      for (v = opcode->op[1].reg; v <= opcode->op[2].reg; v ++)
          put_reg (v, pop ());

      /* PUSHM cannot push R0 (sp).  */
      if (opcode->op[1].reg == 0 || opcode->op[2].reg == 0)
        EXCEPTION (EX_UNDEFINED);
      if (opcode->op[1].reg >= opcode->op[2].reg)
          regs.r_pc = opcode_pc;
          return RX_MAKE_STOPPED (SIGILL);
      for (v = opcode->op[2].reg; v >= opcode->op[1].reg; v --)
      cycles (opcode->op[2].reg - opcode->op[1].reg + 1);

      ll = get_reg64 (acc64) << GS ();
      ll += 0x80000000ULL;
      if ((signed long long)ll > (signed long long)0x00007fff00000000ULL)
        ll = 0x00007fff00000000ULL;
      else if ((signed long long)ll < (signed long long)0xffff800000000000ULL)
        ll = 0xffff800000000000ULL;
      ll &= 0xffffffff00000000ULL;
      put_reg64 (acc64, ll);
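      /* Editor's note: a worked example of the rounding above (this block
	 appears to implement RACW).  Adding 0x80000000 rounds the low half
	 of the accumulator to the nearest integer before the final mask
	 clears it: an accumulator of 0x0000000180000000 becomes
	 0x0000000200000000 after the add and mask, while any value above
	 0x00007fff00000000 or below 0xffff800000000000 is clamped to those
	 saturation limits first.  */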
      regs.r_pc = poppc ();
      regs.r_psw = poppc ();
        regs.r_psw |= FLAGBIT_U;
#ifdef CYCLE_ACCURATE
      regs.fast_return = 0;

      umb = (((uma >> 24) & 0xff)
             | ((uma >> 8) & 0xff00)
             | ((uma << 8) & 0xff0000)
             | ((uma << 24) & 0xff000000UL));

      umb = (((uma >> 8) & 0x00ff00ff)
             | ((uma << 8) & 0xff00ff00UL));
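      /* Editor's note: worked examples of the two byte reversals above.
	 The four-term form reverses all four bytes (the REVL behaviour):
	 0x12345678 becomes 0x78563412.  The two-term form swaps the bytes
	 within each 16-bit half (the REVW behaviour): 0x12345678 becomes
	 0x34127856.  */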
#ifdef CYCLE_ACCURATE
      while (regs.r[3] != 0)
          switch (opcode->size)
              ma = mem_get_si (regs.r[1]);
              mb = mem_get_si (regs.r[2]);
              ma = sign_ext (mem_get_hi (regs.r[1]), 16);
              mb = sign_ext (mem_get_hi (regs.r[2]), 16);
              ma = sign_ext (mem_get_qi (regs.r[1]), 8);
              mb = sign_ext (mem_get_qi (regs.r[2]), 8);

          /* We do the multiply as a signed value.  */
          sll = (long long)ma * (long long)mb;
          tprintf(" %016llx = %d * %d\n", sll, ma, mb);
          /* but we do the sum as unsigned, while sign extending the operands.  */
          tmp = regs.r[4] + (sll & 0xffffffffUL);
          regs.r[4] = tmp & 0xffffffffUL;
          tmp += regs.r[5] + (sll & 0xffffffffUL);
          regs.r[5] = tmp & 0xffffffffUL;
          tmp += regs.r[6] + (sll & 0xffffffffUL);
          regs.r[6] = tmp & 0xffffffffUL;
          tprintf("%08lx\033[36m%08lx\033[0m%08lx\n",
                  (unsigned long) regs.r[6],
                  (unsigned long) regs.r[5],
                  (unsigned long) regs.r[4]);

      if (regs.r[6] & 0x00008000)
        regs.r[6] |= 0xffff0000UL;
        regs.r[6] &= 0x0000ffff;
      ma = (regs.r[6] & 0x80000000UL) ? FLAGBIT_S : 0;
      if (regs.r[6] != 0 && regs.r[6] != 0xffffffffUL)
        set_flags (FLAGBIT_O|FLAGBIT_S, ma | FLAGBIT_O);
        set_flags (FLAGBIT_O|FLAGBIT_S, ma);
#ifdef CYCLE_ACCURATE
      switch (opcode->size)
          cycles (6 + 4 * tx);
          cycles (6 + 5 * (tx / 2) + 4 * (tx % 2));
          cycles (6 + 7 * (tx / 4) + 4 * (tx % 4));

      ma = v & 0x80000000UL;

      uma |= (carry ? 0x80000000UL : 0);
      set_szc (uma, 4, mb);

      uma = (uma << mb) | (uma >> (32-mb));
      set_szc (uma, 4, mb);

      uma = (uma >> mb) | (uma << (32-mb));
      mb = uma & 0x80000000;
      set_szc (uma, 4, mb);

      mb = rxfp_ftoi (ma, regs.r_fpsw);
      tprintf("(int) %g = %d\n", int2float(ma), mb);

#ifdef CYCLE_ACCURATE
      regs.r_pc = poppc ();
#ifdef CYCLE_ACCURATE
      /* Note: specs say 5, chip says 3.  */
      if (regs.fast_return && regs.link_register == regs.r_pc)
          tprintf("fast return bonus\n");
      regs.fast_return = 0;
      branch_alignment_penalty = 1;

      if (opcode->op[2].type == RX_Operand_Register)
          /* RTSD cannot pop R0 (sp).  */
          put_reg (0, get_reg (0) + GS() - (opcode->op[0].reg-opcode->op[2].reg+1)*4);
          if (opcode->op[2].reg == 0)
            EXCEPTION (EX_UNDEFINED);
#ifdef CYCLE_ACCURATE
          tx = opcode->op[0].reg - opcode->op[2].reg + 1;
          for (i = opcode->op[2].reg; i <= opcode->op[0].reg; i ++)
              put_reg (i, pop ());
#ifdef CYCLE_ACCURATE
        put_reg (0, get_reg (0) + GS());
      put_reg (pc, poppc());
#ifdef CYCLE_ACCURATE
      if (regs.fast_return && regs.link_register == regs.r_pc)
          tprintf("fast return bonus\n");
          cycles (tx < 3 ? 3 : tx + 1);
          cycles (tx < 5 ? 5 : tx + 1);
      regs.fast_return = 0;
      branch_alignment_penalty = 1;

      if (FLAG_O && FLAG_S)
      else if (FLAG_O && ! FLAG_S)

      if (FLAG_O && ! FLAG_S)
          put_reg (5, 0x7fffffff);
          put_reg (4, 0xffffffff);
      else if (FLAG_O && FLAG_S)
          put_reg (6, 0xffffffff);
          put_reg (5, 0x80000000);

      MATH_OP (-, ! carry);

#ifdef CYCLE_ACCURATE
      while (regs.r[3] != 0)
          uma = mem_get_qi (regs.r[1] ++);
          umb = mem_get_qi (regs.r[2] ++);
          if (uma != umb || uma == 0)
      set_zc (0, ((int)uma - (int)umb) >= 0);
      cycles (2 + 4 * (tx / 4) + 4 * (tx % 4));

      v = 1 << opcode->op[0].reg;

#ifdef CYCLE_ACCURATE
          uma = mem_get_qi (regs.r[2] --);
          mem_put_qi (regs.r[1]--, uma);
#ifdef CYCLE_ACCURATE
        cycles (6 + 3 * (tx / 4) + 3 * (tx % 4));
        cycles (2 + 3 * (tx % 4));

#ifdef CYCLE_ACCURATE
          uma = mem_get_qi (regs.r[2] ++);
          mem_put_qi (regs.r[1]++, uma);
      cycles (2 + 3 * (int)(tx / 4) + 3 * (tx % 4));

#ifdef CYCLE_ACCURATE
      while (regs.r[3] != 0)
          uma = mem_get_qi (regs.r[2] ++);
          mem_put_qi (regs.r[1]++, uma);
      cycles (2 + 3 * (int)(tx / 4) + 3 * (tx % 4));

    case RXO_shar:	/* d = ma >> mb */
      SHIFT_OP (sll, int, mb, >>=, 1);

    case RXO_shll:	/* d = ma << mb */
      SHIFT_OP (ll, int, mb, <<=, 0x80000000UL);

    case RXO_shlr:	/* d = ma >> mb */
      SHIFT_OP (ll, unsigned int, mb, >>=, 1);
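
      /* Editor's note: in the three SHIFT_OP uses above the carry_mask
	 argument selects which bit the macro's loop captures into c on
	 each one-bit step: bit 0 for the right shifts (SHAR, SHLR) and
	 bit 31 (0x80000000) for the left shift (SHLL), i.e. always the
	 bit about to be shifted out, which then feeds set_oszc as the
	 carry.  */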
#ifdef CYCLE_ACCURATE
      switch (opcode->size)
          while (regs.r[3] != 0)
              mem_put_si (regs.r[1], regs.r[2]);

          while (regs.r[3] != 0)
              mem_put_hi (regs.r[1], regs.r[2]);
          cycles (2 + (int)(tx / 2) + tx % 2);

          while (regs.r[3] != 0)
              mem_put_qi (regs.r[1], regs.r[2]);
          cycles (2 + (int)(tx / 4) + tx % 4);

      regs.r_psw |= FLAGBIT_I;
      DO_RETURN (RX_MAKE_STOPPED(0));

#ifdef CYCLE_ACCURATE
      switch (opcode->size)
          while (regs.r[3] != 0)
              umb = mem_get_si (get_reg (1));
#ifdef CYCLE_ACCURATE
#ifdef CYCLE_ACCURATE
          cycles (3 + 3 * tx);

          uma = get_reg (2) & 0xffff;
          while (regs.r[3] != 0)
              umb = mem_get_hi (get_reg (1));
#ifdef CYCLE_ACCURATE
#ifdef CYCLE_ACCURATE
          cycles (3 + 3 * (tx / 2) + 3 * (tx % 2));

          uma = get_reg (2) & 0xff;
          while (regs.r[3] != 0)
              umb = mem_get_qi (regs.r[1]);
#ifdef CYCLE_ACCURATE
#ifdef CYCLE_ACCURATE
          cycles (3 + 3 * (tx / 4) + 3 * (tx % 4));

      set_zc (0, ((int)uma - (int)umb) >= 0);

#ifdef CYCLE_ACCURATE
      switch (opcode->size)
          while (regs.r[3] != 0)
              umb = mem_get_si (get_reg (1));
#ifdef CYCLE_ACCURATE
#ifdef CYCLE_ACCURATE
          cycles (3 + 3 * tx);

          uma = get_reg (2) & 0xffff;
          while (regs.r[3] != 0)
              umb = mem_get_hi (get_reg (1));
#ifdef CYCLE_ACCURATE
#ifdef CYCLE_ACCURATE
          cycles (3 + 3 * (tx / 2) + 3 * (tx % 2));

          uma = get_reg (2) & 0xff;
          while (regs.r[3] != 0)
              umb = mem_get_qi (regs.r[1]);
#ifdef CYCLE_ACCURATE
#ifdef CYCLE_ACCURATE
          cycles (3 + 3 * (tx / 4) + 3 * (tx % 4));

      set_zc (0, ((int)uma - (int)umb) >= 0);

      regs.r_psw |= FLAGBIT_I;
      DO_RETURN (RX_MAKE_STOPPED(0));

#ifdef CYCLE_ACCURATE
      v = GS ();	/* This is the memory operand, if any.  */
      PS (GD ());	/* and this may change the address register.  */
#ifdef CYCLE_ACCURATE
      /* all M cycles happen during xchg's cycles.  */

      EXCEPTION (EX_UNDEFINED);

#ifdef CYCLE_ACCURATE
      regs.m2m |= M2M_SRC;
      regs.m2m |= M2M_DST;

  if (prev_cycle_count == regs.cycle_count)
      printf("Cycle count not updated! id %s\n", id_names[opcode->id]);

  if (running_benchmark)
      int omap = op_lookup (opcode->op[0].type, opcode->op[1].type, opcode->op[2].type);

      cycles_per_id[opcode->id][omap] += regs.cycle_count - prev_cycle_count;
      times_per_id[opcode->id][omap] ++;
      times_per_pair[prev_opcode_id][po0][opcode->id][omap] ++;
      prev_opcode_id = opcode->id;

  return RX_MAKE_STEPPED ();
reset_pipeline_stats (void)
  memset (cycles_per_id, 0, sizeof(cycles_per_id));
  memset (times_per_id, 0, sizeof(times_per_id));
  register_stalls = 0;
  branch_alignment_stalls = 0;
  memset (times_per_pair, 0, sizeof(times_per_pair));
  running_benchmark = 1;
  benchmark_start_cycle = regs.cycle_count;

halt_pipeline_stats (void)
  running_benchmark = 0;
  benchmark_end_cycle = regs.cycle_count;

pipeline_stats (void)
#ifdef CYCLE_ACCURATE
  printf ("cycles: %llu\n", regs.cycle_count);
  printf ("cycles: %13s\n", comma (regs.cycle_count));
  if (benchmark_start_cycle)
    printf ("bmark: %13s\n", comma (benchmark_end_cycle - benchmark_start_cycle));

  for (i = 0; i < N_RXO; i ++)
    for (o1 = 0; o1 < N_MAP; o1 ++)
      if (times_per_id[i][o1])
        printf("%13s %13s %7.2f %s %s\n",
               comma (cycles_per_id[i][o1]),
               comma (times_per_id[i][o1]),
               (double)cycles_per_id[i][o1] / times_per_id[i][o1],
               op_cache_string(o1),

  for (p = 0; p < N_RXO; p ++)
    for (p1 = 0; p1 < N_MAP; p1 ++)
      for (i = 0; i < N_RXO; i ++)
        for (o1 = 0; o1 < N_MAP; o1 ++)
          if (times_per_pair[p][p1][i][o1])
              printf("%13s %s %-9s -> %s %s\n",
                     comma (times_per_pair[p][p1][i][o1]),
                     op_cache_string(p1),
                     op_cache_string(o1),

  printf("%13s memory stalls\n", comma (memory_stalls));
  printf("%13s register stalls\n", comma (register_stalls));
  printf("%13s branches taken (non-return)\n", comma (branch_stalls));
  printf("%13s branch alignment stalls\n", comma (branch_alignment_stalls));
  printf("%13s fast returns\n", comma (fast_returns));