1 /* rx.c --- opcode semantics for stand-alone RX simulator.
3 Copyright (C) 2008-2019 Free Software Foundation, Inc.
4 Contributed by Red Hat, Inc.
6 This file is part of the GNU simulators.
8 This program is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3 of the License, or
11 (at your option) any later version.
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with this program. If not, see <http://www.gnu.org/licenses/>. */
26 #include "libiberty.h"
28 #include "opcode/rx.h"
37 static const char * id_names
[] = {
39 "RXO_mov", /* d = s (signed) */
40 "RXO_movbi", /* d = [s,s2] (signed) */
41 "RXO_movbir", /* [s,s2] = d (signed) */
42 "RXO_pushm", /* s..s2 */
43 "RXO_popm", /* s..s2 */
44 "RXO_xchg", /* s <-> d */
45 "RXO_stcc", /* d = s if cond(s2) */
46 "RXO_rtsd", /* rtsd, 1=imm, 2-0 = reg if reg type */
48 /* These are all either d OP= s or, if s2 is set, d = s OP s2. Note
49 that d may be "None". */
62 "RXO_adc", /* d = d + s + carry */
63 "RXO_sbb", /* d = d - s - ~carry */
64 "RXO_abs", /* d = |s| */
65 "RXO_max", /* d = max(d,s) */
66 "RXO_min", /* d = min(d,s) */
67 "RXO_emul", /* d:64 = d:32 * s */
68 "RXO_emulu", /* d:64 = d:32 * s (unsigned) */
70 "RXO_rolc", /* d <<= 1 through carry */
71 "RXO_rorc", /* d >>= 1 through carry*/
72 "RXO_rotl", /* d <<= #s without carry */
73 "RXO_rotr", /* d >>= #s without carry*/
74 "RXO_revw", /* d = revw(s) */
75 "RXO_revl", /* d = revl(s) */
76 "RXO_branch", /* pc = d if cond(s) */
77 "RXO_branchrel",/* pc += d if cond(s) */
78 "RXO_jsr", /* pc = d */
79 "RXO_jsrrel", /* pc += d */
109 "RXO_sat", /* sat(d) */
112 "RXO_fadd", /* d op= s */
121 "RXO_bset", /* d |= (1<<s) */
122 "RXO_bclr", /* d &= ~(1<<s) */
123 "RXO_btst", /* s & (1<<s2) */
124 "RXO_bnot", /* d ^= (1<<s) */
125 "RXO_bmcc", /* d<s> = cond(s2) */
127 "RXO_clrpsw", /* flag index in d */
128 "RXO_setpsw", /* flag index in d */
129 "RXO_mvtipl", /* new IPL in s */
133 "RXO_rtd", /* undocumented */
135 "RXO_dbt", /* undocumented */
136 "RXO_int", /* vector id in s */
140 "RXO_sccnd", /* d = cond(s) ? 1 : 0 */
143 static const char * optype_names
[] = {
145 "#Imm", /* #addend */
147 "[Rn]", /* [Rn + addend] */
150 " cc ", /* eq, gtu, etc */
151 "Flag", /* [UIOSZC] */
152 "RbRi" /* [Rb + scale * Ri] */
155 #define N_RXO ARRAY_SIZE (id_names)
156 #define N_RXT ARRAY_SIZE (optype_names)
159 static unsigned long long benchmark_start_cycle
;
160 static unsigned long long benchmark_end_cycle
;
162 static int op_cache
[N_RXT
][N_RXT
][N_RXT
];
163 static int op_cache_rev
[N_MAP
];
164 static int op_cache_idx
= 0;
/* Map the operand-type triple (A, B, C) to a small index used by the
   cycle statistics tables.  A nonzero entry already stored in
   op_cache for the triple is returned unchanged.  Otherwise the
   triple is filed under the current op_cache_idx and also packed
   into op_cache_rev as (a << 8) | (b << 4) | c, the table that
   op_cache_string reads back.
   NOTE(review): the lines that advance op_cache_idx and return the
   new index are not visible in this view.  */
167 op_lookup (int a
, int b
, int c
)
/* Already seen this combination: reuse its index.  */
169 if (op_cache
[a
][b
][c
])
170 return op_cache
[a
][b
][c
];
/* The reverse table only has N_MAP slots; report when they run out.  */
172 if (op_cache_idx
>= N_MAP
)
174 printf("op_cache_idx exceeds %d\n", N_MAP
);
/* Record the forward mapping and its packed reverse entry.  */
177 op_cache
[a
][b
][c
] = op_cache_idx
;
178 op_cache_rev
[op_cache_idx
] = (a
<<8) | (b
<<4) | c
;
/* Build a printable "<type> <type> <type>" description for the
   operand-type index MAP.  MAP is first translated through
   op_cache_rev, then three optype_names entries are formatted into
   one slot of the static buffer cb (5 slots of 20 bytes each).
   NOTE(review): how ci, a, b and c are derived, and what is
   returned, is on lines not visible in this view.  */
183 op_cache_string (int map
)
186 static char cb
[5][20];
/* Replace the index by the value stored for it in op_cache_rev.  */
189 map
= op_cache_rev
[map
];
194 sprintf(cb
[ci
], "%s %s %s", optype_names
[a
], optype_names
[b
], optype_names
[c
]);
198 static unsigned long long cycles_per_id
[N_RXO
][N_MAP
];
199 static unsigned long long times_per_id
[N_RXO
][N_MAP
];
200 static unsigned long long memory_stalls
;
201 static unsigned long long register_stalls
;
202 static unsigned long long branch_stalls
;
203 static unsigned long long branch_alignment_stalls
;
204 static unsigned long long fast_returns
;
206 static unsigned long times_per_pair
[N_RXO
][N_MAP
][N_RXO
][N_MAP
];
207 static int prev_opcode_id
= RXO_unknown
;
214 #endif /* CYCLE_STATS */
217 #ifdef CYCLE_ACCURATE
219 static int new_rt
= -1;
221 /* Number of cycles to add if an insn spans an 8-byte boundary. */
222 static int branch_alignment_penalty
= 0;
226 static int running_benchmark
= 1;
228 #define tprintf if (trace && running_benchmark) printf
230 jmp_buf decode_jmp_buf
;
231 unsigned int rx_cycles
= 0;
233 #ifdef CYCLE_ACCURATE
234 /* If nonzero, memory was read at some point and cycle latency might take effect.  */
236 static int memory_source
= 0;
237 /* If nonzero, memory was written and extra cycles might be needed.  */
239 static int memory_dest
= 0;
/* Charge THROUGHPUT cycles to the simulated CPU: the amount is
   written to the trace (tprintf only prints when tracing and
   running_benchmark are both enabled) and added to
   regs.cycle_count.  The E, E1, E2 and EBIT macros are thin
   wrappers around this function.  */
242 cycles (int throughput
)
244 tprintf("%d cycles\n", throughput
);
245 regs
.cycle_count
+= throughput
;
248 /* Number of execution (E) cycles the op uses. For memory sources, we
249 include the load micro-op stall as two extra E cycles. */
250 #define E(c) cycles (memory_source ? c + 2 : c)
251 #define E1 cycles (1)
252 #define E2 cycles (2)
253 #define EBIT cycles (memory_source ? 2 : 1)
255 /* Check to see if a read latency must be applied for a given register. */
259 tprintf("register %d load stall\n", r); \
260 regs.cycle_count ++; \
261 STATS(register_stalls ++); \
268 tprintf ("Rt now %d\n", r); \
/* Helper for the divide cycle estimates (called by divu_cycles and
   div_cycles on their numerator and denominator).  When IS_SIGNED
   is nonzero and bit 31 of V is set, V is negated first so that the
   magnitude is examined; V is then scanned from bit 31 down to
   bit 0.
   NOTE(review): the loop body and the final scaling of the result
   are on lines not visible in this view.  */
273 lsb_count (unsigned long v
, int is_signed
)
/* Negative 32-bit value in signed mode: work on its magnitude.  */
276 if (is_signed
&& (v
& 0x80000000U
))
277 v
= (unsigned long)(long)(-v
);
/* Walk the bit positions from the most significant end.  */
278 for (i
=31; i
>=0; i
--)
281 /* v is 0..31, we want 1=1-2, 2=3-4, 3=5-6, etc. */
/* Cycle accounting for an unsigned divide of NUM by DEN.  Both
   operands are measured with lsb_count in unsigned mode (second
   argument 0).
   NOTE(review): how nb and db are combined into a cycle count is
   on lines not visible in this view.  */
289 divu_cycles(unsigned long num
, unsigned long den
)
291 int nb
= lsb_count (num
, 0);
292 int db
= lsb_count (den
, 0);
/* Cycle accounting for a signed divide of NUM by DEN.  Both
   operands are measured with lsb_count in signed mode (second
   argument 1), so negative values are measured by magnitude.
   NOTE(review): how nb and db are combined into a cycle count is
   on lines not visible in this view.  */
304 div_cycles(long num
, long den
)
306 int nb
= lsb_count ((unsigned long)num
, 1);
307 int db
= lsb_count ((unsigned long)den
, 1);
318 #else /* !CYCLE_ACCURATE */
328 #define divu_cycles(n,d)
329 #define div_cycles(n,d)
331 #endif /* else CYCLE_ACCURATE */
333 static int size2bytes
[] = {
334 4, 1, 1, 1, 2, 2, 2, 3, 4
341 #define rx_abort() _rx_abort(__FILE__, __LINE__)
/* Back end of the rx_abort () macro, which supplies __FILE__ and
   __LINE__.  Prints "abort at FILE:LINE" to stderr, with any
   directory prefix (everything up to the last '/') removed from
   FILE.
   NOTE(review): whatever terminates the simulator afterwards is on
   a line not visible in this view.  */
343 _rx_abort (const char *file
, int line
)
/* Keep only the base name of the source file.  */
345 if (strrchr (file
, '/'))
346 file
= strrchr (file
, '/') + 1;
347 fprintf(stderr
, "abort at %s:%d\n", file
, line
);
351 static unsigned char *get_byte_base
;
352 static RX_Opcode_Decoded
**decode_cache_base
;
353 static SI get_byte_page
;
359 decode_cache_base
= 0;
/* Make the cached page pointers valid for address TPC.  The cache
   is refreshed when TPC differs from get_byte_page in any bit
   selected by NONPAGE_MASK, or whenever enable_counting is set.
   get_byte_base and decode_cache_base are stored with
   get_byte_page subtracted, so callers index them with the full
   address (e.g. get_byte_base[tpc]).  */
364 maybe_get_mem_page (SI tpc
)
366 if (((tpc
^ get_byte_page
) & NONPAGE_MASK
) || enable_counting
)
/* Remember which page is cached now.  */
368 get_byte_page
= tpc
& NONPAGE_MASK
;
/* Bias both bases by the page address.  */
369 get_byte_base
= rx_mem_ptr (get_byte_page
, MPA_READING
) - get_byte_page
;
370 decode_cache_base
= rx_mem_decode_cache (get_byte_page
) - get_byte_page
;
374 /* This gets called a *lot* so optimize it. */
/* Byte-fetch callback passed to rx_decode_opcode together with an
   RX_Data cookie.  VDATA is that cookie; its dpc field supplies
   the address to read.  The page cache is refreshed for that
   address and the byte is read through get_byte_base.
   NOTE(review): any address adjustment or update of dpc happens
   on lines not visible in this view.  */
376 rx_get_byte (void *vdata
)
378 RX_Data
*rx_data
= (RX_Data
*)vdata
;
379 SI tpc
= rx_data
->dpc
;
381 /* See load.c for an explanation of this. */
385 maybe_get_mem_page (tpc
);
/* get_byte_base is biased, so the full address is the index.  */
388 return get_byte_base
[tpc
];
392 get_op (const RX_Opcode_Decoded
*rd
, int i
)
394 const RX_Opcode_Operand
*o
= rd
->op
+ i
;
399 case RX_Operand_None
:
402 case RX_Operand_Immediate
: /* #addend */
405 case RX_Operand_Register
: /* Rn */
407 rv
= get_reg (o
->reg
);
410 case RX_Operand_Predec
: /* [-Rn] */
411 put_reg (o
->reg
, get_reg (o
->reg
) - size2bytes
[o
->size
]);
413 case RX_Operand_Postinc
: /* [Rn+] */
414 case RX_Operand_Zero_Indirect
: /* [Rn + 0] */
415 case RX_Operand_Indirect
: /* [Rn + addend] */
416 case RX_Operand_TwoReg
: /* [Rn + scale * R2] */
417 #ifdef CYCLE_ACCURATE
419 if (o
->type
== RX_Operand_TwoReg
)
422 if (regs
.m2m
== M2M_BOTH
)
424 tprintf("src memory stall\n");
435 if (o
->type
== RX_Operand_TwoReg
)
436 addr
= get_reg (o
->reg
) * size2bytes
[rd
->size
] + get_reg (rd
->op
[2].reg
);
438 addr
= get_reg (o
->reg
) + o
->addend
;
446 case RX_Byte
: /* undefined extension */
449 rv
= mem_get_qi (addr
);
452 case RX_Word
: /* undefined extension */
455 rv
= mem_get_hi (addr
);
459 rv
= mem_get_psi (addr
);
463 rv
= mem_get_si (addr
);
467 if (o
->type
== RX_Operand_Postinc
)
468 put_reg (o
->reg
, get_reg (o
->reg
) + size2bytes
[o
->size
]);
472 case RX_Operand_Condition
: /* eq, gtu, etc */
473 return condition_true (o
->reg
);
475 case RX_Operand_Flag
: /* [UIOSZC] */
476 return (regs
.r_psw
& (1 << o
->reg
)) ? 1 : 0;
479 /* if we've gotten here, we need to clip/extend the value according to the size.  */
487 case RX_Byte
: /* undefined extension */
488 rv
|= 0xdeadbe00; /* keep them honest */
496 rv
= sign_ext (rv
, 8);
499 case RX_Word
: /* undefined extension */
500 rv
|= 0xdead0000; /* keep them honest */
508 rv
= sign_ext (rv
, 16);
522 put_op (const RX_Opcode_Decoded
*rd
, int i
, int v
)
524 const RX_Opcode_Operand
*o
= rd
->op
+ i
;
531 if (o
->type
!= RX_Operand_Register
)
535 case RX_Byte
: /* undefined extension */
536 v
|= 0xdeadbe00; /* keep them honest */
547 case RX_Word
: /* undefined extension */
548 v
|= 0xdead0000; /* keep them honest */
556 v
= sign_ext (v
, 16);
569 case RX_Operand_None
:
570 /* Opcodes like TST and CMP use this. */
573 case RX_Operand_Immediate
: /* #addend */
574 case RX_Operand_Condition
: /* eq, gtu, etc */
577 case RX_Operand_Register
: /* Rn */
582 case RX_Operand_Predec
: /* [-Rn] */
583 put_reg (o
->reg
, get_reg (o
->reg
) - size2bytes
[o
->size
]);
585 case RX_Operand_Postinc
: /* [Rn+] */
586 case RX_Operand_Zero_Indirect
: /* [Rn + 0] */
587 case RX_Operand_Indirect
: /* [Rn + addend] */
588 case RX_Operand_TwoReg
: /* [Rn + scale * R2] */
590 #ifdef CYCLE_ACCURATE
591 if (regs
.m2m
== M2M_BOTH
)
593 tprintf("dst memory stall\n");
603 if (o
->type
== RX_Operand_TwoReg
)
604 addr
= get_reg (o
->reg
) * size2bytes
[rd
->size
] + get_reg (rd
->op
[2].reg
);
606 addr
= get_reg (o
->reg
) + o
->addend
;
614 case RX_Byte
: /* undefined extension */
617 mem_put_qi (addr
, v
);
620 case RX_Word
: /* undefined extension */
623 mem_put_hi (addr
, v
);
627 mem_put_psi (addr
, v
);
631 mem_put_si (addr
, v
);
635 if (o
->type
== RX_Operand_Postinc
)
636 put_reg (o
->reg
, get_reg (o
->reg
) + size2bytes
[o
->size
]);
640 case RX_Operand_Flag
: /* [UIOSZC] */
642 regs
.r_psw
|= (1 << o
->reg
);
644 regs
.r_psw
&= ~(1 << o
->reg
);
649 #define PD(x) put_op (opcode, 0, x)
650 #define PS(x) put_op (opcode, 1, x)
651 #define PS2(x) put_op (opcode, 2, x)
652 #define GD() get_op (opcode, 0)
653 #define GS() get_op (opcode, 1)
654 #define GS2() get_op (opcode, 2)
655 #define DSZ() size2bytes[opcode->op[0].size]
656 #define SSZ() size2bytes[opcode->op[0].size]
657 #define S2SZ() size2bytes[opcode->op[0].size]
659 /* "Universal" sources. */
660 #define US1() ((opcode->op[2].type == RX_Operand_None) ? GD() : GS())
661 #define US2() ((opcode->op[2].type == RX_Operand_None) ? GS() : GS2())
666 int rsp
= get_reg (sp
);
669 mem_put_si (rsp
, val
);
672 /* Just like the above, but tag the memory as "pushed pc" so if anyone
673 tries to write to it, it will cause an error. */
677 int rsp
= get_reg (sp
);
680 mem_put_si (rsp
, val
);
681 mem_set_content_range (rsp
, rsp
+3, MC_PUSHED_PC
);
688 int rsp
= get_reg (sp
);
689 rv
= mem_get_si (rsp
);
699 int rsp
= get_reg (sp
);
700 if (mem_get_content_type (rsp
) != MC_PUSHED_PC
)
701 execution_error (SIM_ERR_CORRUPT_STACK
, rsp
);
702 rv
= mem_get_si (rsp
);
703 mem_set_content_range (rsp
, rsp
+3, MC_UNINIT
);
709 #define MATH_OP(vop,c) \
713 ll = (unsigned long long) uma vop (unsigned long long) umb vop c; \
714 tprintf ("0x%x " #vop " 0x%x " #vop " 0x%x = 0x%llx\n", uma, umb, c, ll); \
715 ma = sign_ext (uma, DSZ() * 8); \
716 mb = sign_ext (umb, DSZ() * 8); \
717 sll = (long long) ma vop (long long) mb vop c; \
718 tprintf ("%d " #vop " %d " #vop " %d = %lld\n", ma, mb, c, sll); \
719 set_oszc (sll, DSZ(), (long long) ll > ((1 vop 1) ? (long long) b2mask[DSZ()] : (long long) -1)); \
724 #define LOGIC_OP(vop) \
729 tprintf("0x%x " #vop " 0x%x = 0x%x\n", ma, mb, v); \
735 #define SHIFT_OP(val, type, count, OP, carry_mask) \
740 tprintf("%lld " #OP " %d\n", val, count); \
741 for (i = 0; i < count; i ++) \
743 c = val & carry_mask; \
746 set_oszc (val, 4, c); \
/* FLOAT_OP helper for addition: stores rxfp_add (S1, S2) through D.
   NOTE(review): the value returned (FLOAT_OP assigns it to
   do_store) is produced on a line not visible in this view.  */
772 fop_fadd (fp_t s1
, fp_t s2
, fp_t
*d
)
774 *d
= rxfp_add (s1
, s2
);
/* FLOAT_OP helper for multiplication: stores rxfp_mul (S1, S2)
   through D.
   NOTE(review): the value returned (FLOAT_OP assigns it to
   do_store) is produced on a line not visible in this view.  */
779 fop_fmul (fp_t s1
, fp_t s2
, fp_t
*d
)
781 *d
= rxfp_mul (s1
, s2
);
/* FLOAT_OP helper for division: stores rxfp_div (S1, S2) through D.
   NOTE(review): the value returned (FLOAT_OP assigns it to
   do_store) is produced on a line not visible in this view.  */
786 fop_fdiv (fp_t s1
, fp_t s2
, fp_t
*d
)
788 *d
= rxfp_div (s1
, s2
);
/* FLOAT_OP helper for subtraction: stores rxfp_sub (S1, S2)
   through D.
   NOTE(review): the value returned (FLOAT_OP assigns it to
   do_store) is produced on a line not visible in this view.  */
793 fop_fsub (fp_t s1
, fp_t s2
, fp_t
*d
)
795 *d
= rxfp_sub (s1
, s2
);
799 #define FPPENDING() (regs.r_fpsw & (FPSWBITS_CE | (FPSWBITS_FMASK & (regs.r_fpsw << FPSW_EFSH))))
800 #define FPCLEAR() regs.r_fpsw &= FPSWBITS_CLEAR
803 return do_fp_exception (opcode_pc)
805 #define FLOAT_OP(func) \
812 do_store = fop_##func (fa, fb, &fc); \
813 tprintf("%g " #func " %g = %g %08x\n", int2float(fa), int2float(fb), int2float(fc), fc); \
818 if ((fc & 0x80000000UL) != 0) \
820 if ((fc & 0x7fffffffUL) == 0) \
822 set_flags (FLAGBIT_S | FLAGBIT_Z, mb); \
825 #define carry (FLAG_C ? 1 : 0)
831 } exception_info
[] = {
832 { 0xFFFFFFD0UL
, "priviledged opcode", SIGILL
},
833 { 0xFFFFFFD4UL
, "access violation", SIGSEGV
},
834 { 0xFFFFFFDCUL
, "undefined opcode", SIGILL
},
835 { 0xFFFFFFE4UL
, "floating point", SIGFPE
}
837 #define EX_PRIVILEDGED 0
839 #define EX_UNDEFINED 2
840 #define EX_FLOATING 3
841 #define EXCEPTION(n) \
842 return generate_exception (n, opcode_pc)
844 #define PRIVILEDGED() \
846 EXCEPTION (EX_PRIVILEDGED)
/* Raise CPU exception TYPE for the instruction at OPCODE_PC.  TYPE
   is an index into exception_info (the EX_* constants).

   The handler address is read from the vector at
   exception_info[type].vaddr.  When no handler is installed the
   function either returns RX_MAKE_STOPPED with the exception's
   signal, or prints a diagnostic to stderr (plus the pending FP
   exception names for EX_FLOATING) and returns RX_MAKE_EXITED (1).
   NOTE(review): the condition choosing between those two outcomes
   is on a line not visible in this view.

   When a handler is installed, the exception is traced, the current
   PSW is copied to old_psw, the I, U and PM bits are cleared in
   regs.r_psw, and RX_MAKE_STEPPED () is returned.
   NOTE(review): the lines that save state and load the new PC are
   not visible in this view.  */
849 generate_exception (unsigned long type
, SI opcode_pc
)
851 SI old_psw
, old_pc
, new_pc
;
/* Fetch the handler address from the exception's vector slot.  */
853 new_pc
= mem_get_si (exception_info
[type
].vaddr
);
854 /* 0x00020000 is the value used to initialise the known
855 exception vectors (see rx.ld), but it is a reserved
856 area of memory so do not try to access it, and if the
857 value has not been changed by the program then the
858 vector has not been installed. */
859 if (new_pc
== 0 || new_pc
== 0x00020000)
862 return RX_MAKE_STOPPED (exception_info
[type
].signal
);
/* No handler: describe the exception on stderr.  */
864 fprintf(stderr
, "Unhandled %s exception at pc = %#lx\n",
865 exception_info
[type
].str
, (unsigned long) opcode_pc
);
/* For FP exceptions, name each pending cause reported by
   FPPENDING ().  */
866 if (type
== EX_FLOATING
)
868 int mask
= FPPENDING ();
869 fprintf (stderr
, "Pending FP exceptions:");
870 if (mask
& FPSWBITS_FV
)
871 fprintf(stderr
, " Invalid");
872 if (mask
& FPSWBITS_FO
)
873 fprintf(stderr
, " Overflow");
874 if (mask
& FPSWBITS_FZ
)
875 fprintf(stderr
, " Division-by-zero");
876 if (mask
& FPSWBITS_FU
)
877 fprintf(stderr
, " Underflow");
878 if (mask
& FPSWBITS_FX
)
879 fprintf(stderr
, " Inexact");
880 if (mask
& FPSWBITS_CE
)
881 fprintf(stderr
, " Unimplemented");
882 fprintf(stderr
, "\n");
884 return RX_MAKE_EXITED (1);
/* A handler exists: enter it.  */
887 tprintf ("Triggering %s exception\n", exception_info
[type
].str
);
/* Keep the pre-exception PSW before its I, U and PM bits are
   cleared.  */
889 old_psw
= regs
.r_psw
;
890 regs
.r_psw
&= ~ (FLAGBIT_I
| FLAGBIT_U
| FLAGBIT_PM
);
895 return RX_MAKE_STEPPED ();
/* Raise the access-violation exception (EX_ACCESS) at the current
   regs.r_pc and leave the decoder through decode_jmp_buf, passing
   the status returned by generate_exception.
   NOTE(review): any condition guarding the longjmp is on a line
   not visible in this view.  */
899 generate_access_exception (void)
903 rv
= generate_exception (EX_ACCESS
, regs
.r_pc
);
905 longjmp (decode_jmp_buf
, rv
);
/* Raise the floating-point exception for the instruction at
   OPCODE_PC.  EXCEPTION (n) expands to
   "return generate_exception (n, opcode_pc)", so the final return
   of RX_MAKE_STEPPED () is reached only when that statement is
   skipped.
   NOTE(review): the condition guarding EXCEPTION is on a line not
   visible in this view.  */
909 do_fp_exception (unsigned long opcode_pc
)
912 EXCEPTION (EX_FLOATING
);
913 return RX_MAKE_STEPPED ();
/* Classify operand I of the decoded opcode RD by its type; used
   through the OM (i) macro.  The Predec, Postinc and Indirect
   operand kinds share one case arm.
   NOTE(review): presumably that arm reports "is memory" and all
   other types do not -- the return statements are on lines not
   visible in this view.  */
917 op_is_memory (const RX_Opcode_Decoded
*rd
, int i
)
919 switch (rd
->op
[i
].type
)
921 case RX_Operand_Predec
:
922 case RX_Operand_Postinc
:
923 case RX_Operand_Indirect
:
929 #define OM(i) op_is_memory (opcode, i)
931 #define DO_RETURN(x) { longjmp (decode_jmp_buf, x); }
936 unsigned int uma
=0, umb
=0;
939 unsigned long long ll
;
941 unsigned long opcode_pc
;
943 const RX_Opcode_Decoded
*opcode
;
945 unsigned long long prev_cycle_count
;
947 #ifdef CYCLE_ACCURATE
952 prev_cycle_count
= regs
.cycle_count
;
955 #ifdef CYCLE_ACCURATE
962 maybe_get_mem_page (regs
.r_pc
);
964 opcode_pc
= regs
.r_pc
;
966 /* Note that we don't word-swap this point, there's no point. */
967 if (decode_cache_base
[opcode_pc
] == NULL
)
969 RX_Opcode_Decoded
*opcode_w
;
970 rx_data
.dpc
= opcode_pc
;
971 opcode_w
= decode_cache_base
[opcode_pc
] = calloc (1, sizeof (RX_Opcode_Decoded
));
972 opcode_size
= rx_decode_opcode (opcode_pc
, opcode_w
,
973 rx_get_byte
, &rx_data
);
978 opcode
= decode_cache_base
[opcode_pc
];
979 opcode_size
= opcode
->n_bytes
;
982 #ifdef CYCLE_ACCURATE
983 if (branch_alignment_penalty
)
985 if ((regs
.r_pc
^ (regs
.r_pc
+ opcode_size
- 1)) & ~7)
987 tprintf("1 cycle branch alignment penalty\n");
988 cycles (branch_alignment_penalty
);
990 branch_alignment_stalls
++;
993 branch_alignment_penalty
= 0;
997 regs
.r_pc
+= opcode_size
;
999 rx_flagmask
= opcode
->flags_s
;
1000 rx_flagand
= ~(int)opcode
->flags_0
;
1001 rx_flagor
= opcode
->flags_1
;
1007 tprintf("|%lld| = ", sll
);
1010 tprintf("%lld\n", sll
);
1031 if (opcode
->op
[0].type
== RX_Operand_Register
)
1043 if (opcode
->op
[0].type
== RX_Operand_Register
)
1058 if (opcode
->op
[0].type
== RX_Operand_Register
)
1068 if (opcode
->op
[1].type
== RX_Operand_None
|| GS())
1070 #ifdef CYCLE_ACCURATE
1071 SI old_pc
= regs
.r_pc
;
1075 #ifdef CYCLE_ACCURATE
1076 delta
= regs
.r_pc
- old_pc
;
1077 if (delta
>= 0 && delta
< 16
1080 tprintf("near forward branch bonus\n");
1086 branch_alignment_penalty
= 1;
1093 #ifdef CYCLE_ACCURATE
1100 if (opcode
->op
[1].type
== RX_Operand_None
|| GS())
1103 regs
.r_pc
= opcode_pc
+ delta
;
1104 #ifdef CYCLE_ACCURATE
1105 /* Note: specs say 3, chip says 2. */
1106 if (delta
>= 0 && delta
< 16
1109 tprintf("near forward branch bonus\n");
1115 branch_alignment_penalty
= 1;
1122 #ifdef CYCLE_ACCURATE
1130 int old_psw
= regs
.r_psw
;
1132 DO_RETURN (RX_MAKE_HIT_BREAK ());
1133 if (regs
.r_intb
== 0)
1135 tprintf("BREAK hit, no vector table.\n");
1136 DO_RETURN (RX_MAKE_EXITED(1));
1138 regs
.r_psw
&= ~(FLAGBIT_I
| FLAGBIT_U
| FLAGBIT_PM
);
1141 regs
.r_pc
= mem_get_si (regs
.r_intb
);
1149 if (opcode
->op
[0].type
== RX_Operand_Register
)
1161 if (opcode
->op
[1].type
== RX_Operand_Register
)
1165 umb
= ma
& (1 << mb
);
1166 set_zc (! umb
, umb
);
1171 v
= 1 << opcode
->op
[0].reg
;
1180 case RXO_div
: /* d = d / s */
1183 tprintf("%d / %d = ", mb
, ma
);
1184 if (ma
== 0 || (ma
== -1 && (unsigned int) mb
== 0x80000000))
1187 set_flags (FLAGBIT_O
, FLAGBIT_O
);
1194 set_flags (FLAGBIT_O
, 0);
1196 div_cycles (mb
, ma
);
1200 case RXO_divu
: /* d = d / s */
1203 tprintf("%u / %u = ", umb
, uma
);
1207 set_flags (FLAGBIT_O
, FLAGBIT_O
);
1214 set_flags (FLAGBIT_O
, 0);
1216 divu_cycles (umb
, uma
);
1223 sll
= (long long)ma
* (long long)mb
;
1224 tprintf("%d * %d = %lld\n", ma
, mb
, sll
);
1225 put_reg (opcode
->op
[0].reg
, sll
);
1226 put_reg (opcode
->op
[0].reg
+ 1, sll
>> 32);
1233 ll
= (long long)uma
* (long long)umb
;
1234 tprintf("%#x * %#x = %#llx\n", uma
, umb
, ll
);
1235 put_reg (opcode
->op
[0].reg
, ll
);
1236 put_reg (opcode
->op
[0].reg
+ 1, ll
>> 32);
1266 regs
.r_psw
= regs
.r_bpsw
;
1267 regs
.r_pc
= regs
.r_bpc
;
1268 #ifdef CYCLE_ACCURATE
1269 regs
.fast_return
= 0;
1282 mb
= rxfp_ftoi (ma
, FPRM_ZERO
);
1285 tprintf("(int) %g = %d\n", int2float(ma
), mb
);
1294 int rc
= rx_syscall (regs
.r
[5]);
1295 if (! RX_STEPPED (rc
))
1300 int old_psw
= regs
.r_psw
;
1301 regs
.r_psw
&= ~(FLAGBIT_I
| FLAGBIT_U
| FLAGBIT_PM
);
1304 regs
.r_pc
= mem_get_si (regs
.r_intb
+ 4 * v
);
1312 mb
= rxfp_itof (ma
, regs
.r_fpsw
);
1314 tprintf("(float) %d = %x\n", ma
, mb
);
1323 #ifdef CYCLE_ACCURATE
1328 #ifdef CYCLE_ACCURATE
1329 regs
.link_register
= regs
.r_pc
;
1331 pushpc (get_reg (pc
));
1332 if (opcode
->id
== RXO_jsrrel
)
1334 #ifdef CYCLE_ACCURATE
1335 delta
= v
- regs
.r_pc
;
1338 #ifdef CYCLE_ACCURATE
1339 /* Note: docs say 3, chip says 2 */
1340 if (delta
>= 0 && delta
< 16)
1342 tprintf ("near forward jsr bonus\n");
1347 branch_alignment_penalty
= 1;
1350 regs
.fast_return
= 1;
1356 ll
= (long long)(signed short)(GS() >> 16) * (long long)(signed short)(GS2 () >> 16);
1358 put_reg64 (acc64
, ll
+ regs
.r_acc
);
1363 ll
= (long long)(signed short)(GS()) * (long long)(signed short)(GS2 ());
1365 put_reg64 (acc64
, ll
+ regs
.r_acc
);
1392 if (opcode
->op
[1].type
== RX_Operand_Register
1393 && opcode
->op
[1].reg
== 17 /* PC */)
1395 /* Special case. We want the address of the insn, not the
1396 address of the next insn. */
1400 if (opcode
->op
[0].type
== RX_Operand_Register
1401 && opcode
->op
[0].reg
== 16 /* PSW */)
1403 /* Special case, LDC and POPC can't ever modify PM. */
1404 int pm
= regs
.r_psw
& FLAGBIT_PM
;
1409 v
&= ~ (FLAGBIT_I
| FLAGBIT_U
| FLAGBITS_IPL
);
1415 /* various things can't be changed in user mode. */
1416 if (opcode
->op
[0].type
== RX_Operand_Register
)
1417 if (opcode
->op
[0].reg
== 32)
1419 v
&= ~ (FLAGBIT_I
| FLAGBIT_U
| FLAGBITS_IPL
);
1420 v
|= regs
.r_psw
& (FLAGBIT_I
| FLAGBIT_U
| FLAGBITS_IPL
);
1422 if (opcode
->op
[0].reg
== 34 /* ISP */
1423 || opcode
->op
[0].reg
== 37 /* BPSW */
1424 || opcode
->op
[0].reg
== 39 /* INTB */
1425 || opcode
->op
[0].reg
== 38 /* VCT */)
1426 /* These are ignored. */
1436 #ifdef CYCLE_ACCURATE
1437 if ((opcode
->op
[0].type
== RX_Operand_Predec
1438 && opcode
->op
[1].type
== RX_Operand_Register
)
1439 || (opcode
->op
[0].type
== RX_Operand_Postinc
1440 && opcode
->op
[1].type
== RX_Operand_Register
))
1442 /* Special case: push reg doesn't cause a memory stall. */
1444 tprintf("push special case\n");
1463 ll
= (unsigned long long) US1() * (unsigned long long) v
;
1470 ll
= (long long)(signed short)(GS() >> 16) * (long long)(signed short)(v
>> 16);
1472 put_reg64 (acc64
, ll
);
1478 ll
= (long long)(signed short)(GS()) * (long long)(signed short)(v
);
1480 put_reg64 (acc64
, ll
);
1485 PD (get_reg (acchi
));
1490 PD (get_reg (acclo
));
1495 PD (get_reg (accmi
));
1500 put_reg (acchi
, GS ());
1505 put_reg (acclo
, GS ());
1510 regs
.r_psw
&= ~ FLAGBITS_IPL
;
1511 regs
.r_psw
|= (GS () << FLAGSHIFT_IPL
) & FLAGBITS_IPL
;
1530 /* POPM cannot pop R0 (sp). */
1531 if (opcode
->op
[1].reg
== 0 || opcode
->op
[2].reg
== 0)
1532 EXCEPTION (EX_UNDEFINED
);
1533 if (opcode
->op
[1].reg
>= opcode
->op
[2].reg
)
1535 regs
.r_pc
= opcode_pc
;
1536 DO_RETURN (RX_MAKE_STOPPED (SIGILL
));
1538 for (v
= opcode
->op
[1].reg
; v
<= opcode
->op
[2].reg
; v
++)
1542 put_reg (v
, pop ());
1547 /* PUSHM cannot push R0 (sp). */
1548 if (opcode
->op
[1].reg
== 0 || opcode
->op
[2].reg
== 0)
1549 EXCEPTION (EX_UNDEFINED
);
1550 if (opcode
->op
[1].reg
>= opcode
->op
[2].reg
)
1552 regs
.r_pc
= opcode_pc
;
1553 return RX_MAKE_STOPPED (SIGILL
);
1555 for (v
= opcode
->op
[2].reg
; v
>= opcode
->op
[1].reg
; v
--)
1560 cycles (opcode
->op
[2].reg
- opcode
->op
[1].reg
+ 1);
1564 ll
= get_reg64 (acc64
) << GS ();
1565 ll
+= 0x80000000ULL
;
1566 if ((signed long long)ll
> (signed long long)0x00007fff00000000ULL
)
1567 ll
= 0x00007fff00000000ULL
;
1568 else if ((signed long long)ll
< (signed long long)0xffff800000000000ULL
)
1569 ll
= 0xffff800000000000ULL
;
1571 ll
&= 0xffffffff00000000ULL
;
1572 put_reg64 (acc64
, ll
);
1578 regs
.r_pc
= poppc ();
1579 regs
.r_psw
= poppc ();
1581 regs
.r_psw
|= FLAGBIT_U
;
1582 #ifdef CYCLE_ACCURATE
1583 regs
.fast_return
= 0;
1590 umb
= (((uma
>> 24) & 0xff)
1591 | ((uma
>> 8) & 0xff00)
1592 | ((uma
<< 8) & 0xff0000)
1593 | ((uma
<< 24) & 0xff000000UL
));
1600 umb
= (((uma
>> 8) & 0x00ff00ff)
1601 | ((uma
<< 8) & 0xff00ff00UL
));
1609 #ifdef CYCLE_ACCURATE
1613 while (regs
.r
[3] != 0)
1617 switch (opcode
->size
)
1620 ma
= mem_get_si (regs
.r
[1]);
1621 mb
= mem_get_si (regs
.r
[2]);
1626 ma
= sign_ext (mem_get_hi (regs
.r
[1]), 16);
1627 mb
= sign_ext (mem_get_hi (regs
.r
[2]), 16);
1632 ma
= sign_ext (mem_get_qi (regs
.r
[1]), 8);
1633 mb
= sign_ext (mem_get_qi (regs
.r
[2]), 8);
1640 /* We do the multiply as a signed value. */
1641 sll
= (long long)ma
* (long long)mb
;
1642 tprintf(" %016llx = %d * %d\n", sll
, ma
, mb
);
1643 /* but we do the sum as unsigned, while sign extending the operands. */
1644 tmp
= regs
.r
[4] + (sll
& 0xffffffffUL
);
1645 regs
.r
[4] = tmp
& 0xffffffffUL
;
1648 tmp
+= regs
.r
[5] + (sll
& 0xffffffffUL
);
1649 regs
.r
[5] = tmp
& 0xffffffffUL
;
1652 tmp
+= regs
.r
[6] + (sll
& 0xffffffffUL
);
1653 regs
.r
[6] = tmp
& 0xffffffffUL
;
1654 tprintf("%08lx\033[36m%08lx\033[0m%08lx\n",
1655 (unsigned long) regs
.r
[6],
1656 (unsigned long) regs
.r
[5],
1657 (unsigned long) regs
.r
[4]);
1661 if (regs
.r
[6] & 0x00008000)
1662 regs
.r
[6] |= 0xffff0000UL
;
1664 regs
.r
[6] &= 0x0000ffff;
1665 ma
= (regs
.r
[6] & 0x80000000UL
) ? FLAGBIT_S
: 0;
1666 if (regs
.r
[6] != 0 && regs
.r
[6] != 0xffffffffUL
)
1667 set_flags (FLAGBIT_O
|FLAGBIT_S
, ma
| FLAGBIT_O
);
1669 set_flags (FLAGBIT_O
|FLAGBIT_S
, ma
);
1670 #ifdef CYCLE_ACCURATE
1671 switch (opcode
->size
)
1674 cycles (6 + 4 * tx
);
1677 cycles (6 + 5 * (tx
/ 2) + 4 * (tx
% 2));
1680 cycles (6 + 7 * (tx
/ 4) + 4 * (tx
% 4));
1690 ma
= v
& 0x80000000UL
;
1702 uma
|= (carry
? 0x80000000UL
: 0);
1703 set_szc (uma
, 4, mb
);
1713 uma
= (uma
<< mb
) | (uma
>> (32-mb
));
1716 set_szc (uma
, 4, mb
);
1726 uma
= (uma
>> mb
) | (uma
<< (32-mb
));
1727 mb
= uma
& 0x80000000;
1729 set_szc (uma
, 4, mb
);
1737 mb
= rxfp_ftoi (ma
, regs
.r_fpsw
);
1740 tprintf("(int) %g = %d\n", int2float(ma
), mb
);
1747 #ifdef CYCLE_ACCURATE
1750 regs
.r_pc
= poppc ();
1751 #ifdef CYCLE_ACCURATE
1752 /* Note: specs say 5, chip says 3. */
1753 if (regs
.fast_return
&& regs
.link_register
== regs
.r_pc
)
1758 tprintf("fast return bonus\n");
1762 regs
.fast_return
= 0;
1763 branch_alignment_penalty
= 1;
1769 if (opcode
->op
[2].type
== RX_Operand_Register
)
1772 /* RTSD cannot pop R0 (sp). */
1773 put_reg (0, get_reg (0) + GS() - (opcode
->op
[0].reg
-opcode
->op
[2].reg
+1)*4);
1774 if (opcode
->op
[2].reg
== 0)
1775 EXCEPTION (EX_UNDEFINED
);
1776 #ifdef CYCLE_ACCURATE
1777 tx
= opcode
->op
[0].reg
- opcode
->op
[2].reg
+ 1;
1779 for (i
= opcode
->op
[2].reg
; i
<= opcode
->op
[0].reg
; i
++)
1782 put_reg (i
, pop ());
1787 #ifdef CYCLE_ACCURATE
1790 put_reg (0, get_reg (0) + GS());
1792 put_reg (pc
, poppc());
1793 #ifdef CYCLE_ACCURATE
1794 if (regs
.fast_return
&& regs
.link_register
== regs
.r_pc
)
1796 tprintf("fast return bonus\n");
1800 cycles (tx
< 3 ? 3 : tx
+ 1);
1804 cycles (tx
< 5 ? 5 : tx
+ 1);
1806 regs
.fast_return
= 0;
1807 branch_alignment_penalty
= 1;
1812 if (FLAG_O
&& FLAG_S
)
1814 else if (FLAG_O
&& ! FLAG_S
)
1820 if (FLAG_O
&& ! FLAG_S
)
1823 put_reg (5, 0x7fffffff);
1824 put_reg (4, 0xffffffff);
1826 else if (FLAG_O
&& FLAG_S
)
1828 put_reg (6, 0xffffffff);
1829 put_reg (5, 0x80000000);
1836 MATH_OP (-, ! carry
);
1848 #ifdef CYCLE_ACCURATE
1851 while (regs
.r
[3] != 0)
1853 uma
= mem_get_qi (regs
.r
[1] ++);
1854 umb
= mem_get_qi (regs
.r
[2] ++);
1856 if (uma
!= umb
|| uma
== 0)
1862 set_zc (0, ((int)uma
- (int)umb
) >= 0);
1863 cycles (2 + 4 * (tx
/ 4) + 4 * (tx
% 4));
1867 v
= 1 << opcode
->op
[0].reg
;
1878 #ifdef CYCLE_ACCURATE
1883 uma
= mem_get_qi (regs
.r
[2] --);
1884 mem_put_qi (regs
.r
[1]--, uma
);
1887 #ifdef CYCLE_ACCURATE
1889 cycles (6 + 3 * (tx
/ 4) + 3 * (tx
% 4));
1891 cycles (2 + 3 * (tx
% 4));
1897 #ifdef CYCLE_ACCURATE
1902 uma
= mem_get_qi (regs
.r
[2] ++);
1903 mem_put_qi (regs
.r
[1]++, uma
);
1906 cycles (2 + 3 * (int)(tx
/ 4) + 3 * (tx
% 4));
1910 #ifdef CYCLE_ACCURATE
1913 while (regs
.r
[3] != 0)
1915 uma
= mem_get_qi (regs
.r
[2] ++);
1916 mem_put_qi (regs
.r
[1]++, uma
);
1921 cycles (2 + 3 * (int)(tx
/ 4) + 3 * (tx
% 4));
1924 case RXO_shar
: /* d = ma >> mb */
1925 SHIFT_OP (sll
, int, mb
, >>=, 1);
1929 case RXO_shll
: /* d = ma << mb */
1930 SHIFT_OP (ll
, int, mb
, <<=, 0x80000000UL
);
1934 case RXO_shlr
: /* d = ma >> mb */
1935 SHIFT_OP (ll
, unsigned int, mb
, >>=, 1);
1941 #ifdef CYCLE_ACCURATE
1944 switch (opcode
->size
)
1947 while (regs
.r
[3] != 0)
1949 mem_put_si (regs
.r
[1], regs
.r
[2]);
1956 while (regs
.r
[3] != 0)
1958 mem_put_hi (regs
.r
[1], regs
.r
[2]);
1962 cycles (2 + (int)(tx
/ 2) + tx
% 2);
1965 while (regs
.r
[3] != 0)
1967 mem_put_qi (regs
.r
[1], regs
.r
[2]);
1971 cycles (2 + (int)(tx
/ 4) + tx
% 4);
1986 regs
.r_psw
|= FLAGBIT_I
;
1987 DO_RETURN (RX_MAKE_STOPPED(0));
1995 #ifdef CYCLE_ACCURATE
2003 switch (opcode
->size
)
2007 while (regs
.r
[3] != 0)
2010 umb
= mem_get_si (get_reg (1));
2012 #ifdef CYCLE_ACCURATE
2018 #ifdef CYCLE_ACCURATE
2019 cycles (3 + 3 * tx
);
2023 uma
= get_reg (2) & 0xffff;
2024 while (regs
.r
[3] != 0)
2027 umb
= mem_get_hi (get_reg (1));
2029 #ifdef CYCLE_ACCURATE
2035 #ifdef CYCLE_ACCURATE
2036 cycles (3 + 3 * (tx
/ 2) + 3 * (tx
% 2));
2040 uma
= get_reg (2) & 0xff;
2041 while (regs
.r
[3] != 0)
2044 umb
= mem_get_qi (regs
.r
[1]);
2046 #ifdef CYCLE_ACCURATE
2052 #ifdef CYCLE_ACCURATE
2053 cycles (3 + 3 * (tx
/ 4) + 3 * (tx
% 4));
2062 set_zc (0, ((int)uma
- (int)umb
) >= 0);
2067 #ifdef CYCLE_ACCURATE
2072 switch (opcode
->size
)
2076 while (regs
.r
[3] != 0)
2079 umb
= mem_get_si (get_reg (1));
2081 #ifdef CYCLE_ACCURATE
2087 #ifdef CYCLE_ACCURATE
2088 cycles (3 + 3 * tx
);
2092 uma
= get_reg (2) & 0xffff;
2093 while (regs
.r
[3] != 0)
2096 umb
= mem_get_hi (get_reg (1));
2098 #ifdef CYCLE_ACCURATE
2104 #ifdef CYCLE_ACCURATE
2105 cycles (3 + 3 * (tx
/ 2) + 3 * (tx
% 2));
2109 uma
= get_reg (2) & 0xff;
2110 while (regs
.r
[3] != 0)
2113 umb
= mem_get_qi (regs
.r
[1]);
2115 #ifdef CYCLE_ACCURATE
2121 #ifdef CYCLE_ACCURATE
2122 cycles (3 + 3 * (tx
/ 4) + 3 * (tx
% 4));
2131 set_zc (0, ((int)uma
- (int)umb
) >= 0);
2136 regs
.r_psw
|= FLAGBIT_I
;
2137 DO_RETURN (RX_MAKE_STOPPED(0));
2140 #ifdef CYCLE_ACCURATE
2143 v
= GS (); /* This is the memory operand, if any. */
2144 PS (GD ()); /* and this may change the address register. */
2147 #ifdef CYCLE_ACCURATE
2148 /* all M cycles happen during xchg's cycles. */
2159 EXCEPTION (EX_UNDEFINED
);
2162 #ifdef CYCLE_ACCURATE
2165 regs
.m2m
|= M2M_SRC
;
2167 regs
.m2m
|= M2M_DST
;
2174 if (prev_cycle_count
== regs
.cycle_count
)
2176 printf("Cycle count not updated! id %s\n", id_names
[opcode
->id
]);
2182 if (running_benchmark
)
2184 int omap
= op_lookup (opcode
->op
[0].type
, opcode
->op
[1].type
, opcode
->op
[2].type
);
2187 cycles_per_id
[opcode
->id
][omap
] += regs
.cycle_count
- prev_cycle_count
;
2188 times_per_id
[opcode
->id
][omap
] ++;
2190 times_per_pair
[prev_opcode_id
][po0
][opcode
->id
][omap
] ++;
2192 prev_opcode_id
= opcode
->id
;
2197 return RX_MAKE_STEPPED ();
/* Start a fresh benchmark interval: zero the per-opcode cycle and
   count tables, the opcode-pair table and the stall counters shown
   below, turn running_benchmark on, and record the current
   regs.cycle_count as the interval's start.
   NOTE(review): other counters may be cleared on lines not visible
   in this view.  */
2202 reset_pipeline_stats (void)
2204 memset (cycles_per_id
, 0, sizeof(cycles_per_id
));
2205 memset (times_per_id
, 0, sizeof(times_per_id
));
2207 register_stalls
= 0;
2209 branch_alignment_stalls
= 0;
2211 memset (times_per_pair
, 0, sizeof(times_per_pair
));
/* Enable tprintf output and per-opcode accounting again.  */
2212 running_benchmark
= 1;
2214 benchmark_start_cycle
= regs
.cycle_count
;
/* End the current benchmark interval: turn running_benchmark off
   and record the current regs.cycle_count as the interval's end,
   which pipeline_stats reports relative to benchmark_start_cycle.  */
2218 halt_pipeline_stats (void)
2220 running_benchmark
= 0;
2221 benchmark_end_cycle
= regs
.cycle_count
;
2226 pipeline_stats (void)
2233 #ifdef CYCLE_ACCURATE
2236 printf ("cycles: %llu\n", regs
.cycle_count
);
2240 printf ("cycles: %13s\n", comma (regs
.cycle_count
));
2244 if (benchmark_start_cycle
)
2245 printf ("bmark: %13s\n", comma (benchmark_end_cycle
- benchmark_start_cycle
));
2248 for (i
= 0; i
< N_RXO
; i
++)
2249 for (o1
= 0; o1
< N_MAP
; o1
++)
2250 if (times_per_id
[i
][o1
])
2251 printf("%13s %13s %7.2f %s %s\n",
2252 comma (cycles_per_id
[i
][o1
]),
2253 comma (times_per_id
[i
][o1
]),
2254 (double)cycles_per_id
[i
][o1
] / times_per_id
[i
][o1
],
2255 op_cache_string(o1
),
2259 for (p
= 0; p
< N_RXO
; p
++)
2260 for (p1
= 0; p1
< N_MAP
; p1
++)
2261 for (i
= 0; i
< N_RXO
; i
++)
2262 for (o1
= 0; o1
< N_MAP
; o1
++)
2263 if (times_per_pair
[p
][p1
][i
][o1
])
2265 printf("%13s %s %-9s -> %s %s\n",
2266 comma (times_per_pair
[p
][p1
][i
][o1
]),
2267 op_cache_string(p1
),
2269 op_cache_string(o1
),
2274 printf("%13s memory stalls\n", comma (memory_stalls
));
2275 printf("%13s register stalls\n", comma (register_stalls
));
2276 printf("%13s branches taken (non-return)\n", comma (branch_stalls
));
2277 printf("%13s branch alignment stalls\n", comma (branch_alignment_stalls
));
2278 printf("%13s fast returns\n", comma (fast_returns
));