Bug 497723 - forgot to restore callgrind output cleanup
[valgrind.git] / cachegrind / cg_branchpred.c
blob927b7bf21cb307d3cc5e89c51a03219ac2fac9d6
2 /*--------------------------------------------------------------------*/
3 /*--- Branch predictor simulation cg_branchpred.c ---*/
4 /*--------------------------------------------------------------------*/
6 /*
7 This file is part of Cachegrind, a high-precision tracing profiler
8 built with Valgrind.
10 Copyright (C) 2002-2017 Nicholas Nethercote
11 njn@valgrind.org
13 This program is free software; you can redistribute it and/or
14 modify it under the terms of the GNU General Public License as
15 published by the Free Software Foundation; either version 2 of the
16 License, or (at your option) any later version.
18 This program is distributed in the hope that it will be useful, but
19 WITHOUT ANY WARRANTY; without even the implied warranty of
20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 General Public License for more details.
23 You should have received a copy of the GNU General Public License
24 along with this program; if not, see <http://www.gnu.org/licenses/>.
26 The GNU General Public License is contained in the file COPYING.
30 /* This file contains the actual branch predictor simulator and its
31 associated state. As with cg_sim.c it is #included directly into
32 cg_main.c. It provides:
34 - a taken/not-taken predictor for conditional branches
35 - a branch target address predictor for indirect branches
37 Function return-address prediction is not modelled, on the basis
38 that return stack predictors almost always predict correctly, and
39 also that it is difficult for Valgrind to robustly identify
40 function calls and returns.
43 /* How many bits at the bottom of an instruction address are
44 guaranteed to be zero? */
45 #if defined(VGA_ppc32) || defined(VGA_ppc64be) || defined(VGA_ppc64le) \
46 || defined(VGA_mips32) || defined(VGA_mips64) || defined(VGA_nanomips) \
47 || defined(VGA_arm64)
48 # define N_IADDR_LO_ZERO_BITS 2
49 #elif defined(VGA_x86) || defined(VGA_amd64)
50 # define N_IADDR_LO_ZERO_BITS 0
51 #elif defined(VGA_s390x) || defined(VGA_arm)
52 # define N_IADDR_LO_ZERO_BITS 1
53 #else
54 # error "Unsupported architecture"
55 #endif
58 /* Get a taken/not-taken prediction for the instruction (presumably a
59 conditional branch) at instr_addr. Once that's done, update the
60 predictor state based on whether or not it was actually taken, as
61 indicated by 'taken'. Finally, return 1 for a mispredict and 0 for
62 a successful predict.
64 The predictor is an array of 16k (== 2^14) 2-bit saturating
65 counters. Given the address of the branch instruction, the array
66 index to use is computed both from the low order bits of the branch
67 instruction's address, and the global history - that is, from the
68 taken/not-taken behaviour of the most recent few branches. This
69 makes the predictor able to correlate this branch's behaviour with
70 that of other branches.
72 TODO: use predictor written by someone who understands this stuff.
73 Perhaps it would be better to move to a standard GShare predictor
74 and/or tournament predictor.
76 /* The index is composed of N_HIST bits at the top and N_IADD bits at
77 the bottom. These numbers chosen somewhat arbitrarily, but note
78 that making N_IADD_BITS too small (eg 4) can cause large amounts of
79 aliasing, and hence misprediction, particularly if the history bits
80 are mostly unchanging. */
81 #define N_HIST_BITS 7
82 #define N_IADD_BITS 7
84 #define N_BITS (N_HIST_BITS + N_IADD_BITS)
85 #define N_COUNTERS (1 << N_BITS)
87 static UWord shift_register = 0; /* Contains global history */
88 static UChar counters[N_COUNTERS]; /* Counter array; presumably auto-zeroed */
91 static ULong do_cond_branch_predict ( Addr instr_addr, Word takenW )
93 UWord indx;
94 Bool predicted_taken, actually_taken, mispredict;
96 const UWord hist_mask = (1 << N_HIST_BITS) - 1;
97 const UWord iadd_mask = (1 << N_IADD_BITS) - 1;
98 UWord hist_bits = shift_register & hist_mask;
99 UWord iadd_bits = (instr_addr >> N_IADDR_LO_ZERO_BITS)
100 & iadd_mask;
102 tl_assert(hist_bits <= hist_mask);
103 tl_assert(iadd_bits <= iadd_mask);
104 indx = (hist_bits << N_IADD_BITS) | iadd_bits;
105 tl_assert(indx < N_COUNTERS);
106 if (0) VG_(printf)("index = %d\n", (Int)indx);
108 tl_assert(takenW <= 1);
109 predicted_taken = counters[ indx ] >= 2;
110 actually_taken = takenW > 0;
112 mispredict = (actually_taken && (!predicted_taken))
113 || ((!actually_taken) && predicted_taken);
115 shift_register <<= 1;
116 shift_register |= (actually_taken ? 1 : 0);
118 if (actually_taken) {
119 if (counters[indx] < 3)
120 counters[indx]++;
121 } else {
122 if (counters[indx] > 0)
123 counters[indx]--;
126 tl_assert(counters[indx] <= 3);
128 return mispredict ? 1 : 0;
132 /* A very simple indirect branch predictor. Use the branch's address
133 to index a table which records the previous target address for this
134 branch (or whatever aliased with it) and use that as the
135 prediction. */
136 #define N_BTAC_BITS 9
137 #define N_BTAC (1 << N_BTAC_BITS)
138 static Addr btac[N_BTAC]; /* BTAC; presumably auto-zeroed */
140 static ULong do_ind_branch_predict ( Addr instr_addr, Addr actual )
142 Bool mispredict;
143 const UWord mask = (1 << N_BTAC_BITS) - 1;
144 UWord indx = (instr_addr >> N_IADDR_LO_ZERO_BITS)
145 & mask;
146 tl_assert(indx < N_BTAC);
147 mispredict = btac[indx] != actual;
148 btac[indx] = actual;
149 return mispredict ? 1 : 0;
153 /*--------------------------------------------------------------------*/
154 /*--- end cg_branchpred.c ---*/
155 /*--------------------------------------------------------------------*/