NEWS: Mention the MinGW include/valgrind.h fix
[valgrind.git] / cachegrind / cg_sim.c
blobc8f0a8fc266f0a637b902bc6167c71a4ec9eb49f
2 /*--------------------------------------------------------------------*/
3 /*--- Cache simulation cg_sim.c ---*/
4 /*--------------------------------------------------------------------*/
6 /*
7 This file is part of Cachegrind, a Valgrind tool for cache
8 profiling programs.
10 Copyright (C) 2002-2017 Nicholas Nethercote
11 njn@valgrind.org
13 This program is free software; you can redistribute it and/or
14 modify it under the terms of the GNU General Public License as
15 published by the Free Software Foundation; either version 2 of the
16 License, or (at your option) any later version.
18 This program is distributed in the hope that it will be useful, but
19 WITHOUT ANY WARRANTY; without even the implied warranty of
20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 General Public License for more details.
23 You should have received a copy of the GNU General Public License
24 along with this program; if not, see <http://www.gnu.org/licenses/>.
26 The GNU General Public License is contained in the file COPYING.
29 /* Notes:
30 - simulates a write-allocate cache
31 - (block --> set) hash function uses simple bit selection
32 - handling of references straddling two cache blocks:
33 - counts as only one cache access (not two)
34 - both blocks hit --> one hit
35 - one block hits, the other misses --> one miss
36 - both blocks miss --> one miss (not two)
39 typedef struct {
40 Int size; /* bytes */
41 Int assoc;
42 Int line_size; /* bytes */
43 Int sets;
44 Int sets_min_1;
45 Int line_size_bits;
46 Int tag_shift;
47 HChar desc_line[128]; /* large enough */
48 UWord* tags;
49 } cache_t2;
51 /* By this point, the size/assoc/line_size has been checked. */
52 static void cachesim_initcache(cache_t config, cache_t2* c)
54 Int i;
56 c->size = config.size;
57 c->assoc = config.assoc;
58 c->line_size = config.line_size;
60 c->sets = (c->size / c->line_size) / c->assoc;
61 c->sets_min_1 = c->sets - 1;
62 c->line_size_bits = VG_(log2)(c->line_size);
63 c->tag_shift = c->line_size_bits + VG_(log2)(c->sets);
65 if (c->assoc == 1) {
66 VG_(sprintf)(c->desc_line, "%d B, %d B, direct-mapped",
67 c->size, c->line_size);
68 } else {
69 VG_(sprintf)(c->desc_line, "%d B, %d B, %d-way associative",
70 c->size, c->line_size, c->assoc);
73 c->tags = VG_(malloc)("cg.sim.ci.1",
74 sizeof(UWord) * c->sets * c->assoc);
76 for (i = 0; i < c->sets * c->assoc; i++)
77 c->tags[i] = 0;
80 /* This attribute forces GCC to inline the function, getting rid of a
81 * lot of indirection around the cache_t2 pointer, if it is known to be
82 * constant in the caller (the caller is inlined itself).
83 * Without inlining of simulator functions, cachegrind can get 40% slower.
85 __attribute__((always_inline))
86 static __inline__
87 Bool cachesim_setref_is_miss(cache_t2* c, UInt set_no, UWord tag)
89 int i, j;
90 UWord *set;
92 set = &(c->tags[set_no * c->assoc]);
94 /* This loop is unrolled for just the first case, which is the most */
95 /* common. We can't unroll any further because it would screw up */
96 /* if we have a direct-mapped (1-way) cache. */
97 if (tag == set[0])
98 return False;
100 /* If the tag is one other than the MRU, move it into the MRU spot */
101 /* and shuffle the rest down. */
102 for (i = 1; i < c->assoc; i++) {
103 if (tag == set[i]) {
104 for (j = i; j > 0; j--) {
105 set[j] = set[j - 1];
107 set[0] = tag;
109 return False;
113 /* A miss; install this tag as MRU, shuffle rest down. */
114 for (j = c->assoc - 1; j > 0; j--) {
115 set[j] = set[j - 1];
117 set[0] = tag;
119 return True;
122 __attribute__((always_inline))
123 static __inline__
124 Bool cachesim_ref_is_miss(cache_t2* c, Addr a, UChar size)
126 /* A memory block has the size of a cache line */
127 UWord block1 = a >> c->line_size_bits;
128 UWord block2 = (a+size-1) >> c->line_size_bits;
129 UInt set1 = block1 & c->sets_min_1;
131 /* Tags used in real caches are minimal to save space.
132 * As the last bits of the block number of addresses mapping
133 * into one cache set are the same, real caches use as tag
134 * tag = block >> log2(#sets)
135 * But using the memory block as more specific tag is fine,
136 * and saves instructions.
138 UWord tag1 = block1;
140 /* Access entirely within line. */
141 if (block1 == block2)
142 return cachesim_setref_is_miss(c, set1, tag1);
144 /* Access straddles two lines. */
145 else if (block1 + 1 == block2) {
146 UInt set2 = block2 & c->sets_min_1;
147 UWord tag2 = block2;
149 /* always do both, as state is updated as side effect */
150 if (cachesim_setref_is_miss(c, set1, tag1)) {
151 cachesim_setref_is_miss(c, set2, tag2);
152 return True;
154 return cachesim_setref_is_miss(c, set2, tag2);
156 VG_(printf)("addr: %lx size: %u blocks: %lu %lu",
157 a, size, block1, block2);
158 VG_(tool_panic)("item straddles more than two cache sets");
159 /* not reached */
160 return True;
164 static cache_t2 LL;
165 static cache_t2 I1;
166 static cache_t2 D1;
168 static void cachesim_initcaches(cache_t I1c, cache_t D1c, cache_t LLc)
170 cachesim_initcache(I1c, &I1);
171 cachesim_initcache(D1c, &D1);
172 cachesim_initcache(LLc, &LL);
175 __attribute__((always_inline))
176 static __inline__
177 void cachesim_I1_doref_Gen(Addr a, UChar size, ULong* m1, ULong *mL)
179 if (cachesim_ref_is_miss(&I1, a, size)) {
180 (*m1)++;
181 if (cachesim_ref_is_miss(&LL, a, size))
182 (*mL)++;
186 // common special case IrNoX
187 __attribute__((always_inline))
188 static __inline__
189 void cachesim_I1_doref_NoX(Addr a, UChar size, ULong* m1, ULong *mL)
191 UWord block = a >> I1.line_size_bits;
192 UInt I1_set = block & I1.sets_min_1;
194 // use block as tag
195 if (cachesim_setref_is_miss(&I1, I1_set, block)) {
196 UInt LL_set = block & LL.sets_min_1;
197 (*m1)++;
198 // can use block as tag as L1I and LL cache line sizes are equal
199 if (cachesim_setref_is_miss(&LL, LL_set, block))
200 (*mL)++;
204 __attribute__((always_inline))
205 static __inline__
206 void cachesim_D1_doref(Addr a, UChar size, ULong* m1, ULong *mL)
208 if (cachesim_ref_is_miss(&D1, a, size)) {
209 (*m1)++;
210 if (cachesim_ref_is_miss(&LL, a, size))
211 (*mL)++;
215 /* Check for special case IrNoX. Called at instrumentation time.
217 * Does this Ir only touch one cache line, and are L1I/LL cache
218 * line sizes the same? This allows to get rid of a runtime check.
220 * Returning false is always fine, as this calls the generic case
222 static Bool cachesim_is_IrNoX(Addr a, UChar size)
224 UWord block1, block2;
226 if (I1.line_size_bits != LL.line_size_bits) return False;
227 block1 = a >> I1.line_size_bits;
228 block2 = (a+size-1) >> I1.line_size_bits;
229 if (block1 != block2) return False;
231 return True;
234 /*--------------------------------------------------------------------*/
235 /*--- end cg_sim.c ---*/
236 /*--------------------------------------------------------------------*/