/*--------------------------------------------------------------------*/
/*--- Cache simulation                                    cg_sim.c ---*/
/*--------------------------------------------------------------------*/
/*
   This file is part of Cachegrind, a high-precision tracing profiler.

   Copyright (C) 2002-2017 Nicholas Nethercote

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, see <http://www.gnu.org/licenses/>.

   The GNU General Public License is contained in the file COPYING.
*/
/* Notes:
  - simulates a write-allocate cache
  - (block --> set) hash function uses simple bit selection
  - handling of references straddling two cache blocks:
      - counts as only one cache access (not two)
      - both blocks hit                  --> one hit
      - one block hits, the other misses --> one miss
      - both blocks miss                 --> one miss (not two)
*/
42 Int line_size
; /* bytes */
47 HChar desc_line
[128]; /* large enough */
51 /* By this point, the size/assoc/line_size has been checked. */
52 static void cachesim_initcache(cache_t config
, cache_t2
* c
)
56 c
->size
= config
.size
;
57 c
->assoc
= config
.assoc
;
58 c
->line_size
= config
.line_size
;
60 c
->sets
= (c
->size
/ c
->line_size
) / c
->assoc
;
61 c
->sets_min_1
= c
->sets
- 1;
62 c
->line_size_bits
= VG_(log2
)(c
->line_size
);
63 c
->tag_shift
= c
->line_size_bits
+ VG_(log2
)(c
->sets
);
66 VG_(sprintf
)(c
->desc_line
, "%d B, %d B, direct-mapped",
67 c
->size
, c
->line_size
);
69 VG_(sprintf
)(c
->desc_line
, "%d B, %d B, %d-way associative",
70 c
->size
, c
->line_size
, c
->assoc
);
73 c
->tags
= VG_(malloc
)("cg.sim.ci.1",
74 sizeof(UWord
) * c
->sets
* c
->assoc
);
76 for (i
= 0; i
< c
->sets
* c
->assoc
; i
++)
80 /* This attribute forces GCC to inline the function, getting rid of a
81 * lot of indirection around the cache_t2 pointer, if it is known to be
82 * constant in the caller (the caller is inlined itself).
83 * Without inlining of simulator functions, cachegrind can get 40% slower.
85 __attribute__((always_inline
))
87 Bool
cachesim_setref_is_miss(cache_t2
* c
, UInt set_no
, UWord tag
)
92 set
= &(c
->tags
[set_no
* c
->assoc
]);
94 /* This loop is unrolled for just the first case, which is the most */
95 /* common. We can't unroll any further because it would screw up */
96 /* if we have a direct-mapped (1-way) cache. */
100 /* If the tag is one other than the MRU, move it into the MRU spot */
101 /* and shuffle the rest down. */
102 for (i
= 1; i
< c
->assoc
; i
++) {
104 for (j
= i
; j
> 0; j
--) {
113 /* A miss; install this tag as MRU, shuffle rest down. */
114 for (j
= c
->assoc
- 1; j
> 0; j
--) {
122 __attribute__((always_inline
))
124 Bool
cachesim_ref_is_miss(cache_t2
* c
, Addr a
, UChar size
)
126 /* A memory block has the size of a cache line */
127 UWord block1
= a
>> c
->line_size_bits
;
128 UWord block2
= (a
+size
-1) >> c
->line_size_bits
;
129 UInt set1
= block1
& c
->sets_min_1
;
131 /* Tags used in real caches are minimal to save space.
132 * As the last bits of the block number of addresses mapping
133 * into one cache set are the same, real caches use as tag
134 * tag = block >> log2(#sets)
135 * But using the memory block as more specific tag is fine,
136 * and saves instructions.
140 /* Access entirely within line. */
141 if (block1
== block2
)
142 return cachesim_setref_is_miss(c
, set1
, tag1
);
144 /* Access straddles two lines. */
145 else if (block1
+ 1 == block2
) {
146 UInt set2
= block2
& c
->sets_min_1
;
149 /* always do both, as state is updated as side effect */
150 if (cachesim_setref_is_miss(c
, set1
, tag1
)) {
151 cachesim_setref_is_miss(c
, set2
, tag2
);
154 return cachesim_setref_is_miss(c
, set2
, tag2
);
156 VG_(printf
)("addr: %lx size: %u blocks: %lu %lu",
157 a
, size
, block1
, block2
);
158 VG_(tool_panic
)("item straddles more than two cache sets");
168 static void cachesim_initcaches(cache_t I1c
, cache_t D1c
, cache_t LLc
)
170 cachesim_initcache(I1c
, &I1
);
171 cachesim_initcache(D1c
, &D1
);
172 cachesim_initcache(LLc
, &LL
);
175 __attribute__((always_inline
))
177 void cachesim_I1_doref_Gen(Addr a
, UChar size
, ULong
* m1
, ULong
*mL
)
179 if (cachesim_ref_is_miss(&I1
, a
, size
)) {
181 if (cachesim_ref_is_miss(&LL
, a
, size
))
186 // common special case IrNoX
187 __attribute__((always_inline
))
189 void cachesim_I1_doref_NoX(Addr a
, UChar size
, ULong
* m1
, ULong
*mL
)
191 UWord block
= a
>> I1
.line_size_bits
;
192 UInt I1_set
= block
& I1
.sets_min_1
;
195 if (cachesim_setref_is_miss(&I1
, I1_set
, block
)) {
196 UInt LL_set
= block
& LL
.sets_min_1
;
198 // can use block as tag as L1I and LL cache line sizes are equal
199 if (cachesim_setref_is_miss(&LL
, LL_set
, block
))
204 __attribute__((always_inline
))
206 void cachesim_D1_doref(Addr a
, UChar size
, ULong
* m1
, ULong
*mL
)
208 if (cachesim_ref_is_miss(&D1
, a
, size
)) {
210 if (cachesim_ref_is_miss(&LL
, a
, size
))
215 /* Check for special case IrNoX. Called at instrumentation time.
217 * Does this Ir only touch one cache line, and are L1I/LL cache
218 * line sizes the same? This allows to get rid of a runtime check.
220 * Returning false is always fine, as this calls the generic case
222 static Bool
cachesim_is_IrNoX(Addr a
, UChar size
)
224 UWord block1
, block2
;
226 if (I1
.line_size_bits
!= LL
.line_size_bits
) return False
;
227 block1
= a
>> I1
.line_size_bits
;
228 block2
= (a
+size
-1) >> I1
.line_size_bits
;
229 if (block1
!= block2
) return False
;
/*--------------------------------------------------------------------*/
/*--- end                                                 cg_sim.c ---*/
/*--------------------------------------------------------------------*/