cachegrind/cg_sim.c

   1
   2 /*--------------------------------------------------------------------*/
   3 /*--- Cache simulation                                    cg_sim.c ---*/
   4 /*--------------------------------------------------------------------*/
   5
   6 /*
   7    This file is part of Cachegrind, a Valgrind tool for cache
   8    profiling programs.
   9
  10    Copyright (C) 2002-2017 Nicholas Nethercote
  11       njn@valgrind.org
  12
  13    This program is free software; you can redistribute it and/or
  14    modify it under the terms of the GNU General Public License as
  15    published by the Free Software Foundation; either version 2 of the
  16    License, or (at your option) any later version.
  17
  18    This program is distributed in the hope that it will be useful, but
  19    WITHOUT ANY WARRANTY; without even the implied warranty of
  20    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  21    General Public License for more details.
  22
  23    You should have received a copy of the GNU General Public License
  24    along with this program; if not, see <http://www.gnu.org/licenses/>.
  25
  26    The GNU General Public License is contained in the file COPYING.
  27 */
  28
  29 /* Notes:
  30   - simulates a write-allocate cache
  31   - (block --> set) hash function uses simple bit selection
  32   - handling of references straddling two cache blocks:
  33       - counts as only one cache access (not two)
  34       - both blocks hit                  --> one hit
  35       - one block hits, the other misses --> one miss
  36       - both blocks miss                 --> one miss (not two)
  37 */
  38
  39 typedef struct {
  40    Int          size;                   /* bytes */
  41    Int          assoc;
  42    Int          line_size;              /* bytes */
  43    Int          sets;
  44    Int          sets_min_1;
  45    Int          line_size_bits;
  46    Int          tag_shift;
  47    HChar        desc_line[128];         /* large enough */
  48    UWord*       tags;
  49 } cache_t2;
  50
  51 /* By this point, the size/assoc/line_size has been checked. */
  52 static void cachesim_initcache(cache_t config, cache_t2* c)
  53 {
  54    Int i;
  55
  56    c->size      = config.size;
  57    c->assoc     = config.assoc;
  58    c->line_size = config.line_size;
  59
  60    c->sets           = (c->size / c->line_size) / c->assoc;
  61    c->sets_min_1     = c->sets - 1;
  62    c->line_size_bits = VG_(log2)(c->line_size);
  63    c->tag_shift      = c->line_size_bits + VG_(log2)(c->sets);
  64
  65    if (c->assoc == 1) {
  66       VG_(sprintf)(c->desc_line, "%d B, %d B, direct-mapped",
  67                                  c->size, c->line_size);
  68    } else {
  69       VG_(sprintf)(c->desc_line, "%d B, %d B, %d-way associative",
  70                                  c->size, c->line_size, c->assoc);
  71    }
  72
  73    c->tags = VG_(malloc)("cg.sim.ci.1",
  74                          sizeof(UWord) * c->sets * c->assoc);
  75
  76    for (i = 0; i < c->sets * c->assoc; i++)
  77       c->tags[i] = 0;
  78 }
  79
  80 /* This attribute forces GCC to inline the function, getting rid of a
  81  * lot of indirection around the cache_t2 pointer, if it is known to be
  82  * constant in the caller (the caller is inlined itself).
  83  * Without inlining of simulator functions, cachegrind can get 40% slower.
  84  */
  85 __attribute__((always_inline))
  86 static __inline__
  87 Bool cachesim_setref_is_miss(cache_t2* c, UInt set_no, UWord tag)
  88 {
  89    int i, j;
  90    UWord *set;
  91
  92    set = &(c->tags[set_no * c->assoc]);
  93
  94    /* This loop is unrolled for just the first case, which is the most */
  95    /* common.  We can't unroll any further because it would screw up   */
  96    /* if we have a direct-mapped (1-way) cache.                        */
  97    if (tag == set[0])
  98       return False;
  99
 100    /* If the tag is one other than the MRU, move it into the MRU spot  */
 101    /* and shuffle the rest down.                                       */
 102    for (i = 1; i < c->assoc; i++) {
 103       if (tag == set[i]) {
 104          for (j = i; j > 0; j--) {
 105             set[j] = set[j - 1];
 106          }
 107          set[0] = tag;
 108
 109          return False;
 110       }
 111    }
 112
 113    /* A miss;  install this tag as MRU, shuffle rest down. */
 114    for (j = c->assoc - 1; j > 0; j--) {
 115       set[j] = set[j - 1];
 116    }
 117    set[0] = tag;
 118
 119    return True;
 120 }
 121
 122 __attribute__((always_inline))
 123 static __inline__
 124 Bool cachesim_ref_is_miss(cache_t2* c, Addr a, UChar size)
 125 {
 126    /* A memory block has the size of a cache line */
 127    UWord block1 =  a         >> c->line_size_bits;
 128    UWord block2 = (a+size-1) >> c->line_size_bits;
 129    UInt  set1   = block1 & c->sets_min_1;
 130
 131    /* Tags used in real caches are minimal to save space.
 132     * As the last bits of the block number of addresses mapping
 133     * into one cache set are the same, real caches use as tag
 134     *   tag = block >> log2(#sets)
 135     * But using the memory block as more specific tag is fine,
 136     * and saves instructions.
 137     */
 138    UWord tag1   = block1;
 139
 140    /* Access entirely within line. */
 141    if (block1 == block2)
 142       return cachesim_setref_is_miss(c, set1, tag1);
 143
 144    /* Access straddles two lines. */
 145    else if (block1 + 1 == block2) {
 146       UInt  set2 = block2 & c->sets_min_1;
 147       UWord tag2 = block2;
 148
 149       /* always do both, as state is updated as side effect */
 150       if (cachesim_setref_is_miss(c, set1, tag1)) {
 151          cachesim_setref_is_miss(c, set2, tag2);
 152          return True;
 153       }
 154       return cachesim_setref_is_miss(c, set2, tag2);
 155    }
 156    VG_(printf)("addr: %lx  size: %u  blocks: %lu %lu",
 157                a, size, block1, block2);
 158    VG_(tool_panic)("item straddles more than two cache sets");
 159    /* not reached */
 160    return True;
 161 }
 162
 163
 164 static cache_t2 LL;
 165 static cache_t2 I1;
 166 static cache_t2 D1;
 167
 168 static void cachesim_initcaches(cache_t I1c, cache_t D1c, cache_t LLc)
 169 {
 170    cachesim_initcache(I1c, &I1);
 171    cachesim_initcache(D1c, &D1);
 172    cachesim_initcache(LLc, &LL);
 173 }
 174
 175 __attribute__((always_inline))
 176 static __inline__
 177 void cachesim_I1_doref_Gen(Addr a, UChar size, ULong* m1, ULong *mL)
 178 {
 179    if (cachesim_ref_is_miss(&I1, a, size)) {
 180       (*m1)++;
 181       if (cachesim_ref_is_miss(&LL, a, size))
 182          (*mL)++;
 183    }
 184 }
 185
 186 // common special case IrNoX
 187 __attribute__((always_inline))
 188 static __inline__
 189 void cachesim_I1_doref_NoX(Addr a, UChar size, ULong* m1, ULong *mL)
 190 {
 191    UWord block  = a >> I1.line_size_bits;
 192    UInt  I1_set = block & I1.sets_min_1;
 193
 194    // use block as tag
 195    if (cachesim_setref_is_miss(&I1, I1_set, block)) {
 196       UInt  LL_set = block & LL.sets_min_1;
 197       (*m1)++;
 198       // can use block as tag as L1I and LL cache line sizes are equal
 199       if (cachesim_setref_is_miss(&LL, LL_set, block))
 200          (*mL)++;
 201    }
 202 }
 203
 204 __attribute__((always_inline))
 205 static __inline__
 206 void cachesim_D1_doref(Addr a, UChar size, ULong* m1, ULong *mL)
 207 {
 208    if (cachesim_ref_is_miss(&D1, a, size)) {
 209       (*m1)++;
 210       if (cachesim_ref_is_miss(&LL, a, size))
 211          (*mL)++;
 212    }
 213 }
 214
 215 /* Check for special case IrNoX. Called at instrumentation time.
 216  *
 217  * Does this Ir only touch one cache line, and are L1I/LL cache
 218  * line sizes the same? This allows to get rid of a runtime check.
 219  *
 220  * Returning false is always fine, as this calls the generic case
 221  */
 222 static Bool cachesim_is_IrNoX(Addr a, UChar size)
 223 {
 224    UWord block1, block2;
 225
 226    if (I1.line_size_bits != LL.line_size_bits) return False;
 227    block1 =  a         >> I1.line_size_bits;
 228    block2 = (a+size-1) >> I1.line_size_bits;
 229    if (block1 != block2) return False;
 230
 231    return True;
 232 }
 233
 234 /*--------------------------------------------------------------------*/
 235 /*--- end                                                 cg_sim.c ---*/
 236 /*--------------------------------------------------------------------*/
 237