cachegrind/cg_arch.c

   1 /*--------------------------------------------------------------------*/
   2 /*--- Cachegrind: cache configuration.                   cg-arch.c ---*/
   3 /*--------------------------------------------------------------------*/
   4
   5 /*
   6    This file is part of Cachegrind, a Valgrind tool for cache
   7    profiling programs.
   8
   9    Copyright (C) 2011-2015 Nicholas Nethercote
  10       njn@valgrind.org
  11
  12    This program is free software; you can redistribute it and/or
  13    modify it under the terms of the GNU General Public License as
  14    published by the Free Software Foundation; either version 2 of the
  15    License, or (at your option) any later version.
  16
  17    This program is distributed in the hope that it will be useful, but
  18    WITHOUT ANY WARRANTY; without even the implied warranty of
  19    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  20    General Public License for more details.
  21
  22    You should have received a copy of the GNU General Public License
  23    along with this program; if not, write to the Free Software
  24    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
  25    02111-1307, USA.
  26
  27    The GNU General Public License is contained in the file COPYING.
  28 */
  29
  30 #include "pub_tool_basics.h"
  31 #include "pub_tool_libcassert.h"
  32 #include "pub_tool_libcbase.h"
  33 #include "pub_tool_libcprint.h"
  34 #include "pub_tool_options.h"
  35 #include "pub_tool_machine.h"
  36
  37 #include "cg_arch.h"
  38
  39 static void configure_caches(cache_t* I1c, cache_t* D1c, cache_t* LLc,
  40                              Bool all_caches_clo_defined);
  41
  42 // Checks cache config is ok.  Returns NULL if ok, or a pointer to an error
  43 // string otherwise.
  44 static const HChar* check_cache(cache_t* cache)
  45 {
  46    // Simulator requires set count to be a power of two.
  47    if ((cache->size % (cache->line_size * cache->assoc) != 0) ||
  48        (-1 == VG_(log2)(cache->size/cache->line_size/cache->assoc)))
  49    {
  50       return "Cache set count is not a power of two.\n";
  51    }
  52
  53    // Simulator requires line size to be a power of two.
  54    if (-1 == VG_(log2)(cache->line_size)) {
  55       return "Cache line size is not a power of two.\n";
  56    }
  57
  58    // Then check line size >= 16 -- any smaller and a single instruction could
  59    // straddle three cache lines, which breaks a simulation assertion and is
  60    // stupid anyway.
  61    if (cache->line_size < MIN_LINE_SIZE) {
  62       return "Cache line size is too small.\n";
  63    }
  64
  65    /* Then check cache size > line size (causes seg faults if not). */
  66    if (cache->size <= cache->line_size) {
  67       return "Cache size <= line size.\n";
  68    }
  69
  70    /* Then check assoc <= (size / line size) (seg faults otherwise). */
  71    if (cache->assoc > (cache->size / cache->line_size)) {
  72       return "Cache associativity > (size / line size).\n";
  73    }
  74
  75    return NULL;
  76 }
  77
  78
  79 static void parse_cache_opt ( cache_t* cache, const HChar* opt,
  80                               const HChar* optval )
  81 {
  82    Long i1, i2, i3;
  83    HChar* endptr;
  84    const HChar* checkRes;
  85
  86    // Option argument looks like "65536,2,64".  Extract them.
  87    i1 = VG_(strtoll10)(optval,   &endptr); if (*endptr != ',')  goto bad;
  88    i2 = VG_(strtoll10)(endptr+1, &endptr); if (*endptr != ',')  goto bad;
  89    i3 = VG_(strtoll10)(endptr+1, &endptr); if (*endptr != '\0') goto bad;
  90
  91    // Check for overflow.
  92    cache->size      = (Int)i1;
  93    cache->assoc     = (Int)i2;
  94    cache->line_size = (Int)i3;
  95    if (cache->size      != i1) goto overflow;
  96    if (cache->assoc     != i2) goto overflow;
  97    if (cache->line_size != i3) goto overflow;
  98
  99    checkRes = check_cache(cache);
 100    if (checkRes) {
 101       VG_(fmsg)("%s", checkRes);
 102       goto bad;
 103    }
 104
 105    return;
 106
 107   bad:
 108    VG_(fmsg_bad_option)(opt, "Bad argument '%s'\n", optval);
 109
 110   overflow:
 111    VG_(fmsg_bad_option)(opt,
 112       "One of the cache parameters was too large and overflowed.\n");
 113 }
 114
 115
 116 Bool VG_(str_clo_cache_opt)(const HChar *arg,
 117                             cache_t* clo_I1c,
 118                             cache_t* clo_D1c,
 119                             cache_t* clo_LLc)
 120 {
 121    const HChar* tmp_str;
 122
 123    if      VG_STR_CLO(arg, "--I1", tmp_str) {
 124       parse_cache_opt(clo_I1c, arg, tmp_str);
 125       return True;
 126    } else if VG_STR_CLO(arg, "--D1", tmp_str) {
 127       parse_cache_opt(clo_D1c, arg, tmp_str);
 128       return True;
 129    } else if (VG_STR_CLO(arg, "--L2", tmp_str) || // for backwards compatibility
 130               VG_STR_CLO(arg, "--LL", tmp_str)) {
 131       parse_cache_opt(clo_LLc, arg, tmp_str);
 132       return True;
 133    } else
 134       return False;
 135 }
 136
 137 static void umsg_cache_img(const HChar* desc, cache_t* c)
 138 {
 139    VG_(umsg)("  %s: %'d B, %d-way, %d B lines\n", desc,
 140              c->size, c->assoc, c->line_size);
 141 }
 142
 143 // Verifies if c is a valid cache.
 144 // An invalid value causes an assert, unless clo_redefined is True.
 145 static void check_cache_or_override(const HChar* desc, cache_t* c, Bool clo_redefined)
 146 {
 147    const HChar* checkRes;
 148
 149    checkRes = check_cache(c);
 150    if (checkRes) {
 151       VG_(umsg)("Auto-detected %s cache configuration not supported: %s",
 152                 desc, checkRes);
 153       umsg_cache_img(desc, c);
 154       if (!clo_redefined) {
 155          VG_(umsg)("As it probably should be supported, please report a bug!\n");
 156          VG_(umsg)("Bypass this message by using option --%s=...\n", desc);
 157          tl_assert(0);
 158       }
 159    }
 160 }
 161
 162
 163 /* If the LL cache config isn't something the simulation functions
 164    can handle, try to adjust it so it is.  Caches are characterised
 165    by (total size T, line size L, associativity A), and then we
 166    have
 167
 168      number of sets S = T / (L * A)
 169
 170    The required constraints are:
 171
 172    * L must be a power of 2, but it always is in practice, so
 173      no problem there
 174
 175    * A can be any value >= 1
 176
 177    * T can be any value, but ..
 178
 179    * S must be a power of 2.
 180
 181    That sometimes gives a problem.  For example, some Core iX based
 182    Intel CPUs have T = 12MB, A = 16, L = 64, which gives 12288
 183    sets.  Some AMD cpus have T = 5MB, A = 48, L = 64, which gives
 184    1706.667 sets (!).
 185
 186    The "fix" is to force S down to the nearest power of two below its
 187    original value, and increase A proportionately, so as to keep the
 188    total cache size the same.  In fact to be safe we recalculate the
 189    cache size afterwards anyway, to guarantee that it divides exactly
 190    between the new number of sets.
 191
 192    The "fix" is "justified" (cough, cough) by alleging that
 193    increases of associativity above about 4 have very little effect
 194    on the actual miss rate.  It would be far more inaccurate to
 195    fudge this by changing the size of the simulated cache --
 196    changing the associativity is a much better option.
 197 */
 198
 199 /* (Helper function) Returns the largest power of 2 that is <= |x|.
 200    Even works when |x| == 0. */
 201 static UInt floor_power_of_2 ( UInt x )
 202 {
 203    x = x | (x >> 1);
 204    x = x | (x >> 2);
 205    x = x | (x >> 4);
 206    x = x | (x >> 8);
 207    x = x | (x >> 16);
 208    return x - (x >> 1);
 209 }
 210
 211 static void
 212 maybe_tweak_LLc(cache_t *LLc)
 213 {
 214   if (LLc->size == 0 || LLc->assoc == 0 || LLc->line_size == 0)
 215      return;
 216
 217   tl_assert(LLc->size > 0 && LLc->assoc > 0 && LLc->line_size > 0);
 218
 219   UInt old_size      = (UInt)LLc->size;
 220   UInt old_assoc     = (UInt)LLc->assoc;
 221   UInt old_line_size = (UInt)LLc->line_size;
 222
 223   UInt new_size      = old_size;
 224   UInt new_assoc     = old_assoc;
 225   UInt new_line_size = old_line_size;
 226
 227   UInt old_nSets = old_size / (old_assoc * old_line_size);
 228   if (old_nSets == 0) {
 229      /* This surely can't happen; but would cause chaos with the maths
 230       * below if it did.  Just give up if it does. */
 231      return;
 232   }
 233
 234   if (-1 != VG_(log2_64)(old_nSets)) {
 235      /* The number of sets is already a power of 2.  Make sure that
 236         the size divides exactly between the sets.  Almost all of the
 237         time this will have no effect. */
 238      new_size = old_line_size * old_assoc * old_nSets;
 239   } else {
 240      /* The number of sets isn't a power of two.  Calculate some
 241         scale-down factor which causes the number of sets to become a
 242         power of two.  Then, increase the associativity by that
 243         factor.  Finally, re-calculate the total size so as to make
 244         sure it divides exactly between the sets. */
 245      tl_assert(old_nSets >= 0);
 246      UInt new_nSets = floor_power_of_2 ( old_nSets );
 247      tl_assert(new_nSets > 0 && new_nSets < old_nSets);
 248      Double factor = (Double)old_nSets / (Double)new_nSets;
 249      tl_assert(factor >= 1.0);
 250
 251      new_assoc = (UInt)(0.5 + factor * (Double)old_assoc);
 252      tl_assert(new_assoc >= old_assoc);
 253
 254      new_size = old_line_size * new_assoc * new_nSets;
 255   }
 256
 257   tl_assert(new_line_size == old_line_size); /* we never change this */
 258   if (new_size == old_size && new_assoc == old_assoc)
 259      return;
 260
 261   VG_(dmsg)("warning: "
 262             "specified LL cache: line_size %u  assoc %u  total_size %'u\n",
 263             old_line_size, old_assoc, old_size);
 264   VG_(dmsg)("warning: "
 265             "simulated LL cache: line_size %u  assoc %u  total_size %'u\n",\
 266             new_line_size, new_assoc, new_size);
 267
 268   LLc->size      = new_size;
 269   LLc->assoc     = new_assoc;
 270   LLc->line_size = new_line_size;
 271 }
 272
 273 void VG_(post_clo_init_configure_caches)(cache_t* I1c,
 274                                          cache_t* D1c,
 275                                          cache_t* LLc,
 276                                          cache_t* clo_I1c,
 277                                          cache_t* clo_D1c,
 278                                          cache_t* clo_LLc)
 279 {
 280 #define DEFINED(L)   (-1 != L->size  || -1 != L->assoc || -1 != L->line_size)
 281
 282    // Count how many were defined on the command line.
 283    Bool all_caches_clo_defined =
 284       (DEFINED(clo_I1c) &&
 285        DEFINED(clo_D1c) &&
 286        DEFINED(clo_LLc));
 287
 288    // Set the cache config (using auto-detection, if supported by the
 289    // architecture).
 290    configure_caches( I1c, D1c, LLc, all_caches_clo_defined );
 291
 292    maybe_tweak_LLc( LLc );
 293
 294    // Check the default/auto-detected values.
 295    // Allow the user to override invalid auto-detected caches
 296    // with command line.
 297    check_cache_or_override ("I1", I1c, DEFINED(clo_I1c));
 298    check_cache_or_override ("D1", D1c, DEFINED(clo_D1c));
 299    check_cache_or_override ("LL", LLc, DEFINED(clo_LLc));
 300
 301    // Then replace with any defined on the command line.  (Already checked in
 302    // VG(parse_clo_cache_opt)().)
 303    if (DEFINED(clo_I1c)) { *I1c = *clo_I1c; }
 304    if (DEFINED(clo_D1c)) { *D1c = *clo_D1c; }
 305    if (DEFINED(clo_LLc)) { *LLc = *clo_LLc; }
 306
 307    if (VG_(clo_verbosity) >= 2) {
 308       VG_(umsg)("Cache configuration used:\n");
 309       umsg_cache_img ("I1", I1c);
 310       umsg_cache_img ("D1", D1c);
 311       umsg_cache_img ("LL", LLc);
 312    }
 313 #undef DEFINED
 314 }
 315
 316 void VG_(print_cache_clo_opts)()
 317 {
 318    VG_(printf)(
 319 "    --I1=<size>,<assoc>,<line_size>  set I1 cache manually\n"
 320 "    --D1=<size>,<assoc>,<line_size>  set D1 cache manually\n"
 321 "    --LL=<size>,<assoc>,<line_size>  set LL cache manually\n"
 322                );
 323 }
 324
 325
 326 // Traverse the cache info and return a cache of the given kind and level.
 327 // Return NULL if no such cache exists.
 328 static const VexCache *
 329 locate_cache(const VexCacheInfo *ci, VexCacheKind kind, UInt level)
 330 {
 331    const VexCache *c;
 332
 333    for (c = ci->caches; c != ci->caches + ci->num_caches; ++c) {
 334       if (c->level == level && c->kind == kind) {
 335          return c;
 336       }
 337    }
 338    return NULL;  // not found
 339 }
 340
 341
 342 // Gives the auto-detected configuration of I1, D1 and LL caches.  They get
 343 // overridden by any cache configurations specified on the command line.
 344 static void
 345 configure_caches(cache_t *I1c, cache_t *D1c, cache_t *LLc,
 346                  Bool all_caches_clo_defined)
 347 {
 348    VexArchInfo vai;
 349    const VexCacheInfo *ci;
 350    const VexCache *i1, *d1, *ll;
 351
 352    VG_(machine_get_VexArchInfo)(NULL, &vai);
 353    ci = &vai.hwcache_info;
 354
 355    // Extract what we need
 356    i1 = locate_cache(ci, INSN_CACHE, 1);
 357    d1 = locate_cache(ci, DATA_CACHE, 1);
 358    ll = locate_cache(ci, UNIFIED_CACHE, ci->num_levels);
 359
 360    if (ci->num_caches > 0 && ll == NULL) {
 361       VG_(dmsg)("warning: L2 cache not installed, ignore LL results.\n");
 362    }
 363
 364    if (ll && ci->num_levels > 2) {
 365       VG_(dmsg)("warning: L%u cache found, using its data for the "
 366                 "LL simulation.\n", ci->num_levels);
 367    }
 368
 369    if (i1 && d1 && ll) {
 370       if (i1->is_trace_cache) {
 371          /* HACK ALERT: Instruction trace cache -- capacity is micro-ops based.
 372           * conversion to byte size is a total guess;  treat the 12K and 16K
 373           * cases the same since the cache byte size must be a power of two for
 374           * everything to work!.  Also guessing 32 bytes for the line size...
 375           */
 376          UInt adjusted_size, guessed_line_size = 32;
 377
 378          if (i1->sizeB == 12 * 1024 || i1->sizeB == 16 * 1024) {
 379             adjusted_size = 16 * 1024;
 380          } else {
 381             adjusted_size = 32 * 1024;
 382          }
 383          VG_(dmsg)("warning: Pentium 4 with %u KB micro-op instruction trace cache\n",
 384                    i1->sizeB / 1024);
 385          VG_(dmsg)("         Simulating a %u KB I-cache with %u B lines\n",
 386                    adjusted_size / 1024, guessed_line_size);
 387
 388          *I1c = (cache_t) { adjusted_size, i1->assoc, guessed_line_size };
 389       } else {
 390          *I1c = (cache_t) { i1->sizeB, i1->assoc, i1->line_sizeB };
 391       }
 392       *D1c = (cache_t) { d1->sizeB, d1->assoc, d1->line_sizeB };
 393       *LLc = (cache_t) { ll->sizeB, ll->assoc, ll->line_sizeB };
 394
 395       return;
 396    }
 397
 398    // Cache information could not be queried; choose some default
 399    // architecture specific default setting.
 400
 401 #if defined(VGA_ppc32)
 402
 403    // Default cache configuration
 404    *I1c = (cache_t) {  65536, 2, 64 };
 405    *D1c = (cache_t) {  65536, 2, 64 };
 406    *LLc = (cache_t) { 262144, 8, 64 };
 407
 408 #elif defined(VGA_ppc64be) || defined(VGA_ppc64le)
 409
 410    // Default cache configuration
 411    *I1c = (cache_t) {  65536, 2, 64 };
 412    *D1c = (cache_t) {  65536, 2, 64 };
 413    *LLc = (cache_t) { 262144, 8, 64 };
 414
 415 #elif defined(VGA_arm)
 416
 417    // Set caches to default (for Cortex-A8 ?)
 418    *I1c = (cache_t) {  16384, 4, 64 };
 419    *D1c = (cache_t) {  16384, 4, 64 };
 420    *LLc = (cache_t) { 262144, 8, 64 };
 421
 422 #elif defined(VGA_arm64)
 423
 424    // Copy the 32-bit ARM version until such time as we have
 425    // some real hardware to run on
 426    *I1c = (cache_t) {  16384, 4, 64 };
 427    *D1c = (cache_t) {  16384, 4, 64 };
 428    *LLc = (cache_t) { 262144, 8, 64 };
 429
 430 #elif defined(VGA_s390x)
 431    //
 432    // Here is the cache data from older machine models:
 433    //
 434    //           I1            D1      I/D L2
 435    // z900  256k/256/4    256k/256/4   16MB
 436    // z800  256k/256/4    256k/256/4    8MB
 437    // z990  256k/256/4    256k/256/4   32MB
 438    // z890  256k/256/4    256k/256/4   32MB
 439    // z9    256k/256/4    256k/256/4   40MB
 440    //
 441    // Sources:
 442    // (1) IBM System z9 109 Technical Introduction
 443    //     www.redbooks.ibm.com/redbooks/pdfs/sg246669.pdf
 444    // (2) The microarchitecture of the IBM eServer z900 processor
 445    //     IBM Journal of Research and Development
 446    //     Volume 46, Number 4/5, pp 381-395, July/September 2002
 447    // (3) The IBM eServer z990 microprocessor
 448    //     IBM Journal of Research and Development
 449    //     Volume 48, Number 3/4, pp 295-309, May/July 2004
 450    // (4) Charles Webb, IBM
 451    //
 452    // L2 data is unfortunately incomplete. Otherwise, we could support
 453    // machines without the ECAG insn by looking at VEX_S390X_MODEL(hwcaps).
 454
 455    // Default cache configuration is z10-EC  (Source: ECAG insn)
 456    *I1c = (cache_t) {    65536,  4, 256 };
 457    *D1c = (cache_t) {   131072,  8, 256 };
 458    *LLc = (cache_t) { 50331648, 24, 256 };
 459
 460 #elif defined(VGA_mips32)
 461
 462    // Set caches to default (for MIPS32-r2(mips 74kc))
 463    *I1c = (cache_t) {  32768, 4, 32 };
 464    *D1c = (cache_t) {  32768, 4, 32 };
 465    *LLc = (cache_t) { 524288, 8, 32 };
 466
 467 #elif defined(VGA_mips64)
 468
 469    // Set caches to default (for MIPS64 - 5kc)
 470    *I1c = (cache_t) {  32768, 4, 32 };
 471    *D1c = (cache_t) {  32768, 4, 32 };
 472    *LLc = (cache_t) { 524288, 8, 32 };
 473
 474 #elif defined(VGA_x86) || defined(VGA_amd64)
 475
 476    *I1c = (cache_t) {  65536, 2, 64 };
 477    *D1c = (cache_t) {  65536, 2, 64 };
 478    *LLc = (cache_t) { 262144, 8, 64 };
 479
 480 #elif defined(VGA_tilegx)
 481
 482    // Set caches to default for Tilegx.
 483    *I1c = (cache_t) { 0x8000,  2, 64 };
 484    *D1c = (cache_t) { 0x8000,  2, 64 };
 485    *LLc = (cache_t) { 0x40000, 8, 64 };
 486
 487 #else
 488
 489 #error "Unknown arch"
 490
 491 #endif
 492
 493    if (!all_caches_clo_defined) {
 494       const HChar warning[] =
 495         "Warning: Cannot auto-detect cache config, using defaults.\n"
 496         "         Run with -v to see.\n";
 497       VG_(dmsg)("%s", warning);
 498    }
 499 }
 500
 501 /*--------------------------------------------------------------------*/
 502 /*--- end                                                          ---*/
 503 /*--------------------------------------------------------------------*/