// SPDX-License-Identifier: GPL-2.0
#include <stdio.h>
#include "evsel.h"
#include "stat.h"
#include "color.h"
#include "pmu.h"
#include "rblist.h"
#include "evlist.h"
#include "expr.h"
#include "metricgroup.h"
/*
 * AGGR_GLOBAL: Use CPU 0
 * AGGR_SOCKET: Use first CPU of socket
 * AGGR_DIE: Use first CPU of die
 * AGGR_CORE: Use first CPU of core
 * AGGR_NONE: Use matching CPU
 * AGGR_THREAD: Not supported?
 */
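/*
 * Illustrative sketch (not part of the original file): how a caller could
 * map the aggregation modes above to the representative CPU whose shadow
 * values are used. The helper name and the first_cpu_of_unit parameter are
 * assumptions for illustration only; the real mapping is done by perf's
 * aggregation code.
 */
#if 0
static int shadow_stat_cpu(enum aggr_mode mode, int cpu, int first_cpu_of_unit)
{
        switch (mode) {
        case AGGR_GLOBAL:       /* use CPU 0 */
                return 0;
        case AGGR_SOCKET:       /* first CPU of socket/die/core */
        case AGGR_DIE:
        case AGGR_CORE:
                return first_cpu_of_unit;
        case AGGR_NONE:         /* use the matching CPU */
        default:
                return cpu;
        }
}
#endif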
static bool have_frontend_stalled;

struct runtime_stat rt_stat;
struct stats walltime_nsecs_stats;
struct saved_value {
        struct rb_node rb_node;
        struct perf_evsel *evsel;
        enum stat_type type;
        int ctx;
        int cpu;
        struct runtime_stat *stat;
        struct stats stats;
};
static int saved_value_cmp(struct rb_node *rb_node, const void *entry)
{
        struct saved_value *a = container_of(rb_node,
                                             struct saved_value,
                                             rb_node);
        const struct saved_value *b = entry;

        if (a->cpu != b->cpu)
                return a->cpu - b->cpu;

        /*
         * Previously the rbtree was used to link generic metrics.
         * The keys were evsel/cpu. Now the rbtree is extended to support
         * per-thread shadow stats. For shadow stats case, the keys
         * are cpu/type/ctx/stat (evsel is NULL). For generic metrics
         * case, the keys are still evsel/cpu (type/ctx/stat are 0 or NULL).
         */
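        /*
         * Illustrative examples of the two key shapes (hypothetical values,
         * added for clarity; see saved_value_lookup() below for how the
         * keys are built):
         *
         *   generic metric key: { .evsel = counter, .cpu = 2 }
         *   shadow stat key:    { .evsel = NULL, .cpu = 2,
         *                         .type = STAT_CYCLES, .ctx = ctx, .stat = st }
         */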
        if (a->type != b->type)
                return a->type - b->type;

        if (a->ctx != b->ctx)
                return a->ctx - b->ctx;

        if (a->evsel == NULL && b->evsel == NULL) {
                if (a->stat == b->stat)
                        return 0;

                if ((char *)a->stat < (char *)b->stat)
                        return -1;

                return 1;
        }

        if (a->evsel == b->evsel)
                return 0;
        if ((char *)a->evsel < (char *)b->evsel)
                return -1;
        return +1;
}
static struct rb_node *saved_value_new(struct rblist *rblist __maybe_unused,
                                       const void *entry)
{
        struct saved_value *nd = malloc(sizeof(struct saved_value));

        if (!nd)
                return NULL;
        memcpy(nd, entry, sizeof(struct saved_value));
        return &nd->rb_node;
}
static void saved_value_delete(struct rblist *rblist __maybe_unused,
                               struct rb_node *rb_node)
{
        struct saved_value *v;

        v = container_of(rb_node, struct saved_value, rb_node);
        free(v);
}
static struct saved_value *saved_value_lookup(struct perf_evsel *evsel,
                                              int cpu,
                                              bool create,
                                              enum stat_type type,
                                              int ctx,
                                              struct runtime_stat *st)
{
        struct rblist *rblist;
        struct rb_node *nd;
        struct saved_value dm = {
                .cpu = cpu,
                .evsel = evsel,
                .type = type,
                .ctx = ctx,
                .stat = st,
        };

        rblist = &st->value_list;

        nd = rblist__find(rblist, &dm);
        if (nd)
                return container_of(nd, struct saved_value, rb_node);
        if (create) {
                rblist__add_node(rblist, &dm);
                nd = rblist__find(rblist, &dm);
                if (nd)
                        return container_of(nd, struct saved_value, rb_node);
        }
        return NULL;
}
void runtime_stat__init(struct runtime_stat *st)
{
        struct rblist *rblist = &st->value_list;

        rblist__init(rblist);
        rblist->node_cmp = saved_value_cmp;
        rblist->node_new = saved_value_new;
        rblist->node_delete = saved_value_delete;
}

void runtime_stat__exit(struct runtime_stat *st)
{
        rblist__exit(&st->value_list);
}
void perf_stat__init_shadow_stats(void)
{
        have_frontend_stalled = pmu_have_event("cpu", "stalled-cycles-frontend");
        runtime_stat__init(&rt_stat);
}
static int evsel_context(struct perf_evsel *evsel)
{
        int ctx = 0;

        if (evsel->attr.exclude_kernel)
                ctx |= CTX_BIT_KERNEL;
        if (evsel->attr.exclude_user)
                ctx |= CTX_BIT_USER;
        if (evsel->attr.exclude_hv)
                ctx |= CTX_BIT_HV;
        if (evsel->attr.exclude_host)
                ctx |= CTX_BIT_HOST;
        if (evsel->attr.exclude_idle)
                ctx |= CTX_BIT_IDLE;

        return ctx;
}
static void reset_stat(struct runtime_stat *st)
{
        struct rblist *rblist;
        struct rb_node *pos, *next;

        rblist = &st->value_list;
        next = rb_first_cached(&rblist->entries);
        while (next) {
                pos = next;
                next = rb_next(pos);
                memset(&container_of(pos, struct saved_value, rb_node)->stats,
                       0,
                       sizeof(struct stats));
        }
}
void perf_stat__reset_shadow_stats(void)
{
        reset_stat(&rt_stat);
        memset(&walltime_nsecs_stats, 0, sizeof(walltime_nsecs_stats));
}
void perf_stat__reset_shadow_per_stat(struct runtime_stat *st)
{
        reset_stat(st);
}
static void update_runtime_stat(struct runtime_stat *st,
                                enum stat_type type,
                                int ctx, int cpu, u64 count)
{
        struct saved_value *v = saved_value_lookup(NULL, cpu, true,
                                                   type, ctx, st);

        if (v)
                update_stats(&v->stats, count);
}
/*
 * Update various tracking values we maintain to print
 * more semantic information such as miss/hit ratios,
 * instruction rates, etc:
 */
void perf_stat__update_shadow_stats(struct perf_evsel *counter, u64 count,
                                    int cpu, struct runtime_stat *st)
{
        int ctx = evsel_context(counter);
        u64 count_ns = count;

        count *= counter->scale;

        if (perf_evsel__is_clock(counter))
                update_runtime_stat(st, STAT_NSECS, 0, cpu, count_ns);
        else if (perf_evsel__match(counter, HARDWARE, HW_CPU_CYCLES))
                update_runtime_stat(st, STAT_CYCLES, ctx, cpu, count);
        else if (perf_stat_evsel__is(counter, CYCLES_IN_TX))
                update_runtime_stat(st, STAT_CYCLES_IN_TX, ctx, cpu, count);
        else if (perf_stat_evsel__is(counter, TRANSACTION_START))
                update_runtime_stat(st, STAT_TRANSACTION, ctx, cpu, count);
        else if (perf_stat_evsel__is(counter, ELISION_START))
                update_runtime_stat(st, STAT_ELISION, ctx, cpu, count);
        else if (perf_stat_evsel__is(counter, TOPDOWN_TOTAL_SLOTS))
                update_runtime_stat(st, STAT_TOPDOWN_TOTAL_SLOTS,
                                    ctx, cpu, count);
        else if (perf_stat_evsel__is(counter, TOPDOWN_SLOTS_ISSUED))
                update_runtime_stat(st, STAT_TOPDOWN_SLOTS_ISSUED,
                                    ctx, cpu, count);
        else if (perf_stat_evsel__is(counter, TOPDOWN_SLOTS_RETIRED))
                update_runtime_stat(st, STAT_TOPDOWN_SLOTS_RETIRED,
                                    ctx, cpu, count);
        else if (perf_stat_evsel__is(counter, TOPDOWN_FETCH_BUBBLES))
                update_runtime_stat(st, STAT_TOPDOWN_FETCH_BUBBLES,
                                    ctx, cpu, count);
        else if (perf_stat_evsel__is(counter, TOPDOWN_RECOVERY_BUBBLES))
                update_runtime_stat(st, STAT_TOPDOWN_RECOVERY_BUBBLES,
                                    ctx, cpu, count);
        else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_FRONTEND))
                update_runtime_stat(st, STAT_STALLED_CYCLES_FRONT,
                                    ctx, cpu, count);
        else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_BACKEND))
                update_runtime_stat(st, STAT_STALLED_CYCLES_BACK,
                                    ctx, cpu, count);
        else if (perf_evsel__match(counter, HARDWARE, HW_BRANCH_INSTRUCTIONS))
                update_runtime_stat(st, STAT_BRANCHES, ctx, cpu, count);
        else if (perf_evsel__match(counter, HARDWARE, HW_CACHE_REFERENCES))
                update_runtime_stat(st, STAT_CACHEREFS, ctx, cpu, count);
        else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1D))
                update_runtime_stat(st, STAT_L1_DCACHE, ctx, cpu, count);
        else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1I))
                update_runtime_stat(st, STAT_L1_ICACHE, ctx, cpu, count);
        else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_LL))
                update_runtime_stat(st, STAT_LL_CACHE, ctx, cpu, count);
        else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_DTLB))
                update_runtime_stat(st, STAT_DTLB_CACHE, ctx, cpu, count);
        else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_ITLB))
                update_runtime_stat(st, STAT_ITLB_CACHE, ctx, cpu, count);
        else if (perf_stat_evsel__is(counter, SMI_NUM))
                update_runtime_stat(st, STAT_SMI_NUM, ctx, cpu, count);
        else if (perf_stat_evsel__is(counter, APERF))
                update_runtime_stat(st, STAT_APERF, ctx, cpu, count);

        if (counter->collect_stat) {
                struct saved_value *v = saved_value_lookup(counter, cpu, true,
                                                           STAT_NONE, 0, st);
                update_stats(&v->stats, count);
        }
}
/* used for get_ratio_color() */
enum grc_type {
        GRC_STALLED_CYCLES_FE,
        GRC_STALLED_CYCLES_BE,
        GRC_CACHE_MISSES,
        GRC_MAX_NR
};
static const char *get_ratio_color(enum grc_type type, double ratio)
{
        static const double grc_table[GRC_MAX_NR][3] = {
                [GRC_STALLED_CYCLES_FE] = { 50.0, 30.0, 10.0 },
                [GRC_STALLED_CYCLES_BE] = { 75.0, 50.0, 20.0 },
                [GRC_CACHE_MISSES]      = { 20.0, 10.0, 5.0 },
        };
        const char *color = PERF_COLOR_NORMAL;

        if (ratio > grc_table[type][0])
                color = PERF_COLOR_RED;
        else if (ratio > grc_table[type][1])
                color = PERF_COLOR_MAGENTA;
        else if (ratio > grc_table[type][2])
                color = PERF_COLOR_YELLOW;

        return color;
}
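/*
 * Example (added for clarity): with the GRC_CACHE_MISSES thresholds
 * { 20.0, 10.0, 5.0 } above, a 25% miss ratio is printed red, 15%
 * magenta, 7% yellow, and 3% in the normal color.
 */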
static struct perf_evsel *perf_stat__find_event(struct perf_evlist *evsel_list,
                                                const char *name)
{
        struct perf_evsel *c2;

        evlist__for_each_entry (evsel_list, c2) {
                if (!strcasecmp(c2->name, name))
                        return c2;
        }
        return NULL;
}
/* Mark MetricExpr target events, and link the events that use them to those targets. */
void perf_stat__collect_metric_expr(struct perf_evlist *evsel_list)
{
        struct perf_evsel *counter, *leader, **metric_events, *oc;
        bool found;
        const char **metric_names;
        int i;
        int num_metric_names;

        evlist__for_each_entry(evsel_list, counter) {
                bool invalid = false;

                leader = counter->leader;
                if (!counter->metric_expr)
                        continue;
                metric_events = counter->metric_events;
                if (!metric_events) {
                        if (expr__find_other(counter->metric_expr, counter->name,
                                             &metric_names, &num_metric_names) < 0)
                                continue;

                        metric_events = calloc(sizeof(struct perf_evsel *),
                                               num_metric_names + 1);
                        if (!metric_events)
                                return;
                        counter->metric_events = metric_events;
                }

                for (i = 0; i < num_metric_names; i++) {
                        found = false;
                        if (leader) {
                                /* Search in group */
                                for_each_group_member (oc, leader) {
                                        if (!strcasecmp(oc->name, metric_names[i])) {
                                                found = true;
                                                break;
                                        }
                                }
                        }
                        if (!found) {
                                /* Search ignoring groups */
                                oc = perf_stat__find_event(evsel_list, metric_names[i]);
                        }
                        if (!oc) {
                                /* Deduping one is good enough to handle duplicated PMUs. */
                                static char *printed;

                                /*
                                 * Adding events automatically would be difficult, because
                                 * it would risk creating groups that are not schedulable.
                                 * perf stat doesn't understand all the scheduling constraints
                                 * of events. So we ask the user instead to add the missing
                                 * events.
                                 */
                                if (!printed || strcasecmp(printed, metric_names[i])) {
                                        fprintf(stderr,
                                                "Add %s event to groups to get metric expression for %s\n",
                                                metric_names[i],
                                                counter->name);
                                        printed = strdup(metric_names[i]);
                                }
                                invalid = true;
                                continue;
                        }
                        metric_events[i] = oc;
                        oc->collect_stat = true;
                }
                metric_events[i] = NULL;
                if (invalid) {
                        counter->metric_events = NULL;
                        counter->metric_expr = NULL;
                }
        }
}
static double runtime_stat_avg(struct runtime_stat *st,
                               enum stat_type type, int ctx, int cpu)
{
        struct saved_value *v;

        v = saved_value_lookup(NULL, cpu, false, type, ctx, st);
        if (!v)
                return 0.0;

        return avg_stats(&v->stats);
}
static double runtime_stat_n(struct runtime_stat *st,
                             enum stat_type type, int ctx, int cpu)
{
        struct saved_value *v;

        v = saved_value_lookup(NULL, cpu, false, type, ctx, st);
        if (!v)
                return 0.0;

        return v->stats.n;
}
static void print_stalled_cycles_frontend(struct perf_stat_config *config,
                                          int cpu,
                                          struct perf_evsel *evsel, double avg,
                                          struct perf_stat_output_ctx *out,
                                          struct runtime_stat *st)
{
        double total, ratio = 0.0;
        const char *color;
        int ctx = evsel_context(evsel);

        total = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu);

        if (total)
                ratio = avg / total * 100.0;

        color = get_ratio_color(GRC_STALLED_CYCLES_FE, ratio);

        if (ratio)
                out->print_metric(config, out->ctx, color, "%7.2f%%", "frontend cycles idle",
                                  ratio);
        else
                out->print_metric(config, out->ctx, NULL, NULL, "frontend cycles idle", 0);
}
static void print_stalled_cycles_backend(struct perf_stat_config *config,
                                         int cpu,
                                         struct perf_evsel *evsel, double avg,
                                         struct perf_stat_output_ctx *out,
                                         struct runtime_stat *st)
{
        double total, ratio = 0.0;
        const char *color;
        int ctx = evsel_context(evsel);

        total = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu);

        if (total)
                ratio = avg / total * 100.0;

        color = get_ratio_color(GRC_STALLED_CYCLES_BE, ratio);

        out->print_metric(config, out->ctx, color, "%7.2f%%", "backend cycles idle", ratio);
}
static void print_branch_misses(struct perf_stat_config *config,
                                int cpu,
                                struct perf_evsel *evsel,
                                double avg,
                                struct perf_stat_output_ctx *out,
                                struct runtime_stat *st)
{
        double total, ratio = 0.0;
        const char *color;
        int ctx = evsel_context(evsel);

        total = runtime_stat_avg(st, STAT_BRANCHES, ctx, cpu);

        if (total)
                ratio = avg / total * 100.0;

        color = get_ratio_color(GRC_CACHE_MISSES, ratio);

        out->print_metric(config, out->ctx, color, "%7.2f%%", "of all branches", ratio);
}
static void print_l1_dcache_misses(struct perf_stat_config *config,
                                   int cpu,
                                   struct perf_evsel *evsel,
                                   double avg,
                                   struct perf_stat_output_ctx *out,
                                   struct runtime_stat *st)
{
        double total, ratio = 0.0;
        const char *color;
        int ctx = evsel_context(evsel);

        total = runtime_stat_avg(st, STAT_L1_DCACHE, ctx, cpu);

        if (total)
                ratio = avg / total * 100.0;

        color = get_ratio_color(GRC_CACHE_MISSES, ratio);

        out->print_metric(config, out->ctx, color, "%7.2f%%", "of all L1-dcache hits", ratio);
}
static void print_l1_icache_misses(struct perf_stat_config *config,
                                   int cpu,
                                   struct perf_evsel *evsel,
                                   double avg,
                                   struct perf_stat_output_ctx *out,
                                   struct runtime_stat *st)
{
        double total, ratio = 0.0;
        const char *color;
        int ctx = evsel_context(evsel);

        total = runtime_stat_avg(st, STAT_L1_ICACHE, ctx, cpu);

        if (total)
                ratio = avg / total * 100.0;

        color = get_ratio_color(GRC_CACHE_MISSES, ratio);

        out->print_metric(config, out->ctx, color, "%7.2f%%", "of all L1-icache hits", ratio);
}
static void print_dtlb_cache_misses(struct perf_stat_config *config,
                                    int cpu,
                                    struct perf_evsel *evsel,
                                    double avg,
                                    struct perf_stat_output_ctx *out,
                                    struct runtime_stat *st)
{
        double total, ratio = 0.0;
        const char *color;
        int ctx = evsel_context(evsel);

        total = runtime_stat_avg(st, STAT_DTLB_CACHE, ctx, cpu);

        if (total)
                ratio = avg / total * 100.0;

        color = get_ratio_color(GRC_CACHE_MISSES, ratio);

        out->print_metric(config, out->ctx, color, "%7.2f%%", "of all dTLB cache hits", ratio);
}
static void print_itlb_cache_misses(struct perf_stat_config *config,
                                    int cpu,
                                    struct perf_evsel *evsel,
                                    double avg,
                                    struct perf_stat_output_ctx *out,
                                    struct runtime_stat *st)
{
        double total, ratio = 0.0;
        const char *color;
        int ctx = evsel_context(evsel);

        total = runtime_stat_avg(st, STAT_ITLB_CACHE, ctx, cpu);

        if (total)
                ratio = avg / total * 100.0;

        color = get_ratio_color(GRC_CACHE_MISSES, ratio);

        out->print_metric(config, out->ctx, color, "%7.2f%%", "of all iTLB cache hits", ratio);
}
static void print_ll_cache_misses(struct perf_stat_config *config,
                                  int cpu,
                                  struct perf_evsel *evsel,
                                  double avg,
                                  struct perf_stat_output_ctx *out,
                                  struct runtime_stat *st)
{
        double total, ratio = 0.0;
        const char *color;
        int ctx = evsel_context(evsel);

        total = runtime_stat_avg(st, STAT_LL_CACHE, ctx, cpu);

        if (total)
                ratio = avg / total * 100.0;

        color = get_ratio_color(GRC_CACHE_MISSES, ratio);

        out->print_metric(config, out->ctx, color, "%7.2f%%", "of all LL-cache hits", ratio);
}
/*
 * High level "TopDown" CPU core pipeline bottleneck breakdown.
 *
 * Basic concept following
 * Yasin, A Top Down Method for Performance analysis and Counter architecture
 *
 * The CPU pipeline is divided into 4 areas that can be bottlenecks:
 *
 * Frontend -> Backend -> Retiring
 * BadSpeculation in addition means out of order execution that is thrown away
 * (for example branch mispredictions).
 * Frontend is instruction decoding.
 * Backend is execution, like computation and accessing data in memory.
 * Retiring is good execution that is not directly bottlenecked.
 *
 * The formulas are computed in slots.
 * A slot is an entry in the pipeline for each unit of the pipeline width
 * (for example a 4-wide pipeline has 4 slots for each cycle).
 *
 * Formulas:
 * BadSpeculation = ((SlotsIssued - SlotsRetired) + RecoveryBubbles) /
 *                      TotalSlots
 * Retiring = SlotsRetired / TotalSlots
 * FrontendBound = FetchBubbles / TotalSlots
 * BackendBound = 1.0 - BadSpeculation - Retiring - FrontendBound
 *
 * The kernel provides the mapping to the low level CPU events and any scaling
 * needed for the CPU pipeline width, for example:
 *
 * TotalSlots = Cycles * 4
 *
 * The scaling factor is communicated in the sysfs unit.
 *
 * In some cases the CPU may not be able to measure all the formulas due to
 * missing events. In this case multiple formulas are combined, as possible.
 *
 * Full TopDown supports more levels to sub-divide each area: for example
 * BackendBound into computing bound and memory bound. For now we only
 * support Level 1 TopDown.
 */
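/*
 * Worked example (hypothetical numbers, added for clarity) for a 4-wide CPU:
 *   Cycles = 1000  =>  TotalSlots = 4000
 *   SlotsIssued = 3000, SlotsRetired = 2500, RecoveryBubbles = 100,
 *   FetchBubbles = 400
 *
 *   BadSpeculation = ((3000 - 2500) + 100) / 4000 = 0.15
 *   Retiring       = 2500 / 4000                  = 0.625
 *   FrontendBound  = 400 / 4000                   = 0.10
 *   BackendBound   = 1.0 - 0.15 - 0.625 - 0.10    = 0.125
 */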
/* Clamp slightly negative values (measurement/rounding noise) to zero. */
static double sanitize_val(double x)
{
        if (x < 0 && x >= -0.02)
                return 0.0;
        return x;
}
static double td_total_slots(int ctx, int cpu, struct runtime_stat *st)
{
        return runtime_stat_avg(st, STAT_TOPDOWN_TOTAL_SLOTS, ctx, cpu);
}
static double td_bad_spec(int ctx, int cpu, struct runtime_stat *st)
{
        double bad_spec = 0;
        double total_slots;
        double total;

        total = runtime_stat_avg(st, STAT_TOPDOWN_SLOTS_ISSUED, ctx, cpu) -
                runtime_stat_avg(st, STAT_TOPDOWN_SLOTS_RETIRED, ctx, cpu) +
                runtime_stat_avg(st, STAT_TOPDOWN_RECOVERY_BUBBLES, ctx, cpu);

        total_slots = td_total_slots(ctx, cpu, st);
        if (total_slots)
                bad_spec = total / total_slots;
        return sanitize_val(bad_spec);
}
static double td_retiring(int ctx, int cpu, struct runtime_stat *st)
{
        double retiring = 0;
        double total_slots = td_total_slots(ctx, cpu, st);
        double ret_slots = runtime_stat_avg(st, STAT_TOPDOWN_SLOTS_RETIRED,
                                            ctx, cpu);

        if (total_slots)
                retiring = ret_slots / total_slots;
        return retiring;
}
static double td_fe_bound(int ctx, int cpu, struct runtime_stat *st)
{
        double fe_bound = 0;
        double total_slots = td_total_slots(ctx, cpu, st);
        double fetch_bub = runtime_stat_avg(st, STAT_TOPDOWN_FETCH_BUBBLES,
                                            ctx, cpu);

        if (total_slots)
                fe_bound = fetch_bub / total_slots;
        return fe_bound;
}
static double td_be_bound(int ctx, int cpu, struct runtime_stat *st)
{
        double sum = (td_fe_bound(ctx, cpu, st) +
                      td_bad_spec(ctx, cpu, st) +
                      td_retiring(ctx, cpu, st));
        if (sum == 0)
                return 0;
        return sanitize_val(1.0 - sum);
}
static void print_smi_cost(struct perf_stat_config *config,
                           int cpu, struct perf_evsel *evsel,
                           struct perf_stat_output_ctx *out,
                           struct runtime_stat *st)
{
        double smi_num, aperf, cycles, cost = 0.0;
        int ctx = evsel_context(evsel);
        const char *color = NULL;

        smi_num = runtime_stat_avg(st, STAT_SMI_NUM, ctx, cpu);
        aperf = runtime_stat_avg(st, STAT_APERF, ctx, cpu);
        cycles = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu);

        if ((cycles == 0) || (aperf == 0))
                return;

        if (smi_num)
                cost = (aperf - cycles) / aperf * 100.00;

        if (cost > 10)
                color = PERF_COLOR_RED;
        out->print_metric(config, out->ctx, color, "%8.1f%%", "SMI cycles%", cost);
        out->print_metric(config, out->ctx, NULL, "%4.0f", "SMI#", smi_num);
}
static void generic_metric(struct perf_stat_config *config,
                           const char *metric_expr,
                           struct perf_evsel **metric_events,
                           char *name,
                           const char *metric_name,
                           double avg,
                           int cpu,
                           struct perf_stat_output_ctx *out,
                           struct runtime_stat *st)
{
        print_metric_t print_metric = out->print_metric;
        struct parse_ctx pctx;
        double ratio;
        int i;
        void *ctxp = out->ctx;

        expr__ctx_init(&pctx);
        expr__add_id(&pctx, name, avg);
        for (i = 0; metric_events[i]; i++) {
                struct saved_value *v;
                struct stats *stats;
                double scale;

                if (!strcmp(metric_events[i]->name, "duration_time")) {
                        stats = &walltime_nsecs_stats;
                        scale = 1e-9;
                } else {
                        v = saved_value_lookup(metric_events[i], cpu, false,
                                               STAT_NONE, 0, st);
                        if (!v)
                                break;
                        stats = &v->stats;
                        scale = 1.0;
                }
                expr__add_id(&pctx, metric_events[i]->name, avg_stats(stats)*scale);
        }
        if (!metric_events[i]) {
                const char *p = metric_expr;

                if (expr__parse(&ratio, &pctx, &p) == 0)
                        print_metric(config, ctxp, NULL, "%8.1f",
                                metric_name ?
                                metric_name :
                                out->force_header ? name : "",
                                ratio);
                else
                        print_metric(config, ctxp, NULL, NULL,
                                     out->force_header ?
                                     (metric_name ? metric_name : name) : "", 0);
        } else
                print_metric(config, ctxp, NULL, NULL, "", 0);
}
void perf_stat__print_shadow_stats(struct perf_stat_config *config,
                                   struct perf_evsel *evsel,
                                   double avg, int cpu,
                                   struct perf_stat_output_ctx *out,
                                   struct rblist *metric_events,
                                   struct runtime_stat *st)
{
        void *ctxp = out->ctx;
        print_metric_t print_metric = out->print_metric;
        double total, ratio = 0.0, total2;
        const char *color = NULL;
        int ctx = evsel_context(evsel);
        struct metric_event *me;
        int num = 1;
        if (perf_evsel__match(evsel, HARDWARE, HW_INSTRUCTIONS)) {
                total = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu);

                if (total) {
                        ratio = avg / total;
                        print_metric(config, ctxp, NULL, "%7.2f ",
                                        "insn per cycle", ratio);
                } else {
                        print_metric(config, ctxp, NULL, NULL, "insn per cycle", 0);
                }

                total = runtime_stat_avg(st, STAT_STALLED_CYCLES_FRONT,
                                         ctx, cpu);

                total = max(total, runtime_stat_avg(st,
                                                    STAT_STALLED_CYCLES_BACK,
                                                    ctx, cpu));

                if (total && avg) {
                        out->new_line(config, ctxp);
                        ratio = total / avg;
                        print_metric(config, ctxp, NULL, "%7.2f ",
                                        "stalled cycles per insn",
                                        ratio);
                } else if (have_frontend_stalled) {
                        print_metric(config, ctxp, NULL, NULL,
                                     "stalled cycles per insn", 0);
                }
        } else if (perf_evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES)) {
                if (runtime_stat_n(st, STAT_BRANCHES, ctx, cpu) != 0)
                        print_branch_misses(config, cpu, evsel, avg, out, st);
                else
                        print_metric(config, ctxp, NULL, NULL, "of all branches", 0);
        } else if (
                evsel->attr.type == PERF_TYPE_HW_CACHE &&
                evsel->attr.config == ( PERF_COUNT_HW_CACHE_L1D |
                                        ((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
                                        ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {

                if (runtime_stat_n(st, STAT_L1_DCACHE, ctx, cpu) != 0)
                        print_l1_dcache_misses(config, cpu, evsel, avg, out, st);
                else
                        print_metric(config, ctxp, NULL, NULL, "of all L1-dcache hits", 0);
        } else if (
                evsel->attr.type == PERF_TYPE_HW_CACHE &&
                evsel->attr.config == ( PERF_COUNT_HW_CACHE_L1I |
                                        ((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
                                        ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {

                if (runtime_stat_n(st, STAT_L1_ICACHE, ctx, cpu) != 0)
                        print_l1_icache_misses(config, cpu, evsel, avg, out, st);
                else
                        print_metric(config, ctxp, NULL, NULL, "of all L1-icache hits", 0);
        } else if (
                evsel->attr.type == PERF_TYPE_HW_CACHE &&
                evsel->attr.config == ( PERF_COUNT_HW_CACHE_DTLB |
                                        ((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
                                        ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {

                if (runtime_stat_n(st, STAT_DTLB_CACHE, ctx, cpu) != 0)
                        print_dtlb_cache_misses(config, cpu, evsel, avg, out, st);
                else
                        print_metric(config, ctxp, NULL, NULL, "of all dTLB cache hits", 0);
        } else if (
                evsel->attr.type == PERF_TYPE_HW_CACHE &&
                evsel->attr.config == ( PERF_COUNT_HW_CACHE_ITLB |
                                        ((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
                                        ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {

                if (runtime_stat_n(st, STAT_ITLB_CACHE, ctx, cpu) != 0)
                        print_itlb_cache_misses(config, cpu, evsel, avg, out, st);
                else
                        print_metric(config, ctxp, NULL, NULL, "of all iTLB cache hits", 0);
        } else if (
                evsel->attr.type == PERF_TYPE_HW_CACHE &&
                evsel->attr.config == ( PERF_COUNT_HW_CACHE_LL |
                                        ((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
                                        ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {

                if (runtime_stat_n(st, STAT_LL_CACHE, ctx, cpu) != 0)
                        print_ll_cache_misses(config, cpu, evsel, avg, out, st);
                else
                        print_metric(config, ctxp, NULL, NULL, "of all LL-cache hits", 0);
        } else if (perf_evsel__match(evsel, HARDWARE, HW_CACHE_MISSES)) {
                total = runtime_stat_avg(st, STAT_CACHEREFS, ctx, cpu);

                if (total)
                        ratio = avg * 100 / total;

                if (runtime_stat_n(st, STAT_CACHEREFS, ctx, cpu) != 0)
                        print_metric(config, ctxp, NULL, "%8.3f %%",
                                     "of all cache refs", ratio);
                else
                        print_metric(config, ctxp, NULL, NULL, "of all cache refs", 0);
        } else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_FRONTEND)) {
                print_stalled_cycles_frontend(config, cpu, evsel, avg, out, st);
        } else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_BACKEND)) {
                print_stalled_cycles_backend(config, cpu, evsel, avg, out, st);
        } else if (perf_evsel__match(evsel, HARDWARE, HW_CPU_CYCLES)) {
                total = runtime_stat_avg(st, STAT_NSECS, 0, cpu);

                if (total) {
                        ratio = avg / total;
                        print_metric(config, ctxp, NULL, "%8.3f", "GHz", ratio);
                } else {
                        print_metric(config, ctxp, NULL, NULL, "GHz", 0);
                }
        } else if (perf_stat_evsel__is(evsel, CYCLES_IN_TX)) {
                total = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu);

                if (total)
                        print_metric(config, ctxp, NULL,
                                "%7.2f%%", "transactional cycles",
                                100.0 * (avg / total));
                else
                        print_metric(config, ctxp, NULL, NULL, "transactional cycles",
                                     0);
        } else if (perf_stat_evsel__is(evsel, CYCLES_IN_TX_CP)) {
                total = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu);
                total2 = runtime_stat_avg(st, STAT_CYCLES_IN_TX, ctx, cpu);

                if (total2 < avg)
                        total2 = avg;
                if (total)
                        print_metric(config, ctxp, NULL, "%7.2f%%", "aborted cycles",
                                100.0 * ((total2 - avg) / total));
                else
                        print_metric(config, ctxp, NULL, NULL, "aborted cycles", 0);
        } else if (perf_stat_evsel__is(evsel, TRANSACTION_START)) {
                total = runtime_stat_avg(st, STAT_CYCLES_IN_TX,
                                         ctx, cpu);

                if (avg)
                        ratio = total / avg;

                if (runtime_stat_n(st, STAT_CYCLES_IN_TX, ctx, cpu) != 0)
                        print_metric(config, ctxp, NULL, "%8.0f",
                                     "cycles / transaction", ratio);
                else
                        print_metric(config, ctxp, NULL, NULL, "cycles / transaction",
                                     0);
        } else if (perf_stat_evsel__is(evsel, ELISION_START)) {
                total = runtime_stat_avg(st, STAT_CYCLES_IN_TX,
                                         ctx, cpu);

                if (avg)
                        ratio = total / avg;

                print_metric(config, ctxp, NULL, "%8.0f", "cycles / elision", ratio);
        } else if (perf_evsel__is_clock(evsel)) {
                if ((ratio = avg_stats(&walltime_nsecs_stats)) != 0)
                        print_metric(config, ctxp, NULL, "%8.3f", "CPUs utilized",
                                     avg / (ratio * evsel->scale));
                else
                        print_metric(config, ctxp, NULL, NULL, "CPUs utilized", 0);
        } else if (perf_stat_evsel__is(evsel, TOPDOWN_FETCH_BUBBLES)) {
                double fe_bound = td_fe_bound(ctx, cpu, st);

                if (fe_bound > 0.2)
                        color = PERF_COLOR_RED;
                print_metric(config, ctxp, color, "%8.1f%%", "frontend bound",
                                fe_bound * 100.);
        } else if (perf_stat_evsel__is(evsel, TOPDOWN_SLOTS_RETIRED)) {
                double retiring = td_retiring(ctx, cpu, st);

                if (retiring > 0.7)
                        color = PERF_COLOR_GREEN;
                print_metric(config, ctxp, color, "%8.1f%%", "retiring",
                                retiring * 100.);
        } else if (perf_stat_evsel__is(evsel, TOPDOWN_RECOVERY_BUBBLES)) {
                double bad_spec = td_bad_spec(ctx, cpu, st);

                if (bad_spec > 0.1)
                        color = PERF_COLOR_RED;
                print_metric(config, ctxp, color, "%8.1f%%", "bad speculation",
                                bad_spec * 100.);
        } else if (perf_stat_evsel__is(evsel, TOPDOWN_SLOTS_ISSUED)) {
                double be_bound = td_be_bound(ctx, cpu, st);
                const char *name = "backend bound";
                static int have_recovery_bubbles = -1;

                /* In case the CPU does not support topdown-recovery-bubbles */
                if (have_recovery_bubbles < 0)
                        have_recovery_bubbles = pmu_have_event("cpu",
                                        "topdown-recovery-bubbles");
                if (!have_recovery_bubbles)
                        name = "backend bound/bad spec";

                if (be_bound > 0.2)
                        color = PERF_COLOR_RED;
                if (td_total_slots(ctx, cpu, st) > 0)
                        print_metric(config, ctxp, color, "%8.1f%%", name,
                                        be_bound * 100.);
                else
                        print_metric(config, ctxp, NULL, NULL, name, 0);
        } else if (evsel->metric_expr) {
                generic_metric(config, evsel->metric_expr, evsel->metric_events,
                               evsel->name, evsel->metric_name, avg, cpu, out, st);
        } else if (runtime_stat_n(st, STAT_NSECS, 0, cpu) != 0) {
                char unit = 'M';
                char unit_buf[10];

                total = runtime_stat_avg(st, STAT_NSECS, 0, cpu);

                if (total)
                        ratio = 1000.0 * avg / total;
                if (ratio < 0.001) {
                        ratio *= 1000;
                        unit = 'K';
                }
                snprintf(unit_buf, sizeof(unit_buf), "%c/sec", unit);
                print_metric(config, ctxp, NULL, "%8.3f", unit_buf, ratio);
        } else if (perf_stat_evsel__is(evsel, SMI_NUM)) {
                print_smi_cost(config, cpu, evsel, out, st);
        } else {
                num = 0;
        }

        if ((me = metricgroup__lookup(metric_events, evsel, false)) != NULL) {
                struct metric_expr *mexp;

                list_for_each_entry (mexp, &me->head, nd) {
                        if (num++ > 0)
                                out->new_line(config, ctxp);
                        generic_metric(config, mexp->metric_expr, mexp->metric_events,
                                       evsel->name, mexp->metric_name,
                                       avg, cpu, out, st);
                }
        }
        if (num == 0)
                print_metric(config, ctxp, NULL, NULL, NULL, 0);
}