// SPDX-License-Identifier: GPL-2.0
#include <linux/kernel.h>
#include "map_symbol.h"
#include "mem-events.h"
unsigned int perf_mem_events__loads_ldlat = 30;

#define E(t, n, s, l, a) { .tag = t, .name = n, .event_name = s, .ldlat = l, .aux_event = a }
struct perf_mem_event perf_mem_events[PERF_MEM_EVENTS__MAX] = {
	E("ldlat-loads",	"%s/mem-loads,ldlat=%u/P",	"mem-loads",	true,	0),
	E("ldlat-stores",	"%s/mem-stores/P",		"mem-stores",	false,	0),
	E(NULL,			NULL,			NULL,		false,	0),
};
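/*
 * The "%s" in each event string is filled in with the PMU name and the "%u"
 * with perf_mem_events__loads_ldlat by perf_pmu__mem_events_name() below.
 * For example (assuming a core PMU simply named "cpu" and the default
 * threshold of 30), "ldlat-loads" would expand to
 * "cpu/mem-loads,ldlat=30/P" and "ldlat-stores" to "cpu/mem-stores/P".
 */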
bool perf_mem_record[PERF_MEM_EVENTS__MAX] = { 0 };
static char mem_loads_name[100];
static char mem_stores_name[100];
struct perf_mem_event *perf_pmu__mem_events_ptr(struct perf_pmu *pmu, int i)
{
	if (i >= PERF_MEM_EVENTS__MAX || !pmu)
		return NULL;

	return &pmu->mem_events[i];
}
static struct perf_pmu *perf_pmus__scan_mem(struct perf_pmu *pmu)
{
	while ((pmu = perf_pmus__scan(pmu)) != NULL) {
		if (pmu->mem_events)
			return pmu;
	}
	return NULL;
}
struct perf_pmu *perf_mem_events_find_pmu(void)
{
	/*
	 * perf mem currently doesn't support per-PMU configuration.
	 * The exact same configuration is applied to all the
	 * mem_events-supported PMUs, so return the first such PMU.
	 *
	 * Note: the only case with multiple mem_events-supported PMUs
	 * is Intel hybrid, where the same mem_events are shared among
	 * the PMUs, so configuring only the first PMU is sufficient.
	 */
	return perf_pmus__scan_mem(NULL);
}
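/*
 * Illustration (an assumption, not taken from this file): on an Intel
 * hybrid system the scan typically encounters the "cpu_core" and
 * "cpu_atom" PMUs; both carry the same mem_events table, so returning
 * whichever one is found first is enough to configure the events.
 */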
/**
 * perf_pmu__mem_events_num_mem_pmus - Get the number of mem PMUs, starting from the given pmu
 * @pmu: Starting pmu. If it's NULL, search the entire PMU list.
 */
int perf_pmu__mem_events_num_mem_pmus(struct perf_pmu *pmu)
{
	int num = 0;

	while ((pmu = perf_pmus__scan_mem(pmu)) != NULL)
		num++;

	return num;
}
static const char *perf_pmu__mem_events_name(int i, struct perf_pmu *pmu)
{
	struct perf_mem_event *e;

	if (i >= PERF_MEM_EVENTS__MAX || !pmu)
		return NULL;

	e = &pmu->mem_events[i];
	if (!e || !e->name)
		return NULL;

	if (i == PERF_MEM_EVENTS__LOAD || i == PERF_MEM_EVENTS__LOAD_STORE) {
		if (e->ldlat) {
			if (!e->aux_event) {
				/* ARM and most of Intel */
				scnprintf(mem_loads_name, sizeof(mem_loads_name),
					  e->name, pmu->name,
					  perf_mem_events__loads_ldlat);
			} else {
				/* Intel with mem-loads-aux event */
				scnprintf(mem_loads_name, sizeof(mem_loads_name),
					  e->name, pmu->name, pmu->name,
					  perf_mem_events__loads_ldlat);
			}
		} else {
			/* No ldlat parameter, e.g. AMD and POWER */
			scnprintf(mem_loads_name, sizeof(mem_loads_name),
				  e->name, pmu->name);
		}

		return mem_loads_name;
	}

	if (i == PERF_MEM_EVENTS__STORE) {
		scnprintf(mem_stores_name, sizeof(mem_stores_name),
			  e->name, pmu->name);
		return mem_stores_name;
	}

	return NULL;
}
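/*
 * Hedged illustration of the aux_event branch above (the actual template
 * lives in the per-arch mem-events tables, so this is an assumption): on
 * Intel parts that require mem-loads-aux, e->name contains two "%s"
 * conversions and expands roughly to
 * "{cpu/mem-loads-aux/,cpu/mem-loads,ldlat=30/}:P" for a PMU named "cpu".
 */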
bool is_mem_loads_aux_event(struct evsel *leader)
{
	struct perf_pmu *pmu = leader->pmu;
	struct perf_mem_event *e;

	if (!pmu || !pmu->mem_events)
		return false;

	e = &pmu->mem_events[PERF_MEM_EVENTS__LOAD];
	if (!e->aux_event)
		return false;

	return leader->core.attr.config == e->aux_event;
}
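/*
 * Note on is_mem_loads_aux_event() above: it returns true only when the
 * group leader's config matches the auxiliary load event (e->aux_event)
 * that some PMUs require to be scheduled alongside the actual mem-loads
 * sampling event; on PMUs without an aux_event it always returns false.
 */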
int perf_pmu__mem_events_parse(struct perf_pmu *pmu, const char *str)
{
	char *tok, *saveptr = NULL;
	bool found = false;
	char *buf;
	int j;

	/* We need a buffer that we know we can write to. */
	buf = malloc(strlen(str) + 1);
	if (!buf)
		return -ENOMEM;
	strcpy(buf, str);

	tok = strtok_r((char *)buf, ",", &saveptr);
	while (tok) {
		for (j = 0; j < PERF_MEM_EVENTS__MAX; j++) {
			struct perf_mem_event *e = perf_pmu__mem_events_ptr(pmu, j);

			if (!e->tag)
				continue;

			if (strstr(e->tag, tok))
				perf_mem_record[j] = found = true;
		}

		tok = strtok_r(NULL, ",", &saveptr);
	}
	free(buf);

	if (found)
		return 0;

	pr_err("failed: event '%s' not found, use '-e list' to get list of available events\n", str);
	return -1;
}
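/*
 * Usage sketch (hypothetical input, matching the parsing above): a string
 * such as "ldlat-loads,ldlat-stores" is split on ',' and each token is
 * matched against the event tags, so both PERF_MEM_EVENTS__LOAD and
 * PERF_MEM_EVENTS__STORE end up marked in perf_mem_record[].
 */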
static bool perf_pmu__mem_events_supported(const char *mnt, struct perf_pmu *pmu,
					   struct perf_mem_event *e)
{
	char path[PATH_MAX];
	struct stat st;

	if (!e->event_name)
		return true;

	scnprintf(path, PATH_MAX, "%s/devices/%s/events/%s", mnt, pmu->name, e->event_name);

	return !stat(path, &st);
}
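/*
 * Example of the sysfs probe above (paths are illustrative): with sysfs
 * mounted at "/sys" and a PMU named "cpu", the "mem-loads" event is
 * considered supported if "/sys/devices/cpu/events/mem-loads" exists.
 */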
static int __perf_pmu__mem_events_init(struct perf_pmu *pmu)
{
	const char *mnt = sysfs__mount();
	bool found = false;
	int j;

	if (!mnt)
		return -ENOENT;

	for (j = 0; j < PERF_MEM_EVENTS__MAX; j++) {
		struct perf_mem_event *e = perf_pmu__mem_events_ptr(pmu, j);

		/*
		 * If the event entry isn't valid, skip initialization
		 * and "e->supported" will stay false.
		 */
		if (!e->tag)
			continue;

		e->supported |= perf_pmu__mem_events_supported(mnt, pmu, e);
		if (e->supported)
			found = true;
	}

	return found ? 0 : -ENOENT;
}
int perf_pmu__mem_events_init(void)
{
	struct perf_pmu *pmu = NULL;

	while ((pmu = perf_pmus__scan_mem(pmu)) != NULL) {
		if (__perf_pmu__mem_events_init(pmu))
			return -ENOENT;
	}

	return 0;
}
void perf_pmu__mem_events_list(struct perf_pmu *pmu)
{
	int j;

	for (j = 0; j < PERF_MEM_EVENTS__MAX; j++) {
		struct perf_mem_event *e = perf_pmu__mem_events_ptr(pmu, j);

		fprintf(stderr, "%-*s%-*s%s",
			e->tag ? 13 : 0,
			e->tag ? : "",
			e->tag && verbose > 0 ? 25 : 0,
			e->tag && verbose > 0 ? perf_pmu__mem_events_name(j, pmu) : "",
			e->supported ? ": available\n" : "");
	}
}
int perf_mem_events__record_args(const char **rec_argv, int *argv_nr)
{
	const char *mnt = sysfs__mount();
	struct perf_pmu *pmu = NULL;
	struct perf_mem_event *e;
	int i = *argv_nr;
	const char *s;
	char *copy;
	struct perf_cpu_map *cpu_map = NULL;

	while ((pmu = perf_pmus__scan_mem(pmu)) != NULL) {
		for (int j = 0; j < PERF_MEM_EVENTS__MAX; j++) {
			e = perf_pmu__mem_events_ptr(pmu, j);

			if (!perf_mem_record[j])
				continue;

			if (!e->supported) {
				pr_err("failed: event '%s' not supported\n",
				       perf_pmu__mem_events_name(j, pmu));
				return -1;
			}

			s = perf_pmu__mem_events_name(j, pmu);
			if (!s || !perf_pmu__mem_events_supported(mnt, pmu, e))
				continue;

			copy = strdup(s);
			if (!copy)
				return -1;

			rec_argv[i++] = "-e";
			rec_argv[i++] = copy;

			cpu_map = perf_cpu_map__merge(cpu_map, pmu->cpus);
		}
	}

	if (cpu_map) {
		if (!perf_cpu_map__equal(cpu_map, cpu_map__online())) {
			char buf[200];

			cpu_map__snprint(cpu_map, buf, sizeof(buf));
			pr_warning("Memory events are enabled on a subset of CPUs: %s\n", buf);
		}
		perf_cpu_map__put(cpu_map);
	}

	*argv_nr = i;
	return 0;
}
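/*
 * Illustrative result (assuming a single PMU named "cpu" and both events
 * selected): the record argv is extended with
 *   -e cpu/mem-loads,ldlat=30/P -e cpu/mem-stores/P
 * and a warning is printed if the merged PMU cpumask does not cover every
 * online CPU.
 */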
static const char * const tlb_access[] = {
	"N/A", "HIT", "MISS", "L1", "L2", "Walker", "Fault",
};
int perf_mem__tlb_scnprintf(char *out, size_t sz, const struct mem_info *mem_info)
{
	size_t l = 0, i;
	u64 m = PERF_MEM_TLB_NA;
	u64 hit, miss;

	sz -= 1; /* -1 for null termination */
	out[0] = '\0';

	if (mem_info)
		m = mem_info__const_data_src(mem_info)->mem_dtlb;

	hit = m & PERF_MEM_TLB_HIT;
	miss = m & PERF_MEM_TLB_MISS;

	/* already taken care of */
	m &= ~(PERF_MEM_TLB_HIT|PERF_MEM_TLB_MISS);

	for (i = 0; m && i < ARRAY_SIZE(tlb_access); i++, m >>= 1) {
		if (!(m & 0x1))
			continue;
		if (l) {
			strcat(out, " or ");
			l += 4;
		}
		l += scnprintf(out + l, sz - l, tlb_access[i]);
	}
	if (*out == '\0')
		l += scnprintf(out, sz - l, "N/A");
	if (hit)
		l += scnprintf(out + l, sz - l, " hit");
	if (miss)
		l += scnprintf(out + l, sz - l, " miss");

	return l;
}
static const char * const mem_lvl[] = {
	"N/A", "HIT", "MISS", "L1", "LFB/MAB", "L2", "L3", "Local RAM",
	"Remote RAM (1 hop)",
	"Remote RAM (2 hops)",
	"Remote Cache (1 hop)",
	"Remote Cache (2 hops)",
	"I/O", "Uncached",
};
static const char * const mem_lvlnum[] = {
	[PERF_MEM_LVLNUM_L1] = "L1",
	[PERF_MEM_LVLNUM_L2] = "L2",
	[PERF_MEM_LVLNUM_L3] = "L3",
	[PERF_MEM_LVLNUM_L4] = "L4",
	[PERF_MEM_LVLNUM_L2_MHB] = "L2 MHB",
	[PERF_MEM_LVLNUM_MSC] = "Memory-side Cache",
	[PERF_MEM_LVLNUM_UNC] = "Uncached",
	[PERF_MEM_LVLNUM_CXL] = "CXL",
	[PERF_MEM_LVLNUM_IO] = "I/O",
	[PERF_MEM_LVLNUM_ANY_CACHE] = "Any cache",
	[PERF_MEM_LVLNUM_LFB] = "LFB/MAB",
	[PERF_MEM_LVLNUM_RAM] = "RAM",
	[PERF_MEM_LVLNUM_PMEM] = "PMEM",
	[PERF_MEM_LVLNUM_NA] = "N/A",
};
static const char * const mem_hops[] = {
	"N/A",
	/*
	 * While printing, 'Remote' will be added to represent
	 * 'Remote core, same node' accesses, since the remote field
	 * has to be set together with the mem_hops field.
	 */
	"core, same node",
	"node, same socket",
	"socket, same board",
	"board",
};
static int perf_mem__op_scnprintf(char *out, size_t sz, const struct mem_info *mem_info)
{
	u64 op = PERF_MEM_LOCK_NA;
	int l;

	if (mem_info)
		op = mem_info__const_data_src(mem_info)->mem_op;

	if (op & PERF_MEM_OP_NA)
		l = scnprintf(out, sz, "N/A");
	else if (op & PERF_MEM_OP_LOAD)
		l = scnprintf(out, sz, "LOAD");
	else if (op & PERF_MEM_OP_STORE)
		l = scnprintf(out, sz, "STORE");
	else if (op & PERF_MEM_OP_PFETCH)
		l = scnprintf(out, sz, "PFETCH");
	else if (op & PERF_MEM_OP_EXEC)
		l = scnprintf(out, sz, "EXEC");
	else
		l = scnprintf(out, sz, "No");

	return l;
}
int perf_mem__lvl_scnprintf(char *out, size_t sz, const struct mem_info *mem_info)
{
	union perf_mem_data_src data_src;
	int printed = 0;
	size_t l = 0;
	size_t i;
	int lvl;
	char hit_miss[5] = {0};

	sz -= 1; /* -1 for null termination */
	out[0] = '\0';

	if (!mem_info)
		goto na;

	data_src = *mem_info__const_data_src(mem_info);

	if (data_src.mem_lvl & PERF_MEM_LVL_HIT)
		memcpy(hit_miss, "hit", 3);
	else if (data_src.mem_lvl & PERF_MEM_LVL_MISS)
		memcpy(hit_miss, "miss", 4);

	lvl = data_src.mem_lvl_num;
	if (lvl && lvl != PERF_MEM_LVLNUM_NA) {
		if (data_src.mem_remote) {
			strcat(out, "Remote ");
			l += 7;
		}

		if (data_src.mem_hops)
			l += scnprintf(out + l, sz - l, "%s ", mem_hops[data_src.mem_hops]);

		if (mem_lvlnum[lvl])
			l += scnprintf(out + l, sz - l, mem_lvlnum[lvl]);
		else
			l += scnprintf(out + l, sz - l, "Unknown level %d", lvl);

		l += scnprintf(out + l, sz - l, " %s", hit_miss);
		return l;
	}

	lvl = data_src.mem_lvl;
	if (!lvl)
		goto na;

	lvl &= ~(PERF_MEM_LVL_NA | PERF_MEM_LVL_HIT | PERF_MEM_LVL_MISS);
	if (!lvl)
		goto na;

	for (i = 0; lvl && i < ARRAY_SIZE(mem_lvl); i++, lvl >>= 1) {
		if (!(lvl & 0x1))
			continue;
		if (printed++) {
			strcat(out, " or ");
			l += 4;
		}
		l += scnprintf(out + l, sz - l, mem_lvl[i]);
	}

	if (printed)
		l += scnprintf(out + l, sz - l, " %s", hit_miss);

	return l;

na:
	strcat(out, "N/A");
	return 3;
}
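/*
 * Example outputs of the level decoding above (illustrative samples, not
 * exhaustive): a local L1 load hit prints "L1 hit", a remote access one
 * hop away that hits in L3 prints "Remote node, same socket L3 hit", and
 * an unset/unknown data source falls back to "N/A".
 */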
static const char * const snoop_access[] = {
	"N/A", "None", "Hit", "Miss", "HitM",
};

static const char * const snoopx_access[] = {
	"Fwd", "Peer",
};
int perf_mem__snp_scnprintf(char *out, size_t sz, const struct mem_info *mem_info)
{
	size_t i, l = 0;
	u64 m = PERF_MEM_SNOOP_NA;

	sz -= 1; /* -1 for null termination */
	out[0] = '\0';

	if (mem_info)
		m = mem_info__const_data_src(mem_info)->mem_snoop;

	for (i = 0; m && i < ARRAY_SIZE(snoop_access); i++, m >>= 1) {
		if (!(m & 0x1))
			continue;
		if (l) {
			strcat(out, " or ");
			l += 4;
		}
		l += scnprintf(out + l, sz - l, snoop_access[i]);
	}

	m = 0;
	if (mem_info)
		m = mem_info__const_data_src(mem_info)->mem_snoopx;

	for (i = 0; m && i < ARRAY_SIZE(snoopx_access); i++, m >>= 1) {
		if (!(m & 0x1))
			continue;
		if (l) {
			strcat(out, " or ");
			l += 4;
		}
		l += scnprintf(out + l, sz - l, snoopx_access[i]);
	}

	if (*out == '\0')
		l += scnprintf(out, sz - l, "N/A");

	return l;
}
int perf_mem__lck_scnprintf(char *out, size_t sz, const struct mem_info *mem_info)
{
	u64 mask = PERF_MEM_LOCK_NA;
	int l;

	if (mem_info)
		mask = mem_info__const_data_src(mem_info)->mem_lock;

	if (mask & PERF_MEM_LOCK_NA)
		l = scnprintf(out, sz, "N/A");
	else if (mask & PERF_MEM_LOCK_LOCKED)
		l = scnprintf(out, sz, "Yes");
	else
		l = scnprintf(out, sz, "No");

	return l;
}
int perf_mem__blk_scnprintf(char *out, size_t sz, const struct mem_info *mem_info)
{
	size_t l = 0;
	u64 mask = PERF_MEM_BLK_NA;

	sz -= 1; /* -1 for null termination */
	out[0] = '\0';

	if (mem_info)
		mask = mem_info__const_data_src(mem_info)->mem_blk;

	if (!mask || (mask & PERF_MEM_BLK_NA)) {
		l += scnprintf(out + l, sz - l, " N/A");
		return l;
	}
	if (mask & PERF_MEM_BLK_DATA)
		l += scnprintf(out + l, sz - l, " Data");
	if (mask & PERF_MEM_BLK_ADDR)
		l += scnprintf(out + l, sz - l, " Addr");

	return l;
}
int perf_script__meminfo_scnprintf(char *out, size_t sz, const struct mem_info *mem_info)
{
	int i = 0;

	i += scnprintf(out, sz, "|OP ");
	i += perf_mem__op_scnprintf(out + i, sz - i, mem_info);
	i += scnprintf(out + i, sz - i, "|LVL ");
	i += perf_mem__lvl_scnprintf(out + i, sz, mem_info);
	i += scnprintf(out + i, sz - i, "|SNP ");
	i += perf_mem__snp_scnprintf(out + i, sz - i, mem_info);
	i += scnprintf(out + i, sz - i, "|TLB ");
	i += perf_mem__tlb_scnprintf(out + i, sz - i, mem_info);
	i += scnprintf(out + i, sz - i, "|LCK ");
	i += perf_mem__lck_scnprintf(out + i, sz - i, mem_info);
	i += scnprintf(out + i, sz - i, "|BLK ");
	i += perf_mem__blk_scnprintf(out + i, sz - i, mem_info);

	return i;
}
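/*
 * Sample of the combined string built above (hypothetical values): a load
 * that hit in L1 might be rendered as
 *   "|OP LOAD|LVL L1 hit|SNP None|TLB L1 or L2 hit|LCK No|BLK  N/A"
 * with each field filled in by the corresponding scnprintf helper.
 */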
int c2c_decode_stats(struct c2c_stats *stats, struct mem_info *mi)
{
	union perf_mem_data_src *data_src = mem_info__data_src(mi);
	u64 daddr  = mem_info__daddr(mi)->addr;
	u64 op     = data_src->mem_op;
	u64 lvl    = data_src->mem_lvl;
	u64 snoop  = data_src->mem_snoop;
	u64 snoopx = data_src->mem_snoopx;
	u64 lock   = data_src->mem_lock;
	u64 blk    = data_src->mem_blk;
	/*
	 * Skylake might report unknown remote level via this
	 * bit, consider it when evaluating remote HITMs.
	 *
	 * In case of POWER, the remote field can also be used to denote
	 * cache accesses from another core of the same node. Hence, set
	 * mrem only when HOPS is zero and the remote field is set.
	 */
	bool mrem  = (data_src->mem_remote && !data_src->mem_hops);
	int err = 0;

#define HITM_INC(__f)		\
do {				\
	stats->__f++;		\
	stats->tot_hitm++;	\
} while (0)

#define PEER_INC(__f)		\
do {				\
	stats->__f++;		\
	stats->tot_peer++;	\
} while (0)

#define P(a, b) PERF_MEM_##a##_##b

	stats->nr_entries++;

	if (lock & P(LOCK, LOCKED)) stats->locks++;

	if (blk & P(BLK, DATA))  stats->blk_data++;
	if (blk & P(BLK, ADDR))  stats->blk_addr++;
	if (op & P(OP, LOAD)) {
		/* load */
		stats->load++;

		if (!daddr) {
			stats->ld_noadrs++;
			return -1;
		}

		if (lvl & P(LVL, HIT)) {
			if (lvl & P(LVL, UNC)) stats->ld_uncache++;
			if (lvl & P(LVL, IO))  stats->ld_io++;
			if (lvl & P(LVL, LFB)) stats->ld_fbhit++;
			if (lvl & P(LVL, L1))  stats->ld_l1hit++;
			if (lvl & P(LVL, L2)) {
				stats->ld_l2hit++;

				if (snoopx & P(SNOOPX, PEER))
					PEER_INC(lcl_peer);
			}
			if (lvl & P(LVL, L3)) {
				if (snoop & P(SNOOP, HITM))
					HITM_INC(lcl_hitm);
				else
					stats->ld_llchit++;

				if (snoopx & P(SNOOPX, PEER))
					PEER_INC(lcl_peer);
			}

			if (lvl & P(LVL, LOC_RAM)) {
				stats->lcl_dram++;
				if (snoop & P(SNOOP, HIT))
					stats->ld_shared++;
				else
					stats->ld_excl++;
			}

			if ((lvl & P(LVL, REM_RAM1)) ||
			    (lvl & P(LVL, REM_RAM2)) ||
			     mrem) {
				stats->rmt_dram++;
				if (snoop & P(SNOOP, HIT))
					stats->ld_shared++;
				else
					stats->ld_excl++;
			}
		}

		if ((lvl & P(LVL, REM_CCE1)) ||
		    (lvl & P(LVL, REM_CCE2)) ||
		     mrem) {
			if (snoop & P(SNOOP, HIT)) {
				stats->rmt_hit++;
			} else if (snoop & P(SNOOP, HITM)) {
				HITM_INC(rmt_hitm);
			} else if (snoopx & P(SNOOPX, PEER)) {
				stats->rmt_hit++;
				PEER_INC(rmt_peer);
			}
		}

		if ((lvl & P(LVL, MISS)))
			stats->ld_miss++;

	} else if (op & P(OP, STORE)) {
		/* store */
		stats->store++;

		if (!daddr) {
			stats->st_noadrs++;
			return -1;
		}

		if (lvl & P(LVL, HIT)) {
			if (lvl & P(LVL, UNC)) stats->st_uncache++;
			if (lvl & P(LVL, L1))  stats->st_l1hit++;
		}
		if (lvl & P(LVL, MISS))
			if (lvl & P(LVL, L1))  stats->st_l1miss++;
		if (lvl & P(LVL, NA))
			stats->st_na++;
	} else {
		/* unparsable data_src? */
		stats->noparse++;
		return -1;
	}

	if (!mem_info__daddr(mi)->ms.map || !mem_info__iaddr(mi)->ms.map) {
		stats->nomap++;
		return -1;
	}

	return err;
}
void c2c_add_stats(struct c2c_stats *stats, struct c2c_stats *add)
{
	stats->nr_entries += add->nr_entries;

	stats->locks      += add->locks;
	stats->store      += add->store;
	stats->st_uncache += add->st_uncache;
	stats->st_noadrs  += add->st_noadrs;
	stats->st_l1hit   += add->st_l1hit;
	stats->st_l1miss  += add->st_l1miss;
	stats->st_na      += add->st_na;
	stats->load       += add->load;
	stats->ld_excl    += add->ld_excl;
	stats->ld_shared  += add->ld_shared;
	stats->ld_uncache += add->ld_uncache;
	stats->ld_io      += add->ld_io;
	stats->ld_miss    += add->ld_miss;
	stats->ld_noadrs  += add->ld_noadrs;
	stats->ld_fbhit   += add->ld_fbhit;
	stats->ld_l1hit   += add->ld_l1hit;
	stats->ld_l2hit   += add->ld_l2hit;
	stats->ld_llchit  += add->ld_llchit;
	stats->lcl_hitm   += add->lcl_hitm;
	stats->rmt_hitm   += add->rmt_hitm;
	stats->tot_hitm   += add->tot_hitm;
	stats->lcl_peer   += add->lcl_peer;
	stats->rmt_peer   += add->rmt_peer;
	stats->tot_peer   += add->tot_peer;
	stats->rmt_hit    += add->rmt_hit;
	stats->lcl_dram   += add->lcl_dram;
	stats->rmt_dram   += add->rmt_dram;
	stats->blk_data   += add->blk_data;
	stats->blk_addr   += add->blk_addr;
	stats->nomap      += add->nomap;
	stats->noparse    += add->noparse;
}