1 // SPDX-License-Identifier: GPL-2.0
10 #include <linux/kernel.h>
11 #include "map_symbol.h"
12 #include "mem-events.h"
/*
 * Value substituted for the ldlat=%u placeholder when
 * perf_mem_events__name() formats the load event name. Starts at 30.
 */
16 unsigned int perf_mem_events__loads_ldlat
= 30;
/* E() fills in one table entry: tag, event name (format string), sysfs name. */
18 #define E(t, n, s) { .tag = t, .name = n, .sysfs_name = s }
/*
 * Table of memory events, PERF_MEM_EVENTS__MAX slots, accessed through
 * perf_mem_events__ptr(). The load entry name contains a %u that is filled
 * in with perf_mem_events__loads_ldlat by perf_mem_events__name().
 */
20 static struct perf_mem_event perf_mem_events
[PERF_MEM_EVENTS__MAX
] = {
21 E("ldlat-loads", "cpu/mem-loads,ldlat=%u/P", "cpu/events/mem-loads"),
22 E("ldlat-stores", "cpu/mem-stores/P", "cpu/events/mem-stores"),
/*
 * Buffer holding the formatted load event name, and a flag recording
 * whether perf_mem_events__name() has already formatted it.
 */
29 static char mem_loads_name
[100];
30 static bool mem_loads_name__init
;
/*
 * Return a pointer to slot i of the perf_mem_events table.
 *
 * The index is first compared against PERF_MEM_EVENTS__MAX; the statement
 * guarded by that comparison is not visible in this excerpt.
 * NOTE(review): only the upper bound is checked in the visible code, and i
 * is a signed int - confirm callers never pass a negative index.
 * NOTE(review): declared __weak, presumably so another definition can
 * replace it at link time - confirm.
 */
32 struct perf_mem_event
* __weak
perf_mem_events__ptr(int i
)
34 if (i
>= PERF_MEM_EVENTS__MAX
)
37 return &perf_mem_events
[i
];
/*
 * Return the event name string for table index i.
 *
 * For PERF_MEM_EVENTS__LOAD the entry name is used as a format string and
 * rendered into the static mem_loads_name buffer with
 * perf_mem_events__loads_ldlat as its argument; the static buffer is
 * returned. The mem_loads_name__init flag is set before formatting, so the
 * formatting appears to run only on the first call - a later change to
 * perf_mem_events__loads_ldlat would then not be reflected.
 *
 * For any other index the entry name is returned directly, cast to char *.
 */
40 char * __weak
perf_mem_events__name(int i
)
42 struct perf_mem_event
*e
= perf_mem_events__ptr(i
);
47 if (i
== PERF_MEM_EVENTS__LOAD
) {
48 if (!mem_loads_name__init
) {
49 mem_loads_name__init
= true;
50 scnprintf(mem_loads_name
, sizeof(mem_loads_name
),
51 e
->name
, perf_mem_events__loads_ldlat
);
53 return mem_loads_name
;
56 return (char *)e
->name
;
/*
 * Parse a comma-separated list of event tags and mark the matching table
 * entries for recording.
 *
 * @str: list of tags, split on "," with strtok_r().
 *
 * A scratch buffer of strlen(str) + 1 bytes is allocated because strtok_r()
 * writes to the string it tokenises. For each token every table slot is
 * visited and strstr(e->tag, tok) decides the match, so a token matches
 * whenever it is a substring of a tag. A match sets e->record and the local
 * found flag.
 *
 * When nothing matched, an error naming the original string is printed.
 * NOTE(review): the allocation check, the copy into buf, the release of buf
 * and the return statements are not visible in this excerpt.
 */
59 int perf_mem_events__parse(const char *str
)
61 char *tok
, *saveptr
= NULL
;
66 /* We need a buffer that we know we can write to. */
67 buf
= malloc(strlen(str
) + 1);
73 tok
= strtok_r((char *)buf
, ",", &saveptr
);
76 for (j
= 0; j
< PERF_MEM_EVENTS__MAX
; j
++) {
77 struct perf_mem_event
*e
= perf_mem_events__ptr(j
);
82 if (strstr(e
->tag
, tok
))
83 e
->record
= found
= true;
86 tok
= strtok_r(NULL
, ",", &saveptr
);
94 pr_err("failed: event '%s' not found, use '-e list' to get list of available events\n", str
);
/*
 * Probe which memory events are present and flag them as supported.
 *
 * The sysfs mount point is fetched with sysfs__mount(). For each table slot
 * a path of the form "<a>/devices/<b>" is built into path (bounded by
 * PATH_MAX) and checked with stat(); when stat() succeeds the entry is
 * marked supported and the local found flag is set.
 * NOTE(review): the arguments to the path format are not visible here -
 * presumably the mount point and the entry sysfs name; confirm.
 *
 * Return: 0 when at least one event was found, -ENOENT otherwise.
 */
98 int perf_mem_events__init(void)
100 const char *mnt
= sysfs__mount();
107 for (j
= 0; j
< PERF_MEM_EVENTS__MAX
; j
++) {
109 struct perf_mem_event
*e
= perf_mem_events__ptr(j
);
113 * If the event entry isn't valid, skip initialization
114 * and "e->supported" will keep false.
119 scnprintf(path
, PATH_MAX
, "%s/devices/%s",
122 if (!stat(path
, &st
))
123 e
->supported
= found
= true;
126 return found
? 0 : -ENOENT
;
/*
 * Print one line per table slot to stderr.
 *
 * Each line has three fields: a left-justified 13-column string (its
 * argument is not visible in this excerpt), then a field that is 25 columns
 * wide and holds perf_mem_events__name(j) when verbose > 0 and is empty
 * with zero width otherwise, then ": available" when the entry is marked
 * supported.
 */
129 void perf_mem_events__list(void)
133 for (j
= 0; j
< PERF_MEM_EVENTS__MAX
; j
++) {
134 struct perf_mem_event
*e
= perf_mem_events__ptr(j
);
136 fprintf(stderr
, "%-13s%-*s%s\n",
138 verbose
> 0 ? 25 : 0,
139 verbose
> 0 ? perf_mem_events__name(j
) : "",
140 e
->supported
? ": available" : "");
/*
 * Names for the TLB access bits. perf_mem__tlb_scnprintf() indexes this
 * array by bit position while shifting the mask right one bit per step.
 * The entries themselves are not visible in this excerpt.
 */
144 static const char * const tlb_access
[] = {
/*
 * Render the TLB access description of a sample into out.
 *
 * @out:      destination buffer.
 * @sz:       size of out; one byte is reserved up front for the terminator.
 * @mem_info: sample whose data_src.mem_dtlb mask is decoded; the mask
 *            starts as PERF_MEM_TLB_NA before that field is read.
 *
 * The hit and miss bits are split off first and rendered as trailing
 * " hit" / " miss" suffixes. The remaining bits are walked from the lowest
 * bit up, with the loop index selecting the name in tlb_access. An "N/A"
 * string is written at the start of out on a path whose condition is not
 * visible in this excerpt.
 *
 * NOTE(review): sz is a size_t and is decremented unconditionally, so a
 * caller passing 0 would wrap it to a huge value.
 * NOTE(review): tlb_access[i] is passed as the format string itself; this
 * is only safe while no table entry contains a conversion specifier.
 */
154 int perf_mem__tlb_scnprintf(char *out
, size_t sz
, struct mem_info
*mem_info
)
157 u64 m
= PERF_MEM_TLB_NA
;
160 sz
-= 1; /* -1 for null termination */
164 m
= mem_info
->data_src
.mem_dtlb
;
166 hit
= m
& PERF_MEM_TLB_HIT
;
167 miss
= m
& PERF_MEM_TLB_MISS
;
169 /* hit and miss were captured above; drop them before walking the mask */
170 m
&= ~(PERF_MEM_TLB_HIT
|PERF_MEM_TLB_MISS
);
172 for (i
= 0; m
&& i
< ARRAY_SIZE(tlb_access
); i
++, m
>>= 1) {
179 l
+= scnprintf(out
+ l
, sz
- l
, tlb_access
[i
]);
182 l
+= scnprintf(out
, sz
- l
, "N/A");
184 l
+= scnprintf(out
+ l
, sz
- l
, " hit");
186 l
+= scnprintf(out
+ l
, sz
- l
, " miss");
/*
 * Names for the memory level bits. perf_mem__lvl_scnprintf() indexes this
 * array by bit position while shifting the level mask right one bit per
 * step. Only the remote RAM and remote cache entries are visible in this
 * excerpt.
 */
191 static const char * const mem_lvl
[] = {
200 "Remote RAM (1 hop)",
201 "Remote RAM (2 hops)",
202 "Remote Cache (1 hop)",
203 "Remote Cache (2 hops)",
/*
 * Names for selected PERF_MEM_LVLNUM_* values, indexed directly by the
 * level number through designated initialisers. Slots without an
 * initialiser are NULL.
 */
208 static const char * const mem_lvlnum
[] = {
209 [PERF_MEM_LVLNUM_ANY_CACHE
] = "Any cache",
210 [PERF_MEM_LVLNUM_LFB
] = "LFB",
211 [PERF_MEM_LVLNUM_RAM
] = "RAM",
212 [PERF_MEM_LVLNUM_PMEM
] = "PMEM",
213 [PERF_MEM_LVLNUM_NA
] = "N/A",
/*
 * Render the memory level description of a sample into out.
 *
 * @out:      destination buffer.
 * @sz:       size of out; one byte is reserved for the terminator.
 * @mem_info: sample whose data_src fields (mem_lvl, mem_remote,
 *            mem_lvl_num) are decoded; the mask starts as PERF_MEM_LVL_NA
 *            before mem_lvl is read.
 *
 * The hit and miss bits are split off first and rendered as trailing
 * " hit" / " miss" suffixes. When mem_remote is set a "Remote " prefix is
 * appended. The remaining level bits are walked from the lowest bit up,
 * with the loop index selecting the name in mem_lvl. When mem_lvl_num is
 * non-zero, the visible code writes either the mem_lvlnum name for that
 * number or "L<number>"; the condition choosing between the two is not
 * visible in this excerpt. "N/A" is written on a path whose condition is
 * likewise not visible.
 *
 * NOTE(review): the "Remote " prefix is added with strcat(), which is not
 * bounded by sz - confirm that callers always pass a buffer large enough.
 * NOTE(review): sz is a size_t and is decremented unconditionally, so a
 * caller passing 0 would wrap it to a huge value.
 * NOTE(review): mem_lvl[i] and mem_lvlnum[lvl] are passed as the format
 * string itself; safe only while no entry contains a conversion specifier.
 */
216 int perf_mem__lvl_scnprintf(char *out
, size_t sz
, struct mem_info
*mem_info
)
219 u64 m
= PERF_MEM_LVL_NA
;
224 m
= mem_info
->data_src
.mem_lvl
;
226 sz
-= 1; /* -1 for null termination */
229 hit
= m
& PERF_MEM_LVL_HIT
;
230 miss
= m
& PERF_MEM_LVL_MISS
;
232 /* hit and miss were captured above; drop them before walking the mask */
233 m
&= ~(PERF_MEM_LVL_HIT
|PERF_MEM_LVL_MISS
);
236 if (mem_info
&& mem_info
->data_src
.mem_remote
) {
237 strcat(out
, "Remote ");
242 for (i
= 0; m
&& i
< ARRAY_SIZE(mem_lvl
); i
++, m
>>= 1) {
249 l
+= scnprintf(out
+ l
, sz
- l
, mem_lvl
[i
]);
252 if (mem_info
&& mem_info
->data_src
.mem_lvl_num
) {
253 int lvl
= mem_info
->data_src
.mem_lvl_num
;
259 l
+= scnprintf(out
+ l
, sz
- l
, mem_lvlnum
[lvl
]);
261 l
+= scnprintf(out
+ l
, sz
- l
, "L%d", lvl
);
265 l
+= scnprintf(out
+ l
, sz
- l
, "N/A");
267 l
+= scnprintf(out
+ l
, sz
- l
, " hit");
269 l
+= scnprintf(out
+ l
, sz
- l
, " miss");
/*
 * Names for the snoop bits. perf_mem__snp_scnprintf() indexes this array by
 * bit position while shifting the snoop mask right one bit per step. The
 * entries themselves are not visible in this excerpt.
 */
274 static const char * const snoop_access
[] = {
/*
 * Render the snoop description of a sample into out.
 *
 * @out:      destination buffer.
 * @sz:       size of out; one byte is reserved for the terminator.
 * @mem_info: sample whose data_src.mem_snoop mask and mem_snoopx bits are
 *            decoded; the mask starts as PERF_MEM_SNOOP_NA before
 *            mem_snoop is read.
 *
 * The snoop bits are walked from the lowest bit up, with the loop index
 * selecting the name in snoop_access. "Fwd" is appended on a path that
 * tests PERF_MEM_SNOOPX_FWD in mem_snoopx, and "N/A" is written at the
 * start of out on a path whose condition is not visible in this excerpt.
 *
 * NOTE(review): sz is a size_t and is decremented unconditionally, so a
 * caller passing 0 would wrap it to a huge value.
 * NOTE(review): snoop_access[i] is passed as the format string itself;
 * safe only while no entry contains a conversion specifier.
 */
282 int perf_mem__snp_scnprintf(char *out
, size_t sz
, struct mem_info
*mem_info
)
285 u64 m
= PERF_MEM_SNOOP_NA
;
287 sz
-= 1; /* -1 for null termination */
291 m
= mem_info
->data_src
.mem_snoop
;
293 for (i
= 0; m
&& i
< ARRAY_SIZE(snoop_access
); i
++, m
>>= 1) {
300 l
+= scnprintf(out
+ l
, sz
- l
, snoop_access
[i
]);
303 (mem_info
->data_src
.mem_snoopx
& PERF_MEM_SNOOPX_FWD
)) {
308 l
+= scnprintf(out
+ l
, sz
- l
, "Fwd");
312 l
+= scnprintf(out
, sz
- l
, "N/A");
/*
 * Render the lock state of a sample into out.
 *
 * @out:      destination buffer.
 * @sz:       size of out, passed to scnprintf() unchanged.
 * @mem_info: sample whose data_src.mem_lock mask is decoded; the mask
 *            starts as PERF_MEM_LOCK_NA before that field is read.
 *
 * PERF_MEM_LOCK_NA is tested first and yields "N/A"; otherwise
 * PERF_MEM_LOCK_LOCKED yields "Yes". "No" is written on the remaining path,
 * whose introducing keyword is not visible in this excerpt.
 */
317 int perf_mem__lck_scnprintf(char *out
, size_t sz
, struct mem_info
*mem_info
)
319 u64 mask
= PERF_MEM_LOCK_NA
;
323 mask
= mem_info
->data_src
.mem_lock
;
325 if (mask
& PERF_MEM_LOCK_NA
)
326 l
= scnprintf(out
, sz
, "N/A");
327 else if (mask
& PERF_MEM_LOCK_LOCKED
)
328 l
= scnprintf(out
, sz
, "Yes");
330 l
= scnprintf(out
, sz
, "No");
/*
 * Render a combined one-line description of a sample into out.
 *
 * @out:      destination buffer.
 * @sz:       size of out.
 * @mem_info: sample handed to each of the field formatters.
 *
 * The pieces are written back to back in this order: memory level,
 * "|SNP " plus snoop, "|TLB " plus TLB access, "|LCK " plus lock state.
 * The running offset i is advanced by the value each call returns, and
 * every later call receives out + i and sz - i.
 */
335 int perf_script__meminfo_scnprintf(char *out
, size_t sz
, struct mem_info
*mem_info
)
339 i
+= perf_mem__lvl_scnprintf(out
, sz
, mem_info
);
340 i
+= scnprintf(out
+ i
, sz
- i
, "|SNP ");
341 i
+= perf_mem__snp_scnprintf(out
+ i
, sz
- i
, mem_info
);
342 i
+= scnprintf(out
+ i
, sz
- i
, "|TLB ");
343 i
+= perf_mem__tlb_scnprintf(out
+ i
, sz
- i
, mem_info
);
344 i
+= scnprintf(out
+ i
, sz
- i
, "|LCK ");
345 i
+= perf_mem__lck_scnprintf(out
+ i
, sz
- i
, mem_info
);
/*
 * Classify one memory access sample and bump the matching counters.
 *
 * @stats: counters to update.
 * @mi:    sample; its data_src fields mem_op, mem_lvl, mem_snoop, mem_lock
 *         and mem_remote are read, along with daddr.addr and the daddr and
 *         iaddr maps.
 *
 * P(a, b) pastes its arguments into the constant name PERF_MEM_a_b.
 *
 * Counter updates visible in this excerpt:
 *  - a locked access increments locks;
 *  - a load with the HIT level bit increments ld_uncache, ld_io, ld_fbhit,
 *    ld_l1hit or ld_l2hit according to the UNC, IO, LFB, L1 and L2 bits;
 *  - a store with the HIT level bit increments st_uncache or st_l1hit
 *    according to the UNC and L1 bits;
 *  - a store with both the MISS and L1 bits increments st_l1miss.
 *
 * Further load branches test the L3, LOC_RAM, REM_RAM1/2 and REM_CCE1/2
 * level bits together with the snoop HIT and HITM bits, and the load MISS
 * bit. The counters they update, the body of HITM_INC(), the handling of
 * unparsable data_src values and of samples lacking a daddr or iaddr map,
 * and the return values are not visible in this excerpt.
 */
350 int c2c_decode_stats(struct c2c_stats
*stats
, struct mem_info
*mi
)
352 union perf_mem_data_src
*data_src
= &mi
->data_src
;
353 u64 daddr
= mi
->daddr
.addr
;
354 u64 op
= data_src
->mem_op
;
355 u64 lvl
= data_src
->mem_lvl
;
356 u64 snoop
= data_src
->mem_snoop
;
357 u64 lock
= data_src
->mem_lock
;
359 * Skylake might report unknown remote level via this
360 * bit, consider it when evaluating remote HITMs.
362 bool mrem
= data_src
->mem_remote
;
365 #define HITM_INC(__f) \
371 #define P(a, b) PERF_MEM_##a##_##b
375 if (lock
& P(LOCK
, LOCKED
)) stats
->locks
++;
377 if (op
& P(OP
, LOAD
)) {
386 if (lvl
& P(LVL
, HIT
)) {
387 if (lvl
& P(LVL
, UNC
)) stats
->ld_uncache
++;
388 if (lvl
& P(LVL
, IO
)) stats
->ld_io
++;
389 if (lvl
& P(LVL
, LFB
)) stats
->ld_fbhit
++;
390 if (lvl
& P(LVL
, L1
)) stats
->ld_l1hit
++;
391 if (lvl
& P(LVL
, L2
)) stats
->ld_l2hit
++;
392 if (lvl
& P(LVL
, L3
)) {
393 if (snoop
& P(SNOOP
, HITM
))
399 if (lvl
& P(LVL
, LOC_RAM
)) {
401 if (snoop
& P(SNOOP
, HIT
))
407 if ((lvl
& P(LVL
, REM_RAM1
)) ||
408 (lvl
& P(LVL
, REM_RAM2
)) ||
411 if (snoop
& P(SNOOP
, HIT
))
418 if ((lvl
& P(LVL
, REM_CCE1
)) ||
419 (lvl
& P(LVL
, REM_CCE2
)) ||
421 if (snoop
& P(SNOOP
, HIT
))
423 else if (snoop
& P(SNOOP
, HITM
))
427 if ((lvl
& P(LVL
, MISS
)))
430 } else if (op
& P(OP
, STORE
)) {
439 if (lvl
& P(LVL
, HIT
)) {
440 if (lvl
& P(LVL
, UNC
)) stats
->st_uncache
++;
441 if (lvl
& P(LVL
, L1
)) stats
->st_l1hit
++;
443 if (lvl
& P(LVL
, MISS
))
444 if (lvl
& P(LVL
, L1
)) stats
->st_l1miss
++;
446 /* unparsable data_src? */
451 if (!mi
->daddr
.ms
.map
|| !mi
->iaddr
.ms
.map
) {
461 void c2c_add_stats(struct c2c_stats
*stats
, struct c2c_stats
*add
)
463 stats
->nr_entries
+= add
->nr_entries
;
465 stats
->locks
+= add
->locks
;
466 stats
->store
+= add
->store
;
467 stats
->st_uncache
+= add
->st_uncache
;
468 stats
->st_noadrs
+= add
->st_noadrs
;
469 stats
->st_l1hit
+= add
->st_l1hit
;
470 stats
->st_l1miss
+= add
->st_l1miss
;
471 stats
->load
+= add
->load
;
472 stats
->ld_excl
+= add
->ld_excl
;
473 stats
->ld_shared
+= add
->ld_shared
;
474 stats
->ld_uncache
+= add
->ld_uncache
;
475 stats
->ld_io
+= add
->ld_io
;
476 stats
->ld_miss
+= add
->ld_miss
;
477 stats
->ld_noadrs
+= add
->ld_noadrs
;
478 stats
->ld_fbhit
+= add
->ld_fbhit
;
479 stats
->ld_l1hit
+= add
->ld_l1hit
;
480 stats
->ld_l2hit
+= add
->ld_l2hit
;
481 stats
->ld_llchit
+= add
->ld_llchit
;
482 stats
->lcl_hitm
+= add
->lcl_hitm
;
483 stats
->rmt_hitm
+= add
->rmt_hitm
;
484 stats
->tot_hitm
+= add
->tot_hitm
;
485 stats
->rmt_hit
+= add
->rmt_hit
;
486 stats
->lcl_dram
+= add
->lcl_dram
;
487 stats
->rmt_dram
+= add
->rmt_dram
;
488 stats
->nomap
+= add
->nomap
;
489 stats
->noparse
+= add
->noparse
;