// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2011, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
 *
 * Parts came from builtin-{top,stat,record}.c, see those files for further
 * copyright notes.
 */
/*
 * Powerpc needs __SANE_USERSPACE_TYPES__ before <linux/types.h> to select
 * 'int-ll64.h' and avoid compile warnings when printing __u64 with %llu.
 */
#define __SANE_USERSPACE_TYPES__
#include <linux/bitops.h>
#include <api/fs/fs.h>
#include <api/fs/tracing_path.h>
#include <linux/hw_breakpoint.h>
#include <linux/perf_event.h>
#include <linux/compiler.h>
#include <linux/err.h>
#include <linux/zalloc.h>
#include <sys/ioctl.h>
#include <sys/resource.h>
#include <sys/syscall.h>
#include <sys/types.h>
#include <perf/evsel.h>
#include "bpf_counter.h"
#include "callchain.h"
#include "time-utils.h"
#include "util/evsel_config.h"
#include "util/evsel_fprintf.h"
#include <perf/cpumap.h>
#include "thread_map.h"
#include "perf_regs.h"
#include "trace-event.h"
#include "util/hashmap.h"
#include "hwmon_pmu.h"
#include "../perf-sys.h"
#include "util/parse-branch-options.h"
#include "util/bpf-filter.h"
#include "util/hist.h"
#include <internal/xyarray.h>
#include <internal/lib.h>
#include <internal/threadmap.h>
#include "util/intel-tpebs.h"
#include <linux/ctype.h>

#ifdef HAVE_LIBTRACEEVENT
#include <event-parse.h>
#endif
struct perf_missing_features perf_missing_features;

static clockid_t clockid;

static int evsel__no_extra_init(struct evsel *evsel __maybe_unused)
{
	return 0;
}

static bool test_attr__enabled(void)
{
	static bool test_attr__enabled;
	static bool test_attr__enabled_tested;

	if (!test_attr__enabled_tested) {
		char *dir = getenv("PERF_TEST_ATTR");

		test_attr__enabled = (dir != NULL);
		test_attr__enabled_tested = true;
	}
	return test_attr__enabled;
}
#define __WRITE_ASS(str, fmt, data)					\
do {									\
	if (fprintf(file, #str "=%"fmt "\n", data) < 0) {		\
		perror("test attr - failed to write event file");	\
		fclose(file);						\
		return -1;						\
	}								\
} while (0)

#define WRITE_ASS(field, fmt) __WRITE_ASS(field, fmt, attr->field)
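/*
 * Illustrative note (not from the original source): with PERF_TEST_ATTR=/tmp/x
 * in the environment, each sys_perf_event_open() call is mirrored into
 * /tmp/x/event-<type>-<config>-<fd> as "key=value" lines, e.g.
 * WRITE_ASS(type, PRIu32) emits a line such as "type=0".  The 'perf test attr'
 * harness later compares those files against its expectations.
 */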
static int store_event(struct perf_event_attr *attr, pid_t pid, struct perf_cpu cpu,
		       int fd, int group_fd, unsigned long flags)
{
	FILE *file;
	char path[PATH_MAX];
	char *dir = getenv("PERF_TEST_ATTR");

	snprintf(path, PATH_MAX, "%s/event-%d-%llu-%d", dir,
		 attr->type, attr->config, fd);

	file = fopen(path, "w+");
	if (!file) {
		perror("test attr - failed to open event file");
		return -1;
	}

	if (fprintf(file, "[event-%d-%llu-%d]\n",
		    attr->type, attr->config, fd) < 0) {
		perror("test attr - failed to write event file");
		fclose(file);
		return -1;
	}

	/* syscall arguments */
	__WRITE_ASS(fd,       "d", fd);
	__WRITE_ASS(group_fd, "d", group_fd);
	__WRITE_ASS(cpu,      "d", cpu.cpu);
	__WRITE_ASS(pid,      "d", pid);
	__WRITE_ASS(flags,   "lu", flags);

	/* struct perf_event_attr */
	WRITE_ASS(type,   PRIu32);
	WRITE_ASS(size,   PRIu32);
	WRITE_ASS(config,  "llu");
	WRITE_ASS(sample_period, "llu");
	WRITE_ASS(sample_type,   "llu");
	WRITE_ASS(read_format,   "llu");
	WRITE_ASS(disabled,       "d");
	WRITE_ASS(inherit,        "d");
	WRITE_ASS(pinned,         "d");
	WRITE_ASS(exclusive,      "d");
	WRITE_ASS(exclude_user,   "d");
	WRITE_ASS(exclude_kernel, "d");
	WRITE_ASS(exclude_hv,     "d");
	WRITE_ASS(exclude_idle,   "d");
	WRITE_ASS(mmap,           "d");
	WRITE_ASS(comm,           "d");
	WRITE_ASS(freq,           "d");
	WRITE_ASS(inherit_stat,   "d");
	WRITE_ASS(enable_on_exec, "d");
	WRITE_ASS(task,           "d");
	WRITE_ASS(watermark,      "d");
	WRITE_ASS(precise_ip,     "d");
	WRITE_ASS(mmap_data,      "d");
	WRITE_ASS(sample_id_all,  "d");
	WRITE_ASS(exclude_host,   "d");
	WRITE_ASS(exclude_guest,  "d");
	WRITE_ASS(exclude_callchain_kernel, "d");
	WRITE_ASS(exclude_callchain_user, "d");
	WRITE_ASS(mmap2,	  "d");
	WRITE_ASS(comm_exec,	  "d");
	WRITE_ASS(context_switch, "d");
	WRITE_ASS(write_backward, "d");
	WRITE_ASS(namespaces,	  "d");
	WRITE_ASS(use_clockid,    "d");
	WRITE_ASS(wakeup_events, PRIu32);
	WRITE_ASS(bp_type, PRIu32);
	WRITE_ASS(config1, "llu");
	WRITE_ASS(config2, "llu");
	WRITE_ASS(branch_sample_type, "llu");
	WRITE_ASS(sample_regs_user,   "llu");
	WRITE_ASS(sample_stack_user,  PRIu32);

	fclose(file);
	return 0;
}
static void test_attr__open(struct perf_event_attr *attr, pid_t pid, struct perf_cpu cpu,
			    int fd, int group_fd, unsigned long flags)
{
	int errno_saved = errno;

	if ((fd != -1) && store_event(attr, pid, cpu, fd, group_fd, flags)) {
		pr_err("test attr FAILED");
		exit(128);
	}

	errno = errno_saved;
}

static void evsel__no_extra_fini(struct evsel *evsel __maybe_unused)
{
}
static struct {
	size_t	size;
	int	(*init)(struct evsel *evsel);
	void	(*fini)(struct evsel *evsel);
} perf_evsel__object = {
	.size = sizeof(struct evsel),
	.init = evsel__no_extra_init,
	.fini = evsel__no_extra_fini,
};

int evsel__object_config(size_t object_size, int (*init)(struct evsel *evsel),
			 void (*fini)(struct evsel *evsel))
{
	if (object_size == 0)
		goto set_methods;

	if (perf_evsel__object.size > object_size)
		return -EINVAL;

	perf_evsel__object.size = object_size;

set_methods:
	if (init != NULL)
		perf_evsel__object.init = init;

	if (fini != NULL)
		perf_evsel__object.fini = fini;

	return 0;
}

#define FD(e, x, y) (*(int *)xyarray__entry(e->core.fd, x, y))
int __evsel__sample_size(u64 sample_type)
{
	u64 mask = sample_type & PERF_SAMPLE_MASK;
	int size = 0;
	int i;

	for (i = 0; i < 64; i++) {
		if (mask & (1ULL << i))
			size++;
	}

	size *= sizeof(u64);

	return size;
}

/**
 * __perf_evsel__calc_id_pos - calculate id_pos.
 * @sample_type: sample type
 *
 * This function returns the position of the event id (PERF_SAMPLE_ID or
 * PERF_SAMPLE_IDENTIFIER) in a sample event i.e. in the array of struct
 * perf_record_sample.
 */
static int __perf_evsel__calc_id_pos(u64 sample_type)
{
	int idx = 0;

	if (sample_type & PERF_SAMPLE_IDENTIFIER)
		return 0;

	if (!(sample_type & PERF_SAMPLE_ID))
		return -1;

	if (sample_type & PERF_SAMPLE_IP)
		idx += 1;

	if (sample_type & PERF_SAMPLE_TID)
		idx += 1;

	if (sample_type & PERF_SAMPLE_TIME)
		idx += 1;

	if (sample_type & PERF_SAMPLE_ADDR)
		idx += 1;

	return idx;
}
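/*
 * Illustrative example (not from the original source): with
 * sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_TID | PERF_SAMPLE_TIME | PERF_SAMPLE_ID,
 * the id is the fourth u64 in the sample body, so id_pos is 3; with
 * PERF_SAMPLE_IDENTIFIER set the id is always the first entry and id_pos is 0.
 */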
/**
 * __perf_evsel__calc_is_pos - calculate is_pos.
 * @sample_type: sample type
 *
 * This function returns the position (counting backwards) of the event id
 * (PERF_SAMPLE_ID or PERF_SAMPLE_IDENTIFIER) in a non-sample event i.e. if
 * sample_id_all is used there is an id sample appended to non-sample events.
 */
static int __perf_evsel__calc_is_pos(u64 sample_type)
{
	int idx = 1;

	if (sample_type & PERF_SAMPLE_IDENTIFIER)
		return 1;

	if (!(sample_type & PERF_SAMPLE_ID))
		return -1;

	if (sample_type & PERF_SAMPLE_CPU)
		idx += 1;

	if (sample_type & PERF_SAMPLE_STREAM_ID)
		idx += 1;

	return idx;
}
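/*
 * Illustrative example (not from the original source): is_pos counts back from
 * the end of the appended id sample, so with sample_id_all and
 * sample_type = PERF_SAMPLE_TID | PERF_SAMPLE_TIME | PERF_SAMPLE_ID | PERF_SAMPLE_CPU,
 * the id is the second-to-last u64 (the CPU entry follows it) and is_pos is 2.
 */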
void evsel__calc_id_pos(struct evsel *evsel)
{
	evsel->id_pos = __perf_evsel__calc_id_pos(evsel->core.attr.sample_type);
	evsel->is_pos = __perf_evsel__calc_is_pos(evsel->core.attr.sample_type);
}
void __evsel__set_sample_bit(struct evsel *evsel,
			     enum perf_event_sample_format bit)
{
	if (!(evsel->core.attr.sample_type & bit)) {
		evsel->core.attr.sample_type |= bit;
		evsel->sample_size += sizeof(u64);
		evsel__calc_id_pos(evsel);
	}
}

void __evsel__reset_sample_bit(struct evsel *evsel,
			       enum perf_event_sample_format bit)
{
	if (evsel->core.attr.sample_type & bit) {
		evsel->core.attr.sample_type &= ~bit;
		evsel->sample_size -= sizeof(u64);
		evsel__calc_id_pos(evsel);
	}
}
void evsel__set_sample_id(struct evsel *evsel,
			  bool can_sample_identifier)
{
	if (can_sample_identifier) {
		evsel__reset_sample_bit(evsel, ID);
		evsel__set_sample_bit(evsel, IDENTIFIER);
	} else {
		evsel__set_sample_bit(evsel, ID);
	}
	evsel->core.attr.read_format |= PERF_FORMAT_ID;
}

/**
 * evsel__is_function_event - Return whether given evsel is a function
 * trace event
 *
 * @evsel - evsel selector to be tested
 *
 * Return %true if event is function trace event
 */
bool evsel__is_function_event(struct evsel *evsel)
{
#define FUNCTION_EVENT "ftrace:function"

	return evsel->name &&
	       !strncmp(FUNCTION_EVENT, evsel->name, sizeof(FUNCTION_EVENT));

#undef FUNCTION_EVENT
}
void evsel__init(struct evsel *evsel,
		 struct perf_event_attr *attr, int idx)
{
	perf_evsel__init(&evsel->core, attr, idx);
	evsel->tracking	= !idx;
	evsel->unit	= strdup("");
	evsel->max_events = ULONG_MAX;
	evsel->evlist	= NULL;
	evsel->bpf_obj	= NULL;
	INIT_LIST_HEAD(&evsel->config_terms);
	INIT_LIST_HEAD(&evsel->bpf_counter_list);
	INIT_LIST_HEAD(&evsel->bpf_filters);
	perf_evsel__object.init(evsel);
	evsel->sample_size = __evsel__sample_size(attr->sample_type);
	evsel__calc_id_pos(evsel);
	evsel->cmdline_group_boundary = false;
	evsel->metric_events = NULL;
	evsel->per_pkg_mask = NULL;
	evsel->collect_stat = false;
	evsel->group_pmu_name = NULL;
	evsel->skippable = false;
	evsel->alternate_hw_config = PERF_COUNT_HW_MAX;
}
struct evsel *evsel__new_idx(struct perf_event_attr *attr, int idx)
{
	struct evsel *evsel = zalloc(perf_evsel__object.size);

	if (!evsel)
		return NULL;

	evsel__init(evsel, attr, idx);

	if (evsel__is_bpf_output(evsel) && !attr->sample_type) {
		evsel->core.attr.sample_type = (PERF_SAMPLE_RAW | PERF_SAMPLE_TIME |
						PERF_SAMPLE_CPU | PERF_SAMPLE_PERIOD),
		evsel->core.attr.sample_period = 1;
	}

	if (evsel__is_clock(evsel)) {
		free((char *)evsel->unit);
		evsel->unit = strdup("msec");
	}

	return evsel;
}
int copy_config_terms(struct list_head *dst, struct list_head *src)
{
	struct evsel_config_term *pos, *tmp;

	list_for_each_entry(pos, src, list) {
		tmp = malloc(sizeof(*tmp));
		if (tmp == NULL)
			return -ENOMEM;

		*tmp = *pos;
		tmp->val.str = strdup(pos->val.str);
		if (tmp->val.str == NULL) {
			free(tmp);
			return -ENOMEM;
		}

		list_add_tail(&tmp->list, dst);
	}
	return 0;
}

static int evsel__copy_config_terms(struct evsel *dst, struct evsel *src)
{
	return copy_config_terms(&dst->config_terms, &src->config_terms);
}
/**
 * evsel__clone - create a new evsel copied from @orig
 * @orig: original evsel
 *
 * The assumption is that @orig is not configured nor opened yet.
 * So we only care about the attributes that can be set while it's parsed.
 */
struct evsel *evsel__clone(struct evsel *orig)
{
	struct evsel *evsel;

	BUG_ON(orig->core.fd);
	BUG_ON(orig->counts);
	BUG_ON(orig->per_pkg_mask);

	/* cannot handle BPF objects for now */
	if (orig->bpf_obj)
		return NULL;

	evsel = evsel__new(&orig->core.attr);
	if (evsel == NULL)
		return NULL;

	evsel->core.cpus = perf_cpu_map__get(orig->core.cpus);
	evsel->core.own_cpus = perf_cpu_map__get(orig->core.own_cpus);
	evsel->core.threads = perf_thread_map__get(orig->core.threads);
	evsel->core.nr_members = orig->core.nr_members;
	evsel->core.system_wide = orig->core.system_wide;
	evsel->core.requires_cpu = orig->core.requires_cpu;
	evsel->core.is_pmu_core = orig->core.is_pmu_core;

	if (orig->name) {
		evsel->name = strdup(orig->name);
		if (evsel->name == NULL)
			goto out_err;
	}
	if (orig->group_name) {
		evsel->group_name = strdup(orig->group_name);
		if (evsel->group_name == NULL)
			goto out_err;
	}
	if (orig->group_pmu_name) {
		evsel->group_pmu_name = strdup(orig->group_pmu_name);
		if (evsel->group_pmu_name == NULL)
			goto out_err;
	}
	if (orig->filter) {
		evsel->filter = strdup(orig->filter);
		if (evsel->filter == NULL)
			goto out_err;
	}
	if (orig->metric_id) {
		evsel->metric_id = strdup(orig->metric_id);
		if (evsel->metric_id == NULL)
			goto out_err;
	}
	evsel->cgrp = cgroup__get(orig->cgrp);
#ifdef HAVE_LIBTRACEEVENT
	evsel->tp_format = orig->tp_format;
#endif
	evsel->handler = orig->handler;
	evsel->core.leader = orig->core.leader;

	evsel->max_events = orig->max_events;
	free((char *)evsel->unit);
	evsel->unit = strdup(orig->unit);
	if (evsel->unit == NULL)
		goto out_err;

	evsel->scale = orig->scale;
	evsel->snapshot = orig->snapshot;
	evsel->per_pkg = orig->per_pkg;
	evsel->percore = orig->percore;
	evsel->precise_max = orig->precise_max;
	evsel->is_libpfm_event = orig->is_libpfm_event;

	evsel->exclude_GH = orig->exclude_GH;
	evsel->sample_read = orig->sample_read;
	evsel->auto_merge_stats = orig->auto_merge_stats;
	evsel->collect_stat = orig->collect_stat;
	evsel->weak_group = orig->weak_group;
	evsel->use_config_name = orig->use_config_name;
	evsel->pmu = orig->pmu;

	if (evsel__copy_config_terms(evsel, orig) < 0)
		goto out_err;

	evsel->alternate_hw_config = orig->alternate_hw_config;

	return evsel;

out_err:
	evsel__delete(evsel);
	return NULL;
}
/*
 * Returns pointer with encoded error via <linux/err.h> interface.
 */
#ifdef HAVE_LIBTRACEEVENT
struct evsel *evsel__newtp_idx(const char *sys, const char *name, int idx, bool format)
{
	struct evsel *evsel = zalloc(perf_evsel__object.size);
	int err = -ENOMEM;

	if (evsel == NULL) {
		goto out_err;
	} else {
		struct perf_event_attr attr = {
			.type	     = PERF_TYPE_TRACEPOINT,
			.sample_type = (PERF_SAMPLE_RAW | PERF_SAMPLE_TIME |
					PERF_SAMPLE_CPU | PERF_SAMPLE_PERIOD),
		};

		if (asprintf(&evsel->name, "%s:%s", sys, name) < 0)
			goto out_free;

		event_attr_init(&attr);

		if (format) {
			evsel->tp_format = trace_event__tp_format(sys, name);
			if (IS_ERR(evsel->tp_format)) {
				err = PTR_ERR(evsel->tp_format);
				goto out_free;
			}
			attr.config = evsel->tp_format->id;
		} else {
			attr.config = (__u64) -1;
		}

		attr.sample_period = 1;
		evsel__init(evsel, &attr, idx);
	}
	return evsel;

out_free:
	zfree(&evsel->name);
	free(evsel);
out_err:
	return ERR_PTR(err);
}
#endif
const char *const evsel__hw_names[PERF_COUNT_HW_MAX] = {
	"cycles",
	"instructions",
	"cache-references",
	"cache-misses",
	"branches",
	"branch-misses",
	"bus-cycles",
	"stalled-cycles-frontend",
	"stalled-cycles-backend",
	"ref-cycles",
};

char *evsel__bpf_counter_events;
bool evsel__match_bpf_counter_events(const char *name)
{
	int name_len;
	bool match;
	char *ptr;

	if (!evsel__bpf_counter_events)
		return false;

	ptr = strstr(evsel__bpf_counter_events, name);
	name_len = strlen(name);

	/* check name matches a full token in evsel__bpf_counter_events */
	match = (ptr != NULL) &&
		((ptr == evsel__bpf_counter_events) || (*(ptr - 1) == ',')) &&
		((*(ptr + name_len) == ',') || (*(ptr + name_len) == '\0'));

	return match;
}
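/*
 * Illustrative example (not from the original source): with
 * evsel__bpf_counter_events = "cycles,instructions", the name "instructions"
 * matches because it is preceded by ',' and followed by '\0', while "struct"
 * does not match even though strstr() finds it inside "instructions".
 */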
static const char *__evsel__hw_name(u64 config)
{
	if (config < PERF_COUNT_HW_MAX && evsel__hw_names[config])
		return evsel__hw_names[config];

	return "unknown-hardware";
}
static int evsel__add_modifiers(struct evsel *evsel, char *bf, size_t size)
{
	int colon = 0, r = 0;
	struct perf_event_attr *attr = &evsel->core.attr;

#define MOD_PRINT(context, mod)	do {					\
		if (!attr->exclude_##context) {				\
			if (!colon) colon = ++r;			\
			r += scnprintf(bf + r, size - r, "%c", mod);	\
		} } while (0)

	if (attr->exclude_kernel || attr->exclude_user || attr->exclude_hv) {
		MOD_PRINT(kernel, 'k');
		MOD_PRINT(user, 'u');
		MOD_PRINT(hv, 'h');
	}

	if (attr->precise_ip) {
		if (!colon)
			colon = ++r;
		r += scnprintf(bf + r, size - r, "%.*s", attr->precise_ip, "ppp");
	}

	if (attr->exclude_host || attr->exclude_guest) {
		MOD_PRINT(host, 'H');
		MOD_PRINT(guest, 'G');
	}
#undef MOD_PRINT
	if (colon)
		bf[colon - 1] = ':';
	return r;
}
int __weak arch_evsel__hw_name(struct evsel *evsel, char *bf, size_t size)
{
	return scnprintf(bf, size, "%s", __evsel__hw_name(evsel->core.attr.config));
}

static int evsel__hw_name(struct evsel *evsel, char *bf, size_t size)
{
	int r = arch_evsel__hw_name(evsel, bf, size);

	return r + evsel__add_modifiers(evsel, bf + r, size - r);
}
const char *const evsel__sw_names[PERF_COUNT_SW_MAX] = {
	"cpu-clock",
	"task-clock",
	"page-faults",
	"context-switches",
	"cpu-migrations",
	"minor-faults",
	"major-faults",
	"alignment-faults",
	"emulation-faults",
	"dummy",
	"bpf-output",
	"cgroup-switches",
};

static const char *__evsel__sw_name(u64 config)
{
	if (config < PERF_COUNT_SW_MAX && evsel__sw_names[config])
		return evsel__sw_names[config];
	return "unknown-software";
}

static int evsel__sw_name(struct evsel *evsel, char *bf, size_t size)
{
	int r = scnprintf(bf, size, "%s", __evsel__sw_name(evsel->core.attr.config));

	return r + evsel__add_modifiers(evsel, bf + r, size - r);
}
static int __evsel__bp_name(char *bf, size_t size, u64 addr, u64 type)
{
	int r;

	r = scnprintf(bf, size, "mem:0x%" PRIx64 ":", addr);

	if (type & HW_BREAKPOINT_R)
		r += scnprintf(bf + r, size - r, "r");

	if (type & HW_BREAKPOINT_W)
		r += scnprintf(bf + r, size - r, "w");

	if (type & HW_BREAKPOINT_X)
		r += scnprintf(bf + r, size - r, "x");

	return r;
}

static int evsel__bp_name(struct evsel *evsel, char *bf, size_t size)
{
	struct perf_event_attr *attr = &evsel->core.attr;
	int r = __evsel__bp_name(bf, size, attr->bp_addr, attr->bp_type);

	return r + evsel__add_modifiers(evsel, bf + r, size - r);
}
const char *const evsel__hw_cache[PERF_COUNT_HW_CACHE_MAX][EVSEL__MAX_ALIASES] = {
 { "L1-dcache",	"l1-d",		"l1d",		"L1-data",		},
 { "L1-icache",	"l1-i",		"l1i",		"L1-instruction",	},
 { "LLC",	"L2",							},
 { "dTLB",	"d-tlb",	"Data-TLB",				},
 { "iTLB",	"i-tlb",	"Instruction-TLB",			},
 { "branch",	"branches",	"bpu",		"btb",		"bpc",	},
 { "node",								},
};

const char *const evsel__hw_cache_op[PERF_COUNT_HW_CACHE_OP_MAX][EVSEL__MAX_ALIASES] = {
 { "load",	"loads",	"read",					},
 { "store",	"stores",	"write",				},
 { "prefetch",	"prefetches",	"speculative-read", "speculative-load",	},
};

const char *const evsel__hw_cache_result[PERF_COUNT_HW_CACHE_RESULT_MAX][EVSEL__MAX_ALIASES] = {
 { "refs",	"Reference",	"ops",		"access",		},
 { "misses",	"miss",							},
};

#define C(x)		PERF_COUNT_HW_CACHE_##x
#define CACHE_READ	(1 << C(OP_READ))
#define CACHE_WRITE	(1 << C(OP_WRITE))
#define CACHE_PREFETCH	(1 << C(OP_PREFETCH))
#define COP(x)		(1 << x)
/*
 * cache operation stat
 * L1I : Read and prefetch only
 * ITLB and BPU : Read-only
 */
static const unsigned long evsel__hw_cache_stat[C(MAX)] = {
 [C(L1D)]	= (CACHE_READ | CACHE_WRITE | CACHE_PREFETCH),
 [C(L1I)]	= (CACHE_READ | CACHE_PREFETCH),
 [C(LL)]	= (CACHE_READ | CACHE_WRITE | CACHE_PREFETCH),
 [C(DTLB)]	= (CACHE_READ | CACHE_WRITE | CACHE_PREFETCH),
 [C(ITLB)]	= (CACHE_READ),
 [C(BPU)]	= (CACHE_READ),
 [C(NODE)]	= (CACHE_READ | CACHE_WRITE | CACHE_PREFETCH),
};

bool evsel__is_cache_op_valid(u8 type, u8 op)
{
	if (evsel__hw_cache_stat[type] & COP(op))
		return true;	/* valid */
	else
		return false;	/* invalid */
}
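/*
 * Illustrative example (not from the original source): per the table above,
 * "L1-dcache-prefetch-misses" is a valid combination (L1D allows prefetch),
 * while "iTLB-store-misses" is rejected because ITLB is marked read-only.
 */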
int __evsel__hw_cache_type_op_res_name(u8 type, u8 op, u8 result, char *bf, size_t size)
{
	if (result) {
		return scnprintf(bf, size, "%s-%s-%s", evsel__hw_cache[type][0],
				 evsel__hw_cache_op[op][0],
				 evsel__hw_cache_result[result][0]);
	}

	return scnprintf(bf, size, "%s-%s", evsel__hw_cache[type][0],
			 evsel__hw_cache_op[op][1]);
}
static int __evsel__hw_cache_name(u64 config, char *bf, size_t size)
{
	u8 op, result, type = (config >>  0) & 0xff;
	const char *err = "unknown-ext-hardware-cache-type";

	if (type >= PERF_COUNT_HW_CACHE_MAX)
		goto out_err;

	op = (config >>  8) & 0xff;
	err = "unknown-ext-hardware-cache-op";
	if (op >= PERF_COUNT_HW_CACHE_OP_MAX)
		goto out_err;

	result = (config >> 16) & 0xff;
	err = "unknown-ext-hardware-cache-result";
	if (result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
		goto out_err;

	err = "invalid-cache";
	if (!evsel__is_cache_op_valid(type, op))
		goto out_err;

	return __evsel__hw_cache_type_op_res_name(type, op, result, bf, size);
out_err:
	return scnprintf(bf, size, "%s", err);
}
static int evsel__hw_cache_name(struct evsel *evsel, char *bf, size_t size)
{
	int ret = __evsel__hw_cache_name(evsel->core.attr.config, bf, size);

	return ret + evsel__add_modifiers(evsel, bf + ret, size - ret);
}

static int evsel__raw_name(struct evsel *evsel, char *bf, size_t size)
{
	int ret = scnprintf(bf, size, "raw 0x%" PRIx64, evsel->core.attr.config);

	return ret + evsel__add_modifiers(evsel, bf + ret, size - ret);
}
const char *evsel__name(struct evsel *evsel)
{
	char bf[128];

	if (!evsel)
		goto out_unknown;

	if (evsel->name)
		return evsel->name;

	switch (evsel->core.attr.type) {
	case PERF_TYPE_RAW:
		evsel__raw_name(evsel, bf, sizeof(bf));
		break;

	case PERF_TYPE_HARDWARE:
		evsel__hw_name(evsel, bf, sizeof(bf));
		break;

	case PERF_TYPE_HW_CACHE:
		evsel__hw_cache_name(evsel, bf, sizeof(bf));
		break;

	case PERF_TYPE_SOFTWARE:
		evsel__sw_name(evsel, bf, sizeof(bf));
		break;

	case PERF_TYPE_TRACEPOINT:
		scnprintf(bf, sizeof(bf), "%s", "unknown tracepoint");
		break;

	case PERF_TYPE_BREAKPOINT:
		evsel__bp_name(evsel, bf, sizeof(bf));
		break;

	case PERF_PMU_TYPE_TOOL:
		scnprintf(bf, sizeof(bf), "%s", evsel__tool_pmu_event_name(evsel));
		break;

	default:
		scnprintf(bf, sizeof(bf), "unknown attr type: %d",
			  evsel->core.attr.type);
		break;
	}

	evsel->name = strdup(bf);

	if (evsel->name)
		return evsel->name;
out_unknown:
	return "unknown";
}
bool evsel__name_is(struct evsel *evsel, const char *name)
{
	return !strcmp(evsel__name(evsel), name);
}

const char *evsel__metric_id(const struct evsel *evsel)
{
	if (evsel->metric_id)
		return evsel->metric_id;

	if (evsel__is_tool(evsel))
		return evsel__tool_pmu_event_name(evsel);

	return "unknown";
}

const char *evsel__group_name(struct evsel *evsel)
{
	return evsel->group_name ?: "anon group";
}

/*
 * Returns the group details for the specified leader,
 * with following rules.
 *
 *  For record -e '{cycles,instructions}'
 *    'anon group { cycles:u, instructions:u }'
 *
 *  For record -e 'cycles,instructions' and report --group
 *    'cycles:u, instructions:u'
 */
int evsel__group_desc(struct evsel *evsel, char *buf, size_t size)
{
	int ret = 0;
	bool first = true;
	struct evsel *pos;
	const char *group_name = evsel__group_name(evsel);

	if (!evsel->forced_leader)
		ret = scnprintf(buf, size, "%s { ", group_name);

	for_each_group_evsel(pos, evsel) {
		if (symbol_conf.skip_empty &&
		    evsel__hists(pos)->stats.nr_samples == 0)
			continue;

		ret += scnprintf(buf + ret, size - ret, "%s%s",
				 first ? "" : ", ", evsel__name(pos));
		first = false;
	}

	if (!evsel->forced_leader)
		ret += scnprintf(buf + ret, size - ret, " }");

	return ret;
}
static void __evsel__config_callchain(struct evsel *evsel, struct record_opts *opts,
				      struct callchain_param *param)
{
	bool function = evsel__is_function_event(evsel);
	struct perf_event_attr *attr = &evsel->core.attr;

	evsel__set_sample_bit(evsel, CALLCHAIN);

	attr->sample_max_stack = param->max_stack;

	if (opts->kernel_callchains)
		attr->exclude_callchain_user = 1;
	if (opts->user_callchains)
		attr->exclude_callchain_kernel = 1;
	if (param->record_mode == CALLCHAIN_LBR) {
		if (!opts->branch_stack) {
			if (attr->exclude_user) {
				pr_warning("LBR callstack option is only available "
					   "to get user callchain information. "
					   "Falling back to framepointers.\n");
			} else {
				evsel__set_sample_bit(evsel, BRANCH_STACK);
				attr->branch_sample_type = PERF_SAMPLE_BRANCH_USER |
							PERF_SAMPLE_BRANCH_CALL_STACK |
							PERF_SAMPLE_BRANCH_NO_CYCLES |
							PERF_SAMPLE_BRANCH_NO_FLAGS |
							PERF_SAMPLE_BRANCH_HW_INDEX;
			}
		} else
			pr_warning("Cannot use LBR callstack with branch stack. "
				   "Falling back to framepointers.\n");
	}

	if (param->record_mode == CALLCHAIN_DWARF) {
		if (!function) {
			const char *arch = perf_env__arch(evsel__env(evsel));

			evsel__set_sample_bit(evsel, REGS_USER);
			evsel__set_sample_bit(evsel, STACK_USER);
			if (opts->sample_user_regs &&
			    DWARF_MINIMAL_REGS(arch) != arch__user_reg_mask()) {
				attr->sample_regs_user |= DWARF_MINIMAL_REGS(arch);
				pr_warning("WARNING: The use of --call-graph=dwarf may require all the user registers, "
					   "specifying a subset with --user-regs may render DWARF unwinding unreliable, "
					   "so the minimal registers set (IP, SP) is explicitly forced.\n");
			} else {
				attr->sample_regs_user |= arch__user_reg_mask();
			}
			attr->sample_stack_user = param->dump_size;
			attr->exclude_callchain_user = 1;
		} else {
			pr_info("Cannot use DWARF unwind for function trace event,"
				" falling back to framepointers.\n");
		}
	}

	if (function) {
		pr_info("Disabling user space callchains for function trace event.\n");
		attr->exclude_callchain_user = 1;
	}
}
void evsel__config_callchain(struct evsel *evsel, struct record_opts *opts,
			     struct callchain_param *param)
{
	if (param->enabled)
		return __evsel__config_callchain(evsel, opts, param);
}

static void evsel__reset_callgraph(struct evsel *evsel, struct callchain_param *param)
{
	struct perf_event_attr *attr = &evsel->core.attr;

	evsel__reset_sample_bit(evsel, CALLCHAIN);
	if (param->record_mode == CALLCHAIN_LBR) {
		evsel__reset_sample_bit(evsel, BRANCH_STACK);
		attr->branch_sample_type &= ~(PERF_SAMPLE_BRANCH_USER |
					      PERF_SAMPLE_BRANCH_CALL_STACK |
					      PERF_SAMPLE_BRANCH_HW_INDEX);
	}
	if (param->record_mode == CALLCHAIN_DWARF) {
		evsel__reset_sample_bit(evsel, REGS_USER);
		evsel__reset_sample_bit(evsel, STACK_USER);
	}
}
static void evsel__apply_config_terms(struct evsel *evsel,
				      struct record_opts *opts, bool track)
{
	struct evsel_config_term *term;
	struct list_head *config_terms = &evsel->config_terms;
	struct perf_event_attr *attr = &evsel->core.attr;
	/* callgraph default */
	struct callchain_param param = {
		.record_mode = callchain_param.record_mode,
	};
	u32 dump_size = 0;
	int max_stack = 0;
	const char *callgraph_buf = NULL;

	list_for_each_entry(term, config_terms, list) {
		switch (term->type) {
		case EVSEL__CONFIG_TERM_PERIOD:
			if (!(term->weak && opts->user_interval != ULLONG_MAX)) {
				attr->sample_period = term->val.period;
				attr->freq = 0;
				evsel__reset_sample_bit(evsel, PERIOD);
			}
			break;
		case EVSEL__CONFIG_TERM_FREQ:
			if (!(term->weak && opts->user_freq != UINT_MAX)) {
				attr->sample_freq = term->val.freq;
				attr->freq = 1;
				evsel__set_sample_bit(evsel, PERIOD);
			}
			break;
		case EVSEL__CONFIG_TERM_TIME:
			if (term->val.time)
				evsel__set_sample_bit(evsel, TIME);
			else
				evsel__reset_sample_bit(evsel, TIME);
			break;
		case EVSEL__CONFIG_TERM_CALLGRAPH:
			callgraph_buf = term->val.str;
			break;
		case EVSEL__CONFIG_TERM_BRANCH:
			if (term->val.str && strcmp(term->val.str, "no")) {
				evsel__set_sample_bit(evsel, BRANCH_STACK);
				parse_branch_str(term->val.str,
						 &attr->branch_sample_type);
			} else
				evsel__reset_sample_bit(evsel, BRANCH_STACK);
			break;
		case EVSEL__CONFIG_TERM_STACK_USER:
			dump_size = term->val.stack_user;
			break;
		case EVSEL__CONFIG_TERM_MAX_STACK:
			max_stack = term->val.max_stack;
			break;
		case EVSEL__CONFIG_TERM_MAX_EVENTS:
			evsel->max_events = term->val.max_events;
			break;
		case EVSEL__CONFIG_TERM_INHERIT:
			/*
			 * attr->inherit should has already been set by
			 * evsel__config. If user explicitly set
			 * inherit using config terms, override global
			 * opt->no_inherit setting.
			 */
			attr->inherit = term->val.inherit ? 1 : 0;
			break;
		case EVSEL__CONFIG_TERM_OVERWRITE:
			attr->write_backward = term->val.overwrite ? 1 : 0;
			break;
		case EVSEL__CONFIG_TERM_DRV_CFG:
			break;
		case EVSEL__CONFIG_TERM_PERCORE:
			break;
		case EVSEL__CONFIG_TERM_AUX_OUTPUT:
			attr->aux_output = term->val.aux_output ? 1 : 0;
			break;
		case EVSEL__CONFIG_TERM_AUX_SAMPLE_SIZE:
			/* Already applied by auxtrace */
			break;
		case EVSEL__CONFIG_TERM_CFG_CHG:
			break;
		default:
			break;
		}
	}

	/* User explicitly set per-event callgraph, clear the old setting and reset. */
	if ((callgraph_buf != NULL) || (dump_size > 0) || max_stack) {
		bool sample_address = false;

		if (max_stack) {
			param.max_stack = max_stack;
			if (callgraph_buf == NULL)
				callgraph_buf = "fp";
		}

		/* parse callgraph parameters */
		if (callgraph_buf != NULL) {
			if (!strcmp(callgraph_buf, "no")) {
				param.enabled = false;
				param.record_mode = CALLCHAIN_NONE;
			} else {
				param.enabled = true;
				if (parse_callchain_record(callgraph_buf, &param)) {
					pr_err("per-event callgraph setting for %s failed. "
					       "Apply callgraph global setting for it\n",
					       evsel->name);
					return;
				}
				if (param.record_mode == CALLCHAIN_DWARF)
					sample_address = true;
			}
		}
		if (dump_size > 0) {
			dump_size = round_up(dump_size, sizeof(u64));
			param.dump_size = dump_size;
		}

		/* If global callgraph set, clear it */
		if (callchain_param.enabled)
			evsel__reset_callgraph(evsel, &callchain_param);

		/* set perf-event callgraph */
		if (param.enabled) {
			if (sample_address) {
				evsel__set_sample_bit(evsel, ADDR);
				evsel__set_sample_bit(evsel, DATA_SRC);
				evsel->core.attr.mmap_data = track;
			}
			evsel__config_callchain(evsel, opts, &param);
		}
	}
}
struct evsel_config_term *__evsel__get_config_term(struct evsel *evsel, enum evsel_term_type type)
{
	struct evsel_config_term *term, *found_term = NULL;

	list_for_each_entry(term, &evsel->config_terms, list) {
		if (term->type == type)
			found_term = term;
	}

	return found_term;
}

void __weak arch_evsel__set_sample_weight(struct evsel *evsel)
{
	evsel__set_sample_bit(evsel, WEIGHT);
}

void __weak arch__post_evsel_config(struct evsel *evsel __maybe_unused,
				    struct perf_event_attr *attr __maybe_unused)
{
}

static void evsel__set_default_freq_period(struct record_opts *opts,
					   struct perf_event_attr *attr)
{
	if (opts->freq) {
		attr->freq = 1;
		attr->sample_freq = opts->freq;
	} else {
		attr->sample_period = opts->default_interval;
	}
}

static bool evsel__is_offcpu_event(struct evsel *evsel)
{
	return evsel__is_bpf_output(evsel) && evsel__name_is(evsel, OFFCPU_EVENT);
}
/*
 * The enable_on_exec/disabled value strategy:
 *
 *  1) For any type of traced program:
 *    - all independent events and group leaders are disabled
 *    - all group members are enabled
 *
 *     Group members are ruled by group leaders. They need to
 *     be enabled, because the group scheduling relies on that.
 *
 *  2) For traced programs executed by perf:
 *    - all independent events and group leaders have
 *      enable_on_exec set
 *    - we don't specifically enable or disable any event during
 *      the record command
 *
 *     Independent events and group leaders are initially disabled
 *     and get enabled by exec. Group members are ruled by group
 *     leaders as stated in 1).
 *
 *  3) For traced programs attached by perf (pid/tid):
 *    - we specifically enable or disable all events during
 *      the record command
 *
 *     When attaching events to already running traced we
 *     enable/disable events specifically, as there's no
 *     initial traced exec call.
 */
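/*
 * Illustrative mapping of the cases above (not from the original source):
 *   'perf record ./workload' -> case 2): leaders get attr->enable_on_exec = 1
 *   'perf record -p <pid>'   -> case 3): events are enabled explicitly via ioctl
 *   group members in both    -> case 1): left enabled, scheduled by their leader
 */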
void evsel__config(struct evsel *evsel, struct record_opts *opts,
		   struct callchain_param *callchain)
{
	struct evsel *leader = evsel__leader(evsel);
	struct perf_event_attr *attr = &evsel->core.attr;
	int track = evsel->tracking;
	bool per_cpu = opts->target.default_per_cpu && !opts->target.per_thread;

	attr->sample_id_all = perf_missing_features.sample_id_all ? 0 : 1;
	attr->inherit	    = target__has_cpu(&opts->target) ? 0 : !opts->no_inherit;
	attr->write_backward = opts->overwrite ? 1 : 0;
	attr->read_format   = PERF_FORMAT_LOST;

	evsel__set_sample_bit(evsel, IP);
	evsel__set_sample_bit(evsel, TID);

	if (evsel->sample_read) {
		evsel__set_sample_bit(evsel, READ);

		/*
		 * We need ID even in case of single event, because
		 * PERF_SAMPLE_READ process ID specific data.
		 */
		evsel__set_sample_id(evsel, false);

		/*
		 * Apply group format only if we belong to group
		 * with more than one members.
		 */
		if (leader->core.nr_members > 1) {
			attr->read_format |= PERF_FORMAT_GROUP;
		}

		/*
		 * Inherit + SAMPLE_READ requires SAMPLE_TID in the read_format
		 */
		if (attr->inherit) {
			evsel__set_sample_bit(evsel, TID);
			evsel->core.attr.read_format |=
				PERF_FORMAT_ID;
		}
	}

	/*
	 * We default some events to have a default interval. But keep
	 * it a weak assumption overridable by the user.
	 */
	if ((evsel->is_libpfm_event && !attr->sample_period) ||
	    (!evsel->is_libpfm_event && (!attr->sample_period ||
					 opts->user_freq != UINT_MAX ||
					 opts->user_interval != ULLONG_MAX)))
		evsel__set_default_freq_period(opts, attr);

	/*
	 * If attr->freq was set (here or earlier), ask for period
	 * to be sampled.
	 */
	if (attr->freq)
		evsel__set_sample_bit(evsel, PERIOD);

	if (opts->no_samples)
		attr->sample_freq = 0;

	if (opts->inherit_stat) {
		evsel->core.attr.read_format |=
			PERF_FORMAT_TOTAL_TIME_ENABLED |
			PERF_FORMAT_TOTAL_TIME_RUNNING |
			PERF_FORMAT_ID;
		attr->inherit_stat = 1;
	}

	if (opts->sample_address) {
		evsel__set_sample_bit(evsel, ADDR);
		attr->mmap_data = track;
	}

	/*
	 * We don't allow user space callchains for function trace
	 * event, due to issues with page faults while tracing page
	 * fault handler and its overall trickiness nature.
	 */
	if (evsel__is_function_event(evsel))
		evsel->core.attr.exclude_callchain_user = 1;

	if (callchain && callchain->enabled && !evsel->no_aux_samples)
		evsel__config_callchain(evsel, opts, callchain);

	if (opts->sample_intr_regs && !evsel->no_aux_samples &&
	    !evsel__is_dummy_event(evsel)) {
		attr->sample_regs_intr = opts->sample_intr_regs;
		evsel__set_sample_bit(evsel, REGS_INTR);
	}

	if (opts->sample_user_regs && !evsel->no_aux_samples &&
	    !evsel__is_dummy_event(evsel)) {
		attr->sample_regs_user |= opts->sample_user_regs;
		evsel__set_sample_bit(evsel, REGS_USER);
	}

	if (target__has_cpu(&opts->target) || opts->sample_cpu)
		evsel__set_sample_bit(evsel, CPU);

	/*
	 * When the user explicitly disabled time don't force it here.
	 */
	if (opts->sample_time &&
	    (!perf_missing_features.sample_id_all &&
	    (!opts->no_inherit || target__has_cpu(&opts->target) || per_cpu ||
	     opts->sample_time_set)))
		evsel__set_sample_bit(evsel, TIME);

	if (opts->raw_samples && !evsel->no_aux_samples) {
		evsel__set_sample_bit(evsel, TIME);
		evsel__set_sample_bit(evsel, RAW);
		evsel__set_sample_bit(evsel, CPU);
	}

	if (opts->sample_address)
		evsel__set_sample_bit(evsel, DATA_SRC);

	if (opts->sample_phys_addr)
		evsel__set_sample_bit(evsel, PHYS_ADDR);

	if (opts->no_buffering) {
		attr->watermark = 0;
		attr->wakeup_events = 1;
	}
	if (opts->branch_stack && !evsel->no_aux_samples) {
		evsel__set_sample_bit(evsel, BRANCH_STACK);
		attr->branch_sample_type = opts->branch_stack;
	}

	if (opts->sample_weight)
		arch_evsel__set_sample_weight(evsel);

	attr->task     = track;
	attr->mmap     = track;
	attr->mmap2    = track && !perf_missing_features.mmap2;
	attr->comm     = track;
	attr->build_id = track && opts->build_id;

	/*
	 * ksymbol is tracked separately with text poke because it needs to be
	 * system wide and enabled immediately.
	 */
	if (!opts->text_poke)
		attr->ksymbol = track && !perf_missing_features.ksymbol;
	attr->bpf_event = track && !opts->no_bpf_event && !perf_missing_features.bpf;

	if (opts->record_namespaces)
		attr->namespaces = track;

	if (opts->record_cgroup) {
		attr->cgroup = track && !perf_missing_features.cgroup;
		evsel__set_sample_bit(evsel, CGROUP);
	}

	if (opts->sample_data_page_size)
		evsel__set_sample_bit(evsel, DATA_PAGE_SIZE);

	if (opts->sample_code_page_size)
		evsel__set_sample_bit(evsel, CODE_PAGE_SIZE);

	if (opts->record_switch_events)
		attr->context_switch = track;

	if (opts->sample_transaction)
		evsel__set_sample_bit(evsel, TRANSACTION);

	if (opts->running_time) {
		evsel->core.attr.read_format |=
			PERF_FORMAT_TOTAL_TIME_ENABLED |
			PERF_FORMAT_TOTAL_TIME_RUNNING;
	}

	/*
	 * XXX see the function comment above
	 *
	 * Disabling only independent events or group leaders,
	 * keeping group members enabled.
	 */
	if (evsel__is_group_leader(evsel))
		attr->disabled = 1;

	/*
	 * Setting enable_on_exec for independent events and
	 * group leaders for traced executed by perf.
	 */
	if (target__none(&opts->target) && evsel__is_group_leader(evsel) &&
	    !opts->target.initial_delay)
		attr->enable_on_exec = 1;

	if (evsel->immediate) {
		attr->disabled = 0;
		attr->enable_on_exec = 0;
	}

	clockid = opts->clockid;
	if (opts->use_clockid) {
		attr->use_clockid = 1;
		attr->clockid = opts->clockid;
	}

	if (evsel->precise_max)
		attr->precise_ip = 3;

	if (opts->all_user) {
		attr->exclude_kernel = 1;
		attr->exclude_user   = 0;
	}

	if (opts->all_kernel) {
		attr->exclude_kernel = 0;
		attr->exclude_user   = 1;
	}

	if (evsel->core.own_cpus || evsel->unit)
		evsel->core.attr.read_format |= PERF_FORMAT_ID;

	/*
	 * Apply event specific term settings,
	 * it overloads any global configuration.
	 */
	evsel__apply_config_terms(evsel, opts, track);

	evsel->ignore_missing_thread = opts->ignore_missing_thread;

	/* The --period option takes the precedence. */
	if (opts->period_set) {
		if (opts->period)
			evsel__set_sample_bit(evsel, PERIOD);
		else
			evsel__reset_sample_bit(evsel, PERIOD);
	}

	/*
	 * A dummy event never triggers any actual counter and therefore
	 * cannot be used with branch_stack.
	 *
	 * For initial_delay, a dummy event is added implicitly.
	 * The software event will trigger -EOPNOTSUPP error out,
	 * if BRANCH_STACK bit is set.
	 */
	if (evsel__is_dummy_event(evsel))
		evsel__reset_sample_bit(evsel, BRANCH_STACK);

	if (evsel__is_offcpu_event(evsel))
		evsel->core.attr.sample_type &= OFFCPU_SAMPLE_TYPES;

	arch__post_evsel_config(evsel, attr);
}
int evsel__set_filter(struct evsel *evsel, const char *filter)
{
	char *new_filter = strdup(filter);

	if (new_filter != NULL) {
		free(evsel->filter);
		evsel->filter = new_filter;
		return 0;
	}

	return -1;
}

static int evsel__append_filter(struct evsel *evsel, const char *fmt, const char *filter)
{
	char *new_filter;

	if (evsel->filter == NULL)
		return evsel__set_filter(evsel, filter);

	if (asprintf(&new_filter, fmt, evsel->filter, filter) > 0) {
		free(evsel->filter);
		evsel->filter = new_filter;
		return 0;
	}

	return -1;
}
int evsel__append_tp_filter(struct evsel *evsel, const char *filter)
{
	return evsel__append_filter(evsel, "(%s) && (%s)", filter);
}

int evsel__append_addr_filter(struct evsel *evsel, const char *filter)
{
	return evsel__append_filter(evsel, "%s,%s", filter);
}
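/*
 * Illustrative example (not from the original source): if a tracepoint evsel
 * already carries the filter "prev_pid==0" and evsel__append_tp_filter() is
 * called with "next_pid!=0", the resulting filter string becomes
 * "(prev_pid==0) && (next_pid!=0)".
 */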
/* Caller has to clear disabled after going through all CPUs. */
int evsel__enable_cpu(struct evsel *evsel, int cpu_map_idx)
{
	return perf_evsel__enable_cpu(&evsel->core, cpu_map_idx);
}

int evsel__enable(struct evsel *evsel)
{
	int err = perf_evsel__enable(&evsel->core);

	if (!err)
		evsel->disabled = false;

	return err;
}

/* Caller has to set disabled after going through all CPUs. */
int evsel__disable_cpu(struct evsel *evsel, int cpu_map_idx)
{
	return perf_evsel__disable_cpu(&evsel->core, cpu_map_idx);
}

int evsel__disable(struct evsel *evsel)
{
	int err = perf_evsel__disable(&evsel->core);
	/*
	 * We mark it disabled here so that tools that disable a event can
	 * ignore events after they disable it. I.e. the ring buffer may have
	 * already a few more events queued up before the kernel got the stop
	 * request.
	 */
	if (!err)
		evsel->disabled = true;

	return err;
}
void free_config_terms(struct list_head *config_terms)
{
	struct evsel_config_term *term, *h;

	list_for_each_entry_safe(term, h, config_terms, list) {
		list_del_init(&term->list);
		zfree(&term->val.str);
		free(term);
	}
}

static void evsel__free_config_terms(struct evsel *evsel)
{
	free_config_terms(&evsel->config_terms);
}
void evsel__exit(struct evsel *evsel)
{
	assert(list_empty(&evsel->core.node));
	assert(evsel->evlist == NULL);
	bpf_counter__destroy(evsel);
	perf_bpf_filter__destroy(evsel);
	evsel__free_counts(evsel);
	perf_evsel__free_fd(&evsel->core);
	perf_evsel__free_id(&evsel->core);
	evsel__free_config_terms(evsel);
	cgroup__put(evsel->cgrp);
	perf_cpu_map__put(evsel->core.cpus);
	perf_cpu_map__put(evsel->core.own_cpus);
	perf_thread_map__put(evsel->core.threads);
	zfree(&evsel->group_name);
	zfree(&evsel->name);
	zfree(&evsel->filter);
	zfree(&evsel->group_pmu_name);
	zfree(&evsel->unit);
	zfree(&evsel->metric_id);
	evsel__zero_per_pkg(evsel);
	hashmap__free(evsel->per_pkg_mask);
	evsel->per_pkg_mask = NULL;
	zfree(&evsel->metric_events);
	perf_evsel__object.fini(evsel);
	if (evsel__tool_event(evsel) == TOOL_PMU__EVENT_SYSTEM_TIME ||
	    evsel__tool_event(evsel) == TOOL_PMU__EVENT_USER_TIME)
		xyarray__delete(evsel->start_times);
}

void evsel__delete(struct evsel *evsel)
{
	if (!evsel)
		return;

	evsel__exit(evsel);
	free(evsel);
}
void evsel__compute_deltas(struct evsel *evsel, int cpu_map_idx, int thread,
			   struct perf_counts_values *count)
{
	struct perf_counts_values tmp;

	if (!evsel->prev_raw_counts)
		return;

	tmp = *perf_counts(evsel->prev_raw_counts, cpu_map_idx, thread);
	*perf_counts(evsel->prev_raw_counts, cpu_map_idx, thread) = *count;

	count->val = count->val - tmp.val;
	count->ena = count->ena - tmp.ena;
	count->run = count->run - tmp.run;
}
static int evsel__read_one(struct evsel *evsel, int cpu_map_idx, int thread)
{
	struct perf_counts_values *count = perf_counts(evsel->counts, cpu_map_idx, thread);

	return perf_evsel__read(&evsel->core, cpu_map_idx, thread, count);
}

static int evsel__read_retire_lat(struct evsel *evsel, int cpu_map_idx, int thread)
{
	return tpebs_set_evsel(evsel, cpu_map_idx, thread);
}
static void evsel__set_count(struct evsel *counter, int cpu_map_idx, int thread,
			     u64 val, u64 ena, u64 run, u64 lost)
{
	struct perf_counts_values *count;

	count = perf_counts(counter->counts, cpu_map_idx, thread);

	if (counter->retire_lat) {
		evsel__read_retire_lat(counter, cpu_map_idx, thread);
		perf_counts__set_loaded(counter->counts, cpu_map_idx, thread, true);
		return;
	}

	count->val  = val;
	count->ena  = ena;
	count->run  = run;
	count->lost = lost;

	perf_counts__set_loaded(counter->counts, cpu_map_idx, thread, true);
}
static bool evsel__group_has_tpebs(struct evsel *leader)
{
	struct evsel *evsel;

	for_each_group_evsel(evsel, leader) {
		if (evsel__is_retire_lat(evsel))
			return true;
	}
	return false;
}

static u64 evsel__group_read_nr_members(struct evsel *leader)
{
	u64 nr = leader->core.nr_members;
	struct evsel *evsel;

	for_each_group_evsel(evsel, leader) {
		if (evsel__is_retire_lat(evsel))
			nr--;
	}
	return nr;
}
static u64 evsel__group_read_size(struct evsel *leader)
{
	u64 read_format = leader->core.attr.read_format;
	int entry = sizeof(u64); /* value */
	int size = 0;
	int nr = 1;

	if (!evsel__group_has_tpebs(leader))
		return perf_evsel__read_size(&leader->core);

	if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
		size += sizeof(u64);

	if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
		size += sizeof(u64);

	if (read_format & PERF_FORMAT_ID)
		entry += sizeof(u64);

	if (read_format & PERF_FORMAT_LOST)
		entry += sizeof(u64);

	if (read_format & PERF_FORMAT_GROUP) {
		nr = evsel__group_read_nr_members(leader);
		size += sizeof(u64);
	}

	size += entry * nr;
	return size;
}
static int evsel__process_group_data(struct evsel *leader, int cpu_map_idx, int thread, u64 *data)
{
	u64 read_format = leader->core.attr.read_format;
	struct sample_read_value *v;
	u64 nr, ena = 0, run = 0, lost = 0;

	nr = *data++;

	if (nr != evsel__group_read_nr_members(leader))
		return -EINVAL;

	if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
		ena = *data++;

	if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
		run = *data++;

	v = (void *)data;
	sample_read_group__for_each(v, nr, read_format) {
		struct evsel *counter;

		counter = evlist__id2evsel(leader->evlist, v->id);
		if (!counter)
			return -EINVAL;

		if (read_format & PERF_FORMAT_LOST)
			lost = v->lost;

		evsel__set_count(counter, cpu_map_idx, thread, v->value, ena, run, lost);
	}

	return 0;
}
static int evsel__read_group(struct evsel *leader, int cpu_map_idx, int thread)
{
	struct perf_stat_evsel *ps = leader->stats;
	u64 read_format = leader->core.attr.read_format;
	int size = evsel__group_read_size(leader);
	u64 *data = ps->group_data;

	if (!(read_format & PERF_FORMAT_ID))
		return -EINVAL;

	if (!evsel__is_group_leader(leader))
		return -EINVAL;

	if (!data) {
		data = zalloc(size);
		if (!data)
			return -ENOMEM;

		ps->group_data = data;
	}

	if (FD(leader, cpu_map_idx, thread) < 0)
		return -EINVAL;

	if (readn(FD(leader, cpu_map_idx, thread), data, size) <= 0)
		return -errno;

	return evsel__process_group_data(leader, cpu_map_idx, thread, data);
}
bool __evsel__match(const struct evsel *evsel, u32 type, u64 config)
{
	u32 e_type = evsel->core.attr.type;
	u64 e_config = evsel->core.attr.config;

	if (e_type != type) {
		return type == PERF_TYPE_HARDWARE && evsel->pmu && evsel->pmu->is_core &&
			evsel->alternate_hw_config == config;
	}

	if ((type == PERF_TYPE_HARDWARE || type == PERF_TYPE_HW_CACHE) &&
	    perf_pmus__supports_extended_type())
		e_config &= PERF_HW_EVENT_MASK;

	return e_config == config;
}
int evsel__read_counter(struct evsel *evsel, int cpu_map_idx, int thread)
{
	if (evsel__is_tool(evsel))
		return evsel__tool_pmu_read(evsel, cpu_map_idx, thread);

	if (evsel__is_hwmon(evsel))
		return evsel__hwmon_pmu_read(evsel, cpu_map_idx, thread);

	if (evsel__is_retire_lat(evsel))
		return evsel__read_retire_lat(evsel, cpu_map_idx, thread);

	if (evsel->core.attr.read_format & PERF_FORMAT_GROUP)
		return evsel__read_group(evsel, cpu_map_idx, thread);

	return evsel__read_one(evsel, cpu_map_idx, thread);
}
int __evsel__read_on_cpu(struct evsel *evsel, int cpu_map_idx, int thread, bool scale)
{
	struct perf_counts_values count;
	size_t nv = scale ? 3 : 1;

	if (FD(evsel, cpu_map_idx, thread) < 0)
		return -EINVAL;

	if (evsel->counts == NULL && evsel__alloc_counts(evsel) < 0)
		return -ENOMEM;

	if (readn(FD(evsel, cpu_map_idx, thread), &count, nv * sizeof(u64)) <= 0)
		return -errno;

	evsel__compute_deltas(evsel, cpu_map_idx, thread, &count);
	perf_counts_values__scale(&count, scale, NULL);
	*perf_counts(evsel->counts, cpu_map_idx, thread) = count;
	return 0;
}
static int evsel__match_other_cpu(struct evsel *evsel, struct evsel *other,
				  int cpu_map_idx)
{
	struct perf_cpu cpu;

	cpu = perf_cpu_map__cpu(evsel->core.cpus, cpu_map_idx);
	return perf_cpu_map__idx(other->core.cpus, cpu);
}

static int evsel__hybrid_group_cpu_map_idx(struct evsel *evsel, int cpu_map_idx)
{
	struct evsel *leader = evsel__leader(evsel);

	if ((evsel__is_hybrid(evsel) && !evsel__is_hybrid(leader)) ||
	    (!evsel__is_hybrid(evsel) && evsel__is_hybrid(leader))) {
		return evsel__match_other_cpu(evsel, leader, cpu_map_idx);
	}

	return cpu_map_idx;
}
static int get_group_fd(struct evsel *evsel, int cpu_map_idx, int thread)
{
	struct evsel *leader = evsel__leader(evsel);
	int fd;

	if (evsel__is_group_leader(evsel))
		return -1;

	/*
	 * Leader must be already processed/open,
	 * if not it's a bug.
	 */
	BUG_ON(!leader->core.fd);

	cpu_map_idx = evsel__hybrid_group_cpu_map_idx(evsel, cpu_map_idx);
	if (cpu_map_idx == -1)
		return -1;

	fd = FD(leader, cpu_map_idx, thread);
	BUG_ON(fd == -1 && !leader->skippable);

	/*
	 * When the leader has been skipped, return -2 to distinguish from no
	 * group leader case.
	 */
	return fd == -1 ? -2 : fd;
}
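/*
 * Illustrative note (not from the original source): the value returned here is
 * used as the group_fd argument of sys_perf_event_open(); -1 means "open this
 * event as its own leader", while -2 signals that the group leader itself was
 * skipped, which the open path treats as an error for the member.
 */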
static void evsel__remove_fd(struct evsel *pos, int nr_cpus, int nr_threads, int thread_idx)
{
	for (int cpu = 0; cpu < nr_cpus; cpu++)
		for (int thread = thread_idx; thread < nr_threads - 1; thread++)
			FD(pos, cpu, thread) = FD(pos, cpu, thread + 1);
}

static int update_fds(struct evsel *evsel,
		      int nr_cpus, int cpu_map_idx,
		      int nr_threads, int thread_idx)
{
	struct evsel *pos;

	if (cpu_map_idx >= nr_cpus || thread_idx >= nr_threads)
		return -EINVAL;

	evlist__for_each_entry(evsel->evlist, pos) {
		nr_cpus = pos != evsel ? nr_cpus : cpu_map_idx;

		evsel__remove_fd(pos, nr_cpus, nr_threads, thread_idx);

		/*
		 * Since fds for next evsel has not been created,
		 * there is no need to iterate whole event list.
		 */
		if (pos == evsel)
			break;
	}
	return 0;
}
static bool evsel__ignore_missing_thread(struct evsel *evsel,
					 int nr_cpus, int cpu_map_idx,
					 struct perf_thread_map *threads,
					 int thread, int err)
{
	pid_t ignore_pid = perf_thread_map__pid(threads, thread);

	if (!evsel->ignore_missing_thread)
		return false;

	/* The system wide setup does not work with threads. */
	if (evsel->core.system_wide)
		return false;

	/* The -ESRCH is perf event syscall errno for pid's not found. */
	if (err != -ESRCH)
		return false;

	/* If there's only one thread, let it fail. */
	if (threads->nr == 1)
		return false;

	/*
	 * We should remove fd for missing_thread first
	 * because thread_map__remove() will decrease threads->nr.
	 */
	if (update_fds(evsel, nr_cpus, cpu_map_idx, threads->nr, thread))
		return false;

	if (thread_map__remove(threads, thread))
		return false;

	pr_warning("WARNING: Ignored open failure for pid %d\n",
		   ignore_pid);
	return true;
}
static int __open_attr__fprintf(FILE *fp, const char *name, const char *val,
				void *priv __maybe_unused)
{
	return fprintf(fp, "  %-32s %s\n", name, val);
}

static void display_attr(struct perf_event_attr *attr)
{
	if (verbose >= 2 || debug_peo_args) {
		fprintf(stderr, "%.60s\n", graph_dotted_line);
		fprintf(stderr, "perf_event_attr:\n");
		perf_event_attr__fprintf(stderr, attr, __open_attr__fprintf, NULL);
		fprintf(stderr, "%.60s\n", graph_dotted_line);
	}
}
bool evsel__precise_ip_fallback(struct evsel *evsel)
{
	/* Do not try less precise if not requested. */
	if (!evsel->precise_max)
		return false;

	/*
	 * We tried all the precise_ip values, and it's
	 * still failing, so leave it to standard fallback.
	 */
	if (!evsel->core.attr.precise_ip) {
		evsel->core.attr.precise_ip = evsel->precise_ip_original;
		return false;
	}

	if (!evsel->precise_ip_original)
		evsel->precise_ip_original = evsel->core.attr.precise_ip;

	evsel->core.attr.precise_ip--;
	pr_debug2_peo("decreasing precise_ip by one (%d)\n", evsel->core.attr.precise_ip);
	display_attr(&evsel->core.attr);
	return true;
}
static struct perf_cpu_map *empty_cpu_map;
static struct perf_thread_map *empty_thread_map;

static int __evsel__prepare_open(struct evsel *evsel, struct perf_cpu_map *cpus,
				 struct perf_thread_map *threads)
{
	int ret = 0;
	int nthreads = perf_thread_map__nr(threads);

	if ((perf_missing_features.write_backward && evsel->core.attr.write_backward) ||
	    (perf_missing_features.aux_output     && evsel->core.attr.aux_output))
		return -EINVAL;

	if (cpus == NULL) {
		if (empty_cpu_map == NULL) {
			empty_cpu_map = perf_cpu_map__new_any_cpu();
			if (empty_cpu_map == NULL)
				return -ENOMEM;
		}

		cpus = empty_cpu_map;
	}

	if (threads == NULL) {
		if (empty_thread_map == NULL) {
			empty_thread_map = thread_map__new_by_tid(-1);
			if (empty_thread_map == NULL)
				return -ENOMEM;
		}

		threads = empty_thread_map;
	}

	if (evsel->core.fd == NULL &&
	    perf_evsel__alloc_fd(&evsel->core, perf_cpu_map__nr(cpus), nthreads) < 0)
		return -ENOMEM;

	if (evsel__is_tool(evsel))
		ret = evsel__tool_pmu_prepare_open(evsel, cpus, nthreads);

	evsel->open_flags = PERF_FLAG_FD_CLOEXEC;
	if (evsel->cgrp)
		evsel->open_flags |= PERF_FLAG_PID_CGROUP;

	return ret;
}
static void evsel__disable_missing_features(struct evsel *evsel)
{
	if (perf_missing_features.inherit_sample_read && evsel->core.attr.inherit &&
	    (evsel->core.attr.sample_type & PERF_SAMPLE_READ))
		evsel->core.attr.inherit = 0;
	if (perf_missing_features.branch_counters)
		evsel->core.attr.branch_sample_type &= ~PERF_SAMPLE_BRANCH_COUNTERS;
	if (perf_missing_features.read_lost)
		evsel->core.attr.read_format &= ~PERF_FORMAT_LOST;
	if (perf_missing_features.weight_struct) {
		evsel__set_sample_bit(evsel, WEIGHT);
		evsel__reset_sample_bit(evsel, WEIGHT_STRUCT);
	}
	if (perf_missing_features.clockid_wrong)
		evsel->core.attr.clockid = CLOCK_MONOTONIC; /* should always work */
	if (perf_missing_features.clockid) {
		evsel->core.attr.use_clockid = 0;
		evsel->core.attr.clockid = 0;
	}
	if (perf_missing_features.cloexec)
		evsel->open_flags &= ~(unsigned long)PERF_FLAG_FD_CLOEXEC;
	if (perf_missing_features.mmap2)
		evsel->core.attr.mmap2 = 0;
	if (evsel->pmu && evsel->pmu->missing_features.exclude_guest)
		evsel->core.attr.exclude_guest = evsel->core.attr.exclude_host = 0;
	if (perf_missing_features.lbr_flags)
		evsel->core.attr.branch_sample_type &= ~(PERF_SAMPLE_BRANCH_NO_FLAGS |
							 PERF_SAMPLE_BRANCH_NO_CYCLES);
	if (perf_missing_features.group_read && evsel->core.attr.inherit)
		evsel->core.attr.read_format &= ~(PERF_FORMAT_GROUP|PERF_FORMAT_ID);
	if (perf_missing_features.ksymbol)
		evsel->core.attr.ksymbol = 0;
	if (perf_missing_features.bpf)
		evsel->core.attr.bpf_event = 0;
	if (perf_missing_features.branch_hw_idx)
		evsel->core.attr.branch_sample_type &= ~PERF_SAMPLE_BRANCH_HW_INDEX;
	if (perf_missing_features.sample_id_all)
		evsel->core.attr.sample_id_all = 0;
}
int evsel__prepare_open(struct evsel *evsel, struct perf_cpu_map *cpus,
			struct perf_thread_map *threads)
{
	int err;

	err = __evsel__prepare_open(evsel, cpus, threads);
	if (err)
		return err;

	evsel__disable_missing_features(evsel);

	return err;
}
static bool has_attr_feature(struct perf_event_attr *attr, unsigned long flags)
{
	int fd = syscall(SYS_perf_event_open, attr, /*pid=*/0, /*cpu=*/-1,
			 /*group_fd=*/-1, flags);
	close(fd);

	if (fd < 0) {
		attr->exclude_kernel = 1;

		fd = syscall(SYS_perf_event_open, attr, /*pid=*/0, /*cpu=*/-1,
			     /*group_fd=*/-1, flags);
		close(fd);
	}

	if (fd < 0) {
		attr->exclude_hv = 1;

		fd = syscall(SYS_perf_event_open, attr, /*pid=*/0, /*cpu=*/-1,
			     /*group_fd=*/-1, flags);
		close(fd);
	}

	if (fd < 0) {
		attr->exclude_guest = 1;

		fd = syscall(SYS_perf_event_open, attr, /*pid=*/0, /*cpu=*/-1,
			     /*group_fd=*/-1, flags);
		close(fd);
	}

	attr->exclude_kernel = 0;
	attr->exclude_guest = 0;
	attr->exclude_hv = 0;

	return fd >= 0;
}
static void evsel__detect_missing_pmu_features(struct evsel *evsel)
{
	struct perf_event_attr attr = {
		.type = evsel->core.attr.type,
		.config = evsel->core.attr.config,
		.disabled = 1,
	};
	struct perf_pmu *pmu = evsel->pmu;

	if (pmu == NULL)
		pmu = evsel->pmu = evsel__find_pmu(evsel);

	if (pmu == NULL || pmu->missing_features.checked)
		return;

	/*
	 * Must probe features in the order they were added to the
	 * perf_event_attr interface. These are kernel core limitation but
	 * specific to PMUs with branch stack. So we can detect with the given
	 * hardware event and stop on the first one succeeded.
	 */

	/* Please add new feature detection here. */

	attr.exclude_guest = 1;
	if (has_attr_feature(&attr, /*flags=*/0))
		goto found;
	pmu->missing_features.exclude_guest = true;
	pr_debug2("switching off exclude_guest for PMU %s\n", pmu->name);

found:
	pmu->missing_features.checked = true;
}
static void evsel__detect_missing_brstack_features(struct evsel *evsel)
{
	static bool detection_done = false;
	struct perf_event_attr attr = {
		.type = evsel->core.attr.type,
		.config = evsel->core.attr.config,
		.disabled = 1,
		.sample_type = PERF_SAMPLE_BRANCH_STACK,
		.sample_period = 1000,
	};

	if (detection_done)
		return;

	/*
	 * Must probe features in the order they were added to the
	 * perf_event_attr interface. These are PMU specific limitation
	 * so we can detect with the given hardware event and stop on the
	 * first one succeeded.
	 */

	/* Please add new feature detection here. */

	attr.branch_sample_type = PERF_SAMPLE_BRANCH_COUNTERS;
	if (has_attr_feature(&attr, /*flags=*/0))
		goto found;
	perf_missing_features.branch_counters = true;
	pr_debug2("switching off branch counters support\n");

	attr.branch_sample_type = PERF_SAMPLE_BRANCH_HW_INDEX;
	if (has_attr_feature(&attr, /*flags=*/0))
		goto found;
	perf_missing_features.branch_hw_idx = true;
	pr_debug2("switching off branch HW index support\n");

	attr.branch_sample_type = PERF_SAMPLE_BRANCH_NO_CYCLES | PERF_SAMPLE_BRANCH_NO_FLAGS;
	if (has_attr_feature(&attr, /*flags=*/0))
		goto found;
	perf_missing_features.lbr_flags = true;
	pr_debug2_peo("switching off branch sample type no (cycles/flags)\n");

found:
	detection_done = true;
}
static bool evsel__detect_missing_features(struct evsel *evsel)
{
	static bool detection_done = false;
	struct perf_event_attr attr = {
		.type	 = PERF_TYPE_SOFTWARE,
		.config	 = PERF_COUNT_SW_TASK_CLOCK,
		.disabled = 1,
	};
	int old_errno;

	evsel__detect_missing_pmu_features(evsel);

	if (evsel__has_br_stack(evsel))
		evsel__detect_missing_brstack_features(evsel);

	if (detection_done)
		goto check;

	old_errno = errno;

	/*
	 * Must probe features in the order they were added to the
	 * perf_event_attr interface.  These are kernel core limitations,
	 * not PMU-specific, so we can detect with a software event and
	 * stop on the first one succeeded.
	 */

	/* Please add new feature detection here. */

	attr.inherit = true;
	attr.sample_type = PERF_SAMPLE_READ;
	if (has_attr_feature(&attr, /*flags=*/0))
		goto found;
	perf_missing_features.inherit_sample_read = true;
	pr_debug2("Using PERF_SAMPLE_READ / :S modifier is not compatible with inherit, falling back to no-inherit.\n");
	attr.inherit = false;
	attr.sample_type = 0;

	attr.read_format = PERF_FORMAT_LOST;
	if (has_attr_feature(&attr, /*flags=*/0))
		goto found;
	perf_missing_features.read_lost = true;
	pr_debug2("switching off PERF_FORMAT_LOST support\n");
	attr.read_format = 0;

	attr.sample_type = PERF_SAMPLE_WEIGHT_STRUCT;
	if (has_attr_feature(&attr, /*flags=*/0))
		goto found;
	perf_missing_features.weight_struct = true;
	pr_debug2("switching off weight struct support\n");
	attr.sample_type = 0;

	attr.sample_type = PERF_SAMPLE_CODE_PAGE_SIZE;
	if (has_attr_feature(&attr, /*flags=*/0))
		goto found;
	perf_missing_features.code_page_size = true;
	pr_debug2_peo("Kernel has no PERF_SAMPLE_CODE_PAGE_SIZE support\n");
	attr.sample_type = 0;

	attr.sample_type = PERF_SAMPLE_DATA_PAGE_SIZE;
	if (has_attr_feature(&attr, /*flags=*/0))
		goto found;
	perf_missing_features.data_page_size = true;
	pr_debug2_peo("Kernel has no PERF_SAMPLE_DATA_PAGE_SIZE support\n");
	attr.sample_type = 0;

	attr.cgroup = 1;
	if (has_attr_feature(&attr, /*flags=*/0))
		goto found;
	perf_missing_features.cgroup = true;
	pr_debug2_peo("Kernel has no cgroup sampling support\n");
	attr.cgroup = 0;

	attr.aux_output = 1;
	if (has_attr_feature(&attr, /*flags=*/0))
		goto found;
	perf_missing_features.aux_output = true;
	pr_debug2_peo("Kernel has no attr.aux_output support\n");
	attr.aux_output = 0;

	attr.bpf_event = 1;
	if (has_attr_feature(&attr, /*flags=*/0))
		goto found;
	perf_missing_features.bpf = true;
	pr_debug2_peo("switching off bpf_event\n");
	attr.bpf_event = 0;

	attr.ksymbol = 1;
	if (has_attr_feature(&attr, /*flags=*/0))
		goto found;
	perf_missing_features.ksymbol = true;
	pr_debug2_peo("switching off ksymbol\n");
	attr.ksymbol = 0;

	attr.write_backward = 1;
	if (has_attr_feature(&attr, /*flags=*/0))
		goto found;
	perf_missing_features.write_backward = true;
	pr_debug2_peo("switching off write_backward\n");
	attr.write_backward = 0;

	attr.use_clockid = 1;
	attr.clockid = CLOCK_MONOTONIC;
	if (has_attr_feature(&attr, /*flags=*/0))
		goto found;
	perf_missing_features.clockid = true;
	pr_debug2_peo("switching off clockid\n");
	attr.use_clockid = 0;
	attr.clockid = 0;

	if (has_attr_feature(&attr, /*flags=*/PERF_FLAG_FD_CLOEXEC))
		goto found;
	perf_missing_features.cloexec = true;
	pr_debug2_peo("switching off cloexec flag\n");

	attr.mmap2 = 1;
	if (has_attr_feature(&attr, /*flags=*/0))
		goto found;
	perf_missing_features.mmap2 = true;
	pr_debug2_peo("switching off mmap2\n");
	attr.mmap2 = 0;

	/* set this unconditionally? */
	perf_missing_features.sample_id_all = true;
	pr_debug2_peo("switching off sample_id_all\n");

	attr.read_format = PERF_FORMAT_GROUP;
	if (has_attr_feature(&attr, /*flags=*/0))
		goto found;
	perf_missing_features.group_read = true;
	pr_debug2_peo("switching off group read\n");
	attr.read_format = 0;

found:
	detection_done = true;
	errno = old_errno;

check:
	if (evsel->core.attr.inherit &&
	    (evsel->core.attr.sample_type & PERF_SAMPLE_READ) &&
	    perf_missing_features.inherit_sample_read)
		return true;

	if ((evsel->core.attr.branch_sample_type & PERF_SAMPLE_BRANCH_COUNTERS) &&
	    perf_missing_features.branch_counters)
		return true;

	if ((evsel->core.attr.read_format & PERF_FORMAT_LOST) &&
	    perf_missing_features.read_lost)
		return true;

	if ((evsel->core.attr.sample_type & PERF_SAMPLE_WEIGHT_STRUCT) &&
	    perf_missing_features.weight_struct)
		return true;

	if (evsel->core.attr.use_clockid && evsel->core.attr.clockid != CLOCK_MONOTONIC &&
	    !perf_missing_features.clockid) {
		perf_missing_features.clockid_wrong = true;
		return true;
	}

	if (evsel->core.attr.use_clockid && perf_missing_features.clockid)
		return true;

	if ((evsel->open_flags & PERF_FLAG_FD_CLOEXEC) &&
	    perf_missing_features.cloexec)
		return true;

	if (evsel->core.attr.mmap2 && perf_missing_features.mmap2)
		return true;

	if ((evsel->core.attr.branch_sample_type & (PERF_SAMPLE_BRANCH_NO_FLAGS |
						    PERF_SAMPLE_BRANCH_NO_CYCLES)) &&
	    perf_missing_features.lbr_flags)
		return true;

	if (evsel->core.attr.inherit && (evsel->core.attr.read_format & PERF_FORMAT_GROUP) &&
	    perf_missing_features.group_read)
		return true;

	if (evsel->core.attr.ksymbol && perf_missing_features.ksymbol)
		return true;

	if (evsel->core.attr.bpf_event && perf_missing_features.bpf)
		return true;

	if ((evsel->core.attr.branch_sample_type & PERF_SAMPLE_BRANCH_HW_INDEX) &&
	    perf_missing_features.branch_hw_idx)
		return true;

	if (evsel->core.attr.sample_id_all && perf_missing_features.sample_id_all)
		return true;

	return false;
}
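/*
 * The check: section above is what the open path consults after a failed
 * sys_perf_event_open(): when the requested attr relies on a feature that
 * was recorded as missing, evsel__detect_missing_features() returns true,
 * evsel__open_cpu() jumps back to fallback_missing_features, and
 * evsel__disable_missing_features() strips the unsupported bits before the
 * retry.
 */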
static bool evsel__handle_error_quirks(struct evsel *evsel, int error)
{
	/*
	 * AMD core PMU tries to forward events with precise_ip to IBS PMU
	 * implicitly.  But IBS PMU has more restrictions so it can fail with
	 * supported event attributes.  Let's forward it back to the core PMU
	 * by clearing precise_ip only if it's from precise_max (:P).
	 */
	if ((error == -EINVAL || error == -ENOENT) && x86__is_amd_cpu() &&
	    evsel->core.attr.precise_ip && evsel->precise_max) {
		evsel->core.attr.precise_ip = 0;
		pr_debug2_peo("removing precise_ip on AMD\n");
		display_attr(&evsel->core.attr);
		return true;
	}

	return false;
}
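/*
 * Example of the quirk above: an AMD core PMU event opened with :P
 * (precise_max) may be forwarded to the IBS PMU and rejected with EINVAL
 * or ENOENT; clearing precise_ip lets the retry in evsel__open_cpu() land
 * on the core PMU instead.
 */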
static int evsel__open_cpu(struct evsel *evsel, struct perf_cpu_map *cpus,
		struct perf_thread_map *threads,
		int start_cpu_map_idx, int end_cpu_map_idx)
{
	int idx, thread, nthreads;
	int pid = -1, err, old_errno;
	enum rlimit_action set_rlimit = NO_CHANGE;

	if (evsel__is_retire_lat(evsel))
		return tpebs_start(evsel->evlist);

	err = __evsel__prepare_open(evsel, cpus, threads);
	if (err)
		return err;

	if (cpus == NULL)
		cpus = empty_cpu_map;

	if (threads == NULL)
		threads = empty_thread_map;

	nthreads = perf_thread_map__nr(threads);

	if (evsel->cgrp)
		pid = evsel->cgrp->fd;

fallback_missing_features:
	evsel__disable_missing_features(evsel);

	pr_debug3("Opening: %s\n", evsel__name(evsel));
	display_attr(&evsel->core.attr);

	if (evsel__is_tool(evsel)) {
		return evsel__tool_pmu_open(evsel, threads,
					    start_cpu_map_idx,
					    end_cpu_map_idx);
	}
	if (evsel__is_hwmon(evsel)) {
		return evsel__hwmon_pmu_open(evsel, threads,
					     start_cpu_map_idx,
					     end_cpu_map_idx);
	}

	for (idx = start_cpu_map_idx; idx < end_cpu_map_idx; idx++) {

		for (thread = 0; thread < nthreads; thread++) {
			int fd, group_fd;
retry_open:
			if (thread >= nthreads)
				break;

			if (!evsel->cgrp && !evsel->core.system_wide)
				pid = perf_thread_map__pid(threads, thread);

			group_fd = get_group_fd(evsel, idx, thread);

			if (group_fd == -2) {
				pr_debug("broken group leader for %s\n", evsel->name);
				err = -EINVAL;
				goto out_close;
			}

			/* Debug message used by test scripts */
			pr_debug2_peo("sys_perf_event_open: pid %d cpu %d group_fd %d flags %#lx",
				      pid, perf_cpu_map__cpu(cpus, idx).cpu, group_fd, evsel->open_flags);

			fd = sys_perf_event_open(&evsel->core.attr, pid,
						 perf_cpu_map__cpu(cpus, idx).cpu,
						 group_fd, evsel->open_flags);

			FD(evsel, idx, thread) = fd;

			if (fd < 0) {
				err = -errno;

				pr_debug2_peo("\nsys_perf_event_open failed, error %d\n",
					      err);
				goto try_fallback;
			}

			bpf_counter__install_pe(evsel, idx, fd);

			if (unlikely(test_attr__enabled())) {
				test_attr__open(&evsel->core.attr, pid,
						perf_cpu_map__cpu(cpus, idx),
						fd, group_fd, evsel->open_flags);
			}

			/* Debug message used by test scripts */
			pr_debug2_peo(" = %d\n", fd);

			if (evsel->bpf_fd >= 0) {
				int evt_fd = fd;
				int bpf_fd = evsel->bpf_fd;

				err = ioctl(evt_fd,
					    PERF_EVENT_IOC_SET_BPF,
					    bpf_fd);
				if (err && errno != EEXIST) {
					pr_err("failed to attach bpf fd %d: %s\n",
					       bpf_fd, strerror(errno));
					err = -EINVAL;
					goto out_close;
				}
			}

			set_rlimit = NO_CHANGE;

			/*
			 * If we succeeded but had to kill clockid, fail and
			 * have evsel__open_strerror() print us a nice error.
			 */
			if (perf_missing_features.clockid ||
			    perf_missing_features.clockid_wrong) {
				err = -EINVAL;
				goto out_close;
			}
		}
	}

	return 0;

try_fallback:
	if (evsel__ignore_missing_thread(evsel, perf_cpu_map__nr(cpus),
					 idx, threads, thread, err)) {
		/* We just removed 1 thread, so lower the upper nthreads limit. */
		nthreads--;

		/* ... and pretend like nothing has happened. */
		err = 0;
		goto retry_open;
	}
	/*
	 * perf stat needs between 5 and 22 fds per CPU. When we run out
	 * of them try to increase the limits.
	 */
	if (err == -EMFILE && rlimit__increase_nofile(&set_rlimit))
		goto retry_open;

	if (err == -EOPNOTSUPP && evsel__precise_ip_fallback(evsel))
		goto retry_open;

	if (err == -EINVAL && evsel__detect_missing_features(evsel))
		goto fallback_missing_features;

	if (evsel__handle_error_quirks(evsel, err))
		goto retry_open;

out_close:
	if (err)
		threads->err_thread = thread;

	old_errno = errno;
	do {
		while (--thread >= 0) {
			if (FD(evsel, idx, thread) >= 0)
				close(FD(evsel, idx, thread));
			FD(evsel, idx, thread) = -1;
		}
		thread = nthreads;
	} while (--idx >= 0);
	errno = old_errno;
	return err;
}
int evsel__open(struct evsel *evsel, struct perf_cpu_map *cpus,
		struct perf_thread_map *threads)
{
	return evsel__open_cpu(evsel, cpus, threads, 0, perf_cpu_map__nr(cpus));
}
void evsel__close(struct evsel *evsel)
{
	if (evsel__is_retire_lat(evsel))
		tpebs_delete();
	perf_evsel__close(&evsel->core);
	perf_evsel__free_id(&evsel->core);
}
int evsel__open_per_cpu(struct evsel *evsel, struct perf_cpu_map *cpus, int cpu_map_idx)
{
	if (cpu_map_idx == -1)
		return evsel__open_cpu(evsel, cpus, NULL, 0, perf_cpu_map__nr(cpus));

	return evsel__open_cpu(evsel, cpus, NULL, cpu_map_idx, cpu_map_idx + 1);
}
int evsel__open_per_thread(struct evsel *evsel, struct perf_thread_map *threads)
{
	return evsel__open(evsel, NULL, threads);
}
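/*
 * Typical usage of the open helpers above (illustrative sketch, assuming
 * the caller has a populated struct target named "target"):
 *
 *	char msg[512];
 *	int err = evsel__open(evsel, evsel->core.cpus, evsel->core.threads);
 *
 *	if (err < 0) {
 *		evsel__open_strerror(evsel, &target, -err, msg, sizeof(msg));
 *		pr_err("%s\n", msg);
 *	}
 *
 * evsel__open_per_cpu() limits the open to one index of the cpu map and
 * evsel__open_per_thread() opens without any cpu map.
 */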
static int perf_evsel__parse_id_sample(const struct evsel *evsel,
				       const union perf_event *event,
				       struct perf_sample *sample)
{
	u64 type = evsel->core.attr.sample_type;
	const __u64 *array = event->sample.array;
	bool swapped = evsel->needs_swap;
	union u64_swap u;

	array += ((event->header.size -
		   sizeof(event->header)) / sizeof(u64)) - 1;

	if (type & PERF_SAMPLE_IDENTIFIER) {
		sample->id = *array;
		array--;
	}

	if (type & PERF_SAMPLE_CPU) {
		u.val64 = *array;
		if (swapped) {
			/* undo swap of u64, then swap on individual u32s */
			u.val64 = bswap_64(u.val64);
			u.val32[0] = bswap_32(u.val32[0]);
		}

		sample->cpu = u.val32[0];
		array--;
	}

	if (type & PERF_SAMPLE_STREAM_ID) {
		sample->stream_id = *array;
		array--;
	}

	if (type & PERF_SAMPLE_ID) {
		sample->id = *array;
		array--;
	}

	if (type & PERF_SAMPLE_TIME) {
		sample->time = *array;
		array--;
	}

	if (type & PERF_SAMPLE_TID) {
		u.val64 = *array;
		if (swapped) {
			/* undo swap of u64, then swap on individual u32s */
			u.val64 = bswap_64(u.val64);
			u.val32[0] = bswap_32(u.val32[0]);
			u.val32[1] = bswap_32(u.val32[1]);
		}

		sample->pid = u.val32[0];
		sample->tid = u.val32[1];
		array--;
	}

	return 0;
}
static inline bool overflow(const void *endp, u16 max_size, const void *offset,
			    u64 size)
{
	return size > max_size || offset + size > endp;
}

#define OVERFLOW_CHECK(offset, size, max_size)				\
	do {								\
		if (overflow(endp, (max_size), (offset), (size)))	\
			return -EFAULT;					\
	} while (0)

#define OVERFLOW_CHECK_u64(offset) \
	OVERFLOW_CHECK(offset, sizeof(u64), sizeof(u64))
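/*
 * The two macros above assume the enclosing function declares
 * "const void *endp" and "u16 max_size" and returns an int, e.g.:
 *
 *	OVERFLOW_CHECK_u64(array);	// one more u64 must fit in the event
 *	data->time = *array;
 *	array++;
 *
 * so the parser bails out with -EFAULT as soon as the next read would run
 * past the end of the record.
 */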
static int
perf_event__check_size(union perf_event *event, unsigned int sample_size)
{
	/*
	 * The evsel's sample_size is based on PERF_SAMPLE_MASK which includes
	 * up to PERF_SAMPLE_PERIOD.  After that overflow() must be used to
	 * check the format does not go past the end of the event.
	 */
	if (sample_size + sizeof(event->header) > event->header.size)
		return -EFAULT;

	return 0;
}
void __weak arch_perf_parse_sample_weight(struct perf_sample *data,
					  const __u64 *array,
					  u64 type __maybe_unused)
{
	data->weight = *array;
}
u64 evsel__bitfield_swap_branch_flags(u64 value)
{
	u64 new_val = 0;

	/*
	 * branch_flags
	 * union {
	 * 	u64 values;
	 * 	struct {
	 * 		mispred:1	//target mispredicted
	 * 		predicted:1	//target predicted
	 * 		in_tx:1		//in transaction
	 * 		abort:1		//transaction abort
	 * 		cycles:16	//cycle count to last branch
	 * 		type:4		//branch type
	 * 		spec:2		//branch speculation info
	 * 		new_type:4	//additional branch type
	 * 		priv:3		//privilege level
	 * 		reserved:31
	 * 	}
	 * }
	 *
	 * Avoid bswap64() the entire branch_flag.value,
	 * as it has variable bit-field sizes. Instead the
	 * macro takes the bit-field position/size,
	 * swaps it based on the host endianness.
	 */
	if (host_is_bigendian()) {
		new_val = bitfield_swap(value, 0, 1);
		new_val |= bitfield_swap(value, 1, 1);
		new_val |= bitfield_swap(value, 2, 1);
		new_val |= bitfield_swap(value, 3, 1);
		new_val |= bitfield_swap(value, 4, 16);
		new_val |= bitfield_swap(value, 20, 4);
		new_val |= bitfield_swap(value, 24, 2);
		new_val |= bitfield_swap(value, 26, 4);
		new_val |= bitfield_swap(value, 30, 3);
		new_val |= bitfield_swap(value, 33, 31);
	} else {
		new_val = bitfield_swap(value, 63, 1);
		new_val |= bitfield_swap(value, 62, 1);
		new_val |= bitfield_swap(value, 61, 1);
		new_val |= bitfield_swap(value, 60, 1);
		new_val |= bitfield_swap(value, 44, 16);
		new_val |= bitfield_swap(value, 40, 4);
		new_val |= bitfield_swap(value, 38, 2);
		new_val |= bitfield_swap(value, 34, 4);
		new_val |= bitfield_swap(value, 31, 3);
		new_val |= bitfield_swap(value, 0, 31);
	}

	return new_val;
}
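/*
 * For example, the 16-bit "cycles" field is extracted with (4, 16) in one
 * branch and (44, 16) in the other: the two positions are related by
 * 64 - pos - size (64 - 4 - 16 = 44), and the same arithmetic links every
 * other pair of positions in the two branches above.
 */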
static inline bool evsel__has_branch_counters(const struct evsel *evsel)
{
	struct evsel *leader = evsel__leader(evsel);

	/* The branch counters feature only supports group */
	if (!leader || !evsel->evlist)
		return false;

	if (evsel->evlist->nr_br_cntr < 0)
		evlist__update_br_cntr(evsel->evlist);

	if (leader->br_cntr_nr > 0)
		return true;

	return false;
}
int evsel__parse_sample(struct evsel *evsel, union perf_event *event,
			struct perf_sample *data)
{
	u64 type = evsel->core.attr.sample_type;
	bool swapped = evsel->needs_swap;
	const __u64 *array;
	u16 max_size = event->header.size;
	const void *endp = (void *)event + max_size;
	u64 sz;

	/*
	 * used for cross-endian analysis. See git commit 65014ab3
	 * for why this goofiness is needed.
	 */
	union u64_swap u;

	memset(data, 0, sizeof(*data));
	data->cpu = data->pid = data->tid = -1;
	data->stream_id = data->id = data->time = -1ULL;
	data->period = evsel->core.attr.sample_period;
	data->cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
	data->misc    = event->header.misc;
	data->data_src = PERF_MEM_DATA_SRC_NONE;

	if (event->header.type != PERF_RECORD_SAMPLE) {
		if (!evsel->core.attr.sample_id_all)
			return 0;
		return perf_evsel__parse_id_sample(evsel, event, data);
	}

	array = event->sample.array;

	if (perf_event__check_size(event, evsel->sample_size))
		return -EFAULT;

	if (type & PERF_SAMPLE_IDENTIFIER) {
		data->id = *array;
		array++;
	}

	if (type & PERF_SAMPLE_IP) {
		data->ip = *array;
		array++;
	}

	if (type & PERF_SAMPLE_TID) {
		u.val64 = *array;
		if (swapped) {
			/* undo swap of u64, then swap on individual u32s */
			u.val64 = bswap_64(u.val64);
			u.val32[0] = bswap_32(u.val32[0]);
			u.val32[1] = bswap_32(u.val32[1]);
		}

		data->pid = u.val32[0];
		data->tid = u.val32[1];
		array++;
	}

	if (type & PERF_SAMPLE_TIME) {
		data->time = *array;
		array++;
	}

	if (type & PERF_SAMPLE_ADDR) {
		data->addr = *array;
		array++;
	}

	if (type & PERF_SAMPLE_ID) {
		data->id = *array;
		array++;
	}

	if (type & PERF_SAMPLE_STREAM_ID) {
		data->stream_id = *array;
		array++;
	}

	if (type & PERF_SAMPLE_CPU) {
		u.val64 = *array;
		if (swapped) {
			/* undo swap of u64, then swap on individual u32s */
			u.val64 = bswap_64(u.val64);
			u.val32[0] = bswap_32(u.val32[0]);
		}

		data->cpu = u.val32[0];
		array++;
	}

	if (type & PERF_SAMPLE_PERIOD) {
		data->period = *array;
		array++;
	}

	if (type & PERF_SAMPLE_READ) {
		u64 read_format = evsel->core.attr.read_format;

		OVERFLOW_CHECK_u64(array);
		if (read_format & PERF_FORMAT_GROUP)
			data->read.group.nr = *array;
		else
			data->read.one.value = *array;

		array++;

		if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) {
			OVERFLOW_CHECK_u64(array);
			data->read.time_enabled = *array;
			array++;
		}

		if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) {
			OVERFLOW_CHECK_u64(array);
			data->read.time_running = *array;
			array++;
		}

		/* PERF_FORMAT_ID is forced for PERF_SAMPLE_READ */
		if (read_format & PERF_FORMAT_GROUP) {
			const u64 max_group_nr = UINT64_MAX /
					sizeof(struct sample_read_value);

			if (data->read.group.nr > max_group_nr)
				return -EFAULT;

			sz = data->read.group.nr * sample_read_value_size(read_format);
			OVERFLOW_CHECK(array, sz, max_size);
			data->read.group.values =
					(struct sample_read_value *)array;
			array = (void *)array + sz;
		} else {
			OVERFLOW_CHECK_u64(array);
			data->read.one.id = *array;
			array++;

			if (read_format & PERF_FORMAT_LOST) {
				OVERFLOW_CHECK_u64(array);
				data->read.one.lost = *array;
				array++;
			}
		}
	}

	if (type & PERF_SAMPLE_CALLCHAIN) {
		const u64 max_callchain_nr = UINT64_MAX / sizeof(u64);

		OVERFLOW_CHECK_u64(array);
		data->callchain = (struct ip_callchain *)array++;
		if (data->callchain->nr > max_callchain_nr)
			return -EFAULT;
		sz = data->callchain->nr * sizeof(u64);
		OVERFLOW_CHECK(array, sz, max_size);
		array = (void *)array + sz;
	}

	if (type & PERF_SAMPLE_RAW) {
		OVERFLOW_CHECK_u64(array);
		u.val64 = *array;

		/*
		 * Undo swap of u64, then swap on individual u32s,
		 * get the size of the raw area and undo all of the
		 * swap. The pevent interface handles endianness by
		 * itself.
		 */
		if (swapped) {
			u.val64 = bswap_64(u.val64);
			u.val32[0] = bswap_32(u.val32[0]);
			u.val32[1] = bswap_32(u.val32[1]);
		}
		data->raw_size = u.val32[0];

		/*
		 * The raw data is aligned on 64bits including the
		 * u32 size, so it's safe to use mem_bswap_64.
		 */
		if (swapped)
			mem_bswap_64((void *) array, data->raw_size);

		array = (void *)array + sizeof(u32);

		OVERFLOW_CHECK(array, data->raw_size, max_size);
		data->raw_data = (void *)array;
		array = (void *)array + data->raw_size;
	}

	if (type & PERF_SAMPLE_BRANCH_STACK) {
		const u64 max_branch_nr = UINT64_MAX /
					  sizeof(struct branch_entry);
		struct branch_entry *e;
		unsigned int i;

		OVERFLOW_CHECK_u64(array);
		data->branch_stack = (struct branch_stack *)array++;

		if (data->branch_stack->nr > max_branch_nr)
			return -EFAULT;

		sz = data->branch_stack->nr * sizeof(struct branch_entry);
		if (evsel__has_branch_hw_idx(evsel)) {
			sz += sizeof(u64);
			e = &data->branch_stack->entries[0];
		} else {
			data->no_hw_idx = true;
			/*
			 * if the PERF_SAMPLE_BRANCH_HW_INDEX is not applied,
			 * only nr and entries[] will be output by kernel.
			 */
			e = (struct branch_entry *)&data->branch_stack->hw_idx;
		}

		if (swapped) {
			/*
			 * struct branch_flag does not have endian
			 * specific bit field definition. And bswap
			 * will not resolve the issue, since these
			 * are bit fields.
			 *
			 * evsel__bitfield_swap_branch_flags() uses a
			 * bitfield_swap macro to swap the bit position
			 * based on the host endians.
			 */
			for (i = 0; i < data->branch_stack->nr; i++, e++)
				e->flags.value = evsel__bitfield_swap_branch_flags(e->flags.value);
		}

		OVERFLOW_CHECK(array, sz, max_size);
		array = (void *)array + sz;

		if (evsel__has_branch_counters(evsel)) {
			data->branch_stack_cntr = (u64 *)array;
			sz = data->branch_stack->nr * sizeof(u64);

			OVERFLOW_CHECK(array, sz, max_size);
			array = (void *)array + sz;
		}
	}

	if (type & PERF_SAMPLE_REGS_USER) {
		OVERFLOW_CHECK_u64(array);
		data->user_regs.abi = *array;
		array++;

		if (data->user_regs.abi) {
			u64 mask = evsel->core.attr.sample_regs_user;

			sz = hweight64(mask) * sizeof(u64);
			OVERFLOW_CHECK(array, sz, max_size);
			data->user_regs.mask = mask;
			data->user_regs.regs = (u64 *)array;
			array = (void *)array + sz;
		}
	}

	if (type & PERF_SAMPLE_STACK_USER) {
		OVERFLOW_CHECK_u64(array);
		sz = *array++;

		data->user_stack.offset = ((char *)(array - 1)
					  - (char *) event);

		if (!sz) {
			data->user_stack.size = 0;
		} else {
			OVERFLOW_CHECK(array, sz, max_size);
			data->user_stack.data = (char *)array;
			array = (void *)array + sz;
			OVERFLOW_CHECK_u64(array);
			data->user_stack.size = *array++;
			if (WARN_ONCE(data->user_stack.size > sz,
				      "user stack dump failure\n"))
				return -EFAULT;
		}
	}

	if (type & PERF_SAMPLE_WEIGHT_TYPE) {
		OVERFLOW_CHECK_u64(array);
		arch_perf_parse_sample_weight(data, array, type);
		array++;
	}

	if (type & PERF_SAMPLE_DATA_SRC) {
		OVERFLOW_CHECK_u64(array);
		data->data_src = *array;
		array++;
	}

	if (type & PERF_SAMPLE_TRANSACTION) {
		OVERFLOW_CHECK_u64(array);
		data->transaction = *array;
		array++;
	}

	data->intr_regs.abi = PERF_SAMPLE_REGS_ABI_NONE;
	if (type & PERF_SAMPLE_REGS_INTR) {
		OVERFLOW_CHECK_u64(array);
		data->intr_regs.abi = *array;
		array++;

		if (data->intr_regs.abi != PERF_SAMPLE_REGS_ABI_NONE) {
			u64 mask = evsel->core.attr.sample_regs_intr;

			sz = hweight64(mask) * sizeof(u64);
			OVERFLOW_CHECK(array, sz, max_size);
			data->intr_regs.mask = mask;
			data->intr_regs.regs = (u64 *)array;
			array = (void *)array + sz;
		}
	}

	data->phys_addr = 0;
	if (type & PERF_SAMPLE_PHYS_ADDR) {
		data->phys_addr = *array;
		array++;
	}

	data->cgroup = 0;
	if (type & PERF_SAMPLE_CGROUP) {
		data->cgroup = *array;
		array++;
	}

	data->data_page_size = 0;
	if (type & PERF_SAMPLE_DATA_PAGE_SIZE) {
		data->data_page_size = *array;
		array++;
	}

	data->code_page_size = 0;
	if (type & PERF_SAMPLE_CODE_PAGE_SIZE) {
		data->code_page_size = *array;
		array++;
	}

	if (type & PERF_SAMPLE_AUX) {
		OVERFLOW_CHECK_u64(array);
		sz = *array++;

		OVERFLOW_CHECK(array, sz, max_size);
		/* Undo swap of data */
		if (swapped)
			mem_bswap_64((char *)array, sz);
		data->aux_sample.size = sz;
		data->aux_sample.data = (char *)array;
		array = (void *)array + sz;
	}

	return 0;
}
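/*
 * The parse order above mirrors the order in which the kernel emits sample
 * fields into the ring buffer (the PERF_SAMPLE_* handling in
 * kernel/events/core.c), so "array" simply advances one u64, or one
 * variable-sized block, per requested sample_type bit.
 */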
int evsel__parse_sample_timestamp(struct evsel *evsel, union perf_event *event,
				  u64 *timestamp)
{
	u64 type = evsel->core.attr.sample_type;
	const __u64 *array;

	if (!(type & PERF_SAMPLE_TIME))
		return -1;

	if (event->header.type != PERF_RECORD_SAMPLE) {
		struct perf_sample data = {
			.time = -1ULL,
		};

		if (!evsel->core.attr.sample_id_all)
			return -1;
		if (perf_evsel__parse_id_sample(evsel, event, &data))
			return -1;

		*timestamp = data.time;
		return 0;
	}

	array = event->sample.array;

	if (perf_event__check_size(event, evsel->sample_size))
		return -EFAULT;

	if (type & PERF_SAMPLE_IDENTIFIER)
		array++;

	if (type & PERF_SAMPLE_IP)
		array++;

	if (type & PERF_SAMPLE_TID)
		array++;

	if (type & PERF_SAMPLE_TIME)
		*timestamp = *array;

	return 0;
}
u16 evsel__id_hdr_size(const struct evsel *evsel)
{
	u64 sample_type = evsel->core.attr.sample_type;
	u16 size = 0;

	if (sample_type & PERF_SAMPLE_TID)
		size += sizeof(u64);

	if (sample_type & PERF_SAMPLE_TIME)
		size += sizeof(u64);

	if (sample_type & PERF_SAMPLE_ID)
		size += sizeof(u64);

	if (sample_type & PERF_SAMPLE_STREAM_ID)
		size += sizeof(u64);

	if (sample_type & PERF_SAMPLE_CPU)
		size += sizeof(u64);

	if (sample_type & PERF_SAMPLE_IDENTIFIER)
		size += sizeof(u64);

	return size;
}
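/*
 * For example, with sample_type = PERF_SAMPLE_TID | PERF_SAMPLE_TIME |
 * PERF_SAMPLE_IDENTIFIER, non-sample events carry a 3 * 8 = 24 byte id
 * trailer, which is the value evsel__id_hdr_size() returns here.
 */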
#ifdef HAVE_LIBTRACEEVENT
struct tep_format_field *evsel__field(struct evsel *evsel, const char *name)
{
	return tep_find_field(evsel->tp_format, name);
}

struct tep_format_field *evsel__common_field(struct evsel *evsel, const char *name)
{
	return tep_find_common_field(evsel->tp_format, name);
}
void *evsel__rawptr(struct evsel *evsel, struct perf_sample *sample, const char *name)
{
	struct tep_format_field *field = evsel__field(evsel, name);
	int offset;

	if (!field)
		return NULL;

	offset = field->offset;

	if (field->flags & TEP_FIELD_IS_DYNAMIC) {
		offset = *(int *)(sample->raw_data + field->offset);
		offset &= 0xffff;
		if (tep_field_is_relative(field->flags))
			offset += field->offset + field->size;
	}

	return sample->raw_data + offset;
}
u64 format_field__intval(struct tep_format_field *field, struct perf_sample *sample,
			 bool needs_swap)
{
	u64 value;
	void *ptr = sample->raw_data + field->offset;

	switch (field->size) {
	case 1:
		return *(u8 *)ptr;
	case 2:
		value = *(u16 *)ptr;
		break;
	case 4:
		value = *(u32 *)ptr;
		break;
	case 8:
		memcpy(&value, ptr, sizeof(u64));
		break;
	default:
		return 0;
	}

	if (!needs_swap)
		return value;

	switch (field->size) {
	case 2:
		return bswap_16(value);
	case 4:
		return bswap_32(value);
	case 8:
		return bswap_64(value);
	default:
		return 0;
	}

	return 0;
}
u64 evsel__intval(struct evsel *evsel, struct perf_sample *sample, const char *name)
{
	struct tep_format_field *field = evsel__field(evsel, name);

	return field ? format_field__intval(field, sample, evsel->needs_swap) : 0;
}
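/*
 * Typical tracepoint usage (illustrative): for a sched:sched_switch sample
 * the next pid can be read with
 *
 *	u64 next_pid = evsel__intval(evsel, sample, "next_pid");
 *
 * which yields 0 when the named field is absent from the event format.
 */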
u64 evsel__intval_common(struct evsel *evsel, struct perf_sample *sample, const char *name)
{
	struct tep_format_field *field = evsel__common_field(evsel, name);

	return field ? format_field__intval(field, sample, evsel->needs_swap) : 0;
}
char evsel__taskstate(struct evsel *evsel, struct perf_sample *sample, const char *name)
{
	static struct tep_format_field *prev_state_field;
	static const char *states;
	struct tep_format_field *field;
	unsigned long long val;
	unsigned int bit;
	char state = '?'; /* '?' denotes unknown task state */

	field = evsel__field(evsel, name);

	if (!field)
		return state;

	if (!states || field != prev_state_field) {
		states = parse_task_states(field);
		if (!states)
			return state;
		prev_state_field = field;
	}

	/*
	 * Note since the kernel exposes TASK_REPORT_MAX to userspace
	 * to denote the 'preempted' state, we might as well report
	 * 'R' for this case, which makes sense to users as well.
	 *
	 * We can change this if we have a good reason in the future.
	 */
	val = evsel__intval(evsel, sample, name);
	bit = val ? ffs(val) : 0;
	state = (!bit || bit > strlen(states)) ? 'R' : states[bit-1];
	return state;
}
bool evsel__fallback(struct evsel *evsel, struct target *target, int err,
		     char *msg, size_t msgsize)
{
	int paranoid;

	if ((err == ENOENT || err == ENXIO || err == ENODEV) &&
	    evsel->core.attr.type   == PERF_TYPE_HARDWARE &&
	    evsel->core.attr.config == PERF_COUNT_HW_CPU_CYCLES) {
		/*
		 * If it's cycles then fall back to hrtimer based cpu-clock sw
		 * counter, which is always available even if no PMU support.
		 *
		 * PPC returns ENXIO until 2.6.37 (behavior changed with commit
		 * b0a873e).
		 */
		evsel->core.attr.type   = PERF_TYPE_SOFTWARE;
		evsel->core.attr.config = target__has_cpu(target)
			? PERF_COUNT_SW_CPU_CLOCK
			: PERF_COUNT_SW_TASK_CLOCK;
		scnprintf(msg, msgsize,
			"The cycles event is not supported, trying to fall back to %s",
			target__has_cpu(target) ? "cpu-clock" : "task-clock");

		zfree(&evsel->name);
		return true;
	} else if (err == EACCES && !evsel->core.attr.exclude_kernel &&
		   (paranoid = perf_event_paranoid()) > 1) {
		const char *name = evsel__name(evsel);
		char *new_name;
		const char *sep = ":";

		/* If event has exclude user then don't exclude kernel. */
		if (evsel->core.attr.exclude_user)
			return false;

		/* Is there already the separator in the name. */
		if (strchr(name, '/') ||
		    (strchr(name, ':') && !evsel->is_libpfm_event))
			sep = "";

		if (asprintf(&new_name, "%s%su", name, sep) < 0)
			return false;

		free(evsel->name);
		evsel->name = new_name;
		scnprintf(msg, msgsize, "kernel.perf_event_paranoid=%d, trying "
			  "to fall back to excluding kernel and hypervisor "
			  " samples", paranoid);
		evsel->core.attr.exclude_kernel = 1;
		evsel->core.attr.exclude_hv     = 1;

		return true;
	} else if (err == EOPNOTSUPP && !evsel->core.attr.exclude_guest &&
		   !evsel->exclude_GH) {
		const char *name = evsel__name(evsel);
		char *new_name;
		const char *sep = ":";

		/* Is there already the separator in the name. */
		if (strchr(name, '/') ||
		    (strchr(name, ':') && !evsel->is_libpfm_event))
			sep = "";

		if (asprintf(&new_name, "%s%sH", name, sep) < 0)
			return false;

		free(evsel->name);
		evsel->name = new_name;
		/* Apple M1 requires exclude_guest */
		scnprintf(msg, msgsize, "trying to fall back to excluding guest samples");
		evsel->core.attr.exclude_guest = 1;

		return true;
	}

	return false;
}
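/*
 * For instance, running "perf record -e cycles" as an unprivileged user
 * with kernel.perf_event_paranoid=2 typically hits the EACCES branch
 * above: the event is renamed to "cycles:u" and retried with
 * exclude_kernel and exclude_hv set.
 */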
static bool find_process(const char *name)
{
	size_t len = strlen(name);
	DIR *dir;
	struct dirent *d;
	int ret = -1;

	dir = opendir(procfs__mountpoint());
	if (!dir)
		return false;

	/* Walk through the directory. */
	while (ret && (d = readdir(dir)) != NULL) {
		char path[PATH_MAX];
		char *data;
		size_t size;

		if ((d->d_type != DT_DIR) ||
		     !strcmp(".", d->d_name) ||
		     !strcmp("..", d->d_name))
			continue;

		scnprintf(path, sizeof(path), "%s/%s/comm",
			  procfs__mountpoint(), d->d_name);

		if (filename__read_str(path, &data, &size))
			continue;

		ret = strncmp(name, data, len);
		free(data);
	}

	closedir(dir);
	return ret ? false : true;
}
int __weak arch_evsel__open_strerror(struct evsel *evsel __maybe_unused,
				     char *msg __maybe_unused,
				     size_t size __maybe_unused)
{
	return 0;
}
int evsel__open_strerror(struct evsel *evsel, struct target *target,
			 int err, char *msg, size_t size)
{
	char sbuf[STRERR_BUFSIZE];
	int printed = 0, enforced = 0;
	int ret;

	switch (err) {
	case EPERM:
	case EACCES:
		printed += scnprintf(msg + printed, size - printed,
			"Access to performance monitoring and observability operations is limited.\n");

		if (!sysfs__read_int("fs/selinux/enforce", &enforced)) {
			if (enforced) {
				printed += scnprintf(msg + printed, size - printed,
					"Enforced MAC policy settings (SELinux) can limit access to performance\n"
					"monitoring and observability operations. Inspect system audit records for\n"
					"more perf_event access control information and adjusting the policy.\n");
			}
		}

		if (err == EPERM)
			printed += scnprintf(msg, size,
				"No permission to enable %s event.\n\n", evsel__name(evsel));

		return scnprintf(msg + printed, size - printed,
		 "Consider adjusting /proc/sys/kernel/perf_event_paranoid setting to open\n"
		 "access to performance monitoring and observability operations for processes\n"
		 "without CAP_PERFMON, CAP_SYS_PTRACE or CAP_SYS_ADMIN Linux capability.\n"
		 "More information can be found at 'Perf events and tool security' document:\n"
		 "https://www.kernel.org/doc/html/latest/admin-guide/perf-security.html\n"
		 "perf_event_paranoid setting is %d:\n"
		 "  -1: Allow use of (almost) all events by all users\n"
		 "      Ignore mlock limit after perf_event_mlock_kb without CAP_IPC_LOCK\n"
		 ">= 0: Disallow raw and ftrace function tracepoint access\n"
		 ">= 1: Disallow CPU event access\n"
		 ">= 2: Disallow kernel profiling\n"
		 "To make the adjusted perf_event_paranoid setting permanent preserve it\n"
		 "in /etc/sysctl.conf (e.g. kernel.perf_event_paranoid = <setting>)",
		 perf_event_paranoid());
	case ENOENT:
		return scnprintf(msg, size, "The %s event is not supported.", evsel__name(evsel));
	case EMFILE:
		return scnprintf(msg, size, "%s",
			 "Too many events are opened.\n"
			 "Probably the maximum number of open file descriptors has been reached.\n"
			 "Hint: Try again after reducing the number of events.\n"
			 "Hint: Try increasing the limit with 'ulimit -n <limit>'");
	case ENOMEM:
		if (evsel__has_callchain(evsel) &&
		    access("/proc/sys/kernel/perf_event_max_stack", F_OK) == 0)
			return scnprintf(msg, size,
					 "Not enough memory to setup event with callchain.\n"
					 "Hint: Try tweaking /proc/sys/kernel/perf_event_max_stack\n"
					 "Hint: Current value: %d", sysctl__max_stack());
		break;
	case ENODEV:
		if (target->cpu_list)
			return scnprintf(msg, size, "%s",
	 "No such device - did you specify an out-of-range profile CPU?");
		break;
	case EOPNOTSUPP:
		if (evsel->core.attr.sample_type & PERF_SAMPLE_BRANCH_STACK)
			return scnprintf(msg, size,
	 "%s: PMU Hardware or event type doesn't support branch stack sampling.",
					 evsel__name(evsel));
		if (evsel->core.attr.aux_output)
			return scnprintf(msg, size,
	 "%s: PMU Hardware doesn't support 'aux_output' feature",
					 evsel__name(evsel));
		if (evsel->core.attr.sample_period != 0)
			return scnprintf(msg, size,
	 "%s: PMU Hardware doesn't support sampling/overflow-interrupts. Try 'perf stat'",
					 evsel__name(evsel));
		if (evsel->core.attr.precise_ip)
			return scnprintf(msg, size, "%s",
	"\'precise\' request may not be supported. Try removing 'p' modifier.");
#if defined(__i386__) || defined(__x86_64__)
		if (evsel->core.attr.type == PERF_TYPE_HARDWARE)
			return scnprintf(msg, size, "%s",
	"No hardware sampling interrupt available.\n");
#endif
		break;
	case EBUSY:
		if (find_process("oprofiled"))
			return scnprintf(msg, size,
	 "The PMU counters are busy/taken by another profiler.\n"
	 "We found oprofile daemon running, please stop it and try again.");
		break;
	case EINVAL:
		if (evsel->core.attr.sample_type & PERF_SAMPLE_CODE_PAGE_SIZE && perf_missing_features.code_page_size)
			return scnprintf(msg, size, "Asking for the code page size isn't supported by this kernel.");
		if (evsel->core.attr.sample_type & PERF_SAMPLE_DATA_PAGE_SIZE && perf_missing_features.data_page_size)
			return scnprintf(msg, size, "Asking for the data page size isn't supported by this kernel.");
		if (evsel->core.attr.write_backward && perf_missing_features.write_backward)
			return scnprintf(msg, size, "Reading from overwrite event is not supported by this kernel.");
		if (perf_missing_features.clockid)
			return scnprintf(msg, size, "clockid feature not supported.");
		if (perf_missing_features.clockid_wrong)
			return scnprintf(msg, size, "wrong clockid (%d).", clockid);
		if (perf_missing_features.aux_output)
			return scnprintf(msg, size, "The 'aux_output' feature is not supported, update the kernel.");
		if (!target__has_cpu(target))
			return scnprintf(msg, size,
	 "Invalid event (%s) in per-thread mode, enable system wide with '-a'.",
					evsel__name(evsel));
		break;
	case ENODATA:
		return scnprintf(msg, size, "Cannot collect data source with the load latency event alone. "
				 "Please add an auxiliary event in front of the load latency event.");
	default:
		break;
	}

	ret = arch_evsel__open_strerror(evsel, msg, size);
	if (ret)
		return ret;

	return scnprintf(msg, size,
	"The sys_perf_event_open() syscall returned with %d (%s) for event (%s).\n"
	"\"dmesg | grep -i perf\" may provide additional information.\n",
			 err, str_error_r(err, sbuf, sizeof(sbuf)), evsel__name(evsel));
}
struct perf_env *evsel__env(struct evsel *evsel)
{
	if (evsel && evsel->evlist && evsel->evlist->env)
		return evsel->evlist->env;
	return &perf_env;
}
static int store_evsel_ids(struct evsel *evsel, struct evlist *evlist)
{
	int cpu_map_idx, thread;

	if (evsel__is_retire_lat(evsel))
		return 0;

	for (cpu_map_idx = 0; cpu_map_idx < xyarray__max_x(evsel->core.fd); cpu_map_idx++) {
		for (thread = 0; thread < xyarray__max_y(evsel->core.fd);
		     thread++) {
			int fd = FD(evsel, cpu_map_idx, thread);

			if (perf_evlist__id_add_fd(&evlist->core, &evsel->core,
						   cpu_map_idx, thread, fd) < 0)
				return -1;
		}
	}

	return 0;
}
int evsel__store_ids(struct evsel *evsel, struct evlist *evlist)
{
	struct perf_cpu_map *cpus = evsel->core.cpus;
	struct perf_thread_map *threads = evsel->core.threads;

	if (perf_evsel__alloc_id(&evsel->core, perf_cpu_map__nr(cpus), threads->nr))
		return -ENOMEM;

	return store_evsel_ids(evsel, evlist);
}
void evsel__zero_per_pkg(struct evsel *evsel)
{
	struct hashmap_entry *cur;
	size_t bkt;

	if (evsel->per_pkg_mask) {
		hashmap__for_each_entry(evsel->per_pkg_mask, cur, bkt)
			zfree(&cur->pkey);

		hashmap__clear(evsel->per_pkg_mask);
	}
}
/**
 * evsel__is_hybrid - does the evsel have a known PMU that is hybrid. Note, this
 *                    will be false on hybrid systems for hardware and legacy
 *                    cache events.
 */
bool evsel__is_hybrid(const struct evsel *evsel)
{
	if (perf_pmus__num_core_pmus() == 1)
		return false;

	return evsel->core.is_pmu_core;
}
struct evsel *evsel__leader(const struct evsel *evsel)
{
	return container_of(evsel->core.leader, struct evsel, core);
}
bool evsel__has_leader(struct evsel *evsel, struct evsel *leader)
{
	return evsel->core.leader == &leader->core;
}
bool evsel__is_leader(struct evsel *evsel)
{
	return evsel__has_leader(evsel, evsel);
}
void evsel__set_leader(struct evsel *evsel, struct evsel *leader)
{
	evsel->core.leader = &leader->core;
}
int evsel__source_count(const struct evsel *evsel)
{
	struct evsel *pos;
	int count = 0;

	evlist__for_each_entry(evsel->evlist, pos) {
		if (pos->metric_leader == evsel)
			count++;
	}
	return count;
}
bool __weak arch_evsel__must_be_in_group(const struct evsel *evsel __maybe_unused)
{
	return false;
}
/*
 * Remove an event from a given group (leader).
 * Some events, e.g., perf metrics Topdown events,
 * must always be grouped. Ignore the events.
 */
void evsel__remove_from_group(struct evsel *evsel, struct evsel *leader)
{
	if (!arch_evsel__must_be_in_group(evsel) && evsel != leader) {
		evsel__set_leader(evsel, evsel);
		evsel->core.nr_members = 0;
		leader->core.nr_members--;
	}
}
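/*
 * Example: when a weak group such as "{cycles,instructions}:W" fails to
 * open, evlist__reset_weak_group() walks the members and calls
 * evsel__remove_from_group() so each event can be retried on its own,
 * while events that must stay grouped (e.g. topdown metric events) keep
 * their leader.
 */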