// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2011, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
 *
 * Parts came from builtin-{top,stat,record}.c, see those files for further
 * copyright notes.
 */
#include "util/mmap.h"
#include "thread_map.h"
#include "bpf_counter.h"
#include <internal/lib.h> // page_size
#include "bpf-event.h"
#include "util/event.h"
#include "util/string2.h"
#include "util/perf_api_probe.h"
#include "util/evsel_fprintf.h"
#include "util/sample.h"
#include "util/bpf-filter.h"
#include "util/stat.h"
#include "util/util.h"
#include "util/intel-tpebs.h"
#include "parse-events.h"
#include <subcmd/parse-options.h>

#include <sys/ioctl.h>
#include <sys/prctl.h>
#include <sys/timerfd.h>

#include <linux/bitops.h>
#include <linux/hash.h>
#include <linux/log2.h>
#include <linux/err.h>
#include <linux/string.h>
#include <linux/time64.h>
#include <linux/zalloc.h>
#include <perf/evlist.h>
#include <perf/evsel.h>
#include <perf/cpumap.h>
#include <perf/mmap.h>

#include <internal/xyarray.h>

#ifdef LACKS_SIGQUEUE_PROTOTYPE
int sigqueue(pid_t pid, int sig, const union sigval value);
#endif

#define FD(e, x, y) (*(int *)xyarray__entry(e->core.fd, x, y))
#define SID(e, x, y) xyarray__entry(e->core.sample_id, x, y)
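/*
 * Initialize an evlist: wire up the CPU and thread maps in the core evlist
 * and reset the workload, backward-mmap and control-fd bookkeeping to their
 * "not in use" values (-1 / BKW_MMAP_NOTREADY).
 */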
void evlist__init(struct evlist *evlist, struct perf_cpu_map *cpus,
		  struct perf_thread_map *threads)
{
	perf_evlist__init(&evlist->core);
	perf_evlist__set_maps(&evlist->core, cpus, threads);
	evlist->workload.pid = -1;
	evlist->bkw_mmap_state = BKW_MMAP_NOTREADY;
	evlist->ctl_fd.fd = -1;
	evlist->ctl_fd.ack = -1;
	evlist->ctl_fd.pos = -1;
	evlist->nr_br_cntr = -1;
}

struct evlist *evlist__new(void)
{
	struct evlist *evlist = zalloc(sizeof(*evlist));

	if (evlist != NULL)
		evlist__init(evlist, NULL, NULL);

	return evlist;
}

struct evlist *evlist__new_default(void)
{
	struct evlist *evlist = evlist__new();
	bool can_profile_kernel;
	int err;

	if (!evlist)
		return NULL;

	can_profile_kernel = perf_event_paranoid_check(1);
	err = parse_event(evlist, can_profile_kernel ? "cycles:P" : "cycles:Pu");
	if (err) {
		evlist__delete(evlist);
		return NULL;
	}

	if (evlist->core.nr_entries > 1) {
		struct evsel *evsel;

		evlist__for_each_entry(evlist, evsel)
			evsel__set_sample_id(evsel, /*can_sample_identifier=*/false);
	}

	return evlist;
}

struct evlist *evlist__new_dummy(void)
{
	struct evlist *evlist = evlist__new();

	if (evlist && evlist__add_dummy(evlist)) {
		evlist__delete(evlist);
		evlist = NULL;
	}

	return evlist;
}
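/*
 * Rough lifecycle of an evlist as used by the perf tools (a sketch, not a
 * complete example): create it, add evsels, open and mmap the events, enable
 * them, and finally delete the list:
 *
 *	struct evlist *evlist = evlist__new();
 *
 *	parse_event(evlist, "cycles:P");
 *	evlist__open(evlist);
 *	evlist__mmap(evlist, UINT_MAX);
 *	evlist__enable(evlist);
 *	...
 *	evlist__delete(evlist);
 */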
/**
 * evlist__set_id_pos - set the positions of event ids.
 * @evlist: selected event list
 *
 * Events with compatible sample types all have the same id_pos
 * and is_pos.  For convenience, put a copy on evlist.
 */
void evlist__set_id_pos(struct evlist *evlist)
{
	struct evsel *first = evlist__first(evlist);

	evlist->id_pos = first->id_pos;
	evlist->is_pos = first->is_pos;
}

static void evlist__update_id_pos(struct evlist *evlist)
{
	struct evsel *evsel;

	evlist__for_each_entry(evlist, evsel)
		evsel__calc_id_pos(evsel);

	evlist__set_id_pos(evlist);
}

static void evlist__purge(struct evlist *evlist)
{
	struct evsel *pos, *n;

	evlist__for_each_entry_safe(evlist, n, pos) {
		list_del_init(&pos->core.node);
		pos->evlist = NULL;
		evsel__delete(pos);
	}

	evlist->core.nr_entries = 0;
}

void evlist__exit(struct evlist *evlist)
{
	event_enable_timer__exit(&evlist->eet);
	zfree(&evlist->mmap);
	zfree(&evlist->overwrite_mmap);
	perf_evlist__exit(&evlist->core);
}

void evlist__delete(struct evlist *evlist)
{
	if (evlist == NULL)
		return;

	evlist__free_stats(evlist);
	evlist__munmap(evlist);
	evlist__close(evlist);
	evlist__purge(evlist);
	evlist__exit(evlist);
	free(evlist);
}
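/*
 * Add an evsel to the list: the first event added (core.idx == 0) also
 * becomes the tracking event, and once the list has its first entry the
 * id_pos/is_pos positions are taken from it.
 */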
void evlist__add(struct evlist *evlist, struct evsel *entry)
{
	perf_evlist__add(&evlist->core, &entry->core);
	entry->evlist = evlist;
	entry->tracking = !entry->core.idx;

	if (evlist->core.nr_entries == 1)
		evlist__set_id_pos(evlist);
}

void evlist__remove(struct evlist *evlist, struct evsel *evsel)
{
	evsel->evlist = NULL;
	perf_evlist__remove(&evlist->core, &evsel->core);
}

void evlist__splice_list_tail(struct evlist *evlist, struct list_head *list)
{
	while (!list_empty(list)) {
		struct evsel *evsel, *temp, *leader = NULL;

		__evlist__for_each_entry_safe(list, temp, evsel) {
			list_del_init(&evsel->core.node);
			evlist__add(evlist, evsel);
			leader = evsel;
			break;
		}

		__evlist__for_each_entry_safe(list, temp, evsel) {
			if (evsel__has_leader(evsel, leader)) {
				list_del_init(&evsel->core.node);
				evlist__add(evlist, evsel);
			}
		}
	}
}

int __evlist__set_tracepoints_handlers(struct evlist *evlist,
				       const struct evsel_str_handler *assocs, size_t nr_assocs)
{
	size_t i;
	int err;

	for (i = 0; i < nr_assocs; i++) {
		// Adding a handler for an event not in this evlist, just ignore it.
		struct evsel *evsel = evlist__find_tracepoint_by_name(evlist, assocs[i].name);

		if (evsel == NULL)
			continue;

		err = -EEXIST;
		if (evsel->handler != NULL)
			goto out;
		evsel->handler = assocs[i].handler;
	}

	err = 0;
out:
	return err;
}
static void evlist__set_leader(struct evlist *evlist)
{
	perf_evlist__set_leader(&evlist->core);
}

static struct evsel *evlist__dummy_event(struct evlist *evlist)
{
	struct perf_event_attr attr = {
		.type	= PERF_TYPE_SOFTWARE,
		.config = PERF_COUNT_SW_DUMMY,
		.size	= sizeof(attr), /* to capture ABI version */
		/* Avoid frequency mode for dummy events to avoid associated timers. */
		.freq = 0,
		.sample_period = 1,
	};

	return evsel__new_idx(&attr, evlist->core.nr_entries);
}

int evlist__add_dummy(struct evlist *evlist)
{
	struct evsel *evsel = evlist__dummy_event(evlist);

	if (evsel == NULL)
		return -ENOMEM;

	evlist__add(evlist, evsel);

	return 0;
}

struct evsel *evlist__add_aux_dummy(struct evlist *evlist, bool system_wide)
{
	struct evsel *evsel = evlist__dummy_event(evlist);

	if (!evsel)
		return NULL;

	evsel->core.attr.exclude_kernel = 1;
	evsel->core.attr.exclude_guest = 1;
	evsel->core.attr.exclude_hv = 1;
	evsel->core.system_wide = system_wide;
	evsel->no_aux_samples = true;
	evsel->name = strdup("dummy:u");

	evlist__add(evlist, evsel);
	return evsel;
}
#ifdef HAVE_LIBTRACEEVENT
struct evsel *evlist__add_sched_switch(struct evlist *evlist, bool system_wide)
{
	struct evsel *evsel = evsel__newtp_idx("sched", "sched_switch", 0,
					       /*format=*/true);
	if (IS_ERR(evsel))
		return evsel;

	evsel__set_sample_bit(evsel, CPU);
	evsel__set_sample_bit(evsel, TIME);

	evsel->core.system_wide = system_wide;
	evsel->no_aux_samples = true;

	evlist__add(evlist, evsel);
	return evsel;
}
#endif
struct evsel *evlist__find_tracepoint_by_name(struct evlist *evlist, const char *name)
{
	struct evsel *evsel;

	evlist__for_each_entry(evlist, evsel) {
		if ((evsel->core.attr.type == PERF_TYPE_TRACEPOINT) &&
		    (strcmp(evsel->name, name) == 0))
			return evsel;
	}

	return NULL;
}

#ifdef HAVE_LIBTRACEEVENT
int evlist__add_newtp(struct evlist *evlist, const char *sys, const char *name, void *handler)
{
	struct evsel *evsel = evsel__newtp(sys, name);

	if (IS_ERR(evsel))
		return -1;

	evsel->handler = handler;
	evlist__add(evlist, evsel);
	return 0;
}
#endif
struct evlist_cpu_iterator evlist__cpu_begin(struct evlist *evlist, struct affinity *affinity)
{
	struct evlist_cpu_iterator itr = {
		.container = evlist,
		.evsel = NULL,
		.cpu_map_idx = 0,
		.evlist_cpu_map_idx = 0,
		.evlist_cpu_map_nr = perf_cpu_map__nr(evlist->core.all_cpus),
		.cpu = (struct perf_cpu){ .cpu = -1},
		.affinity = affinity,
	};

	if (evlist__empty(evlist)) {
		/* Ensure the empty list doesn't iterate. */
		itr.evlist_cpu_map_idx = itr.evlist_cpu_map_nr;
	} else {
		itr.evsel = evlist__first(evlist);
		if (itr.affinity) {
			itr.cpu = perf_cpu_map__cpu(evlist->core.all_cpus, 0);
			affinity__set(itr.affinity, itr.cpu.cpu);
			itr.cpu_map_idx = perf_cpu_map__idx(itr.evsel->core.cpus, itr.cpu);
			/*
			 * If this CPU isn't in the evsel's cpu map then advance
			 * through the list.
			 */
			if (itr.cpu_map_idx == -1)
				evlist_cpu_iterator__next(&itr);
		}
	}
	return itr;
}

void evlist_cpu_iterator__next(struct evlist_cpu_iterator *evlist_cpu_itr)
{
	while (evlist_cpu_itr->evsel != evlist__last(evlist_cpu_itr->container)) {
		evlist_cpu_itr->evsel = evsel__next(evlist_cpu_itr->evsel);
		evlist_cpu_itr->cpu_map_idx =
			perf_cpu_map__idx(evlist_cpu_itr->evsel->core.cpus,
					  evlist_cpu_itr->cpu);
		if (evlist_cpu_itr->cpu_map_idx != -1)
			return;
	}
	evlist_cpu_itr->evlist_cpu_map_idx++;
	if (evlist_cpu_itr->evlist_cpu_map_idx < evlist_cpu_itr->evlist_cpu_map_nr) {
		evlist_cpu_itr->evsel = evlist__first(evlist_cpu_itr->container);
		evlist_cpu_itr->cpu =
			perf_cpu_map__cpu(evlist_cpu_itr->container->core.all_cpus,
					  evlist_cpu_itr->evlist_cpu_map_idx);
		if (evlist_cpu_itr->affinity)
			affinity__set(evlist_cpu_itr->affinity, evlist_cpu_itr->cpu.cpu);
		evlist_cpu_itr->cpu_map_idx =
			perf_cpu_map__idx(evlist_cpu_itr->evsel->core.cpus,
					  evlist_cpu_itr->cpu);
		/*
		 * If this CPU isn't in the evsel's cpu map then advance through
		 * the list.
		 */
		if (evlist_cpu_itr->cpu_map_idx == -1)
			evlist_cpu_iterator__next(evlist_cpu_itr);
	}
}

bool evlist_cpu_iterator__end(const struct evlist_cpu_iterator *evlist_cpu_itr)
{
	return evlist_cpu_itr->evlist_cpu_map_idx >= evlist_cpu_itr->evlist_cpu_map_nr;
}
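/*
 * Typical use of the CPU iterator (see __evlist__disable()/__evlist__enable()
 * below): visit every (evsel, CPU) pair grouped by CPU, so the thread
 * affinity only has to change once per CPU:
 *
 *	evlist__for_each_cpu(evlist_cpu_itr, evlist, affinity) {
 *		pos = evlist_cpu_itr.evsel;
 *		...
 *	}
 */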
static int evsel__strcmp(struct evsel *pos, char *evsel_name)
{
	if (!evsel_name)
		return 0;
	if (evsel__is_dummy_event(pos))
		return 1;
	return !evsel__name_is(pos, evsel_name);
}

static int evlist__is_enabled(struct evlist *evlist)
{
	struct evsel *pos;

	evlist__for_each_entry(evlist, pos) {
		if (!evsel__is_group_leader(pos) || !pos->core.fd)
			continue;
		/* If at least one event is enabled, evlist is enabled. */
		if (!pos->disabled)
			return true;
	}
	return false;
}

static void __evlist__disable(struct evlist *evlist, char *evsel_name, bool excl_dummy)
{
	struct evsel *pos;
	struct evlist_cpu_iterator evlist_cpu_itr;
	struct affinity saved_affinity, *affinity = NULL;
	bool has_imm = false;

	// See explanation in evlist__close()
	if (!cpu_map__is_dummy(evlist->core.user_requested_cpus)) {
		if (affinity__setup(&saved_affinity) < 0)
			return;
		affinity = &saved_affinity;
	}

	/* Disable 'immediate' events last */
	for (int imm = 0; imm <= 1; imm++) {
		evlist__for_each_cpu(evlist_cpu_itr, evlist, affinity) {
			pos = evlist_cpu_itr.evsel;
			if (evsel__strcmp(pos, evsel_name))
				continue;
			if (pos->disabled || !evsel__is_group_leader(pos) || !pos->core.fd)
				continue;
			if (excl_dummy && evsel__is_dummy_event(pos))
				continue;
			if (pos->immediate)
				has_imm = true;
			if (pos->immediate != imm)
				continue;
			evsel__disable_cpu(pos, evlist_cpu_itr.cpu_map_idx);
		}
		if (!has_imm)
			break;
	}

	affinity__cleanup(affinity);
	evlist__for_each_entry(evlist, pos) {
		if (evsel__strcmp(pos, evsel_name))
			continue;
		if (!evsel__is_group_leader(pos) || !pos->core.fd)
			continue;
		if (excl_dummy && evsel__is_dummy_event(pos))
			continue;
		pos->disabled = true;
	}

	/*
	 * If we disabled only single event, we need to check
	 * the enabled state of the evlist manually.
	 */
	if (evsel_name)
		evlist->enabled = evlist__is_enabled(evlist);
	else
		evlist->enabled = false;
}

void evlist__disable(struct evlist *evlist)
{
	__evlist__disable(evlist, NULL, false);
}

void evlist__disable_non_dummy(struct evlist *evlist)
{
	__evlist__disable(evlist, NULL, true);
}

void evlist__disable_evsel(struct evlist *evlist, char *evsel_name)
{
	__evlist__disable(evlist, evsel_name, false);
}
static void __evlist__enable(struct evlist *evlist, char *evsel_name, bool excl_dummy)
{
	struct evsel *pos;
	struct evlist_cpu_iterator evlist_cpu_itr;
	struct affinity saved_affinity, *affinity = NULL;

	// See explanation in evlist__close()
	if (!cpu_map__is_dummy(evlist->core.user_requested_cpus)) {
		if (affinity__setup(&saved_affinity) < 0)
			return;
		affinity = &saved_affinity;
	}

	evlist__for_each_cpu(evlist_cpu_itr, evlist, affinity) {
		pos = evlist_cpu_itr.evsel;
		if (evsel__strcmp(pos, evsel_name))
			continue;
		if (!evsel__is_group_leader(pos) || !pos->core.fd)
			continue;
		if (excl_dummy && evsel__is_dummy_event(pos))
			continue;
		evsel__enable_cpu(pos, evlist_cpu_itr.cpu_map_idx);
	}
	affinity__cleanup(affinity);
	evlist__for_each_entry(evlist, pos) {
		if (evsel__strcmp(pos, evsel_name))
			continue;
		if (!evsel__is_group_leader(pos) || !pos->core.fd)
			continue;
		if (excl_dummy && evsel__is_dummy_event(pos))
			continue;
		pos->disabled = false;
	}

	/*
	 * Even single event sets the 'enabled' for evlist,
	 * so the toggle can work properly and toggle to
	 * 'disabled' state.
	 */
	evlist->enabled = true;
}

void evlist__enable(struct evlist *evlist)
{
	__evlist__enable(evlist, NULL, false);
}

void evlist__enable_non_dummy(struct evlist *evlist)
{
	__evlist__enable(evlist, NULL, true);
}

void evlist__enable_evsel(struct evlist *evlist, char *evsel_name)
{
	__evlist__enable(evlist, evsel_name, false);
}

void evlist__toggle_enable(struct evlist *evlist)
{
	(evlist->enabled ? evlist__disable : evlist__enable)(evlist);
}

int evlist__add_pollfd(struct evlist *evlist, int fd)
{
	return perf_evlist__add_pollfd(&evlist->core, fd, NULL, POLLIN, fdarray_flag__default);
}

int evlist__filter_pollfd(struct evlist *evlist, short revents_and_mask)
{
	return perf_evlist__filter_pollfd(&evlist->core, revents_and_mask);
}
#ifdef HAVE_EVENTFD_SUPPORT
int evlist__add_wakeup_eventfd(struct evlist *evlist, int fd)
{
	return perf_evlist__add_pollfd(&evlist->core, fd, NULL, POLLIN,
				       fdarray_flag__nonfilterable |
				       fdarray_flag__non_perf_event);
}
#endif

int evlist__poll(struct evlist *evlist, int timeout)
{
	return perf_evlist__poll(&evlist->core, timeout);
}
struct perf_sample_id *evlist__id2sid(struct evlist *evlist, u64 id)
{
	struct hlist_head *head;
	struct perf_sample_id *sid;
	int hash;

	hash = hash_64(id, PERF_EVLIST__HLIST_BITS);
	head = &evlist->core.heads[hash];

	hlist_for_each_entry(sid, head, node)
		if (sid->id == id)
			return sid;

	return NULL;
}

struct evsel *evlist__id2evsel(struct evlist *evlist, u64 id)
{
	struct perf_sample_id *sid;

	if (evlist->core.nr_entries == 1 || !id)
		return evlist__first(evlist);

	sid = evlist__id2sid(evlist, id);
	if (sid)
		return container_of(sid->evsel, struct evsel, core);

	if (!evlist__sample_id_all(evlist))
		return evlist__first(evlist);

	return NULL;
}

struct evsel *evlist__id2evsel_strict(struct evlist *evlist, u64 id)
{
	struct perf_sample_id *sid;

	if (!id)
		return NULL;

	sid = evlist__id2sid(evlist, id);
	if (sid)
		return container_of(sid->evsel, struct evsel, core);

	return NULL;
}

static int evlist__event2id(struct evlist *evlist, union perf_event *event, u64 *id)
{
	const __u64 *array = event->sample.array;
	ssize_t n;

	n = (event->header.size - sizeof(event->header)) >> 3;

	if (event->header.type == PERF_RECORD_SAMPLE) {
		if (evlist->id_pos >= n)
			return -1;
		*id = array[evlist->id_pos];
	} else {
		if (evlist->is_pos > n)
			return -1;
		n -= evlist->is_pos;
		*id = array[n];
	}
	return 0;
}
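/*
 * Map an event record back to the evsel that produced it: with a single
 * evsel the answer is trivial, otherwise the sample id is pulled out of the
 * record (using the id_pos/is_pos computed above) and looked up in the
 * evlist id hash.
 */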
struct evsel *evlist__event2evsel(struct evlist *evlist, union perf_event *event)
{
	struct evsel *first = evlist__first(evlist);
	struct hlist_head *head;
	struct perf_sample_id *sid;
	int hash;
	u64 id;

	if (evlist->core.nr_entries == 1)
		return first;

	if (!first->core.attr.sample_id_all &&
	    event->header.type != PERF_RECORD_SAMPLE)
		return first;

	if (evlist__event2id(evlist, event, &id))
		return NULL;

	/* Synthesized events have an id of zero */
	if (!id)
		return first;

	hash = hash_64(id, PERF_EVLIST__HLIST_BITS);
	head = &evlist->core.heads[hash];

	hlist_for_each_entry(sid, head, node) {
		if (sid->id == id)
			return container_of(sid->evsel, struct evsel, core);
	}
	return NULL;
}

static int evlist__set_paused(struct evlist *evlist, bool value)
{
	int i;

	if (!evlist->overwrite_mmap)
		return 0;

	for (i = 0; i < evlist->core.nr_mmaps; i++) {
		int fd = evlist->overwrite_mmap[i].core.fd;
		int err;

		if (fd < 0)
			continue;
		err = ioctl(fd, PERF_EVENT_IOC_PAUSE_OUTPUT, value ? 1 : 0);
		if (err)
			return err;
	}
	return 0;
}

static int evlist__pause(struct evlist *evlist)
{
	return evlist__set_paused(evlist, true);
}

static int evlist__resume(struct evlist *evlist)
{
	return evlist__set_paused(evlist, false);
}
static void evlist__munmap_nofree(struct evlist *evlist)
{
	int i;

	if (evlist->mmap)
		for (i = 0; i < evlist->core.nr_mmaps; i++)
			perf_mmap__munmap(&evlist->mmap[i].core);

	if (evlist->overwrite_mmap)
		for (i = 0; i < evlist->core.nr_mmaps; i++)
			perf_mmap__munmap(&evlist->overwrite_mmap[i].core);
}

void evlist__munmap(struct evlist *evlist)
{
	evlist__munmap_nofree(evlist);
	zfree(&evlist->mmap);
	zfree(&evlist->overwrite_mmap);
}

static void perf_mmap__unmap_cb(struct perf_mmap *map)
{
	struct mmap *m = container_of(map, struct mmap, core);

	mmap__munmap(m);
}

static struct mmap *evlist__alloc_mmap(struct evlist *evlist,
				       bool overwrite)
{
	int i;
	struct mmap *map;

	map = zalloc(evlist->core.nr_mmaps * sizeof(struct mmap));
	if (!map)
		return NULL;

	for (i = 0; i < evlist->core.nr_mmaps; i++) {
		struct perf_mmap *prev = i ? &map[i - 1].core : NULL;

		/*
		 * When the perf_mmap() call is made we grab one refcount, plus
		 * one extra to let perf_mmap__consume() get the last
		 * events after all real references (perf_mmap__get()) are
		 * dropped.
		 *
		 * Each PERF_EVENT_IOC_SET_OUTPUT points to this mmap and
		 * thus does perf_mmap__get() on it.
		 */
		perf_mmap__init(&map[i].core, prev, overwrite, perf_mmap__unmap_cb);
	}

	return map;
}
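/*
 * The three callbacks below are plugged into perf_evlist__mmap_ops() from
 * evlist__mmap_ex(): ->idx propagates the mmap index to the auxtrace
 * parameters, ->get lazily allocates the (overwrite) mmap array and ->mmap
 * maps one ring buffer.
 */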
static void
perf_evlist__mmap_cb_idx(struct perf_evlist *_evlist,
			 struct perf_evsel *_evsel,
			 struct perf_mmap_param *_mp,
			 int idx)
{
	struct evlist *evlist = container_of(_evlist, struct evlist, core);
	struct mmap_params *mp = container_of(_mp, struct mmap_params, core);
	struct evsel *evsel = container_of(_evsel, struct evsel, core);

	auxtrace_mmap_params__set_idx(&mp->auxtrace_mp, evlist, evsel, idx);
}

static struct perf_mmap*
perf_evlist__mmap_cb_get(struct perf_evlist *_evlist, bool overwrite, int idx)
{
	struct evlist *evlist = container_of(_evlist, struct evlist, core);
	struct mmap *maps;

	maps = overwrite ? evlist->overwrite_mmap : evlist->mmap;

	if (!maps) {
		maps = evlist__alloc_mmap(evlist, overwrite);
		if (!maps)
			return NULL;

		if (overwrite) {
			evlist->overwrite_mmap = maps;
			if (evlist->bkw_mmap_state == BKW_MMAP_NOTREADY)
				evlist__toggle_bkw_mmap(evlist, BKW_MMAP_RUNNING);
		} else {
			evlist->mmap = maps;
		}
	}

	return &maps[idx].core;
}

static int
perf_evlist__mmap_cb_mmap(struct perf_mmap *_map, struct perf_mmap_param *_mp,
			  int output, struct perf_cpu cpu)
{
	struct mmap *map = container_of(_map, struct mmap, core);
	struct mmap_params *mp = container_of(_mp, struct mmap_params, core);

	return mmap__mmap(map, mp, output, cpu);
}
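/*
 * Default mmap sizing: derive the number of pages from the
 * kernel.perf_event_mlock_kb sysctl and round down to a power of two, so
 * that the default stays within the per-user mlock budget.
 */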
unsigned long perf_event_mlock_kb_in_pages(void)
{
	unsigned long pages;
	int max;

	if (sysctl__read_int("kernel/perf_event_mlock_kb", &max) < 0) {
		/*
		 * Pick a once upon a time good value, i.e. things look
		 * strange since we can't read a sysctl value, but lets not
		 * die yet, as this is basically a debugging aid.
		 */
		max = 512;
	} else {
		max -= (page_size / 1024);
	}

	pages = (max * 1024) / page_size;
	if (!is_power_of_2(pages))
		pages = rounddown_pow_of_two(pages);

	return pages;
}
size_t evlist__mmap_size(unsigned long pages)
{
	if (pages == UINT_MAX)
		pages = perf_event_mlock_kb_in_pages();
	else if (!is_power_of_2(pages))
		return 0;

	return (pages + 1) * page_size;
}

static long parse_pages_arg(const char *str, unsigned long min,
			    unsigned long max)
{
	unsigned long pages, val;
	static struct parse_tag tags[] = {
		{ .tag  = 'B', .mult = 1       },
		{ .tag  = 'K', .mult = 1 << 10 },
		{ .tag  = 'M', .mult = 1 << 20 },
		{ .tag  = 'G', .mult = 1 << 30 },
		{ .tag  = 0 },
	};

	if (str == NULL)
		return -EINVAL;

	val = parse_tag_value(str, tags);
	if (val != (unsigned long) -1) {
		/* we got file size value */
		pages = PERF_ALIGN(val, page_size) / page_size;
	} else {
		/* we got pages count value */
		char *eptr;

		pages = strtoul(str, &eptr, 10);
		if (*eptr != '\0')
			return -EINVAL;
	}

	if (pages == 0 && min == 0) {
		/* leave number of pages at 0 */
	} else if (!is_power_of_2(pages)) {
		char buf[100];

		/* round pages up to next power of 2 */
		pages = roundup_pow_of_two(pages);
		if (!pages)
			return -EINVAL;

		unit_number__scnprintf(buf, sizeof(buf), pages * page_size);
		pr_info("rounding mmap pages size to %s (%lu pages)\n",
			buf, pages);
	}

	if (pages > max)
		return -EINVAL;

	return pages;
}

int __evlist__parse_mmap_pages(unsigned int *mmap_pages, const char *str)
{
	unsigned long max = UINT_MAX;
	long pages;

	if (max > SIZE_MAX / page_size)
		max = SIZE_MAX / page_size;

	pages = parse_pages_arg(str, 1, max);
	if (pages < 0) {
		pr_err("Invalid argument for --mmap_pages/-m\n");
		return -1;
	}

	*mmap_pages = pages;
	return 0;
}

int evlist__parse_mmap_pages(const struct option *opt, const char *str, int unset __maybe_unused)
{
	return __evlist__parse_mmap_pages(opt->value, str);
}
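/*
 * Example values accepted by -m/--mmap-pages (a sketch, see parse_pages_arg()
 * above): a plain page count such as "128", or a size with a B/K/M/G suffix
 * such as "512K" or "16M"; non power-of-two values are rounded up.
 */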
/**
 * evlist__mmap_ex - Create mmaps to receive events.
 * @evlist: list of events
 * @pages: map length in pages
 * @overwrite: overwrite older events?
 * @auxtrace_pages - auxtrace map length in pages
 * @auxtrace_overwrite - overwrite older auxtrace data?
 *
 * If @overwrite is %false the user needs to signal event consumption using
 * perf_mmap__write_tail().  Using evlist__mmap_read() does this
 * automatically.
 *
 * Similarly, if @auxtrace_overwrite is %false the user needs to signal data
 * consumption using auxtrace_mmap__write_tail().
 *
 * Return: %0 on success, negative error code otherwise.
 */
int evlist__mmap_ex(struct evlist *evlist, unsigned int pages,
		    unsigned int auxtrace_pages,
		    bool auxtrace_overwrite, int nr_cblocks, int affinity, int flush,
		    int comp_level)
{
	/*
	 * Delay setting mp.prot: set it before calling perf_mmap__mmap.
	 * Its value is decided by evsel's write_backward.
	 * So &mp should not be passed through const pointer.
	 */
	struct mmap_params mp = {
		.nr_cblocks	= nr_cblocks,
		.affinity	= affinity,
		.flush		= flush,
		.comp_level	= comp_level
	};
	struct perf_evlist_mmap_ops ops = {
		.idx  = perf_evlist__mmap_cb_idx,
		.get  = perf_evlist__mmap_cb_get,
		.mmap = perf_evlist__mmap_cb_mmap,
	};

	evlist->core.mmap_len = evlist__mmap_size(pages);
	pr_debug("mmap size %zuB\n", evlist->core.mmap_len);

	auxtrace_mmap_params__init(&mp.auxtrace_mp, evlist->core.mmap_len,
				   auxtrace_pages, auxtrace_overwrite);

	return perf_evlist__mmap_ops(&evlist->core, &ops, &mp.core);
}

int evlist__mmap(struct evlist *evlist, unsigned int pages)
{
	return evlist__mmap_ex(evlist, pages, 0, false, 0, PERF_AFFINITY_SYS, 1, 0);
}
int evlist__create_maps(struct evlist *evlist, struct target *target)
{
	bool all_threads = (target->per_thread && target->system_wide);
	struct perf_cpu_map *cpus;
	struct perf_thread_map *threads;

	/*
	 * If specify '-a' and '--per-thread' to perf record, perf record
	 * will override '--per-thread'. target->per_thread = false and
	 * target->system_wide = true.
	 *
	 * If specify '--per-thread' only to perf record,
	 * target->per_thread = true and target->system_wide = false.
	 *
	 * So target->per_thread && target->system_wide is false.
	 * For perf record, thread_map__new_str doesn't call
	 * thread_map__new_all_cpus. That will keep perf record's
	 * current behavior.
	 *
	 * For perf stat, it allows the case that target->per_thread and
	 * target->system_wide are all true. It means to collect system-wide
	 * per-thread data. thread_map__new_str will call
	 * thread_map__new_all_cpus to enumerate all threads.
	 */
	threads = thread_map__new_str(target->pid, target->tid, target->uid,
				      all_threads);

	if (!threads)
		return -1;

	if (target__uses_dummy_map(target) && !evlist__has_bpf_output(evlist))
		cpus = perf_cpu_map__new_any_cpu();
	else
		cpus = perf_cpu_map__new(target->cpu_list);

	if (!cpus)
		goto out_delete_threads;

	evlist->core.has_user_cpus = !!target->cpu_list;

	perf_evlist__set_maps(&evlist->core, cpus, threads);

	/* as evlist now has references, put count here */
	perf_cpu_map__put(cpus);
	perf_thread_map__put(threads);

	return 0;

out_delete_threads:
	perf_thread_map__put(threads);
	return -1;
}
int evlist__apply_filters(struct evlist *evlist, struct evsel **err_evsel,
			  struct target *target)
{
	struct evsel *evsel;
	int err = 0;

	evlist__for_each_entry(evlist, evsel) {
		/*
		 * filters only work for tracepoint event, which doesn't have cpu limit.
		 * So evlist and evsel should always be same.
		 */
		if (evsel->filter) {
			err = perf_evsel__apply_filter(&evsel->core, evsel->filter);
			if (err) {
				*err_evsel = evsel;
				break;
			}
		}

		/*
		 * non-tracepoint events can have BPF filters.
		 */
		if (!list_empty(&evsel->bpf_filters)) {
			err = perf_bpf_filter__prepare(evsel, target);
			if (err) {
				*err_evsel = evsel;
				break;
			}
		}
	}

	return err;
}

int evlist__set_tp_filter(struct evlist *evlist, const char *filter)
{
	struct evsel *evsel;
	int err = 0;

	if (filter == NULL)
		return -1;

	evlist__for_each_entry(evlist, evsel) {
		if (evsel->core.attr.type != PERF_TYPE_TRACEPOINT)
			continue;

		err = evsel__set_filter(evsel, filter);
		if (err)
			break;
	}

	return err;
}

int evlist__append_tp_filter(struct evlist *evlist, const char *filter)
{
	struct evsel *evsel;
	int err = 0;

	if (filter == NULL)
		return -1;

	evlist__for_each_entry(evlist, evsel) {
		if (evsel->core.attr.type != PERF_TYPE_TRACEPOINT)
			continue;

		err = evsel__append_tp_filter(evsel, filter);
		if (err)
			break;
	}

	return err;
}
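/*
 * Build a tracepoint filter string that excludes a set of PIDs, e.g. for two
 * pids the result looks like "common_pid != 123 && common_pid != 456"; used
 * by the *_tp_filter_pids() helpers below.
 */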
char *asprintf__tp_filter_pids(size_t npids, pid_t *pids)
{
	char *filter = NULL;
	size_t i;

	for (i = 0; i < npids; ++i) {
		if (i == 0) {
			if (asprintf(&filter, "common_pid != %d", pids[i]) < 0)
				return NULL;
		} else {
			char *tmp;

			if (asprintf(&tmp, "%s && common_pid != %d", filter, pids[i]) < 0)
				goto out_free;

			free(filter);
			filter = tmp;
		}
	}

	return filter;
out_free:
	free(filter);
	return NULL;
}

int evlist__set_tp_filter_pids(struct evlist *evlist, size_t npids, pid_t *pids)
{
	char *filter = asprintf__tp_filter_pids(npids, pids);
	int ret = evlist__set_tp_filter(evlist, filter);

	free(filter);
	return ret;
}

int evlist__append_tp_filter_pids(struct evlist *evlist, size_t npids, pid_t *pids)
{
	char *filter = asprintf__tp_filter_pids(npids, pids);
	int ret = evlist__append_tp_filter(evlist, filter);

	free(filter);
	return ret;
}

int evlist__append_tp_filter_pid(struct evlist *evlist, pid_t pid)
{
	return evlist__append_tp_filter_pids(evlist, 1, &pid);
}
bool evlist__valid_sample_type(struct evlist *evlist)
{
	struct evsel *pos;

	if (evlist->core.nr_entries == 1)
		return true;

	if (evlist->id_pos < 0 || evlist->is_pos < 0)
		return false;

	evlist__for_each_entry(evlist, pos) {
		if (pos->id_pos != evlist->id_pos ||
		    pos->is_pos != evlist->is_pos)
			return false;
	}

	return true;
}

u64 __evlist__combined_sample_type(struct evlist *evlist)
{
	struct evsel *evsel;

	if (evlist->combined_sample_type)
		return evlist->combined_sample_type;

	evlist__for_each_entry(evlist, evsel)
		evlist->combined_sample_type |= evsel->core.attr.sample_type;

	return evlist->combined_sample_type;
}

u64 evlist__combined_sample_type(struct evlist *evlist)
{
	evlist->combined_sample_type = 0;
	return __evlist__combined_sample_type(evlist);
}

u64 evlist__combined_branch_type(struct evlist *evlist)
{
	struct evsel *evsel;
	u64 branch_type = 0;

	evlist__for_each_entry(evlist, evsel)
		branch_type |= evsel->core.attr.branch_sample_type;

	return branch_type;
}
static struct evsel *
evlist__find_dup_event_from_prev(struct evlist *evlist, struct evsel *event)
{
	struct evsel *pos;

	evlist__for_each_entry(evlist, pos) {
		if (event == pos)
			break;
		if ((pos->core.attr.branch_sample_type & PERF_SAMPLE_BRANCH_COUNTERS) &&
		    !strcmp(pos->name, event->name))
			return pos;
	}
	return NULL;
}

#define MAX_NR_ABBR_NAME	(26 * 11)

/*
 * The abbr name is from A to Z9. If the number of event
 * which requires the branch counter > MAX_NR_ABBR_NAME,
 * return NA.
 */
static void evlist__new_abbr_name(char *name)
{
	static int idx;
	int i = idx;

	if (idx >= MAX_NR_ABBR_NAME) {
		name[0] = 'N';
		name[1] = 'A';
		name[2] = '\0';
		return;
	}

	name[0] = 'A' + (idx % 26);

	if (idx >= 26) {
		i = idx / 26;
		name[1] = '0' + i - 1;
		name[2] = '\0';
	} else {
		name[1] = '\0';
	}

	idx++;
}

void evlist__update_br_cntr(struct evlist *evlist)
{
	struct evsel *evsel, *dup;
	int i = 0;

	evlist__for_each_entry(evlist, evsel) {
		if (evsel->core.attr.branch_sample_type & PERF_SAMPLE_BRANCH_COUNTERS) {
			evsel->br_cntr_idx = i++;
			evsel__leader(evsel)->br_cntr_nr++;

			dup = evlist__find_dup_event_from_prev(evlist, evsel);
			if (dup)
				memcpy(evsel->abbr_name, dup->abbr_name, 3 * sizeof(char));
			else
				evlist__new_abbr_name(evsel->abbr_name);
		}
	}
	evlist->nr_br_cntr = i;
}
bool evlist__valid_read_format(struct evlist *evlist)
{
	struct evsel *first = evlist__first(evlist), *pos = first;
	u64 read_format = first->core.attr.read_format;
	u64 sample_type = first->core.attr.sample_type;

	evlist__for_each_entry(evlist, pos) {
		if (read_format != pos->core.attr.read_format) {
			pr_debug("Read format differs %#" PRIx64 " vs %#" PRIx64 "\n",
				 read_format, (u64)pos->core.attr.read_format);
		}
	}

	/* PERF_SAMPLE_READ implies PERF_FORMAT_ID. */
	if ((sample_type & PERF_SAMPLE_READ) &&
	    !(read_format & PERF_FORMAT_ID)) {
		return false;
	}

	return true;
}

u16 evlist__id_hdr_size(struct evlist *evlist)
{
	struct evsel *first = evlist__first(evlist);

	return first->core.attr.sample_id_all ? evsel__id_hdr_size(first) : 0;
}

bool evlist__valid_sample_id_all(struct evlist *evlist)
{
	struct evsel *first = evlist__first(evlist), *pos = first;

	evlist__for_each_entry_continue(evlist, pos) {
		if (first->core.attr.sample_id_all != pos->core.attr.sample_id_all)
			return false;
	}

	return true;
}

bool evlist__sample_id_all(struct evlist *evlist)
{
	struct evsel *first = evlist__first(evlist);

	return first->core.attr.sample_id_all;
}

void evlist__set_selected(struct evlist *evlist, struct evsel *evsel)
{
	evlist->selected = evsel;
}
void evlist__close(struct evlist *evlist)
{
	struct evsel *evsel;
	struct evlist_cpu_iterator evlist_cpu_itr;
	struct affinity affinity;

	/*
	 * With perf record core.user_requested_cpus is usually NULL.
	 * Use the old method to handle this for now.
	 */
	if (!evlist->core.user_requested_cpus ||
	    cpu_map__is_dummy(evlist->core.user_requested_cpus)) {
		evlist__for_each_entry_reverse(evlist, evsel)
			evsel__close(evsel);
		return;
	}

	if (affinity__setup(&affinity) < 0)
		return;

	evlist__for_each_cpu(evlist_cpu_itr, evlist, &affinity) {
		perf_evsel__close_cpu(&evlist_cpu_itr.evsel->core,
				      evlist_cpu_itr.cpu_map_idx);
	}

	affinity__cleanup(&affinity);
	evlist__for_each_entry_reverse(evlist, evsel) {
		perf_evsel__free_fd(&evsel->core);
		perf_evsel__free_id(&evsel->core);
	}
	perf_evlist__reset_id_hash(&evlist->core);
}
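/*
 * Fallback used by evlist__open() below when neither threads nor CPUs were
 * requested: build an online-CPU map plus a dummy thread map so the open
 * becomes a system-wide one.
 */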
static int evlist__create_syswide_maps(struct evlist *evlist)
{
	struct perf_cpu_map *cpus;
	struct perf_thread_map *threads;
	int err = -ENOMEM;

	/*
	 * Try reading /sys/devices/system/cpu/online to get
	 * an all cpus map.
	 *
	 * FIXME: -ENOMEM is the best we can do here, the cpu_map
	 * code needs an overhaul to properly forward the
	 * error, and we may not want to do that fallback to a
	 * default cpu identity map :-\
	 */
	cpus = perf_cpu_map__new_online_cpus();
	if (!cpus)
		goto out;

	threads = perf_thread_map__new_dummy();
	if (!threads)
		goto out_put;

	perf_evlist__set_maps(&evlist->core, cpus, threads);
	err = 0;

	perf_thread_map__put(threads);
out_put:
	perf_cpu_map__put(cpus);
out:
	return err;
}
int evlist__open(struct evlist *evlist)
{
	struct evsel *evsel;
	int err;

	/*
	 * Default: one fd per CPU, all threads, aka systemwide
	 * as sys_perf_event_open(cpu = -1, thread = -1) is EINVAL
	 */
	if (evlist->core.threads == NULL && evlist->core.user_requested_cpus == NULL) {
		err = evlist__create_syswide_maps(evlist);
		if (err < 0)
			goto out_err;
	}

	evlist__update_id_pos(evlist);

	evlist__for_each_entry(evlist, evsel) {
		err = evsel__open(evsel, evsel->core.cpus, evsel->core.threads);
		if (err < 0)
			goto out_err;
	}

	return 0;
out_err:
	evlist__close(evlist);
	errno = -err;
	return err;
}
1421 int evlist__prepare_workload(struct evlist
*evlist
, struct target
*target
, const char *argv
[],
1422 bool pipe_output
, void (*exec_error
)(int signo
, siginfo_t
*info
, void *ucontext
))
1424 int child_ready_pipe
[2], go_pipe
[2];
1427 evlist
->workload
.cork_fd
= -1;
1429 if (pipe(child_ready_pipe
) < 0) {
1430 perror("failed to create 'ready' pipe");
1434 if (pipe(go_pipe
) < 0) {
1435 perror("failed to create 'go' pipe");
1436 goto out_close_ready_pipe
;
1439 evlist
->workload
.pid
= fork();
1440 if (evlist
->workload
.pid
< 0) {
1441 perror("failed to fork");
1442 goto out_close_pipes
;
1445 if (!evlist
->workload
.pid
) {
1451 signal(SIGTERM
, SIG_DFL
);
1453 close(child_ready_pipe
[0]);
1455 fcntl(go_pipe
[0], F_SETFD
, FD_CLOEXEC
);
1458 * Change the name of this process not to confuse --exclude-perf users
1459 * that sees 'perf' in the window up to the execvp() and thinks that
1460 * perf samples are not being excluded.
1462 prctl(PR_SET_NAME
, "perf-exec");
1465 * Tell the parent we're ready to go
1467 close(child_ready_pipe
[1]);
1470 * Wait until the parent tells us to go.
1472 ret
= read(go_pipe
[0], &bf
, 1);
1474 * The parent will ask for the execvp() to be performed by
1475 * writing exactly one byte, in workload.cork_fd, usually via
1476 * evlist__start_workload().
1478 * For cancelling the workload without actually running it,
1479 * the parent will just close workload.cork_fd, without writing
1480 * anything, i.e. read will return zero and we just exit()
1481 * here (See evlist__cancel_workload()).
1485 perror("unable to read pipe");
1489 execvp(argv
[0], (char **)argv
);
1494 val
.sival_int
= errno
;
1495 if (sigqueue(getppid(), SIGUSR1
, val
))
1503 struct sigaction act
= {
1504 .sa_flags
= SA_SIGINFO
,
1505 .sa_sigaction
= exec_error
,
1507 sigaction(SIGUSR1
, &act
, NULL
);
1510 if (target__none(target
)) {
1511 if (evlist
->core
.threads
== NULL
) {
1512 fprintf(stderr
, "FATAL: evlist->threads need to be set at this point (%s:%d).\n",
1513 __func__
, __LINE__
);
1514 goto out_close_pipes
;
1516 perf_thread_map__set_pid(evlist
->core
.threads
, 0, evlist
->workload
.pid
);
1519 close(child_ready_pipe
[1]);
1522 * wait for child to settle
1524 if (read(child_ready_pipe
[0], &bf
, 1) == -1) {
1525 perror("unable to read pipe");
1526 goto out_close_pipes
;
1529 fcntl(go_pipe
[1], F_SETFD
, FD_CLOEXEC
);
1530 evlist
->workload
.cork_fd
= go_pipe
[1];
1531 close(child_ready_pipe
[0]);
1537 out_close_ready_pipe
:
1538 close(child_ready_pipe
[0]);
1539 close(child_ready_pipe
[1]);
1543 int evlist__start_workload(struct evlist
*evlist
)
1545 if (evlist
->workload
.cork_fd
>= 0) {
1549 * Remove the cork, let it rip!
1551 ret
= write(evlist
->workload
.cork_fd
, &bf
, 1);
1553 perror("unable to write to pipe");
1555 close(evlist
->workload
.cork_fd
);
1556 evlist
->workload
.cork_fd
= -1;
1563 void evlist__cancel_workload(struct evlist
*evlist
)
1567 if (evlist
->workload
.cork_fd
>= 0) {
1568 close(evlist
->workload
.cork_fd
);
1569 evlist
->workload
.cork_fd
= -1;
1570 waitpid(evlist
->workload
.pid
, &status
, WNOHANG
);
1574 int evlist__parse_sample(struct evlist
*evlist
, union perf_event
*event
, struct perf_sample
*sample
)
1576 struct evsel
*evsel
= evlist__event2evsel(evlist
, event
);
1581 ret
= evsel__parse_sample(evsel
, event
, sample
);
1584 if (perf_guest
&& sample
->id
) {
1585 struct perf_sample_id
*sid
= evlist__id2sid(evlist
, sample
->id
);
1588 sample
->machine_pid
= sid
->machine_pid
;
1589 sample
->vcpu
= sid
->vcpu
.cpu
;
1595 int evlist__parse_sample_timestamp(struct evlist
*evlist
, union perf_event
*event
, u64
*timestamp
)
1597 struct evsel
*evsel
= evlist__event2evsel(evlist
, event
);
1601 return evsel__parse_sample_timestamp(evsel
, event
, timestamp
);
1604 int evlist__strerror_open(struct evlist
*evlist
, int err
, char *buf
, size_t size
)
1607 char sbuf
[STRERR_BUFSIZE
], *emsg
= str_error_r(err
, sbuf
, sizeof(sbuf
));
1612 printed
= scnprintf(buf
, size
,
1614 "Hint:\tCheck /proc/sys/kernel/perf_event_paranoid setting.", emsg
);
1616 value
= perf_event_paranoid();
1618 printed
+= scnprintf(buf
+ printed
, size
- printed
, "\nHint:\t");
1621 printed
+= scnprintf(buf
+ printed
, size
- printed
,
1622 "For your workloads it needs to be <= 1\nHint:\t");
1624 printed
+= scnprintf(buf
+ printed
, size
- printed
,
1625 "For system wide tracing it needs to be set to -1.\n");
1627 printed
+= scnprintf(buf
+ printed
, size
- printed
,
1628 "Hint:\tTry: 'sudo sh -c \"echo -1 > /proc/sys/kernel/perf_event_paranoid\"'\n"
1629 "Hint:\tThe current value is %d.", value
);
1632 struct evsel
*first
= evlist__first(evlist
);
1635 if (sysctl__read_int("kernel/perf_event_max_sample_rate", &max_freq
) < 0)
1638 if (first
->core
.attr
.sample_freq
< (u64
)max_freq
)
1641 printed
= scnprintf(buf
, size
,
1643 "Hint:\tCheck /proc/sys/kernel/perf_event_max_sample_rate.\n"
1644 "Hint:\tThe current value is %d and %" PRIu64
" is being requested.",
1645 emsg
, max_freq
, first
->core
.attr
.sample_freq
);
1650 scnprintf(buf
, size
, "%s", emsg
);
1657 int evlist__strerror_mmap(struct evlist
*evlist
, int err
, char *buf
, size_t size
)
1659 char sbuf
[STRERR_BUFSIZE
], *emsg
= str_error_r(err
, sbuf
, sizeof(sbuf
));
1660 int pages_attempted
= evlist
->core
.mmap_len
/ 1024, pages_max_per_user
, printed
= 0;
1664 sysctl__read_int("kernel/perf_event_mlock_kb", &pages_max_per_user
);
1665 printed
+= scnprintf(buf
+ printed
, size
- printed
,
1667 "Hint:\tCheck /proc/sys/kernel/perf_event_mlock_kb (%d kB) setting.\n"
1668 "Hint:\tTried using %zd kB.\n",
1669 emsg
, pages_max_per_user
, pages_attempted
);
1671 if (pages_attempted
>= pages_max_per_user
) {
1672 printed
+= scnprintf(buf
+ printed
, size
- printed
,
1673 "Hint:\tTry 'sudo sh -c \"echo %d > /proc/sys/kernel/perf_event_mlock_kb\"', or\n",
1674 pages_max_per_user
+ pages_attempted
);
1677 printed
+= scnprintf(buf
+ printed
, size
- printed
,
1678 "Hint:\tTry using a smaller -m/--mmap-pages value.");
1681 scnprintf(buf
, size
, "%s", emsg
);
1688 void evlist__to_front(struct evlist
*evlist
, struct evsel
*move_evsel
)
1690 struct evsel
*evsel
, *n
;
1693 if (move_evsel
== evlist__first(evlist
))
1696 evlist__for_each_entry_safe(evlist
, n
, evsel
) {
1697 if (evsel__leader(evsel
) == evsel__leader(move_evsel
))
1698 list_move_tail(&evsel
->core
.node
, &move
);
1701 list_splice(&move
, &evlist
->core
.entries
);
1704 struct evsel
*evlist__get_tracking_event(struct evlist
*evlist
)
1706 struct evsel
*evsel
;
1708 evlist__for_each_entry(evlist
, evsel
) {
1709 if (evsel
->tracking
)
1713 return evlist__first(evlist
);
1716 void evlist__set_tracking_event(struct evlist
*evlist
, struct evsel
*tracking_evsel
)
1718 struct evsel
*evsel
;
1720 if (tracking_evsel
->tracking
)
1723 evlist__for_each_entry(evlist
, evsel
) {
1724 if (evsel
!= tracking_evsel
)
1725 evsel
->tracking
= false;
1728 tracking_evsel
->tracking
= true;
1731 struct evsel
*evlist__findnew_tracking_event(struct evlist
*evlist
, bool system_wide
)
1733 struct evsel
*evsel
;
1735 evsel
= evlist__get_tracking_event(evlist
);
1736 if (!evsel__is_dummy_event(evsel
)) {
1737 evsel
= evlist__add_aux_dummy(evlist
, system_wide
);
1741 evlist__set_tracking_event(evlist
, evsel
);
1742 } else if (system_wide
) {
1743 perf_evlist__go_system_wide(&evlist
->core
, &evsel
->core
);
1749 struct evsel
*evlist__find_evsel_by_str(struct evlist
*evlist
, const char *str
)
1751 struct evsel
*evsel
;
1753 evlist__for_each_entry(evlist
, evsel
) {
1756 if (evsel__name_is(evsel
, str
))
1763 void evlist__toggle_bkw_mmap(struct evlist
*evlist
, enum bkw_mmap_state state
)
1765 enum bkw_mmap_state old_state
= evlist
->bkw_mmap_state
;
1772 if (!evlist
->overwrite_mmap
)
1775 switch (old_state
) {
1776 case BKW_MMAP_NOTREADY
: {
1777 if (state
!= BKW_MMAP_RUNNING
)
1781 case BKW_MMAP_RUNNING
: {
1782 if (state
!= BKW_MMAP_DATA_PENDING
)
1787 case BKW_MMAP_DATA_PENDING
: {
1788 if (state
!= BKW_MMAP_EMPTY
)
1792 case BKW_MMAP_EMPTY
: {
1793 if (state
!= BKW_MMAP_RUNNING
)
1799 WARN_ONCE(1, "Shouldn't get there\n");
1802 evlist
->bkw_mmap_state
= state
;
1806 evlist__pause(evlist
);
1809 evlist__resume(evlist
);
1820 bool evlist__exclude_kernel(struct evlist
*evlist
)
1822 struct evsel
*evsel
;
1824 evlist__for_each_entry(evlist
, evsel
) {
1825 if (!evsel
->core
.attr
.exclude_kernel
)
1833 * Events in data file are not collect in groups, but we still want
1834 * the group display. Set the artificial group and set the leader's
1835 * forced_leader flag to notify the display code.
1837 void evlist__force_leader(struct evlist
*evlist
)
1839 if (evlist__nr_groups(evlist
) == 0) {
1840 struct evsel
*leader
= evlist__first(evlist
);
1842 evlist__set_leader(evlist
);
1843 leader
->forced_leader
= true;
1847 struct evsel
*evlist__reset_weak_group(struct evlist
*evsel_list
, struct evsel
*evsel
, bool close
)
1849 struct evsel
*c2
, *leader
;
1850 bool is_open
= true;
1852 leader
= evsel__leader(evsel
);
1854 pr_debug("Weak group for %s/%d failed\n",
1855 leader
->name
, leader
->core
.nr_members
);
1858 * for_each_group_member doesn't work here because it doesn't
1859 * include the first entry.
1861 evlist__for_each_entry(evsel_list
, c2
) {
1864 if (evsel__has_leader(c2
, leader
)) {
1865 if (is_open
&& close
)
1866 perf_evsel__close(&c2
->core
);
1868 * We want to close all members of the group and reopen
1869 * them. Some events, like Intel topdown, require being
1870 * in a group and so keep these in the group.
1872 evsel__remove_from_group(c2
, leader
);
1875 * Set this for all former members of the group
1876 * to indicate they get reopened.
1878 c2
->reset_group
= true;
1881 /* Reset the leader count if all entries were removed. */
1882 if (leader
->core
.nr_members
== 1)
1883 leader
->core
.nr_members
= 0;
1887 static int evlist__parse_control_fifo(const char *str
, int *ctl_fd
, int *ctl_fd_ack
, bool *ctl_fd_close
)
1892 if (strncmp(str
, "fifo:", 5))
1896 if (!*str
|| *str
== ',')
1908 * O_RDWR avoids POLLHUPs which is necessary to allow the other
1909 * end of a FIFO to be repeatedly opened and closed.
1911 fd
= open(s
, O_RDWR
| O_NONBLOCK
| O_CLOEXEC
);
1913 pr_err("Failed to open '%s'\n", s
);
1918 *ctl_fd_close
= true;
1921 /* O_RDWR | O_NONBLOCK means the other end need not be open */
1922 fd
= open(p
, O_RDWR
| O_NONBLOCK
| O_CLOEXEC
);
1924 pr_err("Failed to open '%s'\n", p
);
1936 int evlist__parse_control(const char *str
, int *ctl_fd
, int *ctl_fd_ack
, bool *ctl_fd_close
)
1938 char *comma
= NULL
, *endptr
= NULL
;
1940 *ctl_fd_close
= false;
1942 if (strncmp(str
, "fd:", 3))
1943 return evlist__parse_control_fifo(str
, ctl_fd
, ctl_fd_ack
, ctl_fd_close
);
1945 *ctl_fd
= strtoul(&str
[3], &endptr
, 0);
1946 if (endptr
== &str
[3])
1949 comma
= strchr(str
, ',');
1951 if (endptr
!= comma
)
1954 *ctl_fd_ack
= strtoul(comma
+ 1, &endptr
, 0);
1955 if (endptr
== comma
+ 1 || *endptr
!= '\0')
1962 void evlist__close_control(int ctl_fd
, int ctl_fd_ack
, bool *ctl_fd_close
)
1964 if (*ctl_fd_close
) {
1965 *ctl_fd_close
= false;
1967 if (ctl_fd_ack
>= 0)
1972 int evlist__initialize_ctlfd(struct evlist
*evlist
, int fd
, int ack
)
1975 pr_debug("Control descriptor is not initialized\n");
1979 evlist
->ctl_fd
.pos
= perf_evlist__add_pollfd(&evlist
->core
, fd
, NULL
, POLLIN
,
1980 fdarray_flag__nonfilterable
|
1981 fdarray_flag__non_perf_event
);
1982 if (evlist
->ctl_fd
.pos
< 0) {
1983 evlist
->ctl_fd
.pos
= -1;
1984 pr_err("Failed to add ctl fd entry: %m\n");
1988 evlist
->ctl_fd
.fd
= fd
;
1989 evlist
->ctl_fd
.ack
= ack
;
1994 bool evlist__ctlfd_initialized(struct evlist
*evlist
)
1996 return evlist
->ctl_fd
.pos
>= 0;
1999 int evlist__finalize_ctlfd(struct evlist
*evlist
)
2001 struct pollfd
*entries
= evlist
->core
.pollfd
.entries
;
2003 if (!evlist__ctlfd_initialized(evlist
))
2006 entries
[evlist
->ctl_fd
.pos
].fd
= -1;
2007 entries
[evlist
->ctl_fd
.pos
].events
= 0;
2008 entries
[evlist
->ctl_fd
.pos
].revents
= 0;
2010 evlist
->ctl_fd
.pos
= -1;
2011 evlist
->ctl_fd
.ack
= -1;
2012 evlist
->ctl_fd
.fd
= -1;
2017 static int evlist__ctlfd_recv(struct evlist
*evlist
, enum evlist_ctl_cmd
*cmd
,
2018 char *cmd_data
, size_t data_size
)
2022 size_t bytes_read
= 0;
2024 *cmd
= EVLIST_CTL_CMD_UNSUPPORTED
;
2025 memset(cmd_data
, 0, data_size
);
2029 err
= read(evlist
->ctl_fd
.fd
, &c
, 1);
2031 if (c
== '\n' || c
== '\0')
2033 cmd_data
[bytes_read
++] = c
;
2034 if (bytes_read
== data_size
)
2037 } else if (err
== -1) {
2040 if (errno
== EAGAIN
|| errno
== EWOULDBLOCK
)
2043 pr_err("Failed to read from ctlfd %d: %m\n", evlist
->ctl_fd
.fd
);
2048 pr_debug("Message from ctl_fd: \"%s%s\"\n", cmd_data
,
2049 bytes_read
== data_size
? "" : c
== '\n' ? "\\n" : "\\0");
2051 if (bytes_read
> 0) {
2052 if (!strncmp(cmd_data
, EVLIST_CTL_CMD_ENABLE_TAG
,
2053 (sizeof(EVLIST_CTL_CMD_ENABLE_TAG
)-1))) {
2054 *cmd
= EVLIST_CTL_CMD_ENABLE
;
2055 } else if (!strncmp(cmd_data
, EVLIST_CTL_CMD_DISABLE_TAG
,
2056 (sizeof(EVLIST_CTL_CMD_DISABLE_TAG
)-1))) {
2057 *cmd
= EVLIST_CTL_CMD_DISABLE
;
2058 } else if (!strncmp(cmd_data
, EVLIST_CTL_CMD_SNAPSHOT_TAG
,
2059 (sizeof(EVLIST_CTL_CMD_SNAPSHOT_TAG
)-1))) {
2060 *cmd
= EVLIST_CTL_CMD_SNAPSHOT
;
2061 pr_debug("is snapshot\n");
2062 } else if (!strncmp(cmd_data
, EVLIST_CTL_CMD_EVLIST_TAG
,
2063 (sizeof(EVLIST_CTL_CMD_EVLIST_TAG
)-1))) {
2064 *cmd
= EVLIST_CTL_CMD_EVLIST
;
2065 } else if (!strncmp(cmd_data
, EVLIST_CTL_CMD_STOP_TAG
,
2066 (sizeof(EVLIST_CTL_CMD_STOP_TAG
)-1))) {
2067 *cmd
= EVLIST_CTL_CMD_STOP
;
2068 } else if (!strncmp(cmd_data
, EVLIST_CTL_CMD_PING_TAG
,
2069 (sizeof(EVLIST_CTL_CMD_PING_TAG
)-1))) {
2070 *cmd
= EVLIST_CTL_CMD_PING
;
2074 return bytes_read
? (int)bytes_read
: err
;
2077 int evlist__ctlfd_ack(struct evlist
*evlist
)
2081 if (evlist
->ctl_fd
.ack
== -1)
2084 err
= write(evlist
->ctl_fd
.ack
, EVLIST_CTL_CMD_ACK_TAG
,
2085 sizeof(EVLIST_CTL_CMD_ACK_TAG
));
2087 pr_err("failed to write to ctl_ack_fd %d: %m\n", evlist
->ctl_fd
.ack
);
2092 static int get_cmd_arg(char *cmd_data
, size_t cmd_size
, char **arg
)
2094 char *data
= cmd_data
+ cmd_size
;
2100 /* there's argument */
2110 static int evlist__ctlfd_enable(struct evlist
*evlist
, char *cmd_data
, bool enable
)
2112 struct evsel
*evsel
;
2116 err
= get_cmd_arg(cmd_data
,
2117 enable
? sizeof(EVLIST_CTL_CMD_ENABLE_TAG
) - 1 :
2118 sizeof(EVLIST_CTL_CMD_DISABLE_TAG
) - 1,
2121 pr_info("failed: wrong command\n");
2126 evsel
= evlist__find_evsel_by_str(evlist
, name
);
2129 evlist__enable_evsel(evlist
, name
);
2131 evlist__disable_evsel(evlist
, name
);
2132 pr_info("Event %s %s\n", evsel
->name
,
2133 enable
? "enabled" : "disabled");
2135 pr_info("failed: can't find '%s' event\n", name
);
2139 evlist__enable(evlist
);
2140 pr_info(EVLIST_ENABLED_MSG
);
2142 evlist__disable(evlist
);
2143 pr_info(EVLIST_DISABLED_MSG
);
2150 static int evlist__ctlfd_list(struct evlist
*evlist
, char *cmd_data
)
2152 struct perf_attr_details details
= { .verbose
= false, };
2153 struct evsel
*evsel
;
2157 err
= get_cmd_arg(cmd_data
,
2158 sizeof(EVLIST_CTL_CMD_EVLIST_TAG
) - 1,
2161 pr_info("failed: wrong command\n");
2166 if (!strcmp(arg
, "-v")) {
2167 details
.verbose
= true;
2168 } else if (!strcmp(arg
, "-g")) {
2169 details
.event_group
= true;
2170 } else if (!strcmp(arg
, "-F")) {
2171 details
.freq
= true;
2173 pr_info("failed: wrong command\n");
2178 evlist__for_each_entry(evlist
, evsel
)
2179 evsel__fprintf(evsel
, &details
, stderr
);
2184 int evlist__ctlfd_process(struct evlist
*evlist
, enum evlist_ctl_cmd
*cmd
)
2187 char cmd_data
[EVLIST_CTL_CMD_MAX_LEN
];
2188 int ctlfd_pos
= evlist
->ctl_fd
.pos
;
2189 struct pollfd
*entries
= evlist
->core
.pollfd
.entries
;
2191 if (!evlist__ctlfd_initialized(evlist
) || !entries
[ctlfd_pos
].revents
)
2194 if (entries
[ctlfd_pos
].revents
& POLLIN
) {
2195 err
= evlist__ctlfd_recv(evlist
, cmd
, cmd_data
,
2196 EVLIST_CTL_CMD_MAX_LEN
);
2199 case EVLIST_CTL_CMD_ENABLE
:
2200 case EVLIST_CTL_CMD_DISABLE
:
2201 err
= evlist__ctlfd_enable(evlist
, cmd_data
,
2202 *cmd
== EVLIST_CTL_CMD_ENABLE
);
2204 case EVLIST_CTL_CMD_EVLIST
:
2205 err
= evlist__ctlfd_list(evlist
, cmd_data
);
2207 case EVLIST_CTL_CMD_SNAPSHOT
:
2208 case EVLIST_CTL_CMD_STOP
:
2209 case EVLIST_CTL_CMD_PING
:
2211 case EVLIST_CTL_CMD_ACK
:
2212 case EVLIST_CTL_CMD_UNSUPPORTED
:
2214 pr_debug("ctlfd: unsupported %d\n", *cmd
);
2217 if (!(*cmd
== EVLIST_CTL_CMD_ACK
|| *cmd
== EVLIST_CTL_CMD_UNSUPPORTED
||
2218 *cmd
== EVLIST_CTL_CMD_SNAPSHOT
))
2219 evlist__ctlfd_ack(evlist
);
2223 if (entries
[ctlfd_pos
].revents
& (POLLHUP
| POLLERR
))
2224 evlist__finalize_ctlfd(evlist
);
2226 entries
[ctlfd_pos
].revents
= 0;
2232 * struct event_enable_time - perf record -D/--delay single time range.
2233 * @start: start of time range to enable events in milliseconds
2234 * @end: end of time range to enable events in milliseconds
2236 * N.B. this structure is also accessed as an array of int.
2238 struct event_enable_time
{
2243 static int parse_event_enable_time(const char *str
, struct event_enable_time
*range
, bool first
)
2245 const char *fmt
= first
? "%u - %u %n" : " , %u - %u %n";
2246 int ret
, start
, end
, n
;
2248 ret
= sscanf(str
, fmt
, &start
, &end
, &n
);
2249 if (ret
!= 2 || end
<= start
)
2252 range
->start
= start
;
2258 static ssize_t
parse_event_enable_times(const char *str
, struct event_enable_time
*range
)
2264 for (cnt
= 0; *str
; cnt
++) {
2265 ret
= parse_event_enable_time(str
, range
, first
);
2268 /* Check no overlap */
2269 if (!first
&& range
&& range
->start
<= range
[-1].end
)
2279 * struct event_enable_timer - control structure for perf record -D/--delay.
2280 * @evlist: event list
2281 * @times: time ranges that events are enabled (N.B. this is also accessed as an
2283 * @times_cnt: number of time ranges
2284 * @timerfd: timer file descriptor
2285 * @pollfd_pos: position in @evlist array of file descriptors to poll (fdarray)
2286 * @times_step: current position in (int *)@times)[],
2287 * refer event_enable_timer__process()
2289 * Note, this structure is only used when there are time ranges, not when there
2290 * is only an initial delay.
2292 struct event_enable_timer
{
2293 struct evlist
*evlist
;
2294 struct event_enable_time
*times
;
2301 static int str_to_delay(const char *str
)
2306 d
= strtol(str
, &endptr
, 10);
2307 if (*endptr
|| d
> INT_MAX
|| d
< -1)
2312 int evlist__parse_event_enable_time(struct evlist
*evlist
, struct record_opts
*opts
,
2313 const char *str
, int unset
)
2315 enum fdarray_flags flags
= fdarray_flag__nonfilterable
| fdarray_flag__non_perf_event
;
2316 struct event_enable_timer
*eet
;
2324 opts
->target
.initial_delay
= str_to_delay(str
);
2325 if (opts
->target
.initial_delay
)
2328 ret
= parse_event_enable_times(str
, NULL
);
2336 eet
= zalloc(sizeof(*eet
));
2340 eet
->times
= calloc(times_cnt
, sizeof(*eet
->times
));
2346 if (parse_event_enable_times(str
, eet
->times
) != times_cnt
) {
2348 goto free_eet_times
;
2351 eet
->times_cnt
= times_cnt
;
2353 eet
->timerfd
= timerfd_create(CLOCK_MONOTONIC
, TFD_CLOEXEC
);
2354 if (eet
->timerfd
== -1) {
2356 pr_err("timerfd_create failed: %s\n", strerror(errno
));
2357 goto free_eet_times
;
2360 eet
->pollfd_pos
= perf_evlist__add_pollfd(&evlist
->core
, eet
->timerfd
, NULL
, POLLIN
, flags
);
2361 if (eet
->pollfd_pos
< 0) {
2362 err
= eet
->pollfd_pos
;
2366 eet
->evlist
= evlist
;
2368 opts
->target
.initial_delay
= eet
->times
[0].start
;
2373 close(eet
->timerfd
);
2381 static int event_enable_timer__set_timer(struct event_enable_timer
*eet
, int ms
)
2383 struct itimerspec its
= {
2384 .it_value
.tv_sec
= ms
/ MSEC_PER_SEC
,
2385 .it_value
.tv_nsec
= (ms
% MSEC_PER_SEC
) * NSEC_PER_MSEC
,
2389 if (timerfd_settime(eet
->timerfd
, 0, &its
, NULL
) < 0) {
2391 pr_err("timerfd_settime failed: %s\n", strerror(errno
));
2396 int event_enable_timer__start(struct event_enable_timer
*eet
)
2403 ms
= eet
->times
[0].end
- eet
->times
[0].start
;
2404 eet
->times_step
= 1;
2406 return event_enable_timer__set_timer(eet
, ms
);
2409 int event_enable_timer__process(struct event_enable_timer
*eet
)
2411 struct pollfd
*entries
;
2417 entries
= eet
->evlist
->core
.pollfd
.entries
;
2418 revents
= entries
[eet
->pollfd_pos
].revents
;
2419 entries
[eet
->pollfd_pos
].revents
= 0;
2421 if (revents
& POLLIN
) {
2422 size_t step
= eet
->times_step
;
2423 size_t pos
= step
/ 2;
2426 evlist__disable_non_dummy(eet
->evlist
);
2427 pr_info(EVLIST_DISABLED_MSG
);
2428 if (pos
>= eet
->times_cnt
- 1) {
2430 event_enable_timer__set_timer(eet
, 0);
2431 return 1; /* Stop */
2434 evlist__enable_non_dummy(eet
->evlist
);
2435 pr_info(EVLIST_ENABLED_MSG
);
2441 if (pos
< eet
->times_cnt
) {
2442 int *times
= (int *)eet
->times
; /* Accessing 'times' as array of int */
2443 int ms
= times
[step
] - times
[step
- 1];
2445 eet
->times_step
= step
;
2446 return event_enable_timer__set_timer(eet
, ms
);
2453 void event_enable_timer__exit(struct event_enable_timer
**ep
)
2457 zfree(&(*ep
)->times
);
2461 struct evsel
*evlist__find_evsel(struct evlist
*evlist
, int idx
)
2463 struct evsel
*evsel
;
2465 evlist__for_each_entry(evlist
, evsel
) {
2466 if (evsel
->core
.idx
== idx
)
2472 int evlist__scnprintf_evsels(struct evlist
*evlist
, size_t size
, char *bf
)
2474 struct evsel
*evsel
;
2477 evlist__for_each_entry(evlist
, evsel
) {
2478 if (evsel__is_dummy_event(evsel
))
2480 if (size
> (strlen(evsel__name(evsel
)) + (printed
? 2 : 1))) {
2481 printed
+= scnprintf(bf
+ printed
, size
- printed
, "%s%s", printed
? "," : "", evsel__name(evsel
));
2483 printed
+= scnprintf(bf
+ printed
, size
- printed
, "%s...", printed
? "," : "");
2491 void evlist__check_mem_load_aux(struct evlist
*evlist
)
2493 struct evsel
*leader
, *evsel
, *pos
;
2496 * For some platforms, the 'mem-loads' event is required to use
2497 * together with 'mem-loads-aux' within a group and 'mem-loads-aux'
2498 * must be the group leader. Now we disable this group before reporting
2499 * because 'mem-loads-aux' is just an auxiliary event. It doesn't carry
2500 * any valid memory load information.
2502 evlist__for_each_entry(evlist
, evsel
) {
2503 leader
= evsel__leader(evsel
);
2504 if (leader
== evsel
)
2507 if (leader
->name
&& strstr(leader
->name
, "mem-loads-aux")) {
2508 for_each_group_evsel(pos
, leader
) {
2509 evsel__set_leader(pos
, pos
);
2510 pos
->core
.nr_members
= 0;
2517 * evlist__warn_user_requested_cpus() - Check each evsel against requested CPUs
2518 * and warn if the user CPU list is inapplicable for the event's PMU's
2519 * CPUs. Not core PMUs list a CPU in sysfs, but this may be overwritten by a
2520 * user requested CPU and so any online CPU is applicable. Core PMUs handle
2521 * events on the CPUs in their list and otherwise the event isn't supported.
2522 * @evlist: The list of events being checked.
2523 * @cpu_list: The user provided list of CPUs.
2525 void evlist__warn_user_requested_cpus(struct evlist
*evlist
, const char *cpu_list
)
2527 struct perf_cpu_map
*user_requested_cpus
;
2533 user_requested_cpus
= perf_cpu_map__new(cpu_list
);
2534 if (!user_requested_cpus
)
2537 evlist__for_each_entry(evlist
, pos
) {
2538 struct perf_cpu_map
*intersect
, *to_test
;
2539 const struct perf_pmu
*pmu
= evsel__find_pmu(pos
);
2541 to_test
= pmu
&& pmu
->is_core
? pmu
->cpus
: cpu_map__online();
2542 intersect
= perf_cpu_map__intersect(to_test
, user_requested_cpus
);
2543 if (!perf_cpu_map__equal(intersect
, user_requested_cpus
)) {
2546 cpu_map__snprint(to_test
, buf
, sizeof(buf
));
2547 pr_warning("WARNING: A requested CPU in '%s' is not supported by PMU '%s' (CPUs %s) for event '%s'\n",
2548 cpu_list
, pmu
? pmu
->name
: "cpu", buf
, evsel__name(pos
));
2550 perf_cpu_map__put(intersect
);
2552 perf_cpu_map__put(user_requested_cpus
);
2555 void evlist__uniquify_name(struct evlist
*evlist
)
2557 char *new_name
, empty_attributes
[2] = ":", *attributes
;
2560 if (perf_pmus__num_core_pmus() == 1)
2563 evlist__for_each_entry(evlist
, pos
) {
2564 if (!evsel__is_hybrid(pos
))
2567 if (strchr(pos
->name
, '/'))
2570 attributes
= strchr(pos
->name
, ':');
2574 attributes
= empty_attributes
;
2576 if (asprintf(&new_name
, "%s/%s/%s", pos
->pmu
? pos
->pmu
->name
: "",
2577 pos
->name
, attributes
+ 1)) {
2579 pos
->name
= new_name
;
2586 bool evlist__has_bpf_output(struct evlist
*evlist
)
2588 struct evsel
*evsel
;
2590 evlist__for_each_entry(evlist
, evsel
) {
2591 if (evsel__is_bpf_output(evsel
))