// SPDX-License-Identifier: GPL-2.0
#include <perf/evlist.h>
#include <perf/evsel.h>
#include <linux/bitops.h>
#include <linux/list.h>
#include <linux/hash.h>
#include <sys/ioctl.h>
#include <internal/evlist.h>
#include <internal/evsel.h>
#include <internal/xyarray.h>
#include <internal/mmap.h>
#include <internal/cpumap.h>
#include <internal/threadmap.h>
#include <internal/lib.h>
#include <linux/zalloc.h>
#include <stdlib.h>
#include <errno.h>
#include <unistd.h>
#include <fcntl.h>
#include <poll.h>
#include <sys/mman.h>
#include <perf/cpumap.h>
#include <perf/threadmap.h>
#include <api/fd/array.h>
#include "internal.h"
void perf_evlist__init(struct perf_evlist *evlist)
{
	INIT_LIST_HEAD(&evlist->entries);
	evlist->nr_entries = 0;
	fdarray__init(&evlist->pollfd, 64);
	perf_evlist__reset_id_hash(evlist);
}
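/*
 * Propagate the evlist's cpu and thread maps to a single evsel. The rules
 * below decide whether the evsel ends up with the PMU's own cpu map, the
 * user requested cpus, or all online cpus, and which thread map applies.
 */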
static void __perf_evlist__propagate_maps(struct perf_evlist *evlist,
					  struct perf_evsel *evsel)
{
	if (evsel->system_wide) {
		/* System wide: set the cpu map of the evsel to all online CPUs. */
		perf_cpu_map__put(evsel->cpus);
		evsel->cpus = perf_cpu_map__new_online_cpus();
	} else if (evlist->has_user_cpus && evsel->is_pmu_core) {
		/*
		 * User requested CPUs on a core PMU, ensure the requested CPUs
		 * are valid by intersecting with those of the PMU.
		 */
		perf_cpu_map__put(evsel->cpus);
		evsel->cpus = perf_cpu_map__intersect(evlist->user_requested_cpus, evsel->own_cpus);
	} else if (!evsel->own_cpus || evlist->has_user_cpus ||
		   (!evsel->requires_cpu && perf_cpu_map__has_any_cpu(evlist->user_requested_cpus))) {
		/*
		 * The PMU didn't specify a default cpu map, this isn't a core
		 * event and the user requested CPUs or the evlist user
		 * requested CPUs have the "any CPU" (aka dummy) CPU value. In
		 * which case use the user requested CPUs rather than the PMU
		 * ones.
		 */
		perf_cpu_map__put(evsel->cpus);
		evsel->cpus = perf_cpu_map__get(evlist->user_requested_cpus);
	} else if (evsel->cpus != evsel->own_cpus) {
		/*
		 * No user requested cpu map but the PMU cpu map doesn't match
		 * the evsel's. Reset it back to the PMU cpu map.
		 */
		perf_cpu_map__put(evsel->cpus);
		evsel->cpus = perf_cpu_map__get(evsel->own_cpus);
	}

	if (evsel->system_wide) {
		perf_thread_map__put(evsel->threads);
		evsel->threads = perf_thread_map__new_dummy();
	} else {
		perf_thread_map__put(evsel->threads);
		evsel->threads = perf_thread_map__get(evlist->threads);
	}

	evlist->all_cpus = perf_cpu_map__merge(evlist->all_cpus, evsel->cpus);
}
static void perf_evlist__propagate_maps(struct perf_evlist *evlist)
{
	struct perf_evsel *evsel;

	evlist->needs_map_propagation = true;

	perf_evlist__for_each_evsel(evlist, evsel)
		__perf_evlist__propagate_maps(evlist, evsel);
}
void perf_evlist__add(struct perf_evlist *evlist,
		      struct perf_evsel *evsel)
{
	evsel->idx = evlist->nr_entries;
	list_add_tail(&evsel->node, &evlist->entries);
	evlist->nr_entries += 1;

	if (evlist->needs_map_propagation)
		__perf_evlist__propagate_maps(evlist, evsel);
}
void perf_evlist__remove(struct perf_evlist *evlist,
			 struct perf_evsel *evsel)
{
	list_del_init(&evsel->node);
	evlist->nr_entries -= 1;
}
struct perf_evlist *perf_evlist__new(void)
{
	struct perf_evlist *evlist = zalloc(sizeof(*evlist));

	if (evlist != NULL)
		perf_evlist__init(evlist);

	return evlist;
}
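/*
 * Iterate the evlist: return the evsel following @prev, the first evsel when
 * @prev is NULL, and NULL once the end of the list is reached.
 */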
struct perf_evsel *
perf_evlist__next(struct perf_evlist *evlist, struct perf_evsel *prev)
{
	struct perf_evsel *next;

	if (!prev) {
		next = list_first_entry(&evlist->entries,
					struct perf_evsel,
					node);
	} else {
		next = list_next_entry(prev, node);
	}

	/* Empty list is noticed here so don't need checking on entry. */
	if (&next->node == &evlist->entries)
		return NULL;

	return next;
}
static void perf_evlist__purge(struct perf_evlist *evlist)
{
	struct perf_evsel *pos, *n;

	perf_evlist__for_each_entry_safe(evlist, n, pos) {
		list_del_init(&pos->node);
		perf_evsel__delete(pos);
	}

	evlist->nr_entries = 0;
}
void perf_evlist__exit(struct perf_evlist *evlist)
{
	perf_cpu_map__put(evlist->user_requested_cpus);
	perf_cpu_map__put(evlist->all_cpus);
	perf_thread_map__put(evlist->threads);
	evlist->user_requested_cpus = NULL;
	evlist->all_cpus = NULL;
	evlist->threads = NULL;
	fdarray__exit(&evlist->pollfd);
}
void perf_evlist__delete(struct perf_evlist *evlist)
{
	if (evlist == NULL)
		return;

	perf_evlist__munmap(evlist);
	perf_evlist__close(evlist);
	perf_evlist__purge(evlist);
	perf_evlist__exit(evlist);
	free(evlist);
}
void perf_evlist__set_maps(struct perf_evlist *evlist,
			   struct perf_cpu_map *cpus,
			   struct perf_thread_map *threads)
{
	/*
	 * Allow for the possibility that one or another of the maps isn't being
	 * changed i.e. don't put it. Note we are assuming the maps that are
	 * being applied are brand new and evlist is taking ownership of the
	 * original reference count of 1. If that is not the case it is up to
	 * the caller to increase the reference count.
	 */
	if (cpus != evlist->user_requested_cpus) {
		perf_cpu_map__put(evlist->user_requested_cpus);
		evlist->user_requested_cpus = perf_cpu_map__get(cpus);
	}

	if (threads != evlist->threads) {
		perf_thread_map__put(evlist->threads);
		evlist->threads = perf_thread_map__get(threads);
	}

	perf_evlist__propagate_maps(evlist);
}
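/*
 * Open all events in the evlist on their propagated cpu/thread maps. On the
 * first failure, any already opened events are closed and the error is
 * returned.
 */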
int perf_evlist__open(struct perf_evlist *evlist)
{
	struct perf_evsel *evsel;
	int err;

	perf_evlist__for_each_entry(evlist, evsel) {
		err = perf_evsel__open(evsel, evsel->cpus, evsel->threads);
		if (err < 0)
			goto out_err;
	}

	return 0;

out_err:
	perf_evlist__close(evlist);
	return err;
}
void perf_evlist__close(struct perf_evlist *evlist)
{
	struct perf_evsel *evsel;

	perf_evlist__for_each_entry_reverse(evlist, evsel)
		perf_evsel__close(evsel);
}
void perf_evlist__enable(struct perf_evlist *evlist)
{
	struct perf_evsel *evsel;

	perf_evlist__for_each_entry(evlist, evsel)
		perf_evsel__enable(evsel);
}
void perf_evlist__disable(struct perf_evlist *evlist)
{
	struct perf_evsel *evsel;

	perf_evlist__for_each_entry(evlist, evsel)
		perf_evsel__disable(evsel);
}
u64 perf_evlist__read_format(struct perf_evlist *evlist)
{
	struct perf_evsel *first = perf_evlist__first(evlist);

	return first->attr.read_format;
}

#define SID(e, x, y) xyarray__entry(e->sample_id, x, y)
static void perf_evlist__id_hash(struct perf_evlist *evlist,
				 struct perf_evsel *evsel,
				 int cpu_map_idx, int thread, u64 id)
{
	int hash;
	struct perf_sample_id *sid = SID(evsel, cpu_map_idx, thread);

	sid->id = id;
	sid->evsel = evsel;
	hash = hash_64(sid->id, PERF_EVLIST__HLIST_BITS);
	hlist_add_head(&sid->node, &evlist->heads[hash]);
}
void perf_evlist__reset_id_hash(struct perf_evlist *evlist)
{
	int i;

	for (i = 0; i < PERF_EVLIST__HLIST_SIZE; ++i)
		INIT_HLIST_HEAD(&evlist->heads[i]);
}
void perf_evlist__id_add(struct perf_evlist *evlist,
			 struct perf_evsel *evsel,
			 int cpu_map_idx, int thread, u64 id)
{
	if (!SID(evsel, cpu_map_idx, thread))
		return;

	perf_evlist__id_hash(evlist, evsel, cpu_map_idx, thread, id);
	evsel->id[evsel->ids++] = id;
}
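/*
 * Record the kernel-assigned event id for the given fd. The id is read with
 * the PERF_EVENT_IOC_ID ioctl when available, otherwise it is recovered from
 * a read() of the counter (the legacy fallback below).
 */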
int perf_evlist__id_add_fd(struct perf_evlist *evlist,
			   struct perf_evsel *evsel,
			   int cpu_map_idx, int thread, int fd)
{
	u64 read_data[4] = { 0, };
	int id_idx = 1; /* The first entry is the counter value */
	u64 id;
	int ret;

	if (!SID(evsel, cpu_map_idx, thread))
		return -1;

	ret = ioctl(fd, PERF_EVENT_IOC_ID, &id);
	if (!ret)
		goto add;

	if (errno != ENOTTY)
		return -1;

	/* Legacy way to get event id. All hail to old kernels! */

	/*
	 * This way does not work with group format read, so bail
	 * out in that case.
	 */
	if (perf_evlist__read_format(evlist) & PERF_FORMAT_GROUP)
		return -1;

	if (!(evsel->attr.read_format & PERF_FORMAT_ID) ||
	    read(fd, &read_data, sizeof(read_data)) == -1)
		return -1;

	if (evsel->attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
		++id_idx;
	if (evsel->attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
		++id_idx;

	id = read_data[id_idx];

add:
	perf_evlist__id_add(evlist, evsel, cpu_map_idx, thread, id);
	return 0;
}
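/*
 * Size the pollfd array for the worst case: one fd per cpu/thread pair for
 * each evsel; system wide evsels only contribute one fd per cpu.
 */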
int perf_evlist__alloc_pollfd(struct perf_evlist *evlist)
{
	int nr_cpus = perf_cpu_map__nr(evlist->all_cpus);
	int nr_threads = perf_thread_map__nr(evlist->threads);
	int nfds = 0;
	struct perf_evsel *evsel;

	perf_evlist__for_each_entry(evlist, evsel) {
		if (evsel->system_wide)
			nfds += nr_cpus;
		else
			nfds += nr_cpus * nr_threads;
	}

	if (fdarray__available_entries(&evlist->pollfd) < nfds &&
	    fdarray__grow(&evlist->pollfd, nfds) < 0)
		return -ENOMEM;

	return 0;
}
int perf_evlist__add_pollfd(struct perf_evlist *evlist, int fd,
			    void *ptr, short revent, enum fdarray_flags flags)
{
	int pos = fdarray__add(&evlist->pollfd, fd, revent | POLLERR | POLLHUP, flags);

	if (pos >= 0) {
		evlist->pollfd.priv[pos].ptr = ptr;
		fcntl(fd, F_SETFL, O_NONBLOCK);
	}

	return pos;
}
static void perf_evlist__munmap_filtered(struct fdarray *fda, int fd,
					 void *arg __maybe_unused)
{
	struct perf_mmap *map = fda->priv[fd].ptr;

	if (map)
		perf_mmap__put(map);
}

int perf_evlist__filter_pollfd(struct perf_evlist *evlist, short revents_and_mask)
{
	return fdarray__filter(&evlist->pollfd, revents_and_mask,
			       perf_evlist__munmap_filtered, NULL);
}

int perf_evlist__poll(struct perf_evlist *evlist, int timeout)
{
	return fdarray__poll(&evlist->pollfd, timeout);
}
static struct perf_mmap *
perf_evlist__alloc_mmap(struct perf_evlist *evlist, bool overwrite)
{
	int i;
	struct perf_mmap *map;

	map = zalloc(evlist->nr_mmaps * sizeof(struct perf_mmap));
	if (!map)
		return NULL;

	for (i = 0; i < evlist->nr_mmaps; i++) {
		struct perf_mmap *prev = i ? &map[i - 1] : NULL;

		/*
		 * When the perf_mmap() call is made we grab one refcount, plus
		 * one extra to let perf_mmap__consume() get the last
		 * events after all real references (perf_mmap__get()) are
		 * dropped.
		 *
		 * Each PERF_EVENT_IOC_SET_OUTPUT points to this mmap and
		 * thus does perf_mmap__get() on it.
		 */
		perf_mmap__init(&map[i], prev, overwrite, NULL);
	}

	return map;
}
static void perf_evsel__set_sid_idx(struct perf_evsel *evsel, int idx, int cpu, int thread)
{
	struct perf_sample_id *sid = SID(evsel, cpu, thread);

	sid->idx = idx;
	sid->cpu = perf_cpu_map__cpu(evsel->cpus, cpu);
	sid->tid = perf_thread_map__pid(evsel->threads, thread);
}
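/*
 * Callback used by perf_evlist__mmap_ops() to fetch (and lazily allocate)
 * the mmap array entry for a given index, picking the overwrite (backward)
 * or normal ring buffer array as requested.
 */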
static struct perf_mmap *
perf_evlist__mmap_cb_get(struct perf_evlist *evlist, bool overwrite, int idx)
{
	struct perf_mmap *maps;

	maps = overwrite ? evlist->mmap_ovw : evlist->mmap;

	if (!maps) {
		maps = perf_evlist__alloc_mmap(evlist, overwrite);
		if (!maps)
			return NULL;

		if (overwrite)
			evlist->mmap_ovw = maps;
		else
			evlist->mmap = maps;
	}

	return &maps[idx];
}

#define FD(e, x, y) (*(int *) xyarray__entry(e->fd, x, y))
static int
perf_evlist__mmap_cb_mmap(struct perf_mmap *map, struct perf_mmap_param *mp,
			  int output, struct perf_cpu cpu)
{
	return perf_mmap__mmap(map, mp, output, cpu);
}

static void perf_evlist__set_mmap_first(struct perf_evlist *evlist, struct perf_mmap *map,
					bool overwrite)
{
	if (overwrite)
		evlist->mmap_ovw_first = map;
	else
		evlist->mmap_first = map;
}
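/*
 * Map (or redirect) the ring buffer for every evsel at one cpu/thread
 * position. The first event at a position mmaps a new ring buffer; later
 * events are redirected into it with PERF_EVENT_IOC_SET_OUTPUT so they all
 * share the same mmap.
 */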
static int
mmap_per_evsel(struct perf_evlist *evlist, struct perf_evlist_mmap_ops *ops,
	       int idx, struct perf_mmap_param *mp, int cpu_idx,
	       int thread, int *_output, int *_output_overwrite, int *nr_mmaps)
{
	struct perf_cpu evlist_cpu = perf_cpu_map__cpu(evlist->all_cpus, cpu_idx);
	struct perf_evsel *evsel;
	int revent;

	perf_evlist__for_each_entry(evlist, evsel) {
		bool overwrite = evsel->attr.write_backward;
		enum fdarray_flags flgs;
		struct perf_mmap *map;
		int *output, fd, cpu;

		if (evsel->system_wide && thread)
			continue;

		cpu = perf_cpu_map__idx(evsel->cpus, evlist_cpu);
		if (cpu == -1)
			continue;

		map = ops->get(evlist, overwrite, idx);
		if (map == NULL)
			return -ENOMEM;

		if (overwrite) {
			mp->prot = PROT_READ;
			output = _output_overwrite;
		} else {
			mp->prot = PROT_READ | PROT_WRITE;
			output = _output;
		}

		fd = FD(evsel, cpu, thread);

		if (*output == -1) {
			*output = fd;

			/*
			 * The last one will be done at perf_mmap__consume(), so that we
			 * make sure we don't prevent tools from consuming every last event in
			 * the ring buffer.
			 *
			 * I.e. we can get the POLLHUP meaning that the fd doesn't exist
			 * anymore, but the last events for it are still in the ring buffer,
			 * waiting to be consumed.
			 *
			 * Tools can choose to ignore this at their own discretion, but the
			 * evlist layer can't just drop it when filtering events in
			 * perf_evlist__filter_pollfd().
			 */
			refcount_set(&map->refcnt, 2);

			if (ops->idx)
				ops->idx(evlist, evsel, mp, idx);

			/* Debug message used by test scripts */
			pr_debug("idx %d: mmapping fd %d\n", idx, *output);
			if (ops->mmap(map, mp, *output, evlist_cpu) < 0)
				return -1;

			*nr_mmaps += 1;

			if (!idx)
				perf_evlist__set_mmap_first(evlist, map, overwrite);
		} else {
			/* Debug message used by test scripts */
			pr_debug("idx %d: set output fd %d -> %d\n", idx, fd, *output);
			if (ioctl(fd, PERF_EVENT_IOC_SET_OUTPUT, *output) != 0)
				return -1;

			perf_mmap__get(map);
		}

		revent = !overwrite ? POLLIN : 0;

		flgs = evsel->system_wide ? fdarray_flag__nonfilterable : fdarray_flag__default;
		if (perf_evlist__add_pollfd(evlist, fd, map, revent, flgs) < 0) {
			perf_mmap__put(map);
			return -1;
		}

		if (evsel->attr.read_format & PERF_FORMAT_ID) {
			if (perf_evlist__id_add_fd(evlist, evsel, cpu, thread,
						   fd) < 0)
				return -1;
			perf_evsel__set_sid_idx(evsel, idx, cpu, thread);
		}
	}

	return 0;
}
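/*
 * Ring buffer layout used when the cpu map contains the "any CPU" value: one
 * mmap per thread for the per-thread events, followed by one mmap per cpu
 * for the system wide events.
 */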
static int
mmap_per_thread(struct perf_evlist *evlist, struct perf_evlist_mmap_ops *ops,
		struct perf_mmap_param *mp)
{
	int nr_threads = perf_thread_map__nr(evlist->threads);
	int nr_cpus    = perf_cpu_map__nr(evlist->all_cpus);
	int cpu, thread, idx = 0;
	int nr_mmaps = 0;

	pr_debug("%s: nr cpu values (may include -1) %d nr threads %d\n",
		 __func__, nr_cpus, nr_threads);

	/* per-thread mmaps */
	for (thread = 0; thread < nr_threads; thread++, idx++) {
		int output = -1;
		int output_overwrite = -1;

		if (mmap_per_evsel(evlist, ops, idx, mp, 0, thread, &output,
				   &output_overwrite, &nr_mmaps))
			goto out_unmap;
	}

	/* system-wide mmaps i.e. per-cpu */
	for (cpu = 1; cpu < nr_cpus; cpu++, idx++) {
		int output = -1;
		int output_overwrite = -1;

		if (mmap_per_evsel(evlist, ops, idx, mp, cpu, 0, &output,
				   &output_overwrite, &nr_mmaps))
			goto out_unmap;
	}

	if (nr_mmaps != evlist->nr_mmaps)
		pr_err("Miscounted nr_mmaps %d vs %d\n", nr_mmaps, evlist->nr_mmaps);

	return 0;

out_unmap:
	perf_evlist__munmap(evlist);
	return -1;
}
static int
mmap_per_cpu(struct perf_evlist *evlist, struct perf_evlist_mmap_ops *ops,
	     struct perf_mmap_param *mp)
{
	int nr_threads = perf_thread_map__nr(evlist->threads);
	int nr_cpus    = perf_cpu_map__nr(evlist->all_cpus);
	int nr_mmaps = 0;
	int cpu, thread;

	pr_debug("%s: nr cpu values %d nr threads %d\n", __func__, nr_cpus, nr_threads);

	for (cpu = 0; cpu < nr_cpus; cpu++) {
		int output = -1;
		int output_overwrite = -1;

		for (thread = 0; thread < nr_threads; thread++) {
			if (mmap_per_evsel(evlist, ops, cpu, mp, cpu,
					   thread, &output, &output_overwrite, &nr_mmaps))
				goto out_unmap;
		}
	}

	if (nr_mmaps != evlist->nr_mmaps)
		pr_err("Miscounted nr_mmaps %d vs %d\n", nr_mmaps, evlist->nr_mmaps);

	return 0;

out_unmap:
	perf_evlist__munmap(evlist);
	return -1;
}
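/*
 * Number of ring buffers needed: one per cpu, plus one per thread (minus the
 * "any CPU" slot) when the cpu map contains the dummy "any CPU" value or is
 * empty.
 */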
static int perf_evlist__nr_mmaps(struct perf_evlist *evlist)
{
	int nr_mmaps;

	/* One for each CPU */
	nr_mmaps = perf_cpu_map__nr(evlist->all_cpus);
	if (perf_cpu_map__has_any_cpu_or_is_empty(evlist->all_cpus)) {
		/* Plus one for each thread */
		nr_mmaps += perf_thread_map__nr(evlist->threads);
		/* Minus the per-thread CPU (-1) */
		nr_mmaps -= 1;
	}

	return nr_mmaps;
}
int perf_evlist__mmap_ops(struct perf_evlist *evlist,
			  struct perf_evlist_mmap_ops *ops,
			  struct perf_mmap_param *mp)
{
	const struct perf_cpu_map *cpus = evlist->all_cpus;
	struct perf_evsel *evsel;

	if (!ops || !ops->get || !ops->mmap)
		return -EINVAL;

	mp->mask = evlist->mmap_len - page_size - 1;

	evlist->nr_mmaps = perf_evlist__nr_mmaps(evlist);

	perf_evlist__for_each_entry(evlist, evsel) {
		if ((evsel->attr.read_format & PERF_FORMAT_ID) &&
		    evsel->sample_id == NULL &&
		    perf_evsel__alloc_id(evsel, evsel->fd->max_x, evsel->fd->max_y) < 0)
			return -ENOMEM;
	}

	if (evlist->pollfd.entries == NULL && perf_evlist__alloc_pollfd(evlist) < 0)
		return -ENOMEM;

	if (perf_cpu_map__has_any_cpu_or_is_empty(cpus))
		return mmap_per_thread(evlist, ops, mp);

	return mmap_per_cpu(evlist, ops, mp);
}
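/*
 * Map ring buffers of @pages data pages plus one header page each; the mask
 * used for wrapping is derived from mmap_len in perf_evlist__mmap_ops().
 */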
int perf_evlist__mmap(struct perf_evlist *evlist, int pages)
{
	struct perf_mmap_param mp;
	struct perf_evlist_mmap_ops ops = {
		.get  = perf_evlist__mmap_cb_get,
		.mmap = perf_evlist__mmap_cb_mmap,
	};

	evlist->mmap_len = (pages + 1) * page_size;

	return perf_evlist__mmap_ops(evlist, &ops, &mp);
}
void perf_evlist__munmap(struct perf_evlist *evlist)
{
	int i;

	if (evlist->mmap) {
		for (i = 0; i < evlist->nr_mmaps; i++)
			perf_mmap__munmap(&evlist->mmap[i]);
	}

	if (evlist->mmap_ovw) {
		for (i = 0; i < evlist->nr_mmaps; i++)
			perf_mmap__munmap(&evlist->mmap_ovw[i]);
	}

	zfree(&evlist->mmap);
	zfree(&evlist->mmap_ovw);
}
struct perf_mmap *
perf_evlist__next_mmap(struct perf_evlist *evlist, struct perf_mmap *map,
		       bool overwrite)
{
	if (map)
		return map->next;

	return overwrite ? evlist->mmap_ovw_first : evlist->mmap_first;
}
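/*
 * Make @leader the group leader of every evsel on @list and record the
 * resulting group size in leader->nr_members.
 */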
void __perf_evlist__set_leader(struct list_head *list, struct perf_evsel *leader)
{
	struct perf_evsel *evsel;
	int n = 0;

	__perf_evlist__for_each_entry(list, evsel) {
		evsel->leader = leader;
		n++;
	}

	leader->nr_members = n;
}
void perf_evlist__set_leader(struct perf_evlist *evlist)
{
	if (evlist->nr_entries) {
		struct perf_evsel *first = list_entry(evlist->entries.next,
						      struct perf_evsel, node);

		__perf_evlist__set_leader(&evlist->entries, first);
	}
}
int perf_evlist__nr_groups(struct perf_evlist *evlist)
{
	struct perf_evsel *evsel;
	int nr_groups = 0;

	perf_evlist__for_each_evsel(evlist, evsel) {
		/*
		 * evsels by default have a nr_members of 1, and they are their
		 * own leader. If the nr_members is >1 then this is an
		 * indication of a group.
		 */
		if (evsel->leader == evsel && evsel->nr_members > 1)
			nr_groups++;
	}

	return nr_groups;
}
void perf_evlist__go_system_wide(struct perf_evlist *evlist, struct perf_evsel *evsel)
{
	if (!evsel->system_wide) {
		evsel->system_wide = true;
		if (evlist->needs_map_propagation)
			__perf_evlist__propagate_maps(evlist, evsel);
	}
}