/*
 * Copyright (C) 2011, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
 *
 * Parts came from builtin-{top,stat,record}.c, see those files for further
 * copyright notes.
 *
 * Released under the GPL v2. (and only v2, not any later version)
 */
#include <byteswap.h>
#include "asm/bug.h"
#include "evsel.h"
#include "evlist.h"
#include "util.h"
#include "cpumap.h"
#include "thread_map.h"
#define FD(e, x, y) (*(int *)xyarray__entry(e->fd, x, y))
#define GROUP_FD(group_fd, cpu) (*(int *)xyarray__entry(group_fd, cpu, 0))
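
/*
 * evsel->fd is an xyarray indexed as [cpu][thread]: FD(evsel, cpu, thread)
 * names the event file descriptor opened for that cpu/thread pair, and
 * GROUP_FD() fetches the group leader's fd for a given cpu from a caller
 * supplied xyarray.  A minimal sketch of the layout, assuming only the
 * xyarray__new()/xyarray__entry() API used elsewhere in this file:
 *
 *      struct xyarray *fds = xyarray__new(ncpus, nthreads, sizeof(int));
 *      int *slot = xyarray__entry(fds, cpu, thread); // one int per (cpu, thread)
 *      *slot = -1;                                   // "not opened yet"
 */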
int __perf_evsel__sample_size(u64 sample_type)
{
        u64 mask = sample_type & PERF_SAMPLE_MASK;
        int size = 0;
        int i;

        for (i = 0; i < 64; i++) {
                if (mask & (1ULL << i))
                        size++;
        }

        size *= sizeof(u64);

        return size;
}
static void hists__init(struct hists *hists)
{
        memset(hists, 0, sizeof(*hists));
        hists->entries_in_array[0] = hists->entries_in_array[1] = RB_ROOT;
        hists->entries_in = &hists->entries_in_array[0];
        hists->entries_collapsed = RB_ROOT;
        hists->entries = RB_ROOT;
        pthread_mutex_init(&hists->lock, NULL);
}
void perf_evsel__init(struct perf_evsel *evsel,
                      struct perf_event_attr *attr, int idx)
{
        evsel->idx  = idx;
        evsel->attr = *attr;
        INIT_LIST_HEAD(&evsel->node);
        hists__init(&evsel->hists);
}
struct perf_evsel *perf_evsel__new(struct perf_event_attr *attr, int idx)
{
        struct perf_evsel *evsel = zalloc(sizeof(*evsel));

        if (evsel != NULL)
                perf_evsel__init(evsel, attr, idx);

        return evsel;
}
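
/*
 * Typical lifecycle of an evsel, as a minimal sketch built only from the
 * functions in this file (error handling omitted; attr, opts and the
 * cpu/thread maps are assumed to come from the caller; perf_evsel__open()
 * and the read/close helpers appear further down):
 *
 *      struct perf_evsel *evsel = perf_evsel__new(&attr, 0);
 *
 *      perf_evsel__config(evsel, opts);
 *      perf_evsel__open(evsel, cpus, threads, false, NULL);
 *      __perf_evsel__read_on_cpu(evsel, 0, 0, true);
 *      perf_evsel__close(evsel, cpus->nr, threads->nr);
 *      perf_evsel__delete(evsel);
 */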
void perf_evsel__config(struct perf_evsel *evsel, struct perf_record_opts *opts)
{
        struct perf_event_attr *attr = &evsel->attr;
        int track = !evsel->idx; /* only the first counter needs these */

        attr->sample_id_all = opts->sample_id_all_avail ? 1 : 0;
        attr->inherit       = !opts->no_inherit;
        attr->read_format   = PERF_FORMAT_TOTAL_TIME_ENABLED |
                              PERF_FORMAT_TOTAL_TIME_RUNNING |
                              PERF_FORMAT_ID;

        attr->sample_type  |= PERF_SAMPLE_IP | PERF_SAMPLE_TID;

        /*
         * We default some events to a 1 default interval. But keep
         * it a weak assumption overridable by the user.
         */
        if (!attr->sample_period || (opts->user_freq != UINT_MAX &&
                                     opts->user_interval != ULLONG_MAX)) {
                if (opts->freq) {
                        attr->sample_type |= PERF_SAMPLE_PERIOD;
                        attr->freq         = 1;
                        attr->sample_freq  = opts->freq;
                } else {
                        attr->sample_period = opts->default_interval;
                }
        }

        if (opts->no_samples)
                attr->sample_freq = 0;

        if (opts->inherit_stat)
                attr->inherit_stat = 1;

        if (opts->sample_address) {
                attr->sample_type |= PERF_SAMPLE_ADDR;
                attr->mmap_data = track;
        }

        if (opts->call_graph)
                attr->sample_type |= PERF_SAMPLE_CALLCHAIN;

        if (opts->system_wide)
                attr->sample_type |= PERF_SAMPLE_CPU;

        if (opts->period)
                attr->sample_type |= PERF_SAMPLE_PERIOD;

        if (opts->sample_id_all_avail &&
            (opts->sample_time || opts->system_wide ||
             !opts->no_inherit || opts->cpu_list))
                attr->sample_type |= PERF_SAMPLE_TIME;

        if (opts->raw_samples) {
                attr->sample_type |= PERF_SAMPLE_TIME;
                attr->sample_type |= PERF_SAMPLE_RAW;
                attr->sample_type |= PERF_SAMPLE_CPU;
        }

        if (opts->no_delay) {
                attr->watermark = 0;
                attr->wakeup_events = 1;
        }

        attr->mmap = track;
        attr->comm = track;

        if (opts->target_pid == -1 && opts->target_tid == -1 && !opts->system_wide) {
                attr->disabled = 1;
                attr->enable_on_exec = 1;
        }
}
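
/*
 * As a rough illustration of the above (a sketch, assuming a hypothetical
 * opts that asks for frequency based sampling plus call graphs), the attr
 * would end up with at least:
 *
 *      attr.sample_freq  = opts->freq;
 *      attr.sample_type |= PERF_SAMPLE_IP | PERF_SAMPLE_TID |
 *                          PERF_SAMPLE_PERIOD | PERF_SAMPLE_CALLCHAIN;
 */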
int perf_evsel__alloc_fd(struct perf_evsel *evsel, int ncpus, int nthreads)
{
        int cpu, thread;

        evsel->fd = xyarray__new(ncpus, nthreads, sizeof(int));

        if (evsel->fd) {
                for (cpu = 0; cpu < ncpus; cpu++) {
                        for (thread = 0; thread < nthreads; thread++) {
                                FD(evsel, cpu, thread) = -1;
                        }
                }
        }

        return evsel->fd != NULL ? 0 : -ENOMEM;
}
int perf_evsel__alloc_id(struct perf_evsel *evsel, int ncpus, int nthreads)
{
        evsel->sample_id = xyarray__new(ncpus, nthreads, sizeof(struct perf_sample_id));
        if (evsel->sample_id == NULL)
                return -ENOMEM;

        evsel->id = zalloc(ncpus * nthreads * sizeof(u64));
        if (evsel->id == NULL) {
                xyarray__delete(evsel->sample_id);
                evsel->sample_id = NULL;
                return -ENOMEM;
        }

        return 0;
}
int perf_evsel__alloc_counts(struct perf_evsel *evsel, int ncpus)
{
        evsel->counts = zalloc((sizeof(*evsel->counts) +
                                (ncpus * sizeof(struct perf_counts_values))));
        return evsel->counts != NULL ? 0 : -ENOMEM;
}
void perf_evsel__free_fd(struct perf_evsel *evsel)
{
        xyarray__delete(evsel->fd);
        evsel->fd = NULL;
}

void perf_evsel__free_id(struct perf_evsel *evsel)
{
        xyarray__delete(evsel->sample_id);
        evsel->sample_id = NULL;
        free(evsel->id);
        evsel->id = NULL;
}
void perf_evsel__close_fd(struct perf_evsel *evsel, int ncpus, int nthreads)
{
        int cpu, thread;

        for (cpu = 0; cpu < ncpus; cpu++)
                for (thread = 0; thread < nthreads; ++thread) {
                        close(FD(evsel, cpu, thread));
                        FD(evsel, cpu, thread) = -1;
                }
}
void perf_evsel__exit(struct perf_evsel *evsel)
{
        assert(list_empty(&evsel->node));
        xyarray__delete(evsel->fd);
        xyarray__delete(evsel->sample_id);
        free(evsel->id);
}

void perf_evsel__delete(struct perf_evsel *evsel)
{
        perf_evsel__exit(evsel);
        close_cgroup(evsel->cgrp);
        free(evsel->name);
        free(evsel);
}
int __perf_evsel__read_on_cpu(struct perf_evsel *evsel,
                              int cpu, int thread, bool scale)
{
        struct perf_counts_values count;
        size_t nv = scale ? 3 : 1;

        if (FD(evsel, cpu, thread) < 0)
                return -EINVAL;

        if (evsel->counts == NULL && perf_evsel__alloc_counts(evsel, cpu + 1) < 0)
                return -ENOMEM;

        if (readn(FD(evsel, cpu, thread), &count, nv * sizeof(u64)) < 0)
                return -errno;

        if (scale) {
                if (count.run == 0)
                        count.val = 0;
                else if (count.run < count.ena)
                        count.val = (u64)((double)count.val * count.ena / count.run + 0.5);
        } else
                count.ena = count.run = 0;

        evsel->counts->cpu[cpu] = count;
        return 0;
}
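
/*
 * The scaling above compensates for event multiplexing: when the counter was
 * scheduled in for only part of the time it was enabled, the raw value is
 * extrapolated by ena/run.  A small worked example, assuming a counter that
 * read 1000 while running 50 time units out of 100 enabled:
 *
 *      count.val = (u64)((double)1000 * 100 / 50 + 0.5);  // -> 2000
 */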
int __perf_evsel__read(struct perf_evsel *evsel,
                       int ncpus, int nthreads, bool scale)
{
        size_t nv = scale ? 3 : 1;
        int cpu, thread;
        struct perf_counts_values *aggr = &evsel->counts->aggr, count;

        aggr->val = aggr->ena = aggr->run = 0;

        for (cpu = 0; cpu < ncpus; cpu++) {
                for (thread = 0; thread < nthreads; thread++) {
                        if (FD(evsel, cpu, thread) < 0)
                                continue;

                        if (readn(FD(evsel, cpu, thread),
                                  &count, nv * sizeof(u64)) < 0)
                                return -errno;

                        aggr->val += count.val;
                        if (scale) {
                                aggr->ena += count.ena;
                                aggr->run += count.run;
                        }
                }
        }

        evsel->counts->scaled = 0;
        if (scale) {
                if (aggr->run == 0) {
                        evsel->counts->scaled = -1;
                        aggr->val = 0;
                        return 0;
                }

                if (aggr->run < aggr->ena) {
                        evsel->counts->scaled = 1;
                        aggr->val = (u64)((double)aggr->val * aggr->ena / aggr->run + 0.5);
                }
        } else
                aggr->ena = aggr->run = 0;

        return 0;
}
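
/*
 * Summary of evsel->counts->scaled as set above when scaling is requested:
 *      -1  the event never ran (aggr->run == 0), the value is forced to 0
 *       1  the event ran for less time than it was enabled, value scaled up
 *       0  no scaling was necessary
 */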
static int __perf_evsel__open(struct perf_evsel *evsel, struct cpu_map *cpus,
                              struct thread_map *threads, bool group,
                              struct xyarray *group_fds)
{
        int cpu, thread;
        unsigned long flags = 0;
        int pid = -1, err;

        if (evsel->fd == NULL &&
            perf_evsel__alloc_fd(evsel, cpus->nr, threads->nr) < 0)
                return -ENOMEM;

        if (evsel->cgrp) {
                flags = PERF_FLAG_PID_CGROUP;
                pid = evsel->cgrp->fd;
        }

        for (cpu = 0; cpu < cpus->nr; cpu++) {
                int group_fd = group_fds ? GROUP_FD(group_fds, cpu) : -1;

                for (thread = 0; thread < threads->nr; thread++) {

                        if (!evsel->cgrp)
                                pid = threads->map[thread];

                        FD(evsel, cpu, thread) = sys_perf_event_open(&evsel->attr,
                                                                     pid,
                                                                     cpus->map[cpu],
                                                                     group_fd, flags);
                        if (FD(evsel, cpu, thread) < 0) {
                                err = -errno;
                                goto out_close;
                        }

                        if (group && group_fd == -1)
                                group_fd = FD(evsel, cpu, thread);
                }
        }

        return 0;

out_close:
        do {
                while (--thread >= 0) {
                        close(FD(evsel, cpu, thread));
                        FD(evsel, cpu, thread) = -1;
                }
                thread = threads->nr;
        } while (--cpu >= 0);

        return err;
}
void perf_evsel__close(struct perf_evsel *evsel, int ncpus, int nthreads)
{
        if (evsel->fd == NULL)
                return;

        perf_evsel__close_fd(evsel, ncpus, nthreads);
        perf_evsel__free_fd(evsel);
        evsel->fd = NULL;
}
static struct {
        struct cpu_map map;
        int cpus[1];
} empty_cpu_map = {
        .map.nr = 1,
        .cpus   = { -1, },
};

static struct {
        struct thread_map map;
        int threads[1];
} empty_thread_map = {
        .map.nr  = 1,
        .threads = { -1, },
};
int perf_evsel__open(struct perf_evsel *evsel, struct cpu_map *cpus,
                     struct thread_map *threads, bool group,
                     struct xyarray *group_fd)
{
        if (cpus == NULL) {
                /* Work around old compiler warnings about strict aliasing */
                cpus = &empty_cpu_map.map;
        }

        if (threads == NULL)
                threads = &empty_thread_map.map;

        return __perf_evsel__open(evsel, cpus, threads, group, group_fd);
}
int perf_evsel__open_per_cpu(struct perf_evsel *evsel,
                             struct cpu_map *cpus, bool group,
                             struct xyarray *group_fd)
{
        return __perf_evsel__open(evsel, cpus, &empty_thread_map.map, group,
                                  group_fd);
}
int perf_evsel__open_per_thread(struct perf_evsel *evsel,
                                struct thread_map *threads, bool group,
                                struct xyarray *group_fd)
{
        return __perf_evsel__open(evsel, &empty_cpu_map.map, threads, group,
                                  group_fd);
}
static int perf_event__parse_id_sample(const union perf_event *event, u64 type,
                                       struct perf_sample *sample)
{
        const u64 *array = event->sample.array;

        array += ((event->header.size -
                   sizeof(event->header)) / sizeof(u64)) - 1;

        if (type & PERF_SAMPLE_CPU) {
                u32 *p = (u32 *)array;
                sample->cpu = *p;
                array--;
        }

        if (type & PERF_SAMPLE_STREAM_ID) {
                sample->stream_id = *array;
                array--;
        }

        if (type & PERF_SAMPLE_ID) {
                sample->id = *array;
                array--;
        }

        if (type & PERF_SAMPLE_TIME) {
                sample->time = *array;
                array--;
        }

        if (type & PERF_SAMPLE_TID) {
                u32 *p = (u32 *)array;
                sample->pid = p[0];
                sample->tid = p[1];
        }

        return 0;
}
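
/*
 * perf_event__parse_id_sample() walks the record backwards: when
 * sample_id_all is in use, non-sample records carry an id trailer at the
 * very end of the record, laid out in the same TID/TIME/ID/STREAM_ID/CPU
 * order as regular samples, so the parser starts at the last u64 and
 * decrements.  A sketch of the tail of such a record (only the fields
 * selected in sample_type are actually present):
 *
 *      ... payload ... | PID/TID | TIME | ID | STREAM_ID | CPU/res |
 *                                                  array ----------^
 */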
static bool sample_overlap(const union perf_event *event,
                           const void *offset, u64 size)
{
        const void *base = event;

        if (offset + size > base + event->header.size)
                return true;

        return false;
}
int perf_event__parse_sample(const union perf_event *event, u64 type,
                             int sample_size, bool sample_id_all,
                             struct perf_sample *data, bool swapped)
{
        const u64 *array;

        /*
         * used for cross-endian analysis. See git commit 65014ab3
         * for why this goofiness is needed.
         */
        union {
                u64 val64;
                u32 val32[2];
        } u;

        memset(data, 0, sizeof(*data));
        data->cpu = data->pid = data->tid = -1;
        data->stream_id = data->id = data->time = -1ULL;
        data->period = 1;

        if (event->header.type != PERF_RECORD_SAMPLE) {
                if (!sample_id_all)
                        return 0;
                return perf_event__parse_id_sample(event, type, data);
        }

        array = event->sample.array;

        if (sample_size + sizeof(event->header) > event->header.size)
                return -EFAULT;

        if (type & PERF_SAMPLE_IP) {
                data->ip = event->ip.ip;
                array++;
        }

        if (type & PERF_SAMPLE_TID) {
                u.val64 = *array;
                if (swapped) {
                        /* undo swap of u64, then swap on individual u32s */
                        u.val64 = bswap_64(u.val64);
                        u.val32[0] = bswap_32(u.val32[0]);
                        u.val32[1] = bswap_32(u.val32[1]);
                }

                data->pid = u.val32[0];
                data->tid = u.val32[1];
                array++;
        }

        if (type & PERF_SAMPLE_TIME) {
                data->time = *array;
                array++;
        }

        data->addr = 0;
        if (type & PERF_SAMPLE_ADDR) {
                data->addr = *array;
                array++;
        }

        data->id = -1ULL;
        if (type & PERF_SAMPLE_ID) {
                data->id = *array;
                array++;
        }

        if (type & PERF_SAMPLE_STREAM_ID) {
                data->stream_id = *array;
                array++;
        }

        if (type & PERF_SAMPLE_CPU) {
                u.val64 = *array;
                if (swapped) {
                        /* undo swap of u64, then swap on individual u32s */
                        u.val64 = bswap_64(u.val64);
                        u.val32[0] = bswap_32(u.val32[0]);
                }

                data->cpu = u.val32[0];
                array++;
        }

        if (type & PERF_SAMPLE_PERIOD) {
                data->period = *array;
                array++;
        }

        if (type & PERF_SAMPLE_READ) {
                fprintf(stderr, "PERF_SAMPLE_READ is unsupported for now\n");
                return -1;
        }

        if (type & PERF_SAMPLE_CALLCHAIN) {
                if (sample_overlap(event, array, sizeof(data->callchain->nr)))
                        return -EFAULT;

                data->callchain = (struct ip_callchain *)array;

                if (sample_overlap(event, array, data->callchain->nr))
                        return -EFAULT;

                array += 1 + data->callchain->nr;
        }

        if (type & PERF_SAMPLE_RAW) {
                const u64 *pdata;

                u.val64 = *array;
                if (WARN_ONCE(swapped,
                              "Endianness of raw data not corrected!\n")) {
                        /* undo swap of u64, then swap on individual u32s */
                        u.val64 = bswap_64(u.val64);
                        u.val32[0] = bswap_32(u.val32[0]);
                        u.val32[1] = bswap_32(u.val32[1]);
                }

                if (sample_overlap(event, array, sizeof(u32)))
                        return -EFAULT;

                data->raw_size = u.val32[0];
                pdata = (void *) array + sizeof(u32);

                if (sample_overlap(event, pdata, data->raw_size))
                        return -EFAULT;

                data->raw_data = (void *) pdata;
        }

        return 0;
}
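
/*
 * The parse order above mirrors the layout of a PERF_RECORD_SAMPLE body:
 * one u64 slot per selected sample_type bit, in the order IP, TID, TIME,
 * ADDR, ID, STREAM_ID, CPU, PERIOD, followed by the variable sized
 * CALLCHAIN (nr + nr entries) and RAW (u32 size + data) sections.
 * perf_event__synthesize_sample() below writes the fixed-size part of the
 * same layout back out, which is why the two functions advance "array" in
 * lockstep.
 */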
int perf_event__synthesize_sample(union perf_event *event, u64 type,
                                  const struct perf_sample *sample,
                                  bool swapped)
{
        u64 *array;

        /*
         * used for cross-endian analysis. See git commit 65014ab3
         * for why this goofiness is needed.
         */
        union {
                u64 val64;
                u32 val32[2];
        } u;

        array = event->sample.array;

        if (type & PERF_SAMPLE_IP) {
                event->ip.ip = sample->ip;
                array++;
        }

        if (type & PERF_SAMPLE_TID) {
                u.val32[0] = sample->pid;
                u.val32[1] = sample->tid;
                if (swapped) {
                        /*
                         * Inverse of what is done in perf_event__parse_sample
                         */
                        u.val32[0] = bswap_32(u.val32[0]);
                        u.val32[1] = bswap_32(u.val32[1]);
                        u.val64 = bswap_64(u.val64);
                }

                *array = u.val64;
                array++;
        }

        if (type & PERF_SAMPLE_TIME) {
                *array = sample->time;
                array++;
        }

        if (type & PERF_SAMPLE_ADDR) {
                *array = sample->addr;
                array++;
        }

        if (type & PERF_SAMPLE_ID) {
                *array = sample->id;
                array++;
        }

        if (type & PERF_SAMPLE_STREAM_ID) {
                *array = sample->stream_id;
                array++;
        }

        if (type & PERF_SAMPLE_CPU) {
                u.val32[0] = sample->cpu;
                if (swapped) {
                        /*
                         * Inverse of what is done in perf_event__parse_sample
                         */
                        u.val32[0] = bswap_32(u.val32[0]);
                        u.val64 = bswap_64(u.val64);
                }
                *array = u.val64;
                array++;
        }

        if (type & PERF_SAMPLE_PERIOD) {
                *array = sample->period;
                array++;
        }

        return 0;
}