/*
 * thread-stack.c: Synthesize a thread's stack using call / return events
 * Copyright (c) 2014, Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 *
 */

#include <linux/rbtree.h>
#include <linux/list.h>
#include "thread.h"
#include "event.h"
#include "machine.h"
#include "util.h"
#include "debug.h"
#include "symbol.h"
#include "comm.h"
#include "call-path.h"
#include "thread-stack.h"

#define STACK_GROWTH 2048

/**
 * struct thread_stack_entry - thread stack entry.
 * @ret_addr: return address
 * @timestamp: timestamp (if known)
 * @ref: external reference (e.g. db_id of sample)
 * @branch_count: the branch count when the entry was created
 * @cp: call path
 * @no_call: a 'call' was not seen
 */
struct thread_stack_entry {
	u64 ret_addr;
	u64 timestamp;
	u64 ref;
	u64 branch_count;
	struct call_path *cp;
	bool no_call;
};

/**
 * struct thread_stack - thread stack constructed from 'call' and 'return'
 *                       branch samples.
 * @stack: array that holds the stack
 * @cnt: number of entries in the stack
 * @sz: current maximum stack size
 * @trace_nr: current trace number
 * @branch_count: running branch count
 * @kernel_start: kernel start address
 * @last_time: last timestamp
 * @crp: call/return processor
 * @comm: current comm
 */
struct thread_stack {
	struct thread_stack_entry *stack;
	size_t cnt;
	size_t sz;
	u64 trace_nr;
	u64 branch_count;
	u64 kernel_start;
	u64 last_time;
	struct call_return_processor *crp;
	struct comm *comm;
};

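/*
 * Note (added commentary, not from the original file): the thread stack
 * works in two modes. Created without a call_return_processor (via
 * thread_stack__event()), it only tracks return addresses so that
 * thread_stack__sample() can synthesize callchains. Created with one (via
 * thread_stack__process()), it also resolves call paths and emits
 * call/return pairs with timestamps and branch counts.
 */
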
static int thread_stack__grow(struct thread_stack *ts)
{
	struct thread_stack_entry *new_stack;
	size_t sz, new_sz;

	new_sz = ts->sz + STACK_GROWTH;
	sz = new_sz * sizeof(struct thread_stack_entry);

	new_stack = realloc(ts->stack, sz);
	if (!new_stack)
		return -ENOMEM;

	ts->stack = new_stack;
	ts->sz = new_sz;

	return 0;
}

static struct thread_stack *thread_stack__new(struct thread *thread,
					      struct call_return_processor *crp)
{
	struct thread_stack *ts;

	ts = zalloc(sizeof(struct thread_stack));
	if (!ts)
		return NULL;

	if (thread_stack__grow(ts)) {
		free(ts);
		return NULL;
	}

	if (thread->mg && thread->mg->machine)
		ts->kernel_start = machine__kernel_start(thread->mg->machine);
	else
		ts->kernel_start = 1ULL << 63;
	ts->crp = crp;

	return ts;
}

static int thread_stack__push(struct thread_stack *ts, u64 ret_addr)
{
	int err = 0;

	if (ts->cnt == ts->sz) {
		err = thread_stack__grow(ts);
		if (err) {
			pr_warning("Out of memory: discarding thread stack\n");
			ts->cnt = 0;
		}
	}

	ts->stack[ts->cnt++].ret_addr = ret_addr;

	return err;
}

static void thread_stack__pop(struct thread_stack *ts, u64 ret_addr)
{
	size_t i;

	/*
	 * In some cases there may be functions which are not seen to return.
	 * For example when setjmp / longjmp has been used. Or the perf context
	 * switch in the kernel which doesn't stop and start tracing in exactly
	 * the same code path. When that happens the return address will be
	 * further down the stack. If the return address is not found at all,
	 * we assume the opposite (i.e. this is a return for a call that wasn't
	 * seen for some reason) and leave the stack alone.
	 */
	for (i = ts->cnt; i; ) {
		if (ts->stack[--i].ret_addr == ret_addr) {
			ts->cnt = i;
			return;
		}
	}
}

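/*
 * Illustration of thread_stack__pop() (added commentary, hypothetical
 * addresses): with return addresses [A, B, C] on the stack (C innermost),
 * a 'return' to B pops both C and B, which covers the longjmp() case
 * described above, while a 'return' to an address not on the stack leaves
 * all three entries untouched.
 */
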
static bool thread_stack__in_kernel(struct thread_stack *ts)
{
	if (!ts->cnt)
		return false;

	return ts->stack[ts->cnt - 1].cp->in_kernel;
}

static int thread_stack__call_return(struct thread *thread,
				     struct thread_stack *ts, size_t idx,
				     u64 timestamp, u64 ref, bool no_return)
{
	struct call_return_processor *crp = ts->crp;
	struct thread_stack_entry *tse;
	struct call_return cr = {
		.thread = thread,
		.comm = ts->comm,
		.db_id = 0,
	};

	tse = &ts->stack[idx];
	cr.cp = tse->cp;
	cr.call_time = tse->timestamp;
	cr.return_time = timestamp;
	cr.branch_count = ts->branch_count - tse->branch_count;
	cr.call_ref = tse->ref;
	cr.return_ref = ref;
	if (tse->no_call)
		cr.flags |= CALL_RETURN_NO_CALL;
	if (no_return)
		cr.flags |= CALL_RETURN_NO_RETURN;

	return crp->process(&cr, crp->data);
}

static int __thread_stack__flush(struct thread *thread, struct thread_stack *ts)
{
	struct call_return_processor *crp = ts->crp;
	int err;

	if (!crp) {
		ts->cnt = 0;
		return 0;
	}

	while (ts->cnt) {
		err = thread_stack__call_return(thread, ts, --ts->cnt,
						ts->last_time, 0, true);
		if (err) {
			pr_err("Error flushing thread stack!\n");
			ts->cnt = 0;
			return err;
		}
	}

	return 0;
}

int thread_stack__flush(struct thread *thread)
{
	if (thread->ts)
		return __thread_stack__flush(thread, thread->ts);

	return 0;
}

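/*
 * Update the thread stack for one branch event: a 'call' pushes the address
 * the call will return to, a 'return' pops back to the matching entry.
 * Allocates the thread stack on first use.
 */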
int thread_stack__event(struct thread *thread, u32 flags, u64 from_ip,
			u64 to_ip, u16 insn_len, u64 trace_nr)
{
	if (!thread)
		return -EINVAL;

	if (!thread->ts) {
		thread->ts = thread_stack__new(thread, NULL);
		if (!thread->ts) {
			pr_warning("Out of memory: no thread stack\n");
			return -ENOMEM;
		}
		thread->ts->trace_nr = trace_nr;
	}

	/*
	 * When the trace is discontinuous, the trace_nr changes. In that case
	 * the stack might be completely invalid. Better to report nothing than
	 * to report something misleading, so flush the stack.
	 */
	if (trace_nr != thread->ts->trace_nr) {
		if (thread->ts->trace_nr)
			__thread_stack__flush(thread, thread->ts);
		thread->ts->trace_nr = trace_nr;
	}

	/* Stop here if thread_stack__process() is in use */
	if (thread->ts->crp)
		return 0;

	if (flags & PERF_IP_FLAG_CALL) {
		u64 ret_addr;

		if (!to_ip)
			return 0;
		ret_addr = from_ip + insn_len;
		if (ret_addr == to_ip)
			return 0; /* Zero-length calls are excluded */
		return thread_stack__push(thread->ts, ret_addr);
	} else if (flags & PERF_IP_FLAG_RETURN) {
		if (!from_ip)
			return 0;
		thread_stack__pop(thread->ts, to_ip);
	}

	return 0;
}

void thread_stack__set_trace_nr(struct thread *thread, u64 trace_nr)
{
	if (!thread || !thread->ts)
		return;

	if (trace_nr != thread->ts->trace_nr) {
		if (thread->ts->trace_nr)
			__thread_stack__flush(thread, thread->ts);
		thread->ts->trace_nr = trace_nr;
	}
}

void thread_stack__free(struct thread *thread)
{
	if (thread->ts) {
		__thread_stack__flush(thread, thread->ts);
		zfree(&thread->ts->stack);
		zfree(&thread->ts);
	}
}

void thread_stack__sample(struct thread *thread, struct ip_callchain *chain,
			  size_t sz, u64 ip)
{
	size_t i;

	if (!thread || !thread->ts)
		chain->nr = 1;
	else
		chain->nr = min(sz, thread->ts->cnt + 1);

	chain->ips[0] = ip;

	for (i = 1; i < chain->nr; i++)
		chain->ips[i] = thread->ts->stack[thread->ts->cnt - i].ret_addr;
}

struct call_return_processor *
call_return_processor__new(int (*process)(struct call_return *cr, void *data),
			   void *data)
{
	struct call_return_processor *crp;

	crp = zalloc(sizeof(struct call_return_processor));
	if (!crp)
		return NULL;
	crp->cpr = call_path_root__new();
	if (!crp->cpr)
		goto out_free;
	crp->process = process;
	crp->data = data;
	return crp;

out_free:
	free(crp);
	return NULL;
}

void call_return_processor__free(struct call_return_processor *crp)
{
	if (crp) {
		call_path_root__free(crp->cpr);
		free(crp);
	}
}

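/*
 * Usage sketch (added commentary; illustrative only - 'my_process' and 'db'
 * are hypothetical): consumers create a processor with a callback, route
 * each branch sample through thread_stack__process(), and receive completed
 * call/return pairs in the callback:
 *
 *	static int my_process(struct call_return *cr, void *data)
 *	{
 *		... e.g. export cr->cp, cr->call_time, cr->return_time ...
 *		return 0;
 *	}
 *
 *	crp = call_return_processor__new(my_process, db);
 *	err = thread_stack__process(thread, comm, sample, from_al, to_al,
 *				    ref, crp);
 */
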
static int thread_stack__push_cp(struct thread_stack *ts, u64 ret_addr,
				 u64 timestamp, u64 ref, struct call_path *cp,
				 bool no_call)
{
	struct thread_stack_entry *tse;
	int err;

	if (ts->cnt == ts->sz) {
		err = thread_stack__grow(ts);
		if (err)
			return err;
	}

	tse = &ts->stack[ts->cnt++];
	tse->ret_addr = ret_addr;
	tse->timestamp = timestamp;
	tse->ref = ref;
	tse->branch_count = ts->branch_count;
	tse->cp = cp;
	tse->no_call = no_call;

	return 0;
}

static int thread_stack__pop_cp(struct thread *thread, struct thread_stack *ts,
				u64 ret_addr, u64 timestamp, u64 ref,
				struct symbol *sym)
{
	int err;

	if (!ts->cnt)
		return 1;

	if (ts->cnt == 1) {
		struct thread_stack_entry *tse = &ts->stack[0];

		if (tse->cp->sym == sym)
			return thread_stack__call_return(thread, ts, --ts->cnt,
							 timestamp, ref, false);
	}

	if (ts->stack[ts->cnt - 1].ret_addr == ret_addr) {
		return thread_stack__call_return(thread, ts, --ts->cnt,
						 timestamp, ref, false);
	} else {
		/* Return address further down the stack, cf. longjmp() etc. */
		size_t i = ts->cnt - 1;

		while (i--) {
			if (ts->stack[i].ret_addr != ret_addr)
				continue;
			i += 1;
			while (ts->cnt > i) {
				err = thread_stack__call_return(thread, ts,
								--ts->cnt,
								timestamp, ref,
								true);
				if (err)
					return err;
			}
			return thread_stack__call_return(thread, ts, --ts->cnt,
							 timestamp, ref, false);
		}
	}

	return 1;
}

static int thread_stack__bottom(struct thread *thread, struct thread_stack *ts,
				struct perf_sample *sample,
				struct addr_location *from_al,
				struct addr_location *to_al, u64 ref)
{
	struct call_path_root *cpr = ts->crp->cpr;
	struct call_path *cp;
	struct symbol *sym;
	u64 ip;

	if (sample->ip) {
		ip = sample->ip;
		sym = from_al->sym;
	} else if (sample->addr) {
		ip = sample->addr;
		sym = to_al->sym;
	} else {
		return 0;
	}

	cp = call_path__findnew(cpr, &cpr->call_path, sym, ip,
				ts->kernel_start);
	if (!cp)
		return -ENOMEM;

	return thread_stack__push_cp(thread->ts, ip, sample->time, ref, cp,
				     true);
}

static int thread_stack__no_call_return(struct thread *thread,
					struct thread_stack *ts,
					struct perf_sample *sample,
					struct addr_location *from_al,
					struct addr_location *to_al, u64 ref)
{
	struct call_path_root *cpr = ts->crp->cpr;
	struct call_path *cp, *parent;
	u64 ks = ts->kernel_start;
	int err;

	if (sample->ip >= ks && sample->addr < ks) {
		/* Return to userspace, so pop all kernel addresses */
		while (thread_stack__in_kernel(ts)) {
			err = thread_stack__call_return(thread, ts, --ts->cnt,
							sample->time, ref,
							true);
			if (err)
				return err;
		}

		/* If the stack is empty, push the userspace address */
		if (!ts->cnt) {
			cp = call_path__findnew(cpr, &cpr->call_path,
						to_al->sym, sample->addr,
						ts->kernel_start);
			if (!cp)
				return -ENOMEM;
			return thread_stack__push_cp(ts, 0, sample->time, ref,
						     cp, true);
		}
	} else if (thread_stack__in_kernel(ts) && sample->ip < ks) {
		/* Return to userspace, so pop all kernel addresses */
		while (thread_stack__in_kernel(ts)) {
			err = thread_stack__call_return(thread, ts, --ts->cnt,
							sample->time, ref,
							true);
			if (err)
				return err;
		}
	}

	if (ts->cnt)
		parent = ts->stack[ts->cnt - 1].cp;
	else
		parent = &cpr->call_path;

	/* This 'return' had no 'call', so push and pop top of stack */
	cp = call_path__findnew(cpr, parent, from_al->sym, sample->ip,
				ts->kernel_start);
	if (!cp)
		return -ENOMEM;

	err = thread_stack__push_cp(ts, sample->addr, sample->time, ref, cp,
				    true);
	if (err)
		return err;

	return thread_stack__pop_cp(thread, ts, sample->addr, sample->time,
				    ref, to_al->sym);
}

static int thread_stack__trace_begin(struct thread *thread,
				     struct thread_stack *ts, u64 timestamp,
				     u64 ref)
{
	struct thread_stack_entry *tse;
	int err;

	if (!ts->cnt)
		return 0;

	/* Pop trace end */
	tse = &ts->stack[ts->cnt - 1];
	if (tse->cp->sym == NULL && tse->cp->ip == 0) {
		err = thread_stack__call_return(thread, ts, --ts->cnt,
						timestamp, ref, false);
		if (err)
			return err;
	}

	return 0;
}

static int thread_stack__trace_end(struct thread_stack *ts,
				   struct perf_sample *sample, u64 ref)
{
	struct call_path_root *cpr = ts->crp->cpr;
	struct call_path *cp;
	u64 ret_addr;

	/* No point having 'trace end' on the bottom of the stack */
	if (!ts->cnt || (ts->cnt == 1 && ts->stack[0].ref == ref))
		return 0;

	cp = call_path__findnew(cpr, ts->stack[ts->cnt - 1].cp, NULL, 0,
				ts->kernel_start);
	if (!cp)
		return -ENOMEM;

	ret_addr = sample->ip + sample->insn_len;

	return thread_stack__push_cp(ts, ret_addr, sample->time, ref, cp,
				     false);
}

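/*
 * Main entry point when call/return processing is in use: classify the
 * sample as call, return, trace begin or trace end, maintain the call path
 * stack accordingly, and emit completed call/return pairs via crp->process().
 */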
int thread_stack__process(struct thread *thread, struct comm *comm,
			  struct perf_sample *sample,
			  struct addr_location *from_al,
			  struct addr_location *to_al, u64 ref,
			  struct call_return_processor *crp)
{
	struct thread_stack *ts = thread->ts;
	int err = 0;

	if (ts) {
		if (!ts->crp) {
			/* Supersede thread_stack__event() */
			thread_stack__free(thread);
			thread->ts = thread_stack__new(thread, crp);
			if (!thread->ts)
				return -ENOMEM;
			ts = thread->ts;
			ts->comm = comm;
		}
	} else {
		thread->ts = thread_stack__new(thread, crp);
		if (!thread->ts)
			return -ENOMEM;
		ts = thread->ts;
		ts->comm = comm;
	}

	/* Flush stack on exec */
	if (ts->comm != comm && thread->pid_ == thread->tid) {
		err = __thread_stack__flush(thread, ts);
		if (err)
			return err;
		ts->comm = comm;
	}

	/* If the stack is empty, put the current symbol on the stack */
	if (!ts->cnt) {
		err = thread_stack__bottom(thread, ts, sample, from_al, to_al,
					   ref);
		if (err)
			return err;
	}

	ts->branch_count += 1;
	ts->last_time = sample->time;

	if (sample->flags & PERF_IP_FLAG_CALL) {
		struct call_path_root *cpr = ts->crp->cpr;
		struct call_path *cp;
		u64 ret_addr;

		if (!sample->ip || !sample->addr)
			return 0;

		ret_addr = sample->ip + sample->insn_len;
		if (ret_addr == sample->addr)
			return 0; /* Zero-length calls are excluded */

		cp = call_path__findnew(cpr, ts->stack[ts->cnt - 1].cp,
					to_al->sym, sample->addr,
					ts->kernel_start);
		if (!cp)
			return -ENOMEM;
		err = thread_stack__push_cp(ts, ret_addr, sample->time, ref,
					    cp, false);
	} else if (sample->flags & PERF_IP_FLAG_RETURN) {
		if (!sample->ip || !sample->addr)
			return 0;

		err = thread_stack__pop_cp(thread, ts, sample->addr,
					   sample->time, ref, from_al->sym);
		if (err) {
			if (err < 0)
				return err;
			err = thread_stack__no_call_return(thread, ts, sample,
							   from_al, to_al, ref);
		}
	} else if (sample->flags & PERF_IP_FLAG_TRACE_BEGIN) {
		err = thread_stack__trace_begin(thread, ts, sample->time, ref);
	} else if (sample->flags & PERF_IP_FLAG_TRACE_END) {
		err = thread_stack__trace_end(ts, sample, ref);
	}

	return err;
}

size_t thread_stack__depth(struct thread *thread)
{
	if (!thread->ts)
		return 0;
	return thread->ts->cnt;
}