perf intel-pt: Add lookahead callback
tools/perf/util/thread-stack.c
/*
 * thread-stack.c: Synthesize a thread's stack using call / return events
 * Copyright (c) 2014, Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 *
 */

#include <linux/rbtree.h>
#include <linux/list.h>
#include <linux/log2.h>
#include <errno.h>
#include "thread.h"
#include "event.h"
#include "machine.h"
#include "env.h"
#include "util.h"
#include "debug.h"
#include "symbol.h"
#include "comm.h"
#include "call-path.h"
#include "thread-stack.h"

#define STACK_GROWTH 2048
/*
 * State of retpoline detection.
 *
 * RETPOLINE_NONE: no retpoline detection
 * X86_RETPOLINE_POSSIBLE: x86 retpoline possible
 * X86_RETPOLINE_DETECTED: x86 retpoline detected
 */
enum retpoline_state_t {
	RETPOLINE_NONE,
	X86_RETPOLINE_POSSIBLE,
	X86_RETPOLINE_DETECTED,
};
/**
 * struct thread_stack_entry - thread stack entry.
 * @ret_addr: return address
 * @timestamp: timestamp (if known)
 * @ref: external reference (e.g. db_id of sample)
 * @branch_count: the branch count when the entry was created
 * @insn_count: the instruction count when the entry was created
 * @cyc_count: the cycle count when the entry was created
 * @db_id: id used for db-export
 * @cp: call path
 * @no_call: a 'call' was not seen
 * @trace_end: a 'call' but trace ended
 * @non_call: a branch but not a 'call' to the start of a different symbol
 */
struct thread_stack_entry {
	u64 ret_addr;
	u64 timestamp;
	u64 ref;
	u64 branch_count;
	u64 insn_count;
	u64 cyc_count;
	u64 db_id;
	struct call_path *cp;
	bool no_call;
	bool trace_end;
	bool non_call;
};
/**
 * struct thread_stack - thread stack constructed from 'call' and 'return'
 *                       branch samples.
 * @stack: array that holds the stack
 * @cnt: number of entries in the stack
 * @sz: current maximum stack size
 * @trace_nr: current trace number
 * @branch_count: running branch count
 * @insn_count: running instruction count
 * @cyc_count: running cycle count
 * @kernel_start: kernel start address
 * @last_time: last timestamp
 * @crp: call/return processor
 * @comm: current comm
 * @arr_sz: size of array if this is the first element of an array
 * @rstate: used to detect retpolines
 */
struct thread_stack {
	struct thread_stack_entry *stack;
	size_t cnt;
	size_t sz;
	u64 trace_nr;
	u64 branch_count;
	u64 insn_count;
	u64 cyc_count;
	u64 kernel_start;
	u64 last_time;
	struct call_return_processor *crp;
	struct comm *comm;
	unsigned int arr_sz;
	enum retpoline_state_t rstate;
};
/*
 * Assume pid == tid == 0 identifies the idle task as defined by
 * perf_session__register_idle_thread(). The idle task is really 1 task per
 * cpu, and therefore requires a stack for each cpu.
 */
static inline bool thread_stack__per_cpu(struct thread *thread)
{
	return !(thread->tid || thread->pid_);
}

static int thread_stack__grow(struct thread_stack *ts)
{
	struct thread_stack_entry *new_stack;
	size_t sz, new_sz;

	new_sz = ts->sz + STACK_GROWTH;
	sz = new_sz * sizeof(struct thread_stack_entry);

	new_stack = realloc(ts->stack, sz);
	if (!new_stack)
		return -ENOMEM;

	ts->stack = new_stack;
	ts->sz = new_sz;

	return 0;
}
static int thread_stack__init(struct thread_stack *ts, struct thread *thread,
			      struct call_return_processor *crp)
{
	int err;

	err = thread_stack__grow(ts);
	if (err)
		return err;

	if (thread->mg && thread->mg->machine) {
		struct machine *machine = thread->mg->machine;
		const char *arch = perf_env__arch(machine->env);

		ts->kernel_start = machine__kernel_start(machine);
		if (!strcmp(arch, "x86"))
			ts->rstate = X86_RETPOLINE_POSSIBLE;
	} else {
		ts->kernel_start = 1ULL << 63;
	}
	ts->crp = crp;

	return 0;
}
static struct thread_stack *thread_stack__new(struct thread *thread, int cpu,
					      struct call_return_processor *crp)
{
	struct thread_stack *ts = thread->ts, *new_ts;
	unsigned int old_sz = ts ? ts->arr_sz : 0;
	unsigned int new_sz = 1;

	if (thread_stack__per_cpu(thread) && cpu > 0)
		new_sz = roundup_pow_of_two(cpu + 1);

	if (!ts || new_sz > old_sz) {
		new_ts = calloc(new_sz, sizeof(*ts));
		if (!new_ts)
			return NULL;
		if (ts)
			memcpy(new_ts, ts, old_sz * sizeof(*ts));
		new_ts->arr_sz = new_sz;
		zfree(&thread->ts);
		thread->ts = new_ts;
		ts = new_ts;
	}

	if (thread_stack__per_cpu(thread) && cpu > 0 &&
	    (unsigned int)cpu < ts->arr_sz)
		ts += cpu;

	if (!ts->stack &&
	    thread_stack__init(ts, thread, crp))
		return NULL;

	return ts;
}
static struct thread_stack *thread__cpu_stack(struct thread *thread, int cpu)
{
	struct thread_stack *ts = thread->ts;

	if (cpu < 0)
		cpu = 0;

	if (!ts || (unsigned int)cpu >= ts->arr_sz)
		return NULL;

	ts += cpu;

	if (!ts->stack)
		return NULL;

	return ts;
}

static inline struct thread_stack *thread__stack(struct thread *thread,
						 int cpu)
{
	if (!thread)
		return NULL;

	if (thread_stack__per_cpu(thread))
		return thread__cpu_stack(thread, cpu);

	return thread->ts;
}
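/*
 * Usage sketch (hypothetical, kept under "#if 0" so it is not built): how the
 * per-cpu handling above plays out for the idle task. Because the idle task
 * has pid == tid == 0 on every cpu, callers pass a cpu number and
 * thread__stack() resolves a separate stack per cpu; for ordinary threads the
 * cpu argument does not matter. The thread pointer and cpu values are made up.
 */
#if 0
static void example_idle_task_stacks(struct thread *idle_thread)
{
	/* Two different cpus get two different stack slots for the idle task */
	struct thread_stack *ts0 = thread__stack(idle_thread, 0);
	struct thread_stack *ts1 = thread__stack(idle_thread, 1);

	/* For the idle task these are distinct array elements (or NULL) */
	if (ts0 && ts1 && ts0 != ts1)
		pr_debug("idle task has separate per-cpu stacks\n");
}
#endif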
static int thread_stack__push(struct thread_stack *ts, u64 ret_addr,
			      bool trace_end)
{
	int err = 0;

	if (ts->cnt == ts->sz) {
		err = thread_stack__grow(ts);
		if (err) {
			pr_warning("Out of memory: discarding thread stack\n");
			ts->cnt = 0;
		}
	}

	ts->stack[ts->cnt].trace_end = trace_end;
	ts->stack[ts->cnt++].ret_addr = ret_addr;

	return err;
}
static void thread_stack__pop(struct thread_stack *ts, u64 ret_addr)
{
	size_t i;

	/*
	 * In some cases there may be functions which are not seen to return.
	 * For example when setjmp / longjmp has been used. Or the perf context
	 * switch in the kernel which doesn't stop and start tracing in exactly
	 * the same code path. When that happens the return address will be
	 * further down the stack. If the return address is not found at all,
	 * we assume the opposite (i.e. this is a return for a call that wasn't
	 * seen for some reason) and leave the stack alone.
	 */
	for (i = ts->cnt; i; ) {
		if (ts->stack[--i].ret_addr == ret_addr) {
			ts->cnt = i;
			return;
		}
	}
}

static void thread_stack__pop_trace_end(struct thread_stack *ts)
{
	size_t i;

	for (i = ts->cnt; i; ) {
		if (ts->stack[--i].trace_end)
			ts->cnt = i;
		else
			return;
	}
}

static bool thread_stack__in_kernel(struct thread_stack *ts)
{
	if (!ts->cnt)
		return false;

	return ts->stack[ts->cnt - 1].cp->in_kernel;
}
static int thread_stack__call_return(struct thread *thread,
				     struct thread_stack *ts, size_t idx,
				     u64 timestamp, u64 ref, bool no_return)
{
	struct call_return_processor *crp = ts->crp;
	struct thread_stack_entry *tse;
	struct call_return cr = {
		.thread = thread,
		.comm = ts->comm,
		.db_id = 0,
	};
	u64 *parent_db_id;

	tse = &ts->stack[idx];
	cr.cp = tse->cp;
	cr.call_time = tse->timestamp;
	cr.return_time = timestamp;
	cr.branch_count = ts->branch_count - tse->branch_count;
	cr.insn_count = ts->insn_count - tse->insn_count;
	cr.cyc_count = ts->cyc_count - tse->cyc_count;
	cr.db_id = tse->db_id;
	cr.call_ref = tse->ref;
	cr.return_ref = ref;
	if (tse->no_call)
		cr.flags |= CALL_RETURN_NO_CALL;
	if (no_return)
		cr.flags |= CALL_RETURN_NO_RETURN;
	if (tse->non_call)
		cr.flags |= CALL_RETURN_NON_CALL;

	/*
	 * The parent db_id must be assigned before exporting the child. Note
	 * it is not possible to export the parent first because its
	 * information is not complete until its 'return' has been processed.
	 */
	parent_db_id = idx ? &(tse - 1)->db_id : NULL;

	return crp->process(&cr, parent_db_id, crp->data);
}
static int __thread_stack__flush(struct thread *thread, struct thread_stack *ts)
{
	struct call_return_processor *crp = ts->crp;
	int err;

	if (!crp) {
		ts->cnt = 0;
		return 0;
	}

	while (ts->cnt) {
		err = thread_stack__call_return(thread, ts, --ts->cnt,
						ts->last_time, 0, true);
		if (err) {
			pr_err("Error flushing thread stack!\n");
			ts->cnt = 0;
			return err;
		}
	}

	return 0;
}

int thread_stack__flush(struct thread *thread)
{
	struct thread_stack *ts = thread->ts;
	unsigned int pos;
	int err = 0;

	if (ts) {
		for (pos = 0; pos < ts->arr_sz; pos++) {
			int ret = __thread_stack__flush(thread, ts + pos);

			if (ret)
				err = ret;
		}
	}

	return err;
}
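/*
 * Usage sketch (hypothetical, not built): flushing at the end of a session.
 * A caller that has been feeding events would typically flush each thread's
 * stack(s) before exiting so that any still-open calls are reported, flagged
 * with CALL_RETURN_NO_RETURN. Iterating over the session's threads is assumed
 * and not shown here.
 */
#if 0
static int example_flush_on_exit(struct thread *thread)
{
	/* Flushes every per-cpu stack of the thread, if any */
	return thread_stack__flush(thread);
}
#endif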
int thread_stack__event(struct thread *thread, int cpu, u32 flags, u64 from_ip,
			u64 to_ip, u16 insn_len, u64 trace_nr)
{
	struct thread_stack *ts = thread__stack(thread, cpu);

	if (!thread)
		return -EINVAL;

	if (!ts) {
		ts = thread_stack__new(thread, cpu, NULL);
		if (!ts) {
			pr_warning("Out of memory: no thread stack\n");
			return -ENOMEM;
		}
		ts->trace_nr = trace_nr;
	}

	/*
	 * When the trace is discontinuous, the trace_nr changes. In that case
	 * the stack might be completely invalid. Better to report nothing than
	 * to report something misleading, so flush the stack.
	 */
	if (trace_nr != ts->trace_nr) {
		if (ts->trace_nr)
			__thread_stack__flush(thread, ts);
		ts->trace_nr = trace_nr;
	}

	/* Stop here if thread_stack__process() is in use */
	if (ts->crp)
		return 0;

	if (flags & PERF_IP_FLAG_CALL) {
		u64 ret_addr;

		if (!to_ip)
			return 0;
		ret_addr = from_ip + insn_len;
		if (ret_addr == to_ip)
			return 0; /* Zero-length calls are excluded */
		return thread_stack__push(ts, ret_addr,
					  flags & PERF_IP_FLAG_TRACE_END);
	} else if (flags & PERF_IP_FLAG_TRACE_BEGIN) {
		/*
		 * If the caller did not change the trace number (which would
		 * have flushed the stack) then try to make sense of the stack.
		 * Possibly, tracing began after returning to the current
		 * address, so try to pop that. Also, a call made when the
		 * trace ended is not expected to return, so pop that too.
		 */
		thread_stack__pop(ts, to_ip);
		thread_stack__pop_trace_end(ts);
	} else if ((flags & PERF_IP_FLAG_RETURN) && from_ip) {
		thread_stack__pop(ts, to_ip);
	}

	return 0;
}
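/*
 * Usage sketch (hypothetical, not built): how a decoder-style caller might
 * feed branches into thread_stack__event(). The addresses, instruction length
 * and trace number below are invented for illustration only.
 */
#if 0
static void example_feed_branches(struct thread *thread, int cpu)
{
	u64 trace_nr = 1;

	/* A call from 0x1000 (5-byte call instruction) to 0x2000 */
	thread_stack__event(thread, cpu,
			    PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CALL,
			    0x1000, 0x2000, 5, trace_nr);

	/* The matching return back to the instruction after the call */
	thread_stack__event(thread, cpu,
			    PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_RETURN,
			    0x2050, 0x1005, 1, trace_nr);
}
#endif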
void thread_stack__set_trace_nr(struct thread *thread, int cpu, u64 trace_nr)
{
	struct thread_stack *ts = thread__stack(thread, cpu);

	if (!ts)
		return;

	if (trace_nr != ts->trace_nr) {
		if (ts->trace_nr)
			__thread_stack__flush(thread, ts);
		ts->trace_nr = trace_nr;
	}
}

static void __thread_stack__free(struct thread *thread, struct thread_stack *ts)
{
	__thread_stack__flush(thread, ts);
	zfree(&ts->stack);
}

static void thread_stack__reset(struct thread *thread, struct thread_stack *ts)
{
	unsigned int arr_sz = ts->arr_sz;

	__thread_stack__free(thread, ts);
	memset(ts, 0, sizeof(*ts));
	ts->arr_sz = arr_sz;
}

void thread_stack__free(struct thread *thread)
{
	struct thread_stack *ts = thread->ts;
	unsigned int pos;

	if (ts) {
		for (pos = 0; pos < ts->arr_sz; pos++)
			__thread_stack__free(thread, ts + pos);
		zfree(&thread->ts);
	}
}
static inline u64 callchain_context(u64 ip, u64 kernel_start)
{
	return ip < kernel_start ? PERF_CONTEXT_USER : PERF_CONTEXT_KERNEL;
}

void thread_stack__sample(struct thread *thread, int cpu,
			  struct ip_callchain *chain,
			  size_t sz, u64 ip, u64 kernel_start)
{
	struct thread_stack *ts = thread__stack(thread, cpu);
	u64 context = callchain_context(ip, kernel_start);
	u64 last_context;
	size_t i, j;

	if (sz < 2) {
		chain->nr = 0;
		return;
	}

	chain->ips[0] = context;
	chain->ips[1] = ip;

	if (!ts) {
		chain->nr = 2;
		return;
	}

	last_context = context;

	for (i = 2, j = 1; i < sz && j <= ts->cnt; i++, j++) {
		ip = ts->stack[ts->cnt - j].ret_addr;
		context = callchain_context(ip, kernel_start);
		if (context != last_context) {
			if (i >= sz - 1)
				break;
			chain->ips[i++] = context;
			last_context = context;
		}
		chain->ips[i] = ip;
	}

	chain->nr = i;
}
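/*
 * Usage sketch (hypothetical, not built): synthesizing a callchain from the
 * thread stack. The caller allocates an ip_callchain large enough for 'sz'
 * entries and thread_stack__sample() fills in context markers and return
 * addresses. The chain size and use of the sample fields are assumptions.
 */
#if 0
static struct ip_callchain *example_synth_callchain(struct thread *thread,
						    struct perf_sample *sample,
						    u64 kernel_start)
{
	size_t sz = 64;
	struct ip_callchain *chain;

	chain = zalloc(sizeof(struct ip_callchain) + sz * sizeof(u64));
	if (!chain)
		return NULL;

	thread_stack__sample(thread, sample->cpu, chain, sz, sample->ip,
			     kernel_start);
	return chain;
}
#endif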
struct call_return_processor *
call_return_processor__new(int (*process)(struct call_return *cr, u64 *parent_db_id, void *data),
			   void *data)
{
	struct call_return_processor *crp;

	crp = zalloc(sizeof(struct call_return_processor));
	if (!crp)
		return NULL;
	crp->cpr = call_path_root__new();
	if (!crp->cpr)
		goto out_free;
	crp->process = process;
	crp->data = data;
	return crp;

out_free:
	free(crp);
	return NULL;
}

void call_return_processor__free(struct call_return_processor *crp)
{
	if (crp) {
		call_path_root__free(crp->cpr);
		free(crp);
	}
}
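/*
 * Usage sketch (hypothetical, not built): creating a call/return processor.
 * thread_stack__process() invokes the supplied callback once per call/return
 * pair; the callback and the data pointer used here are invented names for
 * illustration.
 */
#if 0
static int example_process_cr(struct call_return *cr, u64 *parent_db_id,
			      void *data)
{
	u64 *nr_calls = data;

	(void)parent_db_id;	/* only used by db-export style callers */
	(void)cr;
	*nr_calls += 1;
	return 0;
}

static struct call_return_processor *example_new_crp(u64 *nr_calls)
{
	return call_return_processor__new(example_process_cr, nr_calls);
}
#endif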
static int thread_stack__push_cp(struct thread_stack *ts, u64 ret_addr,
				 u64 timestamp, u64 ref, struct call_path *cp,
				 bool no_call, bool trace_end)
{
	struct thread_stack_entry *tse;
	int err;

	if (!cp)
		return -ENOMEM;

	if (ts->cnt == ts->sz) {
		err = thread_stack__grow(ts);
		if (err)
			return err;
	}

	tse = &ts->stack[ts->cnt++];
	tse->ret_addr = ret_addr;
	tse->timestamp = timestamp;
	tse->ref = ref;
	tse->branch_count = ts->branch_count;
	tse->insn_count = ts->insn_count;
	tse->cyc_count = ts->cyc_count;
	tse->cp = cp;
	tse->no_call = no_call;
	tse->trace_end = trace_end;
	tse->non_call = false;
	tse->db_id = 0;

	return 0;
}
static int thread_stack__pop_cp(struct thread *thread, struct thread_stack *ts,
				u64 ret_addr, u64 timestamp, u64 ref,
				struct symbol *sym)
{
	int err;

	if (!ts->cnt)
		return 1;

	if (ts->cnt == 1) {
		struct thread_stack_entry *tse = &ts->stack[0];

		if (tse->cp->sym == sym)
			return thread_stack__call_return(thread, ts, --ts->cnt,
							 timestamp, ref, false);
	}

	if (ts->stack[ts->cnt - 1].ret_addr == ret_addr &&
	    !ts->stack[ts->cnt - 1].non_call) {
		return thread_stack__call_return(thread, ts, --ts->cnt,
						 timestamp, ref, false);
	} else {
		size_t i = ts->cnt - 1;

		while (i--) {
			if (ts->stack[i].ret_addr != ret_addr ||
			    ts->stack[i].non_call)
				continue;
			i += 1;
			while (ts->cnt > i) {
				err = thread_stack__call_return(thread, ts,
								--ts->cnt,
								timestamp, ref,
								true);
				if (err)
					return err;
			}
			return thread_stack__call_return(thread, ts, --ts->cnt,
							 timestamp, ref, false);
		}
	}

	return 1;
}
static int thread_stack__bottom(struct thread_stack *ts,
				struct perf_sample *sample,
				struct addr_location *from_al,
				struct addr_location *to_al, u64 ref)
{
	struct call_path_root *cpr = ts->crp->cpr;
	struct call_path *cp;
	struct symbol *sym;
	u64 ip;

	if (sample->ip) {
		ip = sample->ip;
		sym = from_al->sym;
	} else if (sample->addr) {
		ip = sample->addr;
		sym = to_al->sym;
	} else {
		return 0;
	}

	cp = call_path__findnew(cpr, &cpr->call_path, sym, ip,
				ts->kernel_start);

	return thread_stack__push_cp(ts, ip, sample->time, ref, cp,
				     true, false);
}
static int thread_stack__no_call_return(struct thread *thread,
					struct thread_stack *ts,
					struct perf_sample *sample,
					struct addr_location *from_al,
					struct addr_location *to_al, u64 ref)
{
	struct call_path_root *cpr = ts->crp->cpr;
	struct call_path *root = &cpr->call_path;
	struct symbol *fsym = from_al->sym;
	struct symbol *tsym = to_al->sym;
	struct call_path *cp, *parent;
	u64 ks = ts->kernel_start;
	u64 addr = sample->addr;
	u64 tm = sample->time;
	u64 ip = sample->ip;
	int err;

	if (ip >= ks && addr < ks) {
		/* Return to userspace, so pop all kernel addresses */
		while (thread_stack__in_kernel(ts)) {
			err = thread_stack__call_return(thread, ts, --ts->cnt,
							tm, ref, true);
			if (err)
				return err;
		}

		/* If the stack is empty, push the userspace address */
		if (!ts->cnt) {
			cp = call_path__findnew(cpr, root, tsym, addr, ks);
			return thread_stack__push_cp(ts, 0, tm, ref, cp, true,
						     false);
		}
	} else if (thread_stack__in_kernel(ts) && ip < ks) {
		/* Return to userspace, so pop all kernel addresses */
		while (thread_stack__in_kernel(ts)) {
			err = thread_stack__call_return(thread, ts, --ts->cnt,
							tm, ref, true);
			if (err)
				return err;
		}
	}

	if (ts->cnt)
		parent = ts->stack[ts->cnt - 1].cp;
	else
		parent = root;

	if (parent->sym == from_al->sym) {
		/*
		 * At the bottom of the stack, assume the missing 'call' was
		 * before the trace started. So, pop the current symbol and push
		 * the 'to' symbol.
		 */
		if (ts->cnt == 1) {
			err = thread_stack__call_return(thread, ts, --ts->cnt,
							tm, ref, false);
			if (err)
				return err;
		}

		if (!ts->cnt) {
			cp = call_path__findnew(cpr, root, tsym, addr, ks);

			return thread_stack__push_cp(ts, addr, tm, ref, cp,
						     true, false);
		}

		/*
		 * Otherwise assume the 'return' is being used as a jump (e.g.
		 * retpoline) and just push the 'to' symbol.
		 */
		cp = call_path__findnew(cpr, parent, tsym, addr, ks);

		err = thread_stack__push_cp(ts, 0, tm, ref, cp, true, false);
		if (!err)
			ts->stack[ts->cnt - 1].non_call = true;

		return err;
	}

	/*
	 * Assume 'parent' has not yet returned, so push 'to', and then push and
	 * pop 'from'.
	 */

	cp = call_path__findnew(cpr, parent, tsym, addr, ks);

	err = thread_stack__push_cp(ts, addr, tm, ref, cp, true, false);
	if (err)
		return err;

	cp = call_path__findnew(cpr, cp, fsym, ip, ks);

	err = thread_stack__push_cp(ts, ip, tm, ref, cp, true, false);
	if (err)
		return err;

	return thread_stack__call_return(thread, ts, --ts->cnt, tm, ref, false);
}
static int thread_stack__trace_begin(struct thread *thread,
				     struct thread_stack *ts, u64 timestamp,
				     u64 ref)
{
	struct thread_stack_entry *tse;
	int err;

	if (!ts->cnt)
		return 0;

	/* Pop trace end */
	tse = &ts->stack[ts->cnt - 1];
	if (tse->trace_end) {
		err = thread_stack__call_return(thread, ts, --ts->cnt,
						timestamp, ref, false);
		if (err)
			return err;
	}

	return 0;
}

static int thread_stack__trace_end(struct thread_stack *ts,
				   struct perf_sample *sample, u64 ref)
{
	struct call_path_root *cpr = ts->crp->cpr;
	struct call_path *cp;
	u64 ret_addr;

	/* No point having 'trace end' on the bottom of the stack */
	if (!ts->cnt || (ts->cnt == 1 && ts->stack[0].ref == ref))
		return 0;

	cp = call_path__findnew(cpr, ts->stack[ts->cnt - 1].cp, NULL, 0,
				ts->kernel_start);

	ret_addr = sample->ip + sample->insn_len;

	return thread_stack__push_cp(ts, ret_addr, sample->time, ref, cp,
				     false, true);
}
static bool is_x86_retpoline(const char *name)
{
	const char *p = strstr(name, "__x86_indirect_thunk_");

	return p == name || !strcmp(name, "__indirect_thunk_start");
}

/*
 * x86 retpoline functions pollute the call graph. This function removes them.
 * This does not handle function return thunks, nor is there any improvement
 * for the handling of inline thunks or extern thunks.
 */
static int thread_stack__x86_retpoline(struct thread_stack *ts,
				       struct perf_sample *sample,
				       struct addr_location *to_al)
{
	struct thread_stack_entry *tse = &ts->stack[ts->cnt - 1];
	struct call_path_root *cpr = ts->crp->cpr;
	struct symbol *sym = tse->cp->sym;
	struct symbol *tsym = to_al->sym;
	struct call_path *cp;

	if (sym && is_x86_retpoline(sym->name)) {
		/*
		 * This is an x86 retpoline fn. It pollutes the call graph by
		 * showing up everywhere there is an indirect branch, but does
		 * not itself mean anything. Here the top-of-stack is removed,
		 * by decrementing the stack count, and then further down, the
		 * resulting top-of-stack is replaced with the actual target.
		 * The result is that the retpoline functions will no longer
		 * appear in the call graph. Note this only affects the call
		 * graph, since all the original branches are left unchanged.
		 */
		ts->cnt -= 1;
		sym = ts->stack[ts->cnt - 2].cp->sym;
		if (sym && sym == tsym && to_al->addr != tsym->start) {
			/*
			 * Target is back to the middle of the symbol we came
			 * from so assume it is an indirect jmp and forget it
			 * altogether.
			 */
			ts->cnt -= 1;
			return 0;
		}
	} else if (sym && sym == tsym) {
		/*
		 * Target is back to the symbol we came from so assume it is an
		 * indirect jmp and forget it altogether.
		 */
		ts->cnt -= 1;
		return 0;
	}

	cp = call_path__findnew(cpr, ts->stack[ts->cnt - 2].cp, tsym,
				sample->addr, ts->kernel_start);
	if (!cp)
		return -ENOMEM;

	/* Replace the top-of-stack with the actual target */
	ts->stack[ts->cnt - 1].cp = cp;

	return 0;
}
int thread_stack__process(struct thread *thread, struct comm *comm,
			  struct perf_sample *sample,
			  struct addr_location *from_al,
			  struct addr_location *to_al, u64 ref,
			  struct call_return_processor *crp)
{
	struct thread_stack *ts = thread__stack(thread, sample->cpu);
	enum retpoline_state_t rstate;
	int err = 0;

	if (ts && !ts->crp) {
		/* Supersede thread_stack__event() */
		thread_stack__reset(thread, ts);
		ts = NULL;
	}

	if (!ts) {
		ts = thread_stack__new(thread, sample->cpu, crp);
		if (!ts)
			return -ENOMEM;
		ts->comm = comm;
	}

	rstate = ts->rstate;
	if (rstate == X86_RETPOLINE_DETECTED)
		ts->rstate = X86_RETPOLINE_POSSIBLE;

	/* Flush stack on exec */
	if (ts->comm != comm && thread->pid_ == thread->tid) {
		err = __thread_stack__flush(thread, ts);
		if (err)
			return err;
		ts->comm = comm;
	}

	/* If the stack is empty, put the current symbol on the stack */
	if (!ts->cnt) {
		err = thread_stack__bottom(ts, sample, from_al, to_al, ref);
		if (err)
			return err;
	}

	ts->branch_count += 1;
	ts->insn_count += sample->insn_cnt;
	ts->cyc_count += sample->cyc_cnt;
	ts->last_time = sample->time;

	if (sample->flags & PERF_IP_FLAG_CALL) {
		bool trace_end = sample->flags & PERF_IP_FLAG_TRACE_END;
		struct call_path_root *cpr = ts->crp->cpr;
		struct call_path *cp;
		u64 ret_addr;

		if (!sample->ip || !sample->addr)
			return 0;

		ret_addr = sample->ip + sample->insn_len;
		if (ret_addr == sample->addr)
			return 0; /* Zero-length calls are excluded */

		cp = call_path__findnew(cpr, ts->stack[ts->cnt - 1].cp,
					to_al->sym, sample->addr,
					ts->kernel_start);
		err = thread_stack__push_cp(ts, ret_addr, sample->time, ref,
					    cp, false, trace_end);

		/*
		 * A call to the same symbol, but not to the start of the
		 * symbol, may be the start of an x86 retpoline.
		 */
		if (!err && rstate == X86_RETPOLINE_POSSIBLE && to_al->sym &&
		    from_al->sym == to_al->sym &&
		    to_al->addr != to_al->sym->start)
			ts->rstate = X86_RETPOLINE_DETECTED;

	} else if (sample->flags & PERF_IP_FLAG_RETURN) {
		if (!sample->ip || !sample->addr)
			return 0;

		/* x86 retpoline 'return' doesn't match the stack */
		if (rstate == X86_RETPOLINE_DETECTED && ts->cnt > 2 &&
		    ts->stack[ts->cnt - 1].ret_addr != sample->addr)
			return thread_stack__x86_retpoline(ts, sample, to_al);

		err = thread_stack__pop_cp(thread, ts, sample->addr,
					   sample->time, ref, from_al->sym);
		if (err) {
			if (err < 0)
				return err;
			err = thread_stack__no_call_return(thread, ts, sample,
							   from_al, to_al, ref);
		}
	} else if (sample->flags & PERF_IP_FLAG_TRACE_BEGIN) {
		err = thread_stack__trace_begin(thread, ts, sample->time, ref);
	} else if (sample->flags & PERF_IP_FLAG_TRACE_END) {
		err = thread_stack__trace_end(ts, sample, ref);
	} else if (sample->flags & PERF_IP_FLAG_BRANCH &&
		   from_al->sym != to_al->sym && to_al->sym &&
		   to_al->addr == to_al->sym->start) {
		struct call_path_root *cpr = ts->crp->cpr;
		struct call_path *cp;

		/*
		 * The compiler might optimize a call/ret combination by making
		 * it a jmp. Make that visible by recording on the stack a
		 * branch to the start of a different symbol. Note, that means
		 * when a ret pops the stack, all jmps must be popped off first.
		 */
		cp = call_path__findnew(cpr, ts->stack[ts->cnt - 1].cp,
					to_al->sym, sample->addr,
					ts->kernel_start);
		err = thread_stack__push_cp(ts, 0, sample->time, ref, cp, false,
					    false);
		if (!err)
			ts->stack[ts->cnt - 1].non_call = true;
	}

	return err;
}
size_t thread_stack__depth(struct thread *thread, int cpu)
{
	struct thread_stack *ts = thread__stack(thread, cpu);

	if (!ts)
		return 0;
	return ts->cnt;
}