// SPDX-License-Identifier: GPL-2.0-only
/*
 * thread-stack.c: Synthesize a thread's stack using call / return events
 * Copyright (c) 2014, Intel Corporation.
 */

#include <linux/rbtree.h>
#include <linux/list.h>
#include <linux/log2.h>
#include <linux/zalloc.h>
#include <errno.h>
#include <stdlib.h>
#include <string.h>
#include "thread.h"
#include "event.h"
#include "machine.h"
#include "env.h"
#include "debug.h"
#include "symbol.h"
#include "comm.h"
#include "call-path.h"
#include "thread-stack.h"

#define STACK_GROWTH 2048

/*
 * State of retpoline detection.
 *
 * RETPOLINE_NONE: no retpoline detection
 * X86_RETPOLINE_POSSIBLE: x86 retpoline possible
 * X86_RETPOLINE_DETECTED: x86 retpoline detected
 */
enum retpoline_state_t {
	RETPOLINE_NONE,
	X86_RETPOLINE_POSSIBLE,
	X86_RETPOLINE_DETECTED,
};

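/*
 * Illustrative note, not part of the original file: a thread stack starts in
 * RETPOLINE_NONE, thread_stack__init() moves it to X86_RETPOLINE_POSSIBLE when
 * the architecture is "x86", and thread_stack__process() moves it to
 * X86_RETPOLINE_DETECTED when it sees a call into the middle of the symbol it
 * came from, then back to X86_RETPOLINE_POSSIBLE on the next sample.
 */
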
/**
 * struct thread_stack_entry - thread stack entry.
 * @ret_addr: return address
 * @timestamp: timestamp (if known)
 * @ref: external reference (e.g. db_id of sample)
 * @branch_count: the branch count when the entry was created
 * @insn_count: the instruction count when the entry was created
 * @cyc_count: the cycle count when the entry was created
 * @db_id: id used for db-export
 * @cp: call path
 * @no_call: a 'call' was not seen
 * @trace_end: a 'call' but trace ended
 * @non_call: a branch but not a 'call' to the start of a different symbol
 */
struct thread_stack_entry {
	u64 ret_addr;
	u64 timestamp;
	u64 ref;
	u64 branch_count;
	u64 insn_count;
	u64 cyc_count;
	u64 db_id;
	struct call_path *cp;
	bool no_call;
	bool trace_end;
	bool non_call;
};

/**
 * struct thread_stack - thread stack constructed from 'call' and 'return'
 *                       branch samples.
 * @stack: array that holds the stack
 * @cnt: number of entries in the stack
 * @sz: current maximum stack size
 * @trace_nr: current trace number
 * @branch_count: running branch count
 * @insn_count: running instruction count
 * @cyc_count: running cycle count
 * @kernel_start: kernel start address
 * @last_time: last timestamp
 * @crp: call/return processor
 * @comm: current comm
 * @arr_sz: size of array if this is the first element of an array
 * @rstate: used to detect retpolines
 */
struct thread_stack {
	struct thread_stack_entry *stack;
	size_t cnt;
	size_t sz;
	u64 trace_nr;
	u64 branch_count;
	u64 insn_count;
	u64 cyc_count;
	u64 kernel_start;
	u64 last_time;
	struct call_return_processor *crp;
	struct comm *comm;
	unsigned int arr_sz;
	enum retpoline_state_t rstate;
};

/*
 * Assume pid == tid == 0 identifies the idle task as defined by
 * perf_session__register_idle_thread(). The idle task is really 1 task per cpu,
 * and therefore requires a stack for each cpu.
 */
static inline bool thread_stack__per_cpu(struct thread *thread)
{
	return !(thread->tid || thread->pid_);
}

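/*
 * Illustrative note, not part of the original file: for the idle task both
 * ids are zero, so thread_stack__per_cpu() returns true and a per-cpu array
 * of stacks is used; any ordinary task (non-zero tid or pid_) gets a single
 * stack shared across cpus.
 */
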
static int thread_stack__grow(struct thread_stack *ts)
{
	struct thread_stack_entry *new_stack;
	unsigned int new_sz, sz;

	new_sz = ts->sz + STACK_GROWTH;
	sz = new_sz * sizeof(struct thread_stack_entry);

	new_stack = realloc(ts->stack, sz);
	if (!new_stack)
		return -ENOMEM;

	ts->stack = new_stack;
	ts->sz = new_sz;

	return 0;
}

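/*
 * Illustrative example, not part of the original file: growth is linear in
 * units of STACK_GROWTH entries, so successive grow calls request room for
 * 2048, 4096, 6144, ... entries, each of sizeof(struct thread_stack_entry)
 * bytes.
 */
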
static int thread_stack__init(struct thread_stack *ts, struct thread *thread,
			      struct call_return_processor *crp)
{
	int err;

	err = thread_stack__grow(ts);
	if (err)
		return err;

	if (thread->maps && thread->maps->machine) {
		struct machine *machine = thread->maps->machine;
		const char *arch = perf_env__arch(machine->env);

		ts->kernel_start = machine__kernel_start(machine);
		if (!strcmp(arch, "x86"))
			ts->rstate = X86_RETPOLINE_POSSIBLE;
	} else {
		ts->kernel_start = 1ULL << 63;
	}
	ts->crp = crp;

	return 0;
}

static struct thread_stack *thread_stack__new(struct thread *thread, int cpu,
					      struct call_return_processor *crp)
{
	struct thread_stack *ts = thread->ts, *new_ts;
	unsigned int old_sz = ts ? ts->arr_sz : 0;
	unsigned int new_sz = 1;

	if (thread_stack__per_cpu(thread) && cpu > 0)
		new_sz = roundup_pow_of_two(cpu + 1);

	if (!ts || new_sz > old_sz) {
		new_ts = calloc(new_sz, sizeof(*ts));
		if (!new_ts)
			return NULL;
		if (ts)
			memcpy(new_ts, ts, old_sz * sizeof(*ts));
		new_ts->arr_sz = new_sz;
		zfree(&thread->ts);
		thread->ts = new_ts;
		ts = new_ts;
	}

	if (thread_stack__per_cpu(thread) && cpu > 0 &&
	    (unsigned int)cpu < ts->arr_sz)
		ts += cpu;

	if (!ts->stack &&
	    thread_stack__init(ts, thread, crp))
		return NULL;

	return ts;
}

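/*
 * Illustrative example, not part of the original file: for the idle task the
 * array is sized to a power of two large enough to index by cpu, e.g.
 * roundup_pow_of_two(cpu + 1) gives 1 for cpu 0, 2 for cpu 1 and 8 for cpu 5,
 * so an event on cpu 5 then selects the sixth element of an 8-element array.
 */
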
static struct thread_stack *thread__cpu_stack(struct thread *thread, int cpu)
{
	struct thread_stack *ts = thread->ts;

	if (cpu < 0)
		cpu = 0;

	if (!ts || (unsigned int)cpu >= ts->arr_sz)
		return NULL;

	ts += cpu;

	if (!ts->stack)
		return NULL;

	return ts;
}

static inline struct thread_stack *thread__stack(struct thread *thread,
						 int cpu)
{
	if (!thread)
		return NULL;

	if (thread_stack__per_cpu(thread))
		return thread__cpu_stack(thread, cpu);

	return thread->ts;
}

static int thread_stack__push(struct thread_stack *ts, u64 ret_addr,
			      bool trace_end)
{
	int err = 0;

	if (ts->cnt == ts->sz) {
		err = thread_stack__grow(ts);
		if (err) {
			pr_warning("Out of memory: discarding thread stack\n");
			ts->cnt = 0;
		}
	}

	ts->stack[ts->cnt].trace_end = trace_end;
	ts->stack[ts->cnt++].ret_addr = ret_addr;

	return err;
}

static void thread_stack__pop(struct thread_stack *ts, u64 ret_addr)
{
	size_t i;

	/*
	 * In some cases there may be functions which are not seen to return.
	 * For example when setjmp / longjmp has been used. Or the perf context
	 * switch in the kernel which doesn't stop and start tracing in exactly
	 * the same code path. When that happens the return address will be
	 * further down the stack. If the return address is not found at all,
	 * we assume the opposite (i.e. this is a return for a call that wasn't
	 * seen for some reason) and leave the stack alone.
	 */
	for (i = ts->cnt; i; ) {
		if (ts->stack[--i].ret_addr == ret_addr) {
			ts->cnt = i;
			break;
		}
	}
}

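/*
 * Illustrative example, not part of the original file: with return addresses
 * { A, B, C } on the stack (C on top), a 'return' to B pops both C (never
 * seen to return) and B's own entry, leaving { A }. A 'return' that matches
 * nothing leaves { A, B, C } untouched.
 */
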
static void thread_stack__pop_trace_end(struct thread_stack *ts)
{
	size_t i;

	for (i = ts->cnt; i; ) {
		if (ts->stack[--i].trace_end)
			ts->cnt = i;
		else
			return;
	}
}

static bool thread_stack__in_kernel(struct thread_stack *ts)
{
	if (!ts->cnt)
		return false;

	return ts->stack[ts->cnt - 1].cp->in_kernel;
}

static int thread_stack__call_return(struct thread *thread,
				     struct thread_stack *ts, size_t idx,
				     u64 timestamp, u64 ref, bool no_return)
{
	struct call_return_processor *crp = ts->crp;
	struct thread_stack_entry *tse;
	struct call_return cr = {
		.thread = thread,
		.comm = ts->comm,
		.db_id = 0,
	};
	u64 *parent_db_id;

	tse = &ts->stack[idx];
	cr.cp = tse->cp;
	cr.call_time = tse->timestamp;
	cr.return_time = timestamp;
	cr.branch_count = ts->branch_count - tse->branch_count;
	cr.insn_count = ts->insn_count - tse->insn_count;
	cr.cyc_count = ts->cyc_count - tse->cyc_count;
	cr.db_id = tse->db_id;
	cr.call_ref = tse->ref;
	cr.return_ref = ref;
	if (tse->no_call)
		cr.flags |= CALL_RETURN_NO_CALL;
	if (no_return)
		cr.flags |= CALL_RETURN_NO_RETURN;
	if (tse->non_call)
		cr.flags |= CALL_RETURN_NON_CALL;

	/*
	 * The parent db_id must be assigned before exporting the child. Note
	 * it is not possible to export the parent first because its information
	 * is not yet complete because its 'return' has not yet been processed.
	 */
	parent_db_id = idx ? &(tse - 1)->db_id : NULL;

	return crp->process(&cr, parent_db_id, crp->data);
}

static int __thread_stack__flush(struct thread *thread, struct thread_stack *ts)
{
	struct call_return_processor *crp = ts->crp;
	int err;

	if (!crp) {
		ts->cnt = 0;
		return 0;
	}

	while (ts->cnt) {
		err = thread_stack__call_return(thread, ts, --ts->cnt,
						ts->last_time, 0, true);
		if (err) {
			pr_err("Error flushing thread stack!\n");
			ts->cnt = 0;
			return err;
		}
	}

	return 0;
}

int thread_stack__flush(struct thread *thread)
{
	struct thread_stack *ts = thread->ts;
	unsigned int pos;
	int err = 0;

	if (ts) {
		for (pos = 0; pos < ts->arr_sz; pos++) {
			int ret = __thread_stack__flush(thread, ts + pos);

			if (ret)
				err = ret;
		}
	}

	return err;
}

int thread_stack__event(struct thread *thread, int cpu, u32 flags, u64 from_ip,
			u64 to_ip, u16 insn_len, u64 trace_nr)
{
	struct thread_stack *ts = thread__stack(thread, cpu);

	if (!thread)
		return -EINVAL;

	if (!ts) {
		ts = thread_stack__new(thread, cpu, NULL);
		if (!ts) {
			pr_warning("Out of memory: no thread stack\n");
			return -ENOMEM;
		}
		ts->trace_nr = trace_nr;
	}

	/*
	 * When the trace is discontinuous, the trace_nr changes. In that case
	 * the stack might be completely invalid. Better to report nothing than
	 * to report something misleading, so flush the stack.
	 */
	if (trace_nr != ts->trace_nr) {
		if (ts->trace_nr)
			__thread_stack__flush(thread, ts);
		ts->trace_nr = trace_nr;
	}

	/* Stop here if thread_stack__process() is in use */
	if (ts->crp)
		return 0;

	if (flags & PERF_IP_FLAG_CALL) {
		u64 ret_addr;

		if (!to_ip)
			return 0;
		ret_addr = from_ip + insn_len;
		if (ret_addr == to_ip)
			return 0; /* Zero-length calls are excluded */
		return thread_stack__push(ts, ret_addr,
					  flags & PERF_IP_FLAG_TRACE_END);
	} else if (flags & PERF_IP_FLAG_TRACE_BEGIN) {
		/*
		 * If the caller did not change the trace number (which would
		 * have flushed the stack) then try to make sense of the stack.
		 * Possibly, tracing began after returning to the current
		 * address, so try to pop that. Also, do not expect a call made
		 * when the trace ended, to return, so pop that.
		 */
		thread_stack__pop(ts, to_ip);
		thread_stack__pop_trace_end(ts);
	} else if ((flags & PERF_IP_FLAG_RETURN) && from_ip) {
		thread_stack__pop(ts, to_ip);
	}

	return 0;
}

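/*
 * Illustrative example, not part of the original file: the expected return
 * address is the instruction after the call, i.e. from_ip + insn_len. A
 * 5-byte x86 call at 0x1000 gives ret_addr 0x1005; if to_ip is also 0x1005
 * the call targets its own next instruction (e.g. the classic call/pop trick
 * used to read the program counter), so nothing is pushed.
 */
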
void thread_stack__set_trace_nr(struct thread *thread, int cpu, u64 trace_nr)
{
	struct thread_stack *ts = thread__stack(thread, cpu);

	if (!ts)
		return;

	if (trace_nr != ts->trace_nr) {
		if (ts->trace_nr)
			__thread_stack__flush(thread, ts);
		ts->trace_nr = trace_nr;
	}
}

static void __thread_stack__free(struct thread *thread, struct thread_stack *ts)
{
	__thread_stack__flush(thread, ts);
	zfree(&ts->stack);
}

static void thread_stack__reset(struct thread *thread, struct thread_stack *ts)
{
	unsigned int arr_sz = ts->arr_sz;

	__thread_stack__free(thread, ts);
	memset(ts, 0, sizeof(*ts));
	ts->arr_sz = arr_sz;
}

void thread_stack__free(struct thread *thread)
{
	struct thread_stack *ts = thread->ts;
	unsigned int pos;

	if (ts) {
		for (pos = 0; pos < ts->arr_sz; pos++)
			__thread_stack__free(thread, ts + pos);
		zfree(&thread->ts);
	}
}

static inline u64 callchain_context(u64 ip, u64 kernel_start)
{
	return ip < kernel_start ? PERF_CONTEXT_USER : PERF_CONTEXT_KERNEL;
}

void thread_stack__sample(struct thread *thread, int cpu,
			  struct ip_callchain *chain,
			  size_t sz, u64 ip, u64 kernel_start)
{
	struct thread_stack *ts = thread__stack(thread, cpu);
	u64 context = callchain_context(ip, kernel_start);
	u64 last_context;
	size_t i, j;

	if (sz < 2) {
		chain->nr = 0;
		return;
	}

	chain->ips[0] = context;
	chain->ips[1] = ip;

	if (!ts) {
		chain->nr = 2;
		return;
	}

	last_context = context;

	for (i = 2, j = 1; i < sz && j <= ts->cnt; i++, j++) {
		ip = ts->stack[ts->cnt - j].ret_addr;
		context = callchain_context(ip, kernel_start);
		if (context != last_context) {
			if (i >= sz - 1)
				break;
			chain->ips[i++] = context;
			last_context = context;
		}
		chain->ips[i] = ip;
	}

	chain->nr = i;
}

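/*
 * Illustrative example, not part of the original file: for a sample at a
 * kernel ip with return addresses k1 (kernel) then u1, u2 (user) on the
 * thread stack, the synthesized chain is
 *
 *	{ PERF_CONTEXT_KERNEL, ip, k1, PERF_CONTEXT_USER, u1, u2 }
 *
 * i.e. a context marker is inserted whenever successive entries cross the
 * kernel_start boundary.
 */
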
struct call_return_processor *
call_return_processor__new(int (*process)(struct call_return *cr, u64 *parent_db_id, void *data),
			   void *data)
{
	struct call_return_processor *crp;

	crp = zalloc(sizeof(struct call_return_processor));
	if (!crp)
		return NULL;
	crp->cpr = call_path_root__new();
	if (!crp->cpr)
		goto out_free;
	crp->process = process;
	crp->data = data;
	return crp;

out_free:
	free(crp);
	return NULL;
}

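/*
 * Usage sketch, not part of the original file; the callback and data names
 * below are hypothetical. A consumer (such as a db-export backend) supplies
 * a callback that receives every synthesized call/return pair:
 *
 *	static int my_process(struct call_return *cr, u64 *parent_db_id,
 *			      void *data)
 *	{
 *		// consume cr->cp, cr->call_time, cr->return_time, cr->flags...
 *		return 0;
 *	}
 *
 *	struct call_return_processor *crp =
 *		call_return_processor__new(my_process, my_data);
 *	...
 *	call_return_processor__free(crp);
 */
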
void call_return_processor__free(struct call_return_processor *crp)
{
	if (crp) {
		call_path_root__free(crp->cpr);
		free(crp);
	}
}

static int thread_stack__push_cp(struct thread_stack *ts, u64 ret_addr,
				 u64 timestamp, u64 ref, struct call_path *cp,
				 bool no_call, bool trace_end)
{
	struct thread_stack_entry *tse;
	int err;

	if (!cp)
		return -ENOMEM;

	if (ts->cnt == ts->sz) {
		err = thread_stack__grow(ts);
		if (err)
			return err;
	}

	tse = &ts->stack[ts->cnt++];
	tse->ret_addr = ret_addr;
	tse->timestamp = timestamp;
	tse->ref = ref;
	tse->branch_count = ts->branch_count;
	tse->insn_count = ts->insn_count;
	tse->cyc_count = ts->cyc_count;
	tse->cp = cp;
	tse->no_call = no_call;
	tse->trace_end = trace_end;
	tse->non_call = false;
	tse->db_id = 0;

	return 0;
}

static int thread_stack__pop_cp(struct thread *thread, struct thread_stack *ts,
				u64 ret_addr, u64 timestamp, u64 ref,
				struct symbol *sym)
{
	int err;

	if (!ts->cnt)
		return 1;

	if (ts->cnt == 1) {
		struct thread_stack_entry *tse = &ts->stack[0];

		if (tse->cp->sym == sym)
			return thread_stack__call_return(thread, ts, --ts->cnt,
							 timestamp, ref, false);
	}

	if (ts->stack[ts->cnt - 1].ret_addr == ret_addr &&
	    !ts->stack[ts->cnt - 1].non_call) {
		return thread_stack__call_return(thread, ts, --ts->cnt,
						 timestamp, ref, false);
	} else {
		size_t i = ts->cnt - 1;

		while (i--) {
			if (ts->stack[i].ret_addr != ret_addr ||
			    ts->stack[i].non_call)
				continue;
			i += 1;
			while (ts->cnt > i) {
				err = thread_stack__call_return(thread, ts,
								--ts->cnt,
								timestamp, ref,
								true);
				if (err)
					return err;
			}
			return thread_stack__call_return(thread, ts, --ts->cnt,
							 timestamp, ref, false);
		}
	}

	return 1;
}

static int thread_stack__bottom(struct thread_stack *ts,
				struct perf_sample *sample,
				struct addr_location *from_al,
				struct addr_location *to_al, u64 ref)
{
	struct call_path_root *cpr = ts->crp->cpr;
	struct call_path *cp;
	struct symbol *sym;
	u64 ip;

	if (sample->ip) {
		ip = sample->ip;
		sym = from_al->sym;
	} else if (sample->addr) {
		ip = sample->addr;
		sym = to_al->sym;
	} else {
		return 0;
	}

	cp = call_path__findnew(cpr, &cpr->call_path, sym, ip,
				ts->kernel_start);

	return thread_stack__push_cp(ts, ip, sample->time, ref, cp,
				     true, false);
}

static int thread_stack__pop_ks(struct thread *thread, struct thread_stack *ts,
				struct perf_sample *sample, u64 ref)
{
	u64 tm = sample->time;
	int err;

	/* Return to userspace, so pop all kernel addresses */
	while (thread_stack__in_kernel(ts)) {
		err = thread_stack__call_return(thread, ts, --ts->cnt,
						tm, ref, true);
		if (err)
			return err;
	}

	return 0;
}

static int thread_stack__no_call_return(struct thread *thread,
					struct thread_stack *ts,
					struct perf_sample *sample,
					struct addr_location *from_al,
					struct addr_location *to_al, u64 ref)
{
	struct call_path_root *cpr = ts->crp->cpr;
	struct call_path *root = &cpr->call_path;
	struct symbol *fsym = from_al->sym;
	struct symbol *tsym = to_al->sym;
	struct call_path *cp, *parent;
	u64 ks = ts->kernel_start;
	u64 addr = sample->addr;
	u64 tm = sample->time;
	u64 ip = sample->ip;
	int err;

	if (ip >= ks && addr < ks) {
		/* Return to userspace, so pop all kernel addresses */
		err = thread_stack__pop_ks(thread, ts, sample, ref);
		if (err)
			return err;

		/* If the stack is empty, push the userspace address */
		if (!ts->cnt) {
			cp = call_path__findnew(cpr, root, tsym, addr, ks);
			return thread_stack__push_cp(ts, 0, tm, ref, cp, true,
						     false);
		}
	} else if (thread_stack__in_kernel(ts) && ip < ks) {
		/* Return to userspace, so pop all kernel addresses */
		err = thread_stack__pop_ks(thread, ts, sample, ref);
		if (err)
			return err;
	}

	if (ts->cnt)
		parent = ts->stack[ts->cnt - 1].cp;
	else
		parent = root;

	if (parent->sym == from_al->sym) {
		/*
		 * At the bottom of the stack, assume the missing 'call' was
		 * before the trace started. So, pop the current symbol and push
		 * the 'to' symbol.
		 */
		if (ts->cnt == 1) {
			err = thread_stack__call_return(thread, ts, --ts->cnt,
							tm, ref, false);
			if (err)
				return err;
		}

		if (!ts->cnt) {
			cp = call_path__findnew(cpr, root, tsym, addr, ks);

			return thread_stack__push_cp(ts, addr, tm, ref, cp,
						     true, false);
		}

		/*
		 * Otherwise assume the 'return' is being used as a jump (e.g.
		 * retpoline) and just push the 'to' symbol.
		 */
		cp = call_path__findnew(cpr, parent, tsym, addr, ks);

		err = thread_stack__push_cp(ts, 0, tm, ref, cp, true, false);
		if (!err)
			ts->stack[ts->cnt - 1].non_call = true;

		return err;
	}

	/*
	 * Assume 'parent' has not yet returned, so push 'to', and then push and
	 * pop 'from'.
	 */

	cp = call_path__findnew(cpr, parent, tsym, addr, ks);

	err = thread_stack__push_cp(ts, addr, tm, ref, cp, true, false);
	if (err)
		return err;

	cp = call_path__findnew(cpr, cp, fsym, ip, ks);

	err = thread_stack__push_cp(ts, ip, tm, ref, cp, true, false);
	if (err)
		return err;

	return thread_stack__call_return(thread, ts, --ts->cnt, tm, ref, false);
}

static int thread_stack__trace_begin(struct thread *thread,
				     struct thread_stack *ts, u64 timestamp,
				     u64 ref)
{
	struct thread_stack_entry *tse;
	int err;

	if (!ts->cnt)
		return 0;

	/* Pop trace end */
	tse = &ts->stack[ts->cnt - 1];
	if (tse->trace_end) {
		err = thread_stack__call_return(thread, ts, --ts->cnt,
						timestamp, ref, false);
		if (err)
			return err;
	}

	return 0;
}

static int thread_stack__trace_end(struct thread_stack *ts,
				   struct perf_sample *sample, u64 ref)
{
	struct call_path_root *cpr = ts->crp->cpr;
	struct call_path *cp;
	u64 ret_addr;

	/* No point having 'trace end' on the bottom of the stack */
	if (!ts->cnt || (ts->cnt == 1 && ts->stack[0].ref == ref))
		return 0;

	cp = call_path__findnew(cpr, ts->stack[ts->cnt - 1].cp, NULL, 0,
				ts->kernel_start);

	ret_addr = sample->ip + sample->insn_len;

	return thread_stack__push_cp(ts, ret_addr, sample->time, ref, cp,
				     false, true);
}

static bool is_x86_retpoline(const char *name)
{
	const char *p = strstr(name, "__x86_indirect_thunk_");

	return p == name || !strcmp(name, "__indirect_thunk_start");
}

/*
 * x86 retpoline functions pollute the call graph. This function removes them.
 * This does not handle function return thunks, nor is there any improvement
 * for the handling of inline thunks or extern thunks.
 */
static int thread_stack__x86_retpoline(struct thread_stack *ts,
				       struct perf_sample *sample,
				       struct addr_location *to_al)
{
	struct thread_stack_entry *tse = &ts->stack[ts->cnt - 1];
	struct call_path_root *cpr = ts->crp->cpr;
	struct symbol *sym = tse->cp->sym;
	struct symbol *tsym = to_al->sym;
	struct call_path *cp;

	if (sym && is_x86_retpoline(sym->name)) {
		/*
		 * This is a x86 retpoline fn. It pollutes the call graph by
		 * showing up everywhere there is an indirect branch, but does
		 * not itself mean anything. Here the top-of-stack is removed,
		 * by decrementing the stack count, and then further down, the
		 * resulting top-of-stack is replaced with the actual target.
		 * The result is that the retpoline functions will no longer
		 * appear in the call graph. Note this only affects the call
		 * graph, since all the original branches are left unchanged.
		 */
		ts->cnt -= 1;
		sym = ts->stack[ts->cnt - 2].cp->sym;
		if (sym && sym == tsym && to_al->addr != tsym->start) {
			/*
			 * Target is back to the middle of the symbol we came
			 * from so assume it is an indirect jmp and forget it
			 * altogether.
			 */
			ts->cnt -= 1;
			return 0;
		}
	} else if (sym && sym == tsym) {
		/*
		 * Target is back to the symbol we came from so assume it is an
		 * indirect jmp and forget it altogether.
		 */
		ts->cnt -= 1;
		return 0;
	}

	cp = call_path__findnew(cpr, ts->stack[ts->cnt - 2].cp, tsym,
				sample->addr, ts->kernel_start);
	if (!cp)
		return -ENOMEM;

	/* Replace the top-of-stack with the actual target */
	ts->stack[ts->cnt - 1].cp = cp;

	return 0;
}

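/*
 * Illustrative example, not part of the original file: with a call-graph
 * stack ending ... -> foo -> __x86_indirect_thunk_rax and a 'return' that
 * lands at the start of bar(), the thunk entry is dropped and the resulting
 * top-of-stack is re-pointed at a call path ending ... -> foo -> bar, so the
 * thunk never shows up in the call graph even though the raw branch records
 * are unchanged.
 */
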
int thread_stack__process(struct thread *thread, struct comm *comm,
			  struct perf_sample *sample,
			  struct addr_location *from_al,
			  struct addr_location *to_al, u64 ref,
			  struct call_return_processor *crp)
{
	struct thread_stack *ts = thread__stack(thread, sample->cpu);
	enum retpoline_state_t rstate;
	int err = 0;

	if (ts && !ts->crp) {
		/* Supersede thread_stack__event() */
		thread_stack__reset(thread, ts);
		ts = NULL;
	}

	if (!ts) {
		ts = thread_stack__new(thread, sample->cpu, crp);
		if (!ts)
			return -ENOMEM;
		ts->comm = comm;
	}

	rstate = ts->rstate;
	if (rstate == X86_RETPOLINE_DETECTED)
		ts->rstate = X86_RETPOLINE_POSSIBLE;

	/* Flush stack on exec */
	if (ts->comm != comm && thread->pid_ == thread->tid) {
		err = __thread_stack__flush(thread, ts);
		if (err)
			return err;
		ts->comm = comm;
	}

	/* If the stack is empty, put the current symbol on the stack */
	if (!ts->cnt) {
		err = thread_stack__bottom(ts, sample, from_al, to_al, ref);
		if (err)
			return err;
	}

	ts->branch_count += 1;
	ts->insn_count += sample->insn_cnt;
	ts->cyc_count += sample->cyc_cnt;
	ts->last_time = sample->time;

	if (sample->flags & PERF_IP_FLAG_CALL) {
		bool trace_end = sample->flags & PERF_IP_FLAG_TRACE_END;
		struct call_path_root *cpr = ts->crp->cpr;
		struct call_path *cp;
		u64 ret_addr;

		if (!sample->ip || !sample->addr)
			return 0;

		ret_addr = sample->ip + sample->insn_len;
		if (ret_addr == sample->addr)
			return 0; /* Zero-length calls are excluded */

		cp = call_path__findnew(cpr, ts->stack[ts->cnt - 1].cp,
					to_al->sym, sample->addr,
					ts->kernel_start);
		err = thread_stack__push_cp(ts, ret_addr, sample->time, ref,
					    cp, false, trace_end);

		/*
		 * A call to the same symbol but not the start of the symbol,
		 * may be the start of a x86 retpoline.
		 */
		if (!err && rstate == X86_RETPOLINE_POSSIBLE && to_al->sym &&
		    from_al->sym == to_al->sym &&
		    to_al->addr != to_al->sym->start)
			ts->rstate = X86_RETPOLINE_DETECTED;

	} else if (sample->flags & PERF_IP_FLAG_RETURN) {
		if (!sample->addr) {
			u32 return_from_kernel = PERF_IP_FLAG_SYSCALLRET |
						 PERF_IP_FLAG_INTERRUPT;

			if (!(sample->flags & return_from_kernel))
				return 0;

			/* Pop kernel stack */
			return thread_stack__pop_ks(thread, ts, sample, ref);
		}

		if (!sample->ip)
			return 0;

		/* x86 retpoline 'return' doesn't match the stack */
		if (rstate == X86_RETPOLINE_DETECTED && ts->cnt > 2 &&
		    ts->stack[ts->cnt - 1].ret_addr != sample->addr)
			return thread_stack__x86_retpoline(ts, sample, to_al);

		err = thread_stack__pop_cp(thread, ts, sample->addr,
					   sample->time, ref, from_al->sym);
		if (err) {
			if (err < 0)
				return err;
			err = thread_stack__no_call_return(thread, ts, sample,
							   from_al, to_al, ref);
		}
	} else if (sample->flags & PERF_IP_FLAG_TRACE_BEGIN) {
		err = thread_stack__trace_begin(thread, ts, sample->time, ref);
	} else if (sample->flags & PERF_IP_FLAG_TRACE_END) {
		err = thread_stack__trace_end(ts, sample, ref);
	} else if (sample->flags & PERF_IP_FLAG_BRANCH &&
		   from_al->sym != to_al->sym && to_al->sym &&
		   to_al->addr == to_al->sym->start) {
		struct call_path_root *cpr = ts->crp->cpr;
		struct call_path *cp;

		/*
		 * The compiler might optimize a call/ret combination by making
		 * it a jmp. Make that visible by recording on the stack a
		 * branch to the start of a different symbol. Note, that means
		 * when a ret pops the stack, all jmps must be popped off first.
		 */
		cp = call_path__findnew(cpr, ts->stack[ts->cnt - 1].cp,
					to_al->sym, sample->addr,
					ts->kernel_start);
		err = thread_stack__push_cp(ts, 0, sample->time, ref, cp, false,
					    false);
		if (!err)
			ts->stack[ts->cnt - 1].non_call = true;
	}

	return err;
}

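/*
 * Illustrative summary, not part of the original file, of how the function
 * above dispatches on sample->flags:
 *
 *	PERF_IP_FLAG_CALL	 -> push the expected return address
 *	PERF_IP_FLAG_RETURN	 -> pop the matching entry, or fall back to the
 *				    no-call-return heuristics
 *	PERF_IP_FLAG_TRACE_BEGIN -> pop a pending 'trace end' entry
 *	PERF_IP_FLAG_TRACE_END	 -> push a 'trace end' marker entry
 *	other branch to the start of a different symbol -> push a non-call entry
 */
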
size_t thread_stack__depth(struct thread *thread, int cpu)
{
	struct thread_stack *ts = thread__stack(thread, cpu);

	if (!ts)
		return 0;
	return ts->cnt;
}