// SPDX-License-Identifier: GPL-2.0
/*
 * Infrastructure to look into function calls and returns.
 * Copyright (c) 2008-2009 Frederic Weisbecker <fweisbec@gmail.com>
 * Mostly borrowed from function tracer which
 * is Copyright (c) Steven Rostedt <srostedt@redhat.com>
 *
 * Highly modified by Steven Rostedt (VMware).
 */
#include <linux/bits.h>
#include <linux/jump_label.h>
#include <linux/suspend.h>
#include <linux/ftrace.h>
#include <linux/static_call.h>
#include <linux/slab.h>

#include <trace/events/sched.h>

#include "ftrace_internal.h"
/*
 * FGRAPH_FRAME_SIZE:	Size in bytes of the meta data on the shadow stack
 * FGRAPH_FRAME_OFFSET:	Size in long words of the meta data frame
 */
#define FGRAPH_FRAME_SIZE	sizeof(struct ftrace_ret_stack)
#define FGRAPH_FRAME_OFFSET	DIV_ROUND_UP(FGRAPH_FRAME_SIZE, sizeof(long))
/*
 * On entry to a function (via function_graph_enter()), a new fgraph frame
 * (ftrace_ret_stack) is pushed onto the stack as well as a word that
 * holds a bitmask and a type (called "bitmap"). The bitmap is defined as:
 *
 * bits:  0 -  9	offset in words from the previous ftrace_ret_stack
 *
 * bits: 10 - 11	Type of storage
 *			  0 - reserved
 *			  1 - bitmap of fgraph_array index
 *			  2 - data reserved by fgraph_reserve_data()
 *
 * For type with "bitmap of fgraph_array index" (FGRAPH_TYPE_BITMAP):
 *  bits: 12 - 27	The bitmap of fgraph_ops fgraph_array index
 *			That is, it's a bitmask of 0-15 (16 bits)
 *			where if a corresponding ops in the fgraph_array[]
 *			expects a callback from the return of the function
 *			its corresponding bit will be set.
 *
 * The top of the ret_stack (when not empty) will always have a reference
 * word that points to the last fgraph frame that was saved.
 *
 * For data storage (FGRAPH_TYPE_DATA):
 *  bits: 12 - 17	The size in words that is stored
 *  bits: 18 - 23	The index of fgraph_array, which shows who is stored
 *
 * That is, at the end of function_graph_enter(), if the first and fourth
 * fgraph_ops on the fgraph_array[] (index 0 and 3) need their retfunc called
 * on the return of the function being traced, and the fourth fgraph_ops
 * stored two words of data, this is what will be on the task's shadow
 * ret_stack: (the stack grows upward)
 *
 *  ret_stack[SHADOW_STACK_OFFSET]
 * | SHADOW_STACK_TASK_VARS(ret_stack)[15]      |
 * ...
 * | SHADOW_STACK_TASK_VARS(ret_stack)[0]       |
 *  ret_stack[SHADOW_STACK_MAX_OFFSET]
 * ...
 * |                                            | <- task->curr_ret_stack
 * +--------------------------------------------+
 * | (3 << 12) | (3 << 10) | FGRAPH_FRAME_OFFSET|
 * |         *or put another way*               |
 * | (3 << FGRAPH_DATA_INDEX_SHIFT)| \          | This is for fgraph_ops[3].
 * | ((2 - 1) << FGRAPH_DATA_SHIFT)| \          | The data size is 2 words.
 * | (FGRAPH_TYPE_DATA << FGRAPH_TYPE_SHIFT)| \ |
 * | (offset2:FGRAPH_FRAME_OFFSET+3)            | <- the offset2 is from here
 * +--------------------------------------------+ ( It is 4 words from the ret_stack)
 * | STORED DATA WORD 2                         |
 * | STORED DATA WORD 1                         |
 * +--------------------------------------------+
 * | (9 << 12) | (1 << 10) | FGRAPH_FRAME_OFFSET|
 * |         *or put another way*               |
 * | (BIT(3)|BIT(0)) << FGRAPH_INDEX_SHIFT  | \ |
 * | FGRAPH_TYPE_BITMAP << FGRAPH_TYPE_SHIFT| \ |
 * | (offset1:FGRAPH_FRAME_OFFSET)              | <- the offset1 is from here
 * +--------------------------------------------+
 * | struct ftrace_ret_stack                    |
 * |   (stores the saved ret pointer)           | <- the offset points here
 * +--------------------------------------------+
 * |     (X) | (N)                              | ( N words away from
 * |                                            |   previous ret_stack)
 * ...
 * ret_stack[0]
 *
 * If a backtrace is required and the real return pointer needs to be
 * fetched, the task's curr_ret_stack offset is examined. If it is greater
 * than zero (reserved, or right before popped), the word below it is masked
 * with FGRAPH_FRAME_OFFSET_MASK to get the offset of the ftrace_ret_stack
 * structure stored on the shadow stack.
 */

/*
 * The following is for the top word on the stack:
 *
 *   FGRAPH_FRAME_OFFSET (0-9) holds the offset delta to the fgraph frame
 *   FGRAPH_TYPE (10-11) holds the type of word this is.
 *     (RESERVED or BITMAP)
 */
#define FGRAPH_FRAME_OFFSET_BITS	10
#define FGRAPH_FRAME_OFFSET_MASK	GENMASK(FGRAPH_FRAME_OFFSET_BITS - 1, 0)

#define FGRAPH_TYPE_BITS	2
#define FGRAPH_TYPE_MASK	GENMASK(FGRAPH_TYPE_BITS - 1, 0)
#define FGRAPH_TYPE_SHIFT	FGRAPH_FRAME_OFFSET_BITS
enum {
	FGRAPH_TYPE_RESERVED	= 0,
	FGRAPH_TYPE_BITMAP	= 1,
	FGRAPH_TYPE_DATA	= 2,
};
/*
 * For BITMAP type:
 *  FGRAPH_INDEX (12-27) bits hold the bitmap of gops indexes whose return
 *  callback should be called.
 */
#define FGRAPH_INDEX_BITS	16
#define FGRAPH_INDEX_MASK	GENMASK(FGRAPH_INDEX_BITS - 1, 0)
#define FGRAPH_INDEX_SHIFT	(FGRAPH_TYPE_SHIFT + FGRAPH_TYPE_BITS)

/*
 * For DATA type:
 *  FGRAPH_DATA (12-17) bits hold the size of the data (in words)
 *  FGRAPH_DATA_INDEX (18-23) bits hold the index for which gops->idx the data is for
 *
 * Note:
 *  data_size == 0 means 1 word, and 31 (=2^5 - 1) means 32 words.
 */
#define FGRAPH_DATA_BITS	5
#define FGRAPH_DATA_MASK	GENMASK(FGRAPH_DATA_BITS - 1, 0)
#define FGRAPH_DATA_SHIFT	(FGRAPH_TYPE_SHIFT + FGRAPH_TYPE_BITS)
#define FGRAPH_MAX_DATA_SIZE	(sizeof(long) * (1 << FGRAPH_DATA_BITS))

#define FGRAPH_DATA_INDEX_BITS	4
#define FGRAPH_DATA_INDEX_MASK	GENMASK(FGRAPH_DATA_INDEX_BITS - 1, 0)
#define FGRAPH_DATA_INDEX_SHIFT	(FGRAPH_DATA_SHIFT + FGRAPH_DATA_BITS)

#define FGRAPH_MAX_INDEX	\
	((FGRAPH_INDEX_SIZE << FGRAPH_DATA_BITS) + FGRAPH_RET_INDEX)

#define FGRAPH_ARRAY_SIZE	FGRAPH_INDEX_BITS
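
/*
 * Worked example (illustrative only, mirroring the layout diagram above):
 * a DATA word for fgraph_ops index 3 storing two words, written at an
 * offset of FGRAPH_FRAME_OFFSET + 3 above its ftrace_ret_stack frame, is:
 *
 *	(3 << FGRAPH_DATA_INDEX_SHIFT)          |	// gops->idx == 3
 *	((2 - 1) << FGRAPH_DATA_SHIFT)          |	// two words of data
 *	(FGRAPH_TYPE_DATA << FGRAPH_TYPE_SHIFT) |
 *	(FGRAPH_FRAME_OFFSET + 3)			// delta back to the frame
 *
 * which is what make_data_type_val(3, 2, FGRAPH_FRAME_OFFSET + 3) below
 * produces.
 */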
/*
 * SHADOW_STACK_SIZE:		The size in bytes of the entire shadow stack
 * SHADOW_STACK_OFFSET:		The size in long words of the shadow stack
 * SHADOW_STACK_MAX_OFFSET:	The max offset of the stack for a new frame to be added
 */
#define SHADOW_STACK_SIZE	(4096)
#define SHADOW_STACK_OFFSET	(SHADOW_STACK_SIZE / sizeof(long))
/* Leave a buffer at the end */
#define SHADOW_STACK_MAX_OFFSET				\
	(SHADOW_STACK_OFFSET - (FGRAPH_FRAME_OFFSET + 1 + FGRAPH_ARRAY_SIZE))
/* RET_STACK(): Return the frame from a given @offset from task @t */
#define RET_STACK(t, offset) ((struct ftrace_ret_stack *)(&(t)->ret_stack[offset]))

/*
 * Each fgraph_ops has a reserved unsigned long at the end (top) of the
 * ret_stack to store task specific state.
 */
#define SHADOW_STACK_TASK_VARS(ret_stack) \
	((unsigned long *)(&(ret_stack)[SHADOW_STACK_OFFSET - FGRAPH_ARRAY_SIZE]))
DEFINE_STATIC_KEY_FALSE(kill_ftrace_graph);
int ftrace_graph_active;

static struct kmem_cache *fgraph_stack_cachep;

static struct fgraph_ops *fgraph_array[FGRAPH_ARRAY_SIZE];
static unsigned long fgraph_array_bitmask;

/* LRU index table for fgraph_array */
static int fgraph_lru_table[FGRAPH_ARRAY_SIZE];
static int fgraph_lru_next;
static int fgraph_lru_last;
/* Initialize fgraph_lru_table with unused index */
static void fgraph_lru_init(void)
{
	int i;

	for (i = 0; i < FGRAPH_ARRAY_SIZE; i++)
		fgraph_lru_table[i] = i;
}
/* Release the used index to the LRU table */
static int fgraph_lru_release_index(int idx)
{
	if (idx < 0 || idx >= FGRAPH_ARRAY_SIZE ||
	    WARN_ON_ONCE(fgraph_lru_table[fgraph_lru_last] != -1))
		return -1;

	fgraph_lru_table[fgraph_lru_last] = idx;
	fgraph_lru_last = (fgraph_lru_last + 1) % FGRAPH_ARRAY_SIZE;

	clear_bit(idx, &fgraph_array_bitmask);
	return 0;
}
/* Allocate a new index from LRU table */
static int fgraph_lru_alloc_index(void)
{
	int idx = fgraph_lru_table[fgraph_lru_next];

	/* No id is available */
	if (idx == -1)
		return -1;

	fgraph_lru_table[fgraph_lru_next] = -1;
	fgraph_lru_next = (fgraph_lru_next + 1) % FGRAPH_ARRAY_SIZE;

	set_bit(idx, &fgraph_array_bitmask);
	return idx;
}
/* Get the offset to the fgraph frame from a ret_stack value */
static inline int __get_offset(unsigned long val)
{
	return val & FGRAPH_FRAME_OFFSET_MASK;
}

/* Get the type of word from a ret_stack value */
static inline int __get_type(unsigned long val)
{
	return (val >> FGRAPH_TYPE_SHIFT) & FGRAPH_TYPE_MASK;
}

/* Get the data_index for a DATA type ret_stack word */
static inline int __get_data_index(unsigned long val)
{
	return (val >> FGRAPH_DATA_INDEX_SHIFT) & FGRAPH_DATA_INDEX_MASK;
}

/* Get the data_size for a DATA type ret_stack word */
static inline int __get_data_size(unsigned long val)
{
	return ((val >> FGRAPH_DATA_SHIFT) & FGRAPH_DATA_MASK) + 1;
}

/* Get the word from the ret_stack at @offset */
static inline unsigned long get_fgraph_entry(struct task_struct *t, int offset)
{
	return t->ret_stack[offset];
}

/* Get the FRAME_OFFSET from the word at @offset on the ret_stack */
static inline int get_frame_offset(struct task_struct *t, int offset)
{
	return __get_offset(t->ret_stack[offset]);
}

/* For BITMAP type: get the bitmask from the @offset at ret_stack */
static inline unsigned long
get_bitmap_bits(struct task_struct *t, int offset)
{
	return (t->ret_stack[offset] >> FGRAPH_INDEX_SHIFT) & FGRAPH_INDEX_MASK;
}
/* Write the bitmap to the ret_stack at @offset (does index, offset and bitmask) */
static inline void
set_bitmap(struct task_struct *t, int offset, unsigned long bitmap)
{
	t->ret_stack[offset] = (bitmap << FGRAPH_INDEX_SHIFT) |
		(FGRAPH_TYPE_BITMAP << FGRAPH_TYPE_SHIFT) | FGRAPH_FRAME_OFFSET;
}
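
/*
 * Worked example (illustrative only): for the layout shown at the top of
 * this file, where fgraph_ops index 0 and 3 want their retfunc called,
 * set_bitmap(current, offset, BIT(0) | BIT(3)) writes the word
 *
 *	(9 << FGRAPH_INDEX_SHIFT) | (FGRAPH_TYPE_BITMAP << FGRAPH_TYPE_SHIFT) |
 *	FGRAPH_FRAME_OFFSET
 *
 * and get_bitmap_bits() at that offset later recovers the 0x9 bitmask.
 */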
/* For DATA type: get the data saved under the ret_stack word at @offset */
static inline void *get_data_type_data(struct task_struct *t, int offset)
{
	unsigned long val = t->ret_stack[offset];

	if (__get_type(val) != FGRAPH_TYPE_DATA)
		return NULL;
	offset -= __get_data_size(val);
	return (void *)&t->ret_stack[offset];
}
/* Create the ret_stack word for a DATA type */
static inline unsigned long make_data_type_val(int idx, int size, int offset)
{
	return (idx << FGRAPH_DATA_INDEX_SHIFT) |
		((size - 1) << FGRAPH_DATA_SHIFT) |
		(FGRAPH_TYPE_DATA << FGRAPH_TYPE_SHIFT) | offset;
}
/* ftrace_graph_entry set to this to tell some archs to run function graph */
static int entry_run(struct ftrace_graph_ent *trace, struct fgraph_ops *ops)
{
	return 0;
}

/* ftrace_graph_return set to this to tell some archs to run function graph */
static void return_run(struct ftrace_graph_ret *trace, struct fgraph_ops *ops)
{
}

static void ret_stack_set_task_var(struct task_struct *t, int idx, long val)
{
	unsigned long *gvals = SHADOW_STACK_TASK_VARS(t->ret_stack);

	gvals[idx] = val;
}
static unsigned long *
ret_stack_get_task_var(struct task_struct *t, int idx)
{
	unsigned long *gvals = SHADOW_STACK_TASK_VARS(t->ret_stack);

	return &gvals[idx];
}

static void ret_stack_init_task_vars(unsigned long *ret_stack)
{
	unsigned long *gvals = SHADOW_STACK_TASK_VARS(ret_stack);

	memset(gvals, 0, sizeof(*gvals) * FGRAPH_ARRAY_SIZE);
}
/**
 * fgraph_reserve_data - Reserve storage on the task's ret_stack
 * @idx:	The index of fgraph_array
 * @size_bytes:	The size in bytes to reserve
 *
 * Reserves space of up to FGRAPH_MAX_DATA_SIZE bytes on the
 * task's ret_stack shadow stack, for a given fgraph_ops during
 * the entryfunc() call. If entryfunc() returns zero, the storage
 * is discarded. An entryfunc() can only call this once per iteration.
 * The fgraph_ops retfunc() can retrieve this stored data with
 * fgraph_retrieve_data().
 *
 * Returns: On success, a pointer to the data on the stack.
 *   Otherwise, NULL if there's not enough space left on the
 *   ret_stack for the data, or if fgraph_reserve_data() was called
 *   more than once for a single entryfunc() call.
 */
void *fgraph_reserve_data(int idx, int size_bytes)
{
	unsigned long val;
	void *data;
	int curr_ret_stack = current->curr_ret_stack;
	int data_size;

	if (size_bytes > FGRAPH_MAX_DATA_SIZE)
		return NULL;

	/* Convert the data size to number of longs. */
	data_size = (size_bytes + sizeof(long) - 1) >> (sizeof(long) == 4 ? 2 : 3);

	val = get_fgraph_entry(current, curr_ret_stack - 1);
	data = &current->ret_stack[curr_ret_stack];

	curr_ret_stack += data_size + 1;
	if (unlikely(curr_ret_stack >= SHADOW_STACK_MAX_OFFSET))
		return NULL;

	val = make_data_type_val(idx, data_size, __get_offset(val) + data_size + 1);

	/* Set the last word to be reserved */
	current->ret_stack[curr_ret_stack - 1] = val;

	/* Make sure interrupts see this */
	barrier();
	current->curr_ret_stack = curr_ret_stack;
	/* Again sync with interrupts, and reset reserve */
	current->ret_stack[curr_ret_stack - 1] = val;

	return data;
}
/**
 * fgraph_retrieve_data - Retrieve stored data from fgraph_reserve_data()
 * @idx:	the index of fgraph_array (fgraph_ops::idx)
 * @size_bytes:	pointer to retrieved data size.
 *
 * This is to be called by a fgraph_ops retfunc(), to retrieve data that
 * was stored by the fgraph_ops entryfunc() on the function entry.
 * That is, this will retrieve the data that was reserved on the
 * entry of the function that corresponds to the exit of the function
 * that the fgraph_ops retfunc() is called on.
 *
 * Returns: The stored data from fgraph_reserve_data() called by the
 *   matching entryfunc() for the retfunc() this is called from.
 *   Or NULL if there was nothing stored.
 */
void *fgraph_retrieve_data(int idx, int *size_bytes)
{
	return fgraph_retrieve_parent_data(idx, size_bytes, 0);
}
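
/*
 * Example usage (illustrative sketch only; my_entry/my_return are
 * hypothetical callbacks, not part of this file): an entryfunc() stashes a
 * timestamp that the matching retfunc() reads back.
 *
 *	static int my_entry(struct ftrace_graph_ent *trace, struct fgraph_ops *gops)
 *	{
 *		u64 *ts = fgraph_reserve_data(gops->idx, sizeof(*ts));
 *
 *		if (!ts)
 *			return 0;
 *		*ts = trace_clock_local();
 *		return 1;
 *	}
 *
 *	static void my_return(struct ftrace_graph_ret *trace, struct fgraph_ops *gops)
 *	{
 *		int size;
 *		u64 *ts = fgraph_retrieve_data(gops->idx, &size);
 *
 *		if (ts)
 *			pr_info("%ps took %llu ns\n", (void *)trace->func,
 *				trace_clock_local() - *ts);
 *	}
 */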
/**
 * fgraph_get_task_var - retrieve a task specific state variable
 * @gops: The fgraph_ops that owns the task specific variable
 *
 * Every registered fgraph_ops has a task state variable
 * reserved on the task's ret_stack. This function returns the
 * address to that variable.
 *
 * Returns the address to the fgraph_ops @gops task specific
 * unsigned long variable.
 */
unsigned long *fgraph_get_task_var(struct fgraph_ops *gops)
{
	return ret_stack_get_task_var(current, gops->idx);
}
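
/*
 * Example (illustrative sketch): a fgraph_ops callback using its per-task
 * variable as a simple nesting counter. "my_entry" is hypothetical.
 *
 *	static int my_entry(struct ftrace_graph_ent *trace, struct fgraph_ops *gops)
 *	{
 *		unsigned long *nested = fgraph_get_task_var(gops);
 *
 *		(*nested)++;
 *		return 1;
 *	}
 */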
/*
 * @offset: The offset into @t->ret_stack to find the ret_stack entry
 * @frame_offset: Where to place the offset into @t->ret_stack of that entry
 *
 * Returns a pointer to the previous ret_stack below @offset or NULL
 *   when it reaches the bottom of the stack.
 *
 * Calling this with:
 *
 *   offset = task->curr_ret_stack;
 *   do {
 *	ret_stack = get_ret_stack(task, offset, &offset);
 *   } while (ret_stack);
 *
 * Will iterate through all the ret_stack entries from curr_ret_stack
 * down to the first one.
 */
static inline struct ftrace_ret_stack *
get_ret_stack(struct task_struct *t, int offset, int *frame_offset)
{
	int offs;

	BUILD_BUG_ON(FGRAPH_FRAME_SIZE % sizeof(long));

	if (unlikely(offset <= 0))
		return NULL;

	offs = get_frame_offset(t, --offset);
	if (WARN_ON_ONCE(offs <= 0 || offs > offset))
		return NULL;

	offset -= offs;

	*frame_offset = offset;
	return RET_STACK(t, offset);
}
/**
 * fgraph_retrieve_parent_data - get data from a parent function
 * @idx:	The index into the fgraph_array (fgraph_ops::idx)
 * @size_bytes:	A pointer to retrieved data size
 * @depth:	The depth to find the parent (0 is the current function)
 *
 * This is similar to fgraph_retrieve_data() but can be used to retrieve
 * data from a parent caller function.
 *
 * Return: a pointer to the specified parent data or NULL if not found
 */
void *fgraph_retrieve_parent_data(int idx, int *size_bytes, int depth)
{
	struct ftrace_ret_stack *ret_stack = NULL;
	int offset = current->curr_ret_stack;
	unsigned long val;

	if (offset <= 0)
		return NULL;

	for (;;) {
		int next_offset;

		ret_stack = get_ret_stack(current, offset, &next_offset);
		if (!ret_stack || --depth < 0)
			break;
		offset = next_offset;
	}

	if (!ret_stack)
		return NULL;

	offset--;

	val = get_fgraph_entry(current, offset);
	while (__get_type(val) == FGRAPH_TYPE_DATA) {
		if (__get_data_index(val) == idx)
			goto found;
		offset -= __get_data_size(val) + 1;
		val = get_fgraph_entry(current, offset);
	}
	return NULL;
found:
	*size_bytes = __get_data_size(val) * sizeof(long);
	return get_data_type_data(current, offset);
}
/* Both enabled by default (can be cleared by function_graph tracer flags) */
bool fgraph_sleep_time = true;
#ifdef CONFIG_DYNAMIC_FTRACE
/*
 * archs can override this function if they must do something
 * to enable hook for graph tracer.
 */
int __weak ftrace_enable_ftrace_graph_caller(void)
{
	return 0;
}

/*
 * archs can override this function if they must do something
 * to disable hook for graph tracer.
 */
int __weak ftrace_disable_ftrace_graph_caller(void)
{
	return 0;
}
#endif

int ftrace_graph_entry_stub(struct ftrace_graph_ent *trace,
			    struct fgraph_ops *gops)
{
	return 0;
}

static void ftrace_graph_ret_stub(struct ftrace_graph_ret *trace,
				  struct fgraph_ops *gops)
{
}
static struct fgraph_ops fgraph_stub = {
	.entryfunc	= ftrace_graph_entry_stub,
	.retfunc	= ftrace_graph_ret_stub,
};

static struct fgraph_ops *fgraph_direct_gops = &fgraph_stub;
DEFINE_STATIC_CALL(fgraph_func, ftrace_graph_entry_stub);
DEFINE_STATIC_CALL(fgraph_retfunc, ftrace_graph_ret_stub);
static DEFINE_STATIC_KEY_TRUE(fgraph_do_direct);
/**
 * ftrace_graph_stop - set to permanently disable function graph tracing
 *
 * In case of an error in function graph tracing, this is called
 * to try to keep function graph tracing from causing any more harm.
 * Usually this is pretty severe and this is called to try to at least
 * get a warning out to the user.
 */
void ftrace_graph_stop(void)
{
	static_branch_enable(&kill_ftrace_graph);
}
/* Add a function return address to the trace stack on thread info.*/
static int
ftrace_push_return_trace(unsigned long ret, unsigned long func,
			 unsigned long frame_pointer, unsigned long *retp,
			 int fgraph_idx)
{
	struct ftrace_ret_stack *ret_stack;
	unsigned long val;
	int offset;

	if (unlikely(ftrace_graph_is_dead()))
		return -EBUSY;

	if (!current->ret_stack)
		return -EBUSY;

	BUILD_BUG_ON(SHADOW_STACK_SIZE % sizeof(long));

	/* Set val to "reserved" with the delta to the new fgraph frame */
	val = (FGRAPH_TYPE_RESERVED << FGRAPH_TYPE_SHIFT) | FGRAPH_FRAME_OFFSET;

	/*
	 * We must make sure the ret_stack is tested before we read
	 * anything else.
	 */
	smp_rmb();

	/*
	 * Check if there's room on the shadow stack to fit a fgraph frame
	 * and a bitmap word.
	 */
	if (current->curr_ret_stack + FGRAPH_FRAME_OFFSET + 1 >= SHADOW_STACK_MAX_OFFSET) {
		atomic_inc(&current->trace_overrun);
		return -EBUSY;
	}

	offset = READ_ONCE(current->curr_ret_stack);
	ret_stack = RET_STACK(current, offset);
	offset += FGRAPH_FRAME_OFFSET;

	/* ret offset = FGRAPH_FRAME_OFFSET ; type = reserved */
	current->ret_stack[offset] = val;
	ret_stack->ret = ret;
	/*
	 * The unwinders expect curr_ret_stack to point to either zero
	 * or an offset where to find the next ret_stack. Even though the
	 * ret stack might be bogus, we want to write the ret and the
	 * offset to find the ret_stack before we increment the stack point.
	 * If an interrupt comes in now before we increment the curr_ret_stack
	 * it may blow away what we wrote. But that's fine, because the
	 * offset will still be correct (even though the 'ret' won't be).
	 * What we worry about is the offset being correct after we increment
	 * the curr_ret_stack and before we update that offset, as if an
	 * interrupt comes in and does an unwind stack dump, it will need
	 * at least a correct offset!
	 */
	barrier();
	WRITE_ONCE(current->curr_ret_stack, offset + 1);
	/*
	 * This next barrier is to ensure that an interrupt coming in
	 * will not corrupt what we are about to write.
	 */
	barrier();

	/* Still keep it reserved even if an interrupt came in */
	current->ret_stack[offset] = val;

	ret_stack->ret = ret;
	ret_stack->func = func;
#ifdef HAVE_FUNCTION_GRAPH_FP_TEST
	ret_stack->fp = frame_pointer;
#endif
	ret_stack->retp = retp;
	return offset;
}
/*
 * Not all archs define MCOUNT_INSN_SIZE which is used to look for direct
 * functions. But those archs currently don't support direct functions
 * anyway, and ftrace_find_rec_direct() is just a stub for them.
 * Define MCOUNT_INSN_SIZE to keep those archs compiling.
 */
#ifndef MCOUNT_INSN_SIZE
/* Make sure this only works without direct calls */
# ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
#  error MCOUNT_INSN_SIZE not defined with direct calls enabled
# endif
# define MCOUNT_INSN_SIZE 0
#endif
/* If the caller does not use ftrace, call this function. */
int function_graph_enter(unsigned long ret, unsigned long func,
			 unsigned long frame_pointer, unsigned long *retp)
{
	struct ftrace_graph_ent trace;
	unsigned long bitmap = 0;
	int offset;
	int i;

	trace.func = func;
	trace.depth = ++current->curr_ret_depth;

	offset = ftrace_push_return_trace(ret, func, frame_pointer, retp, 0);
	if (offset < 0)
		goto out;

#ifdef CONFIG_HAVE_STATIC_CALL
	if (static_branch_likely(&fgraph_do_direct)) {
		int save_curr_ret_stack = current->curr_ret_stack;

		if (static_call(fgraph_func)(&trace, fgraph_direct_gops))
			bitmap |= BIT(fgraph_direct_gops->idx);
		else
			/* Clear out any saved storage */
			current->curr_ret_stack = save_curr_ret_stack;
	} else
#endif
	{
		for_each_set_bit(i, &fgraph_array_bitmask,
				 sizeof(fgraph_array_bitmask) * BITS_PER_BYTE) {
			struct fgraph_ops *gops = READ_ONCE(fgraph_array[i]);
			int save_curr_ret_stack;

			if (gops == &fgraph_stub)
				continue;

			save_curr_ret_stack = current->curr_ret_stack;
			if (ftrace_ops_test(&gops->ops, func, NULL) &&
			    gops->entryfunc(&trace, gops))
				bitmap |= BIT(i);
			else
				/* Clear out any saved storage */
				current->curr_ret_stack = save_curr_ret_stack;
		}
	}

	if (!bitmap)
		goto out_ret;

	/*
	 * Since this function uses fgraph_idx = 0 as a tail-call checking
	 * flag, set that bit always.
	 */
	set_bitmap(current, offset, bitmap | BIT(0));

	return 0;
 out_ret:
	current->curr_ret_stack -= FGRAPH_FRAME_OFFSET + 1;
 out:
	current->curr_ret_depth--;
	return -EBUSY;
}
/* Retrieve a function return address from the trace stack on thread info.*/
static struct ftrace_ret_stack *
ftrace_pop_return_trace(struct ftrace_graph_ret *trace, unsigned long *ret,
			unsigned long frame_pointer, int *offset)
{
	struct ftrace_ret_stack *ret_stack;

	ret_stack = get_ret_stack(current, current->curr_ret_stack, offset);

	if (unlikely(!ret_stack)) {
		ftrace_graph_stop();
		WARN(1, "Bad function graph ret_stack pointer: %d",
		     current->curr_ret_stack);
		/* Might as well panic, otherwise we have nowhere to go */
		*ret = (unsigned long)panic;
		return NULL;
	}

#ifdef HAVE_FUNCTION_GRAPH_FP_TEST
	/*
	 * The arch may choose to record the frame pointer used
	 * and check it here to make sure that it is what we expect it
	 * to be. If gcc does not set the place holder of the return
	 * address in the frame pointer, and does a copy instead, then
	 * the function graph trace will fail. This test detects this
	 * case.
	 *
	 * Currently, x86_32 with optimize for size (-Os) makes the latest
	 * gcc do the above.
	 *
	 * Note, -mfentry does not use frame pointers, and this test
	 * is not needed if CC_USING_FENTRY is set.
	 */
	if (unlikely(ret_stack->fp != frame_pointer)) {
		ftrace_graph_stop();
		WARN(1, "Bad frame pointer: expected %lx, received %lx\n"
		     "  from func %ps return to %lx\n",
		     ret_stack->fp,
		     frame_pointer,
		     (void *)ret_stack->func,
		     ret_stack->ret);
		*ret = (unsigned long)panic;
		return NULL;
	}
#endif

	*offset += FGRAPH_FRAME_OFFSET;
	*ret = ret_stack->ret;
	trace->func = ret_stack->func;
	trace->overrun = atomic_read(&current->trace_overrun);
	trace->depth = current->curr_ret_depth;
	/*
	 * We still want to trace interrupts coming in if
	 * max_depth is set to 1. Make sure the decrement is
	 * seen before ftrace_graph_return.
	 */
	barrier();

	return ret_stack;
}
/*
 * Hibernation protection.
 * The state of the current task is too unstable during
 * suspend/restore to disk. We want to protect against that.
 */
static int
ftrace_suspend_notifier_call(struct notifier_block *bl, unsigned long state,
			     void *unused)
{
	switch (state) {
	case PM_HIBERNATION_PREPARE:
		pause_graph_tracing();
		break;

	case PM_POST_HIBERNATION:
		unpause_graph_tracing();
		break;
	}

	return NOTIFY_DONE;
}

static struct notifier_block ftrace_suspend_notifier = {
	.notifier_call = ftrace_suspend_notifier_call,
};

/* fgraph_ret_regs is not defined without CONFIG_FUNCTION_GRAPH_RETVAL */
struct fgraph_ret_regs;
/*
 * Send the trace to the ring-buffer.
 * @return the original return address.
 */
static unsigned long __ftrace_return_to_handler(struct fgraph_ret_regs *ret_regs,
						unsigned long frame_pointer)
{
	struct ftrace_ret_stack *ret_stack;
	struct ftrace_graph_ret trace;
	unsigned long bitmap;
	unsigned long ret;
	int offset;
	int i;

	ret_stack = ftrace_pop_return_trace(&trace, &ret, frame_pointer, &offset);

	if (unlikely(!ret_stack)) {
		ftrace_graph_stop();
		WARN_ON(1);
		/* Might as well panic. What else to do? */
		return (unsigned long)panic;
	}

	trace.rettime = trace_clock_local();
#ifdef CONFIG_FUNCTION_GRAPH_RETVAL
	trace.retval = fgraph_ret_regs_return_value(ret_regs);
#endif

	bitmap = get_bitmap_bits(current, offset);

#ifdef CONFIG_HAVE_STATIC_CALL
	if (static_branch_likely(&fgraph_do_direct)) {
		if (test_bit(fgraph_direct_gops->idx, &bitmap))
			static_call(fgraph_retfunc)(&trace, fgraph_direct_gops);
	} else
#endif
	{
		for_each_set_bit(i, &bitmap, sizeof(bitmap) * BITS_PER_BYTE) {
			struct fgraph_ops *gops = fgraph_array[i];

			if (gops == &fgraph_stub)
				continue;

			gops->retfunc(&trace, gops);
		}
	}

	/*
	 * The ftrace_graph_return() may still access the current
	 * ret_stack structure, we need to make sure the update of
	 * curr_ret_stack is after that.
	 */
	barrier();
	current->curr_ret_stack = offset - FGRAPH_FRAME_OFFSET;

	current->curr_ret_depth--;
	return ret;
}
/*
 * After all architectures have selected HAVE_FUNCTION_GRAPH_RETVAL, we can
 * leave only ftrace_return_to_handler(ret_regs).
 */
#ifdef CONFIG_HAVE_FUNCTION_GRAPH_RETVAL
unsigned long ftrace_return_to_handler(struct fgraph_ret_regs *ret_regs)
{
	return __ftrace_return_to_handler(ret_regs,
				fgraph_ret_regs_frame_pointer(ret_regs));
}
#else
unsigned long ftrace_return_to_handler(unsigned long frame_pointer)
{
	return __ftrace_return_to_handler(NULL, frame_pointer);
}
#endif
/**
 * ftrace_graph_get_ret_stack - return the entry of the shadow stack
 * @task: The task to read the shadow stack from.
 * @idx: Index down the shadow stack
 *
 * Return the ret_struct on the shadow stack of the @task at the
 * call graph at @idx starting with zero. If @idx is zero, it
 * will return the last saved ret_stack entry. If it is greater than
 * zero, it will return the corresponding ret_stack for the depth
 * of saved return addresses.
 */
struct ftrace_ret_stack *
ftrace_graph_get_ret_stack(struct task_struct *task, int idx)
{
	struct ftrace_ret_stack *ret_stack = NULL;
	int offset = task->curr_ret_stack;

	if (offset < 0)
		return NULL;

	do {
		ret_stack = get_ret_stack(task, offset, &offset);
	} while (ret_stack && --idx >= 0);

	return ret_stack;
}
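
/*
 * Example (illustrative sketch): walking every saved return address on a
 * task's shadow stack, as an arch unwinder might do.
 *
 *	struct ftrace_ret_stack *frame;
 *	int i = 0;
 *
 *	while ((frame = ftrace_graph_get_ret_stack(task, i++)))
 *		pr_info("saved ret: %pS\n", (void *)frame->ret);
 */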
/**
 * ftrace_graph_top_ret_addr - return the top return address in the shadow stack
 * @task: The task to read the shadow stack from.
 *
 * Return the first return address on the shadow stack of the @task, which is
 * not the fgraph's return_to_handler.
 */
unsigned long ftrace_graph_top_ret_addr(struct task_struct *task)
{
	unsigned long return_handler = (unsigned long)dereference_kernel_function_descriptor(return_to_handler);
	struct ftrace_ret_stack *ret_stack = NULL;
	int offset = task->curr_ret_stack;

	if (offset < 0)
		return 0;

	do {
		ret_stack = get_ret_stack(task, offset, &offset);
	} while (ret_stack && ret_stack->ret == return_handler);

	return ret_stack ? ret_stack->ret : 0;
}
/**
 * ftrace_graph_ret_addr - return the original value of the return address
 * @task: The task the unwinder is being executed on
 * @idx: An initialized pointer to the next stack index to use
 * @ret: The current return address (likely pointing to return_handler)
 * @retp: The address on the stack of the current return location
 *
 * This function can be called by stack unwinding code to convert a found stack
 * return address (@ret) to its original value, in case the function graph
 * tracer has modified it to be 'return_to_handler'. If the address hasn't
 * been modified, the unchanged value of @ret is returned.
 *
 * @idx holds the last index used to know where to start from. It should be
 * initialized to zero for the first iteration as that will mean to start
 * at the top of the shadow stack. If the location is found, this pointer
 * will be assigned that location so that if called again, it will continue
 * where it left off.
 *
 * @retp is a pointer to the return address on the stack.
 */
unsigned long ftrace_graph_ret_addr(struct task_struct *task, int *idx,
				    unsigned long ret, unsigned long *retp)
{
	struct ftrace_ret_stack *ret_stack;
	unsigned long return_handler = (unsigned long)dereference_kernel_function_descriptor(return_to_handler);
	int i;

	if (ret != return_handler)
		return ret;

	i = *idx ? : task->curr_ret_stack;
	while (i > 0) {
		ret_stack = get_ret_stack(task, i, &i);
		if (!ret_stack)
			break;
		/*
		 * For the tail-call, there would be 2 or more ftrace_ret_stacks on
		 * the ret_stack, which records "return_to_handler" as the return
		 * address except for the last one.
		 * But on the real stack, there should be 1 entry because tail-call
		 * reuses the return address on the stack and jumps to the next function.
		 * Thus we will continue to find the real return address.
		 */
		if (ret_stack->retp == retp &&
		    ret_stack->ret != return_handler) {
			*idx = i;
			return ret_stack->ret;
		}
	}

	return ret;
}
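
/*
 * Example (illustrative sketch): how an arch unwinder typically uses this,
 * keeping a graph index across the walk of the real stack. "stack_slot"
 * stands for the address of the location on the real stack where "addr"
 * was read from; both names are hypothetical.
 *
 *	int graph_idx = 0;
 *	unsigned long addr;
 *	...
 *	addr = ftrace_graph_ret_addr(task, &graph_idx, addr,
 *				     (unsigned long *)stack_slot);
 */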
static struct ftrace_ops graph_ops = {
	.func			= ftrace_graph_func,
	.flags			= FTRACE_OPS_GRAPH_STUB,
#ifdef FTRACE_GRAPH_TRAMP_ADDR
	.trampoline		= FTRACE_GRAPH_TRAMP_ADDR,
	/* trampoline_size is only needed for dynamically allocated tramps */
#endif
};
void fgraph_init_ops(struct ftrace_ops *dst_ops,
		     struct ftrace_ops *src_ops)
{
	dst_ops->flags = FTRACE_OPS_FL_PID | FTRACE_OPS_GRAPH_STUB;

#ifdef CONFIG_DYNAMIC_FTRACE
	if (src_ops) {
		dst_ops->func_hash = &src_ops->local_hash;
		mutex_init(&dst_ops->local_hash.regex_lock);
		INIT_LIST_HEAD(&dst_ops->subop_list);
		dst_ops->flags |= FTRACE_OPS_FL_INITIALIZED;
	}
#endif
}
void ftrace_graph_sleep_time_control(bool enable)
{
	fgraph_sleep_time = enable;
}
/*
 * Simply points to ftrace_stub, but with the proper protocol.
 * Defined by the linker script in linux/vmlinux.lds.h
 */
void ftrace_stub_graph(struct ftrace_graph_ret *trace, struct fgraph_ops *gops);

/* The callbacks that hook a function */
trace_func_graph_ret_t ftrace_graph_return = ftrace_stub_graph;
trace_func_graph_ent_t ftrace_graph_entry = ftrace_graph_entry_stub;
/* Try to assign a return stack array on FTRACE_RETSTACK_ALLOC_SIZE tasks. */
static int alloc_retstack_tasklist(unsigned long **ret_stack_list)
{
	int i;
	int ret = 0;
	int start = 0, end = FTRACE_RETSTACK_ALLOC_SIZE;
	struct task_struct *g, *t;

	if (WARN_ON_ONCE(!fgraph_stack_cachep))
		return -ENOMEM;

	for (i = 0; i < FTRACE_RETSTACK_ALLOC_SIZE; i++) {
		ret_stack_list[i] = kmem_cache_alloc(fgraph_stack_cachep, GFP_KERNEL);
		if (!ret_stack_list[i]) {
			start = 0;
			end = i;
			ret = -ENOMEM;
			goto free;
		}
	}

	rcu_read_lock();
	for_each_process_thread(g, t) {
		if (start == end) {
			ret = -EAGAIN;
			goto unlock;
		}

		if (t->ret_stack == NULL) {
			atomic_set(&t->trace_overrun, 0);
			ret_stack_init_task_vars(ret_stack_list[start]);
			t->curr_ret_stack = 0;
			t->curr_ret_depth = -1;
			/* Make sure the tasks see the 0 first: */
			smp_wmb();
			t->ret_stack = ret_stack_list[start++];
		}
	}

unlock:
	rcu_read_unlock();
free:
	for (i = start; i < end; i++)
		kmem_cache_free(fgraph_stack_cachep, ret_stack_list[i]);
	return ret;
}
static void
ftrace_graph_probe_sched_switch(void *ignore, bool preempt,
				struct task_struct *prev,
				struct task_struct *next,
				unsigned int prev_state)
{
	unsigned long long timestamp;

	/*
	 * Does the user want to count the time a function was asleep?
	 * If so, do not update the time stamps.
	 */
	if (fgraph_sleep_time)
		return;

	timestamp = trace_clock_local();

	prev->ftrace_timestamp = timestamp;

	/* only process tasks that we timestamped */
	if (!next->ftrace_timestamp)
		return;

	next->ftrace_sleeptime += timestamp - next->ftrace_timestamp;
}
);
1095 graph_init_task(struct task_struct
*t
, unsigned long *ret_stack
)
1097 atomic_set(&t
->trace_overrun
, 0);
1098 ret_stack_init_task_vars(ret_stack
);
1099 t
->ftrace_timestamp
= 0;
1100 t
->curr_ret_stack
= 0;
1101 t
->curr_ret_depth
= -1;
1102 /* make curr_ret_stack visible before we add the ret_stack */
1104 t
->ret_stack
= ret_stack
;
/*
 * Allocate a return stack for the idle task. May be the first
 * time through, or it may be done by CPU hotplug online.
 */
void ftrace_graph_init_idle_task(struct task_struct *t, int cpu)
{
	t->curr_ret_stack = 0;
	t->curr_ret_depth = -1;

	/*
	 * The idle task has no parent, it either has its own
	 * stack or no stack at all.
	 */
	if (t->ret_stack)
		WARN_ON(t->ret_stack != per_cpu(idle_ret_stack, cpu));

	if (ftrace_graph_active) {
		unsigned long *ret_stack;

		if (WARN_ON_ONCE(!fgraph_stack_cachep))
			return;

		ret_stack = per_cpu(idle_ret_stack, cpu);
		if (!ret_stack) {
			ret_stack = kmem_cache_alloc(fgraph_stack_cachep, GFP_KERNEL);
			if (!ret_stack)
				return;
			per_cpu(idle_ret_stack, cpu) = ret_stack;
		}
		graph_init_task(t, ret_stack);
	}
}
/* Allocate a return stack for newly created task */
void ftrace_graph_init_task(struct task_struct *t)
{
	/* Make sure we do not use the parent ret_stack */
	t->ret_stack = NULL;
	t->curr_ret_stack = 0;
	t->curr_ret_depth = -1;

	if (ftrace_graph_active) {
		unsigned long *ret_stack;

		if (WARN_ON_ONCE(!fgraph_stack_cachep))
			return;

		ret_stack = kmem_cache_alloc(fgraph_stack_cachep, GFP_KERNEL);
		if (!ret_stack)
			return;
		graph_init_task(t, ret_stack);
	}
}
void ftrace_graph_exit_task(struct task_struct *t)
{
	unsigned long *ret_stack = t->ret_stack;

	t->ret_stack = NULL;
	/* NULL must become visible to IRQs before we free it: */
	barrier();

	if (ret_stack) {
		if (WARN_ON_ONCE(!fgraph_stack_cachep))
			return;
		kmem_cache_free(fgraph_stack_cachep, ret_stack);
	}
}
#ifdef CONFIG_DYNAMIC_FTRACE
static int fgraph_pid_func(struct ftrace_graph_ent *trace,
			   struct fgraph_ops *gops)
{
	struct trace_array *tr = gops->ops.private;
	int pid;

	if (tr) {
		pid = this_cpu_read(tr->array_buffer.data->ftrace_ignore_pid);
		if (pid == FTRACE_PID_IGNORE)
			return 0;
		if (pid != FTRACE_PID_TRACE &&
		    pid != current->pid)
			return 0;
	}

	return gops->saved_func(trace, gops);
}
void fgraph_update_pid_func(void)
{
	struct fgraph_ops *gops;
	struct ftrace_ops *op;

	if (!(graph_ops.flags & FTRACE_OPS_FL_INITIALIZED))
		return;

	list_for_each_entry(op, &graph_ops.subop_list, list) {
		if (op->flags & FTRACE_OPS_FL_PID) {
			gops = container_of(op, struct fgraph_ops, ops);
			gops->entryfunc = ftrace_pids_enabled(op) ?
				fgraph_pid_func : gops->saved_func;
			if (ftrace_graph_active == 1)
				static_call_update(fgraph_func, gops->entryfunc);
		}
	}
}
#endif
/* Allocate a return stack for each task */
static int start_graph_tracing(void)
{
	unsigned long **ret_stack_list;
	int ret;

	ret_stack_list = kcalloc(FTRACE_RETSTACK_ALLOC_SIZE,
				 sizeof(*ret_stack_list), GFP_KERNEL);

	if (!ret_stack_list)
		return -ENOMEM;

	do {
		ret = alloc_retstack_tasklist(ret_stack_list);
	} while (ret == -EAGAIN);

	if (!ret) {
		ret = register_trace_sched_switch(ftrace_graph_probe_sched_switch, NULL);
		if (ret)
			pr_info("ftrace_graph: Couldn't activate tracepoint probe to kernel_sched_switch\n");
	}

	kfree(ret_stack_list);
	return ret;
}
static void init_task_vars(int idx)
{
	struct task_struct *g, *t;
	int cpu;

	for_each_online_cpu(cpu) {
		if (idle_task(cpu)->ret_stack)
			ret_stack_set_task_var(idle_task(cpu), idx, 0);
	}

	read_lock(&tasklist_lock);
	for_each_process_thread(g, t) {
		if (t->ret_stack)
			ret_stack_set_task_var(t, idx, 0);
	}
	read_unlock(&tasklist_lock);
}
static void ftrace_graph_enable_direct(bool enable_branch, struct fgraph_ops *gops)
{
	trace_func_graph_ent_t func = NULL;
	trace_func_graph_ret_t retfunc = NULL;
	int i;

	if (gops) {
		func = gops->entryfunc;
		retfunc = gops->retfunc;
		fgraph_direct_gops = gops;
	} else {
		for_each_set_bit(i, &fgraph_array_bitmask,
				 sizeof(fgraph_array_bitmask) * BITS_PER_BYTE) {
			func = fgraph_array[i]->entryfunc;
			retfunc = fgraph_array[i]->retfunc;
			fgraph_direct_gops = fgraph_array[i];
		}
	}
	if (WARN_ON_ONCE(!func))
		return;

	static_call_update(fgraph_func, func);
	static_call_update(fgraph_retfunc, retfunc);
	if (enable_branch)
		static_branch_enable(&fgraph_do_direct);
}
static void ftrace_graph_disable_direct(bool disable_branch)
{
	if (disable_branch)
		static_branch_disable(&fgraph_do_direct);
	static_call_update(fgraph_func, ftrace_graph_entry_stub);
	static_call_update(fgraph_retfunc, ftrace_graph_ret_stub);
	fgraph_direct_gops = &fgraph_stub;
}
/* The cpu_boot init_task->ret_stack will never be freed */
static int fgraph_cpu_init(unsigned int cpu)
{
	if (!idle_task(cpu)->ret_stack)
		ftrace_graph_init_idle_task(idle_task(cpu), cpu);
	return 0;
}
int register_ftrace_graph(struct fgraph_ops *gops)
{
	static bool fgraph_initialized;
	int command = 0;
	int ret = 0;
	int i = -1;

	guard(mutex)(&ftrace_lock);

	if (!fgraph_stack_cachep) {
		fgraph_stack_cachep = kmem_cache_create("fgraph_stack",
							SHADOW_STACK_SIZE,
							SHADOW_STACK_SIZE, 0, NULL);
		if (!fgraph_stack_cachep)
			return -ENOMEM;
	}

	if (!fgraph_initialized) {
		ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "fgraph:online",
					fgraph_cpu_init, NULL);
		if (ret < 0) {
			pr_warn("fgraph: Failed to initialize cpu hotplug support\n");
			return ret;
		}
		fgraph_initialized = true;
		ret = 0;
	}

	if (!fgraph_array[0]) {
		/* The array must always have real data on it */
		for (i = 0; i < FGRAPH_ARRAY_SIZE; i++)
			fgraph_array[i] = &fgraph_stub;
		fgraph_lru_init();
	}

	i = fgraph_lru_alloc_index();
	if (i < 0 || WARN_ON_ONCE(fgraph_array[i] != &fgraph_stub))
		return -ENOSPC;
	gops->idx = i;

	ftrace_graph_active++;

	if (ftrace_graph_active == 2)
		ftrace_graph_disable_direct(true);

	if (ftrace_graph_active == 1) {
		ftrace_graph_enable_direct(false, gops);
		register_pm_notifier(&ftrace_suspend_notifier);
		ret = start_graph_tracing();
		if (ret)
			goto error;
		/*
		 * Some archs just test to see if these are not
		 * the default function
		 */
		ftrace_graph_return = return_run;
		ftrace_graph_entry = entry_run;
		command = FTRACE_START_FUNC_RET;
	} else {
		init_task_vars(gops->idx);
	}
	/* Always save the function, and reset at unregistering */
	gops->saved_func = gops->entryfunc;

	ret = ftrace_startup_subops(&graph_ops, &gops->ops, command);
	if (!ret)
		fgraph_array[i] = gops;

error:
	if (ret) {
		ftrace_graph_active--;
		gops->saved_func = NULL;
		fgraph_lru_release_index(i);
	}
	return ret;
}
void unregister_ftrace_graph(struct fgraph_ops *gops)
{
	int command = 0;

	guard(mutex)(&ftrace_lock);

	if (unlikely(!ftrace_graph_active))
		return;

	if (unlikely(gops->idx < 0 || gops->idx >= FGRAPH_ARRAY_SIZE ||
		     fgraph_array[gops->idx] != gops))
		return;

	if (fgraph_lru_release_index(gops->idx) < 0)
		return;

	fgraph_array[gops->idx] = &fgraph_stub;

	ftrace_graph_active--;

	if (!ftrace_graph_active)
		command = FTRACE_STOP_FUNC_RET;

	ftrace_shutdown_subops(&graph_ops, &gops->ops, command);

	if (ftrace_graph_active == 1)
		ftrace_graph_enable_direct(true, NULL);
	else if (!ftrace_graph_active)
		ftrace_graph_disable_direct(false);

	if (!ftrace_graph_active) {
		ftrace_graph_return = ftrace_stub_graph;
		ftrace_graph_entry = ftrace_graph_entry_stub;
		unregister_pm_notifier(&ftrace_suspend_notifier);
		unregister_trace_sched_switch(ftrace_graph_probe_sched_switch, NULL);
	}
	gops->saved_func = NULL;
}