// SPDX-License-Identifier: GPL-2.0
/*
 * trace context switch
 *
 * Copyright (C) 2007 Steven Rostedt <srostedt@redhat.com>
 *
 */
#include <linux/module.h>
#include <linux/kallsyms.h>
#include <linux/uaccess.h>
#include <linux/kmemleak.h>
#include <linux/ftrace.h>
#include <trace/events/sched.h>

#include "trace.h"

#define RECORD_CMDLINE	1
#define RECORD_TGID	2
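
/*
 * Note: these values are expected to line up with TRACE_RECORD_CMDLINE and
 * TRACE_RECORD_TGID (see trace.h), since the probes below feed them straight
 * into tracing_record_taskinfo_sched_switch().
 */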

static int		sched_cmdline_ref;
static int		sched_tgid_ref;
static DEFINE_MUTEX(sched_register_mutex);
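
/*
 * The two counters track how many users currently want comm (cmdline) and
 * tgid recording respectively; the scheduler tracepoints are registered on
 * the first user and unregistered when the last one goes away, all under
 * sched_register_mutex.
 */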

static void
probe_sched_switch(void *ignore, bool preempt,
		   struct task_struct *prev, struct task_struct *next,
		   unsigned int prev_state)
{
	int flags;

	flags = (RECORD_TGID * !!sched_tgid_ref) +
		(RECORD_CMDLINE * !!sched_cmdline_ref);

	if (!flags)
		return;
	tracing_record_taskinfo_sched_switch(prev, next, flags);
}

static void
probe_sched_wakeup(void *ignore, struct task_struct *wakee)
{
	int flags;

	flags = (RECORD_TGID * !!sched_tgid_ref) +
		(RECORD_CMDLINE * !!sched_cmdline_ref);

	if (!flags)
		return;
	tracing_record_taskinfo_sched_switch(current, wakee, flags);
}

static int tracing_sched_register(void)
{
	int ret;

	ret = register_trace_sched_wakeup(probe_sched_wakeup, NULL);
	if (ret) {
		pr_info("wakeup trace: Couldn't activate tracepoint probe to kernel_sched_wakeup\n");
		return ret;
	}

	ret = register_trace_sched_wakeup_new(probe_sched_wakeup, NULL);
	if (ret) {
		pr_info("wakeup trace: Couldn't activate tracepoint probe to kernel_sched_wakeup_new\n");
		goto fail_deprobe;
	}

	ret = register_trace_sched_switch(probe_sched_switch, NULL);
	if (ret) {
		pr_info("sched trace: Couldn't activate tracepoint probe to kernel_sched_switch\n");
		goto fail_deprobe_wake_new;
	}

	return ret;

fail_deprobe_wake_new:
	unregister_trace_sched_wakeup_new(probe_sched_wakeup, NULL);
fail_deprobe:
	unregister_trace_sched_wakeup(probe_sched_wakeup, NULL);
	return ret;
}

static void tracing_sched_unregister(void)
{
	unregister_trace_sched_switch(probe_sched_switch, NULL);
	unregister_trace_sched_wakeup_new(probe_sched_wakeup, NULL);
	unregister_trace_sched_wakeup(probe_sched_wakeup, NULL);
}

static void tracing_start_sched_switch(int ops)
{
	bool sched_register;

	mutex_lock(&sched_register_mutex);
	sched_register = (!sched_cmdline_ref && !sched_tgid_ref);

	switch (ops) {
	case RECORD_CMDLINE:
		sched_cmdline_ref++;
		break;

	case RECORD_TGID:
		sched_tgid_ref++;
		break;
	}

	if (sched_register && (sched_cmdline_ref || sched_tgid_ref))
		tracing_sched_register();
	mutex_unlock(&sched_register_mutex);
}

static void tracing_stop_sched_switch(int ops)
{
	mutex_lock(&sched_register_mutex);

	switch (ops) {
	case RECORD_CMDLINE:
		sched_cmdline_ref--;
		break;

	case RECORD_TGID:
		sched_tgid_ref--;
		break;
	}

	if (!sched_cmdline_ref && !sched_tgid_ref)
		tracing_sched_unregister();
	mutex_unlock(&sched_register_mutex);
}

void tracing_start_cmdline_record(void)
{
	tracing_start_sched_switch(RECORD_CMDLINE);
}

void tracing_stop_cmdline_record(void)
{
	tracing_stop_sched_switch(RECORD_CMDLINE);
}

void tracing_start_tgid_record(void)
{
	tracing_start_sched_switch(RECORD_TGID);
}

void tracing_stop_tgid_record(void)
{
	tracing_stop_sched_switch(RECORD_TGID);
}

/*
 * The tgid_map array maps from pid to tgid; i.e. the value stored at index i
 * is the tgid last observed corresponding to pid=i.
 */
static int *tgid_map;

/* The maximum valid index into tgid_map. */
static size_t tgid_map_max;
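
/*
 * A minimal sketch of the resulting lookup path (using the functions defined
 * below): once trace_alloc_tgid_map() has run and trace_save_tgid() has seen
 * a task, trace_find_tgid(task->pid) simply returns tgid_map[task->pid].
 */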

#define SAVED_CMDLINES_DEFAULT 128
#define NO_CMDLINE_MAP UINT_MAX
/*
 * Preemption must be disabled before acquiring trace_cmdline_lock.
 * The various trace_arrays' max_lock must be acquired in a context
 * where interrupts are disabled.
 */
static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
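
/*
 * The canonical acquisition pattern, then, is the one trace_find_cmdline()
 * uses below:
 *
 *	preempt_disable();
 *	arch_spin_lock(&trace_cmdline_lock);
 *	...
 *	arch_spin_unlock(&trace_cmdline_lock);
 *	preempt_enable();
 *
 * The saved_cmdlines seq_file code splits the same pattern across its
 * start/stop callbacks.
 */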

struct saved_cmdlines_buffer {
	unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
	unsigned *map_cmdline_to_pid;
	unsigned cmdline_num;
	int cmdline_idx;
	char saved_cmdlines[];
};
static struct saved_cmdlines_buffer *savedcmd;
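
/*
 * Layout of the single page allocation backing a saved_cmdlines_buffer, as
 * set up by allocate_cmdlines_buffer() below:
 *
 *	[struct saved_cmdlines_buffer header]
 *	[saved_cmdlines:      cmdline_num * TASK_COMM_LEN bytes]
 *	[map_cmdline_to_pid:  cmdline_num * sizeof(unsigned) bytes]
 */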

/* Holds the size of a cmdline and pid element */
#define SAVED_CMDLINE_MAP_ELEMENT_SIZE(s)			\
	(TASK_COMM_LEN + sizeof((s)->map_cmdline_to_pid[0]))

static inline char *get_saved_cmdlines(int idx)
{
	return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
}

static inline void set_cmdline(int idx, const char *cmdline)
{
	strscpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
}

static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
{
	int order = get_order(sizeof(*s) + s->cmdline_num * TASK_COMM_LEN);

	kmemleak_free(s);
	free_pages((unsigned long)s, order);
}

static struct saved_cmdlines_buffer *allocate_cmdlines_buffer(unsigned int val)
{
	struct saved_cmdlines_buffer *s;
	struct page *page;
	int orig_size, size;
	int order;

	/* Figure out how much is needed to hold the given number of cmdlines */
	orig_size = sizeof(*s) + val * SAVED_CMDLINE_MAP_ELEMENT_SIZE(s);
	order = get_order(orig_size);
	size = 1 << (order + PAGE_SHIFT);
	page = alloc_pages(GFP_KERNEL, order);
	if (!page)
		return NULL;

	s = page_address(page);
	kmemleak_alloc(s, size, 1, GFP_KERNEL);
	memset(s, 0, sizeof(*s));

	/* Round up to actual allocation */
	val = (size - sizeof(*s)) / SAVED_CMDLINE_MAP_ELEMENT_SIZE(s);
	s->cmdline_num = val;

	/* Place map_cmdline_to_pid array right after saved_cmdlines */
	s->map_cmdline_to_pid = (unsigned *)&s->saved_cmdlines[val * TASK_COMM_LEN];

	memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
	       sizeof(s->map_pid_to_cmdline));
	memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
	       val * sizeof(*s->map_cmdline_to_pid));

	return s;
}

int trace_create_savedcmd(void)
{
	savedcmd = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT);

	return savedcmd ? 0 : -ENOMEM;
}

int trace_save_cmdline(struct task_struct *tsk)
{
	unsigned tpid, idx;

	/* treat recording of idle task as a success */
	if (!tsk->pid)
		return 1;

	tpid = tsk->pid & (PID_MAX_DEFAULT - 1);

	/*
	 * It's not the end of the world if we don't get
	 * the lock, but we also don't want to spin
	 * nor do we want to disable interrupts,
	 * so if we miss here, then better luck next time.
	 *
	 * This is called within the scheduler and wake up, so interrupts
	 * should be disabled and the run queue lock held.
	 */
	lockdep_assert_preemption_disabled();
	if (!arch_spin_trylock(&trace_cmdline_lock))
		return 0;

	idx = savedcmd->map_pid_to_cmdline[tpid];
	if (idx == NO_CMDLINE_MAP) {
		idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;

		savedcmd->map_pid_to_cmdline[tpid] = idx;
		savedcmd->cmdline_idx = idx;
	}

	savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
	set_cmdline(idx, tsk->comm);

	arch_spin_unlock(&trace_cmdline_lock);

	return 1;
}
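
/*
 * Note the eviction semantics this gives: pids index map_pid_to_cmdline
 * modulo PID_MAX_DEFAULT, and cmdline slots are recycled round-robin via
 * cmdline_idx, so a busy system can overwrite the comm saved for an older
 * pid. __trace_find_cmdline() below detects the stale case by checking
 * map_cmdline_to_pid against the pid being looked up.
 */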

static void __trace_find_cmdline(int pid, char comm[])
{
	unsigned map;
	int tpid;

	if (!pid) {
		strcpy(comm, "<idle>");
		return;
	}

	if (WARN_ON_ONCE(pid < 0)) {
		strcpy(comm, "<XXX>");
		return;
	}

	tpid = pid & (PID_MAX_DEFAULT - 1);
	map = savedcmd->map_pid_to_cmdline[tpid];
	if (map != NO_CMDLINE_MAP) {
		tpid = savedcmd->map_cmdline_to_pid[map];
		if (tpid == pid) {
			strscpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
			return;
		}
	}
	strcpy(comm, "<...>");
}

void trace_find_cmdline(int pid, char comm[])
{
	preempt_disable();
	arch_spin_lock(&trace_cmdline_lock);

	__trace_find_cmdline(pid, comm);

	arch_spin_unlock(&trace_cmdline_lock);
	preempt_enable();
}

static int *trace_find_tgid_ptr(int pid)
{
	/*
	 * Pairs with the smp_store_release in trace_alloc_tgid_map() to
	 * ensure that if we observe a non-NULL tgid_map then we also observe
	 * the correct tgid_map_max.
	 */
	int *map = smp_load_acquire(&tgid_map);

	if (unlikely(!map || pid > tgid_map_max))
		return NULL;

	return &map[pid];
}

int trace_find_tgid(int pid)
{
	int *ptr = trace_find_tgid_ptr(pid);

	return ptr ? *ptr : 0;
}

static int trace_save_tgid(struct task_struct *tsk)
{
	int *ptr;

	/* treat recording of idle task as a success */
	if (!tsk->pid)
		return 1;

	ptr = trace_find_tgid_ptr(tsk->pid);
	if (!ptr)
		return 0;

	*ptr = tsk->tgid;
	return 1;
}

static bool tracing_record_taskinfo_skip(int flags)
{
	if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID))))
		return true;
	if (!__this_cpu_read(trace_taskinfo_save))
		return true;
	return false;
}
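
/*
 * trace_taskinfo_save is a per-cpu flag maintained by the tracing core
 * (defined elsewhere): when it is clear there is nothing new to record and
 * the save paths are skipped entirely. The recorders below clear it only
 * once all requested saves succeed, so a failed save (e.g. a missed trylock)
 * is retried on the next opportunity.
 */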

/**
 * tracing_record_taskinfo - record the task info of a task
 *
 * @task:  task to record
 * @flags: TRACE_RECORD_CMDLINE for recording comm
 *         TRACE_RECORD_TGID for recording tgid
 */
void tracing_record_taskinfo(struct task_struct *task, int flags)
{
	bool done;

	if (tracing_record_taskinfo_skip(flags))
		return;

	/*
	 * Record as much task information as possible. If some fail, continue
	 * to try to record the others.
	 */
	done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(task);
	done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(task);

	/* If recording any information failed, retry again soon. */
	if (!done)
		return;

	__this_cpu_write(trace_taskinfo_save, false);
}

/**
 * tracing_record_taskinfo_sched_switch - record task info for sched_switch
 *
 * @prev: previous task during sched_switch
 * @next: next task during sched_switch
 * @flags: TRACE_RECORD_CMDLINE for recording comm
 *         TRACE_RECORD_TGID for recording tgid
 */
void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
					  struct task_struct *next, int flags)
{
	bool done;

	if (tracing_record_taskinfo_skip(flags))
		return;

	/*
	 * Record as much task information as possible. If some fail, continue
	 * to try to record the others.
	 */
	done  = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(prev);
	done &= !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(next);
	done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(prev);
	done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(next);

	/* If recording any information failed, retry again soon. */
	if (!done)
		return;

	__this_cpu_write(trace_taskinfo_save, false);
}

/* Helpers to record specific task information */
void tracing_record_cmdline(struct task_struct *task)
{
	tracing_record_taskinfo(task, TRACE_RECORD_CMDLINE);
}

void tracing_record_tgid(struct task_struct *task)
{
	tracing_record_taskinfo(task, TRACE_RECORD_TGID);
}

int trace_alloc_tgid_map(void)
{
	int *map;

	if (tgid_map)
		return 0;

	tgid_map_max = pid_max;
	map = kvcalloc(tgid_map_max + 1, sizeof(*tgid_map), GFP_KERNEL);
	if (!map)
		return -ENOMEM;

	/*
	 * Pairs with smp_load_acquire() in
	 * trace_find_tgid_ptr() to ensure that if it observes
	 * the tgid_map we just allocated then it also observes
	 * the corresponding tgid_map_max value.
	 */
	smp_store_release(&tgid_map, map);
	return 0;
}
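
/*
 * Sizing note: with the default pid_max of 32768 this table costs
 * (32768 + 1) * sizeof(int), roughly 128 KiB, which is presumably why the
 * allocation is deferred to this call rather than done unconditionally at
 * boot.
 */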

static void *saved_tgids_next(struct seq_file *m, void *v, loff_t *pos)
{
	int pid = ++(*pos);

	return trace_find_tgid_ptr(pid);
}

static void *saved_tgids_start(struct seq_file *m, loff_t *pos)
{
	int pid = *pos;

	return trace_find_tgid_ptr(pid);
}

static void saved_tgids_stop(struct seq_file *m, void *v)
{
}

static int saved_tgids_show(struct seq_file *m, void *v)
{
	int *entry = (int *)v;
	int pid = entry - tgid_map;
	int tgid = *entry;

	if (tgid == 0)
		return SEQ_SKIP;

	seq_printf(m, "%d %d\n", pid, tgid);
	return 0;
}

static const struct seq_operations tracing_saved_tgids_seq_ops = {
	.start		= saved_tgids_start,
	.stop		= saved_tgids_stop,
	.next		= saved_tgids_next,
	.show		= saved_tgids_show,
};

static int tracing_saved_tgids_open(struct inode *inode, struct file *filp)
{
	int ret;

	ret = tracing_check_open_get_tr(NULL);
	if (ret)
		return ret;

	return seq_open(filp, &tracing_saved_tgids_seq_ops);
}

const struct file_operations tracing_saved_tgids_fops = {
	.open		= tracing_saved_tgids_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= seq_release,
};
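
/*
 * This backs the "saved_tgids" file in tracefs. A quick usage sketch
 * (assuming tracefs is mounted at /sys/kernel/tracing); each output line is
 * "<pid> <tgid>":
 *
 *	# echo 1 > /sys/kernel/tracing/options/record-tgid
 *	# cat /sys/kernel/tracing/saved_tgids
 *	<pid> <tgid>
 *	...
 */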

static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
{
	unsigned int *ptr = v;

	if (*pos || m->count)
		ptr++;

	(*pos)++;

	for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
	     ptr++) {
		if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
			continue;
		return ptr;
	}

	return NULL;
}

static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
{
	void *v;
	loff_t l = 0;

	preempt_disable();
	arch_spin_lock(&trace_cmdline_lock);

	v = &savedcmd->map_cmdline_to_pid[0];
	while (l <= *pos) {
		v = saved_cmdlines_next(m, v, &l);
		if (!v)
			return NULL;
	}

	return v;
}

static void saved_cmdlines_stop(struct seq_file *m, void *v)
{
	arch_spin_unlock(&trace_cmdline_lock);
	preempt_enable();
}

static int saved_cmdlines_show(struct seq_file *m, void *v)
{
	char buf[TASK_COMM_LEN];
	unsigned int *pid = v;

	__trace_find_cmdline(*pid, buf);
	seq_printf(m, "%d %s\n", *pid, buf);
	return 0;
}

static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
	.start		= saved_cmdlines_start,
	.next		= saved_cmdlines_next,
	.stop		= saved_cmdlines_stop,
	.show		= saved_cmdlines_show,
};

static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
{
	int ret;

	ret = tracing_check_open_get_tr(NULL);
	if (ret)
		return ret;

	return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
}

const struct file_operations tracing_saved_cmdlines_fops = {
	.open		= tracing_saved_cmdlines_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= seq_release,
};
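
/*
 * This backs the "saved_cmdlines" file in tracefs; each line is
 * "<pid> <comm>" for every pid that still has a comm in the ring of saved
 * cmdlines, e.g.:
 *
 *	# cat /sys/kernel/tracing/saved_cmdlines
 *	<pid> <comm>
 *	...
 */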

static ssize_t
tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
				 size_t cnt, loff_t *ppos)
{
	char buf[64];
	int r;

	preempt_disable();
	arch_spin_lock(&trace_cmdline_lock);
	r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
	arch_spin_unlock(&trace_cmdline_lock);
	preempt_enable();

	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
}

void trace_free_saved_cmdlines_buffer(void)
{
	free_saved_cmdlines_buffer(savedcmd);
}

static int tracing_resize_saved_cmdlines(unsigned int val)
{
	struct saved_cmdlines_buffer *s, *savedcmd_temp;

	s = allocate_cmdlines_buffer(val);
	if (!s)
		return -ENOMEM;

	preempt_disable();
	arch_spin_lock(&trace_cmdline_lock);
	savedcmd_temp = savedcmd;
	savedcmd = s;
	arch_spin_unlock(&trace_cmdline_lock);
	preempt_enable();
	free_saved_cmdlines_buffer(savedcmd_temp);

	return 0;
}

static ssize_t
tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
				  size_t cnt, loff_t *ppos)
{
	unsigned long val;
	int ret;

	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
	if (ret)
		return ret;

	/* must have at least 1 entry and at most PID_MAX_DEFAULT entries */
	if (!val || val > PID_MAX_DEFAULT)
		return -EINVAL;

	ret = tracing_resize_saved_cmdlines((unsigned int)val);
	if (ret < 0)
		return ret;

	*ppos += cnt;

	return cnt;
}

const struct file_operations tracing_saved_cmdlines_size_fops = {
	.open		= tracing_open_generic,
	.read		= tracing_saved_cmdlines_size_read,
	.write		= tracing_saved_cmdlines_size_write,
	.llseek		= generic_file_llseek,
};
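
/*
 * This backs "saved_cmdlines_size" in tracefs. A usage sketch (assuming
 * tracefs at /sys/kernel/tracing); note that because the backing buffer is
 * sized in whole pages, the value read back may be larger than what was
 * written:
 *
 *	# cat /sys/kernel/tracing/saved_cmdlines_size
 *	# echo 1024 > /sys/kernel/tracing/saved_cmdlines_size
 */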