// SPDX-License-Identifier: GPL-2.0-only
/*
 * Print the CFS rbtree and other debugging details
 *
 * Copyright(C) 2007, Red Hat, Inc., Ingo Molnar
 */

/*
 * This allows printing both to /sys/kernel/debug/sched/debug and
 * to the console
 */
#define SEQ_printf(m, x...)			\
 do {						\
	if (m)					\
		seq_printf(m, x);		\
	else					\
		pr_cont(x);			\
 } while (0)
/*
 * Ease the printing of nsec fields:
 */
static long long nsec_high(unsigned long long nsec)
{
	if ((long long)nsec < 0) {
		nsec = -nsec;
		do_div(nsec, 1000000);
		return -nsec;
	}
	do_div(nsec, 1000000);

	return nsec;
}

static unsigned long nsec_low(unsigned long long nsec)
{
	if ((long long)nsec < 0)
		nsec = -nsec;

	return do_div(nsec, 1000000);
}

#define SPLIT_NS(x) nsec_high(x), nsec_low(x)
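
/*
 * Usage note: SPLIT_NS() expands to two printf arguments, so callers pair it
 * with a "%Ld.%06ld"-style format to render a nanosecond value as msec.usec,
 * as the print_*() helpers below do, e.g.:
 *
 *	SEQ_printf(m, "  .%-30s: %Ld.%06ld\n", "min_vruntime", SPLIT_NS(min_vruntime));
 */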
#define SCHED_FEAT(name, enabled)	\
	#name ,

static const char * const sched_feat_names[] = {
#include "features.h"
};

#undef SCHED_FEAT

static int sched_feat_show(struct seq_file *m, void *v)
{
	int i;

	for (i = 0; i < __SCHED_FEAT_NR; i++) {
		if (!(sysctl_sched_features & (1UL << i)))
			seq_puts(m, "NO_");
		seq_printf(m, "%s ", sched_feat_names[i]);
	}
	seq_puts(m, "\n");

	return 0;
}
#ifdef CONFIG_JUMP_LABEL

#define jump_label_key__true  STATIC_KEY_INIT_TRUE
#define jump_label_key__false STATIC_KEY_INIT_FALSE

#define SCHED_FEAT(name, enabled)	\
	jump_label_key__##enabled ,

struct static_key sched_feat_keys[__SCHED_FEAT_NR] = {
#include "features.h"
};

#undef SCHED_FEAT

static void sched_feat_disable(int i)
{
	static_key_disable_cpuslocked(&sched_feat_keys[i]);
}

static void sched_feat_enable(int i)
{
	static_key_enable_cpuslocked(&sched_feat_keys[i]);
}
#else
static void sched_feat_disable(int i) { };
static void sched_feat_enable(int i) { };
#endif /* CONFIG_JUMP_LABEL */
static int sched_feat_set(char *cmp)
{
	int i;
	int neg = 0;

	if (strncmp(cmp, "NO_", 3) == 0) {
		neg = 1;
		cmp += 3;
	}

	i = match_string(sched_feat_names, __SCHED_FEAT_NR, cmp);
	if (i < 0)
		return i;

	if (neg) {
		sysctl_sched_features &= ~(1UL << i);
		sched_feat_disable(i);
	} else {
		sysctl_sched_features |= (1UL << i);
		sched_feat_enable(i);
	}

	return 0;
}
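
/*
 * Usage sketch (assumed shell session, not part of this file): the features
 * listed by sched_feat_show() are toggled through the "features" debugfs file
 * created in sched_init_debug() below; a "NO_" prefix clears the bit:
 *
 *	cat /sys/kernel/debug/sched/features
 *	echo NO_TTWU_QUEUE > /sys/kernel/debug/sched/features
 *	echo TTWU_QUEUE    > /sys/kernel/debug/sched/features
 *
 * TTWU_QUEUE is only an illustrative name; the authoritative list comes from
 * features.h via sched_feat_names[].
 */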
static ssize_t
sched_feat_write(struct file *filp, const char __user *ubuf,
		 size_t cnt, loff_t *ppos)
{
	char buf[64];
	char *cmp;
	int ret;
	struct inode *inode;

	if (cnt > 63)
		cnt = 63;

	if (copy_from_user(&buf, ubuf, cnt))
		return -EFAULT;

	buf[cnt] = 0;
	cmp = strstrip(buf);

	/* Ensure the static_key remains in a consistent state */
	inode = file_inode(filp);
	cpus_read_lock();
	inode_lock(inode);
	ret = sched_feat_set(cmp);
	inode_unlock(inode);
	cpus_read_unlock();
	if (ret < 0)
		return ret;

	*ppos += cnt;

	return cnt;
}

static int sched_feat_open(struct inode *inode, struct file *filp)
{
	return single_open(filp, sched_feat_show, NULL);
}
static const struct file_operations sched_feat_fops = {
	.open		= sched_feat_open,
	.write		= sched_feat_write,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= single_release,
};
static ssize_t
sched_scaling_write(struct file *filp, const char __user *ubuf,
		    size_t cnt, loff_t *ppos)
{
	char buf[16];
	unsigned int scaling;

	if (cnt > 15)
		cnt = 15;

	if (copy_from_user(&buf, ubuf, cnt))
		return -EFAULT;
	buf[cnt] = '\0';

	if (kstrtouint(buf, 10, &scaling))
		return -EINVAL;

	if (scaling >= SCHED_TUNABLESCALING_END)
		return -EINVAL;

	sysctl_sched_tunable_scaling = scaling;
	if (sched_update_scaling())
		return -EINVAL;

	*ppos += cnt;
	return cnt;
}

static int sched_scaling_show(struct seq_file *m, void *v)
{
	seq_printf(m, "%d\n", sysctl_sched_tunable_scaling);
	return 0;
}

static int sched_scaling_open(struct inode *inode, struct file *filp)
{
	return single_open(filp, sched_scaling_show, NULL);
}

static const struct file_operations sched_scaling_fops = {
	.open		= sched_scaling_open,
	.write		= sched_scaling_write,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= single_release,
};
#ifdef CONFIG_PREEMPT_DYNAMIC

static ssize_t
sched_dynamic_write(struct file *filp, const char __user *ubuf,
		    size_t cnt, loff_t *ppos)
{
	char buf[16];
	int mode;

	if (cnt > 15)
		cnt = 15;

	if (copy_from_user(&buf, ubuf, cnt))
		return -EFAULT;

	buf[cnt] = 0;
	mode = sched_dynamic_mode(strstrip(buf));
	if (mode < 0)
		return mode;

	sched_dynamic_update(mode);

	*ppos += cnt;

	return cnt;
}

static int sched_dynamic_show(struct seq_file *m, void *v)
{
	static const char * preempt_modes[] = {
		"none", "voluntary", "full", "lazy",
	};
	int j = ARRAY_SIZE(preempt_modes) - !IS_ENABLED(CONFIG_ARCH_HAS_PREEMPT_LAZY);
	int i = IS_ENABLED(CONFIG_PREEMPT_RT) * 2;

	for (; i < j; i++) {
		if (preempt_dynamic_mode == i)
			seq_puts(m, "(");
		seq_puts(m, preempt_modes[i]);
		if (preempt_dynamic_mode == i)
			seq_puts(m, ")");

		seq_puts(m, " ");
	}

	seq_puts(m, "\n");
	return 0;
}

static int sched_dynamic_open(struct inode *inode, struct file *filp)
{
	return single_open(filp, sched_dynamic_show, NULL);
}

static const struct file_operations sched_dynamic_fops = {
	.open		= sched_dynamic_open,
	.write		= sched_dynamic_write,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= single_release,
};

#endif /* CONFIG_PREEMPT_DYNAMIC */
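
/*
 * Usage sketch (assumed shell session): with CONFIG_PREEMPT_DYNAMIC the
 * current preemption model is reported and switched at runtime through the
 * "preempt" debugfs file registered in sched_init_debug():
 *
 *	cat /sys/kernel/debug/sched/preempt
 *	echo full > /sys/kernel/debug/sched/preempt
 *
 * sched_dynamic_show() brackets the active mode in parentheses.
 */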
__read_mostly bool sched_debug_verbose;

#ifdef CONFIG_SMP
static struct dentry *sd_dentry;

static ssize_t
sched_verbose_write(struct file *filp, const char __user *ubuf,
		    size_t cnt, loff_t *ppos)
{
	ssize_t result;
	bool orig;

	mutex_lock(&sched_domains_mutex);

	orig = sched_debug_verbose;
	result = debugfs_write_file_bool(filp, ubuf, cnt, ppos);

	if (sched_debug_verbose && !orig)
		update_sched_domain_debugfs();
	else if (!sched_debug_verbose && orig) {
		debugfs_remove(sd_dentry);
		sd_dentry = NULL;
	}

	mutex_unlock(&sched_domains_mutex);

	return result;
}
#else
#define sched_verbose_write debugfs_write_file_bool
#endif

static const struct file_operations sched_verbose_fops = {
	.read		= debugfs_read_file_bool,
	.write		= sched_verbose_write,
	.open		= simple_open,
	.llseek		= default_llseek,
};
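
/*
 * Usage sketch (assumed shell session): the "verbose" boolean file created in
 * sched_init_debug() drives sched_verbose_write(); setting it populates the
 * per-CPU domains/ hierarchy, clearing it removes it again:
 *
 *	echo 1 > /sys/kernel/debug/sched/verbose
 *	ls /sys/kernel/debug/sched/domains/cpu0/
 */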
static const struct seq_operations sched_debug_sops;

static int sched_debug_open(struct inode *inode, struct file *filp)
{
	return seq_open(filp, &sched_debug_sops);
}

static const struct file_operations sched_debug_fops = {
	.open		= sched_debug_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= seq_release,
};
static unsigned long fair_server_period_max = (1UL << 22) * NSEC_PER_USEC; /* ~4 seconds */
static unsigned long fair_server_period_min = (100) * NSEC_PER_USEC;     /* 100 us */
static ssize_t
sched_fair_server_write(struct file *filp, const char __user *ubuf,
			size_t cnt, loff_t *ppos, enum dl_param param)
{
	long cpu = (long) ((struct seq_file *) filp->private_data)->private;
	struct rq *rq = cpu_rq(cpu);
	u64 runtime, period;
	size_t err;
	int retval;
	u64 value;

	err = kstrtoull_from_user(ubuf, cnt, 10, &value);
	if (err)
		return err;

	scoped_guard (rq_lock_irqsave, rq) {
		runtime  = rq->fair_server.dl_runtime;
		period = rq->fair_server.dl_period;

		switch (param) {
		case DL_RUNTIME:
			if (runtime == value)
				break;
			runtime = value;
			break;
		case DL_PERIOD:
			if (value == period)
				break;
			period = value;
			break;
		}

		if (runtime > period ||
		    period > fair_server_period_max ||
		    period < fair_server_period_min) {
			return -EINVAL;
		}

		if (rq->cfs.h_nr_running) {
			update_rq_clock(rq);
			dl_server_stop(&rq->fair_server);
		}

		retval = dl_server_apply_params(&rq->fair_server, runtime, period, 0);
		if (retval)
			cnt = retval;

		if (!runtime)
			printk_deferred("Fair server disabled in CPU %d, system may crash due to starvation.\n",
					cpu_of(rq));

		if (rq->cfs.h_nr_running)
			dl_server_start(&rq->fair_server);
	}

	*ppos += cnt;
	return cnt;
}
static size_t sched_fair_server_show(struct seq_file *m, void *v, enum dl_param param)
{
	unsigned long cpu = (unsigned long) m->private;
	struct rq *rq = cpu_rq(cpu);
	u64 value;

	switch (param) {
	case DL_RUNTIME:
		value = rq->fair_server.dl_runtime;
		break;
	case DL_PERIOD:
		value = rq->fair_server.dl_period;
		break;
	}

	seq_printf(m, "%llu\n", value);
	return 0;
}
static ssize_t
sched_fair_server_runtime_write(struct file *filp, const char __user *ubuf,
				size_t cnt, loff_t *ppos)
{
	return sched_fair_server_write(filp, ubuf, cnt, ppos, DL_RUNTIME);
}

static int sched_fair_server_runtime_show(struct seq_file *m, void *v)
{
	return sched_fair_server_show(m, v, DL_RUNTIME);
}

static int sched_fair_server_runtime_open(struct inode *inode, struct file *filp)
{
	return single_open(filp, sched_fair_server_runtime_show, inode->i_private);
}

static const struct file_operations fair_server_runtime_fops = {
	.open		= sched_fair_server_runtime_open,
	.write		= sched_fair_server_runtime_write,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= single_release,
};
static ssize_t
sched_fair_server_period_write(struct file *filp, const char __user *ubuf,
			       size_t cnt, loff_t *ppos)
{
	return sched_fair_server_write(filp, ubuf, cnt, ppos, DL_PERIOD);
}

static int sched_fair_server_period_show(struct seq_file *m, void *v)
{
	return sched_fair_server_show(m, v, DL_PERIOD);
}

static int sched_fair_server_period_open(struct inode *inode, struct file *filp)
{
	return single_open(filp, sched_fair_server_period_show, inode->i_private);
}

static const struct file_operations fair_server_period_fops = {
	.open		= sched_fair_server_period_open,
	.write		= sched_fair_server_period_write,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= single_release,
};
static struct dentry *debugfs_sched;

static void debugfs_fair_server_init(void)
{
	struct dentry *d_fair;
	unsigned long cpu;

	d_fair = debugfs_create_dir("fair_server", debugfs_sched);
	if (!d_fair)
		return;

	for_each_possible_cpu(cpu) {
		struct dentry *d_cpu;
		char buf[32];

		snprintf(buf, sizeof(buf), "cpu%lu", cpu);
		d_cpu = debugfs_create_dir(buf, d_fair);

		debugfs_create_file("runtime", 0644, d_cpu, (void *) cpu, &fair_server_runtime_fops);
		debugfs_create_file("period", 0644, d_cpu, (void *) cpu, &fair_server_period_fops);
	}
}
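
/*
 * Usage sketch (assumed shell session): the loop above yields one directory
 * per possible CPU, each holding the fair deadline-server parameters in
 * nanoseconds:
 *
 *	cat  /sys/kernel/debug/sched/fair_server/cpu0/runtime
 *	echo 50000000 > /sys/kernel/debug/sched/fair_server/cpu0/runtime
 *
 * Writes are validated by sched_fair_server_write(): runtime must not exceed
 * the period, and the period must stay within
 * [fair_server_period_min, fair_server_period_max].
 */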
static __init int sched_init_debug(void)
{
	struct dentry __maybe_unused *numa;

	debugfs_sched = debugfs_create_dir("sched", NULL);

	debugfs_create_file("features", 0644, debugfs_sched, NULL, &sched_feat_fops);
	debugfs_create_file_unsafe("verbose", 0644, debugfs_sched, &sched_debug_verbose, &sched_verbose_fops);
#ifdef CONFIG_PREEMPT_DYNAMIC
	debugfs_create_file("preempt", 0644, debugfs_sched, NULL, &sched_dynamic_fops);
#endif

	debugfs_create_u32("base_slice_ns", 0644, debugfs_sched, &sysctl_sched_base_slice);

	debugfs_create_u32("latency_warn_ms", 0644, debugfs_sched, &sysctl_resched_latency_warn_ms);
	debugfs_create_u32("latency_warn_once", 0644, debugfs_sched, &sysctl_resched_latency_warn_once);

#ifdef CONFIG_SMP
	debugfs_create_file("tunable_scaling", 0644, debugfs_sched, NULL, &sched_scaling_fops);
	debugfs_create_u32("migration_cost_ns", 0644, debugfs_sched, &sysctl_sched_migration_cost);
	debugfs_create_u32("nr_migrate", 0644, debugfs_sched, &sysctl_sched_nr_migrate);

	mutex_lock(&sched_domains_mutex);
	update_sched_domain_debugfs();
	mutex_unlock(&sched_domains_mutex);
#endif

#ifdef CONFIG_NUMA_BALANCING
	numa = debugfs_create_dir("numa_balancing", debugfs_sched);

	debugfs_create_u32("scan_delay_ms", 0644, numa, &sysctl_numa_balancing_scan_delay);
	debugfs_create_u32("scan_period_min_ms", 0644, numa, &sysctl_numa_balancing_scan_period_min);
	debugfs_create_u32("scan_period_max_ms", 0644, numa, &sysctl_numa_balancing_scan_period_max);
	debugfs_create_u32("scan_size_mb", 0644, numa, &sysctl_numa_balancing_scan_size);
	debugfs_create_u32("hot_threshold_ms", 0644, numa, &sysctl_numa_balancing_hot_threshold);
#endif

	debugfs_create_file("debug", 0444, debugfs_sched, NULL, &sched_debug_fops);

	debugfs_fair_server_init();

	return 0;
}
late_initcall(sched_init_debug);
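
/*
 * Resulting layout under /sys/kernel/debug/sched/ (assuming debugfs is
 * mounted at /sys/kernel/debug): features, verbose, preempt (with
 * CONFIG_PREEMPT_DYNAMIC), base_slice_ns, latency_warn_ms, latency_warn_once,
 * tunable_scaling, migration_cost_ns, nr_migrate, numa_balancing/ (with
 * CONFIG_NUMA_BALANCING), debug, fair_server/ and, when verbose is set,
 * domains/.
 */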
#ifdef CONFIG_SMP

static cpumask_var_t sd_sysctl_cpus;

static int sd_flags_show(struct seq_file *m, void *v)
{
	unsigned long flags = *(unsigned int *)m->private;
	int idx;

	for_each_set_bit(idx, &flags, __SD_FLAG_CNT) {
		seq_puts(m, sd_flag_debug[idx].name);
		seq_puts(m, " ");
	}
	seq_puts(m, "\n");

	return 0;
}

static int sd_flags_open(struct inode *inode, struct file *file)
{
	return single_open(file, sd_flags_show, inode->i_private);
}

static const struct file_operations sd_flags_fops = {
	.open		= sd_flags_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= single_release,
};
static void register_sd(struct sched_domain *sd, struct dentry *parent)
{
#define SDM(type, mode, member)	\
	debugfs_create_##type(#member, mode, parent, &sd->member)

	SDM(ulong, 0644, min_interval);
	SDM(ulong, 0644, max_interval);
	SDM(u64,   0644, max_newidle_lb_cost);
	SDM(u32,   0644, busy_factor);
	SDM(u32,   0644, imbalance_pct);
	SDM(u32,   0644, cache_nice_tries);
	SDM(str,   0444, name);

#undef SDM

	debugfs_create_file("flags", 0444, parent, &sd->flags, &sd_flags_fops);
	debugfs_create_file("groups_flags", 0444, parent, &sd->groups->flags, &sd_flags_fops);
	debugfs_create_u32("level", 0444, parent, (u32 *)&sd->level);
}
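
/*
 * Layout sketch: for each CPU and each of its sched domains,
 * update_sched_domain_debugfs() below creates
 * /sys/kernel/debug/sched/domains/cpuN/domainM/ containing the SDM()
 * tunables plus the read-only flags, groups_flags and level files, e.g.:
 *
 *	cat /sys/kernel/debug/sched/domains/cpu0/domain0/flags
 */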
void update_sched_domain_debugfs(void)
{
	int cpu, i;

	/*
	 * This can unfortunately be invoked before sched_debug_init() creates
	 * the debug directory. Don't touch sd_sysctl_cpus until then.
	 */
	if (!debugfs_sched)
		return;

	if (!sched_debug_verbose)
		return;

	if (!cpumask_available(sd_sysctl_cpus)) {
		if (!alloc_cpumask_var(&sd_sysctl_cpus, GFP_KERNEL))
			return;
		cpumask_copy(sd_sysctl_cpus, cpu_possible_mask);
	}

	if (!sd_dentry) {
		sd_dentry = debugfs_create_dir("domains", debugfs_sched);

		/* rebuild sd_sysctl_cpus if empty since it gets cleared below */
		if (cpumask_empty(sd_sysctl_cpus))
			cpumask_copy(sd_sysctl_cpus, cpu_online_mask);
	}

	for_each_cpu(cpu, sd_sysctl_cpus) {
		struct sched_domain *sd;
		struct dentry *d_cpu;
		char buf[32];

		snprintf(buf, sizeof(buf), "cpu%d", cpu);
		debugfs_lookup_and_remove(buf, sd_dentry);
		d_cpu = debugfs_create_dir(buf, sd_dentry);

		i = 0;
		for_each_domain(cpu, sd) {
			struct dentry *d_sd;

			snprintf(buf, sizeof(buf), "domain%d", i);
			d_sd = debugfs_create_dir(buf, d_cpu);

			register_sd(sd, d_sd);
			i++;
		}

		__cpumask_clear_cpu(cpu, sd_sysctl_cpus);
	}
}
void dirty_sched_domain_sysctl(int cpu)
{
	if (cpumask_available(sd_sysctl_cpus))
		__cpumask_set_cpu(cpu, sd_sysctl_cpus);
}

#endif /* CONFIG_SMP */
#ifdef CONFIG_FAIR_GROUP_SCHED
static void print_cfs_group_stats(struct seq_file *m, int cpu, struct task_group *tg)
{
	struct sched_entity *se = tg->se[cpu];

#define P(F)		SEQ_printf(m, "  .%-30s: %lld\n",	#F, (long long)F)
#define P_SCHEDSTAT(F)	SEQ_printf(m, "  .%-30s: %lld\n",	\
		#F, (long long)schedstat_val(stats->F))
#define PN(F)		SEQ_printf(m, "  .%-30s: %lld.%06ld\n", #F, SPLIT_NS((long long)F))
#define PN_SCHEDSTAT(F)	SEQ_printf(m, "  .%-30s: %lld.%06ld\n", \
		#F, SPLIT_NS((long long)schedstat_val(stats->F)))

	if (!se)
		return;

	PN(se->exec_start);
	PN(se->vruntime);
	PN(se->sum_exec_runtime);

	if (schedstat_enabled()) {
		struct sched_statistics *stats;
		stats = __schedstats_from_se(se);

		PN_SCHEDSTAT(wait_start);
		PN_SCHEDSTAT(sleep_start);
		PN_SCHEDSTAT(block_start);
		PN_SCHEDSTAT(sleep_max);
		PN_SCHEDSTAT(block_max);
		PN_SCHEDSTAT(exec_max);
		PN_SCHEDSTAT(slice_max);
		PN_SCHEDSTAT(wait_max);
		PN_SCHEDSTAT(wait_sum);
		P_SCHEDSTAT(wait_count);
	}

	P(se->load.weight);
	P(se->avg.load_avg);
	P(se->avg.util_avg);
	P(se->avg.runnable_avg);

#undef PN_SCHEDSTAT
#undef PN
#undef P_SCHEDSTAT
#undef P
}
#endif
#ifdef CONFIG_CGROUP_SCHED
static DEFINE_SPINLOCK(sched_debug_lock);
static char group_path[PATH_MAX];

static void task_group_path(struct task_group *tg, char *path, int plen)
{
	if (autogroup_path(tg, path, plen))
		return;

	cgroup_path(tg->css.cgroup, path, plen);
}

/*
 * Only 1 SEQ_printf_task_group_path() caller can use the full length
 * group_path[] for cgroup path. Other simultaneous callers will have
 * to use a shorter stack buffer. A "..." suffix is appended at the end
 * of the stack buffer so that it will show up in case the output length
 * matches the given buffer size to indicate possible path name truncation.
 */
#define SEQ_printf_task_group_path(m, tg, fmt...)			\
{									\
	if (spin_trylock(&sched_debug_lock)) {				\
		task_group_path(tg, group_path, sizeof(group_path));	\
		SEQ_printf(m, fmt, group_path);				\
		spin_unlock(&sched_debug_lock);				\
	} else {							\
		char buf[128];						\
		char *bufend = buf + sizeof(buf) - 3;			\
		task_group_path(tg, buf, bufend - buf);			\
		strcpy(bufend - 1, "...");				\
		SEQ_printf(m, fmt, buf);				\
	}								\
}
#endif
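
/*
 * Design note: the trylock fallback keeps the debug output non-blocking.
 * The common case gets the full PATH_MAX group_path[] buffer; a contending
 * caller falls back to a small on-stack buffer whose possible truncation is
 * made visible by the "..." suffix.
 */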
static void
print_task(struct seq_file *m, struct rq *rq, struct task_struct *p)
{
	if (task_current(rq, p))
		SEQ_printf(m, ">R");
	else
		SEQ_printf(m, " %c", task_state_to_char(p));

	SEQ_printf(m, " %15s %5d %9Ld.%06ld %c %9Ld.%06ld %c %9Ld.%06ld %9Ld.%06ld %9Ld %5d ",
		p->comm, task_pid_nr(p),
		SPLIT_NS(p->se.vruntime),
		entity_eligible(cfs_rq_of(&p->se), &p->se) ? 'E' : 'N',
		SPLIT_NS(p->se.deadline),
		p->se.custom_slice ? 'S' : ' ',
		SPLIT_NS(p->se.slice),
		SPLIT_NS(p->se.sum_exec_runtime),
		(long long)(p->nvcsw + p->nivcsw),
		p->prio);

	SEQ_printf(m, "%9lld.%06ld %9lld.%06ld %9lld.%06ld",
		SPLIT_NS(schedstat_val_or_zero(p->stats.wait_sum)),
		SPLIT_NS(schedstat_val_or_zero(p->stats.sum_sleep_runtime)),
		SPLIT_NS(schedstat_val_or_zero(p->stats.sum_block_runtime)));

#ifdef CONFIG_NUMA_BALANCING
	SEQ_printf(m, " %d %d", task_node(p), task_numa_group_id(p));
#endif
#ifdef CONFIG_CGROUP_SCHED
	SEQ_printf_task_group_path(m, task_group(p), " %s")
#endif

	SEQ_printf(m, "\n");
}
static void print_rq(struct seq_file *m, struct rq *rq, int rq_cpu)
{
	struct task_struct *g, *p;

	SEQ_printf(m, "\n");
	SEQ_printf(m, "runnable tasks:\n");
	SEQ_printf(m, " S            task   PID       vruntime   eligible    "
		   "deadline             slice          sum-exec      switches  "
		   "prio         wait-time        sum-sleep       sum-block"
#ifdef CONFIG_NUMA_BALANCING
		   "  node   group-id"
#endif
#ifdef CONFIG_CGROUP_SCHED
		   "  group-path"
#endif
		   "\n");
	SEQ_printf(m, "-------------------------------------------------------"
		   "------------------------------------------------------"
		   "------------------------------------------------------"
#ifdef CONFIG_NUMA_BALANCING
		   "--------------"
#endif
#ifdef CONFIG_CGROUP_SCHED
		   "--------------"
#endif
		   "\n");

	rcu_read_lock();
	for_each_process_thread(g, p) {
		if (task_cpu(p) != rq_cpu)
			continue;

		print_task(m, rq, p);
	}
	rcu_read_unlock();
}
void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
{
	s64 left_vruntime = -1, min_vruntime, right_vruntime = -1, left_deadline = -1, spread;
	struct sched_entity *last, *first, *root;
	struct rq *rq = cpu_rq(cpu);
	unsigned long flags;

#ifdef CONFIG_FAIR_GROUP_SCHED
	SEQ_printf(m, "\n");
	SEQ_printf_task_group_path(m, cfs_rq->tg, "cfs_rq[%d]:%s\n", cpu);
#else
	SEQ_printf(m, "\n");
	SEQ_printf(m, "cfs_rq[%d]:\n", cpu);
#endif

	raw_spin_rq_lock_irqsave(rq, flags);
	root = __pick_root_entity(cfs_rq);
	if (root)
		left_vruntime = root->min_vruntime;
	first = __pick_first_entity(cfs_rq);
	if (first)
		left_deadline = first->deadline;
	last = __pick_last_entity(cfs_rq);
	if (last)
		right_vruntime = last->vruntime;
	min_vruntime = cfs_rq->min_vruntime;
	raw_spin_rq_unlock_irqrestore(rq, flags);

	SEQ_printf(m, "  .%-30s: %Ld.%06ld\n", "left_deadline",
			SPLIT_NS(left_deadline));
	SEQ_printf(m, "  .%-30s: %Ld.%06ld\n", "left_vruntime",
			SPLIT_NS(left_vruntime));
	SEQ_printf(m, "  .%-30s: %Ld.%06ld\n", "min_vruntime",
			SPLIT_NS(min_vruntime));
	SEQ_printf(m, "  .%-30s: %Ld.%06ld\n", "avg_vruntime",
			SPLIT_NS(avg_vruntime(cfs_rq)));
	SEQ_printf(m, "  .%-30s: %Ld.%06ld\n", "right_vruntime",
			SPLIT_NS(right_vruntime));
	spread = right_vruntime - left_vruntime;
	SEQ_printf(m, "  .%-30s: %Ld.%06ld\n", "spread", SPLIT_NS(spread));
	SEQ_printf(m, "  .%-30s: %d\n", "nr_running", cfs_rq->nr_running);
	SEQ_printf(m, "  .%-30s: %d\n", "h_nr_running", cfs_rq->h_nr_running);
	SEQ_printf(m, "  .%-30s: %d\n", "idle_nr_running",
			cfs_rq->idle_nr_running);
	SEQ_printf(m, "  .%-30s: %d\n", "idle_h_nr_running",
			cfs_rq->idle_h_nr_running);
	SEQ_printf(m, "  .%-30s: %ld\n", "load", cfs_rq->load.weight);
#ifdef CONFIG_SMP
	SEQ_printf(m, "  .%-30s: %lu\n", "load_avg",
			cfs_rq->avg.load_avg);
	SEQ_printf(m, "  .%-30s: %lu\n", "runnable_avg",
			cfs_rq->avg.runnable_avg);
	SEQ_printf(m, "  .%-30s: %lu\n", "util_avg",
			cfs_rq->avg.util_avg);
	SEQ_printf(m, "  .%-30s: %u\n", "util_est",
			cfs_rq->avg.util_est);
	SEQ_printf(m, "  .%-30s: %ld\n", "removed.load_avg",
			cfs_rq->removed.load_avg);
	SEQ_printf(m, "  .%-30s: %ld\n", "removed.util_avg",
			cfs_rq->removed.util_avg);
	SEQ_printf(m, "  .%-30s: %ld\n", "removed.runnable_avg",
			cfs_rq->removed.runnable_avg);
#ifdef CONFIG_FAIR_GROUP_SCHED
	SEQ_printf(m, "  .%-30s: %lu\n", "tg_load_avg_contrib",
			cfs_rq->tg_load_avg_contrib);
	SEQ_printf(m, "  .%-30s: %ld\n", "tg_load_avg",
			atomic_long_read(&cfs_rq->tg->load_avg));
#endif
#endif
#ifdef CONFIG_CFS_BANDWIDTH
	SEQ_printf(m, "  .%-30s: %d\n", "throttled",
			cfs_rq->throttled);
	SEQ_printf(m, "  .%-30s: %d\n", "throttle_count",
			cfs_rq->throttle_count);
#endif

#ifdef CONFIG_FAIR_GROUP_SCHED
	print_cfs_group_stats(m, cpu, cfs_rq->tg);
#endif
}
void print_rt_rq(struct seq_file *m, int cpu, struct rt_rq *rt_rq)
{
#ifdef CONFIG_RT_GROUP_SCHED
	SEQ_printf(m, "\n");
	SEQ_printf_task_group_path(m, rt_rq->tg, "rt_rq[%d]:%s\n", cpu);
#else
	SEQ_printf(m, "\n");
	SEQ_printf(m, "rt_rq[%d]:\n", cpu);
#endif

#define P(x) \
	SEQ_printf(m, "  .%-30s: %Ld\n", #x, (long long)(rt_rq->x))
#define PU(x) \
	SEQ_printf(m, "  .%-30s: %lu\n", #x, (unsigned long)(rt_rq->x))
#define PN(x) \
	SEQ_printf(m, "  .%-30s: %Ld.%06ld\n", #x, SPLIT_NS(rt_rq->x))

	PU(rt_nr_running);

#ifdef CONFIG_RT_GROUP_SCHED
	P(rt_throttled);
	PN(rt_time);
	PN(rt_runtime);
#endif

#undef PN
#undef PU
#undef P
}
void print_dl_rq(struct seq_file *m, int cpu, struct dl_rq *dl_rq)
{
	struct dl_bw *dl_bw;

	SEQ_printf(m, "\n");
	SEQ_printf(m, "dl_rq[%d]:\n", cpu);

#define PU(x) \
	SEQ_printf(m, "  .%-30s: %lu\n", #x, (unsigned long)(dl_rq->x))

	PU(dl_nr_running);
#ifdef CONFIG_SMP
	dl_bw = &cpu_rq(cpu)->rd->dl_bw;
#else
	dl_bw = &dl_rq->dl_bw;
#endif
	SEQ_printf(m, "  .%-30s: %lld\n", "dl_bw->bw", dl_bw->bw);
	SEQ_printf(m, "  .%-30s: %lld\n", "dl_bw->total_bw", dl_bw->total_bw);

#undef PU
}
static void print_cpu(struct seq_file *m, int cpu)
{
	struct rq *rq = cpu_rq(cpu);

#ifdef CONFIG_X86
	{
		unsigned int freq = cpu_khz ? : 1;

		SEQ_printf(m, "cpu#%d, %u.%03u MHz\n",
			   cpu, freq / 1000, (freq % 1000));
	}
#else
	SEQ_printf(m, "cpu#%d\n", cpu);
#endif

#define P(x)								\
do {									\
	if (sizeof(rq->x) == 4)						\
		SEQ_printf(m, "  .%-30s: %d\n", #x, (int)(rq->x));	\
	else								\
		SEQ_printf(m, "  .%-30s: %Ld\n", #x, (long long)(rq->x));\
} while (0)

#define PN(x) \
	SEQ_printf(m, "  .%-30s: %Ld.%06ld\n", #x, SPLIT_NS(rq->x))

	P(nr_running);
	P(nr_switches);
	P(nr_uninterruptible);
	PN(next_balance);
	SEQ_printf(m, "  .%-30s: %ld\n", "curr->pid", (long)(task_pid_nr(rq->curr)));
	PN(clock);
	PN(clock_task);
#undef P
#undef PN

#ifdef CONFIG_SMP
#define P64(n) SEQ_printf(m, "  .%-30s: %Ld\n", #n, rq->n);
	P64(avg_idle);
	P64(max_idle_balance_cost);
#undef P64
#endif

#define P(n) SEQ_printf(m, "  .%-30s: %d\n", #n, schedstat_val(rq->n));
	if (schedstat_enabled()) {
		P(yld_count);
		P(sched_count);
		P(sched_goidle);
		P(ttwu_count);
		P(ttwu_local);
	}
#undef P

	print_cfs_stats(m, cpu);
	print_rt_stats(m, cpu);
	print_dl_stats(m, cpu);

	print_rq(m, rq, cpu);
}
static const char *sched_tunable_scaling_names[] = {
	"none",
	"logarithmic",
	"linear"
};

static void sched_debug_header(struct seq_file *m)
{
	u64 ktime, sched_clk, cpu_clk;
	unsigned long flags;

	local_irq_save(flags);
	ktime = ktime_to_ns(ktime_get());
	sched_clk = sched_clock();
	cpu_clk = local_clock();
	local_irq_restore(flags);

	SEQ_printf(m, "Sched Debug Version: v0.11, %s %.*s\n",
		init_utsname()->release,
		(int)strcspn(init_utsname()->version, " "),
		init_utsname()->version);

#define P(x) \
	SEQ_printf(m, "%-40s: %Ld\n", #x, (long long)(x))
#define PN(x) \
	SEQ_printf(m, "%-40s: %Ld.%06ld\n", #x, SPLIT_NS(x))
	PN(ktime);
	PN(sched_clk);
	PN(cpu_clk);
	P(jiffies);
#ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
	P(sched_clock_stable());
#endif
#undef PN
#undef P

	SEQ_printf(m, "\n");
	SEQ_printf(m, "sysctl_sched\n");

#define P(x) \
	SEQ_printf(m, "  .%-40s: %Ld\n", #x, (long long)(x))
#define PN(x) \
	SEQ_printf(m, "  .%-40s: %Ld.%06ld\n", #x, SPLIT_NS(x))
	PN(sysctl_sched_base_slice);
	P(sysctl_sched_features);
#undef PN
#undef P

	SEQ_printf(m, "  .%-40s: %d (%s)\n",
		"sysctl_sched_tunable_scaling",
		sysctl_sched_tunable_scaling,
		sched_tunable_scaling_names[sysctl_sched_tunable_scaling]);
	SEQ_printf(m, "\n");
}
static int sched_debug_show(struct seq_file *m, void *v)
{
	int cpu = (unsigned long)(v - 2);

	if (cpu != -1)
		print_cpu(m, cpu);
	else
		sched_debug_header(m);

	return 0;
}

void sysrq_sched_debug_show(void)
{
	int cpu;

	sched_debug_header(NULL);
	for_each_online_cpu(cpu) {
		/*
		 * Need to reset softlockup watchdogs on all CPUs, because
		 * another CPU might be blocked waiting for us to process
		 * an IPI or stop_machine.
		 */
		touch_nmi_watchdog();
		touch_all_softlockup_watchdogs();
		print_cpu(NULL, cpu);
	}
}
/*
 * This iterator needs some explanation.
 * It returns 1 for the header position.
 * This means 2 is CPU 0.
 * In a hotplugged system some CPUs, including CPU 0, may be missing so we have
 * to use cpumask_* to iterate over the CPUs.
 */
static void *sched_debug_start(struct seq_file *file, loff_t *offset)
{
	unsigned long n = *offset;

	if (n == 0)
		return (void *) 1;

	n--;

	if (n > 0)
		n = cpumask_next(n - 1, cpu_online_mask);
	else
		n = cpumask_first(cpu_online_mask);

	*offset = n + 1;

	if (n < nr_cpu_ids)
		return (void *)(unsigned long)(n + 2);

	return NULL;
}

static void *sched_debug_next(struct seq_file *file, void *data, loff_t *offset)
{
	(*offset)++;
	return sched_debug_start(file, offset);
}

static void sched_debug_stop(struct seq_file *file, void *data)
{
}

static const struct seq_operations sched_debug_sops = {
	.start		= sched_debug_start,
	.next		= sched_debug_next,
	.stop		= sched_debug_stop,
	.show		= sched_debug_show,
};
#define __PS(S, F) SEQ_printf(m, "%-45s:%21Ld\n", S, (long long)(F))
#define __P(F) __PS(#F, F)
#define   P(F) __PS(#F, p->F)
#define   PM(F, M) __PS(#F, p->F & (M))
#define __PSN(S, F) SEQ_printf(m, "%-45s:%14Ld.%06ld\n", S, SPLIT_NS((long long)(F)))
#define __PN(F) __PSN(#F, F)
#define   PN(F) __PSN(#F, p->F)
#ifdef CONFIG_NUMA_BALANCING
void print_numa_stats(struct seq_file *m, int node, unsigned long tsf,
		unsigned long tpf, unsigned long gsf, unsigned long gpf)
{
	SEQ_printf(m, "numa_faults node=%d ", node);
	SEQ_printf(m, "task_private=%lu task_shared=%lu ", tpf, tsf);
	SEQ_printf(m, "group_private=%lu group_shared=%lu\n", gpf, gsf);
}
#endif
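
/*
 * Example of the resulting line in a task's /proc/<pid>/sched output
 * (the values are illustrative only):
 *
 *	numa_faults node=0 task_private=12 task_shared=3 group_private=12 group_shared=3
 */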
static void sched_show_numa(struct task_struct *p, struct seq_file *m)
{
#ifdef CONFIG_NUMA_BALANCING
	if (p->mm)
		P(mm->numa_scan_seq);

	P(numa_pages_migrated);
	P(numa_preferred_nid);
	P(total_numa_faults);
	SEQ_printf(m, "current_node=%d, numa_group_id=%d\n",
			task_node(p), task_numa_group_id(p));
	show_numa_stats(p, m);
#endif
}
void proc_sched_show_task(struct task_struct *p, struct pid_namespace *ns,
			  struct seq_file *m)
{
	unsigned long nr_switches;

	SEQ_printf(m, "%s (%d, #threads: %d)\n", p->comm, task_pid_nr_ns(p, ns),
		   get_nr_threads(p));
	SEQ_printf(m,
		"---------------------------------------------------------"
		"----------\n");

#define P_SCHEDSTAT(F)  __PS(#F, schedstat_val(p->stats.F))
#define PN_SCHEDSTAT(F) __PSN(#F, schedstat_val(p->stats.F))

	PN(se.exec_start);
	PN(se.vruntime);
	PN(se.sum_exec_runtime);

	nr_switches = p->nvcsw + p->nivcsw;

	P(se.nr_migrations);
	if (schedstat_enabled()) {
		u64 avg_atom, avg_per_cpu;

		PN_SCHEDSTAT(sum_sleep_runtime);
		PN_SCHEDSTAT(sum_block_runtime);
		PN_SCHEDSTAT(wait_start);
		PN_SCHEDSTAT(sleep_start);
		PN_SCHEDSTAT(block_start);
		PN_SCHEDSTAT(sleep_max);
		PN_SCHEDSTAT(block_max);
		PN_SCHEDSTAT(exec_max);
		PN_SCHEDSTAT(slice_max);
		PN_SCHEDSTAT(wait_max);
		PN_SCHEDSTAT(wait_sum);
		P_SCHEDSTAT(wait_count);
		PN_SCHEDSTAT(iowait_sum);
		P_SCHEDSTAT(iowait_count);
		P_SCHEDSTAT(nr_migrations_cold);
		P_SCHEDSTAT(nr_failed_migrations_affine);
		P_SCHEDSTAT(nr_failed_migrations_running);
		P_SCHEDSTAT(nr_failed_migrations_hot);
		P_SCHEDSTAT(nr_forced_migrations);
		P_SCHEDSTAT(nr_wakeups);
		P_SCHEDSTAT(nr_wakeups_sync);
		P_SCHEDSTAT(nr_wakeups_migrate);
		P_SCHEDSTAT(nr_wakeups_local);
		P_SCHEDSTAT(nr_wakeups_remote);
		P_SCHEDSTAT(nr_wakeups_affine);
		P_SCHEDSTAT(nr_wakeups_affine_attempts);
		P_SCHEDSTAT(nr_wakeups_passive);
		P_SCHEDSTAT(nr_wakeups_idle);

		avg_atom = p->se.sum_exec_runtime;
		if (nr_switches)
			avg_atom = div64_ul(avg_atom, nr_switches);
		else
			avg_atom = -1LL;

		avg_per_cpu = p->se.sum_exec_runtime;
		if (p->se.nr_migrations) {
			avg_per_cpu = div64_u64(avg_per_cpu,
						p->se.nr_migrations);
		} else {
			avg_per_cpu = -1LL;
		}

		__PN(avg_atom);
		__PN(avg_per_cpu);

#ifdef CONFIG_SCHED_CORE
		PN_SCHEDSTAT(core_forceidle_sum);
#endif
	}
	__PS("nr_voluntary_switches", p->nvcsw);
	__PS("nr_involuntary_switches", p->nivcsw);

	P(se.avg.runnable_sum);
	P(se.avg.runnable_avg);
	P(se.avg.last_update_time);
	PM(se.avg.util_est, ~UTIL_AVG_UNCHANGED);
#ifdef CONFIG_UCLAMP_TASK
	__PS("uclamp.min", p->uclamp_req[UCLAMP_MIN].value);
	__PS("uclamp.max", p->uclamp_req[UCLAMP_MAX].value);
	__PS("effective uclamp.min", uclamp_eff_value(p, UCLAMP_MIN));
	__PS("effective uclamp.max", uclamp_eff_value(p, UCLAMP_MAX));
#endif
	P(policy);
	P(prio);
	if (task_has_dl_policy(p)) {
		P(dl.runtime);
		P(dl.deadline);
	}
#ifdef CONFIG_SCHED_CLASS_EXT
	__PS("ext.enabled", task_on_scx(p));
#endif
#undef PN_SCHEDSTAT
#undef P_SCHEDSTAT

	{
		unsigned int this_cpu = raw_smp_processor_id();
		u64 t0, t1;

		t0 = cpu_clock(this_cpu);
		t1 = cpu_clock(this_cpu);
		__PS("clock-delta", t1-t0);
	}

	sched_show_numa(p, m);
}
void proc_sched_set_task(struct task_struct *p)
{
#ifdef CONFIG_SCHEDSTATS
	memset(&p->stats, 0, sizeof(p->stats));
#endif
}
void resched_latency_warn(int cpu, u64 latency)
{
	static DEFINE_RATELIMIT_STATE(latency_check_ratelimit, 60 * 60 * HZ, 1);

	WARN(__ratelimit(&latency_check_ratelimit),
	     "sched: CPU %d need_resched set for > %llu ns (%d ticks) "
	     "without schedule\n",
	     cpu, latency, cpu_rq(cpu)->ticks_without_resched);
}