// SPDX-License-Identifier: GPL-2.0-only
/*
 * kernel/sched/debug.c
 *
 * Print the CFS rbtree and other debugging details
 *
 * Copyright(C) 2007, Red Hat, Inc., Ingo Molnar
 */

/*
 * This allows printing both to /sys/kernel/debug/sched/debug and
 * to the console
 */
#define SEQ_printf(m, x...)			\
 do {						\
	if (m)					\
		seq_printf(m, x);		\
	else					\
		pr_cont(x);			\
 } while (0)

/*
 * Ease the printing of nsec fields:
 */
static long long nsec_high(unsigned long long nsec)
{
	if ((long long)nsec < 0) {
		nsec = -nsec;
		do_div(nsec, 1000000);
		return -nsec;
	}
	do_div(nsec, 1000000);

	return nsec;
}

static unsigned long nsec_low(unsigned long long nsec)
{
	if ((long long)nsec < 0)
		nsec = -nsec;

	return do_div(nsec, 1000000);
}

#define SPLIT_NS(x) nsec_high(x), nsec_low(x)
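/*
 * Illustrative note (not in the original source): SPLIT_NS() expands to two
 * arguments, so it pairs with a two-specifier format string, e.g.
 *
 *	SEQ_printf(m, "%Ld.%06ld\n", SPLIT_NS(some_ns_value));
 *
 * which renders 1500000 ns as "1.500000", i.e. milliseconds with the
 * nanosecond remainder as six fractional digits.
 */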
#define SCHED_FEAT(name, enabled)	\
	#name ,

static const char * const sched_feat_names[] = {
#include "features.h"
};

#undef SCHED_FEAT

static int sched_feat_show(struct seq_file *m, void *v)
{
	int i;

	for (i = 0; i < __SCHED_FEAT_NR; i++) {
		if (!(sysctl_sched_features & (1UL << i)))
			seq_puts(m, "NO_");
		seq_printf(m, "%s ", sched_feat_names[i]);
	}
	seq_puts(m, "\n");

	return 0;
}
#ifdef CONFIG_JUMP_LABEL

#define jump_label_key__true  STATIC_KEY_INIT_TRUE
#define jump_label_key__false STATIC_KEY_INIT_FALSE

#define SCHED_FEAT(name, enabled)	\
	jump_label_key__##enabled ,

struct static_key sched_feat_keys[__SCHED_FEAT_NR] = {
#include "features.h"
};

#undef SCHED_FEAT

static void sched_feat_disable(int i)
{
	static_key_disable_cpuslocked(&sched_feat_keys[i]);
}

static void sched_feat_enable(int i)
{
	static_key_enable_cpuslocked(&sched_feat_keys[i]);
}
#else
static void sched_feat_disable(int i) { };
static void sched_feat_enable(int i) { };
#endif /* CONFIG_JUMP_LABEL */
static int sched_feat_set(char *cmp)
{
	int i;
	int neg = 0;

	if (strncmp(cmp, "NO_", 3) == 0) {
		neg = 1;
		cmp += 3;
	}

	i = match_string(sched_feat_names, __SCHED_FEAT_NR, cmp);
	if (i < 0)
		return i;

	if (neg) {
		sysctl_sched_features &= ~(1UL << i);
		sched_feat_disable(i);
	} else {
		sysctl_sched_features |= (1UL << i);
		sched_feat_enable(i);
	}

	return 0;
}
static ssize_t
sched_feat_write(struct file *filp, const char __user *ubuf,
		 size_t cnt, loff_t *ppos)
{
	struct inode *inode;
	char buf[64];
	char *cmp;
	int ret;

	if (cnt > 63)
		cnt = 63;

	if (copy_from_user(&buf, ubuf, cnt))
		return -EFAULT;

	buf[cnt] = 0;
	cmp = strstrip(buf);

	/* Ensure the static_key remains in a consistent state */
	inode = file_inode(filp);
	cpus_read_lock();
	inode_lock(inode);
	ret = sched_feat_set(cmp);
	inode_unlock(inode);
	cpus_read_unlock();
	if (ret < 0)
		return ret;

	*ppos += cnt;

	return cnt;
}
static int sched_feat_open(struct inode *inode, struct file *filp)
{
	return single_open(filp, sched_feat_show, NULL);
}

static const struct file_operations sched_feat_fops = {
	.open		= sched_feat_open,
	.write		= sched_feat_write,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= single_release,
};
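/*
 * Usage sketch (illustrative, not part of the kernel source): with debugfs
 * mounted at /sys/kernel/debug, the "features" file can be inspected and
 * toggled from user space, e.g.
 *
 *	cat /sys/kernel/debug/sched/features
 *	echo NO_<FEATURE_NAME> > /sys/kernel/debug/sched/features
 *
 * where <FEATURE_NAME> is any name generated from features.h; a bare name
 * sets the feature, the "NO_" prefix clears it.
 */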
static ssize_t sched_scaling_write(struct file *filp, const char __user *ubuf,
				   size_t cnt, loff_t *ppos)
{
	char buf[16];
	unsigned int scaling;

	if (cnt > 15)
		cnt = 15;

	if (copy_from_user(&buf, ubuf, cnt))
		return -EFAULT;
	buf[cnt] = '\0';

	if (kstrtouint(buf, 10, &scaling))
		return -EINVAL;

	if (scaling >= SCHED_TUNABLESCALING_END)
		return -EINVAL;

	sysctl_sched_tunable_scaling = scaling;
	if (sched_update_scaling())
		return -EINVAL;

	*ppos += cnt;
	return cnt;
}
static int sched_scaling_show(struct seq_file *m, void *v)
{
	seq_printf(m, "%d\n", sysctl_sched_tunable_scaling);
	return 0;
}

static int sched_scaling_open(struct inode *inode, struct file *filp)
{
	return single_open(filp, sched_scaling_show, NULL);
}

static const struct file_operations sched_scaling_fops = {
	.open		= sched_scaling_open,
	.write		= sched_scaling_write,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= single_release,
};
#ifdef CONFIG_PREEMPT_DYNAMIC

static ssize_t sched_dynamic_write(struct file *filp, const char __user *ubuf,
				   size_t cnt, loff_t *ppos)
{
	char buf[16];
	int mode;

	if (cnt > 15)
		cnt = 15;

	if (copy_from_user(&buf, ubuf, cnt))
		return -EFAULT;

	buf[cnt] = 0;
	mode = sched_dynamic_mode(strstrip(buf));
	if (mode < 0)
		return mode;

	sched_dynamic_update(mode);

	*ppos += cnt;

	return cnt;
}
static int sched_dynamic_show(struct seq_file *m, void *v)
{
	static const char * preempt_modes[] = {
		"none", "voluntary", "full"
	};
	int i;

	for (i = 0; i < ARRAY_SIZE(preempt_modes); i++) {
		if (preempt_dynamic_mode == i)
			seq_puts(m, "(");
		seq_puts(m, preempt_modes[i]);
		if (preempt_dynamic_mode == i)
			seq_puts(m, ")");

		seq_puts(m, " ");
	}

	seq_puts(m, "\n");
	return 0;
}

static int sched_dynamic_open(struct inode *inode, struct file *filp)
{
	return single_open(filp, sched_dynamic_show, NULL);
}

static const struct file_operations sched_dynamic_fops = {
	.open		= sched_dynamic_open,
	.write		= sched_dynamic_write,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= single_release,
};

#endif /* CONFIG_PREEMPT_DYNAMIC */
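/*
 * Usage sketch (illustrative): on a CONFIG_PREEMPT_DYNAMIC kernel the
 * preemption model can be read and switched at run time, e.g.
 *
 *	cat /sys/kernel/debug/sched/preempt
 *	  none voluntary (full)
 *	echo voluntary > /sys/kernel/debug/sched/preempt
 *
 * sched_dynamic_show() parenthesises the mode currently in effect.
 */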
__read_mostly bool sched_debug_verbose;

#ifdef CONFIG_SMP
static struct dentry *sd_dentry;

static ssize_t sched_verbose_write(struct file *filp, const char __user *ubuf,
				   size_t cnt, loff_t *ppos)
{
	ssize_t result;
	bool orig;

	mutex_lock(&sched_domains_mutex);

	orig = sched_debug_verbose;
	result = debugfs_write_file_bool(filp, ubuf, cnt, ppos);

	if (sched_debug_verbose && !orig)
		update_sched_domain_debugfs();
	else if (!sched_debug_verbose && orig) {
		debugfs_remove(sd_dentry);
		sd_dentry = NULL;
	}

	mutex_unlock(&sched_domains_mutex);

	return result;
}
#else
#define sched_verbose_write debugfs_write_file_bool
#endif
static const struct file_operations sched_verbose_fops = {
	.read		= debugfs_read_file_bool,
	.write		= sched_verbose_write,
	.open		= simple_open,
	.llseek		= default_llseek,
};
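/*
 * Usage sketch (illustrative): writing 1 to the "verbose" bool populates the
 * per-CPU sched-domain tree, writing 0 tears it down again, e.g.
 *
 *	echo 1 > /sys/kernel/debug/sched/verbose
 *	ls /sys/kernel/debug/sched/domains/cpu0/
 */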
static const struct seq_operations sched_debug_sops;

static int sched_debug_open(struct inode *inode, struct file *filp)
{
	return seq_open(filp, &sched_debug_sops);
}

static const struct file_operations sched_debug_fops = {
	.open		= sched_debug_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= seq_release,
};
enum dl_param {
	DL_RUNTIME = 0,
	DL_PERIOD,
};

static unsigned long fair_server_period_max = (1UL << 22) * NSEC_PER_USEC; /* ~4 seconds */
static unsigned long fair_server_period_min = (100) * NSEC_PER_USEC;       /* 100 us */
static ssize_t sched_fair_server_write(struct file *filp, const char __user *ubuf,
				       size_t cnt, loff_t *ppos, enum dl_param param)
{
	long cpu = (long) ((struct seq_file *) filp->private_data)->private;
	struct rq *rq = cpu_rq(cpu);
	u64 runtime, period;
	size_t err;
	int retval;
	u64 value;

	err = kstrtoull_from_user(ubuf, cnt, 10, &value);
	if (err)
		return err;

	scoped_guard (rq_lock_irqsave, rq) {
		runtime = rq->fair_server.dl_runtime;
		period = rq->fair_server.dl_period;

		switch (param) {
		case DL_RUNTIME:
			if (runtime == value)
				break;
			runtime = value;
			break;
		case DL_PERIOD:
			if (value == period)
				break;
			period = value;
			break;
		}

		if (runtime > period ||
		    period > fair_server_period_max ||
		    period < fair_server_period_min) {
			return -EINVAL;
		}

		if (rq->cfs.h_nr_running) {
			update_rq_clock(rq);
			dl_server_stop(&rq->fair_server);
		}

		retval = dl_server_apply_params(&rq->fair_server, runtime, period, 0);
		if (retval)
			cnt = retval;

		if (!runtime)
			printk_deferred("Fair server disabled in CPU %d, system may crash due to starvation.\n",
					cpu_of(rq));

		if (rq->cfs.h_nr_running)
			dl_server_start(&rq->fair_server);
	}

	*ppos += cnt;
	return cnt;
}
static size_t sched_fair_server_show(struct seq_file *m, void *v, enum dl_param param)
{
	unsigned long cpu = (unsigned long) m->private;
	struct rq *rq = cpu_rq(cpu);
	u64 value = 0;

	switch (param) {
	case DL_RUNTIME:
		value = rq->fair_server.dl_runtime;
		break;
	case DL_PERIOD:
		value = rq->fair_server.dl_period;
		break;
	}

	seq_printf(m, "%llu\n", value);
	return 0;
}
static ssize_t
sched_fair_server_runtime_write(struct file *filp, const char __user *ubuf,
				size_t cnt, loff_t *ppos)
{
	return sched_fair_server_write(filp, ubuf, cnt, ppos, DL_RUNTIME);
}

static int sched_fair_server_runtime_show(struct seq_file *m, void *v)
{
	return sched_fair_server_show(m, v, DL_RUNTIME);
}

static int sched_fair_server_runtime_open(struct inode *inode, struct file *filp)
{
	return single_open(filp, sched_fair_server_runtime_show, inode->i_private);
}

static const struct file_operations fair_server_runtime_fops = {
	.open		= sched_fair_server_runtime_open,
	.write		= sched_fair_server_runtime_write,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= single_release,
};
static ssize_t
sched_fair_server_period_write(struct file *filp, const char __user *ubuf,
			       size_t cnt, loff_t *ppos)
{
	return sched_fair_server_write(filp, ubuf, cnt, ppos, DL_PERIOD);
}

static int sched_fair_server_period_show(struct seq_file *m, void *v)
{
	return sched_fair_server_show(m, v, DL_PERIOD);
}

static int sched_fair_server_period_open(struct inode *inode, struct file *filp)
{
	return single_open(filp, sched_fair_server_period_show, inode->i_private);
}

static const struct file_operations fair_server_period_fops = {
	.open		= sched_fair_server_period_open,
	.write		= sched_fair_server_period_write,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= single_release,
};
static struct dentry *debugfs_sched;

static void debugfs_fair_server_init(void)
{
	struct dentry *d_fair;
	unsigned long cpu;

	d_fair = debugfs_create_dir("fair_server", debugfs_sched);
	if (!d_fair)
		return;

	for_each_possible_cpu(cpu) {
		struct dentry *d_cpu;
		char buf[32];

		snprintf(buf, sizeof(buf), "cpu%lu", cpu);
		d_cpu = debugfs_create_dir(buf, d_fair);

		debugfs_create_file("runtime", 0644, d_cpu, (void *) cpu, &fair_server_runtime_fops);
		debugfs_create_file("period", 0644, d_cpu, (void *) cpu, &fair_server_period_fops);
	}
}
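/*
 * Resulting layout sketch (illustrative, assuming debugfs is mounted at
 * /sys/kernel/debug):
 *
 *	sched/fair_server/cpu0/runtime
 *	sched/fair_server/cpu0/period
 *	sched/fair_server/cpu1/...
 *
 * Both files take nanosecond values; sched_fair_server_write() rejects
 * runtime > period and any period outside
 * [fair_server_period_min, fair_server_period_max].
 */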
static __init int sched_init_debug(void)
{
	struct dentry __maybe_unused *numa;

	debugfs_sched = debugfs_create_dir("sched", NULL);

	debugfs_create_file("features", 0644, debugfs_sched, NULL, &sched_feat_fops);
	debugfs_create_file_unsafe("verbose", 0644, debugfs_sched, &sched_debug_verbose, &sched_verbose_fops);
#ifdef CONFIG_PREEMPT_DYNAMIC
	debugfs_create_file("preempt", 0644, debugfs_sched, NULL, &sched_dynamic_fops);
#endif

	debugfs_create_u32("base_slice_ns", 0644, debugfs_sched, &sysctl_sched_base_slice);

	debugfs_create_u32("latency_warn_ms", 0644, debugfs_sched, &sysctl_resched_latency_warn_ms);
	debugfs_create_u32("latency_warn_once", 0644, debugfs_sched, &sysctl_resched_latency_warn_once);

#ifdef CONFIG_SMP
	debugfs_create_file("tunable_scaling", 0644, debugfs_sched, NULL, &sched_scaling_fops);
	debugfs_create_u32("migration_cost_ns", 0644, debugfs_sched, &sysctl_sched_migration_cost);
	debugfs_create_u32("nr_migrate", 0644, debugfs_sched, &sysctl_sched_nr_migrate);

	mutex_lock(&sched_domains_mutex);
	update_sched_domain_debugfs();
	mutex_unlock(&sched_domains_mutex);
#endif

#ifdef CONFIG_NUMA_BALANCING
	numa = debugfs_create_dir("numa_balancing", debugfs_sched);

	debugfs_create_u32("scan_delay_ms", 0644, numa, &sysctl_numa_balancing_scan_delay);
	debugfs_create_u32("scan_period_min_ms", 0644, numa, &sysctl_numa_balancing_scan_period_min);
	debugfs_create_u32("scan_period_max_ms", 0644, numa, &sysctl_numa_balancing_scan_period_max);
	debugfs_create_u32("scan_size_mb", 0644, numa, &sysctl_numa_balancing_scan_size);
	debugfs_create_u32("hot_threshold_ms", 0644, numa, &sysctl_numa_balancing_hot_threshold);
#endif

	debugfs_create_file("debug", 0444, debugfs_sched, NULL, &sched_debug_fops);

	debugfs_fair_server_init();

	return 0;
}
late_initcall(sched_init_debug);
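/*
 * Overview (summary of the calls above, not taken verbatim from the source):
 * sched_init_debug() populates /sys/kernel/debug/sched/ with "features",
 * "verbose", "preempt" (CONFIG_PREEMPT_DYNAMIC), "base_slice_ns",
 * "latency_warn_ms", "latency_warn_once", "tunable_scaling",
 * "migration_cost_ns" and "nr_migrate", plus the "numa_balancing/" directory
 * (CONFIG_NUMA_BALANCING), the read-only "debug" dump and the "fair_server/"
 * directory; "domains/" appears once "verbose" is set.
 */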
#ifdef CONFIG_SMP

static cpumask_var_t sd_sysctl_cpus;

static int sd_flags_show(struct seq_file *m, void *v)
{
	unsigned long flags = *(unsigned int *)m->private;
	int idx;

	for_each_set_bit(idx, &flags, __SD_FLAG_CNT) {
		seq_puts(m, sd_flag_debug[idx].name);
		seq_puts(m, " ");
	}
	seq_puts(m, "\n");

	return 0;
}

static int sd_flags_open(struct inode *inode, struct file *file)
{
	return single_open(file, sd_flags_show, inode->i_private);
}

static const struct file_operations sd_flags_fops = {
	.open		= sd_flags_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= single_release,
};
static void register_sd(struct sched_domain *sd, struct dentry *parent)
{
#define SDM(type, mode, member)	\
	debugfs_create_##type(#member, mode, parent, &sd->member)

	SDM(ulong, 0644, min_interval);
	SDM(ulong, 0644, max_interval);
	SDM(u64,   0644, max_newidle_lb_cost);
	SDM(u32,   0644, busy_factor);
	SDM(u32,   0644, imbalance_pct);
	SDM(u32,   0644, cache_nice_tries);
	SDM(str,   0444, name);

#undef SDM

	debugfs_create_file("flags", 0444, parent, &sd->flags, &sd_flags_fops);
	debugfs_create_file("groups_flags", 0444, parent, &sd->groups->flags, &sd_flags_fops);
	debugfs_create_u32("level", 0444, parent, (u32 *)&sd->level);
}
void update_sched_domain_debugfs(void)
{
	int cpu, i;

	/*
	 * This can unfortunately be invoked before sched_init_debug() creates
	 * the debug directory. Don't touch sd_sysctl_cpus until then.
	 */
	if (!debugfs_sched)
		return;

	if (!sched_debug_verbose)
		return;

	if (!cpumask_available(sd_sysctl_cpus)) {
		if (!alloc_cpumask_var(&sd_sysctl_cpus, GFP_KERNEL))
			return;
		cpumask_copy(sd_sysctl_cpus, cpu_possible_mask);
	}

	if (!sd_dentry) {
		sd_dentry = debugfs_create_dir("domains", debugfs_sched);

		/* rebuild sd_sysctl_cpus if empty since it gets cleared below */
		if (cpumask_empty(sd_sysctl_cpus))
			cpumask_copy(sd_sysctl_cpus, cpu_online_mask);
	}

	for_each_cpu(cpu, sd_sysctl_cpus) {
		struct sched_domain *sd;
		struct dentry *d_cpu;
		char buf[32];

		snprintf(buf, sizeof(buf), "cpu%d", cpu);
		debugfs_lookup_and_remove(buf, sd_dentry);
		d_cpu = debugfs_create_dir(buf, sd_dentry);

		i = 0;
		for_each_domain(cpu, sd) {
			struct dentry *d_sd;

			snprintf(buf, sizeof(buf), "domain%d", i);
			d_sd = debugfs_create_dir(buf, d_cpu);

			register_sd(sd, d_sd);
			i++;
		}

		__cpumask_clear_cpu(cpu, sd_sysctl_cpus);
	}
}
void dirty_sched_domain_sysctl(int cpu)
{
	if (cpumask_available(sd_sysctl_cpus))
		__cpumask_set_cpu(cpu, sd_sysctl_cpus);
}

#endif /* CONFIG_SMP */
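/*
 * Layout sketch (illustrative, hypothetical two-level topology): once
 * "verbose" is enabled, update_sched_domain_debugfs() builds
 *
 *	sched/domains/cpu0/domain0/{min_interval,max_interval,
 *		max_newidle_lb_cost,busy_factor,imbalance_pct,
 *		cache_nice_tries,name,flags,groups_flags,level}
 *	sched/domains/cpu0/domain1/...
 *
 * under /sys/kernel/debug, one domainN directory per level of each CPU's
 * sched-domain hierarchy.
 */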
#ifdef CONFIG_FAIR_GROUP_SCHED
static void print_cfs_group_stats(struct seq_file *m, int cpu, struct task_group *tg)
{
	struct sched_entity *se = tg->se[cpu];

#define P(F)		SEQ_printf(m, "  .%-30s: %lld\n",	#F, (long long)F)
#define P_SCHEDSTAT(F)	SEQ_printf(m, "  .%-30s: %lld\n",	\
		#F, (long long)schedstat_val(stats->F))
#define PN(F)		SEQ_printf(m, "  .%-30s: %lld.%06ld\n", #F, SPLIT_NS((long long)F))
#define PN_SCHEDSTAT(F)	SEQ_printf(m, "  .%-30s: %lld.%06ld\n", \
		#F, SPLIT_NS((long long)schedstat_val(stats->F)))

	if (!se)
		return;

	PN(se->exec_start);
	PN(se->vruntime);
	PN(se->sum_exec_runtime);

	if (schedstat_enabled()) {
		struct sched_statistics *stats;
		stats = __schedstats_from_se(se);

		PN_SCHEDSTAT(wait_start);
		PN_SCHEDSTAT(sleep_start);
		PN_SCHEDSTAT(block_start);
		PN_SCHEDSTAT(sleep_max);
		PN_SCHEDSTAT(block_max);
		PN_SCHEDSTAT(exec_max);
		PN_SCHEDSTAT(slice_max);
		PN_SCHEDSTAT(wait_max);
		PN_SCHEDSTAT(wait_sum);
		P_SCHEDSTAT(wait_count);
	}

	P(se->load.weight);
#ifdef CONFIG_SMP
	P(se->avg.load_avg);
	P(se->avg.util_avg);
	P(se->avg.runnable_avg);
#endif

#undef PN_SCHEDSTAT
#undef PN
#undef P_SCHEDSTAT
#undef P
}
#endif /* CONFIG_FAIR_GROUP_SCHED */
#ifdef CONFIG_CGROUP_SCHED
static DEFINE_SPINLOCK(sched_debug_lock);
static char group_path[PATH_MAX];

static void task_group_path(struct task_group *tg, char *path, int plen)
{
	if (autogroup_path(tg, path, plen))
		return;

	cgroup_path(tg->css.cgroup, path, plen);
}

/*
 * Only one SEQ_printf_task_group_path() caller can use the full-length
 * group_path[] for the cgroup path. Other simultaneous callers have to
 * use a shorter stack buffer. A "..." suffix is appended at the end of
 * the stack buffer so that it shows up when the output length matches
 * the given buffer size, indicating possible path-name truncation.
 */
#define SEQ_printf_task_group_path(m, tg, fmt...)			\
{									\
	if (spin_trylock(&sched_debug_lock)) {				\
		task_group_path(tg, group_path, sizeof(group_path));	\
		SEQ_printf(m, fmt, group_path);				\
		spin_unlock(&sched_debug_lock);				\
	} else {							\
		char buf[128];						\
		char *bufend = buf + sizeof(buf) - 3;			\
		task_group_path(tg, buf, bufend - buf);			\
		strcpy(bufend - 1, "...");				\
		SEQ_printf(m, fmt, buf);				\
	}								\
}
#endif /* CONFIG_CGROUP_SCHED */
static void
print_task(struct seq_file *m, struct rq *rq, struct task_struct *p)
{
	if (task_current(rq, p))
		SEQ_printf(m, ">R");
	else
		SEQ_printf(m, " %c", task_state_to_char(p));

	SEQ_printf(m, " %15s %5d %9Ld.%06ld %c %9Ld.%06ld %c %9Ld.%06ld %9Ld.%06ld %9Ld %5d ",
		p->comm, task_pid_nr(p),
		SPLIT_NS(p->se.vruntime),
		entity_eligible(cfs_rq_of(&p->se), &p->se) ? 'E' : 'N',
		SPLIT_NS(p->se.deadline),
		p->se.custom_slice ? 'S' : ' ',
		SPLIT_NS(p->se.slice),
		SPLIT_NS(p->se.sum_exec_runtime),
		(long long)(p->nvcsw + p->nivcsw),
		p->prio);

	SEQ_printf(m, "%9lld.%06ld %9lld.%06ld %9lld.%06ld",
		SPLIT_NS(schedstat_val_or_zero(p->stats.wait_sum)),
		SPLIT_NS(schedstat_val_or_zero(p->stats.sum_sleep_runtime)),
		SPLIT_NS(schedstat_val_or_zero(p->stats.sum_block_runtime)));

#ifdef CONFIG_NUMA_BALANCING
	SEQ_printf(m, " %d %d", task_node(p), task_numa_group_id(p));
#endif
#ifdef CONFIG_CGROUP_SCHED
	SEQ_printf_task_group_path(m, task_group(p), " %s")
#endif

	SEQ_printf(m, "\n");
}
static void print_rq(struct seq_file *m, struct rq *rq, int rq_cpu)
{
	struct task_struct *g, *p;

	SEQ_printf(m, "\n");
	SEQ_printf(m, "runnable tasks:\n");
	SEQ_printf(m, " S            task   PID       vruntime   eligible    "
		   "deadline             slice          sum-exec      switches  "
		   "prio         wait-time        sum-sleep       sum-block"
#ifdef CONFIG_NUMA_BALANCING
		   "  node   group-id"
#endif
#ifdef CONFIG_CGROUP_SCHED
		   "  group-path"
#endif
		   "\n");
	SEQ_printf(m, "-------------------------------------------------------"
		   "------------------------------------------------------"
		   "------------------------------------------------------"
#ifdef CONFIG_NUMA_BALANCING
		   "--------------"
#endif
#ifdef CONFIG_CGROUP_SCHED
		   "--------------"
#endif
		   "\n");

	rcu_read_lock();
	for_each_process_thread(g, p) {
		if (task_cpu(p) != rq_cpu)
			continue;

		print_task(m, rq, p);
	}
	rcu_read_unlock();
}
void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
{
	s64 left_vruntime = -1, min_vruntime, right_vruntime = -1, left_deadline = -1, spread;
	struct sched_entity *last, *first, *root;
	struct rq *rq = cpu_rq(cpu);
	unsigned long flags;

#ifdef CONFIG_FAIR_GROUP_SCHED
	SEQ_printf(m, "\n");
	SEQ_printf_task_group_path(m, cfs_rq->tg, "cfs_rq[%d]:%s\n", cpu);
#else
	SEQ_printf(m, "\n");
	SEQ_printf(m, "cfs_rq[%d]:\n", cpu);
#endif

	raw_spin_rq_lock_irqsave(rq, flags);
	root = __pick_root_entity(cfs_rq);
	if (root)
		left_vruntime = root->min_vruntime;
	first = __pick_first_entity(cfs_rq);
	if (first)
		left_deadline = first->deadline;
	last = __pick_last_entity(cfs_rq);
	if (last)
		right_vruntime = last->vruntime;
	min_vruntime = cfs_rq->min_vruntime;
	raw_spin_rq_unlock_irqrestore(rq, flags);

	SEQ_printf(m, "  .%-30s: %Ld.%06ld\n", "left_deadline", SPLIT_NS(left_deadline));
	SEQ_printf(m, "  .%-30s: %Ld.%06ld\n", "left_vruntime", SPLIT_NS(left_vruntime));
	SEQ_printf(m, "  .%-30s: %Ld.%06ld\n", "min_vruntime", SPLIT_NS(min_vruntime));
	SEQ_printf(m, "  .%-30s: %Ld.%06ld\n", "avg_vruntime", SPLIT_NS(avg_vruntime(cfs_rq)));
	SEQ_printf(m, "  .%-30s: %Ld.%06ld\n", "right_vruntime", SPLIT_NS(right_vruntime));
	spread = right_vruntime - left_vruntime;
	SEQ_printf(m, "  .%-30s: %Ld.%06ld\n", "spread", SPLIT_NS(spread));
	SEQ_printf(m, "  .%-30s: %d\n", "nr_running", cfs_rq->nr_running);
	SEQ_printf(m, "  .%-30s: %d\n", "h_nr_running", cfs_rq->h_nr_running);
	SEQ_printf(m, "  .%-30s: %d\n", "idle_nr_running",
			cfs_rq->idle_nr_running);
	SEQ_printf(m, "  .%-30s: %d\n", "idle_h_nr_running",
			cfs_rq->idle_h_nr_running);
	SEQ_printf(m, "  .%-30s: %ld\n", "load", cfs_rq->load.weight);
#ifdef CONFIG_SMP
	SEQ_printf(m, "  .%-30s: %lu\n", "load_avg",
			cfs_rq->avg.load_avg);
	SEQ_printf(m, "  .%-30s: %lu\n", "runnable_avg",
			cfs_rq->avg.runnable_avg);
	SEQ_printf(m, "  .%-30s: %lu\n", "util_avg",
			cfs_rq->avg.util_avg);
	SEQ_printf(m, "  .%-30s: %u\n", "util_est",
			cfs_rq->avg.util_est);
	SEQ_printf(m, "  .%-30s: %ld\n", "removed.load_avg",
			cfs_rq->removed.load_avg);
	SEQ_printf(m, "  .%-30s: %ld\n", "removed.util_avg",
			cfs_rq->removed.util_avg);
	SEQ_printf(m, "  .%-30s: %ld\n", "removed.runnable_avg",
			cfs_rq->removed.runnable_avg);
#ifdef CONFIG_FAIR_GROUP_SCHED
	SEQ_printf(m, "  .%-30s: %lu\n", "tg_load_avg_contrib",
			cfs_rq->tg_load_avg_contrib);
	SEQ_printf(m, "  .%-30s: %ld\n", "tg_load_avg",
			atomic_long_read(&cfs_rq->tg->load_avg));
#endif
#endif
#ifdef CONFIG_CFS_BANDWIDTH
	SEQ_printf(m, "  .%-30s: %d\n", "throttled",
			cfs_rq->throttled);
	SEQ_printf(m, "  .%-30s: %d\n", "throttle_count",
			cfs_rq->throttle_count);
#endif

#ifdef CONFIG_FAIR_GROUP_SCHED
	print_cfs_group_stats(m, cpu, cfs_rq->tg);
#endif
}
void print_rt_rq(struct seq_file *m, int cpu, struct rt_rq *rt_rq)
{
#ifdef CONFIG_RT_GROUP_SCHED
	SEQ_printf(m, "\n");
	SEQ_printf_task_group_path(m, rt_rq->tg, "rt_rq[%d]:%s\n", cpu);
#else
	SEQ_printf(m, "\n");
	SEQ_printf(m, "rt_rq[%d]:\n", cpu);
#endif

#define P(x) \
	SEQ_printf(m, "  .%-30s: %Ld\n", #x, (long long)(rt_rq->x))
#define PU(x) \
	SEQ_printf(m, "  .%-30s: %lu\n", #x, (unsigned long)(rt_rq->x))
#define PN(x) \
	SEQ_printf(m, "  .%-30s: %Ld.%06ld\n", #x, SPLIT_NS(rt_rq->x))

	PU(rt_nr_running);

#ifdef CONFIG_RT_GROUP_SCHED
	P(rt_throttled);
	PN(rt_time);
	PN(rt_runtime);
#endif

#undef PN
#undef PU
#undef P
}
void print_dl_rq(struct seq_file *m, int cpu, struct dl_rq *dl_rq)
{
	struct dl_bw *dl_bw;

	SEQ_printf(m, "\n");
	SEQ_printf(m, "dl_rq[%d]:\n", cpu);

#define PU(x) \
	SEQ_printf(m, "  .%-30s: %lu\n", #x, (unsigned long)(dl_rq->x))

	PU(dl_nr_running);
#ifdef CONFIG_SMP
	dl_bw = &cpu_rq(cpu)->rd->dl_bw;
#else
	dl_bw = &dl_rq->dl_bw;
#endif
	SEQ_printf(m, "  .%-30s: %lld\n", "dl_bw->bw", dl_bw->bw);
	SEQ_printf(m, "  .%-30s: %lld\n", "dl_bw->total_bw", dl_bw->total_bw);

#undef PU
}
static void print_cpu(struct seq_file *m, int cpu)
{
	struct rq *rq = cpu_rq(cpu);

#ifdef CONFIG_X86
	{
		unsigned int freq = cpu_khz ? : 1;

		SEQ_printf(m, "cpu#%d, %u.%03u MHz\n",
			   cpu, freq / 1000, (freq % 1000));
	}
#else
	SEQ_printf(m, "cpu#%d\n", cpu);
#endif

#define P(x)								\
do {									\
	if (sizeof(rq->x) == 4)						\
		SEQ_printf(m, "  .%-30s: %d\n", #x, (int)(rq->x));	\
	else								\
		SEQ_printf(m, "  .%-30s: %Ld\n", #x, (long long)(rq->x));\
} while (0)

#define PN(x) \
	SEQ_printf(m, "  .%-30s: %Ld.%06ld\n", #x, SPLIT_NS(rq->x))

	P(nr_running);
	P(nr_switches);
	P(nr_uninterruptible);
	PN(next_balance);
	SEQ_printf(m, "  .%-30s: %ld\n", "curr->pid", (long)(task_pid_nr(rq->curr)));
	PN(clock);
	PN(clock_task);
#undef P
#undef PN

#ifdef CONFIG_SMP
#define P64(n) SEQ_printf(m, "  .%-30s: %Ld\n", #n, rq->n);
	P64(avg_idle);
	P64(max_idle_balance_cost);
#undef P64
#endif

#define P(n) SEQ_printf(m, "  .%-30s: %d\n", #n, schedstat_val(rq->n));
	if (schedstat_enabled()) {
		P(yld_count);
		P(sched_count);
		P(sched_goidle);
		P(ttwu_count);
		P(ttwu_local);
	}
#undef P

	print_cfs_stats(m, cpu);
	print_rt_stats(m, cpu);
	print_dl_stats(m, cpu);

	print_rq(m, rq, cpu);
	SEQ_printf(m, "\n");
}
static const char *sched_tunable_scaling_names[] = {
	"none",
	"logarithmic",
	"linear"
};

static void sched_debug_header(struct seq_file *m)
{
	u64 ktime, sched_clk, cpu_clk;
	unsigned long flags;

	local_irq_save(flags);
	ktime = ktime_to_ns(ktime_get());
	sched_clk = sched_clock();
	cpu_clk = local_clock();
	local_irq_restore(flags);

	SEQ_printf(m, "Sched Debug Version: v0.11, %s %.*s\n",
		init_utsname()->release,
		(int)strcspn(init_utsname()->version, " "),
		init_utsname()->version);

#define P(x) \
	SEQ_printf(m, "%-40s: %Ld\n", #x, (long long)(x))
#define PN(x) \
	SEQ_printf(m, "%-40s: %Ld.%06ld\n", #x, SPLIT_NS(x))
	PN(ktime);
	PN(sched_clk);
	PN(cpu_clk);
	P(jiffies);
#ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
	P(sched_clock_stable());
#endif
#undef PN
#undef P

	SEQ_printf(m, "\n");
	SEQ_printf(m, "sysctl_sched\n");

#define P(x) \
	SEQ_printf(m, "  .%-40s: %Ld\n", #x, (long long)(x))
#define PN(x) \
	SEQ_printf(m, "  .%-40s: %Ld.%06ld\n", #x, SPLIT_NS(x))
	PN(sysctl_sched_base_slice);
	P(sysctl_sched_features);
#undef PN
#undef P

	SEQ_printf(m, "  .%-40s: %d (%s)\n",
		"sysctl_sched_tunable_scaling",
		sysctl_sched_tunable_scaling,
		sched_tunable_scaling_names[sysctl_sched_tunable_scaling]);
	SEQ_printf(m, "\n");
}
static int sched_debug_show(struct seq_file *m, void *v)
{
	int cpu = (unsigned long)(v - 2);

	if (cpu != -1)
		print_cpu(m, cpu);
	else
		sched_debug_header(m);

	return 0;
}

void sysrq_sched_debug_show(void)
{
	int cpu;

	sched_debug_header(NULL);
	for_each_online_cpu(cpu) {
		/*
		 * Need to reset softlockup watchdogs on all CPUs, because
		 * another CPU might be blocked waiting for us to process
		 * an IPI or stop_machine.
		 */
		touch_nmi_watchdog();
		touch_all_softlockup_watchdogs();
		print_cpu(NULL, cpu);
	}
}
/*
 * This iterator needs some explanation.
 * It returns 1 for the header position.
 * This means 2 is CPU 0.
 * In a hotplugged system some CPUs, including CPU 0, may be missing so we have
 * to use cpumask_* to iterate over the CPUs.
 */
static void *sched_debug_start(struct seq_file *file, loff_t *offset)
{
	unsigned long n = *offset;

	if (n == 0)
		return (void *) 1;

	n--;

	if (n > 0)
		n = cpumask_next(n - 1, cpu_online_mask);
	else
		n = cpumask_first(cpu_online_mask);

	*offset = n + 1;

	if (n < nr_cpu_ids)
		return (void *)(unsigned long)(n + 2);

	return NULL;
}

static void *sched_debug_next(struct seq_file *file, void *data, loff_t *offset)
{
	(*offset)++;
	return sched_debug_start(file, offset);
}

static void sched_debug_stop(struct seq_file *file, void *data)
{
}

static const struct seq_operations sched_debug_sops = {
	.start		= sched_debug_start,
	.next		= sched_debug_next,
	.stop		= sched_debug_stop,
	.show		= sched_debug_show,
};
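/*
 * Worked example (illustrative): with CPUs 0-3 online, sched_debug_start()
 * maps seq offsets to records as
 *
 *	*offset == 0 -> header (sched_debug_show() sees v == 1, cpu == -1)
 *	*offset == 1 -> first online CPU, *offset == 2 -> the next one, ...
 *
 * i.e. the cookie (void *)(n + 2) is decoded back with "cpu = v - 2".
 */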
#define __PS(S, F) SEQ_printf(m, "%-45s:%21Ld\n", S, (long long)(F))
#define __P(F) __PS(#F, F)
#define   P(F) __PS(#F, p->F)
#define   PM(F, M) __PS(#F, p->F & (M))
#define __PSN(S, F) SEQ_printf(m, "%-45s:%14Ld.%06ld\n", S, SPLIT_NS((long long)(F)))
#define __PN(F) __PSN(#F, F)
#define   PN(F) __PSN(#F, p->F)
#ifdef CONFIG_NUMA_BALANCING
void print_numa_stats(struct seq_file *m, int node, unsigned long tsf,
		unsigned long tpf, unsigned long gsf, unsigned long gpf)
{
	SEQ_printf(m, "numa_faults node=%d ", node);
	SEQ_printf(m, "task_private=%lu task_shared=%lu ", tpf, tsf);
	SEQ_printf(m, "group_private=%lu group_shared=%lu\n", gpf, gsf);
}
#endif
static void sched_show_numa(struct task_struct *p, struct seq_file *m)
{
#ifdef CONFIG_NUMA_BALANCING
	if (p->mm)
		P(mm->numa_scan_seq);

	P(numa_pages_migrated);
	P(numa_preferred_nid);
	P(total_numa_faults);
	SEQ_printf(m, "current_node=%d, numa_group_id=%d\n",
			task_node(p), task_numa_group_id(p));
	show_numa_stats(p, m);
#endif
}
void proc_sched_show_task(struct task_struct *p, struct pid_namespace *ns,
			  struct seq_file *m)
{
	unsigned long nr_switches;

	SEQ_printf(m, "%s (%d, #threads: %d)\n", p->comm, task_pid_nr_ns(p, ns),
		   get_nr_threads(p));
	SEQ_printf(m,
		"---------------------------------------------------------"
		"----------\n");

#define P_SCHEDSTAT(F)  __PS(#F, schedstat_val(p->stats.F))
#define PN_SCHEDSTAT(F) __PSN(#F, schedstat_val(p->stats.F))

	PN(se.exec_start);
	PN(se.vruntime);
	PN(se.sum_exec_runtime);

	nr_switches = p->nvcsw + p->nivcsw;

	P(se.nr_migrations);

	if (schedstat_enabled()) {
		u64 avg_atom, avg_per_cpu;

		PN_SCHEDSTAT(sum_sleep_runtime);
		PN_SCHEDSTAT(sum_block_runtime);
		PN_SCHEDSTAT(wait_start);
		PN_SCHEDSTAT(sleep_start);
		PN_SCHEDSTAT(block_start);
		PN_SCHEDSTAT(sleep_max);
		PN_SCHEDSTAT(block_max);
		PN_SCHEDSTAT(exec_max);
		PN_SCHEDSTAT(slice_max);
		PN_SCHEDSTAT(wait_max);
		PN_SCHEDSTAT(wait_sum);
		P_SCHEDSTAT(wait_count);
		PN_SCHEDSTAT(iowait_sum);
		P_SCHEDSTAT(iowait_count);
		P_SCHEDSTAT(nr_migrations_cold);
		P_SCHEDSTAT(nr_failed_migrations_affine);
		P_SCHEDSTAT(nr_failed_migrations_running);
		P_SCHEDSTAT(nr_failed_migrations_hot);
		P_SCHEDSTAT(nr_forced_migrations);
		P_SCHEDSTAT(nr_wakeups);
		P_SCHEDSTAT(nr_wakeups_sync);
		P_SCHEDSTAT(nr_wakeups_migrate);
		P_SCHEDSTAT(nr_wakeups_local);
		P_SCHEDSTAT(nr_wakeups_remote);
		P_SCHEDSTAT(nr_wakeups_affine);
		P_SCHEDSTAT(nr_wakeups_affine_attempts);
		P_SCHEDSTAT(nr_wakeups_passive);
		P_SCHEDSTAT(nr_wakeups_idle);

		avg_atom = p->se.sum_exec_runtime;
		if (nr_switches)
			avg_atom = div64_ul(avg_atom, nr_switches);
		else
			avg_atom = -1LL;

		avg_per_cpu = p->se.sum_exec_runtime;
		if (p->se.nr_migrations) {
			avg_per_cpu = div64_u64(avg_per_cpu,
						p->se.nr_migrations);
		} else {
			avg_per_cpu = -1LL;
		}

		__PN(avg_atom);
		__PN(avg_per_cpu);

#ifdef CONFIG_SCHED_CORE
		PN_SCHEDSTAT(core_forceidle_sum);
#endif
	}

	__P(nr_switches);
	__PS("nr_voluntary_switches", p->nvcsw);
	__PS("nr_involuntary_switches", p->nivcsw);

	P(se.load.weight);
#ifdef CONFIG_SMP
	P(se.avg.load_sum);
	P(se.avg.runnable_sum);
	P(se.avg.util_sum);
	P(se.avg.load_avg);
	P(se.avg.runnable_avg);
	P(se.avg.util_avg);
	P(se.avg.last_update_time);
	PM(se.avg.util_est, ~UTIL_AVG_UNCHANGED);
#endif
#ifdef CONFIG_UCLAMP_TASK
	__PS("uclamp.min", p->uclamp_req[UCLAMP_MIN].value);
	__PS("uclamp.max", p->uclamp_req[UCLAMP_MAX].value);
	__PS("effective uclamp.min", uclamp_eff_value(p, UCLAMP_MIN));
	__PS("effective uclamp.max", uclamp_eff_value(p, UCLAMP_MAX));
#endif
	P(policy);
	P(prio);
	if (task_has_dl_policy(p)) {
		P(dl.runtime);
		P(dl.deadline);
	}
#ifdef CONFIG_SCHED_CLASS_EXT
	__PS("ext.enabled", task_on_scx(p));
#endif
#undef PN_SCHEDSTAT
#undef P_SCHEDSTAT

	{
		unsigned int this_cpu = raw_smp_processor_id();
		u64 t0, t1;

		t0 = cpu_clock(this_cpu);
		t1 = cpu_clock(this_cpu);
		__PS("clock-delta", t1-t0);
	}

	sched_show_numa(p, m);
}
void proc_sched_set_task(struct task_struct *p)
{
#ifdef CONFIG_SCHEDSTATS
	memset(&p->stats, 0, sizeof(p->stats));
#endif
}
void resched_latency_warn(int cpu, u64 latency)
{
	static DEFINE_RATELIMIT_STATE(latency_check_ratelimit, 60 * 60 * HZ, 1);

	WARN(__ratelimit(&latency_check_ratelimit),
	     "sched: CPU %d need_resched set for > %llu ns (%d ticks) "
	     "without schedule\n",
	     cpu, latency, cpu_rq(cpu)->ticks_without_resched);
}