1 // SPDX-License-Identifier: GPL-2.0-only
5 * kernel/hung_task.c - kernel thread for detecting tasks stuck in D state
10 #include <linux/cpu.h>
11 #include <linux/nmi.h>
12 #include <linux/init.h>
13 #include <linux/delay.h>
14 #include <linux/freezer.h>
15 #include <linux/kthread.h>
16 #include <linux/lockdep.h>
17 #include <linux/export.h>
18 #include <linux/panic_notifier.h>
19 #include <linux/sysctl.h>
20 #include <linux/suspend.h>
21 #include <linux/utsname.h>
22 #include <linux/sched/signal.h>
23 #include <linux/sched/debug.h>
24 #include <linux/sched/sysctl.h>
26 #include <trace/events/sched.h>
29 * The number of tasks checked:
31 static int __read_mostly sysctl_hung_task_check_count
= PID_MAX_LIMIT
;
34 * Total number of tasks detected as hung since boot:
36 static unsigned long __read_mostly sysctl_hung_task_detect_count
;
/*
 * Limit number of tasks checked in a batch.
 *
 * This value controls the preemptibility of khungtaskd since preemption
 * is disabled during the critical section. It also controls the size of
 * the RCU grace period. So it needs to be upper-bound.
 *
 * The value is an interval in jiffies: the task scan calls
 * rcu_lock_break() once jiffies has passed last_break + HUNG_TASK_LOCK_BREAK.
 */
#define HUNG_TASK_LOCK_BREAK (HZ / 10)
48 * Zero means infinite timeout - no checking done:
50 unsigned long __read_mostly sysctl_hung_task_timeout_secs
= CONFIG_DEFAULT_HUNG_TASK_TIMEOUT
;
51 EXPORT_SYMBOL_GPL(sysctl_hung_task_timeout_secs
);
54 * Zero (default value) means use sysctl_hung_task_timeout_secs:
56 static unsigned long __read_mostly sysctl_hung_task_check_interval_secs
;
58 static int __read_mostly sysctl_hung_task_warnings
= 10;
60 static int __read_mostly did_panic
;
61 static bool hung_task_show_lock
;
62 static bool hung_task_call_panic
;
63 static bool hung_task_show_all_bt
;
65 static struct task_struct
*watchdog_task
;
#ifdef CONFIG_SMP
/*
 * Should we dump all CPUs backtraces in a hung task event?
 * Defaults to 0, can be changed via sysctl.
 */
static unsigned int __read_mostly sysctl_hung_task_all_cpu_backtrace;
#else
/* Without SMP the knob is a compile-time constant 0. */
#define sysctl_hung_task_all_cpu_backtrace 0
#endif /* CONFIG_SMP */
78 * Should we panic (and reboot, if panic_timeout= is set) when a
79 * hung task is detected:
81 static unsigned int __read_mostly sysctl_hung_task_panic
=
82 IS_ENABLED(CONFIG_BOOTPARAM_HUNG_TASK_PANIC
);
85 hung_task_panic(struct notifier_block
*this, unsigned long event
, void *ptr
)
92 static struct notifier_block panic_block
= {
93 .notifier_call
= hung_task_panic
,
96 static void check_hung_task(struct task_struct
*t
, unsigned long timeout
)
98 unsigned long switch_count
= t
->nvcsw
+ t
->nivcsw
;
101 * Ensure the task is not frozen.
102 * Also, skip vfork and any other user process that freezer should skip.
104 if (unlikely(READ_ONCE(t
->__state
) & TASK_FROZEN
))
108 * When a freshly created task is scheduled once, changes its state to
109 * TASK_UNINTERRUPTIBLE without having ever been switched out once, it
112 if (unlikely(!switch_count
))
115 if (switch_count
!= t
->last_switch_count
) {
116 t
->last_switch_count
= switch_count
;
117 t
->last_switch_time
= jiffies
;
120 if (time_is_after_jiffies(t
->last_switch_time
+ timeout
* HZ
))
124 * This counter tracks the total number of tasks detected as hung
127 sysctl_hung_task_detect_count
++;
129 trace_sched_process_hang(t
);
131 if (sysctl_hung_task_panic
) {
133 hung_task_show_lock
= true;
134 hung_task_call_panic
= true;
138 * Ok, the task did not get scheduled for more than 2 minutes,
141 if (sysctl_hung_task_warnings
|| hung_task_call_panic
) {
142 if (sysctl_hung_task_warnings
> 0)
143 sysctl_hung_task_warnings
--;
144 pr_err("INFO: task %s:%d blocked for more than %ld seconds.\n",
145 t
->comm
, t
->pid
, (jiffies
- t
->last_switch_time
) / HZ
);
146 pr_err(" %s %s %.*s\n",
147 print_tainted(), init_utsname()->release
,
148 (int)strcspn(init_utsname()->version
, " "),
149 init_utsname()->version
);
150 pr_err("\"echo 0 > /proc/sys/kernel/hung_task_timeout_secs\""
151 " disables this message.\n");
153 hung_task_show_lock
= true;
155 if (sysctl_hung_task_all_cpu_backtrace
)
156 hung_task_show_all_bt
= true;
157 if (!sysctl_hung_task_warnings
)
158 pr_info("Future hung task reports are suppressed, see sysctl kernel.hung_task_warnings\n");
161 touch_nmi_watchdog();
165 * To avoid extending the RCU grace period for an unbounded amount of time,
166 * periodically exit the critical section and enter a new one.
168 * For preemptible RCU it is sufficient to call rcu_read_unlock in order
169 * to exit the grace period. For classic RCU, a reschedule is required.
171 static bool rcu_lock_break(struct task_struct
*g
, struct task_struct
*t
)
180 can_cont
= pid_alive(g
) && pid_alive(t
);
188 * Check whether a TASK_UNINTERRUPTIBLE does not get woken up for
189 * a really long time (120 seconds). If that happens, print out
192 static void check_hung_uninterruptible_tasks(unsigned long timeout
)
194 int max_count
= sysctl_hung_task_check_count
;
195 unsigned long last_break
= jiffies
;
196 struct task_struct
*g
, *t
;
199 * If the system crashed already then all bets are off,
200 * do not report extra hung tasks:
202 if (test_taint(TAINT_DIE
) || did_panic
)
205 hung_task_show_lock
= false;
207 for_each_process_thread(g
, t
) {
212 if (time_after(jiffies
, last_break
+ HUNG_TASK_LOCK_BREAK
)) {
213 if (!rcu_lock_break(g
, t
))
215 last_break
= jiffies
;
218 * skip the TASK_KILLABLE tasks -- these can be killed
219 * skip the TASK_IDLE tasks -- those are genuinely idle
221 state
= READ_ONCE(t
->__state
);
222 if ((state
& TASK_UNINTERRUPTIBLE
) &&
223 !(state
& TASK_WAKEKILL
) &&
224 !(state
& TASK_NOLOAD
))
225 check_hung_task(t
, timeout
);
229 if (hung_task_show_lock
)
230 debug_show_all_locks();
232 if (hung_task_show_all_bt
) {
233 hung_task_show_all_bt
= false;
234 trigger_all_cpu_backtrace();
237 if (hung_task_call_panic
)
238 panic("hung_task: blocked tasks");
241 static long hung_timeout_jiffies(unsigned long last_checked
,
242 unsigned long timeout
)
244 /* timeout of 0 will disable the watchdog */
245 return timeout
? last_checked
- jiffies
+ timeout
* HZ
:
246 MAX_SCHEDULE_TIMEOUT
;
251 * Process updating of timeout sysctl
253 static int proc_dohung_task_timeout_secs(const struct ctl_table
*table
, int write
,
255 size_t *lenp
, loff_t
*ppos
)
259 ret
= proc_doulongvec_minmax(table
, write
, buffer
, lenp
, ppos
);
264 wake_up_process(watchdog_task
);
271 * This is needed for proc_doulongvec_minmax of sysctl_hung_task_timeout_secs
272 * and hung_task_check_interval_secs
274 static const unsigned long hung_task_timeout_max
= (LONG_MAX
/ HZ
);
275 static struct ctl_table hung_task_sysctls
[] = {
278 .procname
= "hung_task_all_cpu_backtrace",
279 .data
= &sysctl_hung_task_all_cpu_backtrace
,
280 .maxlen
= sizeof(int),
282 .proc_handler
= proc_dointvec_minmax
,
283 .extra1
= SYSCTL_ZERO
,
284 .extra2
= SYSCTL_ONE
,
286 #endif /* CONFIG_SMP */
288 .procname
= "hung_task_panic",
289 .data
= &sysctl_hung_task_panic
,
290 .maxlen
= sizeof(int),
292 .proc_handler
= proc_dointvec_minmax
,
293 .extra1
= SYSCTL_ZERO
,
294 .extra2
= SYSCTL_ONE
,
297 .procname
= "hung_task_check_count",
298 .data
= &sysctl_hung_task_check_count
,
299 .maxlen
= sizeof(int),
301 .proc_handler
= proc_dointvec_minmax
,
302 .extra1
= SYSCTL_ZERO
,
305 .procname
= "hung_task_timeout_secs",
306 .data
= &sysctl_hung_task_timeout_secs
,
307 .maxlen
= sizeof(unsigned long),
309 .proc_handler
= proc_dohung_task_timeout_secs
,
310 .extra2
= (void *)&hung_task_timeout_max
,
313 .procname
= "hung_task_check_interval_secs",
314 .data
= &sysctl_hung_task_check_interval_secs
,
315 .maxlen
= sizeof(unsigned long),
317 .proc_handler
= proc_dohung_task_timeout_secs
,
318 .extra2
= (void *)&hung_task_timeout_max
,
321 .procname
= "hung_task_warnings",
322 .data
= &sysctl_hung_task_warnings
,
323 .maxlen
= sizeof(int),
325 .proc_handler
= proc_dointvec_minmax
,
326 .extra1
= SYSCTL_NEG_ONE
,
329 .procname
= "hung_task_detect_count",
330 .data
= &sysctl_hung_task_detect_count
,
331 .maxlen
= sizeof(unsigned long),
333 .proc_handler
= proc_doulongvec_minmax
,
337 static void __init
hung_task_sysctl_init(void)
339 register_sysctl_init("kernel", hung_task_sysctls
);
342 #define hung_task_sysctl_init() do { } while (0)
343 #endif /* CONFIG_SYSCTL */
346 static atomic_t reset_hung_task
= ATOMIC_INIT(0);
348 void reset_hung_task_detector(void)
350 atomic_set(&reset_hung_task
, 1);
352 EXPORT_SYMBOL_GPL(reset_hung_task_detector
);
/*
 * Set by hungtask_pm_notify() on the PM *_PREPARE events and cleared on
 * the PM_POST_* events; watchdog() skips the task scan while it is true.
 */
static bool hung_detector_suspended;
356 static int hungtask_pm_notify(struct notifier_block
*self
,
357 unsigned long action
, void *hcpu
)
360 case PM_SUSPEND_PREPARE
:
361 case PM_HIBERNATION_PREPARE
:
362 case PM_RESTORE_PREPARE
:
363 hung_detector_suspended
= true;
365 case PM_POST_SUSPEND
:
366 case PM_POST_HIBERNATION
:
367 case PM_POST_RESTORE
:
368 hung_detector_suspended
= false;
377 * kthread which checks for tasks stuck in D state
379 static int watchdog(void *dummy
)
381 unsigned long hung_last_checked
= jiffies
;
383 set_user_nice(current
, 0);
386 unsigned long timeout
= sysctl_hung_task_timeout_secs
;
387 unsigned long interval
= sysctl_hung_task_check_interval_secs
;
392 interval
= min_t(unsigned long, interval
, timeout
);
393 t
= hung_timeout_jiffies(hung_last_checked
, interval
);
395 if (!atomic_xchg(&reset_hung_task
, 0) &&
396 !hung_detector_suspended
)
397 check_hung_uninterruptible_tasks(timeout
);
398 hung_last_checked
= jiffies
;
401 schedule_timeout_interruptible(t
);
407 static int __init
hung_task_init(void)
409 atomic_notifier_chain_register(&panic_notifier_list
, &panic_block
);
411 /* Disable hung task detector on suspend */
412 pm_notifier(hungtask_pm_notify
, 0);
414 watchdog_task
= kthread_run(watchdog
, NULL
, "khungtaskd");
415 hung_task_sysctl_init();
419 subsys_initcall(hung_task_init
);