// SPDX-License-Identifier: GPL-2.0
/*
 *  Copyright(C) 2005-2006, Thomas Gleixner <tglx@linutronix.de>
 *  Copyright(C) 2005-2007, Red Hat, Inc., Ingo Molnar
 *  Copyright(C) 2006-2007  Timesys Corp., Thomas Gleixner
 *
 *  NOHZ implementation for low and high resolution timers
 *
 *  Started by: Thomas Gleixner and Ingo Molnar
 */
#include <linux/compiler.h>
#include <linux/cpu.h>
#include <linux/err.h>
#include <linux/hrtimer.h>
#include <linux/interrupt.h>
#include <linux/kernel_stat.h>
#include <linux/percpu.h>
#include <linux/nmi.h>
#include <linux/profile.h>
#include <linux/sched/signal.h>
#include <linux/sched/clock.h>
#include <linux/sched/stat.h>
#include <linux/sched/nohz.h>
#include <linux/sched/loadavg.h>
#include <linux/module.h>
#include <linux/irq_work.h>
#include <linux/posix-timers.h>
#include <linux/context_tracking.h>

#include <asm/irq_regs.h>

#include "tick-internal.h"

#include <trace/events/timer.h>
/*
 * Per-CPU nohz control structure
 */
static DEFINE_PER_CPU(struct tick_sched, tick_cpu_sched);

struct tick_sched *tick_get_tick_sched(int cpu)
{
        return &per_cpu(tick_cpu_sched, cpu);
}
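/*
 * Note: tick_get_tick_sched() is the accessor for code outside this file
 * that needs a given CPU's nohz state, since 'tick_cpu_sched' is static;
 * code within this file reaches the per-CPU data directly via
 * this_cpu_ptr()/per_cpu().
 */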
/*
 * The time when the last jiffy update happened. Write access must hold
 * jiffies_lock and jiffies_seq. tick_nohz_next_event() needs to get a
 * consistent view of jiffies and last_jiffies_update.
 */
static ktime_t last_jiffies_update;
/*
 * Must be called with interrupts disabled !
 */
static void tick_do_update_jiffies64(ktime_t now)
{
        unsigned long ticks = 1;
        ktime_t delta, nextp;

        /*
         * 64-bit can do a quick check without holding the jiffies lock and
         * without looking at the sequence count. The smp_load_acquire()
         * pairs with the update done later in this function.
         *
         * 32-bit cannot do that because the store of 'tick_next_period'
         * consists of two 32-bit stores, and the first store could be
         * moved by the CPU to a random point in the future.
         */
        if (IS_ENABLED(CONFIG_64BIT)) {
                if (ktime_before(now, smp_load_acquire(&tick_next_period)))
                        return;
        } else {
                unsigned int seq;

                /*
                 * Avoid contention on 'jiffies_lock' and protect the quick
                 * check with the sequence count.
                 */
                do {
                        seq = read_seqcount_begin(&jiffies_seq);
                        nextp = tick_next_period;
                } while (read_seqcount_retry(&jiffies_seq, seq));

                if (ktime_before(now, nextp))
                        return;
        }

        /* Quick check failed, i.e. update is required. */
        raw_spin_lock(&jiffies_lock);
        /*
         * Re-evaluate with the lock held. Another CPU might have done the
         * update already.
         */
        if (ktime_before(now, tick_next_period)) {
                raw_spin_unlock(&jiffies_lock);
                return;
        }

        write_seqcount_begin(&jiffies_seq);

        delta = ktime_sub(now, tick_next_period);
        if (unlikely(delta >= TICK_NSEC)) {
                /* Slow path for long idle sleep times */
                s64 incr = TICK_NSEC;

                ticks += ktime_divns(delta, incr);

                last_jiffies_update = ktime_add_ns(last_jiffies_update,
                                                   incr * ticks);
        } else {
                last_jiffies_update = ktime_add_ns(last_jiffies_update,
                                                   TICK_NSEC);
        }

        /* Advance jiffies to complete the 'jiffies_seq' protected job */
        jiffies_64 += ticks;

        /* Keep the tick_next_period variable up to date */
        nextp = ktime_add_ns(last_jiffies_update, TICK_NSEC);

        if (IS_ENABLED(CONFIG_64BIT)) {
                /*
                 * Pairs with smp_load_acquire() in the lockless quick
                 * check above, and ensures that the update to 'jiffies_64' is
                 * not reordered vs. the store to 'tick_next_period', neither
                 * by the compiler nor by the CPU.
                 */
                smp_store_release(&tick_next_period, nextp);
        } else {
                /*
                 * A plain store is good enough on 32-bit, as the quick check
                 * above is protected by the sequence count.
                 */
                tick_next_period = nextp;
        }

        /*
         * Release the sequence count. calc_global_load() below is not
         * protected by it, but 'jiffies_lock' needs to be held to prevent
         * concurrent invocations.
         */
        write_seqcount_end(&jiffies_seq);

        calc_global_load();

        raw_spin_unlock(&jiffies_lock);
        update_wall_time();
}
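/*
 * Example: if roughly ten tick periods elapsed since the last update,
 * 'delta' covers the nine periods beyond 'tick_next_period', so
 * ktime_divns() yields 9 and, together with the initial tick, jiffies_64
 * advances by 10 in a single update.
 */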
/*
 * Initialize and return the jiffies update.
 */
static ktime_t tick_init_jiffy_update(void)
{
        ktime_t period;

        raw_spin_lock(&jiffies_lock);
        write_seqcount_begin(&jiffies_seq);

        /* Have we started the jiffies update yet ? */
        if (last_jiffies_update == 0) {
                u32 rem;

                /*
                 * Ensure that the tick is aligned to a multiple of
                 * TICK_NSEC.
                 */
                div_u64_rem(tick_next_period, TICK_NSEC, &rem);
                if (rem)
                        tick_next_period += TICK_NSEC - rem;

                last_jiffies_update = tick_next_period;
        }
        period = last_jiffies_update;

        write_seqcount_end(&jiffies_seq);
        raw_spin_unlock(&jiffies_lock);

        return period;
}
static inline int tick_sched_flag_test(struct tick_sched *ts,
                                       unsigned long flag)
{
        return !!(ts->flags & flag);
}

static inline void tick_sched_flag_set(struct tick_sched *ts,
                                       unsigned long flag)
{
        lockdep_assert_irqs_disabled();
        ts->flags |= flag;
}

static inline void tick_sched_flag_clear(struct tick_sched *ts,
                                         unsigned long flag)
{
        lockdep_assert_irqs_disabled();
        ts->flags &= ~flag;
}

#define MAX_STALLED_JIFFIES 5
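/*
 * e.g. with HZ=250 (4 ms per tick) this tolerates roughly 20 ms without a
 * jiffies update before tick_sched_do_timer() below forces one.
 */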
static void tick_sched_do_timer(struct tick_sched *ts, ktime_t now)
{
        int tick_cpu, cpu = smp_processor_id();

        /*
         * Check if the do_timer duty was dropped. We don't care about
         * concurrency: This happens only when the CPU in charge went
         * into a long sleep. If two CPUs happen to assign themselves to
         * this duty, then the jiffies update is still serialized by
         * 'jiffies_lock'.
         *
         * If nohz_full is enabled, this should not happen because the
         * 'tick_do_timer_cpu' CPU never relinquishes this duty.
         */
        tick_cpu = READ_ONCE(tick_do_timer_cpu);

        if (IS_ENABLED(CONFIG_NO_HZ_COMMON) &&
            unlikely(tick_cpu == TICK_DO_TIMER_NONE)) {
#ifdef CONFIG_NO_HZ_FULL
                WARN_ON_ONCE(tick_nohz_full_running);
#endif
                WRITE_ONCE(tick_do_timer_cpu, cpu);
                tick_cpu = cpu;
        }

        /* Check if jiffies need an update */
        if (tick_cpu == cpu)
                tick_do_update_jiffies64(now);

        /*
         * If the jiffies update stalled for too long (timekeeper in stop_machine()
         * or VMEXIT'ed for several msecs), force an update.
         */
        if (ts->last_tick_jiffies != jiffies) {
                ts->stalled_jiffies = 0;
                ts->last_tick_jiffies = READ_ONCE(jiffies);
        } else {
                if (++ts->stalled_jiffies == MAX_STALLED_JIFFIES) {
                        tick_do_update_jiffies64(now);
                        ts->stalled_jiffies = 0;
                        ts->last_tick_jiffies = READ_ONCE(jiffies);
                }
        }

        if (tick_sched_flag_test(ts, TS_FLAG_INIDLE))
                ts->got_idle_tick = 1;
}
static void tick_sched_handle(struct tick_sched *ts, struct pt_regs *regs)
{
        /*
         * When we are idle and the tick is stopped, we have to touch
         * the watchdog as we might not schedule for a really long
         * time. This happens on completely idle SMP systems while
         * waiting on the login prompt. We also increment the "start of
         * idle" jiffy stamp so the idle accounting adjustment we do
         * when we go busy again does not account too many ticks.
         */
        if (IS_ENABLED(CONFIG_NO_HZ_COMMON) &&
            tick_sched_flag_test(ts, TS_FLAG_STOPPED)) {
                touch_softlockup_watchdog_sched();
                if (is_idle_task(current))
                        ts->idle_jiffies++;
                /*
                 * In case the current tick fired too early past its expected
                 * expiration, make sure we don't bypass the next clock reprogramming
                 * to the same deadline.
                 */
                ts->next_tick = 0;
        }

        update_process_times(user_mode(regs));
        profile_tick(CPU_PROFILING);
}
/*
 * We rearm the timer until we get disabled by the idle code.
 * Called with interrupts disabled.
 */
static enum hrtimer_restart tick_nohz_handler(struct hrtimer *timer)
{
        struct tick_sched *ts = container_of(timer, struct tick_sched, sched_timer);
        struct pt_regs *regs = get_irq_regs();
        ktime_t now = ktime_get();

        tick_sched_do_timer(ts, now);

        /*
         * Do not call when we are not in IRQ context and have
         * no valid 'regs' pointer
         */
        if (regs)
                tick_sched_handle(ts, regs);

        /*
         * In dynticks mode, tick reprogram is deferred:
         * - to the idle task if in dynticks-idle
         * - to IRQ exit if in full-dynticks.
         */
        if (unlikely(tick_sched_flag_test(ts, TS_FLAG_STOPPED)))
                return HRTIMER_NORESTART;

        hrtimer_forward(timer, now, TICK_NSEC);

        return HRTIMER_RESTART;
}
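/*
 * Note that hrtimer_forward() skips over any periods that were missed
 * while interrupts were off, so the handler resumes with the next future
 * period rather than firing once per missed tick.
 */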
#ifdef CONFIG_NO_HZ_FULL
cpumask_var_t tick_nohz_full_mask;
EXPORT_SYMBOL_GPL(tick_nohz_full_mask);
bool tick_nohz_full_running;
EXPORT_SYMBOL_GPL(tick_nohz_full_running);
static atomic_t tick_dep_mask;
321 static bool check_tick_dependency(atomic_t
*dep
)
323 int val
= atomic_read(dep
);
325 if (val
& TICK_DEP_MASK_POSIX_TIMER
) {
326 trace_tick_stop(0, TICK_DEP_MASK_POSIX_TIMER
);
330 if (val
& TICK_DEP_MASK_PERF_EVENTS
) {
331 trace_tick_stop(0, TICK_DEP_MASK_PERF_EVENTS
);
335 if (val
& TICK_DEP_MASK_SCHED
) {
336 trace_tick_stop(0, TICK_DEP_MASK_SCHED
);
340 if (val
& TICK_DEP_MASK_CLOCK_UNSTABLE
) {
341 trace_tick_stop(0, TICK_DEP_MASK_CLOCK_UNSTABLE
);
345 if (val
& TICK_DEP_MASK_RCU
) {
346 trace_tick_stop(0, TICK_DEP_MASK_RCU
);
350 if (val
& TICK_DEP_MASK_RCU_EXP
) {
351 trace_tick_stop(0, TICK_DEP_MASK_RCU_EXP
);
358 static bool can_stop_full_tick(int cpu
, struct tick_sched
*ts
)
360 lockdep_assert_irqs_disabled();
362 if (unlikely(!cpu_online(cpu
)))
365 if (check_tick_dependency(&tick_dep_mask
))
368 if (check_tick_dependency(&ts
->tick_dep_mask
))
371 if (check_tick_dependency(¤t
->tick_dep_mask
))
374 if (check_tick_dependency(¤t
->signal
->tick_dep_mask
))
static void nohz_full_kick_func(struct irq_work *work)
{
        /* Empty, the tick restart happens on tick_nohz_irq_exit() */
}

static DEFINE_PER_CPU(struct irq_work, nohz_full_kick_work) =
        IRQ_WORK_INIT_HARD(nohz_full_kick_func);
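/*
 * The kick is IRQ_WORK_INIT_HARD so that, on PREEMPT_RT, it runs from
 * hard interrupt context instead of being deferred to the irq_work
 * kthread, keeping the tick re-evaluation prompt.
 */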
/*
 * Kick this CPU if it's full dynticks in order to force it to
 * re-evaluate its dependency on the tick and restart it if necessary.
 * This kick, unlike tick_nohz_full_kick_cpu() and tick_nohz_full_kick_all(),
 * is NMI safe.
 */
static void tick_nohz_full_kick(void)
{
        if (!tick_nohz_full_cpu(smp_processor_id()))
                return;

        irq_work_queue(this_cpu_ptr(&nohz_full_kick_work));
}
/*
 * Kick the CPU if it's full dynticks in order to force it to
 * re-evaluate its dependency on the tick and restart it if necessary.
 */
void tick_nohz_full_kick_cpu(int cpu)
{
        if (!tick_nohz_full_cpu(cpu))
                return;

        irq_work_queue_on(&per_cpu(nohz_full_kick_work, cpu), cpu);
}
414 static void tick_nohz_kick_task(struct task_struct
*tsk
)
419 * If the task is not running, run_posix_cpu_timers()
420 * has nothing to elapse, and an IPI can then be optimized out.
422 * activate_task() STORE p->tick_dep_mask
424 * __schedule() (switch to task 'p') smp_mb() (atomic_fetch_or())
425 * LOCK rq->lock LOAD p->on_rq
426 * smp_mb__after_spin_lock()
427 * tick_nohz_task_switch()
428 * LOAD p->tick_dep_mask
430 * XXX given a task picks up the dependency on schedule(), should we
431 * only care about tasks that are currently on the CPU instead of all
432 * that are on the runqueue?
434 * That is, does this want to be: task_on_cpu() / task_curr()?
436 if (!sched_task_on_rq(tsk
))
440 * If the task concurrently migrates to another CPU,
441 * we guarantee it sees the new tick dependency upon
444 * set_task_cpu(p, cpu);
445 * STORE p->cpu = @cpu
446 * __schedule() (switch to task 'p')
448 * smp_mb__after_spin_lock() STORE p->tick_dep_mask
449 * tick_nohz_task_switch() smp_mb() (atomic_fetch_or())
450 * LOAD p->tick_dep_mask LOAD p->cpu
456 tick_nohz_full_kick_cpu(cpu
);
461 * Kick all full dynticks CPUs in order to force these to re-evaluate
462 * their dependency on the tick and restart it if necessary.
464 static void tick_nohz_full_kick_all(void)
468 if (!tick_nohz_full_running
)
472 for_each_cpu_and(cpu
, tick_nohz_full_mask
, cpu_online_mask
)
473 tick_nohz_full_kick_cpu(cpu
);
477 static void tick_nohz_dep_set_all(atomic_t
*dep
,
478 enum tick_dep_bits bit
)
482 prev
= atomic_fetch_or(BIT(bit
), dep
);
484 tick_nohz_full_kick_all();
488 * Set a global tick dependency. Used by perf events that rely on freq and
491 void tick_nohz_dep_set(enum tick_dep_bits bit
)
493 tick_nohz_dep_set_all(&tick_dep_mask
, bit
);
496 void tick_nohz_dep_clear(enum tick_dep_bits bit
)
498 atomic_andnot(BIT(bit
), &tick_dep_mask
);
502 * Set per-CPU tick dependency. Used by scheduler and perf events in order to
503 * manage event-throttling.
505 void tick_nohz_dep_set_cpu(int cpu
, enum tick_dep_bits bit
)
508 struct tick_sched
*ts
;
510 ts
= per_cpu_ptr(&tick_cpu_sched
, cpu
);
512 prev
= atomic_fetch_or(BIT(bit
), &ts
->tick_dep_mask
);
515 /* Perf needs local kick that is NMI safe */
516 if (cpu
== smp_processor_id()) {
517 tick_nohz_full_kick();
519 /* Remote IRQ work not NMI-safe */
520 if (!WARN_ON_ONCE(in_nmi()))
521 tick_nohz_full_kick_cpu(cpu
);
526 EXPORT_SYMBOL_GPL(tick_nohz_dep_set_cpu
);
528 void tick_nohz_dep_clear_cpu(int cpu
, enum tick_dep_bits bit
)
530 struct tick_sched
*ts
= per_cpu_ptr(&tick_cpu_sched
, cpu
);
532 atomic_andnot(BIT(bit
), &ts
->tick_dep_mask
);
534 EXPORT_SYMBOL_GPL(tick_nohz_dep_clear_cpu
);
/*
 * Set a per-task tick dependency. RCU needs this. Posix CPU timers also
 * need it in order to elapse per-task timers.
 */
void tick_nohz_dep_set_task(struct task_struct *tsk, enum tick_dep_bits bit)
{
        if (!atomic_fetch_or(BIT(bit), &tsk->tick_dep_mask))
                tick_nohz_kick_task(tsk);
}
EXPORT_SYMBOL_GPL(tick_nohz_dep_set_task);

void tick_nohz_dep_clear_task(struct task_struct *tsk, enum tick_dep_bits bit)
{
        atomic_andnot(BIT(bit), &tsk->tick_dep_mask);
}
EXPORT_SYMBOL_GPL(tick_nohz_dep_clear_task);
/*
 * Set a per-taskgroup tick dependency. Posix CPU timers need this in order
 * to elapse per-process timers.
 */
void tick_nohz_dep_set_signal(struct task_struct *tsk,
                              enum tick_dep_bits bit)
{
        int prev;
        struct signal_struct *sig = tsk->signal;

        prev = atomic_fetch_or(BIT(bit), &sig->tick_dep_mask);
        if (!prev) {
                struct task_struct *t;

                lockdep_assert_held(&tsk->sighand->siglock);
                __for_each_thread(sig, t)
                        tick_nohz_kick_task(t);
        }
}

void tick_nohz_dep_clear_signal(struct signal_struct *sig, enum tick_dep_bits bit)
{
        atomic_andnot(BIT(bit), &sig->tick_dep_mask);
}
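/*
 * One user is the posix CPU timer code: arming a process-wide CPU-time
 * timer sets TICK_DEP_BIT_POSIX_TIMER on the signal struct, so none of
 * the threads can stop their tick while the timer may expire.
 */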
579 * Re-evaluate the need for the tick as we switch the current task.
580 * It might need the tick due to per task/process properties:
581 * perf events, posix CPU timers, ...
583 void __tick_nohz_task_switch(void)
585 struct tick_sched
*ts
;
587 if (!tick_nohz_full_cpu(smp_processor_id()))
590 ts
= this_cpu_ptr(&tick_cpu_sched
);
592 if (tick_sched_flag_test(ts
, TS_FLAG_STOPPED
)) {
593 if (atomic_read(¤t
->tick_dep_mask
) ||
594 atomic_read(¤t
->signal
->tick_dep_mask
))
595 tick_nohz_full_kick();
/* Get the boot-time nohz CPU list from the kernel parameters. */
void __init tick_nohz_full_setup(cpumask_var_t cpumask)
{
        alloc_bootmem_cpumask_var(&tick_nohz_full_mask);
        cpumask_copy(tick_nohz_full_mask, cpumask);
        tick_nohz_full_running = true;
}
bool tick_nohz_cpu_hotpluggable(unsigned int cpu)
{
        /*
         * The 'tick_do_timer_cpu' CPU handles housekeeping duty (unbound
         * timers, workqueues, timekeeping, ...) on behalf of full dynticks
         * CPUs. It must remain online when nohz full is enabled.
         */
        if (tick_nohz_full_running && READ_ONCE(tick_do_timer_cpu) == cpu)
                return false;
        return true;
}

static int tick_nohz_cpu_down(unsigned int cpu)
{
        return tick_nohz_cpu_hotpluggable(cpu) ? 0 : -EBUSY;
}
624 void __init
tick_nohz_init(void)
628 if (!tick_nohz_full_running
)
632 * Full dynticks uses IRQ work to drive the tick rescheduling on safe
633 * locking contexts. But then we need IRQ work to raise its own
634 * interrupts to avoid circular dependency on the tick.
636 if (!arch_irq_work_has_interrupt()) {
637 pr_warn("NO_HZ: Can't run full dynticks because arch doesn't support IRQ work self-IPIs\n");
638 cpumask_clear(tick_nohz_full_mask
);
639 tick_nohz_full_running
= false;
643 if (IS_ENABLED(CONFIG_PM_SLEEP_SMP
) &&
644 !IS_ENABLED(CONFIG_PM_SLEEP_SMP_NONZERO_CPU
)) {
645 cpu
= smp_processor_id();
647 if (cpumask_test_cpu(cpu
, tick_nohz_full_mask
)) {
648 pr_warn("NO_HZ: Clearing %d from nohz_full range "
649 "for timekeeping\n", cpu
);
650 cpumask_clear_cpu(cpu
, tick_nohz_full_mask
);
654 for_each_cpu(cpu
, tick_nohz_full_mask
)
655 ct_cpu_track_user(cpu
);
657 ret
= cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN
,
658 "kernel/nohz:predown", NULL
,
661 pr_info("NO_HZ: Full dynticks CPUs: %*pbl.\n",
662 cpumask_pr_args(tick_nohz_full_mask
));
664 #endif /* #ifdef CONFIG_NO_HZ_FULL */
/*
 * NOHZ - aka dynamic tick functionality
 */
#ifdef CONFIG_NO_HZ_COMMON

bool tick_nohz_enabled __read_mostly = true;
unsigned long tick_nohz_active __read_mostly;

/*
 * Enable / Disable tickless mode
 */
static int __init setup_tick_nohz(char *str)
{
        return (kstrtobool(str, &tick_nohz_enabled) == 0);
}

__setup("nohz=", setup_tick_nohz);
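/*
 * Hence booting with "nohz=off" disables dynticks mode, while "nohz=on"
 * (the default) keeps it enabled.
 */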
bool tick_nohz_tick_stopped(void)
{
        struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched);

        return tick_sched_flag_test(ts, TS_FLAG_STOPPED);
}

bool tick_nohz_tick_stopped_cpu(int cpu)
{
        struct tick_sched *ts = per_cpu_ptr(&tick_cpu_sched, cpu);

        return tick_sched_flag_test(ts, TS_FLAG_STOPPED);
}
/**
 * tick_nohz_update_jiffies - update jiffies when idle was interrupted
 * @now: current ktime_t
 *
 * Called from interrupt entry when the CPU was idle
 *
 * In case the sched_tick was stopped on this CPU, we have to check if jiffies
 * must be updated. Otherwise an interrupt handler could use a stale jiffy
 * value. We do this unconditionally on any CPU, as we don't know whether the
 * CPU, which has the update task assigned, is in a long sleep.
 */
static void tick_nohz_update_jiffies(ktime_t now)
{
        unsigned long flags;

        __this_cpu_write(tick_cpu_sched.idle_waketime, now);

        local_irq_save(flags);
        tick_do_update_jiffies64(now);
        local_irq_restore(flags);

        touch_softlockup_watchdog_sched();
}
723 static void tick_nohz_stop_idle(struct tick_sched
*ts
, ktime_t now
)
727 if (WARN_ON_ONCE(!tick_sched_flag_test(ts
, TS_FLAG_IDLE_ACTIVE
)))
730 delta
= ktime_sub(now
, ts
->idle_entrytime
);
732 write_seqcount_begin(&ts
->idle_sleeptime_seq
);
733 if (nr_iowait_cpu(smp_processor_id()) > 0)
734 ts
->iowait_sleeptime
= ktime_add(ts
->iowait_sleeptime
, delta
);
736 ts
->idle_sleeptime
= ktime_add(ts
->idle_sleeptime
, delta
);
738 ts
->idle_entrytime
= now
;
739 tick_sched_flag_clear(ts
, TS_FLAG_IDLE_ACTIVE
);
740 write_seqcount_end(&ts
->idle_sleeptime_seq
);
742 sched_clock_idle_wakeup_event();
745 static void tick_nohz_start_idle(struct tick_sched
*ts
)
747 write_seqcount_begin(&ts
->idle_sleeptime_seq
);
748 ts
->idle_entrytime
= ktime_get();
749 tick_sched_flag_set(ts
, TS_FLAG_IDLE_ACTIVE
);
750 write_seqcount_end(&ts
->idle_sleeptime_seq
);
752 sched_clock_idle_sleep_event();
755 static u64
get_cpu_sleep_time_us(struct tick_sched
*ts
, ktime_t
*sleeptime
,
756 bool compute_delta
, u64
*last_update_time
)
761 if (!tick_nohz_active
)
765 if (last_update_time
)
766 *last_update_time
= ktime_to_us(now
);
769 seq
= read_seqcount_begin(&ts
->idle_sleeptime_seq
);
771 if (tick_sched_flag_test(ts
, TS_FLAG_IDLE_ACTIVE
) && compute_delta
) {
772 ktime_t delta
= ktime_sub(now
, ts
->idle_entrytime
);
774 idle
= ktime_add(*sleeptime
, delta
);
778 } while (read_seqcount_retry(&ts
->idle_sleeptime_seq
, seq
));
780 return ktime_to_us(idle
);
785 * get_cpu_idle_time_us - get the total idle time of a CPU
786 * @cpu: CPU number to query
787 * @last_update_time: variable to store update time in. Do not update
790 * Return the cumulative idle time (since boot) for a given
791 * CPU, in microseconds. Note that this is partially broken due to
792 * the counter of iowait tasks that can be remotely updated without
793 * any synchronization. Therefore it is possible to observe backward
794 * values within two consecutive reads.
796 * This time is measured via accounting rather than sampling,
797 * and is as accurate as ktime_get() is.
799 * Return: -1 if NOHZ is not enabled, else total idle time of the @cpu
801 u64
get_cpu_idle_time_us(int cpu
, u64
*last_update_time
)
803 struct tick_sched
*ts
= &per_cpu(tick_cpu_sched
, cpu
);
805 return get_cpu_sleep_time_us(ts
, &ts
->idle_sleeptime
,
806 !nr_iowait_cpu(cpu
), last_update_time
);
808 EXPORT_SYMBOL_GPL(get_cpu_idle_time_us
);
811 * get_cpu_iowait_time_us - get the total iowait time of a CPU
812 * @cpu: CPU number to query
813 * @last_update_time: variable to store update time in. Do not update
816 * Return the cumulative iowait time (since boot) for a given
817 * CPU, in microseconds. Note this is partially broken due to
818 * the counter of iowait tasks that can be remotely updated without
819 * any synchronization. Therefore it is possible to observe backward
820 * values within two consecutive reads.
822 * This time is measured via accounting rather than sampling,
823 * and is as accurate as ktime_get() is.
825 * Return: -1 if NOHZ is not enabled, else total iowait time of @cpu
827 u64
get_cpu_iowait_time_us(int cpu
, u64
*last_update_time
)
829 struct tick_sched
*ts
= &per_cpu(tick_cpu_sched
, cpu
);
831 return get_cpu_sleep_time_us(ts
, &ts
->iowait_sleeptime
,
832 nr_iowait_cpu(cpu
), last_update_time
);
834 EXPORT_SYMBOL_GPL(get_cpu_iowait_time_us
);
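/*
 * Both accessors above feed, among others, the per-CPU idle/iowait fields
 * in /proc/stat and the cpufreq governors, which is why the backward-jump
 * caveat documented above matters to their consumers.
 */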
836 static void tick_nohz_restart(struct tick_sched
*ts
, ktime_t now
)
838 hrtimer_cancel(&ts
->sched_timer
);
839 hrtimer_set_expires(&ts
->sched_timer
, ts
->last_tick
);
841 /* Forward the time to expire in the future */
842 hrtimer_forward(&ts
->sched_timer
, now
, TICK_NSEC
);
844 if (tick_sched_flag_test(ts
, TS_FLAG_HIGHRES
)) {
845 hrtimer_start_expires(&ts
->sched_timer
,
846 HRTIMER_MODE_ABS_PINNED_HARD
);
848 tick_program_event(hrtimer_get_expires(&ts
->sched_timer
), 1);
852 * Reset to make sure the next tick stop doesn't get fooled by past
853 * cached clock deadline.
static inline bool local_timer_softirq_pending(void)
{
        return local_timers_pending() & BIT(TIMER_SOFTIRQ);
}

/*
 * Read jiffies and the time when jiffies were updated last
 */
u64 get_jiffies_update(unsigned long *basej)
{
        unsigned long basejiff;
        unsigned int seq;
        u64 basemono;

        do {
                seq = read_seqcount_begin(&jiffies_seq);
                basemono = last_jiffies_update;
                basejiff = jiffies;
        } while (read_seqcount_retry(&jiffies_seq, seq));
        *basej = basejiff;
        return basemono;
}
/**
 * tick_nohz_next_event() - return the clock monotonic based next event
 * @ts:		pointer to tick_sched struct
 * @cpu:	the CPU number
 *
 * Return:
 * *%0		- When the next event is a maximum of TICK_NSEC in the future
 *		  and the tick is not stopped yet
 * *%next_event	- Next event based on clock monotonic
 */
static ktime_t tick_nohz_next_event(struct tick_sched *ts, int cpu)
{
        u64 basemono, next_tick, delta, expires;
        unsigned long basejiff;
        int tick_cpu;
897 basemono
= get_jiffies_update(&basejiff
);
898 ts
->last_jiffies
= basejiff
;
899 ts
->timer_expires_base
= basemono
;
	/*
	 * Keep the periodic tick when RCU, the architecture or irq_work
	 * requests it.
	 * Aside from that, check whether the local timer softirq is
	 * pending. If so, it's a bad idea to call get_next_timer_interrupt(),
	 * because there is an already expired timer, so it will request
	 * immediate expiry, which rearms the hardware timer with a
	 * minimal delta, which brings us back to this place
	 * immediately. Lather, rinse and repeat...
	 */
	if (rcu_needs_cpu() || arch_needs_cpu() ||
	    irq_work_needs_cpu() || local_timer_softirq_pending()) {
913 next_tick
= basemono
+ TICK_NSEC
;
916 * Get the next pending timer. If high resolution
917 * timers are enabled this only takes the timer wheel
918 * timers into account. If high resolution timers are
919 * disabled this also looks at the next expiring
922 next_tick
= get_next_timer_interrupt(basejiff
, basemono
);
923 ts
->next_timer
= next_tick
;
926 /* Make sure next_tick is never before basemono! */
927 if (WARN_ON_ONCE(basemono
> next_tick
))
928 next_tick
= basemono
;
931 * If the tick is due in the next period, keep it ticking or
932 * force prod the timer.
934 delta
= next_tick
- basemono
;
935 if (delta
<= (u64
)TICK_NSEC
) {
937 * We've not stopped the tick yet, and there's a timer in the
938 * next period, so no point in stopping it either, bail.
940 if (!tick_sched_flag_test(ts
, TS_FLAG_STOPPED
)) {
941 ts
->timer_expires
= 0;
947 * If this CPU is the one which had the do_timer() duty last, we limit
948 * the sleep time to the timekeeping 'max_deferment' value.
949 * Otherwise we can sleep as long as we want.
951 delta
= timekeeping_max_deferment();
952 tick_cpu
= READ_ONCE(tick_do_timer_cpu
);
953 if (tick_cpu
!= cpu
&&
954 (tick_cpu
!= TICK_DO_TIMER_NONE
|| !tick_sched_flag_test(ts
, TS_FLAG_DO_TIMER_LAST
)))
957 /* Calculate the next expiry time */
958 if (delta
< (KTIME_MAX
- basemono
))
959 expires
= basemono
+ delta
;
963 ts
->timer_expires
= min_t(u64
, expires
, next_tick
);
966 return ts
->timer_expires
;
969 static void tick_nohz_stop_tick(struct tick_sched
*ts
, int cpu
)
971 struct clock_event_device
*dev
= __this_cpu_read(tick_cpu_device
.evtdev
);
972 unsigned long basejiff
= ts
->last_jiffies
;
973 u64 basemono
= ts
->timer_expires_base
;
974 bool timer_idle
= tick_sched_flag_test(ts
, TS_FLAG_STOPPED
);
978 /* Make sure we won't be trying to stop it twice in a row. */
979 ts
->timer_expires_base
= 0;
982 * Now the tick should be stopped definitely - so the timer base needs
983 * to be marked idle as well to not miss a newly queued timer.
985 expires
= timer_base_try_to_set_idle(basejiff
, basemono
, &timer_idle
);
986 if (expires
> ts
->timer_expires
) {
988 * This path could only happen when the first timer was removed
989 * between calculating the possible sleep length and now (when
990 * high resolution mode is not active, timer could also be a
993 * We have to stick to the original calculated expiry value to
994 * not stop the tick for too long with a shallow C-state (which
995 * was programmed by cpuidle because of an early next expiration
998 expires
= ts
->timer_expires
;
1001 /* If the timer base is not idle, retain the not yet stopped tick. */
1006 * If this CPU is the one which updates jiffies, then give up
1007 * the assignment and let it be taken by the CPU which runs
1008 * the tick timer next, which might be this CPU as well. If we
1009 * don't drop this here, the jiffies might be stale and
1010 * do_timer() never gets invoked. Keep track of the fact that it
1011 * was the one which had the do_timer() duty last.
1013 tick_cpu
= READ_ONCE(tick_do_timer_cpu
);
1014 if (tick_cpu
== cpu
) {
1015 WRITE_ONCE(tick_do_timer_cpu
, TICK_DO_TIMER_NONE
);
1016 tick_sched_flag_set(ts
, TS_FLAG_DO_TIMER_LAST
);
1017 } else if (tick_cpu
!= TICK_DO_TIMER_NONE
) {
1018 tick_sched_flag_clear(ts
, TS_FLAG_DO_TIMER_LAST
);
1021 /* Skip reprogram of event if it's not changed */
1022 if (tick_sched_flag_test(ts
, TS_FLAG_STOPPED
) && (expires
== ts
->next_tick
)) {
1023 /* Sanity check: make sure clockevent is actually programmed */
1024 if (expires
== KTIME_MAX
|| ts
->next_tick
== hrtimer_get_expires(&ts
->sched_timer
))
1027 WARN_ONCE(1, "basemono: %llu ts->next_tick: %llu dev->next_event: %llu "
1028 "timer->active: %d timer->expires: %llu\n", basemono
, ts
->next_tick
,
1029 dev
->next_event
, hrtimer_active(&ts
->sched_timer
),
1030 hrtimer_get_expires(&ts
->sched_timer
));
1034 * tick_nohz_stop_tick() can be called several times before
1035 * tick_nohz_restart_sched_tick() is called. This happens when
1036 * interrupts arrive which do not cause a reschedule. In the first
1037 * call we save the current tick time, so we can restart the
1038 * scheduler tick in tick_nohz_restart_sched_tick().
1040 if (!tick_sched_flag_test(ts
, TS_FLAG_STOPPED
)) {
1041 calc_load_nohz_start();
1044 ts
->last_tick
= hrtimer_get_expires(&ts
->sched_timer
);
1045 tick_sched_flag_set(ts
, TS_FLAG_STOPPED
);
1046 trace_tick_stop(1, TICK_DEP_MASK_NONE
);
1049 ts
->next_tick
= expires
;
1052 * If the expiration time == KTIME_MAX, then we simply stop
1055 if (unlikely(expires
== KTIME_MAX
)) {
1056 if (tick_sched_flag_test(ts
, TS_FLAG_HIGHRES
))
1057 hrtimer_cancel(&ts
->sched_timer
);
1059 tick_program_event(KTIME_MAX
, 1);
1063 if (tick_sched_flag_test(ts
, TS_FLAG_HIGHRES
)) {
1064 hrtimer_start(&ts
->sched_timer
, expires
,
1065 HRTIMER_MODE_ABS_PINNED_HARD
);
1067 hrtimer_set_expires(&ts
->sched_timer
, expires
);
1068 tick_program_event(expires
, 1);
1072 static void tick_nohz_retain_tick(struct tick_sched
*ts
)
1074 ts
->timer_expires_base
= 0;
1077 #ifdef CONFIG_NO_HZ_FULL
1078 static void tick_nohz_full_stop_tick(struct tick_sched
*ts
, int cpu
)
1080 if (tick_nohz_next_event(ts
, cpu
))
1081 tick_nohz_stop_tick(ts
, cpu
);
1083 tick_nohz_retain_tick(ts
);
1085 #endif /* CONFIG_NO_HZ_FULL */
1087 static void tick_nohz_restart_sched_tick(struct tick_sched
*ts
, ktime_t now
)
1089 /* Update jiffies first */
1090 tick_do_update_jiffies64(now
);
1093 * Clear the timer idle flag, so we avoid IPIs on remote queueing and
1094 * the clock forward checks in the enqueue path:
1098 calc_load_nohz_stop();
1099 touch_softlockup_watchdog_sched();
1101 /* Cancel the scheduled timer and restore the tick: */
1102 tick_sched_flag_clear(ts
, TS_FLAG_STOPPED
);
1103 tick_nohz_restart(ts
, now
);
1106 static void __tick_nohz_full_update_tick(struct tick_sched
*ts
,
1109 #ifdef CONFIG_NO_HZ_FULL
1110 int cpu
= smp_processor_id();
1112 if (can_stop_full_tick(cpu
, ts
))
1113 tick_nohz_full_stop_tick(ts
, cpu
);
1114 else if (tick_sched_flag_test(ts
, TS_FLAG_STOPPED
))
1115 tick_nohz_restart_sched_tick(ts
, now
);
1119 static void tick_nohz_full_update_tick(struct tick_sched
*ts
)
1121 if (!tick_nohz_full_cpu(smp_processor_id()))
1124 if (!tick_sched_flag_test(ts
, TS_FLAG_NOHZ
))
1127 __tick_nohz_full_update_tick(ts
, ktime_get());
/*
 * A pending softirq outside an IRQ (or softirq disabled section) context
 * should be waiting for ksoftirqd to handle it. Therefore we shouldn't
 * reach this code due to the need_resched() early check in can_stop_idle_tick().
 *
 * However if we are between CPUHP_AP_SMPBOOT_THREADS and CPU_TEARDOWN_CPU on the
 * cpu_down() process, softirqs can still be raised while ksoftirqd is parked,
 * triggering the code below, since wakeup_softirqd() is ignored.
 */
static bool report_idle_softirq(void)
{
        static int ratelimit;
        unsigned int pending = local_softirq_pending();
1145 if (likely(!pending
))
1148 /* Some softirqs claim to be safe against hotplug and ksoftirqd parking */
1149 if (!cpu_active(smp_processor_id())) {
1150 pending
&= ~SOFTIRQ_HOTPLUG_SAFE_MASK
;
1155 if (ratelimit
>= 10)
1158 /* On RT, softirq handling may be waiting on some lock */
1159 if (local_bh_blocked())
1162 pr_warn("NOHZ tick-stop error: local softirq work is pending, handler #%02x!!!\n",
1169 static bool can_stop_idle_tick(int cpu
, struct tick_sched
*ts
)
1171 WARN_ON_ONCE(cpu_is_offline(cpu
));
1173 if (unlikely(!tick_sched_flag_test(ts
, TS_FLAG_NOHZ
)))
1179 if (unlikely(report_idle_softirq()))
1182 if (tick_nohz_full_enabled()) {
1183 int tick_cpu
= READ_ONCE(tick_do_timer_cpu
);
1186 * Keep the tick alive to guarantee timekeeping progression
1187 * if there are full dynticks CPUs around
1189 if (tick_cpu
== cpu
)
1192 /* Should not happen for nohz-full */
1193 if (WARN_ON_ONCE(tick_cpu
== TICK_DO_TIMER_NONE
))
1201 * tick_nohz_idle_stop_tick - stop the idle tick from the idle task
1203 * When the next event is more than a tick into the future, stop the idle tick
1205 void tick_nohz_idle_stop_tick(void)
1207 struct tick_sched
*ts
= this_cpu_ptr(&tick_cpu_sched
);
1208 int cpu
= smp_processor_id();
1212 * If tick_nohz_get_sleep_length() ran tick_nohz_next_event(), the
1213 * tick timer expiration time is known already.
1215 if (ts
->timer_expires_base
)
1216 expires
= ts
->timer_expires
;
1217 else if (can_stop_idle_tick(cpu
, ts
))
1218 expires
= tick_nohz_next_event(ts
, cpu
);
1224 if (expires
> 0LL) {
1225 int was_stopped
= tick_sched_flag_test(ts
, TS_FLAG_STOPPED
);
1227 tick_nohz_stop_tick(ts
, cpu
);
1230 ts
->idle_expires
= expires
;
1232 if (!was_stopped
&& tick_sched_flag_test(ts
, TS_FLAG_STOPPED
)) {
1233 ts
->idle_jiffies
= ts
->last_jiffies
;
1234 nohz_balance_enter_idle(cpu
);
1237 tick_nohz_retain_tick(ts
);
1241 void tick_nohz_idle_retain_tick(void)
1243 tick_nohz_retain_tick(this_cpu_ptr(&tick_cpu_sched
));
1247 * tick_nohz_idle_enter - prepare for entering idle on the current CPU
1249 * Called when we start the idle loop.
1251 void tick_nohz_idle_enter(void)
1253 struct tick_sched
*ts
;
1255 lockdep_assert_irqs_enabled();
1257 local_irq_disable();
1259 ts
= this_cpu_ptr(&tick_cpu_sched
);
1261 WARN_ON_ONCE(ts
->timer_expires_base
);
1263 tick_sched_flag_set(ts
, TS_FLAG_INIDLE
);
1264 tick_nohz_start_idle(ts
);
1270 * tick_nohz_irq_exit - Notify the tick about IRQ exit
1272 * A timer may have been added/modified/deleted either by the current IRQ,
1273 * or by another place using this IRQ as a notification. This IRQ may have
1274 * also updated the RCU callback list. These events may require a
1275 * re-evaluation of the next tick. Depending on the context:
1277 * 1) If the CPU is idle and no resched is pending, just proceed with idle
1278 * time accounting. The next tick will be re-evaluated on the next idle
1281 * 2) If the CPU is nohz_full:
1283 * 2.1) If there is any tick dependency, restart the tick if stopped.
1285 * 2.2) If there is no tick dependency, (re-)evaluate the next tick and
1286 * stop/update it accordingly.
1288 void tick_nohz_irq_exit(void)
1290 struct tick_sched
*ts
= this_cpu_ptr(&tick_cpu_sched
);
1292 if (tick_sched_flag_test(ts
, TS_FLAG_INIDLE
))
1293 tick_nohz_start_idle(ts
);
1295 tick_nohz_full_update_tick(ts
);
1299 * tick_nohz_idle_got_tick - Check whether or not the tick handler has run
1301 * Return: %true if the tick handler has run, otherwise %false
1303 bool tick_nohz_idle_got_tick(void)
1305 struct tick_sched
*ts
= this_cpu_ptr(&tick_cpu_sched
);
1307 if (ts
->got_idle_tick
) {
1308 ts
->got_idle_tick
= 0;
1315 * tick_nohz_get_next_hrtimer - return the next expiration time for the hrtimer
1316 * or the tick, whichever expires first. Note that, if the tick has been
1317 * stopped, it returns the next hrtimer.
1319 * Called from power state control code with interrupts disabled
1321 * Return: the next expiration time
1323 ktime_t
tick_nohz_get_next_hrtimer(void)
1325 return __this_cpu_read(tick_cpu_device
.evtdev
)->next_event
;
1329 * tick_nohz_get_sleep_length - return the expected length of the current sleep
1330 * @delta_next: duration until the next event if the tick cannot be stopped
1332 * Called from power state control code with interrupts disabled.
1334 * The return value of this function and/or the value returned by it through the
1335 * @delta_next pointer can be negative which must be taken into account by its
1338 * Return: the expected length of the current sleep
1340 ktime_t
tick_nohz_get_sleep_length(ktime_t
*delta_next
)
1342 struct clock_event_device
*dev
= __this_cpu_read(tick_cpu_device
.evtdev
);
1343 struct tick_sched
*ts
= this_cpu_ptr(&tick_cpu_sched
);
1344 int cpu
= smp_processor_id();
1346 * The idle entry time is expected to be a sufficient approximation of
1347 * the current time at this point.
1349 ktime_t now
= ts
->idle_entrytime
;
1352 WARN_ON_ONCE(!tick_sched_flag_test(ts
, TS_FLAG_INIDLE
));
1354 *delta_next
= ktime_sub(dev
->next_event
, now
);
1356 if (!can_stop_idle_tick(cpu
, ts
))
1359 next_event
= tick_nohz_next_event(ts
, cpu
);
1364 * If the next highres timer to expire is earlier than 'next_event', the
1365 * idle governor needs to know that.
1367 next_event
= min_t(u64
, next_event
,
1368 hrtimer_next_event_without(&ts
->sched_timer
));
1370 return ktime_sub(next_event
, now
);
1374 * tick_nohz_get_idle_calls_cpu - return the current idle calls counter value
1375 * for a particular CPU.
1376 * @cpu: target CPU number
1378 * Called from the schedutil frequency scaling governor in scheduler context.
1380 * Return: the current idle calls counter value for @cpu
1382 unsigned long tick_nohz_get_idle_calls_cpu(int cpu
)
1384 struct tick_sched
*ts
= tick_get_tick_sched(cpu
);
1386 return ts
->idle_calls
;
1389 static void tick_nohz_account_idle_time(struct tick_sched
*ts
,
1392 unsigned long ticks
;
1394 ts
->idle_exittime
= now
;
1396 if (vtime_accounting_enabled_this_cpu())
1399 * We stopped the tick in idle. update_process_times() would miss the
1400 * time we slept, as it does only a 1 tick accounting.
1401 * Enforce that this is accounted to idle !
1403 ticks
= jiffies
- ts
->idle_jiffies
;
1405 * We might be one off. Do not randomly account a huge number of ticks!
1407 if (ticks
&& ticks
< LONG_MAX
)
1408 account_idle_ticks(ticks
);
1411 void tick_nohz_idle_restart_tick(void)
1413 struct tick_sched
*ts
= this_cpu_ptr(&tick_cpu_sched
);
1415 if (tick_sched_flag_test(ts
, TS_FLAG_STOPPED
)) {
1416 ktime_t now
= ktime_get();
1417 tick_nohz_restart_sched_tick(ts
, now
);
1418 tick_nohz_account_idle_time(ts
, now
);
1422 static void tick_nohz_idle_update_tick(struct tick_sched
*ts
, ktime_t now
)
1424 if (tick_nohz_full_cpu(smp_processor_id()))
1425 __tick_nohz_full_update_tick(ts
, now
);
1427 tick_nohz_restart_sched_tick(ts
, now
);
1429 tick_nohz_account_idle_time(ts
, now
);
1433 * tick_nohz_idle_exit - Update the tick upon idle task exit
1435 * When the idle task exits, update the tick depending on the
1436 * following situations:
1438 * 1) If the CPU is not in nohz_full mode (most cases), then
1441 * 2) If the CPU is in nohz_full mode (corner case):
1442 * 2.1) If the tick can be kept stopped (no tick dependencies)
1443 * then re-evaluate the next tick and try to keep it stopped
1444 * as long as possible.
1445 * 2.2) If the tick has dependencies, restart the tick.
1448 void tick_nohz_idle_exit(void)
1450 struct tick_sched
*ts
= this_cpu_ptr(&tick_cpu_sched
);
1451 bool idle_active
, tick_stopped
;
1454 local_irq_disable();
1456 WARN_ON_ONCE(!tick_sched_flag_test(ts
, TS_FLAG_INIDLE
));
1457 WARN_ON_ONCE(ts
->timer_expires_base
);
1459 tick_sched_flag_clear(ts
, TS_FLAG_INIDLE
);
1460 idle_active
= tick_sched_flag_test(ts
, TS_FLAG_IDLE_ACTIVE
);
1461 tick_stopped
= tick_sched_flag_test(ts
, TS_FLAG_STOPPED
);
1463 if (idle_active
|| tick_stopped
)
1467 tick_nohz_stop_idle(ts
, now
);
1470 tick_nohz_idle_update_tick(ts
, now
);
1476 * In low-resolution mode, the tick handler must be implemented directly
1477 * at the clockevent level. hrtimer can't be used instead, because its
1478 * infrastructure actually relies on the tick itself as a backend in
1479 * low-resolution mode (see hrtimer_run_queues()).
1481 static void tick_nohz_lowres_handler(struct clock_event_device
*dev
)
1483 struct tick_sched
*ts
= this_cpu_ptr(&tick_cpu_sched
);
1485 dev
->next_event
= KTIME_MAX
;
1487 if (likely(tick_nohz_handler(&ts
->sched_timer
) == HRTIMER_RESTART
))
1488 tick_program_event(hrtimer_get_expires(&ts
->sched_timer
), 1);
1491 static inline void tick_nohz_activate(struct tick_sched
*ts
)
1493 if (!tick_nohz_enabled
)
1495 tick_sched_flag_set(ts
, TS_FLAG_NOHZ
);
1496 /* One update is enough */
1497 if (!test_and_set_bit(0, &tick_nohz_active
))
1498 timers_update_nohz();
1502 * tick_nohz_switch_to_nohz - switch to NOHZ mode
1504 static void tick_nohz_switch_to_nohz(void)
1506 if (!tick_nohz_enabled
)
1509 if (tick_switch_to_oneshot(tick_nohz_lowres_handler
))
1513 * Recycle the hrtimer in 'ts', so we can share the
1516 tick_setup_sched_timer(false);
1519 static inline void tick_nohz_irq_enter(void)
1521 struct tick_sched
*ts
= this_cpu_ptr(&tick_cpu_sched
);
1524 if (!tick_sched_flag_test(ts
, TS_FLAG_STOPPED
| TS_FLAG_IDLE_ACTIVE
))
1527 if (tick_sched_flag_test(ts
, TS_FLAG_IDLE_ACTIVE
))
1528 tick_nohz_stop_idle(ts
, now
);
1530 * If all CPUs are idle we may need to update a stale jiffies value.
1531 * Note nohz_full is a special case: a timekeeper is guaranteed to stay
1532 * alive but it might be busy looping with interrupts disabled in some
1533 * rare case (typically stop machine). So we must make sure we have a
1536 if (tick_sched_flag_test(ts
, TS_FLAG_STOPPED
))
1537 tick_nohz_update_jiffies(now
);
1542 static inline void tick_nohz_switch_to_nohz(void) { }
1543 static inline void tick_nohz_irq_enter(void) { }
1544 static inline void tick_nohz_activate(struct tick_sched
*ts
) { }
1546 #endif /* CONFIG_NO_HZ_COMMON */
1549 * Called from irq_enter() to notify about the possible interruption of idle()
1551 void tick_irq_enter(void)
1553 tick_check_oneshot_broadcast_this_cpu();
1554 tick_nohz_irq_enter();
static int sched_skew_tick;

static int __init skew_tick(char *str)
{
        get_option(&str, &sched_skew_tick);

        return 0;
}
early_param("skew_tick", skew_tick);
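/*
 * Example: with skew_tick=1, HZ=1000 and 8 possible CPUs, the offset
 * applied in tick_setup_sched_timer() below is
 * cpu * (TICK_NSEC / 2) / 8 = cpu * 62.5 us, which spreads the per-CPU
 * ticks out and reduces 'jiffies_lock' contention.
 */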
1568 * tick_setup_sched_timer - setup the tick emulation timer
1569 * @hrtimer: whether to use the hrtimer or not
1571 void tick_setup_sched_timer(bool hrtimer
)
1573 struct tick_sched
*ts
= this_cpu_ptr(&tick_cpu_sched
);
1575 /* Emulate tick processing via per-CPU hrtimers: */
1576 hrtimer_init(&ts
->sched_timer
, CLOCK_MONOTONIC
, HRTIMER_MODE_ABS_HARD
);
1578 if (IS_ENABLED(CONFIG_HIGH_RES_TIMERS
) && hrtimer
) {
1579 tick_sched_flag_set(ts
, TS_FLAG_HIGHRES
);
1580 ts
->sched_timer
.function
= tick_nohz_handler
;
1583 /* Get the next period (per-CPU) */
1584 hrtimer_set_expires(&ts
->sched_timer
, tick_init_jiffy_update());
1586 /* Offset the tick to avert 'jiffies_lock' contention. */
1587 if (sched_skew_tick
) {
1588 u64 offset
= TICK_NSEC
>> 1;
1589 do_div(offset
, num_possible_cpus());
1590 offset
*= smp_processor_id();
1591 hrtimer_add_expires_ns(&ts
->sched_timer
, offset
);
1594 hrtimer_forward_now(&ts
->sched_timer
, TICK_NSEC
);
1595 if (IS_ENABLED(CONFIG_HIGH_RES_TIMERS
) && hrtimer
)
1596 hrtimer_start_expires(&ts
->sched_timer
, HRTIMER_MODE_ABS_PINNED_HARD
);
1598 tick_program_event(hrtimer_get_expires(&ts
->sched_timer
), 1);
1599 tick_nohz_activate(ts
);
/*
 * Shut down the tick and make sure the CPU won't try to retake the timekeeping
 * duty before disabling IRQs in idle for the last time.
 */
void tick_sched_timer_dying(int cpu)
{
        struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu);
        ktime_t idle_sleeptime, iowait_sleeptime;
        unsigned long idle_calls, idle_sleeps;

        /* This must happen before hrtimers are migrated! */
        if (tick_sched_flag_test(ts, TS_FLAG_HIGHRES))
                hrtimer_cancel(&ts->sched_timer);

        idle_sleeptime = ts->idle_sleeptime;
        iowait_sleeptime = ts->iowait_sleeptime;
        idle_calls = ts->idle_calls;
        idle_sleeps = ts->idle_sleeps;
        memset(ts, 0, sizeof(*ts));
        ts->idle_sleeptime = idle_sleeptime;
        ts->iowait_sleeptime = iowait_sleeptime;
        ts->idle_calls = idle_calls;
        ts->idle_sleeps = idle_sleeps;
}
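/*
 * The sleep statistics are carried across the memset() above so that
 * get_cpu_idle_time_us()/get_cpu_iowait_time_us() don't report values
 * jumping backwards if the CPU is brought online again later.
 */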
1628 * Async notification about clocksource changes
1630 void tick_clock_notify(void)
1634 for_each_possible_cpu(cpu
)
1635 set_bit(0, &per_cpu(tick_cpu_sched
, cpu
).check_clocks
);
1639 * Async notification about clock event changes
1641 void tick_oneshot_notify(void)
1643 struct tick_sched
*ts
= this_cpu_ptr(&tick_cpu_sched
);
1645 set_bit(0, &ts
->check_clocks
);
1649 * Check if a change happened, which makes oneshot possible.
1651 * Called cyclically from the hrtimer softirq (driven by the timer
1652 * softirq). 'allow_nohz' signals that we can switch into low-res NOHZ
1653 * mode, because high resolution timers are disabled (either compile
1654 * or runtime). Called with interrupts disabled.
1656 int tick_check_oneshot_change(int allow_nohz
)
1658 struct tick_sched
*ts
= this_cpu_ptr(&tick_cpu_sched
);
1660 if (!test_and_clear_bit(0, &ts
->check_clocks
))
1663 if (tick_sched_flag_test(ts
, TS_FLAG_NOHZ
))
1666 if (!timekeeping_valid_for_hres() || !tick_is_oneshot_available())
1672 tick_nohz_switch_to_nohz();