/*
 * VMI paravirtual timer support routines.
 *
 * Copyright (C) 2005, VMware, Inc.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
 * NON INFRINGEMENT.  See the GNU General Public License for more
 * details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 *
 * Send feedback to dhecht@vmware.com
 *
 */

/*
 * Portions of this code from arch/i386/kernel/timers/timer_tsc.c.
 * Portions of the CONFIG_NO_IDLE_HZ code from arch/s390/kernel/time.c.
 * See comments there for proper credits.
 */
31 #include <linux/spinlock.h>
32 #include <linux/init.h>
33 #include <linux/errno.h>
34 #include <linux/jiffies.h>
35 #include <linux/interrupt.h>
36 #include <linux/kernel_stat.h>
37 #include <linux/rcupdate.h>
38 #include <linux/clocksource.h>
40 #include <asm/timer.h>
43 #include <asm/div64.h>
44 #include <asm/timer.h>
48 #include <asm/vmi_time.h>
50 #include <mach_timer.h>
/*
 * Alarm wiring flag used when (re)arming the per-cpu alarm in the
 * CONFIG_NO_IDLE_HZ paths: LVTT when a local APIC is configured,
 * IRQ0 otherwise.
 */
#ifdef CONFIG_X86_LOCAL_APIC
#define VMI_ALARM_WIRING VMI_ALARM_WIRED_LVTT
#else
#define VMI_ALARM_WIRING VMI_ALARM_WIRED_IRQ0
#endif
59 /* Cached VMI operations */
60 struct vmi_timer_ops vmi_timer_ops
;
#ifdef CONFIG_NO_IDLE_HZ

/*
 * /proc/sys/kernel/hz_timer state.
 * NOTE(review): the declaration under this comment was missing from this
 * copy of the file.  It is restored as a plain int because
 * vmi_stop_hz_timer() compares sysctl_hz_timer against 0; confirm the
 * type and linkage against the authoritative source.
 */
int sysctl_hz_timer;

/* Number of times the hz timer was re-enabled after a no-hz idle period. */
static DEFINE_PER_CPU(unsigned long, vmi_idle_no_hz_irqs);
/* Total jiffies spent with the periodic alarm stopped. */
static DEFINE_PER_CPU(unsigned long, vmi_idle_no_hz_jiffies);
/* Value of jiffies when the periodic alarm was last stopped. */
static DEFINE_PER_CPU(unsigned long, idle_start_jiffies);

#endif /* CONFIG_NO_IDLE_HZ */
74 /* Number of alarms per second. By default this is CONFIG_VMI_ALARM_HZ. */
75 static int alarm_hz
= CONFIG_VMI_ALARM_HZ
;
77 /* Cache of the value get_cycle_frequency / HZ. */
78 static signed long long cycles_per_jiffy
;
80 /* Cache of the value get_cycle_frequency / alarm_hz. */
81 static signed long long cycles_per_alarm
;
83 /* The number of cycles accounted for by the 'jiffies'/'xtime' count.
84 * Protected by xtime_lock. */
85 static unsigned long long real_cycles_accounted_system
;
87 /* The number of cycles accounted for by update_process_times(), per cpu. */
88 static DEFINE_PER_CPU(unsigned long long, process_times_cycles_accounted_cpu
);
90 /* The number of stolen cycles accounted, per cpu. */
91 static DEFINE_PER_CPU(unsigned long long, stolen_cycles_accounted_cpu
);
94 static cycle_t
read_real_cycles(void)
96 return vmi_timer_ops
.get_cycle_counter(VMI_CYCLES_REAL
);
99 static cycle_t
read_available_cycles(void)
101 return vmi_timer_ops
.get_cycle_counter(VMI_CYCLES_AVAILABLE
);
105 static cycle_t
read_stolen_cycles(void)
107 return vmi_timer_ops
.get_cycle_counter(VMI_CYCLES_STOLEN
);
111 static struct clocksource clocksource_vmi
= {
114 .read
= read_real_cycles
,
115 .mask
= CLOCKSOURCE_MASK(64),
116 .mult
= 0, /* to be set */
118 .flags
= CLOCK_SOURCE_IS_CONTINUOUS
,
122 /* Timer interrupt handler. */
123 static irqreturn_t
vmi_timer_interrupt(int irq
, void *dev_id
);
125 static struct irqaction vmi_timer_irq
= {
126 .handler
= vmi_timer_interrupt
,
127 .flags
= IRQF_DISABLED
,
128 .mask
= CPU_MASK_NONE
,
133 static int __init
vmi_timer_alarm_rate_setup(char* str
)
136 if (get_option(&str
, &alarm_rate
) == 1 && alarm_rate
> 0) {
137 alarm_hz
= alarm_rate
;
138 printk(KERN_WARNING
"VMI timer alarm HZ set to %d\n", alarm_hz
);
142 __setup("vmi_timer_alarm_hz=", vmi_timer_alarm_rate_setup
);
146 static void vmi_get_wallclock_ts(struct timespec
*ts
)
148 unsigned long long wallclock
;
149 wallclock
= vmi_timer_ops
.get_wallclock(); // nsec units
150 ts
->tv_nsec
= do_div(wallclock
, 1000000000);
151 ts
->tv_sec
= wallclock
;
154 unsigned long vmi_get_wallclock(void)
157 vmi_get_wallclock_ts(&ts
);
161 int vmi_set_wallclock(unsigned long now
)
/* Scheduler clock source: the current available-cycles counter value. */
unsigned long long vmi_get_sched_cycles(void)
{
	return read_available_cycles();
}
171 unsigned long vmi_cpu_khz(void)
173 unsigned long long khz
;
175 khz
= vmi_timer_ops
.get_cycle_frequency();
176 (void)do_div(khz
, 1000);
180 void __init
vmi_time_init(void)
182 unsigned long long cycles_per_sec
, cycles_per_msec
;
185 local_irq_save(flags
);
186 setup_irq(0, &vmi_timer_irq
);
187 #ifdef CONFIG_X86_LOCAL_APIC
188 set_intr_gate(LOCAL_TIMER_VECTOR
, apic_vmi_timer_interrupt
);
191 real_cycles_accounted_system
= read_real_cycles();
192 per_cpu(process_times_cycles_accounted_cpu
, 0) = read_available_cycles();
194 cycles_per_sec
= vmi_timer_ops
.get_cycle_frequency();
195 cycles_per_jiffy
= cycles_per_sec
;
196 (void)do_div(cycles_per_jiffy
, HZ
);
197 cycles_per_alarm
= cycles_per_sec
;
198 (void)do_div(cycles_per_alarm
, alarm_hz
);
199 cycles_per_msec
= cycles_per_sec
;
200 (void)do_div(cycles_per_msec
, 1000);
202 printk(KERN_WARNING
"VMI timer cycles/sec = %llu ; cycles/jiffy = %llu ;"
203 "cycles/alarm = %llu\n", cycles_per_sec
, cycles_per_jiffy
,
206 clocksource_vmi
.mult
= clocksource_khz2mult(cycles_per_msec
,
207 clocksource_vmi
.shift
);
208 if (clocksource_register(&clocksource_vmi
))
209 printk(KERN_WARNING
"Error registering VMITIME clocksource.");
212 outb_p(0x3a, PIT_MODE
); /* binary, mode 5, LSB/MSB, ch 0 */
214 /* schedule the alarm. do this in phase with process_times_cycles_accounted_cpu
215 * reduce the latency calling update_process_times. */
216 vmi_timer_ops
.set_alarm(
217 VMI_ALARM_WIRED_IRQ0
| VMI_ALARM_IS_PERIODIC
| VMI_CYCLES_AVAILABLE
,
218 per_cpu(process_times_cycles_accounted_cpu
, 0) + cycles_per_alarm
,
221 local_irq_restore(flags
);
#ifdef CONFIG_X86_LOCAL_APIC

/*
 * Switch the boot cpu from the IRQ0-wired alarm set up by
 * vmi_time_init() to the LVTT-wired alarm.
 *
 * NOTE(review): the embedded numbering shows one statement dropped right
 * after the opening brace and one right before the closing brace in this
 * copy (possibly an interrupt disable/enable pair around the switch).
 * They are not recoverable from here -- restore from the authoritative
 * source.
 */
void __init vmi_timer_setup_boot_alarm(void)
{
	/* Route the interrupt to the correct vector. */
	apic_write_around(APIC_LVTT, LOCAL_TIMER_VECTOR);

	/* Cancel the IRQ0 wired alarm, and setup the LVTT alarm.
	 * NOTE(review): the period argument was missing from this copy;
	 * cycles_per_alarm restored -- confirm. */
	vmi_timer_ops.cancel_alarm(VMI_CYCLES_AVAILABLE);
	vmi_timer_ops.set_alarm(
		      VMI_ALARM_WIRED_LVTT | VMI_ALARM_IS_PERIODIC | VMI_CYCLES_AVAILABLE,
		      per_cpu(process_times_cycles_accounted_cpu, 0) + cycles_per_alarm,
		      cycles_per_alarm);
}

/* Initialize the time accounting variables for an AP on an SMP system.
 * Also, set the local alarm for the AP. */
void __devinit vmi_timer_setup_secondary_alarm(void)
{
	int cpu = smp_processor_id();

	/* Route the interrupt to the correct vector. */
	apic_write_around(APIC_LVTT, LOCAL_TIMER_VECTOR);

	/* Start this cpu's process-time accounting from "now". */
	per_cpu(process_times_cycles_accounted_cpu, cpu) = read_available_cycles();

	/* NOTE(review): the period argument was missing from this copy;
	 * cycles_per_alarm restored -- confirm. */
	vmi_timer_ops.set_alarm(
		      VMI_ALARM_WIRED_LVTT | VMI_ALARM_IS_PERIODIC | VMI_CYCLES_AVAILABLE,
		      per_cpu(process_times_cycles_accounted_cpu, cpu) + cycles_per_alarm,
		      cycles_per_alarm);
}

#endif /* CONFIG_X86_LOCAL_APIC */
261 /* Update system wide (real) time accounting (e.g. jiffies, xtime). */
262 static void vmi_account_real_cycles(unsigned long long cur_real_cycles
)
264 long long cycles_not_accounted
;
266 write_seqlock(&xtime_lock
);
268 cycles_not_accounted
= cur_real_cycles
- real_cycles_accounted_system
;
269 while (cycles_not_accounted
>= cycles_per_jiffy
) {
270 /* systems wide jiffies. */
273 cycles_not_accounted
-= cycles_per_jiffy
;
274 real_cycles_accounted_system
+= cycles_per_jiffy
;
277 write_sequnlock(&xtime_lock
);
280 /* Update per-cpu process times. */
281 static void vmi_account_process_times_cycles(struct pt_regs
*regs
, int cpu
,
282 unsigned long long cur_process_times_cycles
)
284 long long cycles_not_accounted
;
285 cycles_not_accounted
= cur_process_times_cycles
-
286 per_cpu(process_times_cycles_accounted_cpu
, cpu
);
288 while (cycles_not_accounted
>= cycles_per_jiffy
) {
289 /* Account time to the current process. This includes
290 * calling into the scheduler to decrement the timeslice
291 * and possibly reschedule.*/
292 update_process_times(user_mode(regs
));
293 /* XXX handle /proc/profile multiplier. */
294 profile_tick(CPU_PROFILING
);
296 cycles_not_accounted
-= cycles_per_jiffy
;
297 per_cpu(process_times_cycles_accounted_cpu
, cpu
) += cycles_per_jiffy
;
#ifdef CONFIG_NO_IDLE_HZ
/*
 * Update per-cpu idle times.  Used when a no-hz halt is ended.
 *
 * @cpu: cpu whose accounting is advanced.
 * @cur_process_times_cycles: current available-cycles counter reading.
 *
 * Counts the whole jiffies elapsed since this cpu's
 * process_times_cycles_accounted_cpu value and charges them to the idle
 * task in a single call.
 */
static void vmi_account_no_hz_idle_cycles(int cpu,
					  unsigned long long cur_process_times_cycles)
{
	long long cycles_not_accounted;
	unsigned long no_idle_hz_jiffies = 0;

	cycles_not_accounted = cur_process_times_cycles -
		per_cpu(process_times_cycles_accounted_cpu, cpu);

	while (cycles_not_accounted >= cycles_per_jiffy) {
		no_idle_hz_jiffies++;
		cycles_not_accounted -= cycles_per_jiffy;
		per_cpu(process_times_cycles_accounted_cpu, cpu) += cycles_per_jiffy;
	}
	/* Account time to the idle process. */
	account_steal_time(idle_task(cpu), jiffies_to_cputime(no_idle_hz_jiffies));
}
#endif /* CONFIG_NO_IDLE_HZ */
322 /* Update per-cpu stolen time. */
323 static void vmi_account_stolen_cycles(int cpu
,
324 unsigned long long cur_real_cycles
,
325 unsigned long long cur_avail_cycles
)
327 long long stolen_cycles_not_accounted
;
328 unsigned long stolen_jiffies
= 0;
330 if (cur_real_cycles
< cur_avail_cycles
)
333 stolen_cycles_not_accounted
= cur_real_cycles
- cur_avail_cycles
-
334 per_cpu(stolen_cycles_accounted_cpu
, cpu
);
336 while (stolen_cycles_not_accounted
>= cycles_per_jiffy
) {
338 stolen_cycles_not_accounted
-= cycles_per_jiffy
;
339 per_cpu(stolen_cycles_accounted_cpu
, cpu
) += cycles_per_jiffy
;
341 /* HACK: pass NULL to force time onto cpustat->steal. */
342 account_steal_time(NULL
, jiffies_to_cputime(stolen_jiffies
));
/*
 * Body of either IRQ0 interrupt handler (UP no local-APIC) or
 * local-APIC LVTT interrupt handler (UP & local-APIC or SMP).
 *
 * @cpu: cpu taking the interrupt.
 *
 * Samples both cycle counters once, then feeds the same readings to the
 * system-wide, per-cpu process and stolen time accounting.
 */
static void vmi_local_timer_interrupt(int cpu)
{
	unsigned long long cur_real_cycles, cur_process_times_cycles;

	cur_real_cycles = read_real_cycles();
	cur_process_times_cycles = read_available_cycles();
	/* Update system wide (real) time state (xtime, jiffies). */
	vmi_account_real_cycles(cur_real_cycles);
	/* Update per-cpu process times. */
	vmi_account_process_times_cycles(get_irq_regs(), cpu, cur_process_times_cycles);
	/* Update time stolen from this cpu by the hypervisor. */
	vmi_account_stolen_cycles(cpu, cur_real_cycles, cur_process_times_cycles);
}
361 #ifdef CONFIG_NO_IDLE_HZ
363 /* Must be called only from idle loop, with interrupts disabled. */
364 int vmi_stop_hz_timer(void)
366 /* Note that cpu_set, cpu_clear are (SMP safe) atomic on x86. */
368 unsigned long seq
, next
;
369 unsigned long long real_cycles_expiry
;
370 int cpu
= smp_processor_id();
372 BUG_ON(!irqs_disabled());
373 if (sysctl_hz_timer
!= 0)
376 cpu_set(cpu
, nohz_cpu_mask
);
379 if (rcu_needs_cpu(cpu
) || local_softirq_pending() ||
380 (next
= next_timer_interrupt(),
381 time_before_eq(next
, jiffies
+ HZ
/CONFIG_VMI_ALARM_HZ
))) {
382 cpu_clear(cpu
, nohz_cpu_mask
);
386 /* Convert jiffies to the real cycle counter. */
388 seq
= read_seqbegin(&xtime_lock
);
389 real_cycles_expiry
= real_cycles_accounted_system
+
390 (long)(next
- jiffies
) * cycles_per_jiffy
;
391 } while (read_seqretry(&xtime_lock
, seq
));
393 /* This cpu is going idle. Disable the periodic alarm. */
394 vmi_timer_ops
.cancel_alarm(VMI_CYCLES_AVAILABLE
);
395 per_cpu(idle_start_jiffies
, cpu
) = jiffies
;
396 /* Set the real time alarm to expire at the next event. */
397 vmi_timer_ops
.set_alarm(
398 VMI_ALARM_WIRING
| VMI_ALARM_IS_ONESHOT
| VMI_CYCLES_REAL
,
399 real_cycles_expiry
, 0);
403 static void vmi_reenable_hz_timer(int cpu
)
405 /* For /proc/vmi/info idle_hz stat. */
406 per_cpu(vmi_idle_no_hz_jiffies
, cpu
) += jiffies
- per_cpu(idle_start_jiffies
, cpu
);
407 per_cpu(vmi_idle_no_hz_irqs
, cpu
)++;
409 /* Don't bother explicitly cancelling the one-shot alarm -- at
410 * worse we will receive a spurious timer interrupt. */
411 vmi_timer_ops
.set_alarm(
412 VMI_ALARM_WIRING
| VMI_ALARM_IS_PERIODIC
| VMI_CYCLES_AVAILABLE
,
413 per_cpu(process_times_cycles_accounted_cpu
, cpu
) + cycles_per_alarm
,
415 /* Indicate this cpu is no longer nohz idle. */
416 cpu_clear(cpu
, nohz_cpu_mask
);
419 /* Called from interrupt handlers when (local) HZ timer is disabled. */
420 void vmi_account_time_restart_hz_timer(void)
422 unsigned long long cur_real_cycles
, cur_process_times_cycles
;
423 int cpu
= smp_processor_id();
425 BUG_ON(!irqs_disabled());
426 /* Account the time during which the HZ timer was disabled. */
427 cur_real_cycles
= read_real_cycles();
428 cur_process_times_cycles
= read_available_cycles();
429 /* Update system wide (real) time state (xtime, jiffies). */
430 vmi_account_real_cycles(cur_real_cycles
);
431 /* Update per-cpu idle times. */
432 vmi_account_no_hz_idle_cycles(cpu
, cur_process_times_cycles
);
433 /* Update time stolen from this cpu by the hypervisor. */
434 vmi_account_stolen_cycles(cpu
, cur_real_cycles
, cur_process_times_cycles
);
435 /* Reenable the hz timer. */
436 vmi_reenable_hz_timer(cpu
);
439 #endif /* CONFIG_NO_IDLE_HZ */
441 /* UP (and no local-APIC) VMI-timer alarm interrupt handler.
442 * Handler for IRQ0. Not used when SMP or X86_LOCAL_APIC after
443 * APIC setup and setup_boot_vmi_alarm() is called. */
444 static irqreturn_t
vmi_timer_interrupt(int irq
, void *dev_id
)
446 vmi_local_timer_interrupt(smp_processor_id());
#ifdef CONFIG_X86_LOCAL_APIC

/*
 * SMP VMI-timer alarm interrupt handler. Handler for LVTT vector.
 * Also used in UP when CONFIG_X86_LOCAL_APIC.
 * The wrapper code is from arch/i386/kernel/apic.c#smp_apic_timer_interrupt.
 *
 * @regs: interrupted register state; published via set_irq_regs() for
 *        the duration of the handler and restored on exit.
 *
 * NOTE(review): the three calls marked "restored" below were missing
 * from this copy; each is reconstructed from the comment that precedes
 * it -- confirm against the authoritative source.
 */
void smp_apic_vmi_timer_interrupt(struct pt_regs *regs)
{
	struct pt_regs *old_regs = set_irq_regs(regs);
	int cpu = smp_processor_id();

	/*
	 * the NMI deadlock-detector uses this.
	 */
	per_cpu(irq_stat, cpu).apic_timer_irqs++;

	/*
	 * NOTE! We'd better ACK the irq immediately,
	 * because timer handling can be slow.
	 */
	ack_APIC_irq();		/* restored */

	/*
	 * update_process_times() expects us to have done irq_enter().
	 * Besides, if we don't timer interrupts ignore the global
	 * interrupt lock, which is the WrongThing (tm) to do.
	 */
	irq_enter();		/* restored */
	vmi_local_timer_interrupt(cpu);
	irq_exit();		/* restored */
	set_irq_regs(old_regs);
}

#endif /* CONFIG_X86_LOCAL_APIC */