2 * This code largely moved from arch/i386/kernel/time.c.
3 * See comments there for proper credits.
5 * 2004-06-25 Jesper Juhl
6 * moved mark_offset_tsc below cpufreq_delayed_get to avoid gcc 3.4
10 #include <linux/spinlock.h>
11 #include <linux/init.h>
12 #include <linux/timex.h>
13 #include <linux/errno.h>
14 #include <linux/cpufreq.h>
15 #include <linux/string.h>
16 #include <linux/jiffies.h>
18 #include <asm/timer.h>
20 /* processor.h for the tsc_disable flag */
21 #include <asm/processor.h>
24 #include "mach_timer.h"
28 #ifdef CONFIG_HPET_TIMER
29 static unsigned long hpet_usec_quotient
;
30 static unsigned long hpet_last
;
31 static struct timer_opts timer_tsc
;
34 static inline void cpufreq_delayed_get(void);
36 int tsc_disable __initdata
= 0;
38 extern spinlock_t i8253_lock
;
41 /* Number of usecs that the last interrupt was delayed */
42 static int delay_at_last_interrupt
;
44 static unsigned long last_tsc_low
; /* lsb 32 bits of Time Stamp Counter */
45 static unsigned long last_tsc_high
; /* msb 32 bits of Time Stamp Counter */
46 static unsigned long long monotonic_base
;
47 static seqlock_t monotonic_lock
= SEQLOCK_UNLOCKED
;
/*
 * Convert from cycles (64 bits) => nanoseconds (64 bits).
 *
 *	ns = cycles / (freq / ns_per_sec)
 *	ns = cycles * (ns_per_sec / freq)
 *	ns = cycles * (10^9 / (cpu_mhz * 10^6))
 *	ns = cycles * (10^3 / cpu_mhz)
 *
 * Then we use scaling math (suggested by george@mvista.com) to get:
 *	ns = cycles * (10^3 * SC / cpu_mhz) / SC
 *	ns = cycles * cyc2ns_scale / SC
 *
 * And since SC is a constant power of two, we can convert the div
 * into a shift.
 *			-johnstul@us.ibm.com "math is hard, lets go shopping!"
 */
static unsigned long cyc2ns_scale;
#define CYC2NS_SCALE_FACTOR 10 /* 2^10, carefully chosen */

/*
 * Recompute cyc2ns_scale for a CPU clock of cpu_mhz MHz:
 *	cyc2ns_scale = (10^3 << CYC2NS_SCALE_FACTOR) / cpu_mhz
 *
 * A cpu_mhz of 0 (e.g. a caller passing cpu_khz / 1000 with cpu_khz below
 * 1000) would divide by zero, so in that case the previous scale is kept.
 */
static inline void set_cyc2ns_scale(unsigned long cpu_mhz)
{
	if (!cpu_mhz)
		return;

	cyc2ns_scale = (1000 << CYC2NS_SCALE_FACTOR) / cpu_mhz;
}

/*
 * Convert a TSC cycle count to nanoseconds using the cached cyc2ns_scale:
 *	ns = (cyc * cyc2ns_scale) >> CYC2NS_SCALE_FACTOR
 */
static inline unsigned long long cycles_2_ns(unsigned long long cyc)
{
	return (cyc * cyc2ns_scale) >> CYC2NS_SCALE_FACTOR;
}
77 static int count2
; /* counter for mark_offset_tsc() */
79 /* Cached *multiplier* to convert TSC counts to microseconds.
80 * (see the equation below).
81 * Equal to 2^32 * (1 / (clocks per usec) ).
82 * Initialized in time_init.
84 static unsigned long fast_gettimeoffset_quotient
;
86 static unsigned long get_offset_tsc(void)
88 register unsigned long eax
, edx
;
90 /* Read the Time Stamp Counter */
94 /* .. relative to previous jiffy (32 bits is enough) */
95 eax
-= last_tsc_low
; /* tsc_low delta */
98 * Time offset = (tsc_low delta) * fast_gettimeoffset_quotient
99 * = (tsc_low delta) * (usecs_per_clock)
100 * = (tsc_low delta) * (usecs_per_jiffy / clocks_per_jiffy)
102 * Using a mull instead of a divl saves up to 31 clock cycles
103 * in the critical path.
107 :"=a" (eax
), "=d" (edx
)
108 :"rm" (fast_gettimeoffset_quotient
),
111 /* our adjusted time offset in microseconds */
112 return delay_at_last_interrupt
+ edx
;
115 static unsigned long long monotonic_clock_tsc(void)
117 unsigned long long last_offset
, this_offset
, base
;
120 /* atomically read monotonic base & last_offset */
122 seq
= read_seqbegin(&monotonic_lock
);
123 last_offset
= ((unsigned long long)last_tsc_high
<<32)|last_tsc_low
;
124 base
= monotonic_base
;
125 } while (read_seqretry(&monotonic_lock
, seq
));
127 /* Read the Time Stamp Counter */
128 rdtscll(this_offset
);
130 /* return the value in ns */
131 return base
+ cycles_2_ns(this_offset
- last_offset
);
135 * Scheduler clock - returns current time in nanosec units.
137 unsigned long long sched_clock(void)
139 unsigned long long this_offset
;
142 * In the NUMA case we don't use the TSC, as the TSCs are not
143 * synchronized across all CPUs.
148 /* no locking but a rare wrong value is not a big deal */
149 return jiffies_64
* (1000000000 / HZ
);
151 /* Read the Time Stamp Counter */
152 rdtscll(this_offset
);
154 /* return the value in ns */
155 return cycles_2_ns(this_offset
);
158 static void delay_tsc(unsigned long loops
)
160 unsigned long bclock
, now
;
167 } while ((now
-bclock
) < loops
);
170 #ifdef CONFIG_HPET_TIMER
171 static void mark_offset_tsc_hpet(void)
173 unsigned long long this_offset
, last_offset
;
174 unsigned long offset
, temp
, hpet_current
;
176 write_seqlock(&monotonic_lock
);
177 last_offset
= ((unsigned long long)last_tsc_high
<<32)|last_tsc_low
;
179 * It is important that these two operations happen almost at
180 * the same time. We do the RDTSC stuff first, since it's
181 * faster. To avoid any inconsistencies, we need interrupts
185 * Interrupts are just disabled locally since the timer irq
186 * has the SA_INTERRUPT flag set. -arca
188 /* read Pentium cycle counter */
190 hpet_current
= hpet_readl(HPET_COUNTER
);
191 rdtsc(last_tsc_low
, last_tsc_high
);
193 /* lost tick compensation */
194 offset
= hpet_readl(HPET_T0_CMP
) - hpet_tick
;
195 if (unlikely(((offset
- hpet_last
) > hpet_tick
) && (hpet_last
!= 0))) {
196 int lost_ticks
= (offset
- hpet_last
) / hpet_tick
;
197 jiffies_64
+= lost_ticks
;
199 hpet_last
= hpet_current
;
201 /* update the monotonic base value */
202 this_offset
= ((unsigned long long)last_tsc_high
<<32)|last_tsc_low
;
203 monotonic_base
+= cycles_2_ns(this_offset
- last_offset
);
204 write_sequnlock(&monotonic_lock
);
206 /* calculate delay_at_last_interrupt */
208 * Time offset = (hpet delta) * ( usecs per HPET clock )
209 * = (hpet delta) * ( usecs per tick / HPET clocks per tick)
210 * = (hpet delta) * ( hpet_usec_quotient ) / (2^32)
212 * hpet_usec_quotient = (2^32 * usecs per tick)/HPET clocks per tick
214 delay_at_last_interrupt
= hpet_current
- offset
;
215 ASM_MUL64_REG(temp
, delay_at_last_interrupt
,
216 hpet_usec_quotient
, delay_at_last_interrupt
);
221 #ifdef CONFIG_CPU_FREQ
222 #include <linux/workqueue.h>
224 static unsigned int cpufreq_delayed_issched
= 0;
225 static unsigned int cpufreq_init
= 0;
226 static struct work_struct cpufreq_delayed_get_work
;
228 static void handle_cpufreq_delayed_get(void *v
)
231 for_each_online_cpu(cpu
) {
234 cpufreq_delayed_issched
= 0;
237 /* if we notice lost ticks, schedule a call to cpufreq_get() as it tries
238 * to verify the CPU frequency the timing core thinks the CPU is running
239 * at is still correct.
241 static inline void cpufreq_delayed_get(void)
243 if (cpufreq_init
&& !cpufreq_delayed_issched
) {
244 cpufreq_delayed_issched
= 1;
245 printk(KERN_DEBUG
"Losing some ticks... checking if CPU frequency changed.\n");
246 schedule_work(&cpufreq_delayed_get_work
);
250 /* If the CPU frequency is scaled, TSC-based delays will need a different
251 * loops_per_jiffy value to function properly.
254 static unsigned int ref_freq
= 0;
255 static unsigned long loops_per_jiffy_ref
= 0;
258 static unsigned long fast_gettimeoffset_ref
= 0;
259 static unsigned long cpu_khz_ref
= 0;
263 time_cpufreq_notifier(struct notifier_block
*nb
, unsigned long val
,
266 struct cpufreq_freqs
*freq
= data
;
268 if (val
!= CPUFREQ_RESUMECHANGE
)
269 write_seqlock_irq(&xtime_lock
);
271 ref_freq
= freq
->old
;
272 loops_per_jiffy_ref
= cpu_data
[freq
->cpu
].loops_per_jiffy
;
274 fast_gettimeoffset_ref
= fast_gettimeoffset_quotient
;
275 cpu_khz_ref
= cpu_khz
;
279 if ((val
== CPUFREQ_PRECHANGE
&& freq
->old
< freq
->new) ||
280 (val
== CPUFREQ_POSTCHANGE
&& freq
->old
> freq
->new) ||
281 (val
== CPUFREQ_RESUMECHANGE
)) {
282 if (!(freq
->flags
& CPUFREQ_CONST_LOOPS
))
283 cpu_data
[freq
->cpu
].loops_per_jiffy
= cpufreq_scale(loops_per_jiffy_ref
, ref_freq
, freq
->new);
286 cpu_khz
= cpufreq_scale(cpu_khz_ref
, ref_freq
, freq
->new);
288 if (!(freq
->flags
& CPUFREQ_CONST_LOOPS
)) {
289 fast_gettimeoffset_quotient
= cpufreq_scale(fast_gettimeoffset_ref
, freq
->new, ref_freq
);
290 set_cyc2ns_scale(cpu_khz
/1000);
296 if (val
!= CPUFREQ_RESUMECHANGE
)
297 write_sequnlock_irq(&xtime_lock
);
302 static struct notifier_block time_cpufreq_notifier_block
= {
303 .notifier_call
= time_cpufreq_notifier
307 static int __init
cpufreq_tsc(void)
310 INIT_WORK(&cpufreq_delayed_get_work
, handle_cpufreq_delayed_get
, NULL
);
311 ret
= cpufreq_register_notifier(&time_cpufreq_notifier_block
,
312 CPUFREQ_TRANSITION_NOTIFIER
);
317 core_initcall(cpufreq_tsc
);
319 #else /* CONFIG_CPU_FREQ */
/* !CONFIG_CPU_FREQ variant: there is no frequency re-check to schedule. */
static inline void cpufreq_delayed_get(void) { }
323 static void mark_offset_tsc(void)
325 unsigned long lost
,delay
;
326 unsigned long delta
= last_tsc_low
;
329 static int count1
= 0;
330 unsigned long long this_offset
, last_offset
;
331 static int lost_count
= 0;
333 write_seqlock(&monotonic_lock
);
334 last_offset
= ((unsigned long long)last_tsc_high
<<32)|last_tsc_low
;
336 * It is important that these two operations happen almost at
337 * the same time. We do the RDTSC stuff first, since it's
338 * faster. To avoid any inconsistencies, we need interrupts
343 * Interrupts are just disabled locally since the timer irq
344 * has the SA_INTERRUPT flag set. -arca
347 /* read Pentium cycle counter */
349 rdtsc(last_tsc_low
, last_tsc_high
);
351 spin_lock(&i8253_lock
);
352 outb_p(0x00, PIT_MODE
); /* latch the count ASAP */
354 count
= inb_p(PIT_CH0
); /* read the latched count */
355 count
|= inb(PIT_CH0
) << 8;
358 * VIA686a test code... reset the latch if count > max + 1
359 * from timer_pit.c - cjb
362 outb_p(0x34, PIT_MODE
);
363 outb_p(LATCH
& 0xff, PIT_CH0
);
364 outb(LATCH
>> 8, PIT_CH0
);
368 spin_unlock(&i8253_lock
);
370 if (pit_latch_buggy
) {
371 /* get the center (median) value of the last 3 latched counts */
372 if ((count2
>= count
&& count
>= count1
)
373 || (count1
>= count
&& count
>= count2
)) {
374 count2
= count1
; count1
= count
;
375 } else if ((count1
>= count2
&& count2
>= count
)
376 || (count
>= count2
&& count2
>= count1
)) {
377 countmp
= count
;count
= count2
;
378 count2
= count1
;count1
= countmp
;
380 count2
= count1
; count1
= count
; count
= count1
;
384 /* lost tick compensation */
385 delta
= last_tsc_low
- delta
;
387 register unsigned long eax
, edx
;
390 :"=a" (eax
), "=d" (edx
)
391 :"rm" (fast_gettimeoffset_quotient
),
395 delta
+= delay_at_last_interrupt
;
396 lost
= delta
/(1000000/HZ
);
397 delay
= delta
%(1000000/HZ
);
399 jiffies_64
+= lost
-1;
401 /* sanity check to ensure we're not always losing ticks */
402 if (lost_count
++ > 100) {
403 printk(KERN_WARNING
"Losing too many ticks!\n");
404 printk(KERN_WARNING
"TSC cannot be used as a timesource. \n");
405 printk(KERN_WARNING
"Possible reasons for this are:\n");
406 printk(KERN_WARNING
" You're running with Speedstep,\n");
407 printk(KERN_WARNING
" You don't have DMA enabled for your hard disk (see hdparm),\n");
408 printk(KERN_WARNING
" Incorrect TSC synchronization on an SMP system (see dmesg).\n");
409 printk(KERN_WARNING
"Falling back to a sane timesource now.\n");
413 /* ... but give the TSC a fair chance */
415 cpufreq_delayed_get();
418 /* update the monotonic base value */
419 this_offset
= ((unsigned long long)last_tsc_high
<<32)|last_tsc_low
;
420 monotonic_base
+= cycles_2_ns(this_offset
- last_offset
);
421 write_sequnlock(&monotonic_lock
);
423 /* calculate delay_at_last_interrupt */
424 count
= ((LATCH
-1) - count
) * TICK_SIZE
;
425 delay_at_last_interrupt
= (count
+ LATCH
/2) / LATCH
;
427 /* catch corner case where tick rollover occurred
428 * between tsc and pit reads (as noted when
429 * usec delta is > 90% # of usecs/tick)
431 if (lost
&& abs(delay
- delay_at_last_interrupt
) > (900000/HZ
))
435 static int __init
init_tsc(char* override
)
438 /* check clock override */
439 if (override
[0] && strncmp(override
,"tsc",3)) {
440 #ifdef CONFIG_HPET_TIMER
441 if (is_hpet_enabled()) {
442 printk(KERN_ERR
"Warning: clock= override failed. Defaulting to tsc\n");
451 * If we have APM enabled or the CPU clock speed is variable
452 * (CPU stops clock on HLT or slows clock to save power)
453 * then the TSC timestamps may diverge by up to 1 jiffy from
454 * 'real time' but nothing will break.
455 * The most frequent case is that the CPU is "woken" from a halt
456 * state by the timer interrupt itself, so we get 0 error. In the
457 * rare cases where a driver would "wake" the CPU and request a
458 * timestamp, the maximum error is < 1 jiffy. But timestamps are
459 * still perfectly ordered.
460 * Note that the TSC counter will be reset if APM suspends
461 * to disk; this won't break the kernel, though, 'cuz we're
462 * smart. See arch/i386/kernel/apm.c.
465 * Firstly we have to do a CPU check for chips with
466 * a potentially buggy TSC. At this point we haven't run
467 * the ident/bugs checks so we must run this hook as it
468 * may turn off the TSC flag.
470 * NOTE: this doesn't yet handle SMP 486 machines where only
471 * some CPUs have a TSC. That's never worked and nobody has
472 * moaned; if you have the only one in the world - you fix it!
475 count2
= LATCH
; /* initialize counter for mark_offset_tsc() */
478 unsigned long tsc_quotient
;
479 #ifdef CONFIG_HPET_TIMER
480 if (is_hpet_enabled() && hpet_use_timer
) {
481 unsigned long result
, remain
;
482 printk("Using TSC for gettimeofday\n");
483 tsc_quotient
= calibrate_tsc_hpet(NULL
);
484 timer_tsc
.mark_offset
= &mark_offset_tsc_hpet
;
486 * Math to calculate hpet to usec multiplier
487 * Look for the comments at get_offset_tsc_hpet()
489 ASM_DIV64_REG(result
, remain
, hpet_tick
,
490 0, KERNEL_TICK_USEC
);
491 if (remain
> (hpet_tick
>> 1))
492 result
++; /* rounding the result */
494 hpet_usec_quotient
= result
;
498 tsc_quotient
= calibrate_tsc();
502 fast_gettimeoffset_quotient
= tsc_quotient
;
505 * We could be more selective here I suspect
506 * and just enable this for the next intel chips ?
508 /* report CPU clock rate in Hz.
509 * The formula is (10^6 * 2^32) / (2^32 * 1 / (clocks/us)) =
510 * clock/second. Our precision is about 100 ppm.
512 { unsigned long eax
=0, edx
=1000;
514 :"=a" (cpu_khz
), "=d" (edx
)
516 "0" (eax
), "1" (edx
));
517 printk("Detected %lu.%03lu MHz processor.\n", cpu_khz
/ 1000, cpu_khz
% 1000);
519 set_cyc2ns_scale(cpu_khz
/1000);
526 #ifndef CONFIG_X86_TSC
527 /* disable flag for tsc. Takes effect by clearing the TSC cpu flag
529 static int __init
tsc_setup(char *str
)
535 static int __init
tsc_setup(char *str
)
537 printk(KERN_WARNING
"notsc: Kernel compiled with CONFIG_X86_TSC, "
538 "cannot disable TSC.\n");
542 __setup("notsc", tsc_setup
);
546 /************************************************************/
548 /* tsc timer_opts struct */
549 static struct timer_opts timer_tsc
= {
551 .mark_offset
= mark_offset_tsc
,
552 .get_offset
= get_offset_tsc
,
553 .monotonic_clock
= monotonic_clock_tsc
,
557 struct init_timer_opts __initdata timer_tsc_init
= {