arch/powerpc/kernel/time.c

   1 /*
   2  * Common time routines among all ppc machines.
   3  *
   4  * Written by Cort Dougan (cort@cs.nmt.edu) to merge
   5  * Paul Mackerras' version and mine for PReP and Pmac.
   6  * MPC8xx/MBX changes by Dan Malek (dmalek@jlc.net).
   7  * Converted for 64-bit by Mike Corrigan (mikejc@us.ibm.com)
   8  *
   9  * First round of bugfixes by Gabriel Paubert (paubert@iram.es)
  10  * to make clock more stable (2.4.0-test5). The only thing
  11  * that this code assumes is that the timebases have been synchronized
  12  * by firmware on SMP and are never stopped (never do sleep
  13  * on SMP then, nap and doze are OK).
  14  *
  15  * Speeded up do_gettimeofday by getting rid of references to
  16  * xtime (which required locks for consistency). (mikejc@us.ibm.com)
  17  *
  18  * TODO (not necessarily in this file):
  19  * - improve precision and reproducibility of timebase frequency
  20  * measurement at boot time.
  21  * - for astronomical applications: add a new function to get
  22  * non ambiguous timestamps even around leap seconds. This needs
  23  * a new timestamp format and a good name.
  24  *
  25  * 1997-09-10  Updated NTP code according to technical memorandum Jan '96
  26  *             "A Kernel Model for Precision Timekeeping" by Dave Mills
  27  *
  28  *      This program is free software; you can redistribute it and/or
  29  *      modify it under the terms of the GNU General Public License
  30  *      as published by the Free Software Foundation; either version
  31  *      2 of the License, or (at your option) any later version.
  32  */
  33
  34 #include <linux/errno.h>
  35 #include <linux/export.h>
  36 #include <linux/sched.h>
  37 #include <linux/kernel.h>
  38 #include <linux/param.h>
  39 #include <linux/string.h>
  40 #include <linux/mm.h>
  41 #include <linux/interrupt.h>
  42 #include <linux/timex.h>
  43 #include <linux/kernel_stat.h>
  44 #include <linux/time.h>
  45 #include <linux/init.h>
  46 #include <linux/profile.h>
  47 #include <linux/cpu.h>
  48 #include <linux/security.h>
  49 #include <linux/percpu.h>
  50 #include <linux/rtc.h>
  51 #include <linux/jiffies.h>
  52 #include <linux/posix-timers.h>
  53 #include <linux/irq.h>
  54 #include <linux/delay.h>
  55 #include <linux/irq_work.h>
  56 #include <asm/trace.h>
  57
  58 #include <asm/io.h>
  59 #include <asm/processor.h>
  60 #include <asm/nvram.h>
  61 #include <asm/cache.h>
  62 #include <asm/machdep.h>
  63 #include <asm/uaccess.h>
  64 #include <asm/time.h>
  65 #include <asm/prom.h>
  66 #include <asm/irq.h>
  67 #include <asm/div64.h>
  68 #include <asm/smp.h>
  69 #include <asm/vdso_datapage.h>
  70 #include <asm/firmware.h>
  71 #include <asm/cputime.h>
  72
  73 /* powerpc clocksource/clockevent code */
  74
  75 #include <linux/clockchips.h>
  76 #include <linux/timekeeper_internal.h>
  77
  78 static cycle_t rtc_read(struct clocksource *);
  79 static struct clocksource clocksource_rtc = {
  80         .name         = "rtc",
  81         .rating       = 400,
  82         .flags        = CLOCK_SOURCE_IS_CONTINUOUS,
  83         .mask         = CLOCKSOURCE_MASK(64),
  84         .read         = rtc_read,
  85 };
  86
  87 static cycle_t timebase_read(struct clocksource *);
  88 static struct clocksource clocksource_timebase = {
  89         .name         = "timebase",
  90         .rating       = 400,
  91         .flags        = CLOCK_SOURCE_IS_CONTINUOUS,
  92         .mask         = CLOCKSOURCE_MASK(64),
  93         .read         = timebase_read,
  94 };
  95
  96 #define DECREMENTER_MAX 0x7fffffff
  97
  98 static int decrementer_set_next_event(unsigned long evt,
  99                                       struct clock_event_device *dev);
 100 static void decrementer_set_mode(enum clock_event_mode mode,
 101                                  struct clock_event_device *dev);
 102
 103 struct clock_event_device decrementer_clockevent = {
 104         .name           = "decrementer",
 105         .rating         = 200,
 106         .irq            = 0,
 107         .set_next_event = decrementer_set_next_event,
 108         .set_mode       = decrementer_set_mode,
 109         .features       = CLOCK_EVT_FEAT_ONESHOT,
 110 };
 111 EXPORT_SYMBOL(decrementer_clockevent);
 112
 113 DEFINE_PER_CPU(u64, decrementers_next_tb);
 114 static DEFINE_PER_CPU(struct clock_event_device, decrementers);
 115
 116 #define XSEC_PER_SEC (1024*1024)
 117
 118 #ifdef CONFIG_PPC64
 119 #define SCALE_XSEC(xsec, max)   (((xsec) * max) / XSEC_PER_SEC)
 120 #else
 121 /* compute ((xsec << 12) * max) >> 32 */
 122 #define SCALE_XSEC(xsec, max)   mulhwu((xsec) << 12, max)
 123 #endif
 124
 125 unsigned long tb_ticks_per_jiffy;
 126 unsigned long tb_ticks_per_usec = 100; /* sane default */
 127 EXPORT_SYMBOL(tb_ticks_per_usec);
 128 unsigned long tb_ticks_per_sec;
 129 EXPORT_SYMBOL(tb_ticks_per_sec);        /* for cputime_t conversions */
 130
 131 DEFINE_SPINLOCK(rtc_lock);
 132 EXPORT_SYMBOL_GPL(rtc_lock);
 133
 134 static u64 tb_to_ns_scale __read_mostly;
 135 static unsigned tb_to_ns_shift __read_mostly;
 136 static u64 boot_tb __read_mostly;
 137
 138 extern struct timezone sys_tz;
 139 static long timezone_offset;
 140
 141 unsigned long ppc_proc_freq;
 142 EXPORT_SYMBOL_GPL(ppc_proc_freq);
 143 unsigned long ppc_tb_freq;
 144 EXPORT_SYMBOL_GPL(ppc_tb_freq);
 145
 146 #ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
 147 /*
 148  * Factors for converting from cputime_t (timebase ticks) to
 149  * jiffies, microseconds, seconds, and clock_t (1/USER_HZ seconds).
 150  * These are all stored as 0.64 fixed-point binary fractions.
 151  */
 152 u64 __cputime_jiffies_factor;
 153 EXPORT_SYMBOL(__cputime_jiffies_factor);
 154 u64 __cputime_usec_factor;
 155 EXPORT_SYMBOL(__cputime_usec_factor);
 156 u64 __cputime_sec_factor;
 157 EXPORT_SYMBOL(__cputime_sec_factor);
 158 u64 __cputime_clockt_factor;
 159 EXPORT_SYMBOL(__cputime_clockt_factor);
 160 DEFINE_PER_CPU(unsigned long, cputime_last_delta);
 161 DEFINE_PER_CPU(unsigned long, cputime_scaled_last_delta);
 162
 163 cputime_t cputime_one_jiffy;
 164
 165 void (*dtl_consumer)(struct dtl_entry *, u64);
 166
 167 static void calc_cputime_factors(void)
 168 {
 169         struct div_result res;
 170
 171         div128_by_32(HZ, 0, tb_ticks_per_sec, &res);
 172         __cputime_jiffies_factor = res.result_low;
 173         div128_by_32(1000000, 0, tb_ticks_per_sec, &res);
 174         __cputime_usec_factor = res.result_low;
 175         div128_by_32(1, 0, tb_ticks_per_sec, &res);
 176         __cputime_sec_factor = res.result_low;
 177         div128_by_32(USER_HZ, 0, tb_ticks_per_sec, &res);
 178         __cputime_clockt_factor = res.result_low;
 179 }
 180
 181 /*
 182  * Read the SPURR on systems that have it, otherwise the PURR,
 183  * or if that doesn't exist return the timebase value passed in.
 184  */
 185 static u64 read_spurr(u64 tb)
 186 {
 187         if (cpu_has_feature(CPU_FTR_SPURR))
 188                 return mfspr(SPRN_SPURR);
 189         if (cpu_has_feature(CPU_FTR_PURR))
 190                 return mfspr(SPRN_PURR);
 191         return tb;
 192 }
 193
 194 #ifdef CONFIG_PPC_SPLPAR
 195
 196 /*
 197  * Scan the dispatch trace log and count up the stolen time.
 198  * Should be called with interrupts disabled.
 199  */
 200 static u64 scan_dispatch_log(u64 stop_tb)
 201 {
 202         u64 i = local_paca->dtl_ridx;
 203         struct dtl_entry *dtl = local_paca->dtl_curr;
 204         struct dtl_entry *dtl_end = local_paca->dispatch_log_end;
 205         struct lppaca *vpa = local_paca->lppaca_ptr;
 206         u64 tb_delta;
 207         u64 stolen = 0;
 208         u64 dtb;
 209
 210         if (!dtl)
 211                 return 0;
 212
 213         if (i == be64_to_cpu(vpa->dtl_idx))
 214                 return 0;
 215         while (i < be64_to_cpu(vpa->dtl_idx)) {
 216                 dtb = be64_to_cpu(dtl->timebase);
 217                 tb_delta = be32_to_cpu(dtl->enqueue_to_dispatch_time) +
 218                         be32_to_cpu(dtl->ready_to_enqueue_time);
 219                 barrier();
 220                 if (i + N_DISPATCH_LOG < be64_to_cpu(vpa->dtl_idx)) {
 221                         /* buffer has overflowed */
 222                         i = be64_to_cpu(vpa->dtl_idx) - N_DISPATCH_LOG;
 223                         dtl = local_paca->dispatch_log + (i % N_DISPATCH_LOG);
 224                         continue;
 225                 }
 226                 if (dtb > stop_tb)
 227                         break;
 228                 if (dtl_consumer)
 229                         dtl_consumer(dtl, i);
 230                 stolen += tb_delta;
 231                 ++i;
 232                 ++dtl;
 233                 if (dtl == dtl_end)
 234                         dtl = local_paca->dispatch_log;
 235         }
 236         local_paca->dtl_ridx = i;
 237         local_paca->dtl_curr = dtl;
 238         return stolen;
 239 }
 240
 241 /*
 242  * Accumulate stolen time by scanning the dispatch trace log.
 243  * Called on entry from user mode.
 244  */
 245 void accumulate_stolen_time(void)
 246 {
 247         u64 sst, ust;
 248
 249         u8 save_soft_enabled = local_paca->soft_enabled;
 250
 251         /* We are called early in the exception entry, before
 252          * soft/hard_enabled are sync'ed to the expected state
 253          * for the exception. We are hard disabled but the PACA
 254          * needs to reflect that so various debug stuff doesn't
 255          * complain
 256          */
 257         local_paca->soft_enabled = 0;
 258
 259         sst = scan_dispatch_log(local_paca->starttime_user);
 260         ust = scan_dispatch_log(local_paca->starttime);
 261         local_paca->system_time -= sst;
 262         local_paca->user_time -= ust;
 263         local_paca->stolen_time += ust + sst;
 264
 265         local_paca->soft_enabled = save_soft_enabled;
 266 }
 267
 268 static inline u64 calculate_stolen_time(u64 stop_tb)
 269 {
 270         u64 stolen = 0;
 271
 272         if (get_paca()->dtl_ridx != be64_to_cpu(get_lppaca()->dtl_idx)) {
 273                 stolen = scan_dispatch_log(stop_tb);
 274                 get_paca()->system_time -= stolen;
 275         }
 276
 277         stolen += get_paca()->stolen_time;
 278         get_paca()->stolen_time = 0;
 279         return stolen;
 280 }
 281
 282 #else /* CONFIG_PPC_SPLPAR */
 283 static inline u64 calculate_stolen_time(u64 stop_tb)
 284 {
 285         return 0;
 286 }
 287
 288 #endif /* CONFIG_PPC_SPLPAR */
 289
 290 /*
 291  * Account time for a transition between system, hard irq
 292  * or soft irq state.
 293  */
 294 static u64 vtime_delta(struct task_struct *tsk,
 295                         u64 *sys_scaled, u64 *stolen)
 296 {
 297         u64 now, nowscaled, deltascaled;
 298         u64 udelta, delta, user_scaled;
 299
 300         WARN_ON_ONCE(!irqs_disabled());
 301
 302         now = mftb();
 303         nowscaled = read_spurr(now);
 304         get_paca()->system_time += now - get_paca()->starttime;
 305         get_paca()->starttime = now;
 306         deltascaled = nowscaled - get_paca()->startspurr;
 307         get_paca()->startspurr = nowscaled;
 308
 309         *stolen = calculate_stolen_time(now);
 310
 311         delta = get_paca()->system_time;
 312         get_paca()->system_time = 0;
 313         udelta = get_paca()->user_time - get_paca()->utime_sspurr;
 314         get_paca()->utime_sspurr = get_paca()->user_time;
 315
 316         /*
 317          * Because we don't read the SPURR on every kernel entry/exit,
 318          * deltascaled includes both user and system SPURR ticks.
 319          * Apportion these ticks to system SPURR ticks and user
 320          * SPURR ticks in the same ratio as the system time (delta)
 321          * and user time (udelta) values obtained from the timebase
 322          * over the same interval.  The system ticks get accounted here;
 323          * the user ticks get saved up in paca->user_time_scaled to be
 324          * used by account_process_tick.
 325          */
 326         *sys_scaled = delta;
 327         user_scaled = udelta;
 328         if (deltascaled != delta + udelta) {
 329                 if (udelta) {
 330                         *sys_scaled = deltascaled * delta / (delta + udelta);
 331                         user_scaled = deltascaled - *sys_scaled;
 332                 } else {
 333                         *sys_scaled = deltascaled;
 334                 }
 335         }
 336         get_paca()->user_time_scaled += user_scaled;
 337
 338         return delta;
 339 }
 340
 341 void vtime_account_system(struct task_struct *tsk)
 342 {
 343         u64 delta, sys_scaled, stolen;
 344
 345         delta = vtime_delta(tsk, &sys_scaled, &stolen);
 346         account_system_time(tsk, 0, delta, sys_scaled);
 347         if (stolen)
 348                 account_steal_time(stolen);
 349 }
 350 EXPORT_SYMBOL_GPL(vtime_account_system);
 351
 352 void vtime_account_idle(struct task_struct *tsk)
 353 {
 354         u64 delta, sys_scaled, stolen;
 355
 356         delta = vtime_delta(tsk, &sys_scaled, &stolen);
 357         account_idle_time(delta + stolen);
 358 }
 359
 360 /*
 361  * Transfer the user time accumulated in the paca
 362  * by the exception entry and exit code to the generic
 363  * process user time records.
 364  * Must be called with interrupts disabled.
 365  * Assumes that vtime_account_system/idle() has been called
 366  * recently (i.e. since the last entry from usermode) so that
 367  * get_paca()->user_time_scaled is up to date.
 368  */
 369 void vtime_account_user(struct task_struct *tsk)
 370 {
 371         cputime_t utime, utimescaled;
 372
 373         utime = get_paca()->user_time;
 374         utimescaled = get_paca()->user_time_scaled;
 375         get_paca()->user_time = 0;
 376         get_paca()->user_time_scaled = 0;
 377         get_paca()->utime_sspurr = 0;
 378         account_user_time(tsk, utime, utimescaled);
 379 }
 380
 381 #else /* ! CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
 382 #define calc_cputime_factors()
 383 #endif
 384
 385 void __delay(unsigned long loops)
 386 {
 387         unsigned long start;
 388         int diff;
 389
 390         if (__USE_RTC()) {
 391                 start = get_rtcl();
 392                 do {
 393                         /* the RTCL register wraps at 1000000000 */
 394                         diff = get_rtcl() - start;
 395                         if (diff < 0)
 396                                 diff += 1000000000;
 397                 } while (diff < loops);
 398         } else {
 399                 start = get_tbl();
 400                 while (get_tbl() - start < loops)
 401                         HMT_low();
 402                 HMT_medium();
 403         }
 404 }
 405 EXPORT_SYMBOL(__delay);
 406
 407 void udelay(unsigned long usecs)
 408 {
 409         __delay(tb_ticks_per_usec * usecs);
 410 }
 411 EXPORT_SYMBOL(udelay);
 412
 413 #ifdef CONFIG_SMP
 414 unsigned long profile_pc(struct pt_regs *regs)
 415 {
 416         unsigned long pc = instruction_pointer(regs);
 417
 418         if (in_lock_functions(pc))
 419                 return regs->link;
 420
 421         return pc;
 422 }
 423 EXPORT_SYMBOL(profile_pc);
 424 #endif
 425
 426 #ifdef CONFIG_IRQ_WORK
 427
 428 /*
 429  * 64-bit uses a byte in the PACA, 32-bit uses a per-cpu variable...
 430  */
 431 #ifdef CONFIG_PPC64
 432 static inline unsigned long test_irq_work_pending(void)
 433 {
 434         unsigned long x;
 435
 436         asm volatile("lbz %0,%1(13)"
 437                 : "=r" (x)
 438                 : "i" (offsetof(struct paca_struct, irq_work_pending)));
 439         return x;
 440 }
 441
 442 static inline void set_irq_work_pending_flag(void)
 443 {
 444         asm volatile("stb %0,%1(13)" : :
 445                 "r" (1),
 446                 "i" (offsetof(struct paca_struct, irq_work_pending)));
 447 }
 448
 449 static inline void clear_irq_work_pending(void)
 450 {
 451         asm volatile("stb %0,%1(13)" : :
 452                 "r" (0),
 453                 "i" (offsetof(struct paca_struct, irq_work_pending)));
 454 }
 455
 456 #else /* 32-bit */
 457
 458 DEFINE_PER_CPU(u8, irq_work_pending);
 459
 460 #define set_irq_work_pending_flag()     __get_cpu_var(irq_work_pending) = 1
 461 #define test_irq_work_pending()         __get_cpu_var(irq_work_pending)
 462 #define clear_irq_work_pending()        __get_cpu_var(irq_work_pending) = 0
 463
 464 #endif /* 32 vs 64 bit */
 465
 466 void arch_irq_work_raise(void)
 467 {
 468         preempt_disable();
 469         set_irq_work_pending_flag();
 470         set_dec(1);
 471         preempt_enable();
 472 }
 473
 474 #else  /* CONFIG_IRQ_WORK */
 475
 476 #define test_irq_work_pending() 0
 477 #define clear_irq_work_pending()
 478
 479 #endif /* CONFIG_IRQ_WORK */
 480
 481 /*
 482  * timer_interrupt - gets called when the decrementer overflows,
 483  * with interrupts disabled.
 484  */
 485 void timer_interrupt(struct pt_regs * regs)
 486 {
 487         struct pt_regs *old_regs;
 488         u64 *next_tb = &__get_cpu_var(decrementers_next_tb);
 489         struct clock_event_device *evt = &__get_cpu_var(decrementers);
 490         u64 now;
 491
 492         /* Ensure a positive value is written to the decrementer, or else
 493          * some CPUs will continue to take decrementer exceptions.
 494          */
 495         set_dec(DECREMENTER_MAX);
 496
 497         /* Some implementations of hotplug will get timer interrupts while
 498          * offline, just ignore these and we also need to set
 499          * decrementers_next_tb as MAX to make sure __check_irq_replay
 500          * don't replay timer interrupt when return, otherwise we'll trap
 501          * here infinitely :(
 502          */
 503         if (!cpu_online(smp_processor_id())) {
 504                 *next_tb = ~(u64)0;
 505                 return;
 506         }
 507
 508         /* Conditionally hard-enable interrupts now that the DEC has been
 509          * bumped to its maximum value
 510          */
 511         may_hard_irq_enable();
 512
 513         __get_cpu_var(irq_stat).timer_irqs++;
 514
 515 #if defined(CONFIG_PPC32) && defined(CONFIG_PPC_PMAC)
 516         if (atomic_read(&ppc_n_lost_interrupts) != 0)
 517                 do_IRQ(regs);
 518 #endif
 519
 520         old_regs = set_irq_regs(regs);
 521         irq_enter();
 522
 523         trace_timer_interrupt_entry(regs);
 524
 525         if (test_irq_work_pending()) {
 526                 clear_irq_work_pending();
 527                 irq_work_run();
 528         }
 529
 530         now = get_tb_or_rtc();
 531         if (now >= *next_tb) {
 532                 *next_tb = ~(u64)0;
 533                 if (evt->event_handler)
 534                         evt->event_handler(evt);
 535         } else {
 536                 now = *next_tb - now;
 537                 if (now <= DECREMENTER_MAX)
 538                         set_dec((int)now);
 539         }
 540
 541 #ifdef CONFIG_PPC64
 542         /* collect purr register values often, for accurate calculations */
 543         if (firmware_has_feature(FW_FEATURE_SPLPAR)) {
 544                 struct cpu_usage *cu = &__get_cpu_var(cpu_usage_array);
 545                 cu->current_tb = mfspr(SPRN_PURR);
 546         }
 547 #endif
 548
 549         trace_timer_interrupt_exit(regs);
 550
 551         irq_exit();
 552         set_irq_regs(old_regs);
 553 }
 554
 555 /*
 556  * Hypervisor decrementer interrupts shouldn't occur but are sometimes
 557  * left pending on exit from a KVM guest.  We don't need to do anything
 558  * to clear them, as they are edge-triggered.
 559  */
 560 void hdec_interrupt(struct pt_regs *regs)
 561 {
 562 }
 563
 564 #ifdef CONFIG_SUSPEND
 565 static void generic_suspend_disable_irqs(void)
 566 {
 567         /* Disable the decrementer, so that it doesn't interfere
 568          * with suspending.
 569          */
 570
 571         set_dec(DECREMENTER_MAX);
 572         local_irq_disable();
 573         set_dec(DECREMENTER_MAX);
 574 }
 575
 576 static void generic_suspend_enable_irqs(void)
 577 {
 578         local_irq_enable();
 579 }
 580
 581 /* Overrides the weak version in kernel/power/main.c */
 582 void arch_suspend_disable_irqs(void)
 583 {
 584         if (ppc_md.suspend_disable_irqs)
 585                 ppc_md.suspend_disable_irqs();
 586         generic_suspend_disable_irqs();
 587 }
 588
 589 /* Overrides the weak version in kernel/power/main.c */
 590 void arch_suspend_enable_irqs(void)
 591 {
 592         generic_suspend_enable_irqs();
 593         if (ppc_md.suspend_enable_irqs)
 594                 ppc_md.suspend_enable_irqs();
 595 }
 596 #endif
 597
 598 /*
 599  * Scheduler clock - returns current time in nanosec units.
 600  *
 601  * Note: mulhdu(a, b) (multiply high double unsigned) returns
 602  * the high 64 bits of a * b, i.e. (a * b) >> 64, where a and b
 603  * are 64-bit unsigned numbers.
 604  */
 605 unsigned long long sched_clock(void)
 606 {
 607         if (__USE_RTC())
 608                 return get_rtc();
 609         return mulhdu(get_tb() - boot_tb, tb_to_ns_scale) << tb_to_ns_shift;
 610 }
 611
 612 static int __init get_freq(char *name, int cells, unsigned long *val)
 613 {
 614         struct device_node *cpu;
 615         const __be32 *fp;
 616         int found = 0;
 617
 618         /* The cpu node should have timebase and clock frequency properties */
 619         cpu = of_find_node_by_type(NULL, "cpu");
 620
 621         if (cpu) {
 622                 fp = of_get_property(cpu, name, NULL);
 623                 if (fp) {
 624                         found = 1;
 625                         *val = of_read_ulong(fp, cells);
 626                 }
 627
 628                 of_node_put(cpu);
 629         }
 630
 631         return found;
 632 }
 633
 634 void start_cpu_decrementer(void)
 635 {
 636 #if defined(CONFIG_BOOKE) || defined(CONFIG_40x)
 637         /* Clear any pending timer interrupts */
 638         mtspr(SPRN_TSR, TSR_ENW | TSR_WIS | TSR_DIS | TSR_FIS);
 639
 640         /* Enable decrementer interrupt */
 641         mtspr(SPRN_TCR, TCR_DIE);
 642 #endif /* defined(CONFIG_BOOKE) || defined(CONFIG_40x) */
 643 }
 644
 645 void __init generic_calibrate_decr(void)
 646 {
 647         ppc_tb_freq = DEFAULT_TB_FREQ;          /* hardcoded default */
 648
 649         if (!get_freq("ibm,extended-timebase-frequency", 2, &ppc_tb_freq) &&
 650             !get_freq("timebase-frequency", 1, &ppc_tb_freq)) {
 651
 652                 printk(KERN_ERR "WARNING: Estimating decrementer frequency "
 653                                 "(not found)\n");
 654         }
 655
 656         ppc_proc_freq = DEFAULT_PROC_FREQ;      /* hardcoded default */
 657
 658         if (!get_freq("ibm,extended-clock-frequency", 2, &ppc_proc_freq) &&
 659             !get_freq("clock-frequency", 1, &ppc_proc_freq)) {
 660
 661                 printk(KERN_ERR "WARNING: Estimating processor frequency "
 662                                 "(not found)\n");
 663         }
 664 }
 665
 666 int update_persistent_clock(struct timespec now)
 667 {
 668         struct rtc_time tm;
 669
 670         if (!ppc_md.set_rtc_time)
 671                 return -ENODEV;
 672
 673         to_tm(now.tv_sec + 1 + timezone_offset, &tm);
 674         tm.tm_year -= 1900;
 675         tm.tm_mon -= 1;
 676
 677         return ppc_md.set_rtc_time(&tm);
 678 }
 679
 680 static void __read_persistent_clock(struct timespec *ts)
 681 {
 682         struct rtc_time tm;
 683         static int first = 1;
 684
 685         ts->tv_nsec = 0;
 686         /* XXX this is a litle fragile but will work okay in the short term */
 687         if (first) {
 688                 first = 0;
 689                 if (ppc_md.time_init)
 690                         timezone_offset = ppc_md.time_init();
 691
 692                 /* get_boot_time() isn't guaranteed to be safe to call late */
 693                 if (ppc_md.get_boot_time) {
 694                         ts->tv_sec = ppc_md.get_boot_time() - timezone_offset;
 695                         return;
 696                 }
 697         }
 698         if (!ppc_md.get_rtc_time) {
 699                 ts->tv_sec = 0;
 700                 return;
 701         }
 702         ppc_md.get_rtc_time(&tm);
 703
 704         ts->tv_sec = mktime(tm.tm_year+1900, tm.tm_mon+1, tm.tm_mday,
 705                             tm.tm_hour, tm.tm_min, tm.tm_sec);
 706 }
 707
 708 void read_persistent_clock(struct timespec *ts)
 709 {
 710         __read_persistent_clock(ts);
 711
 712         /* Sanitize it in case real time clock is set below EPOCH */
 713         if (ts->tv_sec < 0) {
 714                 ts->tv_sec = 0;
 715                 ts->tv_nsec = 0;
 716         }
 717
 718 }
 719
 720 /* clocksource code */
 721 static cycle_t rtc_read(struct clocksource *cs)
 722 {
 723         return (cycle_t)get_rtc();
 724 }
 725
 726 static cycle_t timebase_read(struct clocksource *cs)
 727 {
 728         return (cycle_t)get_tb();
 729 }
 730
 731 void update_vsyscall_old(struct timespec *wall_time, struct timespec *wtm,
 732                         struct clocksource *clock, u32 mult)
 733 {
 734         u64 new_tb_to_xs, new_stamp_xsec;
 735         u32 frac_sec;
 736
 737         if (clock != &clocksource_timebase)
 738                 return;
 739
 740         /* Make userspace gettimeofday spin until we're done. */
 741         ++vdso_data->tb_update_count;
 742         smp_mb();
 743
 744         /* 19342813113834067 ~= 2^(20+64) / 1e9 */
 745         new_tb_to_xs = (u64) mult * (19342813113834067ULL >> clock->shift);
 746         new_stamp_xsec = (u64) wall_time->tv_nsec * XSEC_PER_SEC;
 747         do_div(new_stamp_xsec, 1000000000);
 748         new_stamp_xsec += (u64) wall_time->tv_sec * XSEC_PER_SEC;
 749
 750         BUG_ON(wall_time->tv_nsec >= NSEC_PER_SEC);
 751         /* this is tv_nsec / 1e9 as a 0.32 fraction */
 752         frac_sec = ((u64) wall_time->tv_nsec * 18446744073ULL) >> 32;
 753
 754         /*
 755          * tb_update_count is used to allow the userspace gettimeofday code
 756          * to assure itself that it sees a consistent view of the tb_to_xs and
 757          * stamp_xsec variables.  It reads the tb_update_count, then reads
 758          * tb_to_xs and stamp_xsec and then reads tb_update_count again.  If
 759          * the two values of tb_update_count match and are even then the
 760          * tb_to_xs and stamp_xsec values are consistent.  If not, then it
 761          * loops back and reads them again until this criteria is met.
 762          * We expect the caller to have done the first increment of
 763          * vdso_data->tb_update_count already.
 764          */
 765         vdso_data->tb_orig_stamp = clock->cycle_last;
 766         vdso_data->stamp_xsec = new_stamp_xsec;
 767         vdso_data->tb_to_xs = new_tb_to_xs;
 768         vdso_data->wtom_clock_sec = wtm->tv_sec;
 769         vdso_data->wtom_clock_nsec = wtm->tv_nsec;
 770         vdso_data->stamp_xtime = *wall_time;
 771         vdso_data->stamp_sec_fraction = frac_sec;
 772         smp_wmb();
 773         ++(vdso_data->tb_update_count);
 774 }
 775
 776 void update_vsyscall_tz(void)
 777 {
 778         vdso_data->tz_minuteswest = sys_tz.tz_minuteswest;
 779         vdso_data->tz_dsttime = sys_tz.tz_dsttime;
 780 }
 781
 782 static void __init clocksource_init(void)
 783 {
 784         struct clocksource *clock;
 785
 786         if (__USE_RTC())
 787                 clock = &clocksource_rtc;
 788         else
 789                 clock = &clocksource_timebase;
 790
 791         if (clocksource_register_hz(clock, tb_ticks_per_sec)) {
 792                 printk(KERN_ERR "clocksource: %s is already registered\n",
 793                        clock->name);
 794                 return;
 795         }
 796
 797         printk(KERN_INFO "clocksource: %s mult[%x] shift[%d] registered\n",
 798                clock->name, clock->mult, clock->shift);
 799 }
 800
 801 static int decrementer_set_next_event(unsigned long evt,
 802                                       struct clock_event_device *dev)
 803 {
 804         __get_cpu_var(decrementers_next_tb) = get_tb_or_rtc() + evt;
 805         set_dec(evt);
 806         return 0;
 807 }
 808
 809 static void decrementer_set_mode(enum clock_event_mode mode,
 810                                  struct clock_event_device *dev)
 811 {
 812         if (mode != CLOCK_EVT_MODE_ONESHOT)
 813                 decrementer_set_next_event(DECREMENTER_MAX, dev);
 814 }
 815
 816 static void register_decrementer_clockevent(int cpu)
 817 {
 818         struct clock_event_device *dec = &per_cpu(decrementers, cpu);
 819
 820         *dec = decrementer_clockevent;
 821         dec->cpumask = cpumask_of(cpu);
 822
 823         printk_once(KERN_DEBUG "clockevent: %s mult[%x] shift[%d] cpu[%d]\n",
 824                     dec->name, dec->mult, dec->shift, cpu);
 825
 826         clockevents_register_device(dec);
 827 }
 828
 829 static void __init init_decrementer_clockevent(void)
 830 {
 831         int cpu = smp_processor_id();
 832
 833         clockevents_calc_mult_shift(&decrementer_clockevent, ppc_tb_freq, 4);
 834
 835         decrementer_clockevent.max_delta_ns =
 836                 clockevent_delta2ns(DECREMENTER_MAX, &decrementer_clockevent);
 837         decrementer_clockevent.min_delta_ns =
 838                 clockevent_delta2ns(2, &decrementer_clockevent);
 839
 840         register_decrementer_clockevent(cpu);
 841 }
 842
 843 void secondary_cpu_time_init(void)
 844 {
 845         /* Start the decrementer on CPUs that have manual control
 846          * such as BookE
 847          */
 848         start_cpu_decrementer();
 849
 850         /* FIME: Should make unrelatred change to move snapshot_timebase
 851          * call here ! */
 852         register_decrementer_clockevent(smp_processor_id());
 853 }
 854
 855 /* This function is only called on the boot processor */
 856 void __init time_init(void)
 857 {
 858         struct div_result res;
 859         u64 scale;
 860         unsigned shift;
 861
 862         if (__USE_RTC()) {
 863                 /* 601 processor: dec counts down by 128 every 128ns */
 864                 ppc_tb_freq = 1000000000;
 865         } else {
 866                 /* Normal PowerPC with timebase register */
 867                 ppc_md.calibrate_decr();
 868                 printk(KERN_DEBUG "time_init: decrementer frequency = %lu.%.6lu MHz\n",
 869                        ppc_tb_freq / 1000000, ppc_tb_freq % 1000000);
 870                 printk(KERN_DEBUG "time_init: processor frequency   = %lu.%.6lu MHz\n",
 871                        ppc_proc_freq / 1000000, ppc_proc_freq % 1000000);
 872         }
 873
 874         tb_ticks_per_jiffy = ppc_tb_freq / HZ;
 875         tb_ticks_per_sec = ppc_tb_freq;
 876         tb_ticks_per_usec = ppc_tb_freq / 1000000;
 877         calc_cputime_factors();
 878         setup_cputime_one_jiffy();
 879
 880         /*
 881          * Compute scale factor for sched_clock.
 882          * The calibrate_decr() function has set tb_ticks_per_sec,
 883          * which is the timebase frequency.
 884          * We compute 1e9 * 2^64 / tb_ticks_per_sec and interpret
 885          * the 128-bit result as a 64.64 fixed-point number.
 886          * We then shift that number right until it is less than 1.0,
 887          * giving us the scale factor and shift count to use in
 888          * sched_clock().
 889          */
 890         div128_by_32(1000000000, 0, tb_ticks_per_sec, &res);
 891         scale = res.result_low;
 892         for (shift = 0; res.result_high != 0; ++shift) {
 893                 scale = (scale >> 1) | (res.result_high << 63);
 894                 res.result_high >>= 1;
 895         }
 896         tb_to_ns_scale = scale;
 897         tb_to_ns_shift = shift;
 898         /* Save the current timebase to pretty up CONFIG_PRINTK_TIME */
 899         boot_tb = get_tb_or_rtc();
 900
 901         /* If platform provided a timezone (pmac), we correct the time */
 902         if (timezone_offset) {
 903                 sys_tz.tz_minuteswest = -timezone_offset / 60;
 904                 sys_tz.tz_dsttime = 0;
 905         }
 906
 907         vdso_data->tb_update_count = 0;
 908         vdso_data->tb_ticks_per_sec = tb_ticks_per_sec;
 909
 910         /* Start the decrementer on CPUs that have manual control
 911          * such as BookE
 912          */
 913         start_cpu_decrementer();
 914
 915         /* Register the clocksource */
 916         clocksource_init();
 917
 918         init_decrementer_clockevent();
 919 }
 920
 921
 922 #define FEBRUARY        2
 923 #define STARTOFTIME     1970
 924 #define SECDAY          86400L
 925 #define SECYR           (SECDAY * 365)
 926 #define leapyear(year)          ((year) % 4 == 0 && \
 927                                  ((year) % 100 != 0 || (year) % 400 == 0))
 928 #define days_in_year(a)         (leapyear(a) ? 366 : 365)
 929 #define days_in_month(a)        (month_days[(a) - 1])
 930
 931 static int month_days[12] = {
 932         31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31
 933 };
 934
 935 /*
 936  * This only works for the Gregorian calendar - i.e. after 1752 (in the UK)
 937  */
 938 void GregorianDay(struct rtc_time * tm)
 939 {
 940         int leapsToDate;
 941         int lastYear;
 942         int day;
 943         int MonthOffset[] = { 0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334 };
 944
 945         lastYear = tm->tm_year - 1;
 946
 947         /*
 948          * Number of leap corrections to apply up to end of last year
 949          */
 950         leapsToDate = lastYear / 4 - lastYear / 100 + lastYear / 400;
 951
 952         /*
 953          * This year is a leap year if it is divisible by 4 except when it is
 954          * divisible by 100 unless it is divisible by 400
 955          *
 956          * e.g. 1904 was a leap year, 1900 was not, 1996 is, and 2000 was
 957          */
 958         day = tm->tm_mon > 2 && leapyear(tm->tm_year);
 959
 960         day += lastYear*365 + leapsToDate + MonthOffset[tm->tm_mon-1] +
 961                    tm->tm_mday;
 962
 963         tm->tm_wday = day % 7;
 964 }
 965
 966 void to_tm(int tim, struct rtc_time * tm)
 967 {
 968         register int    i;
 969         register long   hms, day;
 970
 971         day = tim / SECDAY;
 972         hms = tim % SECDAY;
 973
 974         /* Hours, minutes, seconds are easy */
 975         tm->tm_hour = hms / 3600;
 976         tm->tm_min = (hms % 3600) / 60;
 977         tm->tm_sec = (hms % 3600) % 60;
 978
 979         /* Number of years in days */
 980         for (i = STARTOFTIME; day >= days_in_year(i); i++)
 981                 day -= days_in_year(i);
 982         tm->tm_year = i;
 983
 984         /* Number of months in days left */
 985         if (leapyear(tm->tm_year))
 986                 days_in_month(FEBRUARY) = 29;
 987         for (i = 1; day >= days_in_month(i); i++)
 988                 day -= days_in_month(i);
 989         days_in_month(FEBRUARY) = 28;
 990         tm->tm_mon = i;
 991
 992         /* Days are what is left over (+1) from all that. */
 993         tm->tm_mday = day + 1;
 994
 995         /*
 996          * Determine the day of week
 997          */
 998         GregorianDay(tm);
 999 }
1000
1001 /*
1002  * Divide a 128-bit dividend by a 32-bit divisor, leaving a 128 bit
1003  * result.
1004  */
1005 void div128_by_32(u64 dividend_high, u64 dividend_low,
1006                   unsigned divisor, struct div_result *dr)
1007 {
1008         unsigned long a, b, c, d;
1009         unsigned long w, x, y, z;
1010         u64 ra, rb, rc;
1011
1012         a = dividend_high >> 32;
1013         b = dividend_high & 0xffffffff;
1014         c = dividend_low >> 32;
1015         d = dividend_low & 0xffffffff;
1016
1017         w = a / divisor;
1018         ra = ((u64)(a - (w * divisor)) << 32) + b;
1019
1020         rb = ((u64) do_div(ra, divisor) << 32) + c;
1021         x = ra;
1022
1023         rc = ((u64) do_div(rb, divisor) << 32) + d;
1024         y = rb;
1025
1026         do_div(rc, divisor);
1027         z = rc;
1028
1029         dr->result_high = ((u64)w << 32) + x;
1030         dr->result_low  = ((u64)y << 32) + z;
1031
1032 }
1033
1034 /* We don't need to calibrate delay, we use the CPU timebase for that */
1035 void calibrate_delay(void)
1036 {
1037         /* Some generic code (such as spinlock debug) use loops_per_jiffy
1038          * as the number of __delay(1) in a jiffy, so make it so
1039          */
1040         loops_per_jiffy = tb_ticks_per_jiffy;
1041 }
1042
1043 static int __init rtc_init(void)
1044 {
1045         struct platform_device *pdev;
1046
1047         if (!ppc_md.get_rtc_time)
1048                 return -ENODEV;
1049
1050         pdev = platform_device_register_simple("rtc-generic", -1, NULL, 0);
1051
1052         return PTR_ERR_OR_ZERO(pdev);
1053 }
1054
1055 module_init(rtc_init);