kernel/time/timekeeping.c

   1 // SPDX-License-Identifier: GPL-2.0
   2 /*
   3  *  Kernel timekeeping code and accessor functions. Based on code from
   4  *  timer.c, moved in commit 8524070b7982.
   5  */
   6 #include <linux/timekeeper_internal.h>
   7 #include <linux/module.h>
   8 #include <linux/interrupt.h>
   9 #include <linux/percpu.h>
  10 #include <linux/init.h>
  11 #include <linux/mm.h>
  12 #include <linux/nmi.h>
  13 #include <linux/sched.h>
  14 #include <linux/sched/loadavg.h>
  15 #include <linux/sched/clock.h>
  16 #include <linux/syscore_ops.h>
  17 #include <linux/clocksource.h>
  18 #include <linux/jiffies.h>
  19 #include <linux/time.h>
  20 #include <linux/timex.h>
  21 #include <linux/tick.h>
  22 #include <linux/stop_machine.h>
  23 #include <linux/pvclock_gtod.h>
  24 #include <linux/compiler.h>
  25 #include <linux/audit.h>
  26 #include <linux/random.h>
  27
  28 #include "tick-internal.h"
  29 #include "ntp_internal.h"
  30 #include "timekeeping_internal.h"
  31
  32 #define TK_CLEAR_NTP            (1 << 0)
  33 #define TK_CLOCK_WAS_SET        (1 << 1)
  34
  35 #define TK_UPDATE_ALL           (TK_CLEAR_NTP | TK_CLOCK_WAS_SET)
  36
  37 enum timekeeping_adv_mode {
  38         /* Update timekeeper when a tick has passed */
  39         TK_ADV_TICK,
  40
  41         /* Update timekeeper on a direct frequency change */
  42         TK_ADV_FREQ
  43 };
  44
  45 /*
  46  * The most important data for readout fits into a single 64 byte
  47  * cache line.
  48  */
  49 struct tk_data {
  50         seqcount_raw_spinlock_t seq;
  51         struct timekeeper       timekeeper;
  52         struct timekeeper       shadow_timekeeper;
  53         raw_spinlock_t          lock;
  54 } ____cacheline_aligned;
  55
  56 static struct tk_data tk_core;
  57
  58 /* flag for if timekeeping is suspended */
  59 int __read_mostly timekeeping_suspended;
  60
  61 /**
  62  * struct tk_fast - NMI safe timekeeper
  63  * @seq:        Sequence counter for protecting updates. The lowest bit
  64  *              is the index for the tk_read_base array
  65  * @base:       tk_read_base array. Access is indexed by the lowest bit of
  66  *              @seq.
  67  *
  68  * See @update_fast_timekeeper() below.
  69  */
  70 struct tk_fast {
  71         seqcount_latch_t        seq;
  72         struct tk_read_base     base[2];
  73 };
  74
  75 /* Suspend-time cycles value for halted fast timekeeper. */
  76 static u64 cycles_at_suspend;
  77
  78 static u64 dummy_clock_read(struct clocksource *cs)
  79 {
  80         if (timekeeping_suspended)
  81                 return cycles_at_suspend;
  82         return local_clock();
  83 }
  84
  85 static struct clocksource dummy_clock = {
  86         .read = dummy_clock_read,
  87 };
  88
  89 /*
  90  * Boot time initialization which allows local_clock() to be utilized
  91  * during early boot when clocksources are not available. local_clock()
  92  * returns nanoseconds already so no conversion is required, hence mult=1
  93  * and shift=0. When the first proper clocksource is installed then
  94  * the fast time keepers are updated with the correct values.
  95  */
  96 #define FAST_TK_INIT                                            \
  97         {                                                       \
  98                 .clock          = &dummy_clock,                 \
  99                 .mask           = CLOCKSOURCE_MASK(64),         \
 100                 .mult           = 1,                            \
 101                 .shift          = 0,                            \
 102         }
 103
 104 static struct tk_fast tk_fast_mono ____cacheline_aligned = {
 105         .seq     = SEQCNT_LATCH_ZERO(tk_fast_mono.seq),
 106         .base[0] = FAST_TK_INIT,
 107         .base[1] = FAST_TK_INIT,
 108 };
 109
 110 static struct tk_fast tk_fast_raw  ____cacheline_aligned = {
 111         .seq     = SEQCNT_LATCH_ZERO(tk_fast_raw.seq),
 112         .base[0] = FAST_TK_INIT,
 113         .base[1] = FAST_TK_INIT,
 114 };
 115
 116 unsigned long timekeeper_lock_irqsave(void)
 117 {
 118         unsigned long flags;
 119
 120         raw_spin_lock_irqsave(&tk_core.lock, flags);
 121         return flags;
 122 }
 123
 124 void timekeeper_unlock_irqrestore(unsigned long flags)
 125 {
 126         raw_spin_unlock_irqrestore(&tk_core.lock, flags);
 127 }
 128
 129 /*
 130  * Multigrain timestamps require tracking the latest fine-grained timestamp
 131  * that has been issued, and never returning a coarse-grained timestamp that is
 132  * earlier than that value.
 133  *
 134  * mg_floor represents the latest fine-grained time that has been handed out as
 135  * a file timestamp on the system. This is tracked as a monotonic ktime_t, and
 136  * converted to a realtime clock value on an as-needed basis.
 137  *
 138  * Maintaining mg_floor ensures the multigrain interfaces never issue a
 139  * timestamp earlier than one that has been previously issued.
 140  *
 141  * The exception to this rule is when there is a backward realtime clock jump. If
 142  * such an event occurs, a timestamp can appear to be earlier than a previous one.
 143  */
 144 static __cacheline_aligned_in_smp atomic64_t mg_floor;
 145
 146 static inline void tk_normalize_xtime(struct timekeeper *tk)
 147 {
 148         while (tk->tkr_mono.xtime_nsec >= ((u64)NSEC_PER_SEC << tk->tkr_mono.shift)) {
 149                 tk->tkr_mono.xtime_nsec -= (u64)NSEC_PER_SEC << tk->tkr_mono.shift;
 150                 tk->xtime_sec++;
 151         }
 152         while (tk->tkr_raw.xtime_nsec >= ((u64)NSEC_PER_SEC << tk->tkr_raw.shift)) {
 153                 tk->tkr_raw.xtime_nsec -= (u64)NSEC_PER_SEC << tk->tkr_raw.shift;
 154                 tk->raw_sec++;
 155         }
 156 }
 157
 158 static inline struct timespec64 tk_xtime(const struct timekeeper *tk)
 159 {
 160         struct timespec64 ts;
 161
 162         ts.tv_sec = tk->xtime_sec;
 163         ts.tv_nsec = (long)(tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift);
 164         return ts;
 165 }
 166
 167 static void tk_set_xtime(struct timekeeper *tk, const struct timespec64 *ts)
 168 {
 169         tk->xtime_sec = ts->tv_sec;
 170         tk->tkr_mono.xtime_nsec = (u64)ts->tv_nsec << tk->tkr_mono.shift;
 171 }
 172
 173 static void tk_xtime_add(struct timekeeper *tk, const struct timespec64 *ts)
 174 {
 175         tk->xtime_sec += ts->tv_sec;
 176         tk->tkr_mono.xtime_nsec += (u64)ts->tv_nsec << tk->tkr_mono.shift;
 177         tk_normalize_xtime(tk);
 178 }
 179
 180 static void tk_set_wall_to_mono(struct timekeeper *tk, struct timespec64 wtm)
 181 {
 182         struct timespec64 tmp;
 183
 184         /*
 185          * Verify consistency of: offset_real = -wall_to_monotonic
 186          * before modifying anything
 187          */
 188         set_normalized_timespec64(&tmp, -tk->wall_to_monotonic.tv_sec,
 189                                         -tk->wall_to_monotonic.tv_nsec);
 190         WARN_ON_ONCE(tk->offs_real != timespec64_to_ktime(tmp));
 191         tk->wall_to_monotonic = wtm;
 192         set_normalized_timespec64(&tmp, -wtm.tv_sec, -wtm.tv_nsec);
 193         /* Paired with READ_ONCE() in ktime_mono_to_any() */
 194         WRITE_ONCE(tk->offs_real, timespec64_to_ktime(tmp));
 195         WRITE_ONCE(tk->offs_tai, ktime_add(tk->offs_real, ktime_set(tk->tai_offset, 0)));
 196 }
 197
 198 static inline void tk_update_sleep_time(struct timekeeper *tk, ktime_t delta)
 199 {
 200         /* Paired with READ_ONCE() in ktime_mono_to_any() */
 201         WRITE_ONCE(tk->offs_boot, ktime_add(tk->offs_boot, delta));
 202         /*
 203          * Timespec representation for VDSO update to avoid 64bit division
 204          * on every update.
 205          */
 206         tk->monotonic_to_boot = ktime_to_timespec64(tk->offs_boot);
 207 }
 208
 209 /*
 210  * tk_clock_read - atomic clocksource read() helper
 211  *
 212  * This helper is necessary to use in the read paths because, while the
 213  * seqcount ensures we don't return a bad value while structures are updated,
 214  * it doesn't protect from potential crashes. There is the possibility that
 215  * the tkr's clocksource may change between the read reference, and the
 216  * clock reference passed to the read function.  This can cause crashes if
 217  * the wrong clocksource is passed to the wrong read function.
 218  * This isn't necessary to use when holding the tk_core.lock or doing
 219  * a read of the fast-timekeeper tkrs (which is protected by its own locking
 220  * and update logic).
 221  */
 222 static inline u64 tk_clock_read(const struct tk_read_base *tkr)
 223 {
 224         struct clocksource *clock = READ_ONCE(tkr->clock);
 225
 226         return clock->read(clock);
 227 }
 228
 229 /**
 230  * tk_setup_internals - Set up internals to use clocksource clock.
 231  *
 232  * @tk:         The target timekeeper to setup.
 233  * @clock:              Pointer to clocksource.
 234  *
 235  * Calculates a fixed cycle/nsec interval for a given clocksource/adjustment
 236  * pair and interval request.
 237  *
 238  * Unless you're the timekeeping code, you should not be using this!
 239  */
 240 static void tk_setup_internals(struct timekeeper *tk, struct clocksource *clock)
 241 {
 242         u64 interval;
 243         u64 tmp, ntpinterval;
 244         struct clocksource *old_clock;
 245
 246         ++tk->cs_was_changed_seq;
 247         old_clock = tk->tkr_mono.clock;
 248         tk->tkr_mono.clock = clock;
 249         tk->tkr_mono.mask = clock->mask;
 250         tk->tkr_mono.cycle_last = tk_clock_read(&tk->tkr_mono);
 251
 252         tk->tkr_raw.clock = clock;
 253         tk->tkr_raw.mask = clock->mask;
 254         tk->tkr_raw.cycle_last = tk->tkr_mono.cycle_last;
 255
 256         /* Do the ns -> cycle conversion first, using original mult */
 257         tmp = NTP_INTERVAL_LENGTH;
 258         tmp <<= clock->shift;
 259         ntpinterval = tmp;
 260         tmp += clock->mult/2;
 261         do_div(tmp, clock->mult);
 262         if (tmp == 0)
 263                 tmp = 1;
 264
 265         interval = (u64) tmp;
 266         tk->cycle_interval = interval;
 267
 268         /* Go back from cycles -> shifted ns */
 269         tk->xtime_interval = interval * clock->mult;
 270         tk->xtime_remainder = ntpinterval - tk->xtime_interval;
 271         tk->raw_interval = interval * clock->mult;
 272
 273          /* if changing clocks, convert xtime_nsec shift units */
 274         if (old_clock) {
 275                 int shift_change = clock->shift - old_clock->shift;
 276                 if (shift_change < 0) {
 277                         tk->tkr_mono.xtime_nsec >>= -shift_change;
 278                         tk->tkr_raw.xtime_nsec >>= -shift_change;
 279                 } else {
 280                         tk->tkr_mono.xtime_nsec <<= shift_change;
 281                         tk->tkr_raw.xtime_nsec <<= shift_change;
 282                 }
 283         }
 284
 285         tk->tkr_mono.shift = clock->shift;
 286         tk->tkr_raw.shift = clock->shift;
 287
 288         tk->ntp_error = 0;
 289         tk->ntp_error_shift = NTP_SCALE_SHIFT - clock->shift;
 290         tk->ntp_tick = ntpinterval << tk->ntp_error_shift;
 291
 292         /*
 293          * The timekeeper keeps its own mult values for the currently
 294          * active clocksource. These value will be adjusted via NTP
 295          * to counteract clock drifting.
 296          */
 297         tk->tkr_mono.mult = clock->mult;
 298         tk->tkr_raw.mult = clock->mult;
 299         tk->ntp_err_mult = 0;
 300         tk->skip_second_overflow = 0;
 301 }
 302
 303 /* Timekeeper helper functions. */
 304 static noinline u64 delta_to_ns_safe(const struct tk_read_base *tkr, u64 delta)
 305 {
 306         return mul_u64_u32_add_u64_shr(delta, tkr->mult, tkr->xtime_nsec, tkr->shift);
 307 }
 308
 309 static inline u64 timekeeping_cycles_to_ns(const struct tk_read_base *tkr, u64 cycles)
 310 {
 311         /* Calculate the delta since the last update_wall_time() */
 312         u64 mask = tkr->mask, delta = (cycles - tkr->cycle_last) & mask;
 313
 314         /*
 315          * This detects both negative motion and the case where the delta
 316          * overflows the multiplication with tkr->mult.
 317          */
 318         if (unlikely(delta > tkr->clock->max_cycles)) {
 319                 /*
 320                  * Handle clocksource inconsistency between CPUs to prevent
 321                  * time from going backwards by checking for the MSB of the
 322                  * mask being set in the delta.
 323                  */
 324                 if (delta & ~(mask >> 1))
 325                         return tkr->xtime_nsec >> tkr->shift;
 326
 327                 return delta_to_ns_safe(tkr, delta);
 328         }
 329
 330         return ((delta * tkr->mult) + tkr->xtime_nsec) >> tkr->shift;
 331 }
 332
 333 static __always_inline u64 timekeeping_get_ns(const struct tk_read_base *tkr)
 334 {
 335         return timekeeping_cycles_to_ns(tkr, tk_clock_read(tkr));
 336 }
 337
 338 /**
 339  * update_fast_timekeeper - Update the fast and NMI safe monotonic timekeeper.
 340  * @tkr: Timekeeping readout base from which we take the update
 341  * @tkf: Pointer to NMI safe timekeeper
 342  *
 343  * We want to use this from any context including NMI and tracing /
 344  * instrumenting the timekeeping code itself.
 345  *
 346  * Employ the latch technique; see @write_seqcount_latch.
 347  *
 348  * So if a NMI hits the update of base[0] then it will use base[1]
 349  * which is still consistent. In the worst case this can result is a
 350  * slightly wrong timestamp (a few nanoseconds). See
 351  * @ktime_get_mono_fast_ns.
 352  */
 353 static void update_fast_timekeeper(const struct tk_read_base *tkr,
 354                                    struct tk_fast *tkf)
 355 {
 356         struct tk_read_base *base = tkf->base;
 357
 358         /* Force readers off to base[1] */
 359         write_seqcount_latch_begin(&tkf->seq);
 360
 361         /* Update base[0] */
 362         memcpy(base, tkr, sizeof(*base));
 363
 364         /* Force readers back to base[0] */
 365         write_seqcount_latch(&tkf->seq);
 366
 367         /* Update base[1] */
 368         memcpy(base + 1, base, sizeof(*base));
 369
 370         write_seqcount_latch_end(&tkf->seq);
 371 }
 372
 373 static __always_inline u64 __ktime_get_fast_ns(struct tk_fast *tkf)
 374 {
 375         struct tk_read_base *tkr;
 376         unsigned int seq;
 377         u64 now;
 378
 379         do {
 380                 seq = read_seqcount_latch(&tkf->seq);
 381                 tkr = tkf->base + (seq & 0x01);
 382                 now = ktime_to_ns(tkr->base);
 383                 now += timekeeping_get_ns(tkr);
 384         } while (read_seqcount_latch_retry(&tkf->seq, seq));
 385
 386         return now;
 387 }
 388
 389 /**
 390  * ktime_get_mono_fast_ns - Fast NMI safe access to clock monotonic
 391  *
 392  * This timestamp is not guaranteed to be monotonic across an update.
 393  * The timestamp is calculated by:
 394  *
 395  *      now = base_mono + clock_delta * slope
 396  *
 397  * So if the update lowers the slope, readers who are forced to the
 398  * not yet updated second array are still using the old steeper slope.
 399  *
 400  * tmono
 401  * ^
 402  * |    o  n
 403  * |   o n
 404  * |  u
 405  * | o
 406  * |o
 407  * |12345678---> reader order
 408  *
 409  * o = old slope
 410  * u = update
 411  * n = new slope
 412  *
 413  * So reader 6 will observe time going backwards versus reader 5.
 414  *
 415  * While other CPUs are likely to be able to observe that, the only way
 416  * for a CPU local observation is when an NMI hits in the middle of
 417  * the update. Timestamps taken from that NMI context might be ahead
 418  * of the following timestamps. Callers need to be aware of that and
 419  * deal with it.
 420  */
 421 u64 notrace ktime_get_mono_fast_ns(void)
 422 {
 423         return __ktime_get_fast_ns(&tk_fast_mono);
 424 }
 425 EXPORT_SYMBOL_GPL(ktime_get_mono_fast_ns);
 426
 427 /**
 428  * ktime_get_raw_fast_ns - Fast NMI safe access to clock monotonic raw
 429  *
 430  * Contrary to ktime_get_mono_fast_ns() this is always correct because the
 431  * conversion factor is not affected by NTP/PTP correction.
 432  */
 433 u64 notrace ktime_get_raw_fast_ns(void)
 434 {
 435         return __ktime_get_fast_ns(&tk_fast_raw);
 436 }
 437 EXPORT_SYMBOL_GPL(ktime_get_raw_fast_ns);
 438
 439 /**
 440  * ktime_get_boot_fast_ns - NMI safe and fast access to boot clock.
 441  *
 442  * To keep it NMI safe since we're accessing from tracing, we're not using a
 443  * separate timekeeper with updates to monotonic clock and boot offset
 444  * protected with seqcounts. This has the following minor side effects:
 445  *
 446  * (1) Its possible that a timestamp be taken after the boot offset is updated
 447  * but before the timekeeper is updated. If this happens, the new boot offset
 448  * is added to the old timekeeping making the clock appear to update slightly
 449  * earlier:
 450  *    CPU 0                                        CPU 1
 451  *    timekeeping_inject_sleeptime64()
 452  *    __timekeeping_inject_sleeptime(tk, delta);
 453  *                                                 timestamp();
 454  *    timekeeping_update_staged(tkd, TK_CLEAR_NTP...);
 455  *
 456  * (2) On 32-bit systems, the 64-bit boot offset (tk->offs_boot) may be
 457  * partially updated.  Since the tk->offs_boot update is a rare event, this
 458  * should be a rare occurrence which postprocessing should be able to handle.
 459  *
 460  * The caveats vs. timestamp ordering as documented for ktime_get_mono_fast_ns()
 461  * apply as well.
 462  */
 463 u64 notrace ktime_get_boot_fast_ns(void)
 464 {
 465         struct timekeeper *tk = &tk_core.timekeeper;
 466
 467         return (ktime_get_mono_fast_ns() + ktime_to_ns(data_race(tk->offs_boot)));
 468 }
 469 EXPORT_SYMBOL_GPL(ktime_get_boot_fast_ns);
 470
 471 /**
 472  * ktime_get_tai_fast_ns - NMI safe and fast access to tai clock.
 473  *
 474  * The same limitations as described for ktime_get_boot_fast_ns() apply. The
 475  * mono time and the TAI offset are not read atomically which may yield wrong
 476  * readouts. However, an update of the TAI offset is an rare event e.g., caused
 477  * by settime or adjtimex with an offset. The user of this function has to deal
 478  * with the possibility of wrong timestamps in post processing.
 479  */
 480 u64 notrace ktime_get_tai_fast_ns(void)
 481 {
 482         struct timekeeper *tk = &tk_core.timekeeper;
 483
 484         return (ktime_get_mono_fast_ns() + ktime_to_ns(data_race(tk->offs_tai)));
 485 }
 486 EXPORT_SYMBOL_GPL(ktime_get_tai_fast_ns);
 487
 488 static __always_inline u64 __ktime_get_real_fast(struct tk_fast *tkf, u64 *mono)
 489 {
 490         struct tk_read_base *tkr;
 491         u64 basem, baser, delta;
 492         unsigned int seq;
 493
 494         do {
 495                 seq = raw_read_seqcount_latch(&tkf->seq);
 496                 tkr = tkf->base + (seq & 0x01);
 497                 basem = ktime_to_ns(tkr->base);
 498                 baser = ktime_to_ns(tkr->base_real);
 499                 delta = timekeeping_get_ns(tkr);
 500         } while (raw_read_seqcount_latch_retry(&tkf->seq, seq));
 501
 502         if (mono)
 503                 *mono = basem + delta;
 504         return baser + delta;
 505 }
 506
 507 /**
 508  * ktime_get_real_fast_ns: - NMI safe and fast access to clock realtime.
 509  *
 510  * See ktime_get_mono_fast_ns() for documentation of the time stamp ordering.
 511  */
 512 u64 ktime_get_real_fast_ns(void)
 513 {
 514         return __ktime_get_real_fast(&tk_fast_mono, NULL);
 515 }
 516 EXPORT_SYMBOL_GPL(ktime_get_real_fast_ns);
 517
 518 /**
 519  * ktime_get_fast_timestamps: - NMI safe timestamps
 520  * @snapshot:   Pointer to timestamp storage
 521  *
 522  * Stores clock monotonic, boottime and realtime timestamps.
 523  *
 524  * Boot time is a racy access on 32bit systems if the sleep time injection
 525  * happens late during resume and not in timekeeping_resume(). That could
 526  * be avoided by expanding struct tk_read_base with boot offset for 32bit
 527  * and adding more overhead to the update. As this is a hard to observe
 528  * once per resume event which can be filtered with reasonable effort using
 529  * the accurate mono/real timestamps, it's probably not worth the trouble.
 530  *
 531  * Aside of that it might be possible on 32 and 64 bit to observe the
 532  * following when the sleep time injection happens late:
 533  *
 534  * CPU 0                                CPU 1
 535  * timekeeping_resume()
 536  * ktime_get_fast_timestamps()
 537  *      mono, real = __ktime_get_real_fast()
 538  *                                      inject_sleep_time()
 539  *                                         update boot offset
 540  *      boot = mono + bootoffset;
 541  *
 542  * That means that boot time already has the sleep time adjustment, but
 543  * real time does not. On the next readout both are in sync again.
 544  *
 545  * Preventing this for 64bit is not really feasible without destroying the
 546  * careful cache layout of the timekeeper because the sequence count and
 547  * struct tk_read_base would then need two cache lines instead of one.
 548  *
 549  * Access to the time keeper clock source is disabled across the innermost
 550  * steps of suspend/resume. The accessors still work, but the timestamps
 551  * are frozen until time keeping is resumed which happens very early.
 552  *
 553  * For regular suspend/resume there is no observable difference vs. sched
 554  * clock, but it might affect some of the nasty low level debug printks.
 555  *
 556  * OTOH, access to sched clock is not guaranteed across suspend/resume on
 557  * all systems either so it depends on the hardware in use.
 558  *
 559  * If that turns out to be a real problem then this could be mitigated by
 560  * using sched clock in a similar way as during early boot. But it's not as
 561  * trivial as on early boot because it needs some careful protection
 562  * against the clock monotonic timestamp jumping backwards on resume.
 563  */
 564 void ktime_get_fast_timestamps(struct ktime_timestamps *snapshot)
 565 {
 566         struct timekeeper *tk = &tk_core.timekeeper;
 567
 568         snapshot->real = __ktime_get_real_fast(&tk_fast_mono, &snapshot->mono);
 569         snapshot->boot = snapshot->mono + ktime_to_ns(data_race(tk->offs_boot));
 570 }
 571
 572 /**
 573  * halt_fast_timekeeper - Prevent fast timekeeper from accessing clocksource.
 574  * @tk: Timekeeper to snapshot.
 575  *
 576  * It generally is unsafe to access the clocksource after timekeeping has been
 577  * suspended, so take a snapshot of the readout base of @tk and use it as the
 578  * fast timekeeper's readout base while suspended.  It will return the same
 579  * number of cycles every time until timekeeping is resumed at which time the
 580  * proper readout base for the fast timekeeper will be restored automatically.
 581  */
 582 static void halt_fast_timekeeper(const struct timekeeper *tk)
 583 {
 584         static struct tk_read_base tkr_dummy;
 585         const struct tk_read_base *tkr = &tk->tkr_mono;
 586
 587         memcpy(&tkr_dummy, tkr, sizeof(tkr_dummy));
 588         cycles_at_suspend = tk_clock_read(tkr);
 589         tkr_dummy.clock = &dummy_clock;
 590         tkr_dummy.base_real = tkr->base + tk->offs_real;
 591         update_fast_timekeeper(&tkr_dummy, &tk_fast_mono);
 592
 593         tkr = &tk->tkr_raw;
 594         memcpy(&tkr_dummy, tkr, sizeof(tkr_dummy));
 595         tkr_dummy.clock = &dummy_clock;
 596         update_fast_timekeeper(&tkr_dummy, &tk_fast_raw);
 597 }
 598
 599 static RAW_NOTIFIER_HEAD(pvclock_gtod_chain);
 600
 601 static void update_pvclock_gtod(struct timekeeper *tk, bool was_set)
 602 {
 603         raw_notifier_call_chain(&pvclock_gtod_chain, was_set, tk);
 604 }
 605
 606 /**
 607  * pvclock_gtod_register_notifier - register a pvclock timedata update listener
 608  * @nb: Pointer to the notifier block to register
 609  */
 610 int pvclock_gtod_register_notifier(struct notifier_block *nb)
 611 {
 612         struct timekeeper *tk = &tk_core.timekeeper;
 613         int ret;
 614
 615         guard(raw_spinlock_irqsave)(&tk_core.lock);
 616         ret = raw_notifier_chain_register(&pvclock_gtod_chain, nb);
 617         update_pvclock_gtod(tk, true);
 618
 619         return ret;
 620 }
 621 EXPORT_SYMBOL_GPL(pvclock_gtod_register_notifier);
 622
 623 /**
 624  * pvclock_gtod_unregister_notifier - unregister a pvclock
 625  * timedata update listener
 626  * @nb: Pointer to the notifier block to unregister
 627  */
 628 int pvclock_gtod_unregister_notifier(struct notifier_block *nb)
 629 {
 630         guard(raw_spinlock_irqsave)(&tk_core.lock);
 631         return raw_notifier_chain_unregister(&pvclock_gtod_chain, nb);
 632 }
 633 EXPORT_SYMBOL_GPL(pvclock_gtod_unregister_notifier);
 634
 635 /*
 636  * tk_update_leap_state - helper to update the next_leap_ktime
 637  */
 638 static inline void tk_update_leap_state(struct timekeeper *tk)
 639 {
 640         tk->next_leap_ktime = ntp_get_next_leap();
 641         if (tk->next_leap_ktime != KTIME_MAX)
 642                 /* Convert to monotonic time */
 643                 tk->next_leap_ktime = ktime_sub(tk->next_leap_ktime, tk->offs_real);
 644 }
 645
 646 /*
 647  * Leap state update for both shadow and the real timekeeper
 648  * Separate to spare a full memcpy() of the timekeeper.
 649  */
 650 static void tk_update_leap_state_all(struct tk_data *tkd)
 651 {
 652         write_seqcount_begin(&tkd->seq);
 653         tk_update_leap_state(&tkd->shadow_timekeeper);
 654         tkd->timekeeper.next_leap_ktime = tkd->shadow_timekeeper.next_leap_ktime;
 655         write_seqcount_end(&tkd->seq);
 656 }
 657
 658 /*
 659  * Update the ktime_t based scalar nsec members of the timekeeper
 660  */
 661 static inline void tk_update_ktime_data(struct timekeeper *tk)
 662 {
 663         u64 seconds;
 664         u32 nsec;
 665
 666         /*
 667          * The xtime based monotonic readout is:
 668          *      nsec = (xtime_sec + wtm_sec) * 1e9 + wtm_nsec + now();
 669          * The ktime based monotonic readout is:
 670          *      nsec = base_mono + now();
 671          * ==> base_mono = (xtime_sec + wtm_sec) * 1e9 + wtm_nsec
 672          */
 673         seconds = (u64)(tk->xtime_sec + tk->wall_to_monotonic.tv_sec);
 674         nsec = (u32) tk->wall_to_monotonic.tv_nsec;
 675         tk->tkr_mono.base = ns_to_ktime(seconds * NSEC_PER_SEC + nsec);
 676
 677         /*
 678          * The sum of the nanoseconds portions of xtime and
 679          * wall_to_monotonic can be greater/equal one second. Take
 680          * this into account before updating tk->ktime_sec.
 681          */
 682         nsec += (u32)(tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift);
 683         if (nsec >= NSEC_PER_SEC)
 684                 seconds++;
 685         tk->ktime_sec = seconds;
 686
 687         /* Update the monotonic raw base */
 688         tk->tkr_raw.base = ns_to_ktime(tk->raw_sec * NSEC_PER_SEC);
 689 }
 690
 691 /*
 692  * Restore the shadow timekeeper from the real timekeeper.
 693  */
 694 static void timekeeping_restore_shadow(struct tk_data *tkd)
 695 {
 696         lockdep_assert_held(&tkd->lock);
 697         memcpy(&tkd->shadow_timekeeper, &tkd->timekeeper, sizeof(tkd->timekeeper));
 698 }
 699
 700 static void timekeeping_update_from_shadow(struct tk_data *tkd, unsigned int action)
 701 {
 702         struct timekeeper *tk = &tk_core.shadow_timekeeper;
 703
 704         lockdep_assert_held(&tkd->lock);
 705
 706         /*
 707          * Block out readers before running the updates below because that
 708          * updates VDSO and other time related infrastructure. Not blocking
 709          * the readers might let a reader see time going backwards when
 710          * reading from the VDSO after the VDSO update and then reading in
 711          * the kernel from the timekeeper before that got updated.
 712          */
 713         write_seqcount_begin(&tkd->seq);
 714
 715         if (action & TK_CLEAR_NTP) {
 716                 tk->ntp_error = 0;
 717                 ntp_clear();
 718         }
 719
 720         tk_update_leap_state(tk);
 721         tk_update_ktime_data(tk);
 722
 723         update_vsyscall(tk);
 724         update_pvclock_gtod(tk, action & TK_CLOCK_WAS_SET);
 725
 726         tk->tkr_mono.base_real = tk->tkr_mono.base + tk->offs_real;
 727         update_fast_timekeeper(&tk->tkr_mono, &tk_fast_mono);
 728         update_fast_timekeeper(&tk->tkr_raw,  &tk_fast_raw);
 729
 730         if (action & TK_CLOCK_WAS_SET)
 731                 tk->clock_was_set_seq++;
 732
 733         /*
 734          * Update the real timekeeper.
 735          *
 736          * We could avoid this memcpy() by switching pointers, but that has
 737          * the downside that the reader side does not longer benefit from
 738          * the cacheline optimized data layout of the timekeeper and requires
 739          * another indirection.
 740          */
 741         memcpy(&tkd->timekeeper, tk, sizeof(*tk));
 742         write_seqcount_end(&tkd->seq);
 743 }
 744
 745 /**
 746  * timekeeping_forward_now - update clock to the current time
 747  * @tk:         Pointer to the timekeeper to update
 748  *
 749  * Forward the current clock to update its state since the last call to
 750  * update_wall_time(). This is useful before significant clock changes,
 751  * as it avoids having to deal with this time offset explicitly.
 752  */
 753 static void timekeeping_forward_now(struct timekeeper *tk)
 754 {
 755         u64 cycle_now, delta;
 756
 757         cycle_now = tk_clock_read(&tk->tkr_mono);
 758         delta = clocksource_delta(cycle_now, tk->tkr_mono.cycle_last, tk->tkr_mono.mask);
 759         tk->tkr_mono.cycle_last = cycle_now;
 760         tk->tkr_raw.cycle_last  = cycle_now;
 761
 762         while (delta > 0) {
 763                 u64 max = tk->tkr_mono.clock->max_cycles;
 764                 u64 incr = delta < max ? delta : max;
 765
 766                 tk->tkr_mono.xtime_nsec += incr * tk->tkr_mono.mult;
 767                 tk->tkr_raw.xtime_nsec += incr * tk->tkr_raw.mult;
 768                 tk_normalize_xtime(tk);
 769                 delta -= incr;
 770         }
 771 }
 772
 773 /**
 774  * ktime_get_real_ts64 - Returns the time of day in a timespec64.
 775  * @ts:         pointer to the timespec to be set
 776  *
 777  * Returns the time of day in a timespec64 (WARN if suspended).
 778  */
 779 void ktime_get_real_ts64(struct timespec64 *ts)
 780 {
 781         struct timekeeper *tk = &tk_core.timekeeper;
 782         unsigned int seq;
 783         u64 nsecs;
 784
 785         WARN_ON(timekeeping_suspended);
 786
 787         do {
 788                 seq = read_seqcount_begin(&tk_core.seq);
 789
 790                 ts->tv_sec = tk->xtime_sec;
 791                 nsecs = timekeeping_get_ns(&tk->tkr_mono);
 792
 793         } while (read_seqcount_retry(&tk_core.seq, seq));
 794
 795         ts->tv_nsec = 0;
 796         timespec64_add_ns(ts, nsecs);
 797 }
 798 EXPORT_SYMBOL(ktime_get_real_ts64);
 799
 800 ktime_t ktime_get(void)
 801 {
 802         struct timekeeper *tk = &tk_core.timekeeper;
 803         unsigned int seq;
 804         ktime_t base;
 805         u64 nsecs;
 806
 807         WARN_ON(timekeeping_suspended);
 808
 809         do {
 810                 seq = read_seqcount_begin(&tk_core.seq);
 811                 base = tk->tkr_mono.base;
 812                 nsecs = timekeeping_get_ns(&tk->tkr_mono);
 813
 814         } while (read_seqcount_retry(&tk_core.seq, seq));
 815
 816         return ktime_add_ns(base, nsecs);
 817 }
 818 EXPORT_SYMBOL_GPL(ktime_get);
 819
 820 u32 ktime_get_resolution_ns(void)
 821 {
 822         struct timekeeper *tk = &tk_core.timekeeper;
 823         unsigned int seq;
 824         u32 nsecs;
 825
 826         WARN_ON(timekeeping_suspended);
 827
 828         do {
 829                 seq = read_seqcount_begin(&tk_core.seq);
 830                 nsecs = tk->tkr_mono.mult >> tk->tkr_mono.shift;
 831         } while (read_seqcount_retry(&tk_core.seq, seq));
 832
 833         return nsecs;
 834 }
 835 EXPORT_SYMBOL_GPL(ktime_get_resolution_ns);
 836
/*
 * Lookup table from an enum tk_offsets value to the matching offset field
 * of the core timekeeper. Indexed with the @offs argument of
 * ktime_get_with_offset(), ktime_get_coarse_with_offset() and
 * ktime_mono_to_any().
 */
static ktime_t *offsets[TK_OFFS_MAX] = {
        [TK_OFFS_REAL]  = &tk_core.timekeeper.offs_real,
        [TK_OFFS_BOOT]  = &tk_core.timekeeper.offs_boot,
        [TK_OFFS_TAI]   = &tk_core.timekeeper.offs_tai,
};
 842
 843 ktime_t ktime_get_with_offset(enum tk_offsets offs)
 844 {
 845         struct timekeeper *tk = &tk_core.timekeeper;
 846         unsigned int seq;
 847         ktime_t base, *offset = offsets[offs];
 848         u64 nsecs;
 849
 850         WARN_ON(timekeeping_suspended);
 851
 852         do {
 853                 seq = read_seqcount_begin(&tk_core.seq);
 854                 base = ktime_add(tk->tkr_mono.base, *offset);
 855                 nsecs = timekeeping_get_ns(&tk->tkr_mono);
 856
 857         } while (read_seqcount_retry(&tk_core.seq, seq));
 858
 859         return ktime_add_ns(base, nsecs);
 860
 861 }
 862 EXPORT_SYMBOL_GPL(ktime_get_with_offset);
 863
 864 ktime_t ktime_get_coarse_with_offset(enum tk_offsets offs)
 865 {
 866         struct timekeeper *tk = &tk_core.timekeeper;
 867         unsigned int seq;
 868         ktime_t base, *offset = offsets[offs];
 869         u64 nsecs;
 870
 871         WARN_ON(timekeeping_suspended);
 872
 873         do {
 874                 seq = read_seqcount_begin(&tk_core.seq);
 875                 base = ktime_add(tk->tkr_mono.base, *offset);
 876                 nsecs = tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift;
 877
 878         } while (read_seqcount_retry(&tk_core.seq, seq));
 879
 880         return ktime_add_ns(base, nsecs);
 881 }
 882 EXPORT_SYMBOL_GPL(ktime_get_coarse_with_offset);
 883
 884 /**
 885  * ktime_mono_to_any() - convert monotonic time to any other time
 886  * @tmono:      time to convert.
 887  * @offs:       which offset to use
 888  */
 889 ktime_t ktime_mono_to_any(ktime_t tmono, enum tk_offsets offs)
 890 {
 891         ktime_t *offset = offsets[offs];
 892         unsigned int seq;
 893         ktime_t tconv;
 894
 895         if (IS_ENABLED(CONFIG_64BIT)) {
 896                 /*
 897                  * Paired with WRITE_ONCE()s in tk_set_wall_to_mono() and
 898                  * tk_update_sleep_time().
 899                  */
 900                 return ktime_add(tmono, READ_ONCE(*offset));
 901         }
 902
 903         do {
 904                 seq = read_seqcount_begin(&tk_core.seq);
 905                 tconv = ktime_add(tmono, *offset);
 906         } while (read_seqcount_retry(&tk_core.seq, seq));
 907
 908         return tconv;
 909 }
 910 EXPORT_SYMBOL_GPL(ktime_mono_to_any);
 911
 912 /**
 913  * ktime_get_raw - Returns the raw monotonic time in ktime_t format
 914  */
 915 ktime_t ktime_get_raw(void)
 916 {
 917         struct timekeeper *tk = &tk_core.timekeeper;
 918         unsigned int seq;
 919         ktime_t base;
 920         u64 nsecs;
 921
 922         do {
 923                 seq = read_seqcount_begin(&tk_core.seq);
 924                 base = tk->tkr_raw.base;
 925                 nsecs = timekeeping_get_ns(&tk->tkr_raw);
 926
 927         } while (read_seqcount_retry(&tk_core.seq, seq));
 928
 929         return ktime_add_ns(base, nsecs);
 930 }
 931 EXPORT_SYMBOL_GPL(ktime_get_raw);
 932
 933 /**
 934  * ktime_get_ts64 - get the monotonic clock in timespec64 format
 935  * @ts:         pointer to timespec variable
 936  *
 937  * The function calculates the monotonic clock from the realtime
 938  * clock and the wall_to_monotonic offset and stores the result
 939  * in normalized timespec64 format in the variable pointed to by @ts.
 940  */
 941 void ktime_get_ts64(struct timespec64 *ts)
 942 {
 943         struct timekeeper *tk = &tk_core.timekeeper;
 944         struct timespec64 tomono;
 945         unsigned int seq;
 946         u64 nsec;
 947
 948         WARN_ON(timekeeping_suspended);
 949
 950         do {
 951                 seq = read_seqcount_begin(&tk_core.seq);
 952                 ts->tv_sec = tk->xtime_sec;
 953                 nsec = timekeeping_get_ns(&tk->tkr_mono);
 954                 tomono = tk->wall_to_monotonic;
 955
 956         } while (read_seqcount_retry(&tk_core.seq, seq));
 957
 958         ts->tv_sec += tomono.tv_sec;
 959         ts->tv_nsec = 0;
 960         timespec64_add_ns(ts, nsec + tomono.tv_nsec);
 961 }
 962 EXPORT_SYMBOL_GPL(ktime_get_ts64);
 963
 964 /**
 965  * ktime_get_seconds - Get the seconds portion of CLOCK_MONOTONIC
 966  *
 967  * Returns the seconds portion of CLOCK_MONOTONIC with a single non
 968  * serialized read. tk->ktime_sec is of type 'unsigned long' so this
 969  * works on both 32 and 64 bit systems. On 32 bit systems the readout
 970  * covers ~136 years of uptime which should be enough to prevent
 971  * premature wrap arounds.
 972  */
 973 time64_t ktime_get_seconds(void)
 974 {
 975         struct timekeeper *tk = &tk_core.timekeeper;
 976
 977         WARN_ON(timekeeping_suspended);
 978         return tk->ktime_sec;
 979 }
 980 EXPORT_SYMBOL_GPL(ktime_get_seconds);
 981
 982 /**
 983  * ktime_get_real_seconds - Get the seconds portion of CLOCK_REALTIME
 984  *
 985  * Returns the wall clock seconds since 1970.
 986  *
 987  * For 64bit systems the fast access to tk->xtime_sec is preserved. On
 988  * 32bit systems the access must be protected with the sequence
 989  * counter to provide "atomic" access to the 64bit tk->xtime_sec
 990  * value.
 991  */
 992 time64_t ktime_get_real_seconds(void)
 993 {
 994         struct timekeeper *tk = &tk_core.timekeeper;
 995         time64_t seconds;
 996         unsigned int seq;
 997
 998         if (IS_ENABLED(CONFIG_64BIT))
 999                 return tk->xtime_sec;
1000
1001         do {
1002                 seq = read_seqcount_begin(&tk_core.seq);
1003                 seconds = tk->xtime_sec;
1004
1005         } while (read_seqcount_retry(&tk_core.seq, seq));
1006
1007         return seconds;
1008 }
1009 EXPORT_SYMBOL_GPL(ktime_get_real_seconds);
1010
1011 /**
1012  * __ktime_get_real_seconds - The same as ktime_get_real_seconds
1013  * but without the sequence counter protect. This internal function
1014  * is called just when timekeeping lock is already held.
1015  */
1016 noinstr time64_t __ktime_get_real_seconds(void)
1017 {
1018         struct timekeeper *tk = &tk_core.timekeeper;
1019
1020         return tk->xtime_sec;
1021 }
1022
1023 /**
1024  * ktime_get_snapshot - snapshots the realtime/monotonic raw clocks with counter
1025  * @systime_snapshot:   pointer to struct receiving the system time snapshot
1026  */
1027 void ktime_get_snapshot(struct system_time_snapshot *systime_snapshot)
1028 {
1029         struct timekeeper *tk = &tk_core.timekeeper;
1030         unsigned int seq;
1031         ktime_t base_raw;
1032         ktime_t base_real;
1033         ktime_t base_boot;
1034         u64 nsec_raw;
1035         u64 nsec_real;
1036         u64 now;
1037
1038         WARN_ON_ONCE(timekeeping_suspended);
1039
1040         do {
1041                 seq = read_seqcount_begin(&tk_core.seq);
1042                 now = tk_clock_read(&tk->tkr_mono);
1043                 systime_snapshot->cs_id = tk->tkr_mono.clock->id;
1044                 systime_snapshot->cs_was_changed_seq = tk->cs_was_changed_seq;
1045                 systime_snapshot->clock_was_set_seq = tk->clock_was_set_seq;
1046                 base_real = ktime_add(tk->tkr_mono.base,
1047                                       tk_core.timekeeper.offs_real);
1048                 base_boot = ktime_add(tk->tkr_mono.base,
1049                                       tk_core.timekeeper.offs_boot);
1050                 base_raw = tk->tkr_raw.base;
1051                 nsec_real = timekeeping_cycles_to_ns(&tk->tkr_mono, now);
1052                 nsec_raw  = timekeeping_cycles_to_ns(&tk->tkr_raw, now);
1053         } while (read_seqcount_retry(&tk_core.seq, seq));
1054
1055         systime_snapshot->cycles = now;
1056         systime_snapshot->real = ktime_add_ns(base_real, nsec_real);
1057         systime_snapshot->boot = ktime_add_ns(base_boot, nsec_real);
1058         systime_snapshot->raw = ktime_add_ns(base_raw, nsec_raw);
1059 }
1060 EXPORT_SYMBOL_GPL(ktime_get_snapshot);
1061
1062 /* Scale base by mult/div checking for overflow */
1063 static int scale64_check_overflow(u64 mult, u64 div, u64 *base)
1064 {
1065         u64 tmp, rem;
1066
1067         tmp = div64_u64_rem(*base, div, &rem);
1068
1069         if (((int)sizeof(u64)*8 - fls64(mult) < fls64(tmp)) ||
1070             ((int)sizeof(u64)*8 - fls64(mult) < fls64(rem)))
1071                 return -EOVERFLOW;
1072         tmp *= mult;
1073
1074         rem = div64_u64(rem * mult, div);
1075         *base = tmp + rem;
1076         return 0;
1077 }
1078
/**
 * adjust_historical_crosststamp - adjust crosstimestamp previous to current interval
 * @history:                    Snapshot representing start of history
 * @partial_history_cycles:     Cycle offset into history (fractional part)
 * @total_history_cycles:       Total history length in cycles
 * @discontinuity:              True indicates clock was set on history period
 * @ts:                         Cross timestamp that should be adjusted using
 *      partial/total ratio
 *
 * Helper function used by get_device_system_crosststamp() to correct the
 * crosstimestamp corresponding to the start of the current interval to the
 * system counter value (timestamp point) provided by the driver. The
 * total_history_* quantities are the total history starting at the provided
 * reference point and ending at the start of the current interval. The cycle
 * count between the driver timestamp point and the start of the current
 * interval is partial_history_cycles.
 *
 * Return: 0 on success or when either cycle count is zero (in which case
 * @ts is left unchanged), otherwise the error returned by
 * scale64_check_overflow().
 */
static int adjust_historical_crosststamp(struct system_time_snapshot *history,
                                         u64 partial_history_cycles,
                                         u64 total_history_cycles,
                                         bool discontinuity,
                                         struct system_device_crosststamp *ts)
{
        struct timekeeper *tk = &tk_core.timekeeper;
        u64 corr_raw, corr_real;
        bool interp_forward;
        int ret;

        /* Nothing to interpolate; this also avoids a division by zero below */
        if (total_history_cycles == 0 || partial_history_cycles == 0)
                return 0;

        /*
         * Interpolate shortest distance from beginning or end of history.
         * When interpolating forward, the partial count is re-expressed as
         * the distance from the beginning of the history.
         */
        interp_forward = partial_history_cycles > total_history_cycles / 2;
        partial_history_cycles = interp_forward ?
                total_history_cycles - partial_history_cycles :
                partial_history_cycles;

        /*
         * Scale the monotonic raw time delta by:
         *      partial_history_cycles / total_history_cycles
         */
        corr_raw = (u64)ktime_to_ns(
                ktime_sub(ts->sys_monoraw, history->raw));
        ret = scale64_check_overflow(partial_history_cycles,
                                     total_history_cycles, &corr_raw);
        if (ret)
                return ret;

        /*
         * If there is a discontinuity in the history, scale monotonic raw
         *      correction by:
         *      mult(real)/mult(raw) yielding the realtime correction
         * Otherwise, calculate the realtime correction similar to monotonic
         *      raw calculation
         */
        if (discontinuity) {
                corr_real = mul_u64_u32_div
                        (corr_raw, tk->tkr_mono.mult, tk->tkr_raw.mult);
        } else {
                corr_real = (u64)ktime_to_ns(
                        ktime_sub(ts->sys_realtime, history->real));
                ret = scale64_check_overflow(partial_history_cycles,
                                             total_history_cycles, &corr_real);
                if (ret)
                        return ret;
        }

        /*
         * Fix up the monotonic raw and realtime values: forward interpolation
         * adds the correction to the history snapshot, backward interpolation
         * subtracts it from the timestamp passed in via @ts.
         */
        if (interp_forward) {
                ts->sys_monoraw = ktime_add_ns(history->raw, corr_raw);
                ts->sys_realtime = ktime_add_ns(history->real, corr_real);
        } else {
                ts->sys_monoraw = ktime_sub_ns(ts->sys_monoraw, corr_raw);
                ts->sys_realtime = ktime_sub_ns(ts->sys_realtime, corr_real);
        }

        return 0;
}
1157
/*
 * timestamp_in_interval - true if ts is chronologically in [start, end]
 *
 * True if ts occurs chronologically at or after start, and before or at end.
 * An interval with start > end is treated as one that wraps around the
 * end of the u64 range.
 */
static bool timestamp_in_interval(u64 start, u64 end, u64 ts)
{
        /* Ordinary interval: ts must lie between both bounds */
        if (start <= end)
                return start <= ts && ts <= end;

        /* Wrapped interval: ts is at/after start or at/before end */
        return ts >= start || ts <= end;
}
1171
1172 static bool convert_clock(u64 *val, u32 numerator, u32 denominator)
1173 {
1174         u64 rem, res;
1175
1176         if (!numerator || !denominator)
1177                 return false;
1178
1179         res = div64_u64_rem(*val, denominator, &rem) * numerator;
1180         *val = res + div_u64(rem * numerator, denominator);
1181         return true;
1182 }
1183
1184 static bool convert_base_to_cs(struct system_counterval_t *scv)
1185 {
1186         struct clocksource *cs = tk_core.timekeeper.tkr_mono.clock;
1187         struct clocksource_base *base;
1188         u32 num, den;
1189
1190         /* The timestamp was taken from the time keeper clock source */
1191         if (cs->id == scv->cs_id)
1192                 return true;
1193
1194         /*
1195          * Check whether cs_id matches the base clock. Prevent the compiler from
1196          * re-evaluating @base as the clocksource might change concurrently.
1197          */
1198         base = READ_ONCE(cs->base);
1199         if (!base || base->id != scv->cs_id)
1200                 return false;
1201
1202         num = scv->use_nsecs ? cs->freq_khz : base->numerator;
1203         den = scv->use_nsecs ? USEC_PER_SEC : base->denominator;
1204
1205         if (!convert_clock(&scv->cycles, num, den))
1206                 return false;
1207
1208         scv->cycles += base->offset;
1209         return true;
1210 }
1211
1212 static bool convert_cs_to_base(u64 *cycles, enum clocksource_ids base_id)
1213 {
1214         struct clocksource *cs = tk_core.timekeeper.tkr_mono.clock;
1215         struct clocksource_base *base;
1216
1217         /*
1218          * Check whether base_id matches the base clock. Prevent the compiler from
1219          * re-evaluating @base as the clocksource might change concurrently.
1220          */
1221         base = READ_ONCE(cs->base);
1222         if (!base || base->id != base_id)
1223                 return false;
1224
1225         *cycles -= base->offset;
1226         if (!convert_clock(cycles, base->denominator, base->numerator))
1227                 return false;
1228         return true;
1229 }
1230
1231 static bool convert_ns_to_cs(u64 *delta)
1232 {
1233         struct tk_read_base *tkr = &tk_core.timekeeper.tkr_mono;
1234
1235         if (BITS_TO_BYTES(fls64(*delta) + tkr->shift) >= sizeof(*delta))
1236                 return false;
1237
1238         *delta = div_u64((*delta << tkr->shift) - tkr->xtime_nsec, tkr->mult);
1239         return true;
1240 }
1241
1242 /**
1243  * ktime_real_to_base_clock() - Convert CLOCK_REALTIME timestamp to a base clock timestamp
1244  * @treal:      CLOCK_REALTIME timestamp to convert
1245  * @base_id:    base clocksource id
1246  * @cycles:     pointer to store the converted base clock timestamp
1247  *
1248  * Converts a supplied, future realtime clock value to the corresponding base clock value.
1249  *
1250  * Return:  true if the conversion is successful, false otherwise.
1251  */
1252 bool ktime_real_to_base_clock(ktime_t treal, enum clocksource_ids base_id, u64 *cycles)
1253 {
1254         struct timekeeper *tk = &tk_core.timekeeper;
1255         unsigned int seq;
1256         u64 delta;
1257
1258         do {
1259                 seq = read_seqcount_begin(&tk_core.seq);
1260                 if ((u64)treal < tk->tkr_mono.base_real)
1261                         return false;
1262                 delta = (u64)treal - tk->tkr_mono.base_real;
1263                 if (!convert_ns_to_cs(&delta))
1264                         return false;
1265                 *cycles = tk->tkr_mono.cycle_last + delta;
1266                 if (!convert_cs_to_base(cycles, base_id))
1267                         return false;
1268         } while (read_seqcount_retry(&tk_core.seq, seq));
1269
1270         return true;
1271 }
1272 EXPORT_SYMBOL_GPL(ktime_real_to_base_clock);
1273
/**
 * get_device_system_crosststamp - Synchronously capture system/device timestamp
 * @get_time_fn:        Callback to get simultaneous device time and
 *      system counter from the device driver
 * @ctx:                Context passed to get_time_fn()
 * @history_begin:      Historical reference point used to interpolate system
 *      time when counter provided by the driver is before the current interval
 * @xtstamp:            Receives simultaneously captured system and device time
 *
 * Reads a timestamp from a device and correlates it to system time
 *
 * Return: 0 on success. A non-zero value returned by @get_time_fn is passed
 * through unchanged. -ENODEV if the captured counter value carries
 * CSID_GENERIC or cannot be converted to the timekeeper clocksource.
 * -EINVAL if interpolation is required but @history_begin is NULL, the
 * counter value is outside of the history or the clocksource changed
 * during the history. Otherwise the error returned by
 * adjust_historical_crosststamp().
 */
int get_device_system_crosststamp(int (*get_time_fn)
                                  (ktime_t *device_time,
                                   struct system_counterval_t *sys_counterval,
                                   void *ctx),
                                  void *ctx,
                                  struct system_time_snapshot *history_begin,
                                  struct system_device_crosststamp *xtstamp)
{
        struct system_counterval_t system_counterval;
        struct timekeeper *tk = &tk_core.timekeeper;
        u64 cycles, now, interval_start;
        unsigned int clock_was_set_seq = 0;
        ktime_t base_real, base_raw;
        u64 nsec_real, nsec_raw;
        /* Only assigned and only read when do_interp is true */
        u8 cs_was_changed_seq;
        unsigned int seq;
        bool do_interp;
        int ret;

        do {
                seq = read_seqcount_begin(&tk_core.seq);
                /*
                 * Try to synchronously capture device time and a system
                 * counter value calling back into the device driver
                 */
                ret = get_time_fn(&xtstamp->device, &system_counterval, ctx);
                if (ret)
                        return ret;

                /*
                 * Verify that the clocksource ID associated with the captured
                 * system counter value is the same as for the currently
                 * installed timekeeper clocksource
                 */
                if (system_counterval.cs_id == CSID_GENERIC ||
                    !convert_base_to_cs(&system_counterval))
                        return -ENODEV;
                cycles = system_counterval.cycles;

                /*
                 * Check whether the system counter value provided by the
                 * device driver is on the current timekeeping interval.
                 */
                now = tk_clock_read(&tk->tkr_mono);
                interval_start = tk->tkr_mono.cycle_last;
                if (!timestamp_in_interval(interval_start, now, cycles)) {
                        /*
                         * Outside of the current interval: timestamp the
                         * interval start instead and interpolate afterwards.
                         */
                        clock_was_set_seq = tk->clock_was_set_seq;
                        cs_was_changed_seq = tk->cs_was_changed_seq;
                        cycles = interval_start;
                        do_interp = true;
                } else {
                        do_interp = false;
                }

                base_real = ktime_add(tk->tkr_mono.base,
                                      tk_core.timekeeper.offs_real);
                base_raw = tk->tkr_raw.base;

                nsec_real = timekeeping_cycles_to_ns(&tk->tkr_mono, cycles);
                nsec_raw = timekeeping_cycles_to_ns(&tk->tkr_raw, cycles);
        } while (read_seqcount_retry(&tk_core.seq, seq));

        xtstamp->sys_realtime = ktime_add_ns(base_real, nsec_real);
        xtstamp->sys_monoraw = ktime_add_ns(base_raw, nsec_raw);

        /*
         * Interpolate if necessary, adjusting back from the start of the
         * current interval
         */
        if (do_interp) {
                u64 partial_history_cycles, total_history_cycles;
                bool discontinuity;

                /*
                 * Check that the counter value is not before the provided
                 * history reference and that the history doesn't cross a
                 * clocksource change
                 */
                if (!history_begin ||
                    !timestamp_in_interval(history_begin->cycles,
                                           cycles, system_counterval.cycles) ||
                    history_begin->cs_was_changed_seq != cs_was_changed_seq)
                        return -EINVAL;
                /* At this point @cycles holds the start of the current interval */
                partial_history_cycles = cycles - system_counterval.cycles;
                total_history_cycles = cycles - history_begin->cycles;
                discontinuity =
                        history_begin->clock_was_set_seq != clock_was_set_seq;

                ret = adjust_historical_crosststamp(history_begin,
                                                    partial_history_cycles,
                                                    total_history_cycles,
                                                    discontinuity, xtstamp);
                if (ret)
                        return ret;
        }

        return 0;
}
EXPORT_SYMBOL_GPL(get_device_system_crosststamp);
1384
1385 /**
1386  * timekeeping_clocksource_has_base - Check whether the current clocksource
1387  *                                    is based on given a base clock
1388  * @id:         base clocksource ID
1389  *
1390  * Note:        The return value is a snapshot which can become invalid right
1391  *              after the function returns.
1392  *
1393  * Return:      true if the timekeeper clocksource has a base clock with @id,
1394  *              false otherwise
1395  */
1396 bool timekeeping_clocksource_has_base(enum clocksource_ids id)
1397 {
1398         /*
1399          * This is a snapshot, so no point in using the sequence
1400          * count. Just prevent the compiler from re-evaluating @base as the
1401          * clocksource might change concurrently.
1402          */
1403         struct clocksource_base *base = READ_ONCE(tk_core.timekeeper.tkr_mono.clock->base);
1404
1405         return base ? base->id == id : false;
1406 }
1407 EXPORT_SYMBOL_GPL(timekeeping_clocksource_has_base);
1408
/**
 * do_settimeofday64 - Sets the time of day.
 * @ts:     pointer to the timespec64 variable containing the new time
 *
 * Sets the time of day to the new time and update NTP and notify hrtimers
 *
 * Return: 0 on success, -EINVAL if @ts fails timespec64_valid_settod() or
 * if the current wall_to_monotonic offset compares greater than the
 * requested time delta.
 */
int do_settimeofday64(const struct timespec64 *ts)
{
        struct timespec64 ts_delta, xt;

        if (!timespec64_valid_settod(ts))
                return -EINVAL;

        /* All modifications are done on the shadow timekeeper under tk_core.lock */
        scoped_guard (raw_spinlock_irqsave, &tk_core.lock) {
                struct timekeeper *tks = &tk_core.shadow_timekeeper;

                timekeeping_forward_now(tks);

                /* Delta between the requested time and the forwarded current time */
                xt = tk_xtime(tks);
                ts_delta = timespec64_sub(*ts, xt);

                if (timespec64_compare(&tks->wall_to_monotonic, &ts_delta) > 0) {
                        /* Reject the request and restore the shadow timekeeper */
                        timekeeping_restore_shadow(&tk_core);
                        return -EINVAL;
                }

                tk_set_wall_to_mono(tks, timespec64_sub(tks->wall_to_monotonic, ts_delta));
                tk_set_xtime(tks, ts);
                timekeeping_update_from_shadow(&tk_core, TK_UPDATE_ALL);
        }

        /* Signal hrtimers about time change */
        clock_was_set(CLOCK_SET_WALL);

        /* Report the injected delta to audit and feed @ts to the random pool */
        audit_tk_injoffset(ts_delta);
        add_device_randomness(ts, sizeof(*ts));
        return 0;
}
EXPORT_SYMBOL(do_settimeofday64);
1448
/**
 * timekeeping_inject_offset - Adds or subtracts from the current time.
 * @ts:         Pointer to the timespec variable containing the offset
 *
 * Adds or subtracts an offset value from the current time.
 *
 * Return: 0 on success, -EINVAL if @ts->tv_nsec is outside of
 * [0, NSEC_PER_SEC), if the current wall_to_monotonic offset compares
 * greater than @ts, or if the resulting time fails
 * timespec64_valid_settod().
 */
static int timekeeping_inject_offset(const struct timespec64 *ts)
{
        if (ts->tv_nsec < 0 || ts->tv_nsec >= NSEC_PER_SEC)
                return -EINVAL;

        /* All modifications are done on the shadow timekeeper under tk_core.lock */
        scoped_guard (raw_spinlock_irqsave, &tk_core.lock) {
                struct timekeeper *tks = &tk_core.shadow_timekeeper;
                struct timespec64 tmp;

                timekeeping_forward_now(tks);

                /* Make sure the proposed value is valid */
                tmp = timespec64_add(tk_xtime(tks), *ts);
                if (timespec64_compare(&tks->wall_to_monotonic, ts) > 0 ||
                    !timespec64_valid_settod(&tmp)) {
                        /* Reject the request and restore the shadow timekeeper */
                        timekeeping_restore_shadow(&tk_core);
                        return -EINVAL;
                }

                tk_xtime_add(tks, ts);
                tk_set_wall_to_mono(tks, timespec64_sub(tks->wall_to_monotonic, *ts));
                timekeeping_update_from_shadow(&tk_core, TK_UPDATE_ALL);
        }

        /* Signal hrtimers about time change */
        clock_was_set(CLOCK_SET_WALL);
        return 0;
}
1483
/*
 * Indicates if there is an offset between the system clock and the hardware
 * clock/persistent clock/rtc. Set to 1 by timekeeping_warp_clock() when
 * sys_tz.tz_minuteswest is non-zero.
 */
int persistent_clock_is_local;
1489
1490 /*
1491  * Adjust the time obtained from the CMOS to be UTC time instead of
1492  * local time.
1493  *
1494  * This is ugly, but preferable to the alternatives.  Otherwise we
1495  * would either need to write a program to do it in /etc/rc (and risk
1496  * confusion if the program gets run more than once; it would also be
1497  * hard to make the program warp the clock precisely n hours)  or
1498  * compile in the timezone information into the kernel.  Bad, bad....
1499  *
1500  *                                              - TYT, 1992-01-01
1501  *
1502  * The best thing to do is to keep the CMOS clock in universal time (UTC)
1503  * as real UNIX machines always do it. This avoids all headaches about
1504  * daylight saving times and warping kernel clocks.
1505  */
1506 void timekeeping_warp_clock(void)
1507 {
1508         if (sys_tz.tz_minuteswest != 0) {
1509                 struct timespec64 adjust;
1510
1511                 persistent_clock_is_local = 1;
1512                 adjust.tv_sec = sys_tz.tz_minuteswest * 60;
1513                 adjust.tv_nsec = 0;
1514                 timekeeping_inject_offset(&adjust);
1515         }
1516 }
1517
1518 /*
1519  * __timekeeping_set_tai_offset - Sets the TAI offset from UTC and monotonic
1520  */
1521 static void __timekeeping_set_tai_offset(struct timekeeper *tk, s32 tai_offset)
1522 {
1523         tk->tai_offset = tai_offset;
1524         tk->offs_tai = ktime_add(tk->offs_real, ktime_set(tai_offset, 0));
1525 }
1526
/*
 * change_clocksource - Swaps clocksources if a new one is available
 * @data:       pointer to the struct clocksource to install
 *
 * Accumulates current time interval and initializes new clocksource
 *
 * Invoked via stop_machine() from timekeeping_notify(). Always returns 0,
 * also when the switch is aborted; the caller detects success by checking
 * which clocksource the timekeeper uses afterwards.
 */
static int change_clocksource(void *data)
{
        struct clocksource *new = data, *old = NULL;

        /*
         * If the clocksource is in a module, get a module reference.
         * Succeeds for built-in code (owner == NULL) as well. Abort if the
         * reference can't be acquired.
         */
        if (!try_module_get(new->owner))
                return 0;

        /* Abort if the device can't be enabled */
        if (new->enable && new->enable(new) != 0) {
                module_put(new->owner);
                return 0;
        }

        /* Switch over on the shadow timekeeper under tk_core.lock */
        scoped_guard (raw_spinlock_irqsave, &tk_core.lock) {
                struct timekeeper *tks = &tk_core.shadow_timekeeper;

                /* Accumulate the time elapsed on the old clocksource first */
                timekeeping_forward_now(tks);
                old = tks->tkr_mono.clock;
                tk_setup_internals(tks, new);
                timekeeping_update_from_shadow(&tk_core, TK_UPDATE_ALL);
        }

        /* Disable the replaced clocksource and drop its module reference */
        if (old) {
                if (old->disable)
                        old->disable(old);
                module_put(old->owner);
        }

        return 0;
}
1567
1568 /**
1569  * timekeeping_notify - Install a new clock source
1570  * @clock:              pointer to the clock source
1571  *
1572  * This function is called from clocksource.c after a new, better clock
1573  * source has been registered. The caller holds the clocksource_mutex.
1574  */
1575 int timekeeping_notify(struct clocksource *clock)
1576 {
1577         struct timekeeper *tk = &tk_core.timekeeper;
1578
1579         if (tk->tkr_mono.clock == clock)
1580                 return 0;
1581         stop_machine(change_clocksource, clock, NULL);
1582         tick_clock_notify();
1583         return tk->tkr_mono.clock == clock ? 0 : -1;
1584 }
1585
1586 /**
1587  * ktime_get_raw_ts64 - Returns the raw monotonic time in a timespec
1588  * @ts:         pointer to the timespec64 to be set
1589  *
1590  * Returns the raw monotonic time (completely un-modified by ntp)
1591  */
1592 void ktime_get_raw_ts64(struct timespec64 *ts)
1593 {
1594         struct timekeeper *tk = &tk_core.timekeeper;
1595         unsigned int seq;
1596         u64 nsecs;
1597
1598         do {
1599                 seq = read_seqcount_begin(&tk_core.seq);
1600                 ts->tv_sec = tk->raw_sec;
1601                 nsecs = timekeeping_get_ns(&tk->tkr_raw);
1602
1603         } while (read_seqcount_retry(&tk_core.seq, seq));
1604
1605         ts->tv_nsec = 0;
1606         timespec64_add_ns(ts, nsecs);
1607 }
1608 EXPORT_SYMBOL(ktime_get_raw_ts64);
1609
1610
1611 /**
1612  * timekeeping_valid_for_hres - Check if timekeeping is suitable for hres
1613  */
1614 int timekeeping_valid_for_hres(void)
1615 {
1616         struct timekeeper *tk = &tk_core.timekeeper;
1617         unsigned int seq;
1618         int ret;
1619
1620         do {
1621                 seq = read_seqcount_begin(&tk_core.seq);
1622
1623                 ret = tk->tkr_mono.clock->flags & CLOCK_SOURCE_VALID_FOR_HRES;
1624
1625         } while (read_seqcount_retry(&tk_core.seq, seq));
1626
1627         return ret;
1628 }
1629
1630 /**
1631  * timekeeping_max_deferment - Returns max time the clocksource can be deferred
1632  */
1633 u64 timekeeping_max_deferment(void)
1634 {
1635         struct timekeeper *tk = &tk_core.timekeeper;
1636         unsigned int seq;
1637         u64 ret;
1638
1639         do {
1640                 seq = read_seqcount_begin(&tk_core.seq);
1641
1642                 ret = tk->tkr_mono.clock->max_idle_ns;
1643
1644         } while (read_seqcount_retry(&tk_core.seq, seq));
1645
1646         return ret;
1647 }
1648
1649 /**
1650  * read_persistent_clock64 -  Return time from the persistent clock.
1651  * @ts: Pointer to the storage for the readout value
1652  *
1653  * Weak dummy function for arches that do not yet support it.
1654  * Reads the time from the battery backed persistent clock.
1655  * Returns a timespec with tv_sec=0 and tv_nsec=0 if unsupported.
1656  *
1657  *  XXX - Do be sure to remove it once all arches implement it.
1658  */
1659 void __weak read_persistent_clock64(struct timespec64 *ts)
1660 {
1661         ts->tv_sec = 0;
1662         ts->tv_nsec = 0;
1663 }
1664
1665 /**
1666  * read_persistent_wall_and_boot_offset - Read persistent clock, and also offset
1667  *                                        from the boot.
1668  * @wall_time:    current time as returned by persistent clock
1669  * @boot_offset:  offset that is defined as wall_time - boot_time
1670  *
1671  * Weak dummy function for arches that do not yet support it.
1672  *
1673  * The default function calculates offset based on the current value of
1674  * local_clock(). This way architectures that support sched_clock() but don't
1675  * support dedicated boot time clock will provide the best estimate of the
1676  * boot time.
1677  */
1678 void __weak __init
1679 read_persistent_wall_and_boot_offset(struct timespec64 *wall_time,
1680                                      struct timespec64 *boot_offset)
1681 {
1682         read_persistent_clock64(wall_time);
1683         *boot_offset = ns_to_timespec64(local_clock());
1684 }
1685
/*
 * tkd_basic_setup - Initialize the locking primitives of a tk_data instance
 * @tkd:        the timekeeper data to set up
 *
 * Initializes the raw spinlock first and then the sequence counter, which
 * is handed a pointer to that same lock.
 */
static __init void tkd_basic_setup(struct tk_data *tkd)
{
        raw_spin_lock_init(&tkd->lock);
        seqcount_raw_spinlock_init(&tkd->seq, &tkd->lock);
}
1691
/*
 * Flag reflecting whether sleep time still has to be injected after a
 * suspend.
 *
 * The flag starts off false and is only set when a suspend reaches
 * timekeeping_suspend(). timekeeping_resume() sets it back to false when
 * it was able to determine the sleep time itself and has injected it.
 * Otherwise the flag stays true and is used by the RTC resume code to
 * decide whether sleep time must be injected; if so,
 * timekeeping_inject_sleeptime64() clears the flag.
 *
 * If a suspend fails before reaching timekeeping_suspend() then the flag
 * stays false and prevents erroneous sleeptime injection.
 */
static bool suspend_timing_needed;

/*
 * Flag for if there is a persistent clock on this platform. Set by
 * timekeeping_init() or, at the latest, by timekeeping_suspend() once the
 * persistent clock returned a usable value.
 */
static bool persistent_clock_exists;
1709
1710 /*
1711  * timekeeping_init - Initializes the clocksource and common timekeeping values
1712  */
1713 void __init timekeeping_init(void)
1714 {
1715         struct timespec64 wall_time, boot_offset, wall_to_mono;
1716         struct timekeeper *tks = &tk_core.shadow_timekeeper;
1717         struct clocksource *clock;
1718
1719         tkd_basic_setup(&tk_core);
1720
1721         read_persistent_wall_and_boot_offset(&wall_time, &boot_offset);
1722         if (timespec64_valid_settod(&wall_time) &&
1723             timespec64_to_ns(&wall_time) > 0) {
1724                 persistent_clock_exists = true;
1725         } else if (timespec64_to_ns(&wall_time) != 0) {
1726                 pr_warn("Persistent clock returned invalid value");
1727                 wall_time = (struct timespec64){0};
1728         }
1729
1730         if (timespec64_compare(&wall_time, &boot_offset) < 0)
1731                 boot_offset = (struct timespec64){0};
1732
1733         /*
1734          * We want set wall_to_mono, so the following is true:
1735          * wall time + wall_to_mono = boot time
1736          */
1737         wall_to_mono = timespec64_sub(boot_offset, wall_time);
1738
1739         guard(raw_spinlock_irqsave)(&tk_core.lock);
1740
1741         ntp_init();
1742
1743         clock = clocksource_default_clock();
1744         if (clock->enable)
1745                 clock->enable(clock);
1746         tk_setup_internals(tks, clock);
1747
1748         tk_set_xtime(tks, &wall_time);
1749         tks->raw_sec = 0;
1750
1751         tk_set_wall_to_mono(tks, wall_to_mono);
1752
1753         timekeeping_update_from_shadow(&tk_core, TK_CLOCK_WAS_SET);
1754 }
1755
/*
 * Persistent clock reading taken when suspend began. timekeeping_suspend()
 * may adjust it to compensate for drift, and timekeeping_resume() subtracts
 * it from the persistent clock reading taken on resume.
 */
static struct timespec64 timekeeping_suspend_time;
1758
1759 /**
1760  * __timekeeping_inject_sleeptime - Internal function to add sleep interval
1761  * @tk:         Pointer to the timekeeper to be updated
1762  * @delta:      Pointer to the delta value in timespec64 format
1763  *
1764  * Takes a timespec offset measuring a suspend interval and properly
1765  * adds the sleep offset to the timekeeping variables.
1766  */
1767 static void __timekeeping_inject_sleeptime(struct timekeeper *tk,
1768                                            const struct timespec64 *delta)
1769 {
1770         if (!timespec64_valid_strict(delta)) {
1771                 printk_deferred(KERN_WARNING
1772                                 "__timekeeping_inject_sleeptime: Invalid "
1773                                 "sleep delta value!\n");
1774                 return;
1775         }
1776         tk_xtime_add(tk, delta);
1777         tk_set_wall_to_mono(tk, timespec64_sub(tk->wall_to_monotonic, *delta));
1778         tk_update_sleep_time(tk, timespec64_to_ktime(*delta));
1779         tk_debug_account_sleep_time(delta);
1780 }
1781
1782 #if defined(CONFIG_PM_SLEEP) && defined(CONFIG_RTC_HCTOSYS_DEVICE)
1783 /*
1784  * We have three kinds of time sources to use for sleep time
1785  * injection, the preference order is:
1786  * 1) non-stop clocksource
1787  * 2) persistent clock (ie: RTC accessible when irqs are off)
1788  * 3) RTC
1789  *
1790  * 1) and 2) are used by timekeeping, 3) by RTC subsystem.
1791  * If system has neither 1) nor 2), 3) will be used finally.
1792  *
1793  *
1794  * If timekeeping has injected sleeptime via either 1) or 2),
1795  * 3) becomes needless, so in this case we don't need to call
1796  * rtc_resume(), and this is what timekeeping_rtc_skipresume()
1797  * means.
1798  */
1799 bool timekeeping_rtc_skipresume(void)
1800 {
1801         return !suspend_timing_needed;
1802 }
1803
1804 /*
1805  * 1) can be determined whether to use or not only when doing
1806  * timekeeping_resume() which is invoked after rtc_suspend(),
1807  * so we can't skip rtc_suspend() surely if system has 1).
1808  *
1809  * But if system has 2), 2) will definitely be used, so in this
1810  * case we don't need to call rtc_suspend(), and this is what
1811  * timekeeping_rtc_skipsuspend() means.
1812  */
1813 bool timekeeping_rtc_skipsuspend(void)
1814 {
1815         return persistent_clock_exists;
1816 }
1817
1818 /**
1819  * timekeeping_inject_sleeptime64 - Adds suspend interval to timeekeeping values
1820  * @delta: pointer to a timespec64 delta value
1821  *
1822  * This hook is for architectures that cannot support read_persistent_clock64
1823  * because their RTC/persistent clock is only accessible when irqs are enabled.
1824  * and also don't have an effective nonstop clocksource.
1825  *
1826  * This function should only be called by rtc_resume(), and allows
1827  * a suspend offset to be injected into the timekeeping values.
1828  */
1829 void timekeeping_inject_sleeptime64(const struct timespec64 *delta)
1830 {
1831         scoped_guard(raw_spinlock_irqsave, &tk_core.lock) {
1832                 struct timekeeper *tks = &tk_core.shadow_timekeeper;
1833
1834                 suspend_timing_needed = false;
1835                 timekeeping_forward_now(tks);
1836                 __timekeeping_inject_sleeptime(tks, delta);
1837                 timekeeping_update_from_shadow(&tk_core, TK_UPDATE_ALL);
1838         }
1839
1840         /* Signal hrtimers about time change */
1841         clock_was_set(CLOCK_SET_WALL | CLOCK_SET_BOOT);
1842 }
1843 #endif
1844
1845 /**
1846  * timekeeping_resume - Resumes the generic timekeeping subsystem.
1847  */
1848 void timekeeping_resume(void)
1849 {
1850         struct timekeeper *tks = &tk_core.shadow_timekeeper;
1851         struct clocksource *clock = tks->tkr_mono.clock;
1852         struct timespec64 ts_new, ts_delta;
1853         bool inject_sleeptime = false;
1854         u64 cycle_now, nsec;
1855         unsigned long flags;
1856
1857         read_persistent_clock64(&ts_new);
1858
1859         clockevents_resume();
1860         clocksource_resume();
1861
1862         raw_spin_lock_irqsave(&tk_core.lock, flags);
1863
1864         /*
1865          * After system resumes, we need to calculate the suspended time and
1866          * compensate it for the OS time. There are 3 sources that could be
1867          * used: Nonstop clocksource during suspend, persistent clock and rtc
1868          * device.
1869          *
1870          * One specific platform may have 1 or 2 or all of them, and the
1871          * preference will be:
1872          *      suspend-nonstop clocksource -> persistent clock -> rtc
1873          * The less preferred source will only be tried if there is no better
1874          * usable source. The rtc part is handled separately in rtc core code.
1875          */
1876         cycle_now = tk_clock_read(&tks->tkr_mono);
1877         nsec = clocksource_stop_suspend_timing(clock, cycle_now);
1878         if (nsec > 0) {
1879                 ts_delta = ns_to_timespec64(nsec);
1880                 inject_sleeptime = true;
1881         } else if (timespec64_compare(&ts_new, &timekeeping_suspend_time) > 0) {
1882                 ts_delta = timespec64_sub(ts_new, timekeeping_suspend_time);
1883                 inject_sleeptime = true;
1884         }
1885
1886         if (inject_sleeptime) {
1887                 suspend_timing_needed = false;
1888                 __timekeeping_inject_sleeptime(tks, &ts_delta);
1889         }
1890
1891         /* Re-base the last cycle value */
1892         tks->tkr_mono.cycle_last = cycle_now;
1893         tks->tkr_raw.cycle_last  = cycle_now;
1894
1895         tks->ntp_error = 0;
1896         timekeeping_suspended = 0;
1897         timekeeping_update_from_shadow(&tk_core, TK_CLOCK_WAS_SET);
1898         raw_spin_unlock_irqrestore(&tk_core.lock, flags);
1899
1900         touch_softlockup_watchdog();
1901
1902         /* Resume the clockevent device(s) and hrtimers */
1903         tick_resume();
1904         /* Notify timerfd as resume is equivalent to clock_was_set() */
1905         timerfd_resume();
1906 }
1907
1908 int timekeeping_suspend(void)
1909 {
1910         struct timekeeper *tks = &tk_core.shadow_timekeeper;
1911         struct timespec64 delta, delta_delta;
1912         static struct timespec64 old_delta;
1913         struct clocksource *curr_clock;
1914         unsigned long flags;
1915         u64 cycle_now;
1916
1917         read_persistent_clock64(&timekeeping_suspend_time);
1918
1919         /*
1920          * On some systems the persistent_clock can not be detected at
1921          * timekeeping_init by its return value, so if we see a valid
1922          * value returned, update the persistent_clock_exists flag.
1923          */
1924         if (timekeeping_suspend_time.tv_sec || timekeeping_suspend_time.tv_nsec)
1925                 persistent_clock_exists = true;
1926
1927         suspend_timing_needed = true;
1928
1929         raw_spin_lock_irqsave(&tk_core.lock, flags);
1930         timekeeping_forward_now(tks);
1931         timekeeping_suspended = 1;
1932
1933         /*
1934          * Since we've called forward_now, cycle_last stores the value
1935          * just read from the current clocksource. Save this to potentially
1936          * use in suspend timing.
1937          */
1938         curr_clock = tks->tkr_mono.clock;
1939         cycle_now = tks->tkr_mono.cycle_last;
1940         clocksource_start_suspend_timing(curr_clock, cycle_now);
1941
1942         if (persistent_clock_exists) {
1943                 /*
1944                  * To avoid drift caused by repeated suspend/resumes,
1945                  * which each can add ~1 second drift error,
1946                  * try to compensate so the difference in system time
1947                  * and persistent_clock time stays close to constant.
1948                  */
1949                 delta = timespec64_sub(tk_xtime(tks), timekeeping_suspend_time);
1950                 delta_delta = timespec64_sub(delta, old_delta);
1951                 if (abs(delta_delta.tv_sec) >= 2) {
1952                         /*
1953                          * if delta_delta is too large, assume time correction
1954                          * has occurred and set old_delta to the current delta.
1955                          */
1956                         old_delta = delta;
1957                 } else {
1958                         /* Otherwise try to adjust old_system to compensate */
1959                         timekeeping_suspend_time =
1960                                 timespec64_add(timekeeping_suspend_time, delta_delta);
1961                 }
1962         }
1963
1964         timekeeping_update_from_shadow(&tk_core, 0);
1965         halt_fast_timekeeper(tks);
1966         raw_spin_unlock_irqrestore(&tk_core.lock, flags);
1967
1968         tick_suspend();
1969         clocksource_suspend();
1970         clockevents_suspend();
1971
1972         return 0;
1973 }
1974
1975 /* sysfs resume/suspend bits for timekeeping */
1976 static struct syscore_ops timekeeping_syscore_ops = {
1977         .resume         = timekeeping_resume,
1978         .suspend        = timekeeping_suspend,
1979 };
1980
1981 static int __init timekeeping_init_ops(void)
1982 {
1983         register_syscore_ops(&timekeeping_syscore_ops);
1984         return 0;
1985 }
1986 device_initcall(timekeeping_init_ops);
1987
1988 /*
1989  * Apply a multiplier adjustment to the timekeeper
1990  */
1991 static __always_inline void timekeeping_apply_adjustment(struct timekeeper *tk,
1992                                                          s64 offset,
1993                                                          s32 mult_adj)
1994 {
1995         s64 interval = tk->cycle_interval;
1996
1997         if (mult_adj == 0) {
1998                 return;
1999         } else if (mult_adj == -1) {
2000                 interval = -interval;
2001                 offset = -offset;
2002         } else if (mult_adj != 1) {
2003                 interval *= mult_adj;
2004                 offset *= mult_adj;
2005         }
2006
2007         /*
2008          * So the following can be confusing.
2009          *
2010          * To keep things simple, lets assume mult_adj == 1 for now.
2011          *
2012          * When mult_adj != 1, remember that the interval and offset values
2013          * have been appropriately scaled so the math is the same.
2014          *
2015          * The basic idea here is that we're increasing the multiplier
2016          * by one, this causes the xtime_interval to be incremented by
2017          * one cycle_interval. This is because:
2018          *      xtime_interval = cycle_interval * mult
2019          * So if mult is being incremented by one:
2020          *      xtime_interval = cycle_interval * (mult + 1)
2021          * Its the same as:
2022          *      xtime_interval = (cycle_interval * mult) + cycle_interval
2023          * Which can be shortened to:
2024          *      xtime_interval += cycle_interval
2025          *
2026          * So offset stores the non-accumulated cycles. Thus the current
2027          * time (in shifted nanoseconds) is:
2028          *      now = (offset * adj) + xtime_nsec
2029          * Now, even though we're adjusting the clock frequency, we have
2030          * to keep time consistent. In other words, we can't jump back
2031          * in time, and we also want to avoid jumping forward in time.
2032          *
2033          * So given the same offset value, we need the time to be the same
2034          * both before and after the freq adjustment.
2035          *      now = (offset * adj_1) + xtime_nsec_1
2036          *      now = (offset * adj_2) + xtime_nsec_2
2037          * So:
2038          *      (offset * adj_1) + xtime_nsec_1 =
2039          *              (offset * adj_2) + xtime_nsec_2
2040          * And we know:
2041          *      adj_2 = adj_1 + 1
2042          * So:
2043          *      (offset * adj_1) + xtime_nsec_1 =
2044          *              (offset * (adj_1+1)) + xtime_nsec_2
2045          *      (offset * adj_1) + xtime_nsec_1 =
2046          *              (offset * adj_1) + offset + xtime_nsec_2
2047          * Canceling the sides:
2048          *      xtime_nsec_1 = offset + xtime_nsec_2
2049          * Which gives us:
2050          *      xtime_nsec_2 = xtime_nsec_1 - offset
2051          * Which simplifies to:
2052          *      xtime_nsec -= offset
2053          */
2054         if ((mult_adj > 0) && (tk->tkr_mono.mult + mult_adj < mult_adj)) {
2055                 /* NTP adjustment caused clocksource mult overflow */
2056                 WARN_ON_ONCE(1);
2057                 return;
2058         }
2059
2060         tk->tkr_mono.mult += mult_adj;
2061         tk->xtime_interval += interval;
2062         tk->tkr_mono.xtime_nsec -= offset;
2063 }
2064
2065 /*
2066  * Adjust the timekeeper's multiplier to the correct frequency
2067  * and also to reduce the accumulated error value.
2068  */
2069 static void timekeeping_adjust(struct timekeeper *tk, s64 offset)
2070 {
2071         u64 ntp_tl = ntp_tick_length();
2072         u32 mult;
2073
2074         /*
2075          * Determine the multiplier from the current NTP tick length.
2076          * Avoid expensive division when the tick length doesn't change.
2077          */
2078         if (likely(tk->ntp_tick == ntp_tl)) {
2079                 mult = tk->tkr_mono.mult - tk->ntp_err_mult;
2080         } else {
2081                 tk->ntp_tick = ntp_tl;
2082                 mult = div64_u64((tk->ntp_tick >> tk->ntp_error_shift) -
2083                                  tk->xtime_remainder, tk->cycle_interval);
2084         }
2085
2086         /*
2087          * If the clock is behind the NTP time, increase the multiplier by 1
2088          * to catch up with it. If it's ahead and there was a remainder in the
2089          * tick division, the clock will slow down. Otherwise it will stay
2090          * ahead until the tick length changes to a non-divisible value.
2091          */
2092         tk->ntp_err_mult = tk->ntp_error > 0 ? 1 : 0;
2093         mult += tk->ntp_err_mult;
2094
2095         timekeeping_apply_adjustment(tk, offset, mult - tk->tkr_mono.mult);
2096
2097         if (unlikely(tk->tkr_mono.clock->maxadj &&
2098                 (abs(tk->tkr_mono.mult - tk->tkr_mono.clock->mult)
2099                         > tk->tkr_mono.clock->maxadj))) {
2100                 printk_once(KERN_WARNING
2101                         "Adjusting %s more than 11%% (%ld vs %ld)\n",
2102                         tk->tkr_mono.clock->name, (long)tk->tkr_mono.mult,
2103                         (long)tk->tkr_mono.clock->mult + tk->tkr_mono.clock->maxadj);
2104         }
2105
2106         /*
2107          * It may be possible that when we entered this function, xtime_nsec
2108          * was very small.  Further, if we're slightly speeding the clocksource
2109          * in the code above, its possible the required corrective factor to
2110          * xtime_nsec could cause it to underflow.
2111          *
2112          * Now, since we have already accumulated the second and the NTP
2113          * subsystem has been notified via second_overflow(), we need to skip
2114          * the next update.
2115          */
2116         if (unlikely((s64)tk->tkr_mono.xtime_nsec < 0)) {
2117                 tk->tkr_mono.xtime_nsec += (u64)NSEC_PER_SEC <<
2118                                                         tk->tkr_mono.shift;
2119                 tk->xtime_sec--;
2120                 tk->skip_second_overflow = 1;
2121         }
2122 }
2123
2124 /*
2125  * accumulate_nsecs_to_secs - Accumulates nsecs into secs
2126  *
2127  * Helper function that accumulates the nsecs greater than a second
2128  * from the xtime_nsec field to the xtime_secs field.
2129  * It also calls into the NTP code to handle leapsecond processing.
2130  */
2131 static inline unsigned int accumulate_nsecs_to_secs(struct timekeeper *tk)
2132 {
2133         u64 nsecps = (u64)NSEC_PER_SEC << tk->tkr_mono.shift;
2134         unsigned int clock_set = 0;
2135
2136         while (tk->tkr_mono.xtime_nsec >= nsecps) {
2137                 int leap;
2138
2139                 tk->tkr_mono.xtime_nsec -= nsecps;
2140                 tk->xtime_sec++;
2141
2142                 /*
2143                  * Skip NTP update if this second was accumulated before,
2144                  * i.e. xtime_nsec underflowed in timekeeping_adjust()
2145                  */
2146                 if (unlikely(tk->skip_second_overflow)) {
2147                         tk->skip_second_overflow = 0;
2148                         continue;
2149                 }
2150
2151                 /* Figure out if its a leap sec and apply if needed */
2152                 leap = second_overflow(tk->xtime_sec);
2153                 if (unlikely(leap)) {
2154                         struct timespec64 ts;
2155
2156                         tk->xtime_sec += leap;
2157
2158                         ts.tv_sec = leap;
2159                         ts.tv_nsec = 0;
2160                         tk_set_wall_to_mono(tk,
2161                                 timespec64_sub(tk->wall_to_monotonic, ts));
2162
2163                         __timekeeping_set_tai_offset(tk, tk->tai_offset - leap);
2164
2165                         clock_set = TK_CLOCK_WAS_SET;
2166                 }
2167         }
2168         return clock_set;
2169 }
2170
2171 /*
2172  * logarithmic_accumulation - shifted accumulation of cycles
2173  *
2174  * This functions accumulates a shifted interval of cycles into
2175  * a shifted interval nanoseconds. Allows for O(log) accumulation
2176  * loop.
2177  *
2178  * Returns the unconsumed cycles.
2179  */
2180 static u64 logarithmic_accumulation(struct timekeeper *tk, u64 offset,
2181                                     u32 shift, unsigned int *clock_set)
2182 {
2183         u64 interval = tk->cycle_interval << shift;
2184         u64 snsec_per_sec;
2185
2186         /* If the offset is smaller than a shifted interval, do nothing */
2187         if (offset < interval)
2188                 return offset;
2189
2190         /* Accumulate one shifted interval */
2191         offset -= interval;
2192         tk->tkr_mono.cycle_last += interval;
2193         tk->tkr_raw.cycle_last  += interval;
2194
2195         tk->tkr_mono.xtime_nsec += tk->xtime_interval << shift;
2196         *clock_set |= accumulate_nsecs_to_secs(tk);
2197
2198         /* Accumulate raw time */
2199         tk->tkr_raw.xtime_nsec += tk->raw_interval << shift;
2200         snsec_per_sec = (u64)NSEC_PER_SEC << tk->tkr_raw.shift;
2201         while (tk->tkr_raw.xtime_nsec >= snsec_per_sec) {
2202                 tk->tkr_raw.xtime_nsec -= snsec_per_sec;
2203                 tk->raw_sec++;
2204         }
2205
2206         /* Accumulate error between NTP and clock interval */
2207         tk->ntp_error += tk->ntp_tick << shift;
2208         tk->ntp_error -= (tk->xtime_interval + tk->xtime_remainder) <<
2209                                                 (tk->ntp_error_shift + shift);
2210
2211         return offset;
2212 }
2213
2214 /*
2215  * timekeeping_advance - Updates the timekeeper to the current time and
2216  * current NTP tick length
2217  */
2218 static bool timekeeping_advance(enum timekeeping_adv_mode mode)
2219 {
2220         struct timekeeper *tk = &tk_core.shadow_timekeeper;
2221         struct timekeeper *real_tk = &tk_core.timekeeper;
2222         unsigned int clock_set = 0;
2223         int shift = 0, maxshift;
2224         u64 offset;
2225
2226         guard(raw_spinlock_irqsave)(&tk_core.lock);
2227
2228         /* Make sure we're fully resumed: */
2229         if (unlikely(timekeeping_suspended))
2230                 return false;
2231
2232         offset = clocksource_delta(tk_clock_read(&tk->tkr_mono),
2233                                    tk->tkr_mono.cycle_last, tk->tkr_mono.mask);
2234
2235         /* Check if there's really nothing to do */
2236         if (offset < real_tk->cycle_interval && mode == TK_ADV_TICK)
2237                 return false;
2238
2239         /*
2240          * With NO_HZ we may have to accumulate many cycle_intervals
2241          * (think "ticks") worth of time at once. To do this efficiently,
2242          * we calculate the largest doubling multiple of cycle_intervals
2243          * that is smaller than the offset.  We then accumulate that
2244          * chunk in one go, and then try to consume the next smaller
2245          * doubled multiple.
2246          */
2247         shift = ilog2(offset) - ilog2(tk->cycle_interval);
2248         shift = max(0, shift);
2249         /* Bound shift to one less than what overflows tick_length */
2250         maxshift = (64 - (ilog2(ntp_tick_length())+1)) - 1;
2251         shift = min(shift, maxshift);
2252         while (offset >= tk->cycle_interval) {
2253                 offset = logarithmic_accumulation(tk, offset, shift, &clock_set);
2254                 if (offset < tk->cycle_interval<<shift)
2255                         shift--;
2256         }
2257
2258         /* Adjust the multiplier to correct NTP error */
2259         timekeeping_adjust(tk, offset);
2260
2261         /*
2262          * Finally, make sure that after the rounding
2263          * xtime_nsec isn't larger than NSEC_PER_SEC
2264          */
2265         clock_set |= accumulate_nsecs_to_secs(tk);
2266
2267         timekeeping_update_from_shadow(&tk_core, clock_set);
2268
2269         return !!clock_set;
2270 }
2271
2272 /**
2273  * update_wall_time - Uses the current clocksource to increment the wall time
2274  *
2275  */
2276 void update_wall_time(void)
2277 {
2278         if (timekeeping_advance(TK_ADV_TICK))
2279                 clock_was_set_delayed();
2280 }
2281
2282 /**
2283  * getboottime64 - Return the real time of system boot.
2284  * @ts:         pointer to the timespec64 to be set
2285  *
2286  * Returns the wall-time of boot in a timespec64.
2287  *
2288  * This is based on the wall_to_monotonic offset and the total suspend
2289  * time. Calls to settimeofday will affect the value returned (which
2290  * basically means that however wrong your real time clock is at boot time,
2291  * you get the right time here).
2292  */
2293 void getboottime64(struct timespec64 *ts)
2294 {
2295         struct timekeeper *tk = &tk_core.timekeeper;
2296         ktime_t t = ktime_sub(tk->offs_real, tk->offs_boot);
2297
2298         *ts = ktime_to_timespec64(t);
2299 }
2300 EXPORT_SYMBOL_GPL(getboottime64);
2301
2302 void ktime_get_coarse_real_ts64(struct timespec64 *ts)
2303 {
2304         struct timekeeper *tk = &tk_core.timekeeper;
2305         unsigned int seq;
2306
2307         do {
2308                 seq = read_seqcount_begin(&tk_core.seq);
2309
2310                 *ts = tk_xtime(tk);
2311         } while (read_seqcount_retry(&tk_core.seq, seq));
2312 }
2313 EXPORT_SYMBOL(ktime_get_coarse_real_ts64);
2314
2315 /**
2316  * ktime_get_coarse_real_ts64_mg - return latter of coarse grained time or floor
2317  * @ts:         timespec64 to be filled
2318  *
2319  * Fetch the global mg_floor value, convert it to realtime and compare it
2320  * to the current coarse-grained time. Fill @ts with whichever is
2321  * latest. Note that this is a filesystem-specific interface and should be
2322  * avoided outside of that context.
2323  */
2324 void ktime_get_coarse_real_ts64_mg(struct timespec64 *ts)
2325 {
2326         struct timekeeper *tk = &tk_core.timekeeper;
2327         u64 floor = atomic64_read(&mg_floor);
2328         ktime_t f_real, offset, coarse;
2329         unsigned int seq;
2330
2331         do {
2332                 seq = read_seqcount_begin(&tk_core.seq);
2333                 *ts = tk_xtime(tk);
2334                 offset = tk_core.timekeeper.offs_real;
2335         } while (read_seqcount_retry(&tk_core.seq, seq));
2336
2337         coarse = timespec64_to_ktime(*ts);
2338         f_real = ktime_add(floor, offset);
2339         if (ktime_after(f_real, coarse))
2340                 *ts = ktime_to_timespec64(f_real);
2341 }
2342
2343 /**
2344  * ktime_get_real_ts64_mg - attempt to update floor value and return result
2345  * @ts:         pointer to the timespec to be set
2346  *
2347  * Get a monotonic fine-grained time value and attempt to swap it into
2348  * mg_floor. If that succeeds then accept the new floor value. If it fails
2349  * then another task raced in during the interim time and updated the
2350  * floor.  Since any update to the floor must be later than the previous
2351  * floor, either outcome is acceptable.
2352  *
2353  * Typically this will be called after calling ktime_get_coarse_real_ts64_mg(),
2354  * and determining that the resulting coarse-grained timestamp did not effect
2355  * a change in ctime. Any more recent floor value would effect a change to
2356  * ctime, so there is no need to retry the atomic64_try_cmpxchg() on failure.
2357  *
2358  * @ts will be filled with the latest floor value, regardless of the outcome of
2359  * the cmpxchg. Note that this is a filesystem specific interface and should be
2360  * avoided outside of that context.
2361  */
2362 void ktime_get_real_ts64_mg(struct timespec64 *ts)
2363 {
2364         struct timekeeper *tk = &tk_core.timekeeper;
2365         ktime_t old = atomic64_read(&mg_floor);
2366         ktime_t offset, mono;
2367         unsigned int seq;
2368         u64 nsecs;
2369
2370         do {
2371                 seq = read_seqcount_begin(&tk_core.seq);
2372
2373                 ts->tv_sec = tk->xtime_sec;
2374                 mono = tk->tkr_mono.base;
2375                 nsecs = timekeeping_get_ns(&tk->tkr_mono);
2376                 offset = tk_core.timekeeper.offs_real;
2377         } while (read_seqcount_retry(&tk_core.seq, seq));
2378
2379         mono = ktime_add_ns(mono, nsecs);
2380
2381         /*
2382          * Attempt to update the floor with the new time value. As any
2383          * update must be later then the existing floor, and would effect
2384          * a change to ctime from the perspective of the current task,
2385          * accept the resulting floor value regardless of the outcome of
2386          * the swap.
2387          */
2388         if (atomic64_try_cmpxchg(&mg_floor, &old, mono)) {
2389                 ts->tv_nsec = 0;
2390                 timespec64_add_ns(ts, nsecs);
2391                 timekeeping_inc_mg_floor_swaps();
2392         } else {
2393                 /*
2394                  * Another task changed mg_floor since "old" was fetched.
2395                  * "old" has been updated with the latest value of "mg_floor".
2396                  * That value is newer than the previous floor value, which
2397                  * is enough to effect a change to ctime. Accept it.
2398                  */
2399                 *ts = ktime_to_timespec64(ktime_add(old, offset));
2400         }
2401 }
2402
2403 void ktime_get_coarse_ts64(struct timespec64 *ts)
2404 {
2405         struct timekeeper *tk = &tk_core.timekeeper;
2406         struct timespec64 now, mono;
2407         unsigned int seq;
2408
2409         do {
2410                 seq = read_seqcount_begin(&tk_core.seq);
2411
2412                 now = tk_xtime(tk);
2413                 mono = tk->wall_to_monotonic;
2414         } while (read_seqcount_retry(&tk_core.seq, seq));
2415
2416         set_normalized_timespec64(ts, now.tv_sec + mono.tv_sec,
2417                                 now.tv_nsec + mono.tv_nsec);
2418 }
2419 EXPORT_SYMBOL(ktime_get_coarse_ts64);
2420
2421 /*
2422  * Must hold jiffies_lock
2423  */
2424 void do_timer(unsigned long ticks)
2425 {
2426         jiffies_64 += ticks;
2427         calc_global_load();
2428 }
2429
2430 /**
2431  * ktime_get_update_offsets_now - hrtimer helper
2432  * @cwsseq:     pointer to check and store the clock was set sequence number
2433  * @offs_real:  pointer to storage for monotonic -> realtime offset
2434  * @offs_boot:  pointer to storage for monotonic -> boottime offset
2435  * @offs_tai:   pointer to storage for monotonic -> clock tai offset
2436  *
2437  * Returns current monotonic time and updates the offsets if the
2438  * sequence number in @cwsseq and timekeeper.clock_was_set_seq are
2439  * different.
2440  *
2441  * Called from hrtimer_interrupt() or retrigger_next_event()
2442  */
2443 ktime_t ktime_get_update_offsets_now(unsigned int *cwsseq, ktime_t *offs_real,
2444                                      ktime_t *offs_boot, ktime_t *offs_tai)
2445 {
2446         struct timekeeper *tk = &tk_core.timekeeper;
2447         unsigned int seq;
2448         ktime_t base;
2449         u64 nsecs;
2450
2451         do {
2452                 seq = read_seqcount_begin(&tk_core.seq);
2453
2454                 base = tk->tkr_mono.base;
2455                 nsecs = timekeeping_get_ns(&tk->tkr_mono);
2456                 base = ktime_add_ns(base, nsecs);
2457
2458                 if (*cwsseq != tk->clock_was_set_seq) {
2459                         *cwsseq = tk->clock_was_set_seq;
2460                         *offs_real = tk->offs_real;
2461                         *offs_boot = tk->offs_boot;
2462                         *offs_tai = tk->offs_tai;
2463                 }
2464
2465                 /* Handle leapsecond insertion adjustments */
2466                 if (unlikely(base >= tk->next_leap_ktime))
2467                         *offs_real = ktime_sub(tk->offs_real, ktime_set(1, 0));
2468
2469         } while (read_seqcount_retry(&tk_core.seq, seq));
2470
2471         return base;
2472 }
2473
2474 /*
2475  * timekeeping_validate_timex - Ensures the timex is ok for use in do_adjtimex
2476  */
2477 static int timekeeping_validate_timex(const struct __kernel_timex *txc)
2478 {
2479         if (txc->modes & ADJ_ADJTIME) {
2480                 /* singleshot must not be used with any other mode bits */
2481                 if (!(txc->modes & ADJ_OFFSET_SINGLESHOT))
2482                         return -EINVAL;
2483                 if (!(txc->modes & ADJ_OFFSET_READONLY) &&
2484                     !capable(CAP_SYS_TIME))
2485                         return -EPERM;
2486         } else {
2487                 /* In order to modify anything, you gotta be super-user! */
2488                 if (txc->modes && !capable(CAP_SYS_TIME))
2489                         return -EPERM;
2490                 /*
2491                  * if the quartz is off by more than 10% then
2492                  * something is VERY wrong!
2493                  */
2494                 if (txc->modes & ADJ_TICK &&
2495                     (txc->tick <  900000/USER_HZ ||
2496                      txc->tick > 1100000/USER_HZ))
2497                         return -EINVAL;
2498         }
2499
2500         if (txc->modes & ADJ_SETOFFSET) {
2501                 /* In order to inject time, you gotta be super-user! */
2502                 if (!capable(CAP_SYS_TIME))
2503                         return -EPERM;
2504
2505                 /*
2506                  * Validate if a timespec/timeval used to inject a time
2507                  * offset is valid.  Offsets can be positive or negative, so
2508                  * we don't check tv_sec. The value of the timeval/timespec
2509                  * is the sum of its fields,but *NOTE*:
2510                  * The field tv_usec/tv_nsec must always be non-negative and
2511                  * we can't have more nanoseconds/microseconds than a second.
2512                  */
2513                 if (txc->time.tv_usec < 0)
2514                         return -EINVAL;
2515
2516                 if (txc->modes & ADJ_NANO) {
2517                         if (txc->time.tv_usec >= NSEC_PER_SEC)
2518                                 return -EINVAL;
2519                 } else {
2520                         if (txc->time.tv_usec >= USEC_PER_SEC)
2521                                 return -EINVAL;
2522                 }
2523         }
2524
2525         /*
2526          * Check for potential multiplication overflows that can
2527          * only happen on 64-bit systems:
2528          */
2529         if ((txc->modes & ADJ_FREQUENCY) && (BITS_PER_LONG == 64)) {
2530                 if (LLONG_MIN / PPM_SCALE > txc->freq)
2531                         return -EINVAL;
2532                 if (LLONG_MAX / PPM_SCALE < txc->freq)
2533                         return -EINVAL;
2534         }
2535
2536         return 0;
2537 }
2538
2539 /**
2540  * random_get_entropy_fallback - Returns the raw clock source value,
2541  * used by random.c for platforms with no valid random_get_entropy().
2542  */
2543 unsigned long random_get_entropy_fallback(void)
2544 {
2545         struct tk_read_base *tkr = &tk_core.timekeeper.tkr_mono;
2546         struct clocksource *clock = READ_ONCE(tkr->clock);
2547
2548         if (unlikely(timekeeping_suspended || !clock))
2549                 return 0;
2550         return clock->read(clock);
2551 }
2552 EXPORT_SYMBOL_GPL(random_get_entropy_fallback);
2553
2554 /**
2555  * do_adjtimex() - Accessor function to NTP __do_adjtimex function
2556  * @txc:        Pointer to kernel_timex structure containing NTP parameters
2557  */
2558 int do_adjtimex(struct __kernel_timex *txc)
2559 {
2560         struct audit_ntp_data ad;
2561         bool offset_set = false;
2562         bool clock_set = false;
2563         struct timespec64 ts;
2564         int ret;
2565
2566         /* Validate the data before disabling interrupts */
2567         ret = timekeeping_validate_timex(txc);
2568         if (ret)
2569                 return ret;
2570         add_device_randomness(txc, sizeof(*txc));
2571
2572         if (txc->modes & ADJ_SETOFFSET) {
2573                 struct timespec64 delta;
2574
2575                 delta.tv_sec  = txc->time.tv_sec;
2576                 delta.tv_nsec = txc->time.tv_usec;
2577                 if (!(txc->modes & ADJ_NANO))
2578                         delta.tv_nsec *= 1000;
2579                 ret = timekeeping_inject_offset(&delta);
2580                 if (ret)
2581                         return ret;
2582
2583                 offset_set = delta.tv_sec != 0;
2584                 audit_tk_injoffset(delta);
2585         }
2586
2587         audit_ntp_init(&ad);
2588
2589         ktime_get_real_ts64(&ts);
2590         add_device_randomness(&ts, sizeof(ts));
2591
2592         scoped_guard (raw_spinlock_irqsave, &tk_core.lock) {
2593                 struct timekeeper *tks = &tk_core.shadow_timekeeper;
2594                 s32 orig_tai, tai;
2595
2596                 orig_tai = tai = tks->tai_offset;
2597                 ret = __do_adjtimex(txc, &ts, &tai, &ad);
2598
2599                 if (tai != orig_tai) {
2600                         __timekeeping_set_tai_offset(tks, tai);
2601                         timekeeping_update_from_shadow(&tk_core, TK_CLOCK_WAS_SET);
2602                         clock_set = true;
2603                 } else {
2604                         tk_update_leap_state_all(&tk_core);
2605                 }
2606         }
2607
2608         audit_ntp_log(&ad);
2609
2610         /* Update the multiplier immediately if frequency was set directly */
2611         if (txc->modes & (ADJ_FREQUENCY | ADJ_TICK))
2612                 clock_set |= timekeeping_advance(TK_ADV_FREQ);
2613
2614         if (clock_set)
2615                 clock_was_set(CLOCK_SET_WALL);
2616
2617         ntp_notify_cmos_timer(offset_set);
2618
2619         return ret;
2620 }
2621
#ifdef CONFIG_NTP_PPS
/**
 * hardpps() - Accessor function to NTP __hardpps function
 * @phase_ts:	Pointer to timespec64 structure representing phase timestamp
 * @raw_ts:	Pointer to timespec64 structure representing raw timestamp
 *
 * Calls __hardpps() with tk_core.lock held and interrupts disabled.
 */
void hardpps(const struct timespec64 *phase_ts, const struct timespec64 *raw_ts)
{
	scoped_guard (raw_spinlock_irqsave, &tk_core.lock)
		__hardpps(phase_ts, raw_ts);
}
EXPORT_SYMBOL(hardpps);
#endif /* CONFIG_NTP_PPS */