/*
 * Copyright 2006 Andi Kleen, SUSE Labs.
 * Subject to the GNU Public License, v.2
 *
 * Fast user context implementation of clock_gettime, gettimeofday, and time.
 *
 * 32 Bit compat layer by Stefani Seibold <stefani@seibold.net>
 *  sponsored by Rohde & Schwarz GmbH & Co. KG Munich/Germany
 *
 * The code should have no internal unresolved relocations.
 * Check with readelf after changing.
 */
#include <uapi/linux/time.h>
#include <asm/vgtod.h>
#include <asm/vvar.h>
#include <asm/unistd.h>
#include <asm/msr.h>
#include <asm/pvclock.h>
#include <asm/mshyperv.h>
#include <linux/math64.h>
#include <linux/time.h>
#include <linux/kernel.h>

#define gtod (&VVAR(vsyscall_gtod_data))
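/*
 * All readers below take their snapshot data through the gtod pointer,
 * i.e. from the vsyscall_gtod_data copy in the vvar page that the kernel
 * refreshes on every timekeeping update and maps read-only into userspace
 * next to the vDSO text.
 */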
extern int __vdso_clock_gettime(clockid_t clock, struct timespec *ts);
extern int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz);
extern time_t __vdso_time(time_t *t);
#ifdef CONFIG_PARAVIRT_CLOCK
extern u8 pvclock_page
	__attribute__((visibility("hidden")));
#endif

#ifdef CONFIG_HYPERV_TSCPAGE
extern u8 hvclock_page
	__attribute__((visibility("hidden")));
#endif
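/*
 * Fallback paths: when a clock cannot be served from the vDSO data, issue
 * a real clock_gettime(2) syscall.  The 64-bit build uses the SYSCALL
 * instruction directly; the 32-bit compat build goes through
 * __kernel_vsyscall and must preserve %ebx, which is reserved for PIC.
 */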
#ifdef CONFIG_X86_64

notrace static long vdso_fallback_gettime(long clock, struct timespec *ts)
{
	long ret;
	asm ("syscall" : "=a" (ret), "=m" (*ts) :
	     "0" (__NR_clock_gettime), "D" (clock), "S" (ts) :
	     "rcx", "r11");
	return ret;
}

#else
notrace static long vdso_fallback_gettime(long clock, struct timespec *ts)
{
	long ret;

	asm (
		"mov %%ebx, %%edx \n"
		"mov %[clock], %%ebx \n"
		"call __kernel_vsyscall \n"
		"mov %%edx, %%ebx \n"
		: "=a" (ret), "=m" (*ts)
		: "0" (__NR_clock_gettime), [clock] "g" (clock), "c" (ts)
		: "edx");
	return ret;
}

#endif
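/*
 * Paravirt clock (KVM/Xen) support: the hypervisor publishes
 * pvclock_vcpu_time_info in the pvclock page mapped into the vDSO; the
 * comment in vread_pvclock() explains why looking only at vCPU 0's copy
 * is sufficient.
 */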
#ifdef CONFIG_PARAVIRT_CLOCK
static notrace const struct pvclock_vsyscall_time_info *get_pvti0(void)
{
	return (const struct pvclock_vsyscall_time_info *)&pvclock_page;
}

static notrace u64 vread_pvclock(void)
{
	const struct pvclock_vcpu_time_info *pvti = &get_pvti0()->pvti;
	u32 version;
	u64 ret;

	/*
	 * Note: The kernel and hypervisor must guarantee that cpu ID
	 * number maps 1:1 to per-CPU pvclock time info.
	 *
	 * Because the hypervisor is entirely unaware of guest userspace
	 * preemption, it cannot guarantee that per-CPU pvclock time
	 * info is updated if the underlying CPU changes or that its
	 * version is increased whenever the underlying CPU changes.
	 *
	 * On KVM, we are guaranteed that pvti updates for any vCPU are
	 * atomic as seen by *all* vCPUs.  This is an even stronger
	 * guarantee than we get with a normal seqlock.
	 *
	 * On Xen, we don't appear to have that guarantee, but Xen still
	 * supplies a valid seqlock using the version field.
	 *
	 * We only do pvclock vdso timing at all if
	 * PVCLOCK_TSC_STABLE_BIT is set, and we interpret that bit to
	 * mean that all vCPUs have matching pvti and that the TSC is
	 * synced, so we can just look at vCPU 0's pvti.
	 */

	do {
		version = pvclock_read_begin(pvti);

		if (unlikely(!(pvti->flags & PVCLOCK_TSC_STABLE_BIT)))
			return U64_MAX;

		ret = __pvclock_read_cycles(pvti, rdtsc_ordered());
	} while (pvclock_read_retry(pvti, version));

	return ret;
}
#endif
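/*
 * Hyper-V support: the hypervisor keeps a TSC reference page up to date;
 * hv_read_tsc_page() applies the scale and offset published there to the
 * current TSC value, or returns U64_MAX when the page is not valid so the
 * caller falls back to a syscall.
 */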
#ifdef CONFIG_HYPERV_TSCPAGE
static notrace u64 vread_hvclock(void)
{
	const struct ms_hyperv_tsc_page *tsc_pg =
		(const struct ms_hyperv_tsc_page *)&hvclock_page;

	return hv_read_tsc_page(tsc_pg);
}
#endif
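/*
 * vgetcyc() returns the raw cycle count for the configured clock mode.  A
 * value with the sign bit set (U64_MAX) means the clocksource cannot be
 * read from userspace; do_hres() tests for that with (s64)cycles < 0 and
 * takes the syscall fallback.
 */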
notrace static inline u64 vgetcyc(int mode)
{
	if (mode == VCLOCK_TSC)
		return (u64)rdtsc_ordered();
#ifdef CONFIG_PARAVIRT_CLOCK
	else if (mode == VCLOCK_PVCLOCK)
		return vread_pvclock();
#endif
#ifdef CONFIG_HYPERV_TSCPAGE
	else if (mode == VCLOCK_HVCLOCK)
		return vread_hvclock();
#endif
	return U64_MAX;
}
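/*
 * do_hres(): high resolution clocks.  Take a consistent snapshot under the
 * gtod seqcount, extend base->nsec by the cycles elapsed since
 * gtod->cycle_last scaled with mult/shift, and defer the division into
 * tv_sec/tv_nsec until the snapshot is known to be consistent.
 */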
notrace static int do_hres(clockid_t clk, struct timespec *ts)
{
	struct vgtod_ts *base = &gtod->basetime[clk];
	u64 cycles, last, sec, ns;
	unsigned int seq;

	do {
		seq = gtod_read_begin(gtod);
		cycles = vgetcyc(gtod->vclock_mode);
		ns = base->nsec;
		last = gtod->cycle_last;
		if (unlikely((s64)cycles < 0))
			return vdso_fallback_gettime(clk, ts);
		if (cycles > last)
			ns += (cycles - last) * gtod->mult;
		ns >>= gtod->shift;
		sec = base->sec;
	} while (unlikely(gtod_read_retry(gtod, seq)));

	/*
	 * Do this outside the loop: a race inside the loop could result
	 * in __iter_div_u64_rem() being extremely slow.
	 */
	ts->tv_sec = sec + __iter_div_u64_rem(ns, NSEC_PER_SEC, &ns);
	ts->tv_nsec = ns;

	return 0;
}
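/*
 * do_coarse(): coarse clocks only need the timestamp of the last
 * timekeeping update, so this is a plain seqcount-protected copy with no
 * clocksource read and no fallback path.
 */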
notrace static void do_coarse(clockid_t clk, struct timespec *ts)
{
	struct vgtod_ts *base = &gtod->basetime[clk];
	unsigned int seq;

	do {
		seq = gtod_read_begin(gtod);
		ts->tv_sec = base->sec;
		ts->tv_nsec = base->nsec;
	} while (unlikely(gtod_read_retry(gtod, seq)));
}
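/*
 * __vdso_clock_gettime() dispatches on the clock id: clocks covered by
 * VGTOD_HRES or VGTOD_COARSE are handled entirely in userspace; everything
 * else, including the negative CPU/FD clock ids, goes through the real
 * syscall.
 */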
notrace int __vdso_clock_gettime(clockid_t clock, struct timespec *ts)
{
	unsigned int msk;

	/* Sort out negative (CPU/FD) and invalid clocks */
	if (unlikely((unsigned int) clock >= MAX_CLOCKS))
		return vdso_fallback_gettime(clock, ts);

	/*
	 * Convert the clockid to a bitmask and use it to check which
	 * clocks are handled in the VDSO directly.
	 */
	msk = 1U << clock;
	if (likely(msk & VGTOD_HRES)) {
		return do_hres(clock, ts);
	} else if (msk & VGTOD_COARSE) {
		do_coarse(clock, ts);
		return 0;
	}
	return vdso_fallback_gettime(clock, ts);
}

int clock_gettime(clockid_t, struct timespec *)
	__attribute__((weak, alias("__vdso_clock_gettime")));
notrace int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz)
{
	if (likely(tv != NULL)) {
		struct timespec *ts = (struct timespec *) tv;

		do_hres(CLOCK_REALTIME, ts);
		tv->tv_usec /= 1000;
	}
	if (unlikely(tz != NULL)) {
		tz->tz_minuteswest = gtod->tz_minuteswest;
		tz->tz_dsttime = gtod->tz_dsttime;
	}

	return 0;
}
int gettimeofday(struct timeval *, struct timezone *)
	__attribute__((weak, alias("__vdso_gettimeofday")));
/*
 * This will break when the xtime seconds get inaccurate, but that is
 * unlikely
 */
notrace time_t __vdso_time(time_t *t)
{
	/* This is atomic on x86 so we don't need any locks. */
	time_t result = READ_ONCE(gtod->basetime[CLOCK_REALTIME].sec);

	if (t)
		*t = result;
	return result;
}
time_t time(time_t *t)
	__attribute__((weak, alias("__vdso_time")));