/*
 * Copyright 2006 Andi Kleen, SUSE Labs.
 * Subject to the GNU Public License, v.2
 *
 * Fast user context implementation of clock_gettime, gettimeofday, and time.
 *
 * The code should have no internal unresolved relocations.
 * Check with readelf after changing.
 */

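/*
 * Illustrative note (added, not part of the original source): userspace
 * reaches this code through the vDSO, so an ordinary libc call such as
 *
 *	struct timespec ts;
 *	clock_gettime(CLOCK_MONOTONIC, &ts);
 *
 * is expected to resolve to __vdso_clock_gettime() below and complete without
 * entering the kernel, unless the active clocksource forces the syscall
 * fallback path.
 */
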
/* Disable profiling for userspace code: */
#define DISABLE_BRANCH_PROFILING

#include <linux/kernel.h>
#include <linux/posix-timers.h>
#include <linux/time.h>
#include <linux/string.h>
#include <asm/vsyscall.h>
#include <asm/fixmap.h>
#include <asm/vgtod.h>
#include <asm/timex.h>
#include <asm/hpet.h>
#include <asm/unistd.h>
#include <asm/io.h>
#include <asm/pvclock.h>

#define gtod (&VVAR(vsyscall_gtod_data))

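/*
 * Note (added, not part of the original source): VVAR(vsyscall_gtod_data)
 * names the kernel-maintained timekeeping snapshot in the vvar mapping that
 * is visible to userspace, so every read below goes through this shared data
 * rather than a system call.
 */
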
notrace static cycle_t vread_tsc(void)
{
	cycle_t ret;
	u64 last;

	/*
	 * Empirically, a fence (of type that depends on the CPU)
	 * before rdtsc is enough to ensure that rdtsc is ordered
	 * with respect to loads.  The various CPU manuals are unclear
	 * as to whether rdtsc can be reordered with later loads,
	 * but no one has ever seen it happen.
	 */
	rdtsc_barrier();
	ret = (cycle_t)vget_cycles();

	last = VVAR(vsyscall_gtod_data).clock.cycle_last;

	if (likely(ret >= last))
		return ret;

	/*
	 * GCC likes to generate cmov here, but this branch is extremely
	 * predictable (it's just a function of time and the likely is
	 * very likely) and there's a data dependence, so force GCC
	 * to generate a branch instead.  I don't barrier() because
	 * we don't actually need a barrier, and if this function
	 * ever gets inlined it will generate worse code.
	 */
	asm volatile ("");
	return last;
}

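/*
 * Explanatory note (added, not part of the original source): returning
 * cycle_last instead of a smaller TSC reading keeps the cycle count from
 * stepping backwards when the rdtsc lands on a CPU whose counter is slightly
 * behind the one that took the last timekeeping update, which is what keeps
 * the derived clock monotonic for the callers below.
 */
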
static notrace cycle_t vread_hpet(void)
{
	return readl((const void __iomem *)fix_to_virt(VSYSCALL_HPET) + HPET_COUNTER);
}

#ifdef CONFIG_PARAVIRT_CLOCK

static notrace const struct pvclock_vsyscall_time_info *get_pvti(int cpu)
{
	const struct pvclock_vsyscall_time_info *pvti_base;
	int idx = cpu / (PAGE_SIZE/PVTI_SIZE);
	int offset = cpu % (PAGE_SIZE/PVTI_SIZE);

	BUG_ON(PVCLOCK_FIXMAP_BEGIN + idx > PVCLOCK_FIXMAP_END);

	pvti_base = (struct pvclock_vsyscall_time_info *)
		    __fix_to_virt(PVCLOCK_FIXMAP_BEGIN+idx);

	return &pvti_base[offset];
}

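/*
 * Worked example (added, not part of the original source): each fixmap page
 * holds PAGE_SIZE/PVTI_SIZE per-CPU pvclock entries. Assuming a PAGE_SIZE of
 * 4096 and a PVTI_SIZE of 64, that is 64 entries per page, so cpu 70 would
 * map to idx = 1 (the second fixmap page) and offset = 6 (the seventh entry
 * within that page).
 */
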
static notrace cycle_t vread_pvclock(int *mode)
{
	const struct pvclock_vsyscall_time_info *pvti;
	cycle_t ret;
	u64 last;
	u32 version;
	u8 flags;
	unsigned cpu, cpu1;

	/*
	 * Note: hypervisor must guarantee that:
	 * 1. cpu ID number maps 1:1 to per-CPU pvclock time info.
	 * 2. that per-CPU pvclock time info is updated if the
	 *    underlying CPU changes.
	 * 3. that version is increased whenever underlying CPU
	 *    changes.
	 */
	do {
		cpu = __getcpu() & VGETCPU_CPU_MASK;
		/* TODO: We can put vcpu id into higher bits of pvti.version.
		 * This will save a couple of cycles by getting rid of
		 * __getcpu() calls (Gleb).
		 */

		pvti = get_pvti(cpu);

		version = __pvclock_read_cycles(&pvti->pvti, &ret, &flags);

		/*
		 * Test we're still on the cpu as well as the version.
		 * We could have been migrated just after the first
		 * vgetcpu but before fetching the version, so we
		 * wouldn't notice a version change.
		 */
		cpu1 = __getcpu() & VGETCPU_CPU_MASK;
	} while (unlikely(cpu != cpu1 ||
			  (pvti->pvti.version & 1) ||
			  pvti->pvti.version != version));

	if (unlikely(!(flags & PVCLOCK_TSC_STABLE_BIT)))
		*mode = VCLOCK_NONE;

	/* refer to tsc.c read_tsc() comment for rationale */
	last = VVAR(vsyscall_gtod_data).clock.cycle_last;

	if (likely(ret >= last))
		return ret;

	return last;
}
#endif

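/*
 * Background note (added, not part of the original source): the fallback
 * helpers below issue a real system call using the x86-64 syscall convention,
 * with the call number in rax (the "0" constraint reuses the "=a" output) and
 * the two arguments in rdi ("D") and rsi ("S"). They are used whenever no
 * vclock mode is usable from userspace.
 */
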
notrace static long vdso_fallback_gettime(long clock, struct timespec *ts)
{
	long ret;

	asm("syscall" : "=a" (ret) :
	    "0" (__NR_clock_gettime), "D" (clock), "S" (ts) : "memory");
	return ret;
}

notrace static long vdso_fallback_gtod(struct timeval *tv, struct timezone *tz)
{
	long ret;

	asm("syscall" : "=a" (ret) :
	    "0" (__NR_gettimeofday), "D" (tv), "S" (tz) : "memory");
	return ret;
}

notrace static inline u64 vgetsns(int *mode)
{
	long v;
	cycles_t cycles;

	if (gtod->clock.vclock_mode == VCLOCK_TSC)
		cycles = vread_tsc();
	else if (gtod->clock.vclock_mode == VCLOCK_HPET)
		cycles = vread_hpet();
#ifdef CONFIG_PARAVIRT_CLOCK
	else if (gtod->clock.vclock_mode == VCLOCK_PVCLOCK)
		cycles = vread_pvclock(mode);
#endif
	else
		return 0;
	v = (cycles - gtod->clock.cycle_last) & gtod->clock.mask;
	return v * gtod->clock.mult;
}

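/*
 * Worked arithmetic (added, not part of the original source): together with
 * the shift applied by the callers, this implements
 *
 *	ns = ((cycles - cycle_last) & mask) * mult >> shift
 *
 * For example, with an illustrative mult of 4194304 and shift of 22, one
 * elapsed cycle of a 1 GHz counter contributes 4194304 >> 22 = 1 ns.
 */
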
/* Code size doesn't matter (vdso is 4k anyway) and this is faster. */
notrace static int __always_inline do_realtime(struct timespec *ts)
{
	unsigned long seq;
	u64 ns;
	int mode;

	ts->tv_nsec = 0;
	do {
		seq = raw_read_seqcount_begin(&gtod->seq);
		mode = gtod->clock.vclock_mode;
		ts->tv_sec = gtod->wall_time_sec;
		ns = gtod->wall_time_snsec;
		ns += vgetsns(&mode);
		ns >>= gtod->clock.shift;
	} while (unlikely(read_seqcount_retry(&gtod->seq, seq)));

	timespec_add_ns(ts, ns);
	return mode;
}

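/*
 * Design note (added, not part of the original source): the loop above is the
 * reader side of the kernel's timekeeping seqcount. If the kernel updates
 * vsyscall_gtod_data while the snapshot is being read, read_seqcount_retry()
 * detects the change and the whole read is retried, so no lock is ever taken
 * from userspace. do_monotonic() below follows the same pattern.
 */
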
notrace static int do_monotonic(struct timespec *ts)
{
	unsigned long seq;
	u64 ns;
	int mode;

	ts->tv_nsec = 0;
	do {
		seq = raw_read_seqcount_begin(&gtod->seq);
		mode = gtod->clock.vclock_mode;
		ts->tv_sec = gtod->monotonic_time_sec;
		ns = gtod->monotonic_time_snsec;
		ns += vgetsns(&mode);
		ns >>= gtod->clock.shift;
	} while (unlikely(read_seqcount_retry(&gtod->seq, seq)));
	timespec_add_ns(ts, ns);

	return mode;
}

notrace static int do_realtime_coarse(struct timespec *ts)
{
	unsigned long seq;
	do {
		seq = raw_read_seqcount_begin(&gtod->seq);
		ts->tv_sec = gtod->wall_time_coarse.tv_sec;
		ts->tv_nsec = gtod->wall_time_coarse.tv_nsec;
	} while (unlikely(read_seqcount_retry(&gtod->seq, seq)));
	return 0;
}

notrace static int do_monotonic_coarse(struct timespec *ts)
{
	unsigned long seq;
	do {
		seq = raw_read_seqcount_begin(&gtod->seq);
		ts->tv_sec = gtod->monotonic_time_coarse.tv_sec;
		ts->tv_nsec = gtod->monotonic_time_coarse.tv_nsec;
	} while (unlikely(read_seqcount_retry(&gtod->seq, seq)));

	return 0;
}

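/*
 * Note (added, not part of the original source): the *_coarse variants only
 * copy the values captured at the last kernel timekeeping update and never
 * read the clocksource, trading resolution for an even cheaper call; they
 * never need the syscall fallback and therefore return 0 unconditionally.
 */
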
notrace int __vdso_clock_gettime(clockid_t clock, struct timespec *ts)
{
	int ret = VCLOCK_NONE;

	switch (clock) {
	case CLOCK_REALTIME:
		ret = do_realtime(ts);
		break;
	case CLOCK_MONOTONIC:
		ret = do_monotonic(ts);
		break;
	case CLOCK_REALTIME_COARSE:
		return do_realtime_coarse(ts);
	case CLOCK_MONOTONIC_COARSE:
		return do_monotonic_coarse(ts);
	}

	if (ret == VCLOCK_NONE)
		return vdso_fallback_gettime(clock, ts);
	return 0;
}
int clock_gettime(clockid_t, struct timespec *)
	__attribute__((weak, alias("__vdso_clock_gettime")));

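/*
 * Note (added, not part of the original source): the weak alias also exports
 * this function under its plain libc name, so the vDSO image carries both
 * "__vdso_clock_gettime" and "clock_gettime" symbols; the same pattern is
 * used for gettimeofday and time below.
 */
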
notrace int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz)
{
	long ret = VCLOCK_NONE;

	if (likely(tv != NULL)) {
		BUILD_BUG_ON(offsetof(struct timeval, tv_usec) !=
			     offsetof(struct timespec, tv_nsec) ||
			     sizeof(*tv) != sizeof(struct timespec));
		ret = do_realtime((struct timespec *)tv);
		tv->tv_usec /= 1000;	/* do_realtime() filled nanoseconds */
	}
	if (unlikely(tz != NULL)) {
		/* Avoid memcpy. Some old compilers fail to inline it */
		tz->tz_minuteswest = gtod->sys_tz.tz_minuteswest;
		tz->tz_dsttime = gtod->sys_tz.tz_dsttime;
	}

	if (ret == VCLOCK_NONE)
		return vdso_fallback_gtod(tv, tz);
	return 0;
}
int gettimeofday(struct timeval *, struct timezone *)
	__attribute__((weak, alias("__vdso_gettimeofday")));

/*
 * This will break when the xtime seconds get inaccurate, but that is
 * unlikely
 */
notrace time_t __vdso_time(time_t *t)
{
	/* This is atomic on x86_64 so we don't need any locks. */
	time_t result = ACCESS_ONCE(VVAR(vsyscall_gtod_data).wall_time_sec);

	if (t)
		*t = result;
	return result;
}
int time(time_t *t)
	__attribute__((weak, alias("__vdso_time")));