1 // SPDX-License-Identifier: GPL-2.0
3 * Xen time implementation.
5 * This is implemented in terms of a clocksource driver which uses
6 * the hypervisor clock as a nanosecond timebase, and a clockevent
7 * driver which uses the hypervisor's timer mechanism.
9 * Jeremy Fitzhardinge <jeremy@xensource.com>, XenSource Inc, 2007
11 #include <linux/kernel.h>
12 #include <linux/interrupt.h>
13 #include <linux/clocksource.h>
14 #include <linux/clockchips.h>
15 #include <linux/gfp.h>
16 #include <linux/slab.h>
17 #include <linux/pvclock_gtod.h>
18 #include <linux/timekeeper_internal.h>
20 #include <asm/pvclock.h>
21 #include <asm/xen/hypervisor.h>
22 #include <asm/xen/hypercall.h>
24 #include <xen/events.h>
25 #include <xen/features.h>
26 #include <xen/interface/xen.h>
27 #include <xen/interface/vcpu.h>
31 /* Xen may fire a timer up to this many ns early */
32 #define TIMER_SLOP 100000
34 /* Get the TSC speed from Xen */
/*
 * Passes the time info of vcpu_info[0] in HYPERVISOR_shared_info to
 * pvclock_tsc_khz() and returns its result. Installed as
 * x86_platform.calibrate_tsc by xen_init_time_ops() and
 * xen_hvm_init_time_ops().
 */
35 static unsigned long xen_tsc_khz(void)
37 struct pvclock_vcpu_time_info
*info
=
38 &HYPERVISOR_shared_info
->vcpu_info
[0].time
;
40 return pvclock_tsc_khz(info
);
/*
 * Read the clock through pvclock_clocksource_read(), using the time
 * info of the xen_vcpu entry of the current CPU.
 *
 * The result is stored in ret; the declaration of ret and the return
 * statement are not visible in this view.
 */
43 u64
xen_clocksource_read(void)
45 struct pvclock_vcpu_time_info
*src
;
/* Preemption stays off (notrace variants) across the per-cpu lookup and the read. */
48 preempt_disable_notrace();
49 src
= &__this_cpu_read(xen_vcpu
)->time
;
50 ret
= pvclock_clocksource_read(src
);
51 preempt_enable_notrace();
/*
 * Read callback of xen_clocksource (its .read member).
 * The cs argument is not used; the value comes straight from
 * xen_clocksource_read().
 */
55 static u64
xen_clocksource_get_cycles(struct clocksource
*cs
)
57 return xen_clocksource_read();
/*
 * Fill *ts with the wall clock time.
 *
 * pvclock_read_wallclock() is given the wc member of
 * HYPERVISOR_shared_info together with the time info of the xen_vcpu
 * entry of the current CPU. The per-cpu access is bracketed by
 * get_cpu_var() / put_cpu_var().
 */
60 static void xen_read_wallclock(struct timespec
*ts
)
62 struct shared_info
*s
= HYPERVISOR_shared_info
;
63 struct pvclock_wall_clock
*wall_clock
= &(s
->wc
);
64 struct pvclock_vcpu_time_info
*vcpu_time
;
66 vcpu_time
= &get_cpu_var(xen_vcpu
)->time
;
67 pvclock_read_wallclock(wall_clock
, vcpu_time
, ts
);
68 put_cpu_var(xen_vcpu
);
/*
 * get_wallclock hook: forwards to xen_read_wallclock().
 * Installed as x86_platform.get_wallclock by xen_init_time_ops() and
 * xen_hvm_init_time_ops().
 */
71 static void xen_get_wallclock(struct timespec
*now
)
73 xen_read_wallclock(now
);
/*
 * set_wallclock hook. Installed as x86_platform.set_wallclock by
 * xen_init_time_ops() for domains other than the initial one and by
 * xen_hvm_init_time_ops().
 * NOTE(review): the body is not visible in this view, so the return
 * value is not documented here.
 */
76 static int xen_set_wallclock(const struct timespec
*now
)
/*
 * Callback of xen_pvclock_gtod_notifier: hand the current time to the
 * hypervisor.
 *
 * nb       notifier block, not referenced in the visible code
 * was_set  when zero, the next_sync comparison below applies
 * priv     used as a pointer to struct timekeeper
 *
 * The time is built from the timekeeper fields and sent with
 * HYPERVISOR_platform_op(). XENPF_settime64 is used while
 * settime64_supported is true; a -ENOSYS result clears that flag.
 * The XENPF_settime32 request is presumably the fallback branch.
 * NOTE(review): the statement under the next_sync check, the line that
 * introduces the settime32 branch, the declaration of ret and the
 * return statements are not visible in this view.
 */
81 static int xen_pvclock_gtod_notify(struct notifier_block
*nb
,
82 unsigned long was_set
, void *priv
)
84 /* Protected by the calling core code serialization */
85 static struct timespec64 next_sync
;
87 struct xen_platform_op op
;
88 struct timespec64 now
;
89 struct timekeeper
*tk
= priv
;
90 static bool settime64_supported
= true;
/* tv_nsec is xtime_nsec scaled down by tkr_mono.shift. */
93 now
.tv_sec
= tk
->xtime_sec
;
94 now
.tv_nsec
= (long)(tk
->tkr_mono
.xtime_nsec
>> tk
->tkr_mono
.shift
);
97 * We only take the expensive HV call when the clock was set
98 * or when the 11 minutes RTC synchronization time elapsed.
100 if (!was_set
&& timespec64_compare(&now
, &next_sync
) < 0)
/* 64-bit request; mbz is explicitly written as 0. */
104 if (settime64_supported
) {
105 op
.cmd
= XENPF_settime64
;
106 op
.u
.settime64
.mbz
= 0;
107 op
.u
.settime64
.secs
= now
.tv_sec
;
108 op
.u
.settime64
.nsecs
= now
.tv_nsec
;
109 op
.u
.settime64
.system_time
= xen_clocksource_read();
/* 32-bit request; the line introducing this branch is not visible in this view. */
111 op
.cmd
= XENPF_settime32
;
112 op
.u
.settime32
.secs
= now
.tv_sec
;
113 op
.u
.settime32
.nsecs
= now
.tv_nsec
;
114 op
.u
.settime32
.system_time
= xen_clocksource_read();
117 ret
= HYPERVISOR_platform_op(&op
);
/* -ENOSYS on the 64-bit request: clear the flag so later calls skip it. */
119 if (ret
== -ENOSYS
&& settime64_supported
) {
120 settime64_supported
= false;
127 * Move the next drift compensation time 11 minutes
128 * ahead. That's emulating the sync_cmos_clock() update for
132 next_sync
.tv_sec
+= 11 * 60;
/*
 * Notifier block whose callback is xen_pvclock_gtod_notify().
 * Registered with pvclock_gtod_register_notifier() in xen_time_init()
 * when running as the initial domain.
 */
137 static struct notifier_block xen_pvclock_gtod_notifier
= {
138 .notifier_call
= xen_pvclock_gtod_notify
,
/*
 * The Xen clocksource: read through xen_clocksource_get_cycles() and
 * flagged CLOCK_SOURCE_IS_CONTINUOUS. xen_time_init() registers it
 * with clocksource_register_hz() at NSEC_PER_SEC and sets rating to
 * 275 for the initial domain.
 * NOTE(review): other initialisers of this struct are not visible in
 * this view.
 */
141 static struct clocksource xen_clocksource __read_mostly
= {
144 .read
= xen_clocksource_get_cycles
,
146 .flags
= CLOCK_SOURCE_IS_CONTINUOUS
,
150 Xen clockevent implementation
152 Xen has two clockevent implementations:
154 The old timer_op one works with all released versions of Xen prior
155 to version 3.0.4. This version of the hypervisor provides a
156 single-shot timer with nanosecond resolution. However, sharing the
157 same event channel is a 100Hz tick which is delivered while the
158 vcpu is running. We don't care about or use this tick, but it will
159 cause the core time code to think the timer fired too soon, and
160 will end up resetting it each time. It could be filtered, but
161 doing so has complications when the ktime clocksource is not yet
162 the xen clocksource (i.e., at boot time).
164 The new vcpu_op-based timer interface allows the tick timer period
165 to be changed or turned off. The tick timer is not useful as a
166 periodic timer because events are only delivered to running vcpus.
167 The one-shot timer can report when a timeout is in the past, so
168 set_next_event is capable of returning -ETIME when appropriate.
169 This interface is used when available.
174 Get a hypervisor absolute time. In theory we could maintain an
175 offset between the kernel's time and the hypervisor's time, and
176 apply that to a kernel's absolute timeout. Unfortunately the
177 hypervisor and kernel times can drift even if the kernel is using
178 the Xen clocksource, because ntp can warp the kernel's clocksource.
/*
 * Turn a relative delta into an absolute deadline by adding it to the
 * current xen_clocksource_read() value. Used by both set_next_event
 * callbacks.
 */
180 static s64
get_abs_timeout(unsigned long delta
)
182 return xen_clocksource_read() + delta
;
/*
 * set_state_shutdown callback of xen_timerop_clockevent.
 * Calls HYPERVISOR_set_timer_op(0); presumably a zero timeout cancels
 * the pending timer - confirm against the Xen interface.
 * The return statement is not visible in this view.
 */
185 static int xen_timerop_shutdown(struct clock_event_device
*evt
)
188 HYPERVISOR_set_timer_op(0);
/*
 * set_next_event callback of xen_timerop_clockevent.
 *
 * Warns when evt is not in oneshot state, then programs the timer with
 * HYPERVISOR_set_timer_op() using the absolute deadline from
 * get_abs_timeout(delta).
 * NOTE(review): the statement executed on a negative hypercall result
 * and the return statement are not visible in this view.
 */
193 static int xen_timerop_set_next_event(unsigned long delta
,
194 struct clock_event_device
*evt
)
196 WARN_ON(!clockevent_state_oneshot(evt
));
198 if (HYPERVISOR_set_timer_op(get_abs_timeout(delta
)) < 0)
201 /* We may have missed the deadline, but there's no real way of
202 knowing for sure. If the event was in the past, then we'll
203 get an immediate interrupt. */
/*
 * Clockevent template for the timer_op interface.
 * Oneshot only; delta limits are 0xffffffff (max) and TIMER_SLOP (min),
 * given both in ns and in ticks. This is the initial value of
 * xen_clockevent and is copied into the per-cpu device by
 * xen_setup_timer().
 * NOTE(review): some initialisers are not visible in this view.
 */
208 static const struct clock_event_device xen_timerop_clockevent
= {
210 .features
= CLOCK_EVT_FEAT_ONESHOT
,
212 .max_delta_ns
= 0xffffffff,
213 .max_delta_ticks
= 0xffffffff,
214 .min_delta_ns
= TIMER_SLOP
,
215 .min_delta_ticks
= TIMER_SLOP
,
221 .set_state_shutdown
= xen_timerop_shutdown
,
222 .set_next_event
= xen_timerop_set_next_event
,
/*
 * set_state_shutdown callback of xen_vcpuop_clockevent.
 *
 * Issues VCPUOP_stop_singleshot_timer and VCPUOP_stop_periodic_timer
 * for the Xen vcpu number of the current CPU.
 * NOTE(review): the trailing hypercall arguments, the handling of the
 * results and the return statement are not visible in this view.
 */
225 static int xen_vcpuop_shutdown(struct clock_event_device
*evt
)
227 int cpu
= smp_processor_id();
229 if (HYPERVISOR_vcpu_op(VCPUOP_stop_singleshot_timer
, xen_vcpu_nr(cpu
),
231 HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer
, xen_vcpu_nr(cpu
),
/*
 * set_state_oneshot callback of xen_vcpuop_clockevent.
 *
 * Issues VCPUOP_stop_periodic_timer for the Xen vcpu number of the
 * current CPU.
 * NOTE(review): the trailing hypercall argument, the handling of the
 * result and the return statement are not visible in this view.
 */
238 static int xen_vcpuop_set_oneshot(struct clock_event_device
*evt
)
240 int cpu
= smp_processor_id();
242 if (HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer
, xen_vcpu_nr(cpu
),
/*
 * set_next_event callback of xen_vcpuop_clockevent.
 *
 * Warns when evt is not in oneshot state, fills a
 * vcpu_set_singleshot_timer request with the absolute deadline from
 * get_abs_timeout(delta) and issues VCPUOP_set_singleshot_timer for
 * the Xen vcpu number of the current CPU.
 * NOTE(review): the flags assignment announced by the comment below,
 * the last hypercall argument, the declaration of ret and the return
 * statement are not visible in this view.
 */
249 static int xen_vcpuop_set_next_event(unsigned long delta
,
250 struct clock_event_device
*evt
)
252 int cpu
= smp_processor_id();
253 struct vcpu_set_singleshot_timer single
;
256 WARN_ON(!clockevent_state_oneshot(evt
));
258 single
.timeout_abs_ns
= get_abs_timeout(delta
);
259 /* Get an event anyway, even if the timeout is already expired */
262 ret
= HYPERVISOR_vcpu_op(VCPUOP_set_singleshot_timer
, xen_vcpu_nr(cpu
),
/*
 * Clockevent template for the vcpu_op interface.
 * Oneshot only, with the same delta limits as xen_timerop_clockevent
 * (0xffffffff max, TIMER_SLOP min). Selected as xen_clockevent by
 * xen_time_init().
 * NOTE(review): some initialisers are not visible in this view.
 */
269 static const struct clock_event_device xen_vcpuop_clockevent
= {
271 .features
= CLOCK_EVT_FEAT_ONESHOT
,
273 .max_delta_ns
= 0xffffffff,
274 .max_delta_ticks
= 0xffffffff,
275 .min_delta_ns
= TIMER_SLOP
,
276 .min_delta_ticks
= TIMER_SLOP
,
282 .set_state_shutdown
= xen_vcpuop_shutdown
,
283 .set_state_oneshot
= xen_vcpuop_set_oneshot
,
284 .set_next_event
= xen_vcpuop_set_next_event
,
/*
 * Clockevent template currently in use. Starts out as the timer_op
 * variant; xen_time_init() switches it to xen_vcpuop_clockevent.
 * xen_setup_timer() copies the selected template into the per-cpu
 * device.
 */
287 static const struct clock_event_device
*xen_clockevent
=
288 &xen_timerop_clockevent
;
/*
 * Per-cpu wrapper around a clock_event_device.
 * NOTE(review): xen_setup_timer() also writes a name member of this
 * struct; its declaration is not visible in this view.
 */
290 struct xen_clock_event_device
{
291 struct clock_event_device evt
;
/*
 * One xen_clock_event_device per cpu. evt.irq starts at -1;
 * xen_setup_timer() warns when it finds a value >= 0.
 */
294 static DEFINE_PER_CPU(struct xen_clock_event_device
, xen_clock_events
) = { .evt
.irq
= -1 };
/*
 * Interrupt handler bound to VIRQ_TIMER by xen_setup_timer().
 *
 * Looks up the clockevent of the current CPU and, when an
 * event_handler is installed, invokes it with that device.
 * NOTE(review): the returned irqreturn_t value is not visible in this
 * view.
 */
296 static irqreturn_t
xen_timer_interrupt(int irq
, void *dev_id
)
298 struct clock_event_device
*evt
= this_cpu_ptr(&xen_clock_events
.evt
);
302 if (evt
->event_handler
) {
303 evt
->event_handler(evt
);
/*
 * Release the timer interrupt of the given cpu by calling
 * unbind_from_irqhandler() on the irq stored in its clockevent, with
 * NULL as the second argument.
 * NOTE(review): any guard around the unbind and any reset of evt->irq
 * are not visible in this view.
 */
310 void xen_teardown_timer(int cpu
)
312 struct clock_event_device
*evt
;
313 evt
= &per_cpu(xen_clock_events
, cpu
).evt
;
316 unbind_from_irqhandler(evt
->irq
, NULL
);
/*
 * Set up the Xen timer interrupt and clockevent for the given cpu.
 *
 * Steps visible here: warn when an irq is already recorded for the
 * cpu, call xen_teardown_timer(), log the installation, format the
 * device name from the cpu number, bind VIRQ_TIMER on that cpu to
 * xen_timer_interrupt(), raise the irq to XEN_IRQ_PRIORITY_MAX, copy
 * the active xen_clockevent template into the per-cpu device and set
 * its cpumask to the cpu.
 * NOTE(review): the condition guarding the teardown call, the last
 * arguments of the bind call, the declaration of irq and the store of
 * irq into the device are not visible in this view.
 */
321 void xen_setup_timer(int cpu
)
323 struct xen_clock_event_device
*xevt
= &per_cpu(xen_clock_events
, cpu
);
324 struct clock_event_device
*evt
= &xevt
->evt
;
327 WARN(evt
->irq
>= 0, "IRQ%d for CPU%d is already allocated\n", evt
->irq
, cpu
);
329 xen_teardown_timer(cpu
);
331 printk(KERN_INFO
"installing Xen timer for CPU %d\n", cpu
);
333 snprintf(xevt
->name
, sizeof(xevt
->name
), "timer%d", cpu
);
335 irq
= bind_virq_to_irqhandler(VIRQ_TIMER
, cpu
, xen_timer_interrupt
,
336 IRQF_PERCPU
|IRQF_NOBALANCING
|IRQF_TIMER
|
337 IRQF_FORCE_RESUME
|IRQF_EARLY_RESUME
,
/* The result of xen_set_irq_priority() is deliberately discarded. */
339 (void)xen_set_irq_priority(irq
, XEN_IRQ_PRIORITY_MAX
);
/* Start from the selected template, then bind the device to this cpu. */
341 memcpy(evt
, xen_clockevent
, sizeof(*evt
));
343 evt
->cpumask
= cpumask_of(cpu
);
/*
 * Register the clockevent device of the current CPU with
 * clockevents_register_device().
 */
348 void xen_setup_cpu_clockevents(void)
350 clockevents_register_device(this_cpu_ptr(&xen_clock_events
.evt
));
/*
 * Resume hook for the timer.
 *
 * Issues VCPUOP_stop_periodic_timer, with a NULL argument, for the Xen
 * vcpu number of every online cpu. The check against
 * xen_vcpuop_clockevent presumably skips this when the timer_op
 * interface is in use.
 * NOTE(review): the statement under that check, the declaration of cpu
 * and the handling of a non-zero hypercall result are not visible in
 * this view.
 */
353 void xen_timer_resume(void)
359 if (xen_clockevent
!= &xen_vcpuop_clockevent
)
362 for_each_online_cpu(cpu
) {
363 if (HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer
,
364 xen_vcpu_nr(cpu
), NULL
))
/*
 * pv_time_ops template: sched_clock reads the Xen clocksource and
 * steal_clock is xen_steal_clock. Copied into pv_time_ops by
 * xen_init_time_ops() and xen_hvm_init_time_ops().
 */
369 static const struct pv_time_ops xen_time_ops __initconst
= {
370 .sched_clock
= xen_clocksource_read
,
371 .steal_clock
= xen_steal_clock
,
/*
 * Pointer to the pvclock_vsyscall_time_info used in this file:
 * passed to pvclock_set_pvti_cpu0_va() during vsyscall setup, cleared
 * with clear_page() on save, and its pvti member is registered again
 * on restore.
 * NOTE(review): the assignment to this pointer is not visible in this
 * view.
 */
374 static struct pvclock_vsyscall_time_info
*xen_clock __read_mostly
;
/*
 * Save-side handling of the secondary vcpu time info.
 *
 * Issues VCPUOP_register_vcpu_time_memory_area for vcpu 0 with t,
 * logs a notice (presumably when the hypercall fails) and clears the
 * page behind xen_clock.
 * NOTE(review): how t is filled, the conditions around the hypercall,
 * the notice and clear_page(), and the declaration of ret are not
 * visible in this view.
 */
376 void xen_save_time_memory_area(void)
378 struct vcpu_register_time_memory_area t
;
386 ret
= HYPERVISOR_vcpu_op(VCPUOP_register_vcpu_time_memory_area
, 0, &t
);
388 pr_notice("Cannot save secondary vcpu_time_info (err %d)",
391 clear_page(xen_clock
);
/*
 * Restore-side handling of the secondary vcpu time info.
 *
 * Points t.addr.v at the pvti member of xen_clock and issues
 * VCPUOP_register_vcpu_time_memory_area for vcpu 0; a notice is
 * logged, presumably when the hypercall fails.
 * NOTE(review): any early exit, the condition around the notice and
 * the declaration of ret are not visible in this view.
 */
394 void xen_restore_time_memory_area(void)
396 struct vcpu_register_time_memory_area t
;
402 t
.addr
.v
= &xen_clock
->pvti
;
404 ret
= HYPERVISOR_vcpu_op(VCPUOP_register_vcpu_time_memory_area
, 0, &t
);
407 * We don't disable VCLOCK_PVCLOCK entirely if it fails to register the
408 * secondary time info with Xen or if we migrated to a host without the
409 * necessary flags. On both of these cases what happens is either
410 * process seeing a zeroed out pvti or seeing no PVCLOCK_TSC_STABLE_BIT
411 * bit set. Userspace checks the latter and if 0, it discards the data
412 * in pvti and fallbacks to a system call for a reliable timestamp.
415 pr_notice("Cannot restore secondary vcpu_time_info (err %d)",
/*
 * Set up the secondary time info area used for VCLOCK_PVCLOCK.
 *
 * Allocates a zeroed page with GFP_KERNEL, registers its pvti member
 * for vcpu 0 through VCPUOP_register_vcpu_time_memory_area, and checks
 * PVCLOCK_TSC_STABLE_BIT in the registered data. Both visible failure
 * paths log a notice and free the page. On success the area is handed
 * to pvclock_set_pvti_cpu0_va() and the clocksource vclock_mode is set
 * to VCLOCK_PVCLOCK.
 * NOTE(review): the allocation failure check, the conditions around
 * the first notice, the arguments of the second hypercall, the
 * assignment of xen_clock, the declaration of ret and the return
 * statements are not visible in this view.
 */
419 static void xen_setup_vsyscall_time_info(void)
421 struct vcpu_register_time_memory_area t
;
422 struct pvclock_vsyscall_time_info
*ti
;
425 ti
= (struct pvclock_vsyscall_time_info
*)get_zeroed_page(GFP_KERNEL
);
429 t
.addr
.v
= &ti
->pvti
;
431 ret
= HYPERVISOR_vcpu_op(VCPUOP_register_vcpu_time_memory_area
, 0, &t
);
433 pr_notice("xen: VCLOCK_PVCLOCK not supported (err %d)\n", ret
);
434 free_page((unsigned long)ti
);
439 * If primary time info had this bit set, secondary should too since
440 * it's the same data on both just different memory regions. But we
441 * still check it in case hypervisor is buggy.
443 if (!(ti
->pvti
.flags
& PVCLOCK_TSC_STABLE_BIT
)) {
445 ret
= HYPERVISOR_vcpu_op(VCPUOP_register_vcpu_time_memory_area
,
448 free_page((unsigned long)ti
);
450 pr_notice("xen: VCLOCK_PVCLOCK not supported (tsc unstable)\n");
455 pvclock_set_pvti_cpu0_va(xen_clock
);
457 xen_clocksource
.archdata
.vclock_mode
= VCLOCK_PVCLOCK
;
/*
 * Boot-time initialisation of Xen timekeeping on the current CPU.
 *
 * In order: raise the clocksource rating to 275 on the initial domain,
 * register xen_clocksource at NSEC_PER_SEC, probe
 * VCPUOP_stop_periodic_timer and select xen_vcpuop_clockevent, set the
 * system time from xen_read_wallclock(), force X86_FEATURE_TSC, enable
 * PVCLOCK_TSC_STABLE_BIT handling and the vsyscall time info when the
 * primary time info advertises that bit, then set up runstate info,
 * the timer and the clockevents for this cpu. The initial domain also
 * registers xen_pvclock_gtod_notifier.
 * NOTE(review): the declaration of tp and the rest of the
 * VCPUOP_stop_periodic_timer condition are not visible in this view.
 */
460 static void __init
xen_time_init(void)
462 struct pvclock_vcpu_time_info
*pvti
;
463 int cpu
= smp_processor_id();
466 /* As Dom0 is never moved, no penalty on using TSC there */
467 if (xen_initial_domain())
468 xen_clocksource
.rating
= 275;
470 clocksource_register_hz(&xen_clocksource
, NSEC_PER_SEC
);
472 if (HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer
, xen_vcpu_nr(cpu
),
474 /* Successfully turned off 100Hz tick, so we have the
475 vcpuop-based timer interface */
476 printk(KERN_DEBUG
"Xen: using vcpuop timer interface\n");
477 xen_clockevent
= &xen_vcpuop_clockevent
;
480 /* Set initial system time with full resolution */
481 xen_read_wallclock(&tp
);
482 do_settimeofday(&tp
);
484 setup_force_cpu_cap(X86_FEATURE_TSC
);
487 * We check ahead on the primary time info if this
488 * bit is supported hence speeding up Xen clocksource.
490 pvti
= &__this_cpu_read(xen_vcpu
)->time
;
491 if (pvti
->flags
& PVCLOCK_TSC_STABLE_BIT
) {
492 pvclock_set_flags(PVCLOCK_TSC_STABLE_BIT
);
493 xen_setup_vsyscall_time_info();
/* Per-cpu setup for the boot cpu: runstate info, timer irq, clockevent registration. */
496 xen_setup_runstate_info(cpu
);
497 xen_setup_timer(cpu
);
498 xen_setup_cpu_clockevents();
500 xen_time_setup_guest();
502 if (xen_initial_domain())
503 pvclock_gtod_register_notifier(&xen_pvclock_gtod_notifier
);
/*
 * Install the Xen time hooks.
 *
 * Copies xen_time_ops into pv_time_ops, makes xen_time_init() the
 * timer_init hook, replaces both setup_percpu_clockev hooks with
 * x86_init_noop, and routes calibrate_tsc and get_wallclock to the Xen
 * implementations. set_wallclock is replaced only when not running as
 * the initial domain.
 */
506 void __ref
xen_init_time_ops(void)
508 pv_time_ops
= xen_time_ops
;
510 x86_init
.timers
.timer_init
= xen_time_init
;
511 x86_init
.timers
.setup_percpu_clockev
= x86_init_noop
;
512 x86_cpuinit
.setup_percpu_clockev
= x86_init_noop
;
514 x86_platform
.calibrate_tsc
= xen_tsc_khz
;
515 x86_platform
.get_wallclock
= xen_get_wallclock
;
516 /* Dom0 uses the native method to set the hardware RTC. */
517 if (!xen_initial_domain())
518 x86_platform
.set_wallclock
= xen_set_wallclock
;
521 #ifdef CONFIG_XEN_PVHVM
/*
 * Per-cpu clockevent setup for HVM guests, installed as
 * x86_cpuinit.setup_percpu_clockev by xen_hvm_init_time_ops().
 *
 * Sets up runstate info for the current CPU and registers its
 * clockevent device. xen_setup_timer() is not called from here; the
 * text below gives the reason.
 */
522 static void xen_hvm_setup_cpu_clockevents(void)
524 int cpu
= smp_processor_id();
525 xen_setup_runstate_info(cpu
);
527 * xen_setup_timer(cpu) - snprintf is bad in atomic context. Hence
528 * doing it xen_hvm_cpu_notify (which gets called by smp_init during
529 * early bootup and also during CPU hotplug events).
531 xen_setup_cpu_clockevents();
/*
 * Install the Xen time hooks for HVM guests.
 *
 * Two checks come first: xen_have_vector_callback and the
 * XENFEAT_hvm_safe_pvclock feature (a message is logged when the
 * feature is missing). After them, xen_time_ops is copied into
 * pv_time_ops, xen_time_init() becomes the boot setup_percpu_clockev
 * hook, xen_hvm_setup_cpu_clockevents() the per-cpu one, and
 * calibrate_tsc, get_wallclock and set_wallclock are routed to the Xen
 * implementations.
 * NOTE(review): the statements executed when either check fails are
 * not visible in this view; presumably both return early.
 */
534 void __init
xen_hvm_init_time_ops(void)
537 * vector callback is needed otherwise we cannot receive interrupts
538 * on cpu > 0 and at this point we don't know how many cpus are
541 if (!xen_have_vector_callback
)
544 if (!xen_feature(XENFEAT_hvm_safe_pvclock
)) {
545 printk(KERN_INFO
"Xen doesn't support pvclock on HVM,"
546 "disable pv timer\n");
550 pv_time_ops
= xen_time_ops
;
551 x86_init
.timers
.setup_percpu_clockev
= xen_time_init
;
552 x86_cpuinit
.setup_percpu_clockev
= xen_hvm_setup_cpu_clockevents
;
554 x86_platform
.calibrate_tsc
= xen_tsc_khz
;
555 x86_platform
.get_wallclock
= xen_get_wallclock
;
556 x86_platform
.set_wallclock
= xen_set_wallclock
;