1 // SPDX-License-Identifier: GPL-2.0-only
3 * Copyright (c) 2010, Microsoft Corporation.
6 * Haiyang Zhang <haiyangz@microsoft.com>
7 * Hank Janssen <hjanssen@microsoft.com>
9 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
11 #include <linux/kernel.h>
12 #include <linux/init.h>
13 #include <linux/module.h>
14 #include <linux/slab.h>
15 #include <linux/sysctl.h>
16 #include <linux/reboot.h>
17 #include <linux/hyperv.h>
18 #include <linux/clockchips.h>
19 #include <linux/ptp_clock_kernel.h>
20 #include <asm/mshyperv.h>
22 #include "hyperv_vmbus.h"
28 #define SD_VERSION_3_1 (SD_MAJOR << 16 | SD_MINOR_1)
29 #define SD_VERSION_3_2 (SD_MAJOR << 16 | SD_MINOR_2)
30 #define SD_VERSION (SD_MAJOR << 16 | SD_MINOR)
33 #define SD_VERSION_1 (SD_MAJOR_1 << 16 | SD_MINOR)
37 #define TS_VERSION (TS_MAJOR << 16 | TS_MINOR)
40 #define TS_VERSION_1 (TS_MAJOR_1 << 16 | TS_MINOR)
43 #define TS_VERSION_3 (TS_MAJOR_3 << 16 | TS_MINOR)
47 #define HB_VERSION (HB_MAJOR << 16 | HB_MINOR)
50 #define HB_VERSION_1 (HB_MAJOR_1 << 16 | HB_MINOR)
/*
 * Negotiated service versions (major << 16 | minor), filled in by
 * vmbus_prep_negotiate_resp() during the NEGOTIATE exchange with the host.
 */
static int sd_srv_version;	/* Shutdown service */
static int ts_srv_version;	/* TimeSync service */
static int hb_srv_version;	/* Heartbeat service */
56 #define SD_VER_COUNT 4
57 static const int sd_versions
[] = {
64 #define TS_VER_COUNT 3
65 static const int ts_versions
[] = {
71 #define HB_VER_COUNT 2
72 static const int hb_versions
[] = {
77 #define FW_VER_COUNT 2
78 static const int fw_versions
[] = {
84 * Send the "hibernate" udev event in a thread context.
86 struct hibernate_work_context
{
87 struct work_struct work
;
88 struct hv_device
*dev
;
91 static struct hibernate_work_context hibernate_context
;
92 static bool hibernation_supported
;
94 static void send_hibernate_uevent(struct work_struct
*work
)
96 char *uevent_env
[2] = { "EVENT=hibernate", NULL
};
97 struct hibernate_work_context
*ctx
;
99 ctx
= container_of(work
, struct hibernate_work_context
, work
);
101 kobject_uevent_env(&ctx
->dev
->device
.kobj
, KOBJ_CHANGE
, uevent_env
);
103 pr_info("Sent hibernation uevent\n");
106 static int hv_shutdown_init(struct hv_util_service
*srv
)
108 struct vmbus_channel
*channel
= srv
->channel
;
110 INIT_WORK(&hibernate_context
.work
, send_hibernate_uevent
);
111 hibernate_context
.dev
= channel
->device_obj
;
113 hibernation_supported
= hv_is_hibernation_supported();
118 static void shutdown_onchannelcallback(void *context
);
119 static struct hv_util_service util_shutdown
= {
120 .util_cb
= shutdown_onchannelcallback
,
121 .util_init
= hv_shutdown_init
,
124 static int hv_timesync_init(struct hv_util_service
*srv
);
125 static int hv_timesync_pre_suspend(void);
126 static void hv_timesync_deinit(void);
128 static void timesync_onchannelcallback(void *context
);
129 static struct hv_util_service util_timesynch
= {
130 .util_cb
= timesync_onchannelcallback
,
131 .util_init
= hv_timesync_init
,
132 .util_pre_suspend
= hv_timesync_pre_suspend
,
133 .util_deinit
= hv_timesync_deinit
,
136 static void heartbeat_onchannelcallback(void *context
);
137 static struct hv_util_service util_heartbeat
= {
138 .util_cb
= heartbeat_onchannelcallback
,
141 static struct hv_util_service util_kvp
= {
142 .util_cb
= hv_kvp_onchannelcallback
,
143 .util_init
= hv_kvp_init
,
144 .util_pre_suspend
= hv_kvp_pre_suspend
,
145 .util_pre_resume
= hv_kvp_pre_resume
,
146 .util_deinit
= hv_kvp_deinit
,
149 static struct hv_util_service util_vss
= {
150 .util_cb
= hv_vss_onchannelcallback
,
151 .util_init
= hv_vss_init
,
152 .util_pre_suspend
= hv_vss_pre_suspend
,
153 .util_pre_resume
= hv_vss_pre_resume
,
154 .util_deinit
= hv_vss_deinit
,
157 static void perform_shutdown(struct work_struct
*dummy
)
159 orderly_poweroff(true);
/* Work handler: gracefully reboot the guest. */
static void perform_restart(struct work_struct *dummy)
{
	orderly_reboot();
}

/*
 * Perform the shutdown operation in a thread context.
 */
static DECLARE_WORK(shutdown_work, perform_shutdown);

/*
 * Perform the restart operation in a thread context.
 */
static DECLARE_WORK(restart_work, perform_restart);
177 static void shutdown_onchannelcallback(void *context
)
179 struct vmbus_channel
*channel
= context
;
180 struct work_struct
*work
= NULL
;
183 u8
*shut_txf_buf
= util_shutdown
.recv_buffer
;
185 struct shutdown_msg_data
*shutdown_msg
;
187 struct icmsg_hdr
*icmsghdrp
;
189 if (vmbus_recvpacket(channel
, shut_txf_buf
, HV_HYP_PAGE_SIZE
, &recvlen
, &requestid
)) {
190 pr_err_ratelimited("Shutdown request received. Could not read into shut txf buf\n");
197 /* Ensure recvlen is big enough to read header data */
198 if (recvlen
< ICMSG_HDR
) {
199 pr_err_ratelimited("Shutdown request received. Packet length too small: %d\n",
204 icmsghdrp
= (struct icmsg_hdr
*)&shut_txf_buf
[sizeof(struct vmbuspipe_hdr
)];
206 if (icmsghdrp
->icmsgtype
== ICMSGTYPE_NEGOTIATE
) {
207 if (vmbus_prep_negotiate_resp(icmsghdrp
,
208 shut_txf_buf
, recvlen
,
209 fw_versions
, FW_VER_COUNT
,
210 sd_versions
, SD_VER_COUNT
,
211 NULL
, &sd_srv_version
)) {
212 pr_info("Shutdown IC version %d.%d\n",
213 sd_srv_version
>> 16,
214 sd_srv_version
& 0xFFFF);
216 } else if (icmsghdrp
->icmsgtype
== ICMSGTYPE_SHUTDOWN
) {
217 /* Ensure recvlen is big enough to contain shutdown_msg_data struct */
218 if (recvlen
< ICMSG_HDR
+ sizeof(struct shutdown_msg_data
)) {
219 pr_err_ratelimited("Invalid shutdown msg data. Packet length too small: %u\n",
224 shutdown_msg
= (struct shutdown_msg_data
*)&shut_txf_buf
[ICMSG_HDR
];
227 * shutdown_msg->flags can be 0(shut down), 2(reboot),
228 * or 4(hibernate). It may bitwise-OR 1, which means
229 * performing the request by force. Linux always tries
230 * to perform the request by force.
232 switch (shutdown_msg
->flags
) {
235 icmsghdrp
->status
= HV_S_OK
;
236 work
= &shutdown_work
;
237 pr_info("Shutdown request received - graceful shutdown initiated\n");
241 icmsghdrp
->status
= HV_S_OK
;
242 work
= &restart_work
;
243 pr_info("Restart request received - graceful restart initiated\n");
247 pr_info("Hibernation request received\n");
248 icmsghdrp
->status
= hibernation_supported
?
250 if (hibernation_supported
)
251 work
= &hibernate_context
.work
;
254 icmsghdrp
->status
= HV_E_FAIL
;
255 pr_info("Shutdown request received - Invalid request\n");
259 icmsghdrp
->status
= HV_E_FAIL
;
260 pr_err_ratelimited("Shutdown request received. Invalid msg type: %d\n",
261 icmsghdrp
->icmsgtype
);
264 icmsghdrp
->icflags
= ICMSGHDRFLAG_TRANSACTION
265 | ICMSGHDRFLAG_RESPONSE
;
267 vmbus_sendpacket(channel
, shut_txf_buf
,
269 VM_PKT_DATA_INBAND
, 0);
276 * Set the host time in a process context.
278 static struct work_struct adj_time_work
;
281 * The last time sample, received from the host. PTP device responds to
282 * requests by using this data and the current partition-wide time reference
291 static bool timesync_implicit
;
293 module_param(timesync_implicit
, bool, 0644);
294 MODULE_PARM_DESC(timesync_implicit
, "If set treat SAMPLE as SYNC when clock is behind");
296 static inline u64
reftime_to_ns(u64 reftime
)
298 return (reftime
- WLTIMEDELTA
) * 100;
302 * Hard coded threshold for host timesync delay: 600 seconds
304 static const u64 HOST_TIMESYNC_DELAY_THRESH
= 600 * (u64
)NSEC_PER_SEC
;
306 static int hv_get_adj_host_time(struct timespec64
*ts
)
308 u64 newtime
, reftime
, timediff_adj
;
312 spin_lock_irqsave(&host_ts
.lock
, flags
);
313 reftime
= hv_read_reference_counter();
316 * We need to let the caller know that last update from host
317 * is older than the max allowable threshold. clock_gettime()
318 * and PTP ioctl do not have a documented error that we could
319 * return for this specific case. Use ESTALE to report this.
321 timediff_adj
= reftime
- host_ts
.ref_time
;
322 if (timediff_adj
* 100 > HOST_TIMESYNC_DELAY_THRESH
) {
323 pr_warn_once("TIMESYNC IC: Stale time stamp, %llu nsecs old\n",
324 (timediff_adj
* 100));
328 newtime
= host_ts
.host_time
+ timediff_adj
;
329 *ts
= ns_to_timespec64(reftime_to_ns(newtime
));
330 spin_unlock_irqrestore(&host_ts
.lock
, flags
);
335 static void hv_set_host_time(struct work_struct
*work
)
338 struct timespec64 ts
;
340 if (!hv_get_adj_host_time(&ts
))
341 do_settimeofday64(&ts
);
345 * Due to a bug on Hyper-V hosts, the sync flag may not always be sent on resume.
346 * Force a sync if the guest is behind.
348 static inline bool hv_implicit_sync(u64 host_time
)
350 struct timespec64 new_ts
;
351 struct timespec64 threshold_ts
;
353 new_ts
= ns_to_timespec64(reftime_to_ns(host_time
));
354 ktime_get_real_ts64(&threshold_ts
);
356 threshold_ts
.tv_sec
+= 5;
359 * If guest behind the host by 5 or more seconds.
361 if (timespec64_compare(&new_ts
, &threshold_ts
) >= 0)
368 * Synchronize time with host after reboot, restore, etc.
370 * ICTIMESYNCFLAG_SYNC flag bit indicates reboot, restore events of the VM.
371 * After reboot the flag ICTIMESYNCFLAG_SYNC is included in the first time
372 * message after the timesync channel is opened. Since the hv_utils module is
373 * loaded after hv_vmbus, the first message is usually missed. This bit is
374 * considered a hard request to discipline the clock.
376 * ICTIMESYNCFLAG_SAMPLE bit indicates a time sample from host. This is
377 * typically used as a hint to the guest. The guest is under no obligation
378 * to discipline the clock.
380 static inline void adj_guesttime(u64 hosttime
, u64 reftime
, u8 adj_flags
)
386 * Save the adjusted time sample from the host and the snapshot
387 * of the current system time.
389 spin_lock_irqsave(&host_ts
.lock
, flags
);
391 cur_reftime
= hv_read_reference_counter();
392 host_ts
.host_time
= hosttime
;
393 host_ts
.ref_time
= cur_reftime
;
396 * TimeSync v4 messages contain reference time (guest's Hyper-V
397 * clocksource read when the time sample was generated), we can
398 * improve the precision by adding the delta between now and the
399 * time of generation. For older protocols we set
400 * reftime == cur_reftime on call.
402 host_ts
.host_time
+= (cur_reftime
- reftime
);
404 spin_unlock_irqrestore(&host_ts
.lock
, flags
);
406 /* Schedule work to do do_settimeofday64() */
407 if ((adj_flags
& ICTIMESYNCFLAG_SYNC
) ||
408 (timesync_implicit
&& hv_implicit_sync(host_ts
.host_time
)))
409 schedule_work(&adj_time_work
);
413 * Time Sync Channel message handler.
415 static void timesync_onchannelcallback(void *context
)
417 struct vmbus_channel
*channel
= context
;
420 struct icmsg_hdr
*icmsghdrp
;
421 struct ictimesync_data
*timedatap
;
422 struct ictimesync_ref_data
*refdata
;
423 u8
*time_txf_buf
= util_timesynch
.recv_buffer
;
426 * Drain the ring buffer and use the last packet to update
430 int ret
= vmbus_recvpacket(channel
, time_txf_buf
,
431 HV_HYP_PAGE_SIZE
, &recvlen
,
434 pr_err_ratelimited("TimeSync IC pkt recv failed (Err: %d)\n",
442 /* Ensure recvlen is big enough to read header data */
443 if (recvlen
< ICMSG_HDR
) {
444 pr_err_ratelimited("Timesync request received. Packet length too small: %d\n",
449 icmsghdrp
= (struct icmsg_hdr
*)&time_txf_buf
[
450 sizeof(struct vmbuspipe_hdr
)];
452 if (icmsghdrp
->icmsgtype
== ICMSGTYPE_NEGOTIATE
) {
453 if (vmbus_prep_negotiate_resp(icmsghdrp
,
454 time_txf_buf
, recvlen
,
455 fw_versions
, FW_VER_COUNT
,
456 ts_versions
, TS_VER_COUNT
,
457 NULL
, &ts_srv_version
)) {
458 pr_info("TimeSync IC version %d.%d\n",
459 ts_srv_version
>> 16,
460 ts_srv_version
& 0xFFFF);
462 } else if (icmsghdrp
->icmsgtype
== ICMSGTYPE_TIMESYNC
) {
463 if (ts_srv_version
> TS_VERSION_3
) {
464 /* Ensure recvlen is big enough to read ictimesync_ref_data */
465 if (recvlen
< ICMSG_HDR
+ sizeof(struct ictimesync_ref_data
)) {
466 pr_err_ratelimited("Invalid ictimesync ref data. Length too small: %u\n",
470 refdata
= (struct ictimesync_ref_data
*)&time_txf_buf
[ICMSG_HDR
];
472 adj_guesttime(refdata
->parenttime
,
473 refdata
->vmreferencetime
,
476 /* Ensure recvlen is big enough to read ictimesync_data */
477 if (recvlen
< ICMSG_HDR
+ sizeof(struct ictimesync_data
)) {
478 pr_err_ratelimited("Invalid ictimesync data. Length too small: %u\n",
482 timedatap
= (struct ictimesync_data
*)&time_txf_buf
[ICMSG_HDR
];
484 adj_guesttime(timedatap
->parenttime
,
485 hv_read_reference_counter(),
489 icmsghdrp
->status
= HV_E_FAIL
;
490 pr_err_ratelimited("Timesync request received. Invalid msg type: %d\n",
491 icmsghdrp
->icmsgtype
);
494 icmsghdrp
->icflags
= ICMSGHDRFLAG_TRANSACTION
495 | ICMSGHDRFLAG_RESPONSE
;
497 vmbus_sendpacket(channel
, time_txf_buf
,
499 VM_PKT_DATA_INBAND
, 0);
504 * Heartbeat functionality.
505 * Every two seconds, Hyper-V send us a heartbeat request message.
506 * we respond to this message, and Hyper-V knows we are alive.
508 static void heartbeat_onchannelcallback(void *context
)
510 struct vmbus_channel
*channel
= context
;
513 struct icmsg_hdr
*icmsghdrp
;
514 struct heartbeat_msg_data
*heartbeat_msg
;
515 u8
*hbeat_txf_buf
= util_heartbeat
.recv_buffer
;
519 if (vmbus_recvpacket(channel
, hbeat_txf_buf
, HV_HYP_PAGE_SIZE
,
520 &recvlen
, &requestid
)) {
521 pr_err_ratelimited("Heartbeat request received. Could not read into hbeat txf buf\n");
528 /* Ensure recvlen is big enough to read header data */
529 if (recvlen
< ICMSG_HDR
) {
530 pr_err_ratelimited("Heartbeat request received. Packet length too small: %d\n",
535 icmsghdrp
= (struct icmsg_hdr
*)&hbeat_txf_buf
[
536 sizeof(struct vmbuspipe_hdr
)];
538 if (icmsghdrp
->icmsgtype
== ICMSGTYPE_NEGOTIATE
) {
539 if (vmbus_prep_negotiate_resp(icmsghdrp
,
540 hbeat_txf_buf
, recvlen
,
541 fw_versions
, FW_VER_COUNT
,
542 hb_versions
, HB_VER_COUNT
,
543 NULL
, &hb_srv_version
)) {
545 pr_info("Heartbeat IC version %d.%d\n",
546 hb_srv_version
>> 16,
547 hb_srv_version
& 0xFFFF);
549 } else if (icmsghdrp
->icmsgtype
== ICMSGTYPE_HEARTBEAT
) {
551 * Ensure recvlen is big enough to read seq_num. Reserved area is not
552 * included in the check as the host may not fill it up entirely
554 if (recvlen
< ICMSG_HDR
+ sizeof(u64
)) {
555 pr_err_ratelimited("Invalid heartbeat msg data. Length too small: %u\n",
559 heartbeat_msg
= (struct heartbeat_msg_data
*)&hbeat_txf_buf
[ICMSG_HDR
];
561 heartbeat_msg
->seq_num
+= 1;
563 icmsghdrp
->status
= HV_E_FAIL
;
564 pr_err_ratelimited("Heartbeat request received. Invalid msg type: %d\n",
565 icmsghdrp
->icmsgtype
);
568 icmsghdrp
->icflags
= ICMSGHDRFLAG_TRANSACTION
569 | ICMSGHDRFLAG_RESPONSE
;
571 vmbus_sendpacket(channel
, hbeat_txf_buf
,
573 VM_PKT_DATA_INBAND
, 0);
577 #define HV_UTIL_RING_SEND_SIZE VMBUS_RING_SIZE(3 * HV_HYP_PAGE_SIZE)
578 #define HV_UTIL_RING_RECV_SIZE VMBUS_RING_SIZE(3 * HV_HYP_PAGE_SIZE)
580 static int util_probe(struct hv_device
*dev
,
581 const struct hv_vmbus_device_id
*dev_id
)
583 struct hv_util_service
*srv
=
584 (struct hv_util_service
*)dev_id
->driver_data
;
587 srv
->recv_buffer
= kmalloc(HV_HYP_PAGE_SIZE
* 4, GFP_KERNEL
);
588 if (!srv
->recv_buffer
)
590 srv
->channel
= dev
->channel
;
591 if (srv
->util_init
) {
592 ret
= srv
->util_init(srv
);
600 * The set of services managed by the util driver are not performance
601 * critical and do not need batched reading. Furthermore, some services
602 * such as KVP can only handle one message from the host at a time.
603 * Turn off batched reading for all util drivers before we open the
606 set_channel_read_mode(dev
->channel
, HV_CALL_DIRECT
);
608 hv_set_drvdata(dev
, srv
);
610 ret
= vmbus_open(dev
->channel
, HV_UTIL_RING_SEND_SIZE
,
611 HV_UTIL_RING_RECV_SIZE
, NULL
, 0, srv
->util_cb
,
619 if (srv
->util_deinit
)
622 kfree(srv
->recv_buffer
);
626 static void util_remove(struct hv_device
*dev
)
628 struct hv_util_service
*srv
= hv_get_drvdata(dev
);
630 if (srv
->util_deinit
)
632 vmbus_close(dev
->channel
);
633 kfree(srv
->recv_buffer
);
637 * When we're in util_suspend(), all the userspace processes have been frozen
638 * (refer to hibernate() -> freeze_processes()). The userspace is thawed only
639 * after the whole resume procedure, including util_resume(), finishes.
641 static int util_suspend(struct hv_device
*dev
)
643 struct hv_util_service
*srv
= hv_get_drvdata(dev
);
646 if (srv
->util_pre_suspend
) {
647 ret
= srv
->util_pre_suspend();
652 vmbus_close(dev
->channel
);
657 static int util_resume(struct hv_device
*dev
)
659 struct hv_util_service
*srv
= hv_get_drvdata(dev
);
662 if (srv
->util_pre_resume
) {
663 ret
= srv
->util_pre_resume();
668 ret
= vmbus_open(dev
->channel
, HV_UTIL_RING_SEND_SIZE
,
669 HV_UTIL_RING_RECV_SIZE
, NULL
, 0, srv
->util_cb
,
674 static const struct hv_vmbus_device_id id_table
[] = {
677 .driver_data
= (unsigned long)&util_shutdown
679 /* Time synch guid */
681 .driver_data
= (unsigned long)&util_timesynch
684 { HV_HEART_BEAT_GUID
,
685 .driver_data
= (unsigned long)&util_heartbeat
689 .driver_data
= (unsigned long)&util_kvp
693 .driver_data
= (unsigned long)&util_vss
698 MODULE_DEVICE_TABLE(vmbus
, id_table
);
700 /* The one and only one */
701 static struct hv_driver util_drv
= {
703 .id_table
= id_table
,
705 .remove
= util_remove
,
706 .suspend
= util_suspend
,
707 .resume
= util_resume
,
709 .probe_type
= PROBE_PREFER_ASYNCHRONOUS
,
713 static int hv_ptp_enable(struct ptp_clock_info
*info
,
714 struct ptp_clock_request
*request
, int on
)
719 static int hv_ptp_settime(struct ptp_clock_info
*p
, const struct timespec64
*ts
)
724 static int hv_ptp_adjfine(struct ptp_clock_info
*ptp
, long delta
)
728 static int hv_ptp_adjtime(struct ptp_clock_info
*ptp
, s64 delta
)
/* PTP gettime: report the adjusted host time (may return -ESTALE). */
static int hv_ptp_gettime(struct ptp_clock_info *info, struct timespec64 *ts)
{
	return hv_get_adj_host_time(ts);
}
738 static struct ptp_clock_info ptp_hyperv_info
= {
740 .enable
= hv_ptp_enable
,
741 .adjtime
= hv_ptp_adjtime
,
742 .adjfine
= hv_ptp_adjfine
,
743 .gettime64
= hv_ptp_gettime
,
744 .settime64
= hv_ptp_settime
,
745 .owner
= THIS_MODULE
,
/* PTP clock handle; NULL when registration failed or PTP is disabled. */
static struct ptp_clock *hv_ptp_clock;
750 static int hv_timesync_init(struct hv_util_service
*srv
)
752 spin_lock_init(&host_ts
.lock
);
754 INIT_WORK(&adj_time_work
, hv_set_host_time
);
757 * ptp_clock_register() returns NULL when CONFIG_PTP_1588_CLOCK is
758 * disabled but the driver is still useful without the PTP device
759 * as it still handles the ICTIMESYNCFLAG_SYNC case.
761 hv_ptp_clock
= ptp_clock_register(&ptp_hyperv_info
, NULL
);
762 if (IS_ERR_OR_NULL(hv_ptp_clock
)) {
763 pr_err("cannot register PTP clock: %d\n",
764 PTR_ERR_OR_ZERO(hv_ptp_clock
));
771 static void hv_timesync_cancel_work(void)
773 cancel_work_sync(&adj_time_work
);
/* Pre-suspend hook: flush pending time-set work. Returns 0 (cannot fail). */
static int hv_timesync_pre_suspend(void)
{
	hv_timesync_cancel_work();
	return 0;
}
782 static void hv_timesync_deinit(void)
785 ptp_clock_unregister(hv_ptp_clock
);
787 hv_timesync_cancel_work();
790 static int __init
init_hyperv_utils(void)
792 pr_info("Registering HyperV Utility Driver\n");
794 return vmbus_driver_register(&util_drv
);
797 static void exit_hyperv_utils(void)
799 pr_info("De-Registered HyperV Utility Driver\n");
801 vmbus_driver_unregister(&util_drv
);
804 module_init(init_hyperv_utils
);
805 module_exit(exit_hyperv_utils
);
807 MODULE_DESCRIPTION("Hyper-V Utilities");
808 MODULE_LICENSE("GPL");