1 // SPDX-License-Identifier: GPL-2.0-only
3 * Copyright (c) 2010, Microsoft Corporation.
6 * Haiyang Zhang <haiyangz@microsoft.com>
7 * Hank Janssen <hjanssen@microsoft.com>
9 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
11 #include <linux/kernel.h>
12 #include <linux/init.h>
13 #include <linux/module.h>
14 #include <linux/slab.h>
15 #include <linux/sysctl.h>
16 #include <linux/reboot.h>
17 #include <linux/hyperv.h>
18 #include <linux/clockchips.h>
19 #include <linux/ptp_clock_kernel.h>
20 #include <clocksource/hyperv_timer.h>
21 #include <asm/mshyperv.h>
23 #include "hyperv_vmbus.h"
29 #define SD_VERSION_3_1 (SD_MAJOR << 16 | SD_MINOR_1)
30 #define SD_VERSION_3_2 (SD_MAJOR << 16 | SD_MINOR_2)
31 #define SD_VERSION (SD_MAJOR << 16 | SD_MINOR)
34 #define SD_VERSION_1 (SD_MAJOR_1 << 16 | SD_MINOR)
38 #define TS_VERSION (TS_MAJOR << 16 | TS_MINOR)
41 #define TS_VERSION_1 (TS_MAJOR_1 << 16 | TS_MINOR)
44 #define TS_VERSION_3 (TS_MAJOR_3 << 16 | TS_MINOR)
48 #define HB_VERSION (HB_MAJOR << 16 | HB_MINOR)
51 #define HB_VERSION_1 (HB_MAJOR_1 << 16 | HB_MINOR)
53 static int sd_srv_version
;
54 static int ts_srv_version
;
55 static int hb_srv_version
;
57 #define SD_VER_COUNT 4
58 static const int sd_versions
[] = {
65 #define TS_VER_COUNT 3
66 static const int ts_versions
[] = {
72 #define HB_VER_COUNT 2
73 static const int hb_versions
[] = {
78 #define FW_VER_COUNT 2
79 static const int fw_versions
[] = {
85 * Send the "hibernate" udev event in a thread context.
87 struct hibernate_work_context
{
88 struct work_struct work
;
89 struct hv_device
*dev
;
92 static struct hibernate_work_context hibernate_context
;
93 static bool hibernation_supported
;
95 static void send_hibernate_uevent(struct work_struct
*work
)
97 char *uevent_env
[2] = { "EVENT=hibernate", NULL
};
98 struct hibernate_work_context
*ctx
;
100 ctx
= container_of(work
, struct hibernate_work_context
, work
);
102 kobject_uevent_env(&ctx
->dev
->device
.kobj
, KOBJ_CHANGE
, uevent_env
);
104 pr_info("Sent hibernation uevent\n");
107 static int hv_shutdown_init(struct hv_util_service
*srv
)
109 struct vmbus_channel
*channel
= srv
->channel
;
111 INIT_WORK(&hibernate_context
.work
, send_hibernate_uevent
);
112 hibernate_context
.dev
= channel
->device_obj
;
114 hibernation_supported
= hv_is_hibernation_supported();
119 static void shutdown_onchannelcallback(void *context
);
120 static struct hv_util_service util_shutdown
= {
121 .util_cb
= shutdown_onchannelcallback
,
122 .util_init
= hv_shutdown_init
,
125 static int hv_timesync_init(struct hv_util_service
*srv
);
126 static int hv_timesync_pre_suspend(void);
127 static void hv_timesync_deinit(void);
129 static void timesync_onchannelcallback(void *context
);
130 static struct hv_util_service util_timesynch
= {
131 .util_cb
= timesync_onchannelcallback
,
132 .util_init
= hv_timesync_init
,
133 .util_pre_suspend
= hv_timesync_pre_suspend
,
134 .util_deinit
= hv_timesync_deinit
,
137 static void heartbeat_onchannelcallback(void *context
);
138 static struct hv_util_service util_heartbeat
= {
139 .util_cb
= heartbeat_onchannelcallback
,
142 static struct hv_util_service util_kvp
= {
143 .util_cb
= hv_kvp_onchannelcallback
,
144 .util_init
= hv_kvp_init
,
145 .util_pre_suspend
= hv_kvp_pre_suspend
,
146 .util_pre_resume
= hv_kvp_pre_resume
,
147 .util_deinit
= hv_kvp_deinit
,
150 static struct hv_util_service util_vss
= {
151 .util_cb
= hv_vss_onchannelcallback
,
152 .util_init
= hv_vss_init
,
153 .util_pre_suspend
= hv_vss_pre_suspend
,
154 .util_pre_resume
= hv_vss_pre_resume
,
155 .util_deinit
= hv_vss_deinit
,
158 static struct hv_util_service util_fcopy
= {
159 .util_cb
= hv_fcopy_onchannelcallback
,
160 .util_init
= hv_fcopy_init
,
161 .util_pre_suspend
= hv_fcopy_pre_suspend
,
162 .util_pre_resume
= hv_fcopy_pre_resume
,
163 .util_deinit
= hv_fcopy_deinit
,
166 static void perform_shutdown(struct work_struct
*dummy
)
168 orderly_poweroff(true);
171 static void perform_restart(struct work_struct
*dummy
)
177 * Perform the shutdown operation in a thread context.
179 static DECLARE_WORK(shutdown_work
, perform_shutdown
);
182 * Perform the restart operation in a thread context.
184 static DECLARE_WORK(restart_work
, perform_restart
);
186 static void shutdown_onchannelcallback(void *context
)
188 struct vmbus_channel
*channel
= context
;
189 struct work_struct
*work
= NULL
;
192 u8
*shut_txf_buf
= util_shutdown
.recv_buffer
;
194 struct shutdown_msg_data
*shutdown_msg
;
196 struct icmsg_hdr
*icmsghdrp
;
198 vmbus_recvpacket(channel
, shut_txf_buf
,
199 HV_HYP_PAGE_SIZE
, &recvlen
, &requestid
);
202 icmsghdrp
= (struct icmsg_hdr
*)&shut_txf_buf
[
203 sizeof(struct vmbuspipe_hdr
)];
205 if (icmsghdrp
->icmsgtype
== ICMSGTYPE_NEGOTIATE
) {
206 if (vmbus_prep_negotiate_resp(icmsghdrp
, shut_txf_buf
,
207 fw_versions
, FW_VER_COUNT
,
208 sd_versions
, SD_VER_COUNT
,
209 NULL
, &sd_srv_version
)) {
210 pr_info("Shutdown IC version %d.%d\n",
211 sd_srv_version
>> 16,
212 sd_srv_version
& 0xFFFF);
216 (struct shutdown_msg_data
*)&shut_txf_buf
[
217 sizeof(struct vmbuspipe_hdr
) +
218 sizeof(struct icmsg_hdr
)];
221 * shutdown_msg->flags can be 0 (shut down), 2 (reboot),
222 * or 4 (hibernate). Any of these may be bitwise-ORed with 1,
223 * which means the request is to be performed by force. Linux
224 * always tries to perform the request by force.
226 switch (shutdown_msg
->flags
) {
229 icmsghdrp
->status
= HV_S_OK
;
230 work
= &shutdown_work
;
231 pr_info("Shutdown request received -"
232 " graceful shutdown initiated\n");
236 icmsghdrp
->status
= HV_S_OK
;
237 work
= &restart_work
;
238 pr_info("Restart request received -"
239 " graceful restart initiated\n");
243 pr_info("Hibernation request received\n");
244 icmsghdrp
->status
= hibernation_supported
?
246 if (hibernation_supported
)
247 work
= &hibernate_context
.work
;
250 icmsghdrp
->status
= HV_E_FAIL
;
251 pr_info("Shutdown request received -"
252 " Invalid request\n");
257 icmsghdrp
->icflags
= ICMSGHDRFLAG_TRANSACTION
258 | ICMSGHDRFLAG_RESPONSE
;
260 vmbus_sendpacket(channel
, shut_txf_buf
,
262 VM_PKT_DATA_INBAND
, 0);
270 * Set the host time in a process context.
272 static struct work_struct adj_time_work
;
275 * The last time sample, received from the host. PTP device responds to
276 * requests by using this data and the current partition-wide time reference
285 static inline u64
reftime_to_ns(u64 reftime
)
287 return (reftime
- WLTIMEDELTA
) * 100;
291 * Hard coded threshold for host timesync delay: 600 seconds
293 static const u64 HOST_TIMESYNC_DELAY_THRESH
= 600 * (u64
)NSEC_PER_SEC
;
295 static int hv_get_adj_host_time(struct timespec64
*ts
)
297 u64 newtime
, reftime
, timediff_adj
;
301 spin_lock_irqsave(&host_ts
.lock
, flags
);
302 reftime
= hv_read_reference_counter();
305 * We need to let the caller know that last update from host
306 * is older than the max allowable threshold. clock_gettime()
307 * and PTP ioctl do not have a documented error that we could
308 * return for this specific case. Use ESTALE to report this.
310 timediff_adj
= reftime
- host_ts
.ref_time
;
311 if (timediff_adj
* 100 > HOST_TIMESYNC_DELAY_THRESH
) {
312 pr_warn_once("TIMESYNC IC: Stale time stamp, %llu nsecs old\n",
313 (timediff_adj
* 100));
317 newtime
= host_ts
.host_time
+ timediff_adj
;
318 *ts
= ns_to_timespec64(reftime_to_ns(newtime
));
319 spin_unlock_irqrestore(&host_ts
.lock
, flags
);
324 static void hv_set_host_time(struct work_struct
*work
)
327 struct timespec64 ts
;
329 if (!hv_get_adj_host_time(&ts
))
330 do_settimeofday64(&ts
);
334 * Synchronize time with host after reboot, restore, etc.
336 * ICTIMESYNCFLAG_SYNC flag bit indicates reboot, restore events of the VM.
337 * After reboot the flag ICTIMESYNCFLAG_SYNC is included in the first time
338 * message after the timesync channel is opened. Since the hv_utils module is
339 * loaded after hv_vmbus, the first message is usually missed. This bit is
340 * considered a hard request to discipline the clock.
342 * ICTIMESYNCFLAG_SAMPLE bit indicates a time sample from host. This is
343 * typically used as a hint to the guest. The guest is under no obligation
344 * to discipline the clock.
346 static inline void adj_guesttime(u64 hosttime
, u64 reftime
, u8 adj_flags
)
352 * Save the adjusted time sample from the host and the snapshot
353 * of the current system time.
355 spin_lock_irqsave(&host_ts
.lock
, flags
);
357 cur_reftime
= hv_read_reference_counter();
358 host_ts
.host_time
= hosttime
;
359 host_ts
.ref_time
= cur_reftime
;
362 * TimeSync v4 messages contain the reference time (the guest's
363 * Hyper-V clocksource reading when the time sample was generated),
364 * so we can improve the precision by adding the delta between now
365 * and the time of generation. For older protocols we set
366 * reftime == cur_reftime on call.
368 host_ts
.host_time
+= (cur_reftime
- reftime
);
370 spin_unlock_irqrestore(&host_ts
.lock
, flags
);
372 /* Schedule work to do do_settimeofday64() */
373 if (adj_flags
& ICTIMESYNCFLAG_SYNC
)
374 schedule_work(&adj_time_work
);
378 * Time Sync Channel message handler.
380 static void timesync_onchannelcallback(void *context
)
382 struct vmbus_channel
*channel
= context
;
385 struct icmsg_hdr
*icmsghdrp
;
386 struct ictimesync_data
*timedatap
;
387 struct ictimesync_ref_data
*refdata
;
388 u8
*time_txf_buf
= util_timesynch
.recv_buffer
;
391 * Drain the ring buffer and use the last packet to update
395 int ret
= vmbus_recvpacket(channel
, time_txf_buf
,
396 HV_HYP_PAGE_SIZE
, &recvlen
,
399 pr_warn_once("TimeSync IC pkt recv failed (Err: %d)\n",
407 icmsghdrp
= (struct icmsg_hdr
*)&time_txf_buf
[
408 sizeof(struct vmbuspipe_hdr
)];
410 if (icmsghdrp
->icmsgtype
== ICMSGTYPE_NEGOTIATE
) {
411 if (vmbus_prep_negotiate_resp(icmsghdrp
, time_txf_buf
,
412 fw_versions
, FW_VER_COUNT
,
413 ts_versions
, TS_VER_COUNT
,
414 NULL
, &ts_srv_version
)) {
415 pr_info("TimeSync IC version %d.%d\n",
416 ts_srv_version
>> 16,
417 ts_srv_version
& 0xFFFF);
420 if (ts_srv_version
> TS_VERSION_3
) {
421 refdata
= (struct ictimesync_ref_data
*)
423 sizeof(struct vmbuspipe_hdr
) +
424 sizeof(struct icmsg_hdr
)];
426 adj_guesttime(refdata
->parenttime
,
427 refdata
->vmreferencetime
,
430 timedatap
= (struct ictimesync_data
*)
432 sizeof(struct vmbuspipe_hdr
) +
433 sizeof(struct icmsg_hdr
)];
434 adj_guesttime(timedatap
->parenttime
,
435 hv_read_reference_counter(),
440 icmsghdrp
->icflags
= ICMSGHDRFLAG_TRANSACTION
441 | ICMSGHDRFLAG_RESPONSE
;
443 vmbus_sendpacket(channel
, time_txf_buf
,
445 VM_PKT_DATA_INBAND
, 0);
450 * Heartbeat functionality.
451 * Every two seconds, Hyper-V sends us a heartbeat request message.
452 * We respond to this message, and Hyper-V knows we are alive.
454 static void heartbeat_onchannelcallback(void *context
)
456 struct vmbus_channel
*channel
= context
;
459 struct icmsg_hdr
*icmsghdrp
;
460 struct heartbeat_msg_data
*heartbeat_msg
;
461 u8
*hbeat_txf_buf
= util_heartbeat
.recv_buffer
;
465 vmbus_recvpacket(channel
, hbeat_txf_buf
,
466 HV_HYP_PAGE_SIZE
, &recvlen
, &requestid
);
471 icmsghdrp
= (struct icmsg_hdr
*)&hbeat_txf_buf
[
472 sizeof(struct vmbuspipe_hdr
)];
474 if (icmsghdrp
->icmsgtype
== ICMSGTYPE_NEGOTIATE
) {
475 if (vmbus_prep_negotiate_resp(icmsghdrp
,
477 fw_versions
, FW_VER_COUNT
,
478 hb_versions
, HB_VER_COUNT
,
479 NULL
, &hb_srv_version
)) {
481 pr_info("Heartbeat IC version %d.%d\n",
482 hb_srv_version
>> 16,
483 hb_srv_version
& 0xFFFF);
487 (struct heartbeat_msg_data
*)&hbeat_txf_buf
[
488 sizeof(struct vmbuspipe_hdr
) +
489 sizeof(struct icmsg_hdr
)];
491 heartbeat_msg
->seq_num
+= 1;
494 icmsghdrp
->icflags
= ICMSGHDRFLAG_TRANSACTION
495 | ICMSGHDRFLAG_RESPONSE
;
497 vmbus_sendpacket(channel
, hbeat_txf_buf
,
499 VM_PKT_DATA_INBAND
, 0);
503 #define HV_UTIL_RING_SEND_SIZE VMBUS_RING_SIZE(3 * HV_HYP_PAGE_SIZE)
504 #define HV_UTIL_RING_RECV_SIZE VMBUS_RING_SIZE(3 * HV_HYP_PAGE_SIZE)
506 static int util_probe(struct hv_device
*dev
,
507 const struct hv_vmbus_device_id
*dev_id
)
509 struct hv_util_service
*srv
=
510 (struct hv_util_service
*)dev_id
->driver_data
;
513 srv
->recv_buffer
= kmalloc(HV_HYP_PAGE_SIZE
* 4, GFP_KERNEL
);
514 if (!srv
->recv_buffer
)
516 srv
->channel
= dev
->channel
;
517 if (srv
->util_init
) {
518 ret
= srv
->util_init(srv
);
526 * The set of services managed by the util driver are not performance
527 * critical and do not need batched reading. Furthermore, some services
528 * such as KVP can only handle one message from the host at a time.
529 * Turn off batched reading for all util drivers before we open the
532 set_channel_read_mode(dev
->channel
, HV_CALL_DIRECT
);
534 hv_set_drvdata(dev
, srv
);
536 ret
= vmbus_open(dev
->channel
, HV_UTIL_RING_SEND_SIZE
,
537 HV_UTIL_RING_RECV_SIZE
, NULL
, 0, srv
->util_cb
,
545 if (srv
->util_deinit
)
548 kfree(srv
->recv_buffer
);
552 static int util_remove(struct hv_device
*dev
)
554 struct hv_util_service
*srv
= hv_get_drvdata(dev
);
556 if (srv
->util_deinit
)
558 vmbus_close(dev
->channel
);
559 kfree(srv
->recv_buffer
);
565 * When we're in util_suspend(), all the userspace processes have been frozen
566 * (refer to hibernate() -> freeze_processes()). The userspace is thawed only
567 * after the whole resume procedure, including util_resume(), finishes.
569 static int util_suspend(struct hv_device
*dev
)
571 struct hv_util_service
*srv
= hv_get_drvdata(dev
);
574 if (srv
->util_pre_suspend
) {
575 ret
= srv
->util_pre_suspend();
580 vmbus_close(dev
->channel
);
585 static int util_resume(struct hv_device
*dev
)
587 struct hv_util_service
*srv
= hv_get_drvdata(dev
);
590 if (srv
->util_pre_resume
) {
591 ret
= srv
->util_pre_resume();
596 ret
= vmbus_open(dev
->channel
, HV_UTIL_RING_SEND_SIZE
,
597 HV_UTIL_RING_RECV_SIZE
, NULL
, 0, srv
->util_cb
,
602 static const struct hv_vmbus_device_id id_table
[] = {
605 .driver_data
= (unsigned long)&util_shutdown
607 /* Time synch guid */
609 .driver_data
= (unsigned long)&util_timesynch
612 { HV_HEART_BEAT_GUID
,
613 .driver_data
= (unsigned long)&util_heartbeat
617 .driver_data
= (unsigned long)&util_kvp
621 .driver_data
= (unsigned long)&util_vss
625 .driver_data
= (unsigned long)&util_fcopy
630 MODULE_DEVICE_TABLE(vmbus
, id_table
);
632 /* The one and only one */
633 static struct hv_driver util_drv
= {
635 .id_table
= id_table
,
637 .remove
= util_remove
,
638 .suspend
= util_suspend
,
639 .resume
= util_resume
,
641 .probe_type
= PROBE_PREFER_ASYNCHRONOUS
,
645 static int hv_ptp_enable(struct ptp_clock_info
*info
,
646 struct ptp_clock_request
*request
, int on
)
651 static int hv_ptp_settime(struct ptp_clock_info
*p
, const struct timespec64
*ts
)
656 static int hv_ptp_adjfreq(struct ptp_clock_info
*ptp
, s32 delta
)
660 static int hv_ptp_adjtime(struct ptp_clock_info
*ptp
, s64 delta
)
665 static int hv_ptp_gettime(struct ptp_clock_info
*info
, struct timespec64
*ts
)
667 return hv_get_adj_host_time(ts
);
670 static struct ptp_clock_info ptp_hyperv_info
= {
672 .enable
= hv_ptp_enable
,
673 .adjtime
= hv_ptp_adjtime
,
674 .adjfreq
= hv_ptp_adjfreq
,
675 .gettime64
= hv_ptp_gettime
,
676 .settime64
= hv_ptp_settime
,
677 .owner
= THIS_MODULE
,
680 static struct ptp_clock
*hv_ptp_clock
;
682 static int hv_timesync_init(struct hv_util_service
*srv
)
684 /* TimeSync requires Hyper-V clocksource. */
685 if (!hv_read_reference_counter
)
688 spin_lock_init(&host_ts
.lock
);
690 INIT_WORK(&adj_time_work
, hv_set_host_time
);
693 * ptp_clock_register() returns NULL when CONFIG_PTP_1588_CLOCK is
694 * disabled but the driver is still useful without the PTP device
695 * as it still handles the ICTIMESYNCFLAG_SYNC case.
697 hv_ptp_clock
= ptp_clock_register(&ptp_hyperv_info
, NULL
);
698 if (IS_ERR_OR_NULL(hv_ptp_clock
)) {
699 pr_err("cannot register PTP clock: %ld\n",
700 PTR_ERR(hv_ptp_clock
));
707 static void hv_timesync_cancel_work(void)
709 cancel_work_sync(&adj_time_work
);
712 static int hv_timesync_pre_suspend(void)
714 hv_timesync_cancel_work();
718 static void hv_timesync_deinit(void)
721 ptp_clock_unregister(hv_ptp_clock
);
723 hv_timesync_cancel_work();
726 static int __init
init_hyperv_utils(void)
728 pr_info("Registering HyperV Utility Driver\n");
730 return vmbus_driver_register(&util_drv
);
733 static void exit_hyperv_utils(void)
735 pr_info("De-Registered HyperV Utility Driver\n");
737 vmbus_driver_unregister(&util_drv
);
740 module_init(init_hyperv_utils
);
741 module_exit(exit_hyperv_utils
);
743 MODULE_DESCRIPTION("Hyper-V Utilities");
744 MODULE_LICENSE("GPL");