// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (c) 2009, Microsoft Corporation.
 *
 * Authors:
 *   Haiyang Zhang <haiyangz@microsoft.com>
 *   Hank Janssen  <hjanssen@microsoft.com>
 */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/wait.h>
#include <linux/slab.h>
#include <linux/module.h>
#include <linux/hyperv.h>
#include <linux/uio.h>
#include <linux/interrupt.h>

#include <asm/mshyperv.h>

#include "hyperv_vmbus.h"
/*
 * hv_gpadl_size - Return the real size of a gpadl, the size that Hyper-V uses
 *
 * For BUFFER gpadl, Hyper-V uses the exact same size as the guest does.
 *
 * For RING gpadl, in each ring, the guest uses one PAGE_SIZE as the header
 * (because of the alignment requirement), however, the hypervisor only
 * uses the first HV_HYP_PAGE_SIZE as the header, therefore leaving a
 * (PAGE_SIZE - HV_HYP_PAGE_SIZE) gap. And since there are two rings in a
 * ringbuffer, the total size for a RING gpadl that Hyper-V uses is the
 * total size that the guest uses minus twice the gap size.
 */
static inline u32 hv_gpadl_size(enum hv_gpadl_type type, u32 size)
{
	switch (type) {
	case HV_GPADL_BUFFER:
		return size;
	case HV_GPADL_RING:
		/* The size of a ringbuffer must be page-aligned */
		BUG_ON(size % PAGE_SIZE);
		/*
		 * Two things to notice here:
		 * 1) We're processing two ring buffers as a unit
		 * 2) We're skipping any space larger than HV_HYP_PAGE_SIZE in
		 *    the first guest-size page of each of the two ring buffers.
		 * So we effectively subtract out two guest-size pages, and add
		 * back two Hyper-V size pages.
		 */
		return size - 2 * (PAGE_SIZE - HV_HYP_PAGE_SIZE);
	}
	BUG();
	return 0;
}
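/*
 * Worked example (illustrative only, not part of the upstream file): on a
 * guest built with 64 KiB pages, HV_HYP_PAGE_SIZE remains 4 KiB, so each
 * ring wastes a 60 KiB gap behind its header page. A ring pair of
 * 256 KiB + 256 KiB therefore becomes a RING gpadl of
 * 512 KiB - 2 * (64 KiB - 4 KiB) = 392 KiB from Hyper-V's point of view.
 * On x86, where PAGE_SIZE == HV_HYP_PAGE_SIZE, the gap is zero.
 */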
/*
 * hv_ring_gpadl_send_hvpgoffset - Calculate the send offset (in unit of
 *				   HV_HYP_PAGE) in a ring gpadl based on the
 *				   offset in the guest
 *
 * @offset: the offset (in bytes) where the send ringbuffer starts in the
 *	    virtual address space of the guest
 */
static inline u32 hv_ring_gpadl_send_hvpgoffset(u32 offset)
{
	/*
	 * For RING gpadl, in each ring, the guest uses one PAGE_SIZE as the
	 * header (because of the alignment requirement), however, the
	 * hypervisor only uses the first HV_HYP_PAGE_SIZE as the header,
	 * therefore leaving a (PAGE_SIZE - HV_HYP_PAGE_SIZE) gap.
	 *
	 * And to calculate the effective send offset in gpadl, we need to
	 * subtract this gap.
	 */
	return (offset - (PAGE_SIZE - HV_HYP_PAGE_SIZE)) >> HV_HYP_PAGE_SHIFT;
}
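/*
 * Worked example (illustrative only): continuing the 64 KiB-page case above,
 * an @offset of 256 KiB maps to a Hyper-V page offset of
 * (256 KiB - 60 KiB) >> 12 = 49. With 4 KiB guest pages the gap is zero and
 * the result is simply offset >> HV_HYP_PAGE_SHIFT.
 */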
/*
 * hv_gpadl_hvpfn - Return the Hyper-V page PFN of the @i th Hyper-V page in
 *		    the gpadl
 *
 * @type: the type of the gpadl
 * @kbuffer: the pointer to the gpadl in the guest
 * @size: the total size (in bytes) of the gpadl
 * @send_offset: the offset (in bytes) where the send ringbuffer starts in the
 *		 virtual address space of the guest
 * @i: the index
 */
static inline u64 hv_gpadl_hvpfn(enum hv_gpadl_type type, void *kbuffer,
				 u32 size, u32 send_offset, int i)
{
	int send_idx = hv_ring_gpadl_send_hvpgoffset(send_offset);
	unsigned long delta = 0UL;

	switch (type) {
	case HV_GPADL_BUFFER:
		break;
	case HV_GPADL_RING:
		if (i == 0)
			delta = 0;
		else if (i <= send_idx)
			delta = PAGE_SIZE - HV_HYP_PAGE_SIZE;
		else
			delta = 2 * (PAGE_SIZE - HV_HYP_PAGE_SIZE);
		break;
	default:
		BUG();
		break;
	}

	return virt_to_hvpfn(kbuffer + delta + (HV_HYP_PAGE_SIZE * i));
}
/*
 * vmbus_setevent - Trigger an event notification on the specified
 * channel.
 */
void vmbus_setevent(struct vmbus_channel *channel)
{
	struct hv_monitor_page *monitorpage;

	trace_vmbus_setevent(channel);

	/*
	 * For channels marked as in "low latency" mode
	 * bypass the monitor page mechanism.
	 */
	if (channel->offermsg.monitor_allocated && !channel->low_latency) {
		vmbus_send_interrupt(channel->offermsg.child_relid);

		/* Get the child to parent monitor page */
		monitorpage = vmbus_connection.monitor_pages[1];

		sync_set_bit(channel->monitor_bit,
			(unsigned long *)&monitorpage->trigger_group
					[channel->monitor_grp].pending);
	} else {
		vmbus_set_event(channel);
	}
}
EXPORT_SYMBOL_GPL(vmbus_setevent);
/* vmbus_free_ring - drop mapping of ring buffer */
void vmbus_free_ring(struct vmbus_channel *channel)
{
	hv_ringbuffer_cleanup(&channel->outbound);
	hv_ringbuffer_cleanup(&channel->inbound);

	if (channel->ringbuffer_page) {
		__free_pages(channel->ringbuffer_page,
			     get_order(channel->ringbuffer_pagecount
				       << PAGE_SHIFT));
		channel->ringbuffer_page = NULL;
	}
}
EXPORT_SYMBOL_GPL(vmbus_free_ring);
/* vmbus_alloc_ring - allocate and map pages for ring buffer */
int vmbus_alloc_ring(struct vmbus_channel *newchannel,
		     u32 send_size, u32 recv_size)
{
	struct page *page;
	int order;

	if (send_size % PAGE_SIZE || recv_size % PAGE_SIZE)
		return -EINVAL;

	/* Allocate the ring buffer */
	order = get_order(send_size + recv_size);
	page = alloc_pages_node(cpu_to_node(newchannel->target_cpu),
				GFP_KERNEL|__GFP_ZERO, order);

	if (!page)
		page = alloc_pages(GFP_KERNEL|__GFP_ZERO, order);

	if (!page)
		return -ENOMEM;

	newchannel->ringbuffer_page = page;
	newchannel->ringbuffer_pagecount = (send_size + recv_size) >> PAGE_SHIFT;
	newchannel->ringbuffer_send_offset = send_size >> PAGE_SHIFT;

	return 0;
}
EXPORT_SYMBOL_GPL(vmbus_alloc_ring);
/* Used for Hyper-V Socket: a guest client's connect() to the host */
int vmbus_send_tl_connect_request(const guid_t *shv_guest_servie_id,
				  const guid_t *shv_host_servie_id)
{
	struct vmbus_channel_tl_connect_request conn_msg;
	int ret;

	memset(&conn_msg, 0, sizeof(conn_msg));
	conn_msg.header.msgtype = CHANNELMSG_TL_CONNECT_REQUEST;
	conn_msg.guest_endpoint_id = *shv_guest_servie_id;
	conn_msg.host_service_id = *shv_host_servie_id;

	ret = vmbus_post_msg(&conn_msg, sizeof(conn_msg), true);

	trace_vmbus_send_tl_connect_request(&conn_msg, ret);

	return ret;
}
EXPORT_SYMBOL_GPL(vmbus_send_tl_connect_request);
/*
 * Set/change the vCPU (@target_vp) the channel (@child_relid) will interrupt.
 *
 * CHANNELMSG_MODIFYCHANNEL messages are asynchronous. Also, Hyper-V does not
 * ACK such messages. IOW we can't know when the host will stop interrupting
 * the "old" vCPU and start interrupting the "new" vCPU for the given channel.
 *
 * The CHANNELMSG_MODIFYCHANNEL message type is supported since VMBus version
 * VERSION_WIN10_V4_1.
 */
int vmbus_send_modifychannel(u32 child_relid, u32 target_vp)
{
	struct vmbus_channel_modifychannel conn_msg;
	int ret;

	memset(&conn_msg, 0, sizeof(conn_msg));
	conn_msg.header.msgtype = CHANNELMSG_MODIFYCHANNEL;
	conn_msg.child_relid = child_relid;
	conn_msg.target_vp = target_vp;

	ret = vmbus_post_msg(&conn_msg, sizeof(conn_msg), true);

	trace_vmbus_send_modifychannel(&conn_msg, ret);

	return ret;
}
EXPORT_SYMBOL_GPL(vmbus_send_modifychannel);
/*
 * create_gpadl_header - Creates a gpadl for the specified buffer
 */
static int create_gpadl_header(enum hv_gpadl_type type, void *kbuffer,
			       u32 size, u32 send_offset,
			       struct vmbus_channel_msginfo **msginfo)
{
	int i;
	int pagecount;
	struct vmbus_channel_gpadl_header *gpadl_header;
	struct vmbus_channel_gpadl_body *gpadl_body;
	struct vmbus_channel_msginfo *msgheader;
	struct vmbus_channel_msginfo *msgbody = NULL;
	u32 msgsize;

	int pfnsum, pfncount, pfnleft, pfncurr, pfnsize;

	pagecount = hv_gpadl_size(type, size) >> HV_HYP_PAGE_SHIFT;

	/* do we need a gpadl body msg */
	pfnsize = MAX_SIZE_CHANNEL_MESSAGE -
		  sizeof(struct vmbus_channel_gpadl_header) -
		  sizeof(struct gpa_range);
	pfncount = pfnsize / sizeof(u64);

	if (pagecount > pfncount) {
		/* we need a gpadl body */
		/* fill in the header */
		msgsize = sizeof(struct vmbus_channel_msginfo) +
			  sizeof(struct vmbus_channel_gpadl_header) +
			  sizeof(struct gpa_range) + pfncount * sizeof(u64);
		msgheader = kzalloc(msgsize, GFP_KERNEL);
		if (!msgheader)
			goto nomem;

		INIT_LIST_HEAD(&msgheader->submsglist);
		msgheader->msgsize = msgsize;

		gpadl_header = (struct vmbus_channel_gpadl_header *)
			msgheader->msg;
		gpadl_header->rangecount = 1;
		gpadl_header->range_buflen = sizeof(struct gpa_range) +
					 pagecount * sizeof(u64);
		gpadl_header->range[0].byte_offset = 0;
		gpadl_header->range[0].byte_count = hv_gpadl_size(type, size);
		for (i = 0; i < pfncount; i++)
			gpadl_header->range[0].pfn_array[i] = hv_gpadl_hvpfn(
				type, kbuffer, size, send_offset, i);
		*msginfo = msgheader;

		pfnsum = pfncount;
		pfnleft = pagecount - pfncount;

		/* how many pfns can we fit */
		pfnsize = MAX_SIZE_CHANNEL_MESSAGE -
			  sizeof(struct vmbus_channel_gpadl_body);
		pfncount = pfnsize / sizeof(u64);

		/* fill in the body */
		while (pfnleft) {
			if (pfnleft > pfncount)
				pfncurr = pfncount;
			else
				pfncurr = pfnleft;

			msgsize = sizeof(struct vmbus_channel_msginfo) +
				  sizeof(struct vmbus_channel_gpadl_body) +
				  pfncurr * sizeof(u64);
			msgbody = kzalloc(msgsize, GFP_KERNEL);

			if (!msgbody) {
				struct vmbus_channel_msginfo *pos = NULL;
				struct vmbus_channel_msginfo *tmp = NULL;
				/*
				 * Free up all the allocated messages.
				 */
				list_for_each_entry_safe(pos, tmp,
					&msgheader->submsglist,
					msglistentry) {

					list_del(&pos->msglistentry);
					kfree(pos);
				}

				goto nomem;
			}

			msgbody->msgsize = msgsize;
			gpadl_body =
				(struct vmbus_channel_gpadl_body *)msgbody->msg;

			/*
			 * Gpadl is u32 and we are using a pointer which could
			 * be 64-bit.
			 * This is governed by the guest/host protocol and
			 * so the hypervisor guarantees that this is ok.
			 */
			for (i = 0; i < pfncurr; i++)
				gpadl_body->pfn[i] = hv_gpadl_hvpfn(type,
					kbuffer, size, send_offset, pfnsum + i);

			/* add to msg header */
			list_add_tail(&msgbody->msglistentry,
				      &msgheader->submsglist);
			pfnsum += pfncurr;
			pfnleft -= pfncurr;
		}
	} else {
		/* everything fits in a header */
		msgsize = sizeof(struct vmbus_channel_msginfo) +
			  sizeof(struct vmbus_channel_gpadl_header) +
			  sizeof(struct gpa_range) + pagecount * sizeof(u64);
		msgheader = kzalloc(msgsize, GFP_KERNEL);
		if (msgheader == NULL)
			goto nomem;

		INIT_LIST_HEAD(&msgheader->submsglist);
		msgheader->msgsize = msgsize;

		gpadl_header = (struct vmbus_channel_gpadl_header *)
			msgheader->msg;
		gpadl_header->rangecount = 1;
		gpadl_header->range_buflen = sizeof(struct gpa_range) +
					 pagecount * sizeof(u64);
		gpadl_header->range[0].byte_offset = 0;
		gpadl_header->range[0].byte_count = hv_gpadl_size(type, size);
		for (i = 0; i < pagecount; i++)
			gpadl_header->range[0].pfn_array[i] = hv_gpadl_hvpfn(
				type, kbuffer, size, send_offset, i);

		*msginfo = msgheader;
	}

	return 0;
nomem:
	kfree(msgheader);
	kfree(msgbody);
	return -ENOMEM;
}
/*
 * __vmbus_establish_gpadl - Establish a GPADL for a buffer or ringbuffer
 *
 * @channel: a channel
 * @type: the type of the corresponding GPADL, only meaningful for the guest.
 * @kbuffer: from kmalloc or vmalloc
 * @size: page-size multiple
 * @send_offset: the offset (in bytes) where the send ring buffer starts,
 *		 should be 0 for BUFFER type gpadl
 * @gpadl_handle: the handle identifying the new GPADL, returned to the caller
 */
static int __vmbus_establish_gpadl(struct vmbus_channel *channel,
				   enum hv_gpadl_type type, void *kbuffer,
				   u32 size, u32 send_offset,
				   u32 *gpadl_handle)
{
	struct vmbus_channel_gpadl_header *gpadlmsg;
	struct vmbus_channel_gpadl_body *gpadl_body;
	struct vmbus_channel_msginfo *msginfo = NULL;
	struct vmbus_channel_msginfo *submsginfo, *tmp;
	struct list_head *curr;
	u32 next_gpadl_handle;
	unsigned long flags;
	int ret = 0;

	next_gpadl_handle =
		(atomic_inc_return(&vmbus_connection.next_gpadl_handle) - 1);

	ret = create_gpadl_header(type, kbuffer, size, send_offset, &msginfo);
	if (ret)
		return ret;

	init_completion(&msginfo->waitevent);
	msginfo->waiting_channel = channel;

	gpadlmsg = (struct vmbus_channel_gpadl_header *)msginfo->msg;
	gpadlmsg->header.msgtype = CHANNELMSG_GPADL_HEADER;
	gpadlmsg->child_relid = channel->offermsg.child_relid;
	gpadlmsg->gpadl = next_gpadl_handle;

	spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);
	list_add_tail(&msginfo->msglistentry,
		      &vmbus_connection.chn_msg_list);
	spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);

	if (channel->rescind) {
		ret = -ENODEV;
		goto cleanup;
	}

	ret = vmbus_post_msg(gpadlmsg, msginfo->msgsize -
			     sizeof(*msginfo), true);

	trace_vmbus_establish_gpadl_header(gpadlmsg, ret);

	if (ret != 0)
		goto cleanup;

	list_for_each(curr, &msginfo->submsglist) {
		submsginfo = (struct vmbus_channel_msginfo *)curr;
		gpadl_body =
			(struct vmbus_channel_gpadl_body *)submsginfo->msg;

		gpadl_body->header.msgtype =
			CHANNELMSG_GPADL_BODY;
		gpadl_body->gpadl = next_gpadl_handle;

		ret = vmbus_post_msg(gpadl_body,
				     submsginfo->msgsize - sizeof(*submsginfo),
				     true);

		trace_vmbus_establish_gpadl_body(gpadl_body, ret);

		if (ret != 0)
			goto cleanup;
	}
	wait_for_completion(&msginfo->waitevent);

	if (msginfo->response.gpadl_created.creation_status != 0) {
		pr_err("Failed to establish GPADL: err = 0x%x\n",
		       msginfo->response.gpadl_created.creation_status);

		ret = -EDQUOT;
		goto cleanup;
	}

	if (channel->rescind) {
		ret = -ENODEV;
		goto cleanup;
	}

	/* At this point, we received the gpadl created msg */
	*gpadl_handle = gpadlmsg->gpadl;

cleanup:
	spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);
	list_del(&msginfo->msglistentry);
	spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);
	list_for_each_entry_safe(submsginfo, tmp, &msginfo->submsglist,
				 msglistentry) {
		kfree(submsginfo);
	}

	kfree(msginfo);
	return ret;
}
/*
 * vmbus_establish_gpadl - Establish a GPADL for the specified buffer
 *
 * @channel: a channel
 * @kbuffer: from kmalloc or vmalloc
 * @size: page-size multiple
 * @gpadl_handle: the handle identifying the new GPADL, returned to the caller
 */
int vmbus_establish_gpadl(struct vmbus_channel *channel, void *kbuffer,
			  u32 size, u32 *gpadl_handle)
{
	return __vmbus_establish_gpadl(channel, HV_GPADL_BUFFER, kbuffer, size,
				       0U, gpadl_handle);
}
EXPORT_SYMBOL_GPL(vmbus_establish_gpadl);
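/*
 * Example (illustrative sketch only; "recv_buf", its 16-page size and the
 * minimal error handling are made up): a VMBus driver typically pins a
 * kernel buffer for the host with vmbus_establish_gpadl() and releases it
 * with vmbus_teardown_gpadl() when it is done.
 *
 *	void *recv_buf = vzalloc(16 * PAGE_SIZE);
 *	u32 gpadl = 0;
 *	int ret;
 *
 *	ret = vmbus_establish_gpadl(channel, recv_buf, 16 * PAGE_SIZE, &gpadl);
 *	if (ret)
 *		goto err_free;
 *	...
 *	vmbus_teardown_gpadl(channel, gpadl);
 *	vfree(recv_buf);
 */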
/*
 * request_arr_init - Allocates memory for the requestor array. Each slot
 * keeps track of the next available slot in the array. Initially, each
 * slot points to the next one (as in a Linked List). The last slot
 * does not point to anything, so its value is U64_MAX by default.
 * @size: The size of the array
 */
static u64 *request_arr_init(u32 size)
{
	int i;
	u64 *req_arr;

	req_arr = kcalloc(size, sizeof(u64), GFP_KERNEL);
	if (!req_arr)
		return NULL;

	for (i = 0; i < size - 1; i++)
		req_arr[i] = i + 1;

	/* Last slot (no more available slots) */
	req_arr[i] = U64_MAX;

	return req_arr;
}
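/*
 * For example (illustrative), request_arr_init(4) produces the free list
 * { 1, 2, 3, U64_MAX }: slot 0 points to slot 1, slot 1 to slot 2, and so
 * on, with U64_MAX marking the end of the list of free slots.
 */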
/*
 * vmbus_alloc_requestor - Initializes @rqstor's fields.
 * Index 0 is the first free slot
 * @size: Size of the requestor array
 */
static int vmbus_alloc_requestor(struct vmbus_requestor *rqstor, u32 size)
{
	u64 *rqst_arr;
	unsigned long *bitmap;

	rqst_arr = request_arr_init(size);
	if (!rqst_arr)
		return -ENOMEM;

	bitmap = bitmap_zalloc(size, GFP_KERNEL);
	if (!bitmap) {
		kfree(rqst_arr);
		return -ENOMEM;
	}

	rqstor->req_arr = rqst_arr;
	rqstor->req_bitmap = bitmap;
	rqstor->size = size;
	rqstor->next_request_id = 0;
	spin_lock_init(&rqstor->req_lock);

	return 0;
}
/*
 * vmbus_free_requestor - Frees memory allocated for @rqstor
 * @rqstor: Pointer to the requestor struct
 */
static void vmbus_free_requestor(struct vmbus_requestor *rqstor)
{
	kfree(rqstor->req_arr);
	bitmap_free(rqstor->req_bitmap);
}
static int __vmbus_open(struct vmbus_channel *newchannel,
			void *userdata, u32 userdatalen,
			void (*onchannelcallback)(void *context), void *context)
{
	struct vmbus_channel_open_channel *open_msg;
	struct vmbus_channel_msginfo *open_info = NULL;
	struct page *page = newchannel->ringbuffer_page;
	u32 send_pages, recv_pages;
	unsigned long flags;
	int err;

	if (userdatalen > MAX_USER_DEFINED_BYTES)
		return -EINVAL;

	send_pages = newchannel->ringbuffer_send_offset;
	recv_pages = newchannel->ringbuffer_pagecount - send_pages;

	if (newchannel->state != CHANNEL_OPEN_STATE)
		return -EINVAL;

	/* Create and init requestor */
	if (newchannel->rqstor_size) {
		if (vmbus_alloc_requestor(&newchannel->requestor, newchannel->rqstor_size))
			return -ENOMEM;
	}

	newchannel->state = CHANNEL_OPENING_STATE;
	newchannel->onchannel_callback = onchannelcallback;
	newchannel->channel_callback_context = context;

	err = hv_ringbuffer_init(&newchannel->outbound, page, send_pages);
	if (err)
		goto error_clean_ring;

	err = hv_ringbuffer_init(&newchannel->inbound,
				 &page[send_pages], recv_pages);
	if (err)
		goto error_clean_ring;

	/* Establish the gpadl for the ring buffer */
	newchannel->ringbuffer_gpadlhandle = 0;

	err = __vmbus_establish_gpadl(newchannel, HV_GPADL_RING,
				      page_address(newchannel->ringbuffer_page),
				      (send_pages + recv_pages) << PAGE_SHIFT,
				      newchannel->ringbuffer_send_offset << PAGE_SHIFT,
				      &newchannel->ringbuffer_gpadlhandle);
	if (err)
		goto error_clean_ring;

	/* Create and init the channel open message */
	open_info = kmalloc(sizeof(*open_info) +
			    sizeof(struct vmbus_channel_open_channel),
			    GFP_KERNEL);
	if (!open_info) {
		err = -ENOMEM;
		goto error_free_gpadl;
	}

	init_completion(&open_info->waitevent);
	open_info->waiting_channel = newchannel;

	open_msg = (struct vmbus_channel_open_channel *)open_info->msg;
	open_msg->header.msgtype = CHANNELMSG_OPENCHANNEL;
	open_msg->openid = newchannel->offermsg.child_relid;
	open_msg->child_relid = newchannel->offermsg.child_relid;
	open_msg->ringbuffer_gpadlhandle = newchannel->ringbuffer_gpadlhandle;
	/*
	 * The unit of ->downstream_ringbuffer_pageoffset is HV_HYP_PAGE and
	 * the unit of ->ringbuffer_send_offset (i.e. send_pages) is PAGE, so
	 * here we calculate it into HV_HYP_PAGE.
	 */
	open_msg->downstream_ringbuffer_pageoffset =
		hv_ring_gpadl_send_hvpgoffset(send_pages << PAGE_SHIFT);
	open_msg->target_vp = hv_cpu_number_to_vp_number(newchannel->target_cpu);

	if (userdatalen)
		memcpy(open_msg->userdata, userdata, userdatalen);

	spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);
	list_add_tail(&open_info->msglistentry,
		      &vmbus_connection.chn_msg_list);
	spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);

	if (newchannel->rescind) {
		err = -ENODEV;
		goto error_free_info;
	}

	err = vmbus_post_msg(open_msg,
			     sizeof(struct vmbus_channel_open_channel), true);

	trace_vmbus_open(open_msg, err);

	if (err != 0)
		goto error_clean_msglist;

	wait_for_completion(&open_info->waitevent);

	spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);
	list_del(&open_info->msglistentry);
	spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);

	if (newchannel->rescind) {
		err = -ENODEV;
		goto error_free_info;
	}

	if (open_info->response.open_result.status) {
		err = -EAGAIN;
		goto error_free_info;
	}

	newchannel->state = CHANNEL_OPENED_STATE;
	kfree(open_info);
	return 0;

error_clean_msglist:
	spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);
	list_del(&open_info->msglistentry);
	spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);
error_free_info:
	kfree(open_info);
error_free_gpadl:
	vmbus_teardown_gpadl(newchannel, newchannel->ringbuffer_gpadlhandle);
	newchannel->ringbuffer_gpadlhandle = 0;
error_clean_ring:
	hv_ringbuffer_cleanup(&newchannel->outbound);
	hv_ringbuffer_cleanup(&newchannel->inbound);
	vmbus_free_requestor(&newchannel->requestor);
	newchannel->state = CHANNEL_OPEN_STATE;
	return err;
}
/*
 * vmbus_connect_ring - Open the channel but reuse ring buffer
 */
int vmbus_connect_ring(struct vmbus_channel *newchannel,
		       void (*onchannelcallback)(void *context), void *context)
{
	return __vmbus_open(newchannel, NULL, 0, onchannelcallback, context);
}
EXPORT_SYMBOL_GPL(vmbus_connect_ring);
/*
 * vmbus_open - Open the specified channel.
 */
int vmbus_open(struct vmbus_channel *newchannel,
	       u32 send_ringbuffer_size, u32 recv_ringbuffer_size,
	       void *userdata, u32 userdatalen,
	       void (*onchannelcallback)(void *context), void *context)
{
	int err;

	err = vmbus_alloc_ring(newchannel, send_ringbuffer_size,
			       recv_ringbuffer_size);
	if (err)
		return err;

	err = __vmbus_open(newchannel, userdata, userdatalen,
			   onchannelcallback, context);
	if (err)
		vmbus_free_ring(newchannel);

	return err;
}
EXPORT_SYMBOL_GPL(vmbus_open);
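/*
 * Example (illustrative sketch only, not part of this file): a minimal
 * probe path for a VMBus device driver. The 4-page ring sizes and the
 * my_ring_cb()/my_probe() names are made up.
 *
 *	static void my_ring_cb(void *context)
 *	{
 *		struct hv_device *dev = context;
 *
 *		// drain the inbound ring here, e.g. with vmbus_recvpacket()
 *	}
 *
 *	static int my_probe(struct hv_device *dev,
 *			    const struct hv_vmbus_device_id *dev_id)
 *	{
 *		return vmbus_open(dev->channel, 4 * PAGE_SIZE, 4 * PAGE_SIZE,
 *				  NULL, 0, my_ring_cb, dev);
 *	}
 */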
/*
 * vmbus_teardown_gpadl - Teardown the specified GPADL handle
 */
int vmbus_teardown_gpadl(struct vmbus_channel *channel, u32 gpadl_handle)
{
	struct vmbus_channel_gpadl_teardown *msg;
	struct vmbus_channel_msginfo *info;
	unsigned long flags;
	int ret;

	info = kmalloc(sizeof(*info) +
		       sizeof(struct vmbus_channel_gpadl_teardown), GFP_KERNEL);
	if (!info)
		return -ENOMEM;

	init_completion(&info->waitevent);
	info->waiting_channel = channel;

	msg = (struct vmbus_channel_gpadl_teardown *)info->msg;

	msg->header.msgtype = CHANNELMSG_GPADL_TEARDOWN;
	msg->child_relid = channel->offermsg.child_relid;
	msg->gpadl = gpadl_handle;

	spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);
	list_add_tail(&info->msglistentry,
		      &vmbus_connection.chn_msg_list);
	spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);

	if (channel->rescind)
		goto post_msg_err;

	ret = vmbus_post_msg(msg, sizeof(struct vmbus_channel_gpadl_teardown),
			     true);

	trace_vmbus_teardown_gpadl(msg, ret);

	if (ret)
		goto post_msg_err;

	wait_for_completion(&info->waitevent);

post_msg_err:
	/*
	 * If the channel has been rescinded, we will be awakened by the
	 * rescind handler; set the error code to zero so we don't leak memory.
	 */
	if (channel->rescind)
		ret = 0;

	spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);
	list_del(&info->msglistentry);
	spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);

	kfree(info);
	return ret;
}
EXPORT_SYMBOL_GPL(vmbus_teardown_gpadl);
void vmbus_reset_channel_cb(struct vmbus_channel *channel)
{
	unsigned long flags;

	/*
	 * vmbus_on_event(), running in the per-channel tasklet, can race
	 * with vmbus_close_internal() in the case of SMP guest, e.g., when
	 * the former is accessing channel->inbound.ring_buffer, the latter
	 * could be freeing the ring_buffer pages, so here we must stop it
	 * first.
	 *
	 * vmbus_chan_sched() might call the netvsc driver callback function
	 * that ends up scheduling NAPI work that accesses the ring buffer.
	 * At this point, we have to ensure that any such work is completed
	 * and that the channel ring buffer is no longer being accessed, cf.
	 * the calls to napi_disable() in netvsc_device_remove().
	 */
	tasklet_disable(&channel->callback_event);

	/* See the inline comments in vmbus_chan_sched(). */
	spin_lock_irqsave(&channel->sched_lock, flags);
	channel->onchannel_callback = NULL;
	spin_unlock_irqrestore(&channel->sched_lock, flags);

	channel->sc_creation_callback = NULL;

	/* Re-enable tasklet for use on re-open */
	tasklet_enable(&channel->callback_event);
}
static int vmbus_close_internal(struct vmbus_channel *channel)
{
	struct vmbus_channel_close_channel *msg;
	int ret;

	vmbus_reset_channel_cb(channel);

	/*
	 * In case a device driver's probe() fails (e.g.,
	 * util_probe() -> vmbus_open() returns -ENOMEM) and the device is
	 * rescinded later (e.g., we dynamically disable an Integrated Service
	 * in Hyper-V Manager), the driver's remove() invokes vmbus_close():
	 * here we should skip most of the below cleanup work.
	 */
	if (channel->state != CHANNEL_OPENED_STATE)
		return -EINVAL;

	channel->state = CHANNEL_OPEN_STATE;

	/* Send a closing message */

	msg = &channel->close_msg.msg;

	msg->header.msgtype = CHANNELMSG_CLOSECHANNEL;
	msg->child_relid = channel->offermsg.child_relid;

	ret = vmbus_post_msg(msg, sizeof(struct vmbus_channel_close_channel),
			     true);

	trace_vmbus_close_internal(msg, ret);

	if (ret) {
		pr_err("Close failed: close post msg return is %d\n", ret);
		/*
		 * If we failed to post the close msg,
		 * it is perhaps better to leak memory.
		 */
	}

	/* Tear down the gpadl for the channel's ring buffer */
	else if (channel->ringbuffer_gpadlhandle) {
		ret = vmbus_teardown_gpadl(channel,
					   channel->ringbuffer_gpadlhandle);
		if (ret) {
			pr_err("Close failed: teardown gpadl return %d\n", ret);
			/*
			 * If we failed to teardown gpadl,
			 * it is perhaps better to leak memory.
			 */
		}

		channel->ringbuffer_gpadlhandle = 0;
	}

	if (!ret)
		vmbus_free_requestor(&channel->requestor);

	return ret;
}
/* disconnect ring - close all channels */
int vmbus_disconnect_ring(struct vmbus_channel *channel)
{
	struct vmbus_channel *cur_channel, *tmp;
	int ret;

	if (channel->primary_channel != NULL)
		return -EINVAL;

	list_for_each_entry_safe(cur_channel, tmp, &channel->sc_list, sc_list) {
		if (cur_channel->rescind)
			wait_for_completion(&cur_channel->rescind_event);

		mutex_lock(&vmbus_connection.channel_mutex);
		if (vmbus_close_internal(cur_channel) == 0) {
			vmbus_free_ring(cur_channel);

			if (cur_channel->rescind)
				hv_process_channel_removal(cur_channel);
		}
		mutex_unlock(&vmbus_connection.channel_mutex);
	}

	/*
	 * Now close the primary.
	 */
	mutex_lock(&vmbus_connection.channel_mutex);
	ret = vmbus_close_internal(channel);
	mutex_unlock(&vmbus_connection.channel_mutex);

	return ret;
}
EXPORT_SYMBOL_GPL(vmbus_disconnect_ring);
/*
 * vmbus_close - Close the specified channel
 */
void vmbus_close(struct vmbus_channel *channel)
{
	if (vmbus_disconnect_ring(channel) == 0)
		vmbus_free_ring(channel);
}
EXPORT_SYMBOL_GPL(vmbus_close);
/**
 * vmbus_sendpacket() - Send the specified buffer on the given channel
 * @channel: Pointer to vmbus_channel structure
 * @buffer: Pointer to the buffer you want to send the data from.
 * @bufferlen: Maximum size of what the buffer holds.
 * @requestid: Identifier of the request
 * @type: Type of packet that is being sent e.g. negotiate, time
 *	  packet etc.
 * @flags: 0 or VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED
 *
 * Sends data in @buffer directly to Hyper-V via the vmbus.
 * This will send the data unparsed to Hyper-V.
 *
 * Mainly used by Hyper-V drivers.
 */
int vmbus_sendpacket(struct vmbus_channel *channel, void *buffer,
		     u32 bufferlen, u64 requestid,
		     enum vmbus_packet_type type, u32 flags)
{
	struct vmpacket_descriptor desc;
	u32 packetlen = sizeof(struct vmpacket_descriptor) + bufferlen;
	u32 packetlen_aligned = ALIGN(packetlen, sizeof(u64));
	struct kvec bufferlist[3];
	u64 aligned_data = 0;
	int num_vecs = ((bufferlen != 0) ? 3 : 1);

	/* Setup the descriptor */
	desc.type = type; /* VmbusPacketTypeDataInBand; */
	desc.flags = flags; /* VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED; */
	/* in 8-bytes granularity */
	desc.offset8 = sizeof(struct vmpacket_descriptor) >> 3;
	desc.len8 = (u16)(packetlen_aligned >> 3);
	desc.trans_id = VMBUS_RQST_ERROR; /* will be updated in hv_ringbuffer_write() */

	bufferlist[0].iov_base = &desc;
	bufferlist[0].iov_len = sizeof(struct vmpacket_descriptor);
	bufferlist[1].iov_base = buffer;
	bufferlist[1].iov_len = bufferlen;
	bufferlist[2].iov_base = &aligned_data;
	bufferlist[2].iov_len = (packetlen_aligned - packetlen);

	return hv_ringbuffer_write(channel, bufferlist, num_vecs, requestid);
}
EXPORT_SYMBOL(vmbus_sendpacket);
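/*
 * Example (illustrative sketch only): sending a small in-band request and
 * asking the host for a completion. "struct my_req", MY_OP_QUERY and the
 * request id of 1 are made-up values; real drivers usually obtain the id
 * from their own transaction bookkeeping or the channel requestor.
 *
 *	struct my_req req = { .op = MY_OP_QUERY };
 *	int ret;
 *
 *	ret = vmbus_sendpacket(channel, &req, sizeof(req), 1,
 *			       VM_PKT_DATA_INBAND,
 *			       VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
 */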
/*
 * vmbus_sendpacket_pagebuffer - Send a range of single-page buffer
 * packets using a GPADL Direct packet type. This interface allows you
 * to control notifying the host. This will be useful for sending
 * batched data. Also the sender can control the send flags
 * explicitly.
 */
int vmbus_sendpacket_pagebuffer(struct vmbus_channel *channel,
				struct hv_page_buffer pagebuffers[],
				u32 pagecount, void *buffer, u32 bufferlen,
				u64 requestid)
{
	int i;
	struct vmbus_channel_packet_page_buffer desc;
	u32 descsize;
	u32 packetlen;
	u32 packetlen_aligned;
	struct kvec bufferlist[3];
	u64 aligned_data = 0;

	if (pagecount > MAX_PAGE_BUFFER_COUNT)
		return -EINVAL;

	/*
	 * Adjust the size down since vmbus_channel_packet_page_buffer is the
	 * largest size we support
	 */
	descsize = sizeof(struct vmbus_channel_packet_page_buffer) -
			  ((MAX_PAGE_BUFFER_COUNT - pagecount) *
			  sizeof(struct hv_page_buffer));
	packetlen = descsize + bufferlen;
	packetlen_aligned = ALIGN(packetlen, sizeof(u64));

	/* Setup the descriptor */
	desc.type = VM_PKT_DATA_USING_GPA_DIRECT;
	desc.flags = VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED;
	desc.dataoffset8 = descsize >> 3; /* in 8-bytes granularity */
	desc.length8 = (u16)(packetlen_aligned >> 3);
	desc.transactionid = VMBUS_RQST_ERROR; /* will be updated in hv_ringbuffer_write() */
	desc.reserved = 0;
	desc.rangecount = pagecount;

	for (i = 0; i < pagecount; i++) {
		desc.range[i].len = pagebuffers[i].len;
		desc.range[i].offset = pagebuffers[i].offset;
		desc.range[i].pfn = pagebuffers[i].pfn;
	}

	bufferlist[0].iov_base = &desc;
	bufferlist[0].iov_len = descsize;
	bufferlist[1].iov_base = buffer;
	bufferlist[1].iov_len = bufferlen;
	bufferlist[2].iov_base = &aligned_data;
	bufferlist[2].iov_len = (packetlen_aligned - packetlen);

	return hv_ringbuffer_write(channel, bufferlist, 3, requestid);
}
EXPORT_SYMBOL_GPL(vmbus_sendpacket_pagebuffer);
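/*
 * Example (illustrative sketch only): describing one page of payload with a
 * single hv_page_buffer entry. "data" is assumed to be a page-aligned,
 * physically contiguous kernel buffer, and "hdr"/"req_id" are made-up names.
 *
 *	struct hv_page_buffer pb = {
 *		.pfn	= virt_to_phys(data) >> HV_HYP_PAGE_SHIFT,
 *		.offset	= 0,
 *		.len	= HV_HYP_PAGE_SIZE,
 *	};
 *
 *	ret = vmbus_sendpacket_pagebuffer(channel, &pb, 1,
 *					  &hdr, sizeof(hdr), req_id);
 */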
/*
 * vmbus_sendpacket_mpb_desc - Send a multi-page buffer packet
 * using a GPADL Direct packet type.
 * The buffer includes the vmbus descriptor.
 */
int vmbus_sendpacket_mpb_desc(struct vmbus_channel *channel,
			      struct vmbus_packet_mpb_array *desc,
			      u32 desc_size,
			      void *buffer, u32 bufferlen, u64 requestid)
{
	u32 packetlen;
	u32 packetlen_aligned;
	struct kvec bufferlist[3];
	u64 aligned_data = 0;

	packetlen = desc_size + bufferlen;
	packetlen_aligned = ALIGN(packetlen, sizeof(u64));

	/* Setup the descriptor */
	desc->type = VM_PKT_DATA_USING_GPA_DIRECT;
	desc->flags = VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED;
	desc->dataoffset8 = desc_size >> 3; /* in 8-bytes granularity */
	desc->length8 = (u16)(packetlen_aligned >> 3);
	desc->transactionid = VMBUS_RQST_ERROR; /* will be updated in hv_ringbuffer_write() */
	desc->reserved = 0;
	desc->rangecount = 1;

	bufferlist[0].iov_base = desc;
	bufferlist[0].iov_len = desc_size;
	bufferlist[1].iov_base = buffer;
	bufferlist[1].iov_len = bufferlen;
	bufferlist[2].iov_base = &aligned_data;
	bufferlist[2].iov_len = (packetlen_aligned - packetlen);

	return hv_ringbuffer_write(channel, bufferlist, 3, requestid);
}
EXPORT_SYMBOL_GPL(vmbus_sendpacket_mpb_desc);
/**
 * __vmbus_recvpacket() - Retrieve the user packet on the specified channel
 * @channel: Pointer to vmbus_channel structure
 * @buffer: Pointer to the buffer you want to receive the data into.
 * @bufferlen: Maximum size of what the buffer can hold.
 * @buffer_actual_len: The actual size of the data after it was received.
 * @requestid: Identifier of the request
 * @raw: true means keep the vmpacket_descriptor header in the received data.
 *
 * Receives directly from the hyper-v vmbus and puts the data it received
 * into the buffer. This will receive the data unparsed from hyper-v.
 *
 * Mainly used by Hyper-V drivers.
 */
static inline int
__vmbus_recvpacket(struct vmbus_channel *channel, void *buffer,
		   u32 bufferlen, u32 *buffer_actual_len, u64 *requestid,
		   bool raw)
{
	return hv_ringbuffer_read(channel, buffer, bufferlen,
				  buffer_actual_len, requestid, raw);
}
int vmbus_recvpacket(struct vmbus_channel *channel, void *buffer,
		     u32 bufferlen, u32 *buffer_actual_len,
		     u64 *requestid)
{
	return __vmbus_recvpacket(channel, buffer, bufferlen,
				  buffer_actual_len, requestid, false);
}
EXPORT_SYMBOL(vmbus_recvpacket);
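/*
 * Example (illustrative sketch only): a typical channel callback drains the
 * inbound ring until vmbus_recvpacket() reports zero received bytes. The
 * 256-byte stack buffer and handle_message() are made up for illustration.
 *
 *	static void my_ring_cb(void *context)
 *	{
 *		struct vmbus_channel *chan = context;
 *		u8 buf[256];
 *		u32 len;
 *		u64 req_id;
 *
 *		while (!vmbus_recvpacket(chan, buf, sizeof(buf), &len, &req_id) &&
 *		       len)
 *			handle_message(buf, len, req_id);
 *	}
 */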
/*
 * vmbus_recvpacket_raw - Retrieve the raw packet on the specified channel
 */
int vmbus_recvpacket_raw(struct vmbus_channel *channel, void *buffer,
			 u32 bufferlen, u32 *buffer_actual_len,
			 u64 *requestid)
{
	return __vmbus_recvpacket(channel, buffer, bufferlen,
				  buffer_actual_len, requestid, true);
}
EXPORT_SYMBOL_GPL(vmbus_recvpacket_raw);
/*
 * vmbus_next_request_id - Returns a new request id. It is also
 * the index at which the guest memory address is stored.
 * Uses a spin lock to avoid race conditions.
 * @rqstor: Pointer to the requestor struct
 * @rqst_addr: Guest memory address to be stored in the array
 */
u64 vmbus_next_request_id(struct vmbus_requestor *rqstor, u64 rqst_addr)
{
	unsigned long flags;
	u64 current_id;
	const struct vmbus_channel *channel =
		container_of(rqstor, const struct vmbus_channel, requestor);

	/* Check rqstor has been initialized */
	if (!channel->rqstor_size)
		return VMBUS_NO_RQSTOR;

	spin_lock_irqsave(&rqstor->req_lock, flags);
	current_id = rqstor->next_request_id;

	/* Requestor array is full */
	if (current_id >= rqstor->size) {
		spin_unlock_irqrestore(&rqstor->req_lock, flags);
		return VMBUS_RQST_ERROR;
	}

	rqstor->next_request_id = rqstor->req_arr[current_id];
	rqstor->req_arr[current_id] = rqst_addr;

	/* The already held spin lock provides atomicity */
	bitmap_set(rqstor->req_bitmap, current_id, 1);

	spin_unlock_irqrestore(&rqstor->req_lock, flags);

	/*
	 * Cannot return an ID of 0, which is reserved for an unsolicited
	 * message from Hyper-V.
	 */
	return current_id + 1;
}
EXPORT_SYMBOL_GPL(vmbus_next_request_id);
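/*
 * Worked example (illustrative): with a freshly initialized requestor of
 * size 4, the free list is { 1, 2, 3, U64_MAX } and next_request_id is 0.
 * Storing an address A returns request id 1 (slot 0 + 1), leaves
 * req_arr = { A, 2, 3, U64_MAX } and next_request_id = 1. When Hyper-V later
 * completes the request with trans_id 1, vmbus_request_addr() below returns
 * A, puts slot 0 back at the head of the free list and clears its bitmap bit.
 */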
/*
 * vmbus_request_addr - Returns the memory address stored at @trans_id
 * in @rqstor. Uses a spin lock to avoid race conditions.
 * @rqstor: Pointer to the requestor struct
 * @trans_id: Request id sent back from Hyper-V. Becomes the requestor's
 * next request id.
 */
u64 vmbus_request_addr(struct vmbus_requestor *rqstor, u64 trans_id)
{
	unsigned long flags;
	u64 req_addr;
	const struct vmbus_channel *channel =
		container_of(rqstor, const struct vmbus_channel, requestor);

	/* Check rqstor has been initialized */
	if (!channel->rqstor_size)
		return VMBUS_NO_RQSTOR;

	/* Hyper-V can send an unsolicited message with ID of 0 */
	if (!trans_id)
		return trans_id;

	spin_lock_irqsave(&rqstor->req_lock, flags);

	/* Data corresponding to trans_id is stored at trans_id - 1 */
	trans_id--;

	/* Invalid trans_id */
	if (trans_id >= rqstor->size || !test_bit(trans_id, rqstor->req_bitmap)) {
		spin_unlock_irqrestore(&rqstor->req_lock, flags);
		return VMBUS_RQST_ERROR;
	}

	req_addr = rqstor->req_arr[trans_id];
	rqstor->req_arr[trans_id] = rqstor->next_request_id;
	rqstor->next_request_id = trans_id;

	/* The already held spin lock provides atomicity */
	bitmap_clear(rqstor->req_bitmap, trans_id, 1);

	spin_unlock_irqrestore(&rqstor->req_lock, flags);
	return req_addr;
}
EXPORT_SYMBOL_GPL(vmbus_request_addr);