// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (c) 2009, Microsoft Corporation.
 *
 * Authors:
 *   Haiyang Zhang <haiyangz@microsoft.com>
 *   Hank Janssen  <hjanssen@microsoft.com>
 */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/kernel.h>
#include <linux/interrupt.h>
#include <linux/sched.h>
#include <linux/wait.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/list.h>
#include <linux/module.h>
#include <linux/completion.h>
#include <linux/delay.h>
#include <linux/cpu.h>
#include <linux/hyperv.h>
#include <asm/mshyperv.h>

#include "hyperv_vmbus.h"

static void init_vp_index(struct vmbus_channel *channel);

const struct vmbus_device vmbus_devs[] = {
	/* IDE */
	{ .dev_type = HV_IDE, HV_IDE_GUID, .perf_device = true },
	/* SCSI */
	{ .dev_type = HV_SCSI, HV_SCSI_GUID, .perf_device = true },
	/* Fibre Channel */
	{ .dev_type = HV_FC, HV_SYNTHFC_GUID, .perf_device = true },
	/* Synthetic NIC */
	{ .dev_type = HV_NIC, HV_NIC_GUID, .perf_device = true },
	/* Network Direct */
	{ .dev_type = HV_ND, HV_ND_GUID, .perf_device = true },
	/* PCIE */
	{ .dev_type = HV_PCIE, HV_PCIE_GUID, .perf_device = false },
	/* Synthetic Frame Buffer */
	{ .dev_type = HV_FB, HV_SYNTHVID_GUID, .perf_device = false },
	/* Synthetic Keyboard */
	{ .dev_type = HV_KBD, HV_KBD_GUID, .perf_device = false },
	/* Synthetic Mouse */
	{ .dev_type = HV_MOUSE, HV_MOUSE_GUID, .perf_device = false },
	/* KVP */
	{ .dev_type = HV_KVP, HV_KVP_GUID, .perf_device = false },
	/* Time Synch */
	{ .dev_type = HV_TS, HV_TS_GUID, .perf_device = false },
	/* Heartbeat */
	{ .dev_type = HV_HB, HV_HEART_BEAT_GUID, .perf_device = false },
	/* Shutdown */
	{ .dev_type = HV_SHUTDOWN, HV_SHUTDOWN_GUID, .perf_device = false },
	/* File copy */
	{ .dev_type = HV_FCOPY, HV_FCOPY_GUID, .perf_device = false },
	/* Backup */
	{ .dev_type = HV_BACKUP, HV_VSS_GUID, .perf_device = false },
	/* Dynamic Memory */
	{ .dev_type = HV_DM, HV_DM_GUID, .perf_device = false },
	/* Unknown GUID */
	{ .dev_type = HV_UNKNOWN, .perf_device = false },
};

static const struct {
	guid_t guid;
} vmbus_unsupported_devs[] = {
	{ HV_AVMA1_GUID },
	{ HV_AVMA2_GUID },
	{ HV_RDV_GUID	},
};

/*
 * The rescinded channel may be blocked waiting for a response from the host;
 * take care of that.
 */
static void vmbus_rescind_cleanup(struct vmbus_channel *channel)
{
	struct vmbus_channel_msginfo *msginfo;
	unsigned long flags;

	spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);
	channel->rescind = true;
	list_for_each_entry(msginfo, &vmbus_connection.chn_msg_list,
				msglistentry) {
		if (msginfo->waiting_channel == channel) {
			complete(&msginfo->waitevent);
			break;
		}
	}
	spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);
}

static bool is_unsupported_vmbus_devs(const guid_t *guid)
{
	int i;

	for (i = 0; i < ARRAY_SIZE(vmbus_unsupported_devs); i++)
		if (guid_equal(guid, &vmbus_unsupported_devs[i].guid))
			return true;
	return false;
}

static u16 hv_get_dev_type(const struct vmbus_channel *channel)
{
	const guid_t *guid = &channel->offermsg.offer.if_type;
	u16 i;

	if (is_hvsock_channel(channel) || is_unsupported_vmbus_devs(guid))
		return HV_UNKNOWN;

	for (i = HV_IDE; i < HV_UNKNOWN; i++) {
		if (guid_equal(guid, &vmbus_devs[i].guid))
			return i;
	}
	pr_info("Unknown GUID: %pUl\n", guid);
	return i;
}

/**
 * vmbus_prep_negotiate_resp() - Create default response for Negotiate message
 * @icmsghdrp: Pointer to msg header structure
 * @buf: Raw buffer channel data
 * @fw_version: The framework versions we can support.
 * @fw_vercnt: The size of @fw_version.
 * @srv_version: The service versions we can support.
 * @srv_vercnt: The size of @srv_version.
 * @nego_fw_version: The selected framework version.
 * @nego_srv_version: The selected service version.
 *
 * Note: Versions are given in decreasing order.
 *
 * Set up and fill in default negotiate response message.
 * Mainly used by Hyper-V drivers.
 */
bool vmbus_prep_negotiate_resp(struct icmsg_hdr *icmsghdrp,
				u8 *buf, const int *fw_version, int fw_vercnt,
				const int *srv_version, int srv_vercnt,
				int *nego_fw_version, int *nego_srv_version)
{
	int icframe_major, icframe_minor;
	int icmsg_major, icmsg_minor;
	int fw_major, fw_minor;
	int srv_major, srv_minor;
	int i, j;
	bool found_match = false;
	struct icmsg_negotiate *negop;

	icmsghdrp->icmsgsize = 0x10;
	negop = (struct icmsg_negotiate *)&buf[
		sizeof(struct vmbuspipe_hdr) +
		sizeof(struct icmsg_hdr)];

	icframe_major = negop->icframe_vercnt;
	icframe_minor = 0;

	icmsg_major = negop->icmsg_vercnt;
	icmsg_minor = 0;

	/*
	 * Select the framework version number we will
	 * support.
	 */

	for (i = 0; i < fw_vercnt; i++) {
		fw_major = (fw_version[i] >> 16);
		fw_minor = (fw_version[i] & 0xFFFF);

		for (j = 0; j < negop->icframe_vercnt; j++) {
			if ((negop->icversion_data[j].major == fw_major) &&
			    (negop->icversion_data[j].minor == fw_minor)) {
				icframe_major = negop->icversion_data[j].major;
				icframe_minor = negop->icversion_data[j].minor;
				found_match = true;
				break;
			}
		}

		if (found_match)
			break;
	}

	if (!found_match)
		goto fw_error;

	found_match = false;

	for (i = 0; i < srv_vercnt; i++) {
		srv_major = (srv_version[i] >> 16);
		srv_minor = (srv_version[i] & 0xFFFF);

		for (j = negop->icframe_vercnt;
		     (j < negop->icframe_vercnt + negop->icmsg_vercnt);
		     j++) {
			if ((negop->icversion_data[j].major == srv_major) &&
			    (negop->icversion_data[j].minor == srv_minor)) {
				icmsg_major = negop->icversion_data[j].major;
				icmsg_minor = negop->icversion_data[j].minor;
				found_match = true;
				break;
			}
		}

		if (found_match)
			break;
	}

	/*
	 * Respond with the framework and service
	 * version numbers we can support.
	 */

fw_error:
	if (!found_match) {
		negop->icframe_vercnt = 0;
		negop->icmsg_vercnt = 0;
	} else {
		negop->icframe_vercnt = 1;
		negop->icmsg_vercnt = 1;
	}

	if (nego_fw_version)
		*nego_fw_version = (icframe_major << 16) | icframe_minor;

	if (nego_srv_version)
		*nego_srv_version = (icmsg_major << 16) | icmsg_minor;

	negop->icversion_data[0].major = icframe_major;
	negop->icversion_data[0].minor = icframe_minor;
	negop->icversion_data[1].major = icmsg_major;
	negop->icversion_data[1].minor = icmsg_minor;

	return found_match;
}
EXPORT_SYMBOL_GPL(vmbus_prep_negotiate_resp);
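
/*
 * Example (illustrative sketch, not part of the upstream file): an IC
 * driver such as the hv_utils heartbeat service typically calls
 * vmbus_prep_negotiate_resp() from its channel callback when it receives
 * an ICMSGTYPE_NEGOTIATE message. The version arrays and the
 * hb_srv_version variable below are hypothetical placeholders:
 *
 *	static const int fw_versions[] = { UTIL_FW_VERSION };
 *	static const int hb_versions[] = { HB_VERSION_3, HB_VERSION_1 };
 *	int hb_srv_version;
 *
 *	if (icmsghdrp->icmsgtype == ICMSGTYPE_NEGOTIATE) {
 *		if (vmbus_prep_negotiate_resp(icmsghdrp, buf,
 *				fw_versions, ARRAY_SIZE(fw_versions),
 *				hb_versions, ARRAY_SIZE(hb_versions),
 *				NULL, &hb_srv_version))
 *			pr_info("Heartbeat IC version %d.%d\n",
 *				hb_srv_version >> 16,
 *				hb_srv_version & 0xFFFF);
 *	}
 *
 * On return the selected versions have been written back into @buf, so
 * the caller simply sends the same buffer back to the host (e.g. with
 * vmbus_sendpacket()).
 */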

/*
 * alloc_channel - Allocate and initialize a vmbus channel object
 */
static struct vmbus_channel *alloc_channel(void)
{
	struct vmbus_channel *channel;

	channel = kzalloc(sizeof(*channel), GFP_ATOMIC);
	if (!channel)
		return NULL;

	spin_lock_init(&channel->sched_lock);
	init_completion(&channel->rescind_event);

	INIT_LIST_HEAD(&channel->sc_list);

	tasklet_init(&channel->callback_event,
		     vmbus_on_event, (unsigned long)channel);

	hv_ringbuffer_pre_init(channel);

	return channel;
}

/*
 * free_channel - Release the resources used by the vmbus channel object
 */
static void free_channel(struct vmbus_channel *channel)
{
	tasklet_kill(&channel->callback_event);
	vmbus_remove_channel_attr_group(channel);

	kobject_put(&channel->kobj);
}

void vmbus_channel_map_relid(struct vmbus_channel *channel)
{
	if (WARN_ON(channel->offermsg.child_relid >= MAX_CHANNEL_RELIDS))
		return;
	/*
	 * The mapping of the channel's relid is visible from the CPUs that
	 * execute vmbus_chan_sched() by the time that vmbus_chan_sched() will
	 * execute:
	 *
	 *  (a) In the "normal (i.e., not resuming from hibernation)" path,
	 *      the full barrier in smp_store_mb() guarantees that the store
	 *      is propagated to all CPUs before the add_channel_work work
	 *      is queued.  In turn, add_channel_work is queued before the
	 *      channel's ring buffer is allocated/initialized and the
	 *      OPENCHANNEL message for the channel is sent in vmbus_open().
	 *      Hyper-V won't start sending the interrupts for the channel
	 *      before the OPENCHANNEL message is acked.  The memory barrier
	 *      in vmbus_chan_sched() -> sync_test_and_clear_bit() ensures
	 *      that vmbus_chan_sched() must find the channel's relid in
	 *      recv_int_page before retrieving the channel pointer from the
	 *      array of channels.
	 *
	 *  (b) In the "resuming from hibernation" path, the smp_store_mb()
	 *      guarantees that the store is propagated to all CPUs before
	 *      the VMBus connection is marked as ready for the resume event
	 *      (cf. check_ready_for_resume_event()).  The interrupt handler
	 *      of the VMBus driver and vmbus_chan_sched() can not run before
	 *      vmbus_bus_resume() has completed execution (cf. resume_noirq).
	 */
	smp_store_mb(
		vmbus_connection.channels[channel->offermsg.child_relid],
		channel);
}

void vmbus_channel_unmap_relid(struct vmbus_channel *channel)
{
	if (WARN_ON(channel->offermsg.child_relid >= MAX_CHANNEL_RELIDS))
		return;
	WRITE_ONCE(
		vmbus_connection.channels[channel->offermsg.child_relid],
		NULL);
}

static void vmbus_release_relid(u32 relid)
{
	struct vmbus_channel_relid_released msg;
	int ret;

	memset(&msg, 0, sizeof(struct vmbus_channel_relid_released));
	msg.child_relid = relid;
	msg.header.msgtype = CHANNELMSG_RELID_RELEASED;
	ret = vmbus_post_msg(&msg, sizeof(struct vmbus_channel_relid_released),
			     true);

	trace_vmbus_release_relid(&msg, ret);
}

void hv_process_channel_removal(struct vmbus_channel *channel)
{
	lockdep_assert_held(&vmbus_connection.channel_mutex);
	BUG_ON(!channel->rescind);

	/*
	 * hv_process_channel_removal() could find INVALID_RELID only for
	 * hv_sock channels.  See the inline comments in vmbus_onoffer().
	 */
	WARN_ON(channel->offermsg.child_relid == INVALID_RELID &&
		!is_hvsock_channel(channel));

	/*
	 * Upon suspend, an in-use hv_sock channel is removed from the array of
	 * channels and the relid is invalidated.  After hibernation, when the
	 * user-space application destroys the channel, it's unnecessary and
	 * unsafe to remove the channel from the array of channels.  See also
	 * the inline comments before the call of vmbus_release_relid() below.
	 */
	if (channel->offermsg.child_relid != INVALID_RELID)
		vmbus_channel_unmap_relid(channel);

	if (channel->primary_channel == NULL)
		list_del(&channel->listentry);
	else
		list_del(&channel->sc_list);

	/*
	 * If this is a "perf" channel, update the hv_numa_map[] masks so that
	 * init_vp_index() can (re-)use the CPU.
	 */
	if (hv_is_perf_channel(channel))
		hv_clear_alloced_cpu(channel->target_cpu);

	/*
	 * Upon suspend, an in-use hv_sock channel is marked as "rescinded" and
	 * the relid is invalidated; after hibernation, when the user-space app
	 * destroys the channel, the relid is INVALID_RELID, and in this case
	 * it's unnecessary and unsafe to release the old relid, since the same
	 * relid can refer to a completely different channel now.
	 */
	if (channel->offermsg.child_relid != INVALID_RELID)
		vmbus_release_relid(channel->offermsg.child_relid);

	free_channel(channel);
}

void vmbus_free_channels(void)
{
	struct vmbus_channel *channel, *tmp;

	list_for_each_entry_safe(channel, tmp, &vmbus_connection.chn_list,
		listentry) {
		/* hv_process_channel_removal() needs this */
		channel->rescind = true;

		vmbus_device_unregister(channel->device_obj);
	}
}

/* Note: the function can run concurrently for primary/sub channels. */
static void vmbus_add_channel_work(struct work_struct *work)
{
	struct vmbus_channel *newchannel =
		container_of(work, struct vmbus_channel, add_channel_work);
	struct vmbus_channel *primary_channel = newchannel->primary_channel;
	int ret;

	/*
	 * This state is used to indicate a successful open
	 * so that when we do close the channel normally, we
	 * can cleanup properly.
	 */
	newchannel->state = CHANNEL_OPEN_STATE;

	if (primary_channel != NULL) {
		/* newchannel is a sub-channel. */
		struct hv_device *dev = primary_channel->device_obj;

		if (vmbus_add_channel_kobj(dev, newchannel))
			goto err_deq_chan;

		if (primary_channel->sc_creation_callback != NULL)
			primary_channel->sc_creation_callback(newchannel);

		newchannel->probe_done = true;
		return;
	}

	/*
	 * Start the process of binding the primary channel to the driver
	 */
	newchannel->device_obj = vmbus_device_create(
		&newchannel->offermsg.offer.if_type,
		&newchannel->offermsg.offer.if_instance,
		newchannel);
	if (!newchannel->device_obj)
		goto err_deq_chan;

	newchannel->device_obj->device_id = newchannel->device_id;
	/*
	 * Add the new device to the bus. This will kick off device-driver
	 * binding which eventually invokes the device driver's AddDevice()
	 * method.
	 */
	ret = vmbus_device_register(newchannel->device_obj);

	if (ret != 0) {
		pr_err("unable to add child device object (relid %d)\n",
		       newchannel->offermsg.child_relid);
		kfree(newchannel->device_obj);
		goto err_deq_chan;
	}

	newchannel->probe_done = true;
	return;

err_deq_chan:
	mutex_lock(&vmbus_connection.channel_mutex);

	/*
	 * We need to set the flag, otherwise
	 * vmbus_onoffer_rescind() can be blocked.
	 */
	newchannel->probe_done = true;

	if (primary_channel == NULL)
		list_del(&newchannel->listentry);
	else
		list_del(&newchannel->sc_list);

	/* vmbus_process_offer() has mapped the channel. */
	vmbus_channel_unmap_relid(newchannel);

	mutex_unlock(&vmbus_connection.channel_mutex);

	vmbus_release_relid(newchannel->offermsg.child_relid);

	free_channel(newchannel);
}

/*
 * vmbus_process_offer - Process the offer by creating a channel/device
 * associated with this offer
 */
static void vmbus_process_offer(struct vmbus_channel *newchannel)
{
	struct vmbus_channel *channel;
	struct workqueue_struct *wq;
	bool fnew = true;

	/*
	 * Synchronize vmbus_process_offer() and CPU hotplugging:
	 *
	 * CPU1				CPU2
	 *
	 * [vmbus_process_offer()]	[Hot removal of the CPU]
	 *
	 * CPU_READ_LOCK		CPUS_WRITE_LOCK
	 * LOAD cpu_online_mask		SEARCH chn_list
	 * STORE target_cpu		LOAD target_cpu
	 * INSERT chn_list		STORE cpu_online_mask
	 * CPUS_READ_UNLOCK		CPUS_WRITE_UNLOCK
	 *
	 * Forbids: CPU1's LOAD from *not* seeing CPU2's STORE &&
	 *		CPU2's SEARCH from *not* seeing CPU1's INSERT
	 *
	 * Forbids: CPU2's SEARCH from seeing CPU1's INSERT &&
	 *		CPU2's LOAD from *not* seeing CPU1's STORE
	 */
	cpus_read_lock();

	/*
	 * Serializes the modifications of the chn_list list as well as
	 * the accesses to next_numa_node_id in init_vp_index().
	 */
	mutex_lock(&vmbus_connection.channel_mutex);

	init_vp_index(newchannel);

	/* Remember the channels that should be cleaned up upon suspend. */
	if (is_hvsock_channel(newchannel) || is_sub_channel(newchannel))
		atomic_inc(&vmbus_connection.nr_chan_close_on_suspend);

	/*
	 * Now that we have acquired the channel_mutex,
	 * we can release the potentially racing rescind thread.
	 */
	atomic_dec(&vmbus_connection.offer_in_progress);

	list_for_each_entry(channel, &vmbus_connection.chn_list, listentry) {
		if (guid_equal(&channel->offermsg.offer.if_type,
			       &newchannel->offermsg.offer.if_type) &&
		    guid_equal(&channel->offermsg.offer.if_instance,
			       &newchannel->offermsg.offer.if_instance)) {
			fnew = false;
			break;
		}
	}

	if (fnew)
		list_add_tail(&newchannel->listentry,
			      &vmbus_connection.chn_list);
	else {
		/*
		 * Check to see if this is a valid sub-channel.
		 */
		if (newchannel->offermsg.offer.sub_channel_index == 0) {
			mutex_unlock(&vmbus_connection.channel_mutex);
			cpus_read_unlock();
			/*
			 * Don't call free_channel(), because newchannel->kobj
			 * is not initialized yet.
			 */
			kfree(newchannel);
			WARN_ON_ONCE(1);
			return;
		}
		/*
		 * Process the sub-channel.
		 */
		newchannel->primary_channel = channel;
		list_add_tail(&newchannel->sc_list, &channel->sc_list);
	}

	vmbus_channel_map_relid(newchannel);

	mutex_unlock(&vmbus_connection.channel_mutex);
	cpus_read_unlock();

	/*
	 * vmbus_process_offer() mustn't call channel->sc_creation_callback()
	 * directly for sub-channels, because sc_creation_callback() ->
	 * vmbus_open() may never get the host's response to the
	 * OPEN_CHANNEL message (the host may rescind a channel at any time,
	 * e.g. in the case of hot removing a NIC), and vmbus_onoffer_rescind()
	 * may not wake up the vmbus_open() as it's blocked due to a non-zero
	 * vmbus_connection.offer_in_progress, and finally we have a deadlock.
	 *
	 * The above is also true for primary channels, if the related device
	 * drivers use sync probing mode by default.
	 *
	 * And, usually the handling of primary channels and sub-channels can
	 * depend on each other, so we should offload them to different
	 * workqueues to avoid possible deadlock, e.g. in sync-probing mode,
	 * NIC1's netvsc_subchan_work() can race with NIC2's netvsc_probe() ->
	 * rtnl_lock(), and causes deadlock: the former gets the rtnl_lock
	 * and waits for all the sub-channels to appear, but the latter
	 * can't get the rtnl_lock and this blocks the handling of
	 * sub-channels.
	 */
	INIT_WORK(&newchannel->add_channel_work, vmbus_add_channel_work);
	wq = fnew ? vmbus_connection.handle_primary_chan_wq :
		    vmbus_connection.handle_sub_chan_wq;
	queue_work(wq, &newchannel->add_channel_work);
}

/*
 * We use this state to statically distribute the channel interrupt load.
 */
static int next_numa_node_id;

/*
 * Starting with Win8, we can statically distribute the incoming
 * channel interrupt load by binding a channel to VCPU.
 *
 * For pre-win8 hosts or non-performance critical channels we assign the
 * VMBUS_CONNECT_CPU.
 *
 * Starting with win8, performance critical channels will be distributed
 * evenly among all the available NUMA nodes.  Once the node is assigned,
 * we will assign the CPU based on a simple round robin scheme.
 */
static void init_vp_index(struct vmbus_channel *channel)
{
	bool perf_chn = hv_is_perf_channel(channel);
	cpumask_var_t available_mask;
	struct cpumask *alloced_mask;
	u32 target_cpu;
	int numa_node;

	if ((vmbus_proto_version == VERSION_WS2008) ||
	    (vmbus_proto_version == VERSION_WIN7) || (!perf_chn) ||
	    !alloc_cpumask_var(&available_mask, GFP_KERNEL)) {
		/*
		 * Prior to win8, all channel interrupts are
		 * delivered on VMBUS_CONNECT_CPU.
		 * Also if the channel is not a performance critical
		 * channel, bind it to VMBUS_CONNECT_CPU.
		 * In case alloc_cpumask_var() fails, bind it to
		 * VMBUS_CONNECT_CPU.
		 */
		channel->target_cpu = VMBUS_CONNECT_CPU;
		if (perf_chn)
			hv_set_alloced_cpu(VMBUS_CONNECT_CPU);
		return;
	}

	while (true) {
		numa_node = next_numa_node_id++;
		if (numa_node == nr_node_ids) {
			next_numa_node_id = 0;
			continue;
		}
		if (cpumask_empty(cpumask_of_node(numa_node)))
			continue;
		break;
	}
	alloced_mask = &hv_context.hv_numa_map[numa_node];

	if (cpumask_weight(alloced_mask) ==
	    cpumask_weight(cpumask_of_node(numa_node))) {
		/*
		 * We have cycled through all the CPUs in the node;
		 * reset the alloced map.
		 */
		cpumask_clear(alloced_mask);
	}

	cpumask_xor(available_mask, alloced_mask, cpumask_of_node(numa_node));

	target_cpu = cpumask_first(available_mask);
	cpumask_set_cpu(target_cpu, alloced_mask);

	channel->target_cpu = target_cpu;

	free_cpumask_var(available_mask);
}

static void vmbus_wait_for_unload(void)
{
	int cpu;
	void *page_addr;
	struct hv_message *msg;
	struct vmbus_channel_message_header *hdr;
	u32 message_type, i;

	/*
	 * CHANNELMSG_UNLOAD_RESPONSE is always delivered to the CPU which was
	 * used for initial contact or to CPU0 depending on host version. When
	 * we're crashing on a different CPU let's hope that IRQ handler on
	 * the cpu which receives CHANNELMSG_UNLOAD_RESPONSE is still
	 * functional and vmbus_unload_response() will complete
	 * vmbus_connection.unload_event. If not, the last thing we can do is
	 * read message pages for all CPUs directly.
	 *
	 * Wait no more than 10 seconds so that the panic path can't get
	 * hung forever in case the response message isn't seen.
	 */
	for (i = 0; i < 1000; i++) {
		if (completion_done(&vmbus_connection.unload_event))
			goto completed;

		for_each_online_cpu(cpu) {
			struct hv_per_cpu_context *hv_cpu
				= per_cpu_ptr(hv_context.cpu_context, cpu);

			page_addr = hv_cpu->synic_message_page;
			msg = (struct hv_message *)page_addr
				+ VMBUS_MESSAGE_SINT;

			message_type = READ_ONCE(msg->header.message_type);
			if (message_type == HVMSG_NONE)
				continue;

			hdr = (struct vmbus_channel_message_header *)
				msg->u.payload;

			if (hdr->msgtype == CHANNELMSG_UNLOAD_RESPONSE)
				complete(&vmbus_connection.unload_event);

			vmbus_signal_eom(msg, message_type);
		}

		mdelay(10);
	}

completed:
	/*
	 * We're crashing and already got the UNLOAD_RESPONSE, cleanup all
	 * maybe-pending messages on all CPUs to be able to receive new
	 * messages after we reconnect.
	 */
	for_each_online_cpu(cpu) {
		struct hv_per_cpu_context *hv_cpu
			= per_cpu_ptr(hv_context.cpu_context, cpu);

		page_addr = hv_cpu->synic_message_page;
		msg = (struct hv_message *)page_addr + VMBUS_MESSAGE_SINT;
		msg->header.message_type = HVMSG_NONE;
	}
}

/*
 * vmbus_unload_response - Handler for the unload response.
 */
static void vmbus_unload_response(struct vmbus_channel_message_header *hdr)
{
	/*
	 * This is a global event; just wakeup the waiting thread.
	 * Once we successfully unload, we can cleanup the monitor state.
	 */
	complete(&vmbus_connection.unload_event);
}

void vmbus_initiate_unload(bool crash)
{
	struct vmbus_channel_message_header hdr;

	if (xchg(&vmbus_connection.conn_state, DISCONNECTED) == DISCONNECTED)
		return;

	/* Pre-Win2012R2 hosts don't support reconnect */
	if (vmbus_proto_version < VERSION_WIN8_1)
		return;

	init_completion(&vmbus_connection.unload_event);
	memset(&hdr, 0, sizeof(struct vmbus_channel_message_header));
	hdr.msgtype = CHANNELMSG_UNLOAD;
	vmbus_post_msg(&hdr, sizeof(struct vmbus_channel_message_header),
		       !crash);

	/*
	 * vmbus_initiate_unload() is also called on crash and the crash can be
	 * happening in an interrupt context, where scheduling is impossible.
	 */
	if (!crash)
		wait_for_completion(&vmbus_connection.unload_event);
	else
		vmbus_wait_for_unload();
}

static void check_ready_for_resume_event(void)
{
	/*
	 * If all the old primary channels have been fixed up, then it's safe
	 * to resume.
	 */
	if (atomic_dec_and_test(&vmbus_connection.nr_chan_fixup_on_resume))
		complete(&vmbus_connection.ready_for_resume_event);
}

static void vmbus_setup_channel_state(struct vmbus_channel *channel,
				      struct vmbus_channel_offer_channel *offer)
{
	/*
	 * Setup state for signalling the host.
	 */
	channel->sig_event = VMBUS_EVENT_CONNECTION_ID;

	if (vmbus_proto_version != VERSION_WS2008) {
		channel->is_dedicated_interrupt =
				(offer->is_dedicated_interrupt != 0);
		channel->sig_event = offer->connection_id;
	}

	memcpy(&channel->offermsg, offer,
	       sizeof(struct vmbus_channel_offer_channel));
	channel->monitor_grp = (u8)offer->monitorid / 32;
	channel->monitor_bit = (u8)offer->monitorid % 32;
	channel->device_id = hv_get_dev_type(channel);
}

/*
 * find_primary_channel_by_offer - Get the channel object given the new offer.
 * This is only used in the resume path of hibernation.
 */
static struct vmbus_channel *
find_primary_channel_by_offer(const struct vmbus_channel_offer_channel *offer)
{
	struct vmbus_channel *channel = NULL, *iter;
	const guid_t *inst1, *inst2;

	/* Ignore sub-channel offers. */
	if (offer->offer.sub_channel_index != 0)
		return NULL;

	mutex_lock(&vmbus_connection.channel_mutex);

	list_for_each_entry(iter, &vmbus_connection.chn_list, listentry) {
		inst1 = &iter->offermsg.offer.if_instance;
		inst2 = &offer->offer.if_instance;

		if (guid_equal(inst1, inst2)) {
			channel = iter;
			break;
		}
	}

	mutex_unlock(&vmbus_connection.channel_mutex);

	return channel;
}

/*
 * vmbus_onoffer - Handler for channel offers from vmbus in parent partition.
 */
static void vmbus_onoffer(struct vmbus_channel_message_header *hdr)
{
	struct vmbus_channel_offer_channel *offer;
	struct vmbus_channel *oldchannel, *newchannel;
	size_t offer_sz;

	offer = (struct vmbus_channel_offer_channel *)hdr;

	trace_vmbus_onoffer(offer);

	oldchannel = find_primary_channel_by_offer(offer);

	if (oldchannel != NULL) {
		/*
		 * We're resuming from hibernation: all the sub-channel and
		 * hv_sock channels we had before the hibernation should have
		 * been cleaned up, and now we must be seeing a re-offered
		 * primary channel that we had before the hibernation.
		 *
		 * { Initially: channel relid = INVALID_RELID,
		 *		channels[valid_relid] = NULL }
		 *
		 * CPU1					CPU2
		 *
		 * [vmbus_onoffer()]			[vmbus_device_release()]
		 *
		 * LOCK channel_mutex			LOCK channel_mutex
		 * STORE channel relid = valid_relid	LOAD r1 = channel relid
		 * MAP_RELID channel			if (r1 != INVALID_RELID)
		 * UNLOCK channel_mutex			  UNMAP_RELID channel
		 *					UNLOCK channel_mutex
		 *
		 * Forbids: r1 == valid_relid &&
		 *		channels[valid_relid] == channel
		 *
		 * Note.  r1 can be INVALID_RELID only for an hv_sock channel.
		 * None of the hv_sock channels which were present before the
		 * suspend are re-offered upon the resume.  See the WARN_ON()
		 * in hv_process_channel_removal().
		 */
		mutex_lock(&vmbus_connection.channel_mutex);

		atomic_dec(&vmbus_connection.offer_in_progress);

		WARN_ON(oldchannel->offermsg.child_relid != INVALID_RELID);
		/* Fix up the relid. */
		oldchannel->offermsg.child_relid = offer->child_relid;

		offer_sz = sizeof(*offer);
		if (memcmp(offer, &oldchannel->offermsg, offer_sz) != 0) {
			/*
			 * This is not an error, since the host can also change
			 * the other field(s) of the offer, e.g. on WS RS5
			 * (Build 17763), the offer->connection_id of the
			 * Mellanox VF vmbus device can change when the host
			 * reoffers the device upon resume.
			 */
			pr_debug("vmbus offer changed: relid=%d\n",
				 offer->child_relid);

			print_hex_dump_debug("Old vmbus offer: ",
					     DUMP_PREFIX_OFFSET, 16, 4,
					     &oldchannel->offermsg, offer_sz,
					     false);
			print_hex_dump_debug("New vmbus offer: ",
					     DUMP_PREFIX_OFFSET, 16, 4,
					     offer, offer_sz, false);

			/* Fix up the old channel. */
			vmbus_setup_channel_state(oldchannel, offer);
		}

		/* Add the channel back to the array of channels. */
		vmbus_channel_map_relid(oldchannel);
		check_ready_for_resume_event();

		mutex_unlock(&vmbus_connection.channel_mutex);
		return;
	}

	/* Allocate the channel object and save this offer. */
	newchannel = alloc_channel();
	if (!newchannel) {
		vmbus_release_relid(offer->child_relid);
		atomic_dec(&vmbus_connection.offer_in_progress);
		pr_err("Unable to allocate channel object\n");
		return;
	}

	vmbus_setup_channel_state(newchannel, offer);

	vmbus_process_offer(newchannel);
}

static void check_ready_for_suspend_event(void)
{
	/*
	 * If all the sub-channels or hv_sock channels have been cleaned up,
	 * then it's safe to suspend.
	 */
	if (atomic_dec_and_test(&vmbus_connection.nr_chan_close_on_suspend))
		complete(&vmbus_connection.ready_for_suspend_event);
}

/*
 * vmbus_onoffer_rescind - Rescind offer handler.
 *
 * We queue a work item to process this offer synchronously
 */
static void vmbus_onoffer_rescind(struct vmbus_channel_message_header *hdr)
{
	struct vmbus_channel_rescind_offer *rescind;
	struct vmbus_channel *channel;
	struct device *dev;
	bool clean_up_chan_for_suspend;

	rescind = (struct vmbus_channel_rescind_offer *)hdr;

	trace_vmbus_onoffer_rescind(rescind);

	/*
	 * The offer msg and the corresponding rescind msg
	 * from the host are guaranteed to be ordered -
	 * offer comes in first and then the rescind.
	 * Since we process these events in work elements,
	 * and with preemption, we may end up processing
	 * the events out of order.  We rely on the synchronization
	 * provided by offer_in_progress and by channel_mutex for
	 * ordering these events:
	 *
	 * { Initially: offer_in_progress = 1 }
	 *
	 * CPU1				CPU2
	 *
	 * [vmbus_onoffer()]		[vmbus_onoffer_rescind()]
	 *
	 * LOCK channel_mutex		WAIT_ON offer_in_progress == 0
	 * DECREMENT offer_in_progress	LOCK channel_mutex
	 * STORE channels[]		LOAD channels[]
	 * UNLOCK channel_mutex		UNLOCK channel_mutex
	 *
	 * Forbids: CPU2's LOAD from *not* seeing CPU1's STORE
	 */

	while (atomic_read(&vmbus_connection.offer_in_progress) != 0) {
		/*
		 * We wait here until any channel offer is currently
		 * being processed.
		 */
		msleep(1);
	}

	mutex_lock(&vmbus_connection.channel_mutex);
	channel = relid2channel(rescind->child_relid);
	mutex_unlock(&vmbus_connection.channel_mutex);

	if (channel == NULL) {
		/*
		 * We failed in processing the offer message;
		 * we would have cleaned up the relid in that
		 * failure path.
		 */
		return;
	}

	clean_up_chan_for_suspend = is_hvsock_channel(channel) ||
				    is_sub_channel(channel);
	/*
	 * Before setting channel->rescind in vmbus_rescind_cleanup(), we
	 * should make sure the channel callback is not running any more.
	 */
	vmbus_reset_channel_cb(channel);

	/*
	 * Now wait for offer handling to complete.
	 */
	vmbus_rescind_cleanup(channel);
	while (READ_ONCE(channel->probe_done) == false) {
		/*
		 * We wait here until any channel offer is currently
		 * being processed.
		 */
		msleep(1);
	}

	/*
	 * At this point, the rescind handling can proceed safely.
	 */

	if (channel->device_obj) {
		if (channel->chn_rescind_callback) {
			channel->chn_rescind_callback(channel);

			if (clean_up_chan_for_suspend)
				check_ready_for_suspend_event();

			return;
		}
		/*
		 * We will have to unregister this device from the
		 * driver core.
		 */
		dev = get_device(&channel->device_obj->device);
		if (dev) {
			vmbus_device_unregister(channel->device_obj);
			put_device(dev);
		}
	}
	if (channel->primary_channel != NULL) {
		/*
		 * Sub-channel is being rescinded. Following is the channel
		 * close sequence when initiated from the driver (refer to
		 * vmbus_close() for details):
		 * 1. Close all sub-channels first
		 * 2. Then close the primary channel.
		 */
		mutex_lock(&vmbus_connection.channel_mutex);
		if (channel->state == CHANNEL_OPEN_STATE) {
			/*
			 * The channel is currently not open;
			 * it is safe for us to cleanup the channel.
			 */
			hv_process_channel_removal(channel);
		} else {
			complete(&channel->rescind_event);
		}
		mutex_unlock(&vmbus_connection.channel_mutex);
	}

	/* The "channel" may have been freed. Do not access it any longer. */

	if (clean_up_chan_for_suspend)
		check_ready_for_suspend_event();
}

void vmbus_hvsock_device_unregister(struct vmbus_channel *channel)
{
	BUG_ON(!is_hvsock_channel(channel));

	/* We always get a rescind msg when a connection is closed. */
	while (!READ_ONCE(channel->probe_done) || !READ_ONCE(channel->rescind))
		msleep(1);

	vmbus_device_unregister(channel->device_obj);
}
EXPORT_SYMBOL_GPL(vmbus_hvsock_device_unregister);
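
/*
 * Usage note (a sketch, assuming the hv_sock transport as the caller):
 * after a guest-side connection is released, a teardown path would do,
 * roughly:
 *
 *	vmbus_hvsock_device_unregister(chan);
 *
 * which blocks (in the msleep() loop above) until the host's rescind
 * message has been seen, and only then unregisters the device.
 */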

/*
 * vmbus_onoffers_delivered -
 * This is invoked when all offers have been delivered.
 *
 * Nothing to do here.
 */
static void vmbus_onoffers_delivered(
			struct vmbus_channel_message_header *hdr)
{
}

/*
 * vmbus_onopen_result - Open result handler.
 *
 * This is invoked when we received a response to our channel open request.
 * Find the matching request, copy the response and signal the requesting
 * thread.
 */
static void vmbus_onopen_result(struct vmbus_channel_message_header *hdr)
{
	struct vmbus_channel_open_result *result;
	struct vmbus_channel_msginfo *msginfo;
	struct vmbus_channel_message_header *requestheader;
	struct vmbus_channel_open_channel *openmsg;
	unsigned long flags;

	result = (struct vmbus_channel_open_result *)hdr;

	trace_vmbus_onopen_result(result);

	/*
	 * Find the open msg, copy the result and signal/unblock the wait event
	 */
	spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);

	list_for_each_entry(msginfo, &vmbus_connection.chn_msg_list,
				msglistentry) {
		requestheader =
			(struct vmbus_channel_message_header *)msginfo->msg;

		if (requestheader->msgtype == CHANNELMSG_OPENCHANNEL) {
			openmsg =
			(struct vmbus_channel_open_channel *)msginfo->msg;
			if (openmsg->child_relid == result->child_relid &&
			    openmsg->openid == result->openid) {
				memcpy(&msginfo->response.open_result,
				       result,
				       sizeof(
					struct vmbus_channel_open_result));
				complete(&msginfo->waitevent);
				break;
			}
		}
	}
	spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);
}

/*
 * vmbus_ongpadl_created - GPADL created handler.
 *
 * This is invoked when we received a response to our gpadl create request.
 * Find the matching request, copy the response and signal the requesting
 * thread.
 */
static void vmbus_ongpadl_created(struct vmbus_channel_message_header *hdr)
{
	struct vmbus_channel_gpadl_created *gpadlcreated;
	struct vmbus_channel_msginfo *msginfo;
	struct vmbus_channel_message_header *requestheader;
	struct vmbus_channel_gpadl_header *gpadlheader;
	unsigned long flags;

	gpadlcreated = (struct vmbus_channel_gpadl_created *)hdr;

	trace_vmbus_ongpadl_created(gpadlcreated);

	/*
	 * Find the establish msg, copy the result and signal/unblock the wait
	 * event
	 */
	spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);

	list_for_each_entry(msginfo, &vmbus_connection.chn_msg_list,
				msglistentry) {
		requestheader =
			(struct vmbus_channel_message_header *)msginfo->msg;

		if (requestheader->msgtype == CHANNELMSG_GPADL_HEADER) {
			gpadlheader =
			(struct vmbus_channel_gpadl_header *)requestheader;

			if ((gpadlcreated->child_relid ==
			     gpadlheader->child_relid) &&
			    (gpadlcreated->gpadl == gpadlheader->gpadl)) {
				memcpy(&msginfo->response.gpadl_created,
				       gpadlcreated,
				       sizeof(
					struct vmbus_channel_gpadl_created));
				complete(&msginfo->waitevent);
				break;
			}
		}
	}
	spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);
}

/*
 * vmbus_ongpadl_torndown - GPADL torndown handler.
 *
 * This is invoked when we received a response to our gpadl teardown request.
 * Find the matching request, copy the response and signal the requesting
 * thread.
 */
static void vmbus_ongpadl_torndown(
			struct vmbus_channel_message_header *hdr)
{
	struct vmbus_channel_gpadl_torndown *gpadl_torndown;
	struct vmbus_channel_msginfo *msginfo;
	struct vmbus_channel_message_header *requestheader;
	struct vmbus_channel_gpadl_teardown *gpadl_teardown;
	unsigned long flags;

	gpadl_torndown = (struct vmbus_channel_gpadl_torndown *)hdr;

	trace_vmbus_ongpadl_torndown(gpadl_torndown);

	/*
	 * Find the open msg, copy the result and signal/unblock the wait event
	 */
	spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);

	list_for_each_entry(msginfo, &vmbus_connection.chn_msg_list,
				msglistentry) {
		requestheader =
			(struct vmbus_channel_message_header *)msginfo->msg;

		if (requestheader->msgtype == CHANNELMSG_GPADL_TEARDOWN) {
			gpadl_teardown =
			(struct vmbus_channel_gpadl_teardown *)requestheader;

			if (gpadl_torndown->gpadl == gpadl_teardown->gpadl) {
				memcpy(&msginfo->response.gpadl_torndown,
				       gpadl_torndown,
				       sizeof(
					struct vmbus_channel_gpadl_torndown));
				complete(&msginfo->waitevent);
				break;
			}
		}
	}
	spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);
}

/*
 * vmbus_onversion_response - Version response handler
 *
 * This is invoked when we received a response to our initiate contact request.
 * Find the matching request, copy the response and signal the requesting
 * thread.
 */
static void vmbus_onversion_response(
		struct vmbus_channel_message_header *hdr)
{
	struct vmbus_channel_msginfo *msginfo;
	struct vmbus_channel_message_header *requestheader;
	struct vmbus_channel_version_response *version_response;
	unsigned long flags;

	version_response = (struct vmbus_channel_version_response *)hdr;

	trace_vmbus_onversion_response(version_response);

	spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);

	list_for_each_entry(msginfo, &vmbus_connection.chn_msg_list,
				msglistentry) {
		requestheader =
			(struct vmbus_channel_message_header *)msginfo->msg;

		if (requestheader->msgtype ==
		    CHANNELMSG_INITIATE_CONTACT) {
			memcpy(&msginfo->response.version_response,
			       version_response,
			       sizeof(struct vmbus_channel_version_response));
			complete(&msginfo->waitevent);
		}
	}
	spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);
}

/* Channel message dispatch table */
const struct vmbus_channel_message_table_entry
channel_message_table[CHANNELMSG_COUNT] = {
	{ CHANNELMSG_INVALID,			0, NULL, 0},
	{ CHANNELMSG_OFFERCHANNEL,		0, vmbus_onoffer,
		sizeof(struct vmbus_channel_offer_channel)},
	{ CHANNELMSG_RESCIND_CHANNELOFFER,	0, vmbus_onoffer_rescind,
		sizeof(struct vmbus_channel_rescind_offer) },
	{ CHANNELMSG_REQUESTOFFERS,		0, NULL, 0},
	{ CHANNELMSG_ALLOFFERS_DELIVERED,	1, vmbus_onoffers_delivered, 0},
	{ CHANNELMSG_OPENCHANNEL,		0, NULL, 0},
	{ CHANNELMSG_OPENCHANNEL_RESULT,	1, vmbus_onopen_result,
		sizeof(struct vmbus_channel_open_result)},
	{ CHANNELMSG_CLOSECHANNEL,		0, NULL, 0},
	{ CHANNELMSG_GPADL_HEADER,		0, NULL, 0},
	{ CHANNELMSG_GPADL_BODY,		0, NULL, 0},
	{ CHANNELMSG_GPADL_CREATED,		1, vmbus_ongpadl_created,
		sizeof(struct vmbus_channel_gpadl_created)},
	{ CHANNELMSG_GPADL_TEARDOWN,		0, NULL, 0},
	{ CHANNELMSG_GPADL_TORNDOWN,		1, vmbus_ongpadl_torndown,
		sizeof(struct vmbus_channel_gpadl_torndown) },
	{ CHANNELMSG_RELID_RELEASED,		0, NULL, 0},
	{ CHANNELMSG_INITIATE_CONTACT,		0, NULL, 0},
	{ CHANNELMSG_VERSION_RESPONSE,		1, vmbus_onversion_response,
		sizeof(struct vmbus_channel_version_response)},
	{ CHANNELMSG_UNLOAD,			0, NULL, 0},
	{ CHANNELMSG_UNLOAD_RESPONSE,		1, vmbus_unload_response, 0},
	{ CHANNELMSG_18,			0, NULL, 0},
	{ CHANNELMSG_19,			0, NULL, 0},
	{ CHANNELMSG_20,			0, NULL, 0},
	{ CHANNELMSG_TL_CONNECT_REQUEST,	0, NULL, 0},
	{ CHANNELMSG_MODIFYCHANNEL,		0, NULL, 0},
	{ CHANNELMSG_TL_CONNECT_RESULT,		0, NULL, 0},
};

/*
 * vmbus_onmessage - Handler for channel protocol messages.
 *
 * This is invoked in the vmbus worker thread context.
 */
void vmbus_onmessage(struct vmbus_channel_message_header *hdr)
{
	trace_vmbus_on_message(hdr);

	/*
	 * vmbus_on_msg_dpc() makes sure the hdr->msgtype here can not go
	 * out of bound and the message_handler pointer can not be NULL.
	 */
	channel_message_table[hdr->msgtype].message_handler(hdr);
}

/*
 * vmbus_request_offers - Send a request to get all our pending offers.
 */
int vmbus_request_offers(void)
{
	struct vmbus_channel_message_header *msg;
	struct vmbus_channel_msginfo *msginfo;
	int ret;

	msginfo = kmalloc(sizeof(*msginfo) +
			  sizeof(struct vmbus_channel_message_header),
			  GFP_KERNEL);
	if (!msginfo)
		return -ENOMEM;

	msg = (struct vmbus_channel_message_header *)msginfo->msg;

	msg->msgtype = CHANNELMSG_REQUESTOFFERS;

	ret = vmbus_post_msg(msg, sizeof(struct vmbus_channel_message_header),
			     true);

	trace_vmbus_request_offers(ret);

	if (ret != 0)
		pr_err("Unable to request offers - %d\n", ret);

	kfree(msginfo);

	return ret;
}

static void invoke_sc_cb(struct vmbus_channel *primary_channel)
{
	struct list_head *cur, *tmp;
	struct vmbus_channel *cur_channel;

	if (primary_channel->sc_creation_callback == NULL)
		return;

	list_for_each_safe(cur, tmp, &primary_channel->sc_list) {
		cur_channel = list_entry(cur, struct vmbus_channel, sc_list);

		primary_channel->sc_creation_callback(cur_channel);
	}
}

void vmbus_set_sc_create_callback(struct vmbus_channel *primary_channel,
				void (*sc_cr_cb)(struct vmbus_channel *new_sc))
{
	primary_channel->sc_creation_callback = sc_cr_cb;
}
EXPORT_SYMBOL_GPL(vmbus_set_sc_create_callback);
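
/*
 * Example (hedged sketch): a multi-queue VSC driver typically registers
 * this callback after opening the primary channel and before requesting
 * sub-channels from the host. The handler name below is a placeholder:
 *
 *	static void my_sc_created(struct vmbus_channel *new_sc)
 *	{
 *		(runs on handle_sub_chan_wq: open new_sc with vmbus_open()
 *		 and set up its inbound callback here)
 *	}
 *
 *	vmbus_set_sc_create_callback(primary, my_sc_created);
 */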

bool vmbus_are_subchannels_present(struct vmbus_channel *primary)
{
	bool ret;

	ret = !list_empty(&primary->sc_list);

	if (ret) {
		/*
		 * Invoke the callback on sub-channel creation.
		 * This will present a uniform interface to the
		 * clients.
		 */
		invoke_sc_cb(primary);
	}

	return ret;
}
EXPORT_SYMBOL_GPL(vmbus_are_subchannels_present);

void vmbus_set_chn_rescind_callback(struct vmbus_channel *channel,
		void (*chn_rescind_cb)(struct vmbus_channel *))
{
	channel->chn_rescind_callback = chn_rescind_cb;
}
EXPORT_SYMBOL_GPL(vmbus_set_chn_rescind_callback);
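
/*
 * Example (hedged sketch): a driver can pair this with the sub-channel
 * callback to react when the host rescinds a channel; hv_sock, for
 * instance, uses it to tear down the corresponding socket. The handler
 * below is a placeholder:
 *
 *	static void my_chan_rescind(struct vmbus_channel *chan)
 *	{
 *		pr_warn("channel %u rescinded by host\n",
 *			chan->offermsg.child_relid);
 *	}
 *
 *	vmbus_set_chn_rescind_callback(chan, my_chan_rescind);
 */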