/*
 * Copyright (c) 2009, Microsoft Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; if not, see <http://www.gnu.org/licenses/>.
 *
 * Authors:
 *   Haiyang Zhang <haiyangz@microsoft.com>
 *   Hank Janssen <hjanssen@microsoft.com>
 */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/wait.h>
#include <linux/mm.h>
#include <linux/delay.h>
#include <linux/io.h>
#include <linux/slab.h>
#include <linux/netdevice.h>
#include <linux/if_ether.h>
#include <linux/vmalloc.h>
#include <asm/sync_bitops.h>

#include "hyperv_net.h"
/*
 * Switch the data path from the synthetic interface to the VF
 * interface.
 */
void netvsc_switch_datapath(struct net_device *ndev, bool vf)
{
        struct net_device_context *net_device_ctx = netdev_priv(ndev);
        struct hv_device *dev = net_device_ctx->device_ctx;
        struct netvsc_device *nv_dev = net_device_ctx->nvdev;
        struct nvsp_message *init_pkt = &nv_dev->channel_init_pkt;

        memset(init_pkt, 0, sizeof(struct nvsp_message));
        init_pkt->hdr.msg_type = NVSP_MSG4_TYPE_SWITCH_DATA_PATH;
        if (vf)
                init_pkt->msg.v4_msg.active_dp.active_datapath =
                        NVSP_DATAPATH_VF;
        else
                init_pkt->msg.v4_msg.active_dp.active_datapath =
                        NVSP_DATAPATH_SYNTHETIC;

        vmbus_sendpacket(dev->channel, init_pkt,
                         sizeof(struct nvsp_message),
                         (unsigned long)init_pkt,
                         VM_PKT_DATA_INBAND, 0);

        net_device_ctx->datapath = vf;
}
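/*
 * Note: the switch message above is sent without requesting or waiting for a
 * completion from the host; net_device_ctx->datapath simply records which
 * path the driver last asked for.
 */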
static struct netvsc_device *alloc_net_device(void)
{
        struct netvsc_device *net_device;

        net_device = kzalloc(sizeof(struct netvsc_device), GFP_KERNEL);
        if (!net_device)
                return NULL;

        net_device->chan_table[0].mrc.buf
                = vzalloc(NETVSC_RECVSLOT_MAX * sizeof(struct recv_comp_data));

        init_waitqueue_head(&net_device->wait_drain);
        net_device->destroy = false;
        atomic_set(&net_device->open_cnt, 0);
        net_device->max_pkt = RNDIS_MAX_PKT_DEFAULT;
        net_device->pkt_align = RNDIS_PKT_ALIGN_DEFAULT;
        init_completion(&net_device->channel_init_wait);

        return net_device;
}
static void free_netvsc_device(struct rcu_head *head)
{
        struct netvsc_device *nvdev
                = container_of(head, struct netvsc_device, rcu);
        int i;

        for (i = 0; i < VRSS_CHANNEL_MAX; i++)
                vfree(nvdev->chan_table[i].mrc.buf);

        kfree(nvdev);
}

static void free_netvsc_device_rcu(struct netvsc_device *nvdev)
{
        call_rcu(&nvdev->rcu, free_netvsc_device);
}
static void netvsc_destroy_buf(struct hv_device *device)
{
        struct nvsp_message *revoke_packet;
        struct net_device *ndev = hv_get_drvdata(device);
        struct netvsc_device *net_device = net_device_to_netvsc_device(ndev);
        int ret;

        /*
         * If we got a section count, it means we received a
         * SendReceiveBufferComplete msg (ie sent
         * NvspMessage1TypeSendReceiveBuffer msg) therefore, we need
         * to send a revoke msg here
         */
        if (net_device->recv_section_cnt) {
                /* Send the revoke receive buffer */
                revoke_packet = &net_device->revoke_packet;
                memset(revoke_packet, 0, sizeof(struct nvsp_message));

                revoke_packet->hdr.msg_type =
                        NVSP_MSG1_TYPE_REVOKE_RECV_BUF;
                revoke_packet->msg.v1_msg.revoke_recv_buf.id =
                        NETVSC_RECEIVE_BUFFER_ID;

                ret = vmbus_sendpacket(device->channel,
                                       revoke_packet,
                                       sizeof(struct nvsp_message),
                                       (unsigned long)revoke_packet,
                                       VM_PKT_DATA_INBAND, 0);

                /* If the failure is because the channel is rescinded,
                 * ignore the failure since we cannot send on a rescinded
                 * channel. This allows us to properly clean up even when
                 * the channel is rescinded.
                 */
                if (device->channel->rescind)
                        ret = 0;

                /* If we failed here, we might as well return and have a
                 * leak rather than continue and a bugchk.
                 */
                if (ret != 0) {
                        netdev_err(ndev,
                                   "unable to send revoke receive buffer to netvsp\n");
                        return;
                }
        }

        /* Teardown the gpadl on the vsp end */
        if (net_device->recv_buf_gpadl_handle) {
                ret = vmbus_teardown_gpadl(device->channel,
                                           net_device->recv_buf_gpadl_handle);

                /* If we failed here, we might as well return and have a leak
                 * rather than continue and a bugchk.
                 */
                if (ret != 0) {
                        netdev_err(ndev,
                                   "unable to teardown receive buffer's gpadl\n");
                        return;
                }
                net_device->recv_buf_gpadl_handle = 0;
        }

        if (net_device->recv_buf) {
                /* Free up the receive buffer */
                vfree(net_device->recv_buf);
                net_device->recv_buf = NULL;
        }

        if (net_device->recv_section) {
                net_device->recv_section_cnt = 0;
                kfree(net_device->recv_section);
                net_device->recv_section = NULL;
        }

        /* Deal with the send buffer we may have set up.
         * If we got a send section size, it means we received a
         * NVSP_MSG1_TYPE_SEND_SEND_BUF_COMPLETE msg (ie sent
         * NVSP_MSG1_TYPE_SEND_SEND_BUF msg) therefore, we need
         * to send a revoke msg here
         */
        if (net_device->send_section_size) {
                /* Send the revoke send buffer */
                revoke_packet = &net_device->revoke_packet;
                memset(revoke_packet, 0, sizeof(struct nvsp_message));

                revoke_packet->hdr.msg_type =
                        NVSP_MSG1_TYPE_REVOKE_SEND_BUF;
                revoke_packet->msg.v1_msg.revoke_send_buf.id =
                        NETVSC_SEND_BUFFER_ID;

                ret = vmbus_sendpacket(device->channel,
                                       revoke_packet,
                                       sizeof(struct nvsp_message),
                                       (unsigned long)revoke_packet,
                                       VM_PKT_DATA_INBAND, 0);

                /* If the failure is because the channel is rescinded,
                 * ignore the failure since we cannot send on a rescinded
                 * channel. This allows us to properly clean up even when
                 * the channel is rescinded.
                 */
                if (device->channel->rescind)
                        ret = 0;

                /* If we failed here, we might as well return and
                 * have a leak rather than continue and a bugchk.
                 */
                if (ret != 0) {
                        netdev_err(ndev,
                                   "unable to send revoke send buffer to netvsp\n");
                        return;
                }
        }

        /* Teardown the gpadl on the vsp end */
        if (net_device->send_buf_gpadl_handle) {
                ret = vmbus_teardown_gpadl(device->channel,
                                           net_device->send_buf_gpadl_handle);

                /* If we failed here, we might as well return and have a leak
                 * rather than continue and a bugchk.
                 */
                if (ret != 0) {
                        netdev_err(ndev,
                                   "unable to teardown send buffer's gpadl\n");
                        return;
                }
                net_device->send_buf_gpadl_handle = 0;
        }

        if (net_device->send_buf) {
                /* Free up the send buffer */
                vfree(net_device->send_buf);
                net_device->send_buf = NULL;
        }

        kfree(net_device->send_section_map);
}
static int netvsc_init_buf(struct hv_device *device,
                           struct netvsc_device *net_device)
{
        struct nvsp_message *init_packet;
        struct net_device *ndev;
        size_t map_words;
        int node, ret = 0;

        ndev = hv_get_drvdata(device);

        node = cpu_to_node(device->channel->target_cpu);
        net_device->recv_buf = vzalloc_node(net_device->recv_buf_size, node);
        if (!net_device->recv_buf)
                net_device->recv_buf = vzalloc(net_device->recv_buf_size);

        if (!net_device->recv_buf) {
                netdev_err(ndev, "unable to allocate receive buffer of size %d\n",
                           net_device->recv_buf_size);
                ret = -ENOMEM;
                goto cleanup;
        }

        /*
         * Establish the gpadl handle for this buffer on this
         * channel. Note: This call uses the vmbus connection rather
         * than the channel to establish the gpadl handle.
         */
        ret = vmbus_establish_gpadl(device->channel, net_device->recv_buf,
                                    net_device->recv_buf_size,
                                    &net_device->recv_buf_gpadl_handle);
        if (ret != 0) {
                netdev_err(ndev,
                           "unable to establish receive buffer's gpadl\n");
                goto cleanup;
        }

        /* Notify the NetVsp of the gpadl handle */
        init_packet = &net_device->channel_init_pkt;
        memset(init_packet, 0, sizeof(struct nvsp_message));
        init_packet->hdr.msg_type = NVSP_MSG1_TYPE_SEND_RECV_BUF;
        init_packet->msg.v1_msg.send_recv_buf.gpadl_handle =
                net_device->recv_buf_gpadl_handle;
        init_packet->msg.v1_msg.send_recv_buf.id = NETVSC_RECEIVE_BUFFER_ID;

        /* Send the gpadl notification request */
        ret = vmbus_sendpacket(device->channel, init_packet,
                               sizeof(struct nvsp_message),
                               (unsigned long)init_packet,
                               VM_PKT_DATA_INBAND,
                               VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
        if (ret != 0) {
                netdev_err(ndev,
                           "unable to send receive buffer's gpadl to netvsp\n");
                goto cleanup;
        }

        wait_for_completion(&net_device->channel_init_wait);

        /* Check the response */
        if (init_packet->msg.v1_msg.
            send_recv_buf_complete.status != NVSP_STAT_SUCCESS) {
                netdev_err(ndev,
                           "Unable to complete receive buffer initialization with NetVsp - status %d\n",
                           init_packet->msg.v1_msg.send_recv_buf_complete.status);
                ret = -EINVAL;
                goto cleanup;
        }

        /* Parse the response */
        net_device->recv_section_cnt = init_packet->msg.
                v1_msg.send_recv_buf_complete.num_sections;

        net_device->recv_section = kmemdup(
                init_packet->msg.v1_msg.send_recv_buf_complete.sections,
                net_device->recv_section_cnt *
                        sizeof(struct nvsp_1_receive_buffer_section),
                GFP_KERNEL);
        if (net_device->recv_section == NULL) {
                ret = -EINVAL;
                goto cleanup;
        }

        /*
         * For 1st release, there should only be 1 section that represents the
         * entire receive buffer
         */
        if (net_device->recv_section_cnt != 1 ||
            net_device->recv_section->offset != 0) {
                ret = -EINVAL;
                goto cleanup;
        }

        /* Now setup the send buffer. */
        net_device->send_buf = vzalloc_node(net_device->send_buf_size, node);
        if (!net_device->send_buf)
                net_device->send_buf = vzalloc(net_device->send_buf_size);
        if (!net_device->send_buf) {
                netdev_err(ndev, "unable to allocate send buffer of size %d\n",
                           net_device->send_buf_size);
                ret = -ENOMEM;
                goto cleanup;
        }

        /* Establish the gpadl handle for this buffer on this
         * channel. Note: This call uses the vmbus connection rather
         * than the channel to establish the gpadl handle.
         */
        ret = vmbus_establish_gpadl(device->channel, net_device->send_buf,
                                    net_device->send_buf_size,
                                    &net_device->send_buf_gpadl_handle);
        if (ret != 0) {
                netdev_err(ndev,
                           "unable to establish send buffer's gpadl\n");
                goto cleanup;
        }

        /* Notify the NetVsp of the gpadl handle */
        init_packet = &net_device->channel_init_pkt;
        memset(init_packet, 0, sizeof(struct nvsp_message));
        init_packet->hdr.msg_type = NVSP_MSG1_TYPE_SEND_SEND_BUF;
        init_packet->msg.v1_msg.send_send_buf.gpadl_handle =
                net_device->send_buf_gpadl_handle;
        init_packet->msg.v1_msg.send_send_buf.id = NETVSC_SEND_BUFFER_ID;

        /* Send the gpadl notification request */
        ret = vmbus_sendpacket(device->channel, init_packet,
                               sizeof(struct nvsp_message),
                               (unsigned long)init_packet,
                               VM_PKT_DATA_INBAND,
                               VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
        if (ret != 0) {
                netdev_err(ndev,
                           "unable to send send buffer's gpadl to netvsp\n");
                goto cleanup;
        }

        wait_for_completion(&net_device->channel_init_wait);

        /* Check the response */
        if (init_packet->msg.v1_msg.
            send_send_buf_complete.status != NVSP_STAT_SUCCESS) {
                netdev_err(ndev,
                           "Unable to complete send buffer initialization with NetVsp - status %d\n",
                           init_packet->msg.v1_msg.send_send_buf_complete.status);
                ret = -EINVAL;
                goto cleanup;
        }

        /* Parse the response */
        net_device->send_section_size = init_packet->msg.
                v1_msg.send_send_buf_complete.section_size;

        /* Section count is simply the size divided by the section size. */
        net_device->send_section_cnt =
                net_device->send_buf_size / net_device->send_section_size;

        netdev_dbg(ndev, "Send section size: %d, Section count:%d\n",
                   net_device->send_section_size, net_device->send_section_cnt);

        /* Setup state for managing the send buffer. */
        map_words = DIV_ROUND_UP(net_device->send_section_cnt, BITS_PER_LONG);

        net_device->send_section_map = kcalloc(map_words, sizeof(ulong), GFP_KERNEL);
        if (net_device->send_section_map == NULL) {
                ret = -ENOMEM;
                goto cleanup;
        }

        goto exit;

cleanup:
        netvsc_destroy_buf(device);

exit:
        return ret;
}
/* Negotiate NVSP protocol version */
static int negotiate_nvsp_ver(struct hv_device *device,
                              struct netvsc_device *net_device,
                              struct nvsp_message *init_packet,
                              u32 nvsp_ver)
{
        struct net_device *ndev = hv_get_drvdata(device);
        int ret;

        memset(init_packet, 0, sizeof(struct nvsp_message));
        init_packet->hdr.msg_type = NVSP_MSG_TYPE_INIT;
        init_packet->msg.init_msg.init.min_protocol_ver = nvsp_ver;
        init_packet->msg.init_msg.init.max_protocol_ver = nvsp_ver;

        /* Send the init request */
        ret = vmbus_sendpacket(device->channel, init_packet,
                               sizeof(struct nvsp_message),
                               (unsigned long)init_packet,
                               VM_PKT_DATA_INBAND,
                               VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
        if (ret != 0)
                return ret;

        wait_for_completion(&net_device->channel_init_wait);

        if (init_packet->msg.init_msg.init_complete.status !=
            NVSP_STAT_SUCCESS)
                return -EINVAL;

        if (nvsp_ver == NVSP_PROTOCOL_VERSION_1)
                return 0;

        /* NVSPv2 or later: Send NDIS config */
        memset(init_packet, 0, sizeof(struct nvsp_message));
        init_packet->hdr.msg_type = NVSP_MSG2_TYPE_SEND_NDIS_CONFIG;
        init_packet->msg.v2_msg.send_ndis_config.mtu = ndev->mtu + ETH_HLEN;
        init_packet->msg.v2_msg.send_ndis_config.capability.ieee8021q = 1;

        if (nvsp_ver >= NVSP_PROTOCOL_VERSION_5) {
                init_packet->msg.v2_msg.send_ndis_config.capability.sriov = 1;

                /* Teaming bit is needed to receive link speed updates */
                init_packet->msg.v2_msg.send_ndis_config.capability.teaming = 1;
        }

        ret = vmbus_sendpacket(device->channel, init_packet,
                               sizeof(struct nvsp_message),
                               (unsigned long)init_packet,
                               VM_PKT_DATA_INBAND, 0);

        return ret;
}
static int netvsc_connect_vsp(struct hv_device *device,
                              struct netvsc_device *net_device)
{
        const u32 ver_list[] = {
                NVSP_PROTOCOL_VERSION_1, NVSP_PROTOCOL_VERSION_2,
                NVSP_PROTOCOL_VERSION_4, NVSP_PROTOCOL_VERSION_5
        };
        struct nvsp_message *init_packet;
        int ndis_version, i, ret;

        init_packet = &net_device->channel_init_pkt;

        /* Negotiate the latest NVSP protocol supported */
        for (i = ARRAY_SIZE(ver_list) - 1; i >= 0; i--)
                if (negotiate_nvsp_ver(device, net_device, init_packet,
                                       ver_list[i]) == 0) {
                        net_device->nvsp_version = ver_list[i];
                        break;
                }

        if (i < 0) {
                ret = -EPROTO;
                goto cleanup;
        }

        pr_debug("Negotiated NVSP version:%x\n", net_device->nvsp_version);

        /* Send the ndis version */
        memset(init_packet, 0, sizeof(struct nvsp_message));

        if (net_device->nvsp_version <= NVSP_PROTOCOL_VERSION_4)
                ndis_version = 0x00060001;
        else
                ndis_version = 0x0006001e;

        init_packet->hdr.msg_type = NVSP_MSG1_TYPE_SEND_NDIS_VER;
        init_packet->msg.v1_msg.send_ndis_ver.ndis_major_ver =
                (ndis_version & 0xFFFF0000) >> 16;
        init_packet->msg.v1_msg.send_ndis_ver.ndis_minor_ver =
                ndis_version & 0xFFFF;

        /* Send the init request */
        ret = vmbus_sendpacket(device->channel, init_packet,
                               sizeof(struct nvsp_message),
                               (unsigned long)init_packet,
                               VM_PKT_DATA_INBAND, 0);
        if (ret != 0)
                goto cleanup;

        /* Post the big receive buffer to NetVSP */
        if (net_device->nvsp_version <= NVSP_PROTOCOL_VERSION_2)
                net_device->recv_buf_size = NETVSC_RECEIVE_BUFFER_SIZE_LEGACY;
        else
                net_device->recv_buf_size = NETVSC_RECEIVE_BUFFER_SIZE;
        net_device->send_buf_size = NETVSC_SEND_BUFFER_SIZE;

        ret = netvsc_init_buf(device, net_device);

cleanup:
        return ret;
}
static void netvsc_disconnect_vsp(struct hv_device *device)
{
        netvsc_destroy_buf(device);
}
/*
 * netvsc_device_remove - Callback when the root bus device is removed
 */
void netvsc_device_remove(struct hv_device *device)
{
        struct net_device *ndev = hv_get_drvdata(device);
        struct net_device_context *net_device_ctx = netdev_priv(ndev);
        struct netvsc_device *net_device = net_device_ctx->nvdev;
        int i;

        netvsc_disconnect_vsp(device);

        RCU_INIT_POINTER(net_device_ctx->nvdev, NULL);

        /*
         * At this point, no one should be accessing net_device
         * except in here
         */
        netdev_dbg(ndev, "net device safe to remove\n");

        /* Now, we can close the channel safely */
        vmbus_close(device->channel);

        /* And disassociate NAPI context from device */
        for (i = 0; i < net_device->num_chn; i++)
                netif_napi_del(&net_device->chan_table[i].napi);

        /* Release all resources */
        free_netvsc_device_rcu(net_device);
}
#define RING_AVAIL_PERCENT_HIWATER 20
#define RING_AVAIL_PERCENT_LOWATER 10
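/*
 * These watermarks implement simple flow control on the VMBus send ring:
 * netvsc_send_pkt() stops a transmit queue once the ring falls below
 * RING_AVAIL_PERCENT_LOWATER percent free, and netvsc_send_tx_complete()
 * wakes it again once completions have drained the ring back above
 * RING_AVAIL_PERCENT_HIWATER percent free (or no sends remain outstanding).
 */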
/*
 * Get the percentage of available bytes to write in the ring.
 * The return value is in range from 0 to 100.
 */
static inline u32 hv_ringbuf_avail_percent(
                struct hv_ring_buffer_info *ring_info)
{
        u32 avail_read, avail_write;

        hv_get_ringbuffer_availbytes(ring_info, &avail_read, &avail_write);

        return avail_write * 100 / ring_info->ring_datasize;
}
static inline void netvsc_free_send_slot(struct netvsc_device *net_device,
                                         u32 index)
{
        sync_change_bit(index, net_device->send_section_map);
}
static void netvsc_send_tx_complete(struct netvsc_device *net_device,
                                    struct vmbus_channel *incoming_channel,
                                    struct hv_device *device,
                                    const struct vmpacket_descriptor *desc,
                                    int budget)
{
        struct sk_buff *skb = (struct sk_buff *)(unsigned long)desc->trans_id;
        struct net_device *ndev = hv_get_drvdata(device);
        struct vmbus_channel *channel = device->channel;
        u16 q_idx = 0;
        int queue_sends;

        /* Notify the layer above us */
        if (likely(skb)) {
                const struct hv_netvsc_packet *packet
                        = (struct hv_netvsc_packet *)skb->cb;
                u32 send_index = packet->send_buf_index;
                struct netvsc_stats *tx_stats;

                if (send_index != NETVSC_INVALID_INDEX)
                        netvsc_free_send_slot(net_device, send_index);
                q_idx = packet->q_idx;
                channel = incoming_channel;

                tx_stats = &net_device->chan_table[q_idx].tx_stats;

                u64_stats_update_begin(&tx_stats->syncp);
                tx_stats->packets += packet->total_packets;
                tx_stats->bytes += packet->total_bytes;
                u64_stats_update_end(&tx_stats->syncp);

                napi_consume_skb(skb, budget);
        }

        queue_sends =
                atomic_dec_return(&net_device->chan_table[q_idx].queue_sends);

        if (net_device->destroy && queue_sends == 0)
                wake_up(&net_device->wait_drain);

        if (netif_tx_queue_stopped(netdev_get_tx_queue(ndev, q_idx)) &&
            (hv_ringbuf_avail_percent(&channel->outbound) > RING_AVAIL_PERCENT_HIWATER ||
             queue_sends < 1))
                netif_tx_wake_queue(netdev_get_tx_queue(ndev, q_idx));
}
static void netvsc_send_completion(struct netvsc_device *net_device,
                                   struct vmbus_channel *incoming_channel,
                                   struct hv_device *device,
                                   const struct vmpacket_descriptor *desc,
                                   int budget)
{
        struct nvsp_message *nvsp_packet = hv_pkt_data(desc);
        struct net_device *ndev = hv_get_drvdata(device);

        switch (nvsp_packet->hdr.msg_type) {
        case NVSP_MSG_TYPE_INIT_COMPLETE:
        case NVSP_MSG1_TYPE_SEND_RECV_BUF_COMPLETE:
        case NVSP_MSG1_TYPE_SEND_SEND_BUF_COMPLETE:
        case NVSP_MSG5_TYPE_SUBCHANNEL:
                /* Copy the response back */
                memcpy(&net_device->channel_init_pkt, nvsp_packet,
                       sizeof(struct nvsp_message));
                complete(&net_device->channel_init_wait);
                break;

        case NVSP_MSG1_TYPE_SEND_RNDIS_PKT_COMPLETE:
                netvsc_send_tx_complete(net_device, incoming_channel,
                                        device, desc, budget);
                break;

        default:
                netdev_err(ndev,
                           "Unknown send completion type %d received!!\n",
                           nvsp_packet->hdr.msg_type);
        }
}
static u32 netvsc_get_next_send_section(struct netvsc_device *net_device)
{
        unsigned long *map_addr = net_device->send_section_map;
        unsigned int i;

        for_each_clear_bit(i, map_addr, net_device->send_section_cnt) {
                if (sync_test_and_set_bit(i, map_addr) == 0)
                        return i;
        }

        return NETVSC_INVALID_INDEX;
}
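/*
 * send_section_map is a bitmap with one bit per section of the send buffer.
 * Sections are claimed lock-free above with sync_test_and_set_bit() and
 * released in netvsc_free_send_slot() with sync_change_bit(), so several
 * transmit queues can allocate sections concurrently. As a purely
 * illustrative sizing example, 170 sections would need
 * DIV_ROUND_UP(170, 64) = 3 longs of bitmap on a 64-bit kernel.
 */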
static u32 netvsc_copy_to_send_buf(struct netvsc_device *net_device,
                                   unsigned int section_index,
                                   u32 pend_size,
                                   struct hv_netvsc_packet *packet,
                                   struct rndis_message *rndis_msg,
                                   struct hv_page_buffer **pb,
                                   struct sk_buff *skb)
{
        char *start = net_device->send_buf;
        char *dest = start + (section_index * net_device->send_section_size)
                     + pend_size;
        int i;
        u32 msg_size = 0;
        u32 padding = 0;
        u32 remain = packet->total_data_buflen % net_device->pkt_align;
        u32 page_count = packet->cp_partial ? packet->rmsg_pgcnt :
                packet->page_buf_cnt;

        /* Add padding */
        if (skb->xmit_more && remain && !packet->cp_partial) {
                padding = net_device->pkt_align - remain;
                rndis_msg->msg_len += padding;
                packet->total_data_buflen += padding;
        }

        for (i = 0; i < page_count; i++) {
                char *src = phys_to_virt((*pb)[i].pfn << PAGE_SHIFT);
                u32 offset = (*pb)[i].offset;
                u32 len = (*pb)[i].len;

                memcpy(dest, (src + offset), len);
                msg_size += len;
                dest += len;
        }

        if (padding) {
                memset(dest, 0, padding);
                msg_size += padding;
        }

        return msg_size;
}
static inline int netvsc_send_pkt(
        struct hv_device *device,
        struct hv_netvsc_packet *packet,
        struct netvsc_device *net_device,
        struct hv_page_buffer **pb,
        struct sk_buff *skb)
{
        struct nvsp_message nvmsg;
        struct netvsc_channel *nvchan
                = &net_device->chan_table[packet->q_idx];
        struct vmbus_channel *out_channel = nvchan->channel;
        struct net_device *ndev = hv_get_drvdata(device);
        struct netdev_queue *txq = netdev_get_tx_queue(ndev, packet->q_idx);
        u64 req_id;
        int ret;
        struct hv_page_buffer *pgbuf;
        u32 ring_avail = hv_ringbuf_avail_percent(&out_channel->outbound);

        nvmsg.hdr.msg_type = NVSP_MSG1_TYPE_SEND_RNDIS_PKT;
        if (skb != NULL) {
                /* 0 is RMC_DATA; */
                nvmsg.msg.v1_msg.send_rndis_pkt.channel_type = 0;
        } else {
                /* 1 is RMC_CONTROL; */
                nvmsg.msg.v1_msg.send_rndis_pkt.channel_type = 1;
        }

        nvmsg.msg.v1_msg.send_rndis_pkt.send_buf_section_index =
                packet->send_buf_index;
        if (packet->send_buf_index == NETVSC_INVALID_INDEX)
                nvmsg.msg.v1_msg.send_rndis_pkt.send_buf_section_size = 0;
        else
                nvmsg.msg.v1_msg.send_rndis_pkt.send_buf_section_size =
                        packet->total_data_buflen;

        req_id = (unsigned long)skb;

        if (out_channel->rescind)
                return -ENODEV;

        if (packet->page_buf_cnt) {
                pgbuf = packet->cp_partial ? (*pb) +
                        packet->rmsg_pgcnt : (*pb);
                ret = vmbus_sendpacket_pagebuffer_ctl(out_channel,
                                                      pgbuf,
                                                      packet->page_buf_cnt,
                                                      &nvmsg,
                                                      sizeof(struct nvsp_message),
                                                      req_id,
                                                      VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
        } else {
                ret = vmbus_sendpacket_ctl(out_channel, &nvmsg,
                                           sizeof(struct nvsp_message),
                                           req_id,
                                           VM_PKT_DATA_INBAND,
                                           VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
        }

        if (ret == 0) {
                atomic_inc_return(&nvchan->queue_sends);

                if (ring_avail < RING_AVAIL_PERCENT_LOWATER)
                        netif_tx_stop_queue(txq);
        } else if (ret == -EAGAIN) {
                netif_tx_stop_queue(txq);
                if (atomic_read(&nvchan->queue_sends) < 1) {
                        netif_tx_wake_queue(txq);
                        ret = -ENOSPC;
                }
        } else {
                netdev_err(ndev, "Unable to send packet %p ret %d\n",
                           packet, ret);
        }

        return ret;
}
/* Move packet out of multi send data (msd), and clear msd */
static inline void move_pkt_msd(struct hv_netvsc_packet **msd_send,
                                struct sk_buff **msd_skb,
                                struct multi_send_data *msdp)
{
        *msd_skb = msdp->skb;
        *msd_send = msdp->pkt;
        msdp->skb = NULL;
        msdp->pkt = NULL;
        msdp->count = 0;
}
int netvsc_send(struct hv_device *device,
                struct hv_netvsc_packet *packet,
                struct rndis_message *rndis_msg,
                struct hv_page_buffer **pb,
                struct sk_buff *skb)
{
        struct netvsc_device *net_device = hv_device_to_netvsc_device(device);
        int ret = 0;
        struct netvsc_channel *nvchan;
        u32 pktlen = packet->total_data_buflen, msd_len = 0;
        unsigned int section_index = NETVSC_INVALID_INDEX;
        struct multi_send_data *msdp;
        struct hv_netvsc_packet *msd_send = NULL, *cur_send = NULL;
        struct sk_buff *msd_skb = NULL;
        bool try_batch;
        bool xmit_more = (skb != NULL) ? skb->xmit_more : false;

        /* If device is rescinded, return error and packet will get dropped. */
        if (unlikely(net_device->destroy))
                return -ENODEV;

        /* We may race with netvsc_connect_vsp()/netvsc_init_buf() and get
         * here before the negotiation with the host is finished and
         * send_section_map may not be allocated yet.
         */
        if (unlikely(!net_device->send_section_map))
                return -EAGAIN;

        nvchan = &net_device->chan_table[packet->q_idx];
        packet->send_buf_index = NETVSC_INVALID_INDEX;
        packet->cp_partial = false;

        /* Send control message directly without accessing msd (Multi-Send
         * Data) field which may be changed during data packet processing.
         */
        if (!skb) {
                cur_send = packet;
                goto send_now;
        }

        /* batch packets in send buffer if possible */
        msdp = &nvchan->msd;
        if (msdp->pkt)
                msd_len = msdp->pkt->total_data_buflen;

        try_batch = msd_len > 0 && msdp->count < net_device->max_pkt;
        if (try_batch && msd_len + pktlen + net_device->pkt_align <
            net_device->send_section_size) {
                section_index = msdp->pkt->send_buf_index;

        } else if (try_batch && msd_len + packet->rmsg_size <
                   net_device->send_section_size) {
                section_index = msdp->pkt->send_buf_index;
                packet->cp_partial = true;

        } else if (pktlen + net_device->pkt_align <
                   net_device->send_section_size) {
                section_index = netvsc_get_next_send_section(net_device);
                if (section_index != NETVSC_INVALID_INDEX) {
                        move_pkt_msd(&msd_send, &msd_skb, msdp);
                        msd_len = 0;
                }
        }

        if (section_index != NETVSC_INVALID_INDEX) {
                netvsc_copy_to_send_buf(net_device,
                                        section_index, msd_len,
                                        packet, rndis_msg, pb, skb);

                packet->send_buf_index = section_index;

                if (packet->cp_partial) {
                        packet->page_buf_cnt -= packet->rmsg_pgcnt;
                        packet->total_data_buflen = msd_len + packet->rmsg_size;
                } else {
                        packet->page_buf_cnt = 0;
                        packet->total_data_buflen += msd_len;
                }

                if (msdp->pkt) {
                        packet->total_packets += msdp->pkt->total_packets;
                        packet->total_bytes += msdp->pkt->total_bytes;
                }

                if (msdp->skb)
                        dev_consume_skb_any(msdp->skb);

                if (xmit_more && !packet->cp_partial) {
                        msdp->skb = skb;
                        msdp->pkt = packet;
                        msdp->count++;
                } else {
                        cur_send = packet;
                        msdp->skb = NULL;
                        msdp->pkt = NULL;
                        msdp->count = 0;
                }
        } else {
                move_pkt_msd(&msd_send, &msd_skb, msdp);
                cur_send = packet;
        }

        if (msd_send) {
                int m_ret = netvsc_send_pkt(device, msd_send, net_device,
                                            NULL, msd_skb);

                if (m_ret != 0) {
                        netvsc_free_send_slot(net_device,
                                              msd_send->send_buf_index);
                        dev_kfree_skb_any(msd_skb);
                }
        }

send_now:
        if (cur_send)
                ret = netvsc_send_pkt(device, cur_send, net_device, pb, skb);

        if (ret != 0 && section_index != NETVSC_INVALID_INDEX)
                netvsc_free_send_slot(net_device, section_index);

        return ret;
}
static int netvsc_send_recv_completion(struct vmbus_channel *channel,
                                       u64 transaction_id, u32 status)
{
        struct nvsp_message recvcompMessage;
        int ret;

        recvcompMessage.hdr.msg_type =
                NVSP_MSG1_TYPE_SEND_RNDIS_PKT_COMPLETE;

        recvcompMessage.msg.v1_msg.send_rndis_pkt_complete.status = status;

        /* Send the completion */
        ret = vmbus_sendpacket(channel, &recvcompMessage,
                               sizeof(struct nvsp_message_header) + sizeof(u32),
                               transaction_id, VM_PKT_COMP, 0);

        return ret;
}
static inline void count_recv_comp_slot(struct netvsc_device *nvdev, u16 q_idx,
                                        u32 *filled, u32 *avail)
{
        struct multi_recv_comp *mrc = &nvdev->chan_table[q_idx].mrc;
        u32 first = mrc->first;
        u32 next = mrc->next;

        *filled = (first > next) ? NETVSC_RECVSLOT_MAX - first + next :
                  next - first;

        *avail = NETVSC_RECVSLOT_MAX - *filled - 1;
}
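/*
 * mrc->first and mrc->next index a circular buffer of NETVSC_RECVSLOT_MAX
 * receive-completion slots; "first" is the oldest pending completion and
 * "next" is the slot the next completion will be written to. For example
 * (illustrative numbers only), with first = 5 and next = 2 the wrap-around
 * case gives filled = NETVSC_RECVSLOT_MAX - 5 + 2, and one slot is always
 * kept unused, so avail = NETVSC_RECVSLOT_MAX - filled - 1.
 */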
/* Read the first filled slot, no change to index */
static inline struct recv_comp_data *read_recv_comp_slot(struct netvsc_device
                                                          *nvdev, u16 q_idx)
{
        struct multi_recv_comp *mrc = &nvdev->chan_table[q_idx].mrc;
        u32 filled, avail;

        if (unlikely(!mrc->buf))
                return NULL;

        count_recv_comp_slot(nvdev, q_idx, &filled, &avail);
        if (!filled)
                return NULL;

        return mrc->buf + mrc->first * sizeof(struct recv_comp_data);
}
/* Put the first filled slot back to available pool */
static inline void put_recv_comp_slot(struct netvsc_device *nvdev, u16 q_idx)
{
        struct multi_recv_comp *mrc = &nvdev->chan_table[q_idx].mrc;
        int num_recv;

        mrc->first = (mrc->first + 1) % NETVSC_RECVSLOT_MAX;

        num_recv = atomic_dec_return(&nvdev->num_outstanding_recvs);

        if (nvdev->destroy && num_recv == 0)
                wake_up(&nvdev->wait_drain);
}
/* Check and send pending recv completions */
static void netvsc_chk_recv_comp(struct netvsc_device *nvdev,
                                 struct vmbus_channel *channel, u16 q_idx)
{
        struct recv_comp_data *rcd;
        int ret;

        while (true) {
                rcd = read_recv_comp_slot(nvdev, q_idx);
                if (!rcd)
                        break;

                ret = netvsc_send_recv_completion(channel, rcd->tid,
                                                  rcd->status);
                if (ret)
                        break;

                put_recv_comp_slot(nvdev, q_idx);
        }
}
#define NETVSC_RCD_WATERMARK 80

/* Get next available slot */
static inline struct recv_comp_data *get_recv_comp_slot(
        struct netvsc_device *nvdev, struct vmbus_channel *channel, u16 q_idx)
{
        struct multi_recv_comp *mrc = &nvdev->chan_table[q_idx].mrc;
        u32 filled, avail, next;
        struct recv_comp_data *rcd;

        if (unlikely(!nvdev->recv_section))
                return NULL;

        if (unlikely(!mrc->buf))
                return NULL;

        if (atomic_read(&nvdev->num_outstanding_recvs) >
            nvdev->recv_section->num_sub_allocs * NETVSC_RCD_WATERMARK / 100)
                netvsc_chk_recv_comp(nvdev, channel, q_idx);

        count_recv_comp_slot(nvdev, q_idx, &filled, &avail);
        if (!avail)
                return NULL;

        next = mrc->next;
        rcd = mrc->buf + next * sizeof(struct recv_comp_data);
        mrc->next = (next + 1) % NETVSC_RECVSLOT_MAX;

        atomic_inc(&nvdev->num_outstanding_recvs);

        return rcd;
}
static int netvsc_receive(struct net_device *ndev,
                          struct netvsc_device *net_device,
                          struct net_device_context *net_device_ctx,
                          struct hv_device *device,
                          struct vmbus_channel *channel,
                          const struct vmpacket_descriptor *desc,
                          struct nvsp_message *nvsp)
{
        const struct vmtransfer_page_packet_header *vmxferpage_packet
                = container_of(desc, const struct vmtransfer_page_packet_header, d);
        u16 q_idx = channel->offermsg.offer.sub_channel_index;
        char *recv_buf = net_device->recv_buf;
        u32 status = NVSP_STAT_SUCCESS;
        int i;
        int count = 0;
        int ret;

        /* Make sure this is a valid nvsp packet */
        if (unlikely(nvsp->hdr.msg_type != NVSP_MSG1_TYPE_SEND_RNDIS_PKT)) {
                netif_err(net_device_ctx, rx_err, ndev,
                          "Unknown nvsp packet type received %u\n",
                          nvsp->hdr.msg_type);
                return 0;
        }

        if (unlikely(vmxferpage_packet->xfer_pageset_id != NETVSC_RECEIVE_BUFFER_ID)) {
                netif_err(net_device_ctx, rx_err, ndev,
                          "Invalid xfer page set id - expecting %x got %x\n",
                          NETVSC_RECEIVE_BUFFER_ID,
                          vmxferpage_packet->xfer_pageset_id);
                return 0;
        }

        count = vmxferpage_packet->range_cnt;

        /* Each range represents 1 RNDIS pkt that contains 1 ethernet frame */
        for (i = 0; i < count; i++) {
                void *data = recv_buf
                        + vmxferpage_packet->ranges[i].byte_offset;
                u32 buflen = vmxferpage_packet->ranges[i].byte_count;

                /* Pass it to the upper layer */
                status = rndis_filter_receive(ndev, net_device, device,
                                              channel, data, buflen);
        }

        if (net_device->chan_table[q_idx].mrc.buf) {
                struct recv_comp_data *rcd;

                rcd = get_recv_comp_slot(net_device, channel, q_idx);
                if (rcd) {
                        rcd->tid = vmxferpage_packet->d.trans_id;
                        rcd->status = status;
                } else {
                        netdev_err(ndev, "Recv_comp full buf q:%hd, tid:%llx\n",
                                   q_idx, vmxferpage_packet->d.trans_id);
                }
        } else {
                ret = netvsc_send_recv_completion(channel,
                                                  vmxferpage_packet->d.trans_id,
                                                  status);
                if (ret)
                        netdev_err(ndev, "Recv_comp q:%hd, tid:%llx, err:%d\n",
                                   q_idx, vmxferpage_packet->d.trans_id, ret);
        }

        return count;
}
static void netvsc_send_table(struct hv_device *hdev,
                              struct nvsp_message *nvmsg)
{
        struct net_device *ndev = hv_get_drvdata(hdev);
        struct net_device_context *net_device_ctx = netdev_priv(ndev);
        int i;
        u32 count, *tab;

        count = nvmsg->msg.v5_msg.send_table.count;
        if (count != VRSS_SEND_TAB_SIZE) {
                netdev_err(ndev, "Received wrong send-table size:%u\n", count);
                return;
        }

        tab = (u32 *)((unsigned long)&nvmsg->msg.v5_msg.send_table +
                      nvmsg->msg.v5_msg.send_table.offset);

        for (i = 0; i < count; i++)
                net_device_ctx->tx_send_table[i] = tab[i];
}
static void netvsc_send_vf(struct net_device_context *net_device_ctx,
                           struct nvsp_message *nvmsg)
{
        net_device_ctx->vf_alloc = nvmsg->msg.v4_msg.vf_assoc.allocated;
        net_device_ctx->vf_serial = nvmsg->msg.v4_msg.vf_assoc.serial;
}
static inline void netvsc_receive_inband(struct hv_device *hdev,
                                         struct net_device_context *net_device_ctx,
                                         struct nvsp_message *nvmsg)
{
        switch (nvmsg->hdr.msg_type) {
        case NVSP_MSG5_TYPE_SEND_INDIRECTION_TABLE:
                netvsc_send_table(hdev, nvmsg);
                break;

        case NVSP_MSG4_TYPE_SEND_VF_ASSOCIATION:
                netvsc_send_vf(net_device_ctx, nvmsg);
                break;
        }
}
static int netvsc_process_raw_pkt(struct hv_device *device,
                                  struct vmbus_channel *channel,
                                  struct netvsc_device *net_device,
                                  struct net_device *ndev,
                                  const struct vmpacket_descriptor *desc,
                                  int budget)
{
        struct net_device_context *net_device_ctx = netdev_priv(ndev);
        struct nvsp_message *nvmsg = hv_pkt_data(desc);

        switch (desc->type) {
        case VM_PKT_COMP:
                netvsc_send_completion(net_device, channel, device,
                                       desc, budget);
                break;

        case VM_PKT_DATA_USING_XFER_PAGES:
                return netvsc_receive(ndev, net_device, net_device_ctx,
                                      device, channel, desc, nvmsg);

        case VM_PKT_DATA_INBAND:
                netvsc_receive_inband(device, net_device_ctx, nvmsg);
                break;

        default:
                netdev_err(ndev, "unhandled packet type %d, tid %llx\n",
                           desc->type, desc->trans_id);
                break;
        }

        return 0;
}
static struct hv_device *netvsc_channel_to_device(struct vmbus_channel *channel)
{
        struct vmbus_channel *primary = channel->primary_channel;

        return primary ? primary->device_obj : channel->device_obj;
}
/* Network processing softirq
 * Process data in incoming ring buffer from host
 * Stops when ring is empty or budget is met or exceeded.
 */
int netvsc_poll(struct napi_struct *napi, int budget)
{
        struct netvsc_channel *nvchan
                = container_of(napi, struct netvsc_channel, napi);
        struct vmbus_channel *channel = nvchan->channel;
        struct hv_device *device = netvsc_channel_to_device(channel);
        u16 q_idx = channel->offermsg.offer.sub_channel_index;
        struct net_device *ndev = hv_get_drvdata(device);
        struct netvsc_device *net_device = net_device_to_netvsc_device(ndev);
        int work_done = 0;

        /* If starting a new interval */
        if (!nvchan->desc)
                nvchan->desc = hv_pkt_iter_first(channel);

        while (nvchan->desc && work_done < budget) {
                work_done += netvsc_process_raw_pkt(device, channel, net_device,
                                                    ndev, nvchan->desc, budget);
                nvchan->desc = hv_pkt_iter_next(channel, nvchan->desc);
        }

        /* If receive ring was exhausted
         * and not doing busy poll
         * then re-enable host interrupts
         * and reschedule if ring is not empty.
         */
        if (work_done < budget &&
            napi_complete_done(napi, work_done) &&
            hv_end_read(&channel->inbound) != 0)
                napi_reschedule(napi);

        netvsc_chk_recv_comp(net_device, channel, q_idx);

        /* Driver may overshoot since multiple packets per descriptor */
        return min(work_done, budget);
}
/* Call back when data is available in host ring buffer.
 * Processing is deferred until network softirq (NAPI)
 */
void netvsc_channel_cb(void *context)
{
        struct netvsc_channel *nvchan = context;

        if (napi_schedule_prep(&nvchan->napi)) {
                /* disable interrupts from host */
                hv_begin_read(&nvchan->channel->inbound);

                __napi_schedule(&nvchan->napi);
        }
}
/*
 * netvsc_device_add - Callback when the device belonging to this
 * driver is added
 */
int netvsc_device_add(struct hv_device *device,
                      const struct netvsc_device_info *device_info)
{
        int i, ret = 0;
        int ring_size = device_info->ring_size;
        struct netvsc_device *net_device;
        struct net_device *ndev = hv_get_drvdata(device);
        struct net_device_context *net_device_ctx = netdev_priv(ndev);

        net_device = alloc_net_device();
        if (!net_device)
                return -ENOMEM;

        net_device->ring_size = ring_size;

        /* Because the device uses NAPI, all the interrupt batching and
         * control is done via Net softirq, not the channel handling
         */
        set_channel_read_mode(device->channel, HV_CALL_ISR);

        /* If we're reopening the device we may have multiple queues, fill the
         * chn_table with the default channel to use it before subchannels are
         * opened.
         * Initialize the channel state before we open;
         * we can be interrupted as soon as we open the channel.
         */
        for (i = 0; i < VRSS_CHANNEL_MAX; i++) {
                struct netvsc_channel *nvchan = &net_device->chan_table[i];

                nvchan->channel = device->channel;
        }

        /* Enable NAPI handler before init callbacks */
        netif_napi_add(ndev, &net_device->chan_table[0].napi,
                       netvsc_poll, NAPI_POLL_WEIGHT);

        /* Open the channel */
        ret = vmbus_open(device->channel, ring_size * PAGE_SIZE,
                         ring_size * PAGE_SIZE, NULL, 0,
                         netvsc_channel_cb,
                         net_device->chan_table);
        if (ret != 0) {
                netif_napi_del(&net_device->chan_table[0].napi);
                netdev_err(ndev, "unable to open channel: %d\n", ret);
                goto cleanup;
        }

        /* Channel is opened */
        netdev_dbg(ndev, "hv_netvsc channel opened successfully\n");

        napi_enable(&net_device->chan_table[0].napi);

        /* Writing nvdev pointer unlocks netvsc_send(), make sure chn_table is
         * populated.
         */
        rcu_assign_pointer(net_device_ctx->nvdev, net_device);

        /* Connect with the NetVsp */
        ret = netvsc_connect_vsp(device, net_device);
        if (ret != 0) {
                netdev_err(ndev,
                           "unable to connect to NetVSP - %d\n", ret);
                goto close;
        }

        return ret;

close:
        netif_napi_del(&net_device->chan_table[0].napi);

        /* Now, we can close the channel safely */
        vmbus_close(device->channel);

cleanup:
        free_netvsc_device(&net_device->rcu);

        return ret;
}