/*
 * Copyright (c) 2009, Microsoft Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; if not, see <http://www.gnu.org/licenses/>.
 *
 * Authors:
 *   Haiyang Zhang <haiyangz@microsoft.com>
 *   Hank Janssen  <hjanssen@microsoft.com>
 */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/init.h>
#include <linux/atomic.h>
#include <linux/module.h>
#include <linux/highmem.h>
#include <linux/device.h>
#include <linux/io.h>
#include <linux/delay.h>
#include <linux/netdevice.h>
#include <linux/inetdevice.h>
#include <linux/etherdevice.h>
#include <linux/skbuff.h>
#include <linux/if_vlan.h>
#include <linux/in.h>
#include <linux/slab.h>

#include <net/arp.h>
#include <net/route.h>
#include <net/sock.h>
#include <net/pkt_sched.h>
#include <net/checksum.h>
#include <net/ip6_checksum.h>

#include "hyperv_net.h"

#define RING_SIZE_MIN 64
#define LINKCHANGE_INT (2 * HZ)
static int ring_size = 128;
module_param(ring_size, int, S_IRUGO);
MODULE_PARM_DESC(ring_size, "Ring buffer size (# of pages)");
static const u32 default_msg = NETIF_MSG_DRV | NETIF_MSG_PROBE |
                               NETIF_MSG_LINK | NETIF_MSG_IFUP |
                               NETIF_MSG_IFDOWN | NETIF_MSG_RX_ERR |
                               NETIF_MSG_TX_ERR;

static int debug = -1;
module_param(debug, int, S_IRUGO);
MODULE_PARM_DESC(debug, "Debug level (0=none,...,16=all)");
static void netvsc_set_multicast_list(struct net_device *net)
{
        struct net_device_context *net_device_ctx = netdev_priv(net);
        struct netvsc_device *nvdev = rtnl_dereference(net_device_ctx->nvdev);

        rndis_filter_update(nvdev);
}
static int netvsc_open(struct net_device *net)
{
        struct net_device_context *ndev_ctx = netdev_priv(net);
        struct netvsc_device *nvdev = ndev_ctx->nvdev;
        struct rndis_device *rdev;
        int ret = 0;

        netif_carrier_off(net);

        /* Open up the device */
        ret = rndis_filter_open(nvdev);
        if (ret != 0) {
                netdev_err(net, "unable to open device (ret %d).\n", ret);
                return ret;
        }

        netif_tx_wake_all_queues(net);

        rdev = nvdev->extension;
        if (!rdev->link_state && !ndev_ctx->datapath)
                netif_carrier_on(net);

        return ret;
}
static int netvsc_close(struct net_device *net)
{
        struct net_device_context *net_device_ctx = netdev_priv(net);
        struct netvsc_device *nvdev = rtnl_dereference(net_device_ctx->nvdev);
        int ret;
        u32 aread, i, msec = 10, retry = 0, retry_max = 20;
        struct vmbus_channel *chn;

        netif_tx_disable(net);

        ret = rndis_filter_close(nvdev);
        if (ret != 0) {
                netdev_err(net, "unable to close device (ret %d).\n", ret);
                return ret;
        }

        /* Ensure pending bytes in ring are read */
        while (true) {
                aread = 0;
                for (i = 0; i < nvdev->num_chn; i++) {
                        chn = nvdev->chan_table[i].channel;
                        if (!chn)
                                continue;

                        aread = hv_get_bytes_to_read(&chn->inbound);
                        if (aread)
                                break;

                        aread = hv_get_bytes_to_read(&chn->outbound);
                        if (aread)
                                break;
                }

                retry++;
                if (retry > retry_max || aread == 0)
                        break;

                msleep(msec);

                if (msec < 1000)
                        msec *= 2;
        }

        if (aread) {
                netdev_err(net, "Ring buffer not empty after closing rndis\n");
                ret = -ETIMEDOUT;
        }

        return ret;
}
static void *init_ppi_data(struct rndis_message *msg, u32 ppi_size,
                           u32 pkt_type)
{
        struct rndis_packet *rndis_pkt;
        struct rndis_per_packet_info *ppi;

        rndis_pkt = &msg->msg.pkt;
        rndis_pkt->data_offset += ppi_size;

        ppi = (struct rndis_per_packet_info *)((void *)rndis_pkt +
                rndis_pkt->per_pkt_info_offset + rndis_pkt->per_pkt_info_len);

        ppi->size = ppi_size;
        ppi->type = pkt_type;
        ppi->ppi_offset = sizeof(struct rndis_per_packet_info);

        rndis_pkt->per_pkt_info_len += ppi_size;

        return ppi;
}
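/* Note on init_ppi_data() above: each per-packet info (PPI) element is
 * appended at the tail of the existing PPI area, and data_offset is grown
 * by the same amount so the RNDIS payload still starts right after the
 * last PPI.
 */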
/* Azure hosts don't support non-TCP port numbers in hashing yet. We compute
 * hash for non-TCP traffic with only IP numbers.
 */
static inline u32 netvsc_get_hash(struct sk_buff *skb, struct sock *sk)
{
        struct flow_keys flow;
        u32 hash;
        static u32 hashrnd __read_mostly;

        net_get_random_once(&hashrnd, sizeof(hashrnd));

        if (!skb_flow_dissect_flow_keys(skb, &flow, 0))
                return 0;

        if (flow.basic.ip_proto == IPPROTO_TCP) {
                return skb_get_hash(skb);
        } else {
                if (flow.basic.n_proto == htons(ETH_P_IP))
                        hash = jhash2((u32 *)&flow.addrs.v4addrs, 2, hashrnd);
                else if (flow.basic.n_proto == htons(ETH_P_IPV6))
                        hash = jhash2((u32 *)&flow.addrs.v6addrs, 8, hashrnd);
                else
                        hash = 0;

                skb_set_hash(skb, hash, PKT_HASH_TYPE_L3);
        }

        return hash;
}
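/* jhash2() takes its length in 32-bit words: 2 words cover the IPv4
 * source + destination pair, 8 words cover the two 128-bit IPv6
 * addresses. Only addresses are hashed here, hence PKT_HASH_TYPE_L3.
 */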
static inline int netvsc_get_tx_queue(struct net_device *ndev,
                                      struct sk_buff *skb, int old_idx)
{
        const struct net_device_context *ndc = netdev_priv(ndev);
        struct sock *sk = skb->sk;
        int q_idx;

        q_idx = ndc->tx_send_table[netvsc_get_hash(skb, sk) &
                                   (VRSS_SEND_TAB_SIZE - 1)];

        /* If queue index changed record the new value */
        if (q_idx != old_idx &&
            sk && sk_fullsock(sk) && rcu_access_pointer(sk->sk_dst_cache))
                sk_tx_queue_set(sk, q_idx);

        return q_idx;
}
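/* The "& (VRSS_SEND_TAB_SIZE - 1)" above relies on the send table size
 * being a power of two (16 entries at the time of writing), so the mask
 * is a cheap modulo.
 */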
/*
 * Select queue for transmit.
 *
 * If a valid queue has already been assigned, then use that.
 * Otherwise compute tx queue based on hash and the send table.
 *
 * This is basically similar to default (__netdev_pick_tx) with the added step
 * of using the host send_table when no other queue has been assigned.
 *
 * TODO support XPS - but get_xps_queue not exported
 */
static u16 netvsc_select_queue(struct net_device *ndev, struct sk_buff *skb,
                               void *accel_priv, select_queue_fallback_t fallback)
{
        unsigned int num_tx_queues = ndev->real_num_tx_queues;
        int q_idx = sk_tx_queue_get(skb->sk);

        if (q_idx < 0 || skb->ooo_okay) {
                /* If forwarding a packet, we use the recorded queue when
                 * available for better cache locality.
                 */
                if (skb_rx_queue_recorded(skb))
                        q_idx = skb_get_rx_queue(skb);
                else
                        q_idx = netvsc_get_tx_queue(ndev, skb, q_idx);
        }

        while (unlikely(q_idx >= num_tx_queues))
                q_idx -= num_tx_queues;

        return q_idx;
}
static u32 fill_pg_buf(struct page *page, u32 offset, u32 len,
                       struct hv_page_buffer *pb)
{
        int j = 0;

        /* Deal with compound pages by ignoring unused part
         * of the page.
         */
        page += (offset >> PAGE_SHIFT);
        offset &= ~PAGE_MASK;

        while (len > 0) {
                unsigned long bytes;

                bytes = PAGE_SIZE - offset;
                if (bytes > len)
                        bytes = len;
                pb[j].pfn = page_to_pfn(page);
                pb[j].offset = offset;
                pb[j].len = bytes;

                offset += bytes;
                len -= bytes;

                if (offset == PAGE_SIZE && len) {
                        page++;
                        offset = 0;
                        j++;
                }
        }

        return j + 1;
}
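/* Worked example for fill_pg_buf(): with 4 KiB pages, a 6000-byte buffer
 * at offset 1000 of a compound page yields two slots:
 *   pb[0] = { pfn0, offset 1000, len 3096 }
 *   pb[1] = { pfn1, offset 0,    len 2904 }
 * and the function returns 2.
 */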
static u32 init_page_array(void *hdr, u32 len, struct sk_buff *skb,
                           struct hv_netvsc_packet *packet,
                           struct hv_page_buffer **page_buf)
{
        struct hv_page_buffer *pb = *page_buf;
        u32 slots_used = 0;
        char *data = skb->data;
        int frags = skb_shinfo(skb)->nr_frags;
        int i;

        /* The packet is laid out thus:
         * 1. hdr: RNDIS header and PPI
         * 2. skb linear data
         * 3. skb fragment data
         */
        slots_used += fill_pg_buf(virt_to_page(hdr),
                                  offset_in_page(hdr),
                                  len, &pb[slots_used]);

        packet->rmsg_size = len;
        packet->rmsg_pgcnt = slots_used;

        slots_used += fill_pg_buf(virt_to_page(data),
                                  offset_in_page(data),
                                  skb_headlen(skb), &pb[slots_used]);

        for (i = 0; i < frags; i++) {
                skb_frag_t *frag = skb_shinfo(skb)->frags + i;

                slots_used += fill_pg_buf(skb_frag_page(frag),
                                          frag->page_offset,
                                          skb_frag_size(frag), &pb[slots_used]);
        }
        return slots_used;
}
/* Estimate number of page buffers needed to transmit.
 * Need at most 2 for RNDIS header plus skb body and fragments.
 */
static unsigned int netvsc_get_slots(const struct sk_buff *skb)
{
        return PFN_UP(offset_in_page(skb->data) + skb_headlen(skb))
                + skb_shinfo(skb)->nr_frags
                + 2;
}
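/* Worked example for netvsc_get_slots(): 100 bytes of linear data starting
 * at offset_in_page(data) == 4046 straddles a 4 KiB page boundary, so
 * PFN_UP(4046 + 100) == 2; add one slot per fragment plus the 2 spare
 * slots reserved for the RNDIS header.
 */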
static u32 net_checksum_info(struct sk_buff *skb)
{
        if (skb->protocol == htons(ETH_P_IP)) {
                struct iphdr *ip = ip_hdr(skb);

                if (ip->protocol == IPPROTO_TCP)
                        return TRANSPORT_INFO_IPV4_TCP;
                else if (ip->protocol == IPPROTO_UDP)
                        return TRANSPORT_INFO_IPV4_UDP;
        } else {
                struct ipv6hdr *ip6 = ipv6_hdr(skb);

                if (ip6->nexthdr == IPPROTO_TCP)
                        return TRANSPORT_INFO_IPV6_TCP;
                else if (ipv6_hdr(skb)->nexthdr == IPPROTO_UDP)
                        return TRANSPORT_INFO_IPV6_UDP;
        }

        return TRANSPORT_INFO_NOT_IP;
}
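/* The TRANSPORT_INFO_* values form a bitmask: netvsc_start_xmit() below
 * ANDs the result with tx_checksum_mask (derived from the offloads the
 * host accepted) to decide between requesting hardware checksum and
 * falling back to skb_checksum_help().
 */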
static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net)
{
        struct net_device_context *net_device_ctx = netdev_priv(net);
        struct hv_netvsc_packet *packet = NULL;
        int ret;
        unsigned int num_data_pgs;
        struct rndis_message *rndis_msg;
        struct rndis_packet *rndis_pkt;
        u32 rndis_msg_size;
        struct rndis_per_packet_info *ppi;
        u32 hash;
        struct hv_page_buffer page_buf[MAX_PAGE_BUFFER_COUNT];
        struct hv_page_buffer *pb = page_buf;

        /* We can only transmit MAX_PAGE_BUFFER_COUNT number
         * of pages in a single packet. If skb is scattered around
         * more pages we try linearizing it.
         */
        num_data_pgs = netvsc_get_slots(skb);
        if (unlikely(num_data_pgs > MAX_PAGE_BUFFER_COUNT)) {
                ++net_device_ctx->eth_stats.tx_scattered;

                if (skb_linearize(skb))
                        goto no_memory;

                num_data_pgs = netvsc_get_slots(skb);
                if (num_data_pgs > MAX_PAGE_BUFFER_COUNT) {
                        ++net_device_ctx->eth_stats.tx_too_big;
                        goto drop;
                }
        }

        /*
         * Place the rndis header in the skb head room and
         * the skb->cb will be used for hv_netvsc_packet
         * structure.
         */
        ret = skb_cow_head(skb, RNDIS_AND_PPI_SIZE);
        if (ret)
                goto no_memory;

        /* Use the skb control buffer for building up the packet */
        BUILD_BUG_ON(sizeof(struct hv_netvsc_packet) >
                     FIELD_SIZEOF(struct sk_buff, cb));
        packet = (struct hv_netvsc_packet *)skb->cb;

        packet->q_idx = skb_get_queue_mapping(skb);

        packet->total_data_buflen = skb->len;
        packet->total_bytes = skb->len;
        packet->total_packets = 1;

        rndis_msg = (struct rndis_message *)skb->head;

        memset(rndis_msg, 0, RNDIS_AND_PPI_SIZE);

        /* Add the rndis header */
        rndis_msg->ndis_msg_type = RNDIS_MSG_PACKET;
        rndis_msg->msg_len = packet->total_data_buflen;
        rndis_pkt = &rndis_msg->msg.pkt;
        rndis_pkt->data_offset = sizeof(struct rndis_packet);
        rndis_pkt->data_len = packet->total_data_buflen;
        rndis_pkt->per_pkt_info_offset = sizeof(struct rndis_packet);

        rndis_msg_size = RNDIS_MESSAGE_SIZE(struct rndis_packet);

        hash = skb_get_hash_raw(skb);
        if (hash != 0 && net->real_num_tx_queues > 1) {
                rndis_msg_size += NDIS_HASH_PPI_SIZE;
                ppi = init_ppi_data(rndis_msg, NDIS_HASH_PPI_SIZE,
                                    NBL_HASH_VALUE);
                *(u32 *)((void *)ppi + ppi->ppi_offset) = hash;
        }

        if (skb_vlan_tag_present(skb)) {
                struct ndis_pkt_8021q_info *vlan;

                rndis_msg_size += NDIS_VLAN_PPI_SIZE;
                ppi = init_ppi_data(rndis_msg, NDIS_VLAN_PPI_SIZE,
                                    IEEE_8021Q_INFO);
                vlan = (struct ndis_pkt_8021q_info *)((void *)ppi +
                                                      ppi->ppi_offset);
                vlan->vlanid = skb->vlan_tci & VLAN_VID_MASK;
                vlan->pri = (skb->vlan_tci & VLAN_PRIO_MASK) >>
                            VLAN_PRIO_SHIFT;
        }

        if (skb_is_gso(skb)) {
                struct ndis_tcp_lso_info *lso_info;

                rndis_msg_size += NDIS_LSO_PPI_SIZE;
                ppi = init_ppi_data(rndis_msg, NDIS_LSO_PPI_SIZE,
                                    TCP_LARGESEND_PKTINFO);

                lso_info = (struct ndis_tcp_lso_info *)((void *)ppi +
                                                        ppi->ppi_offset);

                lso_info->lso_v2_transmit.type = NDIS_TCP_LARGE_SEND_OFFLOAD_V2_TYPE;
                if (skb->protocol == htons(ETH_P_IP)) {
                        lso_info->lso_v2_transmit.ip_version =
                                NDIS_TCP_LARGE_SEND_OFFLOAD_IPV4;
                        ip_hdr(skb)->tot_len = 0;
                        ip_hdr(skb)->check = 0;
                        tcp_hdr(skb)->check =
                                ~csum_tcpudp_magic(ip_hdr(skb)->saddr,
                                                   ip_hdr(skb)->daddr, 0, IPPROTO_TCP, 0);
                } else {
                        lso_info->lso_v2_transmit.ip_version =
                                NDIS_TCP_LARGE_SEND_OFFLOAD_IPV6;
                        ipv6_hdr(skb)->payload_len = 0;
                        tcp_hdr(skb)->check =
                                ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
                                                 &ipv6_hdr(skb)->daddr, 0, IPPROTO_TCP, 0);
                }
                lso_info->lso_v2_transmit.tcp_header_offset = skb_transport_offset(skb);
                lso_info->lso_v2_transmit.mss = skb_shinfo(skb)->gso_size;
        } else if (skb->ip_summed == CHECKSUM_PARTIAL) {
                if (net_checksum_info(skb) & net_device_ctx->tx_checksum_mask) {
                        struct ndis_tcp_ip_checksum_info *csum_info;

                        rndis_msg_size += NDIS_CSUM_PPI_SIZE;
                        ppi = init_ppi_data(rndis_msg, NDIS_CSUM_PPI_SIZE,
                                            TCPIP_CHKSUM_PKTINFO);

                        csum_info = (struct ndis_tcp_ip_checksum_info *)((void *)ppi +
                                                                         ppi->ppi_offset);

                        csum_info->transmit.tcp_header_offset = skb_transport_offset(skb);

                        if (skb->protocol == htons(ETH_P_IP)) {
                                csum_info->transmit.is_ipv4 = 1;

                                if (ip_hdr(skb)->protocol == IPPROTO_TCP)
                                        csum_info->transmit.tcp_checksum = 1;
                                else
                                        csum_info->transmit.udp_checksum = 1;
                        } else {
                                csum_info->transmit.is_ipv6 = 1;

                                if (ipv6_hdr(skb)->nexthdr == IPPROTO_TCP)
                                        csum_info->transmit.tcp_checksum = 1;
                                else
                                        csum_info->transmit.udp_checksum = 1;
                        }
                } else {
                        /* Can't do offload of this type of checksum */
                        if (skb_checksum_help(skb))
                                goto drop;
                }
        }

        /* Start filling in the page buffers with the rndis hdr */
        rndis_msg->msg_len += rndis_msg_size;
        packet->total_data_buflen = rndis_msg->msg_len;
        packet->page_buf_cnt = init_page_array(rndis_msg, rndis_msg_size,
                                               skb, packet, &pb);

        /* timestamp packet in software */
        skb_tx_timestamp(skb);
        ret = netvsc_send(net_device_ctx->device_ctx, packet,
                          rndis_msg, &pb, skb);
        if (likely(ret == 0))
                return NETDEV_TX_OK;

        if (ret == -EAGAIN) {
                ++net_device_ctx->eth_stats.tx_busy;
                return NETDEV_TX_BUSY;
        }

        if (ret == -ENOSPC)
                ++net_device_ctx->eth_stats.tx_no_space;

drop:
        dev_kfree_skb_any(skb);
        net->stats.tx_dropped++;

        return NETDEV_TX_OK;

no_memory:
        ++net_device_ctx->eth_stats.tx_no_memory;
        goto drop;
}
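/* Return value convention above: NETDEV_TX_BUSY (only for the transient
 * -EAGAIN case) makes the stack requeue and retry the skb; every other
 * failure path consumes the skb and reports NETDEV_TX_OK.
 */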
/*
 * netvsc_linkstatus_callback - Link up/down notification
 */
void netvsc_linkstatus_callback(struct hv_device *device_obj,
                                struct rndis_message *resp)
{
        struct rndis_indicate_status *indicate = &resp->msg.indicate_status;
        struct net_device *net;
        struct net_device_context *ndev_ctx;
        struct netvsc_reconfig *event;
        unsigned long flags;

        net = hv_get_drvdata(device_obj);

        if (!net)
                return;

        ndev_ctx = netdev_priv(net);

        /* Update the physical link speed when changing to another vSwitch */
        if (indicate->status == RNDIS_STATUS_LINK_SPEED_CHANGE) {
                u32 speed;

                speed = *(u32 *)((void *)indicate + indicate->
                                 status_buf_offset) / 10000;
                ndev_ctx->speed = speed;
                return;
        }

        /* Handle these link change statuses below */
        if (indicate->status != RNDIS_STATUS_NETWORK_CHANGE &&
            indicate->status != RNDIS_STATUS_MEDIA_CONNECT &&
            indicate->status != RNDIS_STATUS_MEDIA_DISCONNECT)
                return;

        if (net->reg_state != NETREG_REGISTERED)
                return;

        event = kzalloc(sizeof(*event), GFP_ATOMIC);
        if (!event)
                return;
        event->event = indicate->status;

        spin_lock_irqsave(&ndev_ctx->lock, flags);
        list_add_tail(&event->list, &ndev_ctx->reconfig_events);
        spin_unlock_irqrestore(&ndev_ctx->lock, flags);

        schedule_delayed_work(&ndev_ctx->dwork, 0);
}
static struct sk_buff *netvsc_alloc_recv_skb(struct net_device *net,
                                             struct napi_struct *napi,
                                             const struct ndis_tcp_ip_checksum_info *csum_info,
                                             const struct ndis_pkt_8021q_info *vlan,
                                             void *data, u32 buflen)
{
        struct sk_buff *skb;

        skb = napi_alloc_skb(napi, buflen);
        if (!skb)
                return skb;

        /*
         * Copy to skb. This copy is needed here since the memory pointed by
         * hv_netvsc_packet cannot be deallocated
         */
        skb_put_data(skb, data, buflen);

        skb->protocol = eth_type_trans(skb, net);

        /* skb is already created with CHECKSUM_NONE */
        skb_checksum_none_assert(skb);

        /*
         * In Linux, the IP checksum is always checked.
         * Do L4 checksum offload if enabled and present.
         */
        if (csum_info && (net->features & NETIF_F_RXCSUM)) {
                if (csum_info->receive.tcp_checksum_succeeded ||
                    csum_info->receive.udp_checksum_succeeded)
                        skb->ip_summed = CHECKSUM_UNNECESSARY;
        }

        if (vlan) {
                u16 vlan_tci = vlan->vlanid | (vlan->pri << VLAN_PRIO_SHIFT);

                __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q),
                                       vlan_tci);
        }

        return skb;
}
/*
 * netvsc_recv_callback - Callback when we receive a packet from the
 * "wire" on the specified device.
 */
int netvsc_recv_callback(struct net_device *net,
                         struct vmbus_channel *channel,
                         void *data, u32 len,
                         const struct ndis_tcp_ip_checksum_info *csum_info,
                         const struct ndis_pkt_8021q_info *vlan)
{
        struct net_device_context *net_device_ctx = netdev_priv(net);
        struct netvsc_device *net_device;
        u16 q_idx = channel->offermsg.offer.sub_channel_index;
        struct netvsc_channel *nvchan;
        struct net_device *vf_netdev;
        struct sk_buff *skb;
        struct netvsc_stats *rx_stats;

        if (net->reg_state != NETREG_REGISTERED)
                return NVSP_STAT_FAIL;

        /*
         * If necessary, inject this packet into the VF interface.
         * On Hyper-V, multicast and broadcast packets are only delivered
         * to the synthetic interface (after subjecting these to
         * policy filters on the host). Deliver these via the VF
         * interface in the guest.
         */
        rcu_read_lock();
        net_device = rcu_dereference(net_device_ctx->nvdev);
        if (unlikely(!net_device))
                goto drop;

        nvchan = &net_device->chan_table[q_idx];
        vf_netdev = rcu_dereference(net_device_ctx->vf_netdev);
        if (vf_netdev && (vf_netdev->flags & IFF_UP))
                net = vf_netdev;

        /* Allocate a skb - TODO direct I/O to pages? */
        skb = netvsc_alloc_recv_skb(net, &nvchan->napi,
                                    csum_info, vlan, data, len);
        if (unlikely(!skb)) {
drop:
                ++net->stats.rx_dropped;
                rcu_read_unlock();
                return NVSP_STAT_FAIL;
        }

        if (net != vf_netdev)
                skb_record_rx_queue(skb, q_idx);

        /*
         * Even if injecting the packet, record the statistics
         * on the synthetic device because modifying the VF device
         * statistics will not work correctly.
         */
        rx_stats = &nvchan->rx_stats;
        u64_stats_update_begin(&rx_stats->syncp);
        rx_stats->packets++;
        rx_stats->bytes += len;

        if (skb->pkt_type == PACKET_BROADCAST)
                ++rx_stats->broadcast;
        else if (skb->pkt_type == PACKET_MULTICAST)
                ++rx_stats->multicast;
        u64_stats_update_end(&rx_stats->syncp);

        napi_gro_receive(&nvchan->napi, skb);
        rcu_read_unlock();

        return 0;
}
static void netvsc_get_drvinfo(struct net_device *net,
                               struct ethtool_drvinfo *info)
{
        strlcpy(info->driver, KBUILD_MODNAME, sizeof(info->driver));
        strlcpy(info->fw_version, "N/A", sizeof(info->fw_version));
}
static void netvsc_get_channels(struct net_device *net,
                                struct ethtool_channels *channel)
{
        struct net_device_context *net_device_ctx = netdev_priv(net);
        struct netvsc_device *nvdev = rtnl_dereference(net_device_ctx->nvdev);

        if (nvdev) {
                channel->max_combined = nvdev->max_chn;
                channel->combined_count = nvdev->num_chn;
        }
}
static int netvsc_set_queues(struct net_device *net, struct hv_device *dev,
                             u32 num_chn)
{
        struct netvsc_device_info device_info;
        int ret;

        memset(&device_info, 0, sizeof(device_info));
        device_info.num_chn = num_chn;
        device_info.ring_size = ring_size;
        device_info.max_num_vrss_chns = num_chn;

        ret = rndis_filter_device_add(dev, &device_info);
        if (ret)
                return ret;

        ret = netif_set_real_num_tx_queues(net, num_chn);
        if (ret)
                return ret;

        ret = netif_set_real_num_rx_queues(net, num_chn);

        return ret;
}
static int netvsc_set_channels(struct net_device *net,
                               struct ethtool_channels *channels)
{
        struct net_device_context *net_device_ctx = netdev_priv(net);
        struct hv_device *dev = net_device_ctx->device_ctx;
        struct netvsc_device *nvdev = rtnl_dereference(net_device_ctx->nvdev);
        unsigned int count = channels->combined_count;
        bool was_running;
        int ret;

        /* We do not support separate count for rx, tx, or other */
        if (count == 0 ||
            channels->rx_count || channels->tx_count || channels->other_count)
                return -EINVAL;

        if (count > net->num_tx_queues || count > VRSS_CHANNEL_MAX)
                return -EINVAL;

        if (!nvdev || nvdev->destroy)
                return -ENODEV;

        if (nvdev->nvsp_version < NVSP_PROTOCOL_VERSION_5)
                return -EINVAL;

        if (count > nvdev->max_chn)
                return -EINVAL;

        was_running = netif_running(net);
        if (was_running) {
                ret = netvsc_close(net);
                if (ret)
                        return ret;
        }

        rndis_filter_device_remove(dev, nvdev);

        ret = netvsc_set_queues(net, dev, count);
        if (ret == 0)
                nvdev->num_chn = count;
        else
                netvsc_set_queues(net, dev, nvdev->num_chn);

        if (was_running)
                ret = netvsc_open(net);

        /* We may have missed link change notifications */
        schedule_delayed_work(&net_device_ctx->dwork, 0);

        return ret;
}
static bool
netvsc_validate_ethtool_ss_cmd(const struct ethtool_link_ksettings *cmd)
{
        struct ethtool_link_ksettings diff1 = *cmd;
        struct ethtool_link_ksettings diff2 = {};

        diff1.base.speed = 0;
        diff1.base.duplex = 0;
        /* advertising and cmd are usually set */
        ethtool_link_ksettings_zero_link_mode(&diff1, advertising);
        diff1.base.cmd = 0;
        /* We set port to PORT_OTHER */
        diff2.base.port = PORT_OTHER;

        return !memcmp(&diff1, &diff2, sizeof(diff1));
}
static void netvsc_init_settings(struct net_device *dev)
{
        struct net_device_context *ndc = netdev_priv(dev);

        ndc->speed = SPEED_UNKNOWN;
        ndc->duplex = DUPLEX_FULL;
}
static int netvsc_get_link_ksettings(struct net_device *dev,
                                     struct ethtool_link_ksettings *cmd)
{
        struct net_device_context *ndc = netdev_priv(dev);

        cmd->base.speed = ndc->speed;
        cmd->base.duplex = ndc->duplex;
        cmd->base.port = PORT_OTHER;

        return 0;
}
static int netvsc_set_link_ksettings(struct net_device *dev,
                                     const struct ethtool_link_ksettings *cmd)
{
        struct net_device_context *ndc = netdev_priv(dev);
        u32 speed;

        speed = cmd->base.speed;
        if (!ethtool_validate_speed(speed) ||
            !ethtool_validate_duplex(cmd->base.duplex) ||
            !netvsc_validate_ethtool_ss_cmd(cmd))
                return -EINVAL;

        ndc->speed = speed;
        ndc->duplex = cmd->base.duplex;

        return 0;
}
static int netvsc_change_mtu(struct net_device *ndev, int mtu)
{
        struct net_device_context *ndevctx = netdev_priv(ndev);
        struct netvsc_device *nvdev = rtnl_dereference(ndevctx->nvdev);
        struct hv_device *hdev = ndevctx->device_ctx;
        struct netvsc_device_info device_info;
        bool was_running;
        int ret = 0;

        if (!nvdev || nvdev->destroy)
                return -ENODEV;

        was_running = netif_running(ndev);
        if (was_running) {
                ret = netvsc_close(ndev);
                if (ret)
                        return ret;
        }

        memset(&device_info, 0, sizeof(device_info));
        device_info.ring_size = ring_size;
        device_info.num_chn = nvdev->num_chn;
        device_info.max_num_vrss_chns = nvdev->num_chn;

        rndis_filter_device_remove(hdev, nvdev);

        /* 'nvdev' has been freed in rndis_filter_device_remove() ->
         * netvsc_device_remove() -> free_netvsc_device().
         * We mustn't access it before it's re-created in
         * rndis_filter_device_add() -> netvsc_device_add().
         */

        ndev->mtu = mtu;

        rndis_filter_device_add(hdev, &device_info);

        if (was_running)
                ret = netvsc_open(ndev);

        /* We may have missed link change notifications */
        schedule_delayed_work(&ndevctx->dwork, 0);

        return ret;
}
static void netvsc_get_stats64(struct net_device *net,
                               struct rtnl_link_stats64 *t)
{
        struct net_device_context *ndev_ctx = netdev_priv(net);
        struct netvsc_device *nvdev = rcu_dereference_rtnl(ndev_ctx->nvdev);
        int i;

        if (!nvdev)
                return;

        for (i = 0; i < nvdev->num_chn; i++) {
                const struct netvsc_channel *nvchan = &nvdev->chan_table[i];
                const struct netvsc_stats *stats;
                u64 packets, bytes, multicast;
                unsigned int start;

                stats = &nvchan->tx_stats;
                do {
                        start = u64_stats_fetch_begin_irq(&stats->syncp);
                        packets = stats->packets;
                        bytes = stats->bytes;
                } while (u64_stats_fetch_retry_irq(&stats->syncp, start));

                t->tx_bytes += bytes;
                t->tx_packets += packets;

                stats = &nvchan->rx_stats;
                do {
                        start = u64_stats_fetch_begin_irq(&stats->syncp);
                        packets = stats->packets;
                        bytes = stats->bytes;
                        multicast = stats->multicast + stats->broadcast;
                } while (u64_stats_fetch_retry_irq(&stats->syncp, start));

                t->rx_bytes += bytes;
                t->rx_packets += packets;
                t->multicast += multicast;
        }

        t->tx_dropped = net->stats.tx_dropped;
        t->tx_errors = net->stats.tx_errors;

        t->rx_dropped = net->stats.rx_dropped;
        t->rx_errors = net->stats.rx_errors;
}
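/* The u64_stats fetch/retry loops above give a consistent snapshot of the
 * 64-bit counters: on 32-bit hosts the syncp seqcount guards against torn
 * reads, while on 64-bit builds the loops compile down to plain loads.
 */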
static int netvsc_set_mac_addr(struct net_device *ndev, void *p)
{
        struct sockaddr *addr = p;
        char save_adr[ETH_ALEN];
        unsigned char save_aatype;
        int err;

        memcpy(save_adr, ndev->dev_addr, ETH_ALEN);
        save_aatype = ndev->addr_assign_type;

        err = eth_mac_addr(ndev, p);
        if (err != 0)
                return err;

        err = rndis_filter_set_device_mac(ndev, addr->sa_data);
        if (err != 0) {
                /* roll back to saved MAC */
                memcpy(ndev->dev_addr, save_adr, ETH_ALEN);
                ndev->addr_assign_type = save_aatype;
        }

        return err;
}
static const struct {
        char name[ETH_GSTRING_LEN];
        u16 offset;
} netvsc_stats[] = {
        { "tx_scattered", offsetof(struct netvsc_ethtool_stats, tx_scattered) },
        { "tx_no_memory", offsetof(struct netvsc_ethtool_stats, tx_no_memory) },
        { "tx_no_space",  offsetof(struct netvsc_ethtool_stats, tx_no_space) },
        { "tx_too_big",   offsetof(struct netvsc_ethtool_stats, tx_too_big) },
        { "tx_busy",      offsetof(struct netvsc_ethtool_stats, tx_busy) },
};

#define NETVSC_GLOBAL_STATS_LEN ARRAY_SIZE(netvsc_stats)

/* 4 statistics per queue (rx/tx packets/bytes) */
#define NETVSC_QUEUE_STATS_LEN(dev) ((dev)->num_chn * 4)
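/* Resulting "ethtool -S" layout: the global counters above come first,
 * followed by tx/rx packet and byte counters for each of the num_chn
 * queues, matching the strings emitted by netvsc_get_strings().
 */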
static int netvsc_get_sset_count(struct net_device *dev, int string_set)
{
        struct net_device_context *ndc = netdev_priv(dev);
        struct netvsc_device *nvdev = rtnl_dereference(ndc->nvdev);

        if (!nvdev)
                return -ENODEV;

        switch (string_set) {
        case ETH_SS_STATS:
                return NETVSC_GLOBAL_STATS_LEN + NETVSC_QUEUE_STATS_LEN(nvdev);
        default:
                return -EINVAL;
        }
}
static void netvsc_get_ethtool_stats(struct net_device *dev,
                                     struct ethtool_stats *stats, u64 *data)
{
        struct net_device_context *ndc = netdev_priv(dev);
        struct netvsc_device *nvdev = rcu_dereference(ndc->nvdev);
        const void *nds = &ndc->eth_stats;
        const struct netvsc_stats *qstats;
        unsigned int start;
        u64 packets, bytes;
        int i, j;

        if (!nvdev)
                return;

        for (i = 0; i < NETVSC_GLOBAL_STATS_LEN; i++)
                data[i] = *(unsigned long *)(nds + netvsc_stats[i].offset);

        for (j = 0; j < nvdev->num_chn; j++) {
                qstats = &nvdev->chan_table[j].tx_stats;

                do {
                        start = u64_stats_fetch_begin_irq(&qstats->syncp);
                        packets = qstats->packets;
                        bytes = qstats->bytes;
                } while (u64_stats_fetch_retry_irq(&qstats->syncp, start));
                data[i++] = packets;
                data[i++] = bytes;

                qstats = &nvdev->chan_table[j].rx_stats;
                do {
                        start = u64_stats_fetch_begin_irq(&qstats->syncp);
                        packets = qstats->packets;
                        bytes = qstats->bytes;
                } while (u64_stats_fetch_retry_irq(&qstats->syncp, start));
                data[i++] = packets;
                data[i++] = bytes;
        }
}
static void netvsc_get_strings(struct net_device *dev, u32 stringset, u8 *data)
{
        struct net_device_context *ndc = netdev_priv(dev);
        struct netvsc_device *nvdev = rcu_dereference(ndc->nvdev);
        u8 *p = data;
        int i;

        if (!nvdev)
                return;

        switch (stringset) {
        case ETH_SS_STATS:
                for (i = 0; i < ARRAY_SIZE(netvsc_stats); i++)
                        memcpy(p + i * ETH_GSTRING_LEN,
                               netvsc_stats[i].name, ETH_GSTRING_LEN);

                p += i * ETH_GSTRING_LEN;
                for (i = 0; i < nvdev->num_chn; i++) {
                        sprintf(p, "tx_queue_%u_packets", i);
                        p += ETH_GSTRING_LEN;
                        sprintf(p, "tx_queue_%u_bytes", i);
                        p += ETH_GSTRING_LEN;
                        sprintf(p, "rx_queue_%u_packets", i);
                        p += ETH_GSTRING_LEN;
                        sprintf(p, "rx_queue_%u_bytes", i);
                        p += ETH_GSTRING_LEN;
                }
                break;
        }
}
static int
netvsc_get_rss_hash_opts(struct netvsc_device *nvdev,
                         struct ethtool_rxnfc *info)
{
        info->data = RXH_IP_SRC | RXH_IP_DST;

        switch (info->flow_type) {
        case TCP_V4_FLOW:
        case TCP_V6_FLOW:
                info->data |= RXH_L4_B_0_1 | RXH_L4_B_2_3;
                /* fallthrough */
        case UDP_V4_FLOW:
        case UDP_V6_FLOW:
        case IPV4_FLOW:
        case IPV6_FLOW:
                break;
        default:
                info->data = 0;
                break;
        }

        return 0;
}
static int
netvsc_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *info,
                 u32 *rules)
{
        struct net_device_context *ndc = netdev_priv(dev);
        struct netvsc_device *nvdev = rcu_dereference(ndc->nvdev);

        if (!nvdev)
                return -ENODEV;

        switch (info->cmd) {
        case ETHTOOL_GRXRINGS:
                info->data = nvdev->num_chn;
                return 0;

        case ETHTOOL_GRXFH:
                return netvsc_get_rss_hash_opts(nvdev, info);
        }
        return -EOPNOTSUPP;
}
#ifdef CONFIG_NET_POLL_CONTROLLER
static void netvsc_poll_controller(struct net_device *dev)
{
        struct net_device_context *ndc = netdev_priv(dev);
        struct netvsc_device *ndev;
        int i;

        rcu_read_lock();
        ndev = rcu_dereference(ndc->nvdev);
        if (ndev) {
                for (i = 0; i < ndev->num_chn; i++) {
                        struct netvsc_channel *nvchan = &ndev->chan_table[i];

                        napi_schedule(&nvchan->napi);
                }
        }
        rcu_read_unlock();
}
#endif
static u32 netvsc_get_rxfh_key_size(struct net_device *dev)
{
        return NETVSC_HASH_KEYLEN;
}

static u32 netvsc_rss_indir_size(struct net_device *dev)
{
        return ITAB_NUM;
}
static int netvsc_get_rxfh(struct net_device *dev, u32 *indir, u8 *key,
                           u8 *hfunc)
{
        struct net_device_context *ndc = netdev_priv(dev);
        struct netvsc_device *ndev = rcu_dereference(ndc->nvdev);
        struct rndis_device *rndis_dev;
        int i;

        if (!ndev)
                return -ENODEV;

        if (hfunc)
                *hfunc = ETH_RSS_HASH_TOP; /* Toeplitz */

        rndis_dev = ndev->extension;
        if (indir) {
                for (i = 0; i < ITAB_NUM; i++)
                        indir[i] = rndis_dev->ind_table[i];
        }

        if (key)
                memcpy(key, rndis_dev->rss_key, NETVSC_HASH_KEYLEN);

        return 0;
}
static int netvsc_set_rxfh(struct net_device *dev, const u32 *indir,
                           const u8 *key, const u8 hfunc)
{
        struct net_device_context *ndc = netdev_priv(dev);
        struct netvsc_device *ndev = rtnl_dereference(ndc->nvdev);
        struct rndis_device *rndis_dev;
        int i;

        if (!ndev)
                return -ENODEV;

        if (hfunc != ETH_RSS_HASH_NO_CHANGE && hfunc != ETH_RSS_HASH_TOP)
                return -EOPNOTSUPP;

        rndis_dev = ndev->extension;
        if (indir) {
                for (i = 0; i < ITAB_NUM; i++)
                        if (indir[i] >= VRSS_CHANNEL_MAX)
                                return -EINVAL;

                for (i = 0; i < ITAB_NUM; i++)
                        rndis_dev->ind_table[i] = indir[i];
        }

        if (!key) {
                if (!indir)
                        return 0;

                key = rndis_dev->rss_key;
        }

        return rndis_filter_set_rss_param(rndis_dev, key, ndev->num_chn);
}
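/* These hooks back "ethtool -x/-X ethN" (show/set the RSS key and
 * indirection table); only the Toeplitz hash function is accepted, and
 * indirection entries must reference a valid channel.
 */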
static const struct ethtool_ops ethtool_ops = {
        .get_drvinfo = netvsc_get_drvinfo,
        .get_link = ethtool_op_get_link,
        .get_ethtool_stats = netvsc_get_ethtool_stats,
        .get_sset_count = netvsc_get_sset_count,
        .get_strings = netvsc_get_strings,
        .get_channels = netvsc_get_channels,
        .set_channels = netvsc_set_channels,
        .get_ts_info = ethtool_op_get_ts_info,
        .get_rxnfc = netvsc_get_rxnfc,
        .get_rxfh_key_size = netvsc_get_rxfh_key_size,
        .get_rxfh_indir_size = netvsc_rss_indir_size,
        .get_rxfh = netvsc_get_rxfh,
        .set_rxfh = netvsc_set_rxfh,
        .get_link_ksettings = netvsc_get_link_ksettings,
        .set_link_ksettings = netvsc_set_link_ksettings,
};
static const struct net_device_ops device_ops = {
        .ndo_open = netvsc_open,
        .ndo_stop = netvsc_close,
        .ndo_start_xmit = netvsc_start_xmit,
        .ndo_set_rx_mode = netvsc_set_multicast_list,
        .ndo_change_mtu = netvsc_change_mtu,
        .ndo_validate_addr = eth_validate_addr,
        .ndo_set_mac_address = netvsc_set_mac_addr,
        .ndo_select_queue = netvsc_select_queue,
        .ndo_get_stats64 = netvsc_get_stats64,
#ifdef CONFIG_NET_POLL_CONTROLLER
        .ndo_poll_controller = netvsc_poll_controller,
#endif
};
/*
 * Handle link status changes. For RNDIS_STATUS_NETWORK_CHANGE emulate link
 * down/up sequence. In case of RNDIS_STATUS_MEDIA_CONNECT when carrier is
 * present send GARP packet to network peers with netif_notify_peers().
 */
static void netvsc_link_change(struct work_struct *w)
{
        struct net_device_context *ndev_ctx =
                container_of(w, struct net_device_context, dwork.work);
        struct hv_device *device_obj = ndev_ctx->device_ctx;
        struct net_device *net = hv_get_drvdata(device_obj);
        struct netvsc_device *net_device;
        struct rndis_device *rdev;
        struct netvsc_reconfig *event = NULL;
        bool notify = false, reschedule = false;
        unsigned long flags, next_reconfig, delay;

        rtnl_lock();
        net_device = rtnl_dereference(ndev_ctx->nvdev);
        if (!net_device)
                goto out_unlock;

        rdev = net_device->extension;

        next_reconfig = ndev_ctx->last_reconfig + LINKCHANGE_INT;
        if (time_is_after_jiffies(next_reconfig)) {
                /* link_watch only sends one notification with current state
                 * per second, avoid doing reconfig more frequently. Handle
                 * wrap around.
                 */
                delay = next_reconfig - jiffies;
                delay = delay < LINKCHANGE_INT ? delay : LINKCHANGE_INT;
                schedule_delayed_work(&ndev_ctx->dwork, delay);
                goto out_unlock;
        }
        ndev_ctx->last_reconfig = jiffies;

        spin_lock_irqsave(&ndev_ctx->lock, flags);
        if (!list_empty(&ndev_ctx->reconfig_events)) {
                event = list_first_entry(&ndev_ctx->reconfig_events,
                                         struct netvsc_reconfig, list);
                list_del(&event->list);
                reschedule = !list_empty(&ndev_ctx->reconfig_events);
        }
        spin_unlock_irqrestore(&ndev_ctx->lock, flags);

        if (!event)
                goto out_unlock;

        switch (event->event) {
                /* Only the following events are possible due to the check in
                 * netvsc_linkstatus_callback()
                 */
        case RNDIS_STATUS_MEDIA_CONNECT:
                if (rdev->link_state) {
                        rdev->link_state = false;
                        if (!ndev_ctx->datapath)
                                netif_carrier_on(net);
                        netif_tx_wake_all_queues(net);
                } else {
                        notify = true;
                }
                kfree(event);
                break;
        case RNDIS_STATUS_MEDIA_DISCONNECT:
                if (!rdev->link_state) {
                        rdev->link_state = true;
                        netif_carrier_off(net);
                        netif_tx_stop_all_queues(net);
                }
                kfree(event);
                break;
        case RNDIS_STATUS_NETWORK_CHANGE:
                /* Only makes sense if carrier is present */
                if (!rdev->link_state) {
                        rdev->link_state = true;
                        netif_carrier_off(net);
                        netif_tx_stop_all_queues(net);
                        event->event = RNDIS_STATUS_MEDIA_CONNECT;
                        spin_lock_irqsave(&ndev_ctx->lock, flags);
                        list_add(&event->list, &ndev_ctx->reconfig_events);
                        spin_unlock_irqrestore(&ndev_ctx->lock, flags);
                        reschedule = true;
                }
                break;
        }

        rtnl_unlock();

        if (notify)
                netdev_notify_peers(net);

        /* link_watch only sends one notification with current state per
         * second, handle next reconfig event in 2 seconds.
         */
        if (reschedule)
                schedule_delayed_work(&ndev_ctx->dwork, LINKCHANGE_INT);

        return;

out_unlock:
        rtnl_unlock();
}
static struct net_device *get_netvsc_bymac(const u8 *mac)
{
        struct net_device *dev;

        ASSERT_RTNL();

        for_each_netdev(&init_net, dev) {
                if (dev->netdev_ops != &device_ops)
                        continue; /* not a netvsc device */

                if (ether_addr_equal(mac, dev->perm_addr))
                        return dev;
        }

        return NULL;
}
static struct net_device *get_netvsc_byref(struct net_device *vf_netdev)
{
        struct net_device *dev;

        ASSERT_RTNL();

        for_each_netdev(&init_net, dev) {
                struct net_device_context *net_device_ctx;

                if (dev->netdev_ops != &device_ops)
                        continue; /* not a netvsc device */

                net_device_ctx = netdev_priv(dev);
                if (net_device_ctx->nvdev == NULL)
                        continue; /* device is removed */

                if (rtnl_dereference(net_device_ctx->vf_netdev) == vf_netdev)
                        return dev; /* a match */
        }

        return NULL;
}
static int netvsc_register_vf(struct net_device *vf_netdev)
{
        struct net_device *ndev;
        struct net_device_context *net_device_ctx;
        struct netvsc_device *netvsc_dev;

        if (vf_netdev->addr_len != ETH_ALEN)
                return NOTIFY_DONE;

        /*
         * We will use the MAC address to locate the synthetic interface to
         * associate with the VF interface. If we don't find a matching
         * synthetic interface, move on.
         */
        ndev = get_netvsc_bymac(vf_netdev->perm_addr);
        if (!ndev)
                return NOTIFY_DONE;

        net_device_ctx = netdev_priv(ndev);
        netvsc_dev = rtnl_dereference(net_device_ctx->nvdev);
        if (!netvsc_dev || rtnl_dereference(net_device_ctx->vf_netdev))
                return NOTIFY_DONE;

        netdev_info(ndev, "VF registering: %s\n", vf_netdev->name);
        /*
         * Take a reference on the module.
         */
        try_module_get(THIS_MODULE);

        dev_hold(vf_netdev);
        rcu_assign_pointer(net_device_ctx->vf_netdev, vf_netdev);
        return NOTIFY_OK;
}
static int netvsc_vf_up(struct net_device *vf_netdev)
{
        struct net_device *ndev;
        struct netvsc_device *netvsc_dev;
        struct net_device_context *net_device_ctx;

        ndev = get_netvsc_byref(vf_netdev);
        if (!ndev)
                return NOTIFY_DONE;

        net_device_ctx = netdev_priv(ndev);
        netvsc_dev = rtnl_dereference(net_device_ctx->nvdev);

        netdev_info(ndev, "VF up: %s\n", vf_netdev->name);

        /*
         * Open the device before switching data path.
         */
        rndis_filter_open(netvsc_dev);

        /*
         * notify the host to switch the data path.
         */
        netvsc_switch_datapath(ndev, true);
        netdev_info(ndev, "Data path switched to VF: %s\n", vf_netdev->name);

        netif_carrier_off(ndev);

        /* Now notify peers through VF device. */
        call_netdevice_notifiers(NETDEV_NOTIFY_PEERS, vf_netdev);

        return NOTIFY_OK;
}
static int netvsc_vf_down(struct net_device *vf_netdev)
{
        struct net_device *ndev;
        struct netvsc_device *netvsc_dev;
        struct net_device_context *net_device_ctx;

        ndev = get_netvsc_byref(vf_netdev);
        if (!ndev)
                return NOTIFY_DONE;

        net_device_ctx = netdev_priv(ndev);
        netvsc_dev = rtnl_dereference(net_device_ctx->nvdev);

        netdev_info(ndev, "VF down: %s\n", vf_netdev->name);
        netvsc_switch_datapath(ndev, false);
        netdev_info(ndev, "Data path switched from VF: %s\n", vf_netdev->name);
        rndis_filter_close(netvsc_dev);
        netif_carrier_on(ndev);

        /* Now notify peers through netvsc device. */
        call_netdevice_notifiers(NETDEV_NOTIFY_PEERS, ndev);

        return NOTIFY_OK;
}
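/* netvsc_vf_up()/netvsc_vf_down() mirror each other: up opens the RNDIS
 * device before pointing the host data path at the VF, down switches the
 * data path back before closing, so traffic always has a live receiver.
 */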
static int netvsc_unregister_vf(struct net_device *vf_netdev)
{
        struct net_device *ndev;
        struct net_device_context *net_device_ctx;

        ndev = get_netvsc_byref(vf_netdev);
        if (!ndev)
                return NOTIFY_DONE;

        net_device_ctx = netdev_priv(ndev);

        netdev_info(ndev, "VF unregistering: %s\n", vf_netdev->name);

        RCU_INIT_POINTER(net_device_ctx->vf_netdev, NULL);
        dev_put(vf_netdev);
        module_put(THIS_MODULE);
        return NOTIFY_OK;
}
static int netvsc_probe(struct hv_device *dev,
                        const struct hv_vmbus_device_id *dev_id)
{
        struct net_device *net = NULL;
        struct net_device_context *net_device_ctx;
        struct netvsc_device_info device_info;
        struct netvsc_device *nvdev;
        int ret;

        net = alloc_etherdev_mq(sizeof(struct net_device_context),
                                VRSS_CHANNEL_MAX);
        if (!net)
                return -ENOMEM;

        netif_carrier_off(net);

        netvsc_init_settings(net);

        net_device_ctx = netdev_priv(net);
        net_device_ctx->device_ctx = dev;
        net_device_ctx->msg_enable = netif_msg_init(debug, default_msg);
        if (netif_msg_probe(net_device_ctx))
                netdev_dbg(net, "netvsc msg_enable: %d\n",
                           net_device_ctx->msg_enable);

        hv_set_drvdata(dev, net);

        INIT_DELAYED_WORK(&net_device_ctx->dwork, netvsc_link_change);

        spin_lock_init(&net_device_ctx->lock);
        INIT_LIST_HEAD(&net_device_ctx->reconfig_events);

        net->netdev_ops = &device_ops;
        net->ethtool_ops = &ethtool_ops;
        SET_NETDEV_DEV(net, &dev->device);

        /* We always need headroom for rndis header */
        net->needed_headroom = RNDIS_AND_PPI_SIZE;

        /* Notify the netvsc driver of the new device */
        memset(&device_info, 0, sizeof(device_info));
        device_info.ring_size = ring_size;
        device_info.num_chn = VRSS_CHANNEL_DEFAULT;
        ret = rndis_filter_device_add(dev, &device_info);
        if (ret != 0) {
                netdev_err(net, "unable to add netvsc device (ret %d)\n", ret);
                free_netdev(net);
                hv_set_drvdata(dev, NULL);
                return ret;
        }
        memcpy(net->dev_addr, device_info.mac_adr, ETH_ALEN);

        /* hw_features computed in rndis_filter_device_add */
        net->features = net->hw_features |
                NETIF_F_HIGHDMA | NETIF_F_SG |
                NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX;
        net->vlan_features = net->features;

        /* RCU not necessary here, device not registered */
        nvdev = net_device_ctx->nvdev;
        netif_set_real_num_tx_queues(net, nvdev->num_chn);
        netif_set_real_num_rx_queues(net, nvdev->num_chn);

        /* MTU range: 68 - 1500 or 65521 */
        net->min_mtu = NETVSC_MTU_MIN;
        if (nvdev->nvsp_version >= NVSP_PROTOCOL_VERSION_2)
                net->max_mtu = NETVSC_MTU - ETH_HLEN;
        else
                net->max_mtu = ETH_DATA_LEN;

        ret = register_netdev(net);
        if (ret != 0) {
                pr_err("Unable to register netdev.\n");
                rndis_filter_device_remove(dev, nvdev);
                free_netdev(net);
        }

        return ret;
}
static int netvsc_remove(struct hv_device *dev)
{
        struct net_device *net;
        struct net_device_context *ndev_ctx;

        net = hv_get_drvdata(dev);

        if (net == NULL) {
                dev_err(&dev->device, "No net device to remove\n");
                return 0;
        }

        ndev_ctx = netdev_priv(net);

        netif_device_detach(net);

        cancel_delayed_work_sync(&ndev_ctx->dwork);

        /*
         * Call to the vsc driver to let it know that the device is being
         * removed. Also blocks mtu and channel changes.
         */
        rtnl_lock();
        rndis_filter_device_remove(dev, ndev_ctx->nvdev);
        rtnl_unlock();

        unregister_netdev(net);

        hv_set_drvdata(dev, NULL);

        free_netdev(net);
        return 0;
}
static const struct hv_vmbus_device_id id_table[] = {
        /* Network guid */
        { HV_NIC_GUID, },
        { },
};

MODULE_DEVICE_TABLE(vmbus, id_table);
/* The one and only one */
static struct hv_driver netvsc_drv = {
        .name = KBUILD_MODNAME,
        .id_table = id_table,
        .probe = netvsc_probe,
        .remove = netvsc_remove,
};
/*
 * On Hyper-V, every VF interface is matched with a corresponding
 * synthetic interface. The synthetic interface is presented first
 * to the guest. When the corresponding VF instance is registered,
 * we will take care of switching the data path.
 */
static int netvsc_netdev_event(struct notifier_block *this,
                               unsigned long event, void *ptr)
{
        struct net_device *event_dev = netdev_notifier_info_to_dev(ptr);

        /* Skip our own events */
        if (event_dev->netdev_ops == &device_ops)
                return NOTIFY_DONE;

        /* Avoid non-Ethernet type devices */
        if (event_dev->type != ARPHRD_ETHER)
                return NOTIFY_DONE;

        /* Avoid Vlan dev with same MAC registering as VF */
        if (is_vlan_dev(event_dev))
                return NOTIFY_DONE;

        /* Avoid Bonding master dev with same MAC registering as VF */
        if ((event_dev->priv_flags & IFF_BONDING) &&
            (event_dev->flags & IFF_MASTER))
                return NOTIFY_DONE;

        switch (event) {
        case NETDEV_REGISTER:
                return netvsc_register_vf(event_dev);
        case NETDEV_UNREGISTER:
                return netvsc_unregister_vf(event_dev);
        case NETDEV_UP:
                return netvsc_vf_up(event_dev);
        case NETDEV_DOWN:
                return netvsc_vf_down(event_dev);
        default:
                return NOTIFY_DONE;
        }
}
static struct notifier_block netvsc_netdev_notifier = {
        .notifier_call = netvsc_netdev_event,
};
static void __exit netvsc_drv_exit(void)
{
        unregister_netdevice_notifier(&netvsc_netdev_notifier);
        vmbus_driver_unregister(&netvsc_drv);
}
static int __init netvsc_drv_init(void)
{
        int ret;

        if (ring_size < RING_SIZE_MIN) {
                ring_size = RING_SIZE_MIN;
                pr_info("Increased ring_size to %d (min allowed)\n",
                        ring_size);
        }
        ret = vmbus_driver_register(&netvsc_drv);

        if (ret)
                return ret;

        register_netdevice_notifier(&netvsc_netdev_notifier);
        return 0;
}
1688 MODULE_LICENSE("GPL");
1689 MODULE_DESCRIPTION("Microsoft Hyper-V network driver");
1691 module_init(netvsc_drv_init
);
1692 module_exit(netvsc_drv_exit
);