// SPDX-License-Identifier: GPL-2.0-or-later
/* A network driver using virtio.
 *
 * Copyright 2007 Rusty Russell <rusty@rustcorp.com.au> IBM Corporation
 */
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/ethtool.h>
#include <linux/module.h>
#include <linux/virtio.h>
#include <linux/virtio_net.h>
#include <linux/bpf.h>
#include <linux/bpf_trace.h>
#include <linux/scatterlist.h>
#include <linux/if_vlan.h>
#include <linux/slab.h>
#include <linux/cpu.h>
#include <linux/average.h>
#include <linux/filter.h>
#include <linux/kernel.h>
#include <linux/dim.h>
#include <net/route.h>
#include <net/net_failover.h>
#include <net/netdev_rx_queue.h>
#include <net/netdev_queues.h>
#include <net/xdp_sock_drv.h>

static int napi_weight = NAPI_POLL_WEIGHT;
module_param(napi_weight, int, 0444);

static bool csum = true, gso = true, napi_tx = true;
module_param(csum, bool, 0444);
module_param(gso, bool, 0444);
module_param(napi_tx, bool, 0644);

/* FIXME: MTU in config. */
#define GOOD_PACKET_LEN (ETH_HLEN + VLAN_HLEN + ETH_DATA_LEN)
#define GOOD_COPY_LEN	128

#define VIRTNET_RX_PAD (NET_IP_ALIGN + NET_SKB_PAD)

/* Separating two types of XDP xmit */
#define VIRTIO_XDP_TX		BIT(0)
#define VIRTIO_XDP_REDIR	BIT(1)

/* RX packet size EWMA. The average packet size is used to determine the packet
 * buffer size when refilling RX rings. As the entire RX ring may be refilled
 * at once, the weight is chosen so that the EWMA will be insensitive to short-
 * term, transient changes in packet size.
 */
DECLARE_EWMA(pkt_len, 0, 64)
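/* A rough sketch of the EWMA parameters above, assuming the standard
 * <linux/average.h> semantics (0 bits of extra precision, reciprocal
 * weight 64): each ewma_pkt_len_add(&e, len) is approximately
 *
 *	avg = avg + (len - avg) / 64;
 *
 * so one sample only moves the average by about 1/64 of the difference.
 */
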
#define VIRTNET_DRIVER_VERSION "1.0.0"

static const unsigned long guest_offloads[] = {
	VIRTIO_NET_F_GUEST_TSO4,
	VIRTIO_NET_F_GUEST_TSO6,
	VIRTIO_NET_F_GUEST_ECN,
	VIRTIO_NET_F_GUEST_UFO,
	VIRTIO_NET_F_GUEST_CSUM,
	VIRTIO_NET_F_GUEST_USO4,
	VIRTIO_NET_F_GUEST_USO6,
	VIRTIO_NET_F_GUEST_HDRLEN
};

#define GUEST_OFFLOAD_GRO_HW_MASK ((1ULL << VIRTIO_NET_F_GUEST_TSO4) | \
				   (1ULL << VIRTIO_NET_F_GUEST_TSO6) | \
				   (1ULL << VIRTIO_NET_F_GUEST_ECN)  | \
				   (1ULL << VIRTIO_NET_F_GUEST_UFO)  | \
				   (1ULL << VIRTIO_NET_F_GUEST_USO4) | \
				   (1ULL << VIRTIO_NET_F_GUEST_USO6))

struct virtnet_stat_desc {
	char desc[ETH_GSTRING_LEN];
	size_t offset;
	size_t qstat_offset;
};

struct virtnet_sq_free_stats {
	u64 packets;
	u64 bytes;
	u64 napi_packets;
	u64 napi_bytes;
	u64 xsk;
};

struct virtnet_sq_stats {
	struct u64_stats_sync syncp;
	u64_stats_t packets;
	u64_stats_t bytes;
	u64_stats_t xdp_tx;
	u64_stats_t xdp_tx_drops;
	u64_stats_t kicks;
	u64_stats_t tx_timeouts;
	u64_stats_t stop;
	u64_stats_t wake;
};

struct virtnet_rq_stats {
	struct u64_stats_sync syncp;
	u64_stats_t packets;
	u64_stats_t bytes;
	u64_stats_t drops;
	u64_stats_t xdp_packets;
	u64_stats_t xdp_tx;
	u64_stats_t xdp_redirects;
	u64_stats_t xdp_drops;
	u64_stats_t kicks;
};

#define VIRTNET_SQ_STAT(name, m) {name, offsetof(struct virtnet_sq_stats, m), -1}
#define VIRTNET_RQ_STAT(name, m) {name, offsetof(struct virtnet_rq_stats, m), -1}
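/* A small illustration (an inference from the descriptor layout above): the
 * last field is the netdev qstat offset, and -1 marks a counter that is only
 * exported via ethtool -S rather than the per-queue stats API. For example,
 *
 *	VIRTNET_RQ_STAT("drops", drops)
 *
 * expands to { "drops", offsetof(struct virtnet_rq_stats, drops), -1 }.
 */
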
#define VIRTNET_SQ_STAT_QSTAT(name, m)				\
	{							\
		name,						\
		offsetof(struct virtnet_sq_stats, m),		\
		offsetof(struct netdev_queue_stats_tx, m),	\
	}

#define VIRTNET_RQ_STAT_QSTAT(name, m)				\
	{							\
		name,						\
		offsetof(struct virtnet_rq_stats, m),		\
		offsetof(struct netdev_queue_stats_rx, m),	\
	}

static const struct virtnet_stat_desc virtnet_sq_stats_desc[] = {
	VIRTNET_SQ_STAT("xdp_tx",       xdp_tx),
	VIRTNET_SQ_STAT("xdp_tx_drops", xdp_tx_drops),
	VIRTNET_SQ_STAT("kicks",        kicks),
	VIRTNET_SQ_STAT("tx_timeouts",  tx_timeouts),
};

static const struct virtnet_stat_desc virtnet_rq_stats_desc[] = {
	VIRTNET_RQ_STAT("drops",         drops),
	VIRTNET_RQ_STAT("xdp_packets",   xdp_packets),
	VIRTNET_RQ_STAT("xdp_tx",        xdp_tx),
	VIRTNET_RQ_STAT("xdp_redirects", xdp_redirects),
	VIRTNET_RQ_STAT("xdp_drops",     xdp_drops),
	VIRTNET_RQ_STAT("kicks",         kicks),
};

static const struct virtnet_stat_desc virtnet_sq_stats_desc_qstat[] = {
	VIRTNET_SQ_STAT_QSTAT("packets", packets),
	VIRTNET_SQ_STAT_QSTAT("bytes",   bytes),
	VIRTNET_SQ_STAT_QSTAT("stop",    stop),
	VIRTNET_SQ_STAT_QSTAT("wake",    wake),
};

static const struct virtnet_stat_desc virtnet_rq_stats_desc_qstat[] = {
	VIRTNET_RQ_STAT_QSTAT("packets", packets),
	VIRTNET_RQ_STAT_QSTAT("bytes",   bytes),
};

#define VIRTNET_STATS_DESC_CQ(name) \
	{#name, offsetof(struct virtio_net_stats_cvq, name), -1}

#define VIRTNET_STATS_DESC_RX(class, name) \
	{#name, offsetof(struct virtio_net_stats_rx_ ## class, rx_ ## name), -1}

#define VIRTNET_STATS_DESC_TX(class, name) \
	{#name, offsetof(struct virtio_net_stats_tx_ ## class, tx_ ## name), -1}

static const struct virtnet_stat_desc virtnet_stats_cvq_desc[] = {
	VIRTNET_STATS_DESC_CQ(command_num),
	VIRTNET_STATS_DESC_CQ(ok_num),
};

static const struct virtnet_stat_desc virtnet_stats_rx_basic_desc[] = {
	VIRTNET_STATS_DESC_RX(basic, packets),
	VIRTNET_STATS_DESC_RX(basic, bytes),
	VIRTNET_STATS_DESC_RX(basic, notifications),
	VIRTNET_STATS_DESC_RX(basic, interrupts),
};

static const struct virtnet_stat_desc virtnet_stats_tx_basic_desc[] = {
	VIRTNET_STATS_DESC_TX(basic, packets),
	VIRTNET_STATS_DESC_TX(basic, bytes),
	VIRTNET_STATS_DESC_TX(basic, notifications),
	VIRTNET_STATS_DESC_TX(basic, interrupts),
};

static const struct virtnet_stat_desc virtnet_stats_rx_csum_desc[] = {
	VIRTNET_STATS_DESC_RX(csum, needs_csum),
};

static const struct virtnet_stat_desc virtnet_stats_tx_gso_desc[] = {
	VIRTNET_STATS_DESC_TX(gso, gso_packets_noseg),
	VIRTNET_STATS_DESC_TX(gso, gso_bytes_noseg),
};

static const struct virtnet_stat_desc virtnet_stats_rx_speed_desc[] = {
	VIRTNET_STATS_DESC_RX(speed, ratelimit_bytes),
};

static const struct virtnet_stat_desc virtnet_stats_tx_speed_desc[] = {
	VIRTNET_STATS_DESC_TX(speed, ratelimit_bytes),
};

#define VIRTNET_STATS_DESC_RX_QSTAT(class, name, qstat_field)			\
	{									\
		#name,								\
		offsetof(struct virtio_net_stats_rx_ ## class, rx_ ## name),	\
		offsetof(struct netdev_queue_stats_rx, qstat_field),		\
	}

#define VIRTNET_STATS_DESC_TX_QSTAT(class, name, qstat_field)			\
	{									\
		#name,								\
		offsetof(struct virtio_net_stats_tx_ ## class, tx_ ## name),	\
		offsetof(struct netdev_queue_stats_tx, qstat_field),		\
	}

static const struct virtnet_stat_desc virtnet_stats_rx_basic_desc_qstat[] = {
	VIRTNET_STATS_DESC_RX_QSTAT(basic, drops,         hw_drops),
	VIRTNET_STATS_DESC_RX_QSTAT(basic, drop_overruns, hw_drop_overruns),
};

static const struct virtnet_stat_desc virtnet_stats_tx_basic_desc_qstat[] = {
	VIRTNET_STATS_DESC_TX_QSTAT(basic, drops,          hw_drops),
	VIRTNET_STATS_DESC_TX_QSTAT(basic, drop_malformed, hw_drop_errors),
};

static const struct virtnet_stat_desc virtnet_stats_rx_csum_desc_qstat[] = {
	VIRTNET_STATS_DESC_RX_QSTAT(csum, csum_valid, csum_unnecessary),
	VIRTNET_STATS_DESC_RX_QSTAT(csum, csum_none,  csum_none),
	VIRTNET_STATS_DESC_RX_QSTAT(csum, csum_bad,   csum_bad),
};

static const struct virtnet_stat_desc virtnet_stats_tx_csum_desc_qstat[] = {
	VIRTNET_STATS_DESC_TX_QSTAT(csum, csum_none,  csum_none),
	VIRTNET_STATS_DESC_TX_QSTAT(csum, needs_csum, needs_csum),
};

static const struct virtnet_stat_desc virtnet_stats_rx_gso_desc_qstat[] = {
	VIRTNET_STATS_DESC_RX_QSTAT(gso, gso_packets,           hw_gro_packets),
	VIRTNET_STATS_DESC_RX_QSTAT(gso, gso_bytes,             hw_gro_bytes),
	VIRTNET_STATS_DESC_RX_QSTAT(gso, gso_packets_coalesced, hw_gro_wire_packets),
	VIRTNET_STATS_DESC_RX_QSTAT(gso, gso_bytes_coalesced,   hw_gro_wire_bytes),
};

static const struct virtnet_stat_desc virtnet_stats_tx_gso_desc_qstat[] = {
	VIRTNET_STATS_DESC_TX_QSTAT(gso, gso_packets,        hw_gso_packets),
	VIRTNET_STATS_DESC_TX_QSTAT(gso, gso_bytes,          hw_gso_bytes),
	VIRTNET_STATS_DESC_TX_QSTAT(gso, gso_segments,       hw_gso_wire_packets),
	VIRTNET_STATS_DESC_TX_QSTAT(gso, gso_segments_bytes, hw_gso_wire_bytes),
};

static const struct virtnet_stat_desc virtnet_stats_rx_speed_desc_qstat[] = {
	VIRTNET_STATS_DESC_RX_QSTAT(speed, ratelimit_packets, hw_drop_ratelimits),
};

static const struct virtnet_stat_desc virtnet_stats_tx_speed_desc_qstat[] = {
	VIRTNET_STATS_DESC_TX_QSTAT(speed, ratelimit_packets, hw_drop_ratelimits),
};

#define VIRTNET_Q_TYPE_RX 0
#define VIRTNET_Q_TYPE_TX 1
#define VIRTNET_Q_TYPE_CQ 2

struct virtnet_interrupt_coalesce {
	u32 max_packets;
	u32 max_usecs;
};

/* The dma information of pages allocated at a time. */
struct virtnet_rq_dma {
	dma_addr_t addr;

	u32 ref;

	u16 len;
	u16 need_sync;
};

/* Internal representation of a send virtqueue */
struct send_queue {
	/* Virtqueue associated with this send _queue */
	struct virtqueue *vq;

	/* TX: fragments + linear part + virtio header */
	struct scatterlist sg[MAX_SKB_FRAGS + 2];

	/* Name of the send queue: output.$index */
	char name[16];

	struct virtnet_sq_stats stats;

	struct virtnet_interrupt_coalesce intr_coal;

	struct napi_struct napi;

	/* Record whether sq is in reset state. */
	bool reset;

	struct xsk_buff_pool *xsk_pool;

	dma_addr_t xsk_hdr_dma_addr;
};

/* Internal representation of a receive virtqueue */
struct receive_queue {
	/* Virtqueue associated with this receive_queue */
	struct virtqueue *vq;

	struct napi_struct napi;

	struct bpf_prog __rcu *xdp_prog;

	struct virtnet_rq_stats stats;

	/* The number of rx notifications */
	u16 calls;

	/* Is dynamic interrupt moderation enabled? */
	bool dim_enabled;

	/* Used to protect dim_enabled and inter_coal */
	struct mutex dim_lock;

	/* Dynamic Interrupt Moderation */
	struct dim dim;

	struct virtnet_interrupt_coalesce intr_coal;

	/* Chain pages by the private ptr. */
	struct page *pages;

	/* Average packet length for mergeable receive buffers. */
	struct ewma_pkt_len mrg_avg_pkt_len;

	/* Page frag for packet buffer allocation. */
	struct page_frag alloc_frag;

	/* RX: fragments + linear part + virtio header */
	struct scatterlist sg[MAX_SKB_FRAGS + 2];

	/* Min single buffer size for mergeable buffers case. */
	unsigned int min_buf_len;

	/* Name of this receive queue: input.$index */
	char name[16];

	struct xdp_rxq_info xdp_rxq;

	/* Record the last dma info to free after a new page is allocated. */
	struct virtnet_rq_dma *last_dma;

	struct xsk_buff_pool *xsk_pool;

	/* xdp rxq used by xsk */
	struct xdp_rxq_info xsk_rxq_info;

	struct xdp_buff **xsk_buffs;
};

/* This structure can contain the RSS message with maximum settings for the
 * indirection table and key size. Note that the default structure describing
 * the RSS configuration (virtio_net_rss_config) contains the same info but
 * can't handle the table values. In any case, the structure is passed to the
 * virtio hw through sg_buf split into parts, because table sizes may differ
 * according to the device configuration.
 */
#define VIRTIO_NET_RSS_MAX_KEY_SIZE	40
struct virtio_net_ctrl_rss {
	u32 hash_types;
	u16 indirection_table_mask;
	u16 unclassified_queue;
	u16 hash_cfg_reserved; /* for HASH_CONFIG (see virtio_net_hash_config for details) */
	u16 max_tx_vq;
	u8 hash_key_length;
	u8 key[VIRTIO_NET_RSS_MAX_KEY_SIZE];

	u16 *indirection_table;
};

/* Control VQ buffers: protected by the rtnl lock */
struct control_buf {
	struct virtio_net_ctrl_hdr hdr;
	virtio_net_ctrl_ack status;
};

struct virtnet_info {
	struct virtio_device *vdev;
	struct virtqueue *cvq;
	struct net_device *dev;
	struct send_queue *sq;
	struct receive_queue *rq;

	/* Max # of queue pairs supported by the device */
	u16 max_queue_pairs;

	/* # of queue pairs currently used by the driver */
	u16 curr_queue_pairs;

	/* # of XDP queue pairs currently used by the driver */
	u16 xdp_queue_pairs;

	/* xdp_queue_pairs may be 0, when xdp is already loaded. So add this. */
	bool xdp_enabled;

	/* I like... big packets and I cannot lie! */
	bool big_packets;

	/* number of sg entries allocated for big packets */
	unsigned int big_packets_num_skbfrags;

	/* Host will merge rx buffers for big packets (shake it! shake it!) */
	bool mergeable_rx_bufs;

	/* Host supports rss and/or hash report */
	bool has_rss;
	bool has_rss_hash_report;
	u8 rss_key_size;
	u16 rss_indir_table_size;
	u32 rss_hash_types_supported;
	u32 rss_hash_types_saved;
	struct virtio_net_ctrl_rss rss;

	/* Has control virtqueue */
	bool has_cvq;

	/* Lock to protect the control VQ */
	struct mutex cvq_lock;

	/* Host can handle any s/g split between our header and packet data */
	bool any_header_sg;

	/* Packet virtio header size */
	u8 hdr_len;

	/* Work struct for delayed refilling if we run low on memory. */
	struct delayed_work refill;

	/* Is delayed refill enabled? */
	bool refill_enabled;

	/* The lock to synchronize the access to refill_enabled */
	spinlock_t refill_lock;

	/* Work struct for config space updates */
	struct work_struct config_work;

	/* Work struct for setting rx mode */
	struct work_struct rx_mode_work;

	/* OK to queue work setting RX mode? */
	bool rx_mode_work_enabled;

	/* Is the affinity hint set for the virtqueues? */
	bool affinity_hint_set;

	/* CPU hotplug instances for online & dead */
	struct hlist_node node;
	struct hlist_node node_dead;

	struct control_buf *ctrl;

	/* Ethtool settings */
	u8 duplex;
	u32 speed;

	/* Is rx dynamic interrupt moderation enabled? */
	bool rx_dim_enabled;

	/* Interrupt coalescing settings */
	struct virtnet_interrupt_coalesce intr_coal_tx;
	struct virtnet_interrupt_coalesce intr_coal_rx;

	unsigned long guest_offloads;
	unsigned long guest_offloads_capable;

	/* failover when STANDBY feature enabled */
	struct failover *failover;

	u64 device_stats_cap;
};

struct padded_vnet_hdr {
	struct virtio_net_hdr_v1_hash hdr;
	/*
	 * hdr is in a separate sg buffer, and data sg buffer shares same page
	 * with this header sg. This padding makes next sg 16 byte aligned
	 * after the header.
	 */
	char padding[12];
};

struct virtio_net_common_hdr {
	union {
		struct virtio_net_hdr hdr;
		struct virtio_net_hdr_mrg_rxbuf	mrg_hdr;
		struct virtio_net_hdr_v1_hash hash_v1_hdr;
	};
};

static struct virtio_net_common_hdr xsk_hdr;

static void virtnet_sq_free_unused_buf(struct virtqueue *vq, void *buf);
static int virtnet_xdp_handler(struct bpf_prog *xdp_prog, struct xdp_buff *xdp,
			       struct net_device *dev,
			       unsigned int *xdp_xmit,
			       struct virtnet_rq_stats *stats);
static void virtnet_receive_done(struct virtnet_info *vi, struct receive_queue *rq,
				 struct sk_buff *skb, u8 flags);
static struct sk_buff *virtnet_skb_append_frag(struct sk_buff *head_skb,
					       struct sk_buff *curr_skb,
					       struct page *page, void *buf,
					       int len, int truesize);
static void virtnet_xsk_completed(struct send_queue *sq, int num);

enum virtnet_xmit_type {
	VIRTNET_XMIT_TYPE_SKB,
	VIRTNET_XMIT_TYPE_SKB_ORPHAN,
	VIRTNET_XMIT_TYPE_XDP,
	VIRTNET_XMIT_TYPE_XSK,
};

static int rss_indirection_table_alloc(struct virtio_net_ctrl_rss *rss, u16 indir_table_size)
{
	if (!indir_table_size) {
		rss->indirection_table = NULL;
		return 0;
	}

	rss->indirection_table = kmalloc_array(indir_table_size, sizeof(u16), GFP_KERNEL);
	if (!rss->indirection_table)
		return -ENOMEM;

	return 0;
}

static void rss_indirection_table_free(struct virtio_net_ctrl_rss *rss)
{
	kfree(rss->indirection_table);
}

/* We use the last two bits of the pointer to distinguish the xmit type. */
#define VIRTNET_XMIT_TYPE_MASK (BIT(0) | BIT(1))

#define VIRTIO_XSK_FLAG_OFFSET 2

static enum virtnet_xmit_type virtnet_xmit_ptr_unpack(void **ptr)
{
	unsigned long p = (unsigned long)*ptr;

	*ptr = (void *)(p & ~VIRTNET_XMIT_TYPE_MASK);

	return p & VIRTNET_XMIT_TYPE_MASK;
}

static void *virtnet_xmit_ptr_pack(void *ptr, enum virtnet_xmit_type type)
{
	return (void *)((unsigned long)ptr | type);
}
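/* A worked example of the pointer tagging above (a sketch, not part of the
 * original comments): sk_buff/xdp_frame pointers are at least 4-byte aligned,
 * so BIT(0)|BIT(1) are free to carry the enum virtnet_xmit_type. Packing an
 * skb pointer 0x...a0 with VIRTNET_XMIT_TYPE_SKB_ORPHAN (1) yields 0x...a1;
 * virtnet_xmit_ptr_unpack() masks the low bits off again and returns 1.
 */
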
static int virtnet_add_outbuf(struct send_queue *sq, int num, void *data,
			      enum virtnet_xmit_type type)
{
	return virtqueue_add_outbuf(sq->vq, sq->sg, num,
				    virtnet_xmit_ptr_pack(data, type),
				    GFP_ATOMIC);
}

static u32 virtnet_ptr_to_xsk_buff_len(void *ptr)
{
	return ((unsigned long)ptr) >> VIRTIO_XSK_FLAG_OFFSET;
}
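/* Note (inferred from virtnet_xsk_to_ptr() further below): XSK tx completions
 * do not carry a real pointer at all - the descriptor length is stored in the
 * upper bits (len << VIRTIO_XSK_FLAG_OFFSET) while the low two bits still hold
 * VIRTNET_XMIT_TYPE_XSK, so this helper simply shifts the length back out.
 */
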
static void sg_fill_dma(struct scatterlist *sg, dma_addr_t addr, u32 len)
{
	sg_dma_address(sg) = addr;
	sg_dma_len(sg) = len;
}

static void __free_old_xmit(struct send_queue *sq, struct netdev_queue *txq,
			    bool in_napi, struct virtnet_sq_free_stats *stats)
{
	struct xdp_frame *frame;
	struct sk_buff *skb;
	unsigned int len;
	void *ptr;

	while ((ptr = virtqueue_get_buf(sq->vq, &len)) != NULL) {
		switch (virtnet_xmit_ptr_unpack(&ptr)) {
		case VIRTNET_XMIT_TYPE_SKB:
			skb = ptr;

			pr_debug("Sent skb %p\n", skb);
			stats->napi_packets++;
			stats->napi_bytes += skb->len;
			napi_consume_skb(skb, in_napi);
			break;

		case VIRTNET_XMIT_TYPE_SKB_ORPHAN:
			skb = ptr;

			stats->packets++;
			stats->bytes += skb->len;
			napi_consume_skb(skb, in_napi);
			break;

		case VIRTNET_XMIT_TYPE_XDP:
			frame = ptr;

			stats->packets++;
			stats->bytes += xdp_get_frame_len(frame);
			xdp_return_frame(frame);
			break;

		case VIRTNET_XMIT_TYPE_XSK:
			stats->bytes += virtnet_ptr_to_xsk_buff_len(ptr);
			stats->xsk++;
			break;
		}
	}
	netdev_tx_completed_queue(txq, stats->napi_packets, stats->napi_bytes);
}

static void virtnet_free_old_xmit(struct send_queue *sq,
				  struct netdev_queue *txq,
				  bool in_napi,
				  struct virtnet_sq_free_stats *stats)
{
	__free_old_xmit(sq, txq, in_napi, stats);

	if (stats->xsk)
		virtnet_xsk_completed(sq, stats->xsk);
}

/* Converting between virtqueue no. and kernel tx/rx queue no.
 * 0:rx0 1:tx0 2:rx1 3:tx1 ... 2N:rxN 2N+1:txN 2N+2:cvq
 */
static int vq2txq(struct virtqueue *vq)
{
	return (vq->index - 1) / 2;
}

static int txq2vq(int txq)
{
	return txq * 2 + 1;
}

static int vq2rxq(struct virtqueue *vq)
{
	return vq->index / 2;
}

static int rxq2vq(int rxq)
{
	return rxq * 2;
}

static int vq_type(struct virtnet_info *vi, int qid)
{
	if (qid == vi->max_queue_pairs * 2)
		return VIRTNET_Q_TYPE_CQ;

	if (qid % 2)
		return VIRTNET_Q_TYPE_TX;

	return VIRTNET_Q_TYPE_RX;
}
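/* A worked example of the mapping above with max_queue_pairs == 2:
 * vq 0 -> rx0, vq 1 -> tx0, vq 2 -> rx1, vq 3 -> tx1, vq 4 -> cvq,
 * so txq2vq(1) == 3, vq2txq() of vq 3 == 1, rxq2vq(1) == 2, and vq_type()
 * reports CQ only for qid == max_queue_pairs * 2.
 */
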
static inline struct virtio_net_common_hdr *
skb_vnet_common_hdr(struct sk_buff *skb)
{
	return (struct virtio_net_common_hdr *)skb->cb;
}

/*
 * private is used to chain pages for big packets, put the whole
 * most recent used list in the beginning for reuse
 */
static void give_pages(struct receive_queue *rq, struct page *page)
{
	struct page *end;

	/* Find end of list, sew whole thing into vi->rq.pages. */
	for (end = page; end->private; end = (struct page *)end->private);
	end->private = (unsigned long)rq->pages;
	rq->pages = page;
}

static struct page *get_a_page(struct receive_queue *rq, gfp_t gfp_mask)
{
	struct page *p = rq->pages;

	if (p) {
		rq->pages = (struct page *)p->private;
		/* clear private here, it is used to chain pages */
		p->private = 0;
	} else {
		p = alloc_page(gfp_mask);
	}
	return p;
}

static void virtnet_rq_free_buf(struct virtnet_info *vi,
				struct receive_queue *rq, void *buf)
{
	if (vi->mergeable_rx_bufs)
		put_page(virt_to_head_page(buf));
	else if (vi->big_packets)
		give_pages(rq, buf);
	else
		put_page(virt_to_head_page(buf));
}

static void enable_delayed_refill(struct virtnet_info *vi)
{
	spin_lock_bh(&vi->refill_lock);
	vi->refill_enabled = true;
	spin_unlock_bh(&vi->refill_lock);
}

static void disable_delayed_refill(struct virtnet_info *vi)
{
	spin_lock_bh(&vi->refill_lock);
	vi->refill_enabled = false;
	spin_unlock_bh(&vi->refill_lock);
}

static void enable_rx_mode_work(struct virtnet_info *vi)
{
	rtnl_lock();
	vi->rx_mode_work_enabled = true;
	rtnl_unlock();
}

static void disable_rx_mode_work(struct virtnet_info *vi)
{
	rtnl_lock();
	vi->rx_mode_work_enabled = false;
	rtnl_unlock();
}

static void virtqueue_napi_schedule(struct napi_struct *napi,
				    struct virtqueue *vq)
{
	if (napi_schedule_prep(napi)) {
		virtqueue_disable_cb(vq);
		__napi_schedule(napi);
	}
}

static bool virtqueue_napi_complete(struct napi_struct *napi,
				    struct virtqueue *vq, int processed)
{
	int opaque;

	opaque = virtqueue_enable_cb_prepare(vq);
	if (napi_complete_done(napi, processed)) {
		if (unlikely(virtqueue_poll(vq, opaque)))
			virtqueue_napi_schedule(napi, vq);
		else
			return true;
	} else {
		virtqueue_disable_cb(vq);
	}

	return false;
}
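/* A short note on the pattern above (a sketch of the intent, not taken from
 * the original comments): virtqueue_enable_cb_prepare() re-arms the callback
 * before napi_complete_done(), and virtqueue_poll() then re-checks for buffers
 * that raced in between; if any did, the NAPI is rescheduled so no completion
 * is lost while the callback was still disabled.
 */
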
static void skb_xmit_done(struct virtqueue *vq)
{
	struct virtnet_info *vi = vq->vdev->priv;
	struct napi_struct *napi = &vi->sq[vq2txq(vq)].napi;

	/* Suppress further interrupts. */
	virtqueue_disable_cb(vq);

	if (napi->weight)
		virtqueue_napi_schedule(napi, vq);
	else
		/* We were probably waiting for more output buffers. */
		netif_wake_subqueue(vi->dev, vq2txq(vq));
}

#define MRG_CTX_HEADER_SHIFT 22
static void *mergeable_len_to_ctx(unsigned int truesize,
				  unsigned int headroom)
{
	return (void *)(unsigned long)((headroom << MRG_CTX_HEADER_SHIFT) | truesize);
}

static unsigned int mergeable_ctx_to_headroom(void *mrg_ctx)
{
	return (unsigned long)mrg_ctx >> MRG_CTX_HEADER_SHIFT;
}

static unsigned int mergeable_ctx_to_truesize(void *mrg_ctx)
{
	return (unsigned long)mrg_ctx & ((1 << MRG_CTX_HEADER_SHIFT) - 1);
}
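/* A worked example of the context encoding above: truesize lives in the low
 * MRG_CTX_HEADER_SHIFT (22) bits and headroom in the bits above it, so e.g.
 * mergeable_len_to_ctx(1536, 256) == (void *)((256 << 22) | 1536) and the two
 * helpers recover 256 and 1536 respectively. This caps truesize at
 * 2^22 - 1 bytes per buffer.
 */
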
static struct sk_buff *virtnet_build_skb(void *buf, unsigned int buflen,
					 unsigned int headroom,
					 unsigned int len)
{
	struct sk_buff *skb;

	skb = build_skb(buf, buflen);
	if (unlikely(!skb))
		return NULL;

	skb_reserve(skb, headroom);
	skb_put(skb, len);

	return skb;
}

/* Called from bottom half context */
static struct sk_buff *page_to_skb(struct virtnet_info *vi,
				   struct receive_queue *rq,
				   struct page *page, unsigned int offset,
				   unsigned int len, unsigned int truesize,
				   unsigned int headroom)
{
	struct sk_buff *skb;
	struct virtio_net_common_hdr *hdr;
	unsigned int copy, hdr_len, hdr_padded_len;
	struct page *page_to_free = NULL;
	int tailroom, shinfo_size;
	char *p, *hdr_p, *buf;

	p = page_address(page) + offset;
	hdr_p = p;

	hdr_len = vi->hdr_len;
	if (vi->mergeable_rx_bufs)
		hdr_padded_len = hdr_len;
	else
		hdr_padded_len = sizeof(struct padded_vnet_hdr);

	buf = p - headroom;
	len -= hdr_len;
	offset += hdr_padded_len;
	p += hdr_padded_len;
	tailroom = truesize - headroom - hdr_padded_len - len;

	shinfo_size = SKB_DATA_ALIGN(sizeof(struct skb_shared_info));

	if (!NET_IP_ALIGN && len > GOOD_COPY_LEN && tailroom >= shinfo_size) {
		skb = virtnet_build_skb(buf, truesize, p - buf, len);
		if (unlikely(!skb))
			return NULL;

		page = (struct page *)page->private;
		if (page)
			give_pages(rq, page);
		goto ok;
	}

	/* copy small packet so we can reuse these pages for small data */
	skb = napi_alloc_skb(&rq->napi, GOOD_COPY_LEN);
	if (unlikely(!skb))
		return NULL;

	/* Copy all frame if it fits skb->head, otherwise
	 * we let virtio_net_hdr_to_skb() and GRO pull headers as needed.
	 */
	if (len <= skb_tailroom(skb))
		copy = len;
	else
		copy = ETH_HLEN;
	skb_put_data(skb, p, copy);

	len -= copy;
	offset += copy;

	if (vi->mergeable_rx_bufs) {
		if (len)
			skb_add_rx_frag(skb, 0, page, offset, len, truesize);
		else
			page_to_free = page;
		goto ok;
	}

	/*
	 * Verify that we can indeed put this data into a skb.
	 * This is here to handle cases when the device erroneously
	 * tries to receive more than is possible. This is usually
	 * the case of a broken device.
	 */
	if (unlikely(len > MAX_SKB_FRAGS * PAGE_SIZE)) {
		net_dbg_ratelimited("%s: too much data\n", skb->dev->name);
		dev_kfree_skb(skb);
		return NULL;
	}
	BUG_ON(offset >= PAGE_SIZE);
	while (len) {
		unsigned int frag_size = min((unsigned)PAGE_SIZE - offset, len);

		skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page, offset,
				frag_size, truesize);
		len -= frag_size;
		page = (struct page *)page->private;
		offset = 0;
	}

	if (page)
		give_pages(rq, page);

ok:
	hdr = skb_vnet_common_hdr(skb);
	memcpy(hdr, hdr_p, hdr_len);
	if (page_to_free)
		put_page(page_to_free);

	return skb;
}

static void virtnet_rq_unmap(struct receive_queue *rq, void *buf, u32 len)
{
	struct virtnet_info *vi = rq->vq->vdev->priv;
	struct page *page = virt_to_head_page(buf);
	struct virtnet_rq_dma *dma;
	void *head;
	int offset;

	BUG_ON(vi->big_packets && !vi->mergeable_rx_bufs);

	head = page_address(page);

	dma = head;

	--dma->ref;

	if (dma->need_sync && len) {
		offset = buf - (head + sizeof(*dma));

		virtqueue_dma_sync_single_range_for_cpu(rq->vq, dma->addr,
							offset, len,
							DMA_FROM_DEVICE);
	}

	if (dma->ref)
		return;

	virtqueue_dma_unmap_single_attrs(rq->vq, dma->addr, dma->len,
					 DMA_FROM_DEVICE, DMA_ATTR_SKIP_CPU_SYNC);
	put_page(page);
}

static void *virtnet_rq_get_buf(struct receive_queue *rq, u32 *len, void **ctx)
{
	struct virtnet_info *vi = rq->vq->vdev->priv;
	void *buf;

	BUG_ON(vi->big_packets && !vi->mergeable_rx_bufs);

	buf = virtqueue_get_buf_ctx(rq->vq, len, ctx);
	if (buf)
		virtnet_rq_unmap(rq, buf, *len);

	return buf;
}

static void virtnet_rq_init_one_sg(struct receive_queue *rq, void *buf, u32 len)
{
	struct virtnet_info *vi = rq->vq->vdev->priv;
	struct virtnet_rq_dma *dma;
	dma_addr_t addr;
	u32 offset;
	void *head;

	BUG_ON(vi->big_packets && !vi->mergeable_rx_bufs);

	head = page_address(rq->alloc_frag.page);

	offset = buf - head;

	dma = head;

	addr = dma->addr - sizeof(*dma) + offset;

	sg_init_table(rq->sg, 1);
	sg_fill_dma(rq->sg, addr, len);
}

static void *virtnet_rq_alloc(struct receive_queue *rq, u32 size, gfp_t gfp)
{
	struct page_frag *alloc_frag = &rq->alloc_frag;
	struct virtnet_info *vi = rq->vq->vdev->priv;
	struct virtnet_rq_dma *dma;
	void *buf, *head;
	dma_addr_t addr;

	BUG_ON(vi->big_packets && !vi->mergeable_rx_bufs);

	head = page_address(alloc_frag->page);

	dma = head;

	if (!alloc_frag->offset) {
		if (rq->last_dma) {
			/* Now, the new page is allocated, the last dma
			 * will not be used. So the dma can be unmapped
			 * if the ref is 0.
			 */
			virtnet_rq_unmap(rq, rq->last_dma, 0);
			rq->last_dma = NULL;
		}

		dma->len = alloc_frag->size - sizeof(*dma);

		addr = virtqueue_dma_map_single_attrs(rq->vq, dma + 1,
						      dma->len, DMA_FROM_DEVICE, 0);
		if (virtqueue_dma_mapping_error(rq->vq, addr))
			return NULL;

		dma->addr = addr;
		dma->need_sync = virtqueue_dma_need_sync(rq->vq, addr);

		/* Add a reference to dma to prevent the entire dma from
		 * being released during error handling. This reference
		 * will be freed after the pages are no longer used.
		 */
		get_page(alloc_frag->page);
		dma->ref = 1;
		alloc_frag->offset = sizeof(*dma);

		rq->last_dma = dma;
	}

	++dma->ref;

	buf = head + alloc_frag->offset;

	get_page(alloc_frag->page);
	alloc_frag->offset += size;

	return buf;
}
static void virtnet_rq_unmap_free_buf(struct virtqueue *vq, void *buf)
{
	struct virtnet_info *vi = vq->vdev->priv;
	struct receive_queue *rq;
	int i = vq2rxq(vq);

	rq = &vi->rq[i];

	if (rq->xsk_pool) {
		xsk_buff_free((struct xdp_buff *)buf);
		return;
	}

	if (!vi->big_packets || vi->mergeable_rx_bufs)
		virtnet_rq_unmap(rq, buf, 0);

	virtnet_rq_free_buf(vi, rq, buf);
}

static void free_old_xmit(struct send_queue *sq, struct netdev_queue *txq,
			  bool in_napi)
{
	struct virtnet_sq_free_stats stats = {0};

	virtnet_free_old_xmit(sq, txq, in_napi, &stats);

	/* Avoid overhead when no packets have been processed; this
	 * happens when called speculatively from start_xmit.
	 */
	if (!stats.packets && !stats.napi_packets)
		return;

	u64_stats_update_begin(&sq->stats.syncp);
	u64_stats_add(&sq->stats.bytes, stats.bytes + stats.napi_bytes);
	u64_stats_add(&sq->stats.packets, stats.packets + stats.napi_packets);
	u64_stats_update_end(&sq->stats.syncp);
}

static bool is_xdp_raw_buffer_queue(struct virtnet_info *vi, int q)
{
	if (q < (vi->curr_queue_pairs - vi->xdp_queue_pairs))
		return false;
	else if (q < vi->curr_queue_pairs)
		return true;
	else
		return false;
}
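/* In other words (an inferred summary, not an original comment): the last
 * xdp_queue_pairs queues of the currently used range are reserved for
 * XDP_TX/ndo_xdp_xmit and carry raw XDP buffers, while every queue below
 * curr_queue_pairs - xdp_queue_pairs still carries ordinary skbs.
 */
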
static void check_sq_full_and_disable(struct virtnet_info *vi,
				      struct net_device *dev,
				      struct send_queue *sq)
{
	bool use_napi = sq->napi.weight;
	int qnum = sq - vi->sq;

	/* If running out of space, stop queue to avoid getting packets that we
	 * are then unable to transmit.
	 * An alternative would be to force queuing layer to requeue the skb by
	 * returning NETDEV_TX_BUSY. However, NETDEV_TX_BUSY should not be
	 * returned in a normal path of operation: it means that driver is not
	 * maintaining the TX queue stop/start state properly, and causes
	 * the stack to do a non-trivial amount of useless work.
	 * Since most packets only take 1 or 2 ring slots, stopping the queue
	 * early means 16 slots are typically wasted.
	 */
	if (sq->vq->num_free < 2+MAX_SKB_FRAGS) {
		struct netdev_queue *txq = netdev_get_tx_queue(dev, qnum);

		netif_tx_stop_queue(txq);
		u64_stats_update_begin(&sq->stats.syncp);
		u64_stats_inc(&sq->stats.stop);
		u64_stats_update_end(&sq->stats.syncp);
		if (use_napi) {
			if (unlikely(!virtqueue_enable_cb_delayed(sq->vq)))
				virtqueue_napi_schedule(&sq->napi, sq->vq);
		} else if (unlikely(!virtqueue_enable_cb_delayed(sq->vq))) {
			/* More just got used, free them then recheck. */
			free_old_xmit(sq, txq, false);
			if (sq->vq->num_free >= 2+MAX_SKB_FRAGS) {
				netif_start_subqueue(dev, qnum);
				u64_stats_update_begin(&sq->stats.syncp);
				u64_stats_inc(&sq->stats.wake);
				u64_stats_update_end(&sq->stats.syncp);
				virtqueue_disable_cb(sq->vq);
			}
		}
	}
}
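/* Why 2 + MAX_SKB_FRAGS (a sketch of the reasoning, assuming a worst-case
 * fully fragmented skb): one descriptor for the virtio header, one for the
 * linear part and up to MAX_SKB_FRAGS for the page fragments - matching the
 * sq->sg[MAX_SKB_FRAGS + 2] scatterlist above - so the queue is stopped while
 * a full worst-case packet might not fit.
 */
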
1133 static struct xdp_buff
*buf_to_xdp(struct virtnet_info
*vi
,
1134 struct receive_queue
*rq
, void *buf
, u32 len
)
1136 struct xdp_buff
*xdp
;
1139 xdp
= (struct xdp_buff
*)buf
;
1141 bufsize
= xsk_pool_get_rx_frame_size(rq
->xsk_pool
) + vi
->hdr_len
;
1143 if (unlikely(len
> bufsize
)) {
1144 pr_debug("%s: rx error: len %u exceeds truesize %u\n",
1145 vi
->dev
->name
, len
, bufsize
);
1146 DEV_STATS_INC(vi
->dev
, rx_length_errors
);
1151 xsk_buff_set_size(xdp
, len
);
1152 xsk_buff_dma_sync_for_cpu(xdp
);
1157 static struct sk_buff
*xsk_construct_skb(struct receive_queue
*rq
,
1158 struct xdp_buff
*xdp
)
1160 unsigned int metasize
= xdp
->data
- xdp
->data_meta
;
1161 struct sk_buff
*skb
;
1164 size
= xdp
->data_end
- xdp
->data_hard_start
;
1165 skb
= napi_alloc_skb(&rq
->napi
, size
);
1166 if (unlikely(!skb
)) {
1171 skb_reserve(skb
, xdp
->data_meta
- xdp
->data_hard_start
);
1173 size
= xdp
->data_end
- xdp
->data_meta
;
1174 memcpy(__skb_put(skb
, size
), xdp
->data_meta
, size
);
1177 __skb_pull(skb
, metasize
);
1178 skb_metadata_set(skb
, metasize
);
1186 static struct sk_buff
*virtnet_receive_xsk_small(struct net_device
*dev
, struct virtnet_info
*vi
,
1187 struct receive_queue
*rq
, struct xdp_buff
*xdp
,
1188 unsigned int *xdp_xmit
,
1189 struct virtnet_rq_stats
*stats
)
1191 struct bpf_prog
*prog
;
1196 prog
= rcu_dereference(rq
->xdp_prog
);
1198 ret
= virtnet_xdp_handler(prog
, xdp
, dev
, xdp_xmit
, stats
);
1203 return xsk_construct_skb(rq
, xdp
);
1212 u64_stats_inc(&stats
->drops
);
1217 static void xsk_drop_follow_bufs(struct net_device
*dev
,
1218 struct receive_queue
*rq
,
1220 struct virtnet_rq_stats
*stats
)
1222 struct xdp_buff
*xdp
;
1225 while (num_buf
-- > 1) {
1226 xdp
= virtqueue_get_buf(rq
->vq
, &len
);
1227 if (unlikely(!xdp
)) {
1228 pr_debug("%s: rx error: %d buffers missing\n",
1229 dev
->name
, num_buf
);
1230 DEV_STATS_INC(dev
, rx_length_errors
);
1233 u64_stats_add(&stats
->bytes
, len
);
1238 static int xsk_append_merge_buffer(struct virtnet_info
*vi
,
1239 struct receive_queue
*rq
,
1240 struct sk_buff
*head_skb
,
1242 struct virtio_net_hdr_mrg_rxbuf
*hdr
,
1243 struct virtnet_rq_stats
*stats
)
1245 struct sk_buff
*curr_skb
;
1246 struct xdp_buff
*xdp
;
1251 curr_skb
= head_skb
;
1254 buf
= virtqueue_get_buf(rq
->vq
, &len
);
1255 if (unlikely(!buf
)) {
1256 pr_debug("%s: rx error: %d buffers out of %d missing\n",
1257 vi
->dev
->name
, num_buf
,
1258 virtio16_to_cpu(vi
->vdev
,
1260 DEV_STATS_INC(vi
->dev
, rx_length_errors
);
1264 u64_stats_add(&stats
->bytes
, len
);
1266 xdp
= buf_to_xdp(vi
, rq
, buf
, len
);
1270 buf
= napi_alloc_frag(len
);
1276 memcpy(buf
, xdp
->data
- vi
->hdr_len
, len
);
1280 page
= virt_to_page(buf
);
1284 curr_skb
= virtnet_skb_append_frag(head_skb
, curr_skb
, page
,
1285 buf
, len
, truesize
);
1295 xsk_drop_follow_bufs(vi
->dev
, rq
, num_buf
, stats
);
1299 static struct sk_buff
*virtnet_receive_xsk_merge(struct net_device
*dev
, struct virtnet_info
*vi
,
1300 struct receive_queue
*rq
, struct xdp_buff
*xdp
,
1301 unsigned int *xdp_xmit
,
1302 struct virtnet_rq_stats
*stats
)
1304 struct virtio_net_hdr_mrg_rxbuf
*hdr
;
1305 struct bpf_prog
*prog
;
1306 struct sk_buff
*skb
;
1309 hdr
= xdp
->data
- vi
->hdr_len
;
1310 num_buf
= virtio16_to_cpu(vi
->vdev
, hdr
->num_buffers
);
1314 prog
= rcu_dereference(rq
->xdp_prog
);
1315 /* TODO: support multi buffer. */
1316 if (prog
&& num_buf
== 1)
1317 ret
= virtnet_xdp_handler(prog
, xdp
, dev
, xdp_xmit
, stats
);
1322 skb
= xsk_construct_skb(rq
, xdp
);
1326 if (xsk_append_merge_buffer(vi
, rq
, skb
, num_buf
, hdr
, stats
)) {
1343 xsk_drop_follow_bufs(dev
, rq
, num_buf
, stats
);
1346 u64_stats_inc(&stats
->drops
);
1350 static void virtnet_receive_xsk_buf(struct virtnet_info
*vi
, struct receive_queue
*rq
,
1352 unsigned int *xdp_xmit
,
1353 struct virtnet_rq_stats
*stats
)
1355 struct net_device
*dev
= vi
->dev
;
1356 struct sk_buff
*skb
= NULL
;
1357 struct xdp_buff
*xdp
;
1362 u64_stats_add(&stats
->bytes
, len
);
1364 xdp
= buf_to_xdp(vi
, rq
, buf
, len
);
1368 if (unlikely(len
< ETH_HLEN
)) {
1369 pr_debug("%s: short packet %i\n", dev
->name
, len
);
1370 DEV_STATS_INC(dev
, rx_length_errors
);
1375 flags
= ((struct virtio_net_common_hdr
*)(xdp
->data
- vi
->hdr_len
))->hdr
.flags
;
1377 if (!vi
->mergeable_rx_bufs
)
1378 skb
= virtnet_receive_xsk_small(dev
, vi
, rq
, xdp
, xdp_xmit
, stats
);
1380 skb
= virtnet_receive_xsk_merge(dev
, vi
, rq
, xdp
, xdp_xmit
, stats
);
1383 virtnet_receive_done(vi
, rq
, skb
, flags
);
1386 static int virtnet_add_recvbuf_xsk(struct virtnet_info
*vi
, struct receive_queue
*rq
,
1387 struct xsk_buff_pool
*pool
, gfp_t gfp
)
1389 struct xdp_buff
**xsk_buffs
;
1395 xsk_buffs
= rq
->xsk_buffs
;
1397 num
= xsk_buff_alloc_batch(pool
, xsk_buffs
, rq
->vq
->num_free
);
1401 len
= xsk_pool_get_rx_frame_size(pool
) + vi
->hdr_len
;
1403 for (i
= 0; i
< num
; ++i
) {
1404 /* Use the part of XDP_PACKET_HEADROOM as the virtnet hdr space.
1405 * We assume XDP_PACKET_HEADROOM is larger than hdr->len.
1406 * (see function virtnet_xsk_pool_enable)
1408 addr
= xsk_buff_xdp_get_dma(xsk_buffs
[i
]) - vi
->hdr_len
;
1410 sg_init_table(rq
->sg
, 1);
1411 sg_fill_dma(rq
->sg
, addr
, len
);
1413 err
= virtqueue_add_inbuf_premapped(rq
->vq
, rq
->sg
, 1,
1414 xsk_buffs
[i
], NULL
, gfp
);
1422 for (; i
< num
; ++i
)
1423 xsk_buff_free(xsk_buffs
[i
]);
1428 static void *virtnet_xsk_to_ptr(u32 len
)
1432 p
= len
<< VIRTIO_XSK_FLAG_OFFSET
;
1434 return virtnet_xmit_ptr_pack((void *)p
, VIRTNET_XMIT_TYPE_XSK
);
1437 static int virtnet_xsk_xmit_one(struct send_queue
*sq
,
1438 struct xsk_buff_pool
*pool
,
1439 struct xdp_desc
*desc
)
1441 struct virtnet_info
*vi
;
1444 vi
= sq
->vq
->vdev
->priv
;
1446 addr
= xsk_buff_raw_get_dma(pool
, desc
->addr
);
1447 xsk_buff_raw_dma_sync_for_device(pool
, addr
, desc
->len
);
1449 sg_init_table(sq
->sg
, 2);
1450 sg_fill_dma(sq
->sg
, sq
->xsk_hdr_dma_addr
, vi
->hdr_len
);
1451 sg_fill_dma(sq
->sg
+ 1, addr
, desc
->len
);
1453 return virtqueue_add_outbuf_premapped(sq
->vq
, sq
->sg
, 2,
1454 virtnet_xsk_to_ptr(desc
->len
),
1458 static int virtnet_xsk_xmit_batch(struct send_queue
*sq
,
1459 struct xsk_buff_pool
*pool
,
1460 unsigned int budget
,
1463 struct xdp_desc
*descs
= pool
->tx_descs
;
1468 budget
= min_t(u32
, budget
, sq
->vq
->num_free
);
1470 nb_pkts
= xsk_tx_peek_release_desc_batch(pool
, budget
);
1474 for (i
= 0; i
< nb_pkts
; i
++) {
1475 err
= virtnet_xsk_xmit_one(sq
, pool
, &descs
[i
]);
1476 if (unlikely(err
)) {
1477 xsk_tx_completed(sq
->xsk_pool
, nb_pkts
- i
);
1484 if (kick
&& virtqueue_kick_prepare(sq
->vq
) && virtqueue_notify(sq
->vq
))
1490 static bool virtnet_xsk_xmit(struct send_queue
*sq
, struct xsk_buff_pool
*pool
,
1493 struct virtnet_info
*vi
= sq
->vq
->vdev
->priv
;
1494 struct virtnet_sq_free_stats stats
= {};
1495 struct net_device
*dev
= vi
->dev
;
/* Avoid waking up the TX NAPI needlessly, so call __free_old_xmit instead of
1502 __free_old_xmit(sq
, netdev_get_tx_queue(dev
, sq
- vi
->sq
), true, &stats
);
1505 xsk_tx_completed(sq
->xsk_pool
, stats
.xsk
);
1507 sent
= virtnet_xsk_xmit_batch(sq
, pool
, budget
, &kicks
);
1509 if (!is_xdp_raw_buffer_queue(vi
, sq
- vi
->sq
))
1510 check_sq_full_and_disable(vi
, vi
->dev
, sq
);
1513 struct netdev_queue
*txq
;
1515 txq
= netdev_get_tx_queue(vi
->dev
, sq
- vi
->sq
);
1516 txq_trans_cond_update(txq
);
1519 u64_stats_update_begin(&sq
->stats
.syncp
);
1520 u64_stats_add(&sq
->stats
.packets
, stats
.packets
);
1521 u64_stats_add(&sq
->stats
.bytes
, stats
.bytes
);
1522 u64_stats_add(&sq
->stats
.kicks
, kicks
);
1523 u64_stats_add(&sq
->stats
.xdp_tx
, sent
);
1524 u64_stats_update_end(&sq
->stats
.syncp
);
1526 if (xsk_uses_need_wakeup(pool
))
1527 xsk_set_tx_need_wakeup(pool
);
1532 static void xsk_wakeup(struct send_queue
*sq
)
1534 if (napi_if_scheduled_mark_missed(&sq
->napi
))
1538 virtqueue_napi_schedule(&sq
->napi
, sq
->vq
);
1542 static int virtnet_xsk_wakeup(struct net_device
*dev
, u32 qid
, u32 flag
)
1544 struct virtnet_info
*vi
= netdev_priv(dev
);
1545 struct send_queue
*sq
;
1547 if (!netif_running(dev
))
1550 if (qid
>= vi
->curr_queue_pairs
)
1559 static void virtnet_xsk_completed(struct send_queue
*sq
, int num
)
1561 xsk_tx_completed(sq
->xsk_pool
, num
);
/* If this is called from rx poll, start_xmit or xdp xmit, we should
 * wake up the tx napi to consume the xsk tx queue, because the tx
 * interrupt may not be triggered.
 */
1570 static int __virtnet_xdp_xmit_one(struct virtnet_info
*vi
,
1571 struct send_queue
*sq
,
1572 struct xdp_frame
*xdpf
)
1574 struct virtio_net_hdr_mrg_rxbuf
*hdr
;
1575 struct skb_shared_info
*shinfo
;
1579 if (unlikely(xdpf
->headroom
< vi
->hdr_len
))
1582 if (unlikely(xdp_frame_has_frags(xdpf
))) {
1583 shinfo
= xdp_get_shared_info_from_frame(xdpf
);
1584 nr_frags
= shinfo
->nr_frags
;
/* In the wrapping function virtnet_xdp_xmit(), we need to free
 * up the pending old buffers, where we need to calculate the
 * position of skb_shared_info in xdp_get_frame_len() and
 * xdp_return_frame(), both of which access xdpf->data and
 * xdpf->headroom. Therefore, we need to update the value of
 * headroom synchronously here.
 */
1594 xdpf
->headroom
-= vi
->hdr_len
;
1595 xdpf
->data
-= vi
->hdr_len
;
1596 /* Zero header and leave csum up to XDP layers */
1598 memset(hdr
, 0, vi
->hdr_len
);
1599 xdpf
->len
+= vi
->hdr_len
;
1601 sg_init_table(sq
->sg
, nr_frags
+ 1);
1602 sg_set_buf(sq
->sg
, xdpf
->data
, xdpf
->len
);
1603 for (i
= 0; i
< nr_frags
; i
++) {
1604 skb_frag_t
*frag
= &shinfo
->frags
[i
];
1606 sg_set_page(&sq
->sg
[i
+ 1], skb_frag_page(frag
),
1607 skb_frag_size(frag
), skb_frag_off(frag
));
1610 err
= virtnet_add_outbuf(sq
, nr_frags
+ 1, xdpf
, VIRTNET_XMIT_TYPE_XDP
);
1612 return -ENOSPC
; /* Caller handle free/refcnt */
/* When vi->curr_queue_pairs > nr_cpu_ids, the txq/sq is only used for xdp tx on
 * the current cpu, so it does not need to be locked.
 *
 * Here we use a macro instead of inline functions because we have to deal with
 * three issues at the same time: 1. the choice of sq, 2. deciding whether to
 * lock/unlock the txq, and 3. making sparse happy. It is difficult for two
 * inline functions to solve all three problems at the same time.
 */
1625 #define virtnet_xdp_get_sq(vi) ({ \
1626 int cpu = smp_processor_id(); \
1627 struct netdev_queue *txq; \
1628 typeof(vi) v = (vi); \
1631 if (v->curr_queue_pairs > nr_cpu_ids) { \
1632 qp = v->curr_queue_pairs - v->xdp_queue_pairs; \
1634 txq = netdev_get_tx_queue(v->dev, qp); \
1635 __netif_tx_acquire(txq); \
1637 qp = cpu % v->curr_queue_pairs; \
1638 txq = netdev_get_tx_queue(v->dev, qp); \
1639 __netif_tx_lock(txq, cpu); \
1644 #define virtnet_xdp_put_sq(vi, q) { \
1645 struct netdev_queue *txq; \
1646 typeof(vi) v = (vi); \
1648 txq = netdev_get_tx_queue(v->dev, (q) - v->sq); \
1649 if (v->curr_queue_pairs > nr_cpu_ids) \
1650 __netif_tx_release(txq); \
1652 __netif_tx_unlock(txq); \
1655 static int virtnet_xdp_xmit(struct net_device
*dev
,
1656 int n
, struct xdp_frame
**frames
, u32 flags
)
1658 struct virtnet_info
*vi
= netdev_priv(dev
);
1659 struct virtnet_sq_free_stats stats
= {0};
1660 struct receive_queue
*rq
= vi
->rq
;
1661 struct bpf_prog
*xdp_prog
;
1662 struct send_queue
*sq
;
1668 /* Only allow ndo_xdp_xmit if XDP is loaded on dev, as this
1669 * indicate XDP resources have been successfully allocated.
1671 xdp_prog
= rcu_access_pointer(rq
->xdp_prog
);
1675 sq
= virtnet_xdp_get_sq(vi
);
1677 if (unlikely(flags
& ~XDP_XMIT_FLAGS_MASK
)) {
1682 /* Free up any pending old buffers before queueing new ones. */
1683 virtnet_free_old_xmit(sq
, netdev_get_tx_queue(dev
, sq
- vi
->sq
),
1686 for (i
= 0; i
< n
; i
++) {
1687 struct xdp_frame
*xdpf
= frames
[i
];
1689 if (__virtnet_xdp_xmit_one(vi
, sq
, xdpf
))
1695 if (!is_xdp_raw_buffer_queue(vi
, sq
- vi
->sq
))
1696 check_sq_full_and_disable(vi
, dev
, sq
);
1698 if (flags
& XDP_XMIT_FLUSH
) {
1699 if (virtqueue_kick_prepare(sq
->vq
) && virtqueue_notify(sq
->vq
))
1703 u64_stats_update_begin(&sq
->stats
.syncp
);
1704 u64_stats_add(&sq
->stats
.bytes
, stats
.bytes
);
1705 u64_stats_add(&sq
->stats
.packets
, stats
.packets
);
1706 u64_stats_add(&sq
->stats
.xdp_tx
, n
);
1707 u64_stats_add(&sq
->stats
.xdp_tx_drops
, n
- nxmit
);
1708 u64_stats_add(&sq
->stats
.kicks
, kicks
);
1709 u64_stats_update_end(&sq
->stats
.syncp
);
1711 virtnet_xdp_put_sq(vi
, sq
);
1715 static void put_xdp_frags(struct xdp_buff
*xdp
)
1717 struct skb_shared_info
*shinfo
;
1718 struct page
*xdp_page
;
1721 if (xdp_buff_has_frags(xdp
)) {
1722 shinfo
= xdp_get_shared_info_from_buff(xdp
);
1723 for (i
= 0; i
< shinfo
->nr_frags
; i
++) {
1724 xdp_page
= skb_frag_page(&shinfo
->frags
[i
]);
1730 static int virtnet_xdp_handler(struct bpf_prog
*xdp_prog
, struct xdp_buff
*xdp
,
1731 struct net_device
*dev
,
1732 unsigned int *xdp_xmit
,
1733 struct virtnet_rq_stats
*stats
)
1735 struct xdp_frame
*xdpf
;
1739 act
= bpf_prog_run_xdp(xdp_prog
, xdp
);
1740 u64_stats_inc(&stats
->xdp_packets
);
1747 u64_stats_inc(&stats
->xdp_tx
);
1748 xdpf
= xdp_convert_buff_to_frame(xdp
);
1749 if (unlikely(!xdpf
)) {
1750 netdev_dbg(dev
, "convert buff to frame failed for xdp\n");
1754 err
= virtnet_xdp_xmit(dev
, 1, &xdpf
, 0);
1755 if (unlikely(!err
)) {
1756 xdp_return_frame_rx_napi(xdpf
);
1757 } else if (unlikely(err
< 0)) {
1758 trace_xdp_exception(dev
, xdp_prog
, act
);
1761 *xdp_xmit
|= VIRTIO_XDP_TX
;
1765 u64_stats_inc(&stats
->xdp_redirects
);
1766 err
= xdp_do_redirect(dev
, xdp
, xdp_prog
);
1770 *xdp_xmit
|= VIRTIO_XDP_REDIR
;
1774 bpf_warn_invalid_xdp_action(dev
, xdp_prog
, act
);
1777 trace_xdp_exception(dev
, xdp_prog
, act
);
1784 static unsigned int virtnet_get_headroom(struct virtnet_info
*vi
)
1786 return vi
->xdp_enabled
? XDP_PACKET_HEADROOM
: 0;
1789 /* We copy the packet for XDP in the following cases:
1791 * 1) Packet is scattered across multiple rx buffers.
1792 * 2) Headroom space is insufficient.
1794 * This is inefficient but it's a temporary condition that
1795 * we hit right after XDP is enabled and until queue is refilled
1796 * with large buffers with sufficient headroom - so it should affect
1797 * at most queue size packets.
1798 * Afterwards, the conditions to enable
1799 * XDP should preclude the underlying device from sending packets
1800 * across multiple buffers (num_buf > 1), and we make sure buffers
1801 * have enough headroom.
1803 static struct page
*xdp_linearize_page(struct receive_queue
*rq
,
1810 int tailroom
= SKB_DATA_ALIGN(sizeof(struct skb_shared_info
));
1813 if (page_off
+ *len
+ tailroom
> PAGE_SIZE
)
1816 page
= alloc_page(GFP_ATOMIC
);
1820 memcpy(page_address(page
) + page_off
, page_address(p
) + offset
, *len
);
1823 while (--*num_buf
) {
1824 unsigned int buflen
;
1828 buf
= virtnet_rq_get_buf(rq
, &buflen
, NULL
);
1832 p
= virt_to_head_page(buf
);
1833 off
= buf
- page_address(p
);
1835 /* guard against a misconfigured or uncooperative backend that
1836 * is sending packet larger than the MTU.
1838 if ((page_off
+ buflen
+ tailroom
) > PAGE_SIZE
) {
1843 memcpy(page_address(page
) + page_off
,
1844 page_address(p
) + off
, buflen
);
1849 /* Headroom does not contribute to packet length */
1850 *len
= page_off
- XDP_PACKET_HEADROOM
;
1853 __free_pages(page
, 0);
1857 static struct sk_buff
*receive_small_build_skb(struct virtnet_info
*vi
,
1858 unsigned int xdp_headroom
,
1862 unsigned int header_offset
;
1863 unsigned int headroom
;
1864 unsigned int buflen
;
1865 struct sk_buff
*skb
;
1867 header_offset
= VIRTNET_RX_PAD
+ xdp_headroom
;
1868 headroom
= vi
->hdr_len
+ header_offset
;
1869 buflen
= SKB_DATA_ALIGN(GOOD_PACKET_LEN
+ headroom
) +
1870 SKB_DATA_ALIGN(sizeof(struct skb_shared_info
));
1872 skb
= virtnet_build_skb(buf
, buflen
, headroom
, len
);
1876 buf
+= header_offset
;
1877 memcpy(skb_vnet_common_hdr(skb
), buf
, vi
->hdr_len
);
1882 static struct sk_buff
*receive_small_xdp(struct net_device
*dev
,
1883 struct virtnet_info
*vi
,
1884 struct receive_queue
*rq
,
1885 struct bpf_prog
*xdp_prog
,
1887 unsigned int xdp_headroom
,
1889 unsigned int *xdp_xmit
,
1890 struct virtnet_rq_stats
*stats
)
1892 unsigned int header_offset
= VIRTNET_RX_PAD
+ xdp_headroom
;
1893 unsigned int headroom
= vi
->hdr_len
+ header_offset
;
1894 struct virtio_net_hdr_mrg_rxbuf
*hdr
= buf
+ header_offset
;
1895 struct page
*page
= virt_to_head_page(buf
);
1896 struct page
*xdp_page
;
1897 unsigned int buflen
;
1898 struct xdp_buff xdp
;
1899 struct sk_buff
*skb
;
1900 unsigned int metasize
= 0;
1903 if (unlikely(hdr
->hdr
.gso_type
))
1906 /* Partially checksummed packets must be dropped. */
1907 if (unlikely(hdr
->hdr
.flags
& VIRTIO_NET_HDR_F_NEEDS_CSUM
))
1910 buflen
= SKB_DATA_ALIGN(GOOD_PACKET_LEN
+ headroom
) +
1911 SKB_DATA_ALIGN(sizeof(struct skb_shared_info
));
1913 if (unlikely(xdp_headroom
< virtnet_get_headroom(vi
))) {
1914 int offset
= buf
- page_address(page
) + header_offset
;
1915 unsigned int tlen
= len
+ vi
->hdr_len
;
1918 xdp_headroom
= virtnet_get_headroom(vi
);
1919 header_offset
= VIRTNET_RX_PAD
+ xdp_headroom
;
1920 headroom
= vi
->hdr_len
+ header_offset
;
1921 buflen
= SKB_DATA_ALIGN(GOOD_PACKET_LEN
+ headroom
) +
1922 SKB_DATA_ALIGN(sizeof(struct skb_shared_info
));
1923 xdp_page
= xdp_linearize_page(rq
, &num_buf
, page
,
1924 offset
, header_offset
,
1929 buf
= page_address(xdp_page
);
1934 xdp_init_buff(&xdp
, buflen
, &rq
->xdp_rxq
);
1935 xdp_prepare_buff(&xdp
, buf
+ VIRTNET_RX_PAD
+ vi
->hdr_len
,
1936 xdp_headroom
, len
, true);
1938 act
= virtnet_xdp_handler(xdp_prog
, &xdp
, dev
, xdp_xmit
, stats
);
1942 /* Recalculate length in case bpf program changed it */
1943 len
= xdp
.data_end
- xdp
.data
;
1944 metasize
= xdp
.data
- xdp
.data_meta
;
1955 skb
= virtnet_build_skb(buf
, buflen
, xdp
.data
- buf
, len
);
1960 skb_metadata_set(skb
, metasize
);
1965 u64_stats_inc(&stats
->xdp_drops
);
1967 u64_stats_inc(&stats
->drops
);
1973 static struct sk_buff
*receive_small(struct net_device
*dev
,
1974 struct virtnet_info
*vi
,
1975 struct receive_queue
*rq
,
1976 void *buf
, void *ctx
,
1978 unsigned int *xdp_xmit
,
1979 struct virtnet_rq_stats
*stats
)
1981 unsigned int xdp_headroom
= (unsigned long)ctx
;
1982 struct page
*page
= virt_to_head_page(buf
);
1983 struct sk_buff
*skb
;
1985 /* We passed the address of virtnet header to virtio-core,
1986 * so truncate the padding.
1988 buf
-= VIRTNET_RX_PAD
+ xdp_headroom
;
1991 u64_stats_add(&stats
->bytes
, len
);
1993 if (unlikely(len
> GOOD_PACKET_LEN
)) {
1994 pr_debug("%s: rx error: len %u exceeds max size %d\n",
1995 dev
->name
, len
, GOOD_PACKET_LEN
);
1996 DEV_STATS_INC(dev
, rx_length_errors
);
2000 if (unlikely(vi
->xdp_enabled
)) {
2001 struct bpf_prog
*xdp_prog
;
2004 xdp_prog
= rcu_dereference(rq
->xdp_prog
);
2006 skb
= receive_small_xdp(dev
, vi
, rq
, xdp_prog
, buf
,
2007 xdp_headroom
, len
, xdp_xmit
,
2015 skb
= receive_small_build_skb(vi
, xdp_headroom
, buf
, len
);
2020 u64_stats_inc(&stats
->drops
);
2025 static struct sk_buff
*receive_big(struct net_device
*dev
,
2026 struct virtnet_info
*vi
,
2027 struct receive_queue
*rq
,
2030 struct virtnet_rq_stats
*stats
)
2032 struct page
*page
= buf
;
2033 struct sk_buff
*skb
=
2034 page_to_skb(vi
, rq
, page
, 0, len
, PAGE_SIZE
, 0);
2036 u64_stats_add(&stats
->bytes
, len
- vi
->hdr_len
);
2043 u64_stats_inc(&stats
->drops
);
2044 give_pages(rq
, page
);
2048 static void mergeable_buf_free(struct receive_queue
*rq
, int num_buf
,
2049 struct net_device
*dev
,
2050 struct virtnet_rq_stats
*stats
)
2056 while (num_buf
-- > 1) {
2057 buf
= virtnet_rq_get_buf(rq
, &len
, NULL
);
2058 if (unlikely(!buf
)) {
2059 pr_debug("%s: rx error: %d buffers missing\n",
2060 dev
->name
, num_buf
);
2061 DEV_STATS_INC(dev
, rx_length_errors
);
2064 u64_stats_add(&stats
->bytes
, len
);
2065 page
= virt_to_head_page(buf
);
2070 /* Why not use xdp_build_skb_from_frame() ?
2071 * XDP core assumes that xdp frags are PAGE_SIZE in length, while in
2072 * virtio-net there are 2 points that do not match its requirements:
2073 * 1. The size of the prefilled buffer is not fixed before xdp is set.
2074 * 2. xdp_build_skb_from_frame() does more checks that we don't need,
2075 * like eth_type_trans() (which virtio-net does in receive_buf()).
2077 static struct sk_buff
*build_skb_from_xdp_buff(struct net_device
*dev
,
2078 struct virtnet_info
*vi
,
2079 struct xdp_buff
*xdp
,
2080 unsigned int xdp_frags_truesz
)
2082 struct skb_shared_info
*sinfo
= xdp_get_shared_info_from_buff(xdp
);
2083 unsigned int headroom
, data_len
;
2084 struct sk_buff
*skb
;
2088 if (unlikely(xdp
->data_end
> xdp_data_hard_end(xdp
))) {
2089 pr_debug("Error building skb as missing reserved tailroom for xdp");
2093 if (unlikely(xdp_buff_has_frags(xdp
)))
2094 nr_frags
= sinfo
->nr_frags
;
2096 skb
= build_skb(xdp
->data_hard_start
, xdp
->frame_sz
);
2100 headroom
= xdp
->data
- xdp
->data_hard_start
;
2101 data_len
= xdp
->data_end
- xdp
->data
;
2102 skb_reserve(skb
, headroom
);
2103 __skb_put(skb
, data_len
);
2105 metasize
= xdp
->data
- xdp
->data_meta
;
2106 metasize
= metasize
> 0 ? metasize
: 0;
2108 skb_metadata_set(skb
, metasize
);
2110 if (unlikely(xdp_buff_has_frags(xdp
)))
2111 xdp_update_skb_shared_info(skb
, nr_frags
,
2112 sinfo
->xdp_frags_size
,
2114 xdp_buff_is_frag_pfmemalloc(xdp
));
2119 /* TODO: build xdp in big mode */
2120 static int virtnet_build_xdp_buff_mrg(struct net_device
*dev
,
2121 struct virtnet_info
*vi
,
2122 struct receive_queue
*rq
,
2123 struct xdp_buff
*xdp
,
2126 unsigned int frame_sz
,
2128 unsigned int *xdp_frags_truesize
,
2129 struct virtnet_rq_stats
*stats
)
2131 struct virtio_net_hdr_mrg_rxbuf
*hdr
= buf
;
2132 unsigned int headroom
, tailroom
, room
;
2133 unsigned int truesize
, cur_frag_size
;
2134 struct skb_shared_info
*shinfo
;
2135 unsigned int xdp_frags_truesz
= 0;
2141 xdp_init_buff(xdp
, frame_sz
, &rq
->xdp_rxq
);
2142 xdp_prepare_buff(xdp
, buf
- XDP_PACKET_HEADROOM
,
2143 XDP_PACKET_HEADROOM
+ vi
->hdr_len
, len
- vi
->hdr_len
, true);
2149 /* If we want to build multi-buffer xdp, we need
2150 * to specify that the flags of xdp_buff have the
2151 * XDP_FLAGS_HAS_FRAG bit.
2153 if (!xdp_buff_has_frags(xdp
))
2154 xdp_buff_set_frags_flag(xdp
);
2156 shinfo
= xdp_get_shared_info_from_buff(xdp
);
2157 shinfo
->nr_frags
= 0;
2158 shinfo
->xdp_frags_size
= 0;
2161 if (*num_buf
> MAX_SKB_FRAGS
+ 1)
2164 while (--*num_buf
> 0) {
2165 buf
= virtnet_rq_get_buf(rq
, &len
, &ctx
);
2166 if (unlikely(!buf
)) {
2167 pr_debug("%s: rx error: %d buffers out of %d missing\n",
2168 dev
->name
, *num_buf
,
2169 virtio16_to_cpu(vi
->vdev
, hdr
->num_buffers
));
2170 DEV_STATS_INC(dev
, rx_length_errors
);
2174 u64_stats_add(&stats
->bytes
, len
);
2175 page
= virt_to_head_page(buf
);
2176 offset
= buf
- page_address(page
);
2178 truesize
= mergeable_ctx_to_truesize(ctx
);
2179 headroom
= mergeable_ctx_to_headroom(ctx
);
2180 tailroom
= headroom
? sizeof(struct skb_shared_info
) : 0;
2181 room
= SKB_DATA_ALIGN(headroom
+ tailroom
);
2183 cur_frag_size
= truesize
;
2184 xdp_frags_truesz
+= cur_frag_size
;
2185 if (unlikely(len
> truesize
- room
|| cur_frag_size
> PAGE_SIZE
)) {
2187 pr_debug("%s: rx error: len %u exceeds truesize %lu\n",
2188 dev
->name
, len
, (unsigned long)(truesize
- room
));
2189 DEV_STATS_INC(dev
, rx_length_errors
);
2193 frag
= &shinfo
->frags
[shinfo
->nr_frags
++];
2194 skb_frag_fill_page_desc(frag
, page
, offset
, len
);
2195 if (page_is_pfmemalloc(page
))
2196 xdp_buff_set_frag_pfmemalloc(xdp
);
2198 shinfo
->xdp_frags_size
+= len
;
2201 *xdp_frags_truesize
= xdp_frags_truesz
;
2209 static void *mergeable_xdp_get_buf(struct virtnet_info
*vi
,
2210 struct receive_queue
*rq
,
2211 struct bpf_prog
*xdp_prog
,
2213 unsigned int *frame_sz
,
2218 struct virtio_net_hdr_mrg_rxbuf
*hdr
)
2220 unsigned int truesize
= mergeable_ctx_to_truesize(ctx
);
2221 unsigned int headroom
= mergeable_ctx_to_headroom(ctx
);
2222 struct page
*xdp_page
;
2223 unsigned int xdp_room
;
2225 /* Transient failure which in theory could occur if
2226 * in-flight packets from before XDP was enabled reach
2227 * the receive path after XDP is loaded.
2229 if (unlikely(hdr
->hdr
.gso_type
))
2232 /* Partially checksummed packets must be dropped. */
2233 if (unlikely(hdr
->hdr
.flags
& VIRTIO_NET_HDR_F_NEEDS_CSUM
))
2236 /* Now XDP core assumes frag size is PAGE_SIZE, but buffers
2237 * with headroom may add hole in truesize, which
2238 * make their length exceed PAGE_SIZE. So we disabled the
2239 * hole mechanism for xdp. See add_recvbuf_mergeable().
2241 *frame_sz
= truesize
;
2243 if (likely(headroom
>= virtnet_get_headroom(vi
) &&
2244 (*num_buf
== 1 || xdp_prog
->aux
->xdp_has_frags
))) {
2245 return page_address(*page
) + offset
;
/* This happens when headroom is not enough because
 * the buffer was prefilled before XDP was set.
 * This should only happen for the first several packets.
 * In fact, vq reset could be used here to help us clean up
 * the prefilled buffers, but many existing devices do not
 * support it, and we don't want to bother users who are
 * using xdp normally.
 */
2256 if (!xdp_prog
->aux
->xdp_has_frags
) {
2257 /* linearize data for XDP */
2258 xdp_page
= xdp_linearize_page(rq
, num_buf
,
2260 XDP_PACKET_HEADROOM
,
2265 xdp_room
= SKB_DATA_ALIGN(XDP_PACKET_HEADROOM
+
2266 sizeof(struct skb_shared_info
));
2267 if (*len
		     xdp_room > PAGE_SIZE)

		xdp_page = alloc_page(GFP_ATOMIC);

		memcpy(page_address(xdp_page) + XDP_PACKET_HEADROOM,
		       page_address(*page) + offset, *len);

		*frame_sz = PAGE_SIZE;

	return page_address(*page) + XDP_PACKET_HEADROOM;

static struct sk_buff *receive_mergeable_xdp(struct net_device *dev,
					     struct virtnet_info *vi,
					     struct receive_queue *rq,
					     struct bpf_prog *xdp_prog,
					     unsigned int *xdp_xmit,
					     struct virtnet_rq_stats *stats)
	struct virtio_net_hdr_mrg_rxbuf *hdr = buf;
	int num_buf = virtio16_to_cpu(vi->vdev, hdr->num_buffers);
	struct page *page = virt_to_head_page(buf);
	int offset = buf - page_address(page);
	unsigned int xdp_frags_truesz = 0;
	struct sk_buff *head_skb;
	unsigned int frame_sz;
	struct xdp_buff xdp;

	data = mergeable_xdp_get_buf(vi, rq, xdp_prog, ctx, &frame_sz, &num_buf, &page,
	if (unlikely(!data))

	err = virtnet_build_xdp_buff_mrg(dev, vi, rq, &xdp, data, len, frame_sz,
					 &num_buf, &xdp_frags_truesz, stats);

	act = virtnet_xdp_handler(xdp_prog, &xdp, dev, xdp_xmit, stats);

		head_skb = build_skb_from_xdp_buff(dev, vi, &xdp, xdp_frags_truesz);
		if (unlikely(!head_skb))

	put_xdp_frags(&xdp);

	mergeable_buf_free(rq, num_buf, dev, stats);

	u64_stats_inc(&stats->xdp_drops);
	u64_stats_inc(&stats->drops);

static struct sk_buff *virtnet_skb_append_frag(struct sk_buff *head_skb,
					       struct sk_buff *curr_skb,
					       struct page *page, void *buf,
					       int len, int truesize)
	num_skb_frags = skb_shinfo(curr_skb)->nr_frags;
	if (unlikely(num_skb_frags == MAX_SKB_FRAGS)) {
		struct sk_buff *nskb = alloc_skb(0, GFP_ATOMIC);

		if (unlikely(!nskb))

		if (curr_skb == head_skb)
			skb_shinfo(curr_skb)->frag_list = nskb;

			curr_skb->next = nskb;

		head_skb->truesize += nskb->truesize;

	if (curr_skb != head_skb) {
		head_skb->data_len += len;
		head_skb->len += len;
		head_skb->truesize += truesize;

	offset = buf - page_address(page);
	if (skb_can_coalesce(curr_skb, num_skb_frags, page, offset)) {

		skb_coalesce_rx_frag(curr_skb, num_skb_frags - 1,

		skb_add_rx_frag(curr_skb, num_skb_frags, page,
				offset, len, truesize);

static struct sk_buff *receive_mergeable(struct net_device *dev,
					 struct virtnet_info *vi,
					 struct receive_queue *rq,
					 unsigned int *xdp_xmit,
					 struct virtnet_rq_stats *stats)
	struct virtio_net_hdr_mrg_rxbuf *hdr = buf;
	int num_buf = virtio16_to_cpu(vi->vdev, hdr->num_buffers);
	struct page *page = virt_to_head_page(buf);
	int offset = buf - page_address(page);
	struct sk_buff *head_skb, *curr_skb;
	unsigned int truesize = mergeable_ctx_to_truesize(ctx);
	unsigned int headroom = mergeable_ctx_to_headroom(ctx);
	unsigned int tailroom = headroom ? sizeof(struct skb_shared_info) : 0;
	unsigned int room = SKB_DATA_ALIGN(headroom + tailroom);

	u64_stats_add(&stats->bytes, len - vi->hdr_len);

	if (unlikely(len > truesize - room)) {
		pr_debug("%s: rx error: len %u exceeds truesize %lu\n",
			 dev->name, len, (unsigned long)(truesize - room));
		DEV_STATS_INC(dev, rx_length_errors);

	if (unlikely(vi->xdp_enabled)) {
		struct bpf_prog *xdp_prog;

		xdp_prog = rcu_dereference(rq->xdp_prog);

			head_skb = receive_mergeable_xdp(dev, vi, rq, xdp_prog, buf, ctx,
							 len, xdp_xmit, stats);

	head_skb = page_to_skb(vi, rq, page, offset, len, truesize, headroom);
	curr_skb = head_skb;

	if (unlikely(!curr_skb))

		buf = virtnet_rq_get_buf(rq, &len, &ctx);
		if (unlikely(!buf)) {
			pr_debug("%s: rx error: %d buffers out of %d missing\n",

				 virtio16_to_cpu(vi->vdev,

			DEV_STATS_INC(dev, rx_length_errors);

		u64_stats_add(&stats->bytes, len);
		page = virt_to_head_page(buf);

		truesize = mergeable_ctx_to_truesize(ctx);
		headroom = mergeable_ctx_to_headroom(ctx);
		tailroom = headroom ? sizeof(struct skb_shared_info) : 0;
		room = SKB_DATA_ALIGN(headroom + tailroom);
		if (unlikely(len > truesize - room)) {
			pr_debug("%s: rx error: len %u exceeds truesize %lu\n",
				 dev->name, len, (unsigned long)(truesize - room));
			DEV_STATS_INC(dev, rx_length_errors);

		curr_skb = virtnet_skb_append_frag(head_skb, curr_skb, page,
						   buf, len, truesize);

	ewma_pkt_len_add(&rq->mrg_avg_pkt_len, head_skb->len);

	mergeable_buf_free(rq, num_buf, dev, stats);

	u64_stats_inc(&stats->drops);
	dev_kfree_skb(head_skb);

static void virtio_skb_set_hash(const struct virtio_net_hdr_v1_hash *hdr_hash,
				struct sk_buff *skb)
	enum pkt_hash_types rss_hash_type;

	if (!hdr_hash || !skb)

	switch (__le16_to_cpu(hdr_hash->hash_report)) {
	case VIRTIO_NET_HASH_REPORT_TCPv4:
	case VIRTIO_NET_HASH_REPORT_UDPv4:
	case VIRTIO_NET_HASH_REPORT_TCPv6:
	case VIRTIO_NET_HASH_REPORT_UDPv6:
	case VIRTIO_NET_HASH_REPORT_TCPv6_EX:
	case VIRTIO_NET_HASH_REPORT_UDPv6_EX:
		rss_hash_type = PKT_HASH_TYPE_L4;

	case VIRTIO_NET_HASH_REPORT_IPv4:
	case VIRTIO_NET_HASH_REPORT_IPv6:
	case VIRTIO_NET_HASH_REPORT_IPv6_EX:
		rss_hash_type = PKT_HASH_TYPE_L3;

	case VIRTIO_NET_HASH_REPORT_NONE:

		rss_hash_type = PKT_HASH_TYPE_NONE;

	skb_set_hash(skb, __le32_to_cpu(hdr_hash->hash_value), rss_hash_type);

static void virtnet_receive_done(struct virtnet_info *vi, struct receive_queue *rq,
				 struct sk_buff *skb, u8 flags)
	struct virtio_net_common_hdr *hdr;
	struct net_device *dev = vi->dev;

	hdr = skb_vnet_common_hdr(skb);
	if (dev->features & NETIF_F_RXHASH && vi->has_rss_hash_report)
		virtio_skb_set_hash(&hdr->hash_v1_hdr, skb);

	if (flags & VIRTIO_NET_HDR_F_DATA_VALID)
		skb->ip_summed = CHECKSUM_UNNECESSARY;

	if (virtio_net_hdr_to_skb(skb, &hdr->hdr,
				  virtio_is_little_endian(vi->vdev))) {
		net_warn_ratelimited("%s: bad gso: type: %u, size: %u\n",
				     dev->name, hdr->hdr.gso_type,

	skb_record_rx_queue(skb, vq2rxq(rq->vq));
	skb->protocol = eth_type_trans(skb, dev);
	pr_debug("Receiving skb proto 0x%04x len %i type %i\n",
		 ntohs(skb->protocol), skb->len, skb->pkt_type);

	napi_gro_receive(&rq->napi, skb);

	DEV_STATS_INC(dev, rx_frame_errors);

static void receive_buf(struct virtnet_info *vi, struct receive_queue *rq,
			void *buf, unsigned int len, void **ctx,
			unsigned int *xdp_xmit,
			struct virtnet_rq_stats *stats)
	struct net_device *dev = vi->dev;
	struct sk_buff *skb;

	if (unlikely(len < vi->hdr_len + ETH_HLEN)) {
		pr_debug("%s: short packet %i\n", dev->name, len);
		DEV_STATS_INC(dev, rx_length_errors);
		virtnet_rq_free_buf(vi, rq, buf);

	/* 1. Save the flags early, as the XDP program might overwrite them.
	 * These flags ensure packets marked as VIRTIO_NET_HDR_F_DATA_VALID
	 * stay valid after XDP processing.
	 * 2. XDP doesn't work with partially checksummed packets (refer to
	 * virtnet_xdp_set()), so packets marked as
	 * VIRTIO_NET_HDR_F_NEEDS_CSUM get dropped during XDP processing.
	 */
	flags = ((struct virtio_net_common_hdr *)buf)->hdr.flags;

	if (vi->mergeable_rx_bufs)
		skb = receive_mergeable(dev, vi, rq, buf, ctx, len, xdp_xmit,
	else if (vi->big_packets)
		skb = receive_big(dev, vi, rq, buf, len, stats);

		skb = receive_small(dev, vi, rq, buf, ctx, len, xdp_xmit, stats);

	virtnet_receive_done(vi, rq, skb, flags);

/* Unlike mergeable buffers, all buffers are allocated to the
 * same size, except for the headroom. For this reason we do
 * not need to use mergeable_len_to_ctx here - it is enough
 * to store the headroom as the context ignoring the truesize.
 */
static int add_recvbuf_small(struct virtnet_info *vi, struct receive_queue *rq,
	unsigned int xdp_headroom = virtnet_get_headroom(vi);
	void *ctx = (void *)(unsigned long)xdp_headroom;
	int len = vi->hdr_len + VIRTNET_RX_PAD + GOOD_PACKET_LEN + xdp_headroom;

	len = SKB_DATA_ALIGN(len) +
	      SKB_DATA_ALIGN(sizeof(struct skb_shared_info));

	if (unlikely(!skb_page_frag_refill(len, &rq->alloc_frag, gfp)))

	buf = virtnet_rq_alloc(rq, len, gfp);

	buf += VIRTNET_RX_PAD + xdp_headroom;

	virtnet_rq_init_one_sg(rq, buf, vi->hdr_len + GOOD_PACKET_LEN);

	err = virtqueue_add_inbuf_premapped(rq->vq, rq->sg, 1, buf, ctx, gfp);

		virtnet_rq_unmap(rq, buf, 0);
		put_page(virt_to_head_page(buf));

static int add_recvbuf_big(struct virtnet_info *vi, struct receive_queue *rq,
	struct page *first, *list = NULL;

	sg_init_table(rq->sg, vi->big_packets_num_skbfrags + 2);

	/* page in rq->sg[vi->big_packets_num_skbfrags + 1] is list tail */
	for (i = vi->big_packets_num_skbfrags + 1; i > 1; --i) {
		first = get_a_page(rq, gfp);

			give_pages(rq, list);

		sg_set_buf(&rq->sg[i], page_address(first), PAGE_SIZE);

		/* chain new page in list head to match sg */
		first->private = (unsigned long)list;

	first = get_a_page(rq, gfp);

		give_pages(rq, list);

	p = page_address(first);

	/* rq->sg[0], rq->sg[1] share the same page */
	/* a separated rq->sg[0] for header - required in case !any_header_sg */
	sg_set_buf(&rq->sg[0], p, vi->hdr_len);

	/* rq->sg[1] for data packet, from offset */
	offset = sizeof(struct padded_vnet_hdr);
	sg_set_buf(&rq->sg[1], p + offset, PAGE_SIZE - offset);

	/* chain first in list head */
	first->private = (unsigned long)list;
	err = virtqueue_add_inbuf(rq->vq, rq->sg, vi->big_packets_num_skbfrags + 2,

		give_pages(rq, first);

static unsigned int get_mergeable_buf_len(struct receive_queue *rq,
					  struct ewma_pkt_len *avg_pkt_len,
	struct virtnet_info *vi = rq->vq->vdev->priv;
	const size_t hdr_len = vi->hdr_len;

		return PAGE_SIZE - room;

	len = hdr_len + clamp_t(unsigned int, ewma_pkt_len_read(avg_pkt_len),
				rq->min_buf_len, PAGE_SIZE - hdr_len);

	return ALIGN(len, L1_CACHE_BYTES);
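
/*
 * Illustrative sketch (editor's addition, not part of the driver): a worked
 * example of the sizing above with hypothetical numbers, assuming
 * L1_CACHE_BYTES == 64, hdr_len = 12, min_buf_len = 1024, PAGE_SIZE = 4096
 * and an EWMA packet length of 1500:
 */
#if 0	/* example only */
	len = 12 + clamp_t(unsigned int, 1500, 1024, 4096 - 12);	/* 1512 */
	len = ALIGN(len, 64);						/* 1536 */
	/* each mergeable rx buffer is then 1536 bytes, plus "room". */
#endif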
static int add_recvbuf_mergeable(struct virtnet_info *vi,
				 struct receive_queue *rq, gfp_t gfp)
	struct page_frag *alloc_frag = &rq->alloc_frag;
	unsigned int headroom = virtnet_get_headroom(vi);
	unsigned int tailroom = headroom ? sizeof(struct skb_shared_info) : 0;
	unsigned int room = SKB_DATA_ALIGN(headroom + tailroom);
	unsigned int len, hole;

	/* Extra tailroom is needed to satisfy XDP's assumption. This
	 * means rx frags coalescing won't work, but consider we've
	 * disabled GSO for XDP, it won't be a big issue.
	 */
	len = get_mergeable_buf_len(rq, &rq->mrg_avg_pkt_len, room);

	if (unlikely(!skb_page_frag_refill(len + room, alloc_frag, gfp)))

	if (!alloc_frag->offset && len + room + sizeof(struct virtnet_rq_dma) > alloc_frag->size)
		len -= sizeof(struct virtnet_rq_dma);

	buf = virtnet_rq_alloc(rq, len + room, gfp);

	buf += headroom; /* advance address leaving hole at front of pkt */
	hole = alloc_frag->size - alloc_frag->offset;
	if (hole < len + room) {
		/* To avoid internal fragmentation, if there is very likely not
		 * enough space for another buffer, add the remaining space to
		 * the current buffer.
		 * XDP core assumes that frame_size of xdp_buff and the length
		 * of the frag are PAGE_SIZE, so we disable the hole mechanism.
		 */
		alloc_frag->offset += hole;

	virtnet_rq_init_one_sg(rq, buf, len);

	ctx = mergeable_len_to_ctx(len + room, headroom);
	err = virtqueue_add_inbuf_premapped(rq->vq, rq->sg, 1, buf, ctx, gfp);

		virtnet_rq_unmap(rq, buf, 0);
		put_page(virt_to_head_page(buf));
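
/*
 * Illustrative sketch (editor's addition, not part of the driver): the "hole"
 * check above with hypothetical numbers. With a 32768-byte page frag, a
 * current offset of 31232 and len + room = 2048:
 */
#if 0	/* example only */
	hole = 32768 - 31232;			/* 1536 */
	if (hole < 2048)			/* too small for another buffer */
		alloc_frag->offset += hole;	/* absorb the tail instead of wasting it */
#endif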
/*
 * Returns false if we couldn't fill entirely (OOM).
 *
 * Normally run in the receive path, but can also be run from ndo_open
 * before we're receiving packets, or from refill_work which is
 * careful to disable receiving (using napi_disable).
 */
static bool try_fill_recv(struct virtnet_info *vi, struct receive_queue *rq,

		err = virtnet_add_recvbuf_xsk(vi, rq, rq->xsk_pool, gfp);

		if (vi->mergeable_rx_bufs)
			err = add_recvbuf_mergeable(vi, rq, gfp);
		else if (vi->big_packets)
			err = add_recvbuf_big(vi, rq, gfp);

			err = add_recvbuf_small(vi, rq, gfp);

	} while (rq->vq->num_free);

	if (virtqueue_kick_prepare(rq->vq) && virtqueue_notify(rq->vq)) {
		unsigned long flags;

		flags = u64_stats_update_begin_irqsave(&rq->stats.syncp);
		u64_stats_inc(&rq->stats.kicks);
		u64_stats_update_end_irqrestore(&rq->stats.syncp, flags);

	return err != -ENOMEM;

static void skb_recv_done(struct virtqueue *rvq)
	struct virtnet_info *vi = rvq->vdev->priv;
	struct receive_queue *rq = &vi->rq[vq2rxq(rvq)];

	virtqueue_napi_schedule(&rq->napi, rvq);

static void virtnet_napi_enable(struct virtqueue *vq, struct napi_struct *napi)

	/* If all buffers were filled by other side before we napi_enabled, we
	 * won't get another interrupt, so process any outstanding packets now.
	 * Call local_bh_enable after to trigger softIRQ processing.
	 */
	virtqueue_napi_schedule(napi, vq);

static void virtnet_napi_tx_enable(struct virtnet_info *vi,
				   struct virtqueue *vq,
				   struct napi_struct *napi)

	/* Tx napi touches cachelines on the cpu handling tx interrupts. Only
	 * enable the feature if this is likely affine with the transmit path.
	 */
	if (!vi->affinity_hint_set) {

	return virtnet_napi_enable(vq, napi);

static void virtnet_napi_tx_disable(struct napi_struct *napi)

static void refill_work(struct work_struct *work)
	struct virtnet_info *vi =
		container_of(work, struct virtnet_info, refill.work);

	for (i = 0; i < vi->curr_queue_pairs; i++) {
		struct receive_queue *rq = &vi->rq[i];

		napi_disable(&rq->napi);
		still_empty = !try_fill_recv(vi, rq, GFP_KERNEL);
		virtnet_napi_enable(rq->vq, &rq->napi);

		/* In theory, this can happen: if we don't get any buffers in
		 * we will *never* try to fill again.
		 */
			schedule_delayed_work(&vi->refill, HZ/2);

static int virtnet_receive_xsk_bufs(struct virtnet_info *vi,
				    struct receive_queue *rq,
				    unsigned int *xdp_xmit,
				    struct virtnet_rq_stats *stats)

	while (packets < budget) {
		buf = virtqueue_get_buf(rq->vq, &len);

		virtnet_receive_xsk_buf(vi, rq, buf, len, xdp_xmit, stats);

static int virtnet_receive_packets(struct virtnet_info *vi,
				   struct receive_queue *rq,
				   unsigned int *xdp_xmit,
				   struct virtnet_rq_stats *stats)

	if (!vi->big_packets || vi->mergeable_rx_bufs) {

		while (packets < budget &&
		       (buf = virtnet_rq_get_buf(rq, &len, &ctx))) {
			receive_buf(vi, rq, buf, len, ctx, xdp_xmit, stats);

		while (packets < budget &&
		       (buf = virtqueue_get_buf(rq->vq, &len)) != NULL) {
			receive_buf(vi, rq, buf, len, NULL, xdp_xmit, stats);

static int virtnet_receive(struct receive_queue *rq, int budget,
			   unsigned int *xdp_xmit)
	struct virtnet_info *vi = rq->vq->vdev->priv;
	struct virtnet_rq_stats stats = {};

		packets = virtnet_receive_xsk_bufs(vi, rq, budget, xdp_xmit, &stats);

		packets = virtnet_receive_packets(vi, rq, budget, xdp_xmit, &stats);

	if (rq->vq->num_free > min((unsigned int)budget, virtqueue_get_vring_size(rq->vq)) / 2) {
		if (!try_fill_recv(vi, rq, GFP_ATOMIC)) {
			spin_lock(&vi->refill_lock);
			if (vi->refill_enabled)
				schedule_delayed_work(&vi->refill, 0);
			spin_unlock(&vi->refill_lock);

	u64_stats_set(&stats.packets, packets);
	u64_stats_update_begin(&rq->stats.syncp);
	for (i = 0; i < ARRAY_SIZE(virtnet_rq_stats_desc); i++) {
		size_t offset = virtnet_rq_stats_desc[i].offset;
		u64_stats_t *item, *src;

		item = (u64_stats_t *)((u8 *)&rq->stats + offset);
		src = (u64_stats_t *)((u8 *)&stats + offset);
		u64_stats_add(item, u64_stats_read(src));

	u64_stats_add(&rq->stats.packets, u64_stats_read(&stats.packets));
	u64_stats_add(&rq->stats.bytes, u64_stats_read(&stats.bytes));

	u64_stats_update_end(&rq->stats.syncp);
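
/*
 * Illustrative sketch (editor's addition, not part of the driver): the folding
 * loop above works because the on-stack "stats" copy and rq->stats share a
 * layout, so a single descriptor offset addresses the same field in both:
 */
#if 0	/* example only */
	size_t off = offsetof(struct virtnet_rq_stats, xdp_packets);
	u64_stats_t *dst = (u64_stats_t *)((u8 *)&rq->stats + off);
	u64_stats_t *src = (u64_stats_t *)((u8 *)&stats + off);

	u64_stats_add(dst, u64_stats_read(src));
#endif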
static void virtnet_poll_cleantx(struct receive_queue *rq, int budget)
	struct virtnet_info *vi = rq->vq->vdev->priv;
	unsigned int index = vq2rxq(rq->vq);
	struct send_queue *sq = &vi->sq[index];
	struct netdev_queue *txq = netdev_get_tx_queue(vi->dev, index);

	if (!sq->napi.weight || is_xdp_raw_buffer_queue(vi, index))

	if (__netif_tx_trylock(txq)) {

			__netif_tx_unlock(txq);

			virtqueue_disable_cb(sq->vq);
			free_old_xmit(sq, txq, !!budget);
		} while (unlikely(!virtqueue_enable_cb_delayed(sq->vq)));

		if (sq->vq->num_free >= 2 + MAX_SKB_FRAGS) {
			if (netif_tx_queue_stopped(txq)) {
				u64_stats_update_begin(&sq->stats.syncp);
				u64_stats_inc(&sq->stats.wake);
				u64_stats_update_end(&sq->stats.syncp);

			netif_tx_wake_queue(txq);

		__netif_tx_unlock(txq);

static void virtnet_rx_dim_update(struct virtnet_info *vi, struct receive_queue *rq)
	struct dim_sample cur_sample = {};

	if (!rq->packets_in_napi)

	/* Don't need protection when fetching stats, since fetcher and
	 * updater of the stats are in same context
	 */
	dim_update_sample(rq->calls,
			  u64_stats_read(&rq->stats.packets),
			  u64_stats_read(&rq->stats.bytes),

	net_dim(&rq->dim, &cur_sample);
	rq->packets_in_napi = 0;

static int virtnet_poll(struct napi_struct *napi, int budget)
	struct receive_queue *rq =
		container_of(napi, struct receive_queue, napi);
	struct virtnet_info *vi = rq->vq->vdev->priv;
	struct send_queue *sq;
	unsigned int received;
	unsigned int xdp_xmit = 0;

	virtnet_poll_cleantx(rq, budget);

	received = virtnet_receive(rq, budget, &xdp_xmit);
	rq->packets_in_napi += received;

	if (xdp_xmit & VIRTIO_XDP_REDIR)

	/* Out of packets? */
	if (received < budget) {
		napi_complete = virtqueue_napi_complete(napi, rq->vq, received);
		/* Intentionally not taking dim_lock here. This may result in a
		 * spurious net_dim call. But if that happens virtnet_rx_dim_work
		 * will not act on the scheduled work.
		 */
		if (napi_complete && rq->dim_enabled)
			virtnet_rx_dim_update(vi, rq);

	if (xdp_xmit & VIRTIO_XDP_TX) {
		sq = virtnet_xdp_get_sq(vi);
		if (virtqueue_kick_prepare(sq->vq) && virtqueue_notify(sq->vq)) {
			u64_stats_update_begin(&sq->stats.syncp);
			u64_stats_inc(&sq->stats.kicks);
			u64_stats_update_end(&sq->stats.syncp);

		virtnet_xdp_put_sq(vi, sq);

static void virtnet_disable_queue_pair(struct virtnet_info *vi, int qp_index)
	virtnet_napi_tx_disable(&vi->sq[qp_index].napi);
	napi_disable(&vi->rq[qp_index].napi);
	xdp_rxq_info_unreg(&vi->rq[qp_index].xdp_rxq);

static int virtnet_enable_queue_pair(struct virtnet_info *vi, int qp_index)
	struct net_device *dev = vi->dev;

	err = xdp_rxq_info_reg(&vi->rq[qp_index].xdp_rxq, dev, qp_index,
			       vi->rq[qp_index].napi.napi_id);

	err = xdp_rxq_info_reg_mem_model(&vi->rq[qp_index].xdp_rxq,
					 MEM_TYPE_PAGE_SHARED, NULL);

		goto err_xdp_reg_mem_model;

	netdev_tx_reset_queue(netdev_get_tx_queue(vi->dev, qp_index));
	virtnet_napi_enable(vi->rq[qp_index].vq, &vi->rq[qp_index].napi);
	virtnet_napi_tx_enable(vi, vi->sq[qp_index].vq, &vi->sq[qp_index].napi);

err_xdp_reg_mem_model:
	xdp_rxq_info_unreg(&vi->rq[qp_index].xdp_rxq);

static void virtnet_cancel_dim(struct virtnet_info *vi, struct dim *dim)
	if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_VQ_NOTF_COAL))

	net_dim_work_cancel(dim);

static void virtnet_update_settings(struct virtnet_info *vi)

	if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_SPEED_DUPLEX))

	virtio_cread_le(vi->vdev, struct virtio_net_config, speed, &speed);

	if (ethtool_validate_speed(speed))

	virtio_cread_le(vi->vdev, struct virtio_net_config, duplex, &duplex);

	if (ethtool_validate_duplex(duplex))
		vi->duplex = duplex;

static int virtnet_open(struct net_device *dev)
	struct virtnet_info *vi = netdev_priv(dev);

	enable_delayed_refill(vi);

	for (i = 0; i < vi->max_queue_pairs; i++) {
		if (i < vi->curr_queue_pairs)
			/* Make sure we have some buffers: if oom use wq. */
			if (!try_fill_recv(vi, &vi->rq[i], GFP_KERNEL))
				schedule_delayed_work(&vi->refill, 0);

		err = virtnet_enable_queue_pair(vi, i);

	if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_STATUS)) {
		if (vi->status & VIRTIO_NET_S_LINK_UP)
			netif_carrier_on(vi->dev);
		virtio_config_driver_enable(vi->vdev);

		vi->status = VIRTIO_NET_S_LINK_UP;
		netif_carrier_on(dev);

	disable_delayed_refill(vi);
	cancel_delayed_work_sync(&vi->refill);

	for (i--; i >= 0; i--) {
		virtnet_disable_queue_pair(vi, i);
		virtnet_cancel_dim(vi, &vi->rq[i].dim);

static int virtnet_poll_tx(struct napi_struct *napi, int budget)
	struct send_queue *sq = container_of(napi, struct send_queue, napi);
	struct virtnet_info *vi = sq->vq->vdev->priv;
	unsigned int index = vq2txq(sq->vq);
	struct netdev_queue *txq;
	int opaque, xsk_done = 0;

	if (unlikely(is_xdp_raw_buffer_queue(vi, index))) {
		/* We don't need to enable cb for XDP */
		napi_complete_done(napi, 0);

	txq = netdev_get_tx_queue(vi->dev, index);
	__netif_tx_lock(txq, raw_smp_processor_id());
	virtqueue_disable_cb(sq->vq);

		xsk_done = virtnet_xsk_xmit(sq, sq->xsk_pool, budget);

		free_old_xmit(sq, txq, !!budget);

	if (sq->vq->num_free >= 2 + MAX_SKB_FRAGS) {
		if (netif_tx_queue_stopped(txq)) {
			u64_stats_update_begin(&sq->stats.syncp);
			u64_stats_inc(&sq->stats.wake);
			u64_stats_update_end(&sq->stats.syncp);

		netif_tx_wake_queue(txq);

	if (xsk_done >= budget) {
		__netif_tx_unlock(txq);

	opaque = virtqueue_enable_cb_prepare(sq->vq);

	done = napi_complete_done(napi, 0);

		virtqueue_disable_cb(sq->vq);

	__netif_tx_unlock(txq);

	if (unlikely(virtqueue_poll(sq->vq, opaque))) {
		if (napi_schedule_prep(napi)) {
			__netif_tx_lock(txq, raw_smp_processor_id());
			virtqueue_disable_cb(sq->vq);
			__netif_tx_unlock(txq);
			__napi_schedule(napi);

static int xmit_skb(struct send_queue *sq, struct sk_buff *skb, bool orphan)
	struct virtio_net_hdr_mrg_rxbuf *hdr;
	const unsigned char *dest = ((struct ethhdr *)skb->data)->h_dest;
	struct virtnet_info *vi = sq->vq->vdev->priv;

	unsigned hdr_len = vi->hdr_len;

	pr_debug("%s: xmit %p %pM\n", vi->dev->name, skb, dest);

	can_push = vi->any_header_sg &&
		   !((unsigned long)skb->data & (__alignof__(*hdr) - 1)) &&
		   !skb_header_cloned(skb) && skb_headroom(skb) >= hdr_len;
	/* Even if we can, don't push here yet as this would skew
	 * csum_start offset below. */

		hdr = (struct virtio_net_hdr_mrg_rxbuf *)(skb->data - hdr_len);

		hdr = &skb_vnet_common_hdr(skb)->mrg_hdr;

	if (virtio_net_hdr_from_skb(skb, &hdr->hdr,
				    virtio_is_little_endian(vi->vdev), false,

	if (vi->mergeable_rx_bufs)
		hdr->num_buffers = 0;

	sg_init_table(sq->sg, skb_shinfo(skb)->nr_frags + (can_push ? 1 : 2));

		__skb_push(skb, hdr_len);
		num_sg = skb_to_sgvec(skb, sq->sg, 0, skb->len);
		if (unlikely(num_sg < 0))

		/* Pull header back to avoid skew in tx bytes calculations. */
		__skb_pull(skb, hdr_len);

		sg_set_buf(sq->sg, hdr, hdr_len);
		num_sg = skb_to_sgvec(skb, sq->sg + 1, 0, skb->len);
		if (unlikely(num_sg < 0))

	return virtnet_add_outbuf(sq, num_sg, skb,
				  orphan ? VIRTNET_XMIT_TYPE_SKB_ORPHAN : VIRTNET_XMIT_TYPE_SKB);
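
/*
 * Illustrative note (editor's addition, not part of the driver): the two
 * header layouts produced above. When can_push is true the virtio header is
 * written into the skb headroom and shares the first scatterlist entry with
 * the frame; otherwise it gets its own entry in front of the data:
 *
 *   can_push:   sg[0] = [vnet hdr | ethernet frame ...]
 *   !can_push:  sg[0] = [vnet hdr], sg[1..] = [ethernet frame ...]
 *
 * which is why sg_init_table() reserves nr_frags + 1 or nr_frags + 2 entries.
 */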
static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev)
	struct virtnet_info *vi = netdev_priv(dev);
	int qnum = skb_get_queue_mapping(skb);
	struct send_queue *sq = &vi->sq[qnum];

	struct netdev_queue *txq = netdev_get_tx_queue(dev, qnum);
	bool xmit_more = netdev_xmit_more();
	bool use_napi = sq->napi.weight;

	/* Free up any pending old buffers before queueing new ones. */

		virtqueue_disable_cb(sq->vq);

		free_old_xmit(sq, txq, false);

	} while (use_napi && !xmit_more &&
		 unlikely(!virtqueue_enable_cb_delayed(sq->vq)));

	/* timestamp packet in software */
	skb_tx_timestamp(skb);

	/* Try to transmit */
	err = xmit_skb(sq, skb, !use_napi);

	/* This should not happen! */
	if (unlikely(err)) {
		DEV_STATS_INC(dev, tx_fifo_errors);
		if (net_ratelimit())
				    "Unexpected TXQ (%d) queue failure: %d\n",
		DEV_STATS_INC(dev, tx_dropped);
		dev_kfree_skb_any(skb);
		return NETDEV_TX_OK;

	/* Don't wait up for transmitted skbs to be freed. */

		check_sq_full_and_disable(vi, dev, sq);

	kick = use_napi ? __netdev_tx_sent_queue(txq, skb->len, xmit_more) :
			  !xmit_more || netif_xmit_stopped(txq);

		if (virtqueue_kick_prepare(sq->vq) && virtqueue_notify(sq->vq)) {
			u64_stats_update_begin(&sq->stats.syncp);
			u64_stats_inc(&sq->stats.kicks);
			u64_stats_update_end(&sq->stats.syncp);

	return NETDEV_TX_OK;

static void virtnet_rx_pause(struct virtnet_info *vi, struct receive_queue *rq)
	bool running = netif_running(vi->dev);

		napi_disable(&rq->napi);
		virtnet_cancel_dim(vi, &rq->dim);

static void virtnet_rx_resume(struct virtnet_info *vi, struct receive_queue *rq)
	bool running = netif_running(vi->dev);

	if (!try_fill_recv(vi, rq, GFP_KERNEL))
		schedule_delayed_work(&vi->refill, 0);

		virtnet_napi_enable(rq->vq, &rq->napi);

static int virtnet_rx_resize(struct virtnet_info *vi,
			     struct receive_queue *rq, u32 ring_num)

	qindex = rq - vi->rq;

	virtnet_rx_pause(vi, rq);

	err = virtqueue_resize(rq->vq, ring_num, virtnet_rq_unmap_free_buf);

		netdev_err(vi->dev, "resize rx fail: rx queue index: %d err: %d\n", qindex, err);

	virtnet_rx_resume(vi, rq);

static void virtnet_tx_pause(struct virtnet_info *vi, struct send_queue *sq)
	bool running = netif_running(vi->dev);
	struct netdev_queue *txq;

	qindex = sq - vi->sq;

		virtnet_napi_tx_disable(&sq->napi);

	txq = netdev_get_tx_queue(vi->dev, qindex);

	/* 1. wait all xmit complete
	 * 2. fix the race of netif_stop_subqueue() vs netif_start_subqueue()
	 */
	__netif_tx_lock_bh(txq);

	/* Prevent rx poll from accessing sq. */

	/* Prevent the upper layer from trying to send packets. */
	netif_stop_subqueue(vi->dev, qindex);

	__netif_tx_unlock_bh(txq);

static void virtnet_tx_resume(struct virtnet_info *vi, struct send_queue *sq)
	bool running = netif_running(vi->dev);
	struct netdev_queue *txq;

	qindex = sq - vi->sq;

	txq = netdev_get_tx_queue(vi->dev, qindex);

	__netif_tx_lock_bh(txq);

	netif_tx_wake_queue(txq);
	__netif_tx_unlock_bh(txq);

		virtnet_napi_tx_enable(vi, sq->vq, &sq->napi);

static int virtnet_tx_resize(struct virtnet_info *vi, struct send_queue *sq,

	qindex = sq - vi->sq;

	virtnet_tx_pause(vi, sq);

	err = virtqueue_resize(sq->vq, ring_num, virtnet_sq_free_unused_buf);

		netdev_err(vi->dev, "resize tx fail: tx queue index: %d err: %d\n", qindex, err);

	virtnet_tx_resume(vi, sq);

/*
 * Send command via the control virtqueue and check status.  Commands
 * supported by the hypervisor, as indicated by feature bits, should
 * never fail unless improperly formatted.
 */
static bool virtnet_send_command_reply(struct virtnet_info *vi, u8 class, u8 cmd,
				       struct scatterlist *out,
				       struct scatterlist *in)
	struct scatterlist *sgs[5], hdr, stat;
	u32 out_num = 0, tmp, in_num = 0;

	/* Caller should know better */
	BUG_ON(!virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VQ));

	mutex_lock(&vi->cvq_lock);
	vi->ctrl->status = ~0;
	vi->ctrl->hdr.class = class;
	vi->ctrl->hdr.cmd = cmd;

	sg_init_one(&hdr, &vi->ctrl->hdr, sizeof(vi->ctrl->hdr));
	sgs[out_num++] = &hdr;

		sgs[out_num++] = out;

	/* Add return status. */
	sg_init_one(&stat, &vi->ctrl->status, sizeof(vi->ctrl->status));
	sgs[out_num + in_num++] = &stat;

		sgs[out_num + in_num++] = in;

	BUG_ON(out_num + in_num > ARRAY_SIZE(sgs));
	ret = virtqueue_add_sgs(vi->cvq, sgs, out_num, in_num, vi, GFP_ATOMIC);

		dev_warn(&vi->vdev->dev,
			 "Failed to add sgs for command vq: %d.\n", ret);
		mutex_unlock(&vi->cvq_lock);

	if (unlikely(!virtqueue_kick(vi->cvq)))

	/* Spin for a response, the kick causes an ioport write, trapping
	 * into the hypervisor, so the request should be handled immediately.
	 */
	while (!virtqueue_get_buf(vi->cvq, &tmp) &&
	       !virtqueue_is_broken(vi->cvq)) {

	ok = vi->ctrl->status == VIRTIO_NET_OK;
	mutex_unlock(&vi->cvq_lock);

static bool virtnet_send_command(struct virtnet_info *vi, u8 class, u8 cmd,
				 struct scatterlist *out)
	return virtnet_send_command_reply(vi, class, cmd, out, NULL);
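
/*
 * Illustrative sketch (editor's addition, not part of the driver): the usual
 * calling pattern for virtnet_send_command(), mirroring the callers below. The
 * payload travels in a single "out" scatterlist entry; the helper adds the
 * header and status entries itself and returns true on VIRTIO_NET_OK.
 */
#if 0	/* example only */
static bool virtnet_example_ack_announce(struct virtnet_info *vi)
{
	/* A command without a payload passes a NULL scatterlist. */
	return virtnet_send_command(vi, VIRTIO_NET_CTRL_ANNOUNCE,
				    VIRTIO_NET_CTRL_ANNOUNCE_ACK, NULL);
}
#endif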
static int virtnet_set_mac_address(struct net_device *dev, void *p)
	struct virtnet_info *vi = netdev_priv(dev);
	struct virtio_device *vdev = vi->vdev;

	struct sockaddr *addr;
	struct scatterlist sg;

	if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_STANDBY))

	addr = kmemdup(p, sizeof(*addr), GFP_KERNEL);

	ret = eth_prepare_mac_addr_change(dev, addr);

	if (virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_MAC_ADDR)) {
		sg_init_one(&sg, addr->sa_data, dev->addr_len);
		if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MAC,
					  VIRTIO_NET_CTRL_MAC_ADDR_SET, &sg)) {
			dev_warn(&vdev->dev,
				 "Failed to set mac address by vq command.\n");

	} else if (virtio_has_feature(vdev, VIRTIO_NET_F_MAC) &&
		   !virtio_has_feature(vdev, VIRTIO_F_VERSION_1)) {

		/* Naturally, this has an atomicity problem. */
		for (i = 0; i < dev->addr_len; i++)
			virtio_cwrite8(vdev,
				       offsetof(struct virtio_net_config, mac) +
				       i, addr->sa_data[i]);

	eth_commit_mac_addr_change(dev, p);

static void virtnet_stats(struct net_device *dev,
			  struct rtnl_link_stats64 *tot)
	struct virtnet_info *vi = netdev_priv(dev);

	for (i = 0; i < vi->max_queue_pairs; i++) {
		u64 tpackets, tbytes, terrors, rpackets, rbytes, rdrops;
		struct receive_queue *rq = &vi->rq[i];
		struct send_queue *sq = &vi->sq[i];

			start = u64_stats_fetch_begin(&sq->stats.syncp);
			tpackets = u64_stats_read(&sq->stats.packets);
			tbytes = u64_stats_read(&sq->stats.bytes);
			terrors = u64_stats_read(&sq->stats.tx_timeouts);
		} while (u64_stats_fetch_retry(&sq->stats.syncp, start));

			start = u64_stats_fetch_begin(&rq->stats.syncp);
			rpackets = u64_stats_read(&rq->stats.packets);
			rbytes = u64_stats_read(&rq->stats.bytes);
			rdrops = u64_stats_read(&rq->stats.drops);
		} while (u64_stats_fetch_retry(&rq->stats.syncp, start));

		tot->rx_packets += rpackets;
		tot->tx_packets += tpackets;
		tot->rx_bytes += rbytes;
		tot->tx_bytes += tbytes;
		tot->rx_dropped += rdrops;
		tot->tx_errors += terrors;

	tot->tx_dropped = DEV_STATS_READ(dev, tx_dropped);
	tot->tx_fifo_errors = DEV_STATS_READ(dev, tx_fifo_errors);
	tot->rx_length_errors = DEV_STATS_READ(dev, rx_length_errors);
	tot->rx_frame_errors = DEV_STATS_READ(dev, rx_frame_errors);

static void virtnet_ack_link_announce(struct virtnet_info *vi)
	if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_ANNOUNCE,
				  VIRTIO_NET_CTRL_ANNOUNCE_ACK, NULL))
		dev_warn(&vi->dev->dev, "Failed to ack link announce.\n");

static bool virtnet_commit_rss_command(struct virtnet_info *vi);

static void virtnet_rss_update_by_qpairs(struct virtnet_info *vi, u16 queue_pairs)

	for (; i < vi->rss_indir_table_size; ++i) {
		indir_val = ethtool_rxfh_indir_default(i, queue_pairs);
		vi->rss.indirection_table[i] = indir_val;

	vi->rss.max_tx_vq = queue_pairs;

static int virtnet_set_queues(struct virtnet_info *vi, u16 queue_pairs)
	struct virtio_net_ctrl_mq *mq __free(kfree) = NULL;
	struct virtio_net_ctrl_rss old_rss;
	struct net_device *dev = vi->dev;
	struct scatterlist sg;

	if (!vi->has_cvq || !virtio_has_feature(vi->vdev, VIRTIO_NET_F_MQ))

	/* Firstly check if we need update rss. Do updating if both (1) rss enabled and
	 * (2) no user configuration.
	 *
	 * During rss command processing, device updates queue_pairs using rss.max_tx_vq. That is,
	 * the device updates queue_pairs together with rss, so we can skip the separate queue_pairs
	 * update (VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET below) and return directly.
	 */
	if (vi->has_rss && !netif_is_rxfh_configured(dev)) {
		memcpy(&old_rss, &vi->rss, sizeof(old_rss));
		if (rss_indirection_table_alloc(&vi->rss, vi->rss_indir_table_size)) {
			vi->rss.indirection_table = old_rss.indirection_table;

		virtnet_rss_update_by_qpairs(vi, queue_pairs);

		if (!virtnet_commit_rss_command(vi)) {
			/* restore ctrl_rss if commit_rss_command failed */
			rss_indirection_table_free(&vi->rss);
			memcpy(&vi->rss, &old_rss, sizeof(old_rss));

			dev_warn(&dev->dev, "Fail to set num of queue pairs to %d, because committing RSS failed\n",

		rss_indirection_table_free(&old_rss);

	mq = kzalloc(sizeof(*mq), GFP_KERNEL);

	mq->virtqueue_pairs = cpu_to_virtio16(vi->vdev, queue_pairs);
	sg_init_one(&sg, mq, sizeof(*mq));

	if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MQ,
				  VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET, &sg)) {
		dev_warn(&dev->dev, "Fail to set num of queue pairs to %d\n",

		vi->curr_queue_pairs = queue_pairs;
		/* virtnet_open() will refill when device is going to up. */
		if (dev->flags & IFF_UP)
			schedule_delayed_work(&vi->refill, 0);

static int virtnet_close(struct net_device *dev)
	struct virtnet_info *vi = netdev_priv(dev);

	/* Make sure NAPI doesn't schedule refill work */
	disable_delayed_refill(vi);
	/* Make sure refill_work doesn't re-enable napi! */
	cancel_delayed_work_sync(&vi->refill);
	/* Prevent the config change callback from changing carrier */
	virtio_config_driver_disable(vi->vdev);
	/* Stop getting status/speed updates: we don't care until next
	 * open.
	 */
	cancel_work_sync(&vi->config_work);

	for (i = 0; i < vi->max_queue_pairs; i++) {
		virtnet_disable_queue_pair(vi, i);
		virtnet_cancel_dim(vi, &vi->rq[i].dim);

	netif_carrier_off(dev);

static void virtnet_rx_mode_work(struct work_struct *work)
	struct virtnet_info *vi =
		container_of(work, struct virtnet_info, rx_mode_work);
	u8 *promisc_allmulti __free(kfree) = NULL;
	struct net_device *dev = vi->dev;
	struct scatterlist sg[2];
	struct virtio_net_ctrl_mac *mac_data;
	struct netdev_hw_addr *ha;

	/* We can't dynamically set ndo_set_rx_mode, so return gracefully */
	if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_RX))

	promisc_allmulti = kzalloc(sizeof(*promisc_allmulti), GFP_KERNEL);
	if (!promisc_allmulti) {
		dev_warn(&dev->dev, "Failed to set RX mode, no memory.\n");

	*promisc_allmulti = !!(dev->flags & IFF_PROMISC);
	sg_init_one(sg, promisc_allmulti, sizeof(*promisc_allmulti));

	if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_RX,
				  VIRTIO_NET_CTRL_RX_PROMISC, sg))
		dev_warn(&dev->dev, "Failed to %sable promisc mode.\n",
			 *promisc_allmulti ? "en" : "dis");

	*promisc_allmulti = !!(dev->flags & IFF_ALLMULTI);
	sg_init_one(sg, promisc_allmulti, sizeof(*promisc_allmulti));

	if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_RX,
				  VIRTIO_NET_CTRL_RX_ALLMULTI, sg))
		dev_warn(&dev->dev, "Failed to %sable allmulti mode.\n",
			 *promisc_allmulti ? "en" : "dis");

	netif_addr_lock_bh(dev);

	uc_count = netdev_uc_count(dev);
	mc_count = netdev_mc_count(dev);
	/* MAC filter - use one buffer for both lists */
	buf = kzalloc(((uc_count + mc_count) * ETH_ALEN) +
		      (2 * sizeof(mac_data->entries)), GFP_ATOMIC);

		netif_addr_unlock_bh(dev);

	sg_init_table(sg, 2);

	/* Store the unicast list and count in the front of the buffer */
	mac_data->entries = cpu_to_virtio32(vi->vdev, uc_count);

	netdev_for_each_uc_addr(ha, dev)
		memcpy(&mac_data->macs[i++][0], ha->addr, ETH_ALEN);

	sg_set_buf(&sg[0], mac_data,
		   sizeof(mac_data->entries) + (uc_count * ETH_ALEN));

	/* multicast list and count fill the end */
	mac_data = (void *)&mac_data->macs[uc_count][0];

	mac_data->entries = cpu_to_virtio32(vi->vdev, mc_count);

	netdev_for_each_mc_addr(ha, dev)
		memcpy(&mac_data->macs[i++][0], ha->addr, ETH_ALEN);

	netif_addr_unlock_bh(dev);

	sg_set_buf(&sg[1], mac_data,
		   sizeof(mac_data->entries) + (mc_count * ETH_ALEN));

	if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MAC,
				  VIRTIO_NET_CTRL_MAC_TABLE_SET, sg))
		dev_warn(&dev->dev, "Failed to set MAC filter table.\n");

static void virtnet_set_rx_mode(struct net_device *dev)
	struct virtnet_info *vi = netdev_priv(dev);

	if (vi->rx_mode_work_enabled)
		schedule_work(&vi->rx_mode_work);

static int virtnet_vlan_rx_add_vid(struct net_device *dev,
				   __be16 proto, u16 vid)
	struct virtnet_info *vi = netdev_priv(dev);
	__virtio16 *_vid __free(kfree) = NULL;
	struct scatterlist sg;

	_vid = kzalloc(sizeof(*_vid), GFP_KERNEL);

	*_vid = cpu_to_virtio16(vi->vdev, vid);
	sg_init_one(&sg, _vid, sizeof(*_vid));

	if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_VLAN,
				  VIRTIO_NET_CTRL_VLAN_ADD, &sg))
		dev_warn(&dev->dev, "Failed to add VLAN ID %d.\n", vid);

static int virtnet_vlan_rx_kill_vid(struct net_device *dev,
				    __be16 proto, u16 vid)
	struct virtnet_info *vi = netdev_priv(dev);
	__virtio16 *_vid __free(kfree) = NULL;
	struct scatterlist sg;

	_vid = kzalloc(sizeof(*_vid), GFP_KERNEL);

	*_vid = cpu_to_virtio16(vi->vdev, vid);
	sg_init_one(&sg, _vid, sizeof(*_vid));

	if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_VLAN,
				  VIRTIO_NET_CTRL_VLAN_DEL, &sg))
		dev_warn(&dev->dev, "Failed to kill VLAN ID %d.\n", vid);

static void virtnet_clean_affinity(struct virtnet_info *vi)

	if (vi->affinity_hint_set) {
		for (i = 0; i < vi->max_queue_pairs; i++) {
			virtqueue_set_affinity(vi->rq[i].vq, NULL);
			virtqueue_set_affinity(vi->sq[i].vq, NULL);

		vi->affinity_hint_set = false;

static void virtnet_set_affinity(struct virtnet_info *vi)

	if (!zalloc_cpumask_var(&mask, GFP_KERNEL)) {
		virtnet_clean_affinity(vi);

	num_cpu = num_online_cpus();
	stride = max_t(int, num_cpu / vi->curr_queue_pairs, 1);
	stragglers = num_cpu >= vi->curr_queue_pairs ?
		     num_cpu % vi->curr_queue_pairs :

	cpu = cpumask_first(cpu_online_mask);

	for (i = 0; i < vi->curr_queue_pairs; i++) {
		group_size = stride + (i < stragglers ? 1 : 0);

		for (j = 0; j < group_size; j++) {
			cpumask_set_cpu(cpu, mask);
			cpu = cpumask_next_wrap(cpu, cpu_online_mask,

		virtqueue_set_affinity(vi->rq[i].vq, mask);
		virtqueue_set_affinity(vi->sq[i].vq, mask);
		__netif_set_xps_queue(vi->dev, cpumask_bits(mask), i, XPS_CPUS);
		cpumask_clear(mask);

	vi->affinity_hint_set = true;
	free_cpumask_var(mask);
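
/*
 * Illustrative note (editor's addition, not part of the driver): how the
 * stride/stragglers split above distributes CPUs. With 6 online CPUs and 4
 * queue pairs, stride = 6 / 4 = 1 and stragglers = 6 % 4 = 2, so the first
 * two queue pairs get two CPUs each and the rest get one:
 *
 *   qp0 -> {cpu0, cpu1}, qp1 -> {cpu2, cpu3}, qp2 -> {cpu4}, qp3 -> {cpu5}
 */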
static int virtnet_cpu_online(unsigned int cpu, struct hlist_node *node)
	struct virtnet_info *vi = hlist_entry_safe(node, struct virtnet_info,

	virtnet_set_affinity(vi);

static int virtnet_cpu_dead(unsigned int cpu, struct hlist_node *node)
	struct virtnet_info *vi = hlist_entry_safe(node, struct virtnet_info,

	virtnet_set_affinity(vi);

static int virtnet_cpu_down_prep(unsigned int cpu, struct hlist_node *node)
	struct virtnet_info *vi = hlist_entry_safe(node, struct virtnet_info,

	virtnet_clean_affinity(vi);

static enum cpuhp_state virtionet_online;

static int virtnet_cpu_notif_add(struct virtnet_info *vi)

	ret = cpuhp_state_add_instance_nocalls(virtionet_online, &vi->node);

	ret = cpuhp_state_add_instance_nocalls(CPUHP_VIRT_NET_DEAD,

	cpuhp_state_remove_instance_nocalls(virtionet_online, &vi->node);

static void virtnet_cpu_notif_remove(struct virtnet_info *vi)
	cpuhp_state_remove_instance_nocalls(virtionet_online, &vi->node);
	cpuhp_state_remove_instance_nocalls(CPUHP_VIRT_NET_DEAD,

static int virtnet_send_ctrl_coal_vq_cmd(struct virtnet_info *vi,
					 u16 vqn, u32 max_usecs, u32 max_packets)
	struct virtio_net_ctrl_coal_vq *coal_vq __free(kfree) = NULL;
	struct scatterlist sgs;

	coal_vq = kzalloc(sizeof(*coal_vq), GFP_KERNEL);

	coal_vq->vqn = cpu_to_le16(vqn);
	coal_vq->coal.max_usecs = cpu_to_le32(max_usecs);
	coal_vq->coal.max_packets = cpu_to_le32(max_packets);
	sg_init_one(&sgs, coal_vq, sizeof(*coal_vq));

	if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_NOTF_COAL,
				  VIRTIO_NET_CTRL_NOTF_COAL_VQ_SET,

static int virtnet_send_rx_ctrl_coal_vq_cmd(struct virtnet_info *vi,
					    u16 queue, u32 max_usecs,

	if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_VQ_NOTF_COAL))

	err = virtnet_send_ctrl_coal_vq_cmd(vi, rxq2vq(queue),
					    max_usecs, max_packets);

	vi->rq[queue].intr_coal.max_usecs = max_usecs;
	vi->rq[queue].intr_coal.max_packets = max_packets;

static int virtnet_send_tx_ctrl_coal_vq_cmd(struct virtnet_info *vi,
					    u16 queue, u32 max_usecs,

	if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_VQ_NOTF_COAL))

	err = virtnet_send_ctrl_coal_vq_cmd(vi, txq2vq(queue),
					    max_usecs, max_packets);

	vi->sq[queue].intr_coal.max_usecs = max_usecs;
	vi->sq[queue].intr_coal.max_packets = max_packets;

static void virtnet_get_ringparam(struct net_device *dev,
				  struct ethtool_ringparam *ring,
				  struct kernel_ethtool_ringparam *kernel_ring,
				  struct netlink_ext_ack *extack)
	struct virtnet_info *vi = netdev_priv(dev);

	ring->rx_max_pending = vi->rq[0].vq->num_max;
	ring->tx_max_pending = vi->sq[0].vq->num_max;
	ring->rx_pending = virtqueue_get_vring_size(vi->rq[0].vq);
	ring->tx_pending = virtqueue_get_vring_size(vi->sq[0].vq);

static int virtnet_set_ringparam(struct net_device *dev,
				 struct ethtool_ringparam *ring,
				 struct kernel_ethtool_ringparam *kernel_ring,
				 struct netlink_ext_ack *extack)
	struct virtnet_info *vi = netdev_priv(dev);
	u32 rx_pending, tx_pending;
	struct receive_queue *rq;
	struct send_queue *sq;

	if (ring->rx_mini_pending || ring->rx_jumbo_pending)

	rx_pending = virtqueue_get_vring_size(vi->rq[0].vq);
	tx_pending = virtqueue_get_vring_size(vi->sq[0].vq);

	if (ring->rx_pending == rx_pending &&
	    ring->tx_pending == tx_pending)

	if (ring->rx_pending > vi->rq[0].vq->num_max)

	if (ring->tx_pending > vi->sq[0].vq->num_max)

	for (i = 0; i < vi->max_queue_pairs; i++) {

		if (ring->tx_pending != tx_pending) {
			err = virtnet_tx_resize(vi, sq, ring->tx_pending);

			/* Upon disabling and re-enabling a transmit virtqueue, the device must
			 * set the coalescing parameters of the virtqueue to those configured
			 * through the VIRTIO_NET_CTRL_NOTF_COAL_TX_SET command, or, if the driver
			 * did not set any TX coalescing parameters, to 0.
			 */
			err = virtnet_send_tx_ctrl_coal_vq_cmd(vi, i,
							       vi->intr_coal_tx.max_usecs,
							       vi->intr_coal_tx.max_packets);

			/* Don't break the tx resize action if the vq coalescing is not
			 * supported. The same is true for rx resize below.
			 */
			if (err && err != -EOPNOTSUPP)

		if (ring->rx_pending != rx_pending) {
			err = virtnet_rx_resize(vi, rq, ring->rx_pending);

			/* The reason is same as the transmit virtqueue reset */
			mutex_lock(&vi->rq[i].dim_lock);
			err = virtnet_send_rx_ctrl_coal_vq_cmd(vi, i,
							       vi->intr_coal_rx.max_usecs,
							       vi->intr_coal_rx.max_packets);
			mutex_unlock(&vi->rq[i].dim_lock);
			if (err && err != -EOPNOTSUPP)

static bool virtnet_commit_rss_command(struct virtnet_info *vi)
	struct net_device *dev = vi->dev;
	struct scatterlist sgs[4];
	unsigned int sg_buf_size;

	sg_init_table(sgs, 4);

	sg_buf_size = offsetof(struct virtio_net_ctrl_rss, hash_cfg_reserved);
	sg_set_buf(&sgs[0], &vi->rss, sg_buf_size);

		sg_buf_size = sizeof(uint16_t) * vi->rss_indir_table_size;
		sg_set_buf(&sgs[1], vi->rss.indirection_table, sg_buf_size);

		sg_set_buf(&sgs[1], &vi->rss.hash_cfg_reserved, sizeof(uint16_t));

	sg_buf_size = offsetof(struct virtio_net_ctrl_rss, key)
		      - offsetof(struct virtio_net_ctrl_rss, max_tx_vq);
	sg_set_buf(&sgs[2], &vi->rss.max_tx_vq, sg_buf_size);

	sg_buf_size = vi->rss_key_size;
	sg_set_buf(&sgs[3], vi->rss.key, sg_buf_size);

	if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MQ,
				  vi->has_rss ? VIRTIO_NET_CTRL_MQ_RSS_CONFIG
				  : VIRTIO_NET_CTRL_MQ_HASH_CONFIG, sgs))

	dev_warn(&dev->dev, "VIRTIONET issue with committing RSS sgs\n");

static void virtnet_init_default_rss(struct virtnet_info *vi)
	vi->rss.hash_types = vi->rss_hash_types_supported;
	vi->rss_hash_types_saved = vi->rss_hash_types_supported;
	vi->rss.indirection_table_mask = vi->rss_indir_table_size
					 ? vi->rss_indir_table_size - 1 : 0;
	vi->rss.unclassified_queue = 0;

	virtnet_rss_update_by_qpairs(vi, vi->curr_queue_pairs);

	vi->rss.hash_key_length = vi->rss_key_size;

	netdev_rss_key_fill(vi->rss.key, vi->rss_key_size);

static void virtnet_get_hashflow(const struct virtnet_info *vi, struct ethtool_rxnfc *info)

	switch (info->flow_type) {
		if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_TCPv4) {
			info->data = RXH_IP_SRC | RXH_IP_DST |
				     RXH_L4_B_0_1 | RXH_L4_B_2_3;
		} else if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_IPv4) {
			info->data = RXH_IP_SRC | RXH_IP_DST;

		if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_TCPv6) {
			info->data = RXH_IP_SRC | RXH_IP_DST |
				     RXH_L4_B_0_1 | RXH_L4_B_2_3;
		} else if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_IPv6) {
			info->data = RXH_IP_SRC | RXH_IP_DST;

		if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_UDPv4) {
			info->data = RXH_IP_SRC | RXH_IP_DST |
				     RXH_L4_B_0_1 | RXH_L4_B_2_3;
		} else if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_IPv4) {
			info->data = RXH_IP_SRC | RXH_IP_DST;

		if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_UDPv6) {
			info->data = RXH_IP_SRC | RXH_IP_DST |
				     RXH_L4_B_0_1 | RXH_L4_B_2_3;
		} else if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_IPv6) {
			info->data = RXH_IP_SRC | RXH_IP_DST;

		if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_IPv4)
			info->data = RXH_IP_SRC | RXH_IP_DST;

		if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_IPv6)
			info->data = RXH_IP_SRC | RXH_IP_DST;

static bool virtnet_set_hashflow(struct virtnet_info *vi, struct ethtool_rxnfc *info)
	u32 new_hashtypes = vi->rss_hash_types_saved;
	bool is_disable = info->data & RXH_DISCARD;
	bool is_l4 = info->data == (RXH_IP_SRC | RXH_IP_DST | RXH_L4_B_0_1 | RXH_L4_B_2_3);

	/* supports only 'sd', 'sdfn' and 'r' */
	if (!((info->data == (RXH_IP_SRC | RXH_IP_DST)) | is_l4 | is_disable))

	switch (info->flow_type) {
		new_hashtypes &= ~(VIRTIO_NET_RSS_HASH_TYPE_IPv4 | VIRTIO_NET_RSS_HASH_TYPE_TCPv4);

			new_hashtypes |= VIRTIO_NET_RSS_HASH_TYPE_IPv4
					 | (is_l4 ? VIRTIO_NET_RSS_HASH_TYPE_TCPv4 : 0);

		new_hashtypes &= ~(VIRTIO_NET_RSS_HASH_TYPE_IPv4 | VIRTIO_NET_RSS_HASH_TYPE_UDPv4);

			new_hashtypes |= VIRTIO_NET_RSS_HASH_TYPE_IPv4
					 | (is_l4 ? VIRTIO_NET_RSS_HASH_TYPE_UDPv4 : 0);

		new_hashtypes &= ~VIRTIO_NET_RSS_HASH_TYPE_IPv4;

			new_hashtypes = VIRTIO_NET_RSS_HASH_TYPE_IPv4;

		new_hashtypes &= ~(VIRTIO_NET_RSS_HASH_TYPE_IPv6 | VIRTIO_NET_RSS_HASH_TYPE_TCPv6);

			new_hashtypes |= VIRTIO_NET_RSS_HASH_TYPE_IPv6
					 | (is_l4 ? VIRTIO_NET_RSS_HASH_TYPE_TCPv6 : 0);

		new_hashtypes &= ~(VIRTIO_NET_RSS_HASH_TYPE_IPv6 | VIRTIO_NET_RSS_HASH_TYPE_UDPv6);

			new_hashtypes |= VIRTIO_NET_RSS_HASH_TYPE_IPv6
					 | (is_l4 ? VIRTIO_NET_RSS_HASH_TYPE_UDPv6 : 0);

		new_hashtypes &= ~VIRTIO_NET_RSS_HASH_TYPE_IPv6;

			new_hashtypes = VIRTIO_NET_RSS_HASH_TYPE_IPv6;

		/* unsupported flow */

	/* if unsupported hashtype was set */
	if (new_hashtypes != (new_hashtypes & vi->rss_hash_types_supported))

	if (new_hashtypes != vi->rss_hash_types_saved) {
		vi->rss_hash_types_saved = new_hashtypes;
		vi->rss.hash_types = vi->rss_hash_types_saved;
		if (vi->dev->features & NETIF_F_RXHASH)
			return virtnet_commit_rss_command(vi);

static void virtnet_get_drvinfo(struct net_device *dev,
				struct ethtool_drvinfo *info)
	struct virtnet_info *vi = netdev_priv(dev);
	struct virtio_device *vdev = vi->vdev;

	strscpy(info->driver, KBUILD_MODNAME, sizeof(info->driver));
	strscpy(info->version, VIRTNET_DRIVER_VERSION, sizeof(info->version));
	strscpy(info->bus_info, virtio_bus_name(vdev), sizeof(info->bus_info));

/* TODO: Eliminate OOO packets during switching */
static int virtnet_set_channels(struct net_device *dev,
				struct ethtool_channels *channels)
	struct virtnet_info *vi = netdev_priv(dev);
	u16 queue_pairs = channels->combined_count;

	/* We don't support separate rx/tx channels.
	 * We don't allow setting 'other' channels.
	 */
	if (channels->rx_count || channels->tx_count || channels->other_count)

	if (queue_pairs > vi->max_queue_pairs || queue_pairs == 0)

	/* For now we don't support modifying channels while XDP is loaded
	 * also when XDP is loaded all RX queues have XDP programs so we only
	 * need to check a single RX queue.
	 */
	if (vi->rq[0].xdp_prog)

	err = virtnet_set_queues(vi, queue_pairs);

	virtnet_set_affinity(vi);

	netif_set_real_num_tx_queues(dev, queue_pairs);
	netif_set_real_num_rx_queues(dev, queue_pairs);

static void virtnet_stats_sprintf(u8 **p, const char *fmt, const char *noq_fmt,
				  int num, int qid, const struct virtnet_stat_desc *desc)

		for (i = 0; i < num; ++i)
			ethtool_sprintf(p, noq_fmt, desc[i].desc);

		for (i = 0; i < num; ++i)
			ethtool_sprintf(p, fmt, qid, desc[i].desc);

/* qid == -1: for rx/tx queue total field */
static void virtnet_get_stats_string(struct virtnet_info *vi, int type, int qid, u8 **data)
	const struct virtnet_stat_desc *desc;
	const char *fmt, *noq_fmt;

	if (type == VIRTNET_Q_TYPE_CQ && qid >= 0) {
		noq_fmt = "cq_hw_%s";

		if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_CVQ) {
			desc = &virtnet_stats_cvq_desc[0];
			num = ARRAY_SIZE(virtnet_stats_cvq_desc);

			virtnet_stats_sprintf(&p, NULL, noq_fmt, num, -1, desc);

	if (type == VIRTNET_Q_TYPE_RX) {

		desc = &virtnet_rq_stats_desc[0];
		num = ARRAY_SIZE(virtnet_rq_stats_desc);

		virtnet_stats_sprintf(&p, fmt, noq_fmt, num, qid, desc);

		noq_fmt = "rx_hw_%s";

		if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_BASIC) {
			desc = &virtnet_stats_rx_basic_desc[0];
			num = ARRAY_SIZE(virtnet_stats_rx_basic_desc);

			virtnet_stats_sprintf(&p, fmt, noq_fmt, num, qid, desc);

		if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_CSUM) {
			desc = &virtnet_stats_rx_csum_desc[0];
			num = ARRAY_SIZE(virtnet_stats_rx_csum_desc);

			virtnet_stats_sprintf(&p, fmt, noq_fmt, num, qid, desc);

		if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_SPEED) {
			desc = &virtnet_stats_rx_speed_desc[0];
			num = ARRAY_SIZE(virtnet_stats_rx_speed_desc);

			virtnet_stats_sprintf(&p, fmt, noq_fmt, num, qid, desc);

	if (type == VIRTNET_Q_TYPE_TX) {

		desc = &virtnet_sq_stats_desc[0];
		num = ARRAY_SIZE(virtnet_sq_stats_desc);

		virtnet_stats_sprintf(&p, fmt, noq_fmt, num, qid, desc);

		noq_fmt = "tx_hw_%s";

		if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_BASIC) {
			desc = &virtnet_stats_tx_basic_desc[0];
			num = ARRAY_SIZE(virtnet_stats_tx_basic_desc);

			virtnet_stats_sprintf(&p, fmt, noq_fmt, num, qid, desc);

		if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_GSO) {
			desc = &virtnet_stats_tx_gso_desc[0];
			num = ARRAY_SIZE(virtnet_stats_tx_gso_desc);

			virtnet_stats_sprintf(&p, fmt, noq_fmt, num, qid, desc);

		if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_SPEED) {
			desc = &virtnet_stats_tx_speed_desc[0];
			num = ARRAY_SIZE(virtnet_stats_tx_speed_desc);

			virtnet_stats_sprintf(&p, fmt, noq_fmt, num, qid, desc);

struct virtnet_stats_ctx {
	/* The stats are written to qstats or ethtool -S */

	/* Used to calculate the offset inside the output buffer. */

	/* The actual supported stat types. */

	/* Used to calculate the reply buffer size. */

	/* Record the output buffer. */

static void virtnet_stats_ctx_init(struct virtnet_info *vi,
				   struct virtnet_stats_ctx *ctx,
				   u64 *data, bool to_qstat)

	ctx->to_qstat = to_qstat;

		ctx->desc_num[VIRTNET_Q_TYPE_RX] = ARRAY_SIZE(virtnet_rq_stats_desc_qstat);
		ctx->desc_num[VIRTNET_Q_TYPE_TX] = ARRAY_SIZE(virtnet_sq_stats_desc_qstat);

		queue_type = VIRTNET_Q_TYPE_RX;

		if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_BASIC) {
			ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_RX_BASIC;
			ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_rx_basic_desc_qstat);
			ctx->size[queue_type] += sizeof(struct virtio_net_stats_rx_basic);

		if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_CSUM) {
			ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_RX_CSUM;
			ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_rx_csum_desc_qstat);
			ctx->size[queue_type] += sizeof(struct virtio_net_stats_rx_csum);

		if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_GSO) {
			ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_RX_GSO;
			ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_rx_gso_desc_qstat);
			ctx->size[queue_type] += sizeof(struct virtio_net_stats_rx_gso);

		if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_SPEED) {
			ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_RX_SPEED;
			ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_rx_speed_desc_qstat);
			ctx->size[queue_type] += sizeof(struct virtio_net_stats_rx_speed);

		queue_type = VIRTNET_Q_TYPE_TX;

		if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_BASIC) {
			ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_TX_BASIC;
			ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_tx_basic_desc_qstat);
			ctx->size[queue_type] += sizeof(struct virtio_net_stats_tx_basic);

		if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_CSUM) {
			ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_TX_CSUM;
			ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_tx_csum_desc_qstat);
			ctx->size[queue_type] += sizeof(struct virtio_net_stats_tx_csum);

		if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_GSO) {
			ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_TX_GSO;
			ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_tx_gso_desc_qstat);
			ctx->size[queue_type] += sizeof(struct virtio_net_stats_tx_gso);

		if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_SPEED) {
			ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_TX_SPEED;
			ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_tx_speed_desc_qstat);
			ctx->size[queue_type] += sizeof(struct virtio_net_stats_tx_speed);

	ctx->desc_num[VIRTNET_Q_TYPE_RX] = ARRAY_SIZE(virtnet_rq_stats_desc);
	ctx->desc_num[VIRTNET_Q_TYPE_TX] = ARRAY_SIZE(virtnet_sq_stats_desc);

	if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_CVQ) {
		queue_type = VIRTNET_Q_TYPE_CQ;

		ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_CVQ;
		ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_cvq_desc);
		ctx->size[queue_type] += sizeof(struct virtio_net_stats_cvq);

	queue_type = VIRTNET_Q_TYPE_RX;

	if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_BASIC) {
		ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_RX_BASIC;
		ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_rx_basic_desc);
		ctx->size[queue_type] += sizeof(struct virtio_net_stats_rx_basic);

	if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_CSUM) {
		ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_RX_CSUM;
		ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_rx_csum_desc);
		ctx->size[queue_type] += sizeof(struct virtio_net_stats_rx_csum);

	if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_SPEED) {
		ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_RX_SPEED;
		ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_rx_speed_desc);
		ctx->size[queue_type] += sizeof(struct virtio_net_stats_rx_speed);

	queue_type = VIRTNET_Q_TYPE_TX;

	if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_BASIC) {
		ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_TX_BASIC;
		ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_tx_basic_desc);
		ctx->size[queue_type] += sizeof(struct virtio_net_stats_tx_basic);

	if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_GSO) {
		ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_TX_GSO;
		ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_tx_gso_desc);
		ctx->size[queue_type] += sizeof(struct virtio_net_stats_tx_gso);

	if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_SPEED) {
		ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_TX_SPEED;
		ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_tx_speed_desc);
		ctx->size[queue_type] += sizeof(struct virtio_net_stats_tx_speed);

/* stats_sum_queue - Calculate the sum of the same fields in sq or rq.
 * @sum: the position to store the sum values
 * @q_value: the first queue fields
 * @q_num: number of the queues
 */
static void stats_sum_queue(u64 *sum, u32 num, u64 *q_value, u32 q_num)

	for (i = 0; i < num; ++i) {

		for (j = 0; j < q_num; ++j)
			*p += *(q_value + i + j * step);
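
/*
 * Illustrative note (editor's addition, not part of the driver): with
 * num = 2 fields per queue, q_num = 3 queues and a per-queue stride equal to
 * the field count, the queues are laid out back to back:
 *
 *   q_value = { q0.f0, q0.f1, q1.f0, q1.f1, q2.f0, q2.f1 }
 *
 * so sum[0] = q_value[0] + q_value[2] + q_value[4] and
 *    sum[1] = q_value[1] + q_value[3] + q_value[5].
 */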
static void virtnet_fill_total_fields(struct virtnet_info *vi,
				      struct virtnet_stats_ctx *ctx)
{
	u64 *data, *first_rx_q, *first_tx_q;
	u32 num_cq, num_rx, num_tx;

	num_cq = ctx->desc_num[VIRTNET_Q_TYPE_CQ];
	num_rx = ctx->desc_num[VIRTNET_Q_TYPE_RX];
	num_tx = ctx->desc_num[VIRTNET_Q_TYPE_TX];

	first_rx_q = ctx->data + num_rx + num_tx + num_cq;
	first_tx_q = first_rx_q + vi->curr_queue_pairs * num_rx;

	data = ctx->data;

	stats_sum_queue(data, num_rx, first_rx_q, vi->curr_queue_pairs);

	data = ctx->data + num_rx;

	stats_sum_queue(data, num_tx, first_tx_q, vi->curr_queue_pairs);
}
static void virtnet_fill_stats_qstat(struct virtnet_info *vi, u32 qid,
				     struct virtnet_stats_ctx *ctx,
				     const u8 *base, bool drv_stats, u8 reply_type)
{
	const struct virtnet_stat_desc *desc;
	const u64_stats_t *v_stat;
	u64 offset, bitmap;
	const __le64 *v;
	u32 queue_type;
	int i, num;

	queue_type = vq_type(vi, qid);
	bitmap = ctx->bitmap[queue_type];

	if (drv_stats) {
		if (queue_type == VIRTNET_Q_TYPE_RX) {
			desc = &virtnet_rq_stats_desc_qstat[0];
			num = ARRAY_SIZE(virtnet_rq_stats_desc_qstat);
		} else {
			desc = &virtnet_sq_stats_desc_qstat[0];
			num = ARRAY_SIZE(virtnet_sq_stats_desc_qstat);
		}

		for (i = 0; i < num; ++i) {
			offset = desc[i].qstat_offset / sizeof(*ctx->data);
			v_stat = (const u64_stats_t *)(base + desc[i].offset);
			ctx->data[offset] = u64_stats_read(v_stat);
		}

		return;
	}

	if (bitmap & VIRTIO_NET_STATS_TYPE_RX_BASIC) {
		desc = &virtnet_stats_rx_basic_desc_qstat[0];
		num = ARRAY_SIZE(virtnet_stats_rx_basic_desc_qstat);
		if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_RX_BASIC)
			goto found;
	}

	if (bitmap & VIRTIO_NET_STATS_TYPE_RX_CSUM) {
		desc = &virtnet_stats_rx_csum_desc_qstat[0];
		num = ARRAY_SIZE(virtnet_stats_rx_csum_desc_qstat);
		if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_RX_CSUM)
			goto found;
	}

	if (bitmap & VIRTIO_NET_STATS_TYPE_RX_GSO) {
		desc = &virtnet_stats_rx_gso_desc_qstat[0];
		num = ARRAY_SIZE(virtnet_stats_rx_gso_desc_qstat);
		if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_RX_GSO)
			goto found;
	}

	if (bitmap & VIRTIO_NET_STATS_TYPE_RX_SPEED) {
		desc = &virtnet_stats_rx_speed_desc_qstat[0];
		num = ARRAY_SIZE(virtnet_stats_rx_speed_desc_qstat);
		if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_RX_SPEED)
			goto found;
	}

	if (bitmap & VIRTIO_NET_STATS_TYPE_TX_BASIC) {
		desc = &virtnet_stats_tx_basic_desc_qstat[0];
		num = ARRAY_SIZE(virtnet_stats_tx_basic_desc_qstat);
		if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_TX_BASIC)
			goto found;
	}

	if (bitmap & VIRTIO_NET_STATS_TYPE_TX_CSUM) {
		desc = &virtnet_stats_tx_csum_desc_qstat[0];
		num = ARRAY_SIZE(virtnet_stats_tx_csum_desc_qstat);
		if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_TX_CSUM)
			goto found;
	}

	if (bitmap & VIRTIO_NET_STATS_TYPE_TX_GSO) {
		desc = &virtnet_stats_tx_gso_desc_qstat[0];
		num = ARRAY_SIZE(virtnet_stats_tx_gso_desc_qstat);
		if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_TX_GSO)
			goto found;
	}

	if (bitmap & VIRTIO_NET_STATS_TYPE_TX_SPEED) {
		desc = &virtnet_stats_tx_speed_desc_qstat[0];
		num = ARRAY_SIZE(virtnet_stats_tx_speed_desc_qstat);
		if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_TX_SPEED)
			goto found;
	}

	return;

found:
	for (i = 0; i < num; ++i) {
		offset = desc[i].qstat_offset / sizeof(*ctx->data);
		v = (const __le64 *)(base + desc[i].offset);
		ctx->data[offset] = le64_to_cpu(*v);
	}
}
/* virtnet_fill_stats - copy the stats to qstats or ethtool -S
 * The stats source is the device or the driver.
 *
 * @vi: virtio net info
 * @qid: the vq id
 * @ctx: stats ctx (initiated by virtnet_stats_ctx_init())
 * @base: pointer to the device reply or the driver stats structure.
 * @drv_stats: designate the base type (device reply, driver stats)
 * @type: the type of the device reply (if drv_stats is true, this must be zero)
 */
static void virtnet_fill_stats(struct virtnet_info *vi, u32 qid,
			       struct virtnet_stats_ctx *ctx,
			       const u8 *base, bool drv_stats, u8 reply_type)
{
	u32 queue_type, num_rx, num_tx, num_cq;
	const struct virtnet_stat_desc *desc;
	const u64_stats_t *v_stat;
	u64 offset, bitmap;
	const __le64 *v;
	int i, num;

	if (ctx->to_qstat)
		return virtnet_fill_stats_qstat(vi, qid, ctx, base, drv_stats, reply_type);

	num_cq = ctx->desc_num[VIRTNET_Q_TYPE_CQ];
	num_rx = ctx->desc_num[VIRTNET_Q_TYPE_RX];
	num_tx = ctx->desc_num[VIRTNET_Q_TYPE_TX];

	queue_type = vq_type(vi, qid);
	bitmap = ctx->bitmap[queue_type];

	/* skip the total fields of pairs */
	offset = num_rx + num_tx;

	if (queue_type == VIRTNET_Q_TYPE_TX) {
		offset += num_cq + num_rx * vi->curr_queue_pairs + num_tx * (qid / 2);

		num = ARRAY_SIZE(virtnet_sq_stats_desc);
		if (drv_stats) {
			desc = &virtnet_sq_stats_desc[0];
			goto drv_stats;
		}

		offset += num;

	} else if (queue_type == VIRTNET_Q_TYPE_RX) {
		offset += num_cq + num_rx * (qid / 2);

		num = ARRAY_SIZE(virtnet_rq_stats_desc);
		if (drv_stats) {
			desc = &virtnet_rq_stats_desc[0];
			goto drv_stats;
		}

		offset += num;
	}

	if (bitmap & VIRTIO_NET_STATS_TYPE_CVQ) {
		desc = &virtnet_stats_cvq_desc[0];
		num = ARRAY_SIZE(virtnet_stats_cvq_desc);
		if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_CVQ)
			goto found;

		offset += num;
	}

	if (bitmap & VIRTIO_NET_STATS_TYPE_RX_BASIC) {
		desc = &virtnet_stats_rx_basic_desc[0];
		num = ARRAY_SIZE(virtnet_stats_rx_basic_desc);
		if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_RX_BASIC)
			goto found;

		offset += num;
	}

	if (bitmap & VIRTIO_NET_STATS_TYPE_RX_CSUM) {
		desc = &virtnet_stats_rx_csum_desc[0];
		num = ARRAY_SIZE(virtnet_stats_rx_csum_desc);
		if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_RX_CSUM)
			goto found;

		offset += num;
	}

	if (bitmap & VIRTIO_NET_STATS_TYPE_RX_SPEED) {
		desc = &virtnet_stats_rx_speed_desc[0];
		num = ARRAY_SIZE(virtnet_stats_rx_speed_desc);
		if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_RX_SPEED)
			goto found;

		offset += num;
	}

	if (bitmap & VIRTIO_NET_STATS_TYPE_TX_BASIC) {
		desc = &virtnet_stats_tx_basic_desc[0];
		num = ARRAY_SIZE(virtnet_stats_tx_basic_desc);
		if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_TX_BASIC)
			goto found;

		offset += num;
	}

	if (bitmap & VIRTIO_NET_STATS_TYPE_TX_GSO) {
		desc = &virtnet_stats_tx_gso_desc[0];
		num = ARRAY_SIZE(virtnet_stats_tx_gso_desc);
		if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_TX_GSO)
			goto found;

		offset += num;
	}

	if (bitmap & VIRTIO_NET_STATS_TYPE_TX_SPEED) {
		desc = &virtnet_stats_tx_speed_desc[0];
		num = ARRAY_SIZE(virtnet_stats_tx_speed_desc);
		if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_TX_SPEED)
			goto found;

		offset += num;
	}

	return;

found:
	for (i = 0; i < num; ++i) {
		v = (const __le64 *)(base + desc[i].offset);
		ctx->data[offset + i] = le64_to_cpu(*v);
	}

	return;

drv_stats:
	for (i = 0; i < num; ++i) {
		v_stat = (const u64_stats_t *)(base + desc[i].offset);
		ctx->data[offset + i] = u64_stats_read(v_stat);
	}
}
static int __virtnet_get_hw_stats(struct virtnet_info *vi,
				  struct virtnet_stats_ctx *ctx,
				  struct virtio_net_ctrl_queue_stats *req,
				  int req_size, void *reply, int res_size)
{
	struct virtio_net_stats_reply_hdr *hdr;
	struct scatterlist sgs_in, sgs_out;
	void *p;
	u32 qid;
	int ok;

	sg_init_one(&sgs_out, req, req_size);
	sg_init_one(&sgs_in, reply, res_size);

	ok = virtnet_send_command_reply(vi, VIRTIO_NET_CTRL_STATS,
					VIRTIO_NET_CTRL_STATS_GET,
					&sgs_out, &sgs_in);

	if (!ok)
		return ok;

	for (p = reply; p - reply < res_size; p += le16_to_cpu(hdr->size)) {
		hdr = p;
		qid = le16_to_cpu(hdr->vq_index);
		virtnet_fill_stats(vi, qid, ctx, p, false, hdr->type);
	}

	return 0;
}
static void virtnet_make_stat_req(struct virtnet_info *vi,
				  struct virtnet_stats_ctx *ctx,
				  struct virtio_net_ctrl_queue_stats *req,
				  int qid, int *idx)
{
	int qtype = vq_type(vi, qid);
	u64 bitmap = ctx->bitmap[qtype];

	if (!bitmap)
		return;

	req->stats[*idx].vq_index = cpu_to_le16(qid);
	req->stats[*idx].types_bitmap[0] = cpu_to_le64(bitmap);
	*idx += 1;
}
/* qid: -1: get stats of all vq.
 *     > 0: get the stats for the special vq. This must not be cvq.
 */
static int virtnet_get_hw_stats(struct virtnet_info *vi,
				struct virtnet_stats_ctx *ctx, int qid)
{
	int qnum, i, j, res_size, qtype, last_vq, first_vq;
	struct virtio_net_ctrl_queue_stats *req;
	bool enable_cvq;
	void *reply;
	int ok;

	if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_DEVICE_STATS))
		return 0;

	if (qid == -1) {
		last_vq = vi->curr_queue_pairs * 2 - 1;
		first_vq = 0;
		enable_cvq = true;
	} else {
		last_vq = qid;
		first_vq = qid;
		enable_cvq = false;
	}

	qnum = 0;
	res_size = 0;
	for (i = first_vq; i <= last_vq; ++i) {
		qtype = vq_type(vi, i);
		if (ctx->bitmap[qtype]) {
			++qnum;
			res_size += ctx->size[qtype];
		}
	}

	if (enable_cvq && ctx->bitmap[VIRTNET_Q_TYPE_CQ]) {
		res_size += ctx->size[VIRTNET_Q_TYPE_CQ];
		qnum += 1;
	}

	req = kcalloc(qnum, sizeof(*req), GFP_KERNEL);
	if (!req)
		return -ENOMEM;

	reply = kmalloc(res_size, GFP_KERNEL);
	if (!reply) {
		kfree(req);
		return -ENOMEM;
	}

	j = 0;
	for (i = first_vq; i <= last_vq; ++i)
		virtnet_make_stat_req(vi, ctx, req, i, &j);

	if (enable_cvq)
		virtnet_make_stat_req(vi, ctx, req, vi->max_queue_pairs * 2, &j);

	ok = __virtnet_get_hw_stats(vi, ctx, req, sizeof(*req) * j, reply, res_size);

	kfree(req);
	kfree(reply);

	return ok;
}
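/* The ethtool string order must match virtnet_get_ethtool_stats(): total
 * fields for RX and TX first, then the cvq strings, then one block per RX
 * queue followed by one block per TX queue.
 */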
static void virtnet_get_strings(struct net_device *dev, u32 stringset, u8 *data)
{
	struct virtnet_info *vi = netdev_priv(dev);
	unsigned int i;
	u8 *p = data;

	switch (stringset) {
	case ETH_SS_STATS:
		/* Generate the total field names. */
		virtnet_get_stats_string(vi, VIRTNET_Q_TYPE_RX, -1, &p);
		virtnet_get_stats_string(vi, VIRTNET_Q_TYPE_TX, -1, &p);

		virtnet_get_stats_string(vi, VIRTNET_Q_TYPE_CQ, 0, &p);

		for (i = 0; i < vi->curr_queue_pairs; ++i)
			virtnet_get_stats_string(vi, VIRTNET_Q_TYPE_RX, i, &p);

		for (i = 0; i < vi->curr_queue_pairs; ++i)
			virtnet_get_stats_string(vi, VIRTNET_Q_TYPE_TX, i, &p);
		break;
	}
}
static int virtnet_get_sset_count(struct net_device *dev, int sset)
{
	struct virtnet_info *vi = netdev_priv(dev);
	struct virtnet_stats_ctx ctx = {0};
	u32 pair_count;

	switch (sset) {
	case ETH_SS_STATS:
		virtnet_stats_ctx_init(vi, &ctx, NULL, false);

		pair_count = ctx.desc_num[VIRTNET_Q_TYPE_RX] + ctx.desc_num[VIRTNET_Q_TYPE_TX];

		return pair_count + ctx.desc_num[VIRTNET_Q_TYPE_CQ] +
			vi->curr_queue_pairs * pair_count;
	default:
		return -EOPNOTSUPP;
	}
}
static void virtnet_get_ethtool_stats(struct net_device *dev,
				      struct ethtool_stats *stats, u64 *data)
{
	struct virtnet_info *vi = netdev_priv(dev);
	struct virtnet_stats_ctx ctx = {0};
	unsigned int start, i;
	const u8 *stats_base;

	virtnet_stats_ctx_init(vi, &ctx, data, false);
	if (virtnet_get_hw_stats(vi, &ctx, -1))
		dev_warn(&vi->dev->dev, "Failed to get hw stats.\n");

	for (i = 0; i < vi->curr_queue_pairs; i++) {
		struct receive_queue *rq = &vi->rq[i];
		struct send_queue *sq = &vi->sq[i];

		stats_base = (const u8 *)&rq->stats;
		do {
			start = u64_stats_fetch_begin(&rq->stats.syncp);
			virtnet_fill_stats(vi, i * 2, &ctx, stats_base, true, 0);
		} while (u64_stats_fetch_retry(&rq->stats.syncp, start));

		stats_base = (const u8 *)&sq->stats;
		do {
			start = u64_stats_fetch_begin(&sq->stats.syncp);
			virtnet_fill_stats(vi, i * 2 + 1, &ctx, stats_base, true, 0);
		} while (u64_stats_fetch_retry(&sq->stats.syncp, start));
	}

	virtnet_fill_total_fields(vi, &ctx);
}
static void virtnet_get_channels(struct net_device *dev,
				 struct ethtool_channels *channels)
{
	struct virtnet_info *vi = netdev_priv(dev);

	channels->combined_count = vi->curr_queue_pairs;
	channels->max_combined = vi->max_queue_pairs;
	channels->max_other = 0;
	channels->rx_count = 0;
	channels->tx_count = 0;
	channels->other_count = 0;
}

static int virtnet_set_link_ksettings(struct net_device *dev,
				      const struct ethtool_link_ksettings *cmd)
{
	struct virtnet_info *vi = netdev_priv(dev);

	return ethtool_virtdev_set_link_ksettings(dev, cmd,
						  &vi->speed, &vi->duplex);
}

static int virtnet_get_link_ksettings(struct net_device *dev,
				      struct ethtool_link_ksettings *cmd)
{
	struct virtnet_info *vi = netdev_priv(dev);

	cmd->base.speed = vi->speed;
	cmd->base.duplex = vi->duplex;
	cmd->base.port = PORT_OTHER;

	return 0;
}
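/* Global coalescing control: push the requested TX/RX parameters to the
 * device via VIRTIO_NET_CTRL_NOTF_COAL and mirror them into the cached
 * intr_coal values so later ethtool reads return what was applied.
 */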
static int virtnet_send_tx_notf_coal_cmds(struct virtnet_info *vi,
					  struct ethtool_coalesce *ec)
{
	struct virtio_net_ctrl_coal_tx *coal_tx __free(kfree) = NULL;
	struct scatterlist sgs_tx;
	int i;

	coal_tx = kzalloc(sizeof(*coal_tx), GFP_KERNEL);
	if (!coal_tx)
		return -ENOMEM;

	coal_tx->tx_usecs = cpu_to_le32(ec->tx_coalesce_usecs);
	coal_tx->tx_max_packets = cpu_to_le32(ec->tx_max_coalesced_frames);
	sg_init_one(&sgs_tx, coal_tx, sizeof(*coal_tx));

	if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_NOTF_COAL,
				  VIRTIO_NET_CTRL_NOTF_COAL_TX_SET,
				  &sgs_tx))
		return -EINVAL;

	vi->intr_coal_tx.max_usecs = ec->tx_coalesce_usecs;
	vi->intr_coal_tx.max_packets = ec->tx_max_coalesced_frames;
	for (i = 0; i < vi->max_queue_pairs; i++) {
		vi->sq[i].intr_coal.max_usecs = ec->tx_coalesce_usecs;
		vi->sq[i].intr_coal.max_packets = ec->tx_max_coalesced_frames;
	}

	return 0;
}
static int virtnet_send_rx_notf_coal_cmds(struct virtnet_info *vi,
					  struct ethtool_coalesce *ec)
{
	struct virtio_net_ctrl_coal_rx *coal_rx __free(kfree) = NULL;
	bool rx_ctrl_dim_on = !!ec->use_adaptive_rx_coalesce;
	struct scatterlist sgs_rx;
	int i;

	if (rx_ctrl_dim_on && !virtio_has_feature(vi->vdev, VIRTIO_NET_F_VQ_NOTF_COAL))
		return -EOPNOTSUPP;

	if (rx_ctrl_dim_on && (ec->rx_coalesce_usecs != vi->intr_coal_rx.max_usecs ||
			       ec->rx_max_coalesced_frames != vi->intr_coal_rx.max_packets))
		return -EINVAL;

	if (rx_ctrl_dim_on && !vi->rx_dim_enabled) {
		vi->rx_dim_enabled = true;
		for (i = 0; i < vi->max_queue_pairs; i++) {
			mutex_lock(&vi->rq[i].dim_lock);
			vi->rq[i].dim_enabled = true;
			mutex_unlock(&vi->rq[i].dim_lock);
		}
		return 0;
	}

	coal_rx = kzalloc(sizeof(*coal_rx), GFP_KERNEL);
	if (!coal_rx)
		return -ENOMEM;

	if (!rx_ctrl_dim_on && vi->rx_dim_enabled) {
		vi->rx_dim_enabled = false;
		for (i = 0; i < vi->max_queue_pairs; i++) {
			mutex_lock(&vi->rq[i].dim_lock);
			vi->rq[i].dim_enabled = false;
			mutex_unlock(&vi->rq[i].dim_lock);
		}
	}

	/* Since the per-queue coalescing params can be set,
	 * we need apply the global new params even if they
	 * are not updated.
	 */
	coal_rx->rx_usecs = cpu_to_le32(ec->rx_coalesce_usecs);
	coal_rx->rx_max_packets = cpu_to_le32(ec->rx_max_coalesced_frames);
	sg_init_one(&sgs_rx, coal_rx, sizeof(*coal_rx));

	if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_NOTF_COAL,
				  VIRTIO_NET_CTRL_NOTF_COAL_RX_SET,
				  &sgs_rx))
		return -EINVAL;

	vi->intr_coal_rx.max_usecs = ec->rx_coalesce_usecs;
	vi->intr_coal_rx.max_packets = ec->rx_max_coalesced_frames;
	for (i = 0; i < vi->max_queue_pairs; i++) {
		mutex_lock(&vi->rq[i].dim_lock);
		vi->rq[i].intr_coal.max_usecs = ec->rx_coalesce_usecs;
		vi->rq[i].intr_coal.max_packets = ec->rx_max_coalesced_frames;
		mutex_unlock(&vi->rq[i].dim_lock);
	}

	return 0;
}
static int virtnet_send_notf_coal_cmds(struct virtnet_info *vi,
				       struct ethtool_coalesce *ec)
{
	int err;

	err = virtnet_send_tx_notf_coal_cmds(vi, ec);
	if (err)
		return err;

	err = virtnet_send_rx_notf_coal_cmds(vi, ec);
	if (err)
		return err;

	return 0;
}

static int virtnet_send_rx_notf_coal_vq_cmds(struct virtnet_info *vi,
					     struct ethtool_coalesce *ec,
					     u16 queue)
{
	bool rx_ctrl_dim_on = !!ec->use_adaptive_rx_coalesce;
	u32 max_usecs, max_packets;
	bool cur_rx_dim;
	int err;

	mutex_lock(&vi->rq[queue].dim_lock);
	cur_rx_dim = vi->rq[queue].dim_enabled;
	max_usecs = vi->rq[queue].intr_coal.max_usecs;
	max_packets = vi->rq[queue].intr_coal.max_packets;

	if (rx_ctrl_dim_on && (ec->rx_coalesce_usecs != max_usecs ||
			       ec->rx_max_coalesced_frames != max_packets)) {
		mutex_unlock(&vi->rq[queue].dim_lock);
		return -EINVAL;
	}

	if (rx_ctrl_dim_on && !cur_rx_dim) {
		vi->rq[queue].dim_enabled = true;
		mutex_unlock(&vi->rq[queue].dim_lock);
		return 0;
	}

	if (!rx_ctrl_dim_on && cur_rx_dim)
		vi->rq[queue].dim_enabled = false;

	/* If no params are updated, userspace ethtool will
	 * reject the modification.
	 */
	err = virtnet_send_rx_ctrl_coal_vq_cmd(vi, queue,
					       ec->rx_coalesce_usecs,
					       ec->rx_max_coalesced_frames);
	mutex_unlock(&vi->rq[queue].dim_lock);
	return err;
}

static int virtnet_send_notf_coal_vq_cmds(struct virtnet_info *vi,
					  struct ethtool_coalesce *ec,
					  u16 queue)
{
	int err;

	err = virtnet_send_rx_notf_coal_vq_cmds(vi, ec, queue);
	if (err)
		return err;

	err = virtnet_send_tx_ctrl_coal_vq_cmd(vi, queue,
					       ec->tx_coalesce_usecs,
					       ec->tx_max_coalesced_frames);
	if (err)
		return err;

	return 0;
}
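/* Worker invoked by net_dim once it has computed a new RX moderation
 * profile; it pushes the new usecs/packets pair to the device unless
 * adaptive coalescing was meanwhile disabled for this queue.
 */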
static void virtnet_rx_dim_work(struct work_struct *work)
{
	struct dim *dim = container_of(work, struct dim, work);
	struct receive_queue *rq = container_of(dim,
						struct receive_queue, dim);
	struct virtnet_info *vi = rq->vq->vdev->priv;
	struct net_device *dev = vi->dev;
	struct dim_cq_moder update_moder;
	int qnum, err;

	qnum = rq - vi->rq;

	mutex_lock(&rq->dim_lock);
	if (!rq->dim_enabled)
		goto out;

	update_moder = net_dim_get_rx_irq_moder(dev, dim);
	if (update_moder.usec != rq->intr_coal.max_usecs ||
	    update_moder.pkts != rq->intr_coal.max_packets) {
		err = virtnet_send_rx_ctrl_coal_vq_cmd(vi, qnum,
						       update_moder.usec,
						       update_moder.pkts);
		if (err)
			pr_debug("%s: Failed to send dim parameters on rxq%d\n",
				 dev->name, qnum);
	}
out:
	dim->state = DIM_START_MEASURE;
	mutex_unlock(&rq->dim_lock);
}
static int virtnet_coal_params_supported(struct ethtool_coalesce *ec)
{
	/* usecs coalescing is supported only if VIRTIO_NET_F_NOTF_COAL
	 * or VIRTIO_NET_F_VQ_NOTF_COAL feature is negotiated.
	 */
	if (ec->rx_coalesce_usecs || ec->tx_coalesce_usecs)
		return -EOPNOTSUPP;

	if (ec->tx_max_coalesced_frames > 1 ||
	    ec->rx_max_coalesced_frames != 1)
		return -EINVAL;

	return 0;
}

static int virtnet_should_update_vq_weight(int dev_flags, int weight,
					   int vq_weight, bool *should_update)
{
	if (weight ^ vq_weight) {
		if (dev_flags & IFF_UP)
			return -EBUSY;
		*should_update = true;
	}

	return 0;
}
static int virtnet_set_coalesce(struct net_device *dev,
				struct ethtool_coalesce *ec,
				struct kernel_ethtool_coalesce *kernel_coal,
				struct netlink_ext_ack *extack)
{
	struct virtnet_info *vi = netdev_priv(dev);
	int ret, queue_number, napi_weight, i;
	bool update_napi = false;

	/* Can't change NAPI weight if the link is up */
	napi_weight = ec->tx_max_coalesced_frames ? NAPI_POLL_WEIGHT : 0;
	for (queue_number = 0; queue_number < vi->max_queue_pairs; queue_number++) {
		ret = virtnet_should_update_vq_weight(dev->flags, napi_weight,
						      vi->sq[queue_number].napi.weight,
						      &update_napi);
		if (ret)
			return ret;

		if (update_napi) {
			/* All queues that belong to [queue_number, vi->max_queue_pairs] will be
			 * updated for the sake of simplicity, which might not be necessary
			 */
			break;
		}
	}

	if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_NOTF_COAL))
		ret = virtnet_send_notf_coal_cmds(vi, ec);
	else
		ret = virtnet_coal_params_supported(ec);

	if (ret)
		return ret;

	if (update_napi) {
		/* xsk xmit depends on the tx napi. So if xsk is active,
		 * prevent modifications to tx napi.
		 */
		for (i = queue_number; i < vi->max_queue_pairs; i++) {
			if (vi->sq[i].xsk_pool)
				return -EBUSY;
		}

		for (; queue_number < vi->max_queue_pairs; queue_number++)
			vi->sq[queue_number].napi.weight = napi_weight;
	}

	return ret;
}
static int virtnet_get_coalesce(struct net_device *dev,
				struct ethtool_coalesce *ec,
				struct kernel_ethtool_coalesce *kernel_coal,
				struct netlink_ext_ack *extack)
{
	struct virtnet_info *vi = netdev_priv(dev);

	if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_NOTF_COAL)) {
		ec->rx_coalesce_usecs = vi->intr_coal_rx.max_usecs;
		ec->tx_coalesce_usecs = vi->intr_coal_tx.max_usecs;
		ec->tx_max_coalesced_frames = vi->intr_coal_tx.max_packets;
		ec->rx_max_coalesced_frames = vi->intr_coal_rx.max_packets;
		ec->use_adaptive_rx_coalesce = vi->rx_dim_enabled;
	} else {
		ec->rx_max_coalesced_frames = 1;

		if (vi->sq[0].napi.weight)
			ec->tx_max_coalesced_frames = 1;
	}

	return 0;
}
static int virtnet_set_per_queue_coalesce(struct net_device *dev,
					  u32 queue,
					  struct ethtool_coalesce *ec)
{
	struct virtnet_info *vi = netdev_priv(dev);
	int ret, napi_weight;
	bool update_napi = false;

	if (queue >= vi->max_queue_pairs)
		return -EINVAL;

	/* Can't change NAPI weight if the link is up */
	napi_weight = ec->tx_max_coalesced_frames ? NAPI_POLL_WEIGHT : 0;
	ret = virtnet_should_update_vq_weight(dev->flags, napi_weight,
					      vi->sq[queue].napi.weight,
					      &update_napi);
	if (ret)
		return ret;

	if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_VQ_NOTF_COAL))
		ret = virtnet_send_notf_coal_vq_cmds(vi, ec, queue);
	else
		ret = virtnet_coal_params_supported(ec);

	if (ret)
		return ret;

	if (update_napi)
		vi->sq[queue].napi.weight = napi_weight;

	return 0;
}
static int virtnet_get_per_queue_coalesce(struct net_device *dev,
					  u32 queue,
					  struct ethtool_coalesce *ec)
{
	struct virtnet_info *vi = netdev_priv(dev);

	if (queue >= vi->max_queue_pairs)
		return -EINVAL;

	if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_VQ_NOTF_COAL)) {
		mutex_lock(&vi->rq[queue].dim_lock);
		ec->rx_coalesce_usecs = vi->rq[queue].intr_coal.max_usecs;
		ec->tx_coalesce_usecs = vi->sq[queue].intr_coal.max_usecs;
		ec->tx_max_coalesced_frames = vi->sq[queue].intr_coal.max_packets;
		ec->rx_max_coalesced_frames = vi->rq[queue].intr_coal.max_packets;
		ec->use_adaptive_rx_coalesce = vi->rq[queue].dim_enabled;
		mutex_unlock(&vi->rq[queue].dim_lock);
	} else {
		ec->rx_max_coalesced_frames = 1;

		if (vi->sq[queue].napi.weight)
			ec->tx_max_coalesced_frames = 1;
	}

	return 0;
}
static void virtnet_init_settings(struct net_device *dev)
{
	struct virtnet_info *vi = netdev_priv(dev);

	vi->speed = SPEED_UNKNOWN;
	vi->duplex = DUPLEX_UNKNOWN;
}

static u32 virtnet_get_rxfh_key_size(struct net_device *dev)
{
	return ((struct virtnet_info *)netdev_priv(dev))->rss_key_size;
}

static u32 virtnet_get_rxfh_indir_size(struct net_device *dev)
{
	return ((struct virtnet_info *)netdev_priv(dev))->rss_indir_table_size;
}

static int virtnet_get_rxfh(struct net_device *dev,
			    struct ethtool_rxfh_param *rxfh)
{
	struct virtnet_info *vi = netdev_priv(dev);
	int i;

	if (rxfh->indir) {
		for (i = 0; i < vi->rss_indir_table_size; ++i)
			rxfh->indir[i] = vi->rss.indirection_table[i];
	}

	if (rxfh->key)
		memcpy(rxfh->key, vi->rss.key, vi->rss_key_size);

	rxfh->hfunc = ETH_RSS_HASH_TOP;

	return 0;
}
static int virtnet_set_rxfh(struct net_device *dev,
			    struct ethtool_rxfh_param *rxfh,
			    struct netlink_ext_ack *extack)
{
	struct virtnet_info *vi = netdev_priv(dev);
	bool update = false;
	int i;

	if (rxfh->hfunc != ETH_RSS_HASH_NO_CHANGE &&
	    rxfh->hfunc != ETH_RSS_HASH_TOP)
		return -EOPNOTSUPP;

	if (rxfh->indir) {
		if (!vi->has_rss)
			return -EOPNOTSUPP;

		for (i = 0; i < vi->rss_indir_table_size; ++i)
			vi->rss.indirection_table[i] = rxfh->indir[i];
		update = true;
	}

	if (rxfh->key) {
		/* If either _F_HASH_REPORT or _F_RSS are negotiated, the
		 * device provides hash calculation capabilities, that is,
		 * hash_key is configured.
		 */
		if (!vi->has_rss && !vi->has_rss_hash_report)
			return -EOPNOTSUPP;

		memcpy(vi->rss.key, rxfh->key, vi->rss_key_size);
		update = true;
	}

	if (update)
		virtnet_commit_rss_command(vi);

	return 0;
}
static int virtnet_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *info, u32 *rule_locs)
{
	struct virtnet_info *vi = netdev_priv(dev);
	int rc = 0;

	switch (info->cmd) {
	case ETHTOOL_GRXRINGS:
		info->data = vi->curr_queue_pairs;
		break;
	case ETHTOOL_GRXFH:
		virtnet_get_hashflow(vi, info);
		break;
	default:
		rc = -EOPNOTSUPP;
	}

	return rc;
}

static int virtnet_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *info)
{
	struct virtnet_info *vi = netdev_priv(dev);
	int rc = 0;

	switch (info->cmd) {
	case ETHTOOL_SRXFH:
		if (!virtnet_set_hashflow(vi, info))
			rc = -EINVAL;
		break;
	default:
		rc = -EOPNOTSUPP;
	}

	return rc;
}
static const struct ethtool_ops virtnet_ethtool_ops = {
	.supported_coalesce_params = ETHTOOL_COALESCE_MAX_FRAMES |
		ETHTOOL_COALESCE_USECS | ETHTOOL_COALESCE_USE_ADAPTIVE_RX,
	.get_drvinfo = virtnet_get_drvinfo,
	.get_link = ethtool_op_get_link,
	.get_ringparam = virtnet_get_ringparam,
	.set_ringparam = virtnet_set_ringparam,
	.get_strings = virtnet_get_strings,
	.get_sset_count = virtnet_get_sset_count,
	.get_ethtool_stats = virtnet_get_ethtool_stats,
	.set_channels = virtnet_set_channels,
	.get_channels = virtnet_get_channels,
	.get_ts_info = ethtool_op_get_ts_info,
	.get_link_ksettings = virtnet_get_link_ksettings,
	.set_link_ksettings = virtnet_set_link_ksettings,
	.set_coalesce = virtnet_set_coalesce,
	.get_coalesce = virtnet_get_coalesce,
	.set_per_queue_coalesce = virtnet_set_per_queue_coalesce,
	.get_per_queue_coalesce = virtnet_get_per_queue_coalesce,
	.get_rxfh_key_size = virtnet_get_rxfh_key_size,
	.get_rxfh_indir_size = virtnet_get_rxfh_indir_size,
	.get_rxfh = virtnet_get_rxfh,
	.set_rxfh = virtnet_set_rxfh,
	.get_rxnfc = virtnet_get_rxnfc,
	.set_rxnfc = virtnet_set_rxnfc,
};
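/* Per-queue netdev qstats: combine the device-supplied hardware stats
 * (when VIRTIO_NET_F_DEVICE_STATS is negotiated) with the driver's own
 * per-queue counters for one RX or TX queue.
 */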
static void virtnet_get_queue_stats_rx(struct net_device *dev, int i,
				       struct netdev_queue_stats_rx *stats)
{
	struct virtnet_info *vi = netdev_priv(dev);
	struct receive_queue *rq = &vi->rq[i];
	struct virtnet_stats_ctx ctx = {0};

	virtnet_stats_ctx_init(vi, &ctx, (void *)stats, true);

	virtnet_get_hw_stats(vi, &ctx, i * 2);
	virtnet_fill_stats(vi, i * 2, &ctx, (void *)&rq->stats, true, 0);
}

static void virtnet_get_queue_stats_tx(struct net_device *dev, int i,
				       struct netdev_queue_stats_tx *stats)
{
	struct virtnet_info *vi = netdev_priv(dev);
	struct send_queue *sq = &vi->sq[i];
	struct virtnet_stats_ctx ctx = {0};

	virtnet_stats_ctx_init(vi, &ctx, (void *)stats, true);

	virtnet_get_hw_stats(vi, &ctx, i * 2 + 1);
	virtnet_fill_stats(vi, i * 2 + 1, &ctx, (void *)&sq->stats, true, 0);
}
static void virtnet_get_base_stats(struct net_device *dev,
				   struct netdev_queue_stats_rx *rx,
				   struct netdev_queue_stats_tx *tx)
{
	struct virtnet_info *vi = netdev_priv(dev);

	/* The queue stats of the virtio-net will not be reset. So here we
	 * return 0.
	 */
	rx->bytes = 0;
	rx->packets = 0;

	if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_BASIC) {
		rx->hw_drops = 0;
		rx->hw_drop_overruns = 0;
	}

	if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_CSUM) {
		rx->csum_unnecessary = 0;
		rx->csum_none = 0;
		rx->csum_bad = 0;
	}

	if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_GSO) {
		rx->hw_gro_packets = 0;
		rx->hw_gro_bytes = 0;
		rx->hw_gro_wire_packets = 0;
		rx->hw_gro_wire_bytes = 0;
	}

	if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_SPEED)
		rx->hw_drop_ratelimits = 0;

	tx->bytes = 0;
	tx->packets = 0;
	tx->stop = 0;
	tx->wake = 0;

	if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_BASIC) {
		tx->hw_drops = 0;
		tx->hw_drop_errors = 0;
	}

	if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_CSUM) {
		tx->csum_none = 0;
		tx->needs_csum = 0;
	}

	if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_GSO) {
		tx->hw_gso_packets = 0;
		tx->hw_gso_bytes = 0;
		tx->hw_gso_wire_packets = 0;
		tx->hw_gso_wire_bytes = 0;
	}

	if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_SPEED)
		tx->hw_drop_ratelimits = 0;
}
static const struct netdev_stat_ops virtnet_stat_ops = {
	.get_queue_stats_rx	= virtnet_get_queue_stats_rx,
	.get_queue_stats_tx	= virtnet_get_queue_stats_tx,
	.get_base_stats		= virtnet_get_base_stats,
};
static void virtnet_freeze_down(struct virtio_device *vdev)
{
	struct virtnet_info *vi = vdev->priv;

	/* Make sure no work handler is accessing the device */
	flush_work(&vi->config_work);
	disable_rx_mode_work(vi);
	flush_work(&vi->rx_mode_work);

	netif_tx_lock_bh(vi->dev);
	netif_device_detach(vi->dev);
	netif_tx_unlock_bh(vi->dev);
	if (netif_running(vi->dev))
		virtnet_close(vi->dev);
}

static int init_vqs(struct virtnet_info *vi);

static int virtnet_restore_up(struct virtio_device *vdev)
{
	struct virtnet_info *vi = vdev->priv;
	int err;

	err = init_vqs(vi);
	if (err)
		return err;

	virtio_device_ready(vdev);

	enable_delayed_refill(vi);
	enable_rx_mode_work(vi);

	if (netif_running(vi->dev)) {
		err = virtnet_open(vi->dev);
		if (err)
			return err;
	}

	netif_tx_lock_bh(vi->dev);
	netif_device_attach(vi->dev);
	netif_tx_unlock_bh(vi->dev);
	return 0;
}
static int virtnet_set_guest_offloads(struct virtnet_info *vi, u64 offloads)
{
	__virtio64 *_offloads __free(kfree) = NULL;
	struct scatterlist sg;

	_offloads = kzalloc(sizeof(*_offloads), GFP_KERNEL);
	if (!_offloads)
		return -ENOMEM;

	*_offloads = cpu_to_virtio64(vi->vdev, offloads);

	sg_init_one(&sg, _offloads, sizeof(*_offloads));

	if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_GUEST_OFFLOADS,
				  VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET, &sg)) {
		dev_warn(&vi->dev->dev, "Fail to set guest offload.\n");
		return -EINVAL;
	}

	return 0;
}

static int virtnet_clear_guest_offloads(struct virtnet_info *vi)
{
	u64 offloads = 0;

	if (!vi->guest_offloads)
		return 0;

	return virtnet_set_guest_offloads(vi, offloads);
}

static int virtnet_restore_guest_offloads(struct virtnet_info *vi)
{
	u64 offloads = vi->guest_offloads;

	if (!vi->guest_offloads)
		return 0;

	return virtnet_set_guest_offloads(vi, offloads);
}
static int virtnet_rq_bind_xsk_pool(struct virtnet_info *vi, struct receive_queue *rq,
				    struct xsk_buff_pool *pool)
{
	int err, qindex;

	qindex = rq - vi->rq;

	if (pool) {
		err = xdp_rxq_info_reg(&rq->xsk_rxq_info, vi->dev, qindex, rq->napi.napi_id);
		if (err < 0)
			return err;

		err = xdp_rxq_info_reg_mem_model(&rq->xsk_rxq_info,
						 MEM_TYPE_XSK_BUFF_POOL, NULL);
		if (err < 0)
			goto unreg;

		xsk_pool_set_rxq_info(pool, &rq->xsk_rxq_info);
	}

	virtnet_rx_pause(vi, rq);

	err = virtqueue_reset(rq->vq, virtnet_rq_unmap_free_buf);
	if (err) {
		netdev_err(vi->dev, "reset rx fail: rx queue index: %d err: %d\n", qindex, err);

		pool = NULL;
	}

	rq->xsk_pool = pool;

	virtnet_rx_resume(vi, rq);

	if (pool)
		return 0;

unreg:
	xdp_rxq_info_unreg(&rq->xsk_rxq_info);
	return err;
}

static int virtnet_sq_bind_xsk_pool(struct virtnet_info *vi,
				    struct send_queue *sq,
				    struct xsk_buff_pool *pool)
{
	int err, qindex;

	qindex = sq - vi->sq;

	virtnet_tx_pause(vi, sq);

	err = virtqueue_reset(sq->vq, virtnet_sq_free_unused_buf);
	if (err) {
		netdev_err(vi->dev, "reset tx fail: tx queue index: %d err: %d\n", qindex, err);
		pool = NULL;
	}

	sq->xsk_pool = pool;

	virtnet_tx_resume(vi, sq);

	return err;
}
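/* Binding an AF_XDP pool: validate headroom and queue id, require that the
 * RX and TX virtqueues share one DMA device, DMA-map the pool and a single
 * shared TX header, then rebind the RX and TX virtqueues to the pool.
 */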
static int virtnet_xsk_pool_enable(struct net_device *dev,
				   struct xsk_buff_pool *pool,
				   u16 qid)
{
	struct virtnet_info *vi = netdev_priv(dev);
	struct receive_queue *rq;
	struct device *dma_dev;
	struct send_queue *sq;
	dma_addr_t hdr_dma;
	int err, size;

	if (vi->hdr_len > xsk_pool_get_headroom(pool))
		return -EINVAL;

	/* In big_packets mode, xdp cannot work, so there is no need to
	 * initialize xsk of rq.
	 */
	if (vi->big_packets && !vi->mergeable_rx_bufs)
		return -ENOENT;

	if (qid >= vi->curr_queue_pairs)
		return -EINVAL;

	sq = &vi->sq[qid];
	rq = &vi->rq[qid];

	/* xsk assumes that tx and rx must have the same dma device. The af-xdp
	 * may use one buffer to receive from the rx and reuse this buffer to
	 * send by the tx. So the dma dev of sq and rq must be the same one.
	 *
	 * But vq->dma_dev allows every vq has the respective dma dev. So I
	 * check the dma dev of vq and sq is the same dev.
	 */
	if (virtqueue_dma_dev(rq->vq) != virtqueue_dma_dev(sq->vq))
		return -EINVAL;

	dma_dev = virtqueue_dma_dev(rq->vq);
	if (!dma_dev)
		return -EINVAL;

	size = virtqueue_get_vring_size(rq->vq);

	rq->xsk_buffs = kvcalloc(size, sizeof(*rq->xsk_buffs), GFP_KERNEL);
	if (!rq->xsk_buffs)
		return -ENOMEM;

	hdr_dma = virtqueue_dma_map_single_attrs(sq->vq, &xsk_hdr, vi->hdr_len,
						 DMA_TO_DEVICE, 0);
	if (virtqueue_dma_mapping_error(sq->vq, hdr_dma))
		return -ENOMEM;

	err = xsk_pool_dma_map(pool, dma_dev, 0);
	if (err)
		goto err_xsk_map;

	err = virtnet_rq_bind_xsk_pool(vi, rq, pool);
	if (err)
		goto err_rq;

	err = virtnet_sq_bind_xsk_pool(vi, sq, pool);
	if (err)
		goto err_sq;

	/* Now, we do not support tx offload(such as tx csum), so all the tx
	 * virtnet hdr is zero. So all the tx packets can share a single hdr.
	 */
	sq->xsk_hdr_dma_addr = hdr_dma;

	return 0;

err_sq:
	virtnet_rq_bind_xsk_pool(vi, rq, NULL);
err_rq:
	xsk_pool_dma_unmap(pool, 0);
err_xsk_map:
	virtqueue_dma_unmap_single_attrs(rq->vq, hdr_dma, vi->hdr_len,
					 DMA_TO_DEVICE, 0);
	return err;
}
static int virtnet_xsk_pool_disable(struct net_device *dev, u16 qid)
{
	struct virtnet_info *vi = netdev_priv(dev);
	struct xsk_buff_pool *pool;
	struct receive_queue *rq;
	struct send_queue *sq;
	int err;

	if (qid >= vi->curr_queue_pairs)
		return -EINVAL;

	sq = &vi->sq[qid];
	rq = &vi->rq[qid];

	pool = rq->xsk_pool;

	err = virtnet_rq_bind_xsk_pool(vi, rq, NULL);
	err |= virtnet_sq_bind_xsk_pool(vi, sq, NULL);

	xsk_pool_dma_unmap(pool, 0);

	virtqueue_dma_unmap_single_attrs(sq->vq, sq->xsk_hdr_dma_addr,
					 vi->hdr_len, DMA_TO_DEVICE, 0);
	kvfree(rq->xsk_buffs);

	return err;
}

static int virtnet_xsk_pool_setup(struct net_device *dev, struct netdev_bpf *xdp)
{
	if (xdp->xsk.pool)
		return virtnet_xsk_pool_enable(dev, xdp->xsk.pool,
					       xdp->xsk.queue_id);
	else
		return virtnet_xsk_pool_disable(dev, xdp->xsk.queue_id);
}
static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog,
			   struct netlink_ext_ack *extack)
{
	unsigned int room = SKB_DATA_ALIGN(XDP_PACKET_HEADROOM +
					   sizeof(struct skb_shared_info));
	unsigned int max_sz = PAGE_SIZE - room - ETH_HLEN;
	struct virtnet_info *vi = netdev_priv(dev);
	struct bpf_prog *old_prog;
	u16 xdp_qp = 0, curr_qp;
	int i, err;

	if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)
	    && (virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_TSO4) ||
		virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_TSO6) ||
		virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_ECN) ||
		virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_UFO) ||
		virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_CSUM) ||
		virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_USO4) ||
		virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_USO6))) {
		NL_SET_ERR_MSG_MOD(extack, "Can't set XDP while host is implementing GRO_HW/CSUM, disable GRO_HW/CSUM first");
		return -EOPNOTSUPP;
	}

	if (vi->mergeable_rx_bufs && !vi->any_header_sg) {
		NL_SET_ERR_MSG_MOD(extack, "XDP expects header/data in single page, any_header_sg required");
		return -EINVAL;
	}

	if (prog && !prog->aux->xdp_has_frags && dev->mtu > max_sz) {
		NL_SET_ERR_MSG_MOD(extack, "MTU too large to enable XDP without frags");
		netdev_warn(dev, "single-buffer XDP requires MTU less than %u\n", max_sz);
		return -EINVAL;
	}

	curr_qp = vi->curr_queue_pairs - vi->xdp_queue_pairs;
	if (prog)
		xdp_qp = nr_cpu_ids;

	/* XDP requires extra queues for XDP_TX */
	if (curr_qp + xdp_qp > vi->max_queue_pairs) {
		netdev_warn_once(dev, "XDP request %i queues but max is %i. XDP_TX and XDP_REDIRECT will operate in a slower locked tx mode.\n",
				 curr_qp + xdp_qp, vi->max_queue_pairs);
		xdp_qp = 0;
	}

	old_prog = rtnl_dereference(vi->rq[0].xdp_prog);
	if (!prog && !old_prog)
		return 0;

	if (prog)
		bpf_prog_add(prog, vi->max_queue_pairs - 1);

	/* Make sure NAPI is not using any XDP TX queues for RX. */
	if (netif_running(dev)) {
		for (i = 0; i < vi->max_queue_pairs; i++) {
			napi_disable(&vi->rq[i].napi);
			virtnet_napi_tx_disable(&vi->sq[i].napi);
		}
	}

	if (!prog) {
		for (i = 0; i < vi->max_queue_pairs; i++) {
			rcu_assign_pointer(vi->rq[i].xdp_prog, prog);
			if (i == 0)
				virtnet_restore_guest_offloads(vi);
		}
		synchronize_net();
	}

	err = virtnet_set_queues(vi, curr_qp + xdp_qp);
	if (err)
		goto err;
	netif_set_real_num_rx_queues(dev, curr_qp + xdp_qp);
	vi->xdp_queue_pairs = xdp_qp;

	if (prog) {
		vi->xdp_enabled = true;
		for (i = 0; i < vi->max_queue_pairs; i++) {
			rcu_assign_pointer(vi->rq[i].xdp_prog, prog);
			if (i == 0 && !old_prog)
				virtnet_clear_guest_offloads(vi);
		}
		if (!old_prog)
			xdp_features_set_redirect_target(dev, true);
	} else {
		xdp_features_clear_redirect_target(dev);
		vi->xdp_enabled = false;
	}

	for (i = 0; i < vi->max_queue_pairs; i++) {
		if (old_prog)
			bpf_prog_put(old_prog);
		if (netif_running(dev)) {
			virtnet_napi_enable(vi->rq[i].vq, &vi->rq[i].napi);
			virtnet_napi_tx_enable(vi, vi->sq[i].vq,
					       &vi->sq[i].napi);
		}
	}

	return 0;

err:
	if (!prog) {
		virtnet_clear_guest_offloads(vi);
		for (i = 0; i < vi->max_queue_pairs; i++)
			rcu_assign_pointer(vi->rq[i].xdp_prog, old_prog);
	}

	if (netif_running(dev)) {
		for (i = 0; i < vi->max_queue_pairs; i++) {
			virtnet_napi_enable(vi->rq[i].vq, &vi->rq[i].napi);
			virtnet_napi_tx_enable(vi, vi->sq[i].vq,
					       &vi->sq[i].napi);
		}
	}
	if (prog)
		bpf_prog_sub(prog, vi->max_queue_pairs - 1);
	return err;
}
static int virtnet_xdp(struct net_device *dev, struct netdev_bpf *xdp)
{
	switch (xdp->command) {
	case XDP_SETUP_PROG:
		return virtnet_xdp_set(dev, xdp->prog, xdp->extack);
	case XDP_SETUP_XSK_POOL:
		return virtnet_xsk_pool_setup(dev, xdp);
	default:
		return -EINVAL;
	}
}

static int virtnet_get_phys_port_name(struct net_device *dev, char *buf,
				      size_t len)
{
	struct virtnet_info *vi = netdev_priv(dev);
	int ret;

	if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_STANDBY))
		return -EOPNOTSUPP;

	ret = snprintf(buf, len, "sby");
	if (ret >= len)
		return -EOPNOTSUPP;

	return 0;
}
static int virtnet_set_features(struct net_device *dev,
				netdev_features_t features)
{
	struct virtnet_info *vi = netdev_priv(dev);
	u64 offloads;
	int err;

	if ((dev->features ^ features) & NETIF_F_GRO_HW) {
		if (vi->xdp_enabled)
			return -EBUSY;

		if (features & NETIF_F_GRO_HW)
			offloads = vi->guest_offloads_capable;
		else
			offloads = vi->guest_offloads_capable &
				   ~GUEST_OFFLOAD_GRO_HW_MASK;

		err = virtnet_set_guest_offloads(vi, offloads);
		if (err)
			return err;
		vi->guest_offloads = offloads;
	}

	if ((dev->features ^ features) & NETIF_F_RXHASH) {
		if (features & NETIF_F_RXHASH)
			vi->rss.hash_types = vi->rss_hash_types_saved;
		else
			vi->rss.hash_types = VIRTIO_NET_HASH_REPORT_NONE;

		if (!virtnet_commit_rss_command(vi))
			return -EINVAL;
	}

	return 0;
}
static void virtnet_tx_timeout(struct net_device *dev, unsigned int txqueue)
{
	struct virtnet_info *priv = netdev_priv(dev);
	struct send_queue *sq = &priv->sq[txqueue];
	struct netdev_queue *txq = netdev_get_tx_queue(dev, txqueue);

	u64_stats_update_begin(&sq->stats.syncp);
	u64_stats_inc(&sq->stats.tx_timeouts);
	u64_stats_update_end(&sq->stats.syncp);

	netdev_err(dev, "TX timeout on queue: %u, sq: %s, vq: 0x%x, name: %s, %u usecs ago\n",
		   txqueue, sq->name, sq->vq->index, sq->vq->name,
		   jiffies_to_usecs(jiffies - READ_ONCE(txq->trans_start)));
}
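/* Register the net_dim based adaptive interrupt moderation worker for the
 * RX queues (usecs and packets coalescing fields), then initialize the
 * per-queue dim state for every receive queue.
 */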
static int virtnet_init_irq_moder(struct virtnet_info *vi)
{
	u8 profile_flags = 0, coal_flags = 0;
	int ret, i;

	profile_flags |= DIM_PROFILE_RX;
	coal_flags |= DIM_COALESCE_USEC | DIM_COALESCE_PKTS;
	ret = net_dim_init_irq_moder(vi->dev, profile_flags, coal_flags,
				     DIM_CQ_PERIOD_MODE_START_FROM_EQE,
				     0, virtnet_rx_dim_work, NULL);
	if (ret)
		return ret;

	for (i = 0; i < vi->max_queue_pairs; i++)
		net_dim_setting(vi->dev, &vi->rq[i].dim, false);

	return 0;
}

static void virtnet_free_irq_moder(struct virtnet_info *vi)
{
	if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_VQ_NOTF_COAL))
		return;

	rtnl_lock();
	net_dim_free_irq_moder(vi->dev);
	rtnl_unlock();
}
static const struct net_device_ops virtnet_netdev = {
	.ndo_open		= virtnet_open,
	.ndo_stop		= virtnet_close,
	.ndo_start_xmit		= start_xmit,
	.ndo_validate_addr	= eth_validate_addr,
	.ndo_set_mac_address	= virtnet_set_mac_address,
	.ndo_set_rx_mode	= virtnet_set_rx_mode,
	.ndo_get_stats64	= virtnet_stats,
	.ndo_vlan_rx_add_vid	= virtnet_vlan_rx_add_vid,
	.ndo_vlan_rx_kill_vid	= virtnet_vlan_rx_kill_vid,
	.ndo_bpf		= virtnet_xdp,
	.ndo_xdp_xmit		= virtnet_xdp_xmit,
	.ndo_xsk_wakeup		= virtnet_xsk_wakeup,
	.ndo_features_check	= passthru_features_check,
	.ndo_get_phys_port_name	= virtnet_get_phys_port_name,
	.ndo_set_features	= virtnet_set_features,
	.ndo_tx_timeout		= virtnet_tx_timeout,
};
static void virtnet_config_changed_work(struct work_struct *work)
{
	struct virtnet_info *vi =
		container_of(work, struct virtnet_info, config_work);
	u16 v;

	if (virtio_cread_feature(vi->vdev, VIRTIO_NET_F_STATUS,
				 struct virtio_net_config, status, &v) < 0)
		return;

	if (v & VIRTIO_NET_S_ANNOUNCE) {
		netdev_notify_peers(vi->dev);
		virtnet_ack_link_announce(vi);
	}

	/* Ignore unknown (future) status bits */
	v &= VIRTIO_NET_S_LINK_UP;

	if (vi->status == v)
		return;

	vi->status = v;

	if (vi->status & VIRTIO_NET_S_LINK_UP) {
		virtnet_update_settings(vi);
		netif_carrier_on(vi->dev);
		netif_tx_wake_all_queues(vi->dev);
	} else {
		netif_carrier_off(vi->dev);
		netif_tx_stop_all_queues(vi->dev);
	}
}

static void virtnet_config_changed(struct virtio_device *vdev)
{
	struct virtnet_info *vi = vdev->priv;

	schedule_work(&vi->config_work);
}
static void virtnet_free_queues(struct virtnet_info *vi)
{
	int i;

	for (i = 0; i < vi->max_queue_pairs; i++) {
		__netif_napi_del(&vi->rq[i].napi);
		__netif_napi_del(&vi->sq[i].napi);
	}

	/* We called __netif_napi_del(),
	 * we need to respect an RCU grace period before freeing vi->rq
	 */
	synchronize_net();

	kfree(vi->rq);
	kfree(vi->sq);
	kfree(vi->ctrl);
}

static void _free_receive_bufs(struct virtnet_info *vi)
{
	struct bpf_prog *old_prog;
	int i;

	for (i = 0; i < vi->max_queue_pairs; i++) {
		while (vi->rq[i].pages)
			__free_pages(get_a_page(&vi->rq[i], GFP_KERNEL), 0);

		old_prog = rtnl_dereference(vi->rq[i].xdp_prog);
		RCU_INIT_POINTER(vi->rq[i].xdp_prog, NULL);
		if (old_prog)
			bpf_prog_put(old_prog);
	}
}

static void free_receive_bufs(struct virtnet_info *vi)
{
	rtnl_lock();
	_free_receive_bufs(vi);
	rtnl_unlock();
}

static void free_receive_page_frags(struct virtnet_info *vi)
{
	int i;

	for (i = 0; i < vi->max_queue_pairs; i++)
		if (vi->rq[i].alloc_frag.page) {
			if (vi->rq[i].last_dma)
				virtnet_rq_unmap(&vi->rq[i], vi->rq[i].last_dma, 0);
			put_page(vi->rq[i].alloc_frag.page);
		}
}
static void virtnet_sq_free_unused_buf(struct virtqueue *vq, void *buf)
{
	struct virtnet_info *vi = vq->vdev->priv;
	struct send_queue *sq;
	int i = vq2txq(vq);

	sq = &vi->sq[i];

	switch (virtnet_xmit_ptr_unpack(&buf)) {
	case VIRTNET_XMIT_TYPE_SKB:
	case VIRTNET_XMIT_TYPE_SKB_ORPHAN:
		dev_kfree_skb(buf);
		break;

	case VIRTNET_XMIT_TYPE_XDP:
		xdp_return_frame(buf);
		break;

	case VIRTNET_XMIT_TYPE_XSK:
		xsk_tx_completed(sq->xsk_pool, 1);
		break;
	}
}
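/* Drain every buffer still queued in the send and receive virtqueues
 * during teardown, releasing skbs, XDP frames or XSK slots as appropriate
 * for each buffer type.
 */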
static void free_unused_bufs(struct virtnet_info *vi)
{
	void *buf;
	int i;

	for (i = 0; i < vi->max_queue_pairs; i++) {
		struct virtqueue *vq = vi->sq[i].vq;
		while ((buf = virtqueue_detach_unused_buf(vq)) != NULL)
			virtnet_sq_free_unused_buf(vq, buf);
		cond_resched();
	}

	for (i = 0; i < vi->max_queue_pairs; i++) {
		struct virtqueue *vq = vi->rq[i].vq;

		while ((buf = virtqueue_detach_unused_buf(vq)) != NULL)
			virtnet_rq_unmap_free_buf(vq, buf);
		cond_resched();
	}
}

static void virtnet_del_vqs(struct virtnet_info *vi)
{
	struct virtio_device *vdev = vi->vdev;

	virtnet_clean_affinity(vi);

	vdev->config->del_vqs(vdev);

	virtnet_free_queues(vi);
}
/* How large should a single buffer be so a queue full of these can fit at
 * least one full packet?
 * Logic below assumes the mergeable buffer header is used.
 */
static unsigned int mergeable_min_buf_len(struct virtnet_info *vi, struct virtqueue *vq)
{
	const unsigned int hdr_len = vi->hdr_len;
	unsigned int rq_size = virtqueue_get_vring_size(vq);
	unsigned int packet_len = vi->big_packets ? IP_MAX_MTU : vi->dev->max_mtu;
	unsigned int buf_len = hdr_len + ETH_HLEN + VLAN_HLEN + packet_len;
	unsigned int min_buf_len = DIV_ROUND_UP(buf_len, rq_size);

	return max(max(min_buf_len, hdr_len) - hdr_len,
		   (unsigned int)GOOD_PACKET_LEN);
}
static int virtnet_find_vqs(struct virtnet_info *vi)
{
	struct virtqueue_info *vqs_info;
	struct virtqueue **vqs;
	int ret = -ENOMEM;
	int total_vqs;
	bool *ctx;
	u16 i;

	/* We expect 1 RX virtqueue followed by 1 TX virtqueue, followed by
	 * possible N-1 RX/TX queue pairs used in multiqueue mode, followed by
	 * possible control vq.
	 */
	total_vqs = vi->max_queue_pairs * 2 +
		    virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VQ);

	/* Allocate space for find_vqs parameters */
	vqs = kcalloc(total_vqs, sizeof(*vqs), GFP_KERNEL);
	if (!vqs)
		goto err_vq;
	vqs_info = kcalloc(total_vqs, sizeof(*vqs_info), GFP_KERNEL);
	if (!vqs_info)
		goto err_vqs_info;
	if (!vi->big_packets || vi->mergeable_rx_bufs) {
		ctx = kcalloc(total_vqs, sizeof(*ctx), GFP_KERNEL);
		if (!ctx)
			goto err_ctx;
	} else {
		ctx = NULL;
	}

	/* Parameters for control virtqueue, if any */
	if (vi->has_cvq) {
		vqs_info[total_vqs - 1].name = "control";
	}

	/* Allocate/initialize parameters for send/receive virtqueues */
	for (i = 0; i < vi->max_queue_pairs; i++) {
		vqs_info[rxq2vq(i)].callback = skb_recv_done;
		vqs_info[txq2vq(i)].callback = skb_xmit_done;
		sprintf(vi->rq[i].name, "input.%u", i);
		sprintf(vi->sq[i].name, "output.%u", i);
		vqs_info[rxq2vq(i)].name = vi->rq[i].name;
		vqs_info[txq2vq(i)].name = vi->sq[i].name;
		if (ctx)
			vqs_info[rxq2vq(i)].ctx = true;
	}

	ret = virtio_find_vqs(vi->vdev, total_vqs, vqs, vqs_info, NULL);
	if (ret)
		goto err_find;

	if (vi->has_cvq) {
		vi->cvq = vqs[total_vqs - 1];
		if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VLAN))
			vi->dev->features |= NETIF_F_HW_VLAN_CTAG_FILTER;
	}

	for (i = 0; i < vi->max_queue_pairs; i++) {
		vi->rq[i].vq = vqs[rxq2vq(i)];
		vi->rq[i].min_buf_len = mergeable_min_buf_len(vi, vi->rq[i].vq);
		vi->sq[i].vq = vqs[txq2vq(i)];
	}

	/* run here: ret == 0. */

err_find:
	kfree(ctx);
err_ctx:
	kfree(vqs_info);
err_vqs_info:
	kfree(vqs);
err_vq:
	return ret;
}
static int virtnet_alloc_queues(struct virtnet_info *vi)
{
	int i;

	vi->ctrl = kzalloc(sizeof(*vi->ctrl), GFP_KERNEL);
	if (!vi->ctrl)
		goto err_ctrl;

	vi->sq = kcalloc(vi->max_queue_pairs, sizeof(*vi->sq), GFP_KERNEL);
	if (!vi->sq)
		goto err_sq;
	vi->rq = kcalloc(vi->max_queue_pairs, sizeof(*vi->rq), GFP_KERNEL);
	if (!vi->rq)
		goto err_rq;

	INIT_DELAYED_WORK(&vi->refill, refill_work);
	for (i = 0; i < vi->max_queue_pairs; i++) {
		vi->rq[i].pages = NULL;
		netif_napi_add_weight(vi->dev, &vi->rq[i].napi, virtnet_poll,
				      napi_weight);
		netif_napi_add_tx_weight(vi->dev, &vi->sq[i].napi,
					 virtnet_poll_tx,
					 napi_tx ? napi_weight : 0);

		sg_init_table(vi->rq[i].sg, ARRAY_SIZE(vi->rq[i].sg));
		ewma_pkt_len_init(&vi->rq[i].mrg_avg_pkt_len);
		sg_init_table(vi->sq[i].sg, ARRAY_SIZE(vi->sq[i].sg));

		u64_stats_init(&vi->rq[i].stats.syncp);
		u64_stats_init(&vi->sq[i].stats.syncp);
		mutex_init(&vi->rq[i].dim_lock);
	}

	return 0;

err_rq:
	kfree(vi->sq);
err_sq:
	kfree(vi->ctrl);
err_ctrl:
	return -ENOMEM;
}
static int init_vqs(struct virtnet_info *vi)
{
	int ret;

	/* Allocate send & receive queues */
	ret = virtnet_alloc_queues(vi);
	if (ret)
		goto err;

	ret = virtnet_find_vqs(vi);
	if (ret)
		goto err_free;

	virtnet_set_affinity(vi);

	return 0;

err_free:
	virtnet_free_queues(vi);
err:
	return ret;
}
#ifdef CONFIG_SYSFS
static ssize_t mergeable_rx_buffer_size_show(struct netdev_rx_queue *queue,
					     char *buf)
{
	struct virtnet_info *vi = netdev_priv(queue->dev);
	unsigned int queue_index = get_netdev_rx_queue_index(queue);
	unsigned int headroom = virtnet_get_headroom(vi);
	unsigned int tailroom = headroom ? sizeof(struct skb_shared_info) : 0;
	struct ewma_pkt_len *avg;

	BUG_ON(queue_index >= vi->max_queue_pairs);
	avg = &vi->rq[queue_index].mrg_avg_pkt_len;
	return sprintf(buf, "%u\n",
		       get_mergeable_buf_len(&vi->rq[queue_index], avg,
					     SKB_DATA_ALIGN(headroom + tailroom)));
}

static struct rx_queue_attribute mergeable_rx_buffer_size_attribute =
	__ATTR_RO(mergeable_rx_buffer_size);

static struct attribute *virtio_net_mrg_rx_attrs[] = {
	&mergeable_rx_buffer_size_attribute.attr,
	NULL
};

static const struct attribute_group virtio_net_mrg_rx_group = {
	.name = "virtio_net",
	.attrs = virtio_net_mrg_rx_attrs
};
#endif
static bool virtnet_fail_on_feature(struct virtio_device *vdev,
				    unsigned int fbit,
				    const char *fname, const char *dname)
{
	if (!virtio_has_feature(vdev, fbit))
		return false;

	dev_err(&vdev->dev, "device advertises feature %s but not %s",
		fname, dname);

	return true;
}

#define VIRTNET_FAIL_ON(vdev, fbit, dbit)			\
	virtnet_fail_on_feature(vdev, fbit, #fbit, dbit)

static bool virtnet_validate_features(struct virtio_device *vdev)
{
	if (!virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ) &&
	    (VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_CTRL_RX,
			     "VIRTIO_NET_F_CTRL_VQ") ||
	     VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_CTRL_VLAN,
			     "VIRTIO_NET_F_CTRL_VQ") ||
	     VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_GUEST_ANNOUNCE,
			     "VIRTIO_NET_F_CTRL_VQ") ||
	     VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_MQ, "VIRTIO_NET_F_CTRL_VQ") ||
	     VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_CTRL_MAC_ADDR,
			     "VIRTIO_NET_F_CTRL_VQ") ||
	     VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_RSS,
			     "VIRTIO_NET_F_CTRL_VQ") ||
	     VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_HASH_REPORT,
			     "VIRTIO_NET_F_CTRL_VQ") ||
	     VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_NOTF_COAL,
			     "VIRTIO_NET_F_CTRL_VQ") ||
	     VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_VQ_NOTF_COAL,
			     "VIRTIO_NET_F_CTRL_VQ"))) {
		return false;
	}

	return true;
}

#define MIN_MTU ETH_MIN_MTU
#define MAX_MTU ETH_MAX_MTU

static int virtnet_validate(struct virtio_device *vdev)
{
	if (!vdev->config->get) {
		dev_err(&vdev->dev, "%s failure: config access disabled\n",
			__func__);
		return -EINVAL;
	}

	if (!virtnet_validate_features(vdev))
		return -EINVAL;

	if (virtio_has_feature(vdev, VIRTIO_NET_F_MTU)) {
		int mtu = virtio_cread16(vdev,
					 offsetof(struct virtio_net_config,
						  mtu));
		if (mtu < MIN_MTU)
			__virtio_clear_bit(vdev, VIRTIO_NET_F_MTU);
	}

	if (virtio_has_feature(vdev, VIRTIO_NET_F_STANDBY) &&
	    !virtio_has_feature(vdev, VIRTIO_NET_F_MAC)) {
		dev_warn(&vdev->dev, "device advertises feature VIRTIO_NET_F_STANDBY but not VIRTIO_NET_F_MAC, disabling standby");
		__virtio_clear_bit(vdev, VIRTIO_NET_F_STANDBY);
	}

	return 0;
}
static bool virtnet_check_guest_gso(const struct virtnet_info *vi)
{
	return virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_TSO4) ||
		virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_TSO6) ||
		virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_ECN) ||
		virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_UFO) ||
		(virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_USO4) &&
		 virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_USO6));
}

static void virtnet_set_big_packets(struct virtnet_info *vi, const int mtu)
{
	bool guest_gso = virtnet_check_guest_gso(vi);

	/* If device can receive ANY guest GSO packets, regardless of mtu,
	 * allocate packets of maximum size, otherwise limit it to only
	 * mtu size worth only.
	 */
	if (mtu > ETH_DATA_LEN || guest_gso) {
		vi->big_packets = true;
		vi->big_packets_num_skbfrags = guest_gso ? MAX_SKB_FRAGS : DIV_ROUND_UP(mtu, PAGE_SIZE);
	}
}

#define VIRTIO_NET_HASH_REPORT_MAX_TABLE      10
static enum xdp_rss_hash_type
virtnet_xdp_rss_type[VIRTIO_NET_HASH_REPORT_MAX_TABLE] = {
	[VIRTIO_NET_HASH_REPORT_NONE] = XDP_RSS_TYPE_NONE,
	[VIRTIO_NET_HASH_REPORT_IPv4] = XDP_RSS_TYPE_L3_IPV4,
	[VIRTIO_NET_HASH_REPORT_TCPv4] = XDP_RSS_TYPE_L4_IPV4_TCP,
	[VIRTIO_NET_HASH_REPORT_UDPv4] = XDP_RSS_TYPE_L4_IPV4_UDP,
	[VIRTIO_NET_HASH_REPORT_IPv6] = XDP_RSS_TYPE_L3_IPV6,
	[VIRTIO_NET_HASH_REPORT_TCPv6] = XDP_RSS_TYPE_L4_IPV6_TCP,
	[VIRTIO_NET_HASH_REPORT_UDPv6] = XDP_RSS_TYPE_L4_IPV6_UDP,
	[VIRTIO_NET_HASH_REPORT_IPv6_EX] = XDP_RSS_TYPE_L3_IPV6_EX,
	[VIRTIO_NET_HASH_REPORT_TCPv6_EX] = XDP_RSS_TYPE_L4_IPV6_TCP_EX,
	[VIRTIO_NET_HASH_REPORT_UDPv6_EX] = XDP_RSS_TYPE_L4_IPV6_UDP_EX
};

static int virtnet_xdp_rx_hash(const struct xdp_md *_ctx, u32 *hash,
			       enum xdp_rss_hash_type *rss_type)
{
	const struct xdp_buff *xdp = (void *)_ctx;
	struct virtio_net_hdr_v1_hash *hdr_hash;
	struct virtnet_info *vi;
	u16 hash_report;

	if (!(xdp->rxq->dev->features & NETIF_F_RXHASH))
		return -ENODATA;

	vi = netdev_priv(xdp->rxq->dev);
	hdr_hash = (struct virtio_net_hdr_v1_hash *)(xdp->data - vi->hdr_len);
	hash_report = __le16_to_cpu(hdr_hash->hash_report);

	if (hash_report >= VIRTIO_NET_HASH_REPORT_MAX_TABLE)
		hash_report = VIRTIO_NET_HASH_REPORT_NONE;

	*rss_type = virtnet_xdp_rss_type[hash_report];
	*hash = __le32_to_cpu(hdr_hash->hash_value);
	return 0;
}

static const struct xdp_metadata_ops virtnet_xdp_metadata_ops = {
	.xmo_rx_hash			= virtnet_xdp_rx_hash,
};
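/* Device probe: read the device configuration, set up netdev features and
 * queues, register the net_device, then negotiate runtime state (RSS,
 * queue pairs, MAC, device stats capabilities) over the control virtqueue.
 */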
6603 static int virtnet_probe(struct virtio_device
*vdev
)
6605 int i
, err
= -ENOMEM
;
6606 struct net_device
*dev
;
6607 struct virtnet_info
*vi
;
6608 u16 max_queue_pairs
;
6611 /* Find if host supports multiqueue/rss virtio_net device */
6612 max_queue_pairs
= 1;
6613 if (virtio_has_feature(vdev
, VIRTIO_NET_F_MQ
) || virtio_has_feature(vdev
, VIRTIO_NET_F_RSS
))
6615 virtio_cread16(vdev
, offsetof(struct virtio_net_config
, max_virtqueue_pairs
));
6617 /* We need at least 2 queue's */
6618 if (max_queue_pairs
< VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN
||
6619 max_queue_pairs
> VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX
||
6620 !virtio_has_feature(vdev
, VIRTIO_NET_F_CTRL_VQ
))
6621 max_queue_pairs
= 1;
6623 /* Allocate ourselves a network device with room for our info */
6624 dev
= alloc_etherdev_mq(sizeof(struct virtnet_info
), max_queue_pairs
);
6628 /* Set up network device as normal. */
6629 dev
->priv_flags
|= IFF_UNICAST_FLT
| IFF_LIVE_ADDR_CHANGE
|
6630 IFF_TX_SKB_NO_LINEAR
;
6631 dev
->netdev_ops
= &virtnet_netdev
;
6632 dev
->stat_ops
= &virtnet_stat_ops
;
6633 dev
->features
= NETIF_F_HIGHDMA
;
6635 dev
->ethtool_ops
= &virtnet_ethtool_ops
;
6636 SET_NETDEV_DEV(dev
, &vdev
->dev
);
6638 /* Do we support "hardware" checksums? */
6639 if (virtio_has_feature(vdev
, VIRTIO_NET_F_CSUM
)) {
6640 /* This opens up the world of extra features. */
6641 dev
->hw_features
|= NETIF_F_HW_CSUM
| NETIF_F_SG
;
6643 dev
->features
|= NETIF_F_HW_CSUM
| NETIF_F_SG
;
6645 if (virtio_has_feature(vdev
, VIRTIO_NET_F_GSO
)) {
6646 dev
->hw_features
|= NETIF_F_TSO
6647 | NETIF_F_TSO_ECN
| NETIF_F_TSO6
;
6649 /* Individual feature bits: what can host handle? */
6650 if (virtio_has_feature(vdev
, VIRTIO_NET_F_HOST_TSO4
))
6651 dev
->hw_features
|= NETIF_F_TSO
;
6652 if (virtio_has_feature(vdev
, VIRTIO_NET_F_HOST_TSO6
))
6653 dev
->hw_features
|= NETIF_F_TSO6
;
6654 if (virtio_has_feature(vdev
, VIRTIO_NET_F_HOST_ECN
))
6655 dev
->hw_features
|= NETIF_F_TSO_ECN
;
6656 if (virtio_has_feature(vdev
, VIRTIO_NET_F_HOST_USO
))
6657 dev
->hw_features
|= NETIF_F_GSO_UDP_L4
;
6659 dev
->features
|= NETIF_F_GSO_ROBUST
;
6662 dev
->features
|= dev
->hw_features
& NETIF_F_ALL_TSO
;
6663 /* (!csum && gso) case will be fixed by register_netdev() */
6666 /* 1. With VIRTIO_NET_F_GUEST_CSUM negotiation, the driver doesn't
6667 * need to calculate checksums for partially checksummed packets,
6668 * as they're considered valid by the upper layer.
6669 * 2. Without VIRTIO_NET_F_GUEST_CSUM negotiation, the driver only
6670 * receives fully checksummed packets. The device may assist in
6671 * validating these packets' checksums, so the driver won't have to.
6673 dev
->features
|= NETIF_F_RXCSUM
;
6675 if (virtio_has_feature(vdev
, VIRTIO_NET_F_GUEST_TSO4
) ||
6676 virtio_has_feature(vdev
, VIRTIO_NET_F_GUEST_TSO6
))
6677 dev
->features
|= NETIF_F_GRO_HW
;
6678 if (virtio_has_feature(vdev
, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS
))
6679 dev
->hw_features
|= NETIF_F_GRO_HW
;
6681 dev
->vlan_features
= dev
->features
;
6682 dev
->xdp_features
= NETDEV_XDP_ACT_BASIC
| NETDEV_XDP_ACT_REDIRECT
|
6683 NETDEV_XDP_ACT_XSK_ZEROCOPY
;
6685 /* MTU range: 68 - 65535 */
6686 dev
->min_mtu
= MIN_MTU
;
6687 dev
->max_mtu
= MAX_MTU
;
6689 /* Configuration may specify what MAC to use. Otherwise random. */
6690 if (virtio_has_feature(vdev
, VIRTIO_NET_F_MAC
)) {
6693 virtio_cread_bytes(vdev
,
6694 offsetof(struct virtio_net_config
, mac
),
6696 eth_hw_addr_set(dev
, addr
);
6698 eth_hw_addr_random(dev
);
6699 dev_info(&vdev
->dev
, "Assigned random MAC address %pM\n",
6703 /* Set up our device-specific information */
6704 vi
= netdev_priv(dev
);
6709 INIT_WORK(&vi
->config_work
, virtnet_config_changed_work
);
6710 INIT_WORK(&vi
->rx_mode_work
, virtnet_rx_mode_work
);
6711 spin_lock_init(&vi
->refill_lock
);
6713 if (virtio_has_feature(vdev
, VIRTIO_NET_F_MRG_RXBUF
)) {
6714 vi
->mergeable_rx_bufs
= true;
6715 dev
->xdp_features
|= NETDEV_XDP_ACT_RX_SG
;
6718 if (virtio_has_feature(vdev
, VIRTIO_NET_F_HASH_REPORT
))
6719 vi
->has_rss_hash_report
= true;
6721 if (virtio_has_feature(vdev
, VIRTIO_NET_F_RSS
)) {
6724 vi
->rss_indir_table_size
=
6725 virtio_cread16(vdev
, offsetof(struct virtio_net_config
,
6726 rss_max_indirection_table_length
));
6728 err
= rss_indirection_table_alloc(&vi
->rss
, vi
->rss_indir_table_size
);
6732 if (vi
->has_rss
|| vi
->has_rss_hash_report
) {
6734 virtio_cread8(vdev
, offsetof(struct virtio_net_config
, rss_max_key_size
));
6735 if (vi
->rss_key_size
> VIRTIO_NET_RSS_MAX_KEY_SIZE
) {
6736 dev_err(&vdev
->dev
, "rss_max_key_size=%u exceeds the limit %u.\n",
6737 vi
->rss_key_size
, VIRTIO_NET_RSS_MAX_KEY_SIZE
);
6742 vi
->rss_hash_types_supported
=
6743 virtio_cread32(vdev
, offsetof(struct virtio_net_config
, supported_hash_types
));
6744 vi
->rss_hash_types_supported
&=
6745 ~(VIRTIO_NET_RSS_HASH_TYPE_IP_EX
|
6746 VIRTIO_NET_RSS_HASH_TYPE_TCP_EX
|
6747 VIRTIO_NET_RSS_HASH_TYPE_UDP_EX
);
6749 dev
->hw_features
|= NETIF_F_RXHASH
;
6750 dev
->xdp_metadata_ops
= &virtnet_xdp_metadata_ops
;
6753 if (vi
->has_rss_hash_report
)
6754 vi
->hdr_len
= sizeof(struct virtio_net_hdr_v1_hash
);
6755 else if (virtio_has_feature(vdev
, VIRTIO_NET_F_MRG_RXBUF
) ||
6756 virtio_has_feature(vdev
, VIRTIO_F_VERSION_1
))
6757 vi
->hdr_len
= sizeof(struct virtio_net_hdr_mrg_rxbuf
);
6759 vi
->hdr_len
= sizeof(struct virtio_net_hdr
);
6761 if (virtio_has_feature(vdev
, VIRTIO_F_ANY_LAYOUT
) ||
6762 virtio_has_feature(vdev
, VIRTIO_F_VERSION_1
))
6763 vi
->any_header_sg
= true;
6765 if (virtio_has_feature(vdev
, VIRTIO_NET_F_CTRL_VQ
))
6768 mutex_init(&vi
->cvq_lock
);
6770 if (virtio_has_feature(vdev
, VIRTIO_NET_F_MTU
)) {
6771 mtu
= virtio_cread16(vdev
,
6772 offsetof(struct virtio_net_config
,
6774 if (mtu
< dev
->min_mtu
) {
6775 /* Should never trigger: MTU was previously validated
6776 * in virtnet_validate.
6779 "device MTU appears to have changed it is now %d < %d",
6789 virtnet_set_big_packets(vi
, mtu
);
	if (vi->any_header_sg)
		dev->needed_headroom = vi->hdr_len;
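	/* Reserving hdr_len of headroom lets the transmit path prepend the
	 * virtio-net header in front of the packet data in the same buffer
	 * when any_header_sg is set.
	 */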
	/* Enable multiqueue by default */
	if (num_online_cpus() >= max_queue_pairs)
		vi->curr_queue_pairs = max_queue_pairs;
	else
		vi->curr_queue_pairs = num_online_cpus();
	vi->max_queue_pairs = max_queue_pairs;
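	/* The active queue-pair count defaults to the smaller of the number of
	 * online CPUs and the maximum advertised by the device; max_queue_pairs
	 * remains the hard limit for later ethtool -L changes.
	 */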
	/* Allocate/initialize the rx/tx queues, and invoke find_vqs */
	err = init_vqs(vi);

	if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_NOTF_COAL)) {
		vi->intr_coal_rx.max_usecs = 0;
		vi->intr_coal_tx.max_usecs = 0;
		vi->intr_coal_rx.max_packets = 0;

		/* Keep the default values of the coalescing parameters
		 * aligned with the default napi_tx state.
		 */
		if (vi->sq[0].napi.weight)
			vi->intr_coal_tx.max_packets = 1;
		else
			vi->intr_coal_tx.max_packets = 0;
	}
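	/* With TX NAPI enabled (napi.weight != 0) a tx-packets threshold of 1
	 * requests a notification for every completed packet, presumably so that
	 * the NAPI poller reaps TX completions promptly; without TX NAPI the
	 * threshold is left at 0.
	 */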
	if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_VQ_NOTF_COAL)) {
		/* The reason is the same as VIRTIO_NET_F_NOTF_COAL. */
		for (i = 0; i < vi->max_queue_pairs; i++)
			if (vi->sq[i].napi.weight)
				vi->sq[i].intr_coal.max_packets = 1;
	}

	err = virtnet_init_irq_moder(vi);
	if (vi->mergeable_rx_bufs)
		dev->sysfs_rx_queue_group = &virtio_net_mrg_rx_group;
	netif_set_real_num_tx_queues(dev, vi->curr_queue_pairs);
	netif_set_real_num_rx_queues(dev, vi->curr_queue_pairs);

	virtnet_init_settings(dev);
	if (virtio_has_feature(vdev, VIRTIO_NET_F_STANDBY)) {
		vi->failover = net_failover_create(vi->dev);
		if (IS_ERR(vi->failover)) {
			err = PTR_ERR(vi->failover);
		}
	}
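	/* With VIRTIO_NET_F_STANDBY the virtio interface acts as the standby leg
	 * of a failover pair (typically backing a passthrough VF primary), and
	 * net_failover_create() instantiates the failover master netdev.
	 */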
	if (vi->has_rss || vi->has_rss_hash_report)
		virtnet_init_default_rss(vi);

	enable_rx_mode_work(vi);
	/* serialize netdev register + virtio_device_ready() with ndo_open() */
	err = register_netdevice(dev);
	if (err)
		pr_debug("virtio_net: registering device failed\n");

	/* Disable config change notification until ndo_open. */
	virtio_config_driver_disable(vi->vdev);

	virtio_device_ready(vdev);
	if (vi->has_rss || vi->has_rss_hash_report) {
		if (!virtnet_commit_rss_command(vi)) {
			dev_warn(&vdev->dev, "RSS disabled because committing failed.\n");
			dev->hw_features &= ~NETIF_F_RXHASH;
			vi->has_rss_hash_report = false;
			vi->has_rss = false;
		}
	}

	virtnet_set_queues(vi, vi->curr_queue_pairs);
	/* a random MAC address has been assigned, notify the device.
	 * We don't fail probe if VIRTIO_NET_F_CTRL_MAC_ADDR is not there
	 * because many devices work fine without getting the MAC set explicitly.
	 */
	if (!virtio_has_feature(vdev, VIRTIO_NET_F_MAC) &&
	    virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_MAC_ADDR)) {
		struct scatterlist sg;

		sg_init_one(&sg, dev->dev_addr, dev->addr_len);
		if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MAC,
					  VIRTIO_NET_CTRL_MAC_ADDR_SET, &sg)) {
			pr_debug("virtio_net: setting MAC address failed\n");
			goto free_unregister_netdev;
		}
	}
	if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_DEVICE_STATS)) {
		struct virtio_net_stats_capabilities *stats_cap __free(kfree) = NULL;
		struct scatterlist sg;
		__le64 v;

		stats_cap = kzalloc(sizeof(*stats_cap), GFP_KERNEL);
		if (!stats_cap)
			goto free_unregister_netdev;

		sg_init_one(&sg, stats_cap, sizeof(*stats_cap));

		if (!virtnet_send_command_reply(vi, VIRTIO_NET_CTRL_STATS,
						VIRTIO_NET_CTRL_STATS_QUERY,
						NULL, &sg)) {
			pr_debug("virtio_net: failed to get stats capability\n");
			goto free_unregister_netdev;
		}

		v = stats_cap->supported_stats_types[0];
		vi->device_stats_cap = le64_to_cpu(v);
	}
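	/* device_stats_cap caches the bitmap of statistics types the device can
	 * report, so later statistics queries only ask for supported groups.
	 */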
	/* Assume link up if device can't report link status,
	 * otherwise get link status from config.
	 */
	netif_carrier_off(dev);
	if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_STATUS)) {
		virtnet_config_changed_work(&vi->config_work);
	} else {
		vi->status = VIRTIO_NET_S_LINK_UP;
		virtnet_update_settings(vi);
		netif_carrier_on(dev);
	}
	for (i = 0; i < ARRAY_SIZE(guest_offloads); i++)
		if (virtio_has_feature(vi->vdev, guest_offloads[i]))
			set_bit(guest_offloads[i], &vi->guest_offloads);
	vi->guest_offloads_capable = vi->guest_offloads;
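	/* Snapshot the negotiated guest offloads: guest_offloads may later be
	 * trimmed (e.g. while an XDP program without frag support is attached),
	 * and guest_offloads_capable records what can be restored afterwards.
	 */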
	err = virtnet_cpu_notif_add(vi);
	if (err) {
		pr_debug("virtio_net: registering cpu notifier failed\n");
		goto free_unregister_netdev;
	}

	pr_debug("virtnet: registered device %s with %d RX and TX vq's\n",
		 dev->name, max_queue_pairs);

	return 0;
free_unregister_netdev:
	unregister_netdev(dev);
	net_failover_destroy(vi->failover);
	virtio_reset_device(vdev);
	cancel_delayed_work_sync(&vi->refill);
	free_receive_page_frags(vi);
	virtnet_del_vqs(vi);
	return err;
}
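
/* remove_vq_common() is shared by device removal and the PM freeze path: it
 * resets the device so no vring is in use, then frees buffers and deletes the
 * virtqueues.
 */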
static void remove_vq_common(struct virtnet_info *vi)
{
	virtio_reset_device(vi->vdev);

	/* Free unused buffers in both send and recv, if any. */
	free_unused_bufs(vi);

	free_receive_bufs(vi);

	free_receive_page_frags(vi);

	virtnet_del_vqs(vi);
}
static void virtnet_remove(struct virtio_device *vdev)
{
	struct virtnet_info *vi = vdev->priv;

	virtnet_cpu_notif_remove(vi);

	/* Make sure no work handler is accessing the device. */
	flush_work(&vi->config_work);
	disable_rx_mode_work(vi);
	flush_work(&vi->rx_mode_work);

	virtnet_free_irq_moder(vi);

	unregister_netdev(vi->dev);

	net_failover_destroy(vi->failover);

	remove_vq_common(vi);

	rss_indirection_table_free(&vi->rss);

	free_netdev(vi->dev);
}
static __maybe_unused int virtnet_freeze(struct virtio_device *vdev)
{
	struct virtnet_info *vi = vdev->priv;

	virtnet_cpu_notif_remove(vi);
	virtnet_freeze_down(vdev);
	remove_vq_common(vi);

	return 0;
}
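
/* Restore rebuilds what freeze tore down: bring the device back up,
 * re-program the queue-pair count and re-register the CPU hotplug notifier;
 * on failure the device is torn down again so the system is left consistent.
 */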
static __maybe_unused int virtnet_restore(struct virtio_device *vdev)
{
	struct virtnet_info *vi = vdev->priv;
	int err;

	err = virtnet_restore_up(vdev);
	if (err)
		return err;

	virtnet_set_queues(vi, vi->curr_queue_pairs);

	err = virtnet_cpu_notif_add(vi);
	if (err) {
		virtnet_freeze_down(vdev);
		remove_vq_common(vi);
		return err;
	}

	return 0;
}
static struct virtio_device_id id_table[] = {
	{ VIRTIO_ID_NET, VIRTIO_DEV_ANY_ID },
	{ 0 },
};
#define VIRTNET_FEATURES \
	VIRTIO_NET_F_CSUM, VIRTIO_NET_F_GUEST_CSUM, \
	VIRTIO_NET_F_HOST_TSO4, VIRTIO_NET_F_HOST_UFO, VIRTIO_NET_F_HOST_TSO6, \
	VIRTIO_NET_F_HOST_ECN, VIRTIO_NET_F_GUEST_TSO4, VIRTIO_NET_F_GUEST_TSO6, \
	VIRTIO_NET_F_GUEST_ECN, VIRTIO_NET_F_GUEST_UFO, \
	VIRTIO_NET_F_HOST_USO, VIRTIO_NET_F_GUEST_USO4, VIRTIO_NET_F_GUEST_USO6, \
	VIRTIO_NET_F_MRG_RXBUF, VIRTIO_NET_F_STATUS, VIRTIO_NET_F_CTRL_VQ, \
	VIRTIO_NET_F_CTRL_RX, VIRTIO_NET_F_CTRL_VLAN, \
	VIRTIO_NET_F_GUEST_ANNOUNCE, VIRTIO_NET_F_MQ, \
	VIRTIO_NET_F_CTRL_MAC_ADDR, \
	VIRTIO_NET_F_MTU, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS, \
	VIRTIO_NET_F_SPEED_DUPLEX, VIRTIO_NET_F_STANDBY, \
	VIRTIO_NET_F_RSS, VIRTIO_NET_F_HASH_REPORT, VIRTIO_NET_F_NOTF_COAL, \
	VIRTIO_NET_F_VQ_NOTF_COAL, \
	VIRTIO_NET_F_GUEST_HDRLEN, VIRTIO_NET_F_DEVICE_STATS
static unsigned int features[] = {
	VIRTNET_FEATURES,
};

static unsigned int features_legacy[] = {
	VIRTNET_FEATURES,
	VIRTIO_F_ANY_LAYOUT,
};
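
/* Legacy (pre-1.0) devices need VIRTIO_F_ANY_LAYOUT listed explicitly; for
 * modern devices the same guarantee is implied by VIRTIO_F_VERSION_1, which
 * the virtio core negotiates and which therefore is not part of this table.
 */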
static struct virtio_driver virtio_net_driver = {
	.feature_table = features,
	.feature_table_size = ARRAY_SIZE(features),
	.feature_table_legacy = features_legacy,
	.feature_table_size_legacy = ARRAY_SIZE(features_legacy),
	.driver.name = KBUILD_MODNAME,
	.id_table = id_table,
	.validate = virtnet_validate,
	.probe = virtnet_probe,
	.remove = virtnet_remove,
	.config_changed = virtnet_config_changed,
#ifdef CONFIG_PM_SLEEP
	.freeze = virtnet_freeze,
	.restore = virtnet_restore,
#endif
};
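
/* Two CPU hotplug states back the per-queue CPU affinity handling: a dynamic
 * "online" state and a "dead" cleanup state. Both are registered before the
 * virtio driver itself so that any probed device can rely on them.
 */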
static __init int virtio_net_driver_init(void)
{
	int ret;

	ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, "virtio/net:online",
				      virtnet_cpu_online,
				      virtnet_cpu_down_prep);
	if (ret < 0)
		goto out;
	virtionet_online = ret;
	ret = cpuhp_setup_state_multi(CPUHP_VIRT_NET_DEAD, "virtio/net:dead",
				      NULL, virtnet_cpu_dead);
	if (ret)
		goto err_dead;
	ret = register_virtio_driver(&virtio_net_driver);
	if (ret)
		goto err_virtio;
	return 0;
err_virtio:
	cpuhp_remove_multi_state(CPUHP_VIRT_NET_DEAD);
err_dead:
	cpuhp_remove_multi_state(virtionet_online);
out:
	return ret;
}
module_init(virtio_net_driver_init);
static __exit void virtio_net_driver_exit(void)
{
	unregister_virtio_driver(&virtio_net_driver);
	cpuhp_remove_multi_state(CPUHP_VIRT_NET_DEAD);
	cpuhp_remove_multi_state(virtionet_online);
}
module_exit(virtio_net_driver_exit);
MODULE_DEVICE_TABLE(virtio, id_table);
MODULE_DESCRIPTION("Virtio network driver");
MODULE_LICENSE("GPL");