// SPDX-License-Identifier: GPL-2.0-or-later
/* A network driver using virtio.
 *
 * Copyright 2007 Rusty Russell <rusty@rustcorp.com.au> IBM Corporation
 */
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/ethtool.h>
#include <linux/module.h>
#include <linux/virtio.h>
#include <linux/virtio_net.h>
#include <linux/bpf.h>
#include <linux/bpf_trace.h>
#include <linux/scatterlist.h>
#include <linux/if_vlan.h>
#include <linux/slab.h>
#include <linux/cpu.h>
#include <linux/average.h>
#include <linux/filter.h>
#include <linux/kernel.h>
#include <linux/dim.h>
#include <net/route.h>
#include <net/net_failover.h>
#include <net/netdev_rx_queue.h>
#include <net/netdev_queues.h>
#include <net/xdp_sock_drv.h>

static int napi_weight = NAPI_POLL_WEIGHT;
module_param(napi_weight, int, 0444);

static bool csum = true, gso = true, napi_tx = true;
module_param(csum, bool, 0444);
module_param(gso, bool, 0444);
module_param(napi_tx, bool, 0644);

/* FIXME: MTU in config. */
#define GOOD_PACKET_LEN (ETH_HLEN + VLAN_HLEN + ETH_DATA_LEN)
#define GOOD_COPY_LEN	128
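/* GOOD_PACKET_LEN works out to ETH_HLEN (14) + VLAN_HLEN (4) + ETH_DATA_LEN
 * (1500) = 1518 bytes, the largest frame a small receive buffer must hold.
 * GOOD_COPY_LEN is the copy-break threshold: short packets are copied into a
 * freshly allocated skb head so their receive pages can be reused (see
 * page_to_skb() below).
 */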
#define VIRTNET_RX_PAD (NET_IP_ALIGN + NET_SKB_PAD)

/* Separating two types of XDP xmit */
#define VIRTIO_XDP_TX		BIT(0)
#define VIRTIO_XDP_REDIR	BIT(1)

/* RX packet size EWMA. The average packet size is used to determine the packet
 * buffer size when refilling RX rings. As the entire RX ring may be refilled
 * at once, the weight is chosen so that the EWMA will be insensitive to short-
 * term, transient changes in packet size.
 */
DECLARE_EWMA(pkt_len, 0, 64)

#define VIRTNET_DRIVER_VERSION "1.0.0"

static const unsigned long guest_offloads[] = {
	VIRTIO_NET_F_GUEST_TSO4,
	VIRTIO_NET_F_GUEST_TSO6,
	VIRTIO_NET_F_GUEST_ECN,
	VIRTIO_NET_F_GUEST_UFO,
	VIRTIO_NET_F_GUEST_CSUM,
	VIRTIO_NET_F_GUEST_USO4,
	VIRTIO_NET_F_GUEST_USO6,
	VIRTIO_NET_F_GUEST_HDRLEN
};

#define GUEST_OFFLOAD_GRO_HW_MASK ((1ULL << VIRTIO_NET_F_GUEST_TSO4) | \
				   (1ULL << VIRTIO_NET_F_GUEST_TSO6) | \
				   (1ULL << VIRTIO_NET_F_GUEST_ECN)  | \
				   (1ULL << VIRTIO_NET_F_GUEST_UFO)  | \
				   (1ULL << VIRTIO_NET_F_GUEST_USO4) | \
				   (1ULL << VIRTIO_NET_F_GUEST_USO6))

struct virtnet_stat_desc {
	char desc[ETH_GSTRING_LEN];

struct virtnet_sq_free_stats {

struct virtnet_sq_stats {
	struct u64_stats_sync syncp;
	u64_stats_t xdp_tx_drops;
	u64_stats_t tx_timeouts;

struct virtnet_rq_stats {
	struct u64_stats_sync syncp;
	u64_stats_t xdp_packets;
	u64_stats_t xdp_redirects;
	u64_stats_t xdp_drops;

#define VIRTNET_SQ_STAT(name, m) {name, offsetof(struct virtnet_sq_stats, m), -1}
#define VIRTNET_RQ_STAT(name, m) {name, offsetof(struct virtnet_rq_stats, m), -1}

#define VIRTNET_SQ_STAT_QSTAT(name, m)				\
	offsetof(struct virtnet_sq_stats, m),			\
	offsetof(struct netdev_queue_stats_tx, m),		\

#define VIRTNET_RQ_STAT_QSTAT(name, m)				\
	offsetof(struct virtnet_rq_stats, m),			\
	offsetof(struct netdev_queue_stats_rx, m),		\

static const struct virtnet_stat_desc virtnet_sq_stats_desc[] = {
	VIRTNET_SQ_STAT("xdp_tx",       xdp_tx),
	VIRTNET_SQ_STAT("xdp_tx_drops", xdp_tx_drops),
	VIRTNET_SQ_STAT("kicks",        kicks),
	VIRTNET_SQ_STAT("tx_timeouts",  tx_timeouts),
};

static const struct virtnet_stat_desc virtnet_rq_stats_desc[] = {
	VIRTNET_RQ_STAT("drops",         drops),
	VIRTNET_RQ_STAT("xdp_packets",   xdp_packets),
	VIRTNET_RQ_STAT("xdp_tx",        xdp_tx),
	VIRTNET_RQ_STAT("xdp_redirects", xdp_redirects),
	VIRTNET_RQ_STAT("xdp_drops",     xdp_drops),
	VIRTNET_RQ_STAT("kicks",         kicks),
};

static const struct virtnet_stat_desc virtnet_sq_stats_desc_qstat[] = {
	VIRTNET_SQ_STAT_QSTAT("packets", packets),
	VIRTNET_SQ_STAT_QSTAT("bytes",   bytes),
	VIRTNET_SQ_STAT_QSTAT("stop",    stop),
	VIRTNET_SQ_STAT_QSTAT("wake",    wake),
};

static const struct virtnet_stat_desc virtnet_rq_stats_desc_qstat[] = {
	VIRTNET_RQ_STAT_QSTAT("packets", packets),
	VIRTNET_RQ_STAT_QSTAT("bytes",   bytes),
};

#define VIRTNET_STATS_DESC_CQ(name) \
	{#name, offsetof(struct virtio_net_stats_cvq, name), -1}

#define VIRTNET_STATS_DESC_RX(class, name) \
	{#name, offsetof(struct virtio_net_stats_rx_ ## class, rx_ ## name), -1}

#define VIRTNET_STATS_DESC_TX(class, name) \
	{#name, offsetof(struct virtio_net_stats_tx_ ## class, tx_ ## name), -1}

static const struct virtnet_stat_desc virtnet_stats_cvq_desc[] = {
	VIRTNET_STATS_DESC_CQ(command_num),
	VIRTNET_STATS_DESC_CQ(ok_num),
};

static const struct virtnet_stat_desc virtnet_stats_rx_basic_desc[] = {
	VIRTNET_STATS_DESC_RX(basic, packets),
	VIRTNET_STATS_DESC_RX(basic, bytes),

	VIRTNET_STATS_DESC_RX(basic, notifications),
	VIRTNET_STATS_DESC_RX(basic, interrupts),
};

static const struct virtnet_stat_desc virtnet_stats_tx_basic_desc[] = {
	VIRTNET_STATS_DESC_TX(basic, packets),
	VIRTNET_STATS_DESC_TX(basic, bytes),

	VIRTNET_STATS_DESC_TX(basic, notifications),
	VIRTNET_STATS_DESC_TX(basic, interrupts),
};

static const struct virtnet_stat_desc virtnet_stats_rx_csum_desc[] = {
	VIRTNET_STATS_DESC_RX(csum, needs_csum),
};

static const struct virtnet_stat_desc virtnet_stats_tx_gso_desc[] = {
	VIRTNET_STATS_DESC_TX(gso, gso_packets_noseg),
	VIRTNET_STATS_DESC_TX(gso, gso_bytes_noseg),
};

static const struct virtnet_stat_desc virtnet_stats_rx_speed_desc[] = {
	VIRTNET_STATS_DESC_RX(speed, ratelimit_bytes),
};

static const struct virtnet_stat_desc virtnet_stats_tx_speed_desc[] = {
	VIRTNET_STATS_DESC_TX(speed, ratelimit_bytes),
};

#define VIRTNET_STATS_DESC_RX_QSTAT(class, name, qstat_field)			\
	offsetof(struct virtio_net_stats_rx_ ## class, rx_ ## name),		\
	offsetof(struct netdev_queue_stats_rx, qstat_field),			\

#define VIRTNET_STATS_DESC_TX_QSTAT(class, name, qstat_field)			\
	offsetof(struct virtio_net_stats_tx_ ## class, tx_ ## name),		\
	offsetof(struct netdev_queue_stats_tx, qstat_field),			\

static const struct virtnet_stat_desc virtnet_stats_rx_basic_desc_qstat[] = {
	VIRTNET_STATS_DESC_RX_QSTAT(basic, drops,         hw_drops),
	VIRTNET_STATS_DESC_RX_QSTAT(basic, drop_overruns, hw_drop_overruns),
};

static const struct virtnet_stat_desc virtnet_stats_tx_basic_desc_qstat[] = {
	VIRTNET_STATS_DESC_TX_QSTAT(basic, drops,          hw_drops),
	VIRTNET_STATS_DESC_TX_QSTAT(basic, drop_malformed, hw_drop_errors),
};

static const struct virtnet_stat_desc virtnet_stats_rx_csum_desc_qstat[] = {
	VIRTNET_STATS_DESC_RX_QSTAT(csum, csum_valid, csum_unnecessary),
	VIRTNET_STATS_DESC_RX_QSTAT(csum, csum_none,  csum_none),
	VIRTNET_STATS_DESC_RX_QSTAT(csum, csum_bad,   csum_bad),
};

static const struct virtnet_stat_desc virtnet_stats_tx_csum_desc_qstat[] = {
	VIRTNET_STATS_DESC_TX_QSTAT(csum, csum_none,  csum_none),
	VIRTNET_STATS_DESC_TX_QSTAT(csum, needs_csum, needs_csum),
};

static const struct virtnet_stat_desc virtnet_stats_rx_gso_desc_qstat[] = {
	VIRTNET_STATS_DESC_RX_QSTAT(gso, gso_packets,           hw_gro_packets),
	VIRTNET_STATS_DESC_RX_QSTAT(gso, gso_bytes,             hw_gro_bytes),
	VIRTNET_STATS_DESC_RX_QSTAT(gso, gso_packets_coalesced, hw_gro_wire_packets),
	VIRTNET_STATS_DESC_RX_QSTAT(gso, gso_bytes_coalesced,   hw_gro_wire_bytes),
};

static const struct virtnet_stat_desc virtnet_stats_tx_gso_desc_qstat[] = {
	VIRTNET_STATS_DESC_TX_QSTAT(gso, gso_packets,        hw_gso_packets),
	VIRTNET_STATS_DESC_TX_QSTAT(gso, gso_bytes,          hw_gso_bytes),
	VIRTNET_STATS_DESC_TX_QSTAT(gso, gso_segments,       hw_gso_wire_packets),
	VIRTNET_STATS_DESC_TX_QSTAT(gso, gso_segments_bytes, hw_gso_wire_bytes),
};

static const struct virtnet_stat_desc virtnet_stats_rx_speed_desc_qstat[] = {
	VIRTNET_STATS_DESC_RX_QSTAT(speed, ratelimit_packets, hw_drop_ratelimits),
};

static const struct virtnet_stat_desc virtnet_stats_tx_speed_desc_qstat[] = {
	VIRTNET_STATS_DESC_TX_QSTAT(speed, ratelimit_packets, hw_drop_ratelimits),
};

#define VIRTNET_Q_TYPE_RX 0
#define VIRTNET_Q_TYPE_TX 1
#define VIRTNET_Q_TYPE_CQ 2
struct virtnet_interrupt_coalesce {

/* The DMA information of pages allocated at a time. */
struct virtnet_rq_dma {

/* Internal representation of a send virtqueue */
	/* Virtqueue associated with this send_queue */
	struct virtqueue *vq;

	/* TX: fragments + linear part + virtio header */
	struct scatterlist sg[MAX_SKB_FRAGS + 2];

	/* Name of the send queue: output.$index */

	struct virtnet_sq_stats stats;

	struct virtnet_interrupt_coalesce intr_coal;

	struct napi_struct napi;

	/* Record whether sq is in reset state. */

	struct xsk_buff_pool *xsk_pool;

	dma_addr_t xsk_hdr_dma_addr;

/* Internal representation of a receive virtqueue */
struct receive_queue {
	/* Virtqueue associated with this receive_queue */
	struct virtqueue *vq;

	struct napi_struct napi;

	struct bpf_prog __rcu *xdp_prog;

	struct virtnet_rq_stats stats;

	/* The number of rx notifications */

	/* Is dynamic interrupt moderation enabled? */

	/* Used to protect dim_enabled and intr_coal */
	struct mutex dim_lock;

	/* Dynamic Interrupt Moderation */

	struct virtnet_interrupt_coalesce intr_coal;

	/* Chain pages by the private ptr. */

	/* Average packet length for mergeable receive buffers. */
	struct ewma_pkt_len mrg_avg_pkt_len;

	/* Page frag for packet buffer allocation. */
	struct page_frag alloc_frag;

	/* RX: fragments + linear part + virtio header */
	struct scatterlist sg[MAX_SKB_FRAGS + 2];

	/* Min single buffer size for mergeable buffers case. */
	unsigned int min_buf_len;

	/* Name of this receive queue: input.$index */

	struct xdp_rxq_info xdp_rxq;

	/* Record the last dma info to free after new pages are allocated. */
	struct virtnet_rq_dma *last_dma;

	struct xsk_buff_pool *xsk_pool;

	/* xdp rxq used by xsk */
	struct xdp_rxq_info xsk_rxq_info;

	struct xdp_buff **xsk_buffs;

/* This structure can contain the RSS message with the maximum settings for the
 * indirection table and key size.
 * Note that the default structure describing the RSS configuration,
 * virtio_net_rss_config, carries the same information but cannot hold the
 * indirection table values.
 * In any case, the structure is passed to the virtio device through sg_buf,
 * split into parts, because the table sizes may differ according to the device
 * configuration.
 */
#define VIRTIO_NET_RSS_MAX_KEY_SIZE     40
struct virtio_net_ctrl_rss {
	u16 indirection_table_mask;
	u16 unclassified_queue;
	u16 hash_cfg_reserved; /* for HASH_CONFIG (see virtio_net_hash_config for details) */
	u8 key[VIRTIO_NET_RSS_MAX_KEY_SIZE];

	u16 *indirection_table;

/* Control VQ buffers: protected by the rtnl lock */
	struct virtio_net_ctrl_hdr hdr;
	virtio_net_ctrl_ack status;

struct virtnet_info {
	struct virtio_device *vdev;
	struct virtqueue *cvq;
	struct net_device *dev;
	struct send_queue *sq;
	struct receive_queue *rq;

	/* Max # of queue pairs supported by the device */

	/* # of queue pairs currently used by the driver */
	u16 curr_queue_pairs;

	/* # of XDP queue pairs currently used by the driver */

	/* xdp_queue_pairs may be 0 when XDP is already loaded, so track enablement separately. */

	/* I like... big packets and I cannot lie! */

	/* number of sg entries allocated for big packets */
	unsigned int big_packets_num_skbfrags;

	/* Host will merge rx buffers for big packets (shake it! shake it!) */
	bool mergeable_rx_bufs;

	/* Host supports rss and/or hash report */
	bool has_rss_hash_report;
	u16 rss_indir_table_size;
	u32 rss_hash_types_supported;
	u32 rss_hash_types_saved;
	struct virtio_net_ctrl_rss rss;

	/* Has control virtqueue */

	/* Lock to protect the control VQ */
	struct mutex cvq_lock;

	/* Host can handle any s/g split between our header and packet data */

	/* Packet virtio header size */

	/* Work struct for delayed refilling if we run low on memory. */
	struct delayed_work refill;

	/* Is delayed refill enabled? */

	/* The lock to synchronize the access to refill_enabled */
	spinlock_t refill_lock;

	/* Work struct for config space updates */
	struct work_struct config_work;

	/* Work struct for setting rx mode */
	struct work_struct rx_mode_work;

	/* OK to queue work setting RX mode? */
	bool rx_mode_work_enabled;

	/* Is the affinity hint set for the virtqueues? */
	bool affinity_hint_set;

	/* CPU hotplug instances for online & dead */
	struct hlist_node node;
	struct hlist_node node_dead;

	struct control_buf *ctrl;

	/* Ethtool settings */

	/* Is rx dynamic interrupt moderation enabled? */

	/* Interrupt coalescing settings */
	struct virtnet_interrupt_coalesce intr_coal_tx;
	struct virtnet_interrupt_coalesce intr_coal_rx;

	unsigned long guest_offloads;
	unsigned long guest_offloads_capable;

	/* failover when STANDBY feature enabled */
	struct failover *failover;

	u64 device_stats_cap;

struct padded_vnet_hdr {
	struct virtio_net_hdr_v1_hash hdr;
	/*
	 * hdr is in a separate sg buffer, and data sg buffer shares same page
	 * with this header sg. This padding makes next sg 16 byte aligned.
	 */

struct virtio_net_common_hdr {
		struct virtio_net_hdr hdr;
		struct virtio_net_hdr_mrg_rxbuf	mrg_hdr;
		struct virtio_net_hdr_v1_hash hash_v1_hdr;

static struct virtio_net_common_hdr xsk_hdr;

static void virtnet_sq_free_unused_buf(struct virtqueue *vq, void *buf);
static void virtnet_sq_free_unused_buf_done(struct virtqueue *vq);
static int virtnet_xdp_handler(struct bpf_prog *xdp_prog, struct xdp_buff *xdp,
			       struct net_device *dev,
			       unsigned int *xdp_xmit,
			       struct virtnet_rq_stats *stats);
static void virtnet_receive_done(struct virtnet_info *vi, struct receive_queue *rq,
				 struct sk_buff *skb, u8 flags);
static struct sk_buff *virtnet_skb_append_frag(struct sk_buff *head_skb,
					       struct sk_buff *curr_skb,
					       struct page *page, void *buf,
					       int len, int truesize);
static void virtnet_xsk_completed(struct send_queue *sq, int num);

enum virtnet_xmit_type {
	VIRTNET_XMIT_TYPE_SKB,
	VIRTNET_XMIT_TYPE_SKB_ORPHAN,
	VIRTNET_XMIT_TYPE_XDP,
	VIRTNET_XMIT_TYPE_XSK,
};

static int rss_indirection_table_alloc(struct virtio_net_ctrl_rss *rss, u16 indir_table_size)
{
	if (!indir_table_size) {
		rss->indirection_table = NULL;

	rss->indirection_table = kmalloc_array(indir_table_size, sizeof(u16), GFP_KERNEL);
	if (!rss->indirection_table)

static void rss_indirection_table_free(struct virtio_net_ctrl_rss *rss)
{
	kfree(rss->indirection_table);
/* We use the last two bits of the pointer to distinguish the xmit type. */
#define VIRTNET_XMIT_TYPE_MASK (BIT(0) | BIT(1))

#define VIRTIO_XSK_FLAG_OFFSET 2

static enum virtnet_xmit_type virtnet_xmit_ptr_unpack(void **ptr)
{
	unsigned long p = (unsigned long)*ptr;

	*ptr = (void *)(p & ~VIRTNET_XMIT_TYPE_MASK);

	return p & VIRTNET_XMIT_TYPE_MASK;
}

static void *virtnet_xmit_ptr_pack(void *ptr, enum virtnet_xmit_type type)
{
	return (void *)((unsigned long)ptr | type);
}

static int virtnet_add_outbuf(struct send_queue *sq, int num, void *data,
			      enum virtnet_xmit_type type)
{
	return virtqueue_add_outbuf(sq->vq, sq->sg, num,
				    virtnet_xmit_ptr_pack(data, type),

static u32 virtnet_ptr_to_xsk_buff_len(void *ptr)
{
	return ((unsigned long)ptr) >> VIRTIO_XSK_FLAG_OFFSET;
}
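/* Note on the packed xmit token used above: the two low bits of the pointer
 * carry the enum virtnet_xmit_type (the pointers packed here are at least
 * 4-byte aligned, so those bits are otherwise zero). For VIRTNET_XMIT_TYPE_XSK
 * there is no object pointer at all; virtnet_xsk_to_ptr() stores the buffer
 * length in the upper bits, shifted by VIRTIO_XSK_FLAG_OFFSET, and
 * virtnet_ptr_to_xsk_buff_len() recovers it on completion.
 */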
static void sg_fill_dma(struct scatterlist *sg, dma_addr_t addr, u32 len)
{
	sg_dma_address(sg) = addr;
	sg_dma_len(sg) = len;
}

static void __free_old_xmit(struct send_queue *sq, struct netdev_queue *txq,
			    bool in_napi, struct virtnet_sq_free_stats *stats)
{
	struct xdp_frame *frame;

	while ((ptr = virtqueue_get_buf(sq->vq, &len)) != NULL) {
		switch (virtnet_xmit_ptr_unpack(&ptr)) {
		case VIRTNET_XMIT_TYPE_SKB:

			pr_debug("Sent skb %p\n", skb);
			stats->napi_packets++;
			stats->napi_bytes += skb->len;
			napi_consume_skb(skb, in_napi);

		case VIRTNET_XMIT_TYPE_SKB_ORPHAN:

			stats->bytes += skb->len;
			napi_consume_skb(skb, in_napi);

		case VIRTNET_XMIT_TYPE_XDP:

			stats->bytes += xdp_get_frame_len(frame);
			xdp_return_frame(frame);

		case VIRTNET_XMIT_TYPE_XSK:
			stats->bytes += virtnet_ptr_to_xsk_buff_len(ptr);

	netdev_tx_completed_queue(txq, stats->napi_packets, stats->napi_bytes);
}

static void virtnet_free_old_xmit(struct send_queue *sq,
				  struct netdev_queue *txq,
				  bool in_napi,
				  struct virtnet_sq_free_stats *stats)
{
	__free_old_xmit(sq, txq, in_napi, stats);

		virtnet_xsk_completed(sq, stats->xsk);
}

/* Converting between virtqueue no. and kernel tx/rx queue no.
 * 0:rx0 1:tx0 2:rx1 3:tx1 ... 2N:rxN 2N+1:txN 2N+2:cvq
 */
static int vq2txq(struct virtqueue *vq)
{
	return (vq->index - 1) / 2;
}

static int txq2vq(int txq)

static int vq2rxq(struct virtqueue *vq)
{
	return vq->index / 2;
}

static int rxq2vq(int rxq)

static int vq_type(struct virtnet_info *vi, int qid)
{
	if (qid == vi->max_queue_pairs * 2)
		return VIRTNET_Q_TYPE_CQ;

		return VIRTNET_Q_TYPE_TX;

	return VIRTNET_Q_TYPE_RX;
}
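/* Worked example of the mapping above: with max_queue_pairs == 2 the virtqueue
 * indices are 0:rx0, 1:tx0, 2:rx1, 3:tx1 and 4:cvq, so vq2rxq(2) == 1,
 * vq2txq(3) == 1, and vq_type() reports index 4 (== max_queue_pairs * 2) as
 * VIRTNET_Q_TYPE_CQ.
 */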
static inline struct virtio_net_common_hdr *
skb_vnet_common_hdr(struct sk_buff *skb)
{
	return (struct virtio_net_common_hdr *)skb->cb;
}

/*
 * private is used to chain pages for big packets; put the whole most recently
 * used list at the beginning for reuse.
 */
static void give_pages(struct receive_queue *rq, struct page *page)
{
	/* Find end of list, sew whole thing into vi->rq.pages. */
	for (end = page; end->private; end = (struct page *)end->private);
	end->private = (unsigned long)rq->pages;

static struct page *get_a_page(struct receive_queue *rq, gfp_t gfp_mask)
{
	struct page *p = rq->pages;

		rq->pages = (struct page *)p->private;
		/* clear private here, it is used to chain pages */

		p = alloc_page(gfp_mask);

static void virtnet_rq_free_buf(struct virtnet_info *vi,
				struct receive_queue *rq, void *buf)
{
	if (vi->mergeable_rx_bufs)
		put_page(virt_to_head_page(buf));
	else if (vi->big_packets)

		put_page(virt_to_head_page(buf));
}

static void enable_delayed_refill(struct virtnet_info *vi)
{
	spin_lock_bh(&vi->refill_lock);
	vi->refill_enabled = true;
	spin_unlock_bh(&vi->refill_lock);
}

static void disable_delayed_refill(struct virtnet_info *vi)
{
	spin_lock_bh(&vi->refill_lock);
	vi->refill_enabled = false;
	spin_unlock_bh(&vi->refill_lock);
}

static void enable_rx_mode_work(struct virtnet_info *vi)
{
	vi->rx_mode_work_enabled = true;

static void disable_rx_mode_work(struct virtnet_info *vi)
{
	vi->rx_mode_work_enabled = false;

static void virtqueue_napi_schedule(struct napi_struct *napi,
				    struct virtqueue *vq)
{
	if (napi_schedule_prep(napi)) {
		virtqueue_disable_cb(vq);
		__napi_schedule(napi);

static bool virtqueue_napi_complete(struct napi_struct *napi,
				    struct virtqueue *vq, int processed)
{
	opaque = virtqueue_enable_cb_prepare(vq);
	if (napi_complete_done(napi, processed)) {
		if (unlikely(virtqueue_poll(vq, opaque)))
			virtqueue_napi_schedule(napi, vq);

		virtqueue_disable_cb(vq);

static void skb_xmit_done(struct virtqueue *vq)
{
	struct virtnet_info *vi = vq->vdev->priv;
	struct napi_struct *napi = &vi->sq[vq2txq(vq)].napi;

	/* Suppress further interrupts. */
	virtqueue_disable_cb(vq);

		virtqueue_napi_schedule(napi, vq);

		/* We were probably waiting for more output buffers. */
		netif_wake_subqueue(vi->dev, vq2txq(vq));
}

#define MRG_CTX_HEADER_SHIFT 22
static void *mergeable_len_to_ctx(unsigned int truesize,
				  unsigned int headroom)
{
	return (void *)(unsigned long)((headroom << MRG_CTX_HEADER_SHIFT) | truesize);
}

static unsigned int mergeable_ctx_to_headroom(void *mrg_ctx)
{
	return (unsigned long)mrg_ctx >> MRG_CTX_HEADER_SHIFT;
}

static unsigned int mergeable_ctx_to_truesize(void *mrg_ctx)
{
	return (unsigned long)mrg_ctx & ((1 << MRG_CTX_HEADER_SHIFT) - 1);
}
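/* The mergeable-buffer context is an unsigned long smuggled through the
 * virtqueue's void *ctx slot: the low MRG_CTX_HEADER_SHIFT (22) bits hold the
 * buffer truesize and the bits above them hold the headroom, so both values
 * can be recovered from the completed buffer without any per-buffer
 * allocation.
 */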
static struct sk_buff *virtnet_build_skb(void *buf, unsigned int buflen,
					 unsigned int headroom,
					 unsigned int len)
{
	skb = build_skb(buf, buflen);

	skb_reserve(skb, headroom);

/* Called from bottom half context */
static struct sk_buff *page_to_skb(struct virtnet_info *vi,
				   struct receive_queue *rq,
				   struct page *page, unsigned int offset,
				   unsigned int len, unsigned int truesize,
				   unsigned int headroom)
{
	struct virtio_net_common_hdr *hdr;
	unsigned int copy, hdr_len, hdr_padded_len;
	struct page *page_to_free = NULL;
	int tailroom, shinfo_size;
	char *p, *hdr_p, *buf;

	p = page_address(page) + offset;

	hdr_len = vi->hdr_len;
	if (vi->mergeable_rx_bufs)
		hdr_padded_len = hdr_len;

		hdr_padded_len = sizeof(struct padded_vnet_hdr);

	offset += hdr_padded_len;

	tailroom = truesize - headroom - hdr_padded_len - len;

	shinfo_size = SKB_DATA_ALIGN(sizeof(struct skb_shared_info));

	if (!NET_IP_ALIGN && len > GOOD_COPY_LEN && tailroom >= shinfo_size) {
		skb = virtnet_build_skb(buf, truesize, p - buf, len);

		page = (struct page *)page->private;

			give_pages(rq, page);

	/* copy small packet so we can reuse these pages for small data */
	skb = napi_alloc_skb(&rq->napi, GOOD_COPY_LEN);

	/* Copy the entire frame if it fits in skb->head, otherwise
	 * we let virtio_net_hdr_to_skb() and GRO pull headers as needed.
	 */
	if (len <= skb_tailroom(skb))

	skb_put_data(skb, p, copy);

	if (vi->mergeable_rx_bufs) {

			skb_add_rx_frag(skb, 0, page, offset, len, truesize);

	/*
	 * Verify that we can indeed put this data into a skb.
	 * This is here to handle cases when the device erroneously
	 * tries to receive more than is possible. This is usually
	 * the case of a broken device.
	 */
	if (unlikely(len > MAX_SKB_FRAGS * PAGE_SIZE)) {
		net_dbg_ratelimited("%s: too much data\n", skb->dev->name);

	BUG_ON(offset >= PAGE_SIZE);

		unsigned int frag_size = min((unsigned)PAGE_SIZE - offset, len);
		skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page, offset,
				frag_size, truesize);

		page = (struct page *)page->private;

		give_pages(rq, page);

	hdr = skb_vnet_common_hdr(skb);
	memcpy(hdr, hdr_p, hdr_len);

		put_page(page_to_free);

static void virtnet_rq_unmap(struct receive_queue *rq, void *buf, u32 len)
{
	struct virtnet_info *vi = rq->vq->vdev->priv;
	struct page *page = virt_to_head_page(buf);
	struct virtnet_rq_dma *dma;

	BUG_ON(vi->big_packets && !vi->mergeable_rx_bufs);

	head = page_address(page);

	if (dma->need_sync && len) {
		offset = buf - (head + sizeof(*dma));

		virtqueue_dma_sync_single_range_for_cpu(rq->vq, dma->addr,

	virtqueue_dma_unmap_single_attrs(rq->vq, dma->addr, dma->len,
					 DMA_FROM_DEVICE, DMA_ATTR_SKIP_CPU_SYNC);

static void *virtnet_rq_get_buf(struct receive_queue *rq, u32 *len, void **ctx)
{
	struct virtnet_info *vi = rq->vq->vdev->priv;

	BUG_ON(vi->big_packets && !vi->mergeable_rx_bufs);

	buf = virtqueue_get_buf_ctx(rq->vq, len, ctx);
		virtnet_rq_unmap(rq, buf, *len);

static void virtnet_rq_init_one_sg(struct receive_queue *rq, void *buf, u32 len)
{
	struct virtnet_info *vi = rq->vq->vdev->priv;
	struct virtnet_rq_dma *dma;

	BUG_ON(vi->big_packets && !vi->mergeable_rx_bufs);

	head = page_address(rq->alloc_frag.page);

	addr = dma->addr - sizeof(*dma) + offset;

	sg_init_table(rq->sg, 1);
	sg_fill_dma(rq->sg, addr, len);
}

static void *virtnet_rq_alloc(struct receive_queue *rq, u32 size, gfp_t gfp)
{
	struct page_frag *alloc_frag = &rq->alloc_frag;
	struct virtnet_info *vi = rq->vq->vdev->priv;
	struct virtnet_rq_dma *dma;

	BUG_ON(vi->big_packets && !vi->mergeable_rx_bufs);

	head = page_address(alloc_frag->page);

	if (!alloc_frag->offset) {

			/* Now, the new page is allocated, the last dma
			 * will not be used. So the dma can be unmapped.
			 */
			virtnet_rq_unmap(rq, rq->last_dma, 0);
			rq->last_dma = NULL;

		dma->len = alloc_frag->size - sizeof(*dma);

		addr = virtqueue_dma_map_single_attrs(rq->vq, dma + 1,
						      dma->len, DMA_FROM_DEVICE, 0);
		if (virtqueue_dma_mapping_error(rq->vq, addr))

		dma->need_sync = virtqueue_dma_need_sync(rq->vq, addr);

		/* Add a reference to dma to prevent the entire dma from
		 * being released during error handling. This reference
		 * will be freed after the pages are no longer used.
		 */
		get_page(alloc_frag->page);
		alloc_frag->offset = sizeof(*dma);

	buf = head + alloc_frag->offset;

	get_page(alloc_frag->page);
	alloc_frag->offset += size;
static void virtnet_rq_unmap_free_buf(struct virtqueue *vq, void *buf)
{
	struct virtnet_info *vi = vq->vdev->priv;
	struct receive_queue *rq;

		xsk_buff_free((struct xdp_buff *)buf);

	if (!vi->big_packets || vi->mergeable_rx_bufs)
		virtnet_rq_unmap(rq, buf, 0);

	virtnet_rq_free_buf(vi, rq, buf);
}

static void free_old_xmit(struct send_queue *sq, struct netdev_queue *txq,
			  bool in_napi)
{
	struct virtnet_sq_free_stats stats = {0};

	virtnet_free_old_xmit(sq, txq, in_napi, &stats);

	/* Avoid overhead when no packets have been processed; this
	 * happens when called speculatively from start_xmit.
	 */
	if (!stats.packets && !stats.napi_packets)

	u64_stats_update_begin(&sq->stats.syncp);
	u64_stats_add(&sq->stats.bytes, stats.bytes + stats.napi_bytes);
	u64_stats_add(&sq->stats.packets, stats.packets + stats.napi_packets);
	u64_stats_update_end(&sq->stats.syncp);
}

static bool is_xdp_raw_buffer_queue(struct virtnet_info *vi, int q)
{
	if (q < (vi->curr_queue_pairs - vi->xdp_queue_pairs))

	else if (q < vi->curr_queue_pairs)

static void check_sq_full_and_disable(struct virtnet_info *vi,
				      struct net_device *dev,
				      struct send_queue *sq)
{
	bool use_napi = sq->napi.weight;

	/* If running out of space, stop queue to avoid getting packets that we
	 * are then unable to transmit.
	 * An alternative would be to force queuing layer to requeue the skb by
	 * returning NETDEV_TX_BUSY. However, NETDEV_TX_BUSY should not be
	 * returned in a normal path of operation: it means that driver is not
	 * maintaining the TX queue stop/start state properly, and causes
	 * the stack to do a non-trivial amount of useless work.
	 * Since most packets only take 1 or 2 ring slots, stopping the queue
	 * early means 16 slots are typically wasted.
	 */
	if (sq->vq->num_free < 2+MAX_SKB_FRAGS) {
		struct netdev_queue *txq = netdev_get_tx_queue(dev, qnum);

		netif_tx_stop_queue(txq);
		u64_stats_update_begin(&sq->stats.syncp);
		u64_stats_inc(&sq->stats.stop);
		u64_stats_update_end(&sq->stats.syncp);

			if (unlikely(!virtqueue_enable_cb_delayed(sq->vq)))
				virtqueue_napi_schedule(&sq->napi, sq->vq);
		} else if (unlikely(!virtqueue_enable_cb_delayed(sq->vq))) {
			/* More just got used, free them then recheck. */
			free_old_xmit(sq, txq, false);
			if (sq->vq->num_free >= 2+MAX_SKB_FRAGS) {
				netif_start_subqueue(dev, qnum);
				u64_stats_update_begin(&sq->stats.syncp);
				u64_stats_inc(&sq->stats.wake);
				u64_stats_update_end(&sq->stats.syncp);
				virtqueue_disable_cb(sq->vq);
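/* Note on the threshold used above: the queue is stopped while fewer than
 * 2 + MAX_SKB_FRAGS descriptors are free, i.e. while a worst-case packet
 * (virtio header + linear part + up to MAX_SKB_FRAGS fragments, matching the
 * sg[MAX_SKB_FRAGS + 2] scatterlists declared earlier) might not fit.
 */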
static struct xdp_buff *buf_to_xdp(struct virtnet_info *vi,
				   struct receive_queue *rq, void *buf, u32 len)
{
	struct xdp_buff *xdp;

	xdp = (struct xdp_buff *)buf;

	bufsize = xsk_pool_get_rx_frame_size(rq->xsk_pool) + vi->hdr_len;

	if (unlikely(len > bufsize)) {
		pr_debug("%s: rx error: len %u exceeds truesize %u\n",
			 vi->dev->name, len, bufsize);
		DEV_STATS_INC(vi->dev, rx_length_errors);

	xsk_buff_set_size(xdp, len);
	xsk_buff_dma_sync_for_cpu(xdp);

static struct sk_buff *xsk_construct_skb(struct receive_queue *rq,
					 struct xdp_buff *xdp)
{
	unsigned int metasize = xdp->data - xdp->data_meta;
	struct sk_buff *skb;

	size = xdp->data_end - xdp->data_hard_start;
	skb = napi_alloc_skb(&rq->napi, size);
	if (unlikely(!skb)) {

	skb_reserve(skb, xdp->data_meta - xdp->data_hard_start);

	size = xdp->data_end - xdp->data_meta;
	memcpy(__skb_put(skb, size), xdp->data_meta, size);

		__skb_pull(skb, metasize);
		skb_metadata_set(skb, metasize);

static struct sk_buff *virtnet_receive_xsk_small(struct net_device *dev, struct virtnet_info *vi,
						 struct receive_queue *rq, struct xdp_buff *xdp,
						 unsigned int *xdp_xmit,
						 struct virtnet_rq_stats *stats)
{
	struct bpf_prog *prog;

	prog = rcu_dereference(rq->xdp_prog);
		ret = virtnet_xdp_handler(prog, xdp, dev, xdp_xmit, stats);

		return xsk_construct_skb(rq, xdp);

		u64_stats_inc(&stats->drops);

static void xsk_drop_follow_bufs(struct net_device *dev,
				 struct receive_queue *rq,
				 u32 num_buf,
				 struct virtnet_rq_stats *stats)
{
	struct xdp_buff *xdp;

	while (num_buf-- > 1) {
		xdp = virtqueue_get_buf(rq->vq, &len);
		if (unlikely(!xdp)) {
			pr_debug("%s: rx error: %d buffers missing\n",
				 dev->name, num_buf);
			DEV_STATS_INC(dev, rx_length_errors);

		u64_stats_add(&stats->bytes, len);
static int xsk_append_merge_buffer(struct virtnet_info *vi,
				   struct receive_queue *rq,
				   struct sk_buff *head_skb,
				   u32 num_buf,
				   struct virtio_net_hdr_mrg_rxbuf *hdr,
				   struct virtnet_rq_stats *stats)
{
	struct sk_buff *curr_skb;
	struct xdp_buff *xdp;

	curr_skb = head_skb;

		buf = virtqueue_get_buf(rq->vq, &len);
		if (unlikely(!buf)) {
			pr_debug("%s: rx error: %d buffers out of %d missing\n",
				 vi->dev->name, num_buf,
				 virtio16_to_cpu(vi->vdev,
			DEV_STATS_INC(vi->dev, rx_length_errors);

		u64_stats_add(&stats->bytes, len);

		xdp = buf_to_xdp(vi, rq, buf, len);

		buf = napi_alloc_frag(len);

		memcpy(buf, xdp->data - vi->hdr_len, len);

		page = virt_to_page(buf);

		curr_skb = virtnet_skb_append_frag(head_skb, curr_skb, page,
						   buf, len, truesize);

	xsk_drop_follow_bufs(vi->dev, rq, num_buf, stats);

static struct sk_buff *virtnet_receive_xsk_merge(struct net_device *dev, struct virtnet_info *vi,
						 struct receive_queue *rq, struct xdp_buff *xdp,
						 unsigned int *xdp_xmit,
						 struct virtnet_rq_stats *stats)
{
	struct virtio_net_hdr_mrg_rxbuf *hdr;
	struct bpf_prog *prog;
	struct sk_buff *skb;

	hdr = xdp->data - vi->hdr_len;
	num_buf = virtio16_to_cpu(vi->vdev, hdr->num_buffers);

	prog = rcu_dereference(rq->xdp_prog);
	/* TODO: support multi buffer. */
	if (prog && num_buf == 1)
		ret = virtnet_xdp_handler(prog, xdp, dev, xdp_xmit, stats);

		skb = xsk_construct_skb(rq, xdp);

		if (xsk_append_merge_buffer(vi, rq, skb, num_buf, hdr, stats)) {

	xsk_drop_follow_bufs(dev, rq, num_buf, stats);

	u64_stats_inc(&stats->drops);

static void virtnet_receive_xsk_buf(struct virtnet_info *vi, struct receive_queue *rq,
				    void *buf, u32 len,
				    unsigned int *xdp_xmit,
				    struct virtnet_rq_stats *stats)
{
	struct net_device *dev = vi->dev;
	struct sk_buff *skb = NULL;
	struct xdp_buff *xdp;

	u64_stats_add(&stats->bytes, len);

	xdp = buf_to_xdp(vi, rq, buf, len);

	if (unlikely(len < ETH_HLEN)) {
		pr_debug("%s: short packet %i\n", dev->name, len);
		DEV_STATS_INC(dev, rx_length_errors);

	flags = ((struct virtio_net_common_hdr *)(xdp->data - vi->hdr_len))->hdr.flags;

	if (!vi->mergeable_rx_bufs)
		skb = virtnet_receive_xsk_small(dev, vi, rq, xdp, xdp_xmit, stats);

		skb = virtnet_receive_xsk_merge(dev, vi, rq, xdp, xdp_xmit, stats);

		virtnet_receive_done(vi, rq, skb, flags);
}

static int virtnet_add_recvbuf_xsk(struct virtnet_info *vi, struct receive_queue *rq,
				   struct xsk_buff_pool *pool, gfp_t gfp)
{
	struct xdp_buff **xsk_buffs;

	xsk_buffs = rq->xsk_buffs;

	num = xsk_buff_alloc_batch(pool, xsk_buffs, rq->vq->num_free);

	len = xsk_pool_get_rx_frame_size(pool) + vi->hdr_len;

	for (i = 0; i < num; ++i) {
		/* Use the part of XDP_PACKET_HEADROOM as the virtnet hdr space.
		 * We assume XDP_PACKET_HEADROOM is larger than hdr->len.
		 * (see function virtnet_xsk_pool_enable)
		 */
		addr = xsk_buff_xdp_get_dma(xsk_buffs[i]) - vi->hdr_len;

		sg_init_table(rq->sg, 1);
		sg_fill_dma(rq->sg, addr, len);

		err = virtqueue_add_inbuf_premapped(rq->vq, rq->sg, 1,
						    xsk_buffs[i], NULL, gfp);

	for (; i < num; ++i)
		xsk_buff_free(xsk_buffs[i]);

static void *virtnet_xsk_to_ptr(u32 len)
{
	p = len << VIRTIO_XSK_FLAG_OFFSET;

	return virtnet_xmit_ptr_pack((void *)p, VIRTNET_XMIT_TYPE_XSK);
}

static int virtnet_xsk_xmit_one(struct send_queue *sq,
				struct xsk_buff_pool *pool,
				struct xdp_desc *desc)
{
	struct virtnet_info *vi;

	vi = sq->vq->vdev->priv;

	addr = xsk_buff_raw_get_dma(pool, desc->addr);
	xsk_buff_raw_dma_sync_for_device(pool, addr, desc->len);

	sg_init_table(sq->sg, 2);
	sg_fill_dma(sq->sg, sq->xsk_hdr_dma_addr, vi->hdr_len);
	sg_fill_dma(sq->sg + 1, addr, desc->len);

	return virtqueue_add_outbuf_premapped(sq->vq, sq->sg, 2,
					      virtnet_xsk_to_ptr(desc->len),
static int virtnet_xsk_xmit_batch(struct send_queue *sq,
				  struct xsk_buff_pool *pool,
				  unsigned int budget,
				  u64 *kicks)
{
	struct xdp_desc *descs = pool->tx_descs;

	budget = min_t(u32, budget, sq->vq->num_free);

	nb_pkts = xsk_tx_peek_release_desc_batch(pool, budget);

	for (i = 0; i < nb_pkts; i++) {
		err = virtnet_xsk_xmit_one(sq, pool, &descs[i]);
		if (unlikely(err)) {
			xsk_tx_completed(sq->xsk_pool, nb_pkts - i);

	if (kick && virtqueue_kick_prepare(sq->vq) && virtqueue_notify(sq->vq))

static bool virtnet_xsk_xmit(struct send_queue *sq, struct xsk_buff_pool *pool,
			     int budget)
{
	struct virtnet_info *vi = sq->vq->vdev->priv;
	struct virtnet_sq_free_stats stats = {};
	struct net_device *dev = vi->dev;

	/* Avoid waking up NAPI needlessly, so call __free_old_xmit() instead of
	 * free_old_xmit().
	 */
	__free_old_xmit(sq, netdev_get_tx_queue(dev, sq - vi->sq), true, &stats);

		xsk_tx_completed(sq->xsk_pool, stats.xsk);

	sent = virtnet_xsk_xmit_batch(sq, pool, budget, &kicks);

	if (!is_xdp_raw_buffer_queue(vi, sq - vi->sq))
		check_sq_full_and_disable(vi, vi->dev, sq);

		struct netdev_queue *txq;

		txq = netdev_get_tx_queue(vi->dev, sq - vi->sq);
		txq_trans_cond_update(txq);

	u64_stats_update_begin(&sq->stats.syncp);
	u64_stats_add(&sq->stats.packets, stats.packets);
	u64_stats_add(&sq->stats.bytes, stats.bytes);
	u64_stats_add(&sq->stats.kicks, kicks);
	u64_stats_add(&sq->stats.xdp_tx, sent);
	u64_stats_update_end(&sq->stats.syncp);

	if (xsk_uses_need_wakeup(pool))
		xsk_set_tx_need_wakeup(pool);

static void xsk_wakeup(struct send_queue *sq)
{
	if (napi_if_scheduled_mark_missed(&sq->napi))

	virtqueue_napi_schedule(&sq->napi, sq->vq);

static int virtnet_xsk_wakeup(struct net_device *dev, u32 qid, u32 flag)
{
	struct virtnet_info *vi = netdev_priv(dev);
	struct send_queue *sq;

	if (!netif_running(dev))

	if (qid >= vi->curr_queue_pairs)

static void virtnet_xsk_completed(struct send_queue *sq, int num)
{
	xsk_tx_completed(sq->xsk_pool, num);

	/* If this is called from rx poll, start_xmit or xdp xmit, we should
	 * wake up the tx napi to consume the xsk tx queue, because the tx
	 * interrupt may not be triggered.
	 */

static int __virtnet_xdp_xmit_one(struct virtnet_info *vi,
				  struct send_queue *sq,
				  struct xdp_frame *xdpf)
{
	struct virtio_net_hdr_mrg_rxbuf *hdr;
	struct skb_shared_info *shinfo;

	if (unlikely(xdpf->headroom < vi->hdr_len))

	if (unlikely(xdp_frame_has_frags(xdpf))) {
		shinfo = xdp_get_shared_info_from_frame(xdpf);
		nr_frags = shinfo->nr_frags;

	/* In wrapping function virtnet_xdp_xmit(), we need to free
	 * up the pending old buffers, where we need to calculate the
	 * position of skb_shared_info in xdp_get_frame_len() and
	 * xdp_return_frame(), which rely on xdpf->data and
	 * xdpf->headroom. Therefore, we need to update the value of
	 * headroom synchronously here.
	 */
	xdpf->headroom -= vi->hdr_len;
	xdpf->data -= vi->hdr_len;
	/* Zero header and leave csum up to XDP layers */

	memset(hdr, 0, vi->hdr_len);
	xdpf->len += vi->hdr_len;

	sg_init_table(sq->sg, nr_frags + 1);
	sg_set_buf(sq->sg, xdpf->data, xdpf->len);
	for (i = 0; i < nr_frags; i++) {
		skb_frag_t *frag = &shinfo->frags[i];

		sg_set_page(&sq->sg[i + 1], skb_frag_page(frag),
			    skb_frag_size(frag), skb_frag_off(frag));

	err = virtnet_add_outbuf(sq, nr_frags + 1, xdpf, VIRTNET_XMIT_TYPE_XDP);
		return -ENOSPC; /* Caller handle free/refcnt */

/* When vi->curr_queue_pairs > nr_cpu_ids, the txq/sq is only used for xdp tx on
 * the current cpu, so it does not need to be locked.
 *
 * Here we use a macro instead of inline functions because we have to deal with
 * three issues at the same time: 1. the choice of sq, 2. deciding on and doing
 * the lock/unlock of txq, and 3. making sparse happy. It is difficult for two
 * inline functions to solve these three problems at the same time.
 */
#define virtnet_xdp_get_sq(vi) ({					\
	int cpu = smp_processor_id();					\
	struct netdev_queue *txq;					\
	typeof(vi) v = (vi);						\
	if (v->curr_queue_pairs > nr_cpu_ids) {				\
		qp = v->curr_queue_pairs - v->xdp_queue_pairs;		\
		txq = netdev_get_tx_queue(v->dev, qp);			\
		__netif_tx_acquire(txq);				\
		qp = cpu % v->curr_queue_pairs;				\
		txq = netdev_get_tx_queue(v->dev, qp);			\
		__netif_tx_lock(txq, cpu);				\

#define virtnet_xdp_put_sq(vi, q) {					\
	struct netdev_queue *txq;					\
	typeof(vi) v = (vi);						\
	txq = netdev_get_tx_queue(v->dev, (q) - v->sq);			\
	if (v->curr_queue_pairs > nr_cpu_ids)				\
		__netif_tx_release(txq);				\
	else								\
		__netif_tx_unlock(txq);					\
}
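/* Typical usage of the pair above (see virtnet_xdp_xmit() below):
 *
 *	sq = virtnet_xdp_get_sq(vi);
 *	... queue XDP frames on sq ...
 *	virtnet_xdp_put_sq(vi, sq);
 */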
static int virtnet_xdp_xmit(struct net_device *dev,
			    int n, struct xdp_frame **frames, u32 flags)
{
	struct virtnet_info *vi = netdev_priv(dev);
	struct virtnet_sq_free_stats stats = {0};
	struct receive_queue *rq = vi->rq;
	struct bpf_prog *xdp_prog;
	struct send_queue *sq;

	/* Only allow ndo_xdp_xmit if XDP is loaded on dev, as this
	 * indicates XDP resources have been successfully allocated.
	 */
	xdp_prog = rcu_access_pointer(rq->xdp_prog);

	sq = virtnet_xdp_get_sq(vi);

	if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK)) {

	/* Free up any pending old buffers before queueing new ones. */
	virtnet_free_old_xmit(sq, netdev_get_tx_queue(dev, sq - vi->sq),

	for (i = 0; i < n; i++) {
		struct xdp_frame *xdpf = frames[i];

		if (__virtnet_xdp_xmit_one(vi, sq, xdpf))

	if (!is_xdp_raw_buffer_queue(vi, sq - vi->sq))
		check_sq_full_and_disable(vi, dev, sq);

	if (flags & XDP_XMIT_FLUSH) {
		if (virtqueue_kick_prepare(sq->vq) && virtqueue_notify(sq->vq))

	u64_stats_update_begin(&sq->stats.syncp);
	u64_stats_add(&sq->stats.bytes, stats.bytes);
	u64_stats_add(&sq->stats.packets, stats.packets);
	u64_stats_add(&sq->stats.xdp_tx, n);
	u64_stats_add(&sq->stats.xdp_tx_drops, n - nxmit);
	u64_stats_add(&sq->stats.kicks, kicks);
	u64_stats_update_end(&sq->stats.syncp);

	virtnet_xdp_put_sq(vi, sq);

static void put_xdp_frags(struct xdp_buff *xdp)
{
	struct skb_shared_info *shinfo;
	struct page *xdp_page;

	if (xdp_buff_has_frags(xdp)) {
		shinfo = xdp_get_shared_info_from_buff(xdp);
		for (i = 0; i < shinfo->nr_frags; i++) {
			xdp_page = skb_frag_page(&shinfo->frags[i]);

static int virtnet_xdp_handler(struct bpf_prog *xdp_prog, struct xdp_buff *xdp,
			       struct net_device *dev,
			       unsigned int *xdp_xmit,
			       struct virtnet_rq_stats *stats)
{
	struct xdp_frame *xdpf;

	act = bpf_prog_run_xdp(xdp_prog, xdp);
	u64_stats_inc(&stats->xdp_packets);

		u64_stats_inc(&stats->xdp_tx);
		xdpf = xdp_convert_buff_to_frame(xdp);
		if (unlikely(!xdpf)) {
			netdev_dbg(dev, "convert buff to frame failed for xdp\n");

		err = virtnet_xdp_xmit(dev, 1, &xdpf, 0);
		if (unlikely(!err)) {
			xdp_return_frame_rx_napi(xdpf);
		} else if (unlikely(err < 0)) {
			trace_xdp_exception(dev, xdp_prog, act);

		*xdp_xmit |= VIRTIO_XDP_TX;

		u64_stats_inc(&stats->xdp_redirects);
		err = xdp_do_redirect(dev, xdp, xdp_prog);

		*xdp_xmit |= VIRTIO_XDP_REDIR;

		bpf_warn_invalid_xdp_action(dev, xdp_prog, act);

		trace_xdp_exception(dev, xdp_prog, act);

static unsigned int virtnet_get_headroom(struct virtnet_info *vi)
{
	return vi->xdp_enabled ? XDP_PACKET_HEADROOM : 0;
}

/* We copy the packet for XDP in the following cases:
 *
 * 1) Packet is scattered across multiple rx buffers.
 * 2) Headroom space is insufficient.
 *
 * This is inefficient but it's a temporary condition that
 * we hit right after XDP is enabled and until queue is refilled
 * with large buffers with sufficient headroom - so it should affect
 * at most queue size packets.
 * Afterwards, the conditions to enable
 * XDP should preclude the underlying device from sending packets
 * across multiple buffers (num_buf > 1), and we make sure buffers
 * have enough headroom.
 */
static struct page *xdp_linearize_page(struct receive_queue *rq,

	int tailroom = SKB_DATA_ALIGN(sizeof(struct skb_shared_info));

	if (page_off + *len + tailroom > PAGE_SIZE)

	page = alloc_page(GFP_ATOMIC);

	memcpy(page_address(page) + page_off, page_address(p) + offset, *len);

	while (--*num_buf) {
		unsigned int buflen;

		buf = virtnet_rq_get_buf(rq, &buflen, NULL);

		p = virt_to_head_page(buf);
		off = buf - page_address(p);

		/* guard against a misconfigured or uncooperative backend that
		 * is sending packets larger than the MTU.
		 */
		if ((page_off + buflen + tailroom) > PAGE_SIZE) {

		memcpy(page_address(page) + page_off,
		       page_address(p) + off, buflen);

	/* Headroom does not contribute to packet length */
	*len = page_off - XDP_PACKET_HEADROOM;

	__free_pages(page, 0);

static struct sk_buff *receive_small_build_skb(struct virtnet_info *vi,
					       unsigned int xdp_headroom,

	unsigned int header_offset;
	unsigned int headroom;
	unsigned int buflen;
	struct sk_buff *skb;

	header_offset = VIRTNET_RX_PAD + xdp_headroom;
	headroom = vi->hdr_len + header_offset;
	buflen = SKB_DATA_ALIGN(GOOD_PACKET_LEN + headroom) +
		 SKB_DATA_ALIGN(sizeof(struct skb_shared_info));

	skb = virtnet_build_skb(buf, buflen, headroom, len);

	buf += header_offset;
	memcpy(skb_vnet_common_hdr(skb), buf, vi->hdr_len);

static struct sk_buff *receive_small_xdp(struct net_device *dev,
					 struct virtnet_info *vi,
					 struct receive_queue *rq,
					 struct bpf_prog *xdp_prog,
					 unsigned int xdp_headroom,
					 unsigned int *xdp_xmit,
					 struct virtnet_rq_stats *stats)
{
	unsigned int header_offset = VIRTNET_RX_PAD + xdp_headroom;
	unsigned int headroom = vi->hdr_len + header_offset;
	struct virtio_net_hdr_mrg_rxbuf *hdr = buf + header_offset;
	struct page *page = virt_to_head_page(buf);
	struct page *xdp_page;
	unsigned int buflen;
	struct xdp_buff xdp;
	struct sk_buff *skb;
	unsigned int metasize = 0;

	if (unlikely(hdr->hdr.gso_type))

	/* Partially checksummed packets must be dropped. */
	if (unlikely(hdr->hdr.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM))

	buflen = SKB_DATA_ALIGN(GOOD_PACKET_LEN + headroom) +
		 SKB_DATA_ALIGN(sizeof(struct skb_shared_info));

	if (unlikely(xdp_headroom < virtnet_get_headroom(vi))) {
		int offset = buf - page_address(page) + header_offset;
		unsigned int tlen = len + vi->hdr_len;

		xdp_headroom = virtnet_get_headroom(vi);
		header_offset = VIRTNET_RX_PAD + xdp_headroom;
		headroom = vi->hdr_len + header_offset;
		buflen = SKB_DATA_ALIGN(GOOD_PACKET_LEN + headroom) +
			 SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
		xdp_page = xdp_linearize_page(rq, &num_buf, page,
					      offset, header_offset,

		buf = page_address(xdp_page);

	xdp_init_buff(&xdp, buflen, &rq->xdp_rxq);
	xdp_prepare_buff(&xdp, buf + VIRTNET_RX_PAD + vi->hdr_len,
			 xdp_headroom, len, true);

	act = virtnet_xdp_handler(xdp_prog, &xdp, dev, xdp_xmit, stats);

		/* Recalculate length in case bpf program changed it */
		len = xdp.data_end - xdp.data;
		metasize = xdp.data - xdp.data_meta;

	skb = virtnet_build_skb(buf, buflen, xdp.data - buf, len);

		skb_metadata_set(skb, metasize);

	u64_stats_inc(&stats->xdp_drops);

	u64_stats_inc(&stats->drops);
static struct sk_buff *receive_small(struct net_device *dev,
				     struct virtnet_info *vi,
				     struct receive_queue *rq,
				     void *buf, void *ctx,
				     unsigned int len,
				     unsigned int *xdp_xmit,
				     struct virtnet_rq_stats *stats)
{
	unsigned int xdp_headroom = (unsigned long)ctx;
	struct page *page = virt_to_head_page(buf);
	struct sk_buff *skb;

	/* We passed the address of virtnet header to virtio-core,
	 * so truncate the padding.
	 */
	buf -= VIRTNET_RX_PAD + xdp_headroom;

	u64_stats_add(&stats->bytes, len);

	if (unlikely(len > GOOD_PACKET_LEN)) {
		pr_debug("%s: rx error: len %u exceeds max size %d\n",
			 dev->name, len, GOOD_PACKET_LEN);
		DEV_STATS_INC(dev, rx_length_errors);

	if (unlikely(vi->xdp_enabled)) {
		struct bpf_prog *xdp_prog;

		xdp_prog = rcu_dereference(rq->xdp_prog);
			skb = receive_small_xdp(dev, vi, rq, xdp_prog, buf,
						xdp_headroom, len, xdp_xmit,

	skb = receive_small_build_skb(vi, xdp_headroom, buf, len);

	u64_stats_inc(&stats->drops);

static struct sk_buff *receive_big(struct net_device *dev,
				   struct virtnet_info *vi,
				   struct receive_queue *rq,
				   struct virtnet_rq_stats *stats)
{
	struct page *page = buf;
	struct sk_buff *skb =
		page_to_skb(vi, rq, page, 0, len, PAGE_SIZE, 0);

	u64_stats_add(&stats->bytes, len - vi->hdr_len);

	u64_stats_inc(&stats->drops);
	give_pages(rq, page);

static void mergeable_buf_free(struct receive_queue *rq, int num_buf,
			       struct net_device *dev,
			       struct virtnet_rq_stats *stats)
{
	while (num_buf-- > 1) {
		buf = virtnet_rq_get_buf(rq, &len, NULL);
		if (unlikely(!buf)) {
			pr_debug("%s: rx error: %d buffers missing\n",
				 dev->name, num_buf);
			DEV_STATS_INC(dev, rx_length_errors);

		u64_stats_add(&stats->bytes, len);
		page = virt_to_head_page(buf);

/* Why not use xdp_build_skb_from_frame()?
 * XDP core assumes that xdp frags are PAGE_SIZE in length, while in
 * virtio-net there are 2 points that do not match its requirements:
 * 1. The size of the prefilled buffer is not fixed before xdp is set.
 * 2. xdp_build_skb_from_frame() does more checks that we don't need,
 *    like eth_type_trans() (which virtio-net does in receive_buf()).
 */
static struct sk_buff *build_skb_from_xdp_buff(struct net_device *dev,
					       struct virtnet_info *vi,
					       struct xdp_buff *xdp,
					       unsigned int xdp_frags_truesz)
{
	struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp);
	unsigned int headroom, data_len;
	struct sk_buff *skb;

	if (unlikely(xdp->data_end > xdp_data_hard_end(xdp))) {
		pr_debug("Error building skb as missing reserved tailroom for xdp");

	if (unlikely(xdp_buff_has_frags(xdp)))
		nr_frags = sinfo->nr_frags;

	skb = build_skb(xdp->data_hard_start, xdp->frame_sz);

	headroom = xdp->data - xdp->data_hard_start;
	data_len = xdp->data_end - xdp->data;
	skb_reserve(skb, headroom);
	__skb_put(skb, data_len);

	metasize = xdp->data - xdp->data_meta;
	metasize = metasize > 0 ? metasize : 0;
		skb_metadata_set(skb, metasize);

	if (unlikely(xdp_buff_has_frags(xdp)))
		xdp_update_skb_shared_info(skb, nr_frags,
					   sinfo->xdp_frags_size,
					   xdp_buff_is_frag_pfmemalloc(xdp));

/* TODO: build xdp in big mode */
static int virtnet_build_xdp_buff_mrg(struct net_device *dev,
				      struct virtnet_info *vi,
				      struct receive_queue *rq,
				      struct xdp_buff *xdp,
				      unsigned int frame_sz,
				      unsigned int *xdp_frags_truesize,
				      struct virtnet_rq_stats *stats)
{
	struct virtio_net_hdr_mrg_rxbuf *hdr = buf;
	unsigned int headroom, tailroom, room;
	unsigned int truesize, cur_frag_size;
	struct skb_shared_info *shinfo;
	unsigned int xdp_frags_truesz = 0;

	xdp_init_buff(xdp, frame_sz, &rq->xdp_rxq);
	xdp_prepare_buff(xdp, buf - XDP_PACKET_HEADROOM,
			 XDP_PACKET_HEADROOM + vi->hdr_len, len - vi->hdr_len, true);

		/* If we want to build multi-buffer xdp, we need
		 * to specify that the flags of xdp_buff have the
		 * XDP_FLAGS_HAS_FRAG bit.
		 */
		if (!xdp_buff_has_frags(xdp))
			xdp_buff_set_frags_flag(xdp);

		shinfo = xdp_get_shared_info_from_buff(xdp);
		shinfo->nr_frags = 0;
		shinfo->xdp_frags_size = 0;

	if (*num_buf > MAX_SKB_FRAGS + 1)

	while (--*num_buf > 0) {
		buf = virtnet_rq_get_buf(rq, &len, &ctx);
		if (unlikely(!buf)) {
			pr_debug("%s: rx error: %d buffers out of %d missing\n",
				 dev->name, *num_buf,
				 virtio16_to_cpu(vi->vdev, hdr->num_buffers));
			DEV_STATS_INC(dev, rx_length_errors);

		u64_stats_add(&stats->bytes, len);
		page = virt_to_head_page(buf);
		offset = buf - page_address(page);

		truesize = mergeable_ctx_to_truesize(ctx);
		headroom = mergeable_ctx_to_headroom(ctx);
		tailroom = headroom ? sizeof(struct skb_shared_info) : 0;
		room = SKB_DATA_ALIGN(headroom + tailroom);

		cur_frag_size = truesize;
		xdp_frags_truesz += cur_frag_size;
		if (unlikely(len > truesize - room || cur_frag_size > PAGE_SIZE)) {
			pr_debug("%s: rx error: len %u exceeds truesize %lu\n",
				 dev->name, len, (unsigned long)(truesize - room));
			DEV_STATS_INC(dev, rx_length_errors);

		frag = &shinfo->frags[shinfo->nr_frags++];
		skb_frag_fill_page_desc(frag, page, offset, len);
		if (page_is_pfmemalloc(page))
			xdp_buff_set_frag_pfmemalloc(xdp);

		shinfo->xdp_frags_size += len;

	*xdp_frags_truesize = xdp_frags_truesz;

static void *mergeable_xdp_get_buf(struct virtnet_info *vi,
				   struct receive_queue *rq,
				   struct bpf_prog *xdp_prog,
				   unsigned int *frame_sz,
				   struct virtio_net_hdr_mrg_rxbuf *hdr)
{
	unsigned int truesize = mergeable_ctx_to_truesize(ctx);
	unsigned int headroom = mergeable_ctx_to_headroom(ctx);
	struct page *xdp_page;
	unsigned int xdp_room;

	/* Transient failure which in theory could occur if
	 * in-flight packets from before XDP was enabled reach
	 * the receive path after XDP is loaded.
	 */
	if (unlikely(hdr->hdr.gso_type))

	/* Partially checksummed packets must be dropped. */
	if (unlikely(hdr->hdr.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM))

	/* Now XDP core assumes frag size is PAGE_SIZE, but buffers
	 * with headroom may add hole in truesize, which
	 * makes their length exceed PAGE_SIZE. So we disabled the
	 * hole mechanism for xdp. See add_recvbuf_mergeable().
	 */
	*frame_sz = truesize;

	if (likely(headroom >= virtnet_get_headroom(vi) &&
		   (*num_buf == 1 || xdp_prog->aux->xdp_has_frags))) {
		return page_address(*page) + offset;

	/* This happens when headroom is not enough because
	 * the buffer was prefilled before XDP was set.
	 * This should only happen for the first several packets.
	 * In fact, vq reset can be used here to help us clean up
	 * the prefilled buffers, but many existing devices do not
	 * support it, and we don't want to bother users who are
	 * using xdp normally.
	 */
	if (!xdp_prog->aux->xdp_has_frags) {
		/* linearize data for XDP */
		xdp_page = xdp_linearize_page(rq, num_buf,
					      XDP_PACKET_HEADROOM,

		xdp_room = SKB_DATA_ALIGN(XDP_PACKET_HEADROOM +
					  sizeof(struct skb_shared_info));
		if (*len + xdp_room > PAGE_SIZE)

		xdp_page = alloc_page(GFP_ATOMIC);

		memcpy(page_address(xdp_page) + XDP_PACKET_HEADROOM,
		       page_address(*page) + offset, *len);

		*frame_sz = PAGE_SIZE;

	return page_address(*page) + XDP_PACKET_HEADROOM;
2288 static struct sk_buff
*receive_mergeable_xdp(struct net_device
*dev
,
2289 struct virtnet_info
*vi
,
2290 struct receive_queue
*rq
,
2291 struct bpf_prog
*xdp_prog
,
2295 unsigned int *xdp_xmit
,
2296 struct virtnet_rq_stats
*stats
)
2298 struct virtio_net_hdr_mrg_rxbuf
*hdr
= buf
;
2299 int num_buf
= virtio16_to_cpu(vi
->vdev
, hdr
->num_buffers
);
2300 struct page
*page
= virt_to_head_page(buf
);
2301 int offset
= buf
- page_address(page
);
2302 unsigned int xdp_frags_truesz
= 0;
2303 struct sk_buff
*head_skb
;
2304 unsigned int frame_sz
;
2305 struct xdp_buff xdp
;
2310 data
= mergeable_xdp_get_buf(vi
, rq
, xdp_prog
, ctx
, &frame_sz
, &num_buf
, &page
,
2312 if (unlikely(!data
))
2315 err
= virtnet_build_xdp_buff_mrg(dev
, vi
, rq
, &xdp
, data
, len
, frame_sz
,
2316 &num_buf
, &xdp_frags_truesz
, stats
);
2320 act
= virtnet_xdp_handler(xdp_prog
, &xdp
, dev
, xdp_xmit
, stats
);
2324 head_skb
= build_skb_from_xdp_buff(dev
, vi
, &xdp
, xdp_frags_truesz
);
2325 if (unlikely(!head_skb
))
2337 put_xdp_frags(&xdp
);
2341 mergeable_buf_free(rq
, num_buf
, dev
, stats
);
2343 u64_stats_inc(&stats
->xdp_drops
);
2344 u64_stats_inc(&stats
->drops
);
2348 static struct sk_buff
*virtnet_skb_append_frag(struct sk_buff
*head_skb
,
2349 struct sk_buff
*curr_skb
,
2350 struct page
*page
, void *buf
,
2351 int len
, int truesize
)
2356 num_skb_frags
= skb_shinfo(curr_skb
)->nr_frags
;
2357 if (unlikely(num_skb_frags
== MAX_SKB_FRAGS
)) {
2358 struct sk_buff
*nskb
= alloc_skb(0, GFP_ATOMIC
);
2360 if (unlikely(!nskb
))
2363 if (curr_skb
== head_skb
)
2364 skb_shinfo(curr_skb
)->frag_list
= nskb
;
2366 curr_skb
->next
= nskb
;
2368 head_skb
->truesize
+= nskb
->truesize
;
2372 if (curr_skb
!= head_skb
) {
2373 head_skb
->data_len
+= len
;
2374 head_skb
->len
+= len
;
2375 head_skb
->truesize
+= truesize
;
2378 offset
= buf
- page_address(page
);
2379 if (skb_can_coalesce(curr_skb
, num_skb_frags
, page
, offset
)) {
2381 skb_coalesce_rx_frag(curr_skb
, num_skb_frags
- 1,
2384 skb_add_rx_frag(curr_skb
, num_skb_frags
, page
,
2385 offset
, len
, truesize
);
static struct sk_buff *receive_mergeable(struct net_device *dev,
					 struct virtnet_info *vi,
					 struct receive_queue *rq,
					 void *buf,
					 void *ctx,
					 unsigned int len,
					 unsigned int *xdp_xmit,
					 struct virtnet_rq_stats *stats)
{
	struct virtio_net_hdr_mrg_rxbuf *hdr = buf;
	int num_buf = virtio16_to_cpu(vi->vdev, hdr->num_buffers);
	struct page *page = virt_to_head_page(buf);
	int offset = buf - page_address(page);
	struct sk_buff *head_skb, *curr_skb;
	unsigned int truesize = mergeable_ctx_to_truesize(ctx);
	unsigned int headroom = mergeable_ctx_to_headroom(ctx);
	unsigned int tailroom = headroom ? sizeof(struct skb_shared_info) : 0;
	unsigned int room = SKB_DATA_ALIGN(headroom + tailroom);

	head_skb = NULL;
	u64_stats_add(&stats->bytes, len - vi->hdr_len);

	if (unlikely(len > truesize - room)) {
		pr_debug("%s: rx error: len %u exceeds truesize %lu\n",
			 dev->name, len, (unsigned long)(truesize - room));
		DEV_STATS_INC(dev, rx_length_errors);
		goto err_skb;
	}

	if (unlikely(vi->xdp_enabled)) {
		struct bpf_prog *xdp_prog;

		rcu_read_lock();
		xdp_prog = rcu_dereference(rq->xdp_prog);
		if (xdp_prog) {
			head_skb = receive_mergeable_xdp(dev, vi, rq, xdp_prog, buf, ctx,
							 len, xdp_xmit, stats);
			rcu_read_unlock();
			return head_skb;
		}
		rcu_read_unlock();
	}

	head_skb = page_to_skb(vi, rq, page, offset, len, truesize, headroom);
	curr_skb = head_skb;

	if (unlikely(!curr_skb))
		goto err_skb;
	while (--num_buf) {
		buf = virtnet_rq_get_buf(rq, &len, &ctx);
		if (unlikely(!buf)) {
			pr_debug("%s: rx error: %d buffers out of %d missing\n",
				 dev->name, num_buf,
				 virtio16_to_cpu(vi->vdev,
						 hdr->num_buffers));
			DEV_STATS_INC(dev, rx_length_errors);
			goto err_buf;
		}

		u64_stats_add(&stats->bytes, len);
		page = virt_to_head_page(buf);

		truesize = mergeable_ctx_to_truesize(ctx);
		headroom = mergeable_ctx_to_headroom(ctx);
		tailroom = headroom ? sizeof(struct skb_shared_info) : 0;
		room = SKB_DATA_ALIGN(headroom + tailroom);
		if (unlikely(len > truesize - room)) {
			pr_debug("%s: rx error: len %u exceeds truesize %lu\n",
				 dev->name, len, (unsigned long)(truesize - room));
			DEV_STATS_INC(dev, rx_length_errors);
			goto err_skb;
		}

		curr_skb = virtnet_skb_append_frag(head_skb, curr_skb, page,
						   buf, len, truesize);
		if (!curr_skb)
			goto err_skb;
	}

	ewma_pkt_len_add(&rq->mrg_avg_pkt_len, head_skb->len);
	return head_skb;

err_skb:
	put_page(page);
	mergeable_buf_free(rq, num_buf, dev, stats);

err_buf:
	u64_stats_inc(&stats->drops);
	dev_kfree_skb(head_skb);
	return NULL;
}
static void virtio_skb_set_hash(const struct virtio_net_hdr_v1_hash *hdr_hash,
				struct sk_buff *skb)
{
	enum pkt_hash_types rss_hash_type;

	if (!hdr_hash || !skb)
		return;

	switch (__le16_to_cpu(hdr_hash->hash_report)) {
	case VIRTIO_NET_HASH_REPORT_TCPv4:
	case VIRTIO_NET_HASH_REPORT_UDPv4:
	case VIRTIO_NET_HASH_REPORT_TCPv6:
	case VIRTIO_NET_HASH_REPORT_UDPv6:
	case VIRTIO_NET_HASH_REPORT_TCPv6_EX:
	case VIRTIO_NET_HASH_REPORT_UDPv6_EX:
		rss_hash_type = PKT_HASH_TYPE_L4;
		break;
	case VIRTIO_NET_HASH_REPORT_IPv4:
	case VIRTIO_NET_HASH_REPORT_IPv6:
	case VIRTIO_NET_HASH_REPORT_IPv6_EX:
		rss_hash_type = PKT_HASH_TYPE_L3;
		break;
	case VIRTIO_NET_HASH_REPORT_NONE:
	default:
		rss_hash_type = PKT_HASH_TYPE_NONE;
	}
	skb_set_hash(skb, __le32_to_cpu(hdr_hash->hash_value), rss_hash_type);
}
static void virtnet_receive_done(struct virtnet_info *vi, struct receive_queue *rq,
				 struct sk_buff *skb, u8 flags)
{
	struct virtio_net_common_hdr *hdr;
	struct net_device *dev = vi->dev;

	hdr = skb_vnet_common_hdr(skb);
	if (dev->features & NETIF_F_RXHASH && vi->has_rss_hash_report)
		virtio_skb_set_hash(&hdr->hash_v1_hdr, skb);

	if (flags & VIRTIO_NET_HDR_F_DATA_VALID)
		skb->ip_summed = CHECKSUM_UNNECESSARY;

	if (virtio_net_hdr_to_skb(skb, &hdr->hdr,
				  virtio_is_little_endian(vi->vdev))) {
		net_warn_ratelimited("%s: bad gso: type: %u, size: %u\n",
				     dev->name, hdr->hdr.gso_type,
				     hdr->hdr.gso_size);
		goto frame_err;
	}

	skb_record_rx_queue(skb, vq2rxq(rq->vq));
	skb->protocol = eth_type_trans(skb, dev);
	pr_debug("Receiving skb proto 0x%04x len %i type %i\n",
		 ntohs(skb->protocol), skb->len, skb->pkt_type);

	napi_gro_receive(&rq->napi, skb);
	return;

frame_err:
	DEV_STATS_INC(dev, rx_frame_errors);
	dev_kfree_skb(skb);
}
static void receive_buf(struct virtnet_info *vi, struct receive_queue *rq,
			void *buf, unsigned int len, void **ctx,
			unsigned int *xdp_xmit,
			struct virtnet_rq_stats *stats)
{
	struct net_device *dev = vi->dev;
	struct sk_buff *skb;
	u8 flags;

	if (unlikely(len < vi->hdr_len + ETH_HLEN)) {
		pr_debug("%s: short packet %i\n", dev->name, len);
		DEV_STATS_INC(dev, rx_length_errors);
		virtnet_rq_free_buf(vi, rq, buf);
		return;
	}

	/* 1. Save the flags early, as the XDP program might overwrite them.
	 * These flags ensure packets marked as VIRTIO_NET_HDR_F_DATA_VALID
	 * stay valid after XDP processing.
	 * 2. XDP doesn't work with partially checksummed packets (refer to
	 * virtnet_xdp_set()), so packets marked as
	 * VIRTIO_NET_HDR_F_NEEDS_CSUM get dropped during XDP processing.
	 */
	flags = ((struct virtio_net_common_hdr *)buf)->hdr.flags;

	if (vi->mergeable_rx_bufs)
		skb = receive_mergeable(dev, vi, rq, buf, ctx, len, xdp_xmit,
					stats);
	else if (vi->big_packets)
		skb = receive_big(dev, vi, rq, buf, len, stats);
	else
		skb = receive_small(dev, vi, rq, buf, ctx, len, xdp_xmit, stats);

	if (unlikely(!skb))
		return;

	virtnet_receive_done(vi, rq, skb, flags);
}
/* Unlike mergeable buffers, all buffers are allocated to the
 * same size, except for the headroom. For this reason we do
 * not need to use mergeable_len_to_ctx here - it is enough
 * to store the headroom as the context ignoring the truesize.
 */
static int add_recvbuf_small(struct virtnet_info *vi, struct receive_queue *rq,
			     gfp_t gfp)
{
	unsigned int xdp_headroom = virtnet_get_headroom(vi);
	void *ctx = (void *)(unsigned long)xdp_headroom;
	int len = vi->hdr_len + VIRTNET_RX_PAD + GOOD_PACKET_LEN + xdp_headroom;
	void *buf;
	int err;

	len = SKB_DATA_ALIGN(len) +
	      SKB_DATA_ALIGN(sizeof(struct skb_shared_info));

	if (unlikely(!skb_page_frag_refill(len, &rq->alloc_frag, gfp)))
		return -ENOMEM;

	buf = virtnet_rq_alloc(rq, len, gfp);
	if (unlikely(!buf))
		return -ENOMEM;

	buf += VIRTNET_RX_PAD + xdp_headroom;

	virtnet_rq_init_one_sg(rq, buf, vi->hdr_len + GOOD_PACKET_LEN);

	err = virtqueue_add_inbuf_premapped(rq->vq, rq->sg, 1, buf, ctx, gfp);
	if (err < 0) {
		virtnet_rq_unmap(rq, buf, 0);
		put_page(virt_to_head_page(buf));
	}

	return err;
}
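
/* Illustrative sketch (not part of the driver): for small buffers the
 * virtqueue context is nothing more than the XDP headroom cast to a
 * pointer, so the receive path can recover it without any per-buffer
 * bookkeeping structure. Assuming the encode/decode convention used
 * above:
 *
 *	void *ctx = (void *)(unsigned long)xdp_headroom;
 *	...
 *	unsigned int headroom = (unsigned long)ctx;
 *
 * The round trip is lossless because the headroom always fits in an
 * unsigned long.
 */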
2622 static int add_recvbuf_big(struct virtnet_info
*vi
, struct receive_queue
*rq
,
2625 struct page
*first
, *list
= NULL
;
2629 sg_init_table(rq
->sg
, vi
->big_packets_num_skbfrags
+ 2);
2631 /* page in rq->sg[vi->big_packets_num_skbfrags + 1] is list tail */
2632 for (i
= vi
->big_packets_num_skbfrags
+ 1; i
> 1; --i
) {
2633 first
= get_a_page(rq
, gfp
);
2636 give_pages(rq
, list
);
2639 sg_set_buf(&rq
->sg
[i
], page_address(first
), PAGE_SIZE
);
2641 /* chain new page in list head to match sg */
2642 first
->private = (unsigned long)list
;
2646 first
= get_a_page(rq
, gfp
);
2648 give_pages(rq
, list
);
2651 p
= page_address(first
);
2653 /* rq->sg[0], rq->sg[1] share the same page */
2654 /* a separated rq->sg[0] for header - required in case !any_header_sg */
2655 sg_set_buf(&rq
->sg
[0], p
, vi
->hdr_len
);
2657 /* rq->sg[1] for data packet, from offset */
2658 offset
= sizeof(struct padded_vnet_hdr
);
2659 sg_set_buf(&rq
->sg
[1], p
+ offset
, PAGE_SIZE
- offset
);
2661 /* chain first in list head */
2662 first
->private = (unsigned long)list
;
2663 err
= virtqueue_add_inbuf(rq
->vq
, rq
->sg
, vi
->big_packets_num_skbfrags
+ 2,
2666 give_pages(rq
, first
);
static unsigned int get_mergeable_buf_len(struct receive_queue *rq,
					  struct ewma_pkt_len *avg_pkt_len,
					  unsigned int room)
{
	struct virtnet_info *vi = rq->vq->vdev->priv;
	const size_t hdr_len = vi->hdr_len;
	unsigned int len;

	if (room)
		return PAGE_SIZE - room;

	len = hdr_len + clamp_t(unsigned int, ewma_pkt_len_read(avg_pkt_len),
				rq->min_buf_len, PAGE_SIZE - hdr_len);

	return ALIGN(len, L1_CACHE_BYTES);
}
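
/* Worked example (illustrative only): with a 1500 byte EWMA of received
 * packet sizes, a 12 byte virtio-net header, min_buf_len of 128 and no
 * XDP headroom (room == 0), the refill length is
 *
 *	len = 12 + clamp(1500, 128, 4096 - 12) = 1512
 *	len = ALIGN(1512, 64)                  = 1536
 *
 * so mergeable buffers track the observed packet size instead of always
 * burning a full page per packet. The numbers assume PAGE_SIZE == 4096
 * and 64 byte cache lines; they are not taken from the driver itself.
 */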
static int add_recvbuf_mergeable(struct virtnet_info *vi,
				 struct receive_queue *rq, gfp_t gfp)
{
	struct page_frag *alloc_frag = &rq->alloc_frag;
	unsigned int headroom = virtnet_get_headroom(vi);
	unsigned int tailroom = headroom ? sizeof(struct skb_shared_info) : 0;
	unsigned int room = SKB_DATA_ALIGN(headroom + tailroom);
	unsigned int len, hole;
	void *ctx;
	void *buf;
	int err;

	/* Extra tailroom is needed to satisfy XDP's assumption. This
	 * means rx frags coalescing won't work, but consider we've
	 * disabled GSO for XDP, it won't be a big issue.
	 */
	len = get_mergeable_buf_len(rq, &rq->mrg_avg_pkt_len, room);

	if (unlikely(!skb_page_frag_refill(len + room, alloc_frag, gfp)))
		return -ENOMEM;

	if (!alloc_frag->offset && len + room + sizeof(struct virtnet_rq_dma) > alloc_frag->size)
		len -= sizeof(struct virtnet_rq_dma);

	buf = virtnet_rq_alloc(rq, len + room, gfp);
	if (unlikely(!buf))
		return -ENOMEM;

	buf += headroom; /* advance address leaving hole at front of pkt */
	hole = alloc_frag->size - alloc_frag->offset;
	if (hole < len + room) {
		/* To avoid internal fragmentation, if there is very likely not
		 * enough space for another buffer, add the remaining space to
		 * the current buffer.
		 * XDP core assumes that frame_size of xdp_buff and the length
		 * of the frag are PAGE_SIZE, so we disable the hole mechanism.
		 */
		if (!headroom)
			len += hole;
		alloc_frag->offset += hole;
	}

	virtnet_rq_init_one_sg(rq, buf, len);

	ctx = mergeable_len_to_ctx(len + room, headroom);
	err = virtqueue_add_inbuf_premapped(rq->vq, rq->sg, 1, buf, ctx, gfp);
	if (err < 0) {
		virtnet_rq_unmap(rq, buf, 0);
		put_page(virt_to_head_page(buf));
	}

	return err;
}
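
/* Illustrative sketch of the mergeable context used above.
 * mergeable_len_to_ctx() packs the buffer truesize and the headroom into
 * a single unsigned long so that no per-buffer allocation is needed, and
 * mergeable_ctx_to_truesize()/_headroom() undo the packing on receive.
 * The shift value below is an assumption for the sketch, not necessarily
 * the constant the driver uses:
 *
 *	#define EXAMPLE_HEADROOM_SHIFT 22
 *
 *	static void *example_len_to_ctx(unsigned int truesize,
 *					unsigned int headroom)
 *	{
 *		return (void *)(unsigned long)((headroom << EXAMPLE_HEADROOM_SHIFT) |
 *					       truesize);
 *	}
 *
 *	static unsigned int example_ctx_to_truesize(void *ctx)
 *	{
 *		return (unsigned long)ctx & ((1UL << EXAMPLE_HEADROOM_SHIFT) - 1);
 *	}
 */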
 * Returns false if we couldn't fill entirely (OOM).
 *
 * Normally run in the receive path, but can also be run from ndo_open
 * before we're receiving packets, or from refill_work which is
 * careful to disable receiving (using napi_disable).
 */
static bool try_fill_recv(struct virtnet_info *vi, struct receive_queue *rq,
			  gfp_t gfp)
{
	int err;

	if (rq->xsk_pool) {
		err = virtnet_add_recvbuf_xsk(vi, rq, rq->xsk_pool, gfp);
		goto kick;
	}

	do {
		if (vi->mergeable_rx_bufs)
			err = add_recvbuf_mergeable(vi, rq, gfp);
		else if (vi->big_packets)
			err = add_recvbuf_big(vi, rq, gfp);
		else
			err = add_recvbuf_small(vi, rq, gfp);

		if (err)
			break;
	} while (rq->vq->num_free);

kick:
	if (virtqueue_kick_prepare(rq->vq) && virtqueue_notify(rq->vq)) {
		unsigned long flags;

		flags = u64_stats_update_begin_irqsave(&rq->stats.syncp);
		u64_stats_inc(&rq->stats.kicks);
		u64_stats_update_end_irqrestore(&rq->stats.syncp, flags);
	}

	return err != -ENOMEM;
}
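
/* Typical caller pattern (sketch): try_fill_recv() only reports false on
 * -ENOMEM, so callers fall back to the refill workqueue instead of
 * dropping the queue on a transient allocation failure, e.g.
 *
 *	if (!try_fill_recv(vi, rq, GFP_ATOMIC))
 *		schedule_delayed_work(&vi->refill, 0);
 *
 * which matches how virtnet_open(), virtnet_receive() and refill_work()
 * use it elsewhere in this file.
 */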
2783 static void skb_recv_done(struct virtqueue
*rvq
)
2785 struct virtnet_info
*vi
= rvq
->vdev
->priv
;
2786 struct receive_queue
*rq
= &vi
->rq
[vq2rxq(rvq
)];
2789 virtqueue_napi_schedule(&rq
->napi
, rvq
);
2792 static void virtnet_napi_enable(struct virtqueue
*vq
, struct napi_struct
*napi
)
2796 /* If all buffers were filled by other side before we napi_enabled, we
2797 * won't get another interrupt, so process any outstanding packets now.
2798 * Call local_bh_enable after to trigger softIRQ processing.
2801 virtqueue_napi_schedule(napi
, vq
);
2805 static void virtnet_napi_tx_enable(struct virtnet_info
*vi
,
2806 struct virtqueue
*vq
,
2807 struct napi_struct
*napi
)
2812 /* Tx napi touches cachelines on the cpu handling tx interrupts. Only
2813 * enable the feature if this is likely affine with the transmit path.
2815 if (!vi
->affinity_hint_set
) {
2820 return virtnet_napi_enable(vq
, napi
);
2823 static void virtnet_napi_tx_disable(struct napi_struct
*napi
)
2829 static void refill_work(struct work_struct
*work
)
2831 struct virtnet_info
*vi
=
2832 container_of(work
, struct virtnet_info
, refill
.work
);
2836 for (i
= 0; i
< vi
->curr_queue_pairs
; i
++) {
2837 struct receive_queue
*rq
= &vi
->rq
[i
];
2839 napi_disable(&rq
->napi
);
2840 still_empty
= !try_fill_recv(vi
, rq
, GFP_KERNEL
);
2841 virtnet_napi_enable(rq
->vq
, &rq
->napi
);
2843 /* In theory, this can happen: if we don't get any buffers in
2844 * we will *never* try to fill again.
2847 schedule_delayed_work(&vi
->refill
, HZ
/2);
2851 static int virtnet_receive_xsk_bufs(struct virtnet_info
*vi
,
2852 struct receive_queue
*rq
,
2854 unsigned int *xdp_xmit
,
2855 struct virtnet_rq_stats
*stats
)
2861 while (packets
< budget
) {
2862 buf
= virtqueue_get_buf(rq
->vq
, &len
);
2866 virtnet_receive_xsk_buf(vi
, rq
, buf
, len
, xdp_xmit
, stats
);
2873 static int virtnet_receive_packets(struct virtnet_info
*vi
,
2874 struct receive_queue
*rq
,
2876 unsigned int *xdp_xmit
,
2877 struct virtnet_rq_stats
*stats
)
2883 if (!vi
->big_packets
|| vi
->mergeable_rx_bufs
) {
2885 while (packets
< budget
&&
2886 (buf
= virtnet_rq_get_buf(rq
, &len
, &ctx
))) {
2887 receive_buf(vi
, rq
, buf
, len
, ctx
, xdp_xmit
, stats
);
2891 while (packets
< budget
&&
2892 (buf
= virtqueue_get_buf(rq
->vq
, &len
)) != NULL
) {
2893 receive_buf(vi
, rq
, buf
, len
, NULL
, xdp_xmit
, stats
);
2901 static int virtnet_receive(struct receive_queue
*rq
, int budget
,
2902 unsigned int *xdp_xmit
)
2904 struct virtnet_info
*vi
= rq
->vq
->vdev
->priv
;
2905 struct virtnet_rq_stats stats
= {};
2909 packets
= virtnet_receive_xsk_bufs(vi
, rq
, budget
, xdp_xmit
, &stats
);
2911 packets
= virtnet_receive_packets(vi
, rq
, budget
, xdp_xmit
, &stats
);
2913 if (rq
->vq
->num_free
> min((unsigned int)budget
, virtqueue_get_vring_size(rq
->vq
)) / 2) {
2914 if (!try_fill_recv(vi
, rq
, GFP_ATOMIC
)) {
2915 spin_lock(&vi
->refill_lock
);
2916 if (vi
->refill_enabled
)
2917 schedule_delayed_work(&vi
->refill
, 0);
2918 spin_unlock(&vi
->refill_lock
);
2922 u64_stats_set(&stats
.packets
, packets
);
2923 u64_stats_update_begin(&rq
->stats
.syncp
);
2924 for (i
= 0; i
< ARRAY_SIZE(virtnet_rq_stats_desc
); i
++) {
2925 size_t offset
= virtnet_rq_stats_desc
[i
].offset
;
2926 u64_stats_t
*item
, *src
;
2928 item
= (u64_stats_t
*)((u8
*)&rq
->stats
+ offset
);
2929 src
= (u64_stats_t
*)((u8
*)&stats
+ offset
);
2930 u64_stats_add(item
, u64_stats_read(src
));
2933 u64_stats_add(&rq
->stats
.packets
, u64_stats_read(&stats
.packets
));
2934 u64_stats_add(&rq
->stats
.bytes
, u64_stats_read(&stats
.bytes
));
2936 u64_stats_update_end(&rq
->stats
.syncp
);
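
/* Sketch of the offset-driven stats folding done above: each entry of
 * virtnet_rq_stats_desc records the member offset inside struct
 * virtnet_rq_stats, so one loop can fold every per-NAPI counter into the
 * long-lived per-queue counters without naming each field:
 *
 *	size_t off = virtnet_rq_stats_desc[i].offset;
 *	u64_stats_t *dst = (u64_stats_t *)((u8 *)&rq->stats + off);
 *	u64_stats_t *src = (u64_stats_t *)((u8 *)&stats + off);
 *
 *	u64_stats_add(dst, u64_stats_read(src));
 *
 * Adding a new counter therefore only requires a new descriptor entry.
 */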
2941 static void virtnet_poll_cleantx(struct receive_queue
*rq
, int budget
)
2943 struct virtnet_info
*vi
= rq
->vq
->vdev
->priv
;
2944 unsigned int index
= vq2rxq(rq
->vq
);
2945 struct send_queue
*sq
= &vi
->sq
[index
];
2946 struct netdev_queue
*txq
= netdev_get_tx_queue(vi
->dev
, index
);
2948 if (!sq
->napi
.weight
|| is_xdp_raw_buffer_queue(vi
, index
))
2951 if (__netif_tx_trylock(txq
)) {
2953 __netif_tx_unlock(txq
);
2958 virtqueue_disable_cb(sq
->vq
);
2959 free_old_xmit(sq
, txq
, !!budget
);
2960 } while (unlikely(!virtqueue_enable_cb_delayed(sq
->vq
)));
2962 if (sq
->vq
->num_free
>= 2 + MAX_SKB_FRAGS
) {
2963 if (netif_tx_queue_stopped(txq
)) {
2964 u64_stats_update_begin(&sq
->stats
.syncp
);
2965 u64_stats_inc(&sq
->stats
.wake
);
2966 u64_stats_update_end(&sq
->stats
.syncp
);
2968 netif_tx_wake_queue(txq
);
2971 __netif_tx_unlock(txq
);
2975 static void virtnet_rx_dim_update(struct virtnet_info
*vi
, struct receive_queue
*rq
)
2977 struct dim_sample cur_sample
= {};
2979 if (!rq
->packets_in_napi
)
2982 /* Don't need protection when fetching stats, since fetcher and
2983 * updater of the stats are in same context
2985 dim_update_sample(rq
->calls
,
2986 u64_stats_read(&rq
->stats
.packets
),
2987 u64_stats_read(&rq
->stats
.bytes
),
2990 net_dim(&rq
->dim
, &cur_sample
);
2991 rq
->packets_in_napi
= 0;
2994 static int virtnet_poll(struct napi_struct
*napi
, int budget
)
2996 struct receive_queue
*rq
=
2997 container_of(napi
, struct receive_queue
, napi
);
2998 struct virtnet_info
*vi
= rq
->vq
->vdev
->priv
;
2999 struct send_queue
*sq
;
3000 unsigned int received
;
3001 unsigned int xdp_xmit
= 0;
3004 virtnet_poll_cleantx(rq
, budget
);
3006 received
= virtnet_receive(rq
, budget
, &xdp_xmit
);
3007 rq
->packets_in_napi
+= received
;
3009 if (xdp_xmit
& VIRTIO_XDP_REDIR
)
3012 /* Out of packets? */
3013 if (received
< budget
) {
3014 napi_complete
= virtqueue_napi_complete(napi
, rq
->vq
, received
);
3015 /* Intentionally not taking dim_lock here. This may result in a
3016 * spurious net_dim call. But if that happens virtnet_rx_dim_work
3017 * will not act on the scheduled work.
3019 if (napi_complete
&& rq
->dim_enabled
)
3020 virtnet_rx_dim_update(vi
, rq
);
3023 if (xdp_xmit
& VIRTIO_XDP_TX
) {
3024 sq
= virtnet_xdp_get_sq(vi
);
3025 if (virtqueue_kick_prepare(sq
->vq
) && virtqueue_notify(sq
->vq
)) {
3026 u64_stats_update_begin(&sq
->stats
.syncp
);
3027 u64_stats_inc(&sq
->stats
.kicks
);
3028 u64_stats_update_end(&sq
->stats
.syncp
);
3030 virtnet_xdp_put_sq(vi
, sq
);
3036 static void virtnet_disable_queue_pair(struct virtnet_info
*vi
, int qp_index
)
3038 virtnet_napi_tx_disable(&vi
->sq
[qp_index
].napi
);
3039 napi_disable(&vi
->rq
[qp_index
].napi
);
3040 xdp_rxq_info_unreg(&vi
->rq
[qp_index
].xdp_rxq
);
3043 static int virtnet_enable_queue_pair(struct virtnet_info
*vi
, int qp_index
)
3045 struct net_device
*dev
= vi
->dev
;
3048 err
= xdp_rxq_info_reg(&vi
->rq
[qp_index
].xdp_rxq
, dev
, qp_index
,
3049 vi
->rq
[qp_index
].napi
.napi_id
);
3053 err
= xdp_rxq_info_reg_mem_model(&vi
->rq
[qp_index
].xdp_rxq
,
3054 MEM_TYPE_PAGE_SHARED
, NULL
);
3056 goto err_xdp_reg_mem_model
;
3058 virtnet_napi_enable(vi
->rq
[qp_index
].vq
, &vi
->rq
[qp_index
].napi
);
3059 virtnet_napi_tx_enable(vi
, vi
->sq
[qp_index
].vq
, &vi
->sq
[qp_index
].napi
);
3063 err_xdp_reg_mem_model
:
3064 xdp_rxq_info_unreg(&vi
->rq
[qp_index
].xdp_rxq
);
3068 static void virtnet_cancel_dim(struct virtnet_info
*vi
, struct dim
*dim
)
3070 if (!virtio_has_feature(vi
->vdev
, VIRTIO_NET_F_VQ_NOTF_COAL
))
3072 net_dim_work_cancel(dim
);
3075 static void virtnet_update_settings(struct virtnet_info
*vi
)
3080 if (!virtio_has_feature(vi
->vdev
, VIRTIO_NET_F_SPEED_DUPLEX
))
3083 virtio_cread_le(vi
->vdev
, struct virtio_net_config
, speed
, &speed
);
3085 if (ethtool_validate_speed(speed
))
3088 virtio_cread_le(vi
->vdev
, struct virtio_net_config
, duplex
, &duplex
);
3090 if (ethtool_validate_duplex(duplex
))
3091 vi
->duplex
= duplex
;
3094 static int virtnet_open(struct net_device
*dev
)
3096 struct virtnet_info
*vi
= netdev_priv(dev
);
3099 enable_delayed_refill(vi
);
3101 for (i
= 0; i
< vi
->max_queue_pairs
; i
++) {
3102 if (i
< vi
->curr_queue_pairs
)
3103 /* Make sure we have some buffers: if oom use wq. */
3104 if (!try_fill_recv(vi
, &vi
->rq
[i
], GFP_KERNEL
))
3105 schedule_delayed_work(&vi
->refill
, 0);
3107 err
= virtnet_enable_queue_pair(vi
, i
);
3112 if (virtio_has_feature(vi
->vdev
, VIRTIO_NET_F_STATUS
)) {
3113 if (vi
->status
& VIRTIO_NET_S_LINK_UP
)
3114 netif_carrier_on(vi
->dev
);
3115 virtio_config_driver_enable(vi
->vdev
);
3117 vi
->status
= VIRTIO_NET_S_LINK_UP
;
3118 netif_carrier_on(dev
);
3124 disable_delayed_refill(vi
);
3125 cancel_delayed_work_sync(&vi
->refill
);
3127 for (i
--; i
>= 0; i
--) {
3128 virtnet_disable_queue_pair(vi
, i
);
3129 virtnet_cancel_dim(vi
, &vi
->rq
[i
].dim
);
3135 static int virtnet_poll_tx(struct napi_struct
*napi
, int budget
)
3137 struct send_queue
*sq
= container_of(napi
, struct send_queue
, napi
);
3138 struct virtnet_info
*vi
= sq
->vq
->vdev
->priv
;
3139 unsigned int index
= vq2txq(sq
->vq
);
3140 struct netdev_queue
*txq
;
3141 int opaque
, xsk_done
= 0;
3144 if (unlikely(is_xdp_raw_buffer_queue(vi
, index
))) {
3145 /* We don't need to enable cb for XDP */
3146 napi_complete_done(napi
, 0);
3150 txq
= netdev_get_tx_queue(vi
->dev
, index
);
3151 __netif_tx_lock(txq
, raw_smp_processor_id());
3152 virtqueue_disable_cb(sq
->vq
);
3155 xsk_done
= virtnet_xsk_xmit(sq
, sq
->xsk_pool
, budget
);
3157 free_old_xmit(sq
, txq
, !!budget
);
3159 if (sq
->vq
->num_free
>= 2 + MAX_SKB_FRAGS
) {
3160 if (netif_tx_queue_stopped(txq
)) {
3161 u64_stats_update_begin(&sq
->stats
.syncp
);
3162 u64_stats_inc(&sq
->stats
.wake
);
3163 u64_stats_update_end(&sq
->stats
.syncp
);
3165 netif_tx_wake_queue(txq
);
3168 if (xsk_done
>= budget
) {
3169 __netif_tx_unlock(txq
);
3173 opaque
= virtqueue_enable_cb_prepare(sq
->vq
);
3175 done
= napi_complete_done(napi
, 0);
3178 virtqueue_disable_cb(sq
->vq
);
3180 __netif_tx_unlock(txq
);
3183 if (unlikely(virtqueue_poll(sq
->vq
, opaque
))) {
3184 if (napi_schedule_prep(napi
)) {
3185 __netif_tx_lock(txq
, raw_smp_processor_id());
3186 virtqueue_disable_cb(sq
->vq
);
3187 __netif_tx_unlock(txq
);
3188 __napi_schedule(napi
);
static int xmit_skb(struct send_queue *sq, struct sk_buff *skb, bool orphan)
{
	struct virtio_net_hdr_mrg_rxbuf *hdr;
	const unsigned char *dest = ((struct ethhdr *)skb->data)->h_dest;
	struct virtnet_info *vi = sq->vq->vdev->priv;
	int num_sg;
	unsigned hdr_len = vi->hdr_len;
	bool can_push;

	pr_debug("%s: xmit %p %pM\n", vi->dev->name, skb, dest);

	can_push = vi->any_header_sg &&
		!((unsigned long)skb->data & (__alignof__(*hdr) - 1)) &&
		!skb_header_cloned(skb) && skb_headroom(skb) >= hdr_len;
	/* Even if we can, don't push here yet as this would skew
	 * csum_start offset below. */
	if (can_push)
		hdr = (struct virtio_net_hdr_mrg_rxbuf *)(skb->data - hdr_len);
	else
		hdr = &skb_vnet_common_hdr(skb)->mrg_hdr;

	if (virtio_net_hdr_from_skb(skb, &hdr->hdr,
				    virtio_is_little_endian(vi->vdev), false,
				    0))
		return -EPROTO;

	if (vi->mergeable_rx_bufs)
		hdr->num_buffers = 0;

	sg_init_table(sq->sg, skb_shinfo(skb)->nr_frags + (can_push ? 1 : 2));
	if (can_push) {
		__skb_push(skb, hdr_len);
		num_sg = skb_to_sgvec(skb, sq->sg, 0, skb->len);
		if (unlikely(num_sg < 0))
			return num_sg;
		/* Pull header back to avoid skew in tx bytes calculations. */
		__skb_pull(skb, hdr_len);
	} else {
		sg_set_buf(sq->sg, hdr, hdr_len);
		num_sg = skb_to_sgvec(skb, sq->sg + 1, 0, skb->len);
		if (unlikely(num_sg < 0))
			return num_sg;
		num_sg++;
	}

	return virtnet_add_outbuf(sq, num_sg, skb,
				  orphan ? VIRTNET_XMIT_TYPE_SKB_ORPHAN : VIRTNET_XMIT_TYPE_SKB);
}
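
/* Scatterlist layouts produced by xmit_skb() (sketch):
 *
 *   can_push (any_header_sg, aligned data, enough headroom):
 *	sg[0]    = virtio-net header + linear data (header pushed into skb)
 *	sg[1..n] = skb frags
 *
 *   !can_push:
 *	sg[0]    = out-of-band virtio-net header kept alongside the skb
 *	sg[1]    = linear data
 *	sg[2..n] = skb frags
 *
 * which is why sg_init_table() reserves nr_frags + 1 or + 2 entries.
 */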
3245 static netdev_tx_t
start_xmit(struct sk_buff
*skb
, struct net_device
*dev
)
3247 struct virtnet_info
*vi
= netdev_priv(dev
);
3248 int qnum
= skb_get_queue_mapping(skb
);
3249 struct send_queue
*sq
= &vi
->sq
[qnum
];
3251 struct netdev_queue
*txq
= netdev_get_tx_queue(dev
, qnum
);
3252 bool xmit_more
= netdev_xmit_more();
3253 bool use_napi
= sq
->napi
.weight
;
3256 /* Free up any pending old buffers before queueing new ones. */
3259 virtqueue_disable_cb(sq
->vq
);
3261 free_old_xmit(sq
, txq
, false);
3263 } while (use_napi
&& !xmit_more
&&
3264 unlikely(!virtqueue_enable_cb_delayed(sq
->vq
)));
3266 /* timestamp packet in software */
3267 skb_tx_timestamp(skb
);
3269 /* Try to transmit */
3270 err
= xmit_skb(sq
, skb
, !use_napi
);
3272 /* This should not happen! */
3273 if (unlikely(err
)) {
3274 DEV_STATS_INC(dev
, tx_fifo_errors
);
3275 if (net_ratelimit())
3277 "Unexpected TXQ (%d) queue failure: %d\n",
3279 DEV_STATS_INC(dev
, tx_dropped
);
3280 dev_kfree_skb_any(skb
);
3281 return NETDEV_TX_OK
;
3284 /* Don't wait up for transmitted skbs to be freed. */
3290 check_sq_full_and_disable(vi
, dev
, sq
);
3292 kick
= use_napi
? __netdev_tx_sent_queue(txq
, skb
->len
, xmit_more
) :
3293 !xmit_more
|| netif_xmit_stopped(txq
);
3295 if (virtqueue_kick_prepare(sq
->vq
) && virtqueue_notify(sq
->vq
)) {
3296 u64_stats_update_begin(&sq
->stats
.syncp
);
3297 u64_stats_inc(&sq
->stats
.kicks
);
3298 u64_stats_update_end(&sq
->stats
.syncp
);
3302 return NETDEV_TX_OK
;
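
/* Sketch of the kick decision in start_xmit(): with TX NAPI the BQL
 * helper decides whether the device must be notified, otherwise the
 * driver kicks unless more packets are known to follow:
 *
 *	kick = use_napi ? __netdev_tx_sent_queue(txq, skb->len, xmit_more) :
 *			  !xmit_more || netif_xmit_stopped(txq);
 *
 * Batching kicks this way avoids one device notification (and VM exit)
 * per packet whenever the stack signals xmit_more.
 */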
3305 static void virtnet_rx_pause(struct virtnet_info
*vi
, struct receive_queue
*rq
)
3307 bool running
= netif_running(vi
->dev
);
3310 napi_disable(&rq
->napi
);
3311 virtnet_cancel_dim(vi
, &rq
->dim
);
3315 static void virtnet_rx_resume(struct virtnet_info
*vi
, struct receive_queue
*rq
)
3317 bool running
= netif_running(vi
->dev
);
3319 if (!try_fill_recv(vi
, rq
, GFP_KERNEL
))
3320 schedule_delayed_work(&vi
->refill
, 0);
3323 virtnet_napi_enable(rq
->vq
, &rq
->napi
);
3326 static int virtnet_rx_resize(struct virtnet_info
*vi
,
3327 struct receive_queue
*rq
, u32 ring_num
)
3331 qindex
= rq
- vi
->rq
;
3333 virtnet_rx_pause(vi
, rq
);
3335 err
= virtqueue_resize(rq
->vq
, ring_num
, virtnet_rq_unmap_free_buf
, NULL
);
3337 netdev_err(vi
->dev
, "resize rx fail: rx queue index: %d err: %d\n", qindex
, err
);
3339 virtnet_rx_resume(vi
, rq
);
3343 static void virtnet_tx_pause(struct virtnet_info
*vi
, struct send_queue
*sq
)
3345 bool running
= netif_running(vi
->dev
);
3346 struct netdev_queue
*txq
;
3349 qindex
= sq
- vi
->sq
;
3352 virtnet_napi_tx_disable(&sq
->napi
);
3354 txq
= netdev_get_tx_queue(vi
->dev
, qindex
);
	/* 1. wait for all in-flight xmit to complete
	 * 2. fix the race of netif_stop_subqueue() vs netif_start_subqueue()
	 */
3359 __netif_tx_lock_bh(txq
);
3361 /* Prevent rx poll from accessing sq. */
3364 /* Prevent the upper layer from trying to send packets. */
3365 netif_stop_subqueue(vi
->dev
, qindex
);
3367 __netif_tx_unlock_bh(txq
);
3370 static void virtnet_tx_resume(struct virtnet_info
*vi
, struct send_queue
*sq
)
3372 bool running
= netif_running(vi
->dev
);
3373 struct netdev_queue
*txq
;
3376 qindex
= sq
- vi
->sq
;
3378 txq
= netdev_get_tx_queue(vi
->dev
, qindex
);
3380 __netif_tx_lock_bh(txq
);
3382 netif_tx_wake_queue(txq
);
3383 __netif_tx_unlock_bh(txq
);
3386 virtnet_napi_tx_enable(vi
, sq
->vq
, &sq
->napi
);
3389 static int virtnet_tx_resize(struct virtnet_info
*vi
, struct send_queue
*sq
,
3394 qindex
= sq
- vi
->sq
;
3396 virtnet_tx_pause(vi
, sq
);
3398 err
= virtqueue_resize(sq
->vq
, ring_num
, virtnet_sq_free_unused_buf
,
3399 virtnet_sq_free_unused_buf_done
);
3401 netdev_err(vi
->dev
, "resize tx fail: tx queue index: %d err: %d\n", qindex
, err
);
3403 virtnet_tx_resume(vi
, sq
);
 * Send command via the control virtqueue and check status. Commands
 * supported by the hypervisor, as indicated by feature bits, should
 * never fail unless improperly formatted.
 */
static bool virtnet_send_command_reply(struct virtnet_info *vi, u8 class, u8 cmd,
				       struct scatterlist *out,
				       struct scatterlist *in)
{
	struct scatterlist *sgs[5], hdr, stat;
	u32 out_num = 0, tmp, in_num = 0;
	bool ok;
	int ret;

	/* Caller should know better */
	BUG_ON(!virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VQ));

	mutex_lock(&vi->cvq_lock);
	vi->ctrl->status = ~0;
	vi->ctrl->hdr.class = class;
	vi->ctrl->hdr.cmd = cmd;
	/* Add header */
	sg_init_one(&hdr, &vi->ctrl->hdr, sizeof(vi->ctrl->hdr));
	sgs[out_num++] = &hdr;

	if (out)
		sgs[out_num++] = out;

	/* Add return status. */
	sg_init_one(&stat, &vi->ctrl->status, sizeof(vi->ctrl->status));
	sgs[out_num + in_num++] = &stat;

	if (in)
		sgs[out_num + in_num++] = in;

	BUG_ON(out_num + in_num > ARRAY_SIZE(sgs));
	ret = virtqueue_add_sgs(vi->cvq, sgs, out_num, in_num, vi, GFP_ATOMIC);
	if (ret < 0) {
		dev_warn(&vi->vdev->dev,
			 "Failed to add sgs for command vq: %d\n", ret);
		mutex_unlock(&vi->cvq_lock);
		return false;
	}

	if (unlikely(!virtqueue_kick(vi->cvq)))
		goto unlock;

	/* Spin for a response, the kick causes an ioport write, trapping
	 * into the hypervisor, so the request should be handled immediately.
	 */
	while (!virtqueue_get_buf(vi->cvq, &tmp) &&
	       !virtqueue_is_broken(vi->cvq)) {
		cond_resched();
		cpu_relax();
	}

unlock:
	ok = vi->ctrl->status == VIRTIO_NET_OK;
	mutex_unlock(&vi->cvq_lock);
	return ok;
}
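
/* Control virtqueue descriptor layout built above (sketch):
 *
 *	sgs[0] = header  { class, cmd }           driver -> device
 *	sgs[1] = out     (optional command data)  driver -> device
 *	sgs[2] = status  (one byte ack)           device -> driver
 *	sgs[3] = in      (optional reply data)    device -> driver
 *
 * A minimal caller, matching how the rest of this file uses it:
 *
 *	struct scatterlist sg;
 *
 *	sg_init_one(&sg, &data, sizeof(data));
 *	if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_RX,
 *				  VIRTIO_NET_CTRL_RX_PROMISC, &sg))
 *		dev_warn(&dev->dev, "command failed\n");
 */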
3470 static bool virtnet_send_command(struct virtnet_info
*vi
, u8
class, u8 cmd
,
3471 struct scatterlist
*out
)
3473 return virtnet_send_command_reply(vi
, class, cmd
, out
, NULL
);
3476 static int virtnet_set_mac_address(struct net_device
*dev
, void *p
)
3478 struct virtnet_info
*vi
= netdev_priv(dev
);
3479 struct virtio_device
*vdev
= vi
->vdev
;
3481 struct sockaddr
*addr
;
3482 struct scatterlist sg
;
3484 if (virtio_has_feature(vi
->vdev
, VIRTIO_NET_F_STANDBY
))
3487 addr
= kmemdup(p
, sizeof(*addr
), GFP_KERNEL
);
3491 ret
= eth_prepare_mac_addr_change(dev
, addr
);
3495 if (virtio_has_feature(vdev
, VIRTIO_NET_F_CTRL_MAC_ADDR
)) {
3496 sg_init_one(&sg
, addr
->sa_data
, dev
->addr_len
);
3497 if (!virtnet_send_command(vi
, VIRTIO_NET_CTRL_MAC
,
3498 VIRTIO_NET_CTRL_MAC_ADDR_SET
, &sg
)) {
3499 dev_warn(&vdev
->dev
,
3500 "Failed to set mac address by vq command.\n");
3504 } else if (virtio_has_feature(vdev
, VIRTIO_NET_F_MAC
) &&
3505 !virtio_has_feature(vdev
, VIRTIO_F_VERSION_1
)) {
3508 /* Naturally, this has an atomicity problem. */
3509 for (i
= 0; i
< dev
->addr_len
; i
++)
3510 virtio_cwrite8(vdev
,
3511 offsetof(struct virtio_net_config
, mac
) +
3512 i
, addr
->sa_data
[i
]);
3515 eth_commit_mac_addr_change(dev
, p
);
3523 static void virtnet_stats(struct net_device
*dev
,
3524 struct rtnl_link_stats64
*tot
)
3526 struct virtnet_info
*vi
= netdev_priv(dev
);
3530 for (i
= 0; i
< vi
->max_queue_pairs
; i
++) {
3531 u64 tpackets
, tbytes
, terrors
, rpackets
, rbytes
, rdrops
;
3532 struct receive_queue
*rq
= &vi
->rq
[i
];
3533 struct send_queue
*sq
= &vi
->sq
[i
];
3536 start
= u64_stats_fetch_begin(&sq
->stats
.syncp
);
3537 tpackets
= u64_stats_read(&sq
->stats
.packets
);
3538 tbytes
= u64_stats_read(&sq
->stats
.bytes
);
3539 terrors
= u64_stats_read(&sq
->stats
.tx_timeouts
);
3540 } while (u64_stats_fetch_retry(&sq
->stats
.syncp
, start
));
3543 start
= u64_stats_fetch_begin(&rq
->stats
.syncp
);
3544 rpackets
= u64_stats_read(&rq
->stats
.packets
);
3545 rbytes
= u64_stats_read(&rq
->stats
.bytes
);
3546 rdrops
= u64_stats_read(&rq
->stats
.drops
);
3547 } while (u64_stats_fetch_retry(&rq
->stats
.syncp
, start
));
3549 tot
->rx_packets
+= rpackets
;
3550 tot
->tx_packets
+= tpackets
;
3551 tot
->rx_bytes
+= rbytes
;
3552 tot
->tx_bytes
+= tbytes
;
3553 tot
->rx_dropped
+= rdrops
;
3554 tot
->tx_errors
+= terrors
;
3557 tot
->tx_dropped
= DEV_STATS_READ(dev
, tx_dropped
);
3558 tot
->tx_fifo_errors
= DEV_STATS_READ(dev
, tx_fifo_errors
);
3559 tot
->rx_length_errors
= DEV_STATS_READ(dev
, rx_length_errors
);
3560 tot
->rx_frame_errors
= DEV_STATS_READ(dev
, rx_frame_errors
);
3563 static void virtnet_ack_link_announce(struct virtnet_info
*vi
)
3565 if (!virtnet_send_command(vi
, VIRTIO_NET_CTRL_ANNOUNCE
,
3566 VIRTIO_NET_CTRL_ANNOUNCE_ACK
, NULL
))
3567 dev_warn(&vi
->dev
->dev
, "Failed to ack link announce.\n");
3570 static bool virtnet_commit_rss_command(struct virtnet_info
*vi
);
3572 static void virtnet_rss_update_by_qpairs(struct virtnet_info
*vi
, u16 queue_pairs
)
3577 for (; i
< vi
->rss_indir_table_size
; ++i
) {
3578 indir_val
= ethtool_rxfh_indir_default(i
, queue_pairs
);
3579 vi
->rss
.indirection_table
[i
] = indir_val
;
3581 vi
->rss
.max_tx_vq
= queue_pairs
;
3584 static int virtnet_set_queues(struct virtnet_info
*vi
, u16 queue_pairs
)
3586 struct virtio_net_ctrl_mq
*mq
__free(kfree
) = NULL
;
3587 struct virtio_net_ctrl_rss old_rss
;
3588 struct net_device
*dev
= vi
->dev
;
3589 struct scatterlist sg
;
3591 if (!vi
->has_cvq
|| !virtio_has_feature(vi
->vdev
, VIRTIO_NET_F_MQ
))
	/* First, check whether we need to update RSS. Only do so if both
	 * (1) RSS is enabled and (2) there is no user configuration.
	 *
	 * During RSS command processing, the device updates queue_pairs using
	 * rss.max_tx_vq. That is, the device updates queue_pairs together with
	 * RSS, so we can skip the separate queue_pairs update
	 * (VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET below) and return directly.
	 */
3601 if (vi
->has_rss
&& !netif_is_rxfh_configured(dev
)) {
3602 memcpy(&old_rss
, &vi
->rss
, sizeof(old_rss
));
3603 if (rss_indirection_table_alloc(&vi
->rss
, vi
->rss_indir_table_size
)) {
3604 vi
->rss
.indirection_table
= old_rss
.indirection_table
;
3608 virtnet_rss_update_by_qpairs(vi
, queue_pairs
);
3610 if (!virtnet_commit_rss_command(vi
)) {
3611 /* restore ctrl_rss if commit_rss_command failed */
3612 rss_indirection_table_free(&vi
->rss
);
3613 memcpy(&vi
->rss
, &old_rss
, sizeof(old_rss
));
3615 dev_warn(&dev
->dev
, "Fail to set num of queue pairs to %d, because committing RSS failed\n",
3619 rss_indirection_table_free(&old_rss
);
3623 mq
= kzalloc(sizeof(*mq
), GFP_KERNEL
);
3627 mq
->virtqueue_pairs
= cpu_to_virtio16(vi
->vdev
, queue_pairs
);
3628 sg_init_one(&sg
, mq
, sizeof(*mq
));
3630 if (!virtnet_send_command(vi
, VIRTIO_NET_CTRL_MQ
,
3631 VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET
, &sg
)) {
3632 dev_warn(&dev
->dev
, "Fail to set num of queue pairs to %d\n",
3637 vi
->curr_queue_pairs
= queue_pairs
;
3638 /* virtnet_open() will refill when device is going to up. */
3639 if (dev
->flags
& IFF_UP
)
3640 schedule_delayed_work(&vi
->refill
, 0);
3645 static int virtnet_close(struct net_device
*dev
)
3647 struct virtnet_info
*vi
= netdev_priv(dev
);
3650 /* Make sure NAPI doesn't schedule refill work */
3651 disable_delayed_refill(vi
);
3652 /* Make sure refill_work doesn't re-enable napi! */
3653 cancel_delayed_work_sync(&vi
->refill
);
3654 /* Prevent the config change callback from changing carrier
3657 virtio_config_driver_disable(vi
->vdev
);
3658 /* Stop getting status/speed updates: we don't care until next
3661 cancel_work_sync(&vi
->config_work
);
3663 for (i
= 0; i
< vi
->max_queue_pairs
; i
++) {
3664 virtnet_disable_queue_pair(vi
, i
);
3665 virtnet_cancel_dim(vi
, &vi
->rq
[i
].dim
);
3668 netif_carrier_off(dev
);
3673 static void virtnet_rx_mode_work(struct work_struct
*work
)
3675 struct virtnet_info
*vi
=
3676 container_of(work
, struct virtnet_info
, rx_mode_work
);
3677 u8
*promisc_allmulti
__free(kfree
) = NULL
;
3678 struct net_device
*dev
= vi
->dev
;
3679 struct scatterlist sg
[2];
3680 struct virtio_net_ctrl_mac
*mac_data
;
3681 struct netdev_hw_addr
*ha
;
3687 /* We can't dynamically set ndo_set_rx_mode, so return gracefully */
3688 if (!virtio_has_feature(vi
->vdev
, VIRTIO_NET_F_CTRL_RX
))
3691 promisc_allmulti
= kzalloc(sizeof(*promisc_allmulti
), GFP_KERNEL
);
3692 if (!promisc_allmulti
) {
3693 dev_warn(&dev
->dev
, "Failed to set RX mode, no memory.\n");
3699 *promisc_allmulti
= !!(dev
->flags
& IFF_PROMISC
);
3700 sg_init_one(sg
, promisc_allmulti
, sizeof(*promisc_allmulti
));
3702 if (!virtnet_send_command(vi
, VIRTIO_NET_CTRL_RX
,
3703 VIRTIO_NET_CTRL_RX_PROMISC
, sg
))
3704 dev_warn(&dev
->dev
, "Failed to %sable promisc mode.\n",
3705 *promisc_allmulti
? "en" : "dis");
3707 *promisc_allmulti
= !!(dev
->flags
& IFF_ALLMULTI
);
3708 sg_init_one(sg
, promisc_allmulti
, sizeof(*promisc_allmulti
));
3710 if (!virtnet_send_command(vi
, VIRTIO_NET_CTRL_RX
,
3711 VIRTIO_NET_CTRL_RX_ALLMULTI
, sg
))
3712 dev_warn(&dev
->dev
, "Failed to %sable allmulti mode.\n",
3713 *promisc_allmulti
? "en" : "dis");
3715 netif_addr_lock_bh(dev
);
3717 uc_count
= netdev_uc_count(dev
);
3718 mc_count
= netdev_mc_count(dev
);
3719 /* MAC filter - use one buffer for both lists */
3720 buf
= kzalloc(((uc_count
+ mc_count
) * ETH_ALEN
) +
3721 (2 * sizeof(mac_data
->entries
)), GFP_ATOMIC
);
3724 netif_addr_unlock_bh(dev
);
3729 sg_init_table(sg
, 2);
3731 /* Store the unicast list and count in the front of the buffer */
3732 mac_data
->entries
= cpu_to_virtio32(vi
->vdev
, uc_count
);
3734 netdev_for_each_uc_addr(ha
, dev
)
3735 memcpy(&mac_data
->macs
[i
++][0], ha
->addr
, ETH_ALEN
);
3737 sg_set_buf(&sg
[0], mac_data
,
3738 sizeof(mac_data
->entries
) + (uc_count
* ETH_ALEN
));
3740 /* multicast list and count fill the end */
3741 mac_data
= (void *)&mac_data
->macs
[uc_count
][0];
3743 mac_data
->entries
= cpu_to_virtio32(vi
->vdev
, mc_count
);
3745 netdev_for_each_mc_addr(ha
, dev
)
3746 memcpy(&mac_data
->macs
[i
++][0], ha
->addr
, ETH_ALEN
);
3748 netif_addr_unlock_bh(dev
);
3750 sg_set_buf(&sg
[1], mac_data
,
3751 sizeof(mac_data
->entries
) + (mc_count
* ETH_ALEN
));
3753 if (!virtnet_send_command(vi
, VIRTIO_NET_CTRL_MAC
,
3754 VIRTIO_NET_CTRL_MAC_TABLE_SET
, sg
))
3755 dev_warn(&dev
->dev
, "Failed to set MAC filter table.\n");
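
/* Layout of the single buffer built for VIRTIO_NET_CTRL_MAC_TABLE_SET
 * above (sketch):
 *
 *	+------------+---------------------+------------+---------------------+
 *	| uc entries | uc_count * ETH_ALEN | mc entries | mc_count * ETH_ALEN |
 *	+------------+---------------------+------------+---------------------+
 *	  sg[0] covers the unicast half,     sg[1] covers the multicast half.
 *
 * Both tables share one allocation so the command can be sent with a
 * two-entry scatterlist instead of two separate commands.
 */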
3762 static void virtnet_set_rx_mode(struct net_device
*dev
)
3764 struct virtnet_info
*vi
= netdev_priv(dev
);
3766 if (vi
->rx_mode_work_enabled
)
3767 schedule_work(&vi
->rx_mode_work
);
3770 static int virtnet_vlan_rx_add_vid(struct net_device
*dev
,
3771 __be16 proto
, u16 vid
)
3773 struct virtnet_info
*vi
= netdev_priv(dev
);
3774 __virtio16
*_vid
__free(kfree
) = NULL
;
3775 struct scatterlist sg
;
3777 _vid
= kzalloc(sizeof(*_vid
), GFP_KERNEL
);
3781 *_vid
= cpu_to_virtio16(vi
->vdev
, vid
);
3782 sg_init_one(&sg
, _vid
, sizeof(*_vid
));
3784 if (!virtnet_send_command(vi
, VIRTIO_NET_CTRL_VLAN
,
3785 VIRTIO_NET_CTRL_VLAN_ADD
, &sg
))
3786 dev_warn(&dev
->dev
, "Failed to add VLAN ID %d.\n", vid
);
3790 static int virtnet_vlan_rx_kill_vid(struct net_device
*dev
,
3791 __be16 proto
, u16 vid
)
3793 struct virtnet_info
*vi
= netdev_priv(dev
);
3794 __virtio16
*_vid
__free(kfree
) = NULL
;
3795 struct scatterlist sg
;
3797 _vid
= kzalloc(sizeof(*_vid
), GFP_KERNEL
);
3801 *_vid
= cpu_to_virtio16(vi
->vdev
, vid
);
3802 sg_init_one(&sg
, _vid
, sizeof(*_vid
));
3804 if (!virtnet_send_command(vi
, VIRTIO_NET_CTRL_VLAN
,
3805 VIRTIO_NET_CTRL_VLAN_DEL
, &sg
))
3806 dev_warn(&dev
->dev
, "Failed to kill VLAN ID %d.\n", vid
);
3810 static void virtnet_clean_affinity(struct virtnet_info
*vi
)
3814 if (vi
->affinity_hint_set
) {
3815 for (i
= 0; i
< vi
->max_queue_pairs
; i
++) {
3816 virtqueue_set_affinity(vi
->rq
[i
].vq
, NULL
);
3817 virtqueue_set_affinity(vi
->sq
[i
].vq
, NULL
);
3820 vi
->affinity_hint_set
= false;
static void virtnet_set_affinity(struct virtnet_info *vi)
{
	cpumask_var_t mask;
	int stragglers;
	int group_size;
	int i, j, cpu;
	int num_cpu;
	int stride;

	if (!zalloc_cpumask_var(&mask, GFP_KERNEL)) {
		virtnet_clean_affinity(vi);
		return;
	}

	num_cpu = num_online_cpus();
	stride = max_t(int, num_cpu / vi->curr_queue_pairs, 1);
	stragglers = num_cpu >= vi->curr_queue_pairs ?
			num_cpu % vi->curr_queue_pairs :
			0;
	cpu = cpumask_first(cpu_online_mask);

	for (i = 0; i < vi->curr_queue_pairs; i++) {
		group_size = stride + (i < stragglers ? 1 : 0);

		for (j = 0; j < group_size; j++) {
			cpumask_set_cpu(cpu, mask);
			cpu = cpumask_next_wrap(cpu, cpu_online_mask,
						cpu, false);
		}
		virtqueue_set_affinity(vi->rq[i].vq, mask);
		virtqueue_set_affinity(vi->sq[i].vq, mask);
		__netif_set_xps_queue(vi->dev, cpumask_bits(mask), i, XPS_CPUS);
		cpumask_clear(mask);
	}

	vi->affinity_hint_set = true;
	free_cpumask_var(mask);
}
, struct hlist_node
*node
)
3865 struct virtnet_info
*vi
= hlist_entry_safe(node
, struct virtnet_info
,
3867 virtnet_set_affinity(vi
);
3871 static int virtnet_cpu_dead(unsigned int cpu
, struct hlist_node
*node
)
3873 struct virtnet_info
*vi
= hlist_entry_safe(node
, struct virtnet_info
,
3875 virtnet_set_affinity(vi
);
3879 static int virtnet_cpu_down_prep(unsigned int cpu
, struct hlist_node
*node
)
3881 struct virtnet_info
*vi
= hlist_entry_safe(node
, struct virtnet_info
,
3884 virtnet_clean_affinity(vi
);
3888 static enum cpuhp_state virtionet_online
;
3890 static int virtnet_cpu_notif_add(struct virtnet_info
*vi
)
3894 ret
= cpuhp_state_add_instance_nocalls(virtionet_online
, &vi
->node
);
3897 ret
= cpuhp_state_add_instance_nocalls(CPUHP_VIRT_NET_DEAD
,
3901 cpuhp_state_remove_instance_nocalls(virtionet_online
, &vi
->node
);
3905 static void virtnet_cpu_notif_remove(struct virtnet_info
*vi
)
3907 cpuhp_state_remove_instance_nocalls(virtionet_online
, &vi
->node
);
3908 cpuhp_state_remove_instance_nocalls(CPUHP_VIRT_NET_DEAD
,
3912 static int virtnet_send_ctrl_coal_vq_cmd(struct virtnet_info
*vi
,
3913 u16 vqn
, u32 max_usecs
, u32 max_packets
)
3915 struct virtio_net_ctrl_coal_vq
*coal_vq
__free(kfree
) = NULL
;
3916 struct scatterlist sgs
;
3918 coal_vq
= kzalloc(sizeof(*coal_vq
), GFP_KERNEL
);
3922 coal_vq
->vqn
= cpu_to_le16(vqn
);
3923 coal_vq
->coal
.max_usecs
= cpu_to_le32(max_usecs
);
3924 coal_vq
->coal
.max_packets
= cpu_to_le32(max_packets
);
3925 sg_init_one(&sgs
, coal_vq
, sizeof(*coal_vq
));
3927 if (!virtnet_send_command(vi
, VIRTIO_NET_CTRL_NOTF_COAL
,
3928 VIRTIO_NET_CTRL_NOTF_COAL_VQ_SET
,
3935 static int virtnet_send_rx_ctrl_coal_vq_cmd(struct virtnet_info
*vi
,
3936 u16 queue
, u32 max_usecs
,
3941 if (!virtio_has_feature(vi
->vdev
, VIRTIO_NET_F_VQ_NOTF_COAL
))
3944 err
= virtnet_send_ctrl_coal_vq_cmd(vi
, rxq2vq(queue
),
3945 max_usecs
, max_packets
);
3949 vi
->rq
[queue
].intr_coal
.max_usecs
= max_usecs
;
3950 vi
->rq
[queue
].intr_coal
.max_packets
= max_packets
;
3955 static int virtnet_send_tx_ctrl_coal_vq_cmd(struct virtnet_info
*vi
,
3956 u16 queue
, u32 max_usecs
,
3961 if (!virtio_has_feature(vi
->vdev
, VIRTIO_NET_F_VQ_NOTF_COAL
))
3964 err
= virtnet_send_ctrl_coal_vq_cmd(vi
, txq2vq(queue
),
3965 max_usecs
, max_packets
);
3969 vi
->sq
[queue
].intr_coal
.max_usecs
= max_usecs
;
3970 vi
->sq
[queue
].intr_coal
.max_packets
= max_packets
;
3975 static void virtnet_get_ringparam(struct net_device
*dev
,
3976 struct ethtool_ringparam
*ring
,
3977 struct kernel_ethtool_ringparam
*kernel_ring
,
3978 struct netlink_ext_ack
*extack
)
3980 struct virtnet_info
*vi
= netdev_priv(dev
);
3982 ring
->rx_max_pending
= vi
->rq
[0].vq
->num_max
;
3983 ring
->tx_max_pending
= vi
->sq
[0].vq
->num_max
;
3984 ring
->rx_pending
= virtqueue_get_vring_size(vi
->rq
[0].vq
);
3985 ring
->tx_pending
= virtqueue_get_vring_size(vi
->sq
[0].vq
);
3988 static int virtnet_set_ringparam(struct net_device
*dev
,
3989 struct ethtool_ringparam
*ring
,
3990 struct kernel_ethtool_ringparam
*kernel_ring
,
3991 struct netlink_ext_ack
*extack
)
3993 struct virtnet_info
*vi
= netdev_priv(dev
);
3994 u32 rx_pending
, tx_pending
;
3995 struct receive_queue
*rq
;
3996 struct send_queue
*sq
;
3999 if (ring
->rx_mini_pending
|| ring
->rx_jumbo_pending
)
4002 rx_pending
= virtqueue_get_vring_size(vi
->rq
[0].vq
);
4003 tx_pending
= virtqueue_get_vring_size(vi
->sq
[0].vq
);
4005 if (ring
->rx_pending
== rx_pending
&&
4006 ring
->tx_pending
== tx_pending
)
4009 if (ring
->rx_pending
> vi
->rq
[0].vq
->num_max
)
4012 if (ring
->tx_pending
> vi
->sq
[0].vq
->num_max
)
4015 for (i
= 0; i
< vi
->max_queue_pairs
; i
++) {
4019 if (ring
->tx_pending
!= tx_pending
) {
4020 err
= virtnet_tx_resize(vi
, sq
, ring
->tx_pending
);
4024 /* Upon disabling and re-enabling a transmit virtqueue, the device must
4025 * set the coalescing parameters of the virtqueue to those configured
4026 * through the VIRTIO_NET_CTRL_NOTF_COAL_TX_SET command, or, if the driver
4027 * did not set any TX coalescing parameters, to 0.
4029 err
= virtnet_send_tx_ctrl_coal_vq_cmd(vi
, i
,
4030 vi
->intr_coal_tx
.max_usecs
,
4031 vi
->intr_coal_tx
.max_packets
);
4033 /* Don't break the tx resize action if the vq coalescing is not
4034 * supported. The same is true for rx resize below.
4036 if (err
&& err
!= -EOPNOTSUPP
)
4040 if (ring
->rx_pending
!= rx_pending
) {
4041 err
= virtnet_rx_resize(vi
, rq
, ring
->rx_pending
);
4045 /* The reason is same as the transmit virtqueue reset */
4046 mutex_lock(&vi
->rq
[i
].dim_lock
);
4047 err
= virtnet_send_rx_ctrl_coal_vq_cmd(vi
, i
,
4048 vi
->intr_coal_rx
.max_usecs
,
4049 vi
->intr_coal_rx
.max_packets
);
4050 mutex_unlock(&vi
->rq
[i
].dim_lock
);
4051 if (err
&& err
!= -EOPNOTSUPP
)
static bool virtnet_commit_rss_command(struct virtnet_info *vi)
{
	struct net_device *dev = vi->dev;
	struct scatterlist sgs[4];
	unsigned int sg_buf_size;

	sg_init_table(sgs, 4);

	sg_buf_size = offsetof(struct virtio_net_ctrl_rss, hash_cfg_reserved);
	sg_set_buf(&sgs[0], &vi->rss, sg_buf_size);

	if (vi->has_rss) {
		sg_buf_size = sizeof(uint16_t) * vi->rss_indir_table_size;
		sg_set_buf(&sgs[1], vi->rss.indirection_table, sg_buf_size);
	} else {
		sg_set_buf(&sgs[1], &vi->rss.hash_cfg_reserved, sizeof(uint16_t));
	}

	sg_buf_size = offsetof(struct virtio_net_ctrl_rss, key)
			- offsetof(struct virtio_net_ctrl_rss, max_tx_vq);
	sg_set_buf(&sgs[2], &vi->rss.max_tx_vq, sg_buf_size);

	sg_buf_size = vi->rss_key_size;
	sg_set_buf(&sgs[3], vi->rss.key, sg_buf_size);

	if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MQ,
				  vi->has_rss ? VIRTIO_NET_CTRL_MQ_RSS_CONFIG
				  : VIRTIO_NET_CTRL_MQ_HASH_CONFIG, sgs))
		goto err;

	return true;

err:
	dev_warn(&dev->dev, "VIRTIONET issue with committing RSS sgs\n");
	return false;
}
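
/* The RSS/hash configuration is sent as four scatterlist entries because
 * the command payload is not one contiguous structure in memory (sketch):
 *
 *	sgs[0] = leading fixed fields up to the indirection table area
 *	sgs[1] = indirection table (or a reserved placeholder when only
 *		 hash reporting, not RSS, is in use)
 *	sgs[2] = max_tx_vq .. hash key length fields
 *	sgs[3] = hash key (rss_key_size bytes)
 *
 * The device sees the concatenation of the four regions as a single
 * command payload.
 */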
4098 static void virtnet_init_default_rss(struct virtnet_info
*vi
)
4100 vi
->rss
.hash_types
= vi
->rss_hash_types_supported
;
4101 vi
->rss_hash_types_saved
= vi
->rss_hash_types_supported
;
4102 vi
->rss
.indirection_table_mask
= vi
->rss_indir_table_size
4103 ? vi
->rss_indir_table_size
- 1 : 0;
4104 vi
->rss
.unclassified_queue
= 0;
4106 virtnet_rss_update_by_qpairs(vi
, vi
->curr_queue_pairs
);
4108 vi
->rss
.hash_key_length
= vi
->rss_key_size
;
4110 netdev_rss_key_fill(vi
->rss
.key
, vi
->rss_key_size
);
4113 static void virtnet_get_hashflow(const struct virtnet_info
*vi
, struct ethtool_rxnfc
*info
)
4116 switch (info
->flow_type
) {
4118 if (vi
->rss_hash_types_saved
& VIRTIO_NET_RSS_HASH_TYPE_TCPv4
) {
4119 info
->data
= RXH_IP_SRC
| RXH_IP_DST
|
4120 RXH_L4_B_0_1
| RXH_L4_B_2_3
;
4121 } else if (vi
->rss_hash_types_saved
& VIRTIO_NET_RSS_HASH_TYPE_IPv4
) {
4122 info
->data
= RXH_IP_SRC
| RXH_IP_DST
;
4126 if (vi
->rss_hash_types_saved
& VIRTIO_NET_RSS_HASH_TYPE_TCPv6
) {
4127 info
->data
= RXH_IP_SRC
| RXH_IP_DST
|
4128 RXH_L4_B_0_1
| RXH_L4_B_2_3
;
4129 } else if (vi
->rss_hash_types_saved
& VIRTIO_NET_RSS_HASH_TYPE_IPv6
) {
4130 info
->data
= RXH_IP_SRC
| RXH_IP_DST
;
4134 if (vi
->rss_hash_types_saved
& VIRTIO_NET_RSS_HASH_TYPE_UDPv4
) {
4135 info
->data
= RXH_IP_SRC
| RXH_IP_DST
|
4136 RXH_L4_B_0_1
| RXH_L4_B_2_3
;
4137 } else if (vi
->rss_hash_types_saved
& VIRTIO_NET_RSS_HASH_TYPE_IPv4
) {
4138 info
->data
= RXH_IP_SRC
| RXH_IP_DST
;
4142 if (vi
->rss_hash_types_saved
& VIRTIO_NET_RSS_HASH_TYPE_UDPv6
) {
4143 info
->data
= RXH_IP_SRC
| RXH_IP_DST
|
4144 RXH_L4_B_0_1
| RXH_L4_B_2_3
;
4145 } else if (vi
->rss_hash_types_saved
& VIRTIO_NET_RSS_HASH_TYPE_IPv6
) {
4146 info
->data
= RXH_IP_SRC
| RXH_IP_DST
;
4150 if (vi
->rss_hash_types_saved
& VIRTIO_NET_RSS_HASH_TYPE_IPv4
)
4151 info
->data
= RXH_IP_SRC
| RXH_IP_DST
;
4155 if (vi
->rss_hash_types_saved
& VIRTIO_NET_RSS_HASH_TYPE_IPv6
)
4156 info
->data
= RXH_IP_SRC
| RXH_IP_DST
;
4165 static bool virtnet_set_hashflow(struct virtnet_info
*vi
, struct ethtool_rxnfc
*info
)
4167 u32 new_hashtypes
= vi
->rss_hash_types_saved
;
4168 bool is_disable
= info
->data
& RXH_DISCARD
;
4169 bool is_l4
= info
->data
== (RXH_IP_SRC
| RXH_IP_DST
| RXH_L4_B_0_1
| RXH_L4_B_2_3
);
4171 /* supports only 'sd', 'sdfn' and 'r' */
4172 if (!((info
->data
== (RXH_IP_SRC
| RXH_IP_DST
)) | is_l4
| is_disable
))
4175 switch (info
->flow_type
) {
4177 new_hashtypes
&= ~(VIRTIO_NET_RSS_HASH_TYPE_IPv4
| VIRTIO_NET_RSS_HASH_TYPE_TCPv4
);
4179 new_hashtypes
|= VIRTIO_NET_RSS_HASH_TYPE_IPv4
4180 | (is_l4
? VIRTIO_NET_RSS_HASH_TYPE_TCPv4
: 0);
4183 new_hashtypes
&= ~(VIRTIO_NET_RSS_HASH_TYPE_IPv4
| VIRTIO_NET_RSS_HASH_TYPE_UDPv4
);
4185 new_hashtypes
|= VIRTIO_NET_RSS_HASH_TYPE_IPv4
4186 | (is_l4
? VIRTIO_NET_RSS_HASH_TYPE_UDPv4
: 0);
4189 new_hashtypes
&= ~VIRTIO_NET_RSS_HASH_TYPE_IPv4
;
4191 new_hashtypes
= VIRTIO_NET_RSS_HASH_TYPE_IPv4
;
4194 new_hashtypes
&= ~(VIRTIO_NET_RSS_HASH_TYPE_IPv6
| VIRTIO_NET_RSS_HASH_TYPE_TCPv6
);
4196 new_hashtypes
|= VIRTIO_NET_RSS_HASH_TYPE_IPv6
4197 | (is_l4
? VIRTIO_NET_RSS_HASH_TYPE_TCPv6
: 0);
4200 new_hashtypes
&= ~(VIRTIO_NET_RSS_HASH_TYPE_IPv6
| VIRTIO_NET_RSS_HASH_TYPE_UDPv6
);
4202 new_hashtypes
|= VIRTIO_NET_RSS_HASH_TYPE_IPv6
4203 | (is_l4
? VIRTIO_NET_RSS_HASH_TYPE_UDPv6
: 0);
4206 new_hashtypes
&= ~VIRTIO_NET_RSS_HASH_TYPE_IPv6
;
4208 new_hashtypes
= VIRTIO_NET_RSS_HASH_TYPE_IPv6
;
4211 /* unsupported flow */
4215 /* if unsupported hashtype was set */
4216 if (new_hashtypes
!= (new_hashtypes
& vi
->rss_hash_types_supported
))
4219 if (new_hashtypes
!= vi
->rss_hash_types_saved
) {
4220 vi
->rss_hash_types_saved
= new_hashtypes
;
4221 vi
->rss
.hash_types
= vi
->rss_hash_types_saved
;
4222 if (vi
->dev
->features
& NETIF_F_RXHASH
)
4223 return virtnet_commit_rss_command(vi
);
4229 static void virtnet_get_drvinfo(struct net_device
*dev
,
4230 struct ethtool_drvinfo
*info
)
4232 struct virtnet_info
*vi
= netdev_priv(dev
);
4233 struct virtio_device
*vdev
= vi
->vdev
;
4235 strscpy(info
->driver
, KBUILD_MODNAME
, sizeof(info
->driver
));
4236 strscpy(info
->version
, VIRTNET_DRIVER_VERSION
, sizeof(info
->version
));
4237 strscpy(info
->bus_info
, virtio_bus_name(vdev
), sizeof(info
->bus_info
));
4241 /* TODO: Eliminate OOO packets during switching */
4242 static int virtnet_set_channels(struct net_device
*dev
,
4243 struct ethtool_channels
*channels
)
4245 struct virtnet_info
*vi
= netdev_priv(dev
);
4246 u16 queue_pairs
= channels
->combined_count
;
4249 /* We don't support separate rx/tx channels.
4250 * We don't allow setting 'other' channels.
4252 if (channels
->rx_count
|| channels
->tx_count
|| channels
->other_count
)
4255 if (queue_pairs
> vi
->max_queue_pairs
|| queue_pairs
== 0)
4258 /* For now we don't support modifying channels while XDP is loaded
4259 * also when XDP is loaded all RX queues have XDP programs so we only
4260 * need to check a single RX queue.
4262 if (vi
->rq
[0].xdp_prog
)
4266 err
= virtnet_set_queues(vi
, queue_pairs
);
4271 virtnet_set_affinity(vi
);
4274 netif_set_real_num_tx_queues(dev
, queue_pairs
);
4275 netif_set_real_num_rx_queues(dev
, queue_pairs
);
4280 static void virtnet_stats_sprintf(u8
**p
, const char *fmt
, const char *noq_fmt
,
4281 int num
, int qid
, const struct virtnet_stat_desc
*desc
)
4286 for (i
= 0; i
< num
; ++i
)
4287 ethtool_sprintf(p
, noq_fmt
, desc
[i
].desc
);
4289 for (i
= 0; i
< num
; ++i
)
4290 ethtool_sprintf(p
, fmt
, qid
, desc
[i
].desc
);
4294 /* qid == -1: for rx/tx queue total field */
4295 static void virtnet_get_stats_string(struct virtnet_info
*vi
, int type
, int qid
, u8
**data
)
4297 const struct virtnet_stat_desc
*desc
;
4298 const char *fmt
, *noq_fmt
;
4302 if (type
== VIRTNET_Q_TYPE_CQ
&& qid
>= 0) {
4303 noq_fmt
= "cq_hw_%s";
4305 if (vi
->device_stats_cap
& VIRTIO_NET_STATS_TYPE_CVQ
) {
4306 desc
= &virtnet_stats_cvq_desc
[0];
4307 num
= ARRAY_SIZE(virtnet_stats_cvq_desc
);
4309 virtnet_stats_sprintf(&p
, NULL
, noq_fmt
, num
, -1, desc
);
4313 if (type
== VIRTNET_Q_TYPE_RX
) {
4317 desc
= &virtnet_rq_stats_desc
[0];
4318 num
= ARRAY_SIZE(virtnet_rq_stats_desc
);
4320 virtnet_stats_sprintf(&p
, fmt
, noq_fmt
, num
, qid
, desc
);
4323 noq_fmt
= "rx_hw_%s";
4325 if (vi
->device_stats_cap
& VIRTIO_NET_STATS_TYPE_RX_BASIC
) {
4326 desc
= &virtnet_stats_rx_basic_desc
[0];
4327 num
= ARRAY_SIZE(virtnet_stats_rx_basic_desc
);
4329 virtnet_stats_sprintf(&p
, fmt
, noq_fmt
, num
, qid
, desc
);
4332 if (vi
->device_stats_cap
& VIRTIO_NET_STATS_TYPE_RX_CSUM
) {
4333 desc
= &virtnet_stats_rx_csum_desc
[0];
4334 num
= ARRAY_SIZE(virtnet_stats_rx_csum_desc
);
4336 virtnet_stats_sprintf(&p
, fmt
, noq_fmt
, num
, qid
, desc
);
4339 if (vi
->device_stats_cap
& VIRTIO_NET_STATS_TYPE_RX_SPEED
) {
4340 desc
= &virtnet_stats_rx_speed_desc
[0];
4341 num
= ARRAY_SIZE(virtnet_stats_rx_speed_desc
);
4343 virtnet_stats_sprintf(&p
, fmt
, noq_fmt
, num
, qid
, desc
);
4347 if (type
== VIRTNET_Q_TYPE_TX
) {
4351 desc
= &virtnet_sq_stats_desc
[0];
4352 num
= ARRAY_SIZE(virtnet_sq_stats_desc
);
4354 virtnet_stats_sprintf(&p
, fmt
, noq_fmt
, num
, qid
, desc
);
4357 noq_fmt
= "tx_hw_%s";
4359 if (vi
->device_stats_cap
& VIRTIO_NET_STATS_TYPE_TX_BASIC
) {
4360 desc
= &virtnet_stats_tx_basic_desc
[0];
4361 num
= ARRAY_SIZE(virtnet_stats_tx_basic_desc
);
4363 virtnet_stats_sprintf(&p
, fmt
, noq_fmt
, num
, qid
, desc
);
4366 if (vi
->device_stats_cap
& VIRTIO_NET_STATS_TYPE_TX_GSO
) {
4367 desc
= &virtnet_stats_tx_gso_desc
[0];
4368 num
= ARRAY_SIZE(virtnet_stats_tx_gso_desc
);
4370 virtnet_stats_sprintf(&p
, fmt
, noq_fmt
, num
, qid
, desc
);
4373 if (vi
->device_stats_cap
& VIRTIO_NET_STATS_TYPE_TX_SPEED
) {
4374 desc
= &virtnet_stats_tx_speed_desc
[0];
4375 num
= ARRAY_SIZE(virtnet_stats_tx_speed_desc
);
4377 virtnet_stats_sprintf(&p
, fmt
, noq_fmt
, num
, qid
, desc
);
4384 struct virtnet_stats_ctx
{
4385 /* The stats are write to qstats or ethtool -S */
4388 /* Used to calculate the offset inside the output buffer. */
4391 /* The actual supported stat types. */
4394 /* Used to calculate the reply buffer size. */
4397 /* Record the output buffer. */
4401 static void virtnet_stats_ctx_init(struct virtnet_info
*vi
,
4402 struct virtnet_stats_ctx
*ctx
,
4403 u64
*data
, bool to_qstat
)
4408 ctx
->to_qstat
= to_qstat
;
4411 ctx
->desc_num
[VIRTNET_Q_TYPE_RX
] = ARRAY_SIZE(virtnet_rq_stats_desc_qstat
);
4412 ctx
->desc_num
[VIRTNET_Q_TYPE_TX
] = ARRAY_SIZE(virtnet_sq_stats_desc_qstat
);
4414 queue_type
= VIRTNET_Q_TYPE_RX
;
4416 if (vi
->device_stats_cap
& VIRTIO_NET_STATS_TYPE_RX_BASIC
) {
4417 ctx
->bitmap
[queue_type
] |= VIRTIO_NET_STATS_TYPE_RX_BASIC
;
4418 ctx
->desc_num
[queue_type
] += ARRAY_SIZE(virtnet_stats_rx_basic_desc_qstat
);
4419 ctx
->size
[queue_type
] += sizeof(struct virtio_net_stats_rx_basic
);
4422 if (vi
->device_stats_cap
& VIRTIO_NET_STATS_TYPE_RX_CSUM
) {
4423 ctx
->bitmap
[queue_type
] |= VIRTIO_NET_STATS_TYPE_RX_CSUM
;
4424 ctx
->desc_num
[queue_type
] += ARRAY_SIZE(virtnet_stats_rx_csum_desc_qstat
);
4425 ctx
->size
[queue_type
] += sizeof(struct virtio_net_stats_rx_csum
);
4428 if (vi
->device_stats_cap
& VIRTIO_NET_STATS_TYPE_RX_GSO
) {
4429 ctx
->bitmap
[queue_type
] |= VIRTIO_NET_STATS_TYPE_RX_GSO
;
4430 ctx
->desc_num
[queue_type
] += ARRAY_SIZE(virtnet_stats_rx_gso_desc_qstat
);
4431 ctx
->size
[queue_type
] += sizeof(struct virtio_net_stats_rx_gso
);
4434 if (vi
->device_stats_cap
& VIRTIO_NET_STATS_TYPE_RX_SPEED
) {
4435 ctx
->bitmap
[queue_type
] |= VIRTIO_NET_STATS_TYPE_RX_SPEED
;
4436 ctx
->desc_num
[queue_type
] += ARRAY_SIZE(virtnet_stats_rx_speed_desc_qstat
);
4437 ctx
->size
[queue_type
] += sizeof(struct virtio_net_stats_rx_speed
);
4440 queue_type
= VIRTNET_Q_TYPE_TX
;
4442 if (vi
->device_stats_cap
& VIRTIO_NET_STATS_TYPE_TX_BASIC
) {
4443 ctx
->bitmap
[queue_type
] |= VIRTIO_NET_STATS_TYPE_TX_BASIC
;
4444 ctx
->desc_num
[queue_type
] += ARRAY_SIZE(virtnet_stats_tx_basic_desc_qstat
);
4445 ctx
->size
[queue_type
] += sizeof(struct virtio_net_stats_tx_basic
);
4448 if (vi
->device_stats_cap
& VIRTIO_NET_STATS_TYPE_TX_CSUM
) {
4449 ctx
->bitmap
[queue_type
] |= VIRTIO_NET_STATS_TYPE_TX_CSUM
;
4450 ctx
->desc_num
[queue_type
] += ARRAY_SIZE(virtnet_stats_tx_csum_desc_qstat
);
4451 ctx
->size
[queue_type
] += sizeof(struct virtio_net_stats_tx_csum
);
4454 if (vi
->device_stats_cap
& VIRTIO_NET_STATS_TYPE_TX_GSO
) {
4455 ctx
->bitmap
[queue_type
] |= VIRTIO_NET_STATS_TYPE_TX_GSO
;
4456 ctx
->desc_num
[queue_type
] += ARRAY_SIZE(virtnet_stats_tx_gso_desc_qstat
);
4457 ctx
->size
[queue_type
] += sizeof(struct virtio_net_stats_tx_gso
);
4460 if (vi
->device_stats_cap
& VIRTIO_NET_STATS_TYPE_TX_SPEED
) {
4461 ctx
->bitmap
[queue_type
] |= VIRTIO_NET_STATS_TYPE_TX_SPEED
;
4462 ctx
->desc_num
[queue_type
] += ARRAY_SIZE(virtnet_stats_tx_speed_desc_qstat
);
4463 ctx
->size
[queue_type
] += sizeof(struct virtio_net_stats_tx_speed
);
4469 ctx
->desc_num
[VIRTNET_Q_TYPE_RX
] = ARRAY_SIZE(virtnet_rq_stats_desc
);
4470 ctx
->desc_num
[VIRTNET_Q_TYPE_TX
] = ARRAY_SIZE(virtnet_sq_stats_desc
);
4472 if (vi
->device_stats_cap
& VIRTIO_NET_STATS_TYPE_CVQ
) {
4473 queue_type
= VIRTNET_Q_TYPE_CQ
;
4475 ctx
->bitmap
[queue_type
] |= VIRTIO_NET_STATS_TYPE_CVQ
;
4476 ctx
->desc_num
[queue_type
] += ARRAY_SIZE(virtnet_stats_cvq_desc
);
4477 ctx
->size
[queue_type
] += sizeof(struct virtio_net_stats_cvq
);
4480 queue_type
= VIRTNET_Q_TYPE_RX
;
4482 if (vi
->device_stats_cap
& VIRTIO_NET_STATS_TYPE_RX_BASIC
) {
4483 ctx
->bitmap
[queue_type
] |= VIRTIO_NET_STATS_TYPE_RX_BASIC
;
4484 ctx
->desc_num
[queue_type
] += ARRAY_SIZE(virtnet_stats_rx_basic_desc
);
4485 ctx
->size
[queue_type
] += sizeof(struct virtio_net_stats_rx_basic
);
4488 if (vi
->device_stats_cap
& VIRTIO_NET_STATS_TYPE_RX_CSUM
) {
4489 ctx
->bitmap
[queue_type
] |= VIRTIO_NET_STATS_TYPE_RX_CSUM
;
4490 ctx
->desc_num
[queue_type
] += ARRAY_SIZE(virtnet_stats_rx_csum_desc
);
4491 ctx
->size
[queue_type
] += sizeof(struct virtio_net_stats_rx_csum
);
4494 if (vi
->device_stats_cap
& VIRTIO_NET_STATS_TYPE_RX_SPEED
) {
4495 ctx
->bitmap
[queue_type
] |= VIRTIO_NET_STATS_TYPE_RX_SPEED
;
4496 ctx
->desc_num
[queue_type
] += ARRAY_SIZE(virtnet_stats_rx_speed_desc
);
4497 ctx
->size
[queue_type
] += sizeof(struct virtio_net_stats_rx_speed
);
4500 queue_type
= VIRTNET_Q_TYPE_TX
;
4502 if (vi
->device_stats_cap
& VIRTIO_NET_STATS_TYPE_TX_BASIC
) {
4503 ctx
->bitmap
[queue_type
] |= VIRTIO_NET_STATS_TYPE_TX_BASIC
;
4504 ctx
->desc_num
[queue_type
] += ARRAY_SIZE(virtnet_stats_tx_basic_desc
);
4505 ctx
->size
[queue_type
] += sizeof(struct virtio_net_stats_tx_basic
);
4508 if (vi
->device_stats_cap
& VIRTIO_NET_STATS_TYPE_TX_GSO
) {
4509 ctx
->bitmap
[queue_type
] |= VIRTIO_NET_STATS_TYPE_TX_GSO
;
4510 ctx
->desc_num
[queue_type
] += ARRAY_SIZE(virtnet_stats_tx_gso_desc
);
4511 ctx
->size
[queue_type
] += sizeof(struct virtio_net_stats_tx_gso
);
4514 if (vi
->device_stats_cap
& VIRTIO_NET_STATS_TYPE_TX_SPEED
) {
4515 ctx
->bitmap
[queue_type
] |= VIRTIO_NET_STATS_TYPE_TX_SPEED
;
4516 ctx
->desc_num
[queue_type
] += ARRAY_SIZE(virtnet_stats_tx_speed_desc
);
4517 ctx
->size
[queue_type
] += sizeof(struct virtio_net_stats_tx_speed
);
/* stats_sum_queue - Calculate the sum of the same fields in sq or rq.
 * @sum: the position to store the sum values
 * @num: field num
 * @q_value: the first queue fields
 * @q_num: number of the queues
 */
static void stats_sum_queue(u64 *sum, u32 num, u64 *q_value, u32 q_num)
{
	u32 step = num;
	u64 *p;
	u32 i, j;

	for (i = 0; i < num; ++i) {
		p = sum + i;
		*p = 0;

		for (j = 0; j < q_num; ++j)
			*p += *(q_value + i + j * step);
	}
}
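
/* Worked example (illustrative only): with num == 3 fields per RX queue
 * and q_num == 2 queues, the per-queue blocks are laid out back to back:
 *
 *	q_value: [ q0.f0 q0.f1 q0.f2 | q1.f0 q1.f1 q1.f2 ]
 *
 * and sum[i] = q0.fi + q1.fi, i.e. field i is read at
 * q_value[i + j * step] for each queue j. This is what lets the "total"
 * rows of ethtool -S be produced without knowing the field names.
 */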
static void virtnet_fill_total_fields(struct virtnet_info *vi,
				      struct virtnet_stats_ctx *ctx)
{
	u64 *data, *first_rx_q, *first_tx_q;
	u32 num_cq, num_rx, num_tx;

	num_cq = ctx->desc_num[VIRTNET_Q_TYPE_CQ];
	num_rx = ctx->desc_num[VIRTNET_Q_TYPE_RX];
	num_tx = ctx->desc_num[VIRTNET_Q_TYPE_TX];

	first_rx_q = ctx->data + num_rx + num_tx + num_cq;
	first_tx_q = first_rx_q + vi->curr_queue_pairs * num_rx;

	data = ctx->data;

	stats_sum_queue(data, num_rx, first_rx_q, vi->curr_queue_pairs);

	data = ctx->data + num_rx;

	stats_sum_queue(data, num_tx, first_tx_q, vi->curr_queue_pairs);
}
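/* Layout note (illustrative, following the index math above): the flat
 * ctx->data array handed to ethtool is
 *
 *   [RX totals][TX totals][CVQ fields][per-RX queues 0..N-1][per-TX queues 0..N-1]
 *
 * so the first per-RX block starts at index num_rx + num_tx + num_cq,
 * which is exactly what first_rx_q points at, and the per-TX blocks
 * follow after curr_queue_pairs * num_rx entries.
 */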
4564 static void virtnet_fill_stats_qstat(struct virtnet_info
*vi
, u32 qid
,
4565 struct virtnet_stats_ctx
*ctx
,
4566 const u8
*base
, bool drv_stats
, u8 reply_type
)
4568 const struct virtnet_stat_desc
*desc
;
4569 const u64_stats_t
*v_stat
;
4575 queue_type
= vq_type(vi
, qid
);
4576 bitmap
= ctx
->bitmap
[queue_type
];
4579 if (queue_type
== VIRTNET_Q_TYPE_RX
) {
4580 desc
= &virtnet_rq_stats_desc_qstat
[0];
4581 num
= ARRAY_SIZE(virtnet_rq_stats_desc_qstat
);
4583 desc
= &virtnet_sq_stats_desc_qstat
[0];
4584 num
= ARRAY_SIZE(virtnet_sq_stats_desc_qstat
);
4587 for (i
= 0; i
< num
; ++i
) {
4588 offset
= desc
[i
].qstat_offset
/ sizeof(*ctx
->data
);
4589 v_stat
= (const u64_stats_t
*)(base
+ desc
[i
].offset
);
4590 ctx
->data
[offset
] = u64_stats_read(v_stat
);
4595 if (bitmap
& VIRTIO_NET_STATS_TYPE_RX_BASIC
) {
4596 desc
= &virtnet_stats_rx_basic_desc_qstat
[0];
4597 num
= ARRAY_SIZE(virtnet_stats_rx_basic_desc_qstat
);
4598 if (reply_type
== VIRTIO_NET_STATS_TYPE_REPLY_RX_BASIC
)
4602 if (bitmap
& VIRTIO_NET_STATS_TYPE_RX_CSUM
) {
4603 desc
= &virtnet_stats_rx_csum_desc_qstat
[0];
4604 num
= ARRAY_SIZE(virtnet_stats_rx_csum_desc_qstat
);
4605 if (reply_type
== VIRTIO_NET_STATS_TYPE_REPLY_RX_CSUM
)
4609 if (bitmap
& VIRTIO_NET_STATS_TYPE_RX_GSO
) {
4610 desc
= &virtnet_stats_rx_gso_desc_qstat
[0];
4611 num
= ARRAY_SIZE(virtnet_stats_rx_gso_desc_qstat
);
4612 if (reply_type
== VIRTIO_NET_STATS_TYPE_REPLY_RX_GSO
)
4616 if (bitmap
& VIRTIO_NET_STATS_TYPE_RX_SPEED
) {
4617 desc
= &virtnet_stats_rx_speed_desc_qstat
[0];
4618 num
= ARRAY_SIZE(virtnet_stats_rx_speed_desc_qstat
);
4619 if (reply_type
== VIRTIO_NET_STATS_TYPE_REPLY_RX_SPEED
)
4623 if (bitmap
& VIRTIO_NET_STATS_TYPE_TX_BASIC
) {
4624 desc
= &virtnet_stats_tx_basic_desc_qstat
[0];
4625 num
= ARRAY_SIZE(virtnet_stats_tx_basic_desc_qstat
);
4626 if (reply_type
== VIRTIO_NET_STATS_TYPE_REPLY_TX_BASIC
)
4630 if (bitmap
& VIRTIO_NET_STATS_TYPE_TX_CSUM
) {
4631 desc
= &virtnet_stats_tx_csum_desc_qstat
[0];
4632 num
= ARRAY_SIZE(virtnet_stats_tx_csum_desc_qstat
);
4633 if (reply_type
== VIRTIO_NET_STATS_TYPE_REPLY_TX_CSUM
)
4637 if (bitmap
& VIRTIO_NET_STATS_TYPE_TX_GSO
) {
4638 desc
= &virtnet_stats_tx_gso_desc_qstat
[0];
4639 num
= ARRAY_SIZE(virtnet_stats_tx_gso_desc_qstat
);
4640 if (reply_type
== VIRTIO_NET_STATS_TYPE_REPLY_TX_GSO
)
4644 if (bitmap
& VIRTIO_NET_STATS_TYPE_TX_SPEED
) {
4645 desc
= &virtnet_stats_tx_speed_desc_qstat
[0];
4646 num
= ARRAY_SIZE(virtnet_stats_tx_speed_desc_qstat
);
4647 if (reply_type
== VIRTIO_NET_STATS_TYPE_REPLY_TX_SPEED
)
4654 for (i
= 0; i
< num
; ++i
) {
4655 offset
= desc
[i
].qstat_offset
/ sizeof(*ctx
->data
);
4656 v
= (const __le64
*)(base
+ desc
[i
].offset
);
4657 ctx
->data
[offset
] = le64_to_cpu(*v
);
4661 /* virtnet_fill_stats - copy the stats to qstats or ethtool -S
4662 * The stats source is the device or the driver.
4664 * @vi: virtio net info
4666 * @ctx: stats ctx (initiated by virtnet_stats_ctx_init())
4667 * @base: pointer to the device reply or the driver stats structure.
4668 * @drv_stats: designate the base type (device reply, driver stats)
4669 * @type: the type of the device reply (if drv_stats is true, this must be zero)
4671 static void virtnet_fill_stats(struct virtnet_info
*vi
, u32 qid
,
4672 struct virtnet_stats_ctx
*ctx
,
4673 const u8
*base
, bool drv_stats
, u8 reply_type
)
4675 u32 queue_type
, num_rx
, num_tx
, num_cq
;
4676 const struct virtnet_stat_desc
*desc
;
4677 const u64_stats_t
*v_stat
;
4683 return virtnet_fill_stats_qstat(vi
, qid
, ctx
, base
, drv_stats
, reply_type
);
4685 num_cq
= ctx
->desc_num
[VIRTNET_Q_TYPE_CQ
];
4686 num_rx
= ctx
->desc_num
[VIRTNET_Q_TYPE_RX
];
4687 num_tx
= ctx
->desc_num
[VIRTNET_Q_TYPE_TX
];
4689 queue_type
= vq_type(vi
, qid
);
4690 bitmap
= ctx
->bitmap
[queue_type
];
4692 /* skip the total fields of pairs */
4693 offset
= num_rx
+ num_tx
;
4695 if (queue_type
== VIRTNET_Q_TYPE_TX
) {
4696 offset
+= num_cq
+ num_rx
* vi
->curr_queue_pairs
+ num_tx
* (qid
/ 2);
4698 num
= ARRAY_SIZE(virtnet_sq_stats_desc
);
4700 desc
= &virtnet_sq_stats_desc
[0];
4706 } else if (queue_type
== VIRTNET_Q_TYPE_RX
) {
4707 offset
+= num_cq
+ num_rx
* (qid
/ 2);
4709 num
= ARRAY_SIZE(virtnet_rq_stats_desc
);
4711 desc
= &virtnet_rq_stats_desc
[0];
4718 if (bitmap
& VIRTIO_NET_STATS_TYPE_CVQ
) {
4719 desc
= &virtnet_stats_cvq_desc
[0];
4720 num
= ARRAY_SIZE(virtnet_stats_cvq_desc
);
4721 if (reply_type
== VIRTIO_NET_STATS_TYPE_REPLY_CVQ
)
4727 if (bitmap
& VIRTIO_NET_STATS_TYPE_RX_BASIC
) {
4728 desc
= &virtnet_stats_rx_basic_desc
[0];
4729 num
= ARRAY_SIZE(virtnet_stats_rx_basic_desc
);
4730 if (reply_type
== VIRTIO_NET_STATS_TYPE_REPLY_RX_BASIC
)
4736 if (bitmap
& VIRTIO_NET_STATS_TYPE_RX_CSUM
) {
4737 desc
= &virtnet_stats_rx_csum_desc
[0];
4738 num
= ARRAY_SIZE(virtnet_stats_rx_csum_desc
);
4739 if (reply_type
== VIRTIO_NET_STATS_TYPE_REPLY_RX_CSUM
)
4745 if (bitmap
& VIRTIO_NET_STATS_TYPE_RX_SPEED
) {
4746 desc
= &virtnet_stats_rx_speed_desc
[0];
4747 num
= ARRAY_SIZE(virtnet_stats_rx_speed_desc
);
4748 if (reply_type
== VIRTIO_NET_STATS_TYPE_REPLY_RX_SPEED
)
4754 if (bitmap
& VIRTIO_NET_STATS_TYPE_TX_BASIC
) {
4755 desc
= &virtnet_stats_tx_basic_desc
[0];
4756 num
= ARRAY_SIZE(virtnet_stats_tx_basic_desc
);
4757 if (reply_type
== VIRTIO_NET_STATS_TYPE_REPLY_TX_BASIC
)
4763 if (bitmap
& VIRTIO_NET_STATS_TYPE_TX_GSO
) {
4764 desc
= &virtnet_stats_tx_gso_desc
[0];
4765 num
= ARRAY_SIZE(virtnet_stats_tx_gso_desc
);
4766 if (reply_type
== VIRTIO_NET_STATS_TYPE_REPLY_TX_GSO
)
4772 if (bitmap
& VIRTIO_NET_STATS_TYPE_TX_SPEED
) {
4773 desc
= &virtnet_stats_tx_speed_desc
[0];
4774 num
= ARRAY_SIZE(virtnet_stats_tx_speed_desc
);
4775 if (reply_type
== VIRTIO_NET_STATS_TYPE_REPLY_TX_SPEED
)
4784 for (i
= 0; i
< num
; ++i
) {
4785 v
= (const __le64
*)(base
+ desc
[i
].offset
);
4786 ctx
->data
[offset
+ i
] = le64_to_cpu(*v
);
4792 for (i
= 0; i
< num
; ++i
) {
4793 v_stat
= (const u64_stats_t
*)(base
+ desc
[i
].offset
);
4794 ctx
->data
[offset
+ i
] = u64_stats_read(v_stat
);
4798 static int __virtnet_get_hw_stats(struct virtnet_info
*vi
,
4799 struct virtnet_stats_ctx
*ctx
,
4800 struct virtio_net_ctrl_queue_stats
*req
,
4801 int req_size
, void *reply
, int res_size
)
4803 struct virtio_net_stats_reply_hdr
*hdr
;
4804 struct scatterlist sgs_in
, sgs_out
;
4809 sg_init_one(&sgs_out
, req
, req_size
);
4810 sg_init_one(&sgs_in
, reply
, res_size
);
4812 ok
= virtnet_send_command_reply(vi
, VIRTIO_NET_CTRL_STATS
,
4813 VIRTIO_NET_CTRL_STATS_GET
,
4819 for (p
= reply
; p
- reply
< res_size
; p
+= le16_to_cpu(hdr
->size
)) {
4821 qid
= le16_to_cpu(hdr
->vq_index
);
4822 virtnet_fill_stats(vi
, qid
, ctx
, p
, false, hdr
->type
);
static void virtnet_make_stat_req(struct virtnet_info *vi,
				  struct virtnet_stats_ctx *ctx,
				  struct virtio_net_ctrl_queue_stats *req,
				  int qid, int *idx)
{
	int qtype = vq_type(vi, qid);
	u64 bitmap = ctx->bitmap[qtype];

	if (!bitmap)
		return;

	req->stats[*idx].vq_index = cpu_to_le16(qid);
	req->stats[*idx].types_bitmap[0] = cpu_to_le64(bitmap);
	*idx += 1;
}
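/* Note (for orientation, not new behaviour): data virtqueues interleave
 * RX and TX, so queue pair i uses vq index 2 * i for RX and 2 * i + 1
 * for TX; the control vq, when present, sits after all data queues at
 * max_queue_pairs * 2, which is the index passed for the CVQ request in
 * virtnet_get_hw_stats() below.
 */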
4844 /* qid: -1: get stats of all vq.
4845 * > 0: get the stats for the special vq. This must not be cvq.
4847 static int virtnet_get_hw_stats(struct virtnet_info
*vi
,
4848 struct virtnet_stats_ctx
*ctx
, int qid
)
4850 int qnum
, i
, j
, res_size
, qtype
, last_vq
, first_vq
;
4851 struct virtio_net_ctrl_queue_stats
*req
;
4856 if (!virtio_has_feature(vi
->vdev
, VIRTIO_NET_F_DEVICE_STATS
))
4860 last_vq
= vi
->curr_queue_pairs
* 2 - 1;
4871 for (i
= first_vq
; i
<= last_vq
; ++i
) {
4872 qtype
= vq_type(vi
, i
);
4873 if (ctx
->bitmap
[qtype
]) {
4875 res_size
+= ctx
->size
[qtype
];
4879 if (enable_cvq
&& ctx
->bitmap
[VIRTNET_Q_TYPE_CQ
]) {
4880 res_size
+= ctx
->size
[VIRTNET_Q_TYPE_CQ
];
4884 req
= kcalloc(qnum
, sizeof(*req
), GFP_KERNEL
);
4888 reply
= kmalloc(res_size
, GFP_KERNEL
);
4895 for (i
= first_vq
; i
<= last_vq
; ++i
)
4896 virtnet_make_stat_req(vi
, ctx
, req
, i
, &j
);
4899 virtnet_make_stat_req(vi
, ctx
, req
, vi
->max_queue_pairs
* 2, &j
);
4901 ok
= __virtnet_get_hw_stats(vi
, ctx
, req
, sizeof(*req
) * j
, reply
, res_size
);
static void virtnet_get_strings(struct net_device *dev, u32 stringset, u8 *data)
{
	struct virtnet_info *vi = netdev_priv(dev);
	unsigned int i;
	u8 *p = data;

	switch (stringset) {
	case ETH_SS_STATS:
		/* Generate the total field names. */
		virtnet_get_stats_string(vi, VIRTNET_Q_TYPE_RX, -1, &p);
		virtnet_get_stats_string(vi, VIRTNET_Q_TYPE_TX, -1, &p);

		virtnet_get_stats_string(vi, VIRTNET_Q_TYPE_CQ, 0, &p);

		for (i = 0; i < vi->curr_queue_pairs; ++i)
			virtnet_get_stats_string(vi, VIRTNET_Q_TYPE_RX, i, &p);

		for (i = 0; i < vi->curr_queue_pairs; ++i)
			virtnet_get_stats_string(vi, VIRTNET_Q_TYPE_TX, i, &p);
		break;
	}
}
static int virtnet_get_sset_count(struct net_device *dev, int sset)
{
	struct virtnet_info *vi = netdev_priv(dev);
	struct virtnet_stats_ctx ctx = {0};
	u32 pair_count;

	switch (sset) {
	case ETH_SS_STATS:
		virtnet_stats_ctx_init(vi, &ctx, NULL, false);

		pair_count = ctx.desc_num[VIRTNET_Q_TYPE_RX] + ctx.desc_num[VIRTNET_Q_TYPE_TX];

		return pair_count + ctx.desc_num[VIRTNET_Q_TYPE_CQ] +
		       vi->curr_queue_pairs * pair_count;
	default:
		return -EOPNOTSUPP;
	}
}
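/* Worked example (hypothetical counts): with 13 RX descriptors, 5 TX
 * descriptors, 3 CVQ descriptors and curr_queue_pairs == 4, the count
 * returned above is (13 + 5) totals + 3 CVQ fields + 4 * (13 + 5)
 * per-queue fields = 93 ethtool strings/values.
 */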
static void virtnet_get_ethtool_stats(struct net_device *dev,
				      struct ethtool_stats *stats, u64 *data)
{
	struct virtnet_info *vi = netdev_priv(dev);
	struct virtnet_stats_ctx ctx = {0};
	unsigned int start, i;
	const u8 *stats_base;

	virtnet_stats_ctx_init(vi, &ctx, data, false);
	if (virtnet_get_hw_stats(vi, &ctx, -1))
		dev_warn(&vi->dev->dev, "Failed to get hw stats.\n");

	for (i = 0; i < vi->curr_queue_pairs; i++) {
		struct receive_queue *rq = &vi->rq[i];
		struct send_queue *sq = &vi->sq[i];

		stats_base = (const u8 *)&rq->stats;
		do {
			start = u64_stats_fetch_begin(&rq->stats.syncp);
			virtnet_fill_stats(vi, i * 2, &ctx, stats_base, true, 0);
		} while (u64_stats_fetch_retry(&rq->stats.syncp, start));

		stats_base = (const u8 *)&sq->stats;
		do {
			start = u64_stats_fetch_begin(&sq->stats.syncp);
			virtnet_fill_stats(vi, i * 2 + 1, &ctx, stats_base, true, 0);
		} while (u64_stats_fetch_retry(&sq->stats.syncp, start));
	}

	virtnet_fill_total_fields(vi, &ctx);
}
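/* Usage note: the array filled here is what "ethtool -S <dev>" prints;
 * the aggregated totals computed by virtnet_fill_total_fields() come
 * first, followed by the CVQ fields and then the per-queue counters.
 * The u64_stats fetch/retry loops above simply re-read a queue's
 * snapshot if a writer updated it mid-copy.
 */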
static void virtnet_get_channels(struct net_device *dev,
				 struct ethtool_channels *channels)
{
	struct virtnet_info *vi = netdev_priv(dev);

	channels->combined_count = vi->curr_queue_pairs;
	channels->max_combined = vi->max_queue_pairs;
	channels->max_other = 0;
	channels->rx_count = 0;
	channels->tx_count = 0;
	channels->other_count = 0;
}
static int virtnet_set_link_ksettings(struct net_device *dev,
				      const struct ethtool_link_ksettings *cmd)
{
	struct virtnet_info *vi = netdev_priv(dev);

	return ethtool_virtdev_set_link_ksettings(dev, cmd,
						  &vi->speed, &vi->duplex);
}

static int virtnet_get_link_ksettings(struct net_device *dev,
				      struct ethtool_link_ksettings *cmd)
{
	struct virtnet_info *vi = netdev_priv(dev);

	cmd->base.speed = vi->speed;
	cmd->base.duplex = vi->duplex;
	cmd->base.port = PORT_OTHER;

	return 0;
}
5017 static int virtnet_send_tx_notf_coal_cmds(struct virtnet_info
*vi
,
5018 struct ethtool_coalesce
*ec
)
5020 struct virtio_net_ctrl_coal_tx
*coal_tx
__free(kfree
) = NULL
;
5021 struct scatterlist sgs_tx
;
5024 coal_tx
= kzalloc(sizeof(*coal_tx
), GFP_KERNEL
);
5028 coal_tx
->tx_usecs
= cpu_to_le32(ec
->tx_coalesce_usecs
);
5029 coal_tx
->tx_max_packets
= cpu_to_le32(ec
->tx_max_coalesced_frames
);
5030 sg_init_one(&sgs_tx
, coal_tx
, sizeof(*coal_tx
));
5032 if (!virtnet_send_command(vi
, VIRTIO_NET_CTRL_NOTF_COAL
,
5033 VIRTIO_NET_CTRL_NOTF_COAL_TX_SET
,
5037 vi
->intr_coal_tx
.max_usecs
= ec
->tx_coalesce_usecs
;
5038 vi
->intr_coal_tx
.max_packets
= ec
->tx_max_coalesced_frames
;
5039 for (i
= 0; i
< vi
->max_queue_pairs
; i
++) {
5040 vi
->sq
[i
].intr_coal
.max_usecs
= ec
->tx_coalesce_usecs
;
5041 vi
->sq
[i
].intr_coal
.max_packets
= ec
->tx_max_coalesced_frames
;
5047 static int virtnet_send_rx_notf_coal_cmds(struct virtnet_info
*vi
,
5048 struct ethtool_coalesce
*ec
)
5050 struct virtio_net_ctrl_coal_rx
*coal_rx
__free(kfree
) = NULL
;
5051 bool rx_ctrl_dim_on
= !!ec
->use_adaptive_rx_coalesce
;
5052 struct scatterlist sgs_rx
;
5055 if (rx_ctrl_dim_on
&& !virtio_has_feature(vi
->vdev
, VIRTIO_NET_F_VQ_NOTF_COAL
))
5058 if (rx_ctrl_dim_on
&& (ec
->rx_coalesce_usecs
!= vi
->intr_coal_rx
.max_usecs
||
5059 ec
->rx_max_coalesced_frames
!= vi
->intr_coal_rx
.max_packets
))
5062 if (rx_ctrl_dim_on
&& !vi
->rx_dim_enabled
) {
5063 vi
->rx_dim_enabled
= true;
5064 for (i
= 0; i
< vi
->max_queue_pairs
; i
++) {
5065 mutex_lock(&vi
->rq
[i
].dim_lock
);
5066 vi
->rq
[i
].dim_enabled
= true;
5067 mutex_unlock(&vi
->rq
[i
].dim_lock
);
5072 coal_rx
= kzalloc(sizeof(*coal_rx
), GFP_KERNEL
);
5076 if (!rx_ctrl_dim_on
&& vi
->rx_dim_enabled
) {
5077 vi
->rx_dim_enabled
= false;
5078 for (i
= 0; i
< vi
->max_queue_pairs
; i
++) {
5079 mutex_lock(&vi
->rq
[i
].dim_lock
);
5080 vi
->rq
[i
].dim_enabled
= false;
5081 mutex_unlock(&vi
->rq
[i
].dim_lock
);
5085 /* Since the per-queue coalescing params can be set,
5086 * we need apply the global new params even if they
5089 coal_rx
->rx_usecs
= cpu_to_le32(ec
->rx_coalesce_usecs
);
5090 coal_rx
->rx_max_packets
= cpu_to_le32(ec
->rx_max_coalesced_frames
);
5091 sg_init_one(&sgs_rx
, coal_rx
, sizeof(*coal_rx
));
5093 if (!virtnet_send_command(vi
, VIRTIO_NET_CTRL_NOTF_COAL
,
5094 VIRTIO_NET_CTRL_NOTF_COAL_RX_SET
,
5098 vi
->intr_coal_rx
.max_usecs
= ec
->rx_coalesce_usecs
;
5099 vi
->intr_coal_rx
.max_packets
= ec
->rx_max_coalesced_frames
;
5100 for (i
= 0; i
< vi
->max_queue_pairs
; i
++) {
5101 mutex_lock(&vi
->rq
[i
].dim_lock
);
5102 vi
->rq
[i
].intr_coal
.max_usecs
= ec
->rx_coalesce_usecs
;
5103 vi
->rq
[i
].intr_coal
.max_packets
= ec
->rx_max_coalesced_frames
;
5104 mutex_unlock(&vi
->rq
[i
].dim_lock
);
static int virtnet_send_notf_coal_cmds(struct virtnet_info *vi,
				       struct ethtool_coalesce *ec)
{
	int err;

	err = virtnet_send_tx_notf_coal_cmds(vi, ec);
	if (err)
		return err;

	err = virtnet_send_rx_notf_coal_cmds(vi, ec);
	if (err)
		return err;

	return 0;
}
5126 static int virtnet_send_rx_notf_coal_vq_cmds(struct virtnet_info
*vi
,
5127 struct ethtool_coalesce
*ec
,
5130 bool rx_ctrl_dim_on
= !!ec
->use_adaptive_rx_coalesce
;
5131 u32 max_usecs
, max_packets
;
5135 mutex_lock(&vi
->rq
[queue
].dim_lock
);
5136 cur_rx_dim
= vi
->rq
[queue
].dim_enabled
;
5137 max_usecs
= vi
->rq
[queue
].intr_coal
.max_usecs
;
5138 max_packets
= vi
->rq
[queue
].intr_coal
.max_packets
;
5140 if (rx_ctrl_dim_on
&& (ec
->rx_coalesce_usecs
!= max_usecs
||
5141 ec
->rx_max_coalesced_frames
!= max_packets
)) {
5142 mutex_unlock(&vi
->rq
[queue
].dim_lock
);
5146 if (rx_ctrl_dim_on
&& !cur_rx_dim
) {
5147 vi
->rq
[queue
].dim_enabled
= true;
5148 mutex_unlock(&vi
->rq
[queue
].dim_lock
);
5152 if (!rx_ctrl_dim_on
&& cur_rx_dim
)
5153 vi
->rq
[queue
].dim_enabled
= false;
5155 /* If no params are updated, userspace ethtool will
5156 * reject the modification.
5158 err
= virtnet_send_rx_ctrl_coal_vq_cmd(vi
, queue
,
5159 ec
->rx_coalesce_usecs
,
5160 ec
->rx_max_coalesced_frames
);
5161 mutex_unlock(&vi
->rq
[queue
].dim_lock
);
5165 static int virtnet_send_notf_coal_vq_cmds(struct virtnet_info
*vi
,
5166 struct ethtool_coalesce
*ec
,
5171 err
= virtnet_send_rx_notf_coal_vq_cmds(vi
, ec
, queue
);
5175 err
= virtnet_send_tx_ctrl_coal_vq_cmd(vi
, queue
,
5176 ec
->tx_coalesce_usecs
,
5177 ec
->tx_max_coalesced_frames
);
5184 static void virtnet_rx_dim_work(struct work_struct
*work
)
5186 struct dim
*dim
= container_of(work
, struct dim
, work
);
5187 struct receive_queue
*rq
= container_of(dim
,
5188 struct receive_queue
, dim
);
5189 struct virtnet_info
*vi
= rq
->vq
->vdev
->priv
;
5190 struct net_device
*dev
= vi
->dev
;
5191 struct dim_cq_moder update_moder
;
5196 mutex_lock(&rq
->dim_lock
);
5197 if (!rq
->dim_enabled
)
5200 update_moder
= net_dim_get_rx_irq_moder(dev
, dim
);
5201 if (update_moder
.usec
!= rq
->intr_coal
.max_usecs
||
5202 update_moder
.pkts
!= rq
->intr_coal
.max_packets
) {
5203 err
= virtnet_send_rx_ctrl_coal_vq_cmd(vi
, qnum
,
5207 pr_debug("%s: Failed to send dim parameters on rxq%d\n",
5211 dim
->state
= DIM_START_MEASURE
;
5212 mutex_unlock(&rq
->dim_lock
);
static int virtnet_coal_params_supported(struct ethtool_coalesce *ec)
{
	/* usecs coalescing is supported only if VIRTIO_NET_F_NOTF_COAL
	 * or VIRTIO_NET_F_VQ_NOTF_COAL feature is negotiated.
	 */
	if (ec->rx_coalesce_usecs || ec->tx_coalesce_usecs)
		return -EOPNOTSUPP;

	if (ec->tx_max_coalesced_frames > 1 ||
	    ec->rx_max_coalesced_frames != 1)
		return -EINVAL;

	return 0;
}

static int virtnet_should_update_vq_weight(int dev_flags, int weight,
					   int vq_weight, bool *should_update)
{
	if (weight ^ vq_weight) {
		if (dev_flags & IFF_UP)
			return -EBUSY;
		*should_update = true;
	}

	return 0;
}
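/* Usage sketch (standard ethtool, no NOTF_COAL features negotiated): in
 * that mode "ethtool -C <dev> tx-frames 0" effectively disables TX NAPI
 * (weight 0) and "tx-frames 1" re-enables it, which is why anything other
 * than 0/1 TX frames, rx-frames != 1 or nonzero usecs is rejected by
 * virtnet_coal_params_supported().
 */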
5242 static int virtnet_set_coalesce(struct net_device
*dev
,
5243 struct ethtool_coalesce
*ec
,
5244 struct kernel_ethtool_coalesce
*kernel_coal
,
5245 struct netlink_ext_ack
*extack
)
5247 struct virtnet_info
*vi
= netdev_priv(dev
);
5248 int ret
, queue_number
, napi_weight
, i
;
5249 bool update_napi
= false;
5251 /* Can't change NAPI weight if the link is up */
5252 napi_weight
= ec
->tx_max_coalesced_frames
? NAPI_POLL_WEIGHT
: 0;
5253 for (queue_number
= 0; queue_number
< vi
->max_queue_pairs
; queue_number
++) {
5254 ret
= virtnet_should_update_vq_weight(dev
->flags
, napi_weight
,
5255 vi
->sq
[queue_number
].napi
.weight
,
5261 /* All queues that belong to [queue_number, vi->max_queue_pairs] will be
5262 * updated for the sake of simplicity, which might not be necessary
5268 if (virtio_has_feature(vi
->vdev
, VIRTIO_NET_F_NOTF_COAL
))
5269 ret
= virtnet_send_notf_coal_cmds(vi
, ec
);
5271 ret
= virtnet_coal_params_supported(ec
);
5277 /* xsk xmit depends on the tx napi. So if xsk is active,
5278 * prevent modifications to tx napi.
5280 for (i
= queue_number
; i
< vi
->max_queue_pairs
; i
++) {
5281 if (vi
->sq
[i
].xsk_pool
)
5285 for (; queue_number
< vi
->max_queue_pairs
; queue_number
++)
5286 vi
->sq
[queue_number
].napi
.weight
= napi_weight
;
static int virtnet_get_coalesce(struct net_device *dev,
				struct ethtool_coalesce *ec,
				struct kernel_ethtool_coalesce *kernel_coal,
				struct netlink_ext_ack *extack)
{
	struct virtnet_info *vi = netdev_priv(dev);

	if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_NOTF_COAL)) {
		ec->rx_coalesce_usecs = vi->intr_coal_rx.max_usecs;
		ec->tx_coalesce_usecs = vi->intr_coal_tx.max_usecs;
		ec->tx_max_coalesced_frames = vi->intr_coal_tx.max_packets;
		ec->rx_max_coalesced_frames = vi->intr_coal_rx.max_packets;
		ec->use_adaptive_rx_coalesce = vi->rx_dim_enabled;
	} else {
		ec->rx_max_coalesced_frames = 1;

		if (vi->sq[0].napi.weight)
			ec->tx_max_coalesced_frames = 1;
	}

	return 0;
}
5315 static int virtnet_set_per_queue_coalesce(struct net_device
*dev
,
5317 struct ethtool_coalesce
*ec
)
5319 struct virtnet_info
*vi
= netdev_priv(dev
);
5320 int ret
, napi_weight
;
5321 bool update_napi
= false;
5323 if (queue
>= vi
->max_queue_pairs
)
5326 /* Can't change NAPI weight if the link is up */
5327 napi_weight
= ec
->tx_max_coalesced_frames
? NAPI_POLL_WEIGHT
: 0;
5328 ret
= virtnet_should_update_vq_weight(dev
->flags
, napi_weight
,
5329 vi
->sq
[queue
].napi
.weight
,
5334 if (virtio_has_feature(vi
->vdev
, VIRTIO_NET_F_VQ_NOTF_COAL
))
5335 ret
= virtnet_send_notf_coal_vq_cmds(vi
, ec
, queue
);
5337 ret
= virtnet_coal_params_supported(ec
);
5343 vi
->sq
[queue
].napi
.weight
= napi_weight
;
5348 static int virtnet_get_per_queue_coalesce(struct net_device
*dev
,
5350 struct ethtool_coalesce
*ec
)
5352 struct virtnet_info
*vi
= netdev_priv(dev
);
5354 if (queue
>= vi
->max_queue_pairs
)
5357 if (virtio_has_feature(vi
->vdev
, VIRTIO_NET_F_VQ_NOTF_COAL
)) {
5358 mutex_lock(&vi
->rq
[queue
].dim_lock
);
5359 ec
->rx_coalesce_usecs
= vi
->rq
[queue
].intr_coal
.max_usecs
;
5360 ec
->tx_coalesce_usecs
= vi
->sq
[queue
].intr_coal
.max_usecs
;
5361 ec
->tx_max_coalesced_frames
= vi
->sq
[queue
].intr_coal
.max_packets
;
5362 ec
->rx_max_coalesced_frames
= vi
->rq
[queue
].intr_coal
.max_packets
;
5363 ec
->use_adaptive_rx_coalesce
= vi
->rq
[queue
].dim_enabled
;
5364 mutex_unlock(&vi
->rq
[queue
].dim_lock
);
5366 ec
->rx_max_coalesced_frames
= 1;
5368 if (vi
->sq
[queue
].napi
.weight
)
5369 ec
->tx_max_coalesced_frames
= 1;
static void virtnet_init_settings(struct net_device *dev)
{
	struct virtnet_info *vi = netdev_priv(dev);

	vi->speed = SPEED_UNKNOWN;
	vi->duplex = DUPLEX_UNKNOWN;
}

static u32 virtnet_get_rxfh_key_size(struct net_device *dev)
{
	return ((struct virtnet_info *)netdev_priv(dev))->rss_key_size;
}

static u32 virtnet_get_rxfh_indir_size(struct net_device *dev)
{
	return ((struct virtnet_info *)netdev_priv(dev))->rss_indir_table_size;
}
static int virtnet_get_rxfh(struct net_device *dev,
			    struct ethtool_rxfh_param *rxfh)
{
	struct virtnet_info *vi = netdev_priv(dev);
	int i;

	if (rxfh->indir) {
		for (i = 0; i < vi->rss_indir_table_size; ++i)
			rxfh->indir[i] = vi->rss.indirection_table[i];
	}

	if (rxfh->key)
		memcpy(rxfh->key, vi->rss.key, vi->rss_key_size);

	rxfh->hfunc = ETH_RSS_HASH_TOP;

	return 0;
}
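/* Usage sketch (assuming standard ethtool): "ethtool -x <dev>" dumps the
 * key and indirection table returned here, while "ethtool -X <dev> equal N"
 * or "ethtool -X <dev> hfunc toeplitz" goes through virtnet_set_rxfh()
 * below; only the Toeplitz hash (ETH_RSS_HASH_TOP) is accepted.
 */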
5412 static int virtnet_set_rxfh(struct net_device
*dev
,
5413 struct ethtool_rxfh_param
*rxfh
,
5414 struct netlink_ext_ack
*extack
)
5416 struct virtnet_info
*vi
= netdev_priv(dev
);
5417 bool update
= false;
5420 if (rxfh
->hfunc
!= ETH_RSS_HASH_NO_CHANGE
&&
5421 rxfh
->hfunc
!= ETH_RSS_HASH_TOP
)
5428 for (i
= 0; i
< vi
->rss_indir_table_size
; ++i
)
5429 vi
->rss
.indirection_table
[i
] = rxfh
->indir
[i
];
5434 /* If either _F_HASH_REPORT or _F_RSS are negotiated, the
5435 * device provides hash calculation capabilities, that is,
5436 * hash_key is configured.
5438 if (!vi
->has_rss
&& !vi
->has_rss_hash_report
)
5441 memcpy(vi
->rss
.key
, rxfh
->key
, vi
->rss_key_size
);
5446 virtnet_commit_rss_command(vi
);
5451 static int virtnet_get_rxnfc(struct net_device
*dev
, struct ethtool_rxnfc
*info
, u32
*rule_locs
)
5453 struct virtnet_info
*vi
= netdev_priv(dev
);
5456 switch (info
->cmd
) {
5457 case ETHTOOL_GRXRINGS
:
5458 info
->data
= vi
->curr_queue_pairs
;
5461 virtnet_get_hashflow(vi
, info
);
5470 static int virtnet_set_rxnfc(struct net_device
*dev
, struct ethtool_rxnfc
*info
)
5472 struct virtnet_info
*vi
= netdev_priv(dev
);
5475 switch (info
->cmd
) {
5477 if (!virtnet_set_hashflow(vi
, info
))
static const struct ethtool_ops virtnet_ethtool_ops = {
	.supported_coalesce_params = ETHTOOL_COALESCE_MAX_FRAMES |
		ETHTOOL_COALESCE_USECS | ETHTOOL_COALESCE_USE_ADAPTIVE_RX,
	.get_drvinfo = virtnet_get_drvinfo,
	.get_link = ethtool_op_get_link,
	.get_ringparam = virtnet_get_ringparam,
	.set_ringparam = virtnet_set_ringparam,
	.get_strings = virtnet_get_strings,
	.get_sset_count = virtnet_get_sset_count,
	.get_ethtool_stats = virtnet_get_ethtool_stats,
	.set_channels = virtnet_set_channels,
	.get_channels = virtnet_get_channels,
	.get_ts_info = ethtool_op_get_ts_info,
	.get_link_ksettings = virtnet_get_link_ksettings,
	.set_link_ksettings = virtnet_set_link_ksettings,
	.set_coalesce = virtnet_set_coalesce,
	.get_coalesce = virtnet_get_coalesce,
	.set_per_queue_coalesce = virtnet_set_per_queue_coalesce,
	.get_per_queue_coalesce = virtnet_get_per_queue_coalesce,
	.get_rxfh_key_size = virtnet_get_rxfh_key_size,
	.get_rxfh_indir_size = virtnet_get_rxfh_indir_size,
	.get_rxfh = virtnet_get_rxfh,
	.set_rxfh = virtnet_set_rxfh,
	.get_rxnfc = virtnet_get_rxnfc,
	.set_rxnfc = virtnet_set_rxnfc,
};
static void virtnet_get_queue_stats_rx(struct net_device *dev, int i,
				       struct netdev_queue_stats_rx *stats)
{
	struct virtnet_info *vi = netdev_priv(dev);
	struct receive_queue *rq = &vi->rq[i];
	struct virtnet_stats_ctx ctx = {0};

	virtnet_stats_ctx_init(vi, &ctx, (void *)stats, true);

	virtnet_get_hw_stats(vi, &ctx, i * 2);
	virtnet_fill_stats(vi, i * 2, &ctx, (void *)&rq->stats, true, 0);
}

static void virtnet_get_queue_stats_tx(struct net_device *dev, int i,
				       struct netdev_queue_stats_tx *stats)
{
	struct virtnet_info *vi = netdev_priv(dev);
	struct send_queue *sq = &vi->sq[i];
	struct virtnet_stats_ctx ctx = {0};

	virtnet_stats_ctx_init(vi, &ctx, (void *)stats, true);

	virtnet_get_hw_stats(vi, &ctx, i * 2 + 1);
	virtnet_fill_stats(vi, i * 2 + 1, &ctx, (void *)&sq->stats, true, 0);
}
5541 static void virtnet_get_base_stats(struct net_device
*dev
,
5542 struct netdev_queue_stats_rx
*rx
,
5543 struct netdev_queue_stats_tx
*tx
)
5545 struct virtnet_info
*vi
= netdev_priv(dev
);
5547 /* The queue stats of the virtio-net will not be reset. So here we
5553 if (vi
->device_stats_cap
& VIRTIO_NET_STATS_TYPE_RX_BASIC
) {
5555 rx
->hw_drop_overruns
= 0;
5558 if (vi
->device_stats_cap
& VIRTIO_NET_STATS_TYPE_RX_CSUM
) {
5559 rx
->csum_unnecessary
= 0;
5564 if (vi
->device_stats_cap
& VIRTIO_NET_STATS_TYPE_RX_GSO
) {
5565 rx
->hw_gro_packets
= 0;
5566 rx
->hw_gro_bytes
= 0;
5567 rx
->hw_gro_wire_packets
= 0;
5568 rx
->hw_gro_wire_bytes
= 0;
5571 if (vi
->device_stats_cap
& VIRTIO_NET_STATS_TYPE_RX_SPEED
)
5572 rx
->hw_drop_ratelimits
= 0;
5579 if (vi
->device_stats_cap
& VIRTIO_NET_STATS_TYPE_TX_BASIC
) {
5581 tx
->hw_drop_errors
= 0;
5584 if (vi
->device_stats_cap
& VIRTIO_NET_STATS_TYPE_TX_CSUM
) {
5589 if (vi
->device_stats_cap
& VIRTIO_NET_STATS_TYPE_TX_GSO
) {
5590 tx
->hw_gso_packets
= 0;
5591 tx
->hw_gso_bytes
= 0;
5592 tx
->hw_gso_wire_packets
= 0;
5593 tx
->hw_gso_wire_bytes
= 0;
5596 if (vi
->device_stats_cap
& VIRTIO_NET_STATS_TYPE_TX_SPEED
)
5597 tx
->hw_drop_ratelimits
= 0;
static const struct netdev_stat_ops virtnet_stat_ops = {
	.get_queue_stats_rx = virtnet_get_queue_stats_rx,
	.get_queue_stats_tx = virtnet_get_queue_stats_tx,
	.get_base_stats = virtnet_get_base_stats,
};
static void virtnet_freeze_down(struct virtio_device *vdev)
{
	struct virtnet_info *vi = vdev->priv;

	/* Make sure no work handler is accessing the device */
	flush_work(&vi->config_work);
	disable_rx_mode_work(vi);
	flush_work(&vi->rx_mode_work);

	netif_tx_lock_bh(vi->dev);
	netif_device_detach(vi->dev);
	netif_tx_unlock_bh(vi->dev);
	if (netif_running(vi->dev))
		virtnet_close(vi->dev);
}
static int init_vqs(struct virtnet_info *vi);

static int virtnet_restore_up(struct virtio_device *vdev)
{
	struct virtnet_info *vi = vdev->priv;
	int err;

	err = init_vqs(vi);
	if (err)
		return err;

	virtio_device_ready(vdev);

	enable_delayed_refill(vi);
	enable_rx_mode_work(vi);

	if (netif_running(vi->dev)) {
		err = virtnet_open(vi->dev);
		if (err)
			return err;
	}

	netif_tx_lock_bh(vi->dev);
	netif_device_attach(vi->dev);
	netif_tx_unlock_bh(vi->dev);

	return err;
}
static int virtnet_set_guest_offloads(struct virtnet_info *vi, u64 offloads)
{
	__virtio64 *_offloads __free(kfree) = NULL;
	struct scatterlist sg;

	_offloads = kzalloc(sizeof(*_offloads), GFP_KERNEL);
	if (!_offloads)
		return -ENOMEM;

	*_offloads = cpu_to_virtio64(vi->vdev, offloads);

	sg_init_one(&sg, _offloads, sizeof(*_offloads));

	if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_GUEST_OFFLOADS,
				  VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET, &sg)) {
		dev_warn(&vi->dev->dev, "Fail to set guest offload.\n");
		return -EINVAL;
	}

	return 0;
}

static int virtnet_clear_guest_offloads(struct virtnet_info *vi)
{
	u64 offloads = 0;

	if (!vi->guest_offloads)
		return 0;

	return virtnet_set_guest_offloads(vi, offloads);
}

static int virtnet_restore_guest_offloads(struct virtnet_info *vi)
{
	u64 offloads = vi->guest_offloads;

	if (!vi->guest_offloads)
		return 0;

	return virtnet_set_guest_offloads(vi, offloads);
}
5692 static int virtnet_rq_bind_xsk_pool(struct virtnet_info
*vi
, struct receive_queue
*rq
,
5693 struct xsk_buff_pool
*pool
)
5697 qindex
= rq
- vi
->rq
;
5700 err
= xdp_rxq_info_reg(&rq
->xsk_rxq_info
, vi
->dev
, qindex
, rq
->napi
.napi_id
);
5704 err
= xdp_rxq_info_reg_mem_model(&rq
->xsk_rxq_info
,
5705 MEM_TYPE_XSK_BUFF_POOL
, NULL
);
5709 xsk_pool_set_rxq_info(pool
, &rq
->xsk_rxq_info
);
5712 virtnet_rx_pause(vi
, rq
);
5714 err
= virtqueue_reset(rq
->vq
, virtnet_rq_unmap_free_buf
, NULL
);
5716 netdev_err(vi
->dev
, "reset rx fail: rx queue index: %d err: %d\n", qindex
, err
);
5721 rq
->xsk_pool
= pool
;
5723 virtnet_rx_resume(vi
, rq
);
5729 xdp_rxq_info_unreg(&rq
->xsk_rxq_info
);
static int virtnet_sq_bind_xsk_pool(struct virtnet_info *vi,
				    struct send_queue *sq,
				    struct xsk_buff_pool *pool)
{
	int err, qindex;

	qindex = sq - vi->sq;

	virtnet_tx_pause(vi, sq);

	err = virtqueue_reset(sq->vq, virtnet_sq_free_unused_buf,
			      virtnet_sq_free_unused_buf_done);
	if (err) {
		netdev_err(vi->dev, "reset tx fail: tx queue index: %d err: %d\n", qindex, err);
		pool = NULL;
	}

	sq->xsk_pool = pool;

	virtnet_tx_resume(vi, sq);

	return err;
}
5757 static int virtnet_xsk_pool_enable(struct net_device
*dev
,
5758 struct xsk_buff_pool
*pool
,
5761 struct virtnet_info
*vi
= netdev_priv(dev
);
5762 struct receive_queue
*rq
;
5763 struct device
*dma_dev
;
5764 struct send_queue
*sq
;
5768 if (vi
->hdr_len
> xsk_pool_get_headroom(pool
))
5771 /* In big_packets mode, xdp cannot work, so there is no need to
5772 * initialize xsk of rq.
5774 if (vi
->big_packets
&& !vi
->mergeable_rx_bufs
)
5777 if (qid
>= vi
->curr_queue_pairs
)
5783 /* xsk assumes that tx and rx must have the same dma device. The af-xdp
5784 * may use one buffer to receive from the rx and reuse this buffer to
5785 * send by the tx. So the dma dev of sq and rq must be the same one.
5787 * But vq->dma_dev allows every vq has the respective dma dev. So I
5788 * check the dma dev of vq and sq is the same dev.
5790 if (virtqueue_dma_dev(rq
->vq
) != virtqueue_dma_dev(sq
->vq
))
5793 dma_dev
= virtqueue_dma_dev(rq
->vq
);
5797 size
= virtqueue_get_vring_size(rq
->vq
);
5799 rq
->xsk_buffs
= kvcalloc(size
, sizeof(*rq
->xsk_buffs
), GFP_KERNEL
);
5803 hdr_dma
= virtqueue_dma_map_single_attrs(sq
->vq
, &xsk_hdr
, vi
->hdr_len
,
5805 if (virtqueue_dma_mapping_error(sq
->vq
, hdr_dma
))
5808 err
= xsk_pool_dma_map(pool
, dma_dev
, 0);
5812 err
= virtnet_rq_bind_xsk_pool(vi
, rq
, pool
);
5816 err
= virtnet_sq_bind_xsk_pool(vi
, sq
, pool
);
5820 /* Now, we do not support tx offload(such as tx csum), so all the tx
5821 * virtnet hdr is zero. So all the tx packets can share a single hdr.
5823 sq
->xsk_hdr_dma_addr
= hdr_dma
;
5828 virtnet_rq_bind_xsk_pool(vi
, rq
, NULL
);
5830 xsk_pool_dma_unmap(pool
, 0);
5832 virtqueue_dma_unmap_single_attrs(rq
->vq
, hdr_dma
, vi
->hdr_len
,
5837 static int virtnet_xsk_pool_disable(struct net_device
*dev
, u16 qid
)
5839 struct virtnet_info
*vi
= netdev_priv(dev
);
5840 struct xsk_buff_pool
*pool
;
5841 struct receive_queue
*rq
;
5842 struct send_queue
*sq
;
5845 if (qid
>= vi
->curr_queue_pairs
)
5851 pool
= rq
->xsk_pool
;
5853 err
= virtnet_rq_bind_xsk_pool(vi
, rq
, NULL
);
5854 err
|= virtnet_sq_bind_xsk_pool(vi
, sq
, NULL
);
5856 xsk_pool_dma_unmap(pool
, 0);
5858 virtqueue_dma_unmap_single_attrs(sq
->vq
, sq
->xsk_hdr_dma_addr
,
5859 vi
->hdr_len
, DMA_TO_DEVICE
, 0);
5860 kvfree(rq
->xsk_buffs
);
static int virtnet_xsk_pool_setup(struct net_device *dev, struct netdev_bpf *xdp)
{
	if (xdp->xsk.pool)
		return virtnet_xsk_pool_enable(dev, xdp->xsk.pool,
					       xdp->xsk.queue_id);
	else
		return virtnet_xsk_pool_disable(dev, xdp->xsk.queue_id);
}
5874 static int virtnet_xdp_set(struct net_device
*dev
, struct bpf_prog
*prog
,
5875 struct netlink_ext_ack
*extack
)
5877 unsigned int room
= SKB_DATA_ALIGN(XDP_PACKET_HEADROOM
+
5878 sizeof(struct skb_shared_info
));
5879 unsigned int max_sz
= PAGE_SIZE
- room
- ETH_HLEN
;
5880 struct virtnet_info
*vi
= netdev_priv(dev
);
5881 struct bpf_prog
*old_prog
;
5882 u16 xdp_qp
= 0, curr_qp
;
5885 if (!virtio_has_feature(vi
->vdev
, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS
)
5886 && (virtio_has_feature(vi
->vdev
, VIRTIO_NET_F_GUEST_TSO4
) ||
5887 virtio_has_feature(vi
->vdev
, VIRTIO_NET_F_GUEST_TSO6
) ||
5888 virtio_has_feature(vi
->vdev
, VIRTIO_NET_F_GUEST_ECN
) ||
5889 virtio_has_feature(vi
->vdev
, VIRTIO_NET_F_GUEST_UFO
) ||
5890 virtio_has_feature(vi
->vdev
, VIRTIO_NET_F_GUEST_CSUM
) ||
5891 virtio_has_feature(vi
->vdev
, VIRTIO_NET_F_GUEST_USO4
) ||
5892 virtio_has_feature(vi
->vdev
, VIRTIO_NET_F_GUEST_USO6
))) {
5893 NL_SET_ERR_MSG_MOD(extack
, "Can't set XDP while host is implementing GRO_HW/CSUM, disable GRO_HW/CSUM first");
5897 if (vi
->mergeable_rx_bufs
&& !vi
->any_header_sg
) {
5898 NL_SET_ERR_MSG_MOD(extack
, "XDP expects header/data in single page, any_header_sg required");
5902 if (prog
&& !prog
->aux
->xdp_has_frags
&& dev
->mtu
> max_sz
) {
5903 NL_SET_ERR_MSG_MOD(extack
, "MTU too large to enable XDP without frags");
5904 netdev_warn(dev
, "single-buffer XDP requires MTU less than %u\n", max_sz
);
5908 curr_qp
= vi
->curr_queue_pairs
- vi
->xdp_queue_pairs
;
5910 xdp_qp
= nr_cpu_ids
;
5912 /* XDP requires extra queues for XDP_TX */
5913 if (curr_qp
+ xdp_qp
> vi
->max_queue_pairs
) {
5914 netdev_warn_once(dev
, "XDP request %i queues but max is %i. XDP_TX and XDP_REDIRECT will operate in a slower locked tx mode.\n",
5915 curr_qp
+ xdp_qp
, vi
->max_queue_pairs
);
5919 old_prog
= rtnl_dereference(vi
->rq
[0].xdp_prog
);
5920 if (!prog
&& !old_prog
)
5924 bpf_prog_add(prog
, vi
->max_queue_pairs
- 1);
5926 /* Make sure NAPI is not using any XDP TX queues for RX. */
5927 if (netif_running(dev
)) {
5928 for (i
= 0; i
< vi
->max_queue_pairs
; i
++) {
5929 napi_disable(&vi
->rq
[i
].napi
);
5930 virtnet_napi_tx_disable(&vi
->sq
[i
].napi
);
5935 for (i
= 0; i
< vi
->max_queue_pairs
; i
++) {
5936 rcu_assign_pointer(vi
->rq
[i
].xdp_prog
, prog
);
5938 virtnet_restore_guest_offloads(vi
);
5943 err
= virtnet_set_queues(vi
, curr_qp
+ xdp_qp
);
5946 netif_set_real_num_rx_queues(dev
, curr_qp
+ xdp_qp
);
5947 vi
->xdp_queue_pairs
= xdp_qp
;
5950 vi
->xdp_enabled
= true;
5951 for (i
= 0; i
< vi
->max_queue_pairs
; i
++) {
5952 rcu_assign_pointer(vi
->rq
[i
].xdp_prog
, prog
);
5953 if (i
== 0 && !old_prog
)
5954 virtnet_clear_guest_offloads(vi
);
5957 xdp_features_set_redirect_target(dev
, true);
5959 xdp_features_clear_redirect_target(dev
);
5960 vi
->xdp_enabled
= false;
5963 for (i
= 0; i
< vi
->max_queue_pairs
; i
++) {
5965 bpf_prog_put(old_prog
);
5966 if (netif_running(dev
)) {
5967 virtnet_napi_enable(vi
->rq
[i
].vq
, &vi
->rq
[i
].napi
);
5968 virtnet_napi_tx_enable(vi
, vi
->sq
[i
].vq
,
5977 virtnet_clear_guest_offloads(vi
);
5978 for (i
= 0; i
< vi
->max_queue_pairs
; i
++)
5979 rcu_assign_pointer(vi
->rq
[i
].xdp_prog
, old_prog
);
5982 if (netif_running(dev
)) {
5983 for (i
= 0; i
< vi
->max_queue_pairs
; i
++) {
5984 virtnet_napi_enable(vi
->rq
[i
].vq
, &vi
->rq
[i
].napi
);
5985 virtnet_napi_tx_enable(vi
, vi
->sq
[i
].vq
,
5990 bpf_prog_sub(prog
, vi
->max_queue_pairs
- 1);
static int virtnet_xdp(struct net_device *dev, struct netdev_bpf *xdp)
{
	switch (xdp->command) {
	case XDP_SETUP_PROG:
		return virtnet_xdp_set(dev, xdp->prog, xdp->extack);
	case XDP_SETUP_XSK_POOL:
		return virtnet_xsk_pool_setup(dev, xdp);
	default:
		return -EINVAL;
	}
}
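/* Usage sketch (iproute2 / AF_XDP, assumed tooling): XDP_SETUP_PROG is
 * what "ip link set dev <dev> xdp obj prog.o" ends up requesting, while
 * XDP_SETUP_XSK_POOL is issued by the kernel when an AF_XDP socket is
 * bound to one of this device's queues in zero-copy mode.
 */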
static int virtnet_get_phys_port_name(struct net_device *dev, char *buf,
				      size_t len)
{
	struct virtnet_info *vi = netdev_priv(dev);
	int ret;

	if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_STANDBY))
		return -EOPNOTSUPP;

	ret = snprintf(buf, len, "sby");
	if (ret >= len)
		return -EOPNOTSUPP;

	return 0;
}
6022 static int virtnet_set_features(struct net_device
*dev
,
6023 netdev_features_t features
)
6025 struct virtnet_info
*vi
= netdev_priv(dev
);
6029 if ((dev
->features
^ features
) & NETIF_F_GRO_HW
) {
6030 if (vi
->xdp_enabled
)
6033 if (features
& NETIF_F_GRO_HW
)
6034 offloads
= vi
->guest_offloads_capable
;
6036 offloads
= vi
->guest_offloads_capable
&
6037 ~GUEST_OFFLOAD_GRO_HW_MASK
;
6039 err
= virtnet_set_guest_offloads(vi
, offloads
);
6042 vi
->guest_offloads
= offloads
;
6045 if ((dev
->features
^ features
) & NETIF_F_RXHASH
) {
6046 if (features
& NETIF_F_RXHASH
)
6047 vi
->rss
.hash_types
= vi
->rss_hash_types_saved
;
6049 vi
->rss
.hash_types
= VIRTIO_NET_HASH_REPORT_NONE
;
6051 if (!virtnet_commit_rss_command(vi
))
static void virtnet_tx_timeout(struct net_device *dev, unsigned int txqueue)
{
	struct virtnet_info *priv = netdev_priv(dev);
	struct send_queue *sq = &priv->sq[txqueue];
	struct netdev_queue *txq = netdev_get_tx_queue(dev, txqueue);

	u64_stats_update_begin(&sq->stats.syncp);
	u64_stats_inc(&sq->stats.tx_timeouts);
	u64_stats_update_end(&sq->stats.syncp);

	netdev_err(dev, "TX timeout on queue: %u, sq: %s, vq: 0x%x, name: %s, %u usecs ago\n",
		   txqueue, sq->name, sq->vq->index, sq->vq->name,
		   jiffies_to_usecs(jiffies - READ_ONCE(txq->trans_start)));
}
6073 static int virtnet_init_irq_moder(struct virtnet_info
*vi
)
6075 u8 profile_flags
= 0, coal_flags
= 0;
6078 profile_flags
|= DIM_PROFILE_RX
;
6079 coal_flags
|= DIM_COALESCE_USEC
| DIM_COALESCE_PKTS
;
6080 ret
= net_dim_init_irq_moder(vi
->dev
, profile_flags
, coal_flags
,
6081 DIM_CQ_PERIOD_MODE_START_FROM_EQE
,
6082 0, virtnet_rx_dim_work
, NULL
);
6087 for (i
= 0; i
< vi
->max_queue_pairs
; i
++)
6088 net_dim_setting(vi
->dev
, &vi
->rq
[i
].dim
, false);
6093 static void virtnet_free_irq_moder(struct virtnet_info
*vi
)
6095 if (!virtio_has_feature(vi
->vdev
, VIRTIO_NET_F_VQ_NOTF_COAL
))
6099 net_dim_free_irq_moder(vi
->dev
);
static const struct net_device_ops virtnet_netdev = {
	.ndo_open = virtnet_open,
	.ndo_stop = virtnet_close,
	.ndo_start_xmit = start_xmit,
	.ndo_validate_addr = eth_validate_addr,
	.ndo_set_mac_address = virtnet_set_mac_address,
	.ndo_set_rx_mode = virtnet_set_rx_mode,
	.ndo_get_stats64 = virtnet_stats,
	.ndo_vlan_rx_add_vid = virtnet_vlan_rx_add_vid,
	.ndo_vlan_rx_kill_vid = virtnet_vlan_rx_kill_vid,
	.ndo_bpf = virtnet_xdp,
	.ndo_xdp_xmit = virtnet_xdp_xmit,
	.ndo_xsk_wakeup = virtnet_xsk_wakeup,
	.ndo_features_check = passthru_features_check,
	.ndo_get_phys_port_name = virtnet_get_phys_port_name,
	.ndo_set_features = virtnet_set_features,
	.ndo_tx_timeout = virtnet_tx_timeout,
};
6122 static void virtnet_config_changed_work(struct work_struct
*work
)
6124 struct virtnet_info
*vi
=
6125 container_of(work
, struct virtnet_info
, config_work
);
6128 if (virtio_cread_feature(vi
->vdev
, VIRTIO_NET_F_STATUS
,
6129 struct virtio_net_config
, status
, &v
) < 0)
6132 if (v
& VIRTIO_NET_S_ANNOUNCE
) {
6133 netdev_notify_peers(vi
->dev
);
6134 virtnet_ack_link_announce(vi
);
6137 /* Ignore unknown (future) status bits */
6138 v
&= VIRTIO_NET_S_LINK_UP
;
6140 if (vi
->status
== v
)
6145 if (vi
->status
& VIRTIO_NET_S_LINK_UP
) {
6146 virtnet_update_settings(vi
);
6147 netif_carrier_on(vi
->dev
);
6148 netif_tx_wake_all_queues(vi
->dev
);
6150 netif_carrier_off(vi
->dev
);
6151 netif_tx_stop_all_queues(vi
->dev
);
6155 static void virtnet_config_changed(struct virtio_device
*vdev
)
6157 struct virtnet_info
*vi
= vdev
->priv
;
6159 schedule_work(&vi
->config_work
);
static void virtnet_free_queues(struct virtnet_info *vi)
{
	int i;

	for (i = 0; i < vi->max_queue_pairs; i++) {
		__netif_napi_del(&vi->rq[i].napi);
		__netif_napi_del(&vi->sq[i].napi);
	}

	/* We called __netif_napi_del(),
	 * we need to respect an RCU grace period before freeing vi->rq
	 */
	synchronize_net();

	kfree(vi->rq);
	kfree(vi->sq);
	kfree(vi->ctrl);
}
6181 static void _free_receive_bufs(struct virtnet_info
*vi
)
6183 struct bpf_prog
*old_prog
;
6186 for (i
= 0; i
< vi
->max_queue_pairs
; i
++) {
6187 while (vi
->rq
[i
].pages
)
6188 __free_pages(get_a_page(&vi
->rq
[i
], GFP_KERNEL
), 0);
6190 old_prog
= rtnl_dereference(vi
->rq
[i
].xdp_prog
);
6191 RCU_INIT_POINTER(vi
->rq
[i
].xdp_prog
, NULL
);
6193 bpf_prog_put(old_prog
);
6197 static void free_receive_bufs(struct virtnet_info
*vi
)
6200 _free_receive_bufs(vi
);
static void free_receive_page_frags(struct virtnet_info *vi)
{
	int i;

	for (i = 0; i < vi->max_queue_pairs; i++)
		if (vi->rq[i].alloc_frag.page) {
			if (vi->rq[i].last_dma)
				virtnet_rq_unmap(&vi->rq[i], vi->rq[i].last_dma, 0);
			put_page(vi->rq[i].alloc_frag.page);
		}
}
static void virtnet_sq_free_unused_buf(struct virtqueue *vq, void *buf)
{
	struct virtnet_info *vi = vq->vdev->priv;
	struct send_queue *sq;
	int i = vq2txq(vq);

	sq = &vi->sq[i];

	switch (virtnet_xmit_ptr_unpack(&buf)) {
	case VIRTNET_XMIT_TYPE_SKB:
	case VIRTNET_XMIT_TYPE_SKB_ORPHAN:
		dev_kfree_skb(buf);
		break;

	case VIRTNET_XMIT_TYPE_XDP:
		xdp_return_frame(buf);
		break;

	case VIRTNET_XMIT_TYPE_XSK:
		xsk_tx_completed(sq->xsk_pool, 1);
		break;
	}
}

static void virtnet_sq_free_unused_buf_done(struct virtqueue *vq)
{
	struct virtnet_info *vi = vq->vdev->priv;
	int i = vq2txq(vq);

	netdev_tx_reset_queue(netdev_get_tx_queue(vi->dev, i));
}
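/* Note (for orientation): buffers queued on a send vq carry their type
 * in the low bits of the cookie pointer (skb, orphaned skb, XDP frame or
 * XSK descriptor), so virtnet_xmit_ptr_unpack() must strip that tag
 * before the switch above can free the underlying object correctly.
 */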
6247 static void free_unused_bufs(struct virtnet_info
*vi
)
6252 for (i
= 0; i
< vi
->max_queue_pairs
; i
++) {
6253 struct virtqueue
*vq
= vi
->sq
[i
].vq
;
6254 while ((buf
= virtqueue_detach_unused_buf(vq
)) != NULL
)
6255 virtnet_sq_free_unused_buf(vq
, buf
);
6259 for (i
= 0; i
< vi
->max_queue_pairs
; i
++) {
6260 struct virtqueue
*vq
= vi
->rq
[i
].vq
;
6262 while ((buf
= virtqueue_detach_unused_buf(vq
)) != NULL
)
6263 virtnet_rq_unmap_free_buf(vq
, buf
);
static void virtnet_del_vqs(struct virtnet_info *vi)
{
	struct virtio_device *vdev = vi->vdev;

	virtnet_clean_affinity(vi);

	vdev->config->del_vqs(vdev);

	virtnet_free_queues(vi);
}

/* How large should a single buffer be so a queue full of these can fit at
 * least one full packet?
 * Logic below assumes the mergeable buffer header is used.
 */
static unsigned int mergeable_min_buf_len(struct virtnet_info *vi, struct virtqueue *vq)
{
	const unsigned int hdr_len = vi->hdr_len;
	unsigned int rq_size = virtqueue_get_vring_size(vq);
	unsigned int packet_len = vi->big_packets ? IP_MAX_MTU : vi->dev->max_mtu;
	unsigned int buf_len = hdr_len + ETH_HLEN + VLAN_HLEN + packet_len;
	unsigned int min_buf_len = DIV_ROUND_UP(buf_len, rq_size);

	return max(max(min_buf_len, hdr_len) - hdr_len,
		   (unsigned int)GOOD_PACKET_LEN);
}
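/* Worked example (hypothetical numbers): with a 256-entry ring, a 12-byte
 * mergeable header and max_mtu of 65535, buf_len = 12 + 14 + 4 + 65535 =
 * 65565 and DIV_ROUND_UP(65565, 256) = 257, so the result is
 * max(257 - 12, GOOD_PACKET_LEN) = GOOD_PACKET_LEN (1518); only very
 * small rings push the per-buffer minimum above that floor.
 */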
6295 static int virtnet_find_vqs(struct virtnet_info
*vi
)
6297 struct virtqueue_info
*vqs_info
;
6298 struct virtqueue
**vqs
;
6304 /* We expect 1 RX virtqueue followed by 1 TX virtqueue, followed by
6305 * possible N-1 RX/TX queue pairs used in multiqueue mode, followed by
6306 * possible control vq.
6308 total_vqs
= vi
->max_queue_pairs
* 2 +
6309 virtio_has_feature(vi
->vdev
, VIRTIO_NET_F_CTRL_VQ
);
6311 /* Allocate space for find_vqs parameters */
6312 vqs
= kcalloc(total_vqs
, sizeof(*vqs
), GFP_KERNEL
);
6315 vqs_info
= kcalloc(total_vqs
, sizeof(*vqs_info
), GFP_KERNEL
);
6318 if (!vi
->big_packets
|| vi
->mergeable_rx_bufs
) {
6319 ctx
= kcalloc(total_vqs
, sizeof(*ctx
), GFP_KERNEL
);
6326 /* Parameters for control virtqueue, if any */
6328 vqs_info
[total_vqs
- 1].name
= "control";
6331 /* Allocate/initialize parameters for send/receive virtqueues */
6332 for (i
= 0; i
< vi
->max_queue_pairs
; i
++) {
6333 vqs_info
[rxq2vq(i
)].callback
= skb_recv_done
;
6334 vqs_info
[txq2vq(i
)].callback
= skb_xmit_done
;
6335 sprintf(vi
->rq
[i
].name
, "input.%u", i
);
6336 sprintf(vi
->sq
[i
].name
, "output.%u", i
);
6337 vqs_info
[rxq2vq(i
)].name
= vi
->rq
[i
].name
;
6338 vqs_info
[txq2vq(i
)].name
= vi
->sq
[i
].name
;
6340 vqs_info
[rxq2vq(i
)].ctx
= true;
6343 ret
= virtio_find_vqs(vi
->vdev
, total_vqs
, vqs
, vqs_info
, NULL
);
6348 vi
->cvq
= vqs
[total_vqs
- 1];
6349 if (virtio_has_feature(vi
->vdev
, VIRTIO_NET_F_CTRL_VLAN
))
6350 vi
->dev
->features
|= NETIF_F_HW_VLAN_CTAG_FILTER
;
6353 for (i
= 0; i
< vi
->max_queue_pairs
; i
++) {
6354 vi
->rq
[i
].vq
= vqs
[rxq2vq(i
)];
6355 vi
->rq
[i
].min_buf_len
= mergeable_min_buf_len(vi
, vi
->rq
[i
].vq
);
6356 vi
->sq
[i
].vq
= vqs
[txq2vq(i
)];
6359 /* run here: ret == 0. */
6372 static int virtnet_alloc_queues(struct virtnet_info
*vi
)
6377 vi
->ctrl
= kzalloc(sizeof(*vi
->ctrl
), GFP_KERNEL
);
6383 vi
->sq
= kcalloc(vi
->max_queue_pairs
, sizeof(*vi
->sq
), GFP_KERNEL
);
6386 vi
->rq
= kcalloc(vi
->max_queue_pairs
, sizeof(*vi
->rq
), GFP_KERNEL
);
6390 INIT_DELAYED_WORK(&vi
->refill
, refill_work
);
6391 for (i
= 0; i
< vi
->max_queue_pairs
; i
++) {
6392 vi
->rq
[i
].pages
= NULL
;
6393 netif_napi_add_weight(vi
->dev
, &vi
->rq
[i
].napi
, virtnet_poll
,
6395 netif_napi_add_tx_weight(vi
->dev
, &vi
->sq
[i
].napi
,
6397 napi_tx
? napi_weight
: 0);
6399 sg_init_table(vi
->rq
[i
].sg
, ARRAY_SIZE(vi
->rq
[i
].sg
));
6400 ewma_pkt_len_init(&vi
->rq
[i
].mrg_avg_pkt_len
);
6401 sg_init_table(vi
->sq
[i
].sg
, ARRAY_SIZE(vi
->sq
[i
].sg
));
6403 u64_stats_init(&vi
->rq
[i
].stats
.syncp
);
6404 u64_stats_init(&vi
->sq
[i
].stats
.syncp
);
6405 mutex_init(&vi
->rq
[i
].dim_lock
);
6418 static int init_vqs(struct virtnet_info
*vi
)
6422 /* Allocate send & receive queues */
6423 ret
= virtnet_alloc_queues(vi
);
6427 ret
= virtnet_find_vqs(vi
);
6432 virtnet_set_affinity(vi
);
6438 virtnet_free_queues(vi
);
static ssize_t mergeable_rx_buffer_size_show(struct netdev_rx_queue *queue,
					     char *buf)
{
	struct virtnet_info *vi = netdev_priv(queue->dev);
	unsigned int queue_index = get_netdev_rx_queue_index(queue);
	unsigned int headroom = virtnet_get_headroom(vi);
	unsigned int tailroom = headroom ? sizeof(struct skb_shared_info) : 0;
	struct ewma_pkt_len *avg;

	BUG_ON(queue_index >= vi->max_queue_pairs);
	avg = &vi->rq[queue_index].mrg_avg_pkt_len;
	return sprintf(buf, "%u\n",
		       get_mergeable_buf_len(&vi->rq[queue_index], avg,
					     SKB_DATA_ALIGN(headroom + tailroom)));
}

static struct rx_queue_attribute mergeable_rx_buffer_size_attribute =
	__ATTR_RO(mergeable_rx_buffer_size);

static struct attribute *virtio_net_mrg_rx_attrs[] = {
	&mergeable_rx_buffer_size_attribute.attr,
	NULL
};

static const struct attribute_group virtio_net_mrg_rx_group = {
	.name = "virtio_net",
	.attrs = virtio_net_mrg_rx_attrs
};
6474 static bool virtnet_fail_on_feature(struct virtio_device
*vdev
,
6476 const char *fname
, const char *dname
)
6478 if (!virtio_has_feature(vdev
, fbit
))
6481 dev_err(&vdev
->dev
, "device advertises feature %s but not %s",
6487 #define VIRTNET_FAIL_ON(vdev, fbit, dbit) \
6488 virtnet_fail_on_feature(vdev, fbit, #fbit, dbit)
6490 static bool virtnet_validate_features(struct virtio_device
*vdev
)
6492 if (!virtio_has_feature(vdev
, VIRTIO_NET_F_CTRL_VQ
) &&
6493 (VIRTNET_FAIL_ON(vdev
, VIRTIO_NET_F_CTRL_RX
,
6494 "VIRTIO_NET_F_CTRL_VQ") ||
6495 VIRTNET_FAIL_ON(vdev
, VIRTIO_NET_F_CTRL_VLAN
,
6496 "VIRTIO_NET_F_CTRL_VQ") ||
6497 VIRTNET_FAIL_ON(vdev
, VIRTIO_NET_F_GUEST_ANNOUNCE
,
6498 "VIRTIO_NET_F_CTRL_VQ") ||
6499 VIRTNET_FAIL_ON(vdev
, VIRTIO_NET_F_MQ
, "VIRTIO_NET_F_CTRL_VQ") ||
6500 VIRTNET_FAIL_ON(vdev
, VIRTIO_NET_F_CTRL_MAC_ADDR
,
6501 "VIRTIO_NET_F_CTRL_VQ") ||
6502 VIRTNET_FAIL_ON(vdev
, VIRTIO_NET_F_RSS
,
6503 "VIRTIO_NET_F_CTRL_VQ") ||
6504 VIRTNET_FAIL_ON(vdev
, VIRTIO_NET_F_HASH_REPORT
,
6505 "VIRTIO_NET_F_CTRL_VQ") ||
6506 VIRTNET_FAIL_ON(vdev
, VIRTIO_NET_F_NOTF_COAL
,
6507 "VIRTIO_NET_F_CTRL_VQ") ||
6508 VIRTNET_FAIL_ON(vdev
, VIRTIO_NET_F_VQ_NOTF_COAL
,
6509 "VIRTIO_NET_F_CTRL_VQ"))) {
6516 #define MIN_MTU ETH_MIN_MTU
6517 #define MAX_MTU ETH_MAX_MTU
6519 static int virtnet_validate(struct virtio_device
*vdev
)
6521 if (!vdev
->config
->get
) {
6522 dev_err(&vdev
->dev
, "%s failure: config access disabled\n",
6527 if (!virtnet_validate_features(vdev
))
6530 if (virtio_has_feature(vdev
, VIRTIO_NET_F_MTU
)) {
6531 int mtu
= virtio_cread16(vdev
,
6532 offsetof(struct virtio_net_config
,
6535 __virtio_clear_bit(vdev
, VIRTIO_NET_F_MTU
);
6538 if (virtio_has_feature(vdev
, VIRTIO_NET_F_STANDBY
) &&
6539 !virtio_has_feature(vdev
, VIRTIO_NET_F_MAC
)) {
6540 dev_warn(&vdev
->dev
, "device advertises feature VIRTIO_NET_F_STANDBY but not VIRTIO_NET_F_MAC, disabling standby");
6541 __virtio_clear_bit(vdev
, VIRTIO_NET_F_STANDBY
);
static bool virtnet_check_guest_gso(const struct virtnet_info *vi)
{
	return virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_TSO4) ||
		virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_TSO6) ||
		virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_ECN) ||
		virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_UFO) ||
		(virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_USO4) &&
		 virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_USO6));
}

static void virtnet_set_big_packets(struct virtnet_info *vi, const int mtu)
{
	bool guest_gso = virtnet_check_guest_gso(vi);

	/* If the device can receive ANY guest GSO packets, regardless of MTU,
	 * allocate packets of maximum size, otherwise limit them to only
	 * MTU-sized buffers.
	 */
	if (mtu > ETH_DATA_LEN || guest_gso) {
		vi->big_packets = true;
		vi->big_packets_num_skbfrags = guest_gso ? MAX_SKB_FRAGS : DIV_ROUND_UP(mtu, PAGE_SIZE);
	}
}
#define VIRTIO_NET_HASH_REPORT_MAX_TABLE 10
static enum xdp_rss_hash_type
virtnet_xdp_rss_type[VIRTIO_NET_HASH_REPORT_MAX_TABLE] = {
	[VIRTIO_NET_HASH_REPORT_NONE] = XDP_RSS_TYPE_NONE,
	[VIRTIO_NET_HASH_REPORT_IPv4] = XDP_RSS_TYPE_L3_IPV4,
	[VIRTIO_NET_HASH_REPORT_TCPv4] = XDP_RSS_TYPE_L4_IPV4_TCP,
	[VIRTIO_NET_HASH_REPORT_UDPv4] = XDP_RSS_TYPE_L4_IPV4_UDP,
	[VIRTIO_NET_HASH_REPORT_IPv6] = XDP_RSS_TYPE_L3_IPV6,
	[VIRTIO_NET_HASH_REPORT_TCPv6] = XDP_RSS_TYPE_L4_IPV6_TCP,
	[VIRTIO_NET_HASH_REPORT_UDPv6] = XDP_RSS_TYPE_L4_IPV6_UDP,
	[VIRTIO_NET_HASH_REPORT_IPv6_EX] = XDP_RSS_TYPE_L3_IPV6_EX,
	[VIRTIO_NET_HASH_REPORT_TCPv6_EX] = XDP_RSS_TYPE_L4_IPV6_TCP_EX,
	[VIRTIO_NET_HASH_REPORT_UDPv6_EX] = XDP_RSS_TYPE_L4_IPV6_UDP_EX
};

static int virtnet_xdp_rx_hash(const struct xdp_md *_ctx, u32 *hash,
			       enum xdp_rss_hash_type *rss_type)
{
	const struct xdp_buff *xdp = (void *)_ctx;
	struct virtio_net_hdr_v1_hash *hdr_hash;
	struct virtnet_info *vi;
	u16 hash_report;

	if (!(xdp->rxq->dev->features & NETIF_F_RXHASH))
		return -ENODATA;

	vi = netdev_priv(xdp->rxq->dev);
	hdr_hash = (struct virtio_net_hdr_v1_hash *)(xdp->data - vi->hdr_len);
	hash_report = __le16_to_cpu(hdr_hash->hash_report);

	if (hash_report >= VIRTIO_NET_HASH_REPORT_MAX_TABLE)
		hash_report = VIRTIO_NET_HASH_REPORT_NONE;

	*rss_type = virtnet_xdp_rss_type[hash_report];
	*hash = __le32_to_cpu(hdr_hash->hash_value);

	return 0;
}

static const struct xdp_metadata_ops virtnet_xdp_metadata_ops = {
	.xmo_rx_hash = virtnet_xdp_rx_hash,
};
6613 static int virtnet_probe(struct virtio_device
*vdev
)
6615 int i
, err
= -ENOMEM
;
6616 struct net_device
*dev
;
6617 struct virtnet_info
*vi
;
6618 u16 max_queue_pairs
;
6621 /* Find if host supports multiqueue/rss virtio_net device */
6622 max_queue_pairs
= 1;
6623 if (virtio_has_feature(vdev
, VIRTIO_NET_F_MQ
) || virtio_has_feature(vdev
, VIRTIO_NET_F_RSS
))
6625 virtio_cread16(vdev
, offsetof(struct virtio_net_config
, max_virtqueue_pairs
));
6627 /* We need at least 2 queue's */
6628 if (max_queue_pairs
< VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN
||
6629 max_queue_pairs
> VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX
||
6630 !virtio_has_feature(vdev
, VIRTIO_NET_F_CTRL_VQ
))
6631 max_queue_pairs
= 1;
6633 /* Allocate ourselves a network device with room for our info */
6634 dev
= alloc_etherdev_mq(sizeof(struct virtnet_info
), max_queue_pairs
);
6638 /* Set up network device as normal. */
6639 dev
->priv_flags
|= IFF_UNICAST_FLT
| IFF_LIVE_ADDR_CHANGE
|
6640 IFF_TX_SKB_NO_LINEAR
;
6641 dev
->netdev_ops
= &virtnet_netdev
;
6642 dev
->stat_ops
= &virtnet_stat_ops
;
6643 dev
->features
= NETIF_F_HIGHDMA
;
6645 dev
->ethtool_ops
= &virtnet_ethtool_ops
;
6646 SET_NETDEV_DEV(dev
, &vdev
->dev
);
6648 /* Do we support "hardware" checksums? */
6649 if (virtio_has_feature(vdev
, VIRTIO_NET_F_CSUM
)) {
6650 /* This opens up the world of extra features. */
6651 dev
->hw_features
|= NETIF_F_HW_CSUM
| NETIF_F_SG
;
6653 dev
->features
|= NETIF_F_HW_CSUM
| NETIF_F_SG
;
6655 if (virtio_has_feature(vdev
, VIRTIO_NET_F_GSO
)) {
6656 dev
->hw_features
|= NETIF_F_TSO
6657 | NETIF_F_TSO_ECN
| NETIF_F_TSO6
;
6659 /* Individual feature bits: what can host handle? */
6660 if (virtio_has_feature(vdev
, VIRTIO_NET_F_HOST_TSO4
))
6661 dev
->hw_features
|= NETIF_F_TSO
;
6662 if (virtio_has_feature(vdev
, VIRTIO_NET_F_HOST_TSO6
))
6663 dev
->hw_features
|= NETIF_F_TSO6
;
6664 if (virtio_has_feature(vdev
, VIRTIO_NET_F_HOST_ECN
))
6665 dev
->hw_features
|= NETIF_F_TSO_ECN
;
6666 if (virtio_has_feature(vdev
, VIRTIO_NET_F_HOST_USO
))
6667 dev
->hw_features
|= NETIF_F_GSO_UDP_L4
;
6669 dev
->features
|= NETIF_F_GSO_ROBUST
;
6672 dev
->features
|= dev
->hw_features
& NETIF_F_ALL_TSO
;
6673 /* (!csum && gso) case will be fixed by register_netdev() */
6676 /* 1. With VIRTIO_NET_F_GUEST_CSUM negotiation, the driver doesn't
6677 * need to calculate checksums for partially checksummed packets,
6678 * as they're considered valid by the upper layer.
6679 * 2. Without VIRTIO_NET_F_GUEST_CSUM negotiation, the driver only
6680 * receives fully checksummed packets. The device may assist in
6681 * validating these packets' checksums, so the driver won't have to.
6683 dev
->features
|= NETIF_F_RXCSUM
;
6685 if (virtio_has_feature(vdev
, VIRTIO_NET_F_GUEST_TSO4
) ||
6686 virtio_has_feature(vdev
, VIRTIO_NET_F_GUEST_TSO6
))
6687 dev
->features
|= NETIF_F_GRO_HW
;
6688 if (virtio_has_feature(vdev
, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS
))
6689 dev
->hw_features
|= NETIF_F_GRO_HW
;
6691 dev
->vlan_features
= dev
->features
;
6692 dev
->xdp_features
= NETDEV_XDP_ACT_BASIC
| NETDEV_XDP_ACT_REDIRECT
|
6693 NETDEV_XDP_ACT_XSK_ZEROCOPY
;
6695 /* MTU range: 68 - 65535 */
6696 dev
->min_mtu
= MIN_MTU
;
6697 dev
->max_mtu
= MAX_MTU
;
6699 /* Configuration may specify what MAC to use. Otherwise random. */
6700 if (virtio_has_feature(vdev
, VIRTIO_NET_F_MAC
)) {
6703 virtio_cread_bytes(vdev
,
6704 offsetof(struct virtio_net_config
, mac
),
6706 eth_hw_addr_set(dev
, addr
);
6708 eth_hw_addr_random(dev
);
6709 dev_info(&vdev
->dev
, "Assigned random MAC address %pM\n",
6713 /* Set up our device-specific information */
6714 vi
= netdev_priv(dev
);
6719 INIT_WORK(&vi
->config_work
, virtnet_config_changed_work
);
6720 INIT_WORK(&vi
->rx_mode_work
, virtnet_rx_mode_work
);
6721 spin_lock_init(&vi
->refill_lock
);
6723 if (virtio_has_feature(vdev
, VIRTIO_NET_F_MRG_RXBUF
)) {
6724 vi
->mergeable_rx_bufs
= true;
6725 dev
->xdp_features
|= NETDEV_XDP_ACT_RX_SG
;
6728 if (virtio_has_feature(vdev
, VIRTIO_NET_F_HASH_REPORT
))
6729 vi
->has_rss_hash_report
= true;
6731 if (virtio_has_feature(vdev
, VIRTIO_NET_F_RSS
)) {
6734 vi
->rss_indir_table_size
=
6735 virtio_cread16(vdev
, offsetof(struct virtio_net_config
,
6736 rss_max_indirection_table_length
));
6738 err
= rss_indirection_table_alloc(&vi
->rss
, vi
->rss_indir_table_size
);
6742 if (vi
->has_rss
|| vi
->has_rss_hash_report
) {
6744 virtio_cread8(vdev
, offsetof(struct virtio_net_config
, rss_max_key_size
));
6745 if (vi
->rss_key_size
> VIRTIO_NET_RSS_MAX_KEY_SIZE
) {
6746 dev_err(&vdev
->dev
, "rss_max_key_size=%u exceeds the limit %u.\n",
6747 vi
->rss_key_size
, VIRTIO_NET_RSS_MAX_KEY_SIZE
);
6752 vi
->rss_hash_types_supported
=
6753 virtio_cread32(vdev
, offsetof(struct virtio_net_config
, supported_hash_types
));
6754 vi
->rss_hash_types_supported
&=
6755 ~(VIRTIO_NET_RSS_HASH_TYPE_IP_EX
|
6756 VIRTIO_NET_RSS_HASH_TYPE_TCP_EX
|
6757 VIRTIO_NET_RSS_HASH_TYPE_UDP_EX
);
6759 dev
->hw_features
|= NETIF_F_RXHASH
;
6760 dev
->xdp_metadata_ops
= &virtnet_xdp_metadata_ops
;
6763 if (vi
->has_rss_hash_report
)
6764 vi
->hdr_len
= sizeof(struct virtio_net_hdr_v1_hash
);
6765 else if (virtio_has_feature(vdev
, VIRTIO_NET_F_MRG_RXBUF
) ||
6766 virtio_has_feature(vdev
, VIRTIO_F_VERSION_1
))
6767 vi
->hdr_len
= sizeof(struct virtio_net_hdr_mrg_rxbuf
);
6769 vi
->hdr_len
= sizeof(struct virtio_net_hdr
);
6771 if (virtio_has_feature(vdev
, VIRTIO_F_ANY_LAYOUT
) ||
6772 virtio_has_feature(vdev
, VIRTIO_F_VERSION_1
))
6773 vi
->any_header_sg
= true;
6775 if (virtio_has_feature(vdev
, VIRTIO_NET_F_CTRL_VQ
))
6778 mutex_init(&vi
->cvq_lock
);
	if (virtio_has_feature(vdev, VIRTIO_NET_F_MTU)) {
		mtu = virtio_cread16(vdev,
				     offsetof(struct virtio_net_config,
					      mtu));
		if (mtu < dev->min_mtu) {
			/* Should never trigger: MTU was previously validated
			 * in virtnet_validate.
			 */
			dev_err(&vdev->dev,
				"device MTU appears to have changed it is now %d < %d",
				mtu, dev->min_mtu);
			err = -EINVAL;
			goto free;
		}

		dev->mtu = mtu;
		dev->max_mtu = mtu;
	}

	virtnet_set_big_packets(vi, mtu);

	if (vi->any_header_sg)
		dev->needed_headroom = vi->hdr_len;

	/* Enable multiqueue by default */
	if (num_online_cpus() >= max_queue_pairs)
		vi->curr_queue_pairs = max_queue_pairs;
	else
		vi->curr_queue_pairs = num_online_cpus();
	vi->max_queue_pairs = max_queue_pairs;

	/* Allocate/initialize the rx/tx queues, and invoke find_vqs */
	err = init_vqs(vi);
	if (err)
		goto free;

	if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_NOTF_COAL)) {
		vi->intr_coal_rx.max_usecs = 0;
		vi->intr_coal_tx.max_usecs = 0;
		vi->intr_coal_rx.max_packets = 0;

		/* Keep the default values of the coalescing parameters
		 * aligned with the default napi_tx state.
		 */
		if (vi->sq[0].napi.weight)
			vi->intr_coal_tx.max_packets = 1;
		else
			vi->intr_coal_tx.max_packets = 0;
	}

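	/* Per-virtqueue coalescing mirrors the device-wide defaults above,
	 * so that tx NAPI keeps getting a completion notification per packet.
	 */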
	if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_VQ_NOTF_COAL)) {
		/* The reason is the same as VIRTIO_NET_F_NOTF_COAL. */
		for (i = 0; i < vi->max_queue_pairs; i++)
			if (vi->sq[i].napi.weight)
				vi->sq[i].intr_coal.max_packets = 1;

		err = virtnet_init_irq_moder(vi);
		if (err)
			goto free;
	}

#ifdef CONFIG_SYSFS
	if (vi->mergeable_rx_bufs)
		dev->sysfs_rx_queue_group = &virtio_net_mrg_rx_group;
#endif
	netif_set_real_num_tx_queues(dev, vi->curr_queue_pairs);
	netif_set_real_num_rx_queues(dev, vi->curr_queue_pairs);

	virtnet_init_settings(dev);

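	/* VIRTIO_NET_F_STANDBY: create a net_failover master so this
	 * virtio interface can serve as the standby path for a passthrough
	 * device with the same MAC address.
	 */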
	if (virtio_has_feature(vdev, VIRTIO_NET_F_STANDBY)) {
		vi->failover = net_failover_create(vi->dev);
		if (IS_ERR(vi->failover)) {
			err = PTR_ERR(vi->failover);
			goto free_vqs;
		}
	}

	if (vi->has_rss || vi->has_rss_hash_report)
		virtnet_init_default_rss(vi);

	enable_rx_mode_work(vi);

	/* serialize netdev register + virtio_device_ready() with ndo_open() */
	rtnl_lock();

	err = register_netdevice(dev);
	if (err) {
		pr_debug("virtio_net: registering device failed\n");
		rtnl_unlock();
		goto free_failover;
	}

	/* Disable config change notification until ndo_open. */
	virtio_config_driver_disable(vi->vdev);

	virtio_device_ready(vdev);

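	/* The device is now live; push the remaining configuration (RSS,
	 * queue count, MAC, stats capabilities) over the control virtqueue
	 * while the rtnl lock is still held.
	 */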
	if (vi->has_rss || vi->has_rss_hash_report) {
		if (!virtnet_commit_rss_command(vi)) {
			dev_warn(&vdev->dev, "RSS disabled because committing failed.\n");
			dev->hw_features &= ~NETIF_F_RXHASH;
			vi->has_rss_hash_report = false;
			vi->has_rss = false;
		}
	}

	virtnet_set_queues(vi, vi->curr_queue_pairs);

	/* a random MAC address has been assigned, notify the device.
	 * We don't fail probe if VIRTIO_NET_F_CTRL_MAC_ADDR is not there
	 * because many devices work fine without getting MAC explicitly
	 */
	if (!virtio_has_feature(vdev, VIRTIO_NET_F_MAC) &&
	    virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_MAC_ADDR)) {
		struct scatterlist sg;

		sg_init_one(&sg, dev->dev_addr, dev->addr_len);
		if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MAC,
					  VIRTIO_NET_CTRL_MAC_ADDR_SET, &sg)) {
			pr_debug("virtio_net: setting MAC address failed\n");
			rtnl_unlock();
			err = -EINVAL;
			goto free_unregister_netdev;
		}
	}

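	/* Query which hardware statistics the device can report; the
	 * resulting bitmap is consulted later by the stats handlers.
	 */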
	if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_DEVICE_STATS)) {
		struct virtio_net_stats_capabilities *stats_cap __free(kfree) = NULL;
		struct scatterlist sg;
		__le64 v;

		stats_cap = kzalloc(sizeof(*stats_cap), GFP_KERNEL);
		if (!stats_cap) {
			rtnl_unlock();
			err = -ENOMEM;
			goto free_unregister_netdev;
		}

		sg_init_one(&sg, stats_cap, sizeof(*stats_cap));

		if (!virtnet_send_command_reply(vi, VIRTIO_NET_CTRL_STATS,
						VIRTIO_NET_CTRL_STATS_QUERY,
						NULL, &sg)) {
			pr_debug("virtio_net: fail to get stats capability\n");
			rtnl_unlock();
			err = -EINVAL;
			goto free_unregister_netdev;
		}

		v = stats_cap->supported_stats_types[0];
		vi->device_stats_cap = le64_to_cpu(v);
	}

	/* Assume link up if device can't report link status,
	   otherwise get link status from config. */
	netif_carrier_off(dev);
	if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_STATUS)) {
		virtnet_config_changed_work(&vi->config_work);
	} else {
		vi->status = VIRTIO_NET_S_LINK_UP;
		virtnet_update_settings(vi);
		netif_carrier_on(dev);
	}

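	/* Record which guest offloads were negotiated so they can be
	 * toggled at runtime (e.g. when ethtool disables GRO_HW or an
	 * XDP program requires offloads to be switched off).
	 */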
	for (i = 0; i < ARRAY_SIZE(guest_offloads); i++)
		if (virtio_has_feature(vi->vdev, guest_offloads[i]))
			set_bit(guest_offloads[i], &vi->guest_offloads);
	vi->guest_offloads_capable = vi->guest_offloads;

	rtnl_unlock();

	err = virtnet_cpu_notif_add(vi);
	if (err) {
		pr_debug("virtio_net: registering cpu notifier failed\n");
		goto free_unregister_netdev;
	}

	pr_debug("virtnet: registered device %s with %d RX and TX vq's\n",
		 dev->name, max_queue_pairs);

	return 0;

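	/* Error unwind: each label below releases state in the reverse
	 * order of its setup, ending with free_netdev().
	 */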
free_unregister_netdev:
	unregister_netdev(dev);
free_failover:
	net_failover_destroy(vi->failover);
free_vqs:
	virtio_reset_device(vdev);
	cancel_delayed_work_sync(&vi->refill);
	free_receive_page_frags(vi);
	virtnet_del_vqs(vi);
free:
	free_netdev(dev);
	return err;
}

static void remove_vq_common(struct virtnet_info *vi)
{
	int i;

	virtio_reset_device(vi->vdev);

	/* Free unused buffers in both send and recv, if any. */
	free_unused_bufs(vi);

	/*
	 * Rule of thumb is netdev_tx_reset_queue() should follow any
	 * skb freeing not followed by netdev_tx_completed_queue()
	 */
	for (i = 0; i < vi->max_queue_pairs; i++)
		netdev_tx_reset_queue(netdev_get_tx_queue(vi->dev, i));

	free_receive_bufs(vi);

	free_receive_page_frags(vi);

	virtnet_del_vqs(vi);
}

static void virtnet_remove(struct virtio_device *vdev)
{
	struct virtnet_info *vi = vdev->priv;

	virtnet_cpu_notif_remove(vi);

	/* Make sure no work handler is accessing the device. */
	flush_work(&vi->config_work);
	disable_rx_mode_work(vi);
	flush_work(&vi->rx_mode_work);

	virtnet_free_irq_moder(vi);

	unregister_netdev(vi->dev);

	net_failover_destroy(vi->failover);

	remove_vq_common(vi);

	rss_indirection_table_free(&vi->rss);

	free_netdev(vi->dev);
}

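/* Suspend/resume: freeze tears down the virtqueues after quiescing the
 * device, restore rebuilds them and re-enables the queues; both are only
 * referenced under CONFIG_PM_SLEEP, hence __maybe_unused.
 */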
static __maybe_unused int virtnet_freeze(struct virtio_device *vdev)
{
	struct virtnet_info *vi = vdev->priv;

	virtnet_cpu_notif_remove(vi);
	virtnet_freeze_down(vdev);
	remove_vq_common(vi);

	return 0;
}

static __maybe_unused int virtnet_restore(struct virtio_device *vdev)
{
	struct virtnet_info *vi = vdev->priv;
	int err;

	err = virtnet_restore_up(vdev);
	if (err)
		return err;
	virtnet_set_queues(vi, vi->curr_queue_pairs);

	err = virtnet_cpu_notif_add(vi);
	if (err) {
		virtnet_freeze_down(vdev);
		remove_vq_common(vi);
		return err;
	}

	return 0;
}

static struct virtio_device_id id_table[] = {
	{ VIRTIO_ID_NET, VIRTIO_DEV_ANY_ID },
	{ 0 },
};

#define VIRTNET_FEATURES \
	VIRTIO_NET_F_CSUM, VIRTIO_NET_F_GUEST_CSUM, \
	VIRTIO_NET_F_MAC, \
	VIRTIO_NET_F_HOST_TSO4, VIRTIO_NET_F_HOST_UFO, VIRTIO_NET_F_HOST_TSO6, \
	VIRTIO_NET_F_HOST_ECN, VIRTIO_NET_F_GUEST_TSO4, VIRTIO_NET_F_GUEST_TSO6, \
	VIRTIO_NET_F_GUEST_ECN, VIRTIO_NET_F_GUEST_UFO, \
	VIRTIO_NET_F_HOST_USO, VIRTIO_NET_F_GUEST_USO4, VIRTIO_NET_F_GUEST_USO6, \
	VIRTIO_NET_F_MRG_RXBUF, VIRTIO_NET_F_STATUS, VIRTIO_NET_F_CTRL_VQ, \
	VIRTIO_NET_F_CTRL_RX, VIRTIO_NET_F_CTRL_VLAN, \
	VIRTIO_NET_F_GUEST_ANNOUNCE, VIRTIO_NET_F_MQ, \
	VIRTIO_NET_F_CTRL_MAC_ADDR, \
	VIRTIO_NET_F_MTU, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS, \
	VIRTIO_NET_F_SPEED_DUPLEX, VIRTIO_NET_F_STANDBY, \
	VIRTIO_NET_F_RSS, VIRTIO_NET_F_HASH_REPORT, VIRTIO_NET_F_NOTF_COAL, \
	VIRTIO_NET_F_VQ_NOTF_COAL, \
	VIRTIO_NET_F_GUEST_HDRLEN, VIRTIO_NET_F_DEVICE_STATS

static unsigned int features[] = {
	VIRTNET_FEATURES,
};

static unsigned int features_legacy[] = {
	VIRTNET_FEATURES,
	VIRTIO_NET_F_GSO,
	VIRTIO_F_ANY_LAYOUT,
};

static struct virtio_driver virtio_net_driver = {
	.feature_table = features,
	.feature_table_size = ARRAY_SIZE(features),
	.feature_table_legacy = features_legacy,
	.feature_table_size_legacy = ARRAY_SIZE(features_legacy),
	.driver.name =	KBUILD_MODNAME,
	.id_table =	id_table,
	.validate =	virtnet_validate,
	.probe =	virtnet_probe,
	.remove =	virtnet_remove,
	.config_changed = virtnet_config_changed,
#ifdef CONFIG_PM_SLEEP
	.freeze =	virtnet_freeze,
	.restore =	virtnet_restore,
#endif
};

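/* Module init registers two CPU hotplug states (used for per-queue CPU
 * affinity bookkeeping) before registering the virtio driver, and unwinds
 * them in reverse order on failure.
 */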
static __init int virtio_net_driver_init(void)
{
	int ret;

	ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, "virtio/net:online",
				      virtnet_cpu_online,
				      virtnet_cpu_down_prep);
	if (ret < 0)
		goto out;
	virtionet_online = ret;
	ret = cpuhp_setup_state_multi(CPUHP_VIRT_NET_DEAD, "virtio/net:dead",
				      NULL, virtnet_cpu_dead);
	if (ret)
		goto err_dead;
	ret = register_virtio_driver(&virtio_net_driver);
	if (ret)
		goto err_virtio;
	return 0;
err_virtio:
	cpuhp_remove_multi_state(CPUHP_VIRT_NET_DEAD);
err_dead:
	cpuhp_remove_multi_state(virtionet_online);
out:
	return ret;
}
module_init(virtio_net_driver_init);

static __exit void virtio_net_driver_exit(void)
{
	unregister_virtio_driver(&virtio_net_driver);
	cpuhp_remove_multi_state(CPUHP_VIRT_NET_DEAD);
	cpuhp_remove_multi_state(virtionet_online);
}
module_exit(virtio_net_driver_exit);

MODULE_DEVICE_TABLE(virtio, id_table);
MODULE_DESCRIPTION("Virtio network driver");
MODULE_LICENSE("GPL");